3
5
10 }
11
12 double accounted = 0.0;
14 if (profile[i].count > 0) {
15 printfQuda(
" %20s = %9.3f secs (%7.3f%%),\t with %8d calls at %6.3e us per call\n",
16 (
const char *)&pname[i][0], profile[i].time, 100 * profile[i].time / profile[
QUDA_PROFILE_TOTAL].time,
17 profile[i].count, 1e6 * profile[i].time / profile[i].count);
18 accounted += profile[i].
time;
19 }
20 }
21 if (accounted > 0.0) {
23 printfQuda(
" total accounted = %9.3f secs (%7.3f%%)\n", accounted,
25 printfQuda(
" total missing = %9.3f secs (%7.3f%%)\n", missing,
27 }
28
30 warningQuda(
"Accounted time %9.3f secs in %s is greater than total time %9.3f secs", accounted,
32 }
33
34 }
35
37 "upload",
38 "init",
39 "preamble",
40 "compute",
41 "comms",
42 "epilogue",
43 "free",
44 "file i/o",
45 "chronology",
46 "eigen",
47 "eigenLU",
48 "eigenEV",
49 "eigenQR",
50 "arpack",
51 "host compute",
52 "dummy",
53 "pack kernel",
54 "dslash kernel",
55 "gather",
56 "scatter",
57 "kernel launch",
58 "event record",
59 "event query",
60 "stream wait event",
61 "set func attribute",
62 "event synchronize",
63 "stream synchronize",
64 "device synchronize",
65 "memcpy d2d async",
66 "memcpy d2h async",
67 "memcpy2d d2h async",
68 "memcpy h2d async",
69 "memcpy default async",
70 "comms start",
71 "comms query",
72 "constant",
73 "total"};
74
// Static data used for NVTX instrumentation; these definitions are compiled
// only when INTERFACE_NVTX is defined.
75 #ifdef INTERFACE_NVTX
// Table of seven 32-bit colour constants.
// NOTE(review): presumably ARGB values passed to NVTX range markers -- confirm at the call site.
76 const uint32_t TimeProfile::nvtx_colors[] = { 0xff00ff00, 0xff0000ff, 0xffffff00, 0xffff00ff, 0xff00ffff, 0xffff0000, 0xffffffff };
// Number of entries in nvtx_colors, computed from the array's size so it
// follows the initializer list automatically.
77 const int TimeProfile::nvtx_num_colors = sizeof(nvtx_colors)/sizeof(uint32_t);
78 #endif
79
83
87 }
88
89 double accounted = 0.0;
90 bool print_timer = true; // whether to print that timer
92 if (global_profile[i].count > 0) {
93 if (print_timer)
94 printfQuda(
" %20s = %9.3f secs (%7.3f%%),\t with %8d calls at %6.3e us per call\n",
95 (const char *)&pname[i][0], global_profile[i].time,
96 100 * global_profile[i].time / global_profile[
QUDA_PROFILE_TOTAL].time, global_profile[i].count,
97 1e6 * global_profile[i].time / global_profile[i].count);
98 accounted += global_profile[i].
time;
99 }
100 }
101 if (accounted > 0.0) {
103 printfQuda(
" total accounted = %9.3f secs (%7.3f%%)\n", accounted,
105 printfQuda(
" total missing = %9.3f secs (%7.3f%%)\n", missing,
107 }
108
110 warningQuda(
"Accounted time %9.3f secs in %s is greater than total time %9.3f secs\n", accounted,
"QUDA",
112 }
113 }
114
115 }
static void PrintGlobal()
@ QUDA_PROFILE_LOWER_LEVEL