QUDA: quda/lib/timer.cpp Source File

QUDA  v1.1.0
A library for QCD on GPUs
timer.cpp
Go to the documentation of this file.
1 #include <quda_internal.h>
2 #include <timer.h>
3 
4 namespace quda {
5 
7   void TimeProfile::Print() {
8  if (profile[QUDA_PROFILE_TOTAL].time > 0.0) {
9  printfQuda("\n %20s Total time = %9.3f secs\n", fname.c_str(), profile[QUDA_PROFILE_TOTAL].time);
10  }
11 
12  double accounted = 0.0;
13  for (int i=0; i<QUDA_PROFILE_COUNT-1; i++) {
14  if (profile[i].count > 0) {
15  printfQuda(" %20s = %9.3f secs (%7.3f%%),\t with %8d calls at %6.3e us per call\n",
16  (const char *)&pname[i][0], profile[i].time, 100 * profile[i].time / profile[QUDA_PROFILE_TOTAL].time,
17  profile[i].count, 1e6 * profile[i].time / profile[i].count);
18  accounted += profile[i].time;
19  }
20  }
21  if (accounted > 0.0) {
22  double missing = profile[QUDA_PROFILE_TOTAL].time - accounted;
23  printfQuda(" total accounted = %9.3f secs (%7.3f%%)\n", accounted,
24  100 * accounted / profile[QUDA_PROFILE_TOTAL].time);
25  printfQuda(" total missing = %9.3f secs (%7.3f%%)\n", missing,
26  100 * missing / profile[QUDA_PROFILE_TOTAL].time);
27  }
28 
29  if (accounted > profile[QUDA_PROFILE_TOTAL].time) {
30  warningQuda("Accounted time %9.3f secs in %s is greater than total time %9.3f secs", accounted,
31  (const char *)&fname[0], profile[QUDA_PROFILE_TOTAL].time);
32  }
33 
34  }
35 
36  std::string TimeProfile::pname[] = {"download",
37  "upload",
38  "init",
39  "preamble",
40  "compute",
41  "comms",
42  "epilogue",
43  "free",
44  "file i/o",
45  "chronology",
46  "eigen",
47  "eigenLU",
48  "eigenEV",
49  "eigenQR",
50  "arpack",
51  "host compute",
52  "dummy",
53  "pack kernel",
54  "dslash kernel",
55  "gather",
56  "scatter",
57  "kernel launch",
58  "event record",
59  "event query",
60  "stream wait event",
61  "set func attribute",
62  "event synchronize",
63  "stream synchronize",
64  "device synchronize",
65  "memcpy d2d async",
66  "memcpy d2h async",
67  "memcpy2d d2h async",
68  "memcpy h2d async",
69  "memcpy default async",
70  "comms start",
71  "comms query",
72  "constant",
73  "total"};
74 
#ifdef INTERFACE_NVTX
// Colour palette used for NVTX annotation; only built when INTERFACE_NVTX is
// defined.  Colour names below assume a 0xAARRGGBB layout - TODO confirm.
const uint32_t TimeProfile::nvtx_colors[] = {
  0xff00ff00, // green
  0xff0000ff, // blue
  0xffffff00, // yellow
  0xffff00ff, // magenta
  0xff00ffff, // cyan
  0xffff0000, // red
  0xffffffff  // white
};
// Number of entries in the palette, derived from the array itself.
const int TimeProfile::nvtx_num_colors = sizeof(nvtx_colors) / sizeof(nvtx_colors[0]);
#endif
79 
80  Timer TimeProfile::global_profile[QUDA_PROFILE_COUNT];
81  bool TimeProfile::global_switchOff[QUDA_PROFILE_COUNT] = {};
82  int TimeProfile::global_total_level[QUDA_PROFILE_COUNT] = {};
83 
84   void TimeProfile::PrintGlobal() {
85  if (global_profile[QUDA_PROFILE_TOTAL].time > 0.0) {
86  printfQuda("\n %20s Total time = %9.3f secs\n", "QUDA", global_profile[QUDA_PROFILE_TOTAL].time);
87  }
88 
89  double accounted = 0.0;
90  bool print_timer = true; // whether to print that timer
91  for (int i=0; i<QUDA_PROFILE_LOWER_LEVEL; i++) { // we do not want to print detailed lower level timers
92  if (global_profile[i].count > 0) {
93  if (print_timer)
94  printfQuda(" %20s = %9.3f secs (%7.3f%%),\t with %8d calls at %6.3e us per call\n",
95  (const char *)&pname[i][0], global_profile[i].time,
96  100 * global_profile[i].time / global_profile[QUDA_PROFILE_TOTAL].time, global_profile[i].count,
97  1e6 * global_profile[i].time / global_profile[i].count);
98  accounted += global_profile[i].time;
99  }
100  }
101  if (accounted > 0.0) {
102  double missing = global_profile[QUDA_PROFILE_TOTAL].time - accounted;
103  printfQuda(" total accounted = %9.3f secs (%7.3f%%)\n", accounted,
104  100 * accounted / global_profile[QUDA_PROFILE_TOTAL].time);
105  printfQuda(" total missing = %9.3f secs (%7.3f%%)\n", missing,
106  100 * missing / global_profile[QUDA_PROFILE_TOTAL].time);
107  }
108 
109  if (accounted > global_profile[QUDA_PROFILE_TOTAL].time) {
110  warningQuda("Accounted time %9.3f secs in %s is greater than total time %9.3f secs\n", accounted, "QUDA",
111  global_profile[QUDA_PROFILE_TOTAL].time);
112  }
113  }
114 
115 }
quda::TimeProfile::Print
void Print()
Definition: timer.cpp:7
quda::TimeProfile::PrintGlobal
static void PrintGlobal()
Definition: timer.cpp:84
quda
Definition: blas_lapack.h:24
quda::QUDA_PROFILE_COUNT
@ QUDA_PROFILE_COUNT
Definition: timer.h:150
quda::QUDA_PROFILE_TOTAL
@ QUDA_PROFILE_TOTAL
Definition: timer.h:149
quda::QUDA_PROFILE_LOWER_LEVEL
@ QUDA_PROFILE_LOWER_LEVEL
Definition: timer.h:122
testing::internal::string
::std::string string
Definition: gtest-port.h:891
quda::Timer::time
double time
Definition: timer.h:44
printfQuda
#define printfQuda(...)
Definition: util_quda.h:114
warningQuda
#define warningQuda(...)
Definition: util_quda.h:132

Generated on Thu Oct 28 2021 16:10:27 for QUDA by doxygen 1.9.1

AltStyle によって変換されたページ (->オリジナル) /