#include <stdio.h>
#include <stdlib.h>
#include "CUDAWrapNVML.h"
#include "cuda_runtime.h"

/*
 * Wrappers to emulate dlopen() on other systems like Windows
 */
#if defined(_MSC_VER) || defined(_WIN32) || defined(_WIN64)
#include <windows.h>
static void *wrap_dlopen(const char *filename) {
  return (void *)LoadLibrary(filename);
}
static void *wrap_dlsym(void *h, const char *sym) {
  return (void *)GetProcAddress((HINSTANCE)h, sym);
}
static int wrap_dlclose(void *h) {
  /* FreeLibrary returns nonzero on success */
  return (!FreeLibrary((HINSTANCE)h));
}
#else
/* assume we can use dlopen itself... */
#include <dlfcn.h>
static void *wrap_dlopen(const char *filename) {
  return dlopen(filename, RTLD_NOW);
}
static void *wrap_dlsym(void *h, const char *sym) {
  return dlsym(h, sym);
}
static int wrap_dlclose(void *h) {
  return dlclose(h);
}
#endif

#if defined(__cplusplus)
extern "C" {
#endif

wrap_nvml_handle * wrap_nvml_create() {
  int i = 0;
  wrap_nvml_handle *nvmlh = NULL;

  /*
   * We use hard-coded library installation locations for the time being...
   * No idea where or if libnvidia-ml.so is installed on Mac OS X; a
   * deep scouring of the filesystem on one of the Mac CUDA build boxes
   * I used turned up nothing, so for now it's not going to work on OS X.
   */
#if defined(_WIN64)
  /* 64-bit Windows */
  const char *plibnvidia_ml[] = {"c:/Program Files/NVIDIA Corporation/NVSMI/nvml.dll", NULL};
#elif defined(_WIN32) || defined(_MSC_VER)
  /* 32-bit Windows */
  const char *plibnvidia_ml[] = {"c:/Program Files (x86)/NVIDIA Corporation/NVSMI/nvml.dll", NULL};
#elif defined(__linux) && (defined(__i386__) || defined(__ARM_ARCH_7A__))
  /* 32-bit linux assumed */
  const char *plibnvidia_ml[] = {"/usr/lib/libnvidia-ml.so",
                                 "/usr/lib/libnvidia-ml.so.1",
                                 "/usr/lib/x86-linux-gnu/libnvidia-ml.so",
                                 "/usr/lib/x86-linux-gnu/libnvidia-ml.so.1",
                                 NULL};
#elif defined(__linux)
  /* 64-bit linux assumed */
  const char *plibnvidia_ml[] = {"/usr/lib64/libnvidia-ml.so",
                                 "/usr/lib64/libnvidia-ml.so.1",
                                 "/usr/lib/aarch64-linux-gnu/libnvidia-ml.so",
                                 "/usr/lib/aarch64-linux-gnu/libnvidia-ml.so.1",
                                 "/usr/lib/x86_64-linux-gnu/libnvidia-ml.so",
                                 "/usr/lib/x86_64-linux-gnu/libnvidia-ml.so.1",
                                 NULL};
#else
//#error "Unrecognized platform: need NVML DLL path for this platform..."
  const char *plibnvidia_ml[] = {NULL};
#endif

  void *nvml_dll = NULL;
  const char *libnvidia_ml = NULL;

  // allow explicit user override of path if needed
  if (getenv("LIBNVMLPATH") != NULL)
    nvml_dll = wrap_dlopen(getenv("LIBNVMLPATH"));

  int sopathidx = 0;
  libnvidia_ml = plibnvidia_ml[sopathidx];
  while ((nvml_dll == NULL) && (libnvidia_ml != NULL)) {
    nvml_dll = wrap_dlopen(libnvidia_ml);
    sopathidx++;
    libnvidia_ml = plibnvidia_ml[sopathidx];
  }
  if (nvml_dll == NULL)
    return NULL;

  nvmlh = (wrap_nvml_handle *) calloc(1, sizeof(wrap_nvml_handle));

  nvmlh->nvml_dll = nvml_dll;

  nvmlh->nvmlInit = (wrap_nvmlReturn_t (*)(void))
    wrap_dlsym(nvmlh->nvml_dll, "nvmlInit");

  nvmlh->nvmlDeviceGetCount = (wrap_nvmlReturn_t (*)(int *))
    wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetCount_v2");

  nvmlh->nvmlDeviceGetHandleByIndex = (wrap_nvmlReturn_t (*)(int, wrap_nvmlDevice_t *))
    wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetHandleByIndex_v2");

  nvmlh->nvmlDeviceGetPciInfo = (wrap_nvmlReturn_t (*)(wrap_nvmlDevice_t, wrap_nvmlPciInfo_t *))
    wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetPciInfo");

  nvmlh->nvmlDeviceGetName = (wrap_nvmlReturn_t (*)(wrap_nvmlDevice_t, char *, int))
    wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetName");

  nvmlh->nvmlDeviceGetTemperature = (wrap_nvmlReturn_t (*)(wrap_nvmlDevice_t, int, unsigned int *))
    wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetTemperature");

  nvmlh->nvmlDeviceGetFanSpeed = (wrap_nvmlReturn_t (*)(wrap_nvmlDevice_t, unsigned int *))
    wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetFanSpeed");

  nvmlh->nvmlDeviceGetPowerUsage = (wrap_nvmlReturn_t (*)(wrap_nvmlDevice_t, unsigned int *))
    wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetPowerUsage");

  nvmlh->nvmlDeviceGetCpuAffinity = (wrap_nvmlReturn_t (*)(wrap_nvmlDevice_t, unsigned int, unsigned long *))
    wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetCpuAffinity");

  nvmlh->nvmlDeviceSetCpuAffinity = (wrap_nvmlReturn_t (*)(wrap_nvmlDevice_t))
    wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceSetCpuAffinity");

  nvmlh->nvmlShutdown = (wrap_nvmlReturn_t (*)())
    wrap_dlsym(nvmlh->nvml_dll, "nvmlShutdown");

  if (nvmlh->nvmlInit == NULL ||
      nvmlh->nvmlShutdown == NULL ||
      nvmlh->nvmlDeviceGetCount == NULL ||
      nvmlh->nvmlDeviceGetHandleByIndex == NULL ||
      nvmlh->nvmlDeviceGetPciInfo == NULL ||
      nvmlh->nvmlDeviceGetName == NULL ||
      nvmlh->nvmlDeviceGetTemperature == NULL ||
      nvmlh->nvmlDeviceGetFanSpeed == NULL ||
      nvmlh->nvmlDeviceGetPowerUsage == NULL ||
      nvmlh->nvmlDeviceGetCpuAffinity == NULL ||
      nvmlh->nvmlDeviceSetCpuAffinity == NULL) {
#if 0
    printf("Failed to obtain all required NVML function pointers\n");
#endif
    wrap_dlclose(nvmlh->nvml_dll);
    free(nvmlh);
    return NULL;
  }

  nvmlh->nvmlInit();
  nvmlh->nvmlDeviceGetCount(&nvmlh->nvml_gpucount);

  /* Query CUDA device count, in case it doesn't agree with NVML, since */
  /* CUDA will only report GPUs with compute capability greater than 1.0 */
  if (cudaGetDeviceCount(&nvmlh->cuda_gpucount) != cudaSuccess) {
#if 0
    printf("Failed to query CUDA device count!\n");
#endif
    wrap_dlclose(nvmlh->nvml_dll);
    free(nvmlh);
    return NULL;
  }

  nvmlh->devs = (wrap_nvmlDevice_t *) calloc(nvmlh->nvml_gpucount, sizeof(wrap_nvmlDevice_t));
  nvmlh->nvml_pci_domain_id = (unsigned int*) calloc(nvmlh->nvml_gpucount, sizeof(unsigned int));
  nvmlh->nvml_pci_bus_id = (unsigned int*) calloc(nvmlh->nvml_gpucount, sizeof(unsigned int));
  nvmlh->nvml_pci_device_id = (unsigned int*) calloc(nvmlh->nvml_gpucount, sizeof(unsigned int));
  nvmlh->nvml_cuda_device_id = (int*) calloc(nvmlh->nvml_gpucount, sizeof(int));
  nvmlh->cuda_nvml_device_id = (int*) calloc(nvmlh->cuda_gpucount, sizeof(int));

  /* Obtain GPU device handles we're going to need repeatedly... */
  for (i = 0; i < nvmlh->nvml_gpucount; i++) {
    nvmlh->nvmlDeviceGetHandleByIndex(i, &nvmlh->devs[i]);
  }

  /* Query PCI info for each NVML device, and build table for mapping of */
  /* CUDA device IDs to NVML device IDs and vice versa */
  for (i = 0; i < nvmlh->nvml_gpucount; i++) {
    wrap_nvmlPciInfo_t pciinfo;
    nvmlh->nvmlDeviceGetPciInfo(nvmlh->devs[i], &pciinfo);
    nvmlh->nvml_pci_domain_id[i] = pciinfo.domain;
    nvmlh->nvml_pci_bus_id[i] = pciinfo.bus;
    nvmlh->nvml_pci_device_id[i] = pciinfo.device;
  }

  /* build mapping of NVML device IDs to CUDA IDs */
  for (i = 0; i < nvmlh->nvml_gpucount; i++) {
    nvmlh->nvml_cuda_device_id[i] = -1;
  }
  for (i = 0; i < nvmlh->cuda_gpucount; i++) {
    cudaDeviceProp props;
    nvmlh->cuda_nvml_device_id[i] = -1;

    if (cudaGetDeviceProperties(&props, i) == cudaSuccess) {
      int j;
      for (j = 0; j < nvmlh->nvml_gpucount; j++) {
        if ((nvmlh->nvml_pci_domain_id[j] == props.pciDomainID) &&
            (nvmlh->nvml_pci_bus_id[j] == props.pciBusID) &&
            (nvmlh->nvml_pci_device_id[j] == props.pciDeviceID)) {
#if 0
          printf("CUDA GPU[%d] matches NVML GPU[%d]\n", i, j);
#endif
          nvmlh->nvml_cuda_device_id[j] = i;
          nvmlh->cuda_nvml_device_id[i] = j;
        }
      }
    }
  }

  return nvmlh;
}


int wrap_nvml_destroy(wrap_nvml_handle *nvmlh) {
  nvmlh->nvmlShutdown();

  if (nvmlh->nvml_pci_domain_id != NULL)
    free(nvmlh->nvml_pci_domain_id);

  if (nvmlh->nvml_pci_bus_id != NULL)
    free(nvmlh->nvml_pci_bus_id);

  if (nvmlh->nvml_pci_device_id != NULL)
    free(nvmlh->nvml_pci_device_id);

  if (nvmlh->nvml_cuda_device_id != NULL)
    free(nvmlh->nvml_cuda_device_id);

  if (nvmlh->cuda_nvml_device_id != NULL)
    free(nvmlh->cuda_nvml_device_id);

  if (nvmlh->devs != NULL)
    free(nvmlh->devs);

  wrap_dlclose(nvmlh->nvml_dll);
  free(nvmlh);
  return 0;
}


int wrap_nvml_get_gpucount(wrap_nvml_handle *nvmlh, int *gpucount) {
  *gpucount = nvmlh->nvml_gpucount;
  return 0;
}

int wrap_cuda_get_gpucount(wrap_nvml_handle *nvmlh, int *gpucount) {
  *gpucount = nvmlh->cuda_gpucount;
  return 0;
}

int wrap_nvml_get_gpu_name(wrap_nvml_handle *nvmlh,
                           int cudaindex,
                           char *namebuf,
                           int bufsize) {
  int gpuindex = nvmlh->cuda_nvml_device_id[cudaindex];
  if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount)
    return -1;

  if (nvmlh->nvmlDeviceGetName(nvmlh->devs[gpuindex], namebuf, bufsize) != WRAPNVML_SUCCESS)
    return -1;

  return 0;
}


int wrap_nvml_get_tempC(wrap_nvml_handle *nvmlh,
                        int cudaindex, unsigned int *tempC)
{
  wrap_nvmlReturn_t rc;
  int gpuindex = nvmlh->cuda_nvml_device_id[cudaindex];
  if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount)
    return -1;

  rc = nvmlh->nvmlDeviceGetTemperature(nvmlh->devs[gpuindex], 0u /* NVML_TEMPERATURE_GPU */, tempC);
  if (rc != WRAPNVML_SUCCESS) {
    return -1;
  }

  return 0;
}


int wrap_nvml_get_fanpcnt(wrap_nvml_handle *nvmlh,
                          int cudaindex, unsigned int *fanpcnt) {
  wrap_nvmlReturn_t rc;
  int gpuindex = nvmlh->cuda_nvml_device_id[cudaindex];
  if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount)
    return -1;

  rc = nvmlh->nvmlDeviceGetFanSpeed(nvmlh->devs[gpuindex], fanpcnt);
  if (rc != WRAPNVML_SUCCESS) {
    return -1;
  }

  return 0;
}


int wrap_nvml_get_power_usage(wrap_nvml_handle *nvmlh,
                              int cudaindex,
                              unsigned int *milliwatts) {
  int gpuindex = nvmlh->cuda_nvml_device_id[cudaindex];
  if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount)
    return -1;

  if (nvmlh->nvmlDeviceGetPowerUsage(nvmlh->devs[gpuindex], milliwatts) != WRAPNVML_SUCCESS)
    return -1;

  return 0;
}


int wrap_nvml_get_cpu_affinity(wrap_nvml_handle *nvmlh,
                               int cudaindex,
                               unsigned int cpuSetSize,
                               unsigned long *cpuSet) {
  int gpuindex = nvmlh->cuda_nvml_device_id[cudaindex];
  if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount)
    return -1;

  if (nvmlh->nvmlDeviceGetCpuAffinity(nvmlh->devs[gpuindex], cpuSetSize, cpuSet) != WRAPNVML_SUCCESS)
    return -1;

  return 0;
}


int wrap_nvml_set_cpu_affinity(wrap_nvml_handle *nvmlh,
                               int cudaindex) {
  int gpuindex = nvmlh->cuda_nvml_device_id[cudaindex];
  if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount)
    return -1;

  if (nvmlh->nvmlDeviceSetCpuAffinity(nvmlh->devs[gpuindex]) != WRAPNVML_SUCCESS)
    return -1;

  return 0;
}


#if defined(__cplusplus)
}
#endif
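
/*
 * Illustrative usage sketch (not part of the original file): shows how a
 * caller might combine wrap_nvml_create(), wrap_cuda_get_gpucount(),
 * wrap_nvml_get_gpu_name(), wrap_nvml_get_tempC(), and wrap_nvml_destroy()
 * to print the temperature of each CUDA device.  It assumes these functions
 * are declared in CUDAWrapNVML.h as defined above, and is guarded with
 * "#if 0" (the same convention used for the disabled printf()s in this file)
 * so it does not affect the build.
 */
#if 0
#include <stdio.h>
#include "CUDAWrapNVML.h"

int main(void) {
  int i, gpucount = 0;
  wrap_nvml_handle *nvmlh = wrap_nvml_create();  /* NULL if NVML is unavailable */
  if (nvmlh == NULL) {
    printf("NVML not available on this system\n");
    return 1;
  }

  /* iterate over CUDA device indices; the wrapper maps them to NVML devices */
  wrap_cuda_get_gpucount(nvmlh, &gpucount);
  for (i = 0; i < gpucount; i++) {
    char name[64];
    unsigned int tempC = 0;
    if (wrap_nvml_get_gpu_name(nvmlh, i, name, (int) sizeof(name)) == 0 &&
        wrap_nvml_get_tempC(nvmlh, i, &tempC) == 0)
      printf("CUDA GPU %d (%s): %u C\n", i, name, tempC);
  }

  wrap_nvml_destroy(nvmlh);  /* shuts down NVML and frees the handle */
  return 0;
}
#endif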