#include <stdio.h>
#include <stdlib.h>
#include "CUDAWrapNVML.h"
#include "cuda_runtime.h"

/*
 * Wrappers to emulate dlopen() on other systems like Windows
 */
#if defined(_MSC_VER) || defined(_WIN32) || defined(_WIN64)
#include <windows.h>
static void *wrap_dlopen(const char *filename) {
  return (void *)LoadLibrary(filename);
}
static void *wrap_dlsym(void *h, const char *sym) {
  return (void *)GetProcAddress((HINSTANCE)h, sym);
}
static int wrap_dlclose(void *h) {
  /* FreeLibrary returns nonzero on success */
  return (!FreeLibrary((HINSTANCE)h));
}
#else
/* assume we can use dlopen itself... */
#include <dlfcn.h>
static void *wrap_dlopen(const char *filename) {
  return dlopen(filename, RTLD_NOW);
}
static void *wrap_dlsym(void *h, const char *sym) {
  return dlsym(h, sym);
}
static int wrap_dlclose(void *h) {
  return dlclose(h);
}
#endif

#if defined(__cplusplus)
extern "C" {
#endif

wrap_nvml_handle * wrap_nvml_create() {
  int i = 0;
  wrap_nvml_handle *nvmlh = NULL;

  /*
   * We use hard-coded library installation locations for the time being...
   * No idea where or if libnvidia-ml.so is installed on Mac OS X; a
   * deep scouring of the filesystem on one of the Mac CUDA build boxes
   * I used turned up nothing, so for now it's not going to work on OS X.
   */
#if defined(_WIN64)
  /* 64-bit Windows */
  const char *plibnvidia_ml[] = {"c:/Program Files/NVIDIA Corporation/NVSMI/nvml.dll", NULL};
#elif defined(_WIN32) || defined(_MSC_VER)
  /* 32-bit Windows */
  const char *plibnvidia_ml[] = {"c:/Program Files (x86)/NVIDIA Corporation/NVSMI/nvml.dll", NULL};
#elif defined(__linux) && (defined(__i386__) || defined(__ARM_ARCH_7A__))
  /* 32-bit linux assumed */
  const char *plibnvidia_ml[] = {"/usr/lib/libnvidia-ml.so",
                                 "/usr/lib/libnvidia-ml.so.1",
                                 "/usr/lib/x86-linux-gnu/libnvidia-ml.so",
                                 "/usr/lib/x86-linux-gnu/libnvidia-ml.so.1",
                                 NULL};
#elif defined(__linux)
  /* 64-bit linux assumed */
  const char *plibnvidia_ml[] = {"/usr/lib64/libnvidia-ml.so",
                                 "/usr/lib64/libnvidia-ml.so.1",
                                 "/usr/lib/aarch64-linux-gnu/libnvidia-ml.so",
                                 "/usr/lib/aarch64-linux-gnu/libnvidia-ml.so.1",
                                 "/usr/lib/x86_64-linux-gnu/libnvidia-ml.so",
                                 "/usr/lib/x86_64-linux-gnu/libnvidia-ml.so.1",
                                 NULL};
#else
//#error "Unrecognized platform: need NVML DLL path for this platform..."
  const char *plibnvidia_ml[] = {NULL};
#endif

  void *nvml_dll = NULL;
  const char *libnvidia_ml = NULL;

  // allow explicit user override of path if needed
  if (getenv("LIBNVMLPATH") != NULL)
    nvml_dll = wrap_dlopen(getenv("LIBNVMLPATH"));

  int sopathidx = 0;
  libnvidia_ml = plibnvidia_ml[sopathidx];
  while ((nvml_dll == NULL) && (libnvidia_ml != NULL)) {
    nvml_dll = wrap_dlopen(libnvidia_ml);
    sopathidx++;
    libnvidia_ml = plibnvidia_ml[sopathidx];
  }
  if (nvml_dll == NULL)
    return NULL;

  nvmlh = (wrap_nvml_handle *) calloc(1, sizeof(wrap_nvml_handle));

  nvmlh->nvml_dll = nvml_dll;

  nvmlh->nvmlInit = (wrap_nvmlReturn_t (*)(void))
    wrap_dlsym(nvmlh->nvml_dll, "nvmlInit");

  nvmlh->nvmlDeviceGetCount = (wrap_nvmlReturn_t (*)(int *))
    wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetCount_v2");

  nvmlh->nvmlDeviceGetHandleByIndex = (wrap_nvmlReturn_t (*)(int, wrap_nvmlDevice_t *))
    wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetHandleByIndex_v2");

  nvmlh->nvmlDeviceGetPciInfo = (wrap_nvmlReturn_t (*)(wrap_nvmlDevice_t, wrap_nvmlPciInfo_t *))
    wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetPciInfo");

  nvmlh->nvmlDeviceGetName = (wrap_nvmlReturn_t (*)(wrap_nvmlDevice_t, char *, int))
    wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetName");

  nvmlh->nvmlDeviceGetTemperature = (wrap_nvmlReturn_t (*)(wrap_nvmlDevice_t, int, unsigned int *))
    wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetTemperature");

  nvmlh->nvmlDeviceGetFanSpeed = (wrap_nvmlReturn_t (*)(wrap_nvmlDevice_t, unsigned int *))
    wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetFanSpeed");

  nvmlh->nvmlDeviceGetPowerUsage = (wrap_nvmlReturn_t (*)(wrap_nvmlDevice_t, unsigned int *))
    wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetPowerUsage");

  nvmlh->nvmlDeviceGetCpuAffinity = (wrap_nvmlReturn_t (*)(wrap_nvmlDevice_t, unsigned int, unsigned long *))
    wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetCpuAffinity");

  nvmlh->nvmlDeviceSetCpuAffinity = (wrap_nvmlReturn_t (*)(wrap_nvmlDevice_t))
    wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceSetCpuAffinity");

  nvmlh->nvmlShutdown = (wrap_nvmlReturn_t (*)())
    wrap_dlsym(nvmlh->nvml_dll, "nvmlShutdown");

  if (nvmlh->nvmlInit == NULL ||
      nvmlh->nvmlShutdown == NULL ||
      nvmlh->nvmlDeviceGetCount == NULL ||
      nvmlh->nvmlDeviceGetHandleByIndex == NULL ||
      nvmlh->nvmlDeviceGetPciInfo == NULL ||
      nvmlh->nvmlDeviceGetName == NULL ||
      nvmlh->nvmlDeviceGetTemperature == NULL ||
      nvmlh->nvmlDeviceGetFanSpeed == NULL ||
      nvmlh->nvmlDeviceGetPowerUsage == NULL ||
      nvmlh->nvmlDeviceGetCpuAffinity == NULL ||
      nvmlh->nvmlDeviceSetCpuAffinity == NULL) {
#if 0
    printf("Failed to obtain all required NVML function pointers\n");
#endif
    wrap_dlclose(nvmlh->nvml_dll);
    free(nvmlh);
    return NULL;
  }

  nvmlh->nvmlInit();
  nvmlh->nvmlDeviceGetCount(&nvmlh->nvml_gpucount);

  /* Query CUDA device count, in case it doesn't agree with NVML, since */
  /* CUDA will only report GPUs with compute capability greater than 1.0 */
  if (cudaGetDeviceCount(&nvmlh->cuda_gpucount) != cudaSuccess) {
#if 0
    printf("Failed to query CUDA device count!\n");
#endif
    wrap_dlclose(nvmlh->nvml_dll);
    free(nvmlh);
    return NULL;
  }

  nvmlh->devs = (wrap_nvmlDevice_t *) calloc(nvmlh->nvml_gpucount, sizeof(wrap_nvmlDevice_t));
  nvmlh->nvml_pci_domain_id = (unsigned int*) calloc(nvmlh->nvml_gpucount, sizeof(unsigned int));
  nvmlh->nvml_pci_bus_id = (unsigned int*) calloc(nvmlh->nvml_gpucount, sizeof(unsigned int));
  nvmlh->nvml_pci_device_id = (unsigned int*) calloc(nvmlh->nvml_gpucount, sizeof(unsigned int));
  nvmlh->nvml_cuda_device_id = (int*) calloc(nvmlh->nvml_gpucount, sizeof(int));
  nvmlh->cuda_nvml_device_id = (int*) calloc(nvmlh->cuda_gpucount, sizeof(int));

  /* Obtain GPU device handles we're going to need repeatedly... */
  for (i = 0; i < nvmlh->nvml_gpucount; i++) {
    nvmlh->nvmlDeviceGetHandleByIndex(i, &nvmlh->devs[i]);
  }

  /* Query PCI info for each NVML device, and build table for mapping of */
  /* CUDA device IDs to NVML device IDs and vice versa */
  for (i = 0; i < nvmlh->nvml_gpucount; i++) {
    wrap_nvmlPciInfo_t pciinfo;
    nvmlh->nvmlDeviceGetPciInfo(nvmlh->devs[i], &pciinfo);
    nvmlh->nvml_pci_domain_id[i] = pciinfo.domain;
    nvmlh->nvml_pci_bus_id[i] = pciinfo.bus;
    nvmlh->nvml_pci_device_id[i] = pciinfo.device;
  }

  /* build mapping of NVML device IDs to CUDA IDs */
  for (i = 0; i < nvmlh->nvml_gpucount; i++) {
    nvmlh->nvml_cuda_device_id[i] = -1;
  }
  for (i = 0; i < nvmlh->cuda_gpucount; i++) {
    cudaDeviceProp props;
    nvmlh->cuda_nvml_device_id[i] = -1;

    if (cudaGetDeviceProperties(&props, i) == cudaSuccess) {
      int j;
      for (j = 0; j < nvmlh->nvml_gpucount; j++) {
        if ((nvmlh->nvml_pci_domain_id[j] == props.pciDomainID) &&
            (nvmlh->nvml_pci_bus_id[j] == props.pciBusID) &&
            (nvmlh->nvml_pci_device_id[j] == props.pciDeviceID)) {
#if 0
          printf("CUDA GPU[%d] matches NVML GPU[%d]\n", i, j);
#endif
          nvmlh->nvml_cuda_device_id[j] = i;
          nvmlh->cuda_nvml_device_id[i] = j;
        }
      }
    }
  }

  return nvmlh;
}


int wrap_nvml_destroy(wrap_nvml_handle *nvmlh) {
  nvmlh->nvmlShutdown();

  if (nvmlh->nvml_pci_domain_id != NULL)
    free(nvmlh->nvml_pci_domain_id);

  if (nvmlh->nvml_pci_bus_id != NULL)
    free(nvmlh->nvml_pci_bus_id);

  if (nvmlh->nvml_pci_device_id != NULL)
    free(nvmlh->nvml_pci_device_id);

  if (nvmlh->nvml_cuda_device_id != NULL)
    free(nvmlh->nvml_cuda_device_id);

  if (nvmlh->cuda_nvml_device_id != NULL)
    free(nvmlh->cuda_nvml_device_id);

  if (nvmlh->devs != NULL)
    free(nvmlh->devs);

  wrap_dlclose(nvmlh->nvml_dll);
  free(nvmlh);
  return 0;
}


int wrap_nvml_get_gpucount(wrap_nvml_handle *nvmlh, int *gpucount) {
  *gpucount = nvmlh->nvml_gpucount;
  return 0;
}

int wrap_cuda_get_gpucount(wrap_nvml_handle *nvmlh, int *gpucount) {
  *gpucount = nvmlh->cuda_gpucount;
  return 0;
}

int wrap_nvml_get_gpu_name(wrap_nvml_handle *nvmlh,
                           int cudaindex,
                           char *namebuf,
                           int bufsize) {
  int gpuindex = nvmlh->cuda_nvml_device_id[cudaindex];
  if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount)
    return -1;

  if (nvmlh->nvmlDeviceGetName(nvmlh->devs[gpuindex], namebuf, bufsize) != WRAPNVML_SUCCESS)
    return -1;

  return 0;
}


int wrap_nvml_get_tempC(wrap_nvml_handle *nvmlh,
                        int cudaindex, unsigned int *tempC)
{
  wrap_nvmlReturn_t rc;
  int gpuindex = nvmlh->cuda_nvml_device_id[cudaindex];
  if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount)
    return -1;

  rc = nvmlh->nvmlDeviceGetTemperature(nvmlh->devs[gpuindex], 0u /* NVML_TEMPERATURE_GPU */, tempC);
  if (rc != WRAPNVML_SUCCESS) {
    return -1;
  }

  return 0;
}


int wrap_nvml_get_fanpcnt(wrap_nvml_handle *nvmlh,
                          int cudaindex, unsigned int *fanpcnt) {
  wrap_nvmlReturn_t rc;
  int gpuindex = nvmlh->cuda_nvml_device_id[cudaindex];
  if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount)
    return -1;

  rc = nvmlh->nvmlDeviceGetFanSpeed(nvmlh->devs[gpuindex], fanpcnt);
  if (rc != WRAPNVML_SUCCESS) {
    return -1;
  }

  return 0;
}


int wrap_nvml_get_power_usage(wrap_nvml_handle *nvmlh,
                              int cudaindex,
                              unsigned int *milliwatts) {
  int gpuindex = nvmlh->cuda_nvml_device_id[cudaindex];
  if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount)
    return -1;

  if (nvmlh->nvmlDeviceGetPowerUsage(nvmlh->devs[gpuindex], milliwatts) != WRAPNVML_SUCCESS)
    return -1;

  return 0;
}


int wrap_nvml_get_cpu_affinity(wrap_nvml_handle *nvmlh,
                               int cudaindex,
                               unsigned int cpuSetSize,
                               unsigned long *cpuSet) {
  int gpuindex = nvmlh->cuda_nvml_device_id[cudaindex];
  if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount)
    return -1;

  if (nvmlh->nvmlDeviceGetCpuAffinity(nvmlh->devs[gpuindex], cpuSetSize, cpuSet) != WRAPNVML_SUCCESS)
    return -1;

  return 0;
}


int wrap_nvml_set_cpu_affinity(wrap_nvml_handle *nvmlh,
                               int cudaindex) {
  int gpuindex = nvmlh->cuda_nvml_device_id[cudaindex];
  if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount)
    return -1;

  if (nvmlh->nvmlDeviceSetCpuAffinity(nvmlh->devs[gpuindex]) != WRAPNVML_SUCCESS)
    return -1;

  return 0;
}


#if defined(__cplusplus)
}
#endif
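
/*
 * Illustrative usage sketch (not part of the original file): shows how a
 * caller might combine wrap_nvml_create(), wrap_cuda_get_gpucount(),
 * wrap_nvml_get_gpu_name(), wrap_nvml_get_tempC(), and wrap_nvml_destroy()
 * to print the temperature of each CUDA device.  It assumes these functions
 * are declared in CUDAWrapNVML.h as defined above, and is guarded with
 * "#if 0" (the same convention used for the disabled printf()s in this file)
 * so it does not affect the build.
 */
#if 0
#include <stdio.h>
#include "CUDAWrapNVML.h"

int main(void) {
  int i, gpucount = 0;
  wrap_nvml_handle *nvmlh = wrap_nvml_create();  /* NULL if NVML is unavailable */
  if (nvmlh == NULL) {
    printf("NVML not available on this system\n");
    return 1;
  }

  /* iterate over CUDA device indices; the wrapper maps them to NVML devices */
  wrap_cuda_get_gpucount(nvmlh, &gpucount);
  for (i = 0; i < gpucount; i++) {
    char name[64];
    unsigned int tempC = 0;
    if (wrap_nvml_get_gpu_name(nvmlh, i, name, (int) sizeof(name)) == 0 &&
        wrap_nvml_get_tempC(nvmlh, i, &tempC) == 0)
      printf("CUDA GPU %d (%s): %u C\n", i, name, tempC);
  }

  wrap_nvml_destroy(nvmlh);  /* shuts down NVML and frees the handle */
  return 0;
}
#endif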