Main Page | Namespace List | Class Hierarchy | Alphabetical List | Compound List | File List | Namespace Members | Compound Members | File Members | Related Pages

CUDABench.cu File Reference

Short benchmark kernels to measure GPU performance. See the Detailed Description section below.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <cuda.h>
#include "Inform.h"
#include "WKFThreads.h"
#include "WKFUtils.h"
#include "CUDAKernels.h"
#include "Measure.h"

Go to the source code of this file.


Compounds

struct busbwthrparms
struct globmembwthrparms
struct latthrparms
struct maddthrparms

Defines

#define RESTRICT __restrict__
#define CUERR
#define FMADD16
#define GRIDSIZEX 6144
#define BLOCKSIZEX 64
#define GLOOPS 2000
#define FMADD16COUNT 32
#define FLOPSPERFMADD16 32
#define FLOPSPERLOOP (FMADD16COUNT * FLOPSPERFMADD16)
#define BWITER 500
#define LATENCYITER 50000

Typedefs

typedef float4 datatype

Functions

__global__ void madd_kernel (float *doutput)
int cudamaddgflops (int cudadev, double *gflops, int testloops)
void * cudamaddthread (void *voidparms)
int vmd_cuda_madd_gflops (int numdevs, int *devlist, double *gflops, int testloops)
int cudabusbw (int cudadev, double *hdmbsec, double *hdlatusec, double *phdmbsec, double *phdlatusec, double *dhmbsec, double *dhlatusec, double *pdhmbsec, double *pdhlatusec)
void * cudabusbwthread (void *voidparms)
int vmd_cuda_bus_bw (int numdevs, int *devlist, double *hdmbsec, double *hdlatusec, double *phdmbsec, double *phdlatusec, double *dhmbsec, double *dhlatusec, double *pdhmbsec, double *pdhlatusec)
template<class T> __global__ void gpuglobmemcpybw (T *dest, const T *src)
template<class T> __global__ void gpuglobmemsetbw (T *dest, const T val)
int cudaglobmembw (int cudadev, double *gpumemsetgbsec, double *gpumemcpygbsec)
void * cudaglobmembwthread (void *voidparms)
int vmd_cuda_globmem_bw (int numdevs, int *devlist, double *memsetgbsec, double *memcpygbsec)
void * vmddevpoollatencythread (void *voidparms)
void * vmddevpooltilelatencythread (void *voidparms)
__global__ void nopkernel (float *ddata)
__global__ void voidkernel (void)
void * vmddevpoolcudatilelatencythread (void *voidparms)
int vmd_cuda_devpool_latency (wkf_threadpool_t *devpool, int tilesize, double *kernlaunchlatency, double *barlatency, double *cyclelatency, double *tilelatency, double *kernellatency)
void * vmddevpoolcudalatencythread (void *voidparms)
int vmd_cuda_measure_latencies (wkf_threadpool_t *devpool)
int gpu_ooc_bench (wkf_threadpool_t *devpool, int nfiles, const char **trjfileset, const AtomSel *sel, int first, int last, int step)

Detailed Description

Short benchmark kernels to measure GPU performance.

Definition in file CUDABench.cu.


Define Documentation

#define BLOCKSIZEX 64

Definition at line 88 of file CUDABench.cu.

Referenced by cudamaddgflops.

#define BWITER 500

Definition at line 277 of file CUDABench.cu.

Referenced by cudabusbw.

#define CUERR

Value:

{ cudaError_t err; \
 if ((err = cudaGetLastError()) != cudaSuccess) { \
 printf("CUDA error: %s, %s line %d\n", cudaGetErrorString(err), __FILE__, __LINE__); \
 return -1; }}

Definition at line 54 of file CUDABench.cu.

Referenced by cudabusbw, cudaglobmembw, cudamaddgflops, Msmpot_cuda_compute_latcut, Msmpot_cuda_compute_shortrng, Msmpot_cuda_setup_latcut, and Msmpot_cuda_setup_shortrng.

#define FLOPSPERFMADD16 32

Definition at line 91 of file CUDABench.cu.

#define FLOPSPERLOOP (FMADD16COUNT * FLOPSPERFMADD16)

Definition at line 94 of file CUDABench.cu.

Referenced by cudamaddgflops.

#define FMADD16

Value:

tmp0 = tmp0*tmp4+tmp7; \
 tmp1 = tmp1*tmp5+tmp0; \
 tmp2 = tmp2*tmp6+tmp1; \
 tmp3 = tmp3*tmp7+tmp2; \
 tmp4 = tmp4*tmp0+tmp3; \
 tmp5 = tmp5*tmp1+tmp4; \
 tmp6 = tmp6*tmp2+tmp5; \
 tmp7 = tmp7*tmp3+tmp6; \
 tmp8 = tmp8*tmp12+tmp15; \
 tmp9 = tmp9*tmp13+tmp8; \
 tmp10 = tmp10*tmp14+tmp9; \
 tmp11 = tmp11*tmp15+tmp10; \
 tmp12 = tmp12*tmp8+tmp11; \
 tmp13 = tmp13*tmp9+tmp12; \
 tmp14 = tmp14*tmp10+tmp13; \
 tmp15 = tmp15*tmp11+tmp14;

Definition at line 68 of file CUDABench.cu.

Referenced by madd_kernel.

#define FMADD16COUNT 32

Definition at line 90 of file CUDABench.cu.

#define GLOOPS 2000

Definition at line 89 of file CUDABench.cu.

Referenced by cudamaddgflops and madd_kernel.

#define GRIDSIZEX 6144

#define LATENCYITER 50000

Definition at line 278 of file CUDABench.cu.

Referenced by cudabusbw.

#define RESTRICT __restrict__

Definition at line 37 of file CUDABench.cu.


Typedef Documentation

typedef float4 datatype

Definition at line 536 of file CUDABench.cu.

Referenced by cudaglobmembw.


Function Documentation

int cudabusbw ( int cudadev,
double * hdmbsec,
double * hdlatusec,
double * phdmbsec,
double * phdlatusec,
double * dhmbsec,
double * dhlatusec,
double * pdhmbsec,
double * pdhlatusec
) [static]

void* cudabusbwthread ( void * voidparms ) [static]

int cudaglobmembw ( int cudadev,
double * gpumemsetgbsec,
double * gpumemcpygbsec
) [static]

Definition at line 538 of file CUDABench.cu.

References CUERR, datatype, and make_float4.

Referenced by cudaglobmembwthread.

void* cudaglobmembwthread ( void * voidparms ) [static]

int cudamaddgflops ( int cudadev,
double * gflops,
int testloops
) [static]

void* cudamaddthread ( void * voidparms ) [static]

int gpu_ooc_bench ( wkf_threadpool_t * devpool,
int nfiles,
const char ** trjfileset,
const AtomSel * sel,
int first,
int last,
int step
)

template<class T>
__global__ void gpuglobmemcpybw ( T * dest,
const T * src
)

Definition at line 525 of file CUDABench.cu.

template<class T>
__global__ void gpuglobmemsetbw ( T * dest,
const T val
)

Definition at line 531 of file CUDABench.cu.

__global__ void madd_kernel ( float * doutput ) [static]

Definition at line 96 of file CUDABench.cu.

References FMADD16 and GLOOPS.

__global__ void nopkernel ( float * ddata ) [static]

Definition at line 715 of file CUDABench.cu.

References NULL.

int vmd_cuda_bus_bw ( int numdevs,
int * devlist,
double * hdmbsec,
double * hdlatusec,
double * phdmbsec,
double * phdlatusec,
double * dhmbsec,
double * dhlatusec,
double * pdhmbsec,
double * pdhlatusec
)

int vmd_cuda_devpool_latency ( wkf_threadpool_t * devpool,
int tilesize,
double * kernlaunchlatency,
double * barlatency,
double * cyclelatency,
double * tilelatency,
double * kernellatency
)

int vmd_cuda_globmem_bw ( int numdevs,
int * devlist,
double * memsetgbsec,
double * memcpygbsec
)

int vmd_cuda_madd_gflops ( int numdevs,
int * devlist,
double * gflops,
int testloops
)

int vmd_cuda_measure_latencies ( wkf_threadpool_t * devpool )

void* vmddevpoolcudalatencythread ( void * voidparms ) [static]

void* vmddevpoolcudatilelatencythread ( void * voidparms ) [static]

void* vmddevpoollatencythread ( void * voidparms ) [static]

Definition at line 693 of file CUDABench.cu.

References NULL.

Referenced by vmd_cuda_devpool_latency.

void* vmddevpooltilelatencythread ( void * voidparms ) [static]

__global__ void voidkernel ( void ) [static]

Definition at line 725 of file CUDABench.cu.


Generated on Mon Nov 17 02:47:34 2025 for VMD (current) by doxygen 1.2.14, written by Dimitri van Heesch, © 1997-2002

AltStyle によって変換されたページ (->オリジナル) /