Main Page Namespace List Class Hierarchy Alphabetical List Compound List File List Namespace Members Compound Members File Members Related Pages
VolCPotential.C

Go to the documentation of this file.
00001 /***************************************************************************
00002 *cr
00003 *cr (C) Copyright 1995-2019 The Board of Trustees of the
00004 *cr University of Illinois
00005 *cr All Rights Reserved
00006 *cr
00007 ***************************************************************************/
00008 /***************************************************************************
00009 * RCS INFORMATION:
00010 *
00011 * $RCSfile: VolCPotential.C,v $
00012 * $Author: johns $ $Locker: $ $State: Exp $
00013 * $Revision: 1.30 $ $Date: 2019年01月17日 21:21:02 $
00014 *
00015 ***************************************************************************/
00016 /*
00017 * Calculate a coulombic potential map
00018 */
00019 #include <stdio.h>
00020 #include <stdlib.h>
00021 #include <math.h>
00022 #include <string.h>
00023 
00024 #include "config.h" // force recompile when configuration changes
00025 #include "utilities.h"
00026 #include "Inform.h"
00027 #include "WKFThreads.h"
00028 #include "WKFUtils.h"
00029 #include "VolCPotential.h" 
00030 #if defined(VMDCUDA)
00031 #include "CUDAKernels.h"
00032 #endif
00033 #if defined(VMDOPENCL)
00034 #include "OpenCLKernels.h"
00035 #endif
00036 
00037 typedef struct {
00038 float* atoms;
00039 float* grideners;
00040 long int numplane;
00041 long int numcol;
00042 long int numpt;
00043 long int natoms;
00044 float gridspacing;
00045 } enthrparms;
00046 
00047 #if 1 || defined(__INTEL_COMPILER)
00048 
00049 #define FLOPSPERATOMEVAL 6.0
00050 extern "C" void * energythread(void *voidparms) {
00051 int threadid;
00052 enthrparms *parms = NULL;
00053 wkf_threadlaunch_getdata(voidparms, (void **) &parms);
00054 wkf_threadlaunch_getid(voidparms, &threadid, NULL);
00055 
00056 /* 
00057 * copy in per-thread parameters 
00058 */
00059 const float *atoms = parms->atoms;
00060 float* grideners = parms->grideners;
00061 const long int numplane = parms->numplane;
00062 const long int numcol = parms->numcol;
00063 const long int numpt = parms->numpt;
00064 const long int natoms = parms->natoms;
00065 const float gridspacing = parms->gridspacing;
00066 int i, j, k, n;
00067 double lasttime, totaltime;
00068 
00069 /* Calculate the coulombic energy at each grid point from each atom
00070 * This is by far the most time consuming part of the process
00071 * We iterate over z,y,x, and then atoms
00072 */
00073 
00074 printf("thread %d started...\n", threadid);
00075 wkf_timerhandle timer = wkf_timer_create();
00076 wkf_timer_start(timer);
00077 wkfmsgtimer *msgt = wkf_msg_timer_create(5);
00078 
00079 // Holds atom x, dy**2+dz**2, and atom q as x/r/q/x/r/q...
00080 float * xrq = (float *) malloc(3*natoms * sizeof(float)); 
00081 int maxn = natoms * 3;
00082 
00083 // For each point in the cube...
00084 wkf_tasktile_t tile;
00085 while (wkf_threadlaunch_next_tile(voidparms, 2, &tile) != WKF_SCHED_DONE) {
00086 for (k=tile.start; k<tile.end; k++) { 
00087 const float z = gridspacing * (float) k;
00088 lasttime = wkf_timer_timenow(timer);
00089 for (j=0; j<numcol; j++) {
00090 const float y = gridspacing * (float) j;
00091 long int voxaddr = numcol*numpt*k + numpt*j;
00092 
00093 // Prebuild a table of dy and dz values on a per atom basis
00094 for (n=0; n<natoms; n++) {
00095 int addr3 = n*3;
00096 int addr4 = n*4;
00097 float dy = y - atoms[addr4 + 1];
00098 float dz = z - atoms[addr4 + 2];
00099 xrq[addr3 ] = atoms[addr4];
00100 xrq[addr3 + 1] = dz*dz + dy*dy;
00101 xrq[addr3 + 2] = atoms[addr4 + 3];
00102 }
00103 
00104 // help the vectorizer make reasonable decisions
00105 #if defined(__INTEL_COMPILER)
00106 #pragma vector always 
00107 #endif
00108 /* walk through more than one grid point at a time */
00109 for (i=0; i<numpt; i+=8) {
00110 float energy1 = 0.0f; /* Energy of first grid point */
00111 float energy2 = 0.0f; /* Energy of second grid point */
00112 float energy3 = 0.0f; /* Energy of third grid point */
00113 float energy4 = 0.0f; /* Energy of fourth grid point */
00114 float energy5 = 0.0f; /* Energy of fourth grid point */
00115 float energy6 = 0.0f; /* Energy of fourth grid point */
00116 float energy7 = 0.0f; /* Energy of fourth grid point */
00117 float energy8 = 0.0f; /* Energy of fourth grid point */
00118 
00119 const float x = gridspacing * (float) i;
00120 
00121 // help the vectorizer make reasonable decisions
00122 #if defined(__INTEL_COMPILER)
00123 #pragma vector always 
00124 #endif
00125 /* Calculate the interaction with each atom */
00126 /* SSE allows simultaneous calculations of */
00127 /* multiple iterations */
00128 /* 6 flops per grid point */
00129 for (n=0; n<maxn; n+=3) {
00130 float dy2pdz2 = xrq[n + 1];
00131 float q = xrq[n + 2];
00132 
00133 float dx1 = x - xrq[n];
00134 energy1 += q / sqrtf(dx1*dx1 + dy2pdz2);
00135 
00136 float dx2 = dx1 + gridspacing;
00137 energy2 += q / sqrtf(dx2*dx2 + dy2pdz2);
00138 
00139 float dx3 = dx2 + gridspacing;
00140 energy3 += q / sqrtf(dx3*dx3 + dy2pdz2);
00141 
00142 float dx4 = dx3 + gridspacing;
00143 energy4 += q / sqrtf(dx4*dx4 + dy2pdz2);
00144 
00145 float dx5 = dx4 + gridspacing;
00146 energy5 += q / sqrtf(dx5*dx5 + dy2pdz2);
00147 
00148 float dx6 = dx5 + gridspacing;
00149 energy6 += q / sqrtf(dx6*dx6 + dy2pdz2);
00150 
00151 float dx7 = dx6 + gridspacing;
00152 energy7 += q / sqrtf(dx7*dx7 + dy2pdz2);
00153 
00154 float dx8 = dx7 + gridspacing;
00155 energy8 += q / sqrtf(dx8*dx8 + dy2pdz2);
00156 }
00157 
00158 grideners[voxaddr + i] = energy1;
00159 if (i+1 < numpt)
00160 grideners[voxaddr + i + 1] = energy2;
00161 if (i+2 < numpt)
00162 grideners[voxaddr + i + 2] = energy3;
00163 if (i+3 < numpt)
00164 grideners[voxaddr + i + 3] = energy4;
00165 if (i+4 < numpt)
00166 grideners[voxaddr + i + 4] = energy5;
00167 if (i+5 < numpt)
00168 grideners[voxaddr + i + 5] = energy6;
00169 if (i+6 < numpt)
00170 grideners[voxaddr + i + 6] = energy7;
00171 if (i+7 < numpt)
00172 grideners[voxaddr + i + 7] = energy8;
00173 }
00174 }
00175 totaltime = wkf_timer_timenow(timer);
00176 
00177 if (wkf_msg_timer_timeout(msgt)) {
00178 // XXX: we have to use printf here as msgInfo is not thread-safe yet.
00179 printf("thread[%d] plane %d/%ld time %.2f, elapsed %.1f, est. total: %.1f\n",
00180 threadid, k, numplane,
00181 totaltime - lasttime, totaltime, 
00182 totaltime * numplane / (k+1));
00183 }
00184 }
00185 }
00186 
00187 wkf_timer_destroy(timer);
00188 wkf_msg_timer_destroy(msgt);
00189 free(xrq);
00190 
00191 return NULL;
00192 }
00193 
00194 #else
00195 
00196 #define FLOPSPERATOMEVAL 6.0
00197 extern "C" void * energythread(void *voidparms) {
00198 int threadid;
00199 enthrparms *parms = NULL;
00200 wkf_threadlaunch_getdata(voidparms, (void **) &parms);
00201 wkf_threadlaunch_getid(voidparms, &threadid, NULL);
00202 
00203 /* 
00204 * copy in per-thread parameters 
00205 */
00206 const float *atoms = parms->atoms;
00207 float* grideners = parms->grideners;
00208 const long int numplane = parms->numplane;
00209 const long int numcol = parms->numcol;
00210 const long int numpt = parms->numpt;
00211 const long int natoms = parms->natoms;
00212 const float gridspacing = parms->gridspacing;
00213 int i, j, k, n;
00214 double lasttime, totaltime;
00215 
00216 /* Calculate the coulombic energy at each grid point from each atom
00217 * This is by far the most time consuming part of the process
00218 * We iterate over z,y,x, and then atoms
00219 */
00220 
00221 printf("thread %d started...\n", threadid);
00222 wkf_timerhandle timer = wkf_timer_create();
00223 wkf_timer_start(timer);
00224 wkfmsgtimer *msgt = wkf_msg_timer_create(5);
00225 
00226 // Holds atom x, dy**2+dz**2, and atom q as x/r/q/x/r/q...
00227 float * xrq = (float *) malloc(3*natoms * sizeof(float)); 
00228 int maxn = natoms * 3;
00229 
00230 // For each point in the cube...
00231 while (!wkf_threadlaunch_next(voidparms, &k)) {
00232 const float z = gridspacing * (float) k;
00233 lasttime = wkf_timer_timenow(timer);
00234 for (j=0; j<numcol; j++) {
00235 const float y = gridspacing * (float) j;
00236 long int voxaddr = numcol*numpt*k + numpt*j;
00237 
00238 // Prebuild a table of dy and dz values on a per atom basis
00239 for (n=0; n<natoms; n++) {
00240 int addr3 = n*3;
00241 int addr4 = n*4;
00242 float dy = y - atoms[addr4 + 1];
00243 float dz = z - atoms[addr4 + 2];
00244 xrq[addr3 ] = atoms[addr4];
00245 xrq[addr3 + 1] = dz*dz + dy*dy;
00246 xrq[addr3 + 2] = atoms[addr4 + 3];
00247 }
00248 
00249 #if defined(__INTEL_COMPILER)
00250 // help the vectorizer make reasonable decisions (used prime to keep it honest)
00251 #pragma loop count(1009)
00252 #endif
00253 for (i=0; i<numpt; i++) {
00254 float energy = grideners[voxaddr + i]; // Energy at current grid point
00255 const float x = gridspacing * (float) i;
00256 
00257 #if defined(__INTEL_COMPILER)
00258 // help the vectorizer make reasonable decisions
00259 #pragma vector always 
00260 #endif
00261 // Calculate the interaction with each atom
00262 for (n=0; n<maxn; n+=3) {
00263 float dx = x - xrq[n];
00264 energy += xrq[n + 2] / sqrtf(dx*dx + xrq[n + 1]);
00265 }
00266 grideners[voxaddr + i] = energy;
00267 }
00268 }
00269 totaltime = wkf_timer_timenow(timer);
00270 
00271 if (wkf_msg_timer_timeout(msgt)) {
00272 // XXX: we have to use printf here as msgInfo is not thread-safe yet.
00273 printf("thread[%d] plane %d/%ld time %.2f, elapsed %.1f, est. total: %.1f\n",
00274 threadid, k, numplane,
00275 totaltime - lasttime, totaltime, 
00276 totaltime * numplane / (k+1));
00277 }
00278 }
00279 
00280 wkf_timer_destroy(timer);
00281 wkf_msg_timer_destroy(msgt);
00282 free(xrq);
00283 
00284 return NULL;
00285 }
00286 
00287 #endif
00288 
00289 
00290 static int vol_cpotential_cpu(long int natoms, float* atoms, float* grideners, long int numplane, long int numcol, long int numpt, float gridspacing) {
00291 int rc=0;
00292 enthrparms parms;
00293 wkf_timerhandle globaltimer;
00294 double totalruntime;
00295 
00296 #if defined(VMDTHREADS)
00297 int numprocs = wkf_thread_numprocessors();
00298 #else
00299 int numprocs = 1;
00300 #endif
00301 
00302 printf("Using %d %s\n", numprocs, ((numprocs > 1) ? "CPUs" : "CPU"));
00303 
00304 parms.atoms = atoms;
00305 parms.grideners = grideners;
00306 parms.numplane = numplane;
00307 parms.numcol = numcol;
00308 parms.numpt = numpt;
00309 parms.natoms = natoms;
00310 parms.gridspacing = gridspacing;
00311 
00312 globaltimer = wkf_timer_create();
00313 wkf_timer_start(globaltimer);
00314 
00315 /* spawn child threads to do the work */
00316 wkf_tasktile_t tile;
00317 tile.start=0;
00318 tile.end=numplane;
00319 rc = wkf_threadlaunch(numprocs, &parms, energythread, &tile);
00320 
00321 // Measure GFLOPS
00322 wkf_timer_stop(globaltimer);
00323 totalruntime = wkf_timer_time(globaltimer);
00324 wkf_timer_destroy(globaltimer);
00325 
00326 if (!rc) {
00327 double atomevalssec = ((double) numplane * numcol * numpt * natoms) / (totalruntime * 1000000000.0);
00328 printf(" %g billion atom evals/second, %g GFLOPS\n",
00329 atomevalssec, atomevalssec * FLOPSPERATOMEVAL);
00330 } else {
00331 msgWarn << "Encountered an unrecoverable error, calculation terminated." << sendmsg;
00332 }
00333 
00334 return rc;
00335 }
00336 
00337 
00338 int vol_cpotential(long int natoms, float* atoms, float* grideners, long int numplane, long int numcol, long int numpt, float gridspacing) {
00339 int rc = -1; // init rc value to indicate we haven't run yet
00340 wkf_timerhandle timer = wkf_timer_create();
00341 wkf_timer_start(timer);
00342 
00343 #if defined(VMDCUDA)
00344 if (!getenv("VMDNOCUDA")) {
00345 rc = vmd_cuda_vol_cpotential(natoms, atoms, grideners, 
00346 numplane, numcol, numpt, gridspacing);
00347 }
00348 #endif
00349 #if defined(VMDOPENCL)
00350 if ((rc != 0) && !getenv("VMDNOOPENCL")) {
00351 rc = vmd_opencl_vol_cpotential(natoms, atoms, grideners, 
00352 numplane, numcol, numpt, gridspacing);
00353 }
00354 #endif
00355 
00356 // if we tried to run on the GPU and failed, or we haven't run yet,
00357 // run on the CPU
00358 if (rc != 0)
00359 rc = vol_cpotential_cpu(natoms, atoms, grideners, 
00360 numplane, numcol, numpt, gridspacing);
00361 
00362 double totaltime = wkf_timer_timenow(timer);
00363 wkf_timer_destroy(timer);
00364 
00365 msgInfo << "Coulombic potential map calculation complete: "
00366 << totaltime << " seconds" << sendmsg;
00367 
00368 return rc;
00369 }
00370 
00371 
00372