00001 /*************************************************************************** 00002 *cr 00003 *cr (C) Copyright 2008-2009 The Board of Trustees of the 00004 *cr University of Illinois 00005 *cr All Rights Reserved 00006 *cr 00007 ***************************************************************************/ 00008 00009 /*************************************************************************** 00010 * RCS INFORMATION: 00011 * 00012 * $RCSfile: msmpot_compute.c,v $ 00013 * $Author: dhardy $ $Locker: $ $State: Exp $ 00014 * $Revision: 1.8 $ $Date: 2010年06月10日 22:36:49 $ 00015 * 00016 ***************************************************************************/ 00017 00018 #include "msmpot_internal.h" 00019 00020 /* macros below for debugging */ 00021 /* 00022 #define MSMPOT_LONGRANGE_ONLY 00023 #undef MSMPOT_LONGRANGE_ONLY 00024 00025 #define MSMPOT_SHORTRANGE_ONLY 00026 #undef MSMPOT_SHORTRANGE_ONLY 00027 00028 #define MSMPOT_CHECKMAPINDEX 00029 #undef MSMPOT_CHECKMAPINDEX 00030 00031 #define MAPINDEX 0 00032 */ 00033 00034 #define USE_BIN_HASHING 00035 00036 int Msmpot_compute(Msmpot *msm, 00037 float *epotmap, /* electrostatic potential map 00038 assumed to be length mx*my*mz, 00039 stored flat in row-major order, i.e., 00040 &ep[i,j,k] == ep + ((k*my+j)*mx+i) */ 00041 int mx, int my, int mz, /* map lattice dimensions */ 00042 float lx, float ly, float lz, /* map lattice lengths */ 00043 float x0, float y0, float z0, /* map origin (lower-left corner) */ 00044 float vx, float vy, float vz, /* periodic cell lengths along x, y, z; 00045 set to 0 for non-periodic direction */ 00046 const float *atom, /* atoms stored x/y/z/q (length 4*natoms) */ 00047 int natoms /* number of atoms */ 00048 ) { 00049 int err; 00050 00051 REPORT("Performing MSM calculation of electrostatic potential map."); 00052 00053 err = Msmpot_check_params(msm, epotmap, mx, my, mz, lx, ly, lz, 00054 vx, vy, vz, atom, natoms); 00055 if (err != MSMPOT_SUCCESS) return ERROR(err); 00056 00057 /* store user parameters */ 00058 msm->atom = atom; 00059 msm->natoms = natoms; 00060 msm->epotmap = epotmap; 00061 msm->mx = mx; 00062 msm->my = my; 00063 msm->mz = mz; 00064 msm->lx = lx; 00065 msm->ly = ly; 00066 msm->lz = lz; 00067 msm->lx0 = x0; 00068 msm->ly0 = y0; 00069 msm->lz0 = z0; 00070 msm->dx = lx / mx; 00071 msm->dy = ly / my; 00072 msm->dz = lz / mz; 00073 msm->px = vx; 00074 msm->py = vy; 00075 msm->pz = vz; 00076 msm->isperiodic = 0; /* reset flags for periodicity */ 00077 /* zero length indicates nonperiodic direction */ 00078 if (vx > 0) SET_X(msm->isperiodic); 00079 if (vy > 0) SET_Y(msm->isperiodic); 00080 if (vz > 0) SET_Z(msm->isperiodic); 00081 00082 err = Msmpot_setup(msm); 00083 if (err != MSMPOT_SUCCESS) return ERROR(err); 00084 00085 memset(epotmap, 0, mx*my*mz*sizeof(float)); /* clear epotmap */ 00086 00087 00088 #if !defined(MSMPOT_LONGRANGE_ONLY) 00089 #ifdef MSMPOT_CUDA 00090 if (msm->use_cuda_shortrng) { 00091 err = Msmpot_cuda_compute_shortrng(msm->msmcuda); 00092 if (err && msm->cuda_optional) { /* fall back on CPU */ 00093 #ifdef USE_BIN_HASHING 00094 err = Msmpot_compute_shortrng_bins(msm); 00095 #else 00096 err = Msmpot_compute_shortrng_linklist(msm, msm->atom, msm->natoms); 00097 #endif 00098 if (err) return ERROR(err); 00099 } 00100 else if (err) return ERROR(err); 00101 } 00102 else { 00103 #ifdef USE_BIN_HASHING 00104 err = Msmpot_compute_shortrng_bins(msm); 00105 #else 00106 err = Msmpot_compute_shortrng_linklist(msm, msm->atom, msm->natoms); 00107 #endif /* USE_BIN_HASHING */ 00108 if (err) return ERROR(err); 00109 } 00110 #else 00111 #ifdef USE_BIN_HASHING 00112 err = Msmpot_compute_shortrng_bins(msm); 00113 #else 00114 err = Msmpot_compute_shortrng_linklist(msm, msm->atom, msm->natoms); 00115 #endif /* USE_BIN_HASHING */ 00116 if (err) return ERROR(err); 00117 #endif 00118 #endif 00119 00120 #if !defined(MSMPOT_SHORTRANGE_ONLY) 00121 err = Msmpot_compute_longrng(msm); 00122 if (err) return ERROR(err); 00123 #endif 00124 00125 #ifdef MSMPOT_VERBOSE 00126 #ifdef MSMPOT_CHECKMAPINDEX 00127 printf("epotmap[%d]=%g\n", MAPINDEX, epotmap[MAPINDEX]); 00128 #endif 00129 #endif 00130 00131 return MSMPOT_SUCCESS; 00132 } 00133 00134 00135 00136 /*** long-range part *********************************************************/ 00137 00138 00139 int Msmpot_compute_longrng(Msmpot *msm) { 00140 int err = 0; 00141 00142 /* permit only cubic interpolation - for now */ 00143 switch (msm->interp) { 00144 case MSMPOT_INTERP_CUBIC: 00145 err = Msmpot_compute_longrng_cubic(msm); 00146 if (err) return ERROR(err); 00147 break; 00148 default: 00149 return ERRMSG(MSMPOT_ERROR_SUPPORT, 00150 "interpolation method not implemented"); 00151 } 00152 return MSMPOT_SUCCESS; 00153 } 00154 00155 00156 00157 /*** short-range part ********************************************************/ 00158 00159 00160 static int bin_evaluation(Msmpot *msm); 00161 00162 00163 /* 00164 * hash atoms into bins, evaluation of grid points loops over neighborhood 00165 * of bins, any overflowed bins are handled using linked list approach 00166 */ 00167 int Msmpot_compute_shortrng_bins(Msmpot *msm) { 00168 int err = 0; 00169 00170 REPORT("Using atom hashing into bins for short-range part."); 00171 00172 REPORT("Using tight neighborhood for nearby bins."); 00173 err = Msmpot_compute_shortrng_bin_neighborhood(msm, 00174 msm->bx, msm->by, msm->bz); 00175 if (err) return ERROR(err); 00176 00177 err = Msmpot_compute_shortrng_bin_hashing(msm); 00178 if (err) return ERROR(err); 00179 00180 err = bin_evaluation(msm); 00181 if (err) return ERROR(err); 00182 00183 if (msm->nover > 0) { 00184 #ifdef MSMPOT_REPORT 00185 char msg[120]; 00186 sprintf(msg, "Extra atoms (%d) from overflowed bins " 00187 "must also be evaluated.", msm->nover); 00188 REPORT(msg); 00189 #endif 00190 err = Msmpot_compute_shortrng_linklist(msm, msm->over, msm->nover); 00191 if (err) return ERROR(err); 00192 } 00193 00194 return MSMPOT_SUCCESS; 00195 } 00196 00197 00198 /* 00199 * Determine a tight neighborhood of bins, given a region of map points. 00200 * Store index offsets from (0,0,0). Requires rectangular bins. 00201 * 00202 * (rx,ry,rz) gives size of region from which we select one or more map points. 00203 * To setup neighborhood for GPU, the region is the smallest (unrolled) 00204 * block of space for a thread block. For CPU, set (rx,ry,rz)=(bx,by,bz). 00205 * 00206 * Space allocated as needed for bin offsets. 00207 */ 00208 00209 /* 00210 * XXX Intel icc 9.0 is choking on this routine. 00211 * The problem seems to be the triply nested loop. 00212 * Although turning off optimizations for this routine is undesirable, 00213 * it allows the compiler to keep going. 00214 */ 00215 #if defined( __INTEL_COMPILER) 00216 #pragma optimize("",off) 00217 #endif 00218 int Msmpot_compute_shortrng_bin_neighborhood(Msmpot *msm, 00219 float rx, /* region length in x-dimension */ 00220 float ry, /* region length in y-dimension */ 00221 float rz /* region length in z-dimension */ 00222 ) { 00223 00224 union { /* use this to do exact compare of floats */ 00225 float f; 00226 int i; 00227 } v0, v1; 00228 00229 const float cutoff = msm->a; /* cutoff distance */ 00230 00231 const float bx = msm->bx; /* bin length along x */ 00232 const float by = msm->by; /* bin length along y */ 00233 const float bz = msm->bz; /* bin length along z */ 00234 00235 const float invbx = msm->invbx; /* 1 / bx */ 00236 const float invby = msm->invby; /* 1 / by */ 00237 const float invbz = msm->invbz; /* 1 / bz */ 00238 00239 const float bx2 = bx*bx; 00240 const float by2 = by*by; 00241 const float bz2 = bz*bz; 00242 00243 float sqbindiag = 0.f; 00244 float r, r2; 00245 00246 int bpr0, bpr1; 00247 00248 int cx = (int) ceilf(cutoff * invbx); /* number of bins on cutoff in x */ 00249 int cy = (int) ceilf(cutoff * invby); /* number of bins on cutoff in y */ 00250 int cz = (int) ceilf(cutoff * invbz); /* number of bins on cutoff in z */ 00251 00252 int nbrx, nbry, nbrz; /* axes of ellipsoid for neighborhood */ 00253 int i, j, k; 00254 int maxboff; 00255 00256 int *boff; 00257 00258 /* x-direction */ 00259 v0.f = bx, v1.f = rx; 00260 if (v0.i == v1.i) { /* should be true for CPU code path */ 00261 bpr0 = bpr1 = 1; 00262 } 00263 else { 00264 bpr0 = (int) floorf(rx*invbx); 00265 bpr1 = (int) ceilf(rx*invbx); 00266 } 00267 00268 if (bpr0 == bpr1) { /* special case: bins exactly cover region */ 00269 nbrx = cx + (bpr0 >> 1); /* brp0 / 2 */ 00270 /* if bin cover is odd, use square of half-bin-length */ 00271 sqbindiag += ((bpr0 & 1) ? 0.25f : 1.f) * bx2; 00272 } 00273 else { 00274 nbrx = (int) ceilf((cutoff + 0.5f*rx + bx) * invbx); 00275 sqbindiag += bx2; 00276 } 00277 00278 /* y-direction */ 00279 v0.f = by, v1.f = ry; 00280 if (v0.i == v1.i) { /* should be true for CPU code path */ 00281 bpr0 = bpr1 = 1; 00282 } 00283 else { 00284 bpr0 = (int) floorf(ry*invby); 00285 bpr1 = (int) ceilf(ry*invby); 00286 } 00287 00288 if (bpr0 == bpr1) { /* special case: bins exactly cover region */ 00289 nbry = cy + (bpr0 >> 1); /* brp0 / 2 */ 00290 /* if bin cover is odd, use square of half-bin-length */ 00291 sqbindiag += ((bpr0 & 1) ? 0.25f : 1.f) * by2; 00292 } 00293 else { 00294 nbry = (int) ceilf((cutoff + 0.5f*ry + by) * invby); 00295 sqbindiag += by2; 00296 } 00297 00298 /* z-direction */ 00299 v0.f = bz, v1.f = rz; 00300 if (v0.i == v1.i) { /* should be true for CPU code path */ 00301 bpr0 = bpr1 = 1; 00302 } 00303 else { 00304 bpr0 = (int) floorf(rz*invbz); 00305 bpr1 = (int) ceilf(rz*invbz); 00306 } 00307 00308 if (bpr0 == bpr1) { /* special case: bins exactly cover region */ 00309 nbrz = cz + (bpr0 >> 1); /* brp0 / 2 */ 00310 /* if bin cover is odd, use square of half-bin-length */ 00311 sqbindiag += ((bpr0 & 1) ? 0.25f : 1.f) * bz2; 00312 } 00313 else { 00314 nbrz = (int) ceilf((cutoff + 0.5f*rz + bz) * invbz); 00315 sqbindiag += bz2; 00316 } 00317 00318 r = cutoff + 0.5f*sqrtf(rx*rx + ry*ry + rz*rz) + sqrtf(sqbindiag); 00319 r2 = r*r; 00320 00321 /* upper bound on the size of the neighborhood */ 00322 maxboff = (2*nbrx+1) * (2*nbry+1) * (2*nbrz+1); 00323 00324 if (msm->maxboff < maxboff) { 00325 void *v = realloc(msm->boff, 3*maxboff*sizeof(int)); 00326 if (NULL == v) return ERROR(MSMPOT_ERROR_MALLOC); 00327 msm->boff = (int *) v; 00328 msm->maxboff = maxboff; 00329 } 00330 00331 boff = msm->boff; 00332 for (k = -nbrz; k <= nbrz; k++) { 00333 for (j = -nbry; j <= nbry; j++) { 00334 for (i = -nbrx; i <= nbrx; i++) { 00335 if ((i*i*bx2 + j*j*by2 + k*k*bz2) >= r2) continue; 00336 *boff++ = i; 00337 *boff++ = j; 00338 *boff++ = k; 00339 } 00340 } 00341 } 00342 msm->nboff = (boff - msm->boff) / 3; /* count of the neighborhood */ 00343 00344 return MSMPOT_SUCCESS; 00345 } 00346 #if defined(__INTEL_COMPILER) 00347 #pragma optimize("",on) 00348 #endif 00349 00350 00351 int Msmpot_compute_shortrng_bin_hashing(Msmpot *msm) { 00352 00353 union { /* use this to do exact compare of floats */ 00354 float f; 00355 int i; 00356 } q; /* atom charge */ 00357 00358 int i, j, k; 00359 int n; /* index atoms */ 00360 int nb; /* index bins */ 00361 const int ispx = (IS_SET_X(msm->isperiodic) != 0); 00362 const int ispy = (IS_SET_Y(msm->isperiodic) != 0); 00363 const int ispz = (IS_SET_Z(msm->isperiodic) != 0); 00364 const float px0 = msm->px0; /* domain origin */ 00365 const float py0 = msm->py0; 00366 const float pz0 = msm->pz0; 00367 const float px = msm->px; /* domain lengths */ 00368 const float py = msm->py; 00369 const float pz = msm->pz; 00370 const float invbx = msm->invbx; 00371 const float invby = msm->invby; 00372 const float invbz = msm->invbz; 00373 float x, y, z; /* atom position relative to (xmin,ymin,zmin) */ 00374 00375 const float *atom = msm->atom; 00376 const int natoms = msm->natoms; 00377 00378 const int nbx = msm->nbx; 00379 const int nby = msm->nby; 00380 const int nbz = msm->nbz; 00381 const int nbins = (nbx * nby * nbz); 00382 const int bindepth = msm->bindepth; 00383 float *bin = msm->bin; 00384 int *bincount = msm->bincount; 00385 00386 memset(bin, 0, nbins*bindepth*ATOM_SIZE*sizeof(float)); /* clear bins */ 00387 memset(bincount, 0, nbins*sizeof(int)); 00388 msm->nover = 0; /* clear count of overflowed bins */ 00389 00390 for (n = 0; n < natoms; n++) { 00391 00392 /* atoms with zero charge make no contribution */ 00393 q.f = atom[ATOM_Q(n)]; 00394 if (0==q.i) continue; 00395 00396 x = atom[ATOM_X(n)] - px0; /* atom position wrt domain */ 00397 y = atom[ATOM_Y(n)] - py0; 00398 z = atom[ATOM_Z(n)] - pz0; 00399 i = (int) floorf(x * invbx); 00400 j = (int) floorf(y * invby); 00401 k = (int) floorf(z * invbz); 00402 00403 /* for periodic directions, wrap bin index and atom coordinate */ 00404 if (ispx) { 00405 if (i < 0) do { i += nbx; x += px; } while (i < 0); 00406 else if (i >= nbx) do { i -= nbx; x -= px; } while (i >= nbx); 00407 } 00408 if (ispy) { 00409 if (j < 0) do { j += nby; y += py; } while (j < 0); 00410 else if (j >= nby) do { j -= nby; y -= py; } while (j >= nby); 00411 } 00412 if (ispz) { 00413 if (k < 0) do { k += nbz; z += pz; } while (k < 0); 00414 else if (k >= nbz) do { k -= nbz; z -= pz; } while (k >= nbz); 00415 } 00416 00417 #if 0 00418 if (i < 0 || i >= nbx || 00419 j < 0 || j >= nby || 00420 k < 0 || k >= nbz) { 00421 printf("nbx=%d nby=%d nbz=%d\n", nbx, nby, nbz); 00422 printf("i=%d j=%d k=%d\n", i, j, k); 00423 return ERROR(MSMPOT_ERROR_ASSERT); 00424 } 00425 #endif 00426 00427 nb = (k*nby + j)*nbx + i; 00428 ASSERT(0 <= nb && nb < nbins); 00429 if (bincount[nb] < bindepth) { 00430 float *p = bin + (nb*bindepth + bincount[nb])*ATOM_SIZE; 00431 *p++ = x; 00432 *p++ = y; 00433 *p++ = z; 00434 *p = q.f; 00435 bincount[nb]++; 00436 } 00437 else { /* atom must be appended to overflow bin array */ 00438 int nover = msm->nover; 00439 float *p; 00440 if (nover == msm->maxover) { /* extend length of overflow bin array */ 00441 void *v = realloc(msm->over, 2*msm->maxover*ATOM_SIZE*sizeof(float)); 00442 if (NULL == v) return ERROR(MSMPOT_ERROR_MALLOC); 00443 msm->over = (float *) v; 00444 msm->maxover *= 2; 00445 } 00446 p = msm->over + nover*ATOM_SIZE; 00447 *p++ = x; 00448 *p++ = y; 00449 *p++ = z; 00450 *p = q.f; 00451 msm->nover++; 00452 } 00453 } 00454 return MSMPOT_SUCCESS; 00455 } /* Msmpot_compute_shortrng_bin_hashing() */ 00456 00457 00458 int bin_evaluation(Msmpot *msm) { 00459 00460 const float lx0 = msm->lx0; /* epotmap origin */ 00461 const float ly0 = msm->ly0; 00462 const float lz0 = msm->lz0; 00463 00464 const float dx = msm->dx; /* epotmap spacings */ 00465 const float dy = msm->dy; 00466 const float dz = msm->dz; 00467 00468 const float px0 = msm->lx0; /* domain origin */ 00469 const float py0 = msm->ly0; 00470 const float pz0 = msm->lz0; 00471 00472 const float px = msm->px; /* domain length */ 00473 const float py = msm->py; 00474 const float pz = msm->pz; 00475 00476 const float invbx = msm->invbx; /* inverse bin length */ 00477 const float invby = msm->invby; 00478 const float invbz = msm->invbz; 00479 00480 const int nbx = msm->nbx; /* number of bins along each dimension */ 00481 const int nby = msm->nby; 00482 const int nbz = msm->nbz; 00483 00484 const int bindepth = msm->bindepth; /* number of atom slots per bin */ 00485 00486 const int ispx = (IS_SET_X(msm->isperiodic) != 0); 00487 const int ispy = (IS_SET_Y(msm->isperiodic) != 0); 00488 const int ispz = (IS_SET_Z(msm->isperiodic) != 0); 00489 00490 const float a = msm->a; /* cutoff for splitting */ 00491 const float a2 = a*a; 00492 const float a_1 = 1/a; 00493 const float inv_a2 = a_1 * a_1; 00494 00495 const int split = msm->split; 00496 00497 float *epotmap = msm->epotmap; /* the map */ 00498 00499 const int mx = msm->mx; /* lengths of epotmap lattice */ 00500 const int my = msm->my; 00501 const int mz = msm->mz; 00502 00503 int n; 00504 int ic, jc, kc; 00505 int ig, jg, kg; 00506 int i, j, k; 00507 00508 for (kg = 0; kg < mz; kg++) { /* loop over map points */ 00509 for (jg = 0; jg < my; jg++) { 00510 for (ig = 0; ig < mx; ig++) { 00511 00512 float e = 0; /* accumulate potential on each map point */ 00513 00514 float xg = ig * dx; /* coordinate of map point wrt map origin */ 00515 float yg = jg * dy; 00516 float zg = kg * dz; 00517 00518 int index = (kg*my + jg)*mx + ig; /* map point index */ 00519 00520 xg += lx0; /* absolute position */ 00521 yg += ly0; 00522 zg += lz0; 00523 00524 xg -= px0; /* position wrt domain origin */ 00525 yg -= py0; 00526 zg -= pz0; 00527 00528 ic = (int) floorf(xg * invbx); /* find bin containing this point */ 00529 jc = (int) floorf(yg * invby); 00530 kc = (int) floorf(zg * invbz); 00531 00532 for (n = 0; n < msm->nboff; n++) { 00533 float *pbin; /* point into this bin */ 00534 int bindex, bincount, m; 00535 int iw, jw, kw; 00536 00537 float xw = 0; /* periodic offset for wrapping coordinates */ 00538 float yw = 0; 00539 float zw = 0; 00540 00541 i = ic + msm->boff[3*n ]; 00542 j = jc + msm->boff[3*n + 1]; 00543 k = kc + msm->boff[3*n + 2]; 00544 00545 if ( ! ispx && (i < 0 || i >= nbx) ) continue; 00546 if ( ! ispy && (j < 0 || j >= nby) ) continue; 00547 if ( ! ispz && (k < 0 || k >= nbz) ) continue; 00548 00549 iw = i; 00550 jw = j; 00551 kw = k; 00552 00553 if (ispx) { /* wrap bin neighborhood around periodic edges */ 00554 while (iw < 0) { iw += nbx; xw -= px; } 00555 while (iw >= nbx) { iw -= nbx; xw += px; } 00556 } 00557 if (ispy) { 00558 while (jw < 0) { jw += nby; yw -= py; } 00559 while (jw >= nby) { jw -= nby; yw += py; } 00560 } 00561 if (ispz) { 00562 while (kw < 0) { kw += nbz; zw -= pz; } 00563 while (kw >= nbz) { kw -= nbz; zw += pz; } 00564 } 00565 00566 bindex = (kw*nby + jw)*nbx + iw; /* the bin index */ 00567 pbin = msm->bin + bindex*bindepth*ATOM_SIZE; /* first atom */ 00568 bincount = msm->bincount[bindex]; 00569 00570 for (m = 0; m < bincount; m++) { 00571 float x = *pbin++; /* get next atom from bin */ 00572 float y = *pbin++; 00573 float z = *pbin++; 00574 float q = *pbin++; 00575 00576 float rx = (x+xw) - xg; /* positions both relative to domain */ 00577 float ry = (y+yw) - yg; 00578 float rz = (z+zw) - zg; 00579 00580 float r2 = rx*rx + ry*ry + rz*rz; 00581 float s, gs; 00582 00583 if (r2 >= a2) continue; 00584 00585 s = r2 * inv_a2; 00586 SPOLY(&gs, s, split); /* macro expands into switch */ 00587 e += q * (1/sqrtf(r2) - a_1 * gs); /* accumulate potential */ 00588 } /* loop over binned atoms */ 00589 00590 } /* loop over bin neighborhood */ 00591 00592 epotmap[index] = e; /* store entire potential at map point */ 00593 } 00594 } 00595 } /* loop over map points */ 00596 00597 return MSMPOT_SUCCESS; 00598 } 00599 00600 00601 static int linklist_hashing(Msmpot *msm, const float *atom, int natoms); 00602 static int linklist_evaluation(Msmpot *msm, const float *atom); 00603 00604 00605 /* 00606 * explicitly pass atoms, so we can use this for bin overflow array 00607 */ 00608 int Msmpot_compute_shortrng_linklist(Msmpot *msm, 00609 const float *atom, /* array of atoms stored x/y/z/q */ 00610 int natoms /* number of atoms in array */ 00611 ) { 00612 int err = 0; 00613 00614 REPORT("Using linked lists of atoms for short-range part."); 00615 00616 err = linklist_hashing(msm, atom, natoms); 00617 if (err) return ERROR(err); 00618 00619 err = linklist_evaluation(msm, atom); 00620 if (err) return ERROR(err); 00621 00622 return MSMPOT_SUCCESS; 00623 } 00624 00625 00626 /* 00627 * perform spatial hashing of atoms, store results using linked list 00628 */ 00629 int linklist_hashing(Msmpot *msm, const float *atom, int natoms) { 00630 00631 union { /* use this to do exact compare of floats */ 00632 float f; 00633 int i; 00634 } q; /* atom charge */ 00635 00636 int i, j, k; 00637 int n; /* index atoms */ 00638 int nb; /* index bins */ 00639 const int ispx = (IS_SET_X(msm->isperiodic) != 0); 00640 const int ispy = (IS_SET_Y(msm->isperiodic) != 0); 00641 const int ispz = (IS_SET_Z(msm->isperiodic) != 0); 00642 const float x0 = msm->px0; /* domain origin */ 00643 const float y0 = msm->py0; 00644 const float z0 = msm->pz0; 00645 const float invbx = msm->invbx; 00646 const float invby = msm->invby; 00647 const float invbz = msm->invbz; 00648 float x, y, z; /* atom position relative to (xmin,ymin,zmin) */ 00649 int *first = msm->first_atom_index; /* first index in grid cell list */ 00650 int *next = msm->next_atom_index; /* next index in grid cell list */ 00651 const int nbx = msm->nbx; 00652 const int nby = msm->nby; 00653 const int nbz = msm->nbz; 00654 const int nbins = (nbx * nby * nbz); 00655 00656 #ifdef MSMPOT_VERBOSE 00657 printf("bin array: %d %d %d\n", nbx, nby, nbz); 00658 printf("bin lengths: %f %f %f\n", 1/invbx, 1/invby, 1/invbz); 00659 #endif 00660 00661 /* must clear first and next links before we hash */ 00662 for (nb = 0; nb < nbins; nb++) first[nb] = -1; 00663 for (n = 0; n < natoms; n++) next[n] = -1; 00664 00665 for (n = 0; n < natoms; n++) { 00666 00667 /* atoms with zero charge make no contribution */ 00668 q.f = atom[ATOM_Q(n)]; 00669 if (0==q.i) continue; 00670 00671 x = atom[ATOM_X(n)] - x0; 00672 y = atom[ATOM_Y(n)] - y0; 00673 z = atom[ATOM_Z(n)] - z0; 00674 i = (int) floorf(x * invbx); 00675 j = (int) floorf(y * invby); 00676 k = (int) floorf(z * invbz); 00677 00678 /* for periodic directions, make sure bin index is wrapped */ 00679 if (ispx) { 00680 if (i < 0) do { i += nbx; } while (i < 0); 00681 else if (i >= nbx) do { i -= nbx; } while (i >= nbx); 00682 } 00683 if (ispy) { 00684 if (j < 0) do { j += nby; } while (j < 0); 00685 else if (j >= nby) do { j -= nby; } while (j >= nby); 00686 } 00687 if (ispz) { 00688 if (k < 0) do { k += nbz; } while (k < 0); 00689 else if (k >= nbz) do { k -= nbz; } while (k >= nbz); 00690 } 00691 00692 nb = (k*nby + j)*nbx + i; /* flat bin index */ 00693 next[n] = first[nb]; /* insert index n at front of list nb */ 00694 first[nb] = n; 00695 } 00696 return MSMPOT_SUCCESS; 00697 } /* linklist_hashing() */ 00698 00699 00700 /* 00701 * evaluate short-range contribution of atoms to mapped potential, 00702 * must first perform linklist_hashing() 00703 */ 00704 int linklist_evaluation(Msmpot *msm, const float *atom) { 00705 00706 const float lx0 = msm->lx0; /* epotmap origin */ 00707 const float ly0 = msm->ly0; 00708 const float lz0 = msm->lz0; 00709 00710 const float dx = msm->dx; /* epotmap spacings */ 00711 const float dy = msm->dy; 00712 const float dz = msm->dz; 00713 const float inv_dx = 1/dx; 00714 const float inv_dy = 1/dy; 00715 const float inv_dz = 1/dz; 00716 00717 const float px = msm->px; 00718 const float py = msm->py; 00719 const float pz = msm->pz; 00720 00721 const float plx = px - msm->lx; 00722 const float ply = py - msm->ly; 00723 const float plz = pz - msm->lz; 00724 00725 const float pxd = px * inv_dx; 00726 const float pyd = py * inv_dy; 00727 const float pzd = pz * inv_dz; 00728 00729 const int ispx = (IS_SET_X(msm->isperiodic) != 0); 00730 const int ispy = (IS_SET_Y(msm->isperiodic) != 0); 00731 const int ispz = (IS_SET_Z(msm->isperiodic) != 0); 00732 00733 const float a = msm->a; /* cutoff for splitting */ 00734 const float a2 = a*a; 00735 const float a_1 = 1/a; 00736 const float inv_a2 = a_1 * a_1; 00737 00738 float x, y, z; /* position of atom relative to epotmap origin */ 00739 float q; /* charge on atom */ 00740 00741 float xg, yg, zg; /* positions given in grid spacings */ 00742 00743 int i, j, k; 00744 int ic, jc, kc; /* closest map point less than or equal to atom */ 00745 int ia, ib; /* extent of surrounding box in x-direction */ 00746 int ja, jb; /* extent of surrounding box in y-direction */ 00747 int ka, kb; /* extent of surrounding box in z-direction */ 00748 int iw, jw, kw; /* wrapped epotmap indexes within loops */ 00749 int n; /* index atoms */ 00750 int nbs; /* index bins */ 00751 long index; /* index into epotmap */ 00752 long jkoff; /* tabulate stride into epotmap */ 00753 int koff; /* tabulate stride into epotmap */ 00754 float rx, ry, rz; /* distances between an atom and a map point */ 00755 float rz2, ryrz2; /* squared circle and cylinder distances */ 00756 float r2; /* squared pairwise distance */ 00757 float s; /* normalized distance squared */ 00758 float gs; /* result of normalized splitting */ 00759 float e; /* contribution to short-range potential */ 00760 00761 const int split = msm->split; 00762 00763 const int mx = msm->mx; /* lengths of epotmap lattice */ 00764 const int my = msm->my; 00765 const int mz = msm->mz; 00766 00767 const int mri = (int) ceilf(a * inv_dx) - 1; 00768 const int mrj = (int) ceilf(a * inv_dy) - 1; 00769 const int mrk = (int) ceilf(a * inv_dz) - 1; 00770 /* lengths (measured in points) of ellipsoid axes */ 00771 00772 const int nbins = (msm->nbx * msm->nby * msm->nbz); 00773 const int *first = msm->first_atom_index; 00774 const int *next = msm->next_atom_index; 00775 00776 float *epotmap = msm->epotmap; 00777 #if 0 00778 float *pem = NULL; /* point into epotmap */ 00779 #endif 00780 00781 for (nbs = 0; nbs < nbins; nbs++) { 00782 for (n = first[nbs]; n != -1; n = next[n]) { 00783 00784 /* position of atom relative to epotmap origin */ 00785 x = atom[ATOM_X(n)] - lx0; 00786 y = atom[ATOM_Y(n)] - ly0; 00787 z = atom[ATOM_Z(n)] - lz0; 00788 00789 /* charge on atom */ 00790 q = atom[ATOM_Q(n)]; 00791 00792 /* 00793 * make sure atom is wrapped into cell along periodic directions 00794 * 00795 * NOTE: we can avoid this redundancy by storing wrapped 00796 * coordinate during geometric hashing 00797 */ 00798 if (ispx) { 00799 if (x < 0) do { x += px; } while (x < 0); 00800 else if (x >= px) do { x -= px; } while (x >= px); 00801 } 00802 if (ispy) { 00803 if (y < 0) do { y += py; } while (y < 0); 00804 else if (y >= py) do { y -= py; } while (y >= py); 00805 } 00806 if (ispz) { 00807 if (z < 0) do { z += pz; } while (z < 0); 00808 else if (z >= pz) do { z -= pz; } while (z >= pz); 00809 } 00810 00811 /* calculate position in units of grid spacings */ 00812 xg = x * inv_dx; 00813 yg = y * inv_dy; 00814 zg = z * inv_dz; 00815 00816 /* find closest map point with position less than or equal to atom */ 00817 ic = (int) floorf(xg); 00818 jc = (int) floorf(yg); 00819 kc = (int) floorf(zg); 00820 00821 /* find extent of surrounding box of map points */ 00822 ia = ic - mri; 00823 ib = ic + mri + 1; 00824 ja = jc - mrj; 00825 jb = jc + mrj + 1; 00826 ka = kc - mrk; 00827 kb = kc + mrk + 1; 00828 00829 /* for nonperiodic directions, trim box edges to be within map */ 00830 if ( ! ispx ) { 00831 if (ia < 0) ia = 0; 00832 if (ib >= mx) ib = mx - 1; 00833 } 00834 else { 00835 if (ia-1 < (mx-1) - pxd) { 00836 /* atom influence wraps around low end of cell */ 00837 int na = ((int) floorf(xg + pxd)) - mri; 00838 if (na < 0) na = 0; 00839 ia = na - mx; 00840 } 00841 if (ib+1 > pxd) { 00842 /* atom influence wraps around high end of cell */ 00843 int nb = ((int) floorf(xg - pxd)) + mri + 1; 00844 if (nb >= mx) nb = mx - 1; 00845 ib = nb + mx; 00846 } 00847 } 00848 00849 if ( ! ispy ) { 00850 if (ja < 0) ja = 0; 00851 if (jb >= my) jb = my - 1; 00852 } 00853 else { 00854 if (ja-1 < (my-1) - pyd) { 00855 /* atom influence wraps around low end of cell */ 00856 int na = ((int) floorf(yg + pyd)) - mrj; 00857 if (na < 0) na = 0; 00858 ja = na - my; 00859 } 00860 if (jb+1 > pyd) { 00861 /* atom influence wraps around high end of cell */ 00862 int nb = ((int) floorf(yg - pyd)) + mrj + 1; 00863 if (nb >= my) nb = my - 1; 00864 jb = nb + my; 00865 } 00866 } 00867 00868 if ( ! ispz ) { 00869 if (ka < 0) ka = 0; 00870 if (kb >= mz) kb = mz - 1; 00871 } 00872 else { 00873 if (ka-1 < (mz-1) - pzd) { 00874 /* atom influence wraps around low end of cell */ 00875 int na = ((int) floorf(zg + pzd)) - mrk; 00876 if (na < 0) na = 0; 00877 ka = na - mz; 00878 } 00879 if (kb+1 > pzd) { 00880 /* atom influence wraps around high end of cell */ 00881 int nb = ((int) floorf(zg - pzd)) + mrk + 1; 00882 if (nb >= mz) nb = mz - 1; 00883 kb = nb + mz; 00884 } 00885 } 00886 00887 /* loop over surrounding map points, add contribution into epotmap */ 00888 for (k = ka; k <= kb; k++) { 00889 rz = k*dz - z; 00890 kw = k; 00891 if (k < 0) { 00892 rz -= plz; 00893 kw += mz; 00894 } 00895 else if (k >= mz) { 00896 rz += plz; 00897 kw -= mz; 00898 } 00899 koff = kw*my; 00900 rz2 = rz*rz; 00901 00902 #ifdef MSMPOT_CHECK_CIRCLE_CPU 00903 /* clipping to the circle makes it slower */ 00904 if (rz2 >= a2) continue; 00905 #endif 00906 00907 for (j = ja; j <= jb; j++) { 00908 ry = j*dy - y; 00909 jw = j; 00910 if (j < 0) { 00911 ry -= ply; 00912 jw += my; 00913 } 00914 else if (j >= my) { 00915 ry += ply; 00916 jw -= my; 00917 } 00918 jkoff = (koff + jw)*(long)mx; 00919 ryrz2 = ry*ry + rz2; 00920 00921 #ifdef MSMPOT_CHECK_CYLINDER_CPU 00922 /* clipping to the cylinder is faster */ 00923 if (ryrz2 >= a2) continue; 00924 #endif 00925 00926 #if 0 00927 #if defined(__INTEL_COMPILER) 00928 for (i = ia; i <= ib; i++, pem++, rx += dx) { 00929 r2 = rx*rx + ryrz2; 00930 s = r2 * inv_a2; 00931 gs = 1.875f + s*(-1.25f + s*0.375f); /* TAYLOR2 */ 00932 e = q * (1/sqrtf(r2) - a_1 * gs); 00933 *pem += (r2 < a2 ? e : 0); /* LOOP VECTORIZED! */ 00934 } 00935 #else 00936 for (i = ia; i <= ib; i++, pem++, rx += dx) { 00937 r2 = rx*rx + ryrz2; 00938 if (r2 >= a2) continue; 00939 s = r2 * inv_a2; 00940 gs = 1.875f + s*(-1.25f + s*0.375f); /* TAYLOR2 */ 00941 e = q * (1/sqrtf(r2) - a_1 * gs); 00942 *pem += e; 00943 } 00944 #endif 00945 #else 00946 for (i = ia; i <= ib; i++) { 00947 rx = i*dx - x; 00948 iw = i; 00949 if (i < 0) { 00950 rx -= plx; 00951 iw += mx; 00952 } 00953 else if (i >= mx) { 00954 rx += plx; 00955 iw -= mx; 00956 } 00957 index = jkoff + iw; 00958 r2 = rx*rx + ryrz2; 00959 if (r2 >= a2) continue; 00960 s = r2 * inv_a2; 00961 SPOLY(&gs, s, split); /* macro expands into switch */ 00962 e = q * (1/sqrtf(r2) - a_1 * gs); 00963 epotmap[index] += e; 00964 } 00965 #endif 00966 00967 } 00968 } /* end loop over surrounding map points */ 00969 00970 } /* end loop over atoms in grid cell */ 00971 } /* end loop over grid cells */ 00972 00973 return MSMPOT_SUCCESS; 00974 } /* linklist_evaluation() */