00001 /*************************************************************************** 00002 *cr 00003 *cr (C) Copyright 1995-2019 The Board of Trustees of the 00004 *cr University of Illinois 00005 *cr All Rights Reserved 00006 *cr 00007 ***************************************************************************/ 00008 00009 /*************************************************************************** 00010 * RCS INFORMATION: 00011 * 00012 * $RCSfile: BondSearch.C,v $ 00013 * $Author: johns $ $Locker: $ $State: Exp $ 00014 * $Revision: 1.77 $ $Date: 2020年07月08日 04:22:12 $ 00015 * 00016 *************************************************************************** 00017 * DESCRIPTION: 00018 * 00019 * Distance based bond search code 00020 * 00021 ***************************************************************************/ 00022 00023 #include <stdio.h> 00024 #include <math.h> 00025 #include <stdlib.h> 00026 #include "BondSearch.h" 00027 #include "Timestep.h" 00028 #include "BaseMolecule.h" 00029 #include "Molecule.h" 00030 #include "Inform.h" 00031 #include "WKFThreads.h" 00032 #include "WKFUtils.h" 00033 #include <ctype.h> // needed for isdigit() 00034 #include <string.h> 00035 00036 00037 GridSearchPairlist *vmd_gridsearch_bonds(const float *pos, const float *radii, 00038 int natoms, float pairdist, int maxpairs) { 00039 float min[3], max[3]; 00040 int i, xb, yb, zb, xytotb, totb; 00041 int **boxatom, *numinbox, *maxinbox, **nbrlist; 00042 int numon = 0; 00043 float sidelen[3], volume; 00044 int paircount = 0; 00045 00046 // find bounding box for selected atoms, and number of atoms in selection. 00047 #if 1 00048 minmax_3fv_aligned(pos, natoms, min, max); 00049 #else 00050 find_minmax_all(pos, natoms, min, max); 00051 #endif 00052 00053 // check for NaN coordinates propagating to the bounding box result 00054 if (!(max[0] >= min[0] && max[1] >= min[1] && max[2] >= min[2])) { 00055 msgErr << "vmd_gridsearch_bonds: NaN coordinates in bounds, aborting!" << sendmsg; 00056 return NULL; 00057 } 00058 00059 // do sanity checks and complain if we've got bogus atom coordinates, 00060 // we shouldn't ever have density higher than 0.1 atom/A^3, but we'll 00061 // be generous and allow much higher densities. 00062 if (maxpairs != -1) { 00063 vec_sub(sidelen, max, min); 00064 // include estimate for atom radius (1 Angstrom) in volume determination 00065 volume = fabsf((sidelen[0] + 2.0f) * (sidelen[1] + 2.0f) * (sidelen[2] + 2.0f)); 00066 if ((numon / volume) > 1.0) { 00067 msgWarn << "vmd_gridsearch_bonds: insane atom density" << sendmsg; 00068 } 00069 } 00070 00071 // I don't want the grid to get too large, otherwise I could run out 00072 // of memory. Octrees would be cool, but I'll just limit the grid size 00073 // and let the performance degrade a little for pathological systems. 00074 // Note that pairdist^2 is what gets used for the actual distance checks; 00075 // from here on out pairdist is only used to set the grid size, so we 00076 // can set it to anything larger than the original pairdist. 00077 const int MAXBOXES = 4000000; 00078 totb = MAXBOXES + 1; 00079 00080 float newpairdist = pairdist; 00081 float xrange = max[0]-min[0]; 00082 float yrange = max[1]-min[1]; 00083 float zrange = max[2]-min[2]; 00084 do { 00085 pairdist = newpairdist; 00086 const float invpairdist = 1.0f / pairdist; 00087 xb = ((int)(xrange*invpairdist))+1; 00088 yb = ((int)(yrange*invpairdist))+1; 00089 zb = ((int)(zrange*invpairdist))+1; 00090 xytotb = yb * xb; 00091 totb = xytotb * zb; 00092 newpairdist = pairdist * 1.26f; // cbrt(2) is about 1.26 00093 } while (totb > MAXBOXES || totb < 1); // check for integer wraparound too 00094 00095 // 2. Sort each atom into appropriate bins 00096 boxatom = (int **) calloc(1, totb*sizeof(int *)); 00097 numinbox = (int *) calloc(1, totb*sizeof(int)); 00098 maxinbox = (int *) calloc(1, totb*sizeof(int)); 00099 if (boxatom == NULL || numinbox == NULL || maxinbox == NULL) { 00100 if (boxatom != NULL) 00101 free(boxatom); 00102 if (numinbox != NULL) 00103 free(numinbox); 00104 if (maxinbox != NULL) 00105 free(maxinbox); 00106 msgErr << "Bondsearch memory allocation failed, bailing out" << sendmsg; 00107 return NULL; // ran out of memory, bail out! 00108 } 00109 00110 const float invpairdist = 1.0f / pairdist; 00111 for (i=0; i<natoms; i++) { 00112 int axb, ayb, azb, aindex, num; 00113 00114 // compute box index for new atom 00115 const float *loc = pos + 3L*i; 00116 axb = (int)((loc[0] - min[0])*invpairdist); 00117 ayb = (int)((loc[1] - min[1])*invpairdist); 00118 azb = (int)((loc[2] - min[2])*invpairdist); 00119 00120 // clamp box indices to valid range in case of FP error 00121 if (axb >= xb) axb = xb-1; 00122 if (ayb >= yb) ayb = yb-1; 00123 if (azb >= zb) azb = zb-1; 00124 00125 aindex = azb * xytotb + ayb * xb + axb; 00126 00127 // grow box if necessary 00128 if ((num = numinbox[aindex]) == maxinbox[aindex]) { 00129 boxatom[aindex] = (int *) realloc(boxatom[aindex], (num+4)*sizeof(int)); 00130 maxinbox[aindex] += 4; 00131 } 00132 00133 // store atom index in box 00134 boxatom[aindex][num] = i; 00135 numinbox[aindex]++; 00136 } 00137 free(maxinbox); 00138 00139 nbrlist = (int **) calloc(1, totb*sizeof(int *)); 00140 if (make_neighborlist(nbrlist, xb, yb, zb)) { 00141 if (boxatom != NULL) { 00142 for (i=0; i<totb; i++) { 00143 if (boxatom[i] != NULL) free(boxatom[i]); 00144 } 00145 free(boxatom); 00146 } 00147 if (nbrlist != NULL) { 00148 for (i=0; i<totb; i++) { 00149 if (nbrlist[i] != NULL) free(nbrlist[i]); 00150 } 00151 free(nbrlist); 00152 } 00153 free(numinbox); 00154 msgErr << "Bondsearch memory allocation failed, bailing out" << sendmsg; 00155 return NULL; // ran out of memory, bail out! 00156 } 00157 00158 // if maxpairs is "unlimited", set it to the biggest positive int 00159 if (maxpairs < 0) { 00160 maxpairs = 2147483647; 00161 } 00162 00163 // setup head of pairlist 00164 GridSearchPairlist *head, *cur; 00165 head = (GridSearchPairlist *) malloc(sizeof(GridSearchPairlist)); 00166 head->next = NULL; 00167 paircount = vmd_bondsearch_thr(pos, radii, head, totb, 00168 boxatom, numinbox, nbrlist, 00169 maxpairs, pairdist); 00170 00171 for (i=0; i<totb; i++) { 00172 free(boxatom[i]); 00173 free(nbrlist[i]); 00174 } 00175 free(boxatom); 00176 free(nbrlist); 00177 free(numinbox); 00178 00179 cur = head->next; 00180 free(head); 00181 00182 if (paircount > maxpairs) 00183 msgErr << "vmdgridsearch_bonds: exceeded pairlist sanity check, aborted" << sendmsg; 00184 00185 return cur; 00186 } 00187 00188 00189 00190 // bond search thread parameter structure 00191 typedef struct { 00192 int threadid; 00193 int threadcount; 00194 wkf_mutex_t *pairlistmutex; 00195 GridSearchPairlist * head; 00196 float *pos; 00197 float *radii; 00198 int totb; 00199 int **boxatom; 00200 int *numinbox; 00201 int **nbrlist; 00202 int maxpairs; 00203 float pairdist; 00204 } bondsearchthrparms; 00205 00206 // thread prototype 00207 extern "C" void * bondsearchthread(void *); 00208 00209 // setup and launch bond search threads 00210 int vmd_bondsearch_thr(const float *pos, const float *radii, 00211 GridSearchPairlist * head, 00212 int totb, int **boxatom, 00213 int *numinbox, int **nbrlist, int maxpairs, 00214 float pairdist) { 00215 int i; 00216 bondsearchthrparms *parms; 00217 wkf_thread_t * threads; 00218 wkf_mutex_t pairlistmutex; 00219 wkf_mutex_init(&pairlistmutex); // init mutex before use 00220 00221 int numprocs = wkf_thread_numprocessors(); 00222 00223 /* allocate array of threads */ 00224 threads = (wkf_thread_t *) calloc(numprocs * sizeof(wkf_thread_t), 1); 00225 00226 /* allocate and initialize array of thread parameters */ 00227 parms = (bondsearchthrparms *) malloc(numprocs * sizeof(bondsearchthrparms)); 00228 for (i=0; i<numprocs; i++) { 00229 parms[i].threadid = i; 00230 parms[i].threadcount = numprocs; 00231 parms[i].pairlistmutex = &pairlistmutex; 00232 parms[i].head = NULL; 00233 parms[i].pos = (float *) pos; 00234 parms[i].radii = (float *) radii; 00235 parms[i].totb = totb; 00236 parms[i].boxatom = boxatom; 00237 parms[i].numinbox = numinbox; 00238 parms[i].nbrlist = nbrlist; 00239 parms[i].maxpairs = maxpairs; 00240 parms[i].pairdist = pairdist; 00241 } 00242 00243 #if defined(VMDTHREADS) 00244 /* spawn child threads to do the work */ 00245 for (i=0; i<numprocs; i++) { 00246 wkf_thread_create(&threads[i], bondsearchthread, &parms[i]); 00247 } 00248 00249 /* join the threads after work is done */ 00250 for (i=0; i<numprocs; i++) { 00251 wkf_thread_join(threads[i], NULL); 00252 } 00253 #else 00254 bondsearchthread(&parms[0]); // single-threaded code 00255 #endif 00256 00257 // assemble final pairlist from sublists 00258 for (i=0; i<numprocs; i++) { 00259 if (parms[i].head != NULL) { 00260 GridSearchPairlist *tmp = head->next; 00261 head->next = parms[i].head; 00262 parms[i].head->next = tmp; 00263 } 00264 } 00265 00266 wkf_mutex_destroy(&pairlistmutex); // destroy mutex when finished 00267 00268 /* free thread parms */ 00269 free(parms); 00270 free(threads); 00271 00272 return 0; 00273 } 00274 00275 extern "C" void * bondsearchthread(void *voidparms) { 00276 int i, j, aindex; 00277 int paircount = 0; 00278 00279 bondsearchthrparms *parms = (bondsearchthrparms *) voidparms; 00280 00281 const int threadid = parms->threadid; 00282 const int threadcount = parms->threadcount; 00283 wkf_mutex_t *pairlistmutex = parms->pairlistmutex; 00284 const float *pos = parms->pos; 00285 const float *radii = parms->radii; 00286 const int totb = parms->totb; 00287 const int **boxatom = (const int **) parms->boxatom; 00288 const int *numinbox = parms->numinbox; 00289 const int **nbrlist = (const int **) parms->nbrlist; 00290 const int maxpairs = parms->maxpairs; 00291 const float pairdist = parms->pairdist; 00292 00293 ResizeArray<int> *pairs = new ResizeArray<int>; 00294 float sqdist = pairdist * pairdist; 00295 00296 wkfmsgtimer *msgt = wkf_msg_timer_create(5); 00297 for (aindex = threadid; (aindex < totb) && (paircount < maxpairs); aindex+=threadcount) { 00298 const int *tmpbox, *nbr; 00299 tmpbox = boxatom[aindex]; 00300 int anbox = numinbox[aindex]; 00301 00302 if (((aindex & 255) == 0) && wkf_msg_timer_timeout(msgt)) { 00303 char tmpbuf[128] = { 0 }; 00304 sprintf(tmpbuf, "%6.2f", (100.0f * aindex) / (float) totb); 00305 // XXX: we have to use printf here as msgInfo is not thread-safe. 00306 // msgInfo << "vmd_gridsearch_bonds (thread " << threadid << "): " 00307 // << tmpbuf << "% complete" << sendmsg; 00308 printf("vmd_gridsearch_bonds (thread %d): %s %% complete\n", 00309 threadid, tmpbuf); 00310 } 00311 00312 for (nbr = nbrlist[aindex]; (*nbr != -1) && (paircount < maxpairs); nbr++) { 00313 int nnbr=*nbr; 00314 const int *nbrbox = boxatom[nnbr]; 00315 int nbox=numinbox[nnbr]; 00316 int self = (aindex == nnbr) ? 1 : 0; 00317 00318 for (i=0; (i<anbox) && (paircount < maxpairs); i++) { 00319 int ind1 = tmpbox[i]; 00320 const float *p1 = pos + 3L*ind1; 00321 00322 // skip over self and already-tested atoms 00323 int startj = (self) ? i+1 : 0; 00324 00325 for (j=startj; (j<nbox) && (paircount < maxpairs); j++) { 00326 int ind2 = nbrbox[j]; 00327 const float *p2 = pos + 3L*ind2; 00328 float dx = p1[0] - p2[0]; 00329 float dy = p1[1] - p2[1]; 00330 float dz = p1[2] - p2[2]; 00331 float ds2 = dx*dx + dy*dy + dz*dz; 00332 00333 // perform distance test, but ignore pairs between atoms 00334 // with nearly identical coords 00335 if ((ds2 > sqdist) || (ds2 < 0.001)) 00336 continue; 00337 00338 if (radii) { // Do atom-specific distance check 00339 float cut = 0.6f * (radii[ind1] + radii[ind2]); 00340 if (ds2 > cut*cut) 00341 continue; 00342 } 00343 00344 pairs->append(ind1); 00345 pairs->append(ind2); 00346 paircount++; 00347 } 00348 } 00349 } 00350 } 00351 00352 // setup results pairlist node 00353 GridSearchPairlist *head; 00354 head = (GridSearchPairlist *) malloc(sizeof(GridSearchPairlist)); 00355 head->next = NULL; 00356 head->pairlist = pairs; 00357 00358 wkf_mutex_lock(pairlistmutex); // lock pairlist before update 00359 parms->head = head; 00360 wkf_mutex_unlock(pairlistmutex); // unlock pairlist after update 00361 00362 wkf_msg_timer_destroy(msgt); 00363 00364 return NULL; 00365 } 00366 00367 00368 00369 00370 00371 // determine bonds from position of atoms previously read. 00372 // If cutoff < 0, use vdw radius to determine if bonded. 00373 int vmd_bond_search(BaseMolecule *mol, const Timestep *ts, 00374 float cutoff, int dupcheck) { 00375 const float *pos; 00376 int natoms; 00377 int i; 00378 const float *radius = mol->radius(); 00379 00380 if (ts == NULL) { 00381 msgErr << "Internal Error: NULL Timestep in vmd_bond_search" << sendmsg; 00382 return 0; 00383 } 00384 00385 natoms = mol->nAtoms; 00386 if (natoms == 0 || cutoff == 0.0) 00387 return 1; 00388 00389 msgInfo << "Determining bond structure from distance search ..." << sendmsg; 00390 00391 if (dupcheck) 00392 msgInfo << "Eliminating bonds duplicated from existing structure..." << sendmsg; 00393 00394 // Set box distance to either the cutoff, or 1.2 times the largest VDW radius 00395 float dist = cutoff; 00396 if (cutoff < 0.0) { 00397 // set minimum cutoff distance for the case when the loaded molecule 00398 // has radii set to zero. This must be >0.0 or the grid search will hang. 00399 dist = 0.833f; 00400 for (i=0; i<natoms; i++) { 00401 float rad = radius[i]; 00402 if (rad > dist) dist = rad; 00403 } 00404 dist = 1.2f * dist; 00405 } 00406 00407 pos = ts->pos; 00408 00409 // Call the bond search to get all atom pairs within the specified distance 00410 // XXX set maxpairs to 27 bonds per atom, which ought to be ridiculously high 00411 // for any real structure someone would load, but well below N^2 00412 GridSearchPairlist *pairlist = vmd_gridsearch_bonds(pos, 00413 (cutoff < 0) ? radius : NULL, 00414 natoms, dist, natoms * 27L); 00415 00416 // Go through the pairlist adding validated bonds freeing nodes as we go. 00417 GridSearchPairlist *p, *tmp; 00418 for (p = pairlist; p != NULL; p = tmp) { 00419 int numpairs = p->pairlist->num() / 2; 00420 00421 for (i=0; i<numpairs; i++) { 00422 int ind1 = (*p->pairlist)[i*2L ]; 00423 int ind2 = (*p->pairlist)[i*2L+1]; 00424 00425 MolAtom *atom1 = mol->atom(ind1); 00426 MolAtom *atom2 = mol->atom(ind2); 00427 00428 // don't bond atoms that aren't part of the same conformation 00429 // or that aren't in the all-conformations part of the structure 00430 if ((atom1->altlocindex != atom2->altlocindex) && 00431 ((mol->altlocNames.name(atom1->altlocindex)[0] != '0円') && 00432 (mol->altlocNames.name(atom2->altlocindex)[0] != '0円'))) { 00433 continue; 00434 } 00435 00436 // Prevent hydrogens from bonding with each other. 00437 #if 1 00438 // XXX must use the atom name strings presently because the 00439 // hydrogen flags aren't necessarily set by the time the bond search 00440 // code executes. It may soon be time to do something a different 00441 // with per-atom flag storage so that the atom types can be setup 00442 // during the earliest phase of structure analysis, eliminating this 00443 // and other potential gotchas. 00444 if (!IS_HYDROGEN(mol->atomNames.name(atom1->nameindex)) || 00445 !IS_HYDROGEN(mol->atomNames.name(atom2->nameindex)) ) { 00446 #else 00447 // Use atomType info derived during initial molecule analysis for speed. 00448 if (!(atom1->atomType == ATOMHYDROGEN) || 00449 !(atom2->atomType == ATOMHYDROGEN)) { 00450 #endif 00451 // Add a bond, bondorder defaults to 1, bond type to -1 00452 if (dupcheck) 00453 mol->add_bond_dupcheck(ind1, ind2, 1, -1); 00454 else 00455 mol->add_bond(ind1, ind2, 1, -1); 00456 } 00457 } 00458 00459 // free this pairlist node and its ResizeArray of pairs 00460 tmp = p->next; 00461 delete p->pairlist; 00462 free(p); 00463 } 00464 00465 return 1; 00466 } 00467 00468 00469 00470