00001 /*************************************************************************** 00002 *cr 00003 *cr (C) Copyright 1995-2019 The Board of Trustees of the 00004 *cr University of Illinois 00005 *cr All Rights Reserved 00006 *cr 00007 ***************************************************************************/ 00008 00009 /*************************************************************************** 00010 * RCS INFORMATION: 00011 * 00012 * $RCSfile: Stride.C,v $ 00013 * $Author: johns $ $Locker: $ $State: Exp $ 00014 * $Revision: 1.51 $ $Date: 2021年11月18日 07:33:43 $ 00015 * 00016 *************************************************************************** 00017 * DESCRIPTION: 00018 * Stride interface class. 00019 * STRIDE source code: 00020 * http://webclu.bio.wzw.tum.de/stride/ 00021 * http://webclu.bio.wzw.tum.de/stride/stride.tar.gz 00022 * https://en.wikipedia.org/wiki/STRIDE 00023 * 00024 * DSSP/xDSSP/HSSP family; 00025 * https://swift.cmbi.umcn.nl/gv/dssp/DSSP_3.html 00026 * https://github.com/cmbi/dssp 00027 * https://github.com/cmbi/hssp/releases 00028 * 00029 * Paper on evaluation of secondary structure assignment methods: 00030 * https://pubmed.ncbi.nlm.nih.gov/16164759/ 00031 * 00032 * Secondary structure assignment for conformationally irregular peptides: 00033 * https://pubmed.ncbi.nlm.nih.gov/25424660/ 00034 * 00035 ***************************************************************************/ 00036 00037 #include <stdio.h> // for tmpfile 00038 #include "DrawMolecule.h" 00039 #include "Timestep.h" 00040 #include "Residue.h" 00041 #include "Inform.h" 00042 #include "Stride.h" 00043 #include "utilities.h" // for vmd_delete_file 00044 #include "VMDDir.h" 00045 #include "PeriodicTable.h" 00046 00047 // 00048 // Despite prolific compile-time and link-time warnings about 00049 // the use of APIs such as tempnam(), we need to use this approach 00050 // because VMD itself doesn't create the STRIDE output, so temp file 00051 // API variants that return an open file handle aren't useful to us, 00052 // and the potential race condition they're meant to protect against 00053 // is inherent when using system() or exec() calls to run an external 00054 // process on input/output files. 00055 // 00056 // Note: the caller must free the returned C string. 00057 // 00058 char * get_temporary_filename(void) { 00059 char * filename = NULL; 00060 #if defined(ARCH_MACOSXARM64) || defined(ARCH_MACOSXX86) || defined(ARCH_MACOSXX86_64) 00061 char *tmpstr = tmpnam(NULL); 00062 if (tmpstr !=NULL) 00063 filename = strdup(tmpstr); 00064 #else 00065 filename = tempnam(NULL, NULL); 00066 #endif 00067 return filename; 00068 } 00069 00070 00071 // Write a PDB of the given timestep, stripping out all of the non-protein 00072 // atoms since they are not used for secondary structure assignment. 00073 // The PDB input can be used with STRIDE, DSSP, and likely other SS programs. 00074 // Write the uniq_resid of the residues written to the stride input file 00075 // into the residues argument. 00076 int write_ss_input_pdb(DrawMolecule *mol, const char *inputfilename, 00077 ResizeArray<int>& residues) { 00078 const float *pos = mol->current()->pos; 00079 char name[6], resname[5], atomicelement[4], chain[2]; 00080 00081 residues.clear(); 00082 00083 FILE *inputfile = fopen(inputfilename,"w"); 00084 if (!inputfile) { 00085 msgErr << "Unable to open input file '" << inputfilename 00086 << "' for writing." << sendmsg; 00087 return 1; 00088 } 00089 00090 int prev = -1; // previous uniq_resid 00091 int atomcount = 0; 00092 00093 for (int i=0; i < mol->nAtoms; i++) { 00094 MolAtom *atom = mol->atom(i); 00095 // skip if this atom isn't part of protein 00096 if (atom->residueType != RESPROTEIN) 00097 continue; 00098 00099 strncpy(name, (mol->atomNames).name(atom->nameindex), 4); 00100 name[4]='0円'; 00101 00102 strncpy(resname,(mol->resNames).name(atom->resnameindex),4); 00103 resname[4] = '0円'; 00104 00105 strncpy(atomicelement, get_pte_label(atom->atomicnumber), 4); 00106 atomicelement[1] = ' '; 00107 atomicelement[2] = ' '; 00108 atomicelement[3] = '0円'; 00109 00110 chain[0] = (mol->chainNames).name(atom->chainindex)[0]; 00111 chain[1] = '0円'; 00112 00113 if (atom->uniq_resid != prev) { 00114 prev = atom->uniq_resid; 00115 residues.append(prev); 00116 } 00117 00118 const float *loc=pos + 3L*i; 00119 const char *emptycols = " "; 00120 if (fprintf(inputfile, "ATOM %5d %-4s %-4s%c%4ld %8.3f%8.3f%8.3f%s%3s\n", 00121 ++atomcount, name, resname, chain[0], long(residues.num()-1), 00122 loc[0], loc[1], loc[2], emptycols, atomicelement) < 0) { 00123 msgErr << "Error writing line in Stride/DSSP input file for atom " << i << sendmsg; 00124 return 1; 00125 } 00126 } 00127 fprintf(inputfile, "TER \n"); 00128 00129 fclose(inputfile); 00130 return 0; 00131 } 00132 00133 00134 // 00135 // Here's the sprintf command stride uses on output in report.c: 00136 // sprintf(Tmp,"ASG %3s %c %4s %4d %c %11s %7.2f %7.2f %7.1f", 00137 // p->ResType,SpaceToDash(Chain[Cn]->Id),p->PDB_ResNumb,i+1, 00138 // p->Prop->Asn,Translate(p->Prop->Asn),p->Prop->Phi, 00139 // p->Prop->Psi,p->Prop->Solv); 00140 // 00141 // Here's a typical line: 00142 //ASG THR - 9 9 B Bridge -113.85 157.34 21.9 ~~~~ 00143 // 00144 00145 static int read_stride_record( DrawMolecule *mol, 00146 const char *stridefile, 00147 const ResizeArray<int> &residues ) { 00148 FILE *in = fopen(stridefile, "rt"); 00149 if (!in) { 00150 msgErr << "Unable to find Stride output file: " 00151 << stridefile << sendmsg; 00152 return 1; 00153 } 00154 00155 const int BUF_LEN = 90; 00156 char buf[BUF_LEN], resname[4], chain[1], uniq_residstr[5], ss; 00157 int resid=0; 00158 00159 while (fgets(buf, BUF_LEN, in)) { 00160 if (strncmp(buf, "ASG", 3)) 00161 continue; 00162 00163 sscanf(buf,"ASG %3s %c %4s %4d %c", 00164 resname, chain, uniq_residstr, &resid, &ss); 00165 00166 int index = atoi(uniq_residstr); 00167 if (index < 0 || index >= residues.num()) { 00168 msgErr << "invalid resid found in stride output file!" << sendmsg; 00169 msgErr << "Error found in the following line: " << sendmsg; 00170 msgErr << buf << sendmsg; 00171 fclose(in); 00172 return -1; 00173 } 00174 00175 int uniq_resid = residues[index]; 00176 switch (ss) { 00177 case 'H': // H: helix 00178 mol->residueList[uniq_resid]->sstruct = SS_HELIX_ALPHA; 00179 break; 00180 00181 case 'G': // G: 3-10 helix 00182 mol->residueList[uniq_resid]->sstruct = SS_HELIX_3_10; 00183 break; 00184 00185 case 'I': // I: PI helix 00186 mol->residueList[uniq_resid]->sstruct = SS_HELIX_PI; 00187 break; 00188 00189 case 'E': // E: Beta sheet 00190 mol->residueList[uniq_resid]->sstruct = SS_BETA; 00191 break; 00192 00193 case 'B': // B: bridge 00194 case 'b': // b: bridge 00195 mol->residueList[uniq_resid]->sstruct = SS_BRIDGE; 00196 break; 00197 00198 case 'T': // T: turn 00199 mol->residueList[uniq_resid]->sstruct = SS_TURN; 00200 break; 00201 00202 default: 00203 // msgErr << "Internal error in read_stride_record\n" << sendmsg; 00204 mol->residueList[uniq_resid]->sstruct = SS_COIL; 00205 } 00206 } 00207 00208 fclose(in); 00209 return 0; 00210 } 00211 00212 00213 int ss_from_stride(DrawMolecule *mol) { 00214 int rc = 0; 00215 00216 char *stridebin = getenv("STRIDE_BIN"); 00217 if (!stridebin) { 00218 msgErr << "No STRIDE binary found; please set STRIDE_BIN environment variable" << sendmsg; 00219 msgErr << "to the location of the STRIDE binary." << sendmsg; 00220 return 1; 00221 } 00222 00223 // check to see if the executable exists 00224 if (!vmd_file_is_executable(stridebin)) { 00225 msgErr << "STRIDE binary " << stridebin << " cannot be run; check permissions." << sendmsg; 00226 return 1; 00227 } 00228 00229 char *infilename = get_temporary_filename(); 00230 char *outfilename = get_temporary_filename(); 00231 if (infilename == NULL || outfilename == NULL) { 00232 msgErr << "Unable to create temporary files for STRIDE." << sendmsg; 00233 return 1; 00234 } 00235 00236 char *stridecall = new char[strlen(stridebin) 00237 +strlen(infilename) 00238 +strlen(outfilename) 00239 +16]; 00240 00241 sprintf(stridecall,"\"%s\" %s -f%s", stridebin, infilename, outfilename); 00242 00243 ResizeArray<int> residues; 00244 00245 if (write_ss_input_pdb(mol,infilename,residues)) { 00246 msgErr << "write_ss_input_pdb(): unable " 00247 << "to write input file for Stride\n" << sendmsg; 00248 rc = 1; 00249 } 00250 00251 if (!rc) { 00252 vmd_system(stridecall); 00253 00254 if (read_stride_record(mol,outfilename,residues)) { 00255 msgErr << "Stride::read_stride_record: unable " 00256 << "to read output file from Stride\n" << sendmsg; 00257 rc = 1; 00258 } 00259 } 00260 00261 delete [] stridecall; 00262 vmd_delete_file(outfilename); 00263 vmd_delete_file(infilename); 00264 free(outfilename); 00265 free(infilename); 00266 00267 return rc; 00268 } 00269 00270 00271 00272 // 00273 // DSSP implementation 00274 // 00275 00276 /* 00277 Dssp output format: 00278 return (kDSSPResidueLine % residue.GetNumber() % ca.mResSeq % ca.mICode % chainChar % code % ss % helix[0] % helix[1] % helix[2] % bend % chirality % bridgelabel[0] % bridgelabel[1] % bp[0] % bp[1] % sheet % floor(residue.Accessibility() + 0.5) % NHO[0] % ONH[0] % NHO[1] % ONH[1] % residue.TCO() % residue.Kappa() % alpha % residue.Phi() % residue.Psi() % ca.mLoc.mX % ca.mLoc.mY % ca.mLoc.mZ % long_ChainID1 % long_ChainID2).str(); 00279 00280 This is the header line for the residue lines in a DSSP file: 00281 00282 # RESIDUE AA STRUCTURE BP1 BP2 ACC N-H-->O O-->H-N N-H-->O O-->H-N TCO KAPPA ALPHA PHI PSI X-CA Y-CA Z-CA CHAIN AUTHCHAIN 00283 */ 00284 00285 00286 00287 static int read_dssp_record(DrawMolecule *mol, 00288 const char *dsspfile, 00289 const ResizeArray<int> &residues ) { 00290 FILE *in = fopen(dsspfile, "rt"); 00291 if (!in) { 00292 msgErr << "Unable to find DSSP output file: " 00293 << dsspfile << sendmsg; 00294 return 1; 00295 } 00296 00297 const int BUF_LEN = 1024; 00298 char buf[BUF_LEN], atom_num[6], chain[2], uniq_residstr[5], ss; 00299 bool start_flag = false; 00300 00301 while (fgets(buf, BUF_LEN, in)) { 00302 // check if the header line is reached 00303 if (!start_flag){ 00304 if (strncmp(buf, " # RESIDUE", 12)) { 00305 continue; 00306 } else { 00307 if (fgets(buf, BUF_LEN, in) == NULL) { 00308 fclose(in); 00309 return -1; // failed 00310 } 00311 start_flag = true; 00312 } 00313 } 00314 00315 sscanf(buf,"%5s %4s %1s %c", 00316 atom_num, uniq_residstr, chain, &ss); 00317 00318 ss = buf[16]; 00319 00320 int index = atoi(uniq_residstr); 00321 if (!index) 00322 continue; // for the gap between chains in DSSP output 00323 00324 if (index < 0 || index >= residues.num()) { 00325 msgErr << "invalid resid found in DSSP output file!" << sendmsg; 00326 msgErr << "Error found in the following line: " << sendmsg; 00327 msgErr << buf << sendmsg; 00328 fclose(in); 00329 return -1; 00330 } 00331 00332 int uniq_resid = residues[index]; 00333 #if 0 00334 msgErr << atom_num << " " << uniq_residstr << " " << chain << " " << ss << sendmsg; 00335 msgErr << buf << sendmsg; 00336 #endif 00337 00338 switch (ss) { 00339 case 'H': // H 00340 mol->residueList[uniq_resid]->sstruct = SS_HELIX_ALPHA; 00341 break; 00342 00343 case 'B': // B 00344 mol->residueList[uniq_resid]->sstruct = SS_BRIDGE; 00345 break; 00346 00347 case 'E': // E 00348 mol->residueList[uniq_resid]->sstruct = SS_BETA; 00349 break; 00350 00351 case 'G': // G 00352 mol->residueList[uniq_resid]->sstruct = SS_HELIX_3_10; 00353 break; 00354 00355 case 'I': // I 00356 mol->residueList[uniq_resid]->sstruct = SS_HELIX_PI; 00357 break; 00358 00359 case 'T': // T 00360 mol->residueList[uniq_resid]->sstruct = SS_TURN; 00361 break; 00362 00363 case 'S': // S 00364 // this is the case where vmd doesn't cover 00365 // structure code "bent" assigned by DSSP 00366 00367 default: 00368 msgErr << "Internal error in read_dssp_record\n" << sendmsg; 00369 // printf("DSSP uniq_resid: %d SS: %c\n", uniq_resid, ss); 00370 // printf("DSSP buf: '%s'\n\n", buf); 00371 mol->residueList[uniq_resid]->sstruct = SS_COIL; 00372 } 00373 } 00374 00375 fclose(in); 00376 return 0; 00377 } 00378 00379 00380 00381 00382 int ss_from_dssp(DrawMolecule *mol) { 00383 int rc = 0; 00384 00385 char *dsspbin = getenv("DSSP_BIN"); 00386 if (!dsspbin) { 00387 msgErr << "No DSSP/XDSSP binary found; please set DSSP_BIN environment variable" << sendmsg; 00388 msgErr << "to the location of the DSSP binary." << sendmsg; 00389 return 1; 00390 } 00391 00392 // check to see if the executable exists 00393 if (!vmd_file_is_executable(dsspbin)) { 00394 msgErr << "DSSP binary " << dsspbin << " cannot be run; check permissions." << sendmsg; 00395 return 1; 00396 } 00397 00398 char *infilename = get_temporary_filename(); 00399 char *outfilename = get_temporary_filename(); 00400 if (infilename == NULL || outfilename == NULL) { 00401 msgErr << "Unable to create temporary files for DSSP." << sendmsg; 00402 return 1; 00403 } 00404 00405 char *dsspcall = new char[strlen(dsspbin) 00406 +strlen(infilename) 00407 +strlen(outfilename) 00408 +16]; 00409 00410 sprintf(dsspcall,"\"%s\" -i %s -o %s -v", dsspbin, infilename, outfilename); 00411 00412 ResizeArray<int> residues; 00413 00414 if (write_ss_input_pdb(mol,infilename,residues)) { 00415 msgErr << "write_ss_input_pdb(): unable " 00416 << "to write input file for DSSP\n" << sendmsg; 00417 rc = 1; 00418 } 00419 00420 if (!rc) { 00421 vmd_system(dsspcall); 00422 00423 if (read_dssp_record(mol,outfilename,residues)) { 00424 msgErr << "Dssp::read_dssp_record: unable " 00425 << "to read output file from DSSP\n" << sendmsg; 00426 rc = 1; 00427 } 00428 } 00429 00430 delete [] dsspcall; 00431 vmd_delete_file(outfilename); 00432 vmd_delete_file(infilename); 00433 free(outfilename); 00434 free(infilename); 00435 return rc; 00436 } 00437 00438 00439