Main Page Namespace List Class Hierarchy Alphabetical List Compound List File List Namespace Members Compound Members File Members Related Pages

Stride.C

Go to the documentation of this file.
00001 /***************************************************************************
00002 *cr
00003 *cr (C) Copyright 1995-2019 The Board of Trustees of the
00004 *cr University of Illinois
00005 *cr All Rights Reserved
00006 *cr
00007 ***************************************************************************/
00008 
00009 /***************************************************************************
00010 * RCS INFORMATION:
00011 *
00012 * $RCSfile: Stride.C,v $
00013 * $Author: johns $ $Locker: $ $State: Exp $
00014 * $Revision: 1.51 $ $Date: 2021年11月18日 07:33:43 $
00015 *
00016 ***************************************************************************
00017 * DESCRIPTION:
00018 * Stride interface class.
00019 * STRIDE source code:
00020 * http://webclu.bio.wzw.tum.de/stride/
00021 * http://webclu.bio.wzw.tum.de/stride/stride.tar.gz
00022 * https://en.wikipedia.org/wiki/STRIDE
00023 * 
00024 * DSSP/xDSSP/HSSP family; 
00025 * https://swift.cmbi.umcn.nl/gv/dssp/DSSP_3.html
00026 * https://github.com/cmbi/dssp
00027 * https://github.com/cmbi/hssp/releases
00028 *
00029 * Paper on evaluation of secondary structure assignment methods:
00030 * https://pubmed.ncbi.nlm.nih.gov/16164759/
00031 *
00032 * Secondary structure assignment for conformationally irregular peptides:
00033 * https://pubmed.ncbi.nlm.nih.gov/25424660/
00034 * 
00035 ***************************************************************************/
00036 
00037 #include <stdio.h> // for tmpfile
00038 #include "DrawMolecule.h"
00039 #include "Timestep.h"
00040 #include "Residue.h"
00041 #include "Inform.h"
00042 #include "Stride.h"
00043 #include "utilities.h" // for vmd_delete_file
00044 #include "VMDDir.h"
00045 #include "PeriodicTable.h"
00046 
00047 // 
00048 // Despite prolific compile-time and link-time warnings about
00049 // the use of APIs such as tempnam(), we need to use this approach
00050 // because VMD itself doesn't create the STRIDE output, so temp file
00051 // API variants that return an open file handle aren't useful to us,
00052 // and the potential race condition they're meant to protect against
00053 // is inherent when using system() or exec() calls to run an external
00054 // process on input/output files.
00055 //
00056 // Note: the caller must free the returned C string.
00057 //
00058 char * get_temporary_filename(void) {
00059 char * filename = NULL;
00060 #if defined(ARCH_MACOSXARM64) || defined(ARCH_MACOSXX86) || defined(ARCH_MACOSXX86_64)
00061 char *tmpstr = tmpnam(NULL);
00062 if (tmpstr !=NULL)
00063 filename = strdup(tmpstr);
00064 #else
00065 filename = tempnam(NULL, NULL);
00066 #endif
00067 return filename;
00068 }
00069 
00070 
00071 // Write a PDB of the given timestep, stripping out all of the non-protein 
00072 // atoms since they are not used for secondary structure assignment.
00073 // The PDB input can be used with STRIDE, DSSP, and likely other SS programs.
00074 // Write the uniq_resid of the residues written to the stride input file
00075 // into the residues argument.
00076 int write_ss_input_pdb(DrawMolecule *mol, const char *inputfilename,
00077 ResizeArray<int>& residues) {
00078 const float *pos = mol->current()->pos;
00079 char name[6], resname[5], atomicelement[4], chain[2];
00080 
00081 residues.clear();
00082 
00083 FILE *inputfile = fopen(inputfilename,"w");
00084 if (!inputfile) {
00085 msgErr << "Unable to open input file '" << inputfilename 
00086 << "' for writing." << sendmsg;
00087 return 1;
00088 }
00089 
00090 int prev = -1; // previous uniq_resid
00091 int atomcount = 0;
00092 
00093 for (int i=0; i < mol->nAtoms; i++) {
00094 MolAtom *atom = mol->atom(i);
00095 // skip if this atom isn't part of protein
00096 if (atom->residueType != RESPROTEIN)
00097 continue;
00098 
00099 strncpy(name, (mol->atomNames).name(atom->nameindex), 4);
00100 name[4]='0円';
00101 
00102 strncpy(resname,(mol->resNames).name(atom->resnameindex),4); 
00103 resname[4] = '0円';
00104 
00105 strncpy(atomicelement, get_pte_label(atom->atomicnumber), 4); 
00106 atomicelement[1] = ' ';
00107 atomicelement[2] = ' ';
00108 atomicelement[3] = '0円';
00109 
00110 chain[0] = (mol->chainNames).name(atom->chainindex)[0];
00111 chain[1] = '0円';
00112 
00113 if (atom->uniq_resid != prev) {
00114 prev = atom->uniq_resid;
00115 residues.append(prev);
00116 }
00117 
00118 const float *loc=pos + 3L*i;
00119 const char *emptycols = " ";
00120 if (fprintf(inputfile, "ATOM %5d %-4s %-4s%c%4ld %8.3f%8.3f%8.3f%s%3s\n",
00121 ++atomcount, name, resname, chain[0], long(residues.num()-1),
00122 loc[0], loc[1], loc[2], emptycols, atomicelement) < 0) {
00123 msgErr << "Error writing line in Stride/DSSP input file for atom " << i << sendmsg;
00124 return 1;
00125 }
00126 }
00127 fprintf(inputfile, "TER \n");
00128 
00129 fclose(inputfile);
00130 return 0;
00131 }
00132 
00133 
00134 //
00135 // Here's the sprintf command stride uses on output in report.c:
00136 // sprintf(Tmp,"ASG %3s %c %4s %4d %c %11s %7.2f %7.2f %7.1f",
00137 // p->ResType,SpaceToDash(Chain[Cn]->Id),p->PDB_ResNumb,i+1,
00138 // p->Prop->Asn,Translate(p->Prop->Asn),p->Prop->Phi,
00139 // p->Prop->Psi,p->Prop->Solv);
00140 //
00141 // Here's a typical line:
00142 //ASG THR - 9 9 B Bridge -113.85 157.34 21.9 ~~~~
00143 // 
00144 
00145 static int read_stride_record( DrawMolecule *mol, 
00146 const char *stridefile,
00147 const ResizeArray<int> &residues ) { 
00148 FILE *in = fopen(stridefile, "rt");
00149 if (!in) {
00150 msgErr << "Unable to find Stride output file: "
00151 << stridefile << sendmsg;
00152 return 1;
00153 }
00154 
00155 const int BUF_LEN = 90;
00156 char buf[BUF_LEN], resname[4], chain[1], uniq_residstr[5], ss;
00157 int resid=0;
00158 
00159 while (fgets(buf, BUF_LEN, in)) {
00160 if (strncmp(buf, "ASG", 3))
00161 continue;
00162 
00163 sscanf(buf,"ASG %3s %c %4s %4d %c",
00164 resname, chain, uniq_residstr, &resid, &ss);
00165 
00166 int index = atoi(uniq_residstr);
00167 if (index < 0 || index >= residues.num()) {
00168 msgErr << "invalid resid found in stride output file!" << sendmsg;
00169 msgErr << "Error found in the following line: " << sendmsg;
00170 msgErr << buf << sendmsg;
00171 fclose(in);
00172 return -1;
00173 }
00174 
00175 int uniq_resid = residues[index];
00176 switch (ss) {
00177 case 'H': // H: helix
00178 mol->residueList[uniq_resid]->sstruct = SS_HELIX_ALPHA;
00179 break;
00180 
00181 case 'G': // G: 3-10 helix
00182 mol->residueList[uniq_resid]->sstruct = SS_HELIX_3_10;
00183 break; 
00184 
00185 case 'I': // I: PI helix
00186 mol->residueList[uniq_resid]->sstruct = SS_HELIX_PI;
00187 break;
00188 
00189 case 'E': // E: Beta sheet
00190 mol->residueList[uniq_resid]->sstruct = SS_BETA;
00191 break;
00192 
00193 case 'B': // B: bridge
00194 case 'b': // b: bridge
00195 mol->residueList[uniq_resid]->sstruct = SS_BRIDGE;
00196 break;
00197 
00198 case 'T': // T: turn
00199 mol->residueList[uniq_resid]->sstruct = SS_TURN;
00200 break;
00201 
00202 default:
00203 // msgErr << "Internal error in read_stride_record\n" << sendmsg;
00204 mol->residueList[uniq_resid]->sstruct = SS_COIL;
00205 }
00206 }
00207 
00208 fclose(in);
00209 return 0;
00210 } 
00211 
00212 
00213 int ss_from_stride(DrawMolecule *mol) {
00214 int rc = 0;
00215 
00216 char *stridebin = getenv("STRIDE_BIN");
00217 if (!stridebin) {
00218 msgErr << "No STRIDE binary found; please set STRIDE_BIN environment variable" << sendmsg;
00219 msgErr << "to the location of the STRIDE binary." << sendmsg;
00220 return 1;
00221 }
00222 
00223 // check to see if the executable exists
00224 if (!vmd_file_is_executable(stridebin)) {
00225 msgErr << "STRIDE binary " << stridebin << " cannot be run; check permissions." << sendmsg;
00226 return 1;
00227 }
00228 
00229 char *infilename = get_temporary_filename();
00230 char *outfilename = get_temporary_filename();
00231 if (infilename == NULL || outfilename == NULL) {
00232 msgErr << "Unable to create temporary files for STRIDE." << sendmsg;
00233 return 1;
00234 }
00235 
00236 char *stridecall = new char[strlen(stridebin)
00237 +strlen(infilename)
00238 +strlen(outfilename)
00239 +16];
00240 
00241 sprintf(stridecall,"\"%s\" %s -f%s", stridebin, infilename, outfilename);
00242 
00243 ResizeArray<int> residues;
00244 
00245 if (write_ss_input_pdb(mol,infilename,residues)) {
00246 msgErr << "write_ss_input_pdb(): unable "
00247 << "to write input file for Stride\n" << sendmsg;
00248 rc = 1;
00249 }
00250 
00251 if (!rc) {
00252 vmd_system(stridecall);
00253 
00254 if (read_stride_record(mol,outfilename,residues)) {
00255 msgErr << "Stride::read_stride_record: unable "
00256 << "to read output file from Stride\n" << sendmsg;
00257 rc = 1;
00258 }
00259 }
00260 
00261 delete [] stridecall;
00262 vmd_delete_file(outfilename);
00263 vmd_delete_file(infilename);
00264 free(outfilename);
00265 free(infilename);
00266 
00267 return rc;
00268 } 
00269 
00270 
00271 
00272 //
00273 // DSSP implementation
00274 //
00275 
00276 /* 
00277 Dssp output format:
00278 return (kDSSPResidueLine % residue.GetNumber() % ca.mResSeq % ca.mICode % chainChar % code % ss % helix[0] % helix[1] % helix[2] % bend % chirality % bridgelabel[0] % bridgelabel[1] % bp[0] % bp[1] % sheet % floor(residue.Accessibility() + 0.5) % NHO[0] % ONH[0] % NHO[1] % ONH[1] % residue.TCO() % residue.Kappa() % alpha % residue.Phi() % residue.Psi() % ca.mLoc.mX % ca.mLoc.mY % ca.mLoc.mZ % long_ChainID1 % long_ChainID2).str();
00279 
00280 This is the header line for the residue lines in a DSSP file:
00281 
00282 # RESIDUE AA STRUCTURE BP1 BP2 ACC N-H-->O O-->H-N N-H-->O O-->H-N TCO KAPPA ALPHA PHI PSI X-CA Y-CA Z-CA CHAIN AUTHCHAIN
00283 */
00284 
00285 
00286 
00287 static int read_dssp_record(DrawMolecule *mol, 
00288 const char *dsspfile,
00289 const ResizeArray<int> &residues ) { 
00290 FILE *in = fopen(dsspfile, "rt");
00291 if (!in) {
00292 msgErr << "Unable to find DSSP output file: "
00293 << dsspfile << sendmsg;
00294 return 1;
00295 }
00296 
00297 const int BUF_LEN = 1024;
00298 char buf[BUF_LEN], atom_num[6], chain[2], uniq_residstr[5], ss;
00299 bool start_flag = false;
00300 
00301 while (fgets(buf, BUF_LEN, in)) {
00302 // check if the header line is reached
00303 if (!start_flag){
00304 if (strncmp(buf, " # RESIDUE", 12)) {
00305 continue;
00306 } else {
00307 if (fgets(buf, BUF_LEN, in) == NULL) {
00308 fclose(in);
00309 return -1; // failed 
00310 }
00311 start_flag = true;
00312 }
00313 }
00314 
00315 sscanf(buf,"%5s %4s %1s %c",
00316 atom_num, uniq_residstr, chain, &ss);
00317 
00318 ss = buf[16];
00319 
00320 int index = atoi(uniq_residstr);
00321 if (!index) 
00322 continue; // for the gap between chains in DSSP output
00323 
00324 if (index < 0 || index >= residues.num()) {
00325 msgErr << "invalid resid found in DSSP output file!" << sendmsg;
00326 msgErr << "Error found in the following line: " << sendmsg;
00327 msgErr << buf << sendmsg;
00328 fclose(in);
00329 return -1;
00330 }
00331 
00332 int uniq_resid = residues[index];
00333 #if 0
00334 msgErr << atom_num << " " << uniq_residstr << " " << chain << " " << ss << sendmsg;
00335 msgErr << buf << sendmsg;
00336 #endif
00337 
00338 switch (ss) {
00339 case 'H': // H
00340 mol->residueList[uniq_resid]->sstruct = SS_HELIX_ALPHA;
00341 break;
00342 
00343 case 'B': // B
00344 mol->residueList[uniq_resid]->sstruct = SS_BRIDGE;
00345 break; 
00346 
00347 case 'E': // E
00348 mol->residueList[uniq_resid]->sstruct = SS_BETA;
00349 break;
00350 
00351 case 'G': // G
00352 mol->residueList[uniq_resid]->sstruct = SS_HELIX_3_10;
00353 break;
00354 
00355 case 'I': // I
00356 mol->residueList[uniq_resid]->sstruct = SS_HELIX_PI;
00357 break;
00358 
00359 case 'T': // T
00360 mol->residueList[uniq_resid]->sstruct = SS_TURN;
00361 break;
00362 
00363 case 'S': // S
00364 // this is the case where vmd doesn't cover
00365 // structure code "bent" assigned by DSSP 
00366 
00367 default:
00368 msgErr << "Internal error in read_dssp_record\n" << sendmsg;
00369 // printf("DSSP uniq_resid: %d SS: %c\n", uniq_resid, ss);
00370 // printf("DSSP buf: '%s'\n\n", buf);
00371 mol->residueList[uniq_resid]->sstruct = SS_COIL;
00372 }
00373 }
00374 
00375 fclose(in);
00376 return 0;
00377 } 
00378 
00379 
00380 
00381 
00382 int ss_from_dssp(DrawMolecule *mol) {
00383 int rc = 0;
00384 
00385 char *dsspbin = getenv("DSSP_BIN");
00386 if (!dsspbin) {
00387 msgErr << "No DSSP/XDSSP binary found; please set DSSP_BIN environment variable" << sendmsg;
00388 msgErr << "to the location of the DSSP binary." << sendmsg;
00389 return 1;
00390 }
00391 
00392 // check to see if the executable exists
00393 if (!vmd_file_is_executable(dsspbin)) {
00394 msgErr << "DSSP binary " << dsspbin << " cannot be run; check permissions." << sendmsg;
00395 return 1;
00396 }
00397 
00398 char *infilename = get_temporary_filename();
00399 char *outfilename = get_temporary_filename();
00400 if (infilename == NULL || outfilename == NULL) {
00401 msgErr << "Unable to create temporary files for DSSP." << sendmsg;
00402 return 1;
00403 }
00404 
00405 char *dsspcall = new char[strlen(dsspbin)
00406 +strlen(infilename)
00407 +strlen(outfilename)
00408 +16];
00409 
00410 sprintf(dsspcall,"\"%s\" -i %s -o %s -v", dsspbin, infilename, outfilename);
00411 
00412 ResizeArray<int> residues;
00413 
00414 if (write_ss_input_pdb(mol,infilename,residues)) {
00415 msgErr << "write_ss_input_pdb(): unable "
00416 << "to write input file for DSSP\n" << sendmsg;
00417 rc = 1;
00418 }
00419 
00420 if (!rc) {
00421 vmd_system(dsspcall);
00422 
00423 if (read_dssp_record(mol,outfilename,residues)) {
00424 msgErr << "Dssp::read_dssp_record: unable "
00425 << "to read output file from DSSP\n" << sendmsg;
00426 rc = 1;
00427 }
00428 }
00429 
00430 delete [] dsspcall;
00431 vmd_delete_file(outfilename);
00432 vmd_delete_file(infilename);
00433 free(outfilename);
00434 free(infilename);
00435 return rc;
00436 } 
00437 
00438 
00439 

Generated on Mon Nov 17 02:47:12 2025 for VMD (current) by doxygen1.2.14 written by Dimitri van Heesch, © 1997-2002

AltStyle によって変換されたページ (->オリジナル) /