Rosetta 3.5
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Sparta.cc
Go to the documentation of this file.
1 // -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
2 /* */
3 /* ---- SPARTA ---- */
4 /* Shifts Prediction from Analogue of Residue type and Torsion Angle */
5 /* Yang Shen and Ad Bax */
6 /* J. Biomol. NMR, xx, xxx-xxx (2010) */
7 /* NIH, NIDDK, Laboratory of Chemical Physics */
8 /* version, 1.00 (build 2010.0607.00) */
9 /* */
10 /* for any problem, please contact */
11 /* shenyang@niddk.nih.gov */
12 /* */
13 /******************************************************************************/
14 /// modified for use inside CS-Rosetta by Oliver Lange
15 ///
16 // vi: set ts=2 noet:
17 //
18 // (c) Copyright Rosetta Commons Member Institutions.
19 // (c) This file is part of the Rosetta software suite and is made available under license.
20 // (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
21 // (c) For more information, see http://www.rosettacommons.org. Questions about this can be
22 // (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
23 
24 
25 ///Problems found during porting:
26 // ANN is loaded with residues r1+1 .. rN-1
27 // but PRED_SUM is loaded from r1 .. rN --> first residue can be uninitialized (came only up in MSPARTA_PI runs for some reason... )
28 // -- HA3 -> means aN.size -> 9 need to have extra memory allocated
29 // string functions were weird ... potential memory problems... replaced in util.cc where fishy...
30 
31 
32 
33 
34 /// @author Oliver Lange
35 
36 // Unit Headers
39 // AUTO-REMOVED #include <protocols/sparta/util.hh>
40 
41 #include <core/pose/Pose.hh>
42 
43 #include <core/types.hh>
44 
45 #include <basic/Tracer.hh>
46 #include <utility/vector1.hh>
47 #include <utility/vector0.hh>
48 
49 
50 //// C++ headers
51 #include <cstdlib>
52 #include <string>
53 // AUTO-REMOVED #include <cmath>
54 
55 #ifdef WIN32
56 #include <direct.h>
57 #include <ctime>
58 #else
59 // AUTO-REMOVED #include <dirent.h>
60 // AUTO-REMOVED #include <sys/stat.h>
61 // AUTO-REMOVED #include <sys/timeb.h>
62 #endif
63 
64 
66 #include <basic/options/option.hh>
67 #include <basic/options/keys/evaluation.OptionKeys.gen.hh>
68 #include <basic/database/open.hh>
69 
70 #include <numeric/NumericTraits.hh>
71 
72 namespace protocols {
73 namespace sparta {
74 
75 static basic::Tracer tr("protocols.sparta");
76 
78 
80  using namespace basic::options;
81  using namespace basic::options::OptionKeys;
82  if ( options_registered_ ) return;
83  options_registered_ = true;
84 }
85 
86 
88  //deallocate_arrays();
89 }
90 
92 
93 using namespace core;
94 using namespace std;
// NOTE(review): the signature line of this definition is missing from this listing
// (presumably the SpartaLib constructor -- confirm). The visible body resolves the
// SPARTA data directory and path separator, fills the atom-name table aN and
// finally calls init().
// NOTE(review): libvar is declared but never used in the visible body.
96  string libvar;
// NOTE(review): the value taken from the SPARTA_DIR environment variable here is
// overwritten unconditionally a few lines below (database path / sparta_dir option),
// so this lookup currently has no effect on the final SPARTA_DIR.
97  if( getenv( "SPARTA_DIR" ) == NULL ) {
98  SPARTA_DIR = ".";
99  } else{
100  SPARTA_DIR = getenv( "SPARTA_DIR" );
101  }
102 
103  using namespace basic::options;
104  using namespace basic::options::OptionKeys;
105 
106  slash_char = "/"; //default Unix
// Default location inside the database; a user-supplied evaluation::sparta_dir option wins.
107  SPARTA_DIR=basic::database::full_name( "external/SPARTA+" );
108  if ( option[ OptionKeys::evaluation::sparta_dir ].user() ) SPARTA_DIR=option[ OptionKeys::evaluation::sparta_dir ]();
109 
// Pick the separator from the directory string; a directory containing neither
// separator is replaced by ".".
110  if ( SPARTA_DIR.find("/") != string::npos ) slash_char = "/"; // unix
111  else if ( SPARTA_DIR.find("\\") != string::npos ) slash_char = "\\"; // Windows
112  else SPARTA_DIR = ".";
113 
// NOTE(review): the separator is then re-derived from PATH, which can override the
// choice made from SPARTA_DIR just above.
114  string temp;
115  if( getenv( "PATH" ) != NULL ) {
116  temp = getenv( "PATH" );
117  if(temp.find("/") != string::npos ) slash_char = "/"; // unix
118  else if(temp.find("\\") != string::npos ) slash_char = "\\"; // Windows
119  };
120 
// Atom names handled by the predictor, keyed 1..6.
121  aN[1]="N"; aN[2]="HA"; aN[3]="C"; aN[4]="CA"; aN[5]="CB"; aN[6]="HN"; //aN[7]="H3";
122  /*
123  aN_ALL[1]="N"; aN_ALL[2]="HA"; aN_ALL[3]="C"; aN_ALL[4]="CA"; aN_ALL[5]="CB"; aN_ALL[6]="HN"; //aN_ALL[7]="H3";
124  */
125 
126  //if ( option[ OptionKeys::sparta::dir ].user() ) SPARTA_DIR = option[ OptionKeys::sparta::dir ]();
127 
128  init();
129 }
130 
131 
// Construct a Sparta object for a reference chemical-shift file.
// cs_file is used to initialise REF_CS_Tab and is remembered in refCSFileName;
// output creation (bCreateOutput_) starts disabled.
// NOTE(review): one body line (listing line 136) is missing from this listing.
132 Sparta::Sparta( std::string const & cs_file ) :
133  REF_CS_Tab( cs_file ),
134  bCreateOutput_( false )
135 {
137  refCSFileName=cs_file;
138 }
139 
// NOTE(review): the signature line (and listing lines 140 and 143) are missing here;
// presumably this is the setup_for_scoring( pose ) routine called via lib() elsewhere
// in this file -- confirm.
// Loads the pose into inPDB, takes the residue range r1..rN from it, clamps
// firstRes/lastRes into that range and swaps them if they are out of order.
141  inName = "INTERNAL";
142  inPDB.loadPDB( pose );
144 
145  r1 = inPDB.r1;
146  rN = inPDB.rN;
147 
// Clamp the requested window to [r1, rN].
148  if (firstRes < r1) firstRes = r1;
149  if (lastRes < r1) lastRes = r1;
150 
151  if (firstRes > rN) firstRes = rN;
152  if (lastRes > rN) lastRes = rN;
153 
// Ensure firstRes <= lastRes.
154  if (firstRes > lastRes) {
155  int itemp = firstRes;
156  firstRes = lastRes;
157  lastRes = itemp;
158  }
159  tr.Info << "run ANN Sparta for pose with " << rN-r1+1 << " residues " << std::endl;
160 }
161 
// NOTE(review): the signature line is missing from this listing (presumably the
// whole-pose scoring entry point -- confirm). Prepares the shared library object
// for the pose and returns the result of run_A_ANN_Prediction().
163  lib().setup_for_scoring( pose );
164  return run_A_ANN_Prediction();
165 }
166 
// NOTE(review): the line carrying the return type and function name is missing from
// this listing (presumably the per-residue scoring entry point -- confirm), as is
// listing line 173.
// Prepares the library for the pose, obtains the prediction table and lets
// calc_per_residue_scores() fill one float per pose residue (initialised to 0.0).
168  core::pose::Pose const & pose
169 ) {
170  lib().setup_for_scoring(pose);
171 
172  GDB PRED_SUM = lib().get_ANN_data( bCreateOutput_ );
174 
// COMP_TAB is handed to calc_per_residue_scores() and not used afterwards here.
175  GDB COMP_TAB;
176  utility::vector1< float > scores( pose.total_residue(), 0.0 );
177  calc_per_residue_scores( lib().aN, PRED_SUM, REF_CS_Tab, COMP_TAB, scores );
178  return scores;
179 }
180 
181 //preset the args from command line SHIFT_DIR
// NOTE(review): the signature line (listing line 182) is missing; presumably this is
// the setup_defaults() routine called at the start of init() -- confirm.
// Derives all directory and table file names from SPARTA_DIR and slash_char and
// resets the remaining run parameters to fixed defaults.
183 
184  TAB_DIR = SPARTA_DIR + slash_char+ "tab";
185  SHIFT_DIR = SPARTA_DIR + slash_char+ "shifts";
186  PDB_DIR = SPARTA_DIR + slash_char+ "pdb";
187 
188  //later use Evaluator to determine scratch dir as in ExternalEvaluator...
// PRED_DIR is a relative path ("pred"), unlike the directories above.
189  PRED_DIR = "pred";
190  inName = "INTERNAL";
191  // if( args["in"].length() > 0 ) inName = args["in"];
192 // if( args["ins"].length() > 0 ) inNames = args["ins"];
193 
194  tripFileName = TAB_DIR + slash_char+ "sparta.tab";
195  weightFileName = TAB_DIR + slash_char + "weight.tab";
196  homoFileName = TAB_DIR + slash_char + "homology.tab";
197  fitFileName = TAB_DIR + slash_char + "fitting.tab";
198  sumName = PRED_DIR + slash_char +"pred.tab";
199 
200  // if( args["ref"].length() > 0 ) refCSFileName = args["ref"];
201 
202  rcFileName = TAB_DIR + slash_char + "randcoil.tab";
203  adjFileName = TAB_DIR + slash_char + "rcadj.tab";
204  prevFileName = TAB_DIR + slash_char + "rcprev.tab";
205  nextFileName = TAB_DIR + slash_char + "rcnext.tab";
206 
207  //Other Options
208  EXCLUDED="";
209 
210  // if(args["atom"].length() > 0 ) {
211 // aN.clear();
212 // utility::vector0< string > temp = GDB::split(" ", args["atom"]);
213 // int cnt = 1;
214 // for(int i = 0; i < temp.size(); i++)
215 // {
216 // if( temp[i]!="N" && temp[i]!="HA"&& temp[i]!="C"&& temp[i]!="CA"&& temp[i]!="CB"&& temp[i]!="HN")
217 // {
218 // cerr << "\tInvalid atom -" << temp[i] << endl;
219 // exit(0);
220 // }
221 // aN[cnt++] = temp[i];
222 // }
223 // }
224 
225  matchCount = 20;
226  tVal = 500.0; // not used
// Wide-open residue window; it is narrowed to the actual range once a structure is loaded.
227  firstRes = -9999;
228  lastRes = 9999;
229 
230 }
231 
// NOTE(review): the signature line (listing line 232) is missing; presumably this is
// the init() routine called from the constructor body above -- confirm.
// Loads the random-coil and adjustment tables, builds the per-residue BLOSUM62
// vectors, initialises one network per atom name and loads the error surfaces.
233  setup_defaults();
234  tr.Info << "Reading Random Coil Shifts from " << rcFileName << endl;
235  RC_Tab.loadGDB( rcFileName );
236 
237  tr.Info << "Reading RC Adjustments from " << adjFileName << endl;
238  ADJ_Tab.loadGDB( adjFileName );
239 
240  //load BLOSUM62 table
241  AAlist = "A C D E F G H I K L M N P Q R S T V W Y";
242  GDB B62;
243  string B62_fname = TAB_DIR + slash_char+ "BLOSUM62.tab";
244  tr.Info << "Reading BLOSUM62 Table from " << B62_fname << endl;
245  B62.loadGDB( B62_fname );
246  boost::unordered_map< string, string >::iterator itS;
247  for ( it = B62.Entries.begin(); it != B62.Entries.end(); it++ ) {
248  //int index=it->first;
249  string aa = (it->second)["RESNAME"];
250 
// Keep only columns whose name occurs in AAlist; each value is scaled by 1/10.
// NOTE(review): the columns are visited in the iteration order of an unordered map,
// so the element order inside BLOSUM_62[aa] follows that order -- confirm this is
// the order the trained networks expect.
251  for ( itS = (it->second).begin(); itS != (it->second).end(); itS++ ) {
252  if(AAlist.find(itS->first) != string::npos) {
253  BLOSUM_62[aa].push_back( atof( (itS->second).c_str() )/10.0 );
254  }
255  }
256 
257  } // end of assigning sequence homology vector (using blosum62 matrix)
258 
259  tr.Info << "Load ANN parameters ... ";
260  for(itN = aN.begin(); itN != aN.end(); itN++) {
261  string atomName = itN->second;
262  if( atomName == "H" ) atomName="HN";
263 
// The first argument (113) matches the input-vector length required in getResInfo().
264  SPARTA_ANN[atomName].init(113,30,1,9,6,3,TAB_DIR,atomName);
265  }
266  init_PredErrorSurface();
267  tr.Info << "done " << std::endl;
268 }
269 
270 //Get the list of angles\ring shifts\h-bond information from coordinates for all possible residues
271 //**************** NOT ABLE TO HANDLE PROTEIN WITH MULTIPLE CHAINS ****************
// Rebuilds, for the structure currently held in inPDB, the per-residue tables used by
// the predictor: torsion angles (U_ANGLES), residue names (U_NAME), ring-current
// shifts (U_RING_SHIFTS), H-bond distances (U_HN_HB / U_HA_HB / U_CO_HB), the text
// table inTab and the network input vectors ANN_IN_MTX.
//
// create_output: when true, inTab is saved to "<PRED_DIR>/<source name>_in.tab".
//
// Notes:
//  - All U_* arrays are allocated with new[] on every call and are not released in
//    this function.
//  - The float arrays are not value-initialised; slots of residues skipped by the
//    `continue` in the main loop keep whatever the allocation contained.
//  - Only residues r1+1 .. rN-1 get a triplet; the first and last residue receive
//    H-bond distances only.
//  - NOTE(review): listing line 481 is missing from this listing; it presumably
//    declares the local vector `temp` used for the network input -- confirm.
272 void Sparta::SpartaLib::getResInfo( bool create_output )
273 {
274  const Real SPARTA_PI = numeric::NumericTraits<Real>::pi();
275  const Real SPARTA_RADS_PER_DEG = SPARTA_PI / 180.0;
276  const Real SIN_PI = sin(SPARTA_PI);
277  const Real COS_PI = cos(SPARTA_PI);
278 
279  inTab.Entries.clear();
280 
281  // allocation
// Each 2-D table is one contiguous buffer plus an array of row pointers into it.
282  int n = rN-r1+1, m = 10;
283  U_ANGLES = new float* [n];
284  U_ANGLES[0] = new float [n*m];
285  for(int i = 1; i < n; ++i)
286  U_ANGLES[i] = U_ANGLES[i-1] + m;
287 
// Row width aN.size()+1: one column per atom name plus one extra (used for Gly HA3 below).
288  U_RING_SHIFTS = new float* [n];
289  U_RING_SHIFTS[0] = new float [n*(aN.size()+1)];
290  for(int i = 1; i < n; ++i)
291  U_RING_SHIFTS[i] = U_RING_SHIFTS[i-1] + aN.size()+1;
292 
293  n = rN-r1+1; m = 4;
294  U_NAME = new string* [n];
295  U_NAME[0] = new string [n*m];
296  for(int i = 1; i < n; ++i)
297  U_NAME[i] = U_NAME[i-1] + m;
298 
299  U_HN_HB = new float [n];
300  U_HA_HB = new float [n];
301  U_CO_HB = new float [n];
302 
// Source name = inName without directory part and without extension.
303  int pos0 = inName.find_last_of(slash_char)+1;
304  int pos1 = inName.find_last_of(".");
305 
306  sourceName=inName.substr(pos0,pos1-pos0);
307 
308  std::map<int, string >::iterator itN;
309  int cnt = 0;
310  // format the sequence read from PDB coordinates
311  sequence="";
312  for(itN = residList.begin(); itN != residList.end(); itN++){
313  sequence += itN->second;
314  cnt++;
315  if( cnt%10 == 0 ) sequence += " "; //separator for each 10 residues
316 
// Peek at the next entry to detect numbering gaps, then step back.
317  itN++;
318  if(itN != residList.end()) {//add "?" if sequence numbers are not consecutive
319 
320  int j = itN->first;
321  --itN;
322  for(int i = 1; i< j - itN->first; i++) {
323 
324  sequence += "?"; cnt++;
325  if( cnt%10 == 0 ) sequence += " ";
326  }
327  }
328  else --itN;
329  }
330 
331 // clock_t start, finish;
332 // start = clock();
333 
334  inPDB.initOrbitalShift();
335  //finish = clock();
336  //tr.Info << "\n\t initOrbitalShift running time: " << (float)(finish - start)/ CLOCKS_PER_SEC << " seconds" << endl;
337  inPDB.initHBond();
338  //finish = clock();
339  //tr.Info << "\n\t initHBond running time: " << (float)(finish - start)/ CLOCKS_PER_SEC << " seconds" << endl;
340  inPDB.collect_HN_S2_and_EF();
341  //inPDB.calc_HN_S2();
342  //finish = clock();
343  //tr.Info << "\n\t calc_HN_S2 running time: " << (float)(finish - start)/ CLOCKS_PER_SEC << " seconds" << endl;
344 
345  inTab.setData("SEQUENCE", sequence);
346  inTab.VARS_str_parser(" RESID_R1 RESNAME_R1 PHI_R1 PSI_R1 CHI1_R1 RESID_R2 RESNAME_R2 PHI_R2 PSI_R2 CHI1_R2 RESID_R3 RESNAME_R3 PHI_R3 PSI_R3 CHI1_R3 N_HM HA_HM C_HM CA_HM CB_HM H_HM H_HB HA_HB CO_HB SOURCE");
347  inTab.FORMAT_str_parser("%4d %s %8.3f %8.3f %8.3f %4d %s %8.3f %8.3f %8.3f %4d %s %8.3f %8.3f %8.3f %8.3f %8.3f %8.3f %8.3f %8.3f %8.3f %8.3f %8.3f %8.3f %s");
348 
349  //calculate H-bond for the first residue
350  float dist = inPDB.HBDistList[r1]["HN"];
351  U_HN_HB[0] = dist;
352  dist = inPDB.HBDistList[r1]["HA"];
353  U_HA_HB[0] = dist;
354  dist = inPDB.HBDistList[r1]["O"];
355  U_CO_HB[0] = dist;
356 
357 
// CHI2_ANGLES is keyed by residue number i (not by i-r1).
358  for ( int i = r1; i <= rN; i++ ) {
359  CHI2_ANGLES[i] = inPDB.getChi2(1,i); // chi2 angle for residue r1, index by i-r1 (confusing..., but consistent with the loop)
360  //OMEGA_ANGLES[i] = inPDB.getOmega(1,i); // chi2 angle for residue r1, index by i-r1 (confusing..., but consistent with the loop)
361  }
362 
363  float shift;
364  //loop for the polypeptide chain
// i is the central residue of the triplet (i-1, i, i+1); index = i-r1 addresses the U_* rows.
365  for ( int i = r1+1; i < rN; i++ ) {
366  int index = i-r1;
367 
// Skip triplets with a missing residue; their U_* rows stay unwritten.
368  if( residList.find(i-1) == residList.end() ||
369  residList.find(i) == residList.end() ||
370  residList.find(i+1) == residList.end()) continue;
371 
372 
// Columns 7..9 of U_ANGLES: phi/psi/chi1 of residue i+1.
373  shift = inPDB.getPhi(1,i+1);
374  U_ANGLES[index][7] = shift;
375  shift = inPDB.getPsi(1,i+1);
376  U_ANGLES[index][8] = shift;
377  shift = inPDB.getChi1(1,i+1);
378  U_ANGLES[index][9] = shift;
379 
380  U_NAME[index][3] = residList[i+1];
381 
382  inTab.Entries[index]["PHI_R3"] = ftoa(U_ANGLES[index][7], buf);
383  inTab.Entries[index]["PSI_R3"] = ftoa(U_ANGLES[index][8], buf);
384  inTab.Entries[index]["CHI1_R3"] = ftoa(U_ANGLES[index][9], buf);
385  inTab.Entries[index]["RESID_R3"] = itoa(i+1, buf);
386  inTab.Entries[index]["RESNAME_R3"] = residList[i+1];
387  inTab.Entries[index]["SOURCE"] = inName.substr(pos0,pos1-pos0);
388 
389  //Ring current shifts
390  boost::unordered_map< int, string >::iterator itN_unordered;
391  //std::map<int, string >::iterator itN;
// Each atom's shift goes to column (key-1); for glycine the HA3 shift is stored in
// column 7-1 = 6 and the regular "HA" column then receives the HA2 shift.
392  for(itN_unordered = aN.begin(); itN_unordered != aN.end(); itN_unordered++) {
393  string name = itN_unordered->second;
394  if( name == "H" ) {
395  name = "HN";
396  if( residList[i] == "P" ) continue;
397  } else if( name == "HA" && residList[i] == "G" ) {
398  U_RING_SHIFTS[index][7-1] = inPDB.getOrbitalShift(1,i,"HA3"); // change to use standard HA2/3 names
399  name = "HA2";
400  } else if( name == "CB" && residList[i] == "G" ) continue;
401 
402  U_RING_SHIFTS[index][itN_unordered->first-1] = inPDB.getOrbitalShift(1,i,name) ;
403  inTab.Entries[index][itN_unordered->second+"_HM"] = ftoa(U_RING_SHIFTS[index][itN_unordered->first-1], buf);
404  }
405 
406  //H-bonds
407  dist = inPDB.HBDistList[i]["HN"];
408  inTab.Entries[index]["H_HB"] = ftoa(dist, buf);
409  U_HN_HB[index] = dist;
410 
411  dist = inPDB.HBDistList[i]["HA"];
412  inTab.Entries[index]["HA_HB"] = ftoa(dist, buf);
413  U_HA_HB[index] = dist;
414 
415  dist = inPDB.HBDistList[i]["O"];
416  inTab.Entries[index]["CO_HB"] = ftoa(dist, buf);
417  U_CO_HB[index] = dist;
418 
419  if( inTab.Entries.find(index-1) != inTab.Entries.end() ) {
420  //if triplet i-1 exists
421  //assign the values of positions 1 and 2 of triplet i using the positions 2 and 3 of triplet i-1
422  U_ANGLES[index][1] = U_ANGLES[index-1][4];
423  U_ANGLES[index][2] = U_ANGLES[index-1][5];
424  U_ANGLES[index][3] = U_ANGLES[index-1][6];
425  U_ANGLES[index][4] = U_ANGLES[index-1][7];
426  U_ANGLES[index][5] = U_ANGLES[index-1][8];
427  U_ANGLES[index][6] = U_ANGLES[index-1][9];
428  U_NAME[index][1] = U_NAME[index-1][2];
429  U_NAME[index][2] = U_NAME[index-1][3];
430 
431  inTab.Entries[index]["PHI_R1"] = inTab.Entries[index-1]["PHI_R2"];
432  inTab.Entries[index]["PSI_R1"] = inTab.Entries[index-1]["PSI_R2"];
433  inTab.Entries[index]["CHI1_R1"] = inTab.Entries[index-1]["CHI1_R2"];
434  inTab.Entries[index]["RESID_R1"] = inTab.Entries[index-1]["RESID_R2"];
435  inTab.Entries[index]["RESNAME_R1"] = inTab.Entries[index-1]["RESNAME_R2"];
436 
437  inTab.Entries[index]["PHI_R2"] = inTab.Entries[index-1]["PHI_R3"];
438  inTab.Entries[index]["PSI_R2"] = inTab.Entries[index-1]["PSI_R3"];
439  inTab.Entries[index]["CHI1_R2"] = inTab.Entries[index-1]["CHI1_R3"];
440  inTab.Entries[index]["RESID_R2"] = inTab.Entries[index-1]["RESID_R3"];
441  inTab.Entries[index]["RESNAME_R2"] = inTab.Entries[index-1]["RESNAME_R3"];
442  }
443  else { //else, calculate the values from coordinates
444 
// Columns 1..3: residue i-1.
445  shift = inPDB.getPhi(1,i-1);
446  U_ANGLES[index][1] = shift;
447  shift = inPDB.getPsi(1,i-1);
448  U_ANGLES[index][2] = shift;
449  shift = inPDB.getChi1(1,i-1);
450  U_ANGLES[index][3] = shift;
451  U_NAME[index][1] = residList[i-1];
452 
453  inTab.setEntry(index, "PHI_R1", ftoa(U_ANGLES[index][1], buf) );
454  inTab.setEntry(index, "PSI_R1", ftoa(U_ANGLES[index][2], buf) );
455  inTab.setEntry(index, "CHI1_R1", ftoa(U_ANGLES[index][3], buf) );
456  inTab.setEntry(index, "RESID_R1", itoa(i-1, buf) );
457  inTab.setEntry(index, "RESNAME_R1", residList[i-1] );
458 
// Columns 4..6: residue i.
459  shift = inPDB.getPhi(1,i);
460  U_ANGLES[index][4] = shift;
461  shift = inPDB.getPsi(1,i);
462  U_ANGLES[index][5] = shift;
463  shift = inPDB.getChi1(1,i);
464  U_ANGLES[index][6] = shift;
465  U_NAME[index][2] = residList[i];
466 
467  inTab.Entries[index]["PHI_R2"] = ftoa(U_ANGLES[index][4], buf);
468  inTab.Entries[index]["PSI_R2"] = ftoa(U_ANGLES[index][5], buf);
469  inTab.Entries[index]["CHI1_R2"] = ftoa(U_ANGLES[index][6], buf);
470  inTab.Entries[index]["RESID_R2"] = itoa(i, buf);
471  inTab.Entries[index]["RESNAME_R2"] = residList[i];
472 
473  if ( tr.Trace.visible() ) {
474  tr.Trace << std::endl;
475  }
476  }
477 
478  //ANN input preparation
479  // (20 BLOSSUM + 2 PHI + 2 PSI + 2 CHI1 + 2 CHI2)*3 + (4 ASA)*3 -chi2_c_asa
480  // (20 BLOSSUM + 2 PHI + 2 PSI + 2 CHI1 + 2 CHI2 + 2 Oemga)*3 + (4 H-bond)*5 [O(i-1),HN,HA,O,HN(i+1)]
// Per residue the code below appends: the BLOSUM vector, sin/cos of phi, psi, chi1,
// a chi1-present flag, sin/cos of chi2 and a chi2-present flag. Angles that are not
// below 999 take the fallback branch (presumably a "missing angle" sentinel -- confirm):
// sin/cos of pi for phi/psi, zeros plus a 0 flag for chi1/chi2.
// With 20 BLOSUM values per residue this gives 3*30 + 5*4 + 3 = 113 inputs.
482  //add ANN input for residue i-1
483  string resName=residList[i-1]; if(resName=="c") resName="C";
484  temp.insert(temp.end(), BLOSUM_62[resName].begin(), BLOSUM_62[resName].end());
485  float phi = U_ANGLES[index][1], psi = U_ANGLES[index][2], chi1 = U_ANGLES[index][3], chi2 = CHI2_ANGLES[i-1];//, omega=OMEGA_ANGLES[i-1];
486  if( phi<999 ) { temp.push_back(sin(phi*SPARTA_RADS_PER_DEG)); temp.push_back(cos(phi*SPARTA_RADS_PER_DEG));}//phi
487  else { temp.push_back(SIN_PI); temp.push_back(COS_PI);}
488  if( psi<999 ) { temp.push_back(sin(psi*SPARTA_RADS_PER_DEG)); temp.push_back(cos(psi*SPARTA_RADS_PER_DEG));}//psi
489  else { temp.push_back(SIN_PI); temp.push_back(COS_PI);}
490  if( chi1<999 ) { temp.push_back(sin(chi1*SPARTA_RADS_PER_DEG)); temp.push_back(cos(chi1*SPARTA_RADS_PER_DEG));}//chi1
491  else { temp.push_back(0); temp.push_back(0);}
492  temp.push_back(chi1<999);
493  if( chi2<999 ) { temp.push_back(sin(chi2*SPARTA_RADS_PER_DEG)); temp.push_back(cos(chi2*SPARTA_RADS_PER_DEG));}//chi2
494  else { temp.push_back(0); temp.push_back(0);}
495  temp.push_back(chi2<999);
496 
497  //add ANN input for residue i
498  resName=residList[i]; if(resName=="c") resName="C";
499  temp.insert(temp.end(), BLOSUM_62[resName].begin(), BLOSUM_62[resName].end());
500  phi = U_ANGLES[index][4]; psi = U_ANGLES[index][5]; chi1 = U_ANGLES[index][6]; chi2 = CHI2_ANGLES[i];//, omega=OMEGA_ANGLES[i];
501  if( phi<999 ) { temp.push_back(sin(phi*SPARTA_RADS_PER_DEG)); temp.push_back(cos(phi*SPARTA_RADS_PER_DEG));}//phi
502  else { temp.push_back(SIN_PI); temp.push_back(COS_PI);}
503  if( psi<999 ) { temp.push_back(sin(psi*SPARTA_RADS_PER_DEG)); temp.push_back(cos(psi*SPARTA_RADS_PER_DEG));}//psi
504  else { temp.push_back(SIN_PI); temp.push_back(COS_PI);}
505  if( chi1<999 ) { temp.push_back(sin(chi1*SPARTA_RADS_PER_DEG)); temp.push_back(cos(chi1*SPARTA_RADS_PER_DEG));}//chi1
506  else { temp.push_back(0); temp.push_back(0);}
507  temp.push_back(chi1<999);
508  if( chi2<999 ) { temp.push_back(sin(chi2*SPARTA_RADS_PER_DEG)); temp.push_back(cos(chi2*SPARTA_RADS_PER_DEG));}//chi2
509  else { temp.push_back(0); temp.push_back(0);}
510  temp.push_back(chi2<999);
511 
512  //add ANN input for residue i+1
513  resName=residList[i+1]; if(resName=="c") resName="C";
514  temp.insert(temp.end(), BLOSUM_62[resName].begin(), BLOSUM_62[resName].end());
515  phi = U_ANGLES[index][7]; psi = U_ANGLES[index][8]; chi1 = U_ANGLES[index][9]; chi2 = CHI2_ANGLES[i+1];//, omega=OMEGA_ANGLES[i+1];
516  if( phi<999 ) { temp.push_back(sin(phi*SPARTA_RADS_PER_DEG)); temp.push_back(cos(phi*SPARTA_RADS_PER_DEG));}//phi
517  else { temp.push_back(SIN_PI); temp.push_back(COS_PI);}
518  if( psi<999 ) { temp.push_back(sin(psi*SPARTA_RADS_PER_DEG)); temp.push_back(cos(psi*SPARTA_RADS_PER_DEG));}//psi
519  else { temp.push_back(SIN_PI); temp.push_back(COS_PI);}
520  if( chi1<999 ) { temp.push_back(sin(chi1*SPARTA_RADS_PER_DEG)); temp.push_back(cos(chi1*SPARTA_RADS_PER_DEG));}//chi1
521  else { temp.push_back(0); temp.push_back(0);}
522  temp.push_back(chi1<999);
523  if( chi2<999 ) { temp.push_back(sin(chi2*SPARTA_RADS_PER_DEG)); temp.push_back(cos(chi2*SPARTA_RADS_PER_DEG));}//chi2
524  else { temp.push_back(0); temp.push_back(0);}
525  temp.push_back(chi2<999);
526 
527 
// Five H-bond groups of four values each: presence flag, distance, cos(DHO angle),
// cos(HOA angle); all zeros when the stored distance is not positive.
// Order: O(i-1), HN(i), HA(i), O(i), HN(i+1).
528  float hb = inPDB.HBDistList[i-1]["O"];
529  if(hb>0) {
530  temp.push_back(1.0); temp.push_back(hb); temp.push_back( cos(inPDB.HB_DHO_AngleList[i-1]["O"]*SPARTA_RADS_PER_DEG) ); temp.push_back( cos(inPDB.HB_HOA_AngleList[i-1]["O"]*SPARTA_RADS_PER_DEG) );
531  }
532  else {temp.push_back(0.0); temp.push_back(0.0); temp.push_back(0.0); temp.push_back(0.0);}
533  hb = inPDB.HBDistList[i]["HN"];
534  if(hb>0) {
535  temp.push_back(1.0); temp.push_back(hb); temp.push_back( cos(inPDB.HB_DHO_AngleList[i]["HN"]*SPARTA_RADS_PER_DEG) ); temp.push_back( cos(inPDB.HB_HOA_AngleList[i]["HN"]*SPARTA_RADS_PER_DEG) );
536  }
537  else {temp.push_back(0.0); temp.push_back(0.0); temp.push_back(0.0); temp.push_back(0.0);}
538  hb = inPDB.HBDistList[i]["HA"];
539  if(hb>0) {
540  temp.push_back(1.0); temp.push_back(hb); temp.push_back( cos(inPDB.HB_DHO_AngleList[i]["HA"]*SPARTA_RADS_PER_DEG) ); temp.push_back( cos(inPDB.HB_HOA_AngleList[i]["HA"]*SPARTA_RADS_PER_DEG) );
541  }
542  else {temp.push_back(0.0); temp.push_back(0.0); temp.push_back(0.0); temp.push_back(0.0);}
543  hb = inPDB.HBDistList[i]["O"];
544  if(hb>0) {
545  temp.push_back(1.0); temp.push_back(hb); temp.push_back( cos(inPDB.HB_DHO_AngleList[i]["O"]*SPARTA_RADS_PER_DEG) ); temp.push_back( cos(inPDB.HB_HOA_AngleList[i]["O"]*SPARTA_RADS_PER_DEG) );
546  }
547  else {temp.push_back(0.0); temp.push_back(0.0); temp.push_back(0.0); temp.push_back(0.0);}
548  hb = inPDB.HBDistList[i+1]["HN"];
549  if(hb>0) {
550  temp.push_back(1.0); temp.push_back(hb); temp.push_back( cos(inPDB.HB_DHO_AngleList[i+1]["HN"]*SPARTA_RADS_PER_DEG) ); temp.push_back( cos(inPDB.HB_HOA_AngleList[i+1]["HN"]*SPARTA_RADS_PER_DEG) );
551  } else {temp.push_back(0.0); temp.push_back(0.0); temp.push_back(0.0); temp.push_back(0.0);}
552 
553  temp.push_back(inPDB.HN_S2[i-1]);
554  temp.push_back(inPDB.HN_S2[i]);
555  temp.push_back(inPDB.HN_S2[i+1]);
556 
// A vector of any other length is silently dropped; ANN_IN_MTX is keyed by residue number i.
557  if( temp.size() == 113) ANN_IN_MTX[i]=temp;
558  }
559 
560 
561  //calculate H-bond for the last residue
562  dist = inPDB.HBDistList[rN]["HN"];
563  U_HN_HB[rN-r1] = dist;
564  dist = inPDB.HBDistList[rN]["HA"];
565  U_HA_HB[rN-r1] = dist;
566  dist = inPDB.HBDistList[rN]["O"];
567  U_CO_HB[rN-r1] = dist;
568 
569  if ( create_output ) inTab.saveGDB(PRED_DIR+slash_char+inName.substr(pos0,pos1-pos0) + "_in.tab");
570 
// Optional dump of every network input value, one per line, prefixed by its residue number.
571  if ( tr.Trace.visible() ) {
572  boost::unordered_map<int, utility::vector0<float> >::iterator itX;
573  for(itX = ANN_IN_MTX.begin(); itX != ANN_IN_MTX.end(); itX++) {
574  for(int i=0; i< (int)(itX->second).size();i++)
575  tr.Trace << (itX->first) << " " << (itX->second)[i] << std::endl;
576  }
577  }
578 }
579 
580 
581 
582 // run ANN prediction for a single protein chain
583 //void Sparta::runANN_Prediction() {
584 // clock_t start/*, finish*/;
585 // start = clock();
586 
587 // // init(); now in constructor
588 // if ( bCreateOutput_ ) {
589 // // mkdir for prediction
590 // if (PRED_DIR.find_last_of(slash_char) == PRED_DIR.length()-1 ) {
591 // PRED_DIR = PRED_DIR.substr(0,PRED_DIR.length()-1);
592 // }
593 // mkdir_pred(PRED_DIR);
594 // }
595 // //for( itN = aN.begin(); itN != aN.end(); itN++ )
596 // // mkdir_pred(PRED_DIR+slash_char+itN->second);
597 
598 // tr.Info << "Reading PDB Coordinates from " << inName << endl;
599 // inPDB.loadPDB(inName);
600 
601 // residList = inPDB.residListOne;
602 
603 // r1 = inPDB.r1;
604 // rN = inPDB.rN;
605 
606 // if (firstRes < r1) firstRes = r1;
607 // if (lastRes < r1) lastRes = r1;
608 
609 // if (firstRes > rN) firstRes = rN;
610 // if (lastRes > rN) lastRes = rN;
611 
612 // if (firstRes > lastRes) {
613 // int itemp = firstRes;
614 // firstRes = lastRes;
615 // lastRes = itemp;
616 // }
617 
618 // run_A_ANN_Prediction();
619 // }
620 
621 // run ANN prediction for a single protein chain
// NOTE(review): the signature line (listing line 622) and listing line 634 are missing
// from this listing; presumably this is run_A_ANN_Prediction() -- confirm.
// Obtains the prediction table from the library, compares it with the reference
// shifts via compareRef_fxn() and returns the resulting score. When output is
// enabled, the comparison table and the reference table are written to disk.
623 
624  GDB COMP_Tab;
625  GDB PRED_SUM = lib().get_ANN_data( bCreateOutput_ );
626  Real score( compareRef_fxn( lib().aN, PRED_SUM, REF_CS_Tab, COMP_Tab ) );
627 
628  if ( bCreateOutput_ ) {
629  COMP_Tab.addRemark( "Observed chemical shift from: " + refCSFileName );
// NOTE(review): this writes to lib().sumName, the same file name get_ANN_data() saves
// PRED_SUM to when output is enabled -- confirm the overwrite is intended.
630  COMP_Tab.saveGDB( lib().sumName );
631  REF_CS_Tab.saveGDB( lib().PRED_DIR + lib().slash_char + "ref.tab" );
632  }
633 
635  return score;
636 }
637 
// NOTE(review): the signature line (listing line 638) is missing from this listing;
// presumably this is deallocate_arrays() -- confirm.
// Releases the buffers allocated in getResInfo(). For the 2-D tables the contiguous
// data buffer (row 0) is freed before the row-pointer array.
// NOTE(review): the pointers are not reset afterwards, so calling this twice without
// an intervening getResInfo() would delete the same memory again.
639  // deallocation - structures created in getResInfo
640  delete [] U_ANGLES[0];
641  delete [] U_ANGLES;
642  delete [] U_NAME[0];
643  delete [] U_NAME;
644  delete [] U_RING_SHIFTS[0];
645  delete [] U_RING_SHIFTS;
646  delete [] U_HN_HB;
647  delete [] U_HA_HB;
648  delete [] U_CO_HB;
649 }
650 
651 GDB Sparta::SpartaLib::get_ANN_data( bool create_output ) {
652  /*clock_t start, finish*/;
653  //start = clock();
654 
655  GDB PRED_SUM;
656 
657  tr.Info << "Analyzing " << inName << " " ;
658  getResInfo( create_output ); // loads info from PDB
659  tr.Info << residList.size() << " residues read " << endl;
660 
661  //finish = clock();
662  //tr.Info << "\t getResInfo() running time: " << (float)(finish - start)/ CLOCKS_PER_SEC << " seconds" << endl;
663 
664  //start = clock();
665  tr.Info << "ANN prediction ..." << endl;
666  for(itN = aN.begin(); itN != aN.end(); itN++) {
667  string atomName = itN->second;
668  if( atomName == "H" ) atomName="HN";
669 
670  SPARTA_ANN[atomName].ANN_OUT_MTX_LEVEL1.clear();
671  SPARTA_ANN[atomName].runSpartaANN(ANN_IN_MTX);
672 
673  ANN_CS_OUTPUT_FULL[atomName] = SPARTA_ANN[atomName].ANN_OUT_MTX_LEVEL1;
674  }
675 
676  //finish = clock();
677  //tr.Info << "\t ANNPredict() running time: " << (float)(finish - start)/ CLOCKS_PER_SEC << " seconds" << endl;
678 
679  //GDB PRED_SUM;
680  PRED_SUM.VARS_str_parser(" RESID RESNAME ATOMNAME SS_SHIFT SHIFT RC_SHIFT HM_SHIFT EF_SHIFT SIGMA SOURCE");
681  PRED_SUM.FORMAT_str_parser(" %4d %4s %4s %9.3f %9.3f %9.3f %9.3f %9.3f %9.3f %s");
682  string str = itoa(r1, buf);
683  PRED_SUM.setData("FIRST_RESID", str+"\n");
684  PRED_SUM.setData("SEQUENCE", sequence);
685 
686  float RC, RCadj, pred_2nd_shift, pred_shift/*, HB*/;
687  for ( int i = r1+1; i <= rN-1; i++ ) { //olange: we have not loaded the ANN with stuff for residue 1 or rN as it would be the 0,1,2 triplett.. ignore here TOO!
688  for(itN = aN.begin(); itN != aN.end(); itN++) {
689  string atomName = itN->second;
690  if( atomName == "H" ) atomName="HN";
691  int index = PRED_SUM.Entries.size()+1;
692 
693  if( residList[i].empty() ) continue;
694  if( residList[i] == "P" && (atomName == "HN" || atomName == "N") ) continue;
695  if( residList[i] == "G" && atomName == "CB" ) continue;
696  if( i==r1 && (atomName == "HN"|| atomName == "N") ) continue; //added from email Yang Shen/ Aug 6th.
697  if( i==rN && atomName == "C" ) continue; //added from email Yang Shen/ Aug 6th
698 
699  PRED_SUM.setEntry(index, "RESID", itoa(i,buf));
700  PRED_SUM.setEntry(index, "RESNAME", residList[i]);
701  if (atomName=="HA" && residList[i] == "G") PRED_SUM.setEntry(index, "ATOMNAME", "HA2"); //added from email YangShen Aug 6th.
702  else PRED_SUM.setEntry(index, "ATOMNAME", atomName);
703 
704 
705  RC = getRC(residList[i],atomName);
706  RCadj = getRCadj(residList[i],atomName);
707  if (i==r1 || i==rN) {
708  pred_2nd_shift = 0.0; //may not good for the last residue, for which the neighoring residue effect is not considered.
709  } else {
710  pred_2nd_shift = 0.0;
711  if ( ANN_CS_OUTPUT_FULL[atomName].size() >= static_cast< Size > (i) ) {
712  pred_2nd_shift = ANN_CS_OUTPUT_FULL[atomName][i][0];
713  }
714 
715  if (atomName == "HA") pred_2nd_shift /= 4.0;
716  else if (atomName == "HN") pred_2nd_shift /= 2.0;
717  else if (atomName == "N") pred_2nd_shift *= 2.5;
718 
719  if ( pred_2nd_shift > 20.0 || pred_2nd_shift < -20.0 ) pred_shift = 0.0;
720 
721  /*
722  if(atomName == "HA") pred_2nd_shift = ANN_CS_OUTPUT_FULL[atomName][i][0]/4.0;
723  else if(atomName == "HN") pred_2nd_shift = ANN_CS_OUTPUT_FULL[atomName][i][0]/2.0;
724  else if(atomName == "N") pred_2nd_shift = ANN_CS_OUTPUT_FULL[atomName][i][0]*2.5;
725  else pred_2nd_shift = ANN_CS_OUTPUT_FULL[atomName][i][0];
726  if( pred_2nd_shift > 20.0 || pred_2nd_shift < -20.0 ) pred_shift = 0.0;
727  */
728  }
729 
730  pred_shift = pred_2nd_shift + RC + RCadj; // + PrevRCadj + NextRCadj;
731  if( pred_shift > 999.0) pred_shift = SPARTA_MAX_NUM;
732 
733 
734  PRED_SUM.setEntry(index, "SS_SHIFT", ftoa(pred_2nd_shift,buf));
735 
736  pred_shift += 0.6*atof(inTab.Entries[i-r1][atomName+"_HM"].c_str());
737  if(atomName == "HN" || atomName == "HA" ) pred_shift-= inPDB.ElectricField[i][atomName]; //marked off to exclude shifts from "global" contacts and to test MFR
738  PRED_SUM.setEntry(index, "SHIFT", ftoa(pred_shift,buf));
739 
740  PRED_SUM.setEntry(index, "RC_SHIFT", ftoa(RC+RCadj ,buf) );
741  PRED_SUM.setEntry(index, "SOURCE", sourceName );
742  PRED_SUM.setEntry(index, "SIGMA", ftoa(getANN_PredError(U_ANGLES[i-r1][4],U_ANGLES[i-r1][5],residList[i],atomName),buf) );
743  //tr.Info << U_ANGLES[i-r1][4] << "\t" << U_ANGLES[i-r1][5] << "\t" << getANN_PredError(U_ANGLES[i-r1][4],U_ANGLES[i-r1][5],residList[i],atomName) << endl;
744  PRED_SUM.setEntry(index, "HM_SHIFT", inTab.Entries[i-r1][atomName+"_HM"] );
745  PRED_SUM.setEntry(index, "EF_SHIFT", ftoa(inPDB.ElectricField[i][atomName],buf) );
746 
747  if(atomName=="HA" && residList[i] == "G") { // for GLY HA3
748  index++;
749  PRED_SUM.setEntry(index, "RESID", itoa(i,buf));
750  PRED_SUM.setEntry(index, "RESNAME", residList[i]);
751  PRED_SUM.setEntry(index, "ATOMNAME", "HA3");
752  PRED_SUM.setEntry(index, "SS_SHIFT", ftoa(pred_2nd_shift,buf));
753  pred_shift = pred_2nd_shift + RC + RCadj + 0.6*U_RING_SHIFTS[i-r1][0]; // atof(inTab.Entries[i-r1][atomName+"_HM"].c_str());;
754  pred_shift-= inPDB.ElectricField[i]["HA"];
755  PRED_SUM.setEntry(index, "SHIFT", ftoa(pred_shift,buf));
756  PRED_SUM.setEntry(index, "RC_SHIFT", ftoa(RC+RCadj ,buf) );
757  PRED_SUM.setEntry(index, "SOURCE", sourceName );
758  PRED_SUM.setEntry(index, "SIGMA", ftoa(getANN_PredError(U_ANGLES[i-r1][4],U_ANGLES[i-r1][5],residList[i],atomName),buf) );
759  PRED_SUM.setEntry(index, "HM_SHIFT", ftoa(U_RING_SHIFTS[i-r1][0],buf) );
760  PRED_SUM.setEntry(index, "EF_SHIFT", ftoa(inPDB.ElectricField[i]["HA"],buf) );
761  }
762  }
763  }
764 
765  if ( tr.Debug.visible() ) {
766  tr.Debug << " ============== PRED_SUM ==================== " << std::endl;
767  PRED_SUM.showGDB( tr.Debug );
768  tr.Debug << " ============== END_ PRED_SUM ==================== " << std::endl;
769  }
770  //finish = clock();
771  //tr.Info << "\t ANNPredict() running time: " << (float)(finish - start)/ CLOCKS_PER_SEC << " seconds" << endl;
772  if ( create_output ) {
773  PRED_SUM.saveGDB(sumName);
774  // snippet moved from compareRef. Not sure why this is done twice
775  // in slightly different ways ...
776  int pos = sumName.find_last_of(".");
777  PRED_SUM.saveGDB( sumName.substr(0, pos) + "_full.tab" ); //save the original prediction summary file to a new name
778  }
779 
780  return PRED_SUM;
781 } // get_ANN_data
782 
783 // // run ANN prediction for multiple protein chains
784 // void Sparta::runANN_Predictions() {
785 // //init(); //now in constructor
786 // // mkdir for prediction
787 // if ( bCreateOutput_ ) {
788 
789 // if (PRED_DIR.find_last_of(slash_char) == PRED_DIR.length()-1 ) {
790 // PRED_DIR = PRED_DIR.substr(0,PRED_DIR.length()-1);
791 // }
792 // mkdir_pred(PRED_DIR);
793 // // for( itN = aN.begin(); itN != aN.end(); itN++ )
794 // // mkdir_pred(PRED_DIR+slash_char+itN->second);
795 // }
796 
797 // utility::vector0< string > temp = split(" ", inNames);
798 // string outName = sumName;
799 
800 // tr.Info << inNames << endl;
801 
802 // for ( Size i = 0; i < temp.size(); i++) {
803 // inName = temp[i];
804 // //tr.Info << "Reading PDB Coordinates from " << inName << endl;
805 // inPDB.loadPDB(inName);
806 
807 // residList = inPDB.residListOne;
808 
809 // r1 = inPDB.r1;
810 // rN = inPDB.rN;
811 
812 // if (firstRes < r1) firstRes = r1;
813 // if (lastRes < r1) lastRes = r1;
814 
815 // if (firstRes > rN) firstRes = rN;
816 // if (lastRes > rN) lastRes = rN;
817 
818 // if (firstRes > lastRes) {
819 // int itemp = firstRes;
820 // firstRes = lastRes;
821 // lastRes = itemp;
822 // }
823 
824 // ANN_IN_MTX.clear();
825 // ANN_CS_OUTPUT_FULL.clear();
826 
827 // int pos0 = inName.find_last_of(slash_char)+1;
828 // int pos1 = inName.find_last_of(".");
829 
830 // sourceName=inName.substr(pos0,pos1-pos0);
831 // sumName = PRED_DIR + slash_char + sourceName + "_pred.tab";
832 
833 // run_A_ANN_Prediction();
834 // tr.Info << "\tPrediction file " << sumName << " is ready for protein " << inName << endl;
835 
836 // }
837 
838 // }
839 
840 
841 
842 // Initiate an ANN prediction for a single protein using its file name
843 //void Sparta::runANN_Prediction(const string& pName)
844 //{
845  //init(); now in constructor
846 // if ( bCreateOutput_ ) {
847 
848 // // mkdir for prediction
849 // if (PRED_DIR.find_last_of(slash_char) == PRED_DIR.length()-1 )
850 // PRED_DIR = PRED_DIR.substr(0,PRED_DIR.length()-1);
851 // for( itN = aN.begin(); itN != aN.end(); itN++ )
852 // mkdir_pred(PRED_DIR+slash_char+itN->second);
853 // }
854 // inName = pName;
855 // inPDB.loadPDB(inName);
856 
857 // run_A_ANN_Prediction();
858 //}
859 
860 
861 
863 {
864  int step = 5;
865 
866  for(itN = aN.begin(); itN != aN.end(); itN++) {
867  string atomName = itN->second;
868  if( atomName == "H" ) atomName="HN";
869 
870  for( Size i=0; i<AAlist.length();i++) {
871  string AA = AAlist.substr(i,1);
872  if( AA == " " ) continue;
873 
874  if( AA == "G" && atomName == "CB" ) continue;
875  if( AA == "P" && atomName == "HN" ) continue;
876 
877  string surfName = TAB_DIR + slash_char + "errorSurface" + slash_char + atomName + slash_char + AA + "..A450.S5.RMS.tab";
878  GDB surf(surfName);
879 
880  for (it = surf.Entries.begin(); it != surf.Entries.end(); it++) {
881  int phi = atoi( it->second["PHI"].c_str() );
882  for(int y=-180; y<180; y+=step) {
883  string psi=itoa(y,buf);
884  SPARTA_ERR_SURF[AA][atomName][phi][y] = atof( it->second[psi].c_str() );
885  }
886  }
887  }
888  }
889 }
890 
891 
892 
893 float Sparta::SpartaLib::getANN_PredError(float phi, float psi, string aa, string aName)
894 {
895  return SPARTA_ERR_SURF[aa][aName][5*int(phi/5)][5*int(psi/5)];
896 }
897 
898 
899 
900 // get random coil chemical shift for atom 'aName' of residue 'resName'
901 float Sparta::SpartaLib::getRC(const string& resName, const string& aName)
902 {
903  GDB_Entry temp = RC_Tab.getEntry("RESNAME",resName,1);
904 
905  if(temp.size() != 0) return atof( temp[aName].c_str() );
906 
907  return 9999.0;
908 }
909 
910 
911 
912 float Sparta::SpartaLib::getRCadj(const string& resName, const string& aName)
913 {
914  GDB_Entry temp = ADJ_Tab.getEntry("RESNAME",resName,1);
915 
916  if(temp.size() != 0) return atof( temp[aName].c_str() );
917 
918  return 0.0;
919 }
920 
921 
922 
923 float Sparta::SpartaLib::getPrevRCadj(const string& prev_rName, const string& aName)
924 {
925  GDB_Entry temp = PREV_Tab.getEntry("RESNAME",prev_rName,1);
926 
927  if(temp.size() != 0) return atof( temp[aName].c_str() );
928 
929  return 0.0;
930 }
931 
932 
933 
934 float Sparta::SpartaLib::getNextRCadj(const string& next_rName, const string& aName)
935 {
936  GDB_Entry temp = NEXT_Tab.getEntry("RESNAME",next_rName,1);
937 
938  if(temp.size() != 0) return atof( temp[aName].c_str() );
939 
940  return 0.0;
941 }
942 
943 float Sparta::SpartaLib::getWeight(const string& Name, const string& aName)
944 {
945  GDB_Entry temp = WEIGHT_Tab.getEntry("RESNAME",Name,1);
946 
947  if(temp.size() != 0) return atof( temp[aName].c_str() );
948 
949  return 9999.0;
950 
951 }
952 
953 void Sparta::SpartaLib::mkdir_pred(const string& d)// create a directory for prediction results
954 {
955  if ( ! isDirExists( d ) ) {
956  int i = MKDIR(d.c_str())+1;
957  if ( !i ) { // if not success
958  string parentD = d.substr(0,d.find_last_of(slash_char)+1);
959  mkdir_pred(parentD.c_str());
960  if ( !(MKDIR(d.c_str())+1) ) {
961  string msg("\tCan't create prediction directory " + d);
962  utility_exit_with_message(msg);
963  }
964  }
965  }
966 }
967 
968 } // namespace sparta
969 } // namespace protocols