Rosetta 3.5
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
GDB.cc
Go to the documentation of this file.
1 // -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
2 /* */
3 /* ---- SPARTA ---- */
4 /* Shifts Prediction from Analogue of Residue type and Torsion Angle */
5 /* Yang Shen and Ad Bax */
6 /* J. Biomol. NMR, 38, 289-302 (2007) */
7 /* NIH, NIDDK, Laboratory of Chemical Physics */
8 /* version, 1.00 (build 2010.0607.00) */
9 /* */
10 /* for any problem, please contact */
11 /* shenyang@niddk.nih.gov */
12 /* */
13 /******************************************************************************/
14 
15 
16 /* GDB.cpp: class for a simple generic database */
17 
18 
19 #include <fstream>
20 #include <protocols/sparta/GDB.hh>
21 #include <protocols/sparta/util.hh>
22 #include <utility/exit.hh>
23 // Utility headers
24 #include <basic/Tracer.hh>
25 #include <boost/unordered_map.hpp>
26 
27 #include <utility/vector0.hh>
28 #include <utility/vector1.hh>
29 
30 
31 
32 static basic::Tracer tr("protocols.sparta");
33 
34 namespace protocols {
35 namespace sparta {
36 
37 using namespace std;
38 
40 {
41  VarsNumber = 0;
42  plain_text = false;
43 }
44 
45 
46 GDB::GDB(const string &fileName)
47 {
48  GDBfileName = fileName;
49 
50  VarsNumber = 0;
51 
52  loadGDB(fileName);
53  plain_text = false;
54 }
55 
56 
57 void GDB::loadGDB(const string &fileName)
58 {
59  GDBfileName = fileName;
60 
61  ifstream file(fileName.c_str());
62  if (! file.is_open() ){
63  tr.Error << "\tCan't open file " << fileName << " for reading" << endl;
64  exit(0);
65  }
66 
67  int entry_count=1;
68  string str;
69  bool DATA_START = false;
70 
71  while (! file.eof() ) {
72  getline (file, str);
73  if (str.empty() || str.size() == 0) continue;
74 
75  //StringList fields = split(" ", simplifyWhiteSpace(str)); split_WhiteSpace
76  StringList fields = split_WhiteSpace(str);
77  if ( fields.size() == 0) continue;
78 
79  //read entries
80  if ( DATA_START && VarsNumber > 0 && (int) fields.size() == VarsNumber )
81  {
82  for(int i = 0; i < VarsNumber; i++)
83  {
84  Entries[ entry_count ][ VARS[i] ] = fields[i];
85  }
86  entry_count++;
87  }
88 
89  //skip the "REMARK"
90  if ( fields[0] == "REMARK" )
91  {
92  REMARKS.push_back( str.substr( fields[0].length()+1, str.length() ) );
93  }
94  else if ( fields[0] == "DATA" ) {
95  string text;
96  text = str.substr( fields[0].length()+1, str.length() );
97  DATA.push_back( text );
98  } else if ( fields[0] == "VARS" ) { //read the VARS
99  string text;
100  text = str.substr(fields[0].length(), str.length());
101  VARS_str = simplifyWhiteSpace(text);
102  VARS_str_parser(VARS_str);
103  } else if ( fields[0].compare("FORMAT") == 0 ) {//read the FORMAT
104  FORMAT_str = str.substr(fields[0].length(), str.length());
105  FORMAT_str_parser(FORMAT_str);
106  DATA_START = true;
107  }
108  }
109 
110  file.close();
111 
112  //re-format the SEQUENCE if there exist SEQUENCE DATA
113  string seq = getData("SEQUENCE");
114  string firstResS = getData("FIRST_RESID");
115 
116  firstResID = (firstResS.length() > 0)? atoi( firstResS.c_str() ):1;
117  if ( seq.length() > 0 )
118  {
119  for(int i = 0; i < (int) seq.length(); i++)
120  {
121  if ( !(seq[i]>='A' && seq[i]<'Z') && seq[i]!='c' && seq[i]!='p' && seq[i]!='?') continue;
122  residList[residList.size()+firstResID] = seq.substr(i,1); // insert one-letter amino acid code
123  }
124  }
125 
126 }
127 
128 
129 
130 void GDB::saveGDB(const string &fileName)
131 {
132  ofstream out(fileName.c_str(), ios::trunc);
133 
134  if ( !out ){
135  tr.Error << "\tCan't save file " << fileName.c_str() << endl;
136  }
137  showGDB( out );
138 }
139 
140 void GDB::showGDB( std::ostream & out ) {
141 
142  if (!plain_text) {
143  for(int i = 0; i < (int)REMARKS.size(); i++) {
144  out << "REMARK " << REMARKS[i] << endl;
145  }
146  out << endl;
147 
148  for(int i = 0; i < (int)DATA.size(); i++) {
149  int pos = DATA[i].find_first_of(' ');
150  string name = DATA[i].substr(0,pos);
151 
152  if ( name == "SEQUENCE" ) {
153  string seq = simplifyWhiteSpace( DATA[i].substr( pos+1,DATA[i].length()-pos-1 ) );
154  int len = seq.length();
155  if (len <= 0) continue;
156 
157  for(int i = 0; i<= len/55; i++) {
158  string temp = seq.substr(i*55, 55);
159  out << "DATA SEQUENCE " << temp.c_str() << endl;
160  }
161  } else out << "DATA " << DATA[i] << endl;
162  }
163  out << endl;
164 
165  boost::unordered_map<int, string>::iterator iterR;
166  out << "VARS ";
167  for ( iterR = VARS.begin(); iterR != VARS.end(); iterR++ )
168  out << iterR->second.c_str() << " ";
169  out << endl;
170 
171  out << "FORMAT ";
172  for ( iterR = FORMAT.begin(); iterR != FORMAT.end(); iterR++ )
173  out << iterR->second.c_str();
174  out << endl << endl;
175  }
176 
177  // write the entries
178  boost::unordered_map< int, boost::unordered_map<string, string> >::iterator it;
179  for ( it = Entries.begin(); it != Entries.end(); it++ ) {
180  GDB_Entry ent = it->second;
181 
182  for(int i = 0; i < (int) VARS.size(); i++) {
183  if ( contains(FORMAT[i],'s') == 1) {
184  sprintf(buf, FORMAT[i].c_str(), ent[ VARS[i] ].c_str() ) ;
185  out << buf;
186  } else if ( contains(FORMAT[i],'d') == 1) {
187  sprintf(buf, FORMAT[i].c_str(), atoi(ent[ VARS[i] ].c_str() )) ;
188  out << buf;
189  } else if ( contains(FORMAT[i],'f') == 1) {
190  sprintf(buf, FORMAT[i].c_str(), atof(ent[ VARS[i] ].c_str() )) ;
191  out << buf ;
192  }
193  }
194  out << endl;
195  }
196 
197 }
198 
199 
200 
202 {
203  return Entries[number];
204 }
205 
206 
207 // get the index-th entry with VName=VVal, default return the first satisfied entry
208 GDB_Entry GDB::getEntry(const string &VName, const string &VVal, int index)
209 {
210  GDB_Entry ent;
211 
212  int count = 0;
213  boost::unordered_map< int, boost::unordered_map<string, string> >::iterator it;
214  for ( it = Entries.begin(); it != Entries.end(); it++ )
215  {
216  ent = it->second;
217  if ( ent[VName] == VVal ) count++;
218 
219  if ( count == index && index > 0 ) return ent;
220  }
221 
222  ent.clear();
223 
224  return ent;
225 }
226 
227 
228 // get the index-th entry with VName1=VVal1 and VName2=VVal2, default return the first satisfied entry
229 GDB_Entry GDB::getEntry(const string &VName1, const string &VVal1, const string &VName2, const string &VVal2, int index)
230 {
231  GDB_Entry ent;
232 
233  int count = 0;
234  boost::unordered_map< int, boost::unordered_map<string, string> >::iterator it;
235  for ( it = Entries.begin(); it != Entries.end(); it++ )
236  {
237  ent = it->second;
238 
239  if ( ent[VName1] == VVal1 && ent[VName2] == VVal2 ) count++;
240 
241  if ( count == index && index > 0 ) return ent;
242  }
243 
244  ent.clear();
245 
246  return ent;
247 }
248 
249 
250 
251 string GDB::getResidName(int rNum)
252 {
253  return residList[rNum];
254 }
255 
256 
257 
258 int GDB::getEntryCount() // return size of current entries
259 {
260  return Entries.size();
261 }
262 
263 
264 
265 //set value to a variable of a given entry
267  int index,
268  const string & VarName,
269  const string & VarVal
270 ) {
271  boost::unordered_map<int, string>::iterator it_V;
272 
273  for( it_V = VARS.begin(); it_V != VARS.end(); it_V++ )
274  if ( it_V->second == VarName ) break;
275 
276  if ( it_V != VARS.end() ) {
277  (Entries[ index ])[VarName.c_str()] = VarVal.c_str();
278  } else {
279  string const msg( "\tInvalid variable name '" + VarName + "'\n" );
280  //tr.Error << "\tInvalid varible name '" << VarName << "'" << endl;
281  tr.Error << msg;
282  utility_exit_with_message(msg);
283  }
284 }
285 
286 
287 
288 //add a new entry to the end of entries list
289 void GDB::addEntry(const string &VarName, const string &VarVal)
290 {
291  setEntry( Entries.size()+1, VarName, VarVal);
292 }
293 
294 
295 
296 //add one VAR with given FORMAT to the end of VARS list
297 void GDB::addVAR(const string &VAR_Name, const string &FORMAT_Name)
298 {
299  int size = VARS.size();
300 
301  if ( !checkFormat(FORMAT_Name) )
302  tr.Error << "\tBad format syntax '" << FORMAT_Name << "'" << endl;
303  else if (contains(VAR_Name, ' ') > 0)
304  tr.Error << "\tInvalid varible name '" << VAR_Name << "' (with space)" << endl;
305  else {
306  if (size == 0)
307  {
308  VARS[0] = VAR_Name;
309  FORMAT[0] = FORMAT_Name;
310  VarsNumber++;
311  }
312  else {
313  int i;
314  for(i=0; i< size; i++)
315  if (VARS[i] == VAR_Name) break;
316 
317  VARS[i] = VAR_Name;
318  FORMAT[i] = FORMAT_Name;
319 
320  if (i >= size) // VAR is not exist, add the VAR and its FORMAT
321  VarsNumber++;
322  }
323  }
324 }
325 
326 
327 //re-set one VAR with given FORMAT, 'index' number starts from 1 and can't larger than current size + 1
328 //if 'index' equals to current VARS size + 1, add new VAR to the end of VARS list
329 void GDB::setVAR(int index, const string &VAR_Name, const string &FORMAT_Name)
330 {
331  int size = VARS.size();
332 
333  if ( !checkFormat(FORMAT_Name) )
334  tr.Error << "\tBad format syntax '" << FORMAT_Name << "'" << endl;
335  else if (contains(VAR_Name, ' ') > 0)
336  tr.Error << "\tInvalid varible name '" << VAR_Name << "' (with space)" << endl;
337  else {
338  if (index > size+1 || index < 1)
339  tr.Error << "\tPlease use number 1-" << size+1 << " as index for varible '" << VAR_Name << "' with format '" << FORMAT_Name << "'" << endl ;
340  else {
341  VARS[index-1] = VAR_Name;
342  FORMAT[index-1] = FORMAT_Name;
343  if (index == size+1) VarsNumber++;
344  }
345  }
346 }
347 
348 
349 
350 //add a new REMARK entry
351 void GDB::addRemark(const string &str)
352 {
353  REMARKS.push_back(str);
354 }
355 
356 
357 
358 void GDB::setData(const string &DataName, const string &DataVal)
359 {
360  DATA.push_back( DataName+" "+DataVal );
361 }
362 
363 
364 
365 string GDB::getData(const string &DataName)
366 {
367  string data="";
368 
369  for(int i = 0; i < (int) DATA.size(); i++)
370  {
371  int pos = DATA[i].find_first_of(' ');
372 
373  if (DATA[i].substr(0,pos) == DataName )
374  data = data+ DATA[i].substr( pos+1,DATA[i].length()-pos-1 );
375  }
376 
377  return data;
378 }
379 
380 
381 
382 bool GDB::isVarFloat(int index)
383 {
384  if (contains(FORMAT[index],'f') == 1) return true;
385 
386  return false;
387 }
388 
389 
390 
391 bool GDB::isVarFloat(const string &VarName)
392 {
393  boost::unordered_map<int, string>::iterator it_V;
394 
395  for( it_V = VARS.begin(); it_V != VARS.end(); it_V++ )
396  {
397  if ( it_V->second == VarName ) return isVarFloat(it_V->first);
398  }
399 
400  return false;
401 }
402 
403 
404 
405 bool GDB::isVarInt(int index)
406 {
407  if (contains(FORMAT[index],'d') == 1) return true;
408 
409  return false;
410 }
411 
412 
413 
414 bool GDB::isVarInt(const string &VarName)
415 {
416  boost::unordered_map<int, string>::iterator it_V;
417 
418  for( it_V = VARS.begin(); it_V != VARS.end(); it_V++ )
419  {
420  if ( it_V->second == VarName ) return isVarInt(it_V->first);
421  }
422 
423  return false;
424 }
425 
426 
427 
428 bool GDB::isVarString(int index)
429 {
430  if (contains(FORMAT[index],'s') == 1) return true;
431 
432  return false;
433 }
434 
435 
436 
437 bool GDB::isVarString(const string &VarName)
438 {
439  boost::unordered_map<int, string>::iterator it_V;
440 
441  for( it_V = VARS.begin(); it_V != VARS.end(); it_V++ )
442  {
443  if ( it_V->second == VarName ) return isVarString(it_V->first);
444  }
445 
446  return false;
447 }
448 
449 
450 
451 // check if f is a valid FORMAT
452 bool GDB::checkFormat(const string& f)
453 {
454  string str = simplifyWhiteSpace(f);
455  int last = str.length()-1;
456 
457  if ( contains(str, '%') != 1 || str[0] != '%') return false;
458  if ( contains(str, 's') != 1 && contains(str, 'd') != 1 && contains(str, 'f') != 1 ) return false;
459  if ( str[last] != 's' && str[last] != 'd' && str[last] != 'f' ) return false;
460 
461  for(int i=0; i< (int)str.length();i++)
462  {
463  if ( str[i] != '%' && str[i] != 's' && str[i] != 'd' && str[i] != 'f' && str[i] != '-' && str[i] != '.' && !isDigit(str[i]) )
464  return false;
465  else if ( (str[i] == 's' || str[i] == 'd' || str[i] == 'f') && i != last)
466  return false;
467  }
468 
469  return true;
470 }
471 
472 
473 //parse the 'VARS' string and store to VARS list
474 void GDB::VARS_str_parser(const string &str)
475 {
476  VARS.clear();
477  Entries.clear();
478 
479  StringList V_Fields = split(" ", str);
480  VarsNumber = V_Fields.size();
481 
482  for(int i = 0; i < VarsNumber; i++)
483  VARS[i] = V_Fields[i];
484 }
485 
486 
487 //parse the 'FORMAT' string and store to FORMAT list
488 void GDB::FORMAT_str_parser(const string &str)
489 {
490  FORMAT.clear();
491  Entries.clear();
492 
493  string temp = str;
494  temp = " " + temp;
495 
496  if (contains(temp,'%') == VarsNumber) {
497  for(int i = 0; i < VarsNumber; i++) {
498  string f_str = "%" + (string) section(temp,'%',buf,i+1, i+1);
499 
500  if ( checkFormat(f_str) )
501  FORMAT[i] = f_str;
502  else {
503  tr.Error << "\tBad format syntax '" << f_str << "'" << endl;
504  exit(0);
505  }
506  }
507  }
508 }
509 
510 
511 // pre-set the VARS and FORMAT for a specified Class
512 void GDB::presetClass(const string &ClassName)
513 {
514  if (ClassName == "TALOS_SHIFT" ){
515  ClassType = "TALOS_SHIFT";
516 
517  VARS_str_parser("RESID RESNAME ATOMNAME SHIFT");
518  FORMAT_str_parser("%4d %1s %4s %8.3f");
519  }
520  else if (ClassName == "TALOS_PRED" ){
521  ClassType = "TALOS_PRED";
522 
523  VARS_str_parser("INDEX PHI PSI DIST W R1 R2 R3 SOURCE");
524  FORMAT_str_parser("%2d %9.3f %9.3f %8.3f %5.3f %-4s %-4s %-4s %s");
525  }
526 }
527 
528 
530 {
531  plain_text = true;
532 }
533 
534 
535 
536 }
537 }