Rosetta 3.5
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
HeaderInformation.cc
Go to the documentation of this file.
1 // -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
2 // vi: set ts=2 noet:
3 // :noTabs=false:tabSize=4:indentSize=4:
4 //
5 // (c) Copyright Rosetta Commons Member Institutions.
6 // (c) This file is part of the Rosetta software suite and is made available under license.
7 // (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
8 // (c) For more information, see http://www.rosettacommons.org. Questions about this can be
9 // (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
10 
11 /// @file core/io/pdb/HeaderInformation.cc
12 ///
13 /// @brief Information stored in the HEADER record in the PDB format
14 /// @author Matthew O'Meara
15 
16 // Unit headers
18 #include <core/io/pdb/Field.hh>
19 
20 // Platform headers
21 #include <core/types.hh>
22 
23 // Basic headers
24 #include <basic/Tracer.hh>
25 
26 // Utility headers
27 #include <utility/exit.hh>
28 
29 // ObjexxFCL headers
30 #include <ObjexxFCL/format.hh>
31 
32 // Boost headers
33 #include <boost/lexical_cast.hpp>
34 
35 // C++ Headers
36 #include <cstdlib>
37 #include <cstdio>
38 #include <fstream>
39 #include <sstream>
40 #include <string>
41 #include <utility>
42 
43 
44 namespace core {
45 namespace io {
46 namespace pdb {
47 
48 using std::string;
49 using std::list;
50 using std::endl;
51 using std::pair;
52 using ObjexxFCL::rstrip_whitespace; // by reference
53 using ObjexxFCL::strip_whitespace; // by reference
54 using ObjexxFCL::rstripped_whitespace;// copy
55 using ObjexxFCL::stripped_whitespace; // copy
56 
57 static basic::Tracer TR("core.io.pdb.HeaderInformation");
58 
59 HeaderInformation::HeaderInformation() : utility::pointer::ReferenceCount(),
60  classification_(""),
61  dep_year_(0),
62  dep_month_(0),
63  dep_day_(0),
64  idCode_(""),
65  title_(""),
66  keywords_(),
67  keyword_in_progress_(false),
68  compounds_(),
69  compound_in_progress_(false),
70  experimental_techniques_(),
71  experimental_technique_in_progress_("")
72 {}
73 
75  HeaderInformation const & src) : utility::pointer::ReferenceCount(),
76  classification_(src.classification_),
77  dep_year_(src.dep_year_),
78  dep_month_(src.dep_month_),
79  dep_day_(src.dep_day_),
80  idCode_(src.idCode_),
81  title_(""),
82  keywords_(src.keywords_),
83  keyword_in_progress_(src.keyword_in_progress_),
84  compounds_(src.compounds_),
85  compound_in_progress_(src.compound_in_progress_),
86  experimental_techniques_(src.experimental_techniques_),
87  experimental_technique_in_progress_(src.experimental_technique_in_progress_)
88 {}
89 
91 
92 
93 void
95  string const & type = R["type"].value;
96  if(type == "HEADER"){
97  store_classification(R["classification"].value);
98  store_deposition_date(R["depDate"].value);
99  store_idCode(R["idCode"].value);
100  } else if(type == "TITLE "){
101  store_title(R["title"].value);
102  } else if(type == "KEYWDS"){
103  store_keywords(R["keywords"].value);
104  } else if(type == "COMPND"){
105  store_compound(R["compound"].value);
106  } else if(type == "EXPDTA"){
107  store_experimental_techniques(R["technique"].value);
108  } else {
109  std::stringstream err_msg;
110  err_msg
111  << "Attempting to add unrecognized record type '" << type << "' "
112  << "to header information.";
113  utility_exit_with_message(err_msg.str());
114  }
115 }
116 
117 void
122 }
123 
124 bool
126  return
130 }
131 
132 void
134  std::vector<Record> & VR
135 ) const {
136  fill_header_record(VR);
137  fill_title_records(VR);
141 }
142 
143 ////////////// HEADER ///////////////////
144 
145 void
146 HeaderInformation::store_classification(string const & classification){
148  rstrip_whitespace(classification_);
149 
150  // TODO: and that the classification is on the list
151  // http://www.wwpdb.org/documentation/wwpdb20070104appendices_c.pdf
152 
153 }
154 
155 string
157  return classification_;
158 }
159 
160 void
162 
163  dep_day_ = atoi(depDate.substr(0,2).c_str());
164  if(dep_day_ > 31 || dep_day_ < 1){
165  TR.Warning << "Deposition day not in range [1, 31]: " << dep_day_ << endl;
166  }
167 
168  string const & mon(depDate.substr(3,3));
169  if( mon == "JAN" ) dep_month_ = 1;
170  else if( mon == "FEB" ) dep_month_ = 2;
171  else if( mon == "MAR" ) dep_month_ = 3;
172  else if( mon == "APR" ) dep_month_ = 4;
173  else if( mon == "MAY" ) dep_month_ = 5;
174  else if( mon == "JUN" ) dep_month_ = 6;
175  else if( mon == "JUL" ) dep_month_ = 7;
176  else if( mon == "AUG" ) dep_month_ = 8;
177  else if( mon == "SEP" ) dep_month_ = 9;
178  else if( mon == "OCT" ) dep_month_ = 10;
179  else if( mon == "NOV" ) dep_month_ = 11;
180  else if( mon == "DEC" ) dep_month_ = 12;
181  else {
182  TR.Warning << "Unrecognized month in HEADER deposition date " + depDate << mon << std::endl;
183  }
184 
185  dep_year_ = boost::lexical_cast<Size>(depDate.substr(7,4));
186 }
187 
188 
189 void
191  Size yy,
192  Size mm,
193  Size dd
194 ) {
195 
196  dep_year_ = yy;
197  if(dep_month_ > 99 || dep_day_ < 1){
198  TR.Warning << "Deposition month not in range [01, 99]: " << dep_month_ << endl;
199  }
200 
201  dep_month_ = mm;
202  if(dep_month_ > 12 || dep_day_ < 1){
203  TR.Warning << "Deposition month not in range [1, 12]: " << dep_month_ << endl;
204  }
205 
206  dep_day_ = dd;
207  if(dep_day_ > 31 || dep_day_ < 1){
208  TR.Warning << "Deposition day not in range [1, 31]: " << dep_day_ << endl;
209  }
210 }
211 
212 
213 string
215  std::stringstream dep_date;
216 
217  if(dep_day_ > 31 || dep_day_ < 1){
218  utility_exit_with_message("deposition day is outside of range [1,31]: " + dep_day_);
219  }
220  dep_date << dep_day_ << "-";
221  switch(dep_month_){
222  case 1: dep_date << "JAN"; break;
223  case 2: dep_date << "FEB"; break;
224  case 3: dep_date << "MAR"; break;
225  case 4: dep_date << "APR"; break;
226  case 5: dep_date << "MAY"; break;
227  case 6: dep_date << "JUN"; break;
228  case 7: dep_date << "JUL"; break;
229  case 8: dep_date << "AUG"; break;
230  case 9: dep_date << "SEP"; break;
231  case 10: dep_date << "OCT"; break;
232  case 11: dep_date << "NOV"; break;
233  case 12: dep_date << "DEC"; break;
234  default:
235  utility_exit_with_message("Unrecognized deposition month index " + dep_month_);
236  }
237  if( dep_year_ > 99 || dep_year_ < 1){
238  utility_exit_with_message("Deposition year is out side of range [01,99]: " + dep_year_);
239  }
240  dep_date << "-" << (dep_year_ < 10 ? "0" : "") << dep_year_;
241  return dep_date.str();
242 }
243 
244 void
246  Size & yy,
247  Size & mm,
248  Size & dd
249 ) const {
250  yy = dep_year_;
251  mm = dep_month_;
252  dd = dep_day_;
253 }
254 
257  return idCode_;
258 }
259 
260 void
261 HeaderInformation::store_idCode(string const & idCode) {
262  idCode_ = idCode;
263 }
264 
265 void
267  std::vector< Record > & VR
268 ) const {
269  if(!classification_.empty() &&
270  dep_year_ && dep_month_ && dep_day_ && !idCode_.empty()) {
271  Record R = Field::getRecordCollection()["HEADER"];
272  R["type"].value = "HEADER";
273  R["classification"].value = classification();
274  R["depDate"].value = deposition_date();
275  R["idCode"].value = idCode();
276  VR.push_back(R);
277  }
278 }
279 
280 
281 ////////////// TITLE ///////////////////
282 
283 /// @details Append title, strip off white space on the left for the
284 /// first record and on the right for all records.
285 void
286 HeaderInformation::store_title(string const & title){
287  if(title.empty()){
288  TR.Warning << "Attempting to store empty title record field." << endl;
289  return;
290  }
291 
292  if(title_.empty()) {
293  title_ = title;
295  } else {
296 
297  title_.append(rstripped_whitespace(title));
298  }
299 }
300 
301 void
303  title_.clear();
304 }
305 
306 std::string const &
308  return title_;
309 }
310 
311 void
313  std::vector< Record > & VR
314 ) const {
315 
316  if(!title_.empty()) {
317  Size line_no(1);
318  fill_wrapped_records("TITLE ", "title", title_, line_no, VR);
319  }
320 }
321 
322 ////////////// KEYWDS ///////////////////
323 
324 void
325 HeaderInformation::store_keywords(string const & keywords){
326  if(keywords.empty()){
327  TR.Warning << "Attempting to add empty keywords string." << endl;
328  return;
329  }
330 
331  size_t i(keywords.find_first_not_of(' '));
332  size_t j(i);
333  while(i != std::string::npos) {
334  j = keywords.find(',', i);
336  keywords_.back().append(
337  " " + rstripped_whitespace(keywords.substr(i, j-i)));
338  keyword_in_progress_ = false;
339  } else {
340  keywords_.push_back(rstripped_whitespace(keywords.substr(i, j-i)));
341  }
342  if(j != std::string::npos){
343  i = keywords.find_first_not_of(' ', j+1);
344  } else {
345  keyword_in_progress_ = true;
346  return;
347  }
348  }
349 }
350 
351 list< string > const &
353  return keywords_;
354 }
355 
356 void
358  keyword_in_progress_ = false;
359 }
360 
361 bool
363  return keyword_in_progress_;
364 }
365 
366 void
368  keywords_.clear();
369 }
370 
371 void
373  std::vector< Record > & VR
374 ) const {
375  if(keywords_.empty()) return;
376 
377  string keywords;
378  list< string >::const_iterator k = keywords_.begin(), ke = keywords_.end();
379  for(; k!= ke; ++k){
380  if(!keywords.empty()) keywords.append(", ");
381  keywords.append(*k);
382  }
383  Size line_no(1);
384  fill_wrapped_records("KEYWDS", "keywords", keywords, line_no, VR);
385 }
386 
387 ///////////// COMPND ///////////////////
388 
391  string token_str;
392  switch(token){
393  case MOL_ID: token_str = "MOL_ID"; break;
394  case MOLECULE: token_str = "MOLECULE"; break;
395  case CHAIN: token_str = "CHAIN"; break;
396  case FRAGMENT: token_str = "FRAGMENT"; break;
397  case SYNONYM: token_str = "SYNONYM"; break;
398  case EC: token_str = "EC"; break;
399  case ENGINEERED: token_str = "ENGINEERED"; break;
400  case MUTATION: token_str = "MUTATION"; break;
401  case OTHER_DETAILS: token_str = "OTHER_DETAILS"; break;
402  default:
403  TR.Error << "Unrecognized compound token '" << token << "'" << endl;
404  utility_exit();
405  }
406  return token_str;
407 }
408 
411  if(token == "MOL_ID") return MOL_ID;
412  else if(token == "MOLECULE") return MOLECULE;
413  else if(token == "CHAIN") return CHAIN;
414  else if(token == "FRAGMENT") return FRAGMENT;
415  else if(token == "SYNONYM") return SYNONYM;
416  else if(token == "EC") return EC;
417  else if(token == "ENGINEERED") return ENGINEERED;
418  else if(token == "MUTATION") return MUTATION;
419  else if(token == "OTHER_DETAILS") return OTHER_DETAILS;
420  else {
421  TR.Error << "Unrecognized compound token string '" << token << "'" << endl;
422  utility_exit();
423  }
424  return CompoundToken_max;
425 }
426 
427 
428 /// @details Assume each new compound token/value pair begins on a new
429 /// line but the value can be multiple lines. So, if a compound record
430 /// is encountered when "in progress" then append the results to the
431 /// value of the previous pair.
432 void
434 
436  size_t v_end(compound.find(';'));
437  compound_in_progress_ = (v_end == std::string::npos);
438  compounds_[compounds_.size()].second.append(
439  rstripped_whitespace(compound.substr(0,v_end)));
440  return;
441  }
442 
443  size_t t_begin(compound.find_first_not_of(' '));
444  size_t t_end(compound.find(':', t_begin));
445  if(t_end == std::string::npos) {
446  TR.Error
447  << "Attempting to add compound to header information "
448  << "but no compund token was found in '" << compound << "'" << endl;
449  utility_exit();
450  }
451  CompoundToken token(
452  string_to_compound_token(compound.substr(t_begin,t_end - t_begin)));
453 
454  size_t v_begin(compound.find_first_not_of(' ', t_end + 1));
455  if(v_begin == std::string::npos){
456  TR.Error
457  << "Attempting to add compound to header information "
458  << "but no compund value was found in '" << compound << "'" << endl;
459  utility_exit();
460  }
461  size_t v_end(compound.find(';', v_begin));
462  if(v_end == std::string::npos){
463  compound_in_progress_ = true;
464  }
465  compounds_.push_back(
466  make_pair(token, compound.substr(v_begin, v_end - v_begin)));
467 }
468 
469 void
472  string const & value
473 ) {
474  compounds_.push_back(make_pair(token, value));
475 }
476 
479  return compounds_;
480 }
481 
482 void
484  compound_in_progress_ = false;
485 }
486 
487 bool
489  return compound_in_progress_;
490 }
491 
492 void
494  compounds_.clear();
495 }
496 
497 void
499  std::vector< Record > & VR
500 ) const {
501 
502  Size line_no(1);
503 
504  for(Size t=1, te = compounds_.size(); t <= te; ++t){
505 
506  std::stringstream comp_field;
507  comp_field
508  // defacto standard in PDB is to add a space after a continuation field
509  << (line_no == 1 ? "" : " ")
511  static_cast<CompoundToken>(compounds_[t].first))
512  << ": "
513  << compounds_[t].second
514  // only add ';' to separate compound records
515  << (t < compounds_.size() ? ";" : "");
516 
517  fill_wrapped_records("COMPND", "compound", comp_field.str(), line_no, VR);
518  }
519 }
520 
521 ////////////// EXPDTA ///////////////////
522 
523 string
525  ExperimentalTechnique technique
526 ) {
527  string t;
528  switch(technique){
529  case X_RAY_DIFFRACTION: t = "X-RAY DIFFRACTION"; break;
530  case FIBER_DIFFRACTION: t = "FIBER DIFFRACTION"; break;
531  case NEUTRON_DIFFRACTION: t = "NEUTRON DIFFRACTION"; break;
532  case ELECTRON_CRYSTALLOGRAPHY: t = "ELECTRON CRYSTALLOGRAPHY"; break;
533  case ELECTRON_MICROSCOPY: t = "ELECTRON MICROSCOPY"; break;
534  case SOLID_STATE_NMR: t = "SOLID-STATE NMR"; break;
535  case SOLUTION_NMR: t = "SOLUTION NMR"; break;
536  case SOLUTION_SCATTERING: t = "SOLUTION SCATTERING"; break;
537  case THEORETICAL_MODEL: t = "THEORETICAL MODEL"; break;
538 
539  case ELECTRON_DEFRACTION:
540  t = "ELECTRON DEFRACTION";
541  TR.Warning
542  << "Encountered obsolete experimental technqiue coding '"
543  << t << "'" << endl;
544  break;
545 
547  t = "CRYO-ELECTRON MICROSCOPY";
548  TR.Warning
549  << "Encountered obsolete experimental technqiue coding '"
550  << t << "'" << endl;
551  break;
552 
554  t = "SOLUTION SCATTERING, THEORETICAL MODEL";
555  TR.Warning
556  << "Encountered obsolete experimental technqiue coding '"
557  << t << "'" << endl;
558  break;
559 
560  case FLORECENCE_TRANSFER:
561  t = "FLORECENCE TRANSFER";
562  TR.Warning
563  << "Encountered obsolete experimental technqiue coding '"
564  << t << "'" << endl;
565  break;
566 
567  case NMR:
568  t = "NMR";
569  TR.Warning
570  << "Encountered obsolete experimental technqiue coding '"
571  << t << "'" << endl;
572  break;
573 
574  default:
575  TR.Error
576  << "Unrecognized experimental technique value '"
577  << technique << "'" << endl;
578  utility_exit();
579  }
580  return t;
581 }
582 
585  string const & technique
586 ) {
587  if(technique == "X-RAY DIFFRACTION") return X_RAY_DIFFRACTION;
588  else if(technique == "FIBER DIFFRACTION") return FIBER_DIFFRACTION;
589  else if(technique == "NEUTRON DIFFRACTION") return NEUTRON_DIFFRACTION;
590  else if(technique == "ELECTRON CRYSTALLOGRAPHY")
592  else if(technique == "ELECTRON MICROSCOPY") return ELECTRON_MICROSCOPY;
593  else if(technique == "SOLID-STATE NMR") return SOLID_STATE_NMR;
594  else if(technique == "SOLUTION NMR") return SOLUTION_NMR;
595  else if(technique == "SOLUTION SCATTERING") return SOLUTION_SCATTERING;
596  else if(technique == "THEORETICAL MODEL") return THEORETICAL_MODEL;
597 
598  // Handle obsolete technique strings
599  else if(technique == "ELECTRON DEFRACTION") {
600  TR.Warning
601  << "Encountered obsolete experimental technqiue string '"
602  << technique << "'" << endl;
603  return ELECTRON_DEFRACTION;
604  } else if(technique == "CRYO-ELECTRON MICROSCOPY") {
605  TR.Warning
606  << "Encountered obsolete experimental technqiue string '"
607  << technique << "'" << endl;
609  } else if(technique == "FLORECENCE TRANSFER") {
610  TR.Warning
611  << "Encountered obsolete experimental technqiue string '"
612  << technique << "'" << endl;
613  return FLORECENCE_TRANSFER;
614  } else if(technique == "NMR") {
615  TR.Warning
616  << "Encountered obsolete experimental technqiue string '"
617  << technique << "'" << endl;
618  return NMR;
619  } else {
620  TR.Error
621  << "Unrecognized experimental technique string '"
622  << technique << "'" << endl;
623  utility_exit();
624  }
625  return THEORETICAL_MODEL;
626 }
627 
628 void
630  string const & exp) {
631  if(exp.empty()){
632  TR.Error << "Attempting to add empty experimental technique string." << endl;
633  utility_exit();
634  }
635 
636  size_t t_begin, t_len, t_end(-1);
637 
638  while(true){
639  t_begin = exp.find_first_not_of(' ', t_end+1);
640  if(t_begin == std::string::npos) return;
641 
642  t_end = exp.find(';', t_begin);
643  if(t_end == std::string::npos){
645  rstripped_whitespace(exp.substr(t_begin, t_len));
646  return;
647  } else if(exp.length() - t_begin >= 3 && exp.compare(t_begin, 3, "NMR") == 0){
648  // The obsolete NMR tag took extra information that is ignored here
649  t_len = 3;
650  } else {
651  t_len = t_end - t_begin;
652  }
654  experimental_techniques_.push_back(
655  string_to_experimental_technique(exp.substr(t_begin, t_len)));
656  } else {
658  experimental_technique_in_progress_.append(exp.substr(t_begin, t_len));
659  experimental_techniques_.push_back(
662  }
663 
664  }
665  return;
666 }
667 
668 void
671  experimental_techniques_.push_back(technique);
672 }
673 
674 list< HeaderInformation::ExperimentalTechnique > const &
677 }
678 
679 void
682  experimental_techniques_.push_back(
685  }
686 }
687 
688 bool
690  return !experimental_technique_in_progress_.empty();
691 }
692 
693 void
695  experimental_techniques_.clear();
696 }
697 
698 bool
701 ) const {
702  list< HeaderInformation::ExperimentalTechnique >::const_iterator
704  technique);
705 
706  return t != experimental_techniques_.end();
707 }
708 
709 void
711  std::vector< Record > & VR
712 ) const {
713  if(parse_in_progress()){
714  TR.Error
715  << "Attempting to fill experimental technique records the "
716  << "HeaderInformation is in the middle of parsing. If you think the "
717  << "parsing is complete and you have reached this recording in error, "
718  << "please call finalize_parse()";
719  utility_exit();
720  }
721 
722  if(experimental_techniques_.empty()) return;
723  string techniques;
724  ExperimentalTechniques::const_iterator
725  k = experimental_techniques_.begin(),
726  ke= experimental_techniques_.end();
727  for(; k != ke; ++k){
728  if(!techniques.empty()) techniques.append("; ");
729  techniques.append(experimental_technique_to_string(*k));
730  }
731  Size line_no(1);
732  fill_wrapped_records("EXPDTA", "technique", techniques, line_no, VR);
733 }
734 
735 
736 
737 ////////// Helper Functions /////////////
738 
739 
740 void
742  string const & record_type,
743  string const & field_name,
744  string const & contents,
745  Size & line_no,
746  std::vector< Record > & VR
747 ) const {
748  // Assume contents string is stripped of white space
749  size_t l_begin(0), l_len(0), l_end(0);
750  size_t field_width(60);
751  while(l_begin != contents.length()){
752  Record R = Field::getRecordCollection()[record_type];
753  R["type"].value = record_type;
754  set_line_continuation(R, line_no);
755 
756  //Will the remainder of the contents fit on this line?
757  if(contents.length() - l_begin <= field_width){
758  l_len = contents.length() - l_begin;
759  } else {
760  // Walk back from end where the field would truncate to locate
761  // a reasonable place to word wrapping.
762 
763  l_end = l_begin + field_width;
764  // Note: Since the rest of the contents don't fit in the field,
765  // l_end < contents.length()
766  while(true){
767  if(l_end == l_begin){
768  // We have walked all the way to l_begin. The next word is
769  // so big it cannot fit in the field
770  TR.Error
771  << "The for record type '" << record_type << "', "
772  << "field '" << field_name << "' "
773  << "contains a word that has more than 59 characters and "
774  << "is too long to fit on one line." << endl;
775  TR.Error << field_name << ": " << contents << endl;
776  utility_exit();
777  }
778  if (contents[l_end] == ' ' || contents[l_end - 1] == '-'){
779  break;
780  } else {
781  --l_end;
782  }
783  }
784  l_len = l_end - l_begin;
785  }
786  R[field_name].value = contents.substr(l_begin, l_len);
787  VR.push_back(R);
788  ++line_no;
789 
790  // Note this puts l_begin at a ' ' which is how wrapped records
791  // are written in the PDB
792  l_begin = l_begin + l_len;
793  }
794 }
795 
796 
797 void
799  Record & R,
800  Size const line_no
801 ) const {
802  std::string & con_field = R["continuation"].value;
803  if(line_no == 0){
804  TR.Error << "Attempting to write a line continuation record for line 0, please begin the line continuation count at 1." << endl;
805  utility_exit();
806  }
807  if(line_no == 1){
808  con_field = " ";
809  return;
810  } else if(line_no > 99){
811  TR.Error << "Attempting to write record that takes more than 99 lines, which overflows the continuation field in the." << endl;
812  utility_exit();
813  } else {
814  con_field.resize(2);
815  sprintf(&con_field[0], "%2d", static_cast<int>(line_no));
816  }
817 }
818 
819 } // namespace pdb
820 } // namespace io
821 } // namespace core
822 
823