Rosetta 3.5
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
pdb_dynamic_reader.cc
Go to the documentation of this file.
1 // -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
2 // vi: set ts=2 noet:
3 // :noTabs=false:tabSize=4:indentSize=4:
4 //
5 // (c) Copyright Rosetta Commons Member Institutions.
6 // (c) This file is part of the Rosetta software suite and is made available under license.
7 // (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
8 // (c) For more information, see http://www.rosettacommons.org. Questions about this can be
9 // (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
10 
11 /// @file core/io/pdb/pdb_dynamic_reader.cc
12 ///
13 /// @brief PDB Dynamic reader
14 /// @author Sergey Lyskov (Sergey.Lyskov@jhu.edu)
15 
16 
17 // Unit headers
20 
21 //
22 #include <core/io/pdb/Field.hh>
24 #include <core/io/pdb/file_data.hh>
25 #include <core/pose/Remarks.hh>
26 #include <core/types.hh>
27 #include <basic/options/option.hh>
28 #include <basic/options/keys/in.OptionKeys.gen.hh>
29 #include <basic/options/keys/run.OptionKeys.gen.hh>
30 
31 // Utility headers
32 #include <utility/tools/make_map.hh>
33 #include <numeric/xyzVector.hh>
34 #include <ObjexxFCL/string.functions.hh>
35 // AUTO-REMOVED #include <utility/vector0.hh>
36 #include <basic/Tracer.hh>
37 
38 // C++ headers
39 #include <cstdlib>
40 // AUTO-REMOVED
41 #include <cstdio>
42 #include <algorithm>
43 
44 #include <utility/vector1.hh>
45 
46 //#include <cstdlib>
47 //#include <map>
48 //#include <vector>
49 
50 static basic::Tracer TR("core.io.pdb.pdb_dynamic_reader");
51 
52 namespace core {
53 namespace io {
54 namespace pdb {
55 
56 using core::Size;
57 using core::SSize;
58 
59 /// @details Create a Record object whose field collection depends on the record type read from _s,
60 /// then read the field values from _s.
62 {
64 
65  String s(_s);
66  s.resize(80, ' ');
67  Field T = Field("type", 1, 6);
68 
69  T.getValueFrom(s);
70 
71  Record R;
72  if( pdb_records.count(T.value) ) { R = pdb_records[ T.value ]; }
73  else { R = pdb_records["UNKNOW"]; }
74 
75  for(Record::iterator p=R.begin(); p!=R.end(); p++) (*p).second.getValueFrom(s);
76 
77  return R;
78 }
79 
/// @details Split a string into lines at '\n' and '\r' characters and return them as a vector.
/// Empty lines are never returned; this covers the empty piece between the two characters of a
/// "\r\n" pair as well as leading, trailing and repeated line breaks.
/// @param s text to split
/// @return the non-empty lines of s, in their original order, without line terminators
std::vector<String> split(const String &s)
{
	std::vector<String> lines;
	String::size_type line_start = 0;
	while ( line_start < s.size() ) {
		// The next terminator, or the end of the text when the last line is unterminated.
		String::size_type line_end = s.find_first_of( "\n\r", line_start );
		if ( line_end == String::npos ) line_end = s.size();

		// Empty pieces are skipped while scanning, so no separate (quadratic) erase pass is needed.
		if ( line_end > line_start ) {
			lines.push_back( s.substr( line_start, line_end - line_start ) );
		}
		line_start = line_end + 1;
	}
	return lines;
}
101 
102 /// @details Parse given PDB data (represented as a string) into vector of Records.
103 std::vector<Record> PDB_DReader::parse(const String &pdb)
104 {
105  runtime_assert(!pdb.empty()); //we're wasting time if there's no data here...
106  std::vector<String> sl = split(pdb);
107  std::vector<Record> r( sl.size() );
108  std::transform(sl.begin(), sl.end(), r.begin(), PDB_DReader::mapStringToRecord);
109  return r;
110 }
111 
112 /// @details Create FileData object from a given vector of Records.
113 FileData PDB_DReader::createFileData(std::vector<Record> & VR)
114 {
115  PDB_DReaderOptions options;
116  return createFileData( VR, options );
117 }
118 
119 /// @details Create FileData object from a given vector of Records.
120 FileData PDB_DReader::createFileData(std::vector<Record> & VR, PDB_DReaderOptions const & options)
121 {
122  FileData fd;
123 
124  bool read_pdb_header =
125  basic::options::option[basic::options::OptionKeys::run::preserve_header]();
126  bool read_link_records =
127  basic::options::option[basic::options::OptionKeys::in::file::read_pdb_link_records]();
128 
130 
131  typedef std::map<char, AtomChain> ChainMap;
132  ChainMap m;
133 
134  int terCount = 0;
135  std::vector< char > chain_list; // preserve order
136  std::map<char,Size> chain_to_idx;
137  std::map<std::pair<Size,Size>,char> modelchain_to_chain;
138  std::string chainletters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
139  for(Size i = 0; i < chainletters.size(); ++i) {
140  modelchain_to_chain[std::pair<Size,Size>(0,i)] = chainletters[i];
141  modelchain_to_chain[std::pair<Size,Size>(1,i)] = chainletters[i];
142  }
143  Size modelidx = 1;
144  bool modeltags_present = false;
145 
146  // Loop over all PDB records stored in vector VR.
147  for(Size i=0; i<VR.size(); i++) {
148  std::string record_type = VR[i]["type"].value;
149 
150  // jec reading multimodel PDBs
151  if (record_type == "MODEL " ) {
152  // store the serial number as the filename, which will become the PDBInfo name of the pose
153  std::string temp_model = ObjexxFCL::strip_whitespace( VR[i]["serial"].value ) ;
154  fd.modeltag = temp_model.c_str();
155  if( options.new_chain_order() ) {
156  if(modeltags_present) {
157  // second model... all chains should be present...
158  for(Size model_idx=2;model_idx*chain_to_idx.size()<chainletters.size();++model_idx) {
159  for(Size chain_idx=1; chain_idx <= chain_to_idx.size(); ++chain_idx) {
160  TR << "REARRANGE CHAINS " << model_idx << " " << chain_idx << " " << (model_idx-1)*chain_to_idx.size()+chain_idx << std::endl;
161  modelchain_to_chain[std::pair<Size,Size>(model_idx,chain_idx)] = chainletters[(model_idx-1)*chain_to_idx.size()+chain_idx-1];
162  }
163  }
164  modelidx++;
165  if(modelidx > 8) utility_exit_with_message("quitting: too many MODELs");
166  } else {
167  modeltags_present = true;
168  }
169  }
170 
171  // Record contains "header information", i.e., is from the Title Section of the PDB file.
172  } else if (
173  record_type == "HEADER" || record_type == "KEYWDS" ||
174  record_type == "TITLE " || record_type == "COMPND" ||
175  record_type == "EXPDTA") {
176  if( read_pdb_header ){
177  fd.store_header_record(VR[i]);
178  }
179 
180  // Record contains nonstandard polymer linkage information from the Connectivity Annotation Section of the PDB
181  //file.
182  } else if (record_type == "LINK ") {
183  if (read_link_records) {
184  fd.store_link_record(VR[i]);
185  }
186 
187  // Record contains heterogen nomenclature information from the Heterogen section of the PDB file.
188  } else if (record_type == "HETNAM") {
189  fd.store_heterogen_names(VR[i]["hetID"].value, VR[i]["text"].value);
190 
191  // Record contains atom information from the Coordinate Section of the PDB file.
192  } else if( record_type == "ATOM " || record_type == "HETATM") {
193  Record & R(VR[i]);
194 
195  AtomInformation ai;
196  ai.isHet = (R["type"].value == "HETATM");
197  ai.serial = atoi( R["serial"].value.c_str() );
198  ai.name = R["name"].value;
199  ai.altLoc = 0; if( R["altLoc"].value.size() > 0 ) ai.altLoc = R["altLoc"].value[0];
200  ai.resName = R["resName"].value;
201 
202  ai.chainID = 0; if( R["chainID"].value.size() > 0 ) ai.chainID = R["chainID"].value[0];
203  if( options.new_chain_order() ) {
204  if( R["chainID"].value.size() > 0 ) {
205  char chainid = R["chainID"].value[0];
206  if( chain_to_idx.find(chainid) == chain_to_idx.end() ) {
207  chain_to_idx[chainid] = chain_to_idx.size()+1;
208  TR << "found new chain " << chainid << " " << chain_to_idx.size() << std::endl;
209  }
210  ai.chainID = modelchain_to_chain[std::pair<Size,Size>(modelidx,chain_to_idx[chainid])];
211  }
212  }
213 
214  ai.resSeq = atoi( R["resSeq"].value.c_str() );
215  ai.iCode = 0; if( R["iCode"].value.size() > 0 ) ai.iCode = R["iCode"].value[0];
216 
217  // how can you check properly if something will successfully convert to a number !?!?!?
218  bool force_no_occupancy = false;
219  if( R["x"].value == " nan"){ai.x =0.0;force_no_occupancy=true;} else { ai.x = atof( R["x"].value.c_str() ); }
220  if( R["y"].value == " nan"){ai.y =0.0;force_no_occupancy=true;} else { ai.y = atof( R["y"].value.c_str() ); }
221  if( R["z"].value == " nan"){ai.z =0.0;force_no_occupancy=true;} else { ai.z = atof( R["z"].value.c_str() ); }
222 
223  // check that the occupancy column actually exists. If it doesn't, assume full occupancy.
224  // otherwise read it.
225  if( R["occupancy"].value == " ") ai.occupancy = 1.0;
226  else ai.occupancy = atof( R["occupancy"].value.c_str() );
227  if(force_no_occupancy) ai.occupancy = -1.0;
228 
229  ai.temperature = atof( R["tempFactor"].value.c_str() );
230  ai.element = R["element"].value;
231  ai.terCount = terCount;
232 
233  m[ai.chainID].push_back(ai);
234  if ( std::find( chain_list.begin(), chain_list.end(), ai.chainID ) == chain_list.end() ) {
235  chain_list.push_back( ai.chainID );
236  }
237  } else if( record_type == "TER " || record_type == "END ") {
238  terCount++;
239  } else if( (record_type == "ENDMDL") &&
240  (options.obey_ENDMDL()) ) {
241  TR.Warning << "hit ENDMDL, not reading anything further" << std::endl;
242  break;
243 
244  // Record contains a remark.
245  } else if( record_type == "REMARK") {
246  pose::RemarkInfo ri;
247  ri.num = atoi( VR[i]["remarkNum"].value.c_str() ),
248  ri.value = VR[i]["value"].value;
249 
250  fd.remarks->push_back(ri);
251  }
252  }
253 
254  if( read_pdb_header ) {
256  }
257 
258  for ( Size i=0; i< chain_list.size(); ++i ) { // std::vector
259  fd.chains.push_back( m.find( chain_list[i] )->second );
260  }
261 // for(ChainMap::const_iterator p=m.begin(); p!=m.end(); p++ ) {
262 // fd.chains.push_back( (*p).second );
263 // }
264 
265  return fd;
266 }
267 
268 /// @details Create FileData from a given PDB data (represented as a string).
270 {
271  PDB_DReaderOptions options;
272  return createFileData( data, options );
273 }
274 
276 {
277  std::vector<Record> VR( parse(data) );
278  return createFileData(VR, options);
279 }
280 
281 /// @details create PDB string from Record data.
283 {
284  String s(80, ' ');
285  for(Record::const_iterator p=R.begin(); p!=R.end(); p++ ) {
286  String v = p->second.value; v.resize(p->second.end - p->second.start +1, ' ');
287  s.replace( p->second.start-1, p->second.end - p->second.start +1, v);
288  }
289  return(s);
290 }
291 
292 /// @details create PDB file (represented as a string) from FileData object.
294 {
295  std::vector<Record> VR( PDB_DReader::createRecords(fd) );
296 
297  String r; r.reserve(81*VR.size());
298  for(Size i=0; i<VR.size(); i++) {
299  //std::cout << VR[i] << '\n' << createPDBString( VR[i] ) << "\n";
300  r += createPDBString( VR[i] ) + '\n';
301  }
302  return r;
303 }
304 
307  std::vector<Record> VR( PDB_DReader::createRecords(fd) );
308 
310  lines.reserve( VR.size() );
311  for ( Size i = 0; i < VR.size(); i++ ) {
312  lines.push_back( createPDBString( VR[i] ) );
313  }
314  return lines;
315 }
316 
/// @details Print an int to a string using a printf-style format.
/// @param format printf-style format string containing a single int conversion (e.g. "%5d")
/// @param I value to format
/// @return exactly the formatted characters, with no trailing padding. Output that would exceed
/// 1023 characters is truncated to 1023; an empty string is returned if formatting fails.
std::string print_i(const char *format, int I)
{
	char buf[1024];
	// Bounded write: a wide format can no longer run past the end of the buffer.
	int const written = std::snprintf(buf, sizeof(buf), format, I);
	if ( written < 0 ) return std::string();

	// snprintf reports the length the full output would have had; clamp it to what was stored.
	std::size_t length = static_cast<std::size_t>(written);
	if ( length >= sizeof(buf) ) length = sizeof(buf) - 1;

	// Return only the formatted text instead of a 1024-character string padded with NUL bytes.
	return std::string(buf, length);
}
324 
/// @details Print a double to a string using a printf-style format.
/// @param format printf-style format string containing a single double conversion (e.g. "%8.3f")
/// @param d value to format
/// @return exactly the formatted characters, with no trailing padding. Output that would exceed
/// 1023 characters is truncated to 1023; an empty string is returned if formatting fails.
std::string print_d(const char *format, double d)
{
	char buf[1024];
	// Bounded write: a very large value or wide format can no longer run past the end of the buffer.
	int const written = std::snprintf(buf, sizeof(buf), format, d);
	if ( written < 0 ) return std::string();

	// snprintf reports the length the full output would have had; clamp it to what was stored.
	std::size_t length = static_cast<std::size_t>(written);
	if ( length >= sizeof(buf) ) length = sizeof(buf) - 1;

	// Return only the formatted text instead of a 1024-character string padded with NUL bytes.
	return std::string(buf, length);
}
332 
333 /// @details Create vector of Record from given FileData object.
334 // Used in PDB writing support.
335 std::vector<Record> PDB_DReader::createRecords(FileData const & fd)
336 {
337 
338  std::vector<Record> VR;
339 
340  if(fd.header_information()){
341  fd.fill_header_records(VR);
342  }
343 
344  Record R = Field::getRecordCollection()["REMARK"];
345  for(Size i=0; i<fd.remarks->size(); i++) {
346  pose::RemarkInfo const & ri( fd.remarks->at(i) );
347 
348  R["type"].value = "REMARK";
349  R["remarkNum"].value = print_i("%3d", ri.num);
350  R["value"].value = ri.value;
351  VR.push_back(R);
352  }
353 
354 
355  R = Field::getRecordCollection()["ATOM "];
356  for(Size i=0; i<fd.chains.size(); i++) {
357  for(Size j=0; j<fd.chains[i].size(); j++) {
358  AtomInformation const & ai( fd.chains[i][j] );
359  R["type"].value = (ai.isHet ? "HETATM" : "ATOM ");
360  R["serial"].value = print_i("%5d", ai.serial);
361  R["name"].value = ai.name;
362  R["resName"].value = ai.resName;
363  std::string cid(" "); cid[0] = ai.chainID;
364  R["chainID"].value = cid;
365  R["resSeq"].value = print_i("%4d", ai.resSeq);
366  R["iCode"].value = ai.iCode;
367  R["x"].value = print_d("%8.3f", ai.x);
368  R["y"].value = print_d("%8.3f", ai.y);
369  R["z"].value = print_d("%8.3f", ai.z);
370  R["element"].value = ai.element;
371  R["occupancy"].value = print_d("%6.2f", ai.occupancy);
372  R["tempFactor"].value = print_d("%6.2f", ai.temperature);
373  VR.push_back(R);
374  }
375  }
376 
377  // Adding 'TER' line at the end of PDB.
378  Record T = Field::getRecordCollection()["TER "];
379  T["type"].value = "TER ";
380  VR.push_back(T);
381 
382  return VR;
383 }
384 
385 } // namespace pdb
386 } // namespace io
387 } // namespace core
388