Rosetta 3.5
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
file_data.cc
Go to the documentation of this file.
1 // -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
2 // vi: set ts=2 noet:
3 // :noTabs=false:tabSize=4:indentSize=4:
4 //
5 // (c) Copyright Rosetta Commons Member Institutions.
6 // (c) This file is part of the Rosetta software suite and is made available under license.
7 // (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
8 // (c) For more information, see http://www.rosettacommons.org. Questions about this can be
9 // (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
10 
11 /// @file core/io/pdb/file_data.cc
12 ///
13 /// @brief
14 /// @author Sergey Lyskov
15 
16 // Unit headers
17 #include <core/io/pdb/Field.hh>
19 #include <core/io/pdb/file_data.hh>
20 
21 #include <core/io/pdb/pose_io.hh>
22 #include <core/types.hh>
23 
27 #include <core/pose/PDBInfo.hh>
28 
29 #include <core/chemical/AA.hh>
31 #include <core/chemical/Patch.hh>
38 // AUTO-REMOVED #include <core/chemical/orbitals/OrbitalType.hh>
39 
42 
44 
46 
47 #include <core/pose/util.hh>
48 
49 
50 // Basic headers
51 #include <basic/options/option.hh>
52 #include <basic/Tracer.hh>
53 
54 // option key includes
55 #include <basic/options/option.hh>
56 #include <basic/options/keys/out.OptionKeys.gen.hh>
57 #include <basic/options/keys/run.OptionKeys.gen.hh>
58 #include <basic/options/keys/in.OptionKeys.gen.hh>
59 #include <basic/options/keys/inout.OptionKeys.gen.hh>
60 #include <basic/options/keys/packing.OptionKeys.gen.hh>
61 
62 
63 #include <numeric/random/random.hh>
64 
65 #include <utility/string_util.hh>
66 #include <utility/io/ozstream.hh>
67 #include <utility/io/izstream.hh>
68 #include <utility/exit.hh>
69 
70 #include <fstream>
71 #include <sstream>
72 #include <cstdlib>
73 #include <cstdio>
74 #include <utility>
75 #include <ObjexxFCL/format.hh>
76 
77 // option key includes
78 #include <basic/options/keys/out.OptionKeys.gen.hh>
79 //#include <basic/options/keys/run.OptionKeys.gen.hh> BDW
80 //#include <basic/options/keys/in.OptionKeys.gen.hh> BDW
81 #include <basic/options/keys/inout.OptionKeys.gen.hh>
82 // AUTO-REMOVED #include <basic/options/keys/packing.OptionKeys.gen.hh>
83 //#include <basic/options/keys/pH.OptionKeys.gen.hh> BDW
84 
85 #include <core/pose/util.hh>
86 #include <utility/vector1.hh>
87 
88 //Auto Headers
89 #include <core/pose/util.tmpl.hh>
90 
91 namespace core {
92 namespace io {
93 namespace pdb {
94 
95 using core::Size;
96 using core::SSize;
97 
98 using basic::T;
99 using basic::Error;
100 using basic::Warning;
101 
102 using std::string;
103 using std::iostream;
104 
105 using namespace ObjexxFCL;
106 using namespace ObjexxFCL::fmt;
107 
108 // Tracer instance for this file
109 static basic::Tracer TR("core.io.pdb.file_data");
110 
111 // random number generator for randomizing missing density coordinates
112 static numeric::random::RandomGenerator RG(231411); // <- Magic number, do not change it!
113 
114 // TODO: move this to core/chemical/types.hh
115 // TODO: Confirm that not allowing ' ' as a chain id is intended--as this is inconsistent with the PDB spec
// Alphabet of chain-ID characters: A-Z, then the digits 1-9 and 0, then a-z.
// Within this file a residue's chain number n is mapped to chr_chains[ ( n - 1 ) % chr_chains.size() ]
// when the chain ID is not taken from PDBInfo, so chain numbers beyond the alphabet wrap around.
116 static string const chr_chains( "ABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890abcdefghijklmnopqrstuvwxyz" );
117 
119  resid( "" ),
120  resName( "" ),
121  chainID( ' ' ),
122  resSeq( 0 ),
123  iCode( ' ' ),
124  terCount( 0 ),
125  atoms(),
126  xyz(),
127  temps()
128 {}
129 
131  AtomInformation const & ai) :
132  resid( "" ),
133  resName( ai.resName ),
134  chainID( ai.chainID ),
135  resSeq( ai.resSeq ),
136  iCode( ai.iCode ),
137  terCount( ai.terCount ),
138  atoms(),
139  xyz(),
140  temps()
141 {}
142 
143 bool
145  ResidueInformation const & that) const {
146  return
147  resName == that.resName &&
148  chainID == that.chainID &&
149  resSeq == that.resSeq &&
150  iCode == that.iCode &&
151  terCount == that.terCount;
152 }
153 
154 bool
156  ResidueInformation const & that) const {
157  return !(*this == that);
158 }
159 
160 
161 /////////////////////////////////////////////
162 
164 {
165 }
166 
167 void
169  core::conformation::Residue const & rsd,
170  core::Size & atom_index,
171  core::pose::Pose const & pose // for pdb numbering and chains, could change to PDBInfo if necessary (but casting here is perhaps best)
172 )
173 {
174  using namespace core;
175 
176  //extract PDBInfo pointer
177  pose::PDBInfoCOP pdb_info = pose.pdb_info();
178 
179  bool use_PDB(false);
180  if (
181  pdb_info
182  && !(pdb_info->obsolete())
183  && !(basic::options::option[ basic::options::OptionKeys::out::file::renumber_pdb ].value()) ) {
184  use_PDB = true;
185  }
186 
187  bool renumber_chains(false);
188  if ( basic::options::option[ basic::options::OptionKeys::out::file::per_chain_renumbering ].value() ) {
189  renumber_chains = true;
190  }
191 
192 
193  for ( Size j=1; j<= rsd.natoms(); ++j ) {
194  conformation::Atom const & atom( rsd.atom(j) );
195 
196  //skip outputting virtual atom unless specified
197  if ( !basic::options::option[ basic::options::OptionKeys::out::file::output_virtual ]() &&
198  rsd.atom_type(j).is_virtual() ) continue;
199 
200  // skip outputting zero occupancy atoms if specified
201  if ( use_PDB && basic::options::option[ basic::options::OptionKeys::out::file::suppress_zero_occ_pdb_output ]() &&
202  ( rsd.seqpos() <= pdb_info->nres() ) ) {
203  if ( pdb_info->occupancy( rsd.seqpos(), j ) < 0.0001 ) continue;
204  }
205 
206  ++atom_index;
207 
208  AtomInformation ai;
209  AtomInformation orb;//have to initialize this out here.
210 
211  ai.isHet = (!rsd.is_polymer() || rsd.is_ligand());
212  ai.serial = atom_index;
213  ai.name = rsd.atom_name(j);
214  ai.resName = rsd.name3();
215  ai.x = atom.xyz()(1);
216  ai.y = atom.xyz()(2);
217  ai.z = atom.xyz()(3);
218  ai.occupancy = 1.0; // dummy occupancy, can be overridden by PDBInfo
219 
220  // output with pdb specific info if possible
221  if ( use_PDB && rsd.seqpos() <= pdb_info->nres() ) {
222  // residue
223  ai.chainID = pdb_info->chain( rsd.seqpos() );
224  if ( ai.chainID == pose::PDBInfo::empty_record() ) { // safety
225  TR.Warning << "PDBInfo chain id was left as character '" << pose::PDBInfo::empty_record()
226  << "' denoting empty record, for convenience replacing with space" << std::endl;
227  ai.chainID = ' ';
228  }
229  ai.resSeq = pdb_info->number( rsd.seqpos() );
230  ai.iCode = pdb_info->icode( rsd.seqpos() );
231 
232  // atom
233  if ( pdb_info->is_het( rsd.seqpos(), j ) ) { // override standard het only if .is_het() is true
234  ai.isHet = true;
235  }
236  ai.altLoc = pdb_info->alt_loc( rsd.seqpos(), j );
237  ai.occupancy = pdb_info->occupancy( rsd.seqpos(), j );
238  ai.temperature = pdb_info->temperature( rsd.seqpos(), j );
239  } else {
240  // residue
241  runtime_assert( rsd.chain() > 0 );
242  ai.chainID = chr_chains[ ( rsd.chain() - 1 ) % chr_chains.size() ];
243  ai.resSeq = rsd.seqpos();
244 
245  // if option is specified, renumber per-chain
246  if ( renumber_chains ) {
247  utility::vector1< Size > const &chn_ends = pose.conformation().chain_endings();
248  //for(int i=1; i<=chn_ends.size(); ++i) {
249  for ( Size i=1; i<=chn_ends.size(); ++i ) {
250  if (chn_ends[i] < rsd.seqpos()) ai.resSeq = rsd.seqpos() - chn_ends[i];
251  }
252  }
253 
254  // fix for >10k residues
255  ai.resSeq = ai.resSeq % 10000;
256  }
257 
258  // 'chains' is member data
259  if ( chains.size() < Size(rsd.chain() + 1) ) chains.resize( rsd.chain() + 1 );
260  AtomChain & AC(chains[rsd.chain()]);
261  AC.push_back(ai);
262 
263  }
264 }
265 /// @details
266 /// init FileData structure from pose object.
267 /// read atoms/residue information from Pose object and put it in FileData object.
268 ///
270 {
271  FileDataOptions options;
272  init_from_pose( pose, options );
273 }
274 
275 
276 ///@details prepare the HeaderInformation data structure;
277 void
279  header = new HeaderInformation();
280 }
281 
284  return header;
285 }
286 
287 ///@details Store information in the header record into the HeaderInformation
288 ///@remarks HeaderInformation must be created explicitly before it can be filled!
289 void
291  header->store_record(R);
292 }
293 
294 ///@details Populate the header records from the data in the HeaderInformation
295 ///@remarks HeaderInformation must be created explicitly before it can be filled!
296 void
298  std::vector<Record> & VR
299 ) const {
300  header->fill_records(VR);
301 }
302 
303 ///@details finalize storing records from the data in the HeaderInformation
304 ///@remarks HeaderInformation must be created explicitly before it can be filled!
305 void
307  header->finalize_parse();
308 
309 }
310 
311 
312 // Store (non-standard) polymer linkages in a map.
313 void
315 {
316  using namespace std;
317 
318  LinkInformation link;
319 
320  // Extract values from record fields.
321  link.name1_ = record["name1"].value; // 1st atom name
322  link.resName1_ = record["resName1"].value;
323  link.resID1_ = record["resSeq1"].value + record["iCode1"].value + record["chainID1"].value;
324 
325  link.name2_ = record["name2"].value; // 2nd atom name
326  link.resName2_ = record["resName2"].value;
327  link.resID2_ = record["resSeq2"].value + record["iCode2"].value + record["chainID2"].value;
328 
329  link.length_ = atof(record["length"].value.c_str()); // bond length
330 
331  links[link.resID1_] = link;
332 
333  TR.Debug << "LINK record information stored successfully." << std::endl;
334 }
335 
336 
337 // Store heterogen name information in map.
338 /// @remarks heterogen "names" for carbohydrates (from "Rosetta-ready" PDB files) instead have the name field parsed
339 /// to extract the base (non-variant) ResidueType needed for a particular residue.
340 void
342 {
343  using namespace std;
344  using namespace core::chemical::carbohydrates;
345 
346  if (hetID.empty()) {
347  TR.Warning << "PDB HETNAM record is missing an heterogen ID field." << endl;
348  return;
349  }
350  if (text.empty()) {
351  TR.Warning << "PDB HETNAM chemical name field is an empty string." << endl;
352  return;
353  }
354 
355  string name;
356  if (heterogen_names.count(hetID)) {
357  name = heterogen_names[hetID];
358  name.append(rstripped_whitespace(text));
359  } else {
360  name = text;
361  strip_whitespace(name);
362  }
363 
364  // If the hetID is found in the map of Rosetta-allowed carbohydrate 3-letter codes....
365  if (CarbohydrateInfo::CODE_TO_ROOT_MAP.count(hetID)) {
367  } else {
368  heterogen_names[hetID] = name; // Non-carbohydrate heterogen names are simply stored in the standard PDB way.
369  }
370 }
371 
372 // Parse heterogen name data for a given carbohydrate and save the particular base (non-variant) ResidueType needed in
373 // a map.
374 /// @details The standard PDB HETNAM record is insufficient for indicating the type of carbohydrate residue found at
375 /// each position of the sequence. The PDB format only allows one heterogen name per 3-letter code. To work around
376 /// this, a PDB file containing carbohydrates will need to be made "Rosetta-ready". 3-letter codes will need to be
377 /// converted from, e.g., GLC, which implies the vague "ALPHA-D-GLUCOSE", to Glc. When Rosetta reads in a "sentence
378 /// case" code such as this, it will check a resID-to-ResidueType map to determine which type of alpha-D-glucose to
379 /// use, e.g., ->4)-alpha-D-glucopyranosyl or ->6)-alpha-D-glucopyranosyl or ->4)-alpha-D-glucofuranosyl, etc. This
380 /// function fills that map from a "Rosetta-ready" HETNAM text field, which includes the resID information (in the
381 /// same order as in an ATOM or HETATM record) followed by a space and the base (non-variant) ResidueType.
382 void
384 {
385  using namespace std;
386 
387  string chainID = string(text.begin(), text.begin() + 1); // 1 character for chainID
388  string resSeq = string(text.begin() + 1, text.begin() + 5); // 4 characters for resSeq
389  string iCode = string(text.begin() + 5, text.begin() + 6); // 1 character for iCode
390  string key = resSeq + iCode + chainID; // a resID, as defined elsewhere in FileData
391 
392  string needed_residue_type_base_name = string(text.begin() + 7, text.end()); // name starts after 7th character
393 
394  carbohydrate_residue_type_base_names[key] = needed_residue_type_base_name;
395 }
396 
397 
398 /// @details Populate this FileData from a Pose, honoring the given FileDataOptions.
399 /// If options.preserve_header() is set and the pose has a PDBInfo, the PDBInfo's remarks are copied into *remarks
/// and header is replaced by a copy of the PDBInfo's HeaderInformation (or by a default-constructed
/// HeaderInformation when the PDBInfo has none). Otherwise remarks and header are left as they were.
400 /// The chains container is then emptied and every residue 1..pose.total_residue() is handed to append_residue().
/// @param pose     Pose supplying the residues and, optionally, the remarks/header data.
/// @param options  Only preserve_header() is consulted in this function.
401 void FileData::init_from_pose(core::pose::Pose const & pose, FileDataOptions const & options)
402 {
403  using namespace core;
404  core::Size const nres( pose.total_residue() );
405  core::Size atom_index(0); // running atom counter handed to append_residue() for each residue
406 
407  // Copy remarks and header from the pose's PDBInfo, but only when requested and when a PDBInfo exists.
408  using core::pose::PDBInfo;
409  if( options.preserve_header() == true && pose.pdb_info() ) {
410  *remarks = pose.pdb_info()->remarks(); // NOTE(review): dereferences the remarks member; assumes it is non-null here -- confirm
411  if(pose.pdb_info()->header_information()){
412  header = new HeaderInformation(*(pose.pdb_info()->header_information())); // copy-construct from the pose's header information
413  } else {
414  header = new HeaderInformation(); // PDBInfo has no header information: start from a default-constructed one
415  }
416  }
417 
418  chains.resize(0); // drop whatever chains were stored before
419 
420  for ( Size i=1; i<= nres; ++i ) { // residue indices run from 1 to nres inclusive
421  conformation::Residue const & rsd( pose.residue(i) );
422  append_residue( rsd, atom_index, pose );
423  }
424 }
425 
426 /// @details
427 /// a lightweight, direct way of limiting pose pdb output to a subset of residues
428 /// the alternative of constructing new subposes for output only would be unnecessary/less efficient (?)
430  core::pose::Pose const & pose,
431  utility::vector1< core::Size > const & residue_indices
432 )
433 {
434  using namespace core;
435  core::Size const nres( pose.total_residue() );
436  core::Size atom_index(0);
437 
438  chains.resize(0); // 'chains' is member data
439  for ( utility::vector1< Size >::const_iterator index( residue_indices.begin() ),
440  end( residue_indices.end() ); index != end; ++index ) {
441  if ( *index < 1 || *index > nres ) { runtime_assert(false); continue; }
442  append_residue( pose.residue( *index ), atom_index, pose );
443  }
444 }
445 
446 
447 /// @details Convert the given Pose object into PDB format and send it to the given stream.
449  core::pose::Pose const & pose,
450  std::ostream & out,
451  string const & /* tag */,
452  bool write_fold_tree
453 )
454 {
455  string data;
456  FileData fd;
457  fd.init_from_pose(pose);
458 
459  data = PDB_DReader::createPDBData(fd);
460  out.write( data.c_str(), data.size() );
461 
462  write_additional_pdb_data( out, pose, fd, write_fold_tree );
463 }
464 
465 /// @details Convert the given Pose object into PDB format and save it to the file 'file_name'.
466 /// @return true if the file could be opened for writing and the dump was carried out, false if it could not be opened.
468  core::pose::Pose const & pose,
469  string const & file_name,
470  string const & tag,
471  bool write_fold_tree)
472 {
473  utility::io::ozstream file(file_name.c_str(), std::ios::out | std::ios::binary);
474  if(!file) {
475  Error() << "FileData::dump_pdb: Unable to open file:" << file_name << " for writing!!!" << std::endl;
476  return false;
477  }
478  dump_pdb(pose, file, tag, write_fold_tree);
479 
480  file.close();
481 
482  return true;
483 }
484 
485 /// @details Convert the given Pose object into PDB format and send it to the given stream.
486 /// Only the residues corresponding to the indices in 'residue_indices' will be output.
487 void
489  core::pose::Pose const & pose,
490  std::ostream & out,
491  utility::vector1< core::Size > const & residue_indices,
492  string const & /* tag */
493 )
494 {
495  FileData fd;
496  string data;
497  fd.init_from_pose( pose, residue_indices );
498 // data = "MODEL " + tag + "\n";
499 // out.write( data.c_str(), data.size() );
500 
501  data = PDB_DReader::createPDBData(fd);
502  out.write( data.c_str(), data.size() );
503 
504 // data = "ENDMDL\n";
505 // out.write( data.c_str(), data.size() );
506 
507  write_additional_pdb_data( out, pose, fd );
508 }
509 
510 
511 /// @details Debug/Info function.
512 /// Write a FileData object to a TR-like stream in human-readable format.
/// The output starts with "<FileData>{" and ends with "}". Each chain index i is written as "Chain<i>",
/// followed by one "[j:entry]" line for every entry j of fd.chains[i]. Both indices are printed zero-based.
/// @param os  Stream to write to.
/// @param fd  FileData whose chains are printed.
/// @return    The stream os, so that calls can be chained.
513 std::ostream& operator <<(std::ostream &os, FileData const & fd)
514 {
515  os << "<FileData>{";
516  for(Size i=0; i<fd.chains.size(); i++) {
517  os << "Chain<" << i << ">";
518  for(Size j=0; j<fd.chains[i].size(); j++) {
519  os << "[" << j << ":" << fd.chains[i][j] << "]" << "\n"; // relies on a stream operator for the chain's element type
520  }
521  }
522  os << "}";
523  return os;
524 }
525 
526 /// @details Temporary hacky hack
527 /// Need better mechanism for this
530 {
531  if ( name == " DA" ) return " A";
532  else if ( name == " DC" ) return " C";
533  else if ( name == " DG" ) return " G";
534  else if ( name == " DT" ) return " T";
535  else if ( name == " Ad" ) return " A";
536  else if ( name == " Cd" ) return " C";
537  else if ( name == " Gd" ) return " G";
538  else if ( name == " Td" ) return " T";
539  else if ( name == "MSE" ) {
540  TR << "Reading MSE as MET!" << std::endl;
541  return "MET";
542  }
543  return name;
544 }
545 
547 convert_atom_name( std::string const & res_name, std::string atom_name )
548 {
 // Map an atom name read from a PDB file onto the name this reader uses, based on the residue name.
 //  - atom_name must be exactly 4 characters long; otherwise utility_exit_with_message() is called.
 //  - For "5MC" and the listed A/C/G/T/U residue names: OP1/OP2 become O1P/O2P, a name whose 4th character
 //    is ' has it replaced by *, and C7 of T becomes C5M.
 //  - For "MET": the names compared below as " S " and "SE " are both returned as " SD ".
 //  - Any other atom name is returned unchanged.
 // NOTE(review): as printed here, several literals compared with atom_name (" S ", "SE ") or returned (" SD ")
 // are not 4 characters long although atom_name is required to be 4 characters; the padding may have been lost
 // when this listing was produced -- verify the literals against the canonical file.
549  if( atom_name.size() != 4 ){
550  std::string message= res_name+" has atom "+ atom_name+", with size!=4";
551  utility_exit_with_message(message);
552  };
553  //atom_name = strip_whitespace( atom_name );
554  if ( res_name == "5MC" ||
555  res_name == " A" ||
556  res_name == " C" ||
557  res_name == " G" ||
558  res_name == " T" ||
559  res_name == " U" ) {
560  /// DNA or RNA
561  if ( atom_name == " OP1" ) return " O1P";
562  if ( atom_name == " OP2" ) return " O2P";
563  if ( atom_name[3] == '\'' ) return atom_name.substr(0,3)+"*"; // keep the first three characters, swap the prime for a star
564  if ( res_name == " T" && atom_name == " C7 " ) return " C5M";
565  } else if ( res_name == "MET" && atom_name == " S " ) {
566  return " SD ";
567  } else if ( res_name == "MET" && atom_name == "SE " ) {
568  TR << "Reading Selenium SE from MSE as SD from MET" << std::endl;
569  return " SD ";
570  }
571  return atom_name; // no rule matched: pass the name through untouched
572 }
573 
574 /// @details Convert FileData into a set of residues, sequences, and coordinates.
575 /// This is a convenience function; no magic is done here.
576 /// Well, maybe a little.
579 )
580 {
581  FileDataOptions options;
582  create_working_data( rinfo, options );
583 }
584 
585 /// @details Convert FileData into a set of residues, sequences, and coordinates.
586 /// This is a convenience function; no magic is done here.
587 /// Well, maybe a little.
590  FileDataOptions const & options
591 )
592 {
593  using namespace basic::options;
594  using namespace basic::options::OptionKeys;
595 
596  rinfo.clear();
597  std::string buf; buf.resize(1024);
598 
599  for(Size ch=0; ch<chains.size(); ch++) {
600  for(Size i=0; i<chains[ch].size(); i++) {
601  AtomInformation & ai( chains[ch][i] );
602  // we should make a copy instead of taking a reference if "fixing" the names causes problems
603  std::string const res_name( convert_res_name( ai.resName ) );
604  std::string const atom_name( convert_atom_name( res_name, ai.name ) );
605  ai.resName = res_name;
606  ai.name = atom_name;
607 
608  sprintf(&buf[0], "%4d%c%c", ai.resSeq, ai.iCode, ai.chainID);
609  std::string resid( buf ); // include chain ID
610  resid.resize(6);
611 
612  //chu modify the logic how atoms are treated with zero or negative occupancy field.
613  if ( ai.occupancy == 0.0 ) {
614  if( options.randomize_missing_coords() ) {
616  } else if ( !options.ignore_zero_occupancy() ) {
617  // do nothing and keep this atom as it is
618  } else {
619  //When flag default changes from true to false, change to TR.Debug and remove second line
620  TR.Warning << "PDB reader is ignoring atom " << atom_name << " in residue " << resid
621  << ". Pass flag -ignore_zero_occupancy false to change this behavior" << std::endl;
622  continue; // skip this atom with zero occ by default
623  }
624  } else if ( ai.occupancy < 0.0 ) { // always randomize coords for atoms with negative occ
626  } else {
627  // do nothing for normal atoms with positive occ
628  }
629 
630  ResidueInformation new_res( ai );
631  new_res.resid = resid;
632  if( rinfo.size() == 0 || rinfo.back() != new_res ) rinfo.push_back(new_res);
633  ResidueInformation & curr_res = rinfo.back();
634  // Only insert atoms once, so we capture just the first alt conf.
635  // Would be nice in the future to take the highest occupancy instead...
636  if( curr_res.xyz.count(ai.name) == 0 ) {
637  curr_res.atoms.push_back(ai); // this *does* make a copy
638  Vector coords( ai.x, ai.y, ai.z );
639  curr_res.xyz[ ai.name ] = coords;
640  curr_res.temps[ ai.name ] = ai.temperature;
641  }
642  }
643  }
644 }
645 
646 
647 // Helper Functions
648 /// @details Remove spaces from given string.
650 {
651  std::string trimmed_name( name );
652  left_justify( trimmed_name ); trim( trimmed_name ); // simpler way to dothis?
653  return trimmed_name;
654 }
655 
656 
657 /// @brief The missing density regions in the input pdb should have 0.000 in the placeholders
658 /// this routine puts random coordinates wherever there is 0.000 for mainchain atoms.
659 /// tex - that's a stupid way of defining missing density, as atoms can be at the origin for other
660 /// reasons. This has been updated to check for occupancy to define missing density rather than atoms
661 /// located at the origin.
663  // if( ai.resSeq == 1 && ai.name == " N ") return;//ignore first atom. Rosetta pdbs start with 0.000
664  if ( ai.x == 0.000 && ai.y == 0.000 && ai.z == 0.000 && ai.occupancy <= 0.0 ){
665  TR << "Randomized: " << ai.name << " " << ai.resName << " " << ai.resSeq << std::endl;
666  //v if ( ai.name == " N " || ai.name == " CA " || ai.name == " C " ||
667  //v ai.name == " O " || ai.name == " CB " ) {
668  ai.x = ai.x + 900.000 + RG.uniform()*100.000;
669  ai.y = ai.y + 900.000 + RG.uniform()*100.000;
670  ai.z = ai.z + 900.000 + RG.uniform()*100.000;
671  //v }
672  }
673  return;
674 }
675 
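676 /// @brief Writes the optional trailing PDB data for a pose to the stream: CONECT info, fold-tree and parent REMARK lines, orbital ATOM lines, and torsion REMARK lines, each only when the corresponding condition or option checked in the body holds.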
677 void
679  std::ostream & out,
680  pose::Pose const & pose,
681  io::pdb::FileData const &,
682  bool write_fold_tree
683 )
684 {
685 
686  using namespace basic::options;
687 
688  // added by rhiju --> "CONECT" lines. Useful for coarse-grained/centroid poses, so that
689  // rasmol/pymol draws bonds between atoms 'bonded' in Rosetta that are far apart.
690  // perhaps turn on with a flag?
691  if ( pose.residue(1).is_coarse() || option[ OptionKeys::inout::dump_connect_info]() ) dump_connect_info( pose, out );
692 
693  if ( write_fold_tree || option[ OptionKeys::inout::fold_tree_io ].user() ) {
694  out << "REMARK " << pose.fold_tree();
695  }
696  if ( basic::options::option[ OptionKeys::out::file::pdb_parents]() ) {
697  std::string value;
698  bool has_parents = core::pose::get_comment( pose, "parents", value );
699  if( has_parents ){
700  out << "REMARK PARENT " << value.substr(0,5) << std::endl;
701  }
702  }
703  if(basic::options::option[ basic::options::OptionKeys::out::file::output_orbitals]){
704  static std::string const chains( " ABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890" );
705  for(core::Size i=1; i <=pose.n_residue(); ++i){
707  core::Size number(0);
708  char const chain( chains[ rsd.chain() ] );
709  for(core::Size j=1; j<=rsd.natoms(); ++j){
710  if(rsd.atom_type(j).atom_has_orbital()){
711  utility::vector1<core::Size> const & orbital_indices(rsd.bonded_orbitals(j));
712  for(
714  orbital_index = orbital_indices.begin(),
715  orbital_index_end = orbital_indices.end();
716  orbital_index != orbital_index_end; ++orbital_index
717  ){
718  ++number;
719  Vector orbital_xyz(rsd.orbital_xyz(*orbital_index));
720  out << "ATOM " << I(5,number) << ' ' << rsd.orbital_name(*orbital_index) << ' ' <<
721  rsd.name3() << ' ' << chain << I(4,rsd.seqpos() ) << " " <<
722  F(8,3,orbital_xyz.x()) <<
723  F(8,3,orbital_xyz.y()) <<
724  F(8,3,orbital_xyz.z()) <<
725  F(6,2,1.0) << F(6,2,1.0) << '\n';
726  }
727  }
728  }
729  }
730  }
731  if (basic::options::option[ basic::options::OptionKeys::out::file::output_torsions ]){
732  if ( !core::pose::is_ideal_pose(pose) ) {
733  TR << "Ignoring out::file::output_torsions option because pose is non-ideal!" << std::endl;
734  } else {
735  ObjexxFCL::FArray1D_char dssp_reduced_secstruct(pose.n_residue());
736  scoring::dssp::Dssp(pose).dssp_reduced(dssp_reduced_secstruct);
737  out << "REMARK torsions: res pdbres pdbchain seq dssp phi psi omega" << std::endl;
738  for (core::Size i=1; i<=pose.n_residue(); ++i) {
739  out << "REMARK " << I( 4, i ) << " " << I( 4, pose.pdb_info()->number(i)) << " " << pose.pdb_info()->chain(i) << " " << pose.residue( i ).name1() << " " <<
740  dssp_reduced_secstruct(i) << " " << F( 9, 3, pose.phi(i)) << " " << F( 9, 3, pose.psi(i)) << " " << F( 9, 3, pose.omega(i)) << std::endl;
741  }
742  }
743  }
744 }
745 
746 void
748  pose::Pose & pose,
749  std::string const & filename
750 )
751 {
752  PDB_DReaderOptions options;
753  build_pose_from_pdb_as_is( pose, filename, options );
754 }
755 
756 void
758  pose::Pose & pose,
759  std::string const & filename,
760  PDB_DReaderOptions const & pdr_options
761 )
762 {
763  using namespace chemical;
764  build_pose_from_pdb_as_is( pose, * ChemicalManager::get_instance()->residue_type_set( FA_STANDARD ), filename, pdr_options );
765 }
766 
767 void
769  pose::Pose & pose,
770  chemical::ResidueTypeSet const & residue_set,
771  std::string const & filename
772 )
773 {
774  PDB_DReaderOptions options;
775  build_pose_from_pdb_as_is( pose, residue_set, filename, options );
776 }
777 
778 void
780  pose::Pose & pose,
781  chemical::ResidueTypeSet const & residue_set,
782  std::string const & filename,
783  PDB_DReaderOptions const & pdr_options
784 )
785 {
786  std::string all_lines, sub_lines;
787 
788  utility::io::izstream file( filename );
789  if (!file) {
790  TR.Error << "File:" << filename << " not found!" << std::endl;
791  utility_exit_with_message( "Cannot open file " + filename );
792  } else {
793  TR.Debug << "read file: " << filename << std::endl;
794  }
795 
796  utility::slurp( file, all_lines );
797  FileData fd = PDB_DReader::createFileData( all_lines, pdr_options );
798  if ( fd.filename == "" ) {
799  fd.filename = filename;
800  }
801  id::AtomID_Mask missing( false );
802  build_pose_as_is1( fd, pose, residue_set, missing, pdr_options);
803 
804 }
805 
806 //void
807 //build_pose_as_is1( io::pdb::FileData & fd, pose::Pose & pose, chemical::ResidueTypeSet const & residue_set, id::AtomID_Mask & missing )
808 //void
809 //build_pose_as_is1(
810 // io::pdb::FileData & fd,
811 // pose::Pose & pose,
812 // chemical::ResidueTypeSet const & residue_set,
813 // id::AtomID_Mask & missing
814 //)
815 //{
816 // FileDataOptions options;
817 // build_pose_as_is1(fd, pose, residue_set, missing, options );
818 //}
819 
820 void
822  io::pdb::FileData & fd,
823  pose::Pose & pose,
824  chemical::ResidueTypeSet const & residue_set,
825  id::AtomID_Mask & missing,
826  FileDataOptions const & options
827 )
828 {
829  typedef std::map< std::string, double > ResidueTemps;
830  typedef std::map< std::string, ResidueTemps > Temps;
831  typedef std::map< std::string, Vector > ResidueCoords;
832  typedef std::map< std::string, ResidueCoords > Coords;
834 
835  using namespace chemical;
836  using namespace conformation;
837 
839 
840  // reset current data
841  pose.clear();
842 
844  // Map pose residue numbers to indices into rinfos.
845  // Some residues in the input file may be discarded (missing atoms, unrecognized, etc)
846  utility::vector1< Size > pose_to_rinfo;
847  fd.create_working_data( rinfos, options );
848  //Temps temps;
849  //Coords coords;
850  //Strings resids, sequence,
851  Strings pose_resids;
853  Strings branch_lower_termini;
854 
855  int const nres_pdb( rinfos.size() );
856 
857  utility::vector1<Size> UA_res_nums;
858  utility::vector1<std::string> UA_res_names, UA_atom_names;
861 
862  std::string chains_whose_residues_are_separate_chemical_entities = options.chains_whose_residues_are_separate_chemical_entities();
863  std::string::const_iterator const entities_begin = chains_whose_residues_are_separate_chemical_entities.begin();
864  std::string::const_iterator const entities_end = chains_whose_residues_are_separate_chemical_entities.end();
865 
866  std::string chains_to_check_if_Ntermini= options.check_if_residues_are_Ntermini() ;
867  std::string::const_iterator const check_Ntermini_begin = chains_to_check_if_Ntermini.begin();
868  std::string::const_iterator const check_Ntermini_end = chains_to_check_if_Ntermini.end();
869  std::string chains_to_check_if_Ctermini= options.check_if_residues_are_Ctermini() ;
870  std::string::const_iterator const check_Ctermini_begin = chains_to_check_if_Ctermini.begin();
871  std::string::const_iterator const check_Ctermini_end = chains_to_check_if_Ctermini.end();
872 
873  //mjo do not add residue by bond if the last residue was not
874  //recognized
875  bool last_residue_was_recognized(true);
876 
877  // Loop over every residue in the FileData extracted from the PDB file, select appropriate ResidueTypes,
878  // create Residues, and build the Pose.
879  for ( int i=1; i<= nres_pdb; ++i ) {
880  ResidueInformation const & rinfo = rinfos[i];
881  std::string const & pdb_name = rinfo.resName;
882  std::string const & resid = rinfo.resid;
883  char chainID = rinfo.chainID;
884 
885  runtime_assert( resid.size() == 6 );
886  bool const separate_chemical_entity = find(entities_begin, entities_end, chainID ) != entities_end;
887  bool const same_chain_prev = ( i > 1 && chainID == rinfos[i-1].chainID &&
888  rinfo.terCount == rinfos[i-1].terCount && !separate_chemical_entity);
889  bool const same_chain_next = ( i < nres_pdb && chainID == rinfos[i+1].chainID &&
890  rinfo.terCount == rinfos[i+1].terCount && !separate_chemical_entity);
891  bool const check_Ntermini_for_this_chain = ("ALL" == chains_to_check_if_Ntermini) ?
892  true : find(check_Ntermini_begin, check_Ntermini_end, chainID ) == check_Ntermini_end;
893  bool const check_Ctermini_for_this_chain = ("ALL" == chains_to_check_if_Ctermini) ?
894  true : find(check_Ctermini_begin, check_Ctermini_end, chainID ) == check_Ctermini_end;
895 
896  // Determine polymer information: termini, branch points, etc.
897  bool const is_branch_point = fd.links.count(resid); // if found in the links map
898  if (is_branch_point) {
899  // Find and store associated 1st residue of the branch to access later.
900  branch_lower_termini.push_back(fd.links[resid].resID2_);
901  }
902  bool const is_branch_lower_terminus = branch_lower_termini.contains(resid);
903  bool const is_lower_terminus( ( i == 1 || rinfos.empty() || (!same_chain_prev && !is_branch_lower_terminus) ) && check_Ntermini_for_this_chain );
904  bool const is_upper_terminus( ( i == nres_pdb || !same_chain_next ) && check_Ctermini_for_this_chain );
905 
906  TR.Debug << "Residue " << i << std::endl;
907  if (is_lower_terminus) {
908  TR.Debug << "...is a lower terminus." << std::endl;
909  }
910  if (is_upper_terminus) {
911  TR.Debug << "...is an upper terminus." << std::endl;
912  }
913  if (is_branch_point) {
914  TR.Debug << "...is a branch point." << std::endl;
915  }
916  if (is_branch_lower_terminus) {
917  TR.Debug << "...is the lower terminus of a branch." << std::endl;
918  }
919 
920  ResidueCoords const & xyz = rinfo.xyz;
921  ResidueTemps const & rtemp = rinfo.temps;
922 
923  // Get a list of ResidueTypes that could apply for this particular 3-letter PDB residue name.
924  ResidueTypeCOPs const & rsd_type_list( residue_set.name3_map( pdb_name ) );
926  i, pdb_name, rsd_type_list, xyz, rtemp,
927  UA_res_nums, UA_res_names, UA_atom_names, UA_coords, UA_temps, options)) {
928  last_residue_was_recognized = false;
929  continue;
930  }
931 
932  // look for best match:
933  // rsd_type should have all the atoms present in xyz
934  // try to minimize atoms missing from xyz
935  Size best_index(0), best_rsd_missing( 99999 ), best_xyz_missing( 99999 );
936 
937 
938  for ( Size j=1; j<= rsd_type_list.size(); ++j ) {
939  ResidueType const & rsd_type( *(rsd_type_list[j]) );
940  bool const is_polymer( rsd_type.is_polymer() ); // need an example residue type, though this will
941  // remain fixed for all residue_types with the same name3
942 
943  //TR.Debug << rsd_type.name() << " is_polymer " << is_polymer << std::endl;
944  //TR.Debug << rsd_type.name() << " is_lower_terminus " << rsd_type.has_variant_type( LOWER_TERMINUS ) << std::endl;
945  //TR.Debug << rsd_type.name() << " is_upper_terminus " << rsd_type.has_variant_type( UPPER_TERMINUS ) << std::endl;
946 
947  // only take the desired variants
948  if ( is_polymer && ( is_lower_terminus != rsd_type.has_variant_type( LOWER_TERMINUS ) ||
949  is_upper_terminus != rsd_type.has_variant_type( UPPER_TERMINUS )) ) {
950  TR.Debug << "Discarding '" << rsd_type.name() << "' ResidueType" << std::endl;
951  //TR.Debug << "because a polymer and not a terminus" << std::endl;
952  continue;
953  }
954  if (is_polymer && (is_branch_point != rsd_type.has_variant_type(BRANCH_POINT))) {
955  TR.Debug << "Discarding '" << rsd_type.name() << "' ResidueType" << std::endl;
956  //TR.Debug << "because a polymer and not a branchpoint" << std::endl;
957  continue;
958  }
959  if (is_polymer && (is_branch_lower_terminus != rsd_type.has_variant_type(BRANCH_LOWER_TERMINUS))) {
960  TR.Debug << "Discarding '" << rsd_type.name() << "' ResidueType" << std::endl;
961  //TR.Debug << "because a polymer and not a branch lower terminus" << std::endl;
962  continue;
963  }
964  if ( rsd_type.aa() == aa_cys && rsd_type.has_variant_type( DISULFIDE ) && pdb_name != "CYD" ) {
965  TR.Debug << "Discarding '" << rsd_type.name() << "' ResidueType" << std::endl;
966  //TR.Debug << "because CYS and is disulfide and not CYD" << std::endl;
967  continue;
968  }
969  if ( !options.keep_input_protonation_state() &&
970  ( rsd_type.has_variant_type( PROTONATED ) || rsd_type.has_variant_type( DEPROTONATED ) )){
971  TR.Debug << "Discarding '" << rsd_type.name() << "' ResidueType" << std::endl;
972  //TR.Debug << "because of protonation state" << std::endl;
973  continue;
974  }
975  if (rsd_type.is_carbohydrate() && residue_type_base_name(rsd_type) != fd.carbohydrate_residue_type_base_names[resid]) {
976  TR.Debug << "Discarding '" << rsd_type.name() << "' ResidueType" << std::endl;
977  //TR.Debug << "because is carbohydrate" << std::endl;
978  continue;
979  }
980 
981  TR.Debug << "Trying '" << rsd_type.name() << "' ResidueType" << std::endl;
982 
983  Size rsd_missing(0), xyz_missing(0);
984 
985  for ( Size k=1; k<= rsd_type.natoms(); ++k ) {
986  if ( xyz.count( rsd_type.atom_name(k) ) == 0 ) ++xyz_missing;
987  }
988 
989  for ( ResidueCoords::const_iterator iter=xyz.begin(), iter_end=xyz.end(); iter!= iter_end; ++iter ) {
990  if ( !rsd_type.has( local_strip_whitespace(iter->first) ) &&
991  !( iter->first == " H " && is_lower_terminus ) ) { // don't worry about missing backbone H if Nterm
992  ++rsd_missing;
993  }
994  }
995 
996  if ( ( rsd_missing < best_rsd_missing ) ||
997  ( rsd_missing == best_rsd_missing && xyz_missing < best_xyz_missing ) ) {
998  best_rsd_missing = rsd_missing;
999  best_xyz_missing = xyz_missing;
1000  best_index = j;
1001  }
1002 // if ( rsd_missing == 0 && xyz_missing < best_xyz_missing ) {
1003 // best_xyz_missing = xyz_missing;
1004 // best_index = j;
1005 // }
1006  } // j=1,rsd_type_list.size()
1007 
1008  if(!best_index){
1009  utility_exit_with_message( "Unrecognized residue: " + pdb_name );
1010  }
1011 
1012  ResidueType const & rsd_type( *(rsd_type_list[ best_index ]) );
1013  TR.Debug << "Match: '" << rsd_type.name() << "'; missing " << best_xyz_missing << " coordinates" << std::endl;
1014 
1015  if ( best_rsd_missing ) {
1016  TR << "[ WARNING ] discarding " << best_rsd_missing << " atoms at position " << i <<
1017  " in file " << fd.filename << ". Best match rsd_type: " << rsd_type.name() << std::endl;
1018  }
1019 
1020  // check for missing mainchain atoms:
1021  if ( rsd_type.is_polymer() ) {
1022  AtomIndices const & mainchain( rsd_type.mainchain_atoms() );
1023  Size const nbb( mainchain.size() );
1024  if ( nbb >= 3 ) {
1025  bool mainchain_core_present( false );
1026  for ( Size k=1; k<= nbb-2; ++k ) {
1027  if ( xyz.count( rsd_type.atom_name(mainchain[k ])) &&
1028  xyz.count( rsd_type.atom_name(mainchain[k+1])) &&
1029  xyz.count( rsd_type.atom_name(mainchain[k+2])) ) {
1030  mainchain_core_present = true;
1031  break;
1032  }
1033  }
1034  if ( !mainchain_core_present ) {
1035  TR << "[ WARNING ] skipping pdb residue b/c its missing too many mainchain atoms: " << resid <<
1036  ' ' << pdb_name << ' ' << rsd_type.name() << std::endl;
1037  for ( Size k=1; k<= nbb; ++k ) {
1038  if ( !xyz.count( rsd_type.atom_name(mainchain[k] ) ) ) {
1039  TR << "missing: " << rsd_type.atom_name( mainchain[k] ) << std::endl;
1040  }
1041  }
1042  if( options.exit_if_missing_heavy_atoms() == true ) {
1043  utility_exit_with_message("quitting due to missing heavy atoms");
1044  }
1045  continue;
1046  }
1047  }
1048  }
1049 
1050  // found a match, now fill in the coords
1051  ResidueOP new_rsd( ResidueFactory::create_residue( rsd_type ) );
1052 
1053  for ( ResidueCoords::const_iterator iter=xyz.begin(), iter_end=xyz.end(); iter!= iter_end; ++iter ) {
1054  if ( new_rsd->has( local_strip_whitespace(iter->first) ) ) {
1055  // offsetting all coordinates by a small constant prevents problems with atoms located
1056  // at position (0,0,0).
1057  // This is a bit of a dirty hack but it fixes the major problem of reading in rosetta
1058  // pdbs which usually start at 0,0,0. However the magnitude of this offset is so small
1059  // that the output pdbs should still match input pdbs. hopefully. yes. aehm.
1060  double offset = 1e-250; // coordinates now double, so we can use _really_ small offset.
1061  new_rsd->atom( local_strip_whitespace(iter->first) ).xyz( iter->second + offset );
1062  }
1063  //else runtime_assert( iter->first == " H " && rsd_type.is_terminus() ); // special casee
1064  }
1065 
1066  // fill in b-factor from pdb file
1067  // for ( ResidueTemps::const_iterator iter=res_temps.begin(), iter_end = res_temps.end();
1068  // iter != iter_end;
1069  // ++iter ) {
1070  // if ( new_rsd->has( local_strip_whitespace(iter->first) ) ) {
1071  // new_rsd->atom( local_strip_whitespace(iter->first) ).temperature( iter->second );
1072  // }
1073  // }
1074 
1075  // Add this new residue to the pose by appending.
1076  Size const old_nres( pose.total_residue() );
1077  if (!old_nres) /*first residue?*/ {
1078  pose.append_residue_by_bond( *new_rsd );
1079  } else {
1080  if ( ( is_lower_terminus || !check_Ntermini_for_this_chain ) ||
1081  is_branch_lower_terminus ||
1082  !new_rsd->is_polymer() ||
1083  !pose.residue_type(old_nres).is_polymer() ||
1084  !last_residue_was_recognized) {
1085  pose.append_residue_by_jump(*new_rsd, 1);
1086  } else {
1087  pose.append_residue_by_bond(*new_rsd);
1088  }
1089  }
1090  pose_to_rinfo.push_back( Size(i) );
1091  pose_resids.push_back( rinfo.resid );
1092  pose_temps.push_back( rinfo.temps );
1093 
1094 
1095  // update the pose-internal chain label if necessary
1096  if ( ( ( is_lower_terminus || !check_Ntermini_for_this_chain ) || is_branch_lower_terminus) && pose.total_residue() > 1 ) {
1097  pose.conformation().insert_chain_ending( pose.total_residue() - 1 );
1098  }
1099 
1100  last_residue_was_recognized = true;
1101  } // i=1,nres_pdb
1102 
1103 
1104  // Check termini status of newly created pose residues.
1105  // Will this ever happen? ~ Labonte
1106 
1107  Size const nres( pose.total_residue() );
1108  for ( Size i=1; i<= nres; ++i ) {
1109  ResidueInformation const & rinfo = rinfos[i];
1110  char chainID = rinfo.chainID;
1111 
1112  bool const check_Ntermini_for_this_chain = ("ALL" == chains_to_check_if_Ntermini) ?
1113  true : find(check_Ntermini_begin, check_Ntermini_end, chainID ) == check_Ntermini_end;
1114  bool const check_Ctermini_for_this_chain = ("ALL" == chains_to_check_if_Ctermini) ?
1115  true : find(check_Ctermini_begin, check_Ctermini_end, chainID ) == check_Ctermini_end;
1116 
1117  if ( !check_Ntermini_for_this_chain ) continue;
1118  if ( !check_Ctermini_for_this_chain ) continue;
1119 
1120  //Residue const & rsd( pose.residue( i ) ); // THIS WAS A BAD BUG
1121  if ( !pose.residue_type(i).is_polymer() ) continue;
1122  if ( !pose.residue_type(i).is_lower_terminus() &&
1123  ( i == 1 ||
1124  !pose.residue_type( i-1 ).is_polymer() ||
1125  (pose.residue_type( i-1 ).is_upper_terminus() &&
1127  TR << "Adding undetected lower terminus type to residue " << i << std::endl;
1129  }
1130  if ( !pose.residue_type(i).is_upper_terminus() &&
1131  ( i == nres ||
1132  !pose.residue_type(i+1).is_polymer() ||
1133  pose.residue_type(i+1).is_lower_terminus() ||
1135  TR << "Adding undetected upper terminus type to residue " << i << std::endl;
1137  }
1138  }
1139 
1140  //make_upper_terminus( pose, residue_set, pose.total_residue() );
1141 
1142  // now handle missing atoms
1143  //id::AtomID_Mask missing( false );
1144  Size num_heavy_missing = 0;
1145 
1146  core::pose::initialize_atomid_map( missing, pose ); // dimension the missing-atom mask
1147  if ( pose.total_residue() == 0 ) {
1148 
1149  // PDBInfo setup
1150  core::pose::PDBInfoOP pdb_info( new core::pose::PDBInfo( pose.total_residue() ) );
1151  for( Size i = 1; i <= UA_res_nums.size(); ++i ) {
1152  pdb_info->add_unrecognized_atom( UA_res_nums[i], UA_res_names[i], UA_atom_names[i], UA_coords[i], UA_temps[i] );
1153  }
1154  // store pdb info
1155  pose.pdb_info( pdb_info );
1156  return;
1157 
1158  utility_exit_with_message("ERROR: No residues in pose, empty file ? " );
1159  // if unchecked it segfaults further down...
1160  }
1161  for ( Size i=1; i<= pose.total_residue(); ++i ) {
1162  ResidueCoords const & xyz( rinfos[pose_to_rinfo[i]].xyz );
1163 
1164  Residue const & rsd( pose.residue(i) );
1165  for ( Size j=1; j<= rsd.natoms(); ++j ) {
1166  if ( xyz.count( rsd.atom_name(j) ) == 0 ) {
1167  missing[ id::AtomID( j, i ) ] = true;
1168  if( !rsd.atom_is_hydrogen(j) ) num_heavy_missing++;
1169  }
1170  }
1171  }
1172 
1173 
1174  //ja save the pdb residue indices in the Pose //well, PDBInfo
1175  //ja pdb residue indices can be negative
1176  utility::vector1< int > pdb_numbering;
1177  //sml chain char
1178  utility::vector1< char > pdb_chains, insertion_codes;
1179  //Size const nres( pose.total_residue() );
1180  for ( Size i(1); i <= nres; ++i ) {
1181  ResidueInformation const & rinfo = rinfos[pose_to_rinfo[i]];
1182  std::string resid( rinfo.resid.substr(0,4) );
1183  // pdb residue numbers can be negative
1184  int resid_num;
1185  std::istringstream ss( resid );
1186  ss >> resid_num;
1187  pdb_numbering.push_back( resid_num );
1188 
1189  char const chain( rinfo.resid[5] );
1190  pdb_chains.push_back( chain );
1191 
1192  char const icode( rinfo.resid[4] );
1193  insertion_codes.push_back( icode );
1194  }
1195 
1196  // PDBInfo setup
1197  core::pose::PDBInfoOP pdb_info( new core::pose::PDBInfo( pose.total_residue() ) );
1198 
1199  // set pdb-wide information
1200  pdb_info->name( fd.filename );
1201  if(fd.modeltag=="") {
1202  pdb_info->modeltag( fd.filename );
1203  } else {
1204  pdb_info->modeltag( fd.modeltag );
1205  }
1206 
1207  if( options.preserve_header() == true ) {
1208  pdb_info->remarks( *fd.remarks );
1209  pdb_info->header_information( fd.header_information()() );
1210  }
1211 
1212  // set residue level pdb information
1213  pdb_info->set_numbering( pdb_numbering );
1214  pdb_info->set_chains( pdb_chains );
1215  pdb_info->set_icodes( insertion_codes );
1216 
1217  pose.conformation().fill_missing_atoms( missing );
1218 
1219  // most DNA structures lack 5' phosphate groups. 5' phosphates must be built to serve as part of the backbone for
1220  // atom/fold tree purposes. Here they are made virtual so as not to affect physical calculations.
1221  for ( uint seqpos(1), nres( pose.total_residue() ); seqpos <= nres; ++seqpos ) {
1222  Residue const & rsd( pose.residue( seqpos ) );
1223  if ( ! rsd.type().is_DNA() ) continue;
1224  for ( uint atomi(1), natoms( rsd.natoms() ); atomi <= natoms; ++atomi ) {
1225  id::AtomID const id( atomi, seqpos );
1226  if ( missing[ id ] && rsd.atom_name(atomi) == " P " ) {
1227  TR << "Virtualizing missing phosphate that was built in at seqpos " << seqpos << std::endl;
1228  core::pose::add_variant_type_to_pose_residue( pose, "VIRTUAL_DNA_PHOSPHATE", seqpos );
1229  break;
1230  }
1231  }
1232  }
1233 
1234  // Look for and create any remaining non-mainchain (Edge::CHEMICAL) bonds based on a specified radius from any
1235  // unsatisfied residue connections. This is used for such things as branched polymers, ubiquitination, or covalent
1236  // intermediates. Note: The fold tree will remain with a jump between each such bond until import_pose::
1237  // set_reasonable_fold_tree() is called later, which actually adds the CHEMICAL edges to fold tree; this method
1238  // simply makes the bonds.
1239  pose.conformation().detect_bonds();
1240 
1241  //mjo TODO: this can try to access pose->pdb_info() which is not yet
1242  //initialized. Moving it after the pose->pdb_info has been
1243  //initialized causes integration test changes
1245 
1246  if(pose.n_residue()>1){// 1 residue fragments for ligand design.
1248  }
1249 
1250  // ensure enough space for atom level pdb information
1251  pdb_info->resize_atom_records( pose );
1252 
1253  // add unrecognized atoms to PDBInfo
1254  for( Size i = 1; i <= UA_res_nums.size(); ++i ) {
1255  pdb_info->add_unrecognized_atom( UA_res_nums[i], UA_res_names[i], UA_atom_names[i], UA_coords[i], UA_temps[i] );
1256  }
1257 
1258  // add temps to PDBInfo
1259  for( core::Size ir = 1; ir <= pose.total_residue(); ir++ ) {
1260  // fill in b-factor from pdb file
1261  ResidueTemps & res_temps( rinfos[pose_to_rinfo[ir]].temps );
1262  for( ResidueTemps::const_iterator iter=res_temps.begin(); iter != res_temps.end(); ++iter ) {
1263  if( pose.residue(ir).type().has( local_strip_whitespace(iter->first) ) ) {
1264  // printf("setting temp: res %d atom %s temp %f\n",ir,iter->first.c_str(),iter->second);
1265  core::Size ia = pose.residue(ir).type().atom_index(local_strip_whitespace(iter->first)) ;
1266  pdb_info->temperature( ir, ia, iter->second );
1267  } else {
1268  if( (iter->first)[0] == 'H' || ((iter->first)[0] == ' ' && (iter->first)[1] == 'H') ) {
1269  ;// don't warn if H
1270  } else {
1271  TR << "[ WARNING ] can't find atom for res " << ir << " atom " << iter->first << " (trying to set temp)" << std::endl;
1272  }
1273  }
1274  }
1275  }
1276 
1277  // mark PDBInfo as ok and store in Pose
1278  pdb_info->obsolete( false );
1279  pose.pdb_info( pdb_info );
1280 }
1281 
1282 ///@details Convenience overload: default-constructs a FileDataOptions and
1283 ///forwards every argument, unchanged, to the overload that takes an explicit
1284 ///FileDataOptions, returning that overload's result. The input rsd_type_list
1285 ///are all the residue types that have the same 3 letter code as pdb_name.
1286 ///The flags that matter (presumably read from the option system by the
1287 ///default FileDataOptions constructor -- TODO confirm) are:
1288 /// -in:ignore_waters
1289 /// -in:ignore_unrecognized_res
1290 /// -in:remember_unrecognized_waters
1291 /// -in:remember_unrecognized_res
///
/// @return Whatever the FileDataOptions overload returns for the same inputs.
///
/// NOTE(review): this listing has no line carrying the return type and
/// function name (embedded numbering jumps 1291 -> 1293), and no declaration
/// for the UA_coords parameter that is forwarded below (numbering jumps
/// 1300 -> 1302). Verify the full signature against the original file.
1293  Size const pdb_residue_index,
1294  std::string const & pdb_name,
1295  core::chemical::ResidueTypeCOPs const & rsd_type_list,
1296  std::map< std::string, Vector > const & xyz,
1297  std::map< std::string, double > const & rtemp,
1298  utility::vector1<Size> & UA_res_nums,
1299  utility::vector1<std::string> & UA_res_names,
1300  utility::vector1<std::string> & UA_atom_names,
1302  utility::vector1<core::Real> & UA_temps){
1303
// Default-constructed options object; all decisions are made by the callee.
1304  FileDataOptions options;
1305  return is_residue_type_recognized( pdb_residue_index, pdb_name, rsd_type_list, xyz, rtemp, UA_res_nums, UA_res_names, UA_atom_names, UA_coords, UA_temps, options );
1306 }
1307 
1308 ///@details The input rsd_type_list are all the residue types that have
1309 ///the same 3 letter code as pdb_name. Return true if the list is
1310 ///non-empty and false otherwise. If no residue types match, then
1311 ///either exit, ignore or remember the residue based on the following
1312 ///accessors of the FileDataOptions instance:
1313 ///
1314 /// ignore_waters()
1315 /// ignore_unrecognized_res()
1316 /// remember_unrecognized_water()
1317 /// remember_unrecognized_res()
///
/// Order of decisions when rsd_type_list is empty:
///  1. Exit with an error unless ignore_unrecognized_res() or
///     remember_unrecognized_res() is set, or the residue is "HOH" and
///     ignore_waters() is set.
///  2. Return false without recording anything if the residue is "HOH" and
///     remember_unrecognized_water() is not set.
///  3. If remember_unrecognized_res() is set, append one entry per atom in
///     xyz to each of the UA_* vectors.
///  4. Return false.
///
/// @param pdb_residue_index Value logged and stored in UA_res_nums for every
///        remembered atom.
/// @param pdb_name Residue name; compared against "HOH", used in the error
///        message, and stored in UA_res_names.
/// @param rsd_type_list Candidate residue types; only its emptiness is tested.
/// @param xyz Atom name -> coordinate map that is iterated when remembering.
/// @param rtemp Atom name -> value map looked up with the keys of xyz; the
///        value is logged as "temp" and stored in UA_temps.
/// @param UA_res_nums,UA_res_names,UA_atom_names,UA_coords,UA_temps Output
///        vectors that receive one element each per remembered atom.
/// @param options Source of the four flags listed above.
/// @return true if rsd_type_list is non-empty, false otherwise (unless the
///         function exits).
///
/// NOTE(review): this listing has no line carrying the return type and
/// function name (embedded numbering jumps 1317 -> 1319), and no declaration
/// for the UA_coords parameter used below (numbering jumps 1326 -> 1328).
/// Verify the full signature against the original file.
1319  Size const pdb_residue_index,
1320  std::string const & pdb_name,
1321  core::chemical::ResidueTypeCOPs const & rsd_type_list,
1322  std::map< std::string, Vector > const & xyz,
1323  std::map< std::string, double > const & rtemp,
1324  utility::vector1<Size> & UA_res_nums,
1325  utility::vector1<std::string> & UA_res_names,
1326  utility::vector1<std::string> & UA_atom_names,
1328  utility::vector1<core::Real> & UA_temps,
1329  FileDataOptions const & options){
1330
// At least one candidate type exists: the residue is recognized.
1331  if(!rsd_type_list.empty()){
1332  return true;
1333  }
1334
1335  using namespace basic::options;
// Nothing below permits skipping or remembering this residue -> hard error.
1336  if( !(options.ignore_unrecognized_res() ||
1337  options.remember_unrecognized_res() ||
1338  (pdb_name == "HOH" && options.ignore_waters())) ) {
1339  // We should fail fast on unrecognized input rather than produce bad results!
1340  utility_exit_with_message(" unrecognized aa " + pdb_name );
1341  }
1342
// Waters are dropped silently unless the caller asked to remember them.
1343  if( !options.remember_unrecognized_water() ) {
1344  // don't bother with water
1345  if( pdb_name == "HOH" ){
1346  return false;
1347  }
1348  }
1349
// Record every atom of the unrecognized residue in the parallel UA_* vectors.
1350  if( options.remember_unrecognized_res() ) {
1351  for(std::map<std::string, Vector>::const_iterator iter=xyz.begin(), iter_end=xyz.end(); iter!= iter_end; ++iter ) {
// NOTE(review): the test is '>' and runs before the push_back calls, so a
// 5001st entry is stored before the next iteration aborts.
1352  if( UA_res_nums.size() > 5000 ) {
1353  utility_exit_with_message("can't handle more than 5000 atoms worth of unknown residues\n");
1354  }
// NOTE(review): rtemp.find(...) is dereferenced without comparing against
// rtemp.end(); this assumes every key of xyz is also present in rtemp.
1355  TR << "remember unrecognized atom " << pdb_residue_index << " " << pdb_name << " " << local_strip_whitespace(iter->first)
1356  << " temp " << rtemp.find(iter->first)->second << std::endl;
1357  UA_res_nums.push_back( pdb_residue_index );
1358  UA_res_names.push_back( pdb_name );
1359  UA_atom_names.push_back( local_strip_whitespace(iter->first) );
1360  UA_coords.push_back( iter->second );
1361  UA_temps.push_back( rtemp.find(iter->first)->second );
1362  }
1363  }
// Empty rsd_type_list and no exit: the residue is unrecognized.
1364  return false;
1365 }
1366 
/// @brief Convenience overload: default-constructs a FileDataOptions and
/// forwards new_pose, old_pose and residue_indices together with it to the
/// pose_from_pose overload that takes options.
///
/// NOTE(review): the line carrying the function name is absent from this
/// listing (embedded numbering jumps 1367 -> 1369); the name is inferred from
/// the forwarding call in the body -- verify against the original file.
1367 void
1369  pose::Pose & new_pose,
1370  pose::Pose const & old_pose,
1371  utility::vector1< core::Size > const & residue_indices
1372 ){
1373  FileDataOptions options;
1374  pose_from_pose( new_pose, old_pose, residue_indices, options );
1375 }
1376 
/// @brief Overload without an explicit residue type set: obtains the set
/// registered under FA_STANDARD via ChemicalManager::get_instance() and
/// forwards it (dereferenced) with the remaining arguments to the overload
/// that takes a chemical::ResidueTypeSet.
///
/// NOTE(review): the line carrying the function name is absent from this
/// listing (embedded numbering jumps 1377 -> 1379); the name is inferred from
/// the forwarding call in the body -- verify against the original file.
1377 void
1379  pose::Pose & new_pose,
1380  pose::Pose const & old_pose,
1381  utility::vector1< core::Size > const & residue_indices,
1382  FileDataOptions const & options
1383 ){
1384  using namespace chemical;
// The residue type set is hard-wired to FA_STANDARD in this overload.
1385  ResidueTypeSetCAP residue_set(
1386  ChemicalManager::get_instance()->residue_type_set( FA_STANDARD )
1387  );
1388  pose_from_pose( new_pose, old_pose, *residue_set, residue_indices, options);
1389 }
1390 
/// @brief Convenience overload: default-constructs a FileDataOptions and
/// forwards it with new_pose, old_pose, residue_set and residue_indices to
/// the pose_from_pose overload that takes both a residue set and options.
///
/// NOTE(review): the line carrying the function name is absent from this
/// listing (embedded numbering jumps 1391 -> 1393); the name is inferred from
/// the forwarding call in the body -- verify against the original file.
1391 void
1393  pose::Pose & new_pose,
1394  pose::Pose const & old_pose,
1395  chemical::ResidueTypeSet const & residue_set,
1396  utility::vector1< core::Size > const & residue_indices
1397 ){
1398  FileDataOptions options;
1399  pose_from_pose( new_pose, old_pose, residue_set, residue_indices, options );
1400 }
1401 
/// @brief Builds new_pose from selected residues of old_pose by going through
/// a FileData object.
///
/// @details Initialises a local FileData from old_pose and residue_indices
/// via init_from_pose(), then calls build_pose_as_is1() with that FileData,
/// new_pose, residue_set, a local AtomID_Mask and options. The mask is not
/// read after the call, so it is not visible to the caller.
///
/// NOTE(review): the line carrying the function name is absent from this
/// listing (embedded numbering jumps 1402 -> 1404); the name is inferred from
/// the sibling overloads that forward here -- verify against the original
/// file.
1402 void
1404  pose::Pose & new_pose,
1405  pose::Pose const & old_pose,
1406  chemical::ResidueTypeSet const & residue_set,
1407  utility::vector1< core::Size > const & residue_indices,
1408  FileDataOptions const & options
1409 ){
1410  FileData fd;
// NOTE(review): 'data' is never used in the visible body.
1411  std::string data;
1412  fd.init_from_pose( old_pose, residue_indices );
// Mask constructed with false and handed to build_pose_as_is1().
1413  id::AtomID_Mask missing( false );
1414  build_pose_as_is1( fd, new_pose, residue_set, missing, options );
1415 }
1416 
1417 
1418 
1419 } // namespace pdb
1420 } // namespace io
1421 } // namespace core
1422 
1423