Rosetta 3.5
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
BinaryRNASilentStruct.cc
Go to the documentation of this file.
1 // -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
2 // vi: set ts=2 noet:
3 // :noTabs=false:tabSize=4:indentSize=4:
4 //
5 // (c) Copyright Rosetta Commons Member Institutions.
6 // (c) This file is part of the Rosetta software suite and is made available under license.
7 // (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
8 // (c) For more information, see http://www.rosettacommons.org. Questions about this can be
9 // (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
10 
11 /// @file core/io/silent/BinaryRNASilentStruct.cc
12 ///
13 /// @brief
14 /// @author Rhiju Das
15 
16 // C++ Headers
17 #include <cmath>
18 #include <cstdlib>
19 // AUTO-REMOVED #include <fstream>
20 #include <iostream>
21 #include <utility>
22 #include <vector>
23 // AUTO-REMOVED #include <list>
24 #include <string>
25 #include <map>
26 #include <sstream>
27 
28 // mini headers
29 // AUTO-REMOVED #include <ObjexxFCL/format.hh>
30 // AUTO-REMOVED #include <ObjexxFCL/char.functions.hh>
31 // AUTO-REMOVED #include <ObjexxFCL/string.functions.hh>
32 
33 // AUTO-REMOVED #include <utility/io/izstream.hh>
34 // AUTO-REMOVED #include <utility/io/ozstream.hh>
35 // AUTO-REMOVED #include <utility/file/file_sys_util.hh>
36 
37 #include <utility/Binary_Util.hh>
38 
39 #include <basic/Tracer.hh>
40 
41 // AUTO-REMOVED #include <core/chemical/ResidueTypeSet.hh>
49 
50 // AUTO-REMOVED #include <basic/options/option.hh>
51 
53 
54 #include <core/id/AtomID.hh>
55 // AUTO-REMOVED #include <core/id/NamedStubID.hh>
56 
57 #include <core/pose/Pose.hh>
58 
60 // AUTO-REMOVED #include <core/conformation/ResidueFactory.hh>
61 
62 #include <numeric/model_quality/rms.hh>
63 
64 
65 // option key includes
66 // AUTO-REMOVED #include <basic/options/keys/in.OptionKeys.gen.hh>
67 
70 #include <utility/vector1.hh>
71 
72 //Auto Headers
73 #include <basic/options/keys/OptionKeys.hh>
74 
75 
76 
77 namespace core {
78 namespace io {
79 namespace silent {
80 
// File-local tracer constructed with the name "core.io.silent"; the functions
// below write to its Trace, Debug, Warning and Error streams.
81 static basic::Tracer tr("core.io.silent");
82 
83 
84 /// @brief Constructors.
86 {
// Sized construction: flag the struct as full-atom, then record nres_in and
// allocate the per-residue containers via resize().
// NOTE(review): the signature line and one statement after the fullatom_
// assignment are not visible in this view (embedded numbering skips 85 and
// 91) -- confirm against the original file.
87  using namespace basic::options;
88  using namespace basic::options::OptionKeys;
89 
90  fullatom_ = true; //option[ in::file::fullatom ]();
// resize() itself calls nres( nres_in ), so the explicit call here sets the
// same value a second time.
92  nres ( nres_in );
93  resize( nres_in );
94 }
95 
97 {
// Empty construction: full-atom flag on, modern (non intra-residue-stub)
// jumps, zero residues, and the placeholder decoy tag "empty".
// NOTE(review): the signature line is not visible in this view -- presumably
// the default constructor; confirm against the original file.
98  using namespace basic::options;
99  using namespace basic::options::OptionKeys;
100 
101  fullatom_ = true; //option[ in::file::fullatom ]();
102  bJumps_use_IntraResStub_ = false;
103  nres( 0 );
104  decoy_tag( "empty" );
105 }
106 
107 
109  core::pose::Pose const & pose,
110  std::string tag
111 ) {
// Construct from an existing pose: set defaults, then delegate all copying
// to fill_struct( pose, tag ).
// fill_struct() reassigns fullatom_ from the pose's first residue, so the
// value set here only acts as an initial default.
112  fullatom_ = true;
113  bJumps_use_IntraResStub_ = false;
114  fill_struct( pose, tag );
115 } // BinaryRNASilentStruct
116 
/// @brief Populate this silent struct from a pose: decoy tag, energies,
/// annotated sequence, per-atom coordinates, secondary structure, fold tree
/// and jumps.
/// @param pose  pose whose data is copied
/// @param tag   decoy tag to store; when it equals "empty_tag",
///              set_tag_from_pose( pose ) is called as well
117 void
119  core::pose::Pose const & pose,
120  std::string tag
121 ) {
122  tr.Trace << "binary:fill_struct... " << std::endl;
123  decoy_tag( tag );
124 
125  if ( tag == "empty_tag" ) set_tag_from_pose( pose );
126 
// Full-atom vs. coarse is decided from residue 1 only.
127  fullatom_ = !pose.residue(1).is_coarse();
128  // using namespace core::chemical;
129  // if ( pose.residue(1).residue_type_set().name() == ChemicalManager::get_instance()->residue_type_set(FA_STANDARD)->name() ) {
130  // fullatom_ = true;
131  // } else {
132  // fullatom_ = false;
133  // }
134  tr.Trace << "get energies from pose..." << std::endl;
135  energies_from_pose( pose );
136 
137  // conformation information
138  //sequence_ = pose.annotated_sequence();
139  sequence( pose.annotated_sequence() );
// resize() sizes secstruct_ and atm_coords_ to the residue count.
140  resize( pose.total_residue() );
141 
142  tr.Trace << "read coords..." << std::endl;
// Copy every atom's xyz and the per-residue secondary-structure character;
// both containers are indexed from 1.
143  for ( unsigned int i = 1; i <= pose.total_residue(); ++i ) {
144  core::conformation::Residue const& resi = pose.residue(i);
145 
146  atm_coords_[i].resize( resi.natoms() );
147  for (unsigned int j = 1; j <= resi.natoms(); ++j) {
148  atm_coords_[i][j] = resi.atom(j).xyz();
149  }
150  secstruct_[i] = pose.secstruct(i);
151  } // for ( unsigned int i = 1; i <= pose.total_residue(); ++i )
152 
// Take the pose's fold tree and rebuild the jump list from scratch, one
// stored jump per jump in that tree.
153  fold_tree_ = pose.fold_tree();
154  jumps_.clear();
155  for ( Size nr = 1; nr <= fold_tree().num_jump(); nr++) {
156  add_jump( pose.jump(nr) );
157  }
158 } // fill_struct
159 
161  utility::vector1< std::string > const & lines,
162  SilentFileData & container
163 ) {
// Parse one decoy from the text lines of a binary silent file.
//  - lines:     the lines belonging to this decoy
//  - container: silent-file data object used to share the sequence and the
//               score names between structs read from the same file
// Returns false on a malformed SEQUENCE: line or when the number of jumps
// read does not match the fold tree (and nominally when coordinates are
// missing, see the note at that check); returns true otherwise.
// NOTE(review): several lines of this function are not visible in this view
// (embedded numbering skips 160, 165, 172, 229, 263, 273, 303, 373, 379),
// among them the declarations of `iter`, `end`, `seqdata`, `f` and `jump` --
// confirm against the original file.
164  utility::vector1< std::string > energy_names_;
166  if ( iter->substr(0,9) != "SEQUENCE:" ) {
167  // get sequence and scorename data from the silent-file data object, because I don't have it!
168  EnergyNamesOP enames = EnergyNamesOP(
169  static_cast< EnergyNames * > ( container.get_shared_silent_data( energynames )() )
170  );
171 
173  static_cast< SimpleSequenceData * > ( container.get_shared_silent_data( simplesequencedata )() )
174  );
175 
176  sequence ( seqdata->sequence() );
177  energy_names_ = enames ->energy_names();
178  } else {
179  // get sequence and scorename data from the first two lines provided,
180  // put them into container for further use by other SilentStruct
181  // objects.
182 
183  // first line is SEQUENCE:
184  std::istringstream line_stream( *iter );
185  std::string tag;
186  tr.Debug << "reading sequence from " << *iter << std::endl;
187  ++iter;
188 
189  std::string temp_seq;
190  line_stream >> tag >> temp_seq;
191  if ( line_stream.fail() || tag != "SEQUENCE:" ) {
192  tr.Error << "bad format in sequence line of silent file" << std::endl;
// NOTE(review): iter was already advanced above, so *iter here is the line
// after the one that failed to parse.
193  tr.Error << "line = " << *iter << std::endl;
194  tr.Error << "tag = " << tag << std::endl;
195  return false;
196  }
197  sequence( temp_seq );
198 
199  // second line is a list of score names
200  std::istringstream score_line_stream( *iter );
201  tr.Debug << "reading score names from " << *iter << std::endl;
202  ++iter;
203 
204  score_line_stream >> tag; // SCORE:
// NOTE(review): unlike the SEQUENCE: check above, a bad SCORE: header is
// only logged; parsing continues. As above, *iter has already been advanced.
205  if ( score_line_stream.fail() || tag != "SCORE:" ) {
206  tr.Error << "bad format in second line of silent file" << std::endl;
207  tr.Error << "tag = " << tag << std::endl;
208  tr.Error << "line = " << *iter << std::endl;
209  }
210 
// Collect whitespace-separated score names until extraction fails.
211  score_line_stream >> tag; // first score name
212  while ( ! score_line_stream.fail() ) {
213  energy_names_.push_back( tag );
214  score_line_stream >> tag; // try to get next score name
215  }
216 
217  EnergyNamesOP enames( new EnergyNames() );
218  SimpleSequenceDataOP seqdata( new SimpleSequenceData() );
219 
220  enames ->energy_names( energy_names_ );
221  seqdata->set_sequence( sequence() );
222 
223  container.set_shared_silent_data( energynames , enames );
224  container.set_shared_silent_data( simplesequencedata, seqdata );
225  } // get header information
226 
// currpos: 1-based index of the next residue whose coordinate line is
// expected. bitflip: set once the first residue shows that the 4-byte words
// need swapping; later residues are then swapped unconditionally.
227  int currpos = 1;
228  bool bitflip = false;
230  iter != end; ++iter
231  ) {
232  std::string tag;
233  std::istringstream line_stream( *iter );
234 
235  // std::cout << (*iter) << std::endl;
236 
237  if ( iter->substr(0,6) == "REMARK" ){
238  get_parent_remark_from_line( line_stream.str() );
239  continue; // skip comments if record_old_remarks==false
240  }
241 
242  if ( iter->substr(0,7) == "SCORE: " ) {
243  // SCORE: line with values from this structure.
244  Size nres = one_letter_sequence().length();
245  resize( nres );
246 
247  std::string tag;
248  line_stream >> tag;
// A malformed per-decoy SCORE: line is logged but not treated as fatal.
249  if ( line_stream.fail() || tag != "SCORE:" ) {
250  tr.Error << "bad format in first score line of silent file" << std::endl;
251  tr.Error << "line = " << *iter << std::endl;
252  tr.Error << "tag = " << tag << std::endl;
253  }
254 
255  parse_energies( line_stream, energy_names_ );
256 
257  } else { // conformation lines
258 
// Once coordinates for all nres() residues have been read, any further
// non-SCORE line is ignored -- including FOLD_TREE/RT/JUMP lines.
259  if ( Size(currpos) > nres() ) continue;
260 
261  // parse fold_tree and jump lines
262  if ( iter->substr(0,10) == "FOLD_TREE " ) {
264  line_stream >> f;
265  //NOTE!!!!!!!!!
266  // In BinaryProteinSilentStruct, used the function fold_tree( f ),
267  // which does something weird (e.g., doesn't use a const fold_tree as input).
268  set_fold_tree( f ); // add fold-tree to this SilentStruct
269  tr.Debug << "read fold-tree " << f; //"\n" is in fold-tree output
270  tr.Debug << "reading " << f.num_jump() << " jumps " << std::endl;
271  continue;
272  } else if ( iter->substr(0,2) == "RT" ) {
274  line_stream >> jump;
275  tr.Debug << "read jump " << jump << std::endl;
276  add_jump( jump );
277  // modern style jumps, defined completely with the FoldTree
278  bJumps_use_IntraResStub_ = false;
279  continue;
280  } else if ( iter->substr(0,9) == "SEQUENCE:" ) {
281  tr.Debug << "Skipping duplicate sequence declaration " << std::endl;
282  continue;
283  } else if ( iter->substr(0,19) == "ANNOTATED_SEQUENCE:" ) {
284  std::string annotated_seq;
285  line_stream >> tag; //ANNOTATED_SEQUENCE
286  line_stream >> annotated_seq;
287  sequence( annotated_seq );
288  tr.Debug << "read annotated sequence as: " << sequence() << std::endl;
289  // resize pose according to number of residues in annotated sequence
290  resize( one_letter_sequence().length() );
291  continue;
292  } else if ( iter->substr(0,4) == "JUMP" ) {
293  // support for rosetta++ silent files
294  std::string tag;
295  Size nr;
296  line_stream >> tag; //JUMP
297  line_stream >> nr;
// A count mismatch is only warned about; nr jumps are still read below.
298  if ( nr != fold_tree().num_jump() ) {
299  tr.Warning
300  << "WARNING: corrupted silent file read line JUMP X -- X should match number of jumps in FOLD_TREE " << std::endl;
301  }
302  for ( Size i = 1; i<= nr; i++ ) {
304  line_stream >> jump;
305  add_jump( jump );
306  }
307  bJumps_use_IntraResStub_ = true;// jump is defined via N-C-CA rosetta++ style
308  continue;
309  }
310 
311  // parse coords
312  line_stream >> tag;
313 
314  if (tag.length() < 1) {
315  tr.Warning << "WARNING: read blank line in decoy tag " << decoy_tag() << std::endl;
316  continue;
317  }
318  secstruct_[currpos] = tag[0]; // first char is sec struct
319 
// 16 encoded characters per atom -- presumably 3 floats (12 bytes, as
// written by print_conformation) in 6-bit encoding; TODO confirm.
// atm_buff gets one element more than natoms -- presumably slack for the
// decoder; TODO confirm.
320  int natoms = (tag.length()-1) / 16;
321  utility::vector1< numeric::xyzVector <float> > atm_buff( natoms+1 );
322  utility::decode6bit( (unsigned char*)&(atm_buff[1]) , tag.substr(1) );
323 
324  // endianness check ...
325  // check the dist between atoms 1 and 2 is unreasonable .. and flipping fixes then turn bitflip
326  // on
327 
// NOTE(review): atm_buff[2] is read unconditionally for the first residue,
// which assumes that line encodes at least one atom (so the buffer has two
// elements) -- verify for degenerate input.
328  if (currpos == 1) {
329  core::Real len_check12 = (atm_buff[1]-atm_buff[2]).length();
330  if ( len_check12 < 0.5 || len_check12 > 2.0 ) {
331  utility::swap4_aligned ( (void*) &(atm_buff[1][0]) , 3*natoms );
332  // recheck; if not better flip back
333  len_check12 = (atm_buff[1]-atm_buff[2]).length();
334  if ( len_check12 < 0.5 || len_check12 > 2.0 ) {
335  utility::swap4_aligned ( (void*) &(atm_buff[1][0]) , 3*natoms );
336  } else {
337  tr.Warning << "reading big-endian binary silent file! " << decoy_tag() << std::endl;
338  bitflip = true;
339  }
340  }
341  } else {
342  if (bitflip ) {
343  utility::swap4_aligned ( (void*) &(atm_buff[1][0]) , 3*natoms );
344  }
345  }
346 
347  atm_coords_[currpos].resize( natoms ); // allocate space for coords
348  for (int j=1; j<=natoms; ++j) {
349  atm_coords_[currpos][j] = atm_buff[j];
350  }
351  currpos++;
352  //tr.Debug << "processing line " << *iter << std::endl;
353  } // conformation lines
354  } // for ( iter ... )
355 
356  if ( fold_tree().num_jump() != jumps_.size() ) {
357  tr.Warning << "parse error: found " << jumps_.size()
358  << " RT lines for a fold-tree with " << fold_tree().num_jump()
359  << " for decoy tag " << decoy_tag() << std::endl;
360  return false;
361  }
362 
363  //if ( (unsigned int) currpos != total_residue + 1 ) {
// NOTE(review): resize() sets atm_coords_ to nres() elements, so this
// comparison looks like it cannot detect missing coordinate lines; the
// commented-out test above compared currpos instead -- confirm intent.
364  if ( atm_coords_.size() != nres() ) {
365  tr.Error << "ERROR: didn't find coordinates for all sequence positions of "
366  << decoy_tag() << std::endl;
367  tr.Error << " expected " << nres()
368  << ", found " << currpos-1 << std::endl;
369  return false; //no success
370  }
371 
372  if ( fold_tree().size() < 1 ) {
374  tr.Debug << " generating simple fold-tree " << fold_tree();
375  }
376 
377  if ( bJumps_use_IntraResStub_ ) { //for rosetta++ file-format
378  //prepares for setting RT via N, CA, C
380  //one could also think of making this a temporary change after read is
381  //finished return to a standard fold_tree...
382  }
383 
384  tr.Debug << "(TEX) FOLD TREE: " << fold_tree();
385 
386  return true;
387 } // init_from_lines
388 
389 /// @brief Resize this silent-struct to the appropriate number of residues.
/// @details Stores nres_in via nres() and resizes secstruct_ and atm_coords_
/// to that many entries.
/// @param nres_in  new number of residues
390 void
392  Size const nres_in
393 ) {
394  //nres_ = nres_in;
395  nres( nres_in );
396  secstruct_.resize( nres() );
397  atm_coords_.resize( nres() );
398 
399  // make a new FoldTree if we're just replacing a simple FoldTree, otherwise
400  // trust the user to have provided us a reasonable tree in a FOLD_TREE line.
// NOTE(review): the statement inside this branch is not visible in this view
// (embedded numbering skips 402) -- presumably it rebuilds a simple tree for
// the new size; confirm against the original file.
401  if ( fold_tree_.is_simple_tree() ) {
403  }
404 }
405 
407  core::pose::Pose & pose
408 ) const {
// Convenience overload: pick a residue type set, then forward to
// fill_pose( pose, residue_set ).
409  using namespace core::chemical;
410  ResidueTypeSetCAP residue_set;
411  // std::cout << "RESIDUE TYPE SET RNA " << std::endl;
// Heuristic: the first branch is taken when the sequence does not start with
// 'Z' and residue 1 stores fewer than 8 atoms.
// NOTE(review): both branch bodies are not visible in this view (embedded
// numbering skips 413 and 415) -- presumably they assign a coarse vs. a
// full-atom RNA residue type set to residue_set; confirm against the
// original file. The condition also assumes a non-empty sequence and
// at least one entry in atm_coords_.
412  if ( one_letter_sequence()[0] != 'Z' /* Mg(2+) */ && atm_coords_[1].size() < 8 ) { //hmm, may be dangerous.
414  } else {
416  }
417  fill_pose( pose, *residue_set );
418 } // fill_pose
419 
421  core::pose::Pose & pose,
422  core::chemical::ResidueTypeSet const & residue_set
423 ) const {
// Build `pose` from this struct: create residues from the stored sequence
// using residue_set, apply the stored fold tree, copy the stored coordinates
// and secondary structure, then call finish_pose( pose ).
424  core::pose::make_pose_from_sequence( pose, sequence(), residue_set );
425  tr.Debug << "SEQUENCE: " << sequence();
426  tr.Debug << "FOLD TREE: " << fold_tree();
427 
428  // set fold_tree
429  pose.fold_tree( fold_tree() );
430 
431  // WE DON'T NEED THIS, SINCE XYZ OF ALL ATOMS IS DEFINED... RIGHT?
432  // set jumps
433  // for ( Size nr = 1; nr <= fold_tree().num_jump(); nr++) {
434  // if ( !bJumps_use_IntraResStub_ ) { //default modern file-format
435  // pose.set_jump( nr, jump( nr ) );
436  // }
437  // }
438 
439  tr.Debug << "nres = " << nres() << std::endl;
440  tr.Debug << "one_letter_sequence() = " << one_letter_sequence().length() << std::endl;
441 
// Hard stop if the stored residue count and the sequence length disagree.
442  if( nres() != one_letter_sequence().length() ){
443  utility_exit_with_message( "RuntimeAssert failed: nres() == one_letter_sequence().length()" );
444  }
445 
446  // coords
447  for ( Size seqpos = 1; seqpos <= nres(); ++seqpos ) {
// Only the atoms present on both sides (pose residue type and stored
// coordinates) are copied; a count mismatch is warned about, not fatal.
448  int natoms_pose = pose.residue_type(seqpos).natoms() ,
449  natoms_struct = atm_coords_[seqpos].size();
450  int natoms_total = std::min( natoms_pose , natoms_struct );
451 
452  if ( natoms_pose != natoms_struct) {
453  tr.Warning << "[ WARNING ] Number of atoms in pose and silent file disagree! ";
454  tr.Warning << "Attempting to continue ..." << std::endl;
455  tr.Warning << "[ WARNING ] (in residue "
456  << seqpos << " natoms_pose=" << natoms_pose
457  << " natoms_struct=" << natoms_struct << ")" << std::endl;
458  }
459 
// These two assignments repeat the expressions used a few lines above;
// nothing visible in between modifies the pose.
460  natoms_pose = pose.residue_type(seqpos).natoms();
461  natoms_total = std::min( natoms_pose, natoms_struct );
462 
463  for ( int j = 1; j <= natoms_total; ++j ){
464  id::AtomID id( j, seqpos );
// Rebuild the stored coordinate component-wise as a core::Real vector.
465  numeric::xyzVector< core::Real> atom_i(atm_coords_[seqpos][j][0], atm_coords_[seqpos][j][1], atm_coords_[seqpos][j][2]);
466  pose.set_xyz( id, atom_i );
467  }
468  pose.set_secstruct( seqpos, secstruct_[seqpos] );
469 
470  } // for ( seqpos )
471 
472  tr.Debug << "Hallelujah! " << pose.total_residue() << std::endl;
473 
474  finish_pose( pose );
475 
476 } // fill_pose
477 
/// @brief Write the REMARK line that marks a silent file as binary RNA,
/// with " COARSE" appended when fullatom_ is false.
/// @param out  stream to write to
478 void
479 BinaryRNASilentStruct::print_header( std::ostream& out ) const
480 {
// NOTE(review): one statement ahead of this branch is not visible in this
// view (embedded numbering skips 481) -- confirm against the original file.
482  if ( fullatom_ ) {
483  out << "REMARK BINARY_SILENTFILE RNA \n";
484  } else {
485  out << "REMARK BINARY_SILENTFILE RNA COARSE\n";
486  }
487 }
488 
489 
491  std::ostream & output
492 ) const {
// Write this decoy's conformation: an optional FOLD_TREE line, one line per
// jump, the ANNOTATED_SEQUENCE line, then one encoded coordinate line per
// residue. Every line ends with the decoy tag.
493  if ( fold_tree().size() > 1 || fold_tree().num_jump() > 0 ) { //assume non-trivial fold_tree only if more than one edge, i.e., EDGE 1 <nres> -1
494  output << "FOLD_TREE ";
// NOTE(review): the opening line of this loop is not visible in this view
// (embedded numbering skips 495); it iterates over the fold tree's elements
// from begin() to end().
496  it = fold_tree().begin(), it_end = fold_tree().end();
497  it != it_end; ++it
498  ) {
499  output << *it;
500  }
501  // output << fold_tree(); this produces a new-line --- wrong behaviour
502  // of fold_tree but I don't want to fix 1000 u-tracer unit-tests!
503  output << ' ' << decoy_tag() << "\n";
504  }
505  for ( Size i = 1; i <= fold_tree().num_jump(); i++ ) {
506  output << jump( i ) << ' ' << decoy_tag() << "\n";
507  }
508  output << "ANNOTATED_SEQUENCE: " << sequence() << " " << decoy_tag() << "\n"; //chu print annotated_sequence per decoy
509  //tr.Debug << "FOLD_TREE Size: " << fold_tree().size() << " " << fold_tree() << std::endl;
510 
511  // fullatom flag
512  //int fullatom_flag = (fullatom_? 1 : 2);
// resline is declared once and handed to encode6bit for every residue --
// presumably encode6bit overwrites rather than appends; TODO confirm.
513  std::string resline;
514  //encode6bit( (unsigned char*)&fullatom_flag, 4, resline );
515  //output << resline << "\n";
516 
517  for ( Size i = 1; i <= nres(); ++i ) {
518  // make sure secstruct is valid
519  char this_secstr = secstruct_[i];
// Anything outside 'A'..'Z' is written as 'L'.
520  if (this_secstr < 'A' || this_secstr > 'Z') this_secstr = 'L';
// Encodes size()*12 bytes starting at the first atom's first component, so
// this assumes each residue stores at least one atom -- TODO confirm.
521  utility::encode6bit( (unsigned char*)&atm_coords_[i][1][0], atm_coords_[i].size()*12, resline ); // ASSUMES FLOAT == 4 BYTES!!! (eep!)
522  output << this_secstr << resline << ' ' << decoy_tag() << "\n";
523  } // for ( Size i = 1; i <= nres; ++i )
524 } // print_conformation
525 
526 
// Consistency check: rebuild a pose from the stored data and return the RMS
// deviation between the " C4*" atom positions of that pose and the stored
// coordinates at the same atom index.
// NOTE(review): the signature line is not visible in this view (embedded
// numbering skips 527) -- confirm name and return type against the original.
528  pose::Pose temp_pose;
529  ObjexxFCL::FArray2D< Real > rebuilt_coords ( 3, atm_coords_.size() ),
530  original_coords( 3, atm_coords_.size() );
531 
532  // build temp_pose from coordinates
533  fill_pose( temp_pose );
534 
// The atom index is looked up on residue 1 only and reused for every
// residue -- assumes " C4*" sits at the same index in all of them;
// TODO confirm.
535  Size const c4star_index = temp_pose.residue(1).atom_index( " C4*" );
536 
537  for ( Size i = 1; i <= temp_pose.total_residue(); ++i ) {
538  for ( Size k = 1; k <= 3; ++k ) { // k = X, Y and Z
539  rebuilt_coords (k,i) = temp_pose.residue(i).xyz( " C4*" )[k-1];
540  original_coords(k,i) = atm_coords_[i][c4star_index][k-1];
541  }
542  }
543 
544  Real rmsd = numeric::model_quality::rms_wrapper( temp_pose.total_residue(), rebuilt_coords, original_coords );
545  return rmsd;
546 }
547 
/// @brief Return a 3 x nres() array holding, for each residue, the stored
/// x, y, z of the atom at index 2.
// NOTE(review): the name line is not visible in this view (embedded numbering
// skips 549). Index 2 is hard-coded, so every residue must store at least two
// atoms; which atom that is for RNA residues is not established here --
// TODO confirm.
548 ObjexxFCL::FArray2D< Real >
550  core::Size n_residues = nres();
551  ObjexxFCL::FArray2D< Real > my_coords( 3, n_residues );
552  for ( Size i = 1; i <= n_residues; ++i ) { // i = n_residues
553  for ( Size k = 1; k <= 3; ++k ) { // k = X, Y and Z
554  my_coords(k,i) = atm_coords_[i][2][k-1];
555  } // k
556  } // i
557 
558  return my_coords;
559 } // get_CA_positions
560 
// RMS deviation between this struct's get_CA_xyz() coordinates and those of
// other_pss, computed by rms_wrapper over this struct's nres() positions.
// NOTE(review): the signature line is not visible in this view (embedded
// numbering skips 561). The count comes from this struct only -- presumably
// other_pss is expected to have the same number of residues; verify callers.
562  ObjexxFCL::FArray2D< Real > my_coords = get_CA_xyz();
563  ObjexxFCL::FArray2D< Real > other_coords = other_pss.get_CA_xyz();
564  Real rmsd = numeric::model_quality::rms_wrapper( nres(), my_coords, other_coords );
565 
566  return rmsd;
567 } // RNA_SilentStruct::CA_rmsd
568 
569 
570 } // namespace silent
571 } // namespace io
572 } // namespace core