Rosetta 3.5
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
RNA_SilentStruct.cc
Go to the documentation of this file.
1 // -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
2 // vi: set ts=2 noet:
3 // :noTabs=false:tabSize=4:indentSize=4:
4 //
5 // (c) Copyright Rosetta Commons Member Institutions.
6 // (c) This file is part of the Rosetta software suite and is made available under license.
7 // (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
8 // (c) For more information, see http://www.rosettacommons.org. Questions about this can be
9 // (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
10 
11 /// @file core/io/silent/RNA_SilentStruct.cc
12 ///
13 /// @brief Representation of rosetta++ RNA silent-file structures.
14 /// @author James Thompson
15 
16 // C++ Headers
17 #include <cmath>
18 #include <cstdlib>
19 // AUTO-REMOVED #include <fstream>
20 #include <iostream>
21 // AUTO-REMOVED #include <utility>
22 #include <vector>
23 // AUTO-REMOVED #include <list>
24 #include <string>
25 #include <map>
26 #include <sstream>
27 
28 // mini headers
29 #include <ObjexxFCL/FArray2D.hh>
30 #include <ObjexxFCL/string.functions.hh>
31 
32 // AUTO-REMOVED #include <utility/io/izstream.hh>
33 // AUTO-REMOVED #include <utility/io/ozstream.hh>
34 // AUTO-REMOVED #include <utility/file/file_sys_util.hh>
35 #include <utility/exit.hh>
36 
37 #include <basic/Tracer.hh>
38 
39 // AUTO-REMOVED #include <core/chemical/ResidueTypeSet.hh>
40 #include <core/id/TorsionID.hh>
48 
50 
51 // AUTO-REMOVED #include <core/id/AtomID.hh>
52 #include <core/pose/Pose.hh>
54 
55 #include <numeric/model_quality/rms.hh>
56 
58 #include <utility/vector1.hh>
59 #include <numeric/xyz.functions.hh>
60 #include <ObjexxFCL/format.hh>
61 
62 
63 namespace core {
64 namespace io {
65 namespace silent {
66 
67 using namespace ObjexxFCL;
68 using namespace ObjexxFCL::fmt;
69 
70 static basic::Tracer tr("core.io.silent");
71 
72 /////////////////////////////////////////////////////////////////////////
73 // Following should be easy to generalize for protein, RNA, DNA.
74 // This may eventually be critical as we start to look at mixed systems.
75 // For now, just for safety and (perhaps) to avoid confusion,
76 // we'll go ahead and make this a separate class.
77 // -- Rhiju, April 2008
78 /////////////////////////////////////////////////////////////////////////
79 
81  core::pose::Pose const & pose,
82  std::string tag,
83  bool fa
84 ) :
85  fullatom_( fa )
86 {
87  fill_struct( pose, tag );
88 } // RNA_SilentStruct
89 
90 // RNA_SilentStruct::RNA_SilentStruct( core::io::silent::RNA_SilentStruct const & src )
91 // {
92 // nres( src.nres_ );
93 // fullatom_ = src.fullatom_;
94 // non_main_chain_sugar_coords_defined_ = src.non_main_chain_sugar_coords_defined_;
95 // resize( nres );
96 // for (Size n = 1; n <= nres; n++ ) {
97 // secstruct_[ n ] = src.secstruct_[ n ];
98 // for (Size k = 1; k <= src.mainchain_torsions_[n].size();k++ ) {
99 // mainchain_torsions_[n][k] = src.mainchain_torsions_[n][k];
100 // }
101 // }
102 // }
103 
104 void
106  decoy_tag( tag );
107 
108  energies_from_pose( pose );
109 
110  // conformation information
111  sequence( pose.sequence() );
112  resize( pose.total_residue() );
113  static const std::string important_atom = "C4*";
114  for ( Size i = 1; i <= pose.total_residue(); ++i ) {
115  core::conformation::Residue resi = pose.residue(i);
116 
117  secstruct_[i] = pose.secstruct(i);
118  // I wonder if we can just grab these torsions...
120  coords_[i] = resi.xyz( important_atom );
121  if ( fullatom() ) {
122  chi_torsions_[i] = resi.chi();
123  } // if ( fullatom )
124 
125  //New (Feb. 2009)...
126  // x-y-z of coordinates of C2*, C1*, and O4*, in a local coordinate system defined
127  // by C3*, C4*, and C5* (as "stub" atoms).
128  {
130  kinematics::Stub const input_stub( resi.xyz( " C3*" ), resi.xyz( " C3*" ), resi.xyz( " C4*" ), resi.xyz( " C5*" ) );
133  for (Size n = 1; n <= scoring::rna::non_main_chain_sugar_atoms.size(); n++ ) {
134  Vector v = input_stub.global2local( resi.xyz( scoring::rna::non_main_chain_sugar_atoms[ n ] ) );
135  vecs.push_back( v );
136  }
138  }
139 
140  } // for ( Size i = 1; i <= pose.total_residue(); ++i )
141 
142  fold_tree_ = pose.fold_tree();
143  jumps_.clear();
144  for ( Size nr = 1; nr <= fold_tree().num_jump(); nr++) {
145  add_jump( pose.jump(nr) );
146  }
147 } // RNA_SilentStruct
148 
149  //Following should be easy to generalize for protein vs. RNA.
150 
152  utility::vector1< std::string > const & lines,
153  SilentFileData & container
154 ) {
155  bool success( false );
156 
157  utility::vector1< std::string > energy_names_;
159  if ( iter->substr(0,9) != "SEQUENCE:" ) {
160  // get sequence and scorename data from the silent-file data object, because I don't have it!
161  EnergyNamesOP enames = EnergyNamesOP(
162  static_cast< EnergyNames * > ( container.get_shared_silent_data( energynames )() )
163  );
164 
166  static_cast< SimpleSequenceData * > ( container.get_shared_silent_data( simplesequencedata )() )
167  );
168 
169  sequence( seqdata->sequence() );
170  energy_names_ = enames ->energy_names();
171  } else {
172  // get sequence and scorename data from the first two lines provided, put them into container for further use
173  // by other RNA_SilentStruct objects.
174 
175  // first line is SEQUENCE:
176  std::istringstream line_stream( *iter );
177  std::string tag;
178  tr.Debug << "reading sequence from " << *iter << std::endl;
179  ++iter;
180 
181  std::string temp_seq;
182  line_stream >> tag >> temp_seq;
183  if ( line_stream.fail() || tag != "SEQUENCE:" ) {
184  tr.Error << "bad format in sequence line of silent file" << std::endl;
185  tr.Error << "line = " << *iter << std::endl;
186  tr.Error << "tag = " << tag << std::endl;
187  return success;
188  }
189  sequence( temp_seq );
190 
191  // second line is a list of score names
192  std::istringstream score_line_stream( *iter );
193  tr.Debug << "reading score names from " << *iter << std::endl;
194  ++iter;
195 
196  score_line_stream >> tag; // SCORE:
197  if ( score_line_stream.fail() || tag != "SCORE:" ) {
198  tr.Error << "bad format in second line of silent file" << std::endl;
199  tr.Error << "tag = " << tag << std::endl;
200  tr.Error << "line = " << *iter << std::endl;
201  }
202 
203  score_line_stream >> tag; // first score name
204  while ( ! score_line_stream.fail() ) {
205  energy_names_.push_back( tag );
206  score_line_stream >> tag; // try to get next score name
207  }
208 
209  EnergyNamesOP enames( new EnergyNames() );
210  SimpleSequenceDataOP seqdata( new SimpleSequenceData() );
211 
212  enames ->energy_names( energy_names_ );
213  seqdata->set_sequence( sequence() );
214 
215  container.set_shared_silent_data( energynames , enames );
216  container.set_shared_silent_data( simplesequencedata, seqdata );
217  } // get header information
218 
219  // resize myself appropriately, according to length of sequence
220  resize( sequence().length() );
221 
222  for ( utility::vector1< std::string >::const_iterator end = lines.end(); iter != end; ++iter ) {
223  std::string tag;
224  std::istringstream line_stream( *iter );
225 
226  if ( iter->substr(0,7) == "SCORE: " ) { // SCORE: line with values from this structure.
227  resize( sequence().length() ); // sequence_ should be defined by now.
228 
229  std::string tag;
230  line_stream >> tag;
231  if ( line_stream.fail() || tag != "SCORE:" ) {
232  tr.Error << "bad format in first score line of silent file" << std::endl;
233  tr.Error << "line = " << *iter << std::endl;
234  tr.Error << "tag = " << tag << std::endl;
235  }
236 
238  for ( energy_iter = energy_names_.begin(); energy_iter != energy_names_.end(); ++energy_iter ) {
239  line_stream >> tag;
240  if ( *energy_iter != "description" ) { // currently the only text-based field, might change in future.
241  Real score_val = (Real) float_of( tag );
242  add_energy( *energy_iter, score_val );
243  } else {
244  line_stream >> tag;
245  }
246  } // for ( energy_iter ... )
247  decoy_tag( tag ); // decoy_tag should be last column of this line.
248  } else { // conformation lines
249  // parse fold_tree and jump lines
250  if ( iter->substr(0,10) == "FOLD_TREE " ) {
252  line_stream >> f;
253  set_fold_tree( f ); // add fold-tree to this SilentStruct
254  tr.Debug << "read fold-tree " << f; //"\n" is in fold-tree output
255  tr.Debug << "reading " << f.num_jump() << " jumps " << std::endl;
256  continue;
257  } else if ( iter->substr(0,2) == "RT" ) {
259  line_stream >> jump;
260  tr.Debug << "read jump " << jump << std::endl;
261  add_jump( jump );
262  continue;
263  } else if ( iter->substr(0,9) == "SEQUENCE:" ) {
264  //tr.Warning << "skipping duplicate sequence declaration " << std::endl;
265  continue;
266  } else if ( iter->substr(0,6) == "REMARK" ) {
267  //tr.Warning << "skipping duplicate sequence declaration " << std::endl;
268  continue;
269  } else if ( iter->substr(0,6) == "REMARK" ) {
270  continue;
271  }
272 
273  // parse ss,torsions, and c-alpha coords
274  int seqpos;
275  Real x, y, z, torsion_value;
276  char ss;
277  utility::vector1< Real > temp_mainchain_torsions, temp_chi_torsions;
278 
279  line_stream >> tag;
280  if ( !is_int( tag ) ) {
281  tr.Error << "ERROR: !is_int( " << tag << " ) from line (" << *iter << ")\n";
282  }
283  runtime_assert( is_int( tag ) ); // this tag should represent the sequence position within the silent-file
284  seqpos = int_of( tag );
285  line_stream >> ss;
286 
287  // It would be nice to not hard-wire these values --
288  // since we'll eventually need to use RNA (or protein) .params files
289  // to create the pose, couldn't we look up the residue
290  // and figure out how many torsions are required?
291  for ( Size n = 1; n <= core::scoring::rna::NUM_RNA_MAINCHAIN_TORSIONS; n++ ){
292  line_stream >> torsion_value;
293  temp_mainchain_torsions.push_back( torsion_value );
294  }
295  if (fullatom_) {
296  for ( Size n = 1; n <= core::scoring::rna::NUM_RNA_CHI_TORSIONS; n++ ){
297  line_stream >> torsion_value;
298  temp_chi_torsions.push_back( torsion_value );
299  }
300  }
301 
302  //Added Feb. 2009... information on C2*, C1*, and O4* -- in general they have
303  // varying bond lengths and bond angles to keep sugar ring closed.
304  line_stream >> x >> y >> z;
305  Vector temp_vec( x, y, z );
306 
307  line_stream >> tag;
308 
309  if ( is_float( tag ) /* New silent format with extra info on sugar atoms*/){
311  vecs.push_back( temp_vec );
312 
313  x = float_of( tag );
314  line_stream >> y >> z;
315  temp_vec = Vector( x, y, z );
316  vecs.push_back( temp_vec );
317 
318  line_stream >> x >> y >> z;
319  temp_vec = Vector( x, y, z );
320  vecs.push_back( temp_vec );
321 
322  set_non_main_chain_sugar_coords( seqpos, vecs );
323 
324  line_stream >> x >> y >> z;
325  temp_vec = Vector( x, y, z );
326 
327  line_stream >> tag;
328  }
329 
330  set_secstruct( seqpos, ss );
331  set_coords ( seqpos, temp_vec );
332  set_mainchain_torsions( seqpos, temp_mainchain_torsions );
333  set_chi_torsions ( seqpos, temp_chi_torsions );
334 
335 
336  if ( tag != decoy_tag() ) { // decoy_tag should be last tag.
337  tr.Warning << "parse error(" << *iter << ") " << tag << " != " << decoy_tag() << std::endl;
338  success = false;
339  break;
340  }
341  } // conformation lines
342  } // for ( iter ... )
343  // if no fold-tree available generate a standard tree
344  if ( fold_tree().size() < 1 ) {
346  tr.Debug << " generating simple fold-tree " << fold_tree();
347  }
348 
349  success = true;
350  return success;
351 } // init_from_lines
352 
353 /// @brief Resize this silent-struct to the appropriate number of residues.
354 void
356  Size const nres_in
357 ) {
358  nres( nres_in );
359  secstruct_.resize( nres() );
360  coords_ .resize( nres() );
361  mainchain_torsions_.resize( nres() );
362  chi_torsions_ .resize( nres() );
365 }
366 
367 /// @brief Fill a Pose with the data in this RNA_SilentStruct.
369  core::pose::Pose & pose
370 ) const {
371  using namespace core::chemical;
372  ResidueTypeSetCAP residue_set;
373  if ( fullatom() ) {
375  }
376  fill_pose( pose, *residue_set );
377 } // fill_pose
378 
380  core::pose::Pose & pose,
381  core::chemical::ResidueTypeSet const & /*residue_set*/
382 ) const {
383  using namespace core::chemical;
384 
385  bool const use_input_pose( false ); // tex hack for refactoring!
386  if (use_input_pose) {
387  tr.Info << "Using bond lengths and angles from an input pose." << std::endl;
388  } else {
389  tr.Info << "Using ideal geometry from params files..." << std::endl;
390  //RHIJU HACK!
391  //tr.Info << "USING RNA PARAMS FILES " << std::endl;
392  static const ResidueTypeSetCAP rna_residue_set = ChemicalManager::get_instance()->residue_type_set( RNA );
393  core::pose::make_pose_from_sequence( pose, sequence(), *rna_residue_set );
394  }
395  tr.Debug << "FOLD TREE: " << fold_tree();
396 
397 
398  // set fold_tree
399  pose.fold_tree( fold_tree() );
400 
401  // set jumps
402  for ( Size nr = 1; nr <= fold_tree().num_jump(); nr++) {
403  pose.set_jump( nr, jump( nr ) );
404  }
405 
406  assert( nres() == sequence().length() );
407 
408  for ( Size seqpos = 1; seqpos <= nres(); ++seqpos ) {
409 
410  // It would be nice to not hard-wire these values --
411  // since we'll eventually need to use RNA (or protein) .params files
412  // to create the pose, couldn't we look up the residue
413  // and figure out how many torsions are required?
414  for ( Size n = 1; n <= core::scoring::rna::NUM_RNA_MAINCHAIN_TORSIONS; n++ ){
415  id::TorsionID rna_torsion_id( seqpos, id::BB, n );
416  // std::cout << rna_torsion_id << " " << mainchain_torsions_[ seqpos ][n ] << std::endl;
417  pose.set_torsion( rna_torsion_id,
418  mainchain_torsions_[seqpos][n] );
419  }
420 
421 
422  if (fullatom_) {
423  for ( Size n = 1; n <= core::scoring::rna::NUM_RNA_CHI_TORSIONS; n++ ){
424  id::TorsionID rna_torsion_id( seqpos, id::CHI, n );
425  pose.set_torsion( rna_torsion_id,
426  chi_torsions_[seqpos][n] );
427  }
428  }
429 
430  pose.set_secstruct( seqpos, secstruct_[seqpos] );
431  }
432 
433 
435  //Force one refold.
436  pose.residue(1).xyz( 1 );
437 
438  pose::Pose const & reference_pose = pose; /*try to avoid refolds*/
439  for ( Size seqpos = 1; seqpos <= nres(); ++seqpos ) {
440  scoring::rna::apply_non_main_chain_sugar_coords( non_main_chain_sugar_coords_[ seqpos ], pose, reference_pose, seqpos );
441  }
442  }
443 
444  finish_pose( pose );
445 } // fill_pose
446 
447 
/// @brief Write the silent-file header for this struct to the given stream.
/// @param out  Stream that receives the header text.
/// @details The visible body emits the literal marker line "REMARK RNA \n".
448 void
449 RNA_SilentStruct::print_header( std::ostream& out ) const
450 {
// NOTE(review): listing line 451 is absent from this rendering (the embedded
// numbering jumps from 450 to 452), so this function may emit additional
// header text before the REMARK line -- confirm against the upstream source.
452  out << "REMARK RNA \n";
453 }
454 
/// @brief Write the conformation section of this struct to a stream.
/// @param output  Stream that receives the fold tree, jumps and one line per residue.
/// @details Output order, as written by the code below:
///   1. the fold tree, only when it has more than one edge;
///   2. one line per jump, for jumps 1..fold_tree().num_jump();
///   3. one line per residue i in 1..nres() containing, in order: the residue
///      index, the secondary-structure character, the main-chain torsions,
///      the chi torsions (only when fullatom_ is set), the non-main-chain
///      sugar coordinates, the coords_ entry (x, y, z), and the decoy tag.
455 void RNA_SilentStruct::print_conformation( std::ostream & output ) const {
456 
457  if ( fold_tree().size() > 1 ) { //assume non-trivial fold_tree only if more than one edge, i.e., EDGE 1 <nres> -1
458  output << fold_tree();
459  }
// Jumps are written regardless of whether the fold tree itself was printed above.
460  for ( Size i = 1; i <= fold_tree().num_jump(); i++ ) {
461  output << jump( i ) << "\n";
462  }
463 
// Debug-only trace of the fold tree; goes to the tracer, not to 'output'.
464  tr.Debug << "FOLD_TREE Size: " << fold_tree().size() << " " << fold_tree()
465  << std::endl;
466  for ( Size i = 1; i <= nres(); ++i ) {
467  // make sure secstruct is valid
// Any character outside 'A'..'Z' is replaced by 'L' before printing.
468  char this_secstr = secstruct_[i];
469  if (this_secstr < 'A' || this_secstr > 'Z')
470  this_secstr = 'L';
471 
// I( 4, i ) is the ObjexxFCL fmt integer formatter; presumably width 4 -- see ObjexxFCL/format.hh.
472  output << I( 4, i ) << ' '
473  << this_secstr << ' ';
474 
// Torsions are written with F( 9, 3, ... ); presumably width 9 with 3 decimals.
// The torsion counts are the fixed constants from core::scoring::rna, not the
// sizes of the stored per-residue vectors.
475  for ( Size n = 1; n <= core::scoring::rna::NUM_RNA_MAINCHAIN_TORSIONS; n++ ){
476  output << F( 9, 3, mainchain_torsions_[i][n] );
477  }
478  if ( fullatom_ ) {
479  for ( Size n = 1; n <= core::scoring::rna::NUM_RNA_CHI_TORSIONS; n++ ){
480  output << F( 9, 3, chi_torsions_[i][n] );
481  }
482  }
483 
484  //New, Feb. 2009
// NOTE(review): listing line 485 is absent from this rendering (numbering jumps
// from 484 to 486). The closing brace on listing line 491 presumably pairs with
// a conditional opened on that missing line -- confirm against the upstream source.
// Sugar coordinates use a wider field, F( 12, 6, ... ), than the torsions and coords_.
486  for (Size n = 1; n <= non_main_chain_sugar_coords_[i].size(); n++ ) {
487  output << F( 12, 6, non_main_chain_sugar_coords_[i][n].x() )
488  << F( 12, 6, non_main_chain_sugar_coords_[i][n].y() )
489  << F( 12, 6, non_main_chain_sugar_coords_[i][n].z() );
490  }
491  }
492 
// Stored per-residue coordinate (coords_[i]), written as x, y, z.
493  output << F( 9, 3, coords_[i].x() )
494  << F( 9, 3, coords_[i].y() )
495  << F( 9, 3, coords_[i].z() );
496 
// Each residue line ends with the decoy tag.
497  output << ' ' << decoy_tag();
498  output << "\n";
499  } // for ( Size i = 1; i <= nres; ++i )
500 } // print_conformation
501 
503  pose::Pose temp_pose;
504  FArray2D< Real > rebuilt_coords (3, coords_.size() ), original_coords( 3, coords_.size() );
505  static std::string atom_name = "C4*";
506 
507  // build temp_pose from coordinates
508  fill_pose( temp_pose );
509 
510  for ( Size i = 1; i <= temp_pose.total_residue(); ++i ) {
511  for ( Size k = 1; k <= 3; ++k ) { // k = X, Y and Z
512  rebuilt_coords (k,i) = temp_pose.residue(i).xyz( atom_name )[k-1];
513  original_coords(k,i) = coords_[i][k-1];
514  }
515  }
516 
517  Real rmsd = numeric::model_quality::rms_wrapper( temp_pose.total_residue(), rebuilt_coords, original_coords );
518  return rmsd;
519 }
520 
521 ObjexxFCL::FArray2D< Real >
523  core::Size n_residues = nres();
524  FArray2D< Real > my_coords( 3, n_residues );
525  for ( Size i = 1; i <= n_residues; ++i ) { // i = n_residues
526  for ( Size k = 1; k <= 3; ++k ) { // k = X, Y and Z
527  my_coords(k,i) = coords_[i][k-1];
528  } // k
529  } // i
530 
531  return my_coords;
532 } // get_CA_positions
533 
535  FArray2D< Real > my_coords = get_CA_xyz();
536  FArray2D< Real > other_coords = other_pss.get_CA_xyz();
537  Real rmsd = numeric::model_quality::rms_wrapper( nres(), my_coords, other_coords );
538 
539  return rmsd;
540 } // RNA_SilentStruct::CA_rmsd
541 
543  RNA_SilentStruct const &
544 )
545 {
546  utility_exit_with_message( "called ProteinSilentStruct::operator=)" );
547  exit(0); // just to keep the compiler happy
548 }
549 
550 } // namespace silent
551 } // namespace io
552 } // namespace core