Rosetta 3.5
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
RNA_FragmentsClasses.cc
Go to the documentation of this file.
1 // -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
2 // vi: set ts=2 noet:
3 // CVS information:
4 // $Revision: 1.1.2.1 $
5 // $Date: 2005/11/07 21:05:35 $
6 // $Author: pbradley $
7 // (c) Copyright Rosetta Commons Member Institutions.
8 // (c) This file is part of the Rosetta software suite and is made available under license.
9 // (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
10 // (c) For more information, see http://www.rosettacommons.org. Questions about this can be
11 // (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
12 
13 
14 // Rosetta Headers
17 
19 #include <core/pose/Pose.hh>
20 
21 // AUTO-REMOVED #include <ObjexxFCL/ObjexxFCL.hh>
22 #include <ObjexxFCL/FArray1D.hh>
23 #include <ObjexxFCL/FArray2D.hh>
24 // AUTO-REMOVED #include <ObjexxFCL/FArray4D.hh>
25 #include <ObjexxFCL/StaticIndexRange.hh>
26 
27 #include <ObjexxFCL/format.hh>
28 
29 #include <utility/io/izstream.hh>
30 #include <utility/exit.hh>
31 
32 // AUTO-REMOVED #include <numeric/random/random.hh>
33 #include <numeric/xyzVector.hh>
34 
35 #include <core/types.hh>
36 
37 // C++ headers
38 #include <fstream>
39 #include <iostream>
40 
41 //Auto Headers
42 #include <numeric/random/random.fwd.hh>
43 
44 //Auto using namespaces
45 namespace ObjexxFCL { namespace fmt { } } using namespace ObjexxFCL::fmt; // AUTO USING NS
46 //Auto using namespaces end
47 
48 
49 
50 namespace protocols{
51 namespace rna{
52 
53  using core::Size;
54  using core::Real;
55 
56  /////////////////////////////////////////////////////////////////////////////////////////////////
57  /////////////////////////////////////////////////////////////////////////////////////////////////
58  /////////////////////////////////////////////////////////////////////////////////////////////////
59  TorsionSet::TorsionSet( Size const size ){
60  torsions.dimension( core::scoring::rna::NUM_RNA_TORSIONS, SRange(0, size) );
61  torsion_source_name.dimension( SRange(0, size), std::string( 4, ' ' ) );
62  secstruct.dimension( SRange(0, size), 'L' );
63  non_main_chain_sugar_coords_defined = false;
64  size_ = size;
65  }
66 
67 //////////////////////////////////////////////////////////////////////
68  TorsionSet &
69  TorsionSet::operator =(
70  TorsionSet const & src
71  ){
72  size_ = src.size_;
73 
74  for (Size offset = 0; offset < size_; offset++){
75  for (Size j = 1; j <= core::scoring::rna::NUM_RNA_TORSIONS; j++ ){
76  torsions( j, offset) = src.torsions( j, offset);
77  }
78  torsion_source_name( offset ) = src.torsion_source_name( offset );
79  secstruct( offset ) = src.secstruct( offset );
80  }
81 
82  non_main_chain_sugar_coords_defined = src.non_main_chain_sugar_coords_defined;
83 
84  if (non_main_chain_sugar_coords_defined) {
85  non_main_chain_sugar_coords.dimension( SRange(0,size_), 3, 3 );
86  for (Size offset = 0; offset < size_; offset++){
87  for (Size j = 1; j <= 3; j++ ) {
88  for (Size k = 1; k <= 3; k++ ) {
89  non_main_chain_sugar_coords( offset, j, k ) =
90  src.non_main_chain_sugar_coords( offset, j, k );
91  }
92  }
93  }
94  }
95 
96  return *this;
97  }
98 
99  /////////////////////////////////////////////////////////////////////////////////////////////////
100  /////////////////////////////////////////////////////////////////////////////////////////////////
101  /////////////////////////////////////////////////////////////////////////////////////////////////
102  Real FragmentLibrary::get_fragment_torsion( Size const num_torsion, Size const which_frag, Size const offset ){
103  return align_torsions_[ which_frag - 1 ].torsions( num_torsion, offset) ;
104  }
105 
106  /////////////////////////////////////////////////////////////////////////////////////////////////
107  TorsionSet const FragmentLibrary::get_fragment_torsion_set( Size const which_frag ){
108  return align_torsions_[ which_frag - 1 ];
109  }
110 
111  /////////////////////////////////////////////////////////////////////////////////////////////////
112  void FragmentLibrary::add_torsion( TorsionSet const torsion_set ){
113  align_torsions_.push_back( torsion_set );
114  }
115 
116  /////////////////////////////////////////////////////////////////////////////////////////////////
// Build a TorsionSet of length `size` from the vall entries starting at
// `position`, and append it to align_torsions_.
//
//   vall      source of torsions, names, secondary structure and sugar coordinates
//   position  vall index of the first entry to copy (offset 0 of the new set)
//   size      number of consecutive vall entries to copy
117  void FragmentLibrary::add_torsion(
118  RNA_Fragments const & vall,
119  Size const position,
120  Size const size
121  )
122  {
123  TorsionSet torsion_set( size );
124 
// For each offset, copy the entry at vall index position+offset.
125  for (Size offset = 0; offset < size; offset++){
126  for (Size j = 1; j <= core::scoring::rna::NUM_RNA_TORSIONS; j++ ){
127  torsion_set.torsions( j, offset) = vall.torsions( j, position+offset);
128  }
129  torsion_set.torsion_source_name( offset ) = vall.name( position+offset );
130  torsion_set.secstruct( offset ) = vall.secstruct( position+offset );
131 
132  //Defined non-ideal geometry of sugar ring -- to keep it closed.
// NOTE(review): the embedded listing numbers jump from 132 to 134 and the
// `} else {` below has no visible opening `if`. The condition line appears to
// have been dropped from this listing -- restore it from the original file
// before building.
134  torsion_set.non_main_chain_sugar_coords_defined = true;
// The sugar-coordinate array is (re)dimensioned on every pass through the
// offset loop, then the 3x3 block for this offset is copied from the vall.
135  torsion_set.non_main_chain_sugar_coords.dimension( SRange(0,size), 3, 3 );
136  for (Size j = 1; j <= 3; j++ ){
137  for (Size k = 1; k <= 3; k++ ){
138  torsion_set.non_main_chain_sugar_coords( offset, j, k ) =
139  vall.non_main_chain_sugar_coords( position+offset, j, k );
140  }
141  }
142  } else {
143  torsion_set.non_main_chain_sugar_coords_defined = false;
144  }
145 
146  }
147 
// Store the finished fragment in the library.
148  align_torsions_.push_back( torsion_set );
149  }
150 
151 
152  /////////////////////////////////////////////////////////////////////////////////////////////////
153  Size FragmentLibrary::get_align_depth() {
154  return align_torsions_.size();
155  }
156 
157  ///////////////////////////////////////////////////////////////////////////////////////
158  ///////////////////////////////////////////////////////////////////////////////////////
159  void
160  RNA_Fragments::pick_fragment_library( SequenceSecStructPair const & key ){
161 
162  //Don't worry, I coded a destructor in later.
163  FragmentLibrary * fragment_library_p;
164  fragment_library_p = new FragmentLibrary;
165 
166  std::string const RNA_string = key.first;
167  std::string const RNA_secstruct_string = key.second;
168 
169  Size const size = RNA_string.length();
170 
171  runtime_assert( RNA_string.length() == RNA_secstruct_string.length() );
172 
173  // dummy initialization.
174  std::string vall_current_sequence ( RNA_string );
175  std::string vall_current_secstruct( RNA_secstruct_string );
176 
177  for (Size i = 1; i <= vall_size_ - size + 1; i++ ){
178 
179  bool match( true );
180 
181  for (Size offset = 0; offset < size; offset++ ){
182  vall_current_sequence [offset] = vall_sequence_ ( i + offset );
183  vall_current_secstruct[offset] = vall_secstruct_( i + offset );
184 
185  if ( /*vall_is_chainbreak_( i + offset ) ||*/
186  !compare_RNA_char( vall_current_sequence[offset], RNA_string[ offset ] ) ||
187  !compare_RNA_secstruct( vall_current_secstruct[offset], RNA_secstruct_string[ offset ] ) ) {
188  match = false;
189  break;
190  }
191  }
192 
193  if (match) {
194  fragment_library_p->add_torsion( *this, i, size );
195  }
196 
197  }
198 
199 
200  if ( fragment_library_p->get_align_depth() == 0 ) {
201  // Problem -- need to repick with less stringent requirements?
202  for (Size i = 1; i <= vall_size_ - size + 1; i++ ){
203 
204  bool match( true );
205 
206  for (Size offset = 0; offset < size; offset++ ){
207  vall_current_sequence [offset] = vall_sequence_ ( i + offset );
208 
209  if ( !compare_RNA_char( vall_current_sequence[offset], RNA_string[ offset ] ) ) {
210  match = false;
211  break;
212  }
213  }
214 
215  if (match) {
216  fragment_library_p->add_torsion( *this, i, size );
217  }
218 
219  }
220  }
221 
222 
223  std::cout << "Picked Fragment Library for sequence " << RNA_string << " " <<
224  " and sec. struct " << RNA_secstruct_string << " ... found " <<
225  fragment_library_p->get_align_depth() << " potential fragments" << std::endl;
226 
227  fragment_library_pointer_map[ key ] = fragment_library_p;
228 
229  }
230 
231  ///////////////////////////////////////////////////////////////////////////////////////
232  ///////////////////////////////////////////////////////////////////////////////////////
233  void
234  RNA_Fragments::pick_random_fragment(
235  TorsionSet & torsion_set,
236  std::string const RNA_string,
237  std::string const RNA_secstruct_string,
238  Size const type /* = MATCH_YR */){
239 
240  std::string const RNA_string_local = convert_based_on_match_type( RNA_string, type );
241 
242  SequenceSecStructPair const key( std::make_pair( RNA_string_local, RNA_secstruct_string ) );
243 
244  if (! fragment_library_pointer_map.count( key ) ){
245  pick_fragment_library( key );
246  }
247 
248  FragmentLibraryOP fragment_library_pointer = fragment_library_pointer_map[ key ];
249 
250  Size const num_frags = fragment_library_pointer->get_align_depth();
251 
252  if (num_frags == 0) { //trouble.
253  std::cout << "Fragment Library: zero fragments found for " << RNA_string_local << std::endl;
254  std::cerr << "Fragment Library: zero fragments found for " << RNA_string_local << std::endl;
255  utility::exit( EXIT_FAILURE, __FILE__, __LINE__);
256  }
257 
258  Size const which_frag = static_cast <Size> ( numeric::random::uniform() * num_frags) + 1;
259 
260  torsion_set = fragment_library_pointer->get_fragment_torsion_set( which_frag );
261 
262  }
263 
264  ///////////////////////////////////////////////////////////////////////////////////////
265  ///////////////////////////////////////////////////////////////////////////////////////
266  void
267  RNA_Fragments::pick_random_fragment(
268  TorsionSet & torsion_set,
269  core::pose::Pose & pose,
270  Size const position,
271  Size const size,
272  Size const type /* = MATCH_YR */){
273 
274  std::string const & RNA_sequence( pose.sequence() );
275  std::string const & RNA_string = RNA_sequence.substr( position - 1, size );
276 
277  //Desired "secondary structure".
278  // TEMPORARY HACK!!!
279  // std::string const RNA_secstruct( pose.total_residue(), 'X' );
280  std::string const & RNA_secstruct( protocols::rna::get_rna_secstruct( pose ) );
281  std::string const & RNA_secstruct_string = RNA_secstruct.substr( position - 1, size );
282 
283  pick_random_fragment( torsion_set, RNA_string, RNA_secstruct_string, type );
284 
285  }
286 
287  ///////////////////////////////////////////////////////////////////////////////////////
288  ///////////////////////////////////////////////////////////////////////////////////////
// Read the vall torsions file into temporary vectors, one record per line,
// then copy everything into the FArray members (vall_torsions_, vall_sequence_,
// vall_secstruct_, vall_is_chainbreak_, vall_edge_is_base_pairing_, vall_name_
// and, when present, vall_non_main_chain_sugar_coords_).
//
//   filename  path handed to utility::io::izstream; the program exits with a
//             message if the stream reports failure on open.
//
// NOTE(review): the embedded listing numbers skip 300-301, 308 and 351, and
// the locals `vall_torsions`, `vall_name` and `vecs` are used below without a
// visible declaration. Those declaration lines appear to have been dropped
// from this listing -- confirm against the original file.
289  void
290  RNA_Fragments::read_vall_torsions( std::string const filename ){
291 
292  //Just read in this file once.
// `init` is a function-local static, so after the first call every later call
// returns immediately, whatever filename is passed.
293  static bool init ( false );
294  if (init) return;
295  init = true;
296 
297  ///////////////////////////////////////////////////////////////
298  //A bunch of vectors for temporary readin.
299  //At the end, transfer all the data to FArrays for faster access.
302  utility::vector1< utility::vector1< Vector > > vall_non_main_chain_sugar_coords;
303  utility::vector1< char > vall_sequence;
304  utility::vector1< char > vall_secstruct;
305  utility::vector1< bool > vall_is_chainbreak;
306  utility::vector1< utility::vector1< bool > > vall_edge_is_base_pairing;
// vall_makes_canonical_base_pair is declared but not used in the visible code.
307  utility::vector1< bool > vall_makes_canonical_base_pair;
309  vall_non_main_chain_sugar_coords_defined_ = false;
310 
311 
312  ///////////////////////////////////////////////////////////////
313  std::cout << "Reading in vall_torsions file: " << filename << std::endl;
314 
315  //This will check in rosetta_database first.
316  utility::io::izstream vall_in( filename.c_str() );
317  if ( vall_in.fail() ){
318  utility_exit_with_message( "Bad vall torsions file? " + filename );
319  }
320 
// `tag` and `dummy_string` are not used in the visible code.
321  std::string line, tag;
322 
323  char dummy_char;
324  bool dummy_bool;
325  Real dummy_real;
326  std::string dummy_string;
327 
// Each line read counts as one record. The stream state is not checked after
// the extractions, so a blank or short line still produces a record.
328  Size count( 0 );
329  while ( getline( vall_in, line) ){
330 
331  std::istringstream line_stream( line );
332 
333  count++;
334 
// Field 1: one sequence character.
335  line_stream >> dummy_char;
336  vall_sequence.push_back( dummy_char );
337 
// Next NUM_RNA_TORSIONS fields: the torsion values for this record.
338  utility::vector1 < Real > dummy_vec;
339  for (Size i = 1; i <= core::scoring::rna::NUM_RNA_TORSIONS; i++ ) {
340  line_stream >> dummy_real;
341  dummy_vec.push_back( dummy_real );
342  }
343  vall_torsions.push_back( dummy_vec );
344 
// Next character is either the marker 'S' (sugar coordinates follow) or the
// secondary-structure character itself.
345  line_stream >> dummy_char;
346 
347  //In the new style fragment set... keep track of C3*, C2*, O4* coordinates
348  // explicitly, allowing for non-ideal bond lengths and bond angles.
349  if ( dummy_char == 'S' ) {
350  vall_non_main_chain_sugar_coords_defined_ = true;
// Three x,y,z triples, then the secondary-structure character.
352  Real x,y,z;
353  for (Size n = 1; n <= 3; n++ ) {
354  line_stream >> x >> y >> z;
355  vecs.push_back( Vector( x,y,z) );
356  }
357  vall_non_main_chain_sugar_coords.push_back( vecs );
358  line_stream >> dummy_char;
359  }
360 
361  vall_secstruct.push_back( dummy_char );
362 
// NUM_EDGES boolean flags for this record.
363  utility::vector1 < bool > dummy_vec2;
364  for (Size i = 1; i <= core::scoring::rna::NUM_EDGES; i++ ) {
365  line_stream >> dummy_bool;
366  dummy_vec2.push_back( dummy_bool );
367  }
368  vall_edge_is_base_pairing.push_back( dummy_vec2 );
369 
370  //vall_is_chainbreak_( count ) = 0.0;
// Final field read: the chainbreak flag.
371  line_stream >> dummy_bool;
372  vall_is_chainbreak.push_back( dummy_bool );
373 
374  //In principle could look for one more string in the vall
375  // torsions file as a "name", but for now just keep track
376  // of line number.
377  vall_name.push_back( I( 4, count ) );
378 
379  } // line_stream
380 
381  vall_size_ = count;
382 
383  vall_in.close();
384 
385  std::cout << "Lines read from vall_torsions file: " << vall_size_ << std::endl;
386 
387  ///////////////////////////////////////////////////////////////
388  // Permanent storage.
// The first axis of vall_torsions_ is dimensioned over
// SRange(0, NUM_RNA_TORSIONS); the copy loop below fills indices 1..NUM_RNA_TORSIONS.
389  vall_torsions_.dimension ( SRange(0, core::scoring::rna::NUM_RNA_TORSIONS), vall_size_ );
390  vall_sequence_.dimension ( vall_size_ );
391  vall_secstruct_.dimension ( vall_size_ );
392  vall_is_chainbreak_.dimension ( vall_size_ );
393  vall_edge_is_base_pairing_.dimension( vall_size_, core::scoring::rna::NUM_EDGES );
394  vall_name_.dimension( vall_size_ );
395 
396  if ( vall_non_main_chain_sugar_coords_defined_ ) vall_non_main_chain_sugar_coords_.dimension( vall_size_, 3, 3 );
397 
// Copy every record from the temporary vectors into the FArray members.
398  for (Size n = 1; n <= vall_size_; n++ ) {
399 
400  for (Size i = 1; i <= core::scoring::rna::NUM_RNA_TORSIONS; i++ ) {
401  vall_torsions_( i, n ) = vall_torsions[ n ][ i ];
402  }
403 
// NOTE(review): the temporary sugar-coordinate vector only grows on lines
// carrying the 'S' marker, yet it is indexed by n for every record once the
// flag is set. This presumably relies on all lines (or none) having the
// marker -- verify against the file format.
404  if (vall_non_main_chain_sugar_coords_defined_) {
405  for ( Size i = 1; i <= 3; i++ ) {
406  vall_non_main_chain_sugar_coords_( n, i, 1 ) = vall_non_main_chain_sugar_coords[ n ][ i ].x();
407  vall_non_main_chain_sugar_coords_( n, i, 2 ) = vall_non_main_chain_sugar_coords[ n ][ i ].y();
408  vall_non_main_chain_sugar_coords_( n, i, 3 ) = vall_non_main_chain_sugar_coords[ n ][ i ].z();
409  }
410  }
411 
412  vall_sequence_( n ) = vall_sequence[ n ];
413  vall_secstruct_( n ) = vall_secstruct[ n ];
414  vall_is_chainbreak_( n ) = vall_is_chainbreak[ n ];
415  for (Size i = 1; i <= core::scoring::rna::NUM_EDGES; i++ ) {
416  vall_edge_is_base_pairing_( n , i) = vall_edge_is_base_pairing[ n ][ i ];
417  }
418  vall_name_( n ) = vall_name[ n ];
419  }
420 
421 
422  }
423 
424  /////////////////////////////////////////////////////////////////////////////////////////////////
// Rewrite a sequence string according to the requested match type and return
// the result:
//   MATCH_ALL  every position becomes 'n'
//   MATCH_YR   'g' / 'a' become 'r'; every other character is asserted to be
//              'u' or 'c' and becomes 'y'
//   otherwise  the string is returned unchanged
//
// NOTE(review): the embedded listing numbers jump from 425 to 427, and the
// line carrying the function name and parameter list is not visible here. The
// body uses `RNA_string` and `type`, which are presumably its parameters.
// Restore the signature line from the original file.
425  std::string const
427 
428  std::string RNA_string_local = RNA_string;
429 
430  Size const size = RNA_string.length();
431 
432  //Obey orders to match exactly, match pyrimidine/purine, or match all.
433  if (type == MATCH_ALL){
434 
// Wildcard at every position.
435  for (Size i = 0; i < size; i++) RNA_string_local[ i ] = 'n';
436 
437  } else if ( type == MATCH_YR ) {
438 
439  for (Size i = 0; i < size; i++) {
440  if (RNA_string[ i ] == 'g' || RNA_string[ i ] == 'a' ){
441  RNA_string_local[ i ] = 'r';
442  } else {
// Anything that is not 'g' or 'a' must be 'u' or 'c' here.
443  runtime_assert( RNA_string[ i ] == 'u' || RNA_string[ i ] == 'c' );
444  RNA_string_local[ i ] = 'y';
445  }
446  }
447 
448  }
449 
450  return RNA_string_local;
451  }
452 
453  /////////////////////////////////////////////////////////////////////////////////////////////////
454  bool
455  RNA_Fragments::compare_RNA_char( char const char1, char const char2 ) {
456  //Man this is silly, there must be a more elegant way to do this.
457  if (char1 == char2) return true;
458  if (char1 == 'n' || char2 == 'n') return true;
459  if (char1 == 'r' && (char2 == 'a' || char2 == 'g')) return true;
460  if (char1 == 'y' && (char2 == 'c' || char2 == 'u')) return true;
461  if (char2 == 'r' && (char1 == 'a' || char1 == 'g')) return true;
462  if (char2 == 'y' && (char1 == 'c' || char1 == 'u')) return true;
463  return false;
464  }
465 
466  bool
467  RNA_Fragments::compare_RNA_secstruct( char const char1, char const char2 ) {
468  if (char1 == char2) return true;
469  if (char1 == 'X' || char2 == 'X' ) return true;
470  if (char1 == 'L' && ( char2 == 'N' || char2 == 'P') ) return true;
471  if (char2 == 'L' && ( char1 == 'N' || char1 == 'P') ) return true;
472  return false;
473  }
474 
475 
476 }
477 }
478