Rosetta 3.5
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
RNA_ChunkLibrary.cc
Go to the documentation of this file.
1  // -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
2  // vi: set ts=2 noet:
3  // CVS information:
4  // $Revision: 1.1.2.1 $
5  // $Date: 2005/11/07 21:05:35 $
6  // $Author: rhiju $
7  // (c) Copyright Rosetta Commons Member Institutions.
8  // (c) This file is part of the Rosetta software suite and is made available under license.
9  // (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
10  // (c) For more information, see http://www.rosettacommons.org. Questions about this can be
11  // (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
12 
13 
14 // Rosetta Headers
18 #include <core/types.hh>
19 #include <basic/Tracer.hh>
21 #include <core/pose/Pose.hh>
22 #include <core/pose/MiniPose.hh>
24 #include <core/pose/util.hh>
26 #include <core/id/AtomID.hh>
29 #include <core/scoring/rms_util.hh>
30 #include <numeric/random/random.hh>
32 #include <core/kinematics/Stub.hh>
33 #include <core/kinematics/Jump.hh>
34 // AUTO-REMOVED #include <core/kinematics/AtomTree.hh>
35 // AUTO-REMOVED #include <core/kinematics/tree/Atom.hh>
37 
38 // ObjexxFCL Headers
39 #include <ObjexxFCL/FArray1D.hh>
40 // AUTO-REMOVED #include <ObjexxFCL/format.hh>
41 #include <ObjexxFCL/string.functions.hh>
42 
43 // Numeric headers
44 // AUTO-REMOVED #include <numeric/constants.hh>
45 
46 // C++ headers
47 // AUTO-REMOVED #include <fstream>
48 #include <iostream>
49 
50 #include <utility/vector1.hh>
51 
52 
53 
54 static numeric::random::RandomGenerator RG(2380934); // <- Magic number, do not change it!
55 
56 static basic::Tracer TR( "protocols.rna.rna_chunk_library" ) ;
57 
58 namespace protocols{
59 namespace rna{
60 
61  using namespace core;
62  using namespace ObjexxFCL;
63 
64  using core::Size;
65  using core::Real;
66 
67  using core::pose::ResMap;
68 
69  ///////////////////////////////////////////////////////////////////////
// Set-up of a chunk set from ready-made MiniPose objects (presumably a ChunkSet
// constructor -- the line carrying the definition's name and its first parameter,
// mini_pose_list, is missing from this listing).
// Copies mini_pose_list and res_map into the matching members, then marks every
// atom of the FIRST mini pose as allowed in atom_id_mask_.
// NOTE(review): mini_pose_list[ 1 ] is dereferenced without checking that the list is non-empty.
71  ResMap const & res_map ) {
72 
73  mini_pose_list_ = mini_pose_list;
74 
75  res_map_ = res_map;
76 
77  // not much information in mini_pose --> assume that all atoms are OK for copying.
78  core::pose::MiniPose const & mini_pose = *(mini_pose_list[ 1 ]);
79  for ( Size i = 1; i <= mini_pose.total_residue(); i++ ){
80  for ( Size j = 1; j <= mini_pose.coords()[i].size(); j++ ){
// Mask key is AtomID( atom index j, residue index i ).
81  atom_id_mask_[ core::id::AtomID( j, i ) ] = true;
82  }
83  }
84 
85  }
86 
87  ///////////////////////////////////////////////////////////////////////
// Set-up of a chunk set from full poses (presumably a ChunkSet constructor -- the
// line carrying the definition's name and its first parameter, pose_list, is
// missing from this listing).
// Each pose is converted to a MiniPose and appended to mini_pose_list_; res_map is
// stored; atom_id_mask_ is then derived from the FIRST pose only.
// NOTE(review): pose_list[1] is dereferenced without checking that the list is non-empty.
89  ResMap const & res_map ) {
90  for ( Size n = 1; n <= pose_list.size(); n++ ) {
91  mini_pose_list_.push_back( core::pose::MiniPoseOP( new core::pose::MiniPose( *(pose_list[n]) ) ) );
92  }
93 
94 
95  res_map_ = res_map;
96 
97  core::pose::Pose const & pose = *( pose_list[1] );
98  for ( Size i = 1; i <= pose.total_residue(); i++ ){
99 
// NOTE(review): rsd is declared by value, so the residue is copied on every
// iteration; a const reference looks sufficient for the read-only use below.
100  core::conformation::Residue rsd = pose.residue( i );
101 
// Non-virtual atoms are allowed (true), virtual atoms are masked out (false).
102  for ( Size j = 1; j <= rsd.natoms(); j++ ){
103  atom_id_mask_[ core::id::AtomID( j, i ) ] = !rsd.is_virtual( j );
104  }
105 
106  // special case for magnesium, which has a couple virtual atoms that need to get moved around and to define stubs.
107  // not elegant, but I want to get this working.
// Overrides the mask written just above: every atom of a " MG" residue is allowed.
108  if ( rsd.name3() == " MG" ){
109  for ( Size j = 1; j <= rsd.natoms(); j++ ) atom_id_mask_[ core::id::AtomID( j, i ) ] = true;
110  }
111  }
112 
113 
114  }
115 
116  ///////////////////////////////////////////////////////////////////////
118 
119 
120  ///////////////////////////////////////////////////////////////////////
121  void
122  ChunkSet::insert_chunk_into_pose( core::pose::Pose & pose, Size const & chunk_pose_index,toolbox::AllowInsertOP const & allow_insert ) const{
123 
124  using namespace core::pose;
125  using namespace core::id;
126 
127  core::pose::MiniPose const & scratch_pose ( *(mini_pose_list_[ chunk_pose_index ]) );
128 
129  // TR << "SCRATCH_POSE " << scratch_pose.sequence() << ' ' << scratch_pose.fold_tree() << std::endl;
130 
131  std::map< AtomID, AtomID > atom_id_map = get_atom_id_map( pose, allow_insert );
132 
133  copy_dofs( pose, scratch_pose, atom_id_map );
134 
135  }
136 
137  //////////////////////////////////////////////////////////////////////////////////////////////
// Builds and returns the atom-to-atom map used when chunk DOFs are copied into a pose.
// As consumed by filter_atom_id_map_with_mask(): key = atom to insert into,
// value = source atom in the chunk.
// (Listing note: the line holding the function name and parameter list is missing
// here; the body relies on parameters named `pose` and `allow_insert`.)
138  std::map< id::AtomID, id::AtomID >
140 
141  std::map< id::AtomID, id::AtomID > atom_id_map;
142 
// allow_insert is handed res_map_ and the fold tree of the first stored mini pose,
// together with atom_id_map to populate.
143  allow_insert->calculate_atom_id_map( pose, res_map_, mini_pose_list_[1]->fold_tree(), atom_id_map );
144 
145  // This should prevent copying dofs for virtual phosphates, if they are tagged as such in the input silent files.
146  filter_atom_id_map_with_mask( atom_id_map );
147 
148  return atom_id_map;
149  }
150 
151  //////////////////////////////////////////////////////////////////////////////////////////////
152  void
153  ChunkSet::filter_atom_id_map_with_mask( std::map< core::id::AtomID, core::id::AtomID > & atom_id_map ) const{
154 
155  using namespace core::id;
156 
157  std::map< AtomID, AtomID > atom_id_map_new;
158 
159  for ( std::map< AtomID, AtomID >::const_iterator
160  it=atom_id_map.begin(), it_end = atom_id_map.end(); it != it_end; ++it ) {
161 
162  AtomID const & insert_atom_id = it->first;
163  AtomID const & source_atom_id = it->second;
164 
165  std::map< AtomID, bool >::const_iterator it_mask = atom_id_mask_.find( source_atom_id );
166  if ( it_mask == atom_id_mask_.end() ) utility_exit_with_message( "Some problem with atom_id_mask in defining atom_id_map " );
167  if ( !it_mask->second ) continue; // this source_atom_id is not allowed by mask, probably came from a virtual phosphate.
168 
169  atom_id_map_new[ insert_atom_id ] = source_atom_id;
170  }
171 
172  atom_id_map = atom_id_map_new;
173 
174  }
175 
176 
177  //////////////////////////////////////////////////////////////////////////////////////////////
// Accessor: returns element idx of mini_pose_list_ (indexed from 1 elsewhere in this file).
// (Listing note: the line holding the return type is missing here.)
// No explicit range check on idx is made in this function.
179  ChunkSet::mini_pose( Size const idx ) const {
180  return mini_pose_list_[ idx ];
181  }
182 
183  //////////////////////////////////////////////////////////////////////////////////////////////
184  //////////////////////////////////////////////////////////////////////////////////////////////
// Body of what is presumably the default RNA_ChunkLibrary constructor (the header
// line is missing from this listing). The only work done is zeroing chunk_coverage_.
186  // currently nothing.
187  chunk_coverage_ = 0.0;
188  }
189 
190 
191  //////////////////////////////////////////////////////////////////////////////////////////////
192  // constructor -- needs a list of silent files. Each silent file
193  // has solutions for a particular piece of the desired pose.
194  // THIS SHOULD BE DEPRECATED SOON -- no longer in use?
195  // Better to explicitly specify '-chunk_res'.
// (Listing note: the line with the definition's name is missing, as are the local
// declarations of `pose_list` and `res_maps` inside the loop.)
// For every silent file: load its poses, find every placement of the first pose's
// sequence in the big pose that is consistent with connections_in_big_pose, and
// register one ChunkSet per placement.
197  utility::vector1 < std::string > const & silent_files,
198  core::pose::Pose const & pose,
199  std::map< Size, Size > const & connections_in_big_pose /* to figure out mapping to big pose*/ )
200  {
201 
202  std::string const & sequence_of_big_pose( pose.sequence() );
// NOTE(review): reads residue 1, so an empty pose is not handled here.
203  coarse_rna_ = pose.residue( 1 ).is_coarse();
204 
205  // allow_insert keeps track of where chunks are placed -- only allow
206  // fragment insertions *outside* these regions.
207  allow_insert_ = new toolbox::AllowInsert( pose );
208  covered_by_chunk_.dimension( sequence_of_big_pose.size(), false );
209 
210  utility::vector1< Size > input_res;
211  // Size chunk_res_count( 0 ); // Unused variable causes warning.
212 
213  for ( Size n = 1; n <= silent_files.size(); n++ ) {
214 
216  process_input_file( silent_files[n], pose_list );
217 
// Only the first pose read from the file is used for sequence matching.
218  core::pose::Pose const & scratch_pose( *(pose_list[1]) );
219 
221 
222  // There may be more than one part of the pose to which this sequence maps.
223  figure_out_possible_res_maps( res_maps, scratch_pose, sequence_of_big_pose, connections_in_big_pose );
224 
225  for (Size k = 1; k <= res_maps.size(); k++ ) {
226  check_res_map( res_maps[ k ], *(pose_list[1]), sequence_of_big_pose );
227 
228  ChunkSetOP chunk_set( new ChunkSet( pose_list, res_maps[ k ] ) );
229  chunk_sets_.push_back( chunk_set );
230 
// The silent-file index n is passed as the domain number, so all placements of
// the same file share one domain.
231  zero_out_allow_insert( res_maps[ k ], pose, scratch_pose, n );
232  }
233 
// Only the FIRST res map's big-pose residues are recorded. input_res is not read
// again in this function apart from the commented-out debug print below.
234  for ( ResMap::const_iterator
235  it=res_maps[1].begin(), it_end = res_maps[1].end(); it != it_end; ++it ) {
236  input_res.push_back( it->first );
237  }
238 
239  }
240 
241  figure_out_chunk_coverage();
242 
243  //std::cout << "INPUT_RES: ";
244  // for ( Size n = 1; n <= input_res.size(); n++ ) std::cout << ' ' << input_res[ n ];
245  // std::cout << std::endl;
246 
247  }
248 
249 
250  //////////////////////////////////////////////////////////////////////////////////////////////
251  // deprecate soon?
// (Listing note: the line with the definition's name is missing.)
// Silent-file-only entry point: forwards to initialize_rna_chunk_library() with an
// empty list of PDB files.
253  utility::vector1 < std::string > const & silent_files,
254  core::pose::Pose const & pose,
255  utility::vector1< core::Size > const & input_res )
256  {
257  utility::vector1< std::string > pdb_files_BLANK;
258  initialize_rna_chunk_library( pdb_files_BLANK, silent_files, pose, input_res );
259  }
260 
261 
262  //////////////////////////////////////////////////////////////////////////////////////////////
263  // constructor -- needs a list of silent files. Each silent file
264  // has solutions for a particular piece of the desired pose.
// (Listing note: the line with the definition's name is missing.)
// Accepts both PDB files and silent files and hands everything, unchanged, to
// initialize_rna_chunk_library().
266  utility::vector1 < std::string > const & pdb_files,
267  utility::vector1 < std::string > const & silent_files,
268  core::pose::Pose const & pose,
269  utility::vector1< core::Size > const & input_res )
270  {
271  initialize_rna_chunk_library( pdb_files, silent_files, pose, input_res );
272  }
273 
274  //////////////////////////////////////////////////////////////////////////////////////////////
// Builds the library from PDB and silent files whose residues are listed, in file
// order, in input_res (big-pose numbering, 1-based).
// (Listing note: the line with the function name is missing, as is the local
// declaration of `pose_list` inside the main loop.)
// Exits via utility_exit_with_message on a sequence mismatch or when the number of
// residues read differs from input_res.size().
275  void
277  utility::vector1 < std::string > const & pdb_files,
278  utility::vector1 < std::string > const & silent_files,
279  core::pose::Pose const & pose,
280  utility::vector1< core::Size > const & input_res )
281  {
282  std::string const & sequence_of_big_pose( pose.sequence() );
283  coarse_rna_ = pose.residue( 1 ).is_coarse();
284 
285  // allow_insert keeps track of where chunks are placed -- only allow
286  // fragment insertions *outside* these regions.
287  allow_insert_ = new toolbox::AllowInsert( pose );
288  covered_by_chunk_.dimension( sequence_of_big_pose.size(), false );
289 
// Concatenate the inputs, PDB files first, remembering which kind each entry is.
290  utility::vector1< std::string > all_input_files;
291  utility::vector1< bool > is_pdb_file;
292  for ( Size n = 1; n <= pdb_files.size(); n++ ){
293  all_input_files.push_back( pdb_files[n] );
294  is_pdb_file.push_back( true );
295  }
296  for ( Size n = 1; n <= silent_files.size(); n++ ){
297  all_input_files.push_back( silent_files[n] );
298  is_pdb_file.push_back( false );
299  }
300 
// count is a running cursor into input_res shared across all input files.
301  Size count( 0 );
302  for ( Size n = 1; n <= all_input_files.size(); n++ ) {
303 
305  process_input_file( all_input_files[n], pose_list, is_pdb_file[n] );
306 
307  core::pose::Pose const & scratch_pose( *(pose_list[1]) );
308 
309  // There may be more than one part of the pose to which this sequence maps.
310  ResMap res_map;
311 
312  for ( Size i = 1; i <= scratch_pose.sequence().size(); i++ ) {
313  count++;
// NOTE(review): input_res[ count ] is indexed before count is compared with
// input_res.size() (that check only happens after the outer loop), so a too-short
// input_res is indexed out of range here -- confirm vector1 traps this.
314  if ( sequence_of_big_pose[ input_res[ count ] -1 ] != scratch_pose.sequence()[ i - 1 ] ){
315  std::cout << "Problem with input_file: " << all_input_files[n] << std::endl;
316  std::cout << "mismatch in sequence in big pose: " << sequence_of_big_pose[ input_res[ count ] -1 ] << input_res[count] <<
317  " in input pose: " << scratch_pose.sequence()[ i - 1 ] << i << std::endl;
318  utility_exit_with_message( "mismatch in input_res sequence" );
319  }
// res_map: big-pose residue number -> residue number within this input pose.
320  res_map[ input_res[count ] ] = i;
321  }
322 
323  ChunkSetOP chunk_set( new ChunkSet( pose_list, res_map ) );
324  chunk_sets_.push_back( chunk_set );
325 
// The position n in the combined file list is used as the domain number.
326  zero_out_allow_insert( res_map, pose, scratch_pose, n );
327 
328  //check_fold_tree_OK( res_map, pose, scratch_pose );
329 
330  }
331  if ( count != input_res.size() ){
332  utility_exit_with_message( "Number of input res does not match total res in input silent files!" );
333  }
334 
335  figure_out_chunk_coverage();
336 
337  }
338 
339  //////////////////////////////////////////////////////////////////////////////
// Appends one ChunkSet, built from the poses in silent_file and the supplied
// res_map, to chunk_sets_.
// (Listing note: the line with the function name is missing, as is the local
// declaration of `pose_list`.)
// Unlike the constructors in this file, nothing here updates allow_insert_ or
// covered_by_chunk_.
340  void
342  std::string const & silent_file,
343  ResMap const & res_map,
344  pose::Pose const & big_pose )
345  {
346 
348 
349  process_input_file( silent_file, pose_list );
// res_map is checked against the first pose of the file and the big pose's sequence.
350  check_res_map( res_map, *(pose_list[1]), big_pose.sequence() );
351 
352  ChunkSetOP chunk_set( new ChunkSet( pose_list, res_map ) );
353  chunk_sets_.push_back( chunk_set );
354 
355  }
356 
357  ////////////////////////////////////////////////////////////////////////////
// Library-level wrapper: applies chunk number chunk_pose_index of chunk set number
// chunk_list_index to pose, passing along the library's allow_insert_.
// (Listing note: the line with the return type and function name is missing.)
// Neither index is range-checked in this function.
359  pose::Pose & pose,
360  Size const & chunk_list_index,
361  Size const & chunk_pose_index ) const
362  {
363  chunk_sets_[ chunk_list_index ]->insert_chunk_into_pose( pose, chunk_pose_index, allow_insert_ );
364  }
365 
366 
367  //////////////////////////////////////////////////////////////////////////////
// Picks a random chunk set, then a random chunk within it, and applies it to `pose`.
// Returns false without touching the pose when the chosen set holds fewer than two
// chunks; returns true after an insertion.
// (Listing note: the line with the function name and parameter list is missing; the
// body uses a parameter named `pose`.)
368  bool
370 
// Maps a uniform draw onto 1..num_chunk_sets().
// NOTE(review): with an empty library this yields index 1 into an empty chunk_sets_
// -- confirm callers guarantee at least one chunk set. Also assumes RG.uniform()
// never returns exactly 1.0 -- TODO confirm.
371  Size const chunk_set_index = static_cast <int> ( RG.uniform() * num_chunk_sets() ) + 1;
372 
373  ChunkSet const & chunk_set( *chunk_sets_[ chunk_set_index ] );
374 
375  if ( chunk_set.num_chunks() < 2 ) return false;
376 
377  Size const chunk_index = static_cast <int> ( RG.uniform() * chunk_set.num_chunks() ) + 1;
378 
379  chunk_set.insert_chunk_into_pose( pose, chunk_index, allow_insert_ );
380 
381  // TR << "INSERTED CHUNK " << chunk_index << " FROM SET " << chunk_set_index << std::endl;
382 
383  return true;
384  }
385 
386  //////////////////////////////////////////////////////////////////////////////
// For every (big-pose residue -> chunk residue) pair in res_map, marks the residue as
// covered and assigns domain_num in allow_insert_ to each big-pose atom that has a
// same-named, non-virtual counterpart in the chunk residue.
// (Listing note: the line with the function name and the first parameter, res_map,
// is missing.)
387  void
389  core::pose::Pose const & pose,
390  core::pose::Pose const & scratch_pose,
391  core::Size const domain_num )
392  {
393  using namespace core::id;
394  using namespace core::conformation;
395 
396  // connected doesn't do anything anymore...
397  FArray1D< bool > connected( pose.total_residue(), false );
// NOTE(review): this scalar assignment looks like it resets the whole array, so
// coverage recorded by earlier calls (one call per chunk set in the constructors)
// would be lost and chunk_coverage_ would reflect only the last chunk set --
// confirm against FArray1D semantics and intent.
398  covered_by_chunk_ = false;
399 
400  for ( ResMap::const_iterator
401  it=res_map.begin(), it_end = res_map.end(); it != it_end; ++it ) {
402 
403  Size const i = it->first; //Index in big pose.
404  Size const i_scratch = it->second; //Index in scratch pose (chunk).
405 
406  covered_by_chunk_( i ) = true;
407 
408  Residue const & rsd_i = pose.residue(i);
409  for ( Size j = 1; j <= rsd_i.natoms(); j++ ){
410 
411  std::string const & atomname = rsd_i.atom_name( j );
412  Residue const & scratch_rsd = scratch_pose.residue(i_scratch);
413 
// Atoms are matched between the two residues by name, not by index.
414  if ( scratch_rsd.has( atomname ) ) {
415  Size const & scratch_index = scratch_pose.residue( i_scratch ).atom_index( atomname );
416  if ( !scratch_rsd.is_virtual( scratch_index ) ) {
417  allow_insert_->set_domain( AtomID(j,i), domain_num);
418  }
419  }
420  }
421 
422  //We don't trust phosphates at the beginning of chains!
423  // MAKE THIS AN OPTION?
424  // if ( i_scratch == 1 || scratch_pose.fold_tree().is_cutpoint( i_scratch - 1 ) ) allow_insert_->set_phosphate( i, pose, true );
425 
426  }
427 
428  }
429 
430 
431  //////////////////////////////////////////////////////////////////////////////
// Runs ChunkSet::check_fold_tree_OK( pose ) on every chunk set and aborts via
// utility_exit_with_message on the first failure, so the only value ever returned is true.
// (Listing note: the line with the function name and parameter list is missing; the
// body uses a parameter named `pose`.)
432  bool
434 
435  for (Size k = 1; k <= chunk_sets_.size(); k++ ) {
436  ChunkSet & chunk_set = *(chunk_sets_[k]);
437  bool const OK = chunk_set.check_fold_tree_OK( pose );
438  if (!OK){
439  std::cout << "Problem with pose fold tree -- not enough jumps to handle the number of chains in chunk set " << k << std::endl;
440  utility_exit_with_message( "FoldTree in pose does not have the right number of jumps to match chunk_res" );
441  }
442  }
443 
444  // Can't get here unless everything is OK!
445  return true;
446  }
447 
448 
449  //////////////////////////////////////////////////////////////////////////////
// Compares the number of jumps in the stored chunk's fold tree with the number of
// jumps of `pose` whose two end residues both lie inside the chunk's footprint (the
// keys of res_map_).
// Returns false when the chunk has MORE jumps than the pose provides in that region;
// returns true otherwise, printing a warning when the chunk has FEWER.
// (Listing note: the line with the function name and parameter list is missing; the
// body uses a parameter named `pose`.)
450  bool
452 
453  // Check where the chunk is mapped to in the big pose.
454  // There should be at least the same number of jumps in the big pose
455  // as there are chains in the scratch_pose.
456  utility::vector1< bool > is_chunk_res( pose.total_residue(), false );
457  for ( ResMap::const_iterator
458  it=res_map_.begin(), it_end = res_map_.end(); it != it_end; ++it ) {
459  Size const i = it->first; //Index in big pose.
460  is_chunk_res[ i ] = true;
461  }
462 
// The jump count is taken from the first stored mini pose only.
463  Size const num_jumps_scratch = mini_pose_list_[1]->fold_tree().num_jump(); // number of chains - 1
464 
465  Size num_jumps_in_big_pose_in_scratch_region( 0 );
466  for ( Size n = 1; n <= pose.num_jump(); n++ ) {
// A jump counts only if BOTH of its end residues are chunk residues.
467  if (! is_chunk_res[ pose.fold_tree().upstream_jump_residue( n ) ] ) continue;
468  if (! is_chunk_res[ pose.fold_tree().downstream_jump_residue( n ) ] ) continue;
469  num_jumps_in_big_pose_in_scratch_region++;
470  }
471 
472  if ( num_jumps_scratch > num_jumps_in_big_pose_in_scratch_region ){
473  std::cout << "Number of jumps in chunk pose : " << num_jumps_scratch << std::endl;
474  std::cout << "Number of jumps in full pose in chunk region: " << num_jumps_in_big_pose_in_scratch_region << " out of total jumps " << pose.num_jump() << std::endl;
475  return false;
476  }
477 
478  if ( num_jumps_scratch < num_jumps_in_big_pose_in_scratch_region ){
479  std::cout << "WARNING! WARNING! WARNING! WARNING! WARNING! WARNING!" << std::endl;
480  std::cout << "Number of jumps in chunk pose : " << num_jumps_scratch << std::endl;
481  std::cout << "Does not match:" << std::endl;
482  std::cout << "Number of jumps in full pose in chunk region: " << num_jumps_in_big_pose_in_scratch_region << " out of total jumps " << pose.num_jump() << std::endl;
483  std::cout << "WARNING! WARNING! WARNING! WARNING! WARNING! WARNING!" << std::endl;
484  // Just a warning
485  //return false;
486  }
487 
488  return true;
489 
490  }
491 
492  //////////////////////////////////////////////////////////////////////////////
// Recomputes chunk_coverage_ from covered_by_chunk_:
//   chunk_coverage_ = 3 * (#covered residues) / ( 3 * (#covered residues) + (#all residues) )
// (Listing note: the line with the function name is missing.)
493  void
495  {
496 
497  Size const tot_res( allow_insert_->nres() );
498  Size num_chunk_res( 0 );
// NOTE(review): num_other_res is counted below but never read; the denominator uses
// tot_res instead -- confirm which of the two was intended.
499  Size num_other_res( 0 );
500 
501  for ( Size n = 1; n <= tot_res; n++ ) {
502  // Allow insert keeps track of where the chunk *aren't*, and
503  // where other moves (fragments, jumps) can be carried out.
504  if ( covered_by_chunk_(n) ){
505  num_chunk_res++;
506  } else {
507  num_other_res++;
508  }
509  }
// NOTE(review): when tot_res is 0 the denominator is 0 and this is a 0/0 division.
510  chunk_coverage_ = Real( 3 * num_chunk_res ) / ( 3 * num_chunk_res + tot_res );
511 
512  }
513 
514  //////////////////////////////////////////////////////////////////////////////
// Splits `pose` into chains at fold-tree cutpoints and reports, through the output
// arguments: the one-letter sequence of each chain (sequences), the chain number of
// every residue (chain_id), and the first residue of each chain (sequence_start).
// (Listing note: the line with the function name and the declarations of the
// `sequences` and `chain_id` parameters are missing.)
515  void
517  core::pose::Pose const & pose,
520  utility::vector1< core::Size > & sequence_start ) const{
521 
522  chain_id.clear();
523  sequences.clear();
524 
525  std::string sequence = "";
526  Size count( 1 );
// NOTE(review): chain_id and sequences are cleared above but sequence_start is not,
// so entries already present in the caller's vector are kept.
527  sequence_start.push_back( 1 );
528 
529  for ( Size i = 1; i <= pose.total_residue(); i++ ) {
530 
531  sequence += pose.residue(i).name1();
532  chain_id.push_back( count );
533 
// A chain is emitted only when a cutpoint is hit. This presumably relies on the last
// residue being reported as a cutpoint; otherwise the trailing chain is never pushed
// -- TODO confirm.
534  if ( pose.fold_tree().is_cutpoint( i ) ) {
535  sequences.push_back( sequence );
536  sequence = "";
537  count++;
538  if ( i < pose.total_residue() ) sequence_start.push_back( i+1 );
539  }
540 
541  }
542 
543 
544  for (Size n = 1; n <= sequences.size(); n++ ) {
545  TR << "SEQUENCE " << n << " " << sequences[ n ] << std::endl;
546  }
547 
548  }
549 
550  //////////////////////////////////////////////////////////////////////////////
// Fills res_maps with every residue mapping found for scratch_pose in the big pose,
// in three steps: split scratch_pose into chains, find where each chain's sequence
// occurs in sequence_of_big_pose, then assemble complete mappings with find_res_maps().
// (Listing note: the line with the function name is missing, as is the local
// declaration of `chain_id`.)
551  void
553  utility::vector1< ResMap > & res_maps,
554  pose::Pose const & scratch_pose,
555  std::string const & sequence_of_big_pose,
556  std::map< Size, Size > const & connections_in_big_pose ) const
557  {
558 
// NOTE(review): the "zero-indexed" remark below looks stale -- the containers here are
// utility::vector1 and are indexed from 1 throughout this file.
559  // Note -- using zero-indexed vectors -- easier to do modulo, etc.
560  utility::vector1< std::string > scratch_sequences;
562  utility::vector1< core::Size > scratch_sequence_start;
563 
564  get_component_sequences( scratch_pose, scratch_sequences, chain_id, scratch_sequence_start );
565 
566  // Go through each sequence and look for matches
567  utility::vector1< utility::vector1< Size > > matches_to_each_scratch_sequence;
568  get_sequence_matches( matches_to_each_scratch_sequence, scratch_sequences, sequence_of_big_pose );
569 
570  // now actually find some good res maps.
571  find_res_maps( chain_id, scratch_sequence_start, scratch_sequences, matches_to_each_scratch_sequence, scratch_pose, connections_in_big_pose, res_maps );
572  }
573 
574  ////////////////////////////////////////////////////////////////////////////////
// Seeds a residue map from each sequence match of chain 1 and lets
// check_connections() extend it across the chunk's jumps; completed maps are
// appended to res_maps (which is cleared first).
// Exits via utility_exit_with_message when no complete map is found.
// (Listing note: the line with the function name is missing.)
575  void
577  utility::vector1< Size > const & chain_id,
578  utility::vector1< Size > const & scratch_sequence_start,
579  utility::vector1< std::string > const & scratch_sequences,
580  utility::vector1< utility::vector1< Size > > const & matches_to_each_scratch_sequence,
581  core::pose::Pose const & scratch_pose,
582  std::map< Size, Size > const & connections_in_big_pose,
583  utility::vector1< ResMap > & res_maps ) const
584  {
585  res_maps.clear();
586 
587  // Loop over matches for chain 1 -- if there are any chunks that match, they should be taggable by
588  // their match to chain 1.
589 
590  Size num_chain( 1 );
591  for (Size k = 1; k <= matches_to_each_scratch_sequence[ num_chain ].size(); k++ ) {
// Start from an empty map so the fill below cannot collide with earlier entries
// (its return value is not inspected here).
592  ResMap res_map;
593  fill_res_map( res_map, matches_to_each_scratch_sequence[ num_chain ][ k ],
594  scratch_sequence_start[ num_chain ] /*should be 1*/,
595  scratch_sequences[ num_chain ].size() );
596 
597  // Dig deep into each connection from this chain.
598  check_connections( num_chain, res_map,
599  chain_id, scratch_sequence_start, scratch_sequences, matches_to_each_scratch_sequence, scratch_pose, connections_in_big_pose, res_maps );
600  }
601 
602  TR << "Number of matches found: " << res_maps.size() << std::endl;
603  if ( res_maps.size() == 0 ) utility_exit_with_message( "Could not match silent file with sequence "+scratch_pose.sequence() );
604 
605  }
606 
607  ////////////////////////////////////////////////////////////////////////////////////////////////////
// For each chunk chain sequence, records every 1-based start position at which it
// occurs as an exact substring of sequence_of_big_pose. One vector of start
// positions is appended to matches_to_each_scratch_sequence per chain.
// Exits via utility_exit_with_message if any chain has no match.
// (Listing note: the line with the function name and the first parameter,
// matches_to_each_scratch_sequence, is missing.)
608  void
610  utility::vector1< std::string > const & scratch_sequences,
611  std::string const & sequence_of_big_pose ) const
612  {
613  // Size tot_matches = 1;
614  for ( Size n = 1; n <= scratch_sequences.size(); n++ ) {
615  utility::vector1< Size > matches;
616  std::string const scratch_sequence( scratch_sequences[n] );
617  Size const scratch_sequence_length = scratch_sequence.size();
// i is a 0-based offset into sequence_of_big_pose.
// NOTE(review): the bound is an unsigned subtraction. If the chunk chain is longer
// than the big pose it wraps to a huge value and the inner loop reads past the end
// of sequence_of_big_pose -- assumes Size is unsigned, TODO confirm and guard.
618  for ( Size i = 0; i <= sequence_of_big_pose.size() - scratch_sequence_length; i++ ) {
619  bool does_it_match( true );
620  for (Size offset = 0; offset < scratch_sequence_length; offset++ ) {
621  if ( sequence_of_big_pose[ i + offset ] != scratch_sequence[ offset ] ) {
622  does_it_match = false;
623  break;
624  }
625  }
626  if (does_it_match) {
627  matches.push_back( i+1 ); // convert numbering to start with 1
628  TR << "Found match to scratch_sequence " << n <<
629  // " starting at scratch pose position " << scratch_sequence_start[ n ] << " " <<
630  " at big pose position: " << i+1 << std::endl;
631  }
632  }
633 
// The (possibly empty) match list is stored before the emptiness check below.
634  matches_to_each_scratch_sequence.push_back( matches );
635 
636  if ( matches.size() < 1 ) utility_exit_with_message( "Could not find match to sequence" );
637 
638  }
639 }
640  ////////////////////////////////////////////////////////////////////////////////////////////////////
641  void
642  RNA_ChunkLibrary::check_connections( Size const & num_chain, ResMap & res_map,
643  utility::vector1< Size > const & chain_id,
644  utility::vector1< Size > const & scratch_sequence_start,
645  utility::vector1< std::string > const & scratch_sequences,
646  utility::vector1< utility::vector1< Size > > const & matches_to_each_scratch_sequence,
647  core::pose::Pose const & scratch_pose,
648  std::map< Size, Size > const & connections_in_big_pose,
649  utility::vector1< ResMap > & res_maps ) const
650  {
651  // We might already be done!
652  if ( res_map.size() == scratch_pose.total_residue() ) { //everything assigned!
653  res_maps.push_back( res_map );
654  return;
655  }
656 
657  // Look for connections coming off the current chain
658  Size const n_jump( scratch_pose.num_jump() ) ;
659 
660  for ( Size n = 1; n <= n_jump; n++ ) {
661 
662  // TR << "CHECK OUT JUMP " << n << " for chain " << num_chain << std::endl;
663  Size const res1 = scratch_pose.fold_tree().upstream_jump_residue( n );
664  Size const res2 = scratch_pose.fold_tree().downstream_jump_residue( n );
665 
666  if ( chain_id[ res1 ] == num_chain ) {
667  test_matches( res1, res2, res_map,
668  chain_id, scratch_sequence_start,
669  scratch_sequences, matches_to_each_scratch_sequence,
670  scratch_pose, connections_in_big_pose, res_maps );
671  } else if ( chain_id[ res2 ] == num_chain ) {
672  test_matches( res2, res1, res_map,
673  chain_id, scratch_sequence_start,
674  scratch_sequences, matches_to_each_scratch_sequence,
675  scratch_pose, connections_in_big_pose, res_maps );
676  }
677 
678  }
679 
680  }
681 
682  //////////////////////////////////////////////
683  void
684  RNA_ChunkLibrary::test_matches( Size const & res1, Size const & res2, ResMap & res_map,
685  utility::vector1< Size > const & chain_id,
686  utility::vector1< Size > const & scratch_sequence_start,
687  utility::vector1< std::string > const & scratch_sequences,
688  utility::vector1< utility::vector1< Size > > const & matches_to_each_scratch_sequence,
689  core::pose::Pose const & scratch_pose,
690  std::map< Size, Size > const & connections_in_big_pose,
691  utility::vector1< ResMap > & res_maps ) const {
692 
693  // TR << " TESTING : " << res1 << " " << res2 << " from chains: " << chain_id[ res1 ] << " " << chain_id[ res2 ] <<
694  // ". Length of resmap " << res_map.size() << " out of " << scratch_pose.total_residue() << std::endl;
695 
696  Size const next_chain = chain_id[ res2 ];
697  // res2 may already be "taken care of" -- no need to check it out.
698  for ( ResMap::const_iterator
699  it=res_map.begin(), it_end = res_map.end(); it != it_end; ++it ) {
700  if ( it->second == res2 ) {
701  // TR << " ALREADY TESTED " << std::endl;
702  return;
703  }
704  }
705 
706  // Cycle through potential matches -- are any good?
707  for (Size k = 1; k <= matches_to_each_scratch_sequence[ next_chain ].size(); k++ ) {
708 
709  ResMap res_map_test( res_map );
710 
711  bool const res_map_ok = fill_res_map( res_map_test, matches_to_each_scratch_sequence[ next_chain ][ k ],
712  scratch_sequence_start[ next_chain ],scratch_sequences[next_chain].size() );
713 
714  if (!res_map_ok) continue;
715 
716  // Are the res1 and res2 connected in the big pose?
717  // Need to figure out where they map to in big pose.
718  Size res1_map( 0 ), res2_map( 0 );
719  for ( ResMap::const_iterator
720  it=res_map_test.begin(), it_end = res_map_test.end(); it != it_end; ++it ) {
721  if ( it->second == res1 ) res1_map = it->first;
722  if ( it->second == res2 ) res2_map = it->first;
723  }
724  if (res1_map == 0 || res2_map == 0 ){
725  TR << res1 << " " << res1_map << " " << res2 << " " << res2_map << std::endl;
726  utility_exit_with_message( "SHOULD NOT BE HERE! " );
727  }
728 
729 
730  bool connection_ok( false );
731  for ( ResMap::const_iterator
732  it=connections_in_big_pose.begin(), it_end = connections_in_big_pose.end(); it != it_end; ++it ) {
733 
734  Size const res1_in_big_pose = it->first;
735  Size const res2_in_big_pose = it->second;
736 
737  if ( res1_in_big_pose == res1_map &&
738  res2_in_big_pose == res2_map ) {
739  connection_ok = true; break;
740  }
741 
742  if ( res2_in_big_pose == res1_map &&
743  res1_in_big_pose == res2_map ) {
744  connection_ok = true; break;
745  }
746  }
747 
748  if (!connection_ok) {
749  // TR << "DENIED!! " << std::endl;
750  continue;
751  } else {
752  // TR << "OK!! " << std::endl;
753  res_map = res_map_test;
754  check_connections( next_chain, res_map,
755  chain_id, scratch_sequence_start,
756  scratch_sequences, matches_to_each_scratch_sequence,
757  scratch_pose, connections_in_big_pose, res_maps );
758  break;
759  }
760  }
761 
762 
763  }
764 
765 
766  ///////////////////////////////////////////////////////////
767  bool
768  RNA_ChunkLibrary::fill_res_map( ResMap & res_map, Size const & match_pos, Size const & scratch_start_pos, Size const & scratch_sequence_length ) const
769  {
770  bool one_to_one( true );
771  for (Size offset = 0; offset < scratch_sequence_length; offset++ ) {
772  Size const big_pose_pos = match_pos + offset;
773  Size const scratch_pos = scratch_start_pos + offset;
774  if ( res_map.find( big_pose_pos) == res_map.end() ) {
775  res_map[ big_pose_pos ] = scratch_pos;
776  TR << "MAPPING " << match_pos+offset << " --> " << scratch_start_pos+offset << std::endl;
777  } else {
778  // this is a problem -- expect res_maps to be one-to-one.
779  one_to_one = false;
780  break;
781  }
782  }
783  return one_to_one;
784  }
785 
786 
787 
788  ///////////////////
789  // DELETE FOLLOWING AFTER STUFF WORKS!
790  //////////////////////////////////////////////////////////////////////////////////////////////
// Older recursive search for complete residue maps (flagged for deletion by the
// comment preceding it). Extends a copy of res_map_old with match number num_match
// of chain num_sequence, rejects it if check_jump_match() fails, records it in
// res_maps when the last chain has been placed, and otherwise recurses over every
// match of the next chain.
// (Listing note: the line with the function name and the first parameter,
// res_map_old, is missing.)
791  void
793  utility::vector1< std::string > const & scratch_sequences,
794  utility::vector1< utility::vector1< Size > > const & matches_to_each_scratch_sequence,
795  pose::Pose const & scratch_pose,
796  std::map< Size, Size > const & connections_in_big_pose,
797  utility::vector1< core::Size > const & chain_id,
798  Size const & num_sequence, Size const & num_match, utility::vector1< ResMap > & res_maps ) const{
799  ResMap res_map( res_map_old );
800 
801  Size const match_pos = matches_to_each_scratch_sequence[ num_sequence ][ num_match ];
802  // TR << "HEY MATCH_POS " << match_pos << std::endl;
803  Size const scratch_sequence_length = scratch_sequences[num_sequence].size();
// Chunk residues are numbered by continuing from the current map size.
804  Size i( res_map.size() );
805  for (Size offset = 0; offset < scratch_sequence_length; offset++ ) {
// NOTE(review): get_sequence_matches() already stores 1-based positions, so the extra
// +1 on the big-pose key here looks like an off-by-one -- confirm before reuse.
806  res_map[ match_pos + offset + 1 ] = i + 1; // Add back in 1 to match pose numbering.
807  i++;
808  }
809 
810  bool const jump_match = check_jump_match( scratch_pose, connections_in_big_pose, res_map, chain_id );
811 
812  // for ( ResMap::const_iterator
813  // it=res_map.begin(), it_end = res_map.end(); it != it_end; ++it ) {
814  // TR << it->first << " mapped to " << it->second << std::endl;
815  // }
816 
817  // TR << " CHECKING: " << num_sequence << " -- " << num_match << " : " << res_map.size() << " --> " << jump_match << std::endl;
818 
819  if ( !jump_match ) return;
820 
821  if ( num_sequence == matches_to_each_scratch_sequence.size() ) { //Success!
822  res_maps.push_back( res_map );
823  }
824 
825  // TR << "ADVANCING to sequence " << num_sequence << std::endl;
826  Size const num_sequence_next = num_sequence + 1;
827 
// After the last chain, num_sequence_next exceeds the size and the recursion stops.
828  if ( num_sequence_next <= matches_to_each_scratch_sequence.size() ) {
829 
830  for ( Size k = 1; k <= matches_to_each_scratch_sequence[ num_sequence_next ].size(); k++ ) {
831  // TR << "ABOUT TO TRY " << num_sequence_next << " " << k << std::endl;
832  check_res_map_recursively( res_map, scratch_sequences, matches_to_each_scratch_sequence, scratch_pose, connections_in_big_pose, chain_id, num_sequence_next, k, res_maps );
833  }
834 
835  }
836 
837  return;
838 
839  }
840 
841 
842 
843  ///////////////////////////////////////////////////////////////////////////
844  // Any jump inside the scratch pose (which came from a user-inputted silent file with its own fold tree)
845  // must correspond to a pairing defined in the new pose of interest. Could make this a little less restrictive,
846  // just asking for any connection between the chain segments that are putatively matching.
// Returns true when every jump of scratch_pose whose two end residues are both
// present (as values) in res_map is backed by some entry of connections_in_big_pose
// whose two residues map, through res_map, onto the same pair of chunk chains.
// The chain pair is accepted in either order. Jumps with an unmapped end are skipped.
// (Listing note: the line with the function name is missing.)
847  bool
849  pose::Pose const & scratch_pose,
850  std::map< Size, Size > const & connections_in_big_pose,
851  ResMap const & res_map,
852  utility::vector1< Size > const & chain_id ) const
853  {
854 
855  Size const n_jump( scratch_pose.num_jump() ) ;
856 
857  for ( Size n = 1; n <= n_jump; n++ ) {
858 
859  Size const res1 = scratch_pose.fold_tree().upstream_jump_residue( n );
860  Size const res2 = scratch_pose.fold_tree().downstream_jump_residue( n );
861 
862  // First check that res1 and res2 are in the resmap -- otherwise no point in looking for a matching
863  // jump in the big pose.
// res_map is keyed by big-pose residue, so chunk residues are found by scanning values.
864  bool found_res1( false ), found_res2( false );
865  for ( ResMap::const_iterator it = res_map.begin(), it_end = res_map.end(); it != it_end; ++it ) {
866  if ( res1 == it->second ) found_res1 = true;
867  if ( res2 == it->second ) found_res2 = true;
868  }
869  if ( !found_res1 || !found_res2 ) {
870  //consider it OK if the res_map isn't complete yet.
871  continue;
872  }
873 
874  bool connection_ok( false );
875 
876  for ( ResMap::const_iterator
877  it=connections_in_big_pose.begin(), it_end = connections_in_big_pose.end(); it != it_end; ++it ) {
878 
879  Size const res1_in_big_pose = it->first;
880  Size const res2_in_big_pose = it->second;
881 
882  // TR << "Checking connection: " << res1_in_big_pose << " - " << res2_in_big_pose << std::endl;
883 
884  ResMap::const_iterator res1_map_id = res_map.find( res1_in_big_pose );
885  ResMap::const_iterator res2_map_id = res_map.find( res2_in_big_pose );
886 
// Only connections with both ends inside the mapped region can support the jump.
887  if ( res1_map_id != res_map.end() &&
888  res2_map_id != res_map.end() ) {
889 
890  Size const res1_in_scratch_pose = res1_map_id->second;
891  Size const res2_in_scratch_pose = res2_map_id->second;
892  // TR << " Checking connection in scratch_pose: " << res1_in_scratch_pose << " - " << res2_in_scratch_pose << std::endl;
893 
// The comparison is by chain, not by exact residue.
894  if ( ( chain_id[ res1 ] == chain_id[ res1_in_scratch_pose ] &&
895  chain_id[ res2 ] == chain_id[ res2_in_scratch_pose ] ) ||
896  ( chain_id[ res1 ] == chain_id[ res2_in_scratch_pose ] &&
897  chain_id[ res2 ] == chain_id[ res1_in_scratch_pose ] ) ) {
898  connection_ok = true;
899  break;
900  }
901 
902  }
903  }
904 
905  if (!connection_ok) {
906  // TR << "FAIL! Could not find a match to " << res1 << " -- " << res2 << std::endl;
907  return false;
908  }
909 
910  }
911 
912  return true;
913  }
914 
915 
916  //////////////////////////////////////////////////////////////////////////////
917  bool
918  RNA_ChunkLibrary::check_res_map( ResMap const & res_map, pose::Pose const & scratch_pose, std::string const & sequence ) const{
919 
920  // CHECK SEQUENCE HERE!!! EXIT IF NO MATCH!!!!!!!
921 
922  for ( ResMap::const_iterator
923  it=res_map.begin(), it_end = res_map.end(); it != it_end; ++it ) {
924 
925  // For now, just do bonded atoms...update later to do jumps too.
926  Size const i = it->first; //Index in big pose.
927  Size const i_scratch_pose = it->second; // Index in the little "chunk" or "scratch" pose
928 
929  if ( sequence[ i-1 ] != scratch_pose.residue( i_scratch_pose ).name1() ){
930  utility_exit_with_message( "Mismatched sequence!!" );
931  return false;
932  }
933 
934  }
935  return true;
936  }
937 
938  //////////////////////////////////////////////////////////////////////////////////////////////
// Loads poses from input_file and appends them to pose_list, then post-processes every
// pose in the list. Exits with an error message if the list is still empty afterwards.
//
// is_pdb -- when true, input_file is read as a single PDB; otherwise it is read as a
//           silent file and one pose is created per entry.
//
// NOTE(review): this text is missing several listing lines -- 940-941 (function name and
// the parameters preceding is_pdb), 947-948 (where rsd_set is presumably declared) and
// 954-955 (two statements between pose_from_pdb and push_back). input_file, pose_list and
// rsd_set are therefore used below without a visible declaration.
939  void
942  bool is_pdb /*= false*/ ) const
943  {
944  using namespace core::io::silent;
945  using namespace protocols::rna;
946 
949 
950  if ( is_pdb ){
951 
// A PDB yields exactly one pose.
952  pose::PoseOP pose_op( new pose::Pose );
953  core::import_pose::pose_from_pdb( *pose_op, *rsd_set, input_file );
956  pose_list.push_back( pose_op );
957 
958  } else { //it's a silent file.
959 
// Every structure stored in the silent file becomes its own pose.
960  SilentFileData silent_file_data;
961  silent_file_data.read_file( input_file );
962  for ( core::io::silent::SilentFileData::iterator iter = silent_file_data.begin(),
963  end = silent_file_data.end(); iter != end; ++iter ) {
964  pose::PoseOP pose_op( new pose::Pose );
965  iter->fill_pose( *pose_op );
966  pose_list.push_back( pose_op );
967  }
968 
969  }
970 
971  // further cleanup.
// The loop runs over the whole of pose_list, so any poses that were already in the list
// before this call are processed as well.
972  for (Size n = 1; n <= pose_list.size(); n++ ){
973 
974  pose::PoseOP pose_op = pose_list[ n ];
975 
976  remove_cutpoints_closed( *pose_op );
977 
// Convert to a coarse pose only when coarse_rna_ is set and the pose is not coarse already;
// only residue 1 is inspected to decide that.
978  if ( coarse_rna_ && !pose_op->residue(1).is_coarse() ){
979  pose::Pose coarse_pose;
980  make_coarse_pose( *pose_op, coarse_pose );
981  *pose_op = coarse_pose;
982  }
983 
984  virtualize_5prime_phosphates( *pose_op );
985  }
986 
987  // std::cout << "DONE: " << input_file << std::endl;
988 
// This check comes after the cleanup loop; with an empty list the loop body never runs.
989  if ( pose_list.size() < 1) {
990  utility_exit_with_message( "No structure found in input file " + input_file );
991  }
992 
993  }
994 
995  ////////////////////////////////////////////////////////////////
996  void
997  RNA_ChunkLibrary::initialize_random_chunks( pose::Pose & pose, bool const dump_pdb /* = false */) const{
998  for ( Size n = 1; n <= num_chunk_sets(); n++ ) {
999 
1000  ChunkSet const & chunk_set( *chunk_sets_[ n ] );
1001 
1002  Size chunk_index = static_cast<int>( RG.uniform() * chunk_set.num_chunks() ) + 1;
1003 
1004  // JUST FOR TESTING
1005  if ( dump_pdb ) chunk_index = 1;
1006 
1007  //TR << "NUM_CHUNKS " << chunk_index << " " << chunk_set.num_chunks() << std::endl;
1008  chunk_set.insert_chunk_into_pose( pose, chunk_index, allow_insert_ );
1009 
1010 
1011  // useful for tracking homology modeling: perhaps we can align to first chunk as well -- 3D alignment of Rosetta poses are
1012  // arbitrarily set to origin (except in special cases with virtual residues...)
1013  if ( n==1 /*&& pose.residue( pose.total_residue() ).name3() != "VRT"*/ ) align_to_chunk( pose, chunk_set, chunk_index );
1014 
1015  if ( dump_pdb ) pose.dump_pdb( "start_"+string_of(n)+".pdb" );
1016 
1017  }
1018 
1019  //exit( 0 );
1020 
1021  }
1022 
1023  ///////////////////////////////////////////////////////////////////////
// Superimposes pose onto chunk 1 of the first chunk set by delegating to align_to_chunk().
// Requires at least one chunk set; runtime_assert guards that.
//
// NOTE(review): listing line 1025, which carries the qualified function name and its
// parameter list (including pose), is missing from this text.
1024  void
1026  runtime_assert( chunk_sets_.size() > 0 );
1027  ChunkSet const & chunk_set( *chunk_sets_[ 1 ] );
1028  align_to_chunk( pose, chunk_set, 1 );
1029  }
1030 
1031  ///////////////////////////////////////////////////////////////////////
1032  void
1033  RNA_ChunkLibrary::align_to_chunk( pose::Pose & pose, ChunkSet const & chunk_set, Size const chunk_index ) const{
1034 
1035  using namespace core::id;
1036 
1037  std::map< AtomID, AtomID > atom_id_map = chunk_set.get_atom_id_map( pose, allow_insert_ );
1038 
1039  id::AtomID_Map< id::AtomID > alignment_atom_id_map; // weird alternative format needed for superimpose_pose
1040  core::pose::initialize_atomid_map( alignment_atom_id_map, pose, id::BOGUS_ATOM_ID );
1041  for ( std::map< AtomID, AtomID >::const_iterator
1042  it=atom_id_map.begin(), it_end = atom_id_map.end(); it != it_end; ++it ) {
1043  alignment_atom_id_map.set( it->first, it->second );
1044  }
1045 
1046  core::scoring::superimpose_pose( pose, *(chunk_set.mini_pose( chunk_index )), alignment_atom_id_map );
1047  }
1048 
1049 
1050 
1051  ////////////////////////////////////////////////////////////////
// Setter: stores the supplied allow_insert in the allow_insert_ member.
//
// NOTE(review): listing line 1053, which carries the qualified function name and the
// allow_insert parameter declaration, is missing from this text.
1052  void
1054  allow_insert_ = allow_insert;
1055  }
1056 
1057 }
1058 }
1059