19 #include <basic/Tracer.hh>
30 #include <numeric/random/random.hh>
39 #include <ObjexxFCL/FArray1D.hh>
41 #include <ObjexxFCL/string.functions.hh>
50 #include <utility/vector1.hh>
// File-scope random number generator, constructed with the integer 2380934
// (presumably its seed -- confirm against numeric::random::RandomGenerator).
54 static numeric::random::RandomGenerator
RG(2380934);
// File-scope tracer; its constructor receives the string "protocols.rna.rna_chunk_library".
56 static basic::Tracer
TR(
"protocols.rna.rna_chunk_library" ) ;
62 using namespace ObjexxFCL;
// Fragments from the code that populates mini_pose_list_; the enclosing signatures and
// loop bodies are not visible in this extract, so function boundaries here are uncertain.
// Store the supplied list in the member mini_pose_list_.
73 mini_pose_list_ = mini_pose_list;
// Visit every entry j (1-based) of mini_pose.coords()[i].
80 for (
Size j = 1; j <= mini_pose.
coords()[i].size(); j++ ){
// Visit every entry n (1-based) of pose_list.
90 for (
Size n = 1; n <= pose_list.size(); n++ ) {
// Branch taken when the residue's three-letter name equals " MG".
// NOTE(review): presumably special handling for magnesium; the branch body is not visible.
108 if ( rsd.
name3() ==
" MG" ){
// Fragment (signature not visible): builds an AtomID -> AtomID map for `pose` through
// get_atom_id_map( pose, allow_insert ) and hands it to copy_dofs() together with scratch_pose.
124 using namespace core::pose;
125 using namespace core::id;
131 std::map< AtomID, AtomID > atom_id_map = get_atom_id_map( pose, allow_insert );
// NOTE(review): presumably copies degrees of freedom between scratch_pose and pose for the
// mapped atoms -- confirm direction and semantics against copy_dofs().
133 copy_dofs( pose, scratch_pose, atom_id_map );
// Fragment (signature and return statement not visible). The visible return type is a
// map from id::AtomID to id::AtomID.
138 std::map< id::AtomID, id::AtomID >
141 std::map< id::AtomID, id::AtomID > atom_id_map;
// allow_insert fills atom_id_map from the pose, res_map_ and the fold tree of the first
// entry of mini_pose_list_.
143 allow_insert->calculate_atom_id_map( pose, res_map_, mini_pose_list_[1]->fold_tree(), atom_id_map );
// The freshly built map is then passed through filter_atom_id_map_with_mask().
146 filter_atom_id_map_with_mask( atom_id_map );
// Fragment (signature not visible): rebuilds atom_id_map so that it keeps only the entries
// whose mapped (source) atom has a true flag in atom_id_mask_.
155 using namespace core::id;
// Filtered copy that replaces atom_id_map at the end.
157 std::map< AtomID, AtomID > atom_id_map_new;
159 for ( std::map< AtomID, AtomID >::const_iterator
160 it=atom_id_map.begin(), it_end = atom_id_map.end(); it != it_end; ++it ) {
// Key is the atom being inserted into; value is the atom it is taken from.
162 AtomID const & insert_atom_id = it->first;
163 AtomID const & source_atom_id = it->second;
// Every source atom must have an entry in the mask; a missing entry aborts with a message.
165 std::map< AtomID, bool >::const_iterator it_mask = atom_id_mask_.find( source_atom_id );
166 if ( it_mask == atom_id_mask_.end() ) utility_exit_with_message(
"Some problem with atom_id_mask in defining atom_id_map " );
// Entries whose mask flag is false are left out of the new map.
167 if ( !it_mask->second )
continue;
169 atom_id_map_new[ insert_atom_id ] = source_atom_id;
// Overwrite the caller-visible map with the filtered copy.
172 atom_id_map = atom_id_map_new;
180 return mini_pose_list_[ idx ];
187 chunk_coverage_ = 0.0;
// Fragment (most of the signature not visible): builds chunk sets from a list of silent
// files. connections_in_big_pose is received as a const reference to a Size -> Size map.
199 std::map< Size, Size >
const & connections_in_big_pose )
// Size covered_by_chunk_ to the big pose sequence length; `false` is passed alongside the
// size (presumably the initial fill value -- confirm against ObjexxFCL FArray1D::dimension).
208 covered_by_chunk_.dimension( sequence_of_big_pose.size(), false );
// One pass per silent file (1-based index n).
213 for (
Size n = 1; n <= silent_files.size(); n++ ) {
// Load the file's contents into pose_list.
216 process_input_file( silent_files[n], pose_list );
// Collect candidate residue maps for scratch_pose against the big pose sequence and connections.
223 figure_out_possible_res_maps( res_maps, scratch_pose, sequence_of_big_pose, connections_in_big_pose );
// For each candidate map: validate it against the first pose, store a chunk set, and
// update allow_insert with n passed as the last argument.
225 for (
Size k = 1; k <= res_maps.size(); k++ ) {
226 check_res_map( res_maps[ k ], *(pose_list[1]), sequence_of_big_pose );
229 chunk_sets_.push_back( chunk_set );
231 zero_out_allow_insert( res_maps[ k ], pose, scratch_pose, n );
// Append the keys of the first residue map to input_res.
234 for ( ResMap::const_iterator
235 it=res_maps[1].begin(), it_end = res_maps[1].
end(); it != it_end; ++it ) {
236 input_res.push_back( it->first );
// Recompute the coverage statistic once all chunk sets have been registered.
241 figure_out_chunk_coverage();
258 initialize_rna_chunk_library( pdb_files_BLANK, silent_files, pose, input_res );
271 initialize_rna_chunk_library( pdb_files, silent_files, pose, input_res );
// Fragment (signature not visible): builds chunk sets from PDB files and silent files,
// assigning the residues listed in input_res to the input poses in order.
// Size covered_by_chunk_ to the big pose sequence length; `false` is passed alongside the
// size (presumably the initial fill value -- confirm against ObjexxFCL FArray1D::dimension).
288 covered_by_chunk_.dimension( sequence_of_big_pose.size(), false );
// Merge both file lists into all_input_files, PDB files first, with a parallel is_pdb_file flag.
292 for (
Size n = 1; n <= pdb_files.size(); n++ ){
293 all_input_files.push_back( pdb_files[n] );
294 is_pdb_file.push_back(
true );
296 for (
Size n = 1; n <= silent_files.size(); n++ ){
297 all_input_files.push_back( silent_files[n] );
298 is_pdb_file.push_back(
false );
// One pass per input file.
302 for (
Size n = 1; n <= all_input_files.size(); n++ ) {
// Load the file into pose_list; the flag tells the loader whether it is a PDB file.
305 process_input_file( all_input_files[n], pose_list, is_pdb_file[n] );
// Walk the scratch pose sequence position by position (i is 1-based).
312 for (
Size i = 1; i <= scratch_pose.
sequence().size(); i++ ) {
// Compare the big pose character at input_res[count] with scratch character i; the "-1"
// turns the 1-based positions into 0-based string indices. On a mismatch, report the file
// and both positions, then abort.
314 if ( sequence_of_big_pose[ input_res[ count ] -1 ] != scratch_pose.
sequence()[ i - 1 ] ){
315 std::cout <<
"Problem with input_file: " << all_input_files[n] << std::endl;
316 std::cout <<
"mismatch in sequence in big pose: " << sequence_of_big_pose[ input_res[ count ] -1 ] << input_res[count] <<
317 " in input pose: " << scratch_pose.
sequence()[ i - 1 ] << i << std::endl;
318 utility_exit_with_message(
"mismatch in input_res sequence" );
// Record big pose residue -> scratch pose residue.
320 res_map[ input_res[count ] ] = i;
324 chunk_sets_.push_back( chunk_set );
326 zero_out_allow_insert( res_map, pose, scratch_pose, n );
// Every entry of input_res must have been consumed by the input poses.
// NOTE(review): the message mentions only silent files although PDB files are counted too.
331 if ( count != input_res.size() ){
332 utility_exit_with_message(
"Number of input res does not match total res in input silent files!" );
335 figure_out_chunk_coverage();
// Fragment (signature not visible): loads one silent file into pose_list, validates res_map
// against the first loaded pose and the big pose sequence, then appends a chunk set.
349 process_input_file( silent_file, pose_list );
350 check_res_map( res_map, *(pose_list[1]), big_pose.
sequence() );
353 chunk_sets_.push_back( chunk_set );
// Fragment (start of signature not visible): const member that forwards to the chunk set at
// chunk_list_index, asking it to insert its chunk number chunk_pose_index into pose, with
// allow_insert_ passed along.
360 Size const & chunk_list_index,
361 Size const & chunk_pose_index )
const
363 chunk_sets_[ chunk_list_index ]->insert_chunk_into_pose( pose, chunk_pose_index, allow_insert_ );
// Fragment (signature not visible): picks a chunk set and a chunk at random and inserts the
// chunk into pose.
// Index is floor( uniform * count ) + 1, i.e. 1-based.
// NOTE(review): if RG.uniform() can return exactly 1.0, or num_chunk_sets() is 0, this index
// falls outside chunk_sets_ -- confirm the generator's range and the caller's guarantees.
371 Size const chunk_set_index = static_cast <
int> (
RG.uniform() * num_chunk_sets() ) + 1;
373 ChunkSet const & chunk_set( *chunk_sets_[ chunk_set_index ] );
// A set with fewer than two chunks is not used; the fragment returns false in that case.
375 if ( chunk_set.num_chunks() < 2 )
return false;
// Same 1-based random pick, this time among the chunks of the selected set.
377 Size const chunk_index = static_cast <
int> (
RG.uniform() * chunk_set.num_chunks() ) + 1;
379 chunk_set.insert_chunk_into_pose( pose, chunk_index, allow_insert_ );
// Fragment (signature not visible): marks the residues of res_map as covered and assigns a
// domain number in allow_insert_ to selected atoms of those residues.
393 using namespace core::id;
394 using namespace core::conformation;
// Assigns false to covered_by_chunk_ before the map is walked.
// NOTE(review): if this routine runs once per chunk set, flags set by earlier calls would be
// cleared here -- confirm that is intended.
398 covered_by_chunk_ =
false;
400 for ( ResMap::const_iterator
401 it=res_map.begin(), it_end = res_map.end(); it != it_end; ++it ) {
// Key is the residue in the big pose, value the matching residue in the scratch pose.
403 Size const i = it->first;
404 Size const i_scratch = it->second;
406 covered_by_chunk_( i ) =
true;
// Only atoms that the scratch residue has, and that are not virtual there, get the domain.
414 if ( scratch_rsd.
has( atomname ) ) {
416 if ( !scratch_rsd.
is_virtual( scratch_index ) ) {
417 allow_insert_->set_domain(
AtomID(j,i), domain_num);
// Fragment (signature and the failing condition not visible): visits every chunk set and,
// on the failure path, reports the chunk set index k and aborts with a fold tree message.
435 for (
Size k = 1; k <= chunk_sets_.size(); k++ ) {
436 ChunkSet & chunk_set = *(chunk_sets_[k]);
439 std::cout <<
"Problem with pose fold tree -- not enough jumps to handle the number of chains in chunk set " << k << std::endl;
440 utility_exit_with_message(
"FoldTree in pose does not have the right number of jumps to match chunk_res" );
// Fragment (signature and return statements not visible): compares the number of jumps in
// the chunk pose with the number of jumps counted in the matching region of the full pose.
// Flag every big pose residue that appears as a key of res_map_.
457 for ( ResMap::const_iterator
458 it=res_map_.begin(), it_end = res_map_.end(); it != it_end; ++it ) {
459 Size const i = it->first;
460 is_chunk_res[ i ] =
true;
// Jump count of the fold tree of the first entry of mini_pose_list_.
463 Size const num_jumps_scratch = mini_pose_list_[1]->fold_tree().num_jump();
// Counter for the full pose jumps in the chunk region; the counting condition is not visible.
465 Size num_jumps_in_big_pose_in_scratch_region( 0 );
469 num_jumps_in_big_pose_in_scratch_region++;
// Chunk pose has more jumps than the full pose region: print both counts.
472 if ( num_jumps_scratch > num_jumps_in_big_pose_in_scratch_region ){
473 std::cout <<
"Number of jumps in chunk pose : " << num_jumps_scratch << std::endl;
474 std::cout <<
"Number of jumps in full pose in chunk region: " << num_jumps_in_big_pose_in_scratch_region <<
" out of total jumps " << pose.
num_jump() << std::endl;
// Chunk pose has fewer jumps than the full pose region: print a warning banner with both counts.
478 if ( num_jumps_scratch < num_jumps_in_big_pose_in_scratch_region ){
479 std::cout <<
"WARNING! WARNING! WARNING! WARNING! WARNING! WARNING!" << std::endl;
480 std::cout <<
"Number of jumps in chunk pose : " << num_jumps_scratch << std::endl;
481 std::cout <<
"Does not match:" << std::endl;
482 std::cout <<
"Number of jumps in full pose in chunk region: " << num_jumps_in_big_pose_in_scratch_region <<
" out of total jumps " << pose.
num_jump() << std::endl;
483 std::cout <<
"WARNING! WARNING! WARNING! WARNING! WARNING! WARNING!" << std::endl;
// Fragment (signature and counter updates not visible): computes chunk_coverage_ from the
// per-residue covered_by_chunk_ flags.
// Total residue count comes from allow_insert_.
497 Size const tot_res( allow_insert_->nres() );
498 Size num_chunk_res( 0 );
499 Size num_other_res( 0 );
501 for (
Size n = 1; n <= tot_res; n++ ) {
504 if ( covered_by_chunk_(n) ){
// Coverage = 3 * chunk residues / ( 3 * chunk residues + total residues ).
// NOTE(review): num_other_res is declared above but the visible formula uses tot_res in the
// denominator -- confirm which one was intended.
510 chunk_coverage_ =
Real( 3 * num_chunk_res ) / ( 3 * num_chunk_res + tot_res );
// Fragment (signature and loop header not visible): collects sequences, a per-residue
// chain_id and the start position of each sequence, then logs the sequences.
// The first sequence starts at position 1.
527 sequence_start.push_back( 1 );
// One chain_id entry is appended per visit, holding the current value of count.
532 chain_id.push_back( count );
535 sequences.push_back( sequence );
// A further start position is recorded only when i is not the last residue of the pose.
538 if ( i < pose.
total_residue() ) sequence_start.push_back( i+1 );
// Log every collected sequence with its 1-based index.
544 for (
Size n = 1; n <= sequences.size(); n++ ) {
545 TR <<
"SEQUENCE " << n <<
" " << sequences[ n ] << std::endl;
// Fragment (start of signature not visible): const member that runs three steps in order --
// get_component_sequences(), get_sequence_matches(), find_res_maps() -- with res_maps as the
// last argument of the final call.
556 std::map< Size, Size >
const & connections_in_big_pose )
const
// Step 1: fill scratch_sequences, chain_id and scratch_sequence_start from scratch_pose.
564 get_component_sequences( scratch_pose, scratch_sequences, chain_id, scratch_sequence_start );
// Step 2: fill matches_to_each_scratch_sequence from the scratch sequences and the big pose sequence.
568 get_sequence_matches( matches_to_each_scratch_sequence, scratch_sequences, sequence_of_big_pose );
// Step 3: pass everything, including the big pose connections, to find_res_maps().
571 find_res_maps( chain_id, scratch_sequence_start, scratch_sequences, matches_to_each_scratch_sequence, scratch_pose, connections_in_big_pose, res_maps );
// Fragment (most of the signature not visible): for each match of chain num_chain, fills a
// residue map and calls check_connections() with it; aborts if res_maps is empty afterwards.
582 std::map< Size, Size >
const & connections_in_big_pose,
// One pass per recorded match position of chain num_chain.
591 for (
Size k = 1; k <= matches_to_each_scratch_sequence[ num_chain ].size(); k++ ) {
// Fill res_map for this chain from the match position, the chain's scratch start and its length.
593 fill_res_map( res_map, matches_to_each_scratch_sequence[ num_chain ][ k ],
594 scratch_sequence_start[ num_chain ] ,
595 scratch_sequences[ num_chain ].
size() );
598 check_connections( num_chain, res_map,
599 chain_id, scratch_sequence_start, scratch_sequences, matches_to_each_scratch_sequence, scratch_pose, connections_in_big_pose, res_maps );
// Report how many residue maps were collected; none at all is fatal.
602 TR <<
"Number of matches found: " << res_maps.size() << std::endl;
603 if ( res_maps.size() == 0 ) utility_exit_with_message(
"Could not match silent file with sequence "+scratch_pose.
sequence() );
// Fragment (signature not visible): for every scratch sequence, records each 1-based start
// position in sequence_of_big_pose where it occurs, and aborts if a sequence has no match.
614 for (
Size n = 1; n <= scratch_sequences.size(); n++ ) {
616 std::string const scratch_sequence( scratch_sequences[n] );
617 Size const scratch_sequence_length = scratch_sequence.size();
// Slide a window of scratch_sequence_length over the big pose sequence (i is 0-based).
// The bound is written as an addition: "size() - length" is an unsigned subtraction that
// wraps around when the scratch sequence is longer than the big pose sequence, which would
// index far beyond the end of the string. With this form such a sequence simply yields no
// window and is reported through the "Could not find match" check below.
618 for (
Size i = 0; i + scratch_sequence_length <= sequence_of_big_pose.size(); i++ ) {
619 bool does_it_match(
true );
// Compare the window character by character; any difference clears the flag.
620 for (
Size offset = 0; offset < scratch_sequence_length; offset++ ) {
621 if ( sequence_of_big_pose[ i + offset ] != scratch_sequence[ offset ] ) {
622 does_it_match =
false;
// Matches are stored and logged as 1-based positions.
627 matches.push_back( i+1 );
628 TR <<
"Found match to scratch_sequence " << n <<
630 " at big pose position: " << i+1 << std::endl;
634 matches_to_each_scratch_sequence.push_back( matches );
// A scratch sequence without any match is fatal.
636 if ( matches.size() < 1 ) utility_exit_with_message(
"Could not find match to sequence" );
// Fragment (most of the signature not visible): stores res_map in res_maps, then, for each
// jump whose end lies in chain num_chain, calls test_matches() starting from that end.
648 std::map< Size, Size >
const & connections_in_big_pose,
653 res_maps.push_back( res_map );
// One pass per jump (1-based); how res1 and res2 are obtained is not visible here.
660 for (
Size n = 1; n <= n_jump; n++ ) {
// The residue belonging to num_chain is passed first, its jump partner second.
666 if ( chain_id[ res1 ] == num_chain ) {
667 test_matches( res1, res2, res_map,
668 chain_id, scratch_sequence_start,
669 scratch_sequences, matches_to_each_scratch_sequence,
670 scratch_pose, connections_in_big_pose, res_maps );
671 }
else if ( chain_id[ res2 ] == num_chain ) {
672 test_matches( res2, res1, res_map,
673 chain_id, scratch_sequence_start,
674 scratch_sequences, matches_to_each_scratch_sequence,
675 scratch_pose, connections_in_big_pose, res_maps );
// Fragment (most of the signature not visible): tries every match of the chain containing
// res2, keeps the ones for which the res1/res2 pair maps onto a connection of the big pose,
// and continues with check_connections() from that chain.
690 std::map< Size, Size >
const & connections_in_big_pose,
// Chain that holds the jump partner res2.
696 Size const next_chain = chain_id[ res2 ];
// Look for res2 among the scratch residues already present in res_map; the action taken
// when it is found is not visible in this extract.
698 for ( ResMap::const_iterator
699 it=res_map.begin(), it_end = res_map.end(); it != it_end; ++it ) {
700 if ( it->second == res2 ) {
// Try each recorded match position of next_chain.
707 for (
Size k = 1; k <= matches_to_each_scratch_sequence[ next_chain ].size(); k++ ) {
// Work on a copy so a rejected candidate leaves res_map untouched.
709 ResMap res_map_test( res_map );
711 bool const res_map_ok = fill_res_map( res_map_test, matches_to_each_scratch_sequence[ next_chain ][ k ],
712 scratch_sequence_start[ next_chain ],scratch_sequences[next_chain].
size() );
// Skip candidates that fill_res_map() rejects.
714 if (!res_map_ok)
continue;
// Reverse lookup: find the big pose residues that map to scratch residues res1 and res2.
718 Size res1_map( 0 ), res2_map( 0 );
719 for ( ResMap::const_iterator
720 it=res_map_test.begin(), it_end = res_map_test.end(); it != it_end; ++it ) {
721 if ( it->second == res1 ) res1_map = it->first;
722 if ( it->second == res2 ) res2_map = it->first;
// Both residues must be mapped at this point (0 means "not found"); otherwise abort.
724 if (res1_map == 0 || res2_map == 0 ){
725 TR << res1 <<
" " << res1_map <<
" " << res2 <<
" " << res2_map << std::endl;
726 utility_exit_with_message(
"SHOULD NOT BE HERE! " );
// The mapped pair must appear in connections_in_big_pose, in either order.
730 bool connection_ok(
false );
731 for ( ResMap::const_iterator
732 it=connections_in_big_pose.begin(), it_end = connections_in_big_pose.end(); it != it_end; ++it ) {
734 Size const res1_in_big_pose = it->first;
735 Size const res2_in_big_pose = it->second;
737 if ( res1_in_big_pose == res1_map &&
738 res2_in_big_pose == res2_map ) {
739 connection_ok =
true;
break;
742 if ( res2_in_big_pose == res1_map &&
743 res1_in_big_pose == res2_map ) {
744 connection_ok =
true;
break;
// Handling of an unconnected candidate is not visible in this extract.
748 if (!connection_ok) {
// Adopt the candidate map and continue with the connections of next_chain.
753 res_map = res_map_test;
754 check_connections( next_chain, res_map,
755 chain_id, scratch_sequence_start,
756 scratch_sequences, matches_to_each_scratch_sequence,
757 scratch_pose, connections_in_big_pose, res_maps );
// Fragment (signature, else-branch and return not visible): maps scratch_sequence_length
// consecutive big pose positions starting at match_pos onto scratch positions starting at
// scratch_start_pos.
// Starts out true; where it is cleared and how it is returned is not visible here.
770 bool one_to_one(
true );
771 for (
Size offset = 0; offset < scratch_sequence_length; offset++ ) {
772 Size const big_pose_pos = match_pos + offset;
773 Size const scratch_pos = scratch_start_pos + offset;
// Only big pose positions that are not yet in the map are added and logged.
774 if ( res_map.find( big_pose_pos) == res_map.end() ) {
775 res_map[ big_pose_pos ] = scratch_pos;
776 TR <<
"MAPPING " << match_pos+offset <<
" --> " << scratch_start_pos+offset << std::endl;
// Fragment (most of the signature not visible): extends a copy of res_map_old with match
// num_match of sequence num_sequence, checks the jumps, and recurses over the matches of the
// next sequence; a map that reaches the last sequence is stored in res_maps.
796 std::map< Size, Size >
const & connections_in_big_pose,
// Work on a copy of the incoming map.
799 ResMap res_map( res_map_old );
801 Size const match_pos = matches_to_each_scratch_sequence[ num_sequence ][ num_match ];
803 Size const scratch_sequence_length = scratch_sequences[num_sequence].size();
// i starts at the number of residues already mapped.
// NOTE(review): no update of i is visible inside the loop in this extract -- confirm that
// it advances per offset in the full source.
804 Size i( res_map.size() );
805 for (
Size offset = 0; offset < scratch_sequence_length; offset++ ) {
806 res_map[ match_pos + offset + 1 ] = i + 1;
810 bool const jump_match = check_jump_match( scratch_pose, connections_in_big_pose, res_map, chain_id );
// Give up on this branch when the jump check fails.
819 if ( !jump_match )
return;
// All sequences placed: keep this map.
821 if ( num_sequence == matches_to_each_scratch_sequence.size() ) {
822 res_maps.push_back( res_map );
826 Size const num_sequence_next = num_sequence + 1;
// Otherwise try every match of the next sequence.
828 if ( num_sequence_next <= matches_to_each_scratch_sequence.size() ) {
830 for (
Size k = 1; k <= matches_to_each_scratch_sequence[ num_sequence_next ].size(); k++ ) {
832 check_res_map_recursively( res_map, scratch_sequences, matches_to_each_scratch_sequence, scratch_pose, connections_in_big_pose, chain_id, num_sequence_next, k, res_maps );
// Fragment (most of the signature, and all branch bodies that decide the result, not
// visible): for each jump, checks whether some connection of the big pose maps onto the
// same pair of chains as the jump's two residues.
850 std::map< Size, Size >
const & connections_in_big_pose,
// One pass per jump (1-based); how res1 and res2 are obtained is not visible here.
857 for (
Size n = 1; n <= n_jump; n++ ) {
// Determine whether both jump residues appear as scratch residues (values) in res_map.
864 bool found_res1(
false ), found_res2(
false );
865 for ( ResMap::const_iterator it = res_map.begin(), it_end = res_map.end(); it != it_end; ++it ) {
866 if ( res1 == it->second ) found_res1 =
true;
867 if ( res2 == it->second ) found_res2 =
true;
// Handling of a jump with an unmapped residue is not visible in this extract.
869 if ( !found_res1 || !found_res2 ) {
874 bool connection_ok(
false );
// Scan the connections of the big pose.
876 for ( ResMap::const_iterator
877 it=connections_in_big_pose.begin(), it_end = connections_in_big_pose.end(); it != it_end; ++it ) {
879 Size const res1_in_big_pose = it->first;
880 Size const res2_in_big_pose = it->second;
// Only connections whose two ends are both keys of res_map are considered.
884 ResMap::const_iterator res1_map_id = res_map.find( res1_in_big_pose );
885 ResMap::const_iterator res2_map_id = res_map.find( res2_in_big_pose );
887 if ( res1_map_id != res_map.end() &&
888 res2_map_id != res_map.end() ) {
890 Size const res1_in_scratch_pose = res1_map_id->second;
891 Size const res2_in_scratch_pose = res2_map_id->second;
// Accept when the connection joins the same two chains as the jump, in either order.
894 if ( ( chain_id[ res1 ] == chain_id[ res1_in_scratch_pose ] &&
895 chain_id[ res2 ] == chain_id[ res2_in_scratch_pose ] ) ||
896 ( chain_id[ res1 ] == chain_id[ res2_in_scratch_pose ] &&
897 chain_id[ res2 ] == chain_id[ res1_in_scratch_pose ] ) ) {
898 connection_ok =
true;
// Handling of a jump without a matching connection is not visible in this extract.
905 if (!connection_ok) {
// Fragment (signature not visible): verifies that every mapped residue has the same
// one-letter code in `sequence` and in scratch_pose; aborts on the first mismatch.
922 for ( ResMap::const_iterator
923 it=res_map.begin(), it_end = res_map.end(); it != it_end; ++it ) {
// Key indexes `sequence` (1-based, hence the "-1" below); value is the scratch pose residue.
926 Size const i = it->first;
927 Size const i_scratch_pose = it->second;
929 if ( sequence[ i-1 ] != scratch_pose.
residue( i_scratch_pose ).
name1() ){
930 utility_exit_with_message(
"Mismatched sequence!!" );
// Fragment (signature and several branch headers not visible): fills pose_list from
// input_file and aborts if no structure was loaded.
944 using namespace core::io::silent;
945 using namespace protocols::rna;
// A single pose is appended here; the condition guarding this path is not visible
// (presumably the PDB-file case -- confirm).
956 pose_list.push_back( pose_op );
// Otherwise the file is read as silent file data and one pose is appended per entry.
961 silent_file_data.
read_file( input_file );
963 end = silent_file_data.
end(); iter !=
end; ++iter ) {
965 iter->fill_pose( *pose_op );
966 pose_list.push_back( pose_op );
// Post-process every loaded pose.
972 for (
Size n = 1; n <= pose_list.size(); n++ ){
// When coarse_rna_ is set and the pose's first residue is not coarse, the pose is replaced
// by coarse_pose (how coarse_pose is produced is not visible here).
978 if ( coarse_rna_ && !pose_op->residue(1).is_coarse() ){
981 *pose_op = coarse_pose;
// An input file that yields no structure is fatal.
989 if ( pose_list.size() < 1) {
990 utility_exit_with_message(
"No structure found in input file " + input_file );
// Fragment (signature and the insertion call not visible): visits every chunk set, chooses
// one chunk per set, aligns the pose on the first set and optionally dumps a PDB per set.
998 for (
Size n = 1; n <= num_chunk_sets(); n++ ) {
1000 ChunkSet const & chunk_set( *chunk_sets_[ n ] );
// 1-based random pick: floor( uniform * number of chunks ) + 1.
// NOTE(review): if RG.uniform() can return exactly 1.0 the index exceeds num_chunks() --
// confirm the generator's range.
1002 Size chunk_index =
static_cast<int>(
RG.uniform() * chunk_set.
num_chunks() ) + 1;
// With dump_pdb set, the first chunk is always used instead of the random pick.
1005 if ( dump_pdb ) chunk_index = 1;
// Alignment is done only for the first chunk set.
1013 if ( n==1 ) align_to_chunk( pose, chunk_set, chunk_index );
// With dump_pdb set, write the pose to "start_<n>.pdb".
1015 if ( dump_pdb ) pose.
dump_pdb(
"start_"+string_of(n)+
".pdb" );
// Fragment (signature not visible): aligns pose to chunk number 1 of the first chunk set.
// At least one chunk set must exist.
1026 runtime_assert( chunk_sets_.size() > 0 );
1027 ChunkSet const & chunk_set( *chunk_sets_[ 1 ] );
1028 align_to_chunk( pose, chunk_set, 1 );
// Fragment (signature and the alignment call itself not visible): copies the chunk set's
// AtomID -> AtomID map for pose into alignment_atom_id_map, one entry at a time.
1035 using namespace core::id;
1037 std::map< AtomID, AtomID > atom_id_map = chunk_set.
get_atom_id_map( pose, allow_insert_ );
// Each ( key, value ) pair of the std::map is passed to alignment_atom_id_map.set().
1041 for ( std::map< AtomID, AtomID >::const_iterator
1042 it=atom_id_map.begin(), it_end = atom_id_map.end(); it != it_end; ++it ) {
1043 alignment_atom_id_map.set( it->first, it->second );
1054 allow_insert_ = allow_insert;