23 #include <basic/options/option.hh>
39 #include <basic/Tracer.hh>
41 #include <utility/file/file_sys_util.hh>
42 #include <utility/vector1.hh>
44 #include <utility/vector0.hh>
45 #include <utility/io/izstream.hh>
46 #include <utility/io/ozstream.hh>
47 #include <utility/string_util.hh>
48 using utility::string_split;
50 #include <numeric/xyzVector.hh>
51 #include <numeric/conversions.hh>
61 #include <basic/options/keys/constraints.OptionKeys.gen.hh>
62 #include <basic/options/keys/dna.OptionKeys.gen.hh>
63 #include <basic/options/keys/out.OptionKeys.gen.hh>
64 #include <basic/options/keys/score.OptionKeys.gen.hh>
68 #include <ObjexxFCL/format.hh>
77 using namespace conformation;
78 using namespace chemical;
79 using namespace basic::options;
81 using namespace rotamer_set;
82 using namespace scoring;
83 using namespace ObjexxFCL::fmt;
// Tracer object used by the `TR << ...` logging statements in this file. It is constructed
// with the channel name "protocols.dna.util" and basic::t_info as its second argument.
85 static basic::Tracer
TR(
"protocols.dna.util", basic::t_info );
// Fragment: the enclosing function and loop are not visible in this listing.
// Returns true as soon as the squared distance between baseatom's coordinates and
// pres.nbr_atom_xyz() is below `threshold`.
// NOTE(review): the comparison uses distance_squared(), so `threshold` is presumably a
// squared distance -- confirm against the caller.
104 if ( baseatom->xyz().distance_squared( pres.
nbr_atom_xyz() ) < threshold )
return true;
// Fragment: the function signature and several intermediate lines are not visible in this
// listing. The visible code builds rotamers at position `presid` and returns a squared
// distance (`shortest_dis2`).
122 using namespace pack;
123 using namespace scoring;
124 using namespace task;
// Configure the packer task: bump checking off, and residue `presid` temporarily set to pack.
129 ptask->set_bump_check(
false );
130 ptask->temporarily_set_pack_residue( presid,
true );
// Only the `aa_arg` entry of keep_aas is switched on before restricting the residue task.
138 keep_aas[
aa_arg ] =
true;
139 restask.restrict_absent_canonical_aas( keep_aas );
// A user-supplied -score:weights option overrides weights_tag (its default is not visible here).
143 if ( option[ OptionKeys::score::weights ].user() )
144 weights_tag = option[ OptionKeys::score::weights ]();
151 rotset->set_resid( presid );
152 rotset->build_rotamers( pose, *scrfxn, *ptask, dummygraph,
false );
// Running minimum starts at the sentinel 10000. dis2 is declared uninitialized here; the
// line that assigns it inside the loop is not visible in this listing.
158 Real shortest_dis2(10000), dis2;
160 for ( Rotamers::const_iterator rotamer( rotset->begin() ); rotamer != rotset->end(); ++rotamer ) {
// A rotamer whose type is not aa_arg is tolerated only when the set holds exactly one
// rotamer (it is skipped); otherwise a warning is logged and runtime_assert( false ) fires.
161 if ( (*rotamer)->aa() !=
aa_arg ) {
163 if ( rotset->num_rotamers() == 1 )
continue;
164 TR <<
"warning non-arg rotamer " << (*rotamer)->aa() << std::endl;
165 runtime_assert(
false );
// Protein-side atom range runs from sidechainAtoms_begin() to heavyAtoms_end() of the rotamer.
168 Atoms::const_iterator prot_begin( (*rotamer)->sidechainAtoms_begin() ),
169 prot_end( (*rotamer)->heavyAtoms_end() ),
171 Atoms::const_iterator dna_begin =
175 if ( dis2 < shortest_dis2 ) shortest_dis2 = dis2;
// Early exit: the first value below `threshold` is returned without examining further rotamers.
176 if ( shortest_dis2 < threshold )
return shortest_dis2;
// Otherwise return the smallest value seen (still 10000 if no rotamer updated it).
178 return shortest_dis2;
// Fragment of a pairwise atom-distance search; the function name, return type and the
// remaining parameter(s) are not visible in this listing.
// Parameters shown: two iterator ranges over atoms, [a_begin, a_end) and [b_begin, b_end).
186 Atoms::const_iterator a_begin,
187 Atoms::const_iterator a_end,
188 Atoms::const_iterator b_begin,
189 Atoms::const_iterator b_end,
// Running minimum starts at the sentinel 10000; dis2 is assigned on line 198 before it is read.
193 Real shortest_dis2(10000), dis2;
// Compare every atom of range a with every atom of range b.
195 for ( Atoms::const_iterator atm_a( a_begin ); atm_a != a_end; ++atm_a ) {
196 for ( Atoms::const_iterator atm_b( b_begin ); atm_b != b_end; ++atm_b ) {
198 dis2 = atm_a->xyz().distance_squared( atm_b->xyz() );
199 if ( dis2 < shortest_dis2 ) shortest_dis2 = dis2;
// Early exit: once the minimum drops below `threshold` it is returned immediately, so the
// returned value is not necessarily the global minimum over all pairs.
200 if ( shortest_dis2 < threshold )
return shortest_dis2;
// No pair fell below `threshold`: return the smallest squared distance found. As far as the
// visible lines show, this is still 10000 when either range is empty.
203 return shortest_dis2;
// Fragment: the code that defines `vec` and `Z` is not visible in this listing.
214 using namespace scoring::dna;
// Returns the absolute value of dot( vec, Z ), i.e. the result is sign-independent.
223 return std::abs( dot(vec,Z) );
// Fragment (function header not visible): maps the nucleotide name in `dna` to its
// complement, returning a name in the same notation as the input:
// ADE <-> THY and CYT <-> GUA for the three-letter names, A <-> T and C <-> G for the
// padded one-letter names.
// NOTE(review): the padded literals below are reproduced exactly as they appear in this
// listing; verify their whitespace width against the real source file.
230 if ( dna ==
"ADE" )
return "THY";
231 if ( dna ==
"CYT" )
return "GUA";
232 if ( dna ==
"GUA" )
return "CYT";
233 if ( dna ==
"THY" )
return "ADE";
234 if ( dna ==
" A" )
return " T";
235 if ( dna ==
" C" )
return " G";
236 if ( dna ==
" G" )
return " C";
237 if ( dna ==
" T" )
return " A";
// Any other input falls through to utility_exit_with_message with "Bad DNA name " + dna.
238 utility_exit_with_message(
"Bad DNA name " + dna );
// Fragment (function header and fall-through case not visible): normalizes the nucleic-acid
// residue name in `name3` to an upper-case three-letter name.
// Each DNA base accepts three spellings (padded one-letter, padded "D"-prefixed, and the
// three-letter name itself), all mapping to ADE / CYT / GUA / THY.
// NOTE(review): padded literals are reproduced exactly as they appear in this listing;
// verify their whitespace width against the real source file.
246 if ( name3 ==
" A" || name3 ==
" DA" || name3 ==
"ADE" )
return "ADE";
247 if ( name3 ==
" C" || name3 ==
" DC" || name3 ==
"CYT" )
return "CYT";
248 if ( name3 ==
" G" || name3 ==
" DG" || name3 ==
"GUA" )
return "GUA";
249 if ( name3 ==
" T" || name3 ==
" DT" || name3 ==
"THY" )
return "THY";
// The "r"-prefixed names map to RAD / RCY / RGU / URA. Only the single padded spelling is
// accepted for these in the visible lines.
250 if ( name3 ==
" rA" )
return "RAD";
251 if ( name3 ==
" rC" )
return "RCY";
252 if ( name3 ==
" rG" )
return "RGU";
253 if ( name3 ==
" rU" )
return "URA";
261 using namespace chemical;
// Fragment of the basepair-finding routine; the function name, its other parameters and
// many intermediate lines are not visible in this listing.
// include_unpaired: consulted near the end of the outer loop (line 408); when false, the
// code after that check is skipped for every residue.
286 bool include_unpaired
289 using namespace scoring::dna;
291 TR <<
"\nFinding basepairs:\n";
// Candidate pairs whose reference-atom distance d is >= max_d are rejected (line 351).
293 Real const max_d( 4.0 );
// Precondition: the dna_chains container must be empty on entry.
297 runtime_assert( dna_chains.
empty() );
299 std::map< AA, AA > base_partner;
// Per-base atom whose coordinates are used for the distance/direction test:
// N1 for ade, gua, rad, rgu and N3 for thy, cyt, ura, rcy.
305 std::map< AA, std::string > hbond_atom;
306 hbond_atom[
na_ade ] =
"N1";
307 hbond_atom[
na_thy ] =
"N3";
308 hbond_atom[
na_gua ] =
"N1";
309 hbond_atom[
na_cyt ] =
"N3";
317 hbond_atom[
na_rad ] =
"N1";
318 hbond_atom[
na_ura ] =
"N3";
319 hbond_atom[
na_rgu ] =
"N1";
320 hbond_atom[
na_rcy ] =
"N3";
322 std::map< Size, Size > partner;
// Outer loop over residues 1..nres; non-DNA residues and residues already contained in
// dna_chains are skipped.
324 for (
Size i(1); i <= nres; ++i ) {
326 AA const & i_aa( i_rsd.aa() );
327 if ( !i_rsd.is_DNA() )
continue;
329 if ( dna_chains.
contains(i) )
continue;
// NOTE(review): hbond_atom[ ... ] is std::map::operator[], which inserts an empty string
// for an AA that has no entry; i_rsd.xyz() would then be called with an empty atom name.
332 xyzVec const hbatm_xyz_i( i_rsd.xyz( hbond_atom[ i_aa ] ) ),
336 bool paired(
false );
// Inner loop only looks at higher-numbered residues (j > i).
340 for (
Size j(i+1); j <= nres; ++j ) {
343 AA const & j_aa( j_rsd.aa() );
344 if ( !j_rsd.is_DNA() )
continue;
348 xyzVec const hbatm_xyz_j( j_rsd.xyz( hbond_atom[ j_aa ] ) );
350 Real d( hbatm_xyz_i.distance( hbatm_xyz_j ) );
351 if ( d >= max_d )
continue;
// Unit vector between the two reference atoms, followed by absolute dot products of that
// vector and of the two bases' y axes / z axes (axis definitions are not visible here).
356 hb_vec( ( hbatm_xyz_i - hbatm_xyz_j ).normalized() );
360 ydot( std::abs( dot( base_yaxis_i, base_yaxis_j ))),
362 dothbyi( std::abs( dot( base_yaxis_i, hb_vec ))),
363 dothbyj( std::abs( dot( base_yaxis_j, hb_vec ))),
365 dothbzi( std::abs( dot( base_zaxis_i, hb_vec ))),
366 dothbzj( std::abs( dot( base_zaxis_j, hb_vec )));
// Combined score: y-axis terms add to it, z-axis terms subtract from it.
368 Real const dotsum( 2*ydot + dothbyi + dothbyj - dothbzi - dothbzj );
// NOTE(review): pdbi/pdbj start as the pose indices; any later reassignment to PDB
// numbering is not visible in this listing.
369 int pdbi(i), pdbj(j);
375 if ( verbosity >= 2 ) {
376 TR <<
"basepair geom "
377 << pdbi <<
" vs. " << pdbj <<
" dis " << d
379 <<
" hbydots " << dothbyi <<
" " << dothbyj
380 <<
" hbzdots " << dothbzi <<
" " << dothbzj;
// Rejection test: the candidate fails if it scores below the best so far, or if any of the
// fixed cutoffs is violated (ydot < 0.8, dothby* < 0.6, dothbz* > 0.5).
383 if ( dotsum < bestdotsum || ydot < 0.8 ||
384 dothbyi < 0.6 || dothbyj < 0.6 ||
385 dothbzi > 0.5 || dothbzj > 0.5 )
387 if ( verbosity >= 2 ) TR <<
'\n';
390 if ( verbosity >= 2 ) TR <<
" acceptable" <<
'\n';
// A geometrically acceptable pair of non-complementary types only produces a warning on
// std::cerr in the visible lines.
393 if ( j_aa != base_partner[ i_aa ] ) {
394 std::cerr <<
"Warning: nucleic acids " << i_rsd.name3() <<
" " <<
395 pdbi <<
" and " << j_rsd.name3() <<
" " <<
396 pdbj <<
" have basepaired geometry, but are not " <<
397 "complementary types" <<
'\n';
// NOTE(review): exact floating-point comparison against 0. -- presumably bestdotsum is
// initialized to exactly 0 on a line not shown here; confirm.
404 if ( bestdotsum != 0. ) {
408 if ( paired || !include_unpaired )
continue;
412 dna_chains.
print( pose, TR );
// Fragment (function header, loop header and any recursion are not visible): walks the
// allowed residue types at position `resid` and assigns each one into `sequence`.
427 using namespace task;
429 Size resid( *seqset_iter );
433 type != restask.allowed_residue_types_end(); ++type ) {
436 sequence[ resid ] = *type;
// A sequence is stored only when seqset_iter refers to the last element of seq_indices.
438 if ( seqset_iter == seq_indices.end() - 1 ) sequences.push_back( sequence );
// Fragment (function header not visible): for every (index, residue type) entry of
// `sequence`, iterates over the residue types allowed by `rtask` and appends one entry to
// `sequences` for each allowed type whose aa() differs from the current one.
453 using namespace task;
454 for ( ResTypeSequence::const_iterator it( sequence.begin() ); it != sequence.end(); ++it ) {
455 Size index( it->first );
458 type != rtask.allowed_residue_types_end(); ++type ) {
// Skip types with the same aa() as the existing entry.
461 if ( (*type)->aa() == it->second->aa() )
continue;
// NOTE(review): how `mutant` is (re)initialized for each type is not visible in this listing.
463 mutant[ index ] = *type;
464 sequences.push_back( mutant );
// Fragment (function name and some parameters not visible): appends entries to
// `design_residues` for positions selected from the packer task.
// design_residues: output list, appended to.
// ptask: packer task queried per residue index.
471 std::list< PositionType > & design_residues,
473 task::PackerTask
const & ptask
477 for (
Size index(1); index <= nres; ++index ) {
// A position is skipped when it has none of: "TARGET" behavior, "SCAN" behavior, or
// being_designed().
479 if ( !ptask.residue_task(index).has_behavior(
"TARGET") &&
480 !ptask.residue_task(index).has_behavior(
"SCAN") &&
481 !ptask.residue_task(index).being_designed() )
continue;
// It is also skipped when the task does not pack it.
483 else if ( !ptask.pack_residue( index ) )
continue;
484 design_residues.push_back(
// Fragment (function header not visible): writes each entry of `seq` to `os` as
// "<index>-<one-letter name>", separating entries with ", ".
492 for ( ResTypeSequence::const_iterator pos( seq.begin() ); pos != seq.end(); ++pos ) {
493 if ( pos != seq.begin() ) os <<
", ";
494 os << pos->first <<
"-" << pos->second->name1();
// Fragment (function header not visible): appends the name1() of every residue type in
// `seq` to `str`, in the container's iteration order.
501 for ( ResTypeSequence::const_iterator pos( seq.begin() ); pos != seq.end(); ++pos ) {
502 str += pos->second->name1();
509 for ( ResTypeSequences::const_iterator seq( seqs.begin() ); seq != seqs.end(); ++seq ) {
// Fragment (function header and return not visible): formats the positions of `seq` into an
// ostringstream as comma-separated "<chain>.<number>" items.
520 std::ostringstream os;
521 for ( ResTypeSequence::const_iterator pos( seq.begin() ); pos != seq.end(); ++pos ) {
522 Size const index( pos->first );
527 if ( pos != seq.begin() ) os <<
",";
// Two formats exist: PDB chain/number taken from pose.pdb_info(), or pose chain and pose
// index. The condition that selects between them is not visible in this listing.
529 os << pose.
pdb_info()->chain( index ) <<
"." << pose.
pdb_info()->number( index );
531 os << pose.
chain( index ) <<
"." << index;
553 for ( ResTypeSequences::const_iterator seq( seqs.begin() ); seq != seqs.end(); ++seq ) {
// Fragment (function header not visible): collects into `rot_to_pack` the indices of
// rotamers that remain enabled given the residue types in `seq`.
569 Size const nrot( rotamer_sets->nrotamers() );
570 for (
Size roti(1); roti <= nrot; ++roti ) {
572 Size const rotpos( rotamer_sets->res_for_rotamer(roti) );
// Only positions present in `seq` are filtered: a rotamer whose name3() differs from the
// sequence's type at that position is skipped.
575 ResTypeSequence::const_iterator seqindex( seq.find( rotpos ) );
576 if ( seqindex != seq.end() ) {
578 std::string seq_typename( (seqindex->second)->name3() ),
579 rot_typename( rot_type->name3() );
580 if ( seq_typename != rot_typename )
continue;
// NOTE(review): closing braces are not shown; presumably this push_back runs for every
// rotamer that was not skipped, including those at positions absent from `seq` -- confirm.
582 rot_to_pack.push_back( roti );
// Report how many rotamers were left out.
584 Size const rots_off( nrot - rot_to_pack.size() );
585 TR <<
"Fixing DNA rotamers: " << rots_off
586 <<
" out of " << nrot <<
" rotamers disabled." << std::endl;
// Fragment (function header not visible): collects into `rot_to_pack` the indices of
// rotamers whose name3() matches the residue type stored in `single_sequence` at the
// rotamer's position; all other rotamers are skipped.
600 Size const nrot( rotamer_sets->nrotamers() );
601 for (
Size roti(1); roti <= nrot; ++roti ) {
602 Size const rotpos( rotamer_sets->res_for_rotamer(roti) );
// Unlike a map lookup with find(), single_sequence is indexed directly by rotpos here.
607 std::string seq_typename( ( single_sequence[ rotpos ] )->name3() ),
608 rot_typename( rot_type->name3() );
609 if ( seq_typename != rot_typename )
continue;
610 rot_to_pack.push_back( roti );
// Report how many rotamers were left out.
612 Size const rots_off( nrot - rot_to_pack.size() );
613 TR <<
"Fixing rotamers for a single sequence: " << rots_off
614 <<
" out of " << nrot <<
" rotamers disabled." << std::endl;
629 new_res->set_chi( 1, existing.chi(1) );
// Fragment (function header not visible): writes checkpoint files for the current iteration.
// Does nothing unless the dna::design::checkpoint option was set by the user; its value is
// used as the file root.
639 if ( ! option[ OptionKeys::dna::design::checkpoint ].user() )
return;
640 std::string fileroot( option[ OptionKeys::dna::design::checkpoint ]() );
642 TR <<
"writing dna mode checkpoint files..." <<
'\n';
// First file: <fileroot>.pdb.checkpoint (what is written to it is not visible here).
645 std::string pdbname( fileroot +
".pdb.checkpoint" );
646 utility::io::ozstream pdbout( pdbname.c_str() );
// Second file: <fileroot>.checkpoint, which records the iteration number and pdbname.
651 std::string checkpointname( fileroot +
".checkpoint" );
652 utility::io::ozstream out( checkpointname.c_str() );
// NOTE(review): the message says the checkpoint is skipped, but the next visible statement
// is runtime_assert( false ) -- confirm which behavior is intended.
656 std::cerr <<
"trouble opening file " << checkpointname
657 <<
" for writing... skipping checkpoint" << std::endl;
658 runtime_assert(
false );
663 out <<
"Iteration " << iter <<
'\n' << pdbname <<
'\n';
666 TR <<
"wrote " << pdbname <<
", " << checkpointname << std::endl;
// Fragment (function header not visible): restores state from a checkpoint file.
// Does nothing unless the dna::design::checkpoint option was set by the user.
675 if ( ! option[ OptionKeys::dna::design::checkpoint ].user() )
return;
676 std::string fileroot( option[ OptionKeys::dna::design::checkpoint ]() );
// NOTE(review): the construction of `filename` is not visible in this listing.
678 utility::io::izstream file;
680 file.open( filename.c_str() );
683 TR <<
"Reading DNA design checkpoint info from " << filename <<
'\n';
// Expected layout: the word "Iteration" followed by the last completed iteration, then the
// PDB file name. Returns without loading if the first word is not "Iteration".
688 file >> word >> last_iter >> skip;
689 if ( ( word !=
"Iteration" ) )
return;
690 file >> pdbfile >> skip;
// With the out::pdb_gz option on, ".gz" is appended to the stored file name.
693 if ( option[ OptionKeys::out::pdb_gz ]() ) pdbfile +=
".gz";
// Resume at the iteration after the one recorded in the checkpoint.
699 iter = last_iter + 1;
701 TR <<
"loaded " << pdbfile <<
" for iteration " << iter << std::endl;
// Fragment (function header not visible): renames the two checkpoint files.
// Does nothing unless the dna::design::checkpoint option was set by the user.
710 if ( ! option[ OptionKeys::dna::design::checkpoint ].user() )
return;
711 std::string fileroot( option[ OptionKeys::dna::design::checkpoint ]() );
// The files handled are <fileroot>.checkpoint and <fileroot>.pdb.checkpoint.
713 std::list< std::string > filenames;
714 filenames.push_back( fileroot +
".checkpoint" );
715 filenames.push_back( fileroot +
".pdb.checkpoint" );
717 for ( std::list< std::string >::const_iterator
filename( filenames.begin() );
// Each file is renamed to `nameold` (its construction is not visible here). The return
// value of std::rename is discarded, so a failed rename goes unreported by this statement.
721 std::rename( (*filename).c_str(), nameold.c_str() );
// Fragment (function header and loop body not visible): iterates over every string in
// str_defs, caching the end iterator once in `end`.
738 for ( Strings::const_iterator str_def( str_defs.begin() ),
end( str_defs.end() );
739 str_def !=
end; ++str_def ) {
// Fragment (function header not visible): reads definition strings from the file `filename`.
753 TR <<
"Getting dna_defs from file " <<
filename;
// A non-empty prefix is reduced to its last '/'-separated component before being used as a
// line filter.
754 if ( ! stripped_prefix.empty() ) {
755 stripped_prefix = string_split( stripped_prefix,
'/' ).back();
756 TR <<
" for " << stripped_prefix;
760 utility::io::izstream defs_file( filename.c_str() );
762 while ( getline( defs_file, line ) ) {
// When a prefix is given, only lines whose first word equals it are used.
// NOTE(review): how `words` is built is not visible; words.front() and words.begin()+1
// assume it is non-empty -- confirm blank lines are handled on the omitted lines.
766 if ( ! stripped_prefix.empty() && words.front() != stripped_prefix )
continue;
// All words after the first are inserted at the front of str_defs.
768 str_defs.insert( str_defs.begin(), words.begin()+1, words.end() );
// Fragment (function header and calls not visible): chooses the source of the definitions.
// The dna::design::dna_defs option takes precedence; dna::design::dna_defs_file is only
// consulted when dna_defs was not set by the user.
779 if ( option[ OptionKeys::dna::design::dna_defs ].user() ) {
781 Strings str_defs( option[ OptionKeys::dna::design::dna_defs ]().vector() );
783 }
else if ( option[ OptionKeys::dna::design::dna_defs_file ].user() ) {
787 option[ OptionKeys::dna::design::dna_defs_file ](),
// Fragment (function header not visible): picks the constraint file name.
799 using namespace scoring::constraints;
// When constraints::cst_file is set by the user, only its first entry (front()) is used.
802 if ( option[ OptionKeys::constraints::cst_file ].user() ) {
803 cst_file = option[ OptionKeys::constraints::cst_file ]().front();
// Fragment of the fold-tree construction routine that the error message below calls
// make_basepair_aware_fold_tree(). The function header, the final tree assembly and many
// intermediate lines (including several `else` lines and closing braces) are not visible.
// NOTE(review): num_chains starts at 1; its increment is not visible in this listing.
825 Size num_chains( 1 );
// Chain boundaries: a new chain starts wherever the PDB chain id changes between
// consecutive residues. The first chain starts at residue 1, the last ends at nres.
830 chain_start.push_back( 1 );
831 for(
Size resid = 1 ; resid < nres ; ++resid ) {
832 if( pdb_data->chain( resid ) != pdb_data->chain( resid + 1 ) ){
833 chain_end.push_back( resid );
834 chain_start.push_back( resid + 1 );
838 chain_end.push_back( nres );
// One cut per chain boundary; jump_pairs holds two residue numbers per cut.
// jump_pair_count is the 1-based index of the next jump slot to fill.
841 Size num_cuts( num_chains - 1 );
842 ObjexxFCL::FArray1D_int cut_positions( num_cuts, 0 );
843 ObjexxFCL::FArray2D_int jump_pairs( 2, num_cuts, 0 );
844 Size jump_pair_count( 1 );
// The end of every chain except the last becomes a cut position.
847 for(
Size cut_num = 1 ; cut_num < chain_end.size() ; ++cut_num ){
848 cut_positions( cut_num ) = chain_end[ cut_num ];
// Numeric type codes used for chains and segments (these are the values printed in the logs).
851 Size const amino( 1 );
852 Size const bped_dna( 2 );
853 Size const non_bped_dna( 3 );
859 for(
Size this_chain = 1 ; this_chain <= num_chains ; ++this_chain ) {
860 TR <<
"Working on chain " << this_chain << std::endl;
// Protein chain: always a single segment of type amino.
864 chain_type.push_back( amino );
865 TR <<
"Found 1 initial segments for chain " << this_chain << std::endl;
866 TR <<
"Chain " << this_chain <<
" segment 1 start res " << chain_start[ this_chain ] <<
867 " end res " << chain_end[ this_chain ] <<
" of type 1" << std::endl;
// Search all (protein residue in this chain, any residue) pairs for the smallest check_dist.
// NOTE(review): the computation of check_dist and any residue-type filter are not visible;
// the log takes std::sqrt( best_dist ), so it is presumably a squared distance.
871 Real best_dist( 9999.0 );
872 for(
Size prot_res = chain_start[ this_chain ] ; prot_res <= chain_end[ this_chain ] ; ++prot_res ) {
873 for(
Size dna_res = 1 ; dna_res <= nres ; ++dna_res ) {
876 if( check_dist < best_dist ) {
877 best_dist = check_dist;
878 protein_root[ this_chain ] = prot_res;
879 closest_base[ this_chain ] = dna_res;
884 TR <<
"Protein closest approach is res " << protein_root[ this_chain ]<<
" with base " << closest_base[ this_chain ] <<
" with distance " << std::sqrt( best_dist ) << std::endl;
// Store the jump with the lower residue number in row 1 and the higher in row 2
// (the `else` line between the two assignments pairs is not visible).
887 if( protein_root[this_chain] < closest_base[this_chain] ) {
888 jump_pairs( 1, jump_pair_count ) = protein_root[this_chain];
889 jump_pairs( 2, jump_pair_count ) = closest_base[this_chain];
891 jump_pairs( 2, jump_pair_count ) = protein_root[this_chain];
892 jump_pairs( 1, jump_pair_count ) = closest_base[this_chain];
// Chains that are neither protein nor DNA are a fatal error.
901 std::cerr <<
"Bad call to make_basepair_aware_fold_tree() with non-protein, non-DNA type" << std::endl;
902 utility_exit_with_message(
"make_base_aware_fold_tree() takes only protein, DNA!" );
// DNA chain: recorded as bped_dna, then split into segments below.
905 chain_type.push_back( bped_dna );
909 Size num_segments( 1 );
// The first segment starts at the chain start; its type depends on whether that residue has
// a partner (find_partner returning 0 is treated as "unpaired" throughout).
914 segment_start.push_back( chain_start[ this_chain ] );
915 if( dna_info.find_partner( chain_start[ this_chain ] ) != 0 ) {
916 segment_type.push_back( bped_dna );
918 segment_type.push_back( non_bped_dna );
// Walk adjacent residue pairs and start a new segment at every change of pairing state,
// or when both are paired but their partners lie on different PDB chains.
920 for(
Size resid = chain_start[this_chain] ; resid < chain_end[ this_chain ] ; ++resid ) {
922 bool this_bped( dna_info.find_partner( resid ) != 0 );
923 bool next_bped( dna_info.find_partner( resid + 1 ) != 0 );
925 if( this_bped && !next_bped ) {
926 segment_end.push_back( resid );
927 segment_start.push_back( resid + 1 );
928 segment_type.push_back( non_bped_dna );
929 }
else if ( !this_bped && next_bped ) {
930 segment_end.push_back( resid );
931 segment_start.push_back( resid + 1 );
932 segment_type.push_back( bped_dna );
933 }
else if (!this_bped && !next_bped ) {
935 }
else if ( pdb_data->chain( dna_info.find_partner( resid ) ) !=
936 pdb_data->chain( dna_info.find_partner( resid + 1 ) ) ) {
937 segment_end.push_back( resid );
938 segment_start.push_back( resid + 1 );
939 segment_type.push_back( bped_dna );
// The last segment ends at the chain end.
942 segment_end.push_back( chain_end[ this_chain ] );
944 num_segments = segment_start.size();
947 TR <<
"Found " << num_segments <<
" initial segments for chain " << this_chain << std::endl;
948 for(
Size this_segment = 1 ; this_segment <= num_segments ; ++this_segment ) {
949 TR <<
"Chain " << this_chain <<
" segment " << this_segment <<
" start res " << segment_start[ this_segment ] <<
950 " end res " << segment_end[ this_segment ] <<
" of type " << segment_type[ this_segment ] << std::endl;
// For each basepaired segment, remember its middle residue (integer division).
957 for(
Size this_segment = 1 ; this_segment <= num_segments ; ++this_segment ) {
958 if( segment_type[ this_segment ] == bped_dna ) {
959 bp_middle[ this_segment ] = ( segment_start[ this_segment ] + segment_end[ this_segment ] ) / 2;
// Absorb unpaired segments into their neighbors by moving the neighbors' boundaries:
// a leading one goes to the next segment, a trailing one to the previous segment, a
// one-residue interior one to the previous segment, and longer interior ones are split at
// their midpoint between both neighbors.
// NOTE(review): updates to num_processed are not visible in this listing.
965 Size num_processed( num_segments );
967 if( num_segments > 1 ) {
968 for(
Size this_segment = 1 ; this_segment <= num_segments ; ++this_segment ) {
969 if( segment_type[ this_segment ] == non_bped_dna && num_segments > 1 ) {
971 if( this_segment == 1 ) {
972 segment_start[ this_segment + 1 ] = segment_start[ this_segment ];
973 }
else if( this_segment == num_segments ) {
974 segment_end[ this_segment - 1 ] = segment_end[ this_segment ];
977 if( segment_start[ this_segment ] == segment_end[ this_segment ] ) {
979 segment_end[ this_segment - 1 ] = segment_end[ this_segment ];
982 Size split_pos( ( segment_start[ this_segment ] + segment_end[ this_segment ] ) / 2 );
983 segment_end[ this_segment - 1 ] = split_pos;
984 segment_start[ this_segment + 1 ] = split_pos + 1;
// NOTE(review): in the visible lines chain_type only ever receives amino or bped_dna;
// whether non_bped_dna is stored anywhere is not visible -- confirm this branch is reachable.
992 if( num_segments == 1 && chain_type[ this_chain ] == non_bped_dna ) {
// Same closest-approach search as for protein chains, but scanning this DNA chain's
// residues against all residues.
994 Real best_dist( 9999.0 );
995 for(
Size dna_res = chain_start[ this_chain ] ; dna_res <= chain_end[ this_chain ] ; ++dna_res ) {
996 for(
Size prot_res = 1 ; prot_res <= nres ; ++prot_res ) {
999 if( check_dist < best_dist ) {
1000 best_dist = check_dist;
1001 protein_root[ this_chain ] = prot_res;
1002 closest_base[ this_chain ] = dna_res;
1007 TR <<
"Unpaired DNA closest approach is res " << closest_base[ this_chain ]<<
" with amino acid " << protein_root[ this_chain ] <<
" with distance " << std::sqrt( best_dist ) << std::endl;
// Lower residue number goes in row 1, as above.
1010 if( protein_root[this_chain] < closest_base[this_chain] ) {
1011 jump_pairs( 1, jump_pair_count ) = protein_root[this_chain];
1012 jump_pairs( 2, jump_pair_count ) = closest_base[this_chain];
1014 jump_pairs( 2, jump_pair_count ) = protein_root[this_chain];
1015 jump_pairs( 1, jump_pair_count ) = closest_base[this_chain];
// Report the final (basepaired) segments and add a jump from each segment's middle residue
// to that residue's partner.
1022 TR <<
"Found " << num_processed <<
" final segments for chain " << this_chain << std::endl;
1023 Size accum_count( 0 );
1024 for(
Size this_segment = 1 ; this_segment <= num_segments ; ++this_segment ) {
1025 if( segment_type[ this_segment ] == bped_dna ) {
1027 TR <<
"Chain " << this_chain <<
" segment " << accum_count <<
" start res " << segment_start[ this_segment ] <<
1028 " end res " << segment_end[ this_segment ] <<
" of type " << segment_type[ this_segment ] << std::endl;
1031 Size mid_partner = dna_info.find_partner( bp_middle[ this_segment ] );
// The jump is only made from the lower-numbered side of the pair; the second half of this
// condition is not visible in this listing.
1037 if( ( bp_middle[ this_segment ] < mid_partner ) &&
1039 TR <<
"Making jump between " << bp_middle[ this_segment ] <<
" and " << mid_partner << std::endl;
1040 jump_pairs( 1, jump_pair_count ) = bp_middle[ this_segment ];
1041 jump_pairs( 2, jump_pair_count ) = mid_partner;
// Fragment (function name, first parameter(s), and return statements not visible): scans
// the first num_jumps columns of jump_pairs for a jump that links this_chain and other_chain.
// num_jumps: number of jump columns to inspect (1-based, inclusive).
// this_chain / other_chain: PDB chain ids to test for an existing connection.
// jump_pairs: 2 x N array of residue numbers, one column per jump.
1058 Size const num_jumps,
1059 char const this_chain,
1060 char const other_chain,
1061 ObjexxFCL::FArray2D_int & jump_pairs
1065 for(
Size i = 1 ; i <= num_jumps ; ++i ) {
// PDB chain ids of the two residues joined by jump i.
1068 char const jump_chain1( pose.
pdb_info()->chain( jump_pairs( 1, i ) ) );
1069 char const jump_chain2( pose.
pdb_info()->chain( jump_pairs( 2, i ) ) );
// Match in either orientation. The action taken on a match is not visible in this listing.
1073 if( ( jump_chain1 == this_chain && jump_chain2 == other_chain ) ||
1074 ( jump_chain2 == this_chain && jump_chain1 == other_chain ) ) {
// Fragment (function header and the constraint-adding calls are not visible): sets up
// geometry targets and atom ids for the backbone linkage on either side of the base range
// [start_base, end_base].
1090 using namespace scoring::constraints;
1092 using numeric::conversions::radians;
// Target geometry. The angle constants are passed through radians() below, so they are in
// degrees. NOTE(review): O3_P_distance is presumably in Angstroms -- confirm.
1099 Real const O3_P_distance( 1.608 );
1100 Real const O3_angle( 119.8 );
1101 Real const P_angle( 103.4 );
1102 Real const O1P_angle( 108.23 );
// Widths used for the harmonic functions (the distance function itself is not visible).
1104 Real const distance_stddev( 0.3 );
1105 Real const angle_stddev_degrees( 35 );
// One harmonic function per angle, with both the target and the width converted to radians.
1108 FuncOP const O3_angle_func(
new HarmonicFunc( radians( O3_angle ), radians( angle_stddev_degrees ) ) );
1109 FuncOP const P_angle_func(
new HarmonicFunc( radians( P_angle ), radians( angle_stddev_degrees ) ) );
1110 FuncOP const O1P_angle_func(
new HarmonicFunc( radians( O1P_angle ), radians( angle_stddev_degrees ) ) );
// Precondition, checked only when assert() is enabled.
1112 assert( start_base <= end_base );
// Linkage before the range: C3*/O3* are taken from residue start_base - 1 (rsd1) and
// P/O5*/O1P from residue start_base (rsd2).
// NOTE(review): any guard against start_base == 1 is not visible in this listing.
1121 AtomID const C3_id( rsd1.atom_index(
"C3*" ), start_base - 1 );
1122 AtomID const O3_id( rsd1.atom_index(
"O3*" ), start_base - 1 );
1123 AtomID const P_id( rsd2.atom_index(
"P" ), start_base );
1124 AtomID const O5_id( rsd2.atom_index(
"O5*" ), start_base );
1125 AtomID const O1P_id( rsd2.atom_index(
"O1P" ), start_base );
// Linkage after the range: C3*/O3* are taken from residue end_base (rsd1) and P/O5*/O1P
// from residue end_base + 1 (rsd2).
1145 AtomID const C3_id( rsd1.atom_index(
"C3*" ), end_base );
1146 AtomID const O3_id( rsd1.atom_index(
"O3*" ), end_base );
1147 AtomID const P_id( rsd2.atom_index(
"P" ), end_base + 1 );
1148 AtomID const O5_id( rsd2.atom_index(
"O5*" ), end_base + 1 );
1149 AtomID const O1P_id( rsd2.atom_index(
"O1P" ), end_base + 1 );