41 #include <basic/options/option.hh>
42 #include <basic/options/keys/dna.OptionKeys.gen.hh>
43 #include <basic/options/keys/packing.OptionKeys.gen.hh>
44 #include <basic/options/keys/run.OptionKeys.gen.hh>
45 #include <basic/options/keys/out.OptionKeys.gen.hh>
47 #include <basic/Tracer.hh>
49 #include <utility/vector0.hh>
50 #include <utility/vector1.hh>
51 #include <utility/string_util.hh>
52 #include <utility/io/izstream.hh>
53 #include <utility/io/ozstream.hh>
54 #include <utility/tag/Tag.hh>
57 #include <ObjexxFCL/string.functions.hh>
65 namespace ObjexxFCL {
namespace fmt { } }
using namespace ObjexxFCL::fmt;
74 core::Size const asize( a.size() ), bsize( b.size() );
75 if ( asize < bsize )
return true;
76 if ( asize > bsize )
return false;
77 for ( ResTypeSequence::const_iterator ait( a.begin() ), bit( b.begin() ), aend( a.end() ),
78 bend( b.end() ); ait != aend && bit != bend; ++ait, ++bit ) {
79 if ( ait->first < bit->first )
return true;
80 if ( ait->first > bit->first )
return false;
81 if ( ait->second->name1() < bit->second->name1() )
return true;
82 if ( ait->second->name1() > bit->second->name1() )
return false;
91 using utility::string_split;
93 using namespace chemical;
94 using namespace conformation;
95 using namespace optimization;
96 using namespace basic::options;
99 using namespace operation;
100 using namespace pose;
101 using namespace scoring;
103 using namespace ObjexxFCL;
104 using namespace ObjexxFCL::fmt;
106 using basic::t_warning;
108 using basic::t_debug;
109 static basic::Tracer
TR(
"protocols.dna.DnaInterfacePacker");
110 static basic::Tracer
TR_spec(
"protocols.dna.Specificity");
115 DnaInterfacePackerCreator::keyname()
const
117 return DnaInterfacePackerCreator::mover_name();
121 DnaInterfacePackerCreator::create_mover()
const {
126 DnaInterfacePackerCreator::mover_name()
128 return "DnaInterfacePacker";
133 DnaInterfacePacker::DnaInterfacePacker()
134 : protocols::simple_moves::PackRotamersMover(
"DnaInterfacePacker"),
138 filename_root_( option[ OptionKeys::out::prefix ]() ),
140 probe_specificity_(false),
141 reversion_scan_(false),
142 protein_scan_(false),
144 include_dna_potentials_in_specificity_calculations_(false),
146 specificity_repacks_(1),
147 minimize_options_(0),
150 initialization_state_(false),
159 ) : protocols::simple_moves::PackRotamersMover(
"DnaInterfacePacker"),
162 minimize_( minimize ),
163 filename_root_( filename_root ),
165 probe_specificity_(false),
166 reversion_scan_(false),
167 protein_scan_(false),
169 include_dna_potentials_in_specificity_calculations_(false),
171 specificity_repacks_(1),
172 minimize_options_(0),
175 initialization_state_(false),
179 binding_E_ = option[ OptionKeys::dna::design::binding ]();
181 if ( option[ OptionKeys::dna::design::probe_specificity ].user() ) {
186 base_only_ = option[ OptionKeys::dna::design::base_contacts_only ]();
188 option[ OptionKeys::dna::design::specificity::include_dna_potentials ]();
189 if ( option[ OptionKeys::dna::design::protein_scan ].user() ) {
191 allowed_types_ = option[ OptionKeys::dna::design::protein_scan ]();
236 Pose starting_pose( pose );
239 for ( ResTypeSequences::const_iterator dnaseq(
dna_sequences_.begin() ),
253 run( pose, rot_to_pack );
272 if ( !dnaseq.empty() ) seqtag =
"_" +
dna_seq_tag( pose, dnaseq );
274 pdbname_ = pdbroot +
"_" + lead_zero_string_of(trial,4);
278 std::pair< SequenceScores, SequenceScores > specificities;
283 for ( SequenceScores::const_iterator bound( specificities.first.begin() ),
284 bound_end( specificities.first.end() ); bound != bound_end; ++bound ) {
285 std::ostringstream os;
286 os << std::showpoint << std::fixed << std::setprecision(
PRECISION);
287 os <<
"Specificities(bound): ";
288 os <<
seq_pdb_str( bound->first, pose ) <<
" = " << bound->second;
289 TR << os.str() <<
'\n';
290 info_lines.push_back(
"REMARK " + os.str() );
293 for ( SequenceScores::const_iterator binding( specificities.second.begin() ),
294 binding_end( specificities.second.end() ); binding != binding_end; ++binding ) {
295 std::ostringstream os;
296 os << std::showpoint << std::fixed << std::setprecision(
PRECISION);
297 os <<
"Specificities(binding): ";
298 os <<
seq_pdb_str( binding->first, pose ) <<
" = " << binding->second;
299 TR << os.str() <<
'\n';
300 info_lines.push_back(
"REMARK " + os.str() );
314 Real binding_score(0.);
317 bool const output_pdb(
318 trial == 0 && option[ OptionKeys::dna::design::output_unbound_pdb ]() );
321 std::ostringstream bindingstream;
322 bindingstream << std::showpoint << std::fixed << std::setprecision(
PRECISION)
323 <<
"Binding energy: " << binding_score;
324 TR << bindingstream.str() << std::endl;
325 info_lines.push_back(
"REMARK " + bindingstream.str() );
330 std::pair< Real, Real > overall_specificities( 0., 0. );
333 if ( specificities.first.find( currseq ) != specificities.first.end() )
334 overall_specificities.first = specificities.first[ currseq ];
335 if ( specificities.second.find( currseq ) != specificities.second.end() )
336 overall_specificities.second = specificities.second[ currseq ];
337 reversion_scan( pose, bound_score, binding_score, overall_specificities );
343 bool const overwrite_old_info(
true);
344 pdboutput_->add_info(
"REMARK DnaInterfacePacker " + pdbroot +
":", info_lines, !overwrite_old_info );
347 (*pdboutput_)( pose,
pdbname_ +
".pdb" );
351 info().insert(
info().
end(), info_lines.begin(), info_lines.end() );
363 TR << std::showpoint << std::fixed << std::setprecision(
PRECISION);
384 if ( option[ OptionKeys::packing::resfile ].user() ) my_tf->push_back(
new ReadResfile );
390 my_tf->push_back( rdtpdi );
402 rot_dna_hb_filter->report();
412 if ( !target_seq.empty() )
dna_sequences_.push_back( target_seq );
415 TR <<
"DNA sequences to be considered:" <<
'\n';
425 std::string const min_type( option[ OptionKeys::run::min_type ]() );
426 Real const tolerance( option[ OptionKeys::run::min_tolerance ]() );
429 TR <<
"Chi minimization will be allowed for the following residues:";
430 for (
Size index(1); index <=
task()->total_residue(); ++index ) {
432 if (
task()->pack_residue( index ) ) {
437 TR <<
" " << pose.
chain(index) <<
"." << index;
464 return is_initialized;
484 if ( tag->hasOption(
"binding") )
binding_E_ = tag->getOption<
bool>(
"binding");
485 if ( tag->hasOption(
"base_only") )
base_only_ = tag->getOption<
bool>(
"base_only");
486 if ( tag->hasOption(
"minimize") )
minimize_ = tag->getOption<
bool>(
"minimize");
487 if ( tag->hasOption(
"reversion_scan") )
reversion_scan_ = tag->getOption<
bool>(
"reversion_scan");
488 if ( tag->hasOption(
"probe_specificity") ) {
492 if ( tag->hasOption(
"pdb_output") ) {
493 if ( tag->getOption<
bool>(
"pdb_output") ) {
497 if ( tag->hasOption(
"protein_scan") )
protein_scan_ = tag->getOption<
bool>(
"protein_scan");
513 runtime_assert(
task() );
518 for (
Size moltenres(1); moltenres <=
rotamer_sets()->nmoltenres(); ++moltenres ) {
520 if ( !
task()->has_behavior(
"SCAN", resid ) )
continue;
523 seq_indices.push_back( resid );
526 if ( seq_indices.empty() )
return;
536 sequence !=
end; ++sequence ) {
551 for ( ResTypeSequence::iterator postype( sequence.begin() ),
end( sequence.end() );
552 postype !=
end; ++postype ) {
553 Size const index( postype->first );
555 runtime_assert( dnatop.
top() == index );
556 if ( !dnatop.
paired() )
continue;
562 complement[ comppos ] = comptype;
565 for ( ResTypeSequence::iterator postype( complement.begin() ),
end( complement.end() );
566 postype !=
end; ++postype ) {
567 sequence.insert( *postype );
583 Pose unbound_pose( pose );
585 unbind.
apply( unbound_pose );
597 unbound_outputter->score_function( *nonconst_scorefxn );
598 (*unbound_outputter)( unbound_pose, pdbname +
"_unbound.pdb" );
610 TR_spec <<
"\nMeasuring specificity by repacking against other possible DNA states:" << std::endl;
615 if ( current_sequence.empty() ) {
616 TR <<
"No double-stranded DNA positions found!" << std::endl;
617 return std::make_pair( 0., 0. );
621 specificity_sequences.push_back( current_sequence );
625 for ( ResTypeSequences::iterator sequence( specificity_sequences.begin() ),
626 end( specificity_sequences.end() ); sequence !=
end; ++sequence ) {
637 std::pair< SequenceScores, SequenceScores > sequence_scores(
640 Real bound_specificity(0.), binding_specificity(0.);
644 TR_spec <<
"\nCalculating bound specificity:";
647 TR_spec <<
"\nCalculating binding specificity:";
651 return std::make_pair( bound_specificity, binding_specificity );
659 std::pair< SequenceScores, SequenceScores >
663 TR_spec <<
"\nMeasuring individual basepair specificity by explicitly modeling alternative "
664 <<
"DNA states:" << std::endl;
669 if ( current_sequence.empty() ) {
670 TR <<
"No targeted double-stranded DNA positions found!" << std::endl;
671 return std::pair< SequenceScores, SequenceScores >();
677 for ( ResTypeSequence::const_iterator bppos( current_sequence.begin() ),
678 end( current_sequence.end() ); bppos !=
end; ++bppos ) {
680 single_bp_variants.push_back( current_sequence );
681 Size index( bppos->first );
686 if ( (*type)->aa() == bppos->second->aa() )
continue;
692 single_bp_mutant[ index ] = *
type;
693 single_bp_variants.push_back( single_bp_mutant );
696 for ( ResTypeSequences::iterator sequence( single_bp_variants.begin() ),
697 end( single_bp_variants.end() ); sequence !=
end; ++sequence ) {
702 std::pair< SequenceScores, SequenceScores > sequence_scores(
705 if ( current_sequence.size() > 1 ) {
709 current_single_bp[ bppos->first ] = bppos->second;
713 TR_spec <<
"\nCalculating bound specificity for " <<
seq_pdb_str( current_single_bp, pose );
714 bound_specificities[ current_single_bp ] =
717 TR_spec <<
"\nCalculating binding specificity for "
719 binding_specificities[ current_single_bp ] =
724 for ( SequenceScores::const_iterator ss_it( sequence_scores.first.begin() ),
725 ss_end( sequence_scores.first.end() ); ss_it != ss_end; ++ss_it ) {
726 bound_scores[ ss_it->first ] = ss_it->second;
728 for ( SequenceScores::const_iterator ss_it( sequence_scores.second.begin() ),
729 ss_end( sequence_scores.second.end() ); ss_it != ss_end; ++ss_it ) {
730 binding_scores[ ss_it->first ] = ss_it->second;
735 TR_spec <<
"\nCalculating bound specificity for " <<
seq_pdb_str( current_sequence, pose );
739 bound_specificities[ current_sequence ] =
742 TR_spec <<
"\nCalculating binding specificity for " <<
seq_pdb_str( current_sequence, pose );
743 binding_specificities[ current_sequence ] =
748 for ( SequenceScores::const_iterator it( bound_scores.begin() ),
end( bound_scores.end() );
750 std::ostringstream os;
751 os << std::showpoint << std::fixed << std::setprecision(
PRECISION);
752 os <<
"REMARK SeqScore(bound): " <<
seq_pdb_str( it->first, pose ) <<
" = " << it->second;
753 info().push_back( os.str() );
755 for ( SequenceScores::const_iterator it( binding_scores.begin() ),
end( binding_scores.end() );
757 std::ostringstream os;
758 os << std::showpoint << std::fixed << std::setprecision(
PRECISION);
759 os <<
"REMARK SeqScore(binding): " <<
seq_pdb_str( it->first, pose ) <<
" = " << it->second;
760 info().push_back( os.str() );
763 return std::make_pair( bound_specificities, binding_specificities );
770 std::pair< SequenceScores, SequenceScores >
774 Pose starting_pose( pose );
778 for ( ResTypeSequences::const_iterator dna_sequence( dna_sequences.begin() ),
779 end( dna_sequences.end() ); dna_sequence !=
end; ++dna_sequence ) {
780 Real best_trial_E(0), best_trial_binding_E(0);
785 single_sequence.push_back( & pose.
residue_type(index) );
788 for ( ResTypeSequence::const_iterator it( dna_sequence->begin() ),
789 seq_end( dna_sequence->end() ); it != seq_end; ++it ) {
790 single_sequence[ it->first ] = it->second;
796 run( pose, rot_to_pack );
803 nonconst_scorefxn->set_weight(
dna_bp, 0. );
804 nonconst_scorefxn->set_weight(
dna_bs, 0. );
806 Real trial_E( (*nonconst_scorefxn)( pose ) );
807 if ( trial == 0 || ( trial_E < best_trial_E ) ) {
808 best_trial_E = trial_E;
811 if (
pdboutput_ && option[ OptionKeys::dna::design::specificity::output_structures ]() ) {
814 "_spec_" +
dna_seq_tag( pose, *dna_sequence ) +
".pdb"
816 pdboutput_->score_function( *nonconst_scorefxn );
817 (*pdboutput_)( pose,
pdbname );
820 sequence_scores[ *dna_sequence ] = best_trial_E;
821 sequence_binding_scores[ *dna_sequence ] = best_trial_binding_E;
823 pose = starting_pose;
824 return std::make_pair( sequence_scores, sequence_binding_scores );
838 TR_spec << std::showpoint << std::fixed << std::setprecision(
PRECISION) <<
'\n';
840 Real const temp( option[ OptionKeys::dna::design::Boltz_temp ]() );
844 for ( SequenceScores::const_iterator iter( sequence_scores.begin() );
845 iter != sequence_scores.end(); ++iter ) {
846 Real score( iter->second );
847 if ( iter == sequence_scores.begin() || ( score < low ) ) low = score;
850 Real const inv_temp( 1.0 / temp );
851 Real num(0), denom(0);
852 for ( SequenceScores::const_iterator iter( sequence_scores.begin() );
853 iter != sequence_scores.end(); ++iter ) {
855 Real score( iter->second );
857 for ( ResTypeSequence::const_iterator pos( sequence.begin() ); pos != sequence.end(); ++pos ) {
858 if ( pos != sequence.begin() )
TR_spec <<
", ";
867 TR_spec <<
": " << score <<
'\n';
868 Real term( std::exp( ( low - score ) * inv_temp ) );
869 if ( sequence == target_sequence ) num += term;
872 if ( denom == 0. )
return 0.;
873 Real const spec( num / denom );
874 TR_spec <<
"\tspecificity: " << spec << std::endl;
899 Real starting_bound_score,
900 Real starting_binding_score,
901 std::pair< Real, Real > starting_specificities
906 TR << std::flush <<
"Starting reversion scan: using starting scores: " <<
"bound = "
907 << starting_bound_score <<
", binding = " << starting_binding_score
908 <<
", specificity.bound = " << starting_specificities.first <<
", specificity.binding = "
909 << starting_specificities.second << std::endl <<
'\n';
911 Real current_bound_score( starting_bound_score ), current_binding_score( starting_binding_score );
912 std::pair< Real, Real > current_specificities( starting_specificities );
917 fixed_residue_types.push_back( &pose.
residue_type( index ) );
923 for (
Size index(1),
end( fixed_residue_types.size() ); index !=
end; ++index ) {
925 if ( reference_type->is_protein() &&
926 fixed_residue_types[index]->name3() != reference_type->name3() ) {
927 reversions.push_back(
Reversion( index, reference_type ) );
931 Real const dscore_cutoff( option[ OptionKeys::dna::design::reversion::dscore_cutoff ]() ),
932 dspec_cutoff( option[ OptionKeys::dna::design::reversion::dspec_cutoff ]() );
938 for ( Reversions::iterator rev( reversions.begin() ),
end( reversions.end() );
939 rev !=
end; ++rev ) {
940 Size const index( rev->index );
943 fixed_residue_types[ index ] = reference_type;
945 Real best_score(0.), best_binding_score(0.);
946 std::pair< Real, Real > best_specificities;
953 run( pose, rot_to_pack );
958 if ( trial == 1 || score < best_score ) {
966 fixed_residue_types[ rev->index ] = starting_type;
968 Real const dscore_bound( best_score - current_bound_score ),
969 dscore_binding( best_binding_score - current_binding_score ),
970 dspec_bound( best_specificities.first - current_specificities.first ),
971 dspec_binding( best_specificities.second - current_specificities.second );
973 TR <<
"Scores for reversion from " << starting_type->name3() <<
" to "
974 << reference_type->name3() <<
" at ";
976 TR << pose.
pdb_info()->chain( index ) <<
"." << pose.
pdb_info()->number( index ) <<
":";
978 TR << pose.
chain( index ) <<
"." << index <<
":";
980 TR <<
" bound = " << best_score <<
" (" << dscore_bound <<
")"
981 <<
", binding = " << best_binding_score <<
" (" << dscore_binding <<
")"
982 <<
", specificity.bound = " << best_specificities.first <<
" (" << dspec_bound <<
")"
983 <<
", specificity.binding = " << best_specificities.second <<
" (" << dspec_binding
986 rev->dscore_bound = dscore_bound;
987 rev->dscore_binding = dscore_binding;
988 rev->dspec_bound = dspec_bound;
989 rev->dspec_binding = dspec_binding;
993 std::sort( reversions.begin(), reversions.end() );
994 Reversions::iterator rev( reversions.begin() );
995 for ( Reversions::const_iterator
end( reversions.end() ); rev !=
end; ++rev ) {
997 if ( rev->dscore_binding > dscore_cutoff || rev->dspec_binding < dspec_cutoff )
continue;
999 Size const index( rev->index );
1002 fixed_residue_types[ index ] = reference_type;
1003 TR <<
"(round " << round <<
") Reversion from " << starting_type->name3()
1004 <<
" to " << reference_type->name3() <<
" at ";
1008 TR << pose.
chain( index ) <<
"." << index;
1010 TR <<
" is acceptable and is now fixed.\n";
1015 if ( rev != reversions.end() ) reversions.erase( rev );
1018 TR <<
"No (more) acceptable reversions found." << std::endl;
1028 run( pose, rot_to_pack );
1054 TR <<
"Starting protein_scan with allowed types " << typestring <<
"." << std::endl;
1058 for ( std::string::const_iterator typechar( typestring.begin() );
1059 typechar != typestring.end(); ++typechar ) {
1061 if ( aas.empty() ) {
1062 TR(t_warning) <<
"no ResidueType found in ResidueTypeSet for " << *typechar << std::endl;
1063 runtime_assert(
false);
1065 allowed_type_caps.push_back( aas.front() );
1067 runtime_assert( !allowed_type_caps.empty() );
1070 std::list< Size > scan_positions;
1071 for (
Size index(1); index <=
task()->total_residue(); ++index ) {
1072 if ( !
task()->design_residue( index ) )
continue;
1074 scan_positions.push_back( index );
1077 if ( option[ OptionKeys::dna::design::checkpoint ].user() ) {
1079 utility::io::izstream file;
1081 file.open( filename.c_str() );
1083 TR <<
"Reading existing (incomplete?) protein scan results file\n";
1086 while ( file.getline( line ) ) {
1088 if ( words.front() !=
"Done" )
continue;
1090 if ( words.size() < 5 )
continue;
1091 std::istringstream
ss_index( words.back() );
1094 TR <<
"skipping previously completed scan position " << index <<
'\n';
1095 scan_positions.remove( index );
1104 utility::io::ozstream outfile( outfilename.c_str(), std::ios::app );
1106 std::cerr <<
"trouble opening file " << outfilename <<
" for writing" << std::endl;
1113 outfile << std::showpoint << std::fixed << std::setprecision(
PRECISION);
1116 Pose const input_pose( pose );
1120 pose_residue_types.push_back( &pose.
residue_type(index) );
1124 Real best_native_score(0.), best_native_dG(0.);
1125 std::pair< Real, Real > best_native_specificities;
1129 Pose best_pose( pose );
1135 run( pose, native_rot_to_pack );
1143 if ( trial == 1 || native_score < best_native_score ) {
1144 best_native_score = native_score;
1154 outfile <<
"Scanning protein positions that interface with DNA position(s) "
1156 outfile <<
"Using native scores from best trial: " <<
"bound = " << best_native_score
1157 <<
", binding = " << best_native_dG <<
", specificity.bound = "
1158 << best_native_specificities.first <<
", specificity.binding = "
1159 << best_native_specificities.second <<
'\n';
1164 for ( std::list< Size >::const_iterator index( scan_positions.begin() ),
1165 end( scan_positions.end() ); index !=
end; ++index ) {
1167 outfile <<
"current designable residues are";
1168 for (
Size i(1); i <=
task()->total_residue(); ++i ) {
1169 if ( !
task()->design_residue(i) )
continue;
1171 outfile <<
" " << pose.
pdb_info()->chain(i) <<
"." << pose.
pdb_info()->number(i);
1173 outfile <<
" " << pose.
chain(i) <<
"." << i;
1181 for ( ResidueTypeCOPs::const_iterator scan_type( allowed_type_caps.begin() );
1182 scan_type != allowed_type_caps.end(); ++scan_type ) {
1190 TR <<
"packing was disabled at " << pose.
pdb_info()->chain(*index) <<
"."
1191 << pose.
pdb_info()->number(*index) << std::endl;
1192 runtime_assert(
false);
1194 if ( std::find( art.begin(), art.end(), *scan_type ) == art.end() ) {
1195 TR << (*scan_type)->name() <<
" not allowed at " << pose.
pdb_info()->chain(*index) <<
"."
1196 << pose.
pdb_info()->number(*index) << std::endl;
1201 pose_residue_types[ *index ] = *scan_type;
1203 Real best_score(0.), best_dG(0.);
1204 std::pair< Real, Real > best_specificities;
1210 run( pose, rot_to_pack );
1215 if ( trial == 1 || score < best_score ) {
1221 outfile <<
"Scores for mutation to " << pose_residue_types[ *index ]->name3() <<
" at ";
1223 outfile << pose.
pdb_info()->chain( *index ) <<
"." << pose.
pdb_info()->number( *index );
1225 outfile << pose.
chain( *index ) <<
"." << *index;
1227 outfile <<
"." << native_type->name() <<
":" <<
" bound = " << best_score <<
" ("
1228 << best_score - best_native_score <<
")" <<
", binding = " << best_dG <<
" ("
1229 << best_dG - best_native_dG <<
")" <<
", specificity.bound = "
1230 << best_specificities.first <<
" ("
1231 << best_specificities.first - best_native_specificities.first <<
")"
1232 <<
", specificity.binding = " << best_specificities.second
1233 <<
" (" << best_specificities.second - best_native_specificities.second <<
")\n";
1236 pose_residue_types[ *index ] = native_type;
1237 outfile <<
"Done scanning at index " << *index <<
'\n';
1242 std::rename( outfilename.c_str(), newname.c_str() );
1265 std::ostringstream ss;
1267 for ( ResTypeSequence::const_iterator pos( sequence.begin() ); pos != sequence.end(); ++pos ) {
1268 Size const seqpos( pos->first );
1270 if ( sep ) ss <<
"_";
1272 ss << pose.
pdb_info()->chain( seqpos ) <<
"." << pose.
pdb_info()->number( seqpos );
1274 ss << pose.
chain( seqpos ) <<
"." << seqpos;
1289 runtime_assert(
task() );
1295 Size const resid( pos.
top() );
1297 if ( rtask.has_behavior(
"TARGET") ) {
1300 if ( rtask.target_type() != 0 ) sequence[ resid ] = rtask.target_type();
1304 if ( !pos.
paired() )
continue;
1308 if ( comp_rtask.target_type() != 0 ) sequence[ comp_resid ] = comp_rtask.target_type();
1309 else sequence[ comp_resid ] = & pose.
residue_type( comp_resid );
1324 if ( ! it->second.paired() )
continue;
1325 Size const resid( it->first );
1328 current_sequence[ resid ] = pose.
residue( resid ).
type();
1330 return current_sequence;