33 #include <basic/options/option.hh>
43 #include <basic/Tracer.hh>
45 #include <utility/exit.hh>
47 #include <utility/tag/Tag.hh>
48 #include <boost/foreach.hpp>
49 #define foreach BOOST_FOREACH
52 #include <basic/options/keys/out.OptionKeys.gen.hh>
54 #include <basic/options/keys/ms.OptionKeys.gen.hh>
55 #include <basic/options/keys/packing.OptionKeys.gen.hh>
59 #include <utility/vector0.hh>
60 #include <utility/vector1.hh>
61 #include <ObjexxFCL/format.hh>
64 #include <utility/excn/Exceptions.hh>
69 namespace protein_interface_design {
74 using namespace chemical;
75 using namespace basic::options;
78 using namespace operation;
79 using namespace scoring;
81 using namespace ObjexxFCL::fmt;
83 using namespace multistate_design;
84 using namespace genetic_algorithm;
90 static basic::Tracer
TR(
"protocols.protein_interface_design.movers.ProteinInterfaceMultiStateDesignMover",t_info);
106 return "ProteinInterfaceMS";
112 multistate_packer_(0),
117 generations_( option[ OptionKeys::ms::generations ]() ),
118 pop_size_( option[ OptionKeys::ms::pop_size ]() ),
119 num_packs_( option[ OptionKeys::ms::num_packs ]() ),
120 pop_from_ss_( option[ OptionKeys::ms::pop_from_ss ]() ),
121 numresults_( option[ OptionKeys::ms::numresults ]() ),
122 fraction_by_recombination_( option[ OptionKeys::ms::fraction_by_recombination ]() ),
123 mutate_rate_( option[ OptionKeys::ms::mutate_rate ]() ),
124 boltz_temp_( option[ OptionKeys::ms::Boltz_temp ]() ),
125 anchor_offset_( option[ OptionKeys::ms::anchor_offset ]() ),
126 checkpoint_prefix_( option[ OptionKeys::ms::checkpoint::prefix ]() ),
127 checkpoint_interval_( option[ OptionKeys::ms::checkpoint::interval ]() ),
128 checkpoint_gz_( option[ OptionKeys::ms::checkpoint::gz ]() ),
129 checkpoint_rename_( option[ OptionKeys::ms::checkpoint::rename ]() ),
132 input_is_positive_( true ),
133 use_unbound_for_sequence_profile_( true ),
134 bump_threshold_( 1.0 ),
135 compare_energy_to_ground_state_( false ),
149 using namespace protocols::moves;
151 translate->step_size( 1000.0 );
152 translate->apply( *pose );
159 if ( !pose->residue_type(i).is_protein() )
continue;
160 std::string const restype( pose->residue(i).type().name() );
161 if ( restype ==
"PRO" || ( i>1 && pose->residue(i-1).type().name() ==
"PRO" ) )
continue;
163 pose->set_phi( i, -130.0 );
164 pose->set_psi( i, 130.0 );
189 using namespace core::chemical;
190 using namespace core::pack;
191 using namespace core::pack::task;
201 TR<<
"Total number of sequence possibilites: "<<seq_space_before<<std::endl;
204 TR<<
"Restricting the packer task to residues that would not clash in the unbound monomer..."<<std::endl;
210 allow_ala[
aa_ala ] =
true;
211 vector< Size > designable;
213 for(
Size i( 1 ),
end( ala_task->total_residue() ); i <=
end; ++i ){
216 if( !rtask.being_designed() )
219 rtask.restrict_absent_canonical_aas( allow_ala );
220 designable.push_back( i );
227 bump_scorefxn->
reset();
228 bump_scorefxn->set_weight(
fa_rep, 1.0 );
230 foreach(
Size const pos, designable ){
231 EnergyPerResidueFilter
const eprf( pos, bump_scorefxn,
fa_rep, 0 );
232 core::Real const ref_bump_energy( eprf.compute( *ala_pose ) );
233 PackerTaskOP template_substitution_task( ptask->clone() );
236 template_substitution_task->nonconst_residue_task(i).prevent_repacking();
239 list< ResidueTypeCOP >
const & allowed( rtask.allowed_residue_types() );
240 Pose ala_pose_and_single_residue( *ala_pose );
243 AA const aa( t->aa() );
244 PackerTaskOP specific_substitution_task( template_substitution_task->clone() );
246 allow_aa[ aa ] =
true;
247 specific_substitution_task->nonconst_residue_task(pos).restrict_absent_canonical_aas( allow_aa );
248 rotamer_trials( ala_pose_and_single_residue, *bump_scorefxn, specific_substitution_task );
249 core::Real const bump_energy( eprf.compute( ala_pose_and_single_residue ) );
251 allowed_aas_in_pos[ aa ] =
true;
253 rtask.restrict_absent_canonical_aas( allowed_aas_in_pos );
255 if( !allowed_aas_in_pos[ aa_in_pose ] ){
256 TR<<
"Native identity "<<pose.
residue( pos ).
name3()<<
" at position "<<pos<<
" in input pdb is not allowed by bump_test! Increase the bump_cutoff from the current "<<
bump_threshold_<<std::endl;
261 TR<<
"Finished restricting. Total number of sequences after restriction: "<<seq_space_after<<
'\n';
262 TR<<
"Orders of magnitude change: "<<log10( (
double)seq_space_after ) - log10( (
double)seq_space_before )<<std::endl;
268 using namespace core::pack::task;
270 unsigned long size( 1 );
271 for(
core::Size i( 1 ); i<=ptask->total_residue(); ++i ){
273 if( !rtask.being_designed() )
continue;
274 core::Size const pos_allowed( rtask.allowed_residue_types().size() );
309 if ( option[ OptionKeys::packing::resfile ].user() ) my_tf->push_back(
new ReadResfile );
318 for (
Size i(1),
end( ptask->total_residue() ); i <=
end; ++i ) {
321 if ( rtask.being_designed() ) {
322 design_positions.push_back(i);
326 std::set< core::chemical::AA > aaset;
327 std::list< ResidueTypeCOP >
const & allowed( rtask.allowed_residue_types() );
328 for ( std::list< ResidueTypeCOP >::const_iterator
t( allowed.begin() ),
end( allowed.end() );
332 if ( aaset.find( aa ) != aaset.end() )
continue;
334 TR(t_debug) <<
"adding choice " << aa << std::endl;
335 choices.push_back(
new PosType( i, aa ) );
337 rand->append_choices( choices );
342 TR(t_info) <<
"There will be " << rand->library_size() <<
" possible sequences." << std::endl;
355 TR(t_info) <<
"There are " <<
multistate_packer_->num_positive_states() <<
" positive states and "
375 TR(t_info) <<
"Adding single-state design entities:" << std::endl;
376 for ( SingleStateCOPs::const_iterator s( states.begin() ),
end( states.end() );
380 end( design_positions.end() ); i !=
end; ++i ) {
381 PosType pt( *i, (*s)->pose().residue_type(*i).aa() );
382 traits.push_back(
new PosType( pt ));
383 TR(t_info) << pt.to_string() <<
" ";
386 gen_alg_->add_parent_entity( traits );
387 TR(t_info) << std::endl;
393 gen_alg_->fill_with_random_entities();
405 if (
gen_alg_->current_generation_complete() )
gen_alg_->evolve_next_generation();
406 TR(t_info) <<
"Generation " <<
gen_alg_->current_generation() <<
":" << std::endl;
415 using namespace core::pack::task;
416 using namespace core::pack::task::operation;
421 PackerTaskCOP ptask_output_pose = tf->create_task_and_apply_taskoperations( output_pose );
425 (*scorefxn_)(copy_pose );
432 (*scorefxn_)(state_i);
435 if( !unmodifed_ptask->residue_task( resi ).being_designed() )
continue;
439 PackerTaskCOP ptask_statei = tf->create_task_and_apply_taskoperations( state_i );
441 (*scorefxn_)( state_i );
447 TR<<
"\nDumped "<<pdbname<<
'\n';
457 pdboutput->reference_pose( pose );
460 if ( option[ OptionKeys::out::prefix ].user() ) prefix = option[ OptionKeys::out::prefix ]();
463 TraitEntityHashMap
const & cache(
gen_alg_->entity_cache() );
465 for ( TraitEntityHashMap::const_iterator it( cache.begin() ),
end( cache.end() ); it !=
end; ++it ) {
466 sortable.push_back( it->second );
468 std::sort( sortable.begin(), sortable.end(), lt_OP_deref< Entity > );
470 TR(t_info) <<
"Evaluated " << sortable.size() <<
" sequences.\nBest sequences:\n";
475 end( sortable.end() ); it !=
end; ++it ) {
483 std::string pdbname( prefix +
"_ms_" + ObjexxFCL::lead_zero_string_of(counter,4) +
".pdb" );
485 std::ostringstream ms_info;
486 ms_info <<
"REMARK MultiState Fitness: " << F(5,4,entity.
fitness());
487 extra_lines.push_back( ms_info.str() );
489 ms_info <<
"REMARK MultiState Sequence:";
490 for ( EntityElements::const_iterator pos( entity.
traits().begin() ),
end( entity.
traits().end() );
491 pos !=
end; ++pos ) {
492 ms_info <<
" " << (*pos)->to_string();
493 TR(t_info) << (*pos)->to_string() <<
" ";
495 TR(t_info) <<
"fitness " << F(5,4,entity.
fitness()) <<
'\n';
496 extra_lines.push_back( ms_info.str() );
497 if ( counter == 0 ) {
499 pose = solution_pose;
501 info().insert(
info().
end(), extra_lines.begin(), extra_lines.end() );
507 pdboutput->add_info(
"multistate_design", extra_lines,
false );
508 (*pdboutput)( solution_pose, pdbname );
511 TR(t_info) << std::endl;
523 if ( tag->hasOption(
"generations") )
generations_ = tag->getOption<
Size>(
"generations");
524 if ( tag->hasOption(
"pop_size") )
pop_size_ = tag->getOption<
Size>(
"pop_size");
525 if ( tag->hasOption(
"num_packs") )
num_packs_ = tag->getOption<
Size>(
"num_packs");
526 if ( tag->hasOption(
"pop_from_ss") )
pop_from_ss_ = tag->getOption<
Size>(
"pop_from_ss");
527 if ( tag->hasOption(
"numresults") )
numresults_ = tag->getOption<
Size>(
"numresults");
528 if ( tag->hasOption(
"fraction_by_recombination") )
530 if ( tag->hasOption(
"mutate_rate") )
mutate_rate_ = tag->getOption<
Real>(
"mutate_rate");
531 if ( tag->hasOption(
"boltz_temp") )
boltz_temp_ = tag->getOption<
Real>(
"boltz_temp");
532 if ( tag->hasOption(
"anchor_offset") )
anchor_offset_ = tag->getOption<
Real>(
"anchor_offset");
534 if ( tag->hasOption(
"checkpoint_prefix") )
536 if ( tag->hasOption(
"checkpoint_interval") )
538 if ( tag->hasOption(
"checkpoint_gz") )
checkpoint_gz_ = tag->getOption<
bool>(
"checkpoint_gz");
539 if ( tag->hasOption(
"checkpoint_rename") )
545 if ( datamap.
has(
"scorefxns", scorefxn_key ) ) {
548 throw utility::excn::EXCN_RosettaScriptsOption(
"ScoreFunction " + scorefxn_key +
" not found in DataMap.");
555 unfolded_ = tag->getOption<
bool >(
"unfolded", 1 );
556 unbound_ = tag->getOption<
bool >(
"unbound", 1 );
562 bool at_least_one_negative_state( unfolded_ || unbound_ );
565 foreach(
TagPtr const btag, branch_tags ){
566 if( unfolded_ || unbound_ ){
567 TR<<
"ERROR: If you specify additional pdb files as states, it is assumed that those would have different energies than the starting pdb. As such, comparison of energies across different states is automatically done by grounding each pdb file to its starting 'best-score design' and comparing energy differences from that state. The energies of unbound and unfolded states then become tricky to interpret. You can use anchor_offset to get much of the effect of these additional states. Or, ask Sarel."<<std::endl;
568 throw utility::excn::EXCN_RosettaScriptsOption(
"");
571 bool const unbound( btag->getOption<
bool >(
"unbound", 0 ) );
572 bool const unfolded( btag->getOption<
bool >(
"unfolded", 0 ) );
584 if( btag->getName() ==
"Positive" )
586 else if( btag->getName() ==
"Negative" ){
588 at_least_one_negative_state =
true;
591 throw utility::excn::EXCN_RosettaScriptsOption(
"Name "+btag->getName()+
" is not recognized in ProteinInterfaceMultistateDesign::parse_my_tag." );
594 runtime_assert( at_least_one_negative_state );
617 using namespace core::pose;
618 using namespace protocols::multistate_design;
625 Pose const bound( pose );
639 bound_state->create_packer_data(
scorefxn_, ptask );
646 unbound_state->create_packer_data(
scorefxn_, ptask );
647 unfolded_state->create_packer_data(
scorefxn_, ptask );
673 state->create_packer_data(
scorefxn_, state_ptask );