Rosetta 3.5
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
DnaInterfaceMultiStateDesign.cc
Go to the documentation of this file.
1 // -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
2 // vi: set ts=2 noet:
3 //
4 // (c) Copyright Rosetta Commons Member Institutions.
5 // (c) This file is part of the Rosetta software suite and is made available under license.
6 // (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
7 // (c) For more information, see http://www.rosettacommons.org. Questions about this can be
8 // (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
9 
10 /// @file DnaInterfaceMultiStateDesign.cc
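11 /// @brief Multistate design of a protein-DNA interface: a genetic algorithm searches protein sequences scored against a target DNA state and competitor DNA states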
12 /// @author ashworth
13 
14 // Unit headers
17 
18 // Package headers
24 #include <protocols/dna/util.hh> // find_basepairs, substitute_residue
29 
33 #include <basic/options/option.hh>
37 #include <core/pose/PDBInfo.hh>
38 #include <core/pose/Pose.hh>
41 #include <basic/Tracer.hh>
42 
43 #include <utility/exit.hh>
44 #include <utility/string_util.hh> // string_split
45 #include <utility/tag/Tag.hh>
46 
47 // option key includes
48 #include <basic/options/keys/out.OptionKeys.gen.hh>
49 #include <basic/options/keys/score.OptionKeys.gen.hh>
50 #include <basic/options/keys/ms.OptionKeys.gen.hh>
51 #include <basic/options/keys/packing.OptionKeys.gen.hh>
52 
54 #include <utility/vector0.hh>
55 #include <utility/vector1.hh>
56 #include <ObjexxFCL/format.hh>
57 
58 
59 namespace protocols {
60 namespace dna {
61 
62 using utility::vector1;
63 using utility::string_split;
64 using namespace core;
65  using namespace chemical;
66  using namespace basic::options;
67  using namespace pack;
68  using namespace task;
69  using namespace operation;
70  using namespace scoring;
71 
72 using namespace ObjexxFCL::fmt;
73 
74 using namespace multistate_design;
75 using namespace genetic_algorithm;
76 
77 using basic::t_error;
78 using basic::t_info;
79 using basic::t_debug;
80 using basic::t_trace;
81 static basic::Tracer TR("protocols.dna.DnaInterfaceMultiStateDesign",t_info);
82 
85 {
87 }
88 
92 }
93 
/// Returns the fixed string "DnaInterfaceMultiStateDesign".
/// NOTE(review): the signature line is not visible here; presumably this is the
/// Creator's mover_name(), which the constructor forwards to the base class -- confirm.
96 {
97  return "DnaInterfaceMultiStateDesign";
98 }
99 
/// Member initialization list of the constructor.
/// - Passes DnaInterfaceMultiStateDesignCreator::mover_name() to the
///   PackRotamersMover base class.
/// - Initializes gen_alg_, multistate_packer_ and dna_chains_ with 0 (empty).
/// - Seeds every tunable parameter (generations, population size, packing
///   count, recombination/mutation settings, Boltzmann temperature, anchor
///   offset and the four checkpoint settings) from the matching
///   OptionKeys::ms command-line option.
/// The constructor body itself is empty.
101  : protocols::simple_moves::PackRotamersMover( DnaInterfaceMultiStateDesignCreator::mover_name() ),
102  gen_alg_(0),
103  multistate_packer_(0),
104  dna_chains_(0),
105  // option flags/parameters: default to command line options
106  // parse_my_tag method may change them
107  generations_( option[ OptionKeys::ms::generations ]() ),
108  pop_size_( option[ OptionKeys::ms::pop_size ]() ),
109  num_packs_( option[ OptionKeys::ms::num_packs ]() ),
110  pop_from_ss_( option[ OptionKeys::ms::pop_from_ss ]() ),
111  numresults_( option[ OptionKeys::ms::numresults ]() ),
112  fraction_by_recombination_( option[ OptionKeys::ms::fraction_by_recombination ]() ),
113  mutate_rate_( option[ OptionKeys::ms::mutate_rate ]() ),
114  boltz_temp_( option[ OptionKeys::ms::Boltz_temp ]() ),
115  anchor_offset_( option[ OptionKeys::ms::anchor_offset ]() ),
116  checkpoint_prefix_( option[ OptionKeys::ms::checkpoint::prefix ]() ),
117  checkpoint_interval_( option[ OptionKeys::ms::checkpoint::interval ]() ),
118  checkpoint_gz_( option[ OptionKeys::ms::checkpoint::gz ]() ),
119  checkpoint_rename_( option[ OptionKeys::ms::checkpoint::rename ]() )
120 {}
121 
123 
/// Entry point for one design run on `pose`, in three phases:
/// initialize( pose ) sets up the search, run() executes it, and
/// output_results( pose ) reports the results using the same pose.
124 void
126 {
127  initialize( pose );
128  run();
129  output_results( pose );
130 }
131 
/// Replaces dna_chains_ with a newly allocated copy of the DnaChains object
/// that `dna_chains` refers to. The runtime_assert checks that `dna_chains`
/// is non-null before it is dereferenced for the copy.
132 void
134 {
135  runtime_assert( dna_chains );
136  dna_chains_ = new DnaChains( *dna_chains );
137 }
138 
/// Stores `defs` in targeted_dna_ by assignment. A non-empty targeted_dna_ is
/// later handed to the DNA-interface task operation
/// (rest_to_dna_int->copy_targeted_dna) when the default TaskFactory is built.
139 void
141 {
142  targeted_dna_ = defs;
143 }
144 
/// Prepares everything needed for a multistate design run on `pose`.
///
/// Steps, in order:
///  1. Clears info() and, if no DnaChains were supplied, detects basepairs
///     in `pose` with find_basepairs().
///  2. If no score function is set, selects a weights tag: "dna" unless the
///     user gave -score:weights.
///  3. Creates a fresh GeneticAlgorithm and configures generation count,
///     population size, number to propagate (half the population) and the
///     fraction produced by recombination.
///  4. Creates a PositionSpecificRandomizer with the configured mutation rate.
///  5. Builds the TaskFactory (a DNA-specific default if the base class has
///     none, otherwise a copy of the existing one), appends the
///     protein-DNA hydrogen-bond rotamer filter, and installs it.
///  6. Creates the PackerTask and, for each designed protein position,
///     registers one PosType choice per distinct amino acid.
///  7. Configures the multistate packer as the fitness function, adds the DNA
///     target/competitor states, and runs single-state design.
///  8. Applies checkpoint settings and tries to read a checkpoint.
///  9. If the current generation's population is empty, seeds it with the
///     single-state design sequences, then fills by mutation and at random.
///
/// NOTE(review): some statements of this function are not visible here (e.g.
/// the declarations of `rest_to_dna_int` and `choices`, and the construction
/// of multistate_packer_); the comments cover only what is shown.
145 void
147 {
148  // clear out (any) pre-existing info strings (from previous apply calls)
149  info().clear();
150 
    // only detect basepairs when the caller did not supply DnaChains
151  if ( ! dna_chains_ ) {
152  dna_chains_ = new DnaChains;
153  find_basepairs( pose, *dna_chains_ );
154  }
155 
    // default weights tag is "dna"; a user-specified -score:weights overrides it
156  if ( ! score_function() ) {
157  std::string weights_tag("dna");
158  if ( option[ OptionKeys::score::weights ].user() )
159  weights_tag = option[ OptionKeys::score::weights ]();
161  }
162 
163  // always start with a fresh GeneticAlgorithm
164  // important when reusing DnaInterfaceMultiStateDesign mover
165  gen_alg_ = new GeneticAlgorithm; /// APL does this require a PosType ctor?
166 
167  // set up genetic algorithm
168  gen_alg_->set_max_generations( generations_ );
169  gen_alg_->set_max_pop_size( pop_size_ );
    // half of the population (truncated to an integer) is propagated
170  gen_alg_->set_num_to_propagate( static_cast<core::Size>( 0.5 * pop_size_ ) );
171  gen_alg_->set_frac_by_recomb( fraction_by_recombination_ );
172 
173  // set up sequence randomizer
174  PositionSpecificRandomizer::OP rand = new PositionSpecificRandomizer; /// APL does this require a PosType ctor?
175  rand->set_mutation_rate( mutate_rate_ );
176 
177  TaskFactoryOP my_tf;
178  // if PackRotamerMover base class has no initialized TaskFactory, create default one here
179  if ( ! task_factory() ) {
180  // DNA-specific TaskFactory -> PackerTask -> figure out positions to design
181  my_tf = new TaskFactory;
182  my_tf->push_back( new InitializeFromCommandline );
183  if ( option[ OptionKeys::packing::resfile ].user() ) my_tf->push_back( new ReadResfile );
185  rest_to_dna_int->copy_dna_chains( dna_chains_ );
186  if ( ! targeted_dna_.empty() ) rest_to_dna_int->copy_targeted_dna( targeted_dna_ );
187  my_tf->push_back( rest_to_dna_int );
188  } else { // TaskFactory already exists, add to it
189  // (temporary? parser has no support for RotamerOperations yet)
190  my_tf = new TaskFactory( *task_factory() );
191  }
192  // a protein-DNA hbonding filter for ex rotamers that the PackerTask makes available to the rotamer set during rotamer building (formerly known as 'rotamer explosion')
193  RotamerDNAHBondFilterOP rot_dna_hb_filter( new RotamerDNAHBondFilter );
194  my_tf->push_back( new AppendRotamer( rot_dna_hb_filter ) );
195  task_factory( my_tf ); // PackRotamersMover base class setter
196 
197  PackerTaskOP ptask = task_factory()->create_task_and_apply_taskoperations( pose );
198 
199  // figure out design positions/choices from PackerTask
200  vector1< Size > design_positions;
201  for ( Size i(1), end( ptask->total_residue() ); i <= end; ++i ) {
202  // ignore DNA positions
203  if ( !pose.residue_type(i).is_protein() ) continue;
204  ResidueLevelTask const & rtask( ptask->residue_task(i) );
205  if ( rtask.being_designed() ) {
206  design_positions.push_back(i);
207  // will be passed to randomizer
209  // to avoid duplicate AA's (such as for multiple histidine ResidueTypes)
210  std::set< core::chemical::AA > aaset;
211  std::list< ResidueTypeCOP > const & allowed( rtask.allowed_residue_types() );
212  for ( std::list< ResidueTypeCOP >::const_iterator t( allowed.begin() ), end( allowed.end() );
213  t != end; ++t ) {
214  core::chemical::AA aa( (*t)->aa() );
215  // avoid duplicate AA's (such as for multiple histidine ResidueTypes)
216  if ( aaset.find( aa ) != aaset.end() ) continue;
217  aaset.insert(aa);
218  TR(t_debug) << "adding choice " << aa << std::endl;
219  choices.push_back( new PosType( i, aa ) );
220  }
221  rand->append_choices( choices );
222  }
223  }
224  // done setting up randomizer
225  gen_alg_->set_rand( rand );
226  TR(t_info) << "There will be " << rand->library_size() << " possible sequences." << std::endl;
227 
228  // set up fitness function
230 
231  multistate_packer_->set_aggregate_function(
233 
234  multistate_packer_->set_scorefxn( score_function() );
235 
236  // add target and competitor states to fitness function
237  add_dna_states( pose, ptask );
238 
239  TR(t_info) << "There are " << multistate_packer_->num_positive_states() << " positive states and "
240  << multistate_packer_->num_negative_states() << " negative states" << std::endl;
241 
242  // do single-state designs to find best theoretical single-state energy
243  multistate_packer_->single_state_design();
244  // done setting up fitness function
245  gen_alg_->set_func( multistate_packer_ );
246 
247  // enable checkpointing
248  gen_alg_->set_checkpoint_prefix( checkpoint_prefix_ );
249  gen_alg_->set_checkpoint_write_interval( checkpoint_interval_ );
250  gen_alg_->set_checkpoint_gzip( checkpoint_gz_ );
251  gen_alg_->set_checkpoint_rename( checkpoint_rename_ );
252  gen_alg_->read_checkpoint();
253 
254  // start the genetic algorithm from scratch if not resuming from a checkpoint
    // (an empty current-generation population is taken to mean nothing was restored)
255  if ( gen_alg_->population( gen_alg_->current_generation() ).size() == 0 ) {
256  // add single-state design sequence(s) to genetic algorithm starting population
257  SingleStateCOPs states( multistate_packer_->positive_states() );
258  TR(t_info) << "Adding single-state design entities:" << std::endl;
259  for ( SingleStateCOPs::const_iterator s( states.begin() ), end( states.end() );
260  s != end; ++s ) {
    // one entity per positive state: its amino acid at every design position
261  EntityElements traits;
262  for ( vector1< Size >::const_iterator i( design_positions.begin() ),
263  end( design_positions.end() ); i != end; ++i ) {
264  PosTypeOP pt = new PosType( *i, (*s)->pose().residue_type(*i).aa() );
265  traits.push_back( pt );
266  TR(t_info) << pt->to_string() << " ";
267  }
268  gen_alg_->add_entity( traits );
269  gen_alg_->add_parent_entity( traits );
270  TR(t_info) << std::endl;
271  }
272 
273  // make more entities by mutation of single-state seeds
274  gen_alg_->fill_by_mutation( pop_from_ss_ );
275  // the rest are fully random
276  gen_alg_->fill_with_random_entities();
277  // clear parents for the next generation
278  gen_alg_->clear_parents();
279  }
280 }
281 
/// Drives the genetic algorithm until gen_alg_->complete() reports true.
/// Each pass evolves a new generation only when the current one is already
/// complete, then logs the generation number, evaluates fitnesses and prints
/// the population at info level.
/// NOTE(review): the "only when complete" guard presumably lets a partially
/// evaluated generation restored from a checkpoint be finished first -- confirm.
282 void
284 {
285  // loop over generations
286  while ( !gen_alg_->complete() ) {
287  if ( gen_alg_->current_generation_complete() ) gen_alg_->evolve_next_generation();
288  TR(t_info) << "Generation " << gen_alg_->current_generation() << ":" << std::endl;
289  gen_alg_->evaluate_fitnesses();
290  gen_alg_->print_population( TR(t_info) );
291  }
292 }
293 
/// Reports the results of the search and writes the top solution into `pose`.
///
/// Every entity in the genetic algorithm's cache is copied into a local
/// vector and sorted with lt_OP_deref< Entity >. For each entity, in sorted
/// order:
///  - the positive states are re-evaluated with that entity and the first
///    positive state's pose is copied;
///  - two REMARK lines (fitness and sequence) are built and the sequence and
///    fitness are logged;
///  - for the first entity only (counter == 0), the solution is copied into
///    `pose`, the REMARK lines are appended to info(), and `pose` is rescored;
///  - a PDB named "<prefix>_ms_<4-digit counter>.pdb" is written, where prefix
///    is "result" unless -out:prefix was given.
/// The loop stops once `counter` reaches numresults_, so numresults_ == 0
/// writes no PDB files but still updates `pose` from the first entity.
///
/// NOTE(review): the declaration of `pdboutput` is not visible here.
/// NOTE(review): the sort presumably puts the best fitness first (the log
/// header says "Best sequences") -- confirm against Entity's operator<.
294 void
296 {
298  pdboutput->score_function( *multistate_packer_->scorefxn() );
299  pdboutput->reference_pose( pose );
300 
301  std::string prefix("result");
302  if ( option[ OptionKeys::out::prefix ].user() ) prefix = option[ OptionKeys::out::prefix ]();
303 
304  // sort local copy of sequence/fitness cache
305  typedef GeneticAlgorithm::TraitEntityHashMap TraitEntityHashMap;
306  TraitEntityHashMap const & cache( gen_alg_->entity_cache() );
307  vector1< EntityOP > sortable;
308 // std::copy( cache.begin(), cache.end(), sortable.begin() ); // FAIL(?)
309  for ( TraitEntityHashMap::const_iterator it( cache.begin() ), end( cache.end() );
310  it != end; ++it ) {
311  sortable.push_back( it->second );
312  }
313  std::sort( sortable.begin(), sortable.end(), lt_OP_deref< Entity > );
314 
315  TR(t_info) << "Evaluated " << sortable.size() << " sequences.\nBest sequences:\n";
316  // list and output top solutions
317  Size counter(0);
318 
319  for ( vector1< EntityOP >::const_iterator it( sortable.begin() ),
320  end( sortable.end() ); it != end; ++it ) {
321  Entity & entity(**it);
322  // apply sequence to existing positive state(s)
323  multistate_packer_->evaluate_positive_states( entity );
324  // copy pose
325  Pose solution_pose = multistate_packer_->positive_states().front()->pose();
326  // output pdb with information
327  std::string pdbname( prefix + "_ms_" + ObjexxFCL::lead_zero_string_of(counter,4) + ".pdb" );
328  Strings extra_lines;
329  std::ostringstream ms_info;
330  ms_info << "REMARK MultiState Fitness: " << F(5,4,entity.fitness());
331  extra_lines.push_back( ms_info.str() );
332  ms_info.str(""); // funky way to 'empty' ostringstream
333  ms_info << "REMARK MultiState Sequence:";
334  for ( EntityElements::const_iterator
335  pos( entity.traits().begin() ), end( entity.traits().end() );
336  pos != end; ++pos ) {
337  ms_info << " " << (*pos)->to_string();
338  TR(t_info) << (*pos)->to_string() << " ";
339  }
340  TR(t_info) << "fitness " << F(5,4,entity.fitness()) << '\n';
341  extra_lines.push_back( ms_info.str() );
342  if ( counter == 0 ) {
343  // copy top result to input pose
344  pose = solution_pose;
345  // save info for top result
346  info().insert( info().end(), extra_lines.begin(), extra_lines.end() );
347  // ensure that pose has up-to-date score information
348  (*multistate_packer_->scorefxn())(pose);
349  }
350  // set numresults to 0 to suppress output
    // (checked after the counter == 0 branch, so the top result still reaches `pose`)
351  if ( counter >= numresults_ ) break;
352  pdboutput->add_info( "multistate_design", extra_lines, false );
353  (*pdboutput)( solution_pose, pdbname );
354  ++counter;
355  }
356  TR(t_info) << std::endl;
357 }
358 
362 }
363 
364 ///@brief parse "XML" Tag (specifically in the context of the parser/scripting scheme)
/// Each recognized option that is present on `tag` overwrites the matching
/// member; options that are absent leave the member at its current value.
/// @param tag      supplies the optional overrides read below
/// @param datamap  forwarded unchanged to the base-class parse helpers
/// @param filters  forwarded unchanged to the base-class parse helpers
/// @param movers   forwarded unchanged to the base-class parse helpers
/// @param pose     forwarded unchanged to the base-class parse helpers
366  TagPtr const tag,
367  moves::DataMap & datamap,
368  protocols::filters::Filters_map const & filters,
369  moves::Movers_map const & movers,
370  Pose const & pose
371 )
372 {
373  // flags/parameters (override options settings)
374  if ( tag->hasOption("generations") ) generations_ = tag->getOption<Size>("generations");
375  if ( tag->hasOption("pop_size") ) pop_size_ = tag->getOption<Size>("pop_size");
376  if ( tag->hasOption("num_packs") ) num_packs_ = tag->getOption<Size>("num_packs");
377  if ( tag->hasOption("pop_from_ss") ) pop_from_ss_ = tag->getOption<Size>("pop_from_ss");
378  if ( tag->hasOption("numresults") ) numresults_ = tag->getOption<Size>("numresults");
379  if ( tag->hasOption("fraction_by_recombination") )
380  fraction_by_recombination_ = tag->getOption<Real>("fraction_by_recombination");
381  if ( tag->hasOption("mutate_rate") ) mutate_rate_ = tag->getOption<Real>("mutate_rate");
382  if ( tag->hasOption("boltz_temp") ) boltz_temp_ = tag->getOption<Real>("boltz_temp");
383  if ( tag->hasOption("anchor_offset") ) anchor_offset_ = tag->getOption<Real>("anchor_offset");
384  // checkpointing options
385  if ( tag->hasOption("checkpoint_prefix") )
386  checkpoint_prefix_ = tag->getOption<std::string>("checkpoint_prefix");
387  if ( tag->hasOption("checkpoint_interval") )
388  checkpoint_interval_ = tag->getOption<Size>("checkpoint_interval");
389  if ( tag->hasOption("checkpoint_gz") ) checkpoint_gz_ = tag->getOption<bool>("checkpoint_gz");
390  if ( tag->hasOption("checkpoint_rename") )
391  checkpoint_rename_ = tag->getOption<bool>("checkpoint_rename");
392 
393  // calls to PackRotamersMover base class methods
394  parse_score_function( tag, datamap, filters, movers, pose );
395  parse_task_operations( tag, datamap, filters, movers, pose );
396 }
397 
398 ///@brief required in the context of the parser/scripting scheme
/// Returns a newly allocated, default-constructed DnaInterfaceMultiStateDesign;
/// nothing is copied from the object it is called on.
401 {
402  return new DnaInterfaceMultiStateDesign;
403 }
404 
405 ///@brief required in the context of the parser/scripting scheme
/// Returns a newly allocated DnaInterfaceMultiStateDesign that is
/// copy-constructed from *this.
408 {
409  return new DnaInterfaceMultiStateDesign( *this );
410 }
411 
412 void
414  Pose const & pose,
415  PackerTaskCOP ptask
416 )
417 {
418  runtime_assert( dna_chains_ );
419  runtime_assert( multistate_packer_ );
420 
421  // temporary copy of Pose used to build DNA target and competitor states
422  Pose mutpose( pose );
423  ResidueTypeSet const & rts( mutpose.residue(1).residue_type_set() );
424  ResidueTypeCOPs dna_types(
425  ResidueSelector().set_property("DNA").exclude_variants().select( rts )
426  );
427 
428  TR(t_info) << "\nBuilding dna target state:\n";
429 
430  // construct and add the target state
431  for ( DnaPositions::const_iterator itr( dna_chains_->begin() );
432  itr != dna_chains_->end(); ++itr ) {
433  // limit to dna design positions
434  DnaPosition const & pos( itr->second );
435  Size const index( pos.top() );
436  runtime_assert( index == itr->first );
437  // resfile key "TARGET" indicates positions at which multistate design will be targeted
438  if ( !ptask->has_behavior("TARGET",index) ) continue;
439 
440  std::ostringstream pdbtag;
441  if ( pose.pdb_info() ) {
442  pdbtag << pose.pdb_info()->chain(index) << '.' << pose.pdb_info()->number(index);
443  } else {
444  pdbtag << pose.chain(index) << '.' << index;
445  }
446  ResidueTypeCOP target_type( ptask->residue_task( index ).target_type() );
447  if ( ! target_type ) {
448  TR(t_error) << "No target type found for " << pdbtag.str() << '\n'
449  << "(Did the DNA definition string indicate a target nucleotide type?" << std::endl;
450  utility_exit();
451  }
452  runtime_assert( pos.paired() );
453  std::ostringstream pdbtag_btm;
454  if ( pose.pdb_info() ) {
455  pdbtag_btm << pose.pdb_info()->chain( pos.bottom() )
456  << '.' << pose.pdb_info()->number( pos.bottom() );
457  } else {
458  pdbtag_btm << pose.chain( pos.bottom() ) << '.' << pos.bottom();
459  }
460  ResidueTypeCOP bot_type( ptask->residue_task( pos.bottom() ).target_type() );
461  if ( ! target_type ) {
462  TR(t_error) << "No target type found for " << pdbtag_btm.str() << '\n'
463  << "(Did the DNA definition string indicate a target nucleotide type?" << std::endl;
464  utility_exit();
465  }
466  TR(t_info) << pdbtag.str() << '.' << target_type->name() << '/';
467  TR(t_info) << pdbtag_btm.str() << '.' << bot_type->name() << ", ";
468  substitute_residue( mutpose, index, *target_type );
469  substitute_residue( mutpose, pos.bottom(), *bot_type );
470  }
471  TR(t_info) << '\n';
472  PackingStateOP target_state = new PackingState( mutpose, true );
473  target_state->create_packer_data( multistate_packer_->scorefxn(), ptask );
474  multistate_packer_->add_state( target_state );
475 
476  TR(t_info) << "Building dna sequence competitors:\n";
477 
478  // build competitor DNA negative states and add to the MultiStateDesign instance
479  for ( DnaPositions::const_iterator itr( dna_chains_->begin() );
480  itr != dna_chains_->end(); ++itr ) {
481  // limit to dna design positions
482  DnaPosition const & pos( itr->second );
483  Size const index( pos.top() );
484  assert( index == itr->first );
485  if ( !ptask->has_behavior("TARGET",index) ) continue;
486  if ( pose.pdb_info() ) {
487  TR(t_info) << pose.pdb_info()->number( index ) << "/"
488  << pose.pdb_info()->number( pos.bottom() ) << ":";
489  } else {
490  TR(t_info) << index << "/" << pos.bottom() << ":";
491  }
492 
493  // remember the starting type in order to restore later
494  ResidueType const & orig_top( mutpose.residue_type( index ) );
495  ResidueType const & orig_bot( mutpose.residue_type( pos.bottom() ) );
496 
497  // add a negative state for every single basepair substitution
498  for ( ResidueTypeCOPs::const_iterator rt( dna_types.begin() ); rt != dna_types.end(); ++rt ) {
499  std::string name( (*rt)->name() );
500  if ( (*rt)->name3() == orig_top.name3() ) continue; // add mutants only
501  ResidueType const & bot_type( rts.name_map( dna_comp_name_str( name ) ) );
502  TR(t_info) << " " << name << "/" << bot_type.name() << ",";
503  substitute_residue( mutpose, index, **rt );
504  substitute_residue( mutpose, pos.bottom(), bot_type );
505  PackingStateOP competitor_state = new PackingState( mutpose, false );
506  competitor_state->share_packer_data_from( *target_state );
507  multistate_packer_->add_state( competitor_state );
508  }
509 
510  // restore the original basepair at this position
511  substitute_residue( mutpose, index, orig_top );
512  substitute_residue( mutpose, pos.bottom(), orig_bot );
513  }
514  TR(t_info) << std::endl;
515 }
516 
517 } // namespace dna
518 } // namespace protocols