Rosetta 3.5
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
GrowPeptides.cc
Go to the documentation of this file.
1 // -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
2 // vi: set ts=2 noet:
3 //
4 // This file is part of the Rosetta software suite and is made available under license.
5 // The Rosetta software is developed by the contributing members of the Rosetta Commons consortium.
6 // (C) 199x-2009 Rosetta Commons participating institutions and developers.
7 // For more information, see http://www.rosettacommons.org/.
8 //
9 /// @file protocols/seeded_abinitio/GrowPeptides.cc
10 /// @brief different ways of growing peptide sequences
11 /// @author Eva-Maria Strauch (evas01@u.washington.edu)
12 
17 #include <core/types.hh>
18 #include <core/pose/Pose.hh>
19 #include <core/pose/Pose.fwd.hh>
20 #include <core/pose/util.hh>
29 
33 
34 //other
36 #include <core/pose/selection.hh>
37 
38 
39 // C++ headers
40 #include <string>
41 #include <utility/string_util.hh>
42 
43 #include <basic/Tracer.hh>
44 
54 #include <core/chemical/util.hh>
55 
56 //parser
57 #include <utility/tag/Tag.hh>
58 #include <utility/tag/Tag.fwd.hh>
61 #include <core/pose/selection.hh>
62 
63 //loops
64 #include <protocols/loops/Loop.hh>
67 
68 //util
69 #include <utility/vector1.hh>
70 #include <set>
71 #include <boost/foreach.hpp>
72 
// Shorthand so the loops in this file can be written as foreach( element, container ).
73 #define foreach BOOST_FOREACH
74 
75 using namespace core;
76 using namespace protocols::seeded_abinitio;
// Tracer used for all log output of this file (TR, TR.Debug).
77 static basic::Tracer TR( "protocols.seeded_abinitio.GrowPeptides" );
78 
79 
80 namespace protocols {
81  namespace seeded_abinitio {
82 
83  using namespace protocols::moves;
84  using namespace core;
85 
/// @brief Returns the same string as GrowPeptidesCreator::mover_name().
// NOTE(review): the return-type line that precedes this signature is absent from this listing.
87  GrowPeptidesCreator::keyname() const{
88  return GrowPeptidesCreator::mover_name();
89  }
90 
/// @brief Allocates a default-constructed GrowPeptides and returns it.
// NOTE(review): the return-type line that precedes this signature is absent from this listing.
92  GrowPeptidesCreator::create_mover() const {
93  return new GrowPeptides();
94  }
95 
/// @brief Returns the literal name "GrowPeptides"; keyname() and GrowPeptides::get_name() forward to it.
// NOTE(review): the return-type line that precedes this signature is absent from this listing.
97  GrowPeptidesCreator::mover_name(){
98  return "GrowPeptides";
99  }
100 
101 
/// @brief Default constructor. It has no initializer list and an empty body, so no
/// member is given an explicit value here; the flags and options are assigned in parse_my_tag().
102  GrowPeptides::GrowPeptides()
103  {}
104 
/// @brief Destructor; performs no explicit cleanup.
105  GrowPeptides::~GrowPeptides() {}
106 
107 
// NOTE(review): the signature lines of this definition are absent from this listing
// (presumably GrowPeptides::clone() const -- confirm against the header).
// Returns a MoverOP wrapping a copy-constructed duplicate of *this.
110  return( protocols::moves::MoverOP( new GrowPeptides( *this ) ) );
111 }
112 
// NOTE(review): both the return-type line and the single body line of this function are
// absent from this listing, so its behavior cannot be documented from here.
114 GrowPeptides::fresh_instance() const {
116 }
117 
/// @brief Returns the ddg_ flag, which parse_my_tag() reads from the "ddg_based" option.
118 bool
119 GrowPeptides::ddg(){
120  return ddg_;
121 }
122 
/// @brief Prepends (stop - res_pos) residues in front of pose position res_pos.
/// @param seq_register position in nat_seq of the first residue to add; the indexing below
///        subtracts 1 from it, i.e. it is treated as 1-based
/// @param res_pos pose position before which every new residue is inserted
/// @param stop one past the last position of the stretch; the loop runs while k < stop
/// @param nat_seq one-letter sequence the residue identities are read from (only read here)
/// @param target_seeds pose that is extended in place
123 void
124 GrowPeptides::append_residues_nterminally ( Size seq_register, Size res_pos, Size stop, std::string & nat_seq , pose::Pose & target_seeds ){
125  TR<<" ---- growing N-terminal stretch from residues: "<<res_pos <<" to " <<stop <<"----------" << std::endl;
    // The residue type set is taken from residue 1 of the pose.
126  core::chemical::ResidueTypeSet const & rsd_set( target_seeds.residue(1).residue_type_set() );// this could be changed as needed
127  //std::cout<<"nseq size: " << nat_seq.size() << " template sequence: "<< nat_seq << std::endl;
128  //std::cout<< "sequence register : "<< seq_register << std::endl;
129 
    // Every residue is inserted before the same position (res_pos), so the stretch is read
    // from its last letter towards its first: the string index starts at
    // (stop - res_pos) + seq_register - 2 and ends at seq_register - 1.
130  for (Size k= res_pos; k < stop ; ++k ) {
    // resi is only used in the debug message below.
131  Size resi = stop - k + res_pos - 1 ; /* so that stop doesnt get incorporated anymore*/
132  const char aa = nat_seq[ stop - k - 1 + seq_register - 1];//in case this is called within the sequence, -1 because strings start counting at 0
133  TR.Debug << "RES AA N-terminal extension: " << resi << aa <<std::endl;
    // First residue type in rsd_set matching the one-letter code, variants excluded.
134  core::chemical::ResidueTypeCOP new_rsd_type( core::chemical::ResidueSelector().set_name1( aa ).exclude_variants().select( rsd_set )[1] );
    // NOTE(review): the line declaring new_rsd (listing line 135) is absent from this listing;
    // presumably it builds a residue from new_rsd_type -- confirm against the real source.
136  target_seeds.conformation().safely_prepend_polymer_residue_before_seqpos(*new_rsd, res_pos, true);
137  target_seeds.set_omega( res_pos, 180.0 );
138  }
139  //target_seeds.dump_pdb("nextended.pdb");
140 }
141 
/// @brief Appends (stop - res_pos) residues, one after each of the pose positions
///        res_pos, res_pos + 1, ..., stop - 1.
/// @param seq_register index into nat_seq of the first letter to add (used without a -1 shift)
/// @param res_pos pose position after which the first new residue is appended
/// @param stop one past the last position appended after; the loop runs while j < stop
/// @param nat_seq one-letter sequence the residue identities are read from (only read here)
/// @param target_seeds pose that is extended in place
142 void
143 GrowPeptides::append_residues_cterminally ( Size seq_register, Size res_pos, Size stop, std::string & nat_seq , pose::Pose & target_seeds ){
144  TR<<" ----- growing C-terminal extension from residues: "<<res_pos <<" to " <<stop <<"--------" <<std::endl;
    // The residue type set is taken from the residue just before res_pos.
145  core::chemical::ResidueTypeSet const & rsd_set( target_seeds.residue( res_pos - 1 ).residue_type_set() );
146  //std::cout<<"cseq size: " << nat_seq.size() << " sequence: "<< nat_seq << std::endl;
147  //std::cout<<"seq_register: " << seq_register << std::endl;
148 
149  for ( Size j = res_pos ; j < stop ; ++j ){
    // Letters are consumed in forward order starting at nat_seq[ seq_register ].
150  const char aa = nat_seq[ j - res_pos + seq_register /*-1*/]; // -1 for string adjustment
151  Size resi = j ;
152  TR.Debug << "RES AA C-terminal extension " << resi << aa <<std::endl;
    // First residue type in rsd_set matching the one-letter code, variants excluded.
153  core::chemical::ResidueTypeCOP new_rsd_type( core::chemical::ResidueSelector().set_name1( aa ).exclude_variants().select( rsd_set )[1] );
    // NOTE(review): the line declaring new_rsd (listing line 154) is absent from this listing;
    // presumably it builds a residue from new_rsd_type -- confirm against the real source.
155  target_seeds.conformation().safely_append_polymer_residue_after_seqpos( *new_rsd, resi /*- 1*/ , true );// stop
156  target_seeds.set_omega( resi , 180.0 );
157  //target_seeds.dump_pdb( "ctermextn.pdb" );
158  }
159 }
160 
161 /*
162 void
163 insert_segment( std::pair <Size, Size> insert_type, std::string seq ,pose::pose curr_pose, )
164 {
165  //(0, num) = insert c-terminally of given position
166  //(num, 0) = insert n-terminally of given position
167  //(0,0 ) = insert n-terminally
168  //(0, total resi) = insert c-terminally
169  ///replace a segment between the two given positions
170  ///simply insert either N or C terminal of a given residue
171 }
172 */
173 
174 
175 /*
176 void
177 GrowPeptides::process_length_change(
178  core::pose::Pose & pose,
179  core::id::SequenceMappingCOP smap
180  ){
181  enz_prot_->remap_resid( pose, *smap );
182  core::id::combine_sequence_mappings( *start_to_current_smap_, *smap );
183 
184  for( utility::vector1< protocols::forge::remodel::RemodelConstraintGeneratorOP >::iterator rcg_it = rcgs_.begin();
185  rcg_it != rcgs_.end(); ++rcg_it ){
186  (*rcg_it)->set_seqmap( smap );
187  }
188 }
189 */
190 
/// @brief Grows curr_pose outwards from its seed segments, using groups of four vertices.
/// @details The vertex set is copied, in ascending order, into the 1-indexed container
///          verteces. It is then walked in steps of four: for the group ending at index v,
///          verteces[v-3]..verteces[v-2] is passed to append_residues_nterminally() and
///          verteces[v-1]..verteces[v] to append_residues_cterminally(); the seed used for
///          the group is seeds[v/4]. For every group but the last, a temporary jump is added
///          to the fold tree before growing.
/// @param curr_pose pose that is extended in place
/// @param sequence full one-letter sequence the new residues are read from
/// @param seeds seed segments; start() and stop() of seeds[v/4] are used
/// @param vertex_set vertex positions in the numbering of the complete sequence
191 void
192 GrowPeptides::grow_from_verteces(
193  core::pose::Pose & curr_pose,
194  std::string sequence,
195  protocols::loops::Loops & seeds,
196  std::set< core::Size > vertex_set
197  ){
198 
199  using namespace core;
200  using namespace kinematics;
201 
    // NOTE(review): saved_pose is not read again in the lines visible here (listing line 204 is absent).
202  core::pose::Pose saved_pose;
203  saved_pose = curr_pose;
205 
206  core::kinematics::FoldTree grow_foldtree = curr_pose.fold_tree() ;
207  TR<<"foldtree before growing: " << grow_foldtree << std::endl;
208 
    // NOTE(review): the declaration of verteces (listing line 209) is absent from this listing;
    // it is indexed from 1 below, so presumably a utility::vector1< Size > -- confirm.
210  foreach( const Size vertex, vertex_set )
211  verteces.push_back( vertex );
212 
    // NOTE(review): verteces[1] is read here without a check that vertex_set is non-empty.
213  TR<<"start new protein: " << verteces[1] <<" end: " << verteces[verteces.size()] <<" size sequence: " << sequence.size()<< std::endl;
214  TR.Debug <<"number verteces: "<< verteces.size()<< std::endl;
215 
    // The span from the first to the last vertex (inclusive) must equal the sequence length.
216  if ( verteces[verteces.size()] - (verteces[1] - 1) != sequence.size() )
217  utility_exit_with_message( "chunk pieces do not agree with the length of the submitted template pdb" );
218 
    // vertex_it always points at the last vertex of the current group of four.
219  for ( Size vertex_it = 4 ; vertex_it <= verteces.size(); vertex_it = vertex_it + 4){
220 
    // Only groups that are followed by another group get a temporary jump.
221  if( vertex_it < verteces.size() ){
222  grow_foldtree = curr_pose.fold_tree();
223  //connect seeds
224  TR.Debug <<"for temp jump --- from: " <<verteces[vertex_it - 3 ]<<", to: "<< verteces[vertex_it - 3] + (seeds[vertex_it/4].stop() - seeds[vertex_it/4].start()) + 1 <<", cutpoint: "<< verteces[vertex_it - 3] + (seeds[vertex_it/4].stop() - seeds[vertex_it/4].start()) <<std::endl;
225  //ensuring that the new temporariy cutpoint is unique
    // Proposed cut: first vertex of the group plus (stop - start) of the seed; shifted by
    // one if the fold tree already has a cutpoint there.
226  Size temp_cutpoint = verteces[vertex_it - 3] + (seeds[vertex_it/4].stop() - seeds[vertex_it/4].start());
227  if ( curr_pose.fold_tree().is_cutpoint( temp_cutpoint ) ) {
228  ++temp_cutpoint;
229  }
230 
231  grow_foldtree.new_jump( verteces[vertex_it - 3 ], verteces[vertex_it - 3] + (seeds[vertex_it/4].stop() - seeds[vertex_it/4].start()) + 1 /*start of next seed before additions*/ , temp_cutpoint );
232  curr_pose.fold_tree( grow_foldtree );
233  TR<<"foldtree before adding new seeds, current seed: "<<vertex_it/4 << " with foldtree " << grow_foldtree << std::endl;
234  }
235 
236  //need to adjust the verteces for the N-terminally extensions since the vertex container has the numbering for the complete sequences
237  //and not the trunctated starting pieces.
238  TR<<"appending N-terminally from: " << verteces[vertex_it - 3 ] << " to " << verteces[vertex_it - 2] <<std::endl;
239  TR<<"appending C-terminally from: " << verteces[vertex_it - 1 ] << " to " << verteces[vertex_it ] << std::endl;
240 
241  //need to adjust the extensions/numbering
    // nseq_start is only used in the debug message; the call below recomputes the same expression.
242  Size nseq_start = seeds[vertex_it/4 ].start() - ( verteces[vertex_it - 2] - verteces[vertex_it - 3] );
243  TR.Debug <<" grow nterm: start sequence " <<nseq_start <<" position in pose "<< verteces[vertex_it - 3] <<" stop " << verteces[vertex_it - 2] <<" sequence " << sequence << std::endl ;
244 
245  //get sequence start through the seed start minus that actual length that needs to be added
246  append_residues_nterminally( seeds[vertex_it /4].start() - ( verteces[vertex_it - 2] - verteces[vertex_it - 3] ) , verteces[vertex_it - 3] , verteces[vertex_it - 2], sequence , curr_pose ) ;
247 
248  TR <<"growing foldtree: " << grow_foldtree << std::endl;
249  TR.Debug <<" grow cterm: start sequence " << seeds<<" position in pose "<< verteces[vertex_it - 1 ] <<" stop " << verteces[vertex_it] <<" sequence " << sequence << std::endl ;
    // The C-terminal sequence register is the seed's stop position.
250  append_residues_cterminally( seeds[vertex_it/4].stop(), verteces[vertex_it - 1 ] , verteces[vertex_it], sequence , curr_pose ) ;
251 
252  //using simple foldtree that connects the two seeds with each other to keep consequutives seeds constants in space
253  //connect cutpoint with next seed
254 
    // Refresh the local copy from the pose, whose fold tree changed while residues were added.
255  grow_foldtree.clear();
256  TR <<"pose size: " << curr_pose.total_residue() << std::endl;
257  grow_foldtree = curr_pose.fold_tree();
258  TR.Debug <<"done growing, temporary foldtree: " << grow_foldtree << std::endl;
259  }
260 
261  grow_foldtree = curr_pose.fold_tree();
262  TR << "growing completed, temporary foldtree: " << grow_foldtree << std::endl;
263 }
264 
/// @brief Grows the pose from its seeds when seeds and a template are both present.
/// @details Steps, in order: call setup_cached_observers(); if all_seeds_ is non-empty and
///          template_presence is set, optionally build a fold tree and vertices through
///          SeedFoldTree, choose the growth sequence (seq_ if non-empty, otherwise the
///          template's sequence), call grow_from_verteces() and install *seed_foldtree_ on
///          the pose; finally, unless output_centroid is set, evaluate scorefxn_ on the pose.
/// @param pose pose that is modified in place
265 void GrowPeptides::apply (core::pose::Pose & pose ){
266 
267  ///adding a pose observer for downstream adjustment of sequence positions
268  setup_cached_observers( pose );
269 
270  ///if there are loops and template, then activate grow from seeds
271  if( all_seeds_.size() > 0 && template_presence ){
272 
    // NOTE(review): cutpoints is assigned below but never read in the lines visible here.
273  utility::vector1< Size > cutpoints;
274 
    // The if has no braces, so it guards only the log statement; the cutpoints
    // assignment on the following line runs unconditionally.
275  if( !fetch_foldtree )
276  TR<<"taking foldtree from pose" <<std::endl;
277  cutpoints = pose.fold_tree().cutpoints();
278 
279  if( fetch_foldtree ){
280  TR<<"generate a foldtree through SeedFoldTree, and get cutpoints" << std::endl;
281  core::pose::PoseOP tmp_seed_target_poseOP = new core::pose::Pose( pose );
282  SeedFoldTreeOP seed_ft_generator = new SeedFoldTree();
283  seed_ft_generator->ddg_based( ddg() );
284  seed_ft_generator->scorefxn( scorefxn_ );
285  seed_ft_generator->anchor_specified(anchor_specified_);
286  if( anchor_specified_ )
287  seed_ft_generator->set_anchor_res( anchors_ );
    // Secondary structure of the template is computed with DSSP and written into template_pdb_.
288  core::scoring::dssp::Dssp dssp( *template_pdb_ );
289  dssp.insert_ss_into_pose( *template_pdb_ );
290  std::string secstr_template = template_pdb_->secstruct();
291  TR.Debug << "sec str for template: " << secstr_template << std::endl;
292  seed_foldtree_ = seed_ft_generator->set_foldtree( /**template_pdb_ ,*/ tmp_seed_target_poseOP, secstr_template, all_seeds_, true );
293  verteces_ = seed_ft_generator->get_folding_verteces();
294  TR.Debug<<"verteces for folding: " <<std::endl;
295  cutpoints = seed_foldtree_->cutpoints();
296 
297  //debugging stuff
298  foreach( core::Size const r, verteces_ ){
299  TR.Debug<< r <<"\t";
300  }
301  }
    // An explicitly given sequence takes precedence over the template's sequence.
302  std::string seq;
303  if( seq_ != "" )
304  seq = seq_;
305  else
306  seq = template_pdb_->sequence();
307  if( seq == "" ) utility_exit_with_message("no sequence specified" );
308 
    // NOTE(review): in the lines visible here, verteces_ and seed_foldtree_ are only assigned
    // inside the fetch_foldtree branch above; with fetch_foldtree false the calls below rely
    // on them having been set elsewhere -- confirm, since seed_foldtree_ is dereferenced.
309  grow_from_verteces( pose, seq, all_seeds_ , verteces_ );
310 
311  //add the new seed foldtree to the pose
312  pose.fold_tree( *seed_foldtree_ );
313  TR<<"set new foldtree: "<< pose.fold_tree() <<std::endl;
314 
315  }//end grow from seeds
316 
317 
318  /*
319  // ------------------ simple pose extensions based on teh parsers input -----------------------------
320 
321  if( all_seeds_.size() == 0 ){
322  if( extend_nterm > 0 ){
323  std::string nseq;
324 
325  if ( nsequence_.size() > 0 ){
326  nseq = nsequence_;
327  if( nsequence_.size() != extend_nterm ){
328  TR<<"WARNING: specified sequence is not long enough for the desired length of extension, adding extra alanine residue" <<std::endl;
329  for (Size i = 0 ; i < extend_nterm - nsequence_.size(); ++i )
330  nseq += "A";
331  }
332  }
333 
334  if( all_ala_N ){
335  TR<<"overwriting N-term sequence with all ALA, if not desired turn of all_ala_N" <<std::endl;
336  for(Size i = 0; i < extend_nterm ; ++i){
337  nseq += "A";
338  std::cout<<nseq<<"\n"<< std::endl;
339  }
340  }
341 
342  append_residues_nterminally ( 0 , 1 , 1 , nseq , copy_pose );
343  }//end nterm extension
344 
345 
346  if ( extend_cterm > 0 ){
347  std::string cseq;
348  std::cout<<"cseq "<< cseq <<std::endl;
349 
350  if ( csequence_.size() > 0 ){
351  cseq = csequence_;
352  if( csequence_.size() != extend_cterm ){
353  TR<<"WARNING: specified sequence is not long enough for the desired length of extension, adding extra alanine residue" <<std::endl;
354  for (Size i = 0 ; i < extend_cterm - csequence_.size(); ++i )
355  std::cout<<cseq<<std::endl;
356  cseq = cseq + "A";
357  }
358  }
359 
360  if( all_ala_C ){
361  TR<<"overwriting C-term sequence with all ALA, if not desired turn of all_ala_C" <<std::endl;
362  for(Size i = 0; i < extend_cterm ; ++i){
363  std::cout<<cseq<<"\n"<<std::endl;
364  cseq += "A";
365  }
366  }
367  std::cout<<"pose: "<< copy_pose.total_residue()<< "\n" <<copy_pose.total_residue() + extend_cterm <<"seq: "<< cseq << std::endl;
368 
369  append_residues_cterminally( 0, copy_pose.total_residue(), copy_pose.total_residue() + extend_cterm, cseq , copy_pose );
370 
371  }//end cterm extension
372 
373  pose = copy_pose;
374 
375  }//end without loops
376  */
377 
378  if( !output_centroid ){
379  TR<<"switching back to full_atom mode" <<std::endl;
    // NOTE(review): listing lines 380-381 are absent here; presumably they perform the
    // switch announced by the message above -- confirm against the real source.
    // The score function is then evaluated on the pose.
382  ( *scorefxn_ )( pose );
383  }//end output centroid
384 
385  TR.flush();
386 }
387 
388 
389 /// @details putting a LengthEventCollector into the pose
// Called at the start of apply().
// NOTE(review): the body lines of this function (listing lines 392-393) are absent from
// this listing, so the @details text above cannot be checked against the code from here.
390 void
391 GrowPeptides::setup_cached_observers( core::pose::Pose & pose ){
394 }
395 
/// @brief Returns the string produced by GrowPeptidesCreator::mover_name().
// NOTE(review): the return-type line that precedes this signature is absent from this listing.
397 GrowPeptides::get_name() const {
398  return GrowPeptidesCreator::mover_name();
399 }
400 
401 void
402 GrowPeptides::parse_my_tag(
403  utility::tag::TagPtr const tag,
407  core::pose::Pose const & pose )
408 {
409  TR<<"GrowPeptides mover has been initiated" <<std::endl;
410  //default
411  template_presence = false;
412  //need scorefxn to score extended/changed pose
413  std::string const scorefxn_name( tag->getOption<std::string>( "scorefxn", "score12" ) );
414  scorefxn_ = new core::scoring::ScoreFunction( *(data.get< core::scoring::ScoreFunction * >( "scorefxns", scorefxn_name) ));
415  TR<<"scoring with following scorefunction: " << *scorefxn_ <<std::endl;
416 
417  fetch_foldtree = tag->getOption< bool >( "SeedFoldTree", 0 );
418 
419  ddg_ = tag->getOption< bool >( "ddg_based", 0 );
420 
421  //add_chainbreakterm_ = tag->getOption< bool >( "add_chainbreakterm", 1 );
422 
423  if( tag->hasOption( "extend_nterm" ) ) {
424  extend_nterm = tag->getOption< Size >( "extend_nterm" , 0 );
425  TR<<"extending peptide n-terminally by "<< extend_nterm << std::endl;
426  }
427  if( tag->hasOption( "extend_cterm" ) ){
428  extend_cterm = tag->getOption< Size >( "extend_cterm" , 0 );
429  TR<<"extending peptide c-terminally by " << extend_cterm << std::endl;
430  }
431 
432  all_ala_N = tag->getOption< bool >("all_ala_N" , 0 );
433  if( all_ala_N )
434  TR<<"N-terminally added amino acids are all ALA" <<std::endl;
435 
436  all_ala_C = tag->getOption< bool >("all_ala_C", 0 );
437  if( all_ala_C )
438  TR<<"C-terminally added amino acids are all ALA" <<std::endl;
439 
440  if( tag->hasOption( "nseq" ) )
441  csequence_ = ( tag->getOption< std::string >( "nseq" ) );
442 
443  if( tag->hasOption( "cseq" ) )
444  nsequence_ = ( tag->getOption< std::string >( "cseq" ) );
445 
446  output_centroid = tag->getOption< bool >( "output_centroid", 0 );
447 
448  if( tag->hasOption( "template_pdb" ) ){
449  std::string const template_pdb_fname( tag->getOption< std::string >( "template_pdb" ));
450  template_pdb_ = new core::pose::Pose ;
451  core::import_pose::pose_from_pdb( *template_pdb_, template_pdb_fname );
452  TR<<"read in a template pdb with " <<template_pdb_->total_residue() <<"residues"<<std::endl;
453  template_presence = true;
454  }
455 
456  if( tag->hasOption("sequence" )){
457  seq_ = tag->getOption< std::string >("sequence" );
458  }
459 
460  if( !template_presence && seq_ == "" )
461  utility_exit_with_message("neither template pdb nor sequence for growing is specified!!" );
462 
463  //parsing branch tags
464  utility::vector0< TagPtr > const branch_tags( tag->getTags() );
465  foreach( TagPtr const btag, branch_tags ){
466 
467  //parse the pdb of interest, which is either the template or the input pdb depending on the users specificiation
468  if( template_presence )
469  curr_pose_ = template_pdb_;
470  else
471  curr_pose_ = new pose::Pose( pose );
472 
473  anchor_specified_ = false;
474 
475  if( btag->getName() == "Seeds" ) { //need an assertion for the presence of these or at least for the option file
476  //needs some assertions to avoid bogus input
477  std::string const beginS( btag->getOption<std::string>( "begin" ) );
478  std::string const endS( btag->getOption<std::string>( "end" ) );
479  core::Size const begin( core::pose::parse_resnum( beginS, *curr_pose_ ) );
480  core::Size const end( core::pose::parse_resnum( endS, *curr_pose_ ) );
481  all_seeds_.add_loop( begin , end , 0, 0, false );
482  TR <<"parsing seeds: \n"<< begin <<" and " << end <<std::endl;
483 
484  if( btag->hasOption( "anchor" )){
485  Size anchor_res = btag->getOption< core::Size >("anchor", 0 );
486  TR<<"anchor residue: " << anchor_res << std::endl;
487  anchors_.push_back( anchor_res );
488  anchor_specified_ = true;
489  }
490  }//end seed tags
491 
492  //not hooked in yet...
493  if( btag->getName() == "Steal_seq_span" ) {
494  if( !template_presence )
495  utility_exit_with_message("need to specify a template pdb to steal sequenc spans");
496  std::string const begin_str( btag->getOption<std::string>( "begin" ) );
497  std::string const end_str( btag->getOption<std::string>( "end" ) );
498  core::Size const begin( core::pose::parse_resnum( begin_str, *template_pdb_ ) );
499  core::Size const end( core::pose::parse_resnum( end_str, *template_pdb_ ) );
500  runtime_assert( end > begin );
501  runtime_assert( begin>=1);
502  runtime_assert( end<=template_pdb_->total_residue() );
503  std::string seq_chunk = template_pdb_->sequence();
504  sequence_chunks_.push_back( seq_chunk );
505  }//end steal sequence
506 
507  }//end branch tags
508 }//end parse my tag
509 }//seeded abinitio
510 } //end protocols
511 
512 
513 
514