Rosetta 3.5
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
SegmentHybridizer.cc
Go to the documentation of this file.
1 // -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
2 // vi: set ts=2 noet:
3 //
4 // (c) Copyright Rosetta Commons Member Institutions.
5 // (c) This file is part of the Rosetta software suite and is made available under license.
6 // (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
7 // (c) For more information, see http://www.rosettacommons.org. Questions about this can be
8 // (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
9 
10 /// @file protocols/seeded_abinitio/movers/
11 /// @brief repurposing logic and some functions from CartesianHybridze protocols for segment insertions and chain closure
12 /// @author Eva-Maria Strauch (evas01@uw.edu)
13 
14 // Unit headers
17 #include <utility/string_util.hh>
18 #include <utility/exit.hh>
20 #include <boost/foreach.hpp>
22 
23 #define foreach BOOST_FOREACH
24 // Package headers
25 #include <core/pose/Pose.hh>
26 #include <core/pose/util.hh>
29 #include <basic/Tracer.hh>
31 #include <utility/tag/Tag.hh>
32 #include <utility/vector1.hh>
34 #include <protocols/moves/Mover.hh>
37 #include <numeric/xyzVector.hh>
39 #include <numeric/random/random.hh>
40 #include <core/pose/selection.hh>
42 #include <basic/datacache/BasicDataCache.hh>
44 #include <protocols/moves/Mover.hh>
48 
54 #include <core/scoring/rms_util.hh>
55 
56 #include <core/pose/Pose.hh>
57 #include <core/pose/PDBInfo.hh>
60 #include <core/pose/util.hh>
61 
63 //#include <core/scoring/symmetry/SymmetricScoreFunction.hh>
65 
71 
73 #include <core/scoring/Energies.hh>
75 
78 #include <core/fragment/FragSet.hh>
79 #include <core/fragment/Frame.hh>
85 #include <core/fragment/util.hh>
86 
88 
89 #include <numeric/random/random.hh>
90 #include <numeric/xyzVector.hh>
91 #include <numeric/xyz.functions.hh>
92 #include <numeric/model_quality/rms.hh>
93 
94 #include <basic/Tracer.hh>
95 #include <boost/unordered/unordered_map.hpp>
96 
97 namespace protocols {
98 namespace seeded_abinitio {
99 
100 static basic::Tracer TR( "protocols.seeded_abinitio.movers.SegmentHybridizer" );
101 static basic::Tracer TR_ccd( "protocols.seeded_abinitio.movers.SegmentHybridizer_ccd" );
102 static numeric::random::RandomGenerator RG(1245273);
103 
106 {
108 }
109 
112  return new SegmentHybridizer;
113 }
114 
117 {
118  return "SegmentHybridizer";
119 }
120 
121 
123  Mover( SegmentHybridizerCreator::mover_name() ),
124  cartfrag_overlap_(2)
125  /*big_( 9 ),
126  small_( 3 ),
127  nfrags_( 50 ),
128  cartfrag_overlap_( 2 ),
129  use_seq_( 0 ),
130  rms_( 0.5 ),
131  auto_mm_( 1 ),
132  tries_( 100 ),
133  mc_cycles_( 4 ),
134  temp_(2.0),
135  use_frags_(1),
136  min_cycles_(2)
137  */
138 {
139  //torsion_database_.clear();
140  //delta_lengths_.clear();
141 }
142 
143 
145 
148 
149 
150 void
154 
155  // default scorefunction
157 }
158 
159 void
161  lowres_scorefxn_ = scorefxn_in->clone();
162 
163  min_scorefxn_ = scorefxn_in->clone();
164 
165  //bonds_scorefxn_ = new core::scoring::symmetry::SymmetricScoreFunction();
166  bonds_scorefxn_ = scorefxn_in->clone();
167  bonds_scorefxn_->reset();
173 
176 }
177 
178 void
180 
181  core::Size start = frame.start();
182  core::Size len = frame.length();
183 
184  //std::cout << "len: " << len << std::endl;
185 
186  int aln_len = cartfrag_overlap_;
187  runtime_assert( cartfrag_overlap_>=1 && cartfrag_overlap_<= len/2 + 1);//cartfrag_overlap_<=len/2);
188  core::Size nres = pose.total_residue();
189 
190  if (pose.residue(nres).aa() == core::chemical::aa_vrt) nres--;
191  bool nterm = (start == 1);
192  bool cterm = (start == nres- len -1 );
193 
194  /// insert frag:
195 
196  core::pose::Pose pose_copy = pose;
197 
198  ObjexxFCL::FArray1D< numeric::Real > ww( 2*4*aln_len, 1.0 );
199  ObjexxFCL::FArray2D< numeric::Real > uu( 3, 3, 0.0 );
200  numeric::xyzVector< core::Real > com1(0,0,0), com2(0,0,0);
201 
202  for (int i=0; i<(int)len; ++i) {
204  }
205  for (int tries = 0; tries< tries_; ++tries) {
206  ww = 1.0;
207  uu = 0.0;
208  com1 = numeric::xyzVector< core::Real >(0,0,0);
209  com2 = numeric::xyzVector< core::Real >(0,0,0);
210 
211  // grab coords
212  ObjexxFCL::FArray2D< core::Real > init_coords( 3, 2*4*aln_len );
213  for (int ii=-aln_len; ii<aln_len; ++ii) {
214  int i = (ii>=0) ? (nterm?len-ii-1:ii) : (cterm?-ii-1:len+ii);
215  numeric::xyzVector< core::Real > x_1 = pose.residue(start+i).atom(" C ").xyz();
216  numeric::xyzVector< core::Real > x_2 = pose.residue(start+i).atom(" O ").xyz();
217  numeric::xyzVector< core::Real > x_3 = pose.residue(start+i).atom(" CA ").xyz();
218  numeric::xyzVector< core::Real > x_4 = pose.residue(start+i).atom(" N ").xyz();
219  com1 += x_1+x_2+x_3+x_4;
220  for (int j=0; j<3; ++j) {
221  init_coords(j+1,4*(ii+aln_len)+1) = x_1[j];
222  init_coords(j+1,4*(ii+aln_len)+2) = x_2[j];
223  init_coords(j+1,4*(ii+aln_len)+3) = x_3[j];
224  init_coords(j+1,4*(ii+aln_len)+4) = x_4[j];
225  }
226  }
227  com1 /= 2.0*4.0*aln_len;
228  for (int ii=0; ii<2*4*aln_len; ++ii) {
229  for ( int j=0; j<3; ++j ) init_coords(j+1,ii+1) -= com1[j];
230  }
231 
232  core::Size toget = numeric::random::random_range( 1, frame.nr_frags() );
233  frame.apply( toget, pose_copy );
234 
235  // grab new coords
236  ObjexxFCL::FArray2D< core::Real > final_coords( 3, 2*4*aln_len );
237  for (int ii=-aln_len; ii<aln_len; ++ii) {
238  int i = (ii>=0) ? (nterm?len-ii-1:ii) : (cterm?-ii-1:len+ii);
239  numeric::xyzVector< core::Real > x_1 = pose_copy.residue(start+i).atom(" C ").xyz();
240  numeric::xyzVector< core::Real > x_2 = pose_copy.residue(start+i).atom(" O ").xyz();
241  numeric::xyzVector< core::Real > x_3 = pose_copy.residue(start+i).atom(" CA ").xyz();
242  numeric::xyzVector< core::Real > x_4 = pose_copy.residue(start+i).atom(" N ").xyz();
243  com2 += x_1+x_2+x_3+x_4;
244  for (int j=0; j<3; ++j) {
245  final_coords(j+1,4*(ii+aln_len)+1) = x_1[j];
246  final_coords(j+1,4*(ii+aln_len)+2) = x_2[j];
247  final_coords(j+1,4*(ii+aln_len)+3) = x_3[j];
248  final_coords(j+1,4*(ii+aln_len)+4) = x_4[j];
249  }
250  }
251  com2 /= 2.0*4.0*aln_len;
252  for (int ii=0; ii<2*4*aln_len; ++ii) {
253  for ( int j=0; j<3; ++j ) final_coords(j+1,ii+1) -= com2[j];
254  }
255 
256  numeric::Real ctx;
257  float rms;
258 
259  numeric::model_quality::findUU( final_coords, init_coords, ww, 2*4*aln_len, uu, ctx );
260  numeric::model_quality::calc_rms_fast( rms, final_coords, init_coords, ww, 2*4*aln_len, ctx );
261 
262  TR.Debug << "fragments rms: " << rms << std::endl;
263 
264  if (rms < rms_) break;
265  if (tries >= 50 && rms < 1) break; //20
266  if (tries >= 70 && rms < 2) break; //40
267  if (tries >= 100 && rms < 4) break; //60
268  }
269 
271  R.xx( uu(1,1) ); R.xy( uu(2,1) ); R.xz( uu(3,1) );
272  R.yx( uu(1,2) ); R.yy( uu(2,2) ); R.yz( uu(3,2) );
273  R.zx( uu(1,3) ); R.zy( uu(2,3) ); R.zz( uu(3,3) );
274 
275  for ( Size i = 0; i < len; ++i ) {
276  for ( Size j = 1; j <= pose.residue_type(start+i).natoms(); ++j ) {
277  core::id::AtomID id( j, start+i );
278  pose.set_xyz( id, R * ( pose_copy.xyz(id) - com2) + com1 );
279  }
280  }
281 }
282 
283 
284 void
286  //if (fragments_big_ && fragments_small_) return;
287  // how do I reset the fragments best?
288 
289  //if (!fragments_big_present) {
290  /// want to use the native ss here!
291 
292  std::string tgt_seq = pose.sequence();
293  core::scoring::dssp::Dssp dssp( pose );
294  dssp.insert_ss_into_pose( pose );
295  std::string tgt_ss = pose.secstruct();
296  //std::cout << "fragment picking start position " << insert_start << " and stop: " << insert_stop <<std::endl;
297 
298  // pick from vall based on template SS or target sequence
299  for ( core::Size j=insert_start; j<= insert_stop - big_-1 ; ++j ) {
300  std::string ss_sub = tgt_ss.substr( j-1, big_ );
301  std::string aa_sub = tgt_seq.substr( j-1, big_ );
302 
303  core::fragment::FrameOP frame = new core::fragment::Frame( j, big_ );
304  if( use_seq_ ){
305  frame->add_fragment( core::fragment::picking_old::vall::pick_fragments_by_ss_plus_aa( ss_sub, aa_sub,
307  //std::cout << "picking " << nfrags_ << " fragments based on ss and sequence" << std::cout;
308  }
309  else {
310  frame->add_fragment( core::fragment::picking_old::vall::pick_fragments_by_ss( ss_sub, nfrags_, true,
312  //std::cout << "picking " << nfrags_ << " fragments based on ss only " << std::endl;
313  }
314 
315  fragments_big_->add( frame );
316  }
317 }
318 
319 void
320  SegmentHybridizer::hybridize( core::pose::Pose & pose , core::Size insert_pos_start, core::Size insert_pos_stop){
321 
322  Pose pose_in = pose;
323  core::Size nres = pose.total_residue();
324  if (pose.residue(nres).aa() == core::chemical::aa_vrt) nres--;
325 
326  // pick an insert position based on gap
327  utility::vector1<core::Real> residuals( nres , 0.0 );
328  utility::vector1<core::Real> max_residuals(3,0);
329  utility::vector1<int> max_poses(4,-1);
330 
331 
332  for (Size /*int*/ i=1; i<nres; ++i) {
333  if (pose.fold_tree().is_cutpoint(i+1)) {
334  residuals[i] = -1;
335  } else {
337  c0 = pose.residue(i).atom(" C ").xyz();
338  n1 = pose.residue(i+1).atom(" N ").xyz();
339  core::Real d2 = c0.distance( n1 );
340  residuals[i] = (d2-1.328685)*(d2-1.328685);
341  if ( residuals[i] > max_residuals[1]) {
342  max_residuals[3] = max_residuals[2]; max_residuals[2] = max_residuals[1]; max_residuals[1] = residuals[i];
343  max_poses[3] = max_poses[2]; max_poses[2] = max_poses[1]; max_poses[1] = i;
344  } else if ( residuals[i] > max_residuals[2]) {
345  max_residuals[3] = max_residuals[2]; max_residuals[2] = residuals[i];
346  max_poses[3] = max_poses[2]; max_poses[2] = i;
347  } else if ( residuals[i] > max_residuals[3]) {
348  max_residuals[3] = residuals[i];
349  max_poses[3] = i;
350  }
351  }
352  }
353 
354  int select_position = numeric::random::random_range(1,3); //4);
355  core::Size max_pos = max_poses[ select_position ];
356 
357  //std::cout << "selection position: "<< select_position <<
358  //"\nmaxpos: " << max_pos << std::endl;
359 
360  // select random pos in the middle depending on fragment size
361  core::Size insert_pos = max_pos - numeric::random::random_range(big_/2-1, big_/2); //+1);
362  //std::cout << "insert position before apply frame : " << insert_pos << std::endl;
363 
364  //insert_pos = std::min( insert_pos, nres - big_-1);
365  insert_pos = std::min( insert_pos, insert_pos_stop - big_-1);
366  insert_pos = std::max( (int)insert_pos, (int)insert_pos_start);
367 
368  // for debugging of frames
369  //for (boost::unordered_map<core::Size, core::fragment::Frame>::iterator iter = library_.begin() ; iter != library_.end() ; ++iter){
370  //std::cout << " frame " << (*iter).first << " len: " << library_[insert_pos].length() << std::endl;
371  //}
372 
373  if (library_.find(insert_pos) != library_.end()){
374  apply_frame (pose, library_[insert_pos]);
375  //TR<< "applying fragments on position: " << insert_pos << std::endl;
376  }
377 
378 }
379 
380 void
382  //using namespace basic::options;
383  //using namespace basic::options::OptionKeys;
384  //using namespace core::pose::datacache;
385  init();
386 
387  /// 1. setting weights -- needs some cleaning
388 
389  core::Real max_cart = lowres_scorefxn_->get_weight( core::scoring::cart_bonded );
390  core::Real max_cart_angle = lowres_scorefxn_->get_weight( core::scoring::cart_bonded_angle );
391  core::Real max_cart_length = lowres_scorefxn_->get_weight( core::scoring::cart_bonded_length );
392  core::Real max_cart_torsion = lowres_scorefxn_->get_weight( core::scoring::cart_bonded_torsion );
394  core::Real max_vdw = lowres_scorefxn_->get_weight( core::scoring::vdw );
395 
396  core::Real bonded_weight = 0.1*max_cart;
397  core::Real bonded_weight_angle = 0.1*max_cart_angle;
398  core::Real bonded_weight_length = 0.1*max_cart_length;
399  core::Real bonded_weight_torsion = 0.1*max_cart_torsion;
400  core::Real cst_weight = 2*max_cst;
401  core::Real vdw_weight = 0.1*max_vdw;
402 
403  lowres_scorefxn_->set_weight( core::scoring::cart_bonded, bonded_weight );
404  lowres_scorefxn_->set_weight( core::scoring::cart_bonded_angle, bonded_weight_angle );
405  lowres_scorefxn_->set_weight( core::scoring::cart_bonded_length, bonded_weight_length );
406  lowres_scorefxn_->set_weight( core::scoring::cart_bonded_torsion, bonded_weight_torsion );
407  lowres_scorefxn_->set_weight( core::scoring::atom_pair_constraint, cst_weight );
408  lowres_scorefxn_->set_weight( core::scoring::vdw, vdw_weight );
409 
410  using namespace core::pose::datacache;
411 
414  tocen->apply( pose );
415 
416  /// 2. set up minimizer
417 
418  //core::optimization::MinimizerOptions options( "linmin", 0.01, true, false, false );
419  core::optimization::MinimizerOptions options_minilbfgs( "lbfgs_armijo_nonmonotone", 0.01, true, false, false );
420  options_minilbfgs.max_iter(5);
421  core::optimization::MinimizerOptions options_lbfgs( "lbfgs_armijo_nonmonotone", 0.01, true, false, false );
422  options_lbfgs.max_iter(200);
424 
425 
426  //// 3. iterate through segments of interest
427 
428  for( Size iter = 1 ; iter <= seg_vector_.size() ; ++ iter ){
429 
430  /// runtime parsing of input information
431  //core::Size insert_pos_start = protocols::rosetta_scripts::parse_resnum( seg_vector_[iter].first, pose ) - extend_outside_ ;
432  //core::Size insert_pos_stop = extend_outside_ + protocols::rosetta_scripts::parse_resnum( seg_vector_[iter].second, pose );
433  core::Size insert_pos_start( core::pose::parse_resnum( seg_vector_[iter].first, pose ) - extend_outside_ );
434  core::Size insert_pos_stop( extend_outside_ + core::pose::parse_resnum( seg_vector_[iter].second, pose ) );
435 
436  core::Size nterm_mm = insert_pos_start;
437  core::Size cterm_mm = insert_pos_stop;
438  //std::cout << "nterm before adjustment : "<< nterm_mm << " cterm: " << cterm_mm << std::endl;
439 
440  // assertionsssss ...
441 
442  /// 3.a. define and set movemap
443 
445  extended_mm_->set_bb ( false );
446  extended_mm_->set_chi ( false );
447  extended_mm_->set_jump( true );
448 
449 
450  //if there is more than 1 chain, use only the last one
451  if ( all_movable_ ){
452  TR<< "allowing all elements to move, if there is more than 1 chain, it will be the last chain that is allowed to move " <<std::endl;
453  core::Size chain_start = 1 ;
454  core::Size chain_num = pose.conformation().num_chains();
455  if (chain_num > 1)
456  chain_start = pose.conformation().chain_begin( chain_num );
457  for (core::Size i = chain_start; i <= pose.total_residue(); i++ ){
458  extended_mm_->set_bb(i, true);
459  extended_mm_->set_chi(i,true);
460  }
461  }
462 
463  if ( !all_movable_ ){
464  if ( auto_mm_ ){
465  core::scoring::dssp::Dssp dssp( pose );
466  dssp.insert_ss_into_pose( pose );
467  std::string tgt_ss = pose.secstruct();
468  //extend the movemap on both sides until it hits a loop
469  for (core::Size it = insert_pos_start - 2 ; it > 0 ; it-- ){ //2 instead of 1 because it is typically misassigned as loop
470  //std::cout << "ss: " << tgt_ss[it-1] << std::endl;
471  if ( tgt_ss[it-1] == 'L' ) break;
472  else {
473  nterm_mm = it;
474  //std::cout << "ss: " << tgt_ss[it-1] << std::endl;
475  }
476  }
477  for (Size it = insert_pos_stop + 1 ; it <= pose.total_residue() ; it++){ // 1 off because ss is typically misassigned
478  //std::cout << "ss: " << tgt_ss[it-1] << std::endl;
479  if ( tgt_ss[it-1] == 'L' ) break;
480  else {
481  cterm_mm = it;
482  //std::cout << "ss: " << tgt_ss[it-1] << std::endl;
483  }
484  }
485  }
486 
487  for (core::Size i = 1; i <= pose.total_residue(); ++i ){
488  if ( i >= nterm_mm && i <= cterm_mm ){
489  extended_mm_->set_bb( i, true );
490  extended_mm_->set_chi( i, true );
491  if( i >= insert_pos_start + extend_outside_ + extend_inside_ && i <= insert_pos_stop - extend_outside_ ){
492  //reset to false
493  extended_mm_->set_bb( i, false );
494  extended_mm_->set_chi( i, false );
495  }
496  }
497  }
498  } //specific movemap
499 
500  /// 3.b fragment hybridize
501 
502  if( use_frags_ ){
503  /// get fragments -- should sample more than 1
504  // TODO check that the insert position does not go into the first chain!
505  check_and_create_fragments( pose, insert_pos_start, insert_pos_stop );
506 
507 
508  /// map resids to frames, keeping track of positions
509  core::Size insert_frags_pos = fragments_big_->min_pos();
510  //std::cout << "start frags = fragments->min_pos:"<< insert_frags_pos <<
511  //"\nmax_pos " << fragments_big_->max_pos() <<
512  //"\nfragset for positions = nr_frames " << fragments_big_->nr_frames() << std::endl;
513 
514  for (core::fragment::FrameIterator i = fragments_big_->begin(); i != fragments_big_->end(); ++i){
515  core::Size position = (*i)->start();
516  //std::cout << "position after iterator: " << position << std::endl;
517  library_[position] = **i;
518  //library_[insert_frags_pos]= **i;
519  //adjust position counter (for debugging stuff)
520  //std::cout<< "position counter: " << insert_frags_pos << std::endl;
521  insert_frags_pos++;
522 
523  }
524 
525  (*lowres_scorefxn_)(pose);
527 
528  for (core::Size i=1; i <= mc_cycles_; ++i) {
529  hybridize(pose, insert_pos_start, insert_pos_stop);
530  (*lowres_scorefxn_)(pose);
531  mc->boltzmann(pose);
532  }
533  mc->show_scores();
534  mc->show_counters();
535  mc->recover_low(pose);
536  }
537 
538  TR.Debug << "final rms for fragment align: " << rms_ << std::endl;
539 
540  /// 3.c. finish with minimizing
541  minimizer.run( pose, *extended_mm_, *min_scorefxn_, options_minilbfgs );
542 
543  }//finish interating through pieces that need to be smoothened
544 
545  /// 4. final minimization (optional)
546 
547  if( extra_min_ ){
548  TR<< "final minimization" << std::endl;
549  (*min_scorefxn_)(pose); minimizer.run( pose, *extended_mm_, *min_scorefxn_, options_lbfgs );
550  }
551 }
552 
556 }
557 
558 
559 void
564  core::pose::Pose const & /*pose*/ ){
565 
566  TR << "initialized SegmentHybridizer mover " << std::endl;
567 
568  //jump_num_ = tag->getOption<core::Size>("jump_number", 1 );
569  std::string const fa_scorefxn_name( tag->getOption<std::string>( "fa_scorefxn", "score12" ) );
570  highres_scorefxn_ = new core::scoring::ScoreFunction( *(data.get< core::scoring::ScoreFunction * >( "scorefxns", fa_scorefxn_name) ));
571 
572  TR << "initialized SegmentHybridizer mover " << std::endl;
573 
574  big_ = tag->getOption <core::Size>( "frag_big" , 9 );
575  //currently uses only 1 size
576  small_ = tag->getOption <core::Size>( "frag_smal", 3 );
577  nfrags_= tag->getOption<core::Size>( "nfrags", 50 );
578  cartfrag_overlap_ = tag->getOption<core::Size>("cartfrag_overlap" , 2 );
579  use_seq_ = tag->getOption<bool> ("use_seq", 0 );
580  rms_ = tag->getOption<core::Real> ("rms_frags" , 0.5 );
581  auto_mm_= tag->getOption<bool>("auto_mm", 1 );
582  tries_=tag->getOption<int>("frag_tries" , 100 );
583  mc_cycles_=tag->getOption<core::Size>("mc_cycles", 200 );
584  temp_=tag->getOption<core::Real>("temp", 2.0);
585  use_frags_=tag->getOption<bool>("use_frags", 1 );
586  min_cycles_=tag->getOption<core::Size>("min_cycles", 5 );
587  all_movable_=tag->getOption<bool>("all_movable", 0 );
588  extra_min_=tag->getOption<bool>("extra_min", 0 );
589 
590  /// read areas that are supposed to be remodeled
591  utility::vector0< TagPtr > const branch_tags( tag->getTags() );
592  foreach( TagPtr const btag, branch_tags ){
593 
594  if( btag->getName() == "Span" ) { //need an assertion for the presence of these or at least for the option file
595  std::string const beginS( btag->getOption<std::string>( "begin" ) );
596  std::string const endS( btag->getOption<std::string>( "end" ) );
597 
598  extend_outside_ = btag->getOption<core::Size>( "extend_outside", 5);
599  extend_inside_ = btag->getOption<core::Size>( "extend_inside" , 1 );
600 // N_mm_ = btag->getOption<core::Size>( "N_mm", 2 );
601 // C_mm_ = btag->getOption<core::Size>( "C_mm", 2 );
602 
603  std::pair <std::string,std::string> segpair;
604  segpair.first = beginS;
605  //std::cout <<"parsing spans: " << beginS << " " <<endS << " and remdoel "<< std::endl;
606  segpair.second = endS;
607  seg_vector_.push_back( segpair ); // parse at runtime for possible length changes
608 
609  }//end seeds
610  }//end b-tags
611 }//end parse tag
612 
613 } //seeded_abinitio
614 } //protocols