Rosetta 3.5
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
PlaceFragments.cc
Go to the documentation of this file.
1 // -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
2 // vi: set ts=2 noet:
3 //
4 // (c) Copyright Rosetta Commons Member Institutions.
5 // (c) This file is part of the Rosetta software suite and is made available under license.
6 // (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
7 // (c) For more information, see http://www.rosettacommons.org. Questions about this can be
8 // (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
9 
10 /// @file SwapAlignedFragments might be a better name
11 /// @author Eva-Maria Strauch (evas01@u.washington.edu)
12 /// @brief mover for pulling fragments into a pose so that they can be aligned onto stubs or other segments
13 
14 // Unit headers
17 
18 //#include <protocols/protein_interface_design/util.hh>
19 #include <utility/string_util.hh>
21 #include <core/types.hh>
22 #include <basic/Tracer.hh>
24 #include <utility/vector1.hh>
29 
32 #include <core/fragment/FragSet.hh>
33 #include <core/fragment/Frame.hh>
37 #include <core/fragment/FragSet.hh>
38 #include <core/fragment/Frame.hh>
43 #include <core/fragment/FragSet.hh>
44 #include <core/fragment/Frame.hh>
49 #include <core/fragment/util.hh>
50 
52 
53 // unit headers
55 //#include <protocols/fldsgn/BluePrintBDR.hh>
56 //#include <protocols/fldsgn/BluePrintBDRCreator.hh>
57 
58 // package headers
64 
69 /////#include <protocols/jd2/parser/BluePrint.fwd.hh>
70 
71 
73 #include <core/chemical/AA.hh>
78 
79 //#include <protocols/evaluation/Align_RmsdEvaluator.hh>
80 #include <core/scoring/rms_util.hh>
81 #include <core/sequence/util.hh>
85 
86 #include <utility/excn/Exceptions.hh>
87 #include <utility/file/file_sys_util.hh>
88 
89 #include <numeric/random/random.hh>
90 #include <numeric/xyzVector.hh>
91 #include <numeric/xyz.functions.hh>
92 #include <numeric/model_quality/rms.hh>
93 
94 
95 // Project headers
96 #include <core/pose/Pose.hh>
97 #include <core/pose/PDBInfo.hh>
98 #include <utility/tag/Tag.hh>
99 
100 
101 namespace protocols {
102 namespace seeded_abinitio {
103 
// Namespaces opened for the rest of this file.
104 using namespace core;
105 using namespace scoring::constraints;
106 using namespace protocols::moves;
107 
// Tracer channel used for this file's log output.
108 static basic::Tracer TR( "protocols.seeded_abinitio.PlaceFragments" );
109 
/// @brief Returns the same string as PlaceFragmentsCreator::mover_name().
111  PlaceFragmentsCreator::keyname() const
112  {
113  return PlaceFragmentsCreator::mover_name();
114  }
115 
/// @brief Allocates a new default-constructed PlaceFragments and returns it.
117  PlaceFragmentsCreator::create_mover() const {
118  return new PlaceFragments;
119  }
120 
/// @brief Returns the literal name "PlaceFragments"; keyname() and the PlaceFragments constructor both read it.
122  PlaceFragmentsCreator::mover_name()
123  {
124  return "PlaceFragments";
125  }
126 
128 
// The Mover base class is constructed with the creator's mover_name().
130  protocols::moves::Mover( PlaceFragmentsCreator::mover_name() )
131  {
     // Default to on-the-fly mode; the tag-parsing code sets this to false when a "fragments" option is read.
132  frags_onflight_ = true;
133  }
134 
135 
136 
137 
/// @brief Stores the supplied fragment set in fragments_ and copies each of its frames into
/// library_, keyed by the frame's start position.
/// @note The pointer is only checked with assert(), so the check is compiled out under NDEBUG.
138 void
140  using namespace core::fragment;
141  assert(fragments);
142  fragments_ = fragments;
143 
     // A later frame with the same start() overwrites the earlier library_ entry.
145  for (i = fragments_->begin(); i != fragments_->end(); ++i) {
146  Size position = (*i)->start();
147  library_[position] = **i;
148  }
149 }
150 
151 
/// @brief Builds one Frame of length fsize_ for each start position from insert_start up to
/// insert_stop - fsize_ - 1, fills it with picked fragments, and adds it to fragments_.
/// @details A single fsize_-long window of ss_ is chosen at random and reused as the
/// secondary-structure query for every frame, so the query length matches the Frame length.
/// Requires ss_ to be at least fsize_ characters long.
152 void
154 
155  // pick from vall based on template SS, and target sequence if specified
156  // randomly pick a sequence from the input that fits to the specified fragment size, this of course only makes sense
157  // if the fragment size is shorter than the specified secondary structure or amino acid stretch
158 
     // ss_.size() - fsize_ is unsigned: fail loudly on a too-short ss_ instead of letting the subtraction wrap.
     runtime_assert( ss_.size() >= fsize_ );
159  Size seq_start = numeric::random::random_range( 0, ss_.size() - fsize_ );
     // Take exactly fsize_ characters; the former loop copied everything from seq_start to the end of ss_.
160  std::string ss_sub = ss_.substr( seq_start, fsize_ );
     // NOTE(review): aa_sub is never filled, so the use_seq_ branch would query with an empty sequence.
161  std::string aa_sub = "" ;
162 
166  std::cout << "picking fragments for secondary structure "<< ss_sub << std::endl;
167 
168 
     // Written as an addition so the bound cannot wrap when insert_stop < fsize_ + 1.
169  for ( core::Size j=insert_start; j + fsize_ + 1 <= insert_stop ; ++j ) {
170  //std::string ss_sub = tgt_ss.substr( j-1, fsize_ );
171  //std::string aa_sub = tgt_seq.substr( j-1, fsize_ );
172  core::fragment::FrameOP frame = new core::fragment::Frame( j, fsize_ );
173 
174  if( use_seq_ )
175  frame->add_fragment( core::fragment::picking_old::vall::pick_fragments_by_ss_plus_aa( ss_sub, aa_sub,
177 
178  else
179  frame->add_fragment( core::fragment::picking_old::vall::pick_fragments_by_ss( ss_sub, nfrags_, true,
181 
182  fragments_->add( frame );
183  }
184 }
185 
186 
187 void
188 //PlaceFragments::apply_frag( core::pose::Pose &pose, core::pose::Pose &templ, protocols::loops::Loop &frag, bool superpose) {
189 PlaceFragments::apply_frag( core::pose::Pose &pose, core::fragment::Frame &frame ){//core::pose::Pose &templ, protocols::loops::Loop &frag, bool superpose) {
190 
191  /// superimpose fragment
192 
194  numeric::xyzVector< core::Real > preT(0,0,0), postT(0,0,0);
195  R.xx() = R.yy() = R.zz() = 1;
196  R.xy() = R.yx() = R.zx() = R.zy() = R.yz() = R.xz() = 0;
197 
198  core::Size fstart = frame.start();
199  core::Size len = frame.length();
200  core::pose::Pose pose_copy = pose;
201 
202  // in case that the overlap is ever more than 1 residues
203  //int cartfrag_overlap = aln_len_;
204  runtime_assert( cartfrag_overlap_>=1 && cartfrag_overlap_<= len/2 + 1);//cartfrag_overlap_<=len/2);
205  core::Size nres = pose.total_residue();
206 
207 
208  // pick a random fragment
209  core::Size toget = numeric::random::random_range( 1, frame.nr_frags() );
210  frame.apply( toget, pose_copy );
211 
212  core::Size aln_len = std::min( (core::Size)9999, len ); //can change 9999 to some max alignment sublength
213  core::Size aln_start = numeric::random::random_range(frag.start(), len-aln_len+frag.start() );
214 
215  // don't try to align really short frags
216  if (len > 2) {
217  ObjexxFCL::FArray2D< core::Real > final_coords( 3, 4*aln_len );
218  ObjexxFCL::FArray2D< core::Real > init_coords( 3, 4*aln_len );
219 
220  for (int ii=0; ii<(int)aln_len; ++ii) {
221  int i=aln_start+ii;
222  numeric::xyzVector< core::Real > x_1 = pose_copy.residue(i).atom(" C ").xyz();
223  numeric::xyzVector< core::Real > x_2 = pose_copy.residue(i).atom(" O ").xyz();
224  numeric::xyzVector< core::Real > x_3 = pose_copy.residue(i).atom(" CA ").xyz();
225  numeric::xyzVector< core::Real > x_4 = pose_copy.residue(i).atom(" N ").xyz();
226  preT += x_1+x_2+x_3+x_4;
227 
228  numeric::xyzVector< core::Real > y_1 = pose.residue(templ.pdb_info()->number(i)).atom(" C ").xyz();
229  numeric::xyzVector< core::Real > y_2 = pose.residue(templ.pdb_info()->number(i)).atom(" O ").xyz();
230  numeric::xyzVector< core::Real > y_3 = pose.residue(templ.pdb_info()->number(i)).atom(" CA ").xyz();
231  numeric::xyzVector< core::Real > y_4 = pose.residue(templ.pdb_info()->number(i)).atom(" N ").xyz();
232  postT += y_1+y_2+y_3+y_4;
233 
234  for (int j=0; j<3; ++j) {
235  init_coords(j+1,4*ii+1) = x_1[j];
236  init_coords(j+1,4*ii+2) = x_2[j];
237  init_coords(j+1,4*ii+3) = x_3[j];
238  init_coords(j+1,4*ii+4) = x_4[j];
239  final_coords(j+1,4*ii+1) = y_1[j];
240  final_coords(j+1,4*ii+2) = y_2[j];
241  final_coords(j+1,4*ii+3) = y_3[j];
242  final_coords(j+1,4*ii+4) = y_4[j];
243  }
244  }
245  preT /= 4*len;
246  postT /= 4*len;
247  for (int i=1; i<=(int)4*len; ++i) {
248  for ( int j=0; j<3; ++j ) {
249  init_coords(j+1,i) -= preT[j];
250  final_coords(j+1,i) -= postT[j];
251  }
252  }
253 
254  // get optimal superposition
255  // rotate >init< to >final<
256  ObjexxFCL::FArray1D< numeric::Real > ww( 4*len, 1.0 );
257  ObjexxFCL::FArray2D< numeric::Real > uu( 3, 3, 0.0 );
258  numeric::Real ctx;
259 
260  numeric::model_quality::findUU( init_coords, final_coords, ww, 4*len, uu, ctx );
261  R.xx( uu(1,1) ); R.xy( uu(2,1) ); R.xz( uu(3,1) );
262  R.yx( uu(1,2) ); R.yy( uu(2,2) ); R.yz( uu(3,2) );
263  R.zx( uu(1,3) ); R.zy( uu(2,3) ); R.zz( uu(3,3) );
264  }
265 
266 
267 /// xyz copy fragment to pose
268 
269 for (int i=frag.start(); i<=frag.stop(); ++i) {
270  for (int j=1; j<=pose_copy.residue(i).natoms(); ++j) {
271  core::id::AtomID src(j,i), tgt(j, pose_copy.pdb_info()->number(i));
272  pose.set_xyz( tgt, postT + (R*(pose_copy.xyz( src )-preT)) );
273  }
274 }
275 }
276 
277 
/// @brief Repeatedly applies a random fragment of @p frame to a copy of the pose until the backbone
/// at the frame's ends superimposes well onto the unmodified pose, then writes the superimposed
/// fragment coordinates into the pose.
/// @param frame         source of the start position, the length and the fragment candidates
/// @param aln_len       number of residues taken from each side for the fit (2*aln_len residues, 4 atoms each)
/// @param seq_start     offset added to frame.start() to obtain the first rewritten residue
/// @param max_frag_len  used only to decide whether the frame sits at the C-terminal end
/// @note The pose argument is presumably declared on the signature line that is missing from this listing.
278 void
280  core::fragment::Frame &frame,
281  int aln_len,
282  core::Size seq_start,
283  core::Size max_frag_len ) {
284 
285 
286 
287  core::Size start = frame.start() + seq_start,len = frame.length();
288  bool nterm = (start == seq_start ); //seq_position );
289  bool cterm = (start == pose.total_residue()-max_frag_len );
290 
291  // insert frag
292  core::pose::Pose pose_copy = pose;
293 
294  //compare two, use 4 atoms from alnlen defined residues, 1 is the initialized number
295  ObjexxFCL::FArray1D< numeric::Real > ww( 2*4*aln_len, 1.0 );
296  ObjexxFCL::FArray2D< numeric::Real > uu( 3, 3, 0.0 );
297  numeric::xyzVector< core::Real > com1(0,0,0), com2(0,0,0);
298 
     // At most 100 attempts; the acceptance threshold on rms is relaxed as tries accumulate (see the breaks below).
299  for (int tries = 0; tries<100; ++tries) {
300  ww = 1.0;
301  uu = 0.0;
302  com1 = numeric::xyzVector< core::Real >(0,0,0);
303  com2 = numeric::xyzVector< core::Real >(0,0,0);
304 
305  // grab coords
306  ObjexxFCL::FArray2D< core::Real > init_coords( 3, 2*4*aln_len );
307  for (int ii=-aln_len; ii<aln_len; ++ii) {
     // ii >= 0 counts from the front of the frame (from the back when nterm);
     // ii < 0 counts from the back of the frame (from the front when cterm).
308  int i = (ii>=0) ? (nterm?len-ii-1:ii) : (cterm?-ii-1:len+ii);
309 
310  numeric::xyzVector< core::Real > x_1 = pose.residue(start+i).atom(" C ").xyz();
311  numeric::xyzVector< core::Real > x_2 = pose.residue(start+i).atom(" O ").xyz();
312  numeric::xyzVector< core::Real > x_3 = pose.residue(start+i).atom(" CA ").xyz();
313  numeric::xyzVector< core::Real > x_4 = pose.residue(start+i).atom(" N ").xyz();
314  com1 += x_1+x_2+x_3+x_4;
315 
316  for (int j=0; j<3; ++j) {
317  init_coords(j+1,4*(ii+aln_len)+1) = x_1[j];
318  init_coords(j+1,4*(ii+aln_len)+2) = x_2[j];
319  init_coords(j+1,4*(ii+aln_len)+3) = x_3[j];
320  init_coords(j+1,4*(ii+aln_len)+4) = x_4[j];
321  }
322  }
     // Centre the reference coordinates on their centroid com1.
323  com1 /= 2.0*4.0*aln_len;
324  for (int ii=0; ii<2*4*aln_len; ++ii) {
325  for ( int j=0; j<3; ++j ) init_coords(j+1,ii+1) -= com1[j];
326  }
327 
328  core::Size toget = numeric::random::random_range( 1, frame.nr_frags() );
329  frame.apply( toget, pose_copy );
330 
331  // grab new coords
332  ObjexxFCL::FArray2D< core::Real > final_coords( 3, 2*4*aln_len );
333  for (int ii=-aln_len; ii<aln_len; ++ii) {
334  int i = (ii>=0) ? (nterm?len-ii-1:ii) : (cterm?-ii-1:len+ii);
335  numeric::xyzVector< core::Real > x_1 = pose_copy.residue(start+i).atom(" C ").xyz();
336  numeric::xyzVector< core::Real > x_2 = pose_copy.residue(start+i).atom(" O ").xyz();
337  numeric::xyzVector< core::Real > x_3 = pose_copy.residue(start+i).atom(" CA ").xyz();
338  numeric::xyzVector< core::Real > x_4 = pose_copy.residue(start+i).atom(" N ").xyz();
339  com2 += x_1+x_2+x_3+x_4;
340  for (int j=0; j<3; ++j) {
341  final_coords(j+1,4*(ii+aln_len)+1) = x_1[j];
342  final_coords(j+1,4*(ii+aln_len)+2) = x_2[j];
343  final_coords(j+1,4*(ii+aln_len)+3) = x_3[j];
344  final_coords(j+1,4*(ii+aln_len)+4) = x_4[j];
345  }
346  }
     // Centre the post-fragment coordinates on their centroid com2.
347  com2 /= 2.0*4.0*aln_len;
348  for (int ii=0; ii<2*4*aln_len; ++ii) {
349  for ( int j=0; j<3; ++j ) final_coords(j+1,ii+1) -= com2[j];
350  }
351 
352  // get optimal superposition
353  // rotate >final< to >init<
354  numeric::Real ctx;
355  float rms;
356 
357  numeric::model_quality::findUU( final_coords, init_coords, ww, 2*4*aln_len, uu, ctx );
358  numeric::model_quality::calc_rms_fast( rms, final_coords, init_coords, ww, 2*4*aln_len, ctx );
359 
360  std::cout << "try " << tries << " rms " << rms << std::endl;
361 
     // Accept rms < 0.5 at any time, < 1 from try 20, < 2 from try 40, < 3 from try 60.
362  if (rms < 0.5) break;
363  if (tries >= 20 && rms < 1) break;
364  if (tries >= 40 && rms < 2) break;
365  if (tries >= 60 && rms < 3) break;
366  }
     // NOTE(review): if no try met a threshold, uu, com1 and com2 from the last try are still used below.
368  R.xx( uu(1,1) ); R.xy( uu(2,1) ); R.xz( uu(3,1) );
369  R.yx( uu(1,2) ); R.yy( uu(2,2) ); R.yz( uu(3,2) );
370  R.zx( uu(1,3) ); R.zy( uu(2,3) ); R.zz( uu(3,3) );
371 
372  // apply rotation to ALL atoms
373  // x_i' <- = R*x_i + com1;
374  for ( Size i = 0; i < len; ++i ) {
375  for ( Size j = 1; j <= pose.residue_type(start+i).natoms(); ++j ) {
376  core::id::AtomID id( j, start+i );
377  pose.set_xyz( id, R * ( pose_copy.xyz(id) - com2) + com1 );
378  }
379  }
380 }
381 
382 ///adjustment since parse time specified residues are different numbered than run time residues
/// @brief Splits @p resid on commas and converts each entry with core::pose::parse_resnum(),
/// returning the resulting residue numbers in input order.
/// @param resid  comma-separated residue identifiers
385  std::string resid ){
386 
387  utility::vector1< std::string > const design_keys( utility::string_split( resid, ',' ) );
389 
390  foreach( std::string const key, design_keys ){
     // Each key is resolved against the pose at call time.
391  core::Size const resnum( core::pose::parse_resnum( key, pose ));
392  res.push_back( resnum);
393  TR<<"parsed: "<<key<<std::endl;
394  }
395  return res;
396 }
397 
398 void
400 
401  /// overall scheme:
402  /// grow around stub (can be done outside )
403  /// decide on fragment length (or a window of it)
404  /// make fragments
405  /// align and copy fragment coordinates
406  /// modify with mover
407  /// filter
408 
409  // need to keep track of starting position or loop for alignement, start with 1 res for simplicity
410 
411  //protocols::loops::Loops parse_seeds( );
412 
413  utility::vector1<core::Size> parsed_residues( parse_residues( input_stubs ));
414  Size stub = parsed_residues[1];
415  std::cout << "stub: " << stub <<std::endl;
416 
417  /// 1. define fragment insert/starting position and get and assign fragments
418 
419  // get length of the last chain
420  Size const num_chains( pose.conformation().num_chains() );
421  Size chainB_len = pose.split_by_chain( num_chains ).total_residue();
422 
423  Size insert_start = pose.chain_begin( num_chains );
424  Size insert_stop = chainB_len - fsize_ + 1;
425  runtime_assert( insert_start <= insert_stop );
426 
427  create_fragments( pose, insert_start, insert_stop);
428 
429  // map resids to frames
430  core::Size insert_frags_pos = fragments_->min_pos();
431  std::cout << "start frags = fragments->min_pos:"<< insert_frags_pos <<
432  "\nmax_pos " << fragments_->max_pos() <<
433  "\nfragset for positions = nr_frames " << fragments_->nr_frames() << std::endl;
434 
435  for (core::fragment::FrameIterator i = fragments_->begin(); i != fragments_->end(); ++i){
436  core::Size position = (*i)->start();
437  std::cout << "position after iterator: " << position << std::endl;
438  library_[position] = **i;
439  }
440 
441  /// 2. alignment
442 
443  //int select_position = numeric::random::random_range(1,3); //4);
444  //core::Size max_pos = max_poses[ select_position ];
445  //int select_position = numeric::random::random_range(insert_start,insert_stop);
446 
447  // insert more complex alignment other than a single stub (eg. loop for segement) here...
448  // for now just simple single
449  int in_position = stub_pos;
450 
451  // select random pos around the middle depending on fragment size
452  core::Size insert_pos = max_pos - numeric::random::random_range(fsize_/2 -1, fsize_/2); //+1); /////////?
453  std::cout << "insert position before apply frags : " << insert_pos << std::endl;
454 
455  //insert_pos = std::min( insert_pos, nres - big_-1);
456  insert_pos = std::min( insert_pos, insert_stop - fsize_-1);
457  insert_pos = std::max( (int)insert_pos, (int)insert_start);
458 
459  // for debugging of frames
460  for (boost::unordered_map<core::Size, core::fragment::Frame>::iterator iter = library_.begin() ; iter != library_.end() ; ++iter){
461  std::cout << " frame " << (*iter).first << " len: " << library_[insert_pos].length() << std::endl;
462  }
463 
464  if (library_.find(insert_pos) != library_.end()){
465  apply_frag (pose, library_[insert_pos]);
466  std::cout << "applying fragments on position: " << insert_pos << std::endl;
467  }
468 
469 }
470 
// Reports the same string as PlaceFragmentsCreator::mover_name().
473  return PlaceFragmentsCreator::mover_name();
474 }
475 
476 void
478  DataMap & /*data*/,
479  protocols::filters::Filters_map const & filters,
480  Movers_map const & movers,
481  Pose const & pose){
482 
485  using std::string;
486  using namespace filters;
487 
488  if( tag->hasOption ( "fragments" )){
489  string fragments_file = tag->getOption<string>("fragments");
490  FragSetOP fragments = FragmentIO().read_data(fragments_file);
491  initialize_fragments(fragments);
492  frags_onflight_ = false;
493  }
494 
495  fsize_ = tag->getOption < core::Size >("frag_length", 6 );
496  ss_ = tag->getOption<std::string>( "secstr", "" );
497  if( tag->!hasOption("secstr") || !hasOption("fragments") )
498  throw utility::excn::EXCN_RosettaScriptsOption("either need to specify secondary structure or supply fragements!!");
499 
500  // option for amino acid sequence not yet specified
501  use_seq_ = false
502 
503  nfrags_= tag->getOption<core::Size>( "nfrags", 50 );
504  cartfrag_overlap_ = tag->getOption < int >("aln_len", 1);
505 
506  /// to simplify for now, just take in one residue
507  if( tag->hasOption("stubs") )
508  input_stubs_ = tag->getOption< std::string >( "stubs" );
509 
510  else{
511  /// read input seeds
512  utility::vector0< TagPtr > const branch_tags( tag->getTags() );
513  foreach( TagPtr const btag, branch_tags ){
514 
515  if( btag->getName() == "Seeds" ) { //need an assertion for the presence of these or at least for the option file
516 
517  std::string const beginS( btag->getOption<std::string>( "begin" ) );
518  std::string const endS( btag->getOption<std::string>( "end" ) );
519  std::pair <std::string,std::string> seedpair;
520  seedpair.first = beginS;
521  TR.Debug <<"parsing seeds: " << beginS << " " <<endS <<std::endl;
522  seedpair.second = endS;
523  seed_vector_.push_back( seedpair );
524  }//end seeds
525  else {
526  throw utility::excn::EXCN_RosettaScriptsOption("need to either specify a stub residue or a seed/segment");
527  }
528  }//end b-tags
529  }
530 
531  /// get movers
532  Movers_map::const_iterator find_mover( movers.find( mover_name ) );
533  std::string const mover_name( tag->getOption< std::string >( "mover", "null" ) );
534  protocols::moves::Movers_map::const_iterator mover_it( movers.find( mover_name ) );
535  if( mover_it == movers.end() )
536  throw utility::excn::EXCN_RosettaScriptsOption( "mover "+ mover_name+" not found" );
537  mover_ = mover_it->second ;
538 
539  /// get filters
540  Filters_map::const_iterator find_filter( filters.find( filter_name ));
541  if( find_filter == filters.end() ) {
542  TR<<"WARNING WARNING!!! filter not found in map. skipping: \n"<<tag<<"defaulting to truefilter "<<std::endl;
543  //runtime_assert( find_filter == filters.end() );
544  }
545  else
546  find_filter_ = new protocols::filters::TrueFilter;
547 
548  filter_ = find_filter->second->clone();
549 
550  TR << "with mover \"" << mover_name << "\" and filter \"" << filter_name << std::endl ;
551  TR.flush();
552 }
553 
554 
555 
556 }//seeded_abinitio
557 }//protocol