Rosetta 3.5
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Templates.cc
Go to the documentation of this file.
1 // -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
2 // vi: set ts=2 noet:
3 //
4 // (c) Copyright Rosetta Commons Member Institutions.
5 // (c) This file is part of the Rosetta software suite and is made available under license.
6 // (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
7 // (c) For more information, see http://www.rosettacommons.org. Questions about this can be
8 // (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
9 
10 /// @author Oliver Lange
11 
12 // Unit Headers
14 
15 // Package Headers
19 
20 // Project Headers
21 #include <core/types.hh>
22 
23 #include <core/pose/Pose.hh>
24 #include <core/pose/util.hh>
25 // AUTO-REMOVED #include <core/io/pdb/pose_io.hh>
26 
27 // AUTO-REMOVED #include <core/kinematics/MoveMap.hh>
28 #include <core/id/AtomID.hh>
29 
30 
31 // AUTO-REMOVED #include <core/chemical/ChemicalManager.hh>
32 #include <basic/options/option.hh>
33 #include <basic/options/keys/templates.OptionKeys.gen.hh>
34 
35 #include <core/fragment/FragSet.hh>
38 #include <core/fragment/FragData.hh> //to get secondary structure
39 #include <core/fragment/SecstructSRFD.hh> //to get secondary structure
41 
43 // AUTO-REMOVED #include <core/scoring/constraints/ConstraintIO.hh>
45 
46 // AUTO-REMOVED #include <core/sequence/util.hh>
48 
51 
52 
53 //numeric headers
54 #include <numeric/random/random.hh>
55 
56 // ObjexxFCL Headers
57 
58 // Utility headers
59 #include <utility/io/izstream.hh>
60 // AUTO-REMOVED #include <utility/io/ozstream.hh>
61 #include <utility/vector1.hh>
62 #include <basic/Tracer.hh>
63 
64 // C++ headers
65 #include <cstdlib>
66 #include <string>
67 #include <vector>
68 
73 
74 
75 
76 static basic::Tracer tr("protocols.abinitio.Templates");
77 using namespace core;
78 using namespace basic;
79 using namespace basic::options;
80 
81 
82 
84  using namespace basic::options;
85  using namespace basic::options::OptionKeys;
88  option.add_relevant( templates::no_culling );
89  option.add_relevant( templates::helix_pairings );
90  option.add_relevant( templates::prefix );
91 
92  option.add_relevant( templates::cst::topN );
93  option.add_relevant( templates::cst::wTopol );
94  option.add_relevant( templates::cst::wExtern );
95 
96  option.add_relevant( templates::fragsteal::topN );
97  option.add_relevant( templates::fragsteal::wTopol );
98  option.add_relevant( templates::fragsteal::wExtern );
99 
100 }
101 
102 
103 namespace protocols {
104 namespace abinitio {
105 
106 static numeric::random::RandomGenerator RG(12598234); // <- Magic number, do not change it!
107 
108 using namespace core;
109 using namespace jumping;
110 using namespace basic::options;
111 using namespace basic::options::OptionKeys;
113 public:
114  bool has( std::string const& name ) const {
115  StructureMap::const_iterator iter = poses_.find( name );
116  return ( iter != poses_.end() );
117  };
118 
119  pose::PoseCOP operator[] ( std::string const& name ) const{
120  StructureMap::const_iterator iter = poses_.find( name );
121  if ( iter != poses_.end() ) {
122  return (iter->second);
123  } else return NULL;
124  };
125 
126  void add( std::string const& file_name );
127 
128 private:
129  typedef std::map<std::string, pose::PoseCOP > StructureMap;
131 };
132 
133 void StructureStore::add( std::string const& file_name ) {
134  if ( !has( file_name) ) {
135  pose::PoseOP pose = new pose::Pose;
136 
137  //read structure
138  core::import_pose::pose_from_pdb( *pose, file_name );
139 
140  // switch to centroid --- such that constraints get correct atom numbers assigned
142 
143  // set ss structure -- good for fragpicking
144  pose::set_ss_from_phipsi( *pose );
145 
146  poses_[ file_name ]=pose;
147  }
148 }
149 
150 Templates::~Templates() {}
151 
152 using namespace core;
153 using namespace fragment;
154 Templates::Templates( std::string const& config_file, pose::PoseCOP native ) :
155  native_( native )
156 {
157  utility::io::izstream in( config_file);
158  tr.Info << "read homolog-template information from " << config_file << std::endl;
159  if ( !in ) {
160  utility_exit_with_message("ERROR:: Unable to open template-config file: "+config_file);
161  }
162  StructureStore pose_store;
163  std::string line;
164  good_ = true;
165  while ( getline( in, line) ) {
166  std::istringstream line_stream( line );
167  std::string name, pdb, align;
168  int offset; Real score;
169  line_stream >> name >> pdb >> align >> offset >> score;
170  TemplateOP theTemplate;
171  if ( !line_stream.fail() && ( pdb.size() >= 1 ) ) {
172  if ( option[ templates::prefix ].user() ) {
173  std::string const prefix( option[ templates::prefix ]() );
174  pdb = prefix+"/"+pdb;
175  align = prefix+"/"+align;
176  }
177  tr.Info << "add Template " << name << " ..." << std::endl;
178  if ( !pose_store.has( pdb ) ) pose_store.add( pdb );
179  tr.Info << "template " << name << " read template structure " << pdb << " with offset " << offset << std::endl;
180 
181  theTemplate = new Template( name, pose_store[pdb], align, offset, score );
182  if( !theTemplate->is_good() ){
183  good_ = false;
184  continue;
185  }
186  if ( target_sequence_.size() ) {
187  if ( target_sequence_ != theTemplate->query_sequence() ) {
188  tr.Warning << "[WARNING] the query sequence " << theTemplate->query_sequence() << " is different than previous " << std::endl;
189  }
190  } else target_sequence_ = theTemplate->query_sequence();
191  templates_.insert( TemplateMap::value_type( name, theTemplate ) );
192  }
193 
194  std::string tag;
195  while ( line_stream >> tag ) {
196  if ( tag == "C" || tag == "SC" ) cull_list_.push_back( theTemplate );
197  if ( tag == "F" ) fragpick_list_.push_back( theTemplate );
198  if ( tag == "H" ) helixjump_pick_list_.push_back( theTemplate );
199  if ( tag == "CST" || tag == "SCST" ) {
200  std::string cst_file;
201  line_stream >> cst_file;
202  if ( option[ templates::prefix ].user() ) {
203  std::string const prefix( option[ templates::prefix ]() );
204  cst_file = prefix + "/" + cst_file;
205  }
206  if ( !line_stream.fail() ) {
207  tr.Info << "read constraints " << cst_file << "..." << std::endl;
208  theTemplate->read_constraints( cst_file );
209  }
210  }
211  }
212  }
213  PairingStatisticsOP strand_stats = new PairingStatistics( *this );
214  if ( native_ ) strand_stats->set_native_topology( core::scoring::dssp::StrandPairingSet( *native_ ) );
215  strand_stats_ = strand_stats;
216 
217  tr.Info << "statistics of pairings: \n\n\n" << *strand_stats_ << std::endl;
218 
219  //copy topology score into templates:
220  for ( Size i = 1; i <= strand_stats->nr_models(); i++ ) {
221  templates_[ strand_stats->ranked_model( i ) ]->topology_score( strand_stats->weight( i ) );
222  }
223 
224  if ( option[ templates::fragsteal::topN ].user() ) {
227  tr.Info << "helixjump_list \n";
228  for ( TemplateList::const_iterator it = helixjump_pick_list_.begin(), eit = helixjump_pick_list_.end();
229  it != eit; ++it ) {
230  tr.Info << (*it)->name() << "\n";
231  }
232  tr.Info << std::endl;
233  }
234 }
235 
237  native_ = native;
238 }
239 
240 
241 void
242 Templates::get_cst_list( TemplateList& cst_list, TemplateList& cull_list ) const {
243  cst_list.clear();
244 
245  bool bScoreFilter = false;
246  Real wTopol( 0.0 ), wExtern( 0.0 );
247  if ( option[ templates::cst::topN ].user() ) {
248  bScoreFilter = true;
249  wTopol = option[ templates::cst::wTopol ];
250  wExtern = option[ templates::cst::wExtern ];
251  }
252 
253  // first get list of Templates with constraints
254  for ( TemplateMap::const_iterator it=templates_.begin(),
255  eit = templates_.end(); it!=eit; ++it ) {
256  // if template has constraints
257  TemplateCOP aTemplate( it->second );
258  if ( aTemplate->has_constraints() ) {
259  cst_list.push_back( aTemplate );
260  }
261  }
262  if ( !bScoreFilter ) return;
263 
264  _get_scored_list( cst_list, option[ templates::cst::topN ], wTopol, wExtern );
265  cull_list = cst_list;
266 }
267 
268 
269 void
271  frag_list.clear();
272 
273  //bool bScoreFilter = false;
274  Real wTopol( 0.0 ), wExtern( 0.0 );
275  if ( option[ templates::fragsteal::topN ].user() ) {
276  //bScoreFilter = true; // set but never used ~Labonte
277  wTopol = option[ templates::fragsteal::wTopol ];
278  wExtern = option[ templates::fragsteal::wExtern ];
279  }
280 
281  for ( TemplateMap::const_iterator it=templates_.begin(),
282  eit = templates_.end(); it!=eit; ++it ) {
283  frag_list.push_back( it->second );
284  }
285  _get_scored_list( frag_list, option[ templates::fragsteal::topN ], wTopol, wExtern );
286 }
287 
288 
289 
290 void Templates::_get_scored_list( TemplateList& cst_list, Size topN, Real wTopol, Real wExtern) const {
291 
292  Real sum_extern = 0;
293  Real sum_extern2 = 0;
294  Real sum_topol = 0;
295  Real sum_topol2 = 0;
296  Size n = 0;
297 
298  bool bScoreFilter = true;
299 
300  // first get list of Templates with constraints
301  for ( TemplateMap::const_iterator it=templates_.begin(),
302  eit = templates_.end(); it!=eit; ++it ) {
303  TemplateCOP aTemplate( it->second );
304  if ( bScoreFilter ) {
305  sum_extern += aTemplate->external_score();
306  sum_extern2 += aTemplate->external_score()*aTemplate->external_score();
307  sum_topol += aTemplate->topology_score();
308  sum_topol2 += aTemplate->topology_score()*aTemplate->topology_score();
309  n++;
310  }
311  }
312  if ( n == 0 ) return;
313  if ( !bScoreFilter ) return;
314  // compute std-dev and mean:
315  Real mean_topol = sum_topol / n;
316  Real mean_extern = sum_extern / n;
317  Real std_topol = std::sqrt( sum_topol2 / n - mean_topol*mean_topol );
318  Real std_extern = std::sqrt( sum_extern2 / n - mean_extern*mean_extern );
319 
320  std::list< std::pair< core::Real, TemplateCOP > > weight_list;
321  for ( TemplateList::const_iterator it = cst_list.begin(), eit = cst_list.end();
322  it != eit; ++it ) {
323  Real score = ( (*it)->topology_score() - mean_topol ) / std_topol * wTopol
324  + ( (*it)->external_score() - mean_extern ) / std_extern * wExtern;
325  weight_list.push_back( std::make_pair( score, *it ) );
326  }
327  weight_list.sort();
328  weight_list.reverse();
329  cst_list.clear();
330  std::list< std::pair< core::Real, TemplateCOP > >::const_iterator iter = weight_list.begin();
331  for ( Size i = 1; i <= topN && iter != weight_list.end(); i++, iter++ ) {
332  cst_list.push_back( iter->second );
333  }
334 }
335 
336 
337 Size
338 Templates::pick_frags( FragSet& frag_set, core::fragment::FragDataOP frag_type, Size ncopies /* default 1 */) const {
339 
340  Size nframes = target_total_residue() - frag_type->size() + 1;
341  FrameList frames;
342  Size total( 0 );
343  tr.Info << "pick frames for target position 1 -> " << nframes << " from templates" << std::endl;
344  for ( Size pos =1; pos<=nframes; pos ++ ) {
345  FrameOP frame = new Frame( pos, frag_type );
346  frames.push_back( frame );
347  }
348 
349  for ( TemplateList::const_iterator it=fragpick_list_.begin(),
350  eit = fragpick_list_.end(); it!=eit; ++it ) {
351  tr.Info << "pick from template " << (*it)->name() << std::endl;
352  Size nr_frags = (*it)->steal_frags( frames, frag_set, ncopies );
353  tr.Info << "found " << nr_frags << " new fragments " << std::endl;
354  total+=nr_frags;
355  }
356 
357  return total;
358 }
359 
360 
361 FragSetOP
362 Templates::pick_frags( FragSetOP frag_set, core::fragment::FragDataOP frag_type, Size min_nr_frags, Size ncopies /* default 1 */ ) const {
363 
364  ConstantLengthFragSet template_frags;
365  Size total( 0 );
366  total = pick_frags( template_frags, frag_type, ncopies );
367 
368  // template_frags contains only homolog fragments, .. needs filling up in gappy regions
369  FragSetOP merged_frags = frag_set->empty_clone();
370 
371  Size total_fill( 0 );
372  //merge fragments:
373  for ( Size pos = 1; pos<=target_total_residue(); pos++ ) {
374  FrameList template_frames;
375  template_frags.frames( pos, template_frames );
376  merged_frags->add( template_frames );
377  Size nr_frags( template_frames.flat_size() );
378  tr.Info << nr_frags << " fragments at pos " << pos << ". required: " << min_nr_frags << std::endl;
379  if ( nr_frags < min_nr_frags ) {
380  Size nr_fill ( min_nr_frags - nr_frags );
381  FrameList standard_frames;
382  frag_set->frames( pos, standard_frames );
383  if ( standard_frames.size() ) {
384  tr.Info << "attempt to fill up with " << nr_fill << " frags at position " << pos << " ... ";
385  for ( FragID_Iterator it = standard_frames.begin(), eit = standard_frames.end();
386  it != eit && nr_fill; ++it, --nr_fill ) {
387  merged_frags->add( *it );
388  ++total_fill;
389  }
390  if ( nr_fill ) {
391  tr.Info << nr_fill << " fragments short " << std::endl;
392  } else {
393  tr.Info << "succeeded! " << std::endl;
394  }
395  } // standard frags present
396  } //fill up
397  }
398  tr.Info << "found " << total << " fragments from homologs. supplemented by " << total_fill << " frags from standard library " << std::endl;
399  return merged_frags;
400 }
401 // Size total( 0 );
402 // for ( TemplateMap::const_iterator it=templates_.begin(),
403 // eit = templates_.end(); it!=eit; ++it ) {
404 // total += it->second->pick_frags( frag_set, frag_type );
405 // }
406 // return total;
407 //}
408 
410  core::fragment::FragSet& frag_set,
412  core::Size ncopies /*default = 1*/
413 ) const {
414  Size total( 0 );
415  for ( TemplateList::const_iterator it=fragpick_list_.begin(),
416  eit = fragpick_list_.end(); it!=eit; ++it ) {
417  tr.Info << "pick large frag from template " << (*it)->name() << std::endl;
418  Size nr_frags = (*it)->pick_large_frags( frag_set, frag_type, ncopies );
419  tr.Info << "found " << nr_frags << " new fragments " << std::endl;
420  total+=nr_frags;
421  }
422  return total;
423 }
424 
425 
426 void
428  pairings.clear();
429 
430  utility::io::izstream in( filename );
431  if ( !in ) {
432  utility_exit_with_message("ERROR Unable to open pairings file "+filename);
433  }
434 
435  std::string pdb;
436  in >> pdb;
437 
439  read_pairing_list( in, raw_pairings );
440  tr.Debug << " read pairings for template " << pdb << "\n" << raw_pairings << std::endl;
441  const_iterator iter = templates_.find( pdb );
442  if ( iter == templates_.end() ) {
443  utility_exit_with_message("unrecognized template name "+pdb+" --- this name has to be in template:config file");
444  }
445  iter->second->map_pairings2target( raw_pairings, pairings );
446  tr.Debug << " mapped pairings for target\n " << pairings_ << std::endl;
447 }
448 
450  if ( !ss_def ) {
451  using namespace fragment;
452  tr.Info << "TemplateJumpSetup will be initialized with secondary structure from homologs " << std::endl;
453  ConstantLengthFragSet fragset;
454  pick_frags( fragset, new FragData( new SecstructSRFD, 1 ) ); //for ss-structure 1mers are enough
455  ss_def = new core::fragment::SecondaryStructure( fragset, target_total_residue() );
456  }
457 // utility::io::ozstream dump("ss_def_for_jumps");
458 // for ( Size i = 1; i<=ss_def->total_residue(); i++ ) {
459 // dump << i << " " << ss_def->loop_fraction()(i) << std::endl;
460 // }
461  core::scoring::dssp::PairingsList helix_pairings;
462  if ( option[ templates::helix_pairings ].user() ) read_pairings( option[ templates::helix_pairings ], helix_pairings );
463  return new TemplateJumpSetup( this, ss_def, strand_stats_, helix_pairings );
464 }
465 
466 
467 
468 void
470  using namespace scoring::constraints;
471  using namespace basic::options;
472  using namespace basic::options::OptionKeys;
473 
474  typedef Template::NamedAtomPairConstraintList NamedAtomPairConstraintList;
475  typedef Template::AtomPairConstraintList AtomPairConstraintList;
476  AtomPairConstraintList full_list;
477  // take constraint sets from each template
478  TemplateList cst_list;
479  TemplateList cull_list = cull_list_;
480 
481  get_cst_list( cst_list, cull_list ); // evaluates options to see if score-ranking is used...
482  tr.Info << "pick constraints from " << cst_list.size() << " models " << std::endl;
483 
484  for ( TemplateList::const_iterator it=cst_list.begin(),
485  eit = cst_list.end(); it!=eit; ++it ) {
486  tr.Info << "pick constraints from template " << (*it)->name() << std::endl;
487 
488  // if template has constraints
489  Template const& aTemplate( **it );
490  // map them to target sequence
491  NamedAtomPairConstraintList new_constraints;
492  aTemplate.map2target( aTemplate.constraints(), new_constraints );
493 
494  tr.Info << "have " << new_constraints.size() << " constraints; start culling... " << std::endl;
495 
496  if ( !option[ templates::no_culling ] ) {
497  //throw out all constraints that violate any template structure
498  for ( TemplateList::const_iterator it=cull_list.begin(),
499  eit = cull_list.end(); it!=eit; ++it ) {
500  NamedAtomPairConstraintList culled_constraints;
501  tr.Info << "cull with template " << (*it)->name() << std::endl;
502  (*it)->cull_violators( new_constraints, culled_constraints );
503  new_constraints = culled_constraints;
504  tr.Info << (*it)->name() << " leaves " << new_constraints.size() << " unviolated " << std::endl;
505  }
506  }
507 
508  // add them to full_list if they are not out of bounds
509  for ( NamedAtomPairConstraintList::const_iterator it = new_constraints.begin(),
510  eit = new_constraints.end(); it!=eit; ++it ) {
511  AtomPairConstraintOP cst = (*it)->mapto( pose );
512  if ( cst ) full_list.push_back( cst );
513  }
514  }
515  cstset->add_constraints( full_list );
516  // run them over all other templates and throw out violated constraints
517  // Todo: consolidate: redundant constraints --> single constraint with higher weight
518 }
519 
520 
521 } //abinitio
522 } //protocols