Rosetta 3.5
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
PairingStatistics.cc
Go to the documentation of this file.
1 // -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
2 // vi: set ts=2 noet:
3 //
4 // (c) Copyright Rosetta Commons Member Institutions.
5 // (c) This file is part of the Rosetta software suite and is made available under license.
6 // (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
7 // (c) For more information, see http://www.rosettacommons.org. Questions about this can be
8 // (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
9 
10 /// @author Oliver Lange
11 
12 // Unit Headers
14 
15 // Package Headers
18 // AUTO-REMOVED #include <protocols/abinitio/TemplateJumpSetup.hh>
19 
20 // Project Headers
21 #include <core/types.hh>
22 
23 // AUTO-REMOVED #include <core/pose/Pose.hh>
24 // AUTO-REMOVED #include <core/pose/util.hh>
25 // AUTO-REMOVED #include <core/io/pdb/pose_io.hh>
26 
27 // AUTO-REMOVED #include <core/kinematics/MoveMap.hh>
28 // AUTO-REMOVED #include <core/id/AtomID.hh>
29 
31 // AUTO-REMOVED #include <core/fragment/ConstantLengthFragSet.hh>
32 // AUTO-REMOVED #include <core/fragment/FragData.hh> //to get secondary structure
33 // AUTO-REMOVED #include <core/fragment/SecstructSRFD.hh> //to get secondary structure
34 // AUTO-REMOVED #include <core/fragment/FragID_Iterator.hh>
35 
36 // AUTO-REMOVED #include <core/scoring/constraints/AtomPairConstraint.hh>
37 // AUTO-REMOVED #include <core/scoring/constraints/ConstraintIO.hh>
38 // AUTO-REMOVED #include <core/scoring/constraints/ConstraintSet.hh>
39 
40 // AUTO-REMOVED #include <core/sequence/util.hh>
42 
43 // AUTO-REMOVED #include <core/fragment/SecondaryStructure.hh>
44 // AUTO-REMOVED #include <protocols/jumping/JumpSample.hh>
45 
46 
47 //numeric headers
48 #include <numeric/random/random.hh>
49 
50 // ObjexxFCL Headers
51 #include <ObjexxFCL/format.hh>
52 
53 // Utility headers
54 // AUTO-REMOVED #include <utility/io/izstream.hh>
55 // AUTO-REMOVED #include <utility/io/ozstream.hh>
56 #include <utility/vector1.hh>
57 #include <basic/Tracer.hh>
58 
59 
60 // AUTO-REMOVED
61 // AUTO-REMOVED #include <core/chemical/ChemicalManager.hh>
62 #include <basic/options/option.hh>
63 #include <basic/options/keys/templates.OptionKeys.gen.hh>
64 #include <basic/options/keys/jumps.OptionKeys.gen.hh>
65 
66 
67 // C++ headers
68 #include <cstdlib>
69 
70 #ifdef WIN32
71 #include <iterator>
72 #endif
73 
74 #include <string>
75 #include <vector>
76 
77 
// File-local tracer, constructed with the channel name "protocols.abinitio.PairingStats".
// Used below through its Trace / Debug / Info / Warning / Error streams.
78 static basic::Tracer tr("protocols.abinitio.PairingStats");
// File-scope using-directives: names from these namespaces are used unqualified in this file
// (e.g. Real, Size, option, and the F() formatter).
79 using namespace core;
80 using namespace basic;
81 using namespace basic::options;
82 using namespace ObjexxFCL::fmt;
83 
// NOTE(review): the signature line of this definition (listing line 84) is missing from this
// extract. The body registers options, so this is presumably an option-registration hook of
// PairingStatistics -- confirm against the header.
// Marks the two `templates` options that suggest_topology reads as relevant.
85  using namespace basic::options;
86  using namespace basic::options::OptionKeys;
87  option.add_relevant( templates::force_native_topology );
88  option.add_relevant( templates::topology_rank_cutoff );
89 }
90 
91 namespace protocols {
92 namespace abinitio {
93 
// File-local random generator, constructed with a fixed seed constant. It is used in this file
// to draw the random topology rank when a topology is suggested.
94 static numeric::random::RandomGenerator RG(5651234); // <- Magic number, do not change it!
95
// Namespace-scope using-directives for the definitions that follow.
96 using namespace core;
97 //using namespace jumping;
98 using namespace basic::options;
99 using namespace basic::options::OptionKeys;
100 
101 
102 
103 PairingStatEntry::PairingStatEntry() {}
104 
105 PairingStatEntry::PairingStatEntry( core::scoring::dssp::StrandPairing const& strand, Model const& id ) :
106  strand_pairing_( strand ),
107  weight_( -1.0 )
108 {
109  models_.push_back( id );
110 }
111 
112 
// NOTE(review): the signature line of this definition (listing line 113) is missing from this
// extract. From the call `itentry->second.add_pairing( ps, id )` elsewhere in this file it is
// presumably `bool PairingStatEntry::add_pairing( StrandPairing const& new_strand, Model const& id )`
// -- confirm against the header.
// Folds `new_strand` into this entry and records `id` as a contributing model.
// Returns true if the pairing was accepted (and `id` appended to the model list), false otherwise.
114  bool success( true );
115  if ( models_.size() ) {
// The entry already holds a pairing: try to merge the new one into it.
116  //queries 'mergeable'
117  success = strand_pairing_.merge( new_strand, true /*do_merge */);
118  } else {
// No model recorded yet: simply adopt the new pairing.
119  strand_pairing_ = new_strand;
120  }
121  if ( success ) models_.push_back( id );
122  return success;
123 }
124 
125 
// NOTE(review): the signature line of this definition (listing line 126) is missing from this
// extract; commented-out code in this file calls `entry->compatible( pairing )`, so this is
// presumably `bool PairingStatEntry::compatible( StrandPairing const& strand ) const` -- confirm.
// True when the stored pairing reports `strand` as mergeable.
127  return strand_pairing_.mergeable( strand );
128 }
129 
130 bool PairingStatEntry::has_model( std::string const& model ) const {
131  for ( ModelList::const_iterator it = models_.begin(), eit = models_.end(); it != eit; ++it ) {
132  if ( model == *it ) return true;
133  }
134  return false;
135 }
136 
// NOTE(review): the signature line of this definition (listing line 137) is missing from this
// extract; presumably this is PairingStatEntry's equality comparison with another entry -- confirm.
// True when this entry's pairing reports the other entry's pairing as mergeable.
138  return strand_pairing_.mergeable( other.pairing() );
139 }
140 
// NOTE(review): the signature line of this definition (listing line 141) is missing from this
// extract; presumably this is PairingStatEntry's inequality comparison with another entry -- confirm.
// True when this entry's pairing does NOT report the other entry's pairing as mergeable.
142  return !strand_pairing_.mergeable( other.pairing() );
143 }
144 
// NOTE(review): the signature line of this definition (listing line 145) is missing from this
// extract; presumably this is the key-equality predicate of the StatEntries hash container
// (the one obtained through `entries_.key_eq()` in this file) -- confirm.
// Two pairings count as equal keys when p1 reports p2 as mergeable.
146  return p1.mergeable( p2 );
147 }
148 
// NOTE(review): the signature line of this definition (listing line 149) is missing from this
// extract; from the recursive call below it is presumably
// `void PairingStatistics::add_entry( StrandPairing const& ps, Model const& id )` -- confirm.
// Registers pairing `ps` from model `id` in entries_: merges it into a matching entry if one is
// found, otherwise creates a new entry keyed by `ps`.
150  StatEntries::iterator itentry = entries_.find( ps );
151  bool merged( false );
152  if ( itentry != entries_.end() ) {
153  // tr.Trace << "found that it matches with " << itentry->first << std::endl;
154  merged = itentry->second.add_pairing( ps, id );
155  // if (!merged) tr.Trace << "strangely couldn't merge the matched one..."<< std::endl;
156  // else tr.Trace << "new merged strand is " << itentry->second.pairing() << std::endl;
157  core::scoring::dssp::StrandPairing const& key( itentry->first );
158  core::scoring::dssp::StrandPairing const& new_pairing( itentry->second.pairing() );
// If merging changed the stored pairing so that it no longer compares equal to the key it is
// filed under, the entry is removed and the merged pairing is inserted again.
159  if ( merged && ( !entries_.key_eq()( new_pairing, key ) ) ) {
// This inner `new_pairing` shadows the reference above: a copy is taken because erasing the
// entry destroys the object the reference points to.
160  core::scoring::dssp::StrandPairing new_pairing( itentry->second.pairing() ); //need a copy
161  entries_.erase( itentry );
// NOTE(review): the erased entry's model list is not carried over; the re-inserted pairing is
// registered for `id` only -- confirm this is intended.
162  add_entry( new_pairing, id );
163  // entries_[ itentry->second.pairing() ] = itentry->second;
164  }
165  }
// Nothing matched (or the merge was refused): start a fresh entry for this pairing.
166  if ( !merged ) entries_[ ps ]=PairingStatEntry( ps, id );
167  //for ( StatEntries::iterator itentry = entries_.begin(), eitentry = entries_.end();
168  // itentry != eitentry && !merged; ++itentry ) {
169  // merged = itentry->add_pairing( *it, id );
170  // }
171 }
172 
// NOTE(review): the signature line of this definition (listing line 173) is missing from this
// extract; from its callers it is presumably
// `void PairingStatistics::add_topology( StrandPairingSet const& topology, Model const& id )` -- confirm.
// Adds every pairing of `topology` to the entry table for model `id`, then stores the
// topology itself in topols_ under `id`.
174  // if (! topology.size() ) return; //also add empty sets -- otherwise the modelname floats around and can't be found in this list
175  for ( core::scoring::dssp::StrandPairingSet::const_iterator it = topology.begin(), eit = topology.end();
176  it != eit; ++it ) {
177  //bool merged ( false );
178  tr.Trace << "adding stand pairing to hash.. " << *it << std::endl;
179  add_entry( *it, id );
180  }
// Stored even when the topology is empty, so the model name can still be looked up.
181  topols_[ id ] = topology;
182 }
183 
184 
185 void PairingStatistics::compute( Templates const& templates ) {
186  ModelFreq model_freq; //count of the underlying structures ( letters 1-4 of model-name )
187  for ( Templates::const_iterator it = templates.begin(), eit = templates.end();
188  it != eit; ++it ) {
189  Template const& model( *it->second );
190  core::scoring::dssp::PairingList template_pairings, target_pairings;
191  model.strand_pairings().get_beta_pairs( template_pairings );
192  model.map_pairings2target( template_pairings, target_pairings );
193  add_topology( core::scoring::dssp::StrandPairingSet( target_pairings ), model.name() );
194  model_freq[ model.name().substr(0,4) ] += 1;
195  }
196  compute_model_weights( model_freq );
197 }
198 
199 
// NOTE(review): the signature line of this definition (listing line 200) is missing from this
// extract; presumably this is the overload of PairingStatistics::compute that takes a single
// StrandPairingSet `topology` -- confirm against the header.
// Registers the one topology under the fixed model name "SINGLE_TOP" and computes the weights.
201  add_topology( topology, "SINGLE_TOP" );
202  ModelFreq model_freq;
// "SING" is the 4-character prefix of "SINGLE_TOP", which is the key the weight computation looks up.
203  model_freq[ "SING" ] = 1;
204  compute_model_weights( model_freq );
205 }
206 
208 
// NOTE(review): the signature line of this definition (listing line 209) is missing from this
// extract; from the call `ps.strand_weight( *isp )` elsewhere in this file it is presumably
// `core::Real PairingStatistics::strand_weight( StrandPairing const& pairing ) const` -- confirm.
// Returns the stored weight of the entry that matches `pairing`, or 0.0 if no entry matches.
210  StatEntries::const_iterator itentry = entries_.find( pairing );
211  if ( itentry != entries_.end() ) {
212  return itentry->second.weight();
213  }
214  // for ( StatEntries::const_iterator entry= entries_.begin(), eentry = entries_.end();
215  // entry != eentry; ++entry ) {
216  // if ( entry->compatible( pairing ) ) {
217  // return entry->weight();
218  // }
219  // }
220  return 0.0;
221 }
222 
223 
// NOTE(review): the signature line of this definition (listing line 224) is missing from this
// extract; presumably this is the PairingStatistics weight lookup by model name `id` -- confirm.
// Scans model_weight_ (pairs of weight, model name) linearly and returns the weight stored for `id`.
225  for ( ModelWeight::const_iterator top = model_weight_.begin(); top != model_weight_.end(); ++top ) {
226  if ( top->second==id ) return top->first;
227  }
// Unknown model name: report it through utility_exit_with_message.
228  utility_exit_with_message("Model name not known: " + id );
// presumably never reached if utility_exit_with_message does not return -- TODO confirm
229  return 0.0;
230 }
231 
// NOTE(review): the signature line of this definition (listing line 232) is missing from this
// extract; from its callers it is presumably
// `void PairingStatistics::compute_model_weights( ModelFreq& model_freq )` -- confirm.
// Scores every stored topology by summing the weights of its pairings, stores each pairing
// weight on its entry, and rebuilds model_weight_ as (score, model) pairs sorted by descending score.
233  Real const contact_order_weight( basic::options::option[ basic::options::OptionKeys::jumps::contact_score ] );
234  std::list< std::pair< core::Real, Model > > weight_list;
235  for ( Topologies::const_iterator top = topols_.begin(), etop=topols_.end();
236  top != etop; ++top ) {
237  Real score( 0 );
// Normalisation: square root of the count stored for the 4-character prefix of the model name.
// NOTE(review): if that count is 0 (e.g. a prefix missing from model_freq), 1.0/norm below
// divides by zero -- confirm callers always supply every prefix.
238  Real const norm( sqrt( (Real) model_freq[ top->first.substr(0,4) ] ) );
239  // loop over pairings and find them in entries_
240  for ( core::scoring::dssp::StrandPairingSet::const_iterator pairing = top->second.begin(),
241  epairing = top->second.end(); pairing != epairing; ++pairing ) {
242  // find pairing in entries
243  StatEntries::iterator itentry = entries_.find( *pairing );
244  if ( itentry != entries_.end() ) {
// weight = frequency / norm * ( size - 1 + contact_order_weight * max( 0, contact_order - 20 ) )
245  Real const weight( 1.0/norm * itentry->second.frequency() * ( itentry->second.size()-1.0
246  + contact_order_weight * ( std::max( 0, (int) itentry->second.contact_order() - 20)) ) );
247  itentry->second.set_weight( weight );//so far only used for output later on
248  score += weight;
249  }
250  }
251 //
252 // for ( StatEntries::iterator entry= entries_.begin(), eentry = entries_.end();
253 // entry != eentry; ++entry ) {
254 // if ( entry->compatible( *pairing ) ) {
255 // Real const contact_order_weight( basic::options::option[ basic::options::OptionKeys::jumps::contact_score ] );
256 // Real const weight( 1.0/norm * entry->frequency() * ( entry->size()-1.0
257 // + contact_order_weight * ( std::max( 0, (int) entry->contact_order() - 20)) ) );
258 // entry->set_weight( weight );//so far only used for output later on
259 // score += weight;
260 // break;
261 // }
262 // }
263  //} // added score for each pairing
264  weight_list.push_back( std::make_pair( score, top->first ) );
265  // weights_[ top->first ] = score; //also want the score in a map NAME --> score
266  } // score for each model/topology
// Ascending sort followed by reverse: the highest score ends up first.
267  weight_list.sort();
268  weight_list.reverse();
269  model_weight_.clear();
270  copy( weight_list.begin(), weight_list.end(), std::back_inserter( model_weight_ ) );
271 }
272 
// NOTE(review): the signature lines of this definition (listing lines 273-274) are missing from
// this extract; presumably this is PairingStatistics::suggest_topology, taking `topol_id` as an
// output string and returning a StrandPairingSet by const reference -- confirm against the header.
// Unless templates::force_native_topology is set, picks one of the top-ranked topologies at
// random, writes its model name to `topol_id` and returns that topology. With the option set,
// returns native_topology_ and leaves `topol_id` untouched.
275  runtime_assert( nr_models() );
276
277  if ( !option[ templates::force_native_topology ] ) {
278  Real const rel_rank_cutoff( option[ templates::topology_rank_cutoff ] );
279
280  if( 1 > model_weight_.size() ){
281  utility_exit_with_message( "The array model_weight_[] has 0 members, yet we need one. The topology is: " + topol_id );
282  }
// Absolute cutoff: the relative cutoff applied to the weight of the best-ranked model.
283  Real const rank_cutoff( weight( 1 )*rel_rank_cutoff );
284  Size top_max;
// This local shadows the nr_models() member function used in the assertion above.
285  Size const nr_models( model_weight_.size() );
286  if ( rel_rank_cutoff >= 0.99 ) {
// A relative cutoff of (almost) 1 is handled specially: choose among at most the 5 best models.
287  top_max = std::min( 5, (int) nr_models);
288  } else {
// Advance top_max while the weight at that rank is above the cutoff.
// NOTE(review): the loop stops ON the first rank that fails the cutoff, and that rank is still
// inside the sampled range 1..top_max below -- confirm this is intended and not an off-by-one.
289  for ( top_max = 1; (top_max < nr_models) && weight( top_max ) > rank_cutoff; top_max++ ) {};
290  }
// Random rank in 1..top_max.
// NOTE(review): assumes RG.uniform() is strictly below 1.0; a value of exactly 1.0 would give top_max+1.
291  Size const rg_topol ( static_cast< int >( RG.uniform() * top_max ) + 1 );
292  topol_id = ranked_model( rg_topol );
293  runtime_assert( topol_id != "BOGUS" );
294  tr.Info << "cutoff: " << rank_cutoff << " use topology ** " << rg_topol << ": " << topol_id << " ** to select pairings " << std::endl;
295  return topology( topol_id );
296  } else {
297  return native_topology_;
298  }
299 }
300 
301 // void PairingStatistics::remove_stupid_strands() {
302 
303 // }
304 
/// @brief Write the statistics as text: a "PAIRING_STATISTICS <count>" header, then for every
/// (weight, model) pair in model_weight_ a "STRAND_TOPOLOGY" line followed by one
/// "PAIRSTAT_ENTRY:" line per pairing of that model's topology.
/// @param out  stream to write to
/// @param ps   statistics to write
/// @return `out`
// NOTE(review): the header line of the inner loop over the topology's pairings (listing line 322,
// which declares `isp`) is missing from this extract; only its continuation line is visible.
305 std::ostream& operator<< ( std::ostream& out, PairingStatistics const& ps ) {
306  out << "PAIRING_STATISTICS " << ps.model_weight_.size();
307  // out << ps.entries_;
308 // out << "PAIRSTAT: -----------------------------\n";
309 // for ( PairingStatistics::ModelFreq::const_iterator it = ps.model_freq_.begin(), eit = ps.model_freq_.end();
310 // it != eit; ++it ) {
311 // out << it->first << " " << it->second << "\nPAIRSTAT: ";
312 // }
313  // out << "\nPAIRSTAT: ";
314 // for ( PairingStatistics::ModelWeight::const_iterator it = ps.model_weight_.begin(), eit = ps.model_weight_.end();
315 // it != eit; ++it ) {
316 // out << it->first << " " << it->second << "\nPAIRSTAT: ";
317 // }
318 // out << " -----------------------------\nPAIRSTAT: ";
319  for ( PairingStatistics::ModelWeight::const_iterator it = ps.model_weight_.begin(), eit = ps.model_weight_.end();
320  it != eit; ++it ) {
// Topology header: number of pairings, model weight, model name.
321  out << "\nSTRAND_TOPOLOGY " << " " << ps.topology( it->second ).size() << " " << it->first << " " << it->second;
323  eisp = ps.topology( it->second ).end(); isp != eisp; ++isp ) {
// NOTE(review): a pairing skipped here is still counted in the size written on the
// STRAND_TOPOLOGY line, so a reader expecting that many entries would come up short -- confirm.
324  if ( !isp->range_check() ) {
325  tr.Error << "[ERROR] skip inconsistent pairing... " << *isp << std::endl;
326  continue;
327  }
328  out << "\nPAIRSTAT_ENTRY: ";
329  out << F(5,2, ps.strand_weight( *isp ) ) << " ";
// Marker column: '*' for a native pairing, '.' otherwise.
330  if ( ps.is_native_pairing( *isp ) ) out << " * ";
331  else out << " . ";
332  out << *isp;
333  }
334  }
335
336  return out;
337 }
338 
339 
340 void PairingStatEntry::show( std::ostream& out ) const {
341  if ( !strand_pairing_.range_check() ) {
342  tr.Error << "[ERROR] skip inconsistent pairing... " << strand_pairing_ << std::endl;
343  return;
344  }
345  out << "PAIRSTAT_ENTRY: " << F(5,2,weight() ) << " " << frequency() << " " <<strand_pairing_ << " ";
346  for ( ModelList::const_iterator it = models_.begin(), eit = models_.end();
347  it != eit; ++it ) {
348  out << *it << " ";
349  }
350 }
351 
352 std::istream& operator>> ( std::istream& is, PairingStatEntry& ps ) {
353  std::string tag;
354  is >> tag;
355  if ( tag != "PAIRSTAT_ENTRY:" ) {
356  tr.Trace << "failed reading PAIRSTAT_ENTRY: --- found instead: " << tag << std::endl;
357  is.setstate( std::ios_base::failbit );
358  return is;
359  }
360  Size nr_models;
361  is >> ps.weight_ >> nr_models >> ps.strand_pairing_;
362  for ( Size ct = 1; ct <= nr_models; ct ++) {
363  is >> tag;
364  ps.models_.push_back( tag );
365  }
366  return is;
367 }
368 
369 std::ostream& operator<< (std::ostream& out, StatEntries const& ps ) {
370  for ( StatEntries::const_iterator it = ps.begin(), eit = ps.end();
371  it != eit; ++it ) {
372  out << it->second << "\n";
373  }
374  return out;
375 }
376 
377 std::istream& operator>> (std::istream& is, StatEntries& pslist) {
378  pslist.clear();
379  PairingStatEntry ps;
380  while ( is >> ps ) { //not perfect because it fucks up the stream and the next line...
381  pslist[ ps.pairing() ] = ps;
382  }
383  return is;
384 }
385 
/// @brief Read statistics in the text layout used in this file: a "PAIRING_STATISTICS <ntops>"
/// header, then `ntops` blocks, each a "STRAND_TOPOLOGY <nstrand> <weight> <model>" line
/// followed by `nstrand` "PAIRSTAT_ENTRY:" records.
/// @param is  stream to read from
/// @param ps  statistics object that receives the topologies, model weights and pairing entries
/// @return `is`; failbit is set when a tag does not match or a pairing fails range_check()
// NOTE(review): listing lines 412 and 414 are missing from this extract; they presumably hold
// the declarations of `sps` (the topology being built) and `pairing` (the pairing being read).
386 std::istream & operator>>( std::istream &is, PairingStatistics &ps) {
387  std::string tag;
388  Size ntops;
389  is >> tag >> ntops;
390  if ( tag != "PAIRING_STATISTICS" ) {
391  tr.Trace << "failed reading PAIRING_STATISTIC --- found instead: " << tag << std::endl;
392  is.setstate( std::ios_base::failbit );
393  return is;
394  }
395  tr.Trace << " read " << ntops << " topologies from file... " << std::endl;
396  ps.model_weight_.reserve( ntops+10 );
397
398  //cause the hash-container to have at least ntops*3+10 buckets
399  ps.entries_.rehash( ntops*3+10 );
400
401  for ( Size ct_top = 1; ct_top <= ntops; ct_top++ ) {
402  Size nstrand;
403  Real model_weight;
404  std::string model_ID;
405  is >> tag >> nstrand >> model_weight >> model_ID;
406  if ( tag != "STRAND_TOPOLOGY" ) {
407  tr.Trace << "failed reading STRAND_TOPOLOGY --- found instead: " << tag << std::endl;
408  is.setstate( std::ios_base::failbit );
409  return is;
410  }
411  tr.Debug << "reading strand-topology " << model_ID << " with " << nstrand << " strands... "<<std::endl;
413  for ( Size ct = 1; ct <= nstrand; ct++ ) {
415  Real weight;
// `ntag` receives the one-character marker that follows the weight ('*' or '.' in the writer
// of this file); it is read only to advance the stream.
416  char ntag;
417  std::string entry_tag;
418  is >> entry_tag >> weight >> ntag >> pairing;
419  if ( is.fail() ) return is;
420  if ( !pairing.range_check() ) {
421  tr.Error << "[ERROR] read inconsistent pairing in " << tag << " " << model_weight << " " << model_ID << std::endl;
422  tr.Error << "offending pairing " << pairing << std::endl;
423  is.setstate( std::ios_base::failbit );
424  return is;
425  }
426  if ( entry_tag != "PAIRSTAT_ENTRY:" ) {
427  tr.Trace << "failed reading PAIRSTAT_ENTRY: --- found instead: " << entry_tag << std::endl;
428  is.setstate( std::ios_base::failbit );
429  return is;
430  }
431  runtime_assert( pairing.range_check() );
432  sps.push_back( pairing );
433
434  //maintain also an extra list of all individual strand-pairings found... "PairingStatEntry"
435  //too slow: option one, skip this condensing test -> check memory
436  // otpion two,
437  // bool found( false );
438  // for ( StatEntries::iterator try_entry= ps.entries_.begin(), eentry = ps.entries_.end();
439 // try_entry != eentry; ++try_entry ) {
440 // if ( try_entry->compatible( pairing ) ) {
441 // found = true;
442 // if ( try_entry->weight() != weight ) {
443 // tr.Warning << "inconsistent weights in topology " << *try_entry << std::endl;
444 // tr.Warning << "new weight is ignored: " << weight << " for strand " << pairing << "which had weight " << try_entry->weight() << std::endl;
445 // }
446 // try_entry->models().push_back( model_ID );
447 // break;
448 // } // if
449 // }
// Known pairing: keep the weight already stored (warn if the file disagrees) and add this model.
// Unknown pairing: create a new entry carrying the weight read from the file.
450  StatEntries::iterator try_entry = ps.entries_.find( pairing );
451  if ( try_entry != ps.entries_.end() ) {
452  if ( try_entry->second.weight() != weight ) {
453  tr.Warning << "inconsistent weights in topology " << try_entry->second << std::endl;
454  tr.Warning << "new weight is ignored: " << weight << " for strand " << pairing << "which had weight " << try_entry->second.weight() << std::endl;
455  }
456  try_entry->second.models().push_back( model_ID );
457  } else {
458  PairingStatEntry entry( pairing, model_ID );
459  entry.models().reserve( ntops );
460  entry.set_weight( weight );
461  ps.entries_[ pairing ] = entry;
462  }
463
464  } // finished reading this topology
465  ps.topols_[ model_ID ]=sps;
466  ps.model_weight_.push_back( std::make_pair( model_weight, model_ID ) );
467  } //for all expected topologies
468  return is;
469 }
470 
471 }
472 }