Rosetta 3.5
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
NormalizedEvaluatedArchive.cc
Go to the documentation of this file.
1 // -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
2 // vi: set ts=2 noet:
3 //
4 // This file is part of the Rosetta software suite and is made available under license.
5 // The Rosetta software is developed by the contributing members of the Rosetta Commons consortium.
6 // (C) 199x-2009 Rosetta Commons participating institutions and developers.
7 // For more information, see http://www.rosettacommons.org/.
8 
9 /// @file NormalizedEvaluatedArchive.cc
10 /// @brief implementation of NormalizedEvaluatedArchive
11 /// @author Oliver Lange olange@u.washington.edu
12 
13 // Unit headers
17 
18 //#include <core/io/silent/SilentFileData.hh>
19 
21 //#include <core/scoring/constraints/ConstraintIO.hh>
22 //#include <core/scoring/constraints/util.hh>
23 //#include <core/scoring/Energies.hh>
24 
25 //#include <core/pose/Pose.hh>
26 //#include <basic/datacache/BasicDataCache.hh>
27 
28 //#include <protocols/evaluation/EvaluatorFactory.hh>
30 
31 #include <utility/exit.hh>
32 #include <utility/excn/Exceptions.hh>
33 
34 #include <numeric/util.hh>
35 
36 #include <basic/Tracer.hh>
37 #include <basic/prof.hh>
38 
39 //for DebugArchive
40 // AUTO-REMOVED #include <utility/io/ozstream.hh>
41 
42 #include <basic/options/option.hh>
43 #include <basic/options/option_macros.hh>
44 
45 //for setup_default_evaluators
46 #include <basic/options/keys/run.OptionKeys.gen.hh>
47 #include <utility/vector1.hh>
48 
49 
// Tracer for this file; the code below writes its log output to tr.Info.
50 static basic::Tracer tr("protocols.jd2.NormalizedEvaluatedArchive");
51 
52 //OPT_1GRP_KEY( Real, iterative, chainbreak_evaluator_exponent )
53 //OPT_1GRP_KEY( Boolean, iterative, simulate_bg4_cbtreatment )
// Option keys of the iterative:normalize group. Their help text and default
// values are supplied by the NEW_OPT calls in register_options().
54 OPT_2GRP_KEY( Boolean, iterative, normalize, activate )
55 OPT_2GRP_KEY( Boolean, iterative, normalize, extra_archive )
56 OPT_2GRP_KEY( Real, iterative, normalize, keep_adding )
57 OPT_2GRP_KEY( Integer, iterative, normalize, num_struct)
58 OPT_2GRP_KEY( Integer, iterative, normalize, start )
59 OPT_2GRP_KEY( StringVector, iterative, normalize, force_zero )
60 OPT_2GRP_KEY( Boolean, iterative, normalize, lower_quartile )
61 
// NOTE(review): the loop over weights() further down compares against the same
// text as a string literal instead of using this constant.
62 std::string const SPECIAL_INITIAL_DECOY_PENALTY( "special_initial_decoy_penalty" );
63 
// Starts out false; register_options() sets it to true, and the constructor
// bodies below check it with runtime_assert.
64 bool protocols::jd2::archive::NormalizedEvaluatedArchive::options_registered_( false );
65 
66 //Mike: when you want to remove these Macros... leave them at least here as comment - since they provide documentation
// Registers the iterative:normalize:* options (help text and default value for
// each key declared above). Guarded by options_registered_, so the NEW_OPT
// calls run at most once no matter how often this function is called.
67 void protocols::jd2::archive::NormalizedEvaluatedArchive::register_options() {
69  if ( !options_registered_ ) {
70  NEW_OPT( iterative::normalize::activate, "score-variations are determined to normalize scores", false );
71  NEW_OPT( iterative::normalize::extra_archive, "determine score variations from extra archive", false );
72  NEW_OPT( iterative::normalize::keep_adding, "keep adding X percent of the incoming structures, throw out old ones randomly", 0.0 );
// help text fixed: "varaiance" -> "variance"
73  NEW_OPT( iterative::normalize::num_struct, "number of structures in variance archive", 1000 );
// help text fixed: the registered key is num_struct, there is no normalize:nstruct option
74  NEW_OPT( iterative::normalize::start, "do not normalize until X structures have been accumulated, if 0 this will be set to normalize:num_struct structures", 0 );
75  NEW_OPT2( iterative::normalize::force_zero, "for scores whose name starts with XXX compute variance as 0...Q3 instead of Q1..Q3", "rdc", "filter_cst" );
76  NEW_OPT( iterative::normalize::lower_quartile, "use Lo..Q1 instead of Q1..Q3 to determine range", false );
77  options_registered_ = true; // remember that registration has happened
78  }
79 }
80 
81 
82 namespace protocols {
83 namespace jd2 {
84 namespace archive {
85 // using namespace basic::options;
86 // using namespace basic::options::OptionKeys;
87 using namespace core;
88 using namespace core::io::silent;
89 
91 
92 
// NOTE(review): the line(s) opening this definition are not present in this
// listing; presumably this is a constructor body - confirm against the header.
94  runtime_assert( options_registered_ ); // register_options() must have run before this point
95  score_variations_are_clean_ = false; // cached score variations are treated as stale
96  init_from_options(); // presumably the option-reading code further down - name not visible there
97 }
98 
// NOTE(review): the signature line and listing lines 103-104 of this
// constructor are not present in this listing.
100  : EvaluatedArchive( ptr ) // forwards ptr to the base-class constructor
101 {
102  runtime_assert( options_registered_ ); // register_options() must have run before this point
105 }
106 
// Copies the iterative:normalize option values into member variables.
// NOTE(review): the signature line and several listing lines (110, 113, 114,
// 119, 120) are not present here; presumably this is init_from_options().
108  using namespace basic::options;
109  insertion_prob_ = option[ OptionKeys::iterative::normalize::keep_adding ]();
111  use_variance_archive_ = option[ OptionKeys::iterative::normalize::extra_archive ]();
112  nstruct_for_statistics_ = option[ OptionKeys::iterative::normalize::num_struct ]();
115  activated_ = option[ OptionKeys::iterative::normalize::activate ]();
116  positive_scores_ = option[ OptionKeys::iterative::normalize::force_zero ](); // name prefixes, see the substr() prefix check below
117  lower_quartile_ = option[ OptionKeys::iterative::normalize::lower_quartile ]();
// Special case for test_cycles / dry_run; the body of this branch is not visible in this listing.
118  if ( option[ OptionKeys::run::test_cycles ] || option[ OptionKeys::run::dry_run ] ) {
121  }
122 }
123 
// When use_variance_archive_ is set, creates the separate variance archive
// (named "<name()>_variance"), hands it the insertion probability and initializes it.
// NOTE(review): the signature line and listing line 127 are not present in this listing.
125  if ( use_variance_archive_ ) {
126  variance_archive_ = new VarianceStatisticsArchive( name()+"_variance" );
128  variance_archive_->set_insertion_prob( insertion_prob_ );
129  variance_archive_->initialize();
130  }
131 }
132 
133 // Forwards to Parent::add_evaluated_structure first; when a variance archive exists the decoy may additionally be offered to it. Returns the Parent result.
// NOTE(review): the signature line and listing lines 139 and 144 are not present in this listing.
135  bool added( Parent::add_evaluated_structure( evaluated_decoy, batch ) );
136  bool added_variance( added ); // without a variance archive this simply mirrors `added`
137  if ( variance_archive_ ) {
138  added_variance = false;
140  added_variance = variance_archive_->add_evaluated_structure( evaluated_decoy,batch );
141  }
142  }
// NOTE(review): the message talks about the variances but tests `added`, not `added_variance` - confirm which is intended.
143  tr.Info << "offered structure that was " << ( added ? "" : "not" ) << " relevant to the variances " << std::endl;
145  return added;
146 }
147 
// Saves this archive through Parent and, if a variance archive exists, saves
// that one too with the same suffix.
// NOTE(review): the signature line is not present in this listing.
149  Parent::save_to_file( suffix );
150  if ( variance_archive_ ) variance_archive_->save_to_file( suffix );
151 }
152 
153 ///@detail restore archive and sort
// Restores this archive through Parent; when use_variance_archive_ is set the
// variance archive must already exist and is restored as well. Returns the
// Parent result only - the variance archive's result is discarded.
// NOTE(review): the signature line and listing line 160 are not present in this listing.
155  bool b_have_restored = Parent::restore_from_file();
156  if ( use_variance_archive_ ) {
157  runtime_assert( variance_archive_ );
158  variance_archive_->restore_from_file();
159  }
161  return b_have_restored;
162 }
163 // --------------------------- end sort ------------------------------
164 
165 ///@detail determine variations of the non-zero weighted (select_weight_) scores by taking the difference Q3-Q1 (upper / lower quartile)
// Returns false without doing anything when score_variations_are_clean_ is set,
// true after the variations have been recomputed.
// NOTE(review): the signature line and listing lines 168 and 173 are not present
// in this listing (the declaration of `values` is among the missing lines).
167  if ( score_variations_are_clean_ ) return false; //not changed
// Statistics come from the variance archive if there is one, otherwise from this archive's own decoys.
169  SilentStructs const& my_decoys( variance_archive_ ? variance_archive_->decoys() : decoys() );
170  core::Size ndecoys( my_decoys.size() );
171  tr.Info << "determine score variations in NormalizedEvaluatedArchive " << name() << "... " << std::endl;
172  tr.Info << "use " << ndecoys << " decoys from " << (variance_archive_ ? variance_archive_->name() : name() ) << std::endl;
174  for ( WeightMap::const_iterator it = weights().begin(); it != weights().end(); ++it ) {
// NOTE(review): same text as the file-level constant SPECIAL_INITIAL_DECOY_PENALTY, written out as a literal.
175  if ( it->first == "special_initial_decoy_penalty" ) continue;
176  if ( it->second > 0.01 ) { // only columns whose weight exceeds 0.01 get a variation entry
177  if ( ndecoys >= min_decoys_for_statistics_ && activated_ ) {
178 
179  std::string const& name( it->first );
180  Size ct( 1 ); // write index into `values`, starts at 1
// Integer division: lowQ is ndecoys/4 and highQ is ndecoys/2 + ndecoys/4.
181  core::Size half( ndecoys / 2 );
182  core::Size lowQ( half / 2 );
183  core::Size highQ( half + lowQ );
184  if ( lower_quartile_ ) { // use the range 1..lowQ instead of lowQ..highQ
185  highQ = lowQ;
186  lowQ = 1 ;
187  }
188  //score_variations_.clear(); not really needed. should be faster without
189  values.resize( ndecoys );
190  for ( SilentStructs::const_iterator iss = my_decoys.begin(); iss != my_decoys.end(); ++iss, ++ct ) {
191  if ( !(*iss)->has_energy( name ) ) {
192  throw EXCN_Archive( "energy name "+name+" not found in returned decoys -- run with rescoring in archive to avoid this or fix your batches" );
193  } // add weighted column-value to final score
// assumes `values` is a 1-based container (ct runs 1..ndecoys) - TODO confirm against its declaration
194  values[ct]=(*iss)->get_energy( name );
195  }
// NOTE(review): without lower_quartile_, lowQ is 0 whenever ndecoys < 4, so this assert fires
// unless min_decoys_for_statistics_ keeps such small archives out of this branch.
196  runtime_assert( lowQ > 0 && highQ < ndecoys );
197  std::sort(values.begin(), values.end());
198  if ( is_start_zero_score( it->first ) ) {
// forced-zero columns: the range is taken as 0..values[highQ], so the variation is values[highQ] itself
199  score_variations_[ it->first ] = values[highQ];
200  tr.Info << "score variation of " << score_variations_[ it->first ] << " for " << name
201  << " between 0 (forced)"
202  << " and " << values[highQ] << " at " << highQ << std::endl;
203  } else {
204  score_variations_[ it->first ] = std::abs( values[highQ]-values[lowQ] );
205  tr.Info << "score variation of " << score_variations_[ it->first ] << " for " << name
206  << " between " << values[lowQ] << " at " << lowQ
207  << " and " << values[highQ] << " at " << highQ << std::endl;
208  }
209  } else { //not enough decoys or not activated
210  score_variations_[ it->first ] = 1.0; // neutral variation of 1.0
211  }
212  //cutoff to avoid division by 0
213  if ( score_variations_[ it->first ]< 1e-20 ) {
214  score_variations_[ it->first ]= 1e-20;
215  }
216  } // if weight > 0.01
217  } //for select_weights
218  return true; //changed
219 } //determine_score_variations
220 
// Returns true as soon as one entry of the iterated collection is a prefix of
// str, false if none is.
// NOTE(review): the signature and loop header (listing lines 221-222) are not
// present in this listing; presumably this is is_start_zero_score() iterating
// over positive_scores_ - confirm.
223  if ( str.substr( 0, it->size() ) == *it ) return true; // *it matches the first it->size() characters of str
224  }
225  return false;
226 }
227 
228 ///@detail rescore and sort archive
// Rescores through Parent, then - if a variance archive exists - gives it this
// archive's evaluators, weights and a clone of the score function and rescores it too.
// NOTE(review): the signature line and listing line 231 are not present in this listing.
230  Parent::rescore();
232  if ( variance_archive_ ) {
233  variance_archive_->set_evaluators( evaluators(), weights() );
234  variance_archive_->set_scorefxn( scorefxn().clone() );
235  variance_archive_->rescore();
236  }
237 }
238 
// Logs whether the cached variations are currently flagged clean, then returns the score_variations_ map.
// NOTE(review): the signature line and listing line 241 are not present in this listing.
240  tr.Info << "ask for score_variations. They are " << (score_variations_are_clean_? "clean" : "not clean") << std::endl;
242  return score_variations_;
243 }
244 
// Logs the clean/not-clean state, then looks up the variation stored for
// `column`; columns without an entry yield 1.0.
// NOTE(review): the signature line and listing line 247 are not present in this listing.
246  tr.Info << "ask for score_variations. They are " << (score_variations_are_clean_? "clean" : "not clean") << std::endl;
248  WeightMap::const_iterator iter = score_variations_.find( column );
249  if ( iter != score_variations_.end() ) return iter->second;
250  else return 1.0; // no entry for this column
251 }
252 
253 /* =================== end maintenance of evaluators and weights ====================== */
254 
255 
256 }//archive
257 }//jd2
258 }//protocols