Rosetta 3.5
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
NestedEnergyTermOptEData.cc
Go to the documentation of this file.
1 // -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
2 // vi: set ts=2 noet:
3 //
4 // (c) Copyright Rosetta Commons Member Institutions.
5 // (c) This file is part of the Rosetta software suite and is made available under license.
6 // (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
7 // (c) For more information, see http://www.rosettacommons.org. Questions about this can be
8 // (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
9 
10 /// @file protocols/optimize_weights/NestedEnergyTermOptEData.cc
11 /// @author Ron Jacak
12 
13 #ifdef USEMPI
14 #include <mpi.h>
15 #endif
16 
17 // Unit headers
20 
21 // Project headers
22 // AUTO-REMOVED #include <basic/options/util.hh>
25 #include <basic/Tracer.hh>
26 
27 #include <ObjexxFCL/format.hh>
28 
29 #include <utility/string_util.hh>
30 #include <utility/vector1.functions.hh> // to get arg_min()
31 
32 // AUTO-REMOVED #include <numeric/numeric.functions.hh>
33 // AUTO-REMOVED #include <numeric/statistics.functions.hh>
34 
35 // C++ headers
36 #include <fstream>
37 #include <ostream>
38 #include <sstream>
39 #include <string>
40 
41 // option key includes
42 #include <basic/options/keys/optE.OptionKeys.gen.hh>
43 
44 //Auto Headers
45 #include <utility/vector1.hh>
46 #include <basic/options/option.hh>
47 
48 
49 
50 using namespace core;
51 using namespace core::scoring;
52 using namespace ObjexxFCL::fmt;
53 
54 namespace protocols {
55 namespace optimize_weights {
56 
58 
// File-local tracer channel named "NestedEnergyTermOptEData". The get_score() overloads in this file
// pass it to process_score() as the (unused, since print == false) output stream.
59 static basic::Tracer TR("NestedEnergyTermOptEData");
60 
// NOTE(review): CAP_FA_REP is defined here but is not referenced anywhere in the visible part of this
// file — confirm whether it is still needed.
61 #define CAP_FA_REP 1
62 
63 
64 //
65 // ------------------- NestedEnergyTermPNatAAOptEPositionData -----------------------//
66 //
67 
68 
69 ///
70 /// @begin NestedEnergyTermPNatAAOptEPositionData::NestedEnergyTermPNatAAOptEPositionData()
71 ///
72 NestedEnergyTermPNatAAOptEPositionData::NestedEnergyTermPNatAAOptEPositionData() {}
73 
74 ///
75 /// @begin NestedEnergyTermPNatAAOptEPositionData::~NestedEnergyTermPNatAAOptEPositionData()
76 ///
77 NestedEnergyTermPNatAAOptEPositionData::~NestedEnergyTermPNatAAOptEPositionData() {}
78 
79 
80 ///
81 /// @begin NestedEnergyTermPNatAAOptEPositionData::get_score()
82 ///
83 /// @brief
84 /// Does actual work for OptE minimization
85 /// Special implementation of get_score that includes logic to handle unfolded state energy calculation.
86 /// See header file and OptEData.hh for more information.
87 ///
88 Real
89 NestedEnergyTermPNatAAOptEPositionData::get_score(
90  optimization::Multivec const & component_weights,
91  optimization::Multivec const & vars,
92  optimization::Multivec & dE_dvars,
93  Size const num_energy_dofs,
94  int const num_ref_dofs,
95  int const num_total_dofs,
96  EnergyMap const & fixed_terms,
97  ScoreTypes const & score_list,
98  ScoreTypes const & fixed_score_list
99 ) const
100 {
101  return process_score( TR, false, component_weights, vars, dE_dvars, num_energy_dofs, num_ref_dofs, num_total_dofs, fixed_terms, score_list, fixed_score_list );
102 }
103 
104 
105 ///
106 /// @begin NestedEnergyTermPNatAAOptEPositionData::print_score()
107 ///
108 /// @brief
109 /// Special implementation of print_score that includes logic to handle unfolded state energy calculation.
110 ///
111 void
112 NestedEnergyTermPNatAAOptEPositionData::print_score(
113  std::ostream & ostr,
114  optimization::Multivec const & component_weights,
115  optimization::Multivec const & vars,
116  optimization::Multivec & dE_dvars,
117  Size const num_energy_dofs,
118  int const num_ref_dofs,
119  int const num_total_dofs,
120  EnergyMap const & fixed_terms,
121  ScoreTypes const & score_list,
122  ScoreTypes const & fixed_score_list
123 ) const
124 {
125  process_score( ostr, true, component_weights, vars, dE_dvars, num_energy_dofs, num_ref_dofs, num_total_dofs, fixed_terms, score_list, fixed_score_list );
126 }
127 
128 
129 ///
130 /// @begin NestedEnergyTermPNatAAOptEPositionData::process_score()
131 ///
132 /// @brief
133 /// One method to do the score processing which takes a boolean dictating whether to print to an ostream or not. With this function, changes
134 /// to how scoring works only need to be made in one place as opposed to two (when get_score() and print_score() both had scoring logic in them).
135 Real
136 NestedEnergyTermPNatAAOptEPositionData::process_score(
137  std::ostream & ostr,
138  bool print,
139  optimization::Multivec const & component_weights,
140  optimization::Multivec const & vars,
141  optimization::Multivec & dE_dvars,
142  Size const num_energy_dofs,
143  int const num_ref_dofs,
144  int const,
145  EnergyMap const & fixed_terms,
146  ScoreTypes const & score_list,
147  ScoreTypes const & fixed_score_list
148 ) const
149 {
150  using namespace core;
151  using namespace core::optimization;
152  using namespace basic::options;
153  using namespace basic::options::OptionKeys;
154 
155  chemical::AA const this_native_aa( native_aa() );
156 
157  static Real const inv_kT( option[ optE::inv_kT_nataa ] );
158 
159  // contains the best energy for each amino acid at this position; init to a bad energy of 1000
160  utility::vector1< Real > best_energy_by_aa( chemical::num_canonical_aas, 1000.0 );
161 
162  // containers for derivatives
163  // all values are initialized to zero
164  Multivec ref_deriv_weight( chemical::num_canonical_aas, 0.0 );
165  utility::vector1< Real > vector_of_zeros( score_list.size(), 0.0 ); // assigned to unweighted_E_dof for bad rotamers
166  utility::vector1< utility::vector1< Real > > unweighted_E_dof( chemical::num_canonical_aas, vector_of_zeros );
167 
168  process_rotamers( vars, num_energy_dofs, fixed_terms, score_list, fixed_score_list, chemical::num_canonical_aas,
169  vector_of_zeros, best_energy_by_aa, unweighted_E_dof, ref_deriv_weight );
170 
171  //TR << "process_score(): best_energy_by_aa before: [ ";
172  //for ( Size i=1; i <= best_energy_by_aa.size(); ++i ) {
173  // TR << F(6,2,best_energy_by_aa[i]) << ", ";
174  //}
175  //TR << std::endl;
176 
177  // now do some special processing of the unfolded state energy
178  // the parameters score_list and fixed_score_list are vectors of ScoreType objects; these should be in the same order
179  // as the weights in the vars array.
180  // this part has two steps: first we have to take the weights for the canonical score12 score terms and apply them
181  // to the unweighted unfolded energies stored in the unfolded_energy_emap_vector. then, we have to take that entire
182  // sum and multiply it by the unfolded term weight currently being evaluated.
183  // If you neglect to multiply the total unfolded state energy of an aa by the current unfolded term weight, then
184  // the unfolded term weight varies wildly and minimization doesn't do anything.
185 
186  for( Size aa = 1; aa <= chemical::num_canonical_aas; ++aa ) {
187 
188  Real unfolded_energy_for_one_aa = 0.0;
189  Real weighted_unfolded_energy_for_one_aa = 0.0;
190 
191  // Part 1a:
192  // Variable-weighted energy terms
193  //
194  // Assume free params are fa_rep, solubility, and unfolded. The unfolded_energy_emap_vector only contains energies
195  // for fa_rep; solubility and unfolded always return zeros. Thus, even though we iterate through all the free terms
196  // here, only the fa_rep will contribute to the unfolded_energy_for_one_aa.
197  //
198  // The neat thing about this setup is that if a score12 energy term is not included as a free or fixed param, then
199  // it won't be included in the unfolded state energy either.
200  //
201  for( Size ii = 1; ii <= num_energy_dofs; ++ii ) {
202  //if ( print ) {
203  //TR << "process_score(): adding unfolded energy for aa '" << chemical::name_from_aa( (chemical::AA) aa )
204  // << "' for unweighted free '" << name_from_score_type( score_list[ ii ] ) << "' energy: "
205  // << unfolded_energy_emap_vector_[ aa ][ score_list[ ii ]] << " * '"
206  // << name_from_score_type( score_list[ ii ] )
207  // << "' weight: " << vars[ ii ]
208  // << " to local unfolded_energy_for_one_aa variable." << std::endl;
209  //}
210  // the unweighted, unfolded energy for this ScoreType times the weight
211  unfolded_energy_for_one_aa += (unfolded_energy_emap_vector_[ aa ][ score_list[ ii ]] * vars[ ii ]);
212 
213  // see comments in process_rotamers() commented out below for what's going on in the next block
214  if ( ref_deriv_weight[ aa ] != 0.0 ) {
215 
216  // Subtract the unweighted, unfolded energy from unweighted_E_dof array (which is broken up by aa and eterm)
217  // This is so that the minimizer knows how to adjust this weight during minimization.
218 
219  // aa is the amino acid, ii is the free weight; only free terms have a spot in the unweighted_E_dof array
220  // so we only have to iterate over the array in this one section
221  unweighted_E_dof[ aa ][ ii ] -= unfolded_energy_emap_vector_[ aa ][ score_list[ ii ] ];
222 
223  // What if the unfolded weight is free/floating? What do we use for the unweighted_E_dof for the unfolded
224  // state energy? Well, the unfolded energy method returns 0.0 for the energy at this point of optE.
225  // Therefore, the unweighted_E_dof should also be 0.0. The unfolded_energy_emap_vector should contain a
226  // 0.0 at the location for unfolded, so no special code is needed here to prevent problems if the unfolded
227  // weight is variable.
228  }
229  }
230 
231  // Part 1b:
232  // Fixed-weight energy terms
233  for( Size ii = 1; ii <= fixed_score_list.size(); ++ii ) {
234  //if ( print ) {
235  //TR << "process_score(): adding unfolded energy for aa '" << chemical::name_from_aa( (chemical::AA) aa )
236  // << "' unweighted fixed '" << name_from_score_type( fixed_score_list[ ii ] ) << "' energy: "
237  // << unfolded_energy_emap_vector_[ aa ][ fixed_score_list[ ii ]] << " * '"
238  // << name_from_score_type( fixed_score_list[ ii ] )
239  // << "' weight: " << fixed_terms[ fixed_score_list[ ii ] ] << std::endl;
240  //}
241  unfolded_energy_for_one_aa += (unfolded_energy_emap_vector_[ aa ][ fixed_score_list[ ii ] ] * fixed_terms[ fixed_score_list[ ii ] ]);
242  }
243 
244  // Part 1c:
245  // Reference energy term
246  // The unfolded state energy map should not have anything for a reference energy. Even if it does though, don't add anything
247  // else here.
248 
249  // Part 2: Now use the current 'unfolded' term weight
250  // 'unfolded' could be a free or fixed term so iterate over both lists; here it's necessary because this energy gets added
251  // to the best energy by aa array regardless.
252  for( Size tt = 1; tt <= num_energy_dofs; ++tt ) {
253  if ( name_from_score_type( score_list[ tt ] ) == "unfolded" ) {
254  //if ( print ) {
255  // TR << chemical::name_from_aa( (chemical::AA) aa )
256  // << " unweighted unfolded energy: " << unfolded_energy_for_one_aa << ", free '"
257  // << name_from_score_type( score_list[ tt ] ) << "' term weight: " << vars[tt]
258  // << ", weighted unfolded energy: " << unfolded_energy_for_one_aa * vars[ tt ] << std::endl;
259  //}
260  weighted_unfolded_energy_for_one_aa = unfolded_energy_for_one_aa * vars[ tt ];
261  }
262  }
263  for( Size tt = 1; tt <= fixed_score_list.size(); ++tt ) {
264  if ( name_from_score_type( fixed_score_list[ tt ] ) == "unfolded" ) {
265  //if ( print ) {
266  // TR << chemical::name_from_aa( (chemical::AA) aa )
267  // << " unfolded energy: " << unfolded_energy_for_one_aa << ", FIXED '"
268  // << name_from_score_type( fixed_score_list[ tt ] ) << "' term weight: " << fixed_terms[ fixed_score_list[ tt ] ]
269  // << ", weighted unfolded energy: " << unfolded_energy_for_one_aa * fixed_terms[ fixed_score_list[ tt ] ] << std::endl;
270  //}
271  weighted_unfolded_energy_for_one_aa = unfolded_energy_for_one_aa * fixed_terms[ fixed_score_list[ tt ] ];
272  }
273  }
274 
275  // SUBTRACT this unfolded energy from the value for this aa in the best_energy_by_aa array. We subtract because only positive weights are
276  // being used in the driver class, and unfolded energy should be subtracted from folded energy to get difference in free energy.
277  // See the doxygen documentation for function optimize_weights() in IterativeOptEDriver. cc
278  best_energy_by_aa[ aa ] -= weighted_unfolded_energy_for_one_aa;
279 
280  // do a check to make sure the energy doesn't exceed our cutoff (APL set at 300) or otherwise we get INF/NAN errors during minimization
281  Real const cutoff( 300.0 );
282  if ( best_energy_by_aa[ aa ] > cutoff )
283  best_energy_by_aa[ aa ] = cutoff;
284  else if ( best_energy_by_aa[ aa ] < -1.0*cutoff )
285  best_energy_by_aa[ aa ] = -1.0*cutoff;
286 
287  }
288 
289  //TR << "process_score(): best_energy_by_aa after : [ ";
290  //for ( Size i=1; i <= best_energy_by_aa.size(); ++i ) {
291  // TR << F(6,2,best_energy_by_aa[i]) << ", ";
292  //}
293  //TR << std::endl;
294 
295 
296  // now do the partition function analysis
297  // all this should remain the same assuming the arrays were correctly set in process_rotamers()
298 
299  Real numerator(0.0), partition(0.0);
300  Multivec dpartition( vars.size(), 0.0 ), dnumerator( vars.size(), 0.0 );
301 
302  for( Size aa(1); aa <= chemical::num_canonical_aas; ++aa ) {
303 
304  Real const exp_term( std::exp( -1.0 * inv_kT * best_energy_by_aa[ aa ] ) );
305  partition += exp_term;
306  if ( aa == size_t( this_native_aa ) )
307  numerator = exp_term;
308 
309  // for reference energy derivatives, but don't assume the protocol is using them
310  // note for derivatives: dE/dw( e^-(E*w+...) ) = -E * e^-(E*w+...) but as this is an energy, the 'weight' here is 0 or 1
311  // reference energies do not have an unweighted energy. really, the energy is just 1 or 0 depending on what the process_rotamers()
312  // function decided. so d/dw[ e(-E*w) ] = -1 * (0|1) * e(-E*w)
313  if ( num_ref_dofs != 0 ) {
314  Real const ref_deriv_term( -1.0 * inv_kT * ref_deriv_weight[ aa ] * exp_term );
315  dpartition[ num_energy_dofs + aa ] = ref_deriv_term;
316  if ( aa == size_t(this_native_aa) )
317  dnumerator[ num_energy_dofs + aa ] = ref_deriv_term;
318  }
319 
320  // partitions for energy derivatives
321  // note for derivatives: dE/dw( e^-(E*w+...) ) = -E * e^-(E*w+...)
322  // dE/dweight( e^(-unweightedE*weight + ...) ) = -1 * unweightedE * e^(-unweightedE*weight + ...)
323  // and best_energy_by_aa is the same as (unweightedE * weight)
324 
325  // there's a potential problem here I'm not sure how it happens. If the best energy by aa is something really small (e.g. -700)
326  // then the exponential e(700) is an extremely large number. the runtime just assigns it INF. Then when you multiply it by
327  // zero, you still get INF for some reason.
328  for( Size e_term = 1; e_term <= num_energy_dofs; ++e_term ) {
329  Real e_term_deriv( -1.0 * inv_kT * unweighted_E_dof[ aa ][ e_term ] * exp_term );
330  dpartition[ e_term ] += e_term_deriv;
331  if ( aa == size_t( this_native_aa ) )
332  dnumerator[ e_term ] = e_term_deriv;
333  }
334  }
335 
336  // accumulate to passed-in derivative sums
337  for ( Size dof(1); dof <= vars.size(); ++dof ) {
338  dE_dvars[ dof ] += component_weights[ type() ] * ( dpartition[ dof ] / partition - dnumerator[ dof ] / numerator );
339 
340  if ( score_list[ dof ] == omega ) { dE_dvars[ dof ] = 0.0; }
341  if ( score_list[ dof ] == hbond_lr_bb ) { dE_dvars[ dof ] = 0.0; }
342 
343  /*if ( tag() == "1nls_" && (int)this_native_aa == 3 ) {
344  if ( score_list[ dof ] == omega ) {
345  TR << "PNATAA " << tag() << X(1) << this_native_aa << "," << I(2, (int)this_native_aa) << X(1)
346  << "-lnp: " << F(6,4,-1.0 * std::log( numerator / partition ))
347  << ", dE_dvars[ omega ]: " << F(6,4, dE_dvars[ dof ])
348  << ", best_energy_by_aa: [ ";
349  for ( Size i=1; i <= best_energy_by_aa.size(); ++i ) { TR << F(5,2,best_energy_by_aa[i]) << ", "; } TR << "], ";
350  TR << ", dpart[ omega ]: " << F(7,3,dpartition[dof]) << ", part: " << F(7,3,partition)
351  << ", dnum[ omega ]: " << F(7,3,dnumerator[dof]) << ", num: " << F(7,3,numerator) << std::endl;
352  }
353  if ( score_list[ dof ] == unfolded ) {
354  TR << "PNATAA " << tag() << X(1) << this_native_aa << "," << I(2, (int)this_native_aa) << X(1)
355  << " dpart[ unfolded ]: " << F(7,3,dpartition[dof]) << " part: " << F(7,3,partition)
356  << " dnum[ unfolded ]: " << F(7,3,dnumerator[dof]) << " num: " << F(7,3,numerator)
357  << " dE_dvars[ unfolded ]: " << F(6,4, dE_dvars[ dof ])
358  << " dE_dvars: [ ";
359  for ( Size ii=1; ii <= vars.size(); ++ii ) { TR << F(5,2,dE_dvars[ii]) << ", "; }
360  TR << "]" << std::endl;
361  }
362  }*/
363 
364  }
365 
366  if ( print ) {
367  ostr << "PNATAA " << tag() << X(1) << this_native_aa << "," << I(2, (int)this_native_aa) << X(1)
368  << " nbs: " << I(2,neighbor_count())
369  << " num: " << F(7,3,numerator) << " part: " << F(7,3,partition)
370  << " p: " << F(7,5,numerator / partition)
371  << " -lnp: " << F(6,4,-1.0 * std::log( numerator / partition ))
372  << " -compwt_lnp: " << F(6, 4, component_weights[ type() ] * (-1.0 * std::log( numerator / partition )) )
373  << " best_energy_by_aa: [ ";
374 
375  for ( Size i=1; i <= best_energy_by_aa.size(); ++i ) {
376  ostr << F(5,2,best_energy_by_aa[i]) << ", ";
377  }
378  ostr << "]" << std::endl;
379  }
380 
381  return ( -1.0 * component_weights[ type() ] * std::log( numerator / partition ) );
382 }
383 
384 
385 ///
386 /// @begin NestedEnergyTermPNatAAOptEPositionData::type()
387 ///
388 /// @brief
389 /// To be sure we create the right types when writing/reading from files, need to add a special OptEPositionData
390 /// type that gets returned here.
391 ///
// NOTE(review): this listing skips its own lines 392 and 394, so the return-type line and the single
// body statement of type() are not visible here. Restore them from the original source before use;
// the surrounding comment only says that a dedicated OptEPositionData type is returned.
393 NestedEnergyTermPNatAAOptEPositionData::type() const {
395 }
396 
397 
398 ///
399 /// @begin NestedEnergyTermPNatAAOptEPositionData::write_to_file()
400 ///
401 /// @brief
402 /// Add a special for loop to print out the unfolded state energy EnergyMap values.
403 ///
404 void
405 NestedEnergyTermPNatAAOptEPositionData::write_to_file( std::ofstream & outfile ) const {
406 
407  outfile << "position " << position() << " "
408  << "nataa " << native_aa() << " "
409  << "neighbor_count " << neighbor_count() << " "
410  << "unfolded_energy" << std::endl;
411 
412  // print one line with all the emap info per aa
413  for ( int aa = 1; aa < chemical::num_canonical_aas; ++aa ) {
414  for ( int type = 1; type < scoring::n_score_types; ++type ) {
415  outfile << name_from_score_type( ScoreType( type ) ) << "=" << (unfolded_energy_emap_vector_[ aa ])[ ScoreType(type) ] << " ";
416  }
417  outfile << std::endl;
418  }
419  outfile << std::endl;
420 
421  outfile << "nrots " << data().size() << "\n";
422  for ( PNatAAOptERotamerDataOPs::const_iterator rot( rotamer_data_begin() ); rot != rotamer_data_end(); ++rot ) {
423  outfile << *rot << std::endl;
424  }
425 
426 }
427 
428 
429 ///
430 /// @begin NestedEnergyTermPNatAAOptEPositionData::read_from_file()
431 ///
432 /// @brief
433 ///
434 void
435 NestedEnergyTermPNatAAOptEPositionData::read_from_file( std::ifstream & infile ) {
436 
437  using namespace utility;
438 
439  // read first line with position, native aa, neighbor_count, and num_rotamers data
440  std::string line;
441  getline( infile, line );
442  Strings words( string_split( line, ' ' ) );
443  assert( words[ 1 ] == "position" );
444  set_position( from_string( words[ 2 ], Size( 0 ) ) );
445  assert( words[ 3 ] == "nataa" );
446  set_native_aa ( chemical::aa_from_name( words[ 4 ] ) );
447  assert( words[ 5 ] == "neighbor_count" );
448  set_neighbor_count( from_string( words[ 6 ], Size( 0 ) ) );
449 
450  // extra logic to handle reading in the unfolded state energies into an EnergyMap
451  assert( words[ 7 ] == "unfolded_energy" );
452  utility::vector1 < EnergyMap > emap_vector;
453  emap_vector.resize( chemical::num_canonical_aas );
454 
455  for ( int aa = 1; aa < chemical::num_canonical_aas; ++aa ) {
456  getline( infile, line );
457  Strings sections( string_split( line, ' ' ) );
458 
459  EnergyMap emap;
460  for ( Strings::iterator section = sections.begin(); section != sections.end(); ++section ) {
461  Strings pair( string_split( *section, '=' ) );
463  Real score;
464  std::istringstream ss( pair[2] );
465  ss >> score;
466  emap[ st ] = score;
467  }
468  emap_vector[ aa ] = emap;
469  }
470 
471  getline( infile, line );
472  Strings rotamer_line_words( string_split( line, ' ' ) );
473  assert( rotamer_line_words[ 1 ] == "nrots" );
474  Size num_rotamers = from_string( rotamer_line_words[ 2 ], Size( 0 ) );
475 
476  for ( Size ii = 1; ii <= num_rotamers; ++ii ) {
477  getline( infile, line );
478 
479  // rotamers for existing position: parse, append new OptERotamerDataOP to OptEPositionDataOP
480  Strings sections( string_split( line, ',' ) );
481  // sections:
482  // 0 - rotnum, 1 - aa three-letter code, 2 - energies for fixed terms, 3 - energies for free terms
483  Size rotnum;
484  std::istringstream ss( sections[1] );
485  ss >> rotnum;
486  chemical::AA aa( chemical::aa_from_name( sections[2] ) );
487  utility::vector1< Real > fixed_energies, energies;
488  Strings fixed_vals( string_split( sections[3], ' ' ) ), free_vals( string_split( sections[4], ' ' ) );
489  for ( Strings::iterator fixed_val( fixed_vals.begin() ); fixed_val != fixed_vals.end(); ++fixed_val ) {
490  Real val;
491  std::istringstream ss( *fixed_val );
492  ss >> val;
493  fixed_energies.push_back( val );
494  }
495  for ( Strings::iterator free_val( free_vals.begin() ); free_val != free_vals.end(); ++free_val ) {
496  Real val;
497  std::istringstream ss( *free_val );
498  ss >> val;
499  energies.push_back( val );
500  }
501  assert( !energies.empty() );
502  PNatAAOptERotamerDataOP new_rot_data = new PNatAAOptERotamerData( aa, rotnum, energies, fixed_energies );
503  add_rotamer_line_data( new_rot_data );
504  }
505 }
506 
507 
508 ///
509 /// @begin NestedEnergyTermPNatAAOptEPositionData::write_to_binary_file()
510 ///
511 /// @brief
512 /// Leaving this unimplemented since I don't feel like figuring out how to output an EnergyMap in binary
513 /// and also because reading/writing binary files is not being used in the optE protocol currently.
514 ///
515 void
516 NestedEnergyTermPNatAAOptEPositionData::write_to_binary_file( std::ofstream & /* outfile */ ) const {}
517 
518 ///
519 /// @begin NestedEnergyTermPNatAAOptEPositionData::read_from_binary_file()
520 ///
521 /// @brief
522 /// Leaving this unimplemented since I don't feel like figuring out how to output an EnergyMap in binary
523 /// and also because reading/writing binary files is not being used in the optE protocol currently.
524 ///
525 void
526 NestedEnergyTermPNatAAOptEPositionData::read_from_binary_file( std::ifstream & /* infile */ ) {}
527 
528 
529 ///
530 /// @begin NestedEnergyTermPNatAAOptEPositionData::memory_use()
531 ///
532 Size
533 NestedEnergyTermPNatAAOptEPositionData::memory_use() const {
534 
535  Size total = sizeof( NestedEnergyTermPNatAAOptEPositionData ) + sizeof( PNatAAOptERotamerData ) * data().size();
536  if ( data().size() > 0 ) {
537  total += sizeof( Real ) * ( data()[ 1 ]->data().size() + data()[ 1 ]->fixed_data().size() ) * data().size();
538  }
539  // the emap vector uses space, too!
540  total += sizeof( EnergyMap ) * unfolded_energy_emap_vector_.size();
541 
542  return total;
543 }
544 
545 
546 
547 #ifdef USEMPI
548 ///
549 /// @begin NestedEnergyTermPNatAAOptEPositionData::send_to_node()
550 ///
551 void
552 NestedEnergyTermPNatAAOptEPositionData::send_to_node( int const destination_node, int const tag ) const {
553 
554  /// 1. Which position is this?
555  int ii_pos = position();
556  MPI_Send( & ii_pos, 1, MPI_INT, destination_node, tag, MPI_COMM_WORLD );
557 
558  /// 2. What is the native amino acid at this position?
559  int ii_aa = native_aa();
560  MPI_Send( & ii_aa, 1, MPI_INT, destination_node, tag, MPI_COMM_WORLD );
561 
562  /// 3. How many neighbors did this position have?
563  int ii_neighbor_count = neighbor_count();
564  MPI_Send( & ii_neighbor_count, 1, MPI_INT, destination_node, tag, MPI_COMM_WORLD );
565 
566  /// 4. How many aa's are in the unfolded state energy map vector?
567  /// This will probably always be 20, but send it anyway
568  int ii_unfolded_energy_emap_vector_size = unfolded_energy_emap_vector_.size();
569  MPI_Send( & ii_unfolded_energy_emap_vector_size, 1, MPI_INT, destination_node, tag, MPI_COMM_WORLD );
570 
571  /// 4b. The energies in the emap
572  Real * unfolded_energies = new Real[ chemical::num_canonical_aas * scoring::n_score_types ];
573  for ( int aa = 1; aa <= chemical::num_canonical_aas; ++aa ) {
574  for ( Size ee = 1; ee <= scoring::n_score_types; ++ee ) {
575  unfolded_energies[ (aa - 1) * scoring::n_score_types + ee - 1 ] = unfolded_energy_emap_vector_[ aa ][ (ScoreType) ee ];
576  }
577  }
578  MPI_Send( unfolded_energies, chemical::num_canonical_aas * scoring::n_score_types, MPI_DOUBLE, destination_node, tag, MPI_COMM_WORLD );
579  delete [] unfolded_energies; unfolded_energies = 0;
580 
581  /// 5. The number of rotamers for this position
582  Size ii_num_rotamers = size();
583  MPI_Send( & ii_num_rotamers, 1, MPI_UNSIGNED_LONG, destination_node, tag, MPI_COMM_WORLD );
584 
585  if ( ii_num_rotamers == 0 )
586  return;
587 
588  Size free_count = data()[1]->data().size();
589  Size fixed_count = data()[1]->fixed_data().size();
590 
591  /// 6 The size of the free and fixed data, since that context is not available.
592  MPI_Send( & free_count, 1, MPI_UNSIGNED_LONG, destination_node, tag, MPI_COMM_WORLD );
593  MPI_Send( & fixed_count, 1, MPI_UNSIGNED_LONG, destination_node, tag, MPI_COMM_WORLD );
594 
595  int * ii_aa_types = new int[ ii_num_rotamers ];
596  int * ii_rot_nums = new int[ ii_num_rotamers ];
597  Real * free_data = new Real[ ii_num_rotamers * free_count ];
598  Real * fixed_data = new Real[ ii_num_rotamers * fixed_count ];
599  for ( Size jj = 1; jj <= ii_num_rotamers; ++jj ) {
600  ii_aa_types[ jj - 1 ] = data()[ jj ]->this_aa();
601  ii_rot_nums[ jj - 1 ] = data()[ jj ]->rot_number();
602  for ( Size kk = 1; kk <= free_count; ++kk ) {
603  free_data[ ( jj - 1 ) * free_count + kk - 1 ] = data()[ jj ]->data()[ kk ];
604  }
605  for ( Size kk = 1; kk <= fixed_count; ++kk ) {
606  fixed_data[ ( jj - 1 ) * fixed_count + kk - 1 ] = data()[ jj ]->fixed_data()[ kk ];
607  }
608  }
609 
610  /// 7. All the amino acids for all rotamers at this position
611  MPI_Send( ii_aa_types, ii_num_rotamers, MPI_INT, destination_node, tag, MPI_COMM_WORLD );
612 
613  /// 8. All the rotamer indices for all rotamers at this position
614  MPI_Send( ii_rot_nums, ii_num_rotamers, MPI_INT, destination_node, tag, MPI_COMM_WORLD );
615 
616  /// 9. All the free data for all rotamers
617  MPI_Send( free_data, ii_num_rotamers * free_count, MPI_DOUBLE, destination_node, tag, MPI_COMM_WORLD );
618 
619  /// 10. All the fixed data for all rotamers
620  MPI_Send( fixed_data, ii_num_rotamers * fixed_count, MPI_DOUBLE, destination_node, tag, MPI_COMM_WORLD );
621 
622  delete [] ii_aa_types; ii_aa_types = 0;
623  delete [] ii_rot_nums; ii_rot_nums = 0;
624  delete [] free_data; free_data = 0;
625  delete [] fixed_data; fixed_data = 0;
626 
627  OptEPositionData::send_to_node( destination_node, tag );
628 }
629 
630 
631 ///
632 /// @begin NestedEnergyTermPNatAAOptEPositionData::receive_from_node()
633 ///
634 void
635 NestedEnergyTermPNatAAOptEPositionData::receive_from_node( int const source_node, int const tag ) {
636 
637  MPI_Status stat;
638 
639  /// 1. Which sequence position is this?
640  int ii_pos;
641  MPI_Recv( & ii_pos, 1, MPI_INT, source_node, tag, MPI_COMM_WORLD, &stat );
642  set_position( ii_pos );
643 
644  /// 2. What is the native amino acid at this position?
645  int ii_aa;
646  MPI_Recv( & ii_aa, 1, MPI_INT, source_node, tag, MPI_COMM_WORLD, &stat );
647  set_native_aa( (chemical::AA) ii_aa );
648 
649  /// 3. How many neighbors did this position have?
650  int ii_neighbor_count;
651  MPI_Recv( & ii_neighbor_count, 1, MPI_INT, source_node, tag, MPI_COMM_WORLD, &stat );
652  set_neighbor_count( ii_neighbor_count );
653 
654  /// 4. How many emaps are in the unfolded state energy map vector?
655  /// Most likely 20, for the 20 canonical aas, but check anyway.
656  int ii_unfolded_energy_emap_vector_size;
657  MPI_Recv( & ii_unfolded_energy_emap_vector_size, 1, MPI_INT, source_node, tag, MPI_COMM_WORLD, &stat );
658  utility::vector1 < EnergyMap > emap_vector;
659  emap_vector.resize( ii_unfolded_energy_emap_vector_size );
660 
661  /// 4b. Now get the energies in the emap
662  Real * unfolded_energies = new Real[ chemical::num_canonical_aas * scoring::n_score_types ];
663  MPI_Recv( unfolded_energies, chemical::num_canonical_aas * scoring::n_score_types, MPI_DOUBLE, source_node, tag, MPI_COMM_WORLD, &stat );
664 
665  for ( Size aa = 1; aa <= chemical::num_canonical_aas; ++aa ) {
666  for ( Size ee = 1; ee <= scoring::n_score_types; ++ee ) {
667  // be careful because the array is 0-based while the score type enum is 1-based
668  emap_vector[ aa ][ (ScoreType) ee ] = unfolded_energies[ (aa-1) * scoring::n_score_types + ee - 1 ];
669  }
670  }
671  set_unfolded_energy_emap_vector( emap_vector );
672  delete [] unfolded_energies; unfolded_energies = 0;
673 
674 
675  /// 6. The number of rotamers for this position
676  Size ii_num_rotamers;
677  MPI_Recv( & ii_num_rotamers, 1, MPI_UNSIGNED_LONG, source_node, tag, MPI_COMM_WORLD, &stat );
678 
679  if ( ii_num_rotamers == 0 )
680  return;
681 
682  Size free_count(0);
683  Size fixed_count(0);
684 
685  /// 6b The size of the free and fixed data, since that context is not available.
686  MPI_Recv( & free_count, 1, MPI_UNSIGNED_LONG, source_node, tag, MPI_COMM_WORLD, &stat );
687  MPI_Recv( & fixed_count, 1, MPI_UNSIGNED_LONG, source_node, tag, MPI_COMM_WORLD, &stat );
688 
689 
690  int * ii_aa_types = new int[ ii_num_rotamers ];
691  int * ii_rot_nums = new int[ ii_num_rotamers ];
692  Real * free_data = new Real[ ii_num_rotamers * free_count ];
693  Real * fixed_data = new Real[ ii_num_rotamers * fixed_count ];
694 
695  /// 7. All the amino acids for all rotamers at this position
696  MPI_Recv( ii_aa_types, ii_num_rotamers, MPI_INT, source_node, tag, MPI_COMM_WORLD, &stat );
697 
698  /// 8. All the rotamer indices for all rotamers at this position
699  MPI_Recv( ii_rot_nums, ii_num_rotamers, MPI_INT, source_node, tag, MPI_COMM_WORLD, &stat );
700 
701  /// 9. All the free data for all rotamers
702  MPI_Recv( free_data, ii_num_rotamers * free_count, MPI_DOUBLE, source_node, tag, MPI_COMM_WORLD, &stat );
703 
704  /// 10. All the fixed data for all rotamers
705  MPI_Recv( fixed_data, ii_num_rotamers * fixed_count, MPI_DOUBLE, source_node, tag, MPI_COMM_WORLD, &stat );
706 
707  utility::vector1< Real > free_data_vect( free_count );
708  utility::vector1< Real > fixed_data_vect( fixed_count );
709 
710  for ( Size jj = 1; jj <= ii_num_rotamers; ++jj ) {
711  for ( Size kk = 1; kk <= free_count; ++kk ) {
712  free_data_vect[ kk ] = free_data[ ( jj - 1 ) * free_count + kk - 1 ];
713  }
714  for ( Size kk = 1; kk <= fixed_count; ++kk ) {
715  fixed_data_vect[ kk ] = fixed_data[ ( jj - 1 ) * fixed_count + kk - 1 ];
716  }
717  PNatAAOptERotamerDataOP jj_rotamer_data = new PNatAAOptERotamerData(
718  (chemical::AA ) ii_aa_types[ jj - 1 ],
719  ii_rot_nums[ jj - 1 ],
720  free_data_vect,
721  fixed_data_vect );
722  add_rotamer_line_data( jj_rotamer_data );
723 
724  }
725 
726  delete [] ii_aa_types; ii_aa_types = 0;
727  delete [] ii_rot_nums; ii_rot_nums = 0;
728  delete [] free_data; free_data = 0;
729  delete [] fixed_data; fixed_data = 0;
730 
731  OptEPositionData::receive_from_node( source_node, tag );
732 
733 }
734 #endif
735 
736 
737 //
738 // ------------------- NestedEnergyTermDDGMutationOptEData -----------------------//
739 //
740 
741 ///
742 /// @begin NestedEnergyTermDDGMutationOptEData::NestedEnergyTermDDGMutationOptEData()
743 ///
744 NestedEnergyTermDDGMutationOptEData::NestedEnergyTermDDGMutationOptEData() {}
745 
746 ///
747 /// @begin NestedEnergyTermDDGMutationOptEData::~NestedEnergyTermDDGMutationOptEData()
748 ///
749 NestedEnergyTermDDGMutationOptEData::~NestedEnergyTermDDGMutationOptEData() {}
750 
751 
752 ///
753 /// @begin NestedEnergyTermDDGMutationOptEData::get_score()
754 ///
755 /// @details
756 /// This get_score() method needs to contain some extra logic for the unfolded state energy term. Right now, the value
757 /// under unfolded energy is 0.0, because that's what the EnergyMethod is coded to return. But, as in the NatAA class
758 /// above, we need use the unweighted, unfolded energies and the current weight set to come up with a unfolded energy.
759 ///
760 Real
761 NestedEnergyTermDDGMutationOptEData::get_score(
762  optimization::Multivec const & component_weights,
763  optimization::Multivec const & vars,
764  optimization::Multivec & dE_dvars,
765  /// Basically, turn over all the private data from OptEMultiFunc
766  Size const num_energy_dofs,
767  int const num_ref_dofs,
768  int const num_total_dofs,
769  EnergyMap const & fixed_terms,
770  ScoreTypes const & free_score_list,
771  ScoreTypes const & fixed_score_list
772 ) const
773 {
774  return process_score( TR, false, component_weights, vars, dE_dvars, num_energy_dofs, num_ref_dofs, num_total_dofs, fixed_terms, free_score_list, fixed_score_list );
775 }
776 
777 
778 ///
779 /// @begin NestedEnergyTermDDGMutationOptEData::print_score()
780 ///
781 void
782 NestedEnergyTermDDGMutationOptEData::print_score(
783  std::ostream & ostr,
784  optimization::Multivec const & component_weights,
785  optimization::Multivec const & vars,
786  optimization::Multivec & dE_dvars,
787  /// Basically, turn over all the private data from OptEMultiFunc
788  Size const num_energy_dofs,
789  int const num_ref_dofs,
790  int const num_total_dofs,
791  EnergyMap const & fixed_terms,
792  ScoreTypes const & free_score_list,
793  ScoreTypes const & fixed_score_list
794 ) const
795 {
796  process_score( ostr, true, component_weights, vars, dE_dvars, num_energy_dofs, num_ref_dofs, num_total_dofs, fixed_terms, free_score_list, fixed_score_list );
797 }
798 
799 
800 ///
801 /// @begin NestedEnergyTermDDGMutationOptEData::process_score()
802 ///
803 /// @brief
804 /// One method to do the score processing which takes a boolean dictating whether to print to an ostream or not. With this function, changes
805 /// to how scoring works only need to be made in one place as opposed to two (when get_score() and print_score() both had scoring logic in them).
806 Real
807 NestedEnergyTermDDGMutationOptEData::process_score(
808  std::ostream & ostr,
809  bool print,
810  optimization::Multivec const & component_weights,
811  optimization::Multivec const & vars,
812  optimization::Multivec & dE_dvars,
813  /// Basically, turn over all the private data from OptEMultiFunc
814  Size const num_energy_dofs,
815  int const num_ref_dofs,
816  int const,
817  EnergyMap const & fixed_terms,
818  ScoreTypes const & score_list,
819  ScoreTypes const & fixed_score_list
820 ) const
821 {
822  using namespace core::optimization;
823  using namespace basic::options;
824  using namespace basic::options::OptionKeys;
825  using namespace utility;
826 
827  // if there are no structures to go through, return immediately
828  if ( muts_.size() == 0 || wts_.size() == 0 ) return 0.0;
829 
830  // these vectors are sized to the number of structures there are for a wt name and mutant name;
831  // they'll be used to determine which structure has the best energy
832  utility::vector1< Real > wt_energies( wts_.size(), 0.0 );
833  utility::vector1< Real > mut_energies( muts_.size(), 0.0 );
834 
835  // go through and come up with a total score for each structure in the wts_ and muts_ list.
836  //
837  // wts_ is a vector1 of SingleStructureData (SSD) objects. this for loop iterates over each free weight and
838  // takes the unweighted energy for the current free term in wts_[jj] and multiplies it by the weight in vars.
839  // so the term 'unfolded' will have an energy of zero, which is what we need to fix. the SSD objects have
840  // a vector1 of Reals accessible by free_data() and fixed_data() member functions. These store the total
841  // energies by score type for the entire pose.
842 
843  for ( Size ii = 1; ii <= num_energy_dofs; ++ii ) {
844  for ( Size jj = 1; jj <= wts_.size(); ++jj ) {
845 
846  // cap the fa_rep term at some value - this at least keeps it around for most of the mutants
847  #ifdef CAP_FA_REP
848  if ( ( score_list[ ii ] == fa_rep ) && ( vars[ ii ] * wts_[ jj ]->free_data()[ ii ] > 10 ) ) { wt_energies[ jj ] += 10; }
849  else
850  #endif
851  wt_energies[ jj ] += vars[ ii ] * wts_[ jj ]->free_data()[ ii ];
852  }
853  for ( Size jj = 1; jj <= muts_.size(); ++jj ) {
854  #ifdef CAP_FA_REP
855  if ( ( score_list[ ii ] == fa_rep ) && ( vars[ ii ] * muts_[ jj ]->free_data()[ ii ] > 10 ) ) { mut_energies[ jj ] += 10; }
856  else
857  #endif
858  mut_energies[ jj ] += vars[ ii ] * muts_[ jj ]->free_data()[ ii ];
859  }
860  }
861  for ( Size ii = 1; ii <= fixed_score_list.size(); ++ii ) {
862  for ( Size jj = 1; jj <= wts_.size(); ++jj ) {
863  #ifdef CAP_FA_REP
864  if ( ( fixed_score_list[ ii ] == fa_rep ) && ( fixed_terms[ fixed_score_list[ ii ] ] * wts_[ jj ]->fixed_data()[ ii ] > 10 ) ) { wt_energies[ jj ] += 10; }
865  else
866  #endif
867  wt_energies[ jj ] += fixed_terms[ fixed_score_list[ ii ] ] * wts_[ jj ]->fixed_data()[ ii ];
868  }
869  for ( Size jj = 1; jj <= muts_.size(); ++jj ) {
870  #ifdef CAP_FA_REP
871  if ( ( fixed_score_list[ ii ] == fa_rep ) && ( fixed_terms[ fixed_score_list[ ii ] ] * muts_[ jj ]->fixed_data()[ ii ] > 10 ) ) { mut_energies[ jj ] += 10; }
872  else
873  #endif
874  mut_energies[ jj ] += fixed_terms[ fixed_score_list[ ii ] ] * muts_[ jj ]->fixed_data()[ ii ];
875  }
876  }
877 
878  // I presume these are the reference energies that are being added in?
879  // num_energy_dofs is the number of free, non-reference energy parameters in the run, so yes these are the refE's
880  if ( num_ref_dofs != 0 ) {
881  for ( Size jj = 1; jj <= wts_.size(); ++jj ) {
882  wt_energies[ jj ] += vars[ num_energy_dofs + wt_aa_ ];
883  }
884  for ( Size jj = 1; jj <= muts_.size(); ++jj ) {
885  mut_energies[ jj ] += vars[ num_energy_dofs + mut_aa_ ];
886  }
887  }
888 
889  //TR << "process_score(): before unfolded wts_: [ ";
890  //for ( Size jj = 1; jj <= wts_.size(); ++jj ) { TR << F(6,1,wt_energies[ jj ]) << ", "; }
891  //TR << "]" << std::endl;
892 
893  // now do some special processing of the unfolded state energy
894  // this part has two steps: first we have to take the weights for the canonical score12 score terms and apply them
895  // to the unweighted unfolded energies stored in the member variable emap. then, we have to take that entire
896  // sum of products and multiply it by the unfolded term weight currently being evaluated.
897  Real wt_unweighted_unfolded_energy(0.0), wt_weighted_unfolded_energy(0.0);
898  Real mut_unweighted_unfolded_energy(0.0), mut_weighted_unfolded_energy(0.0);
899 
900  // Part 1a: Variable-weighted energy terms
901  //TR << "process_score(): weighted free unfolded energies: [ ";
902  for ( Size ii = 1; ii <= num_energy_dofs; ++ii ) {
903 
904  //TR << name_from_score_type( score_list[ ii ] ) << ": "
905  // << ( vars[ii] * wt_unfolded_energies_emap_[ score_list[ ii ] ] ) << ", "
906  // << ( vars[ii] * mut_unfolded_energies_emap_[ score_list[ ii ] ] ) << " / ";
907 
908  //TR << "process_score(): adding unfolded '" << name_from_score_type( score_list[ ii ] )
909  // << "' energy: " << wt_unfolded_energies_emap_[ score_list[ ii ] ]
910  // << " * free weight: " << vars[ ii ]
911  // << " = " << ( vars[ii] * wt_unfolded_energies_emap_[ score_list[ ii ] ] ) << std::endl;
912 
913  // Assume free params are fa_rep, solubility, and unfolded. The unfolded_energy_emap_vector only contains energies
914  // for fa_rep; solubility and unfolded always return zeros. Thus, even though we iterate through all the free terms
915  // here, only the fa_rep will contribute to the unfolded_energy_for_one_aa.
916  //
917  // The neat thing about this setup is that if a score12 energy term is not included as a free or fixed param, then
918  // it won't be included in the unfolded state energy either.
919  wt_unweighted_unfolded_energy += ( vars[ii] * wt_unfolded_energies_emap_[ score_list[ ii ] ] );
920  mut_unweighted_unfolded_energy += ( vars[ii] * mut_unfolded_energies_emap_[ score_list[ ii ] ] );
921  }
922  //TR << std::endl;
923 
924  // Part 1b: Fixed-weight energy terms
925  //TR << "process_score(): weighted fixed unfolded energies: [ ";
926  for ( Size ii = 1; ii <= fixed_score_list.size(); ++ii ) {
927 
928  //TR << name_from_score_type( fixed_score_list[ ii ] ) << ": "
929  // << ( fixed_terms[ fixed_score_list[ ii ] ] * wt_unfolded_energies_emap_[ fixed_score_list[ ii ] ] ) << ", "
930  // << ( fixed_terms[ fixed_score_list[ ii ] ] * mut_unfolded_energies_emap_[ fixed_score_list[ ii ] ] ) << " / ";
931 
932  //TR << "process_score(): adding unfolded '" << name_from_score_type( fixed_score_list[ ii ] )
933  // << "' energy: " << wt_unfolded_energies_emap_[ fixed_score_list[ ii ] ]
934  // << " * fixed weight: " << fixed_terms[ fixed_score_list[ ii ] ]
935  // << " = " << ( fixed_terms[ fixed_score_list[ ii ] ] * wt_unfolded_energies_emap_[ fixed_score_list[ ii ] ] ) << std::endl;
936 
937  wt_unweighted_unfolded_energy += ( fixed_terms[ fixed_score_list[ ii ] ] * wt_unfolded_energies_emap_[ fixed_score_list[ ii ] ] );
938  mut_unweighted_unfolded_energy += ( fixed_terms[ fixed_score_list[ ii ] ] * mut_unfolded_energies_emap_[ fixed_score_list[ ii ] ] );
939  }
940  //TR << std::endl;
941 
942  // Part 2: Now use the current 'unfolded' term weight
943  // 'unfolded' could be a free or fixed term so iterate over both lists;
944  Real unfolded_weight = 0.0;
945  for ( Size ii = 1; ii <= num_energy_dofs; ++ii ) {
946  if ( name_from_score_type( score_list[ ii ] ) == "unfolded" ) {
947  wt_weighted_unfolded_energy = vars[ii] * wt_unweighted_unfolded_energy;
948  mut_weighted_unfolded_energy = vars[ii] * mut_unweighted_unfolded_energy;
949  unfolded_weight = vars[ii];
950  //TR << "process_score(): weighting unweighted unfolded energy: '" << wt_unweighted_unfolded_energy
951  // << " by free unfolded term weight: " << vars[ii]
952  // << " = " << vars[ii] * wt_unweighted_unfolded_energy << std::endl;
953  }
954  }
955 
956  // !!!!!!!!!!!!!!!!! fixed_score_list[ ii ] gives you a ScoreType - that's not the weight. fixed_terms[ fixed_score_list[ ii ] ]
957  // is the actual weight. But if you use the ScoreType in a multiplication, it will work just fine because ScoreTypes are
958  // an enum. They're position ~120 in the enum, so it'll be a nice large weight.
959 
960  for ( Size ii = 1; ii <= fixed_score_list.size(); ++ii ) {
961  if ( name_from_score_type( fixed_score_list[ ii ] ) == "unfolded" ) {
962  wt_weighted_unfolded_energy = fixed_terms[ fixed_score_list[ ii ] ] * wt_unweighted_unfolded_energy;
963  mut_weighted_unfolded_energy = fixed_terms[ fixed_score_list[ ii ] ] * mut_unweighted_unfolded_energy;
964  unfolded_weight = fixed_terms[ fixed_score_list[ ii ] ];
965  //TR << "process_score(): weighting unweighted unfolded energy: '" << wt_unweighted_unfolded_energy
966  // << " by fixed unfolded term weight: " << fixed_terms[ fixed_score_list[ ii ] ]
967  // << " = " << fixed_terms[ fixed_score_list[ ii ] ] * wt_unweighted_unfolded_energy << std::endl;
968  }
969  }
970 
971  // Now SUBTRACT this unfolded energy from the sums we have so far. See comments for optimize_weights() in IterativeOptEDriver.cc.
972  // This weighted unfolded energy will be the same for every structure in the SSD vectors wts_ and muts_. So just iterate over
973  // both of those and subtract out this unfolded energy.
974  for ( Size ii = 1; ii <= wts_.size(); ++ii ) { wt_energies[ ii ] -= wt_weighted_unfolded_energy; }
975  for ( Size ii = 1; ii <= muts_.size(); ++ii ) { mut_energies[ ii ] -= mut_weighted_unfolded_energy; }
976 
977  //TR << "process_score(): unf weight: " << unfolded_weight
978  // << ", unweighted energy: " << wt_unweighted_unfolded_energy << ", " << mut_unweighted_unfolded_energy
979  // << "; weighted unfolded energy: " << F(7,2,wt_weighted_unfolded_energy) << ", " << F(7,2,mut_weighted_unfolded_energy) << std::endl;
980 
981  //TR << "process_score(): after unfolded wts_: [ ";
982  //for ( Size jj = 1; jj <= wts_.size(); ++jj ) { TR << F(6,1,wt_energies[ jj ]) << ", "; }
983  //TR << "]" << std::endl;
984 
985 
986  // This is where we branch on how the score is calculated. The simplest approach is to take the minimum energy
987  // of all the wts and all the muts and subtract them to get the ddG. The mean-based approach use the difference
988  // of the average of all muts and average of all wts. Finally, the boltzmann approach calculates a boltzmann
989  // probability for the wts and muts to get a score.
990  // The other ways have been removed.
991 
992  Real predicted_ddG( 0.0 );
993  Real ddG_diff( 0.0 );
994 
995  // Do things the old-fashioned way: best energy mut - best energy wt
996  Size const best_wt = arg_min( wt_energies );
997  Size const best_mut = arg_min( mut_energies );
998 
999  Real const best_wt_energy = wt_energies[ best_wt ];
1000  Real const best_mut_energy = mut_energies[ best_mut ];
1001 
1002  predicted_ddG = best_mut_energy - best_wt_energy;
1003  ddG_diff = predicted_ddG - experimental_ddG_;
1004 
1005  if ( print ) {
1006  ostr << "DDG " << A( 20, tag() ) << X(1) << "pred: " << F(6,2,predicted_ddG) << " exp: " << F(6,2,experimental_ddG_)
1007  << " diff^2: " << F(7,2,ddG_diff*ddG_diff) << " cmptwt_diff^2: " << F( 7,2,component_weights[ ddG_mutation_correlation_with_unfolded_energy ] * ddG_diff * ddG_diff )
1008  << std::endl;
1009 
1010  TR << "process_score(): unf weight: " << F(5,3,unfolded_weight)
1011  << ", raw unfE: " << wt_unweighted_unfolded_energy << ", " << mut_unweighted_unfolded_energy
1012  << "; unfE: " << wt_weighted_unfolded_energy << ", " << mut_weighted_unfolded_energy
1013  << "; totalE: " << best_wt_energy << ", " << best_mut_energy
1014  << "; pred: " << ObjexxFCL::fmt::F(4,2,predicted_ddG)
1015  << ", exp'tal: " << experimental_ddG_ << ", ddG_diff^2: " << ObjexxFCL::fmt::F(6,3,ddG_diff * ddG_diff)
1016  << ", tag: " << tag() << std::endl;
1017 
1018  } else {
1019 
1020  for( Size e_dof(1); e_dof <= num_energy_dofs; ++e_dof ) {
1021 
1022  if ( ( score_list[ e_dof ] == fa_rep ) && ( muts_[ best_mut ]->free_data()[ e_dof ] - wts_[ best_wt ]->free_data()[ e_dof ] ) > 10 ) {
1023  // deal with the really bad repulsive energy cases here
1024  dE_dvars[ e_dof ] += 2 * component_weights[ ddG_mutation_correlation ] * ddG_diff * 10;
1025 
1026  } else if ( score_list[ e_dof ] == unfolded ) {
1027  dE_dvars[ e_dof ] += 2 * component_weights[ ddG_mutation_correlation_with_unfolded_energy ] * ddG_diff *
1028  ( mut_unweighted_unfolded_energy - wt_unweighted_unfolded_energy );
1029 
1030  } else
1031  dE_dvars[ e_dof ] += 2 * component_weights[ ddG_mutation_correlation_with_unfolded_energy ] * ddG_diff *
1032  ( muts_[ best_mut ]->free_data()[ e_dof ] - wts_[ best_wt ]->free_data()[ e_dof ] );
1033  }
1034 
1035  if ( num_ref_dofs != 0 ) {
1036  dE_dvars[ num_energy_dofs + mut_aa_ ] += 2 * component_weights[ ddG_mutation_correlation_with_unfolded_energy ] * ddG_diff;
1037  dE_dvars[ num_energy_dofs + wt_aa_ ] -= 2 * component_weights[ ddG_mutation_correlation_with_unfolded_energy ] * ddG_diff;
1038  }
1039  }
1040 
1041  return component_weights[ ddG_mutation_correlation_with_unfolded_energy ] * ddG_diff * ddG_diff;
1042 }
1043 
1044 
1045 ///
1046 /// @begin NestedEnergyTermDDGMutationOptEData::type()
1047 ///
1049 NestedEnergyTermDDGMutationOptEData::type() const {
1051 }
1052 
1053 
1054 ///
1055 /// @begin NestedEnergyTermDDGMutationOptEData::memory_use()
1056 ///
1057 /// Only used for user feedback. Nothing in the code uses the result from this to allocate memory.
1058 ///
1059 Size
1060 NestedEnergyTermDDGMutationOptEData::memory_use() const {
1061 
1062  Size total = sizeof( DDGMutationOptEData ) +
1063  sizeof( SingleStructureData ) * wts_.size() +
1064  sizeof( SingleStructureData ) * muts_.size();
1065  if ( wts_.size() > 0 ) {
1066  total += sizeof( Real ) * ( wts_[ 1 ]->free_data().size() + wts_[ 1 ]->fixed_data().size() ) * wts_.size();
1067  }
1068  if ( muts_.size() > 0 ) {
1069  total += sizeof( Real ) * ( muts_[ 1 ]->free_data().size() + muts_[ 1 ]->fixed_data().size() ) * muts_.size();
1070  }
1071 
1072  // the emap uses some memory, too! do I take the number of score types and multiply by sizeof(Real)? Or can I just
1073  // do sizeof(emap)? I think taking the size of the class is enough. Emaps don't grow/shrink dynamically. They're
1074  // essentially constant size, an array of Reals sized to n_score_types.
1075  total += sizeof( EnergyMap ) * 2; // there's the wt one and the mutant one
1076 
1077  return total;
1078 }
1079 
1080 
1081 #ifdef USEMPI
1082 ///
1083 /// @begin NestedEnergyTermDDGMutationOptEData::send_to_node()
1084 ///
1085 void
1086 NestedEnergyTermDDGMutationOptEData::send_to_node( int const destination_node, int const tag ) const {
1087 
1088  /// 1. Experimental DDG, wt_aa, mut_aa
1089  int wt_aa( wt_aa_ ), mut_aa( mut_aa_ );
1090  Real experimental_ddG = experimental_ddG_; // stupid const pointer
1091  MPI_Send( & experimental_ddG, 1, MPI_DOUBLE, destination_node, tag, MPI_COMM_WORLD );
1092  MPI_Send( & wt_aa, 1, MPI_INT, destination_node, tag, MPI_COMM_WORLD );
1093  MPI_Send( & mut_aa, 1, MPI_INT, destination_node, tag, MPI_COMM_WORLD );
1094 
1095  // 2a. n natives
1096  //std::cout << "sending nwts to node " << destination_node << std::endl;
1097  Size nwts = wts_.size();
1098  MPI_Send( & nwts, 1, MPI_UNSIGNED_LONG, destination_node, tag, MPI_COMM_WORLD );
1099 
1100  /// 2b. n decoys
1101  //std::cout << "sending nmuts to node " << destination_node << std::endl;
1102  Size nmuts = muts_.size();
1103  MPI_Send( & nmuts, 1, MPI_UNSIGNED_LONG, destination_node, tag, MPI_COMM_WORLD );
1104 
1105  if ( nwts == 0 || nmuts == 0 )
1106  return;
1107 
1108  /// 3. n free
1109  Size n_free = muts_[ 1 ]->free_data().size();
1110  //std::cout << "sending n_free to node " << destination_node << " " << n_free << std::endl;
1111  MPI_Send( & n_free, 1, MPI_UNSIGNED_LONG, destination_node, tag, MPI_COMM_WORLD );
1112 
1113  /// 4. n fixed
1114  Size n_fixed = muts_[ 1 ]->fixed_data().size();
1115  //std::cout << "sending n_fixed to node " << destination_node << " " << n_fixed << std::endl;
1116  MPI_Send( & n_fixed, 1, MPI_UNSIGNED_LONG, destination_node, tag, MPI_COMM_WORLD );
1117 
1118  /// Send natives, then send decoys
1119  Real * free_data = new Real[ n_free * nwts ];
1120  Real * fixed_data = new Real[ n_fixed * nwts ];
1121  for ( Size ii = 1; ii <= nwts; ++ ii ) {
1122  for ( Size jj = 1; jj <= n_free; ++jj ) {
1123  free_data[ ( ii - 1 ) * n_free + ( jj - 1 ) ] = wts_[ ii ]->free_data()[ jj ];
1124  }
1125  for ( Size jj = 1; jj <= n_fixed; ++jj ) {
1126  fixed_data[ ( ii - 1 ) * n_fixed + ( jj - 1 ) ] = wts_[ ii ]->fixed_data()[ jj ];
1127  }
1128  }
1129 
1130  //std::cout << "sending native free_data to node " << destination_node << " " << free_data << std::endl;
1131  /// 5. native free data
1132  MPI_Send( free_data, nwts * n_free, MPI_DOUBLE, destination_node, tag, MPI_COMM_WORLD );
1133 
1134  //std::cout << "sending native fixed_data to node " << destination_node << " " << fixed_data << std::endl;
1135  /// 6. fixed data
1136  MPI_Send( fixed_data, nwts * n_fixed, MPI_DOUBLE, destination_node, tag, MPI_COMM_WORLD );
1137 
1138  //std::cout << "Sent -- about to delete data" << std::endl;
1139 
1140  /// now send decoys
1141  Real * decoy_free_data = new Real[ n_free * nmuts ];
1142  Real * decoy_fixed_data = new Real[ n_fixed * nmuts ];
1143  for ( Size ii = 1; ii <= nmuts; ++ ii ) {
1144  for ( Size jj = 1; jj <= n_free; ++jj ) {
1145  decoy_free_data[ ( ii - 1 ) * n_free + ( jj - 1 ) ] = muts_[ ii ]->free_data()[ jj ];
1146  }
1147  for ( Size jj = 1; jj <= n_fixed; ++jj ) {
1148  decoy_fixed_data[ ( ii - 1 ) * n_fixed + ( jj - 1 ) ] = muts_[ ii ]->fixed_data()[ jj ];
1149  }
1150  }
1151  /// 7. decoy free data
1152  //std::cout << "sending decoy free_data to node " << destination_node << std::endl;
1153  MPI_Send( decoy_free_data, nmuts * n_free, MPI_DOUBLE, destination_node, tag, MPI_COMM_WORLD );
1154 
1155  /// 8. decoy fixed data
1156  //std::cout << "sending decoy fixed_data to node " << destination_node << std::endl;
1157  MPI_Send( decoy_fixed_data, nmuts * n_fixed, MPI_DOUBLE, destination_node, tag, MPI_COMM_WORLD );
1158 
1159  delete [] free_data;
1160  delete [] fixed_data;
1161 
1162  delete [] decoy_free_data;
1163  delete [] decoy_fixed_data;
1164 
1165  /// 9a. The energies in the unfolded emap, wt first
1166  Real * wt_unfolded_energies = new Real[ scoring::n_score_types ];
1167  for ( Size ee = 1; ee <= scoring::n_score_types; ++ee ) {
1168  wt_unfolded_energies[ ee - 1 ] = wt_unfolded_energies_emap_[ (ScoreType) ee ];
1169  }
1170  MPI_Send( wt_unfolded_energies, scoring::n_score_types, MPI_DOUBLE, destination_node, tag, MPI_COMM_WORLD );
1171  delete [] wt_unfolded_energies;
1172  wt_unfolded_energies = 0;
1173 
1174  /// 9b. mutant emap
1175  Real * mut_unfolded_energies = new Real[ scoring::n_score_types ];
1176  for ( Size ee = 1; ee <= scoring::n_score_types; ++ee ) {
1177  mut_unfolded_energies[ ee - 1 ] = mut_unfolded_energies_emap_[ (ScoreType) ee ];
1178  }
1179  MPI_Send( mut_unfolded_energies, scoring::n_score_types, MPI_DOUBLE, destination_node, tag, MPI_COMM_WORLD );
1180  delete [] mut_unfolded_energies;
1181  mut_unfolded_energies = 0;
1182 
1183 
1184  OptEPositionData::send_to_node( destination_node, tag );
1185 
1186 }
1187 
1188 
1189 ///
1190 /// @begin NestedEnergyTermDDGMutationOptEData::receive_from_node()
1191 ///
1192 void
1193 NestedEnergyTermDDGMutationOptEData::receive_from_node( int const source_node, int const tag )
1194 {
1195  MPI_Status stat;
1196  //TR << "PNatStructureOptEData::Recieving data from node... " << source_node << std::endl;
1197 
1198  /// 1. Experimental DDG, wt_aa, mut_aa
1199  int wt_aa( 0 ), mut_aa(0);
1200  MPI_Recv( & experimental_ddG_, 1, MPI_DOUBLE, source_node, tag, MPI_COMM_WORLD, &stat );
1201  MPI_Recv( & wt_aa, 1, MPI_INT, source_node, tag, MPI_COMM_WORLD, &stat );
1202  MPI_Recv( & mut_aa, 1, MPI_INT, source_node, tag, MPI_COMM_WORLD, &stat );
1203  wt_aa_ = static_cast< AA > ( wt_aa );
1204  mut_aa_ = static_cast< AA > ( mut_aa );
1205 
1206  /// 2a. n wts
1207  Size nwts( 0 );
1208  MPI_Recv( & nwts, 1, MPI_UNSIGNED_LONG, source_node, tag, MPI_COMM_WORLD, &stat );
1209 
1210  /// 2b. n decoys
1211  Size nmuts( 0 );
1212  MPI_Recv( & nmuts, 1, MPI_UNSIGNED_LONG, source_node, tag, MPI_COMM_WORLD, &stat );
1213 
1214  if ( nwts == 0 || nmuts == 0 ) return;
1215  wts_.reserve( nwts );
1216  muts_.reserve( nmuts );
1217 
1218  /// 3. n free
1219  Size n_free( 0 );
1220  MPI_Recv( & n_free, 1, MPI_UNSIGNED_LONG, source_node, tag, MPI_COMM_WORLD, &stat );
1221 
1222  /// 4. n fixed
1223  Size n_fixed( 0 );
1224  MPI_Recv( & n_fixed, 1, MPI_UNSIGNED_LONG, source_node, tag, MPI_COMM_WORLD, &stat );
1225 
1226  /// Recieve native data first, then decoys
1227  Real * free_data = new Real[ n_free * nwts ];
1228  Real * fixed_data = new Real[ n_fixed * nwts ];
1229 
1230  /// 5. free data
1231  MPI_Recv( free_data, nwts * n_free, MPI_DOUBLE, source_node, tag, MPI_COMM_WORLD, &stat );
1232 
1233  /// 6. fixed data
1234  MPI_Recv( fixed_data, nwts * n_fixed, MPI_DOUBLE, source_node, tag, MPI_COMM_WORLD, &stat );
1235 
1236  utility::vector1< Real > free_data_v( n_free );
1237  utility::vector1< Real > fixed_data_v( n_fixed );
1238  for ( Size ii = 1; ii <= nwts; ++ ii ) {
1239  for ( Size jj = 1; jj <= n_free; ++jj ) {
1240  free_data_v[ jj ] = free_data[ ( ii - 1 ) * n_free + ( jj - 1 ) ];
1241  }
1242  for ( Size jj = 1; jj <= n_fixed; ++jj ) {
1243  fixed_data_v[ jj ] = fixed_data[ ( ii - 1 ) * n_fixed + ( jj - 1 ) ];
1244  }
1245  wts_.push_back( new SingleStructureData( free_data_v, fixed_data_v ) );
1246  }
1247 
1248 
1249  delete [] free_data; free_data = 0;
1250  delete [] fixed_data; fixed_data = 0;
1251 
1252  //// Now receive decoy data
1253  free_data = new Real[ n_free * nmuts ];
1254  fixed_data = new Real[ n_fixed * nmuts ];
1255 
1256  /// 5. free data
1257  MPI_Recv( free_data, nmuts * n_free, MPI_DOUBLE, source_node, tag, MPI_COMM_WORLD, &stat );
1258 
1259  /// 6. fixed data
1260  MPI_Recv( fixed_data, nmuts * n_fixed, MPI_DOUBLE, source_node, tag, MPI_COMM_WORLD, &stat );
1261 
1262  for ( Size ii = 1; ii <= nmuts; ++ ii ) {
1263  for ( Size jj = 1; jj <= n_free; ++jj ) {
1264  free_data_v[ jj ] = free_data[ ( ii - 1 ) * n_free + ( jj - 1 ) ];
1265  }
1266  for ( Size jj = 1; jj <= n_fixed; ++jj ) {
1267  fixed_data_v[ jj ] = fixed_data[ ( ii - 1 ) * n_fixed + ( jj - 1 ) ];
1268  }
1269  muts_.push_back( new SingleStructureData( free_data_v, fixed_data_v ) );
1270  }
1271 
1272  delete [] free_data;
1273  delete [] fixed_data;
1274 
1275 
1276  /// 7a. Unfolded energies
1277  EnergyMap emap;
1278 
1279  Real * wt_unfolded_energies = new Real[ scoring::n_score_types ];
1280  MPI_Recv( wt_unfolded_energies, scoring::n_score_types, MPI_DOUBLE, source_node, tag, MPI_COMM_WORLD, &stat );
1281 
1282  for ( Size ee = 1; ee <= scoring::n_score_types; ++ee ) {
1283  // be careful because the array is 0-based while the score type enum is 1-based
1284  emap[ (ScoreType) ee ] = wt_unfolded_energies[ ee - 1 ];
1285  }
1286  set_wt_unfolded_energies_emap( emap );
1287 
1288  /// 7b. Unfolded energies
1289  emap.zero();
1290  Real * mut_unfolded_energies = new Real[ scoring::n_score_types ];
1291  MPI_Recv( mut_unfolded_energies, scoring::n_score_types, MPI_DOUBLE, source_node, tag, MPI_COMM_WORLD, &stat );
1292 
1293  for ( Size ee = 1; ee <= scoring::n_score_types; ++ee ) {
1294  emap[ (ScoreType) ee ] = mut_unfolded_energies[ ee - 1 ];
1295  }
1296  set_mut_unfolded_energies_emap( emap );
1297 
1298  delete [] wt_unfolded_energies;
1299  delete [] mut_unfolded_energies;
1300 
1301  wt_unfolded_energies = 0;
1302  mut_unfolded_energies = 0;
1303 
1304 
1305  OptEPositionData::receive_from_node( source_node, tag );
1306 
1307 }
1308 #endif
1309 
1310 
1311 }
1312 }