Rosetta 3.5
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
IterativeOptEDriver.cc
Go to the documentation of this file.
1 // -*- Mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
2 // vi: set ts=2 noet:
3 //
4 // (c) Copyright Rosetta Commons Member Institutions.
5 // (c) This file is part of the Rosetta software suite and is made available under license.
6 // (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
7 // (c) For more information, see http://www.rosettacommons.org. Questions about this can be
8 // (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
9 
10 /// @file protocols/optimize_weights/IterativeOptEDriver.cc
11 /// @brief Implementation of iterative weight fitting protocol
12 /// @author Andrew Leaver-Fay -- emulating a protocol by Jim Havranek and Brian Kuhlman.
13 
14 // Unit headers
16 
17 #include <core/types.hh>
18 
19 #include <core/chemical/AA.hh>
22 #include <core/scoring/Energies.hh>
30 // AUTO-REMOVED #include <core/scoring/hbonds/HBondSet.hh>
35 
36 #include <core/scoring/rms_util.hh>
37 
47 
48 
54 
55 #include <core/graph/Graph.hh>
56 
58 
59 // AUTO-REMOVED #include <core/io/pdb/pose_io.hh>
60 
62 
69 
70 #include <core/pose/Pose.hh>
71 #include <core/pose/PDBInfo.hh>
72 
73 #include <basic/options/util.hh>
74 #if defined(WIN32) || defined(__CYGWIN__)
75  #include <ctime>
76 #endif
77 
78 #include <basic/Tracer.hh>
79 
84 
85 #include <utility/io/izstream.hh>
86 #include <utility/vector1.hh>
87 #include <utility/vector1.functions.hh>
88 #include <utility/exit.hh>
89 #include <utility/file/FileName.hh>
90 #include <utility/file/PathName.hh>
91 #include <utility/file/file_sys_util.hh>
92 #include <utility/string_util.hh>
93 #include <utility/pointer/owning_ptr.hh>
94 #include <utility/pointer/ReferenceCount.hh>
95 
96 #include <numeric/xyzVector.hh>
97 #include <numeric/statistics.functions.hh>
98 #include <numeric/random/random.hh>
99 
100 #include <ObjexxFCL/FArray1D.hh>
101 #include <ObjexxFCL/string.functions.hh>
102 
103 //silent file stuff
105 
106 #ifdef USEMPI
107 /// MPI
108 #include <mpi.h>
109 #endif
110 
111 // C++ headers
112 #include <fstream>
113 #include <iostream>
114 #include <string>
115 #include <algorithm>
116 #include <sstream>
117 
118 // option key includes
119 
120 #include <basic/options/keys/optE.OptionKeys.gen.hh>
121 #include <basic/options/keys/in.OptionKeys.gen.hh>
122 
124 #include <utility/vector0.hh>
125 #include <ObjexxFCL/format.hh>
126 
127 //Auto using namespaces
128 namespace ObjexxFCL { namespace fmt { } } using namespace ObjexxFCL::fmt; // AUTO USING NS
129 //Auto using namespaces end
130 
131 namespace protocols {
132 namespace optimize_weights {
133 
134 using namespace core;
135 using namespace scoring;
136 using namespace optimization;
137 
138 using namespace basic::options;
139 using namespace basic::options::OptionKeys;
140 
141 using namespace utility;
142 
143 using utility::vector1;
144 
145 basic::Tracer TR("protocols.optimize_weights.IterativeOptEDriver"); // general output channel used throughout this file
146 basic::Tracer TR_VERBOSE("protocols.optimize_weights.IterativeOptEDriver.verbose"); // separate ".verbose" channel for the more detailed messages
147 
148 static numeric::random::RandomGenerator optE_RG(10193); // file-local generator; 10193 is presumably its seed/identifier -- TODO confirm
149 
150 void attach_debugger(); // forward declaration only; the definition is not in this part of the file
151 
153 
154 public: // NOTE(review): the class header line and several member lines are absent from this listing
156 
159  ) : // tail of what appears to be the copy constructor (its opening lines are not shown) -- TODO confirm
160  parent(),
161  scale_factor_( other.scale_factor_ ) // takes over the other object's scale factor
162  {}
163  ScaleAnnealerTemperatureOperation( core::Real scale ) : scale_factor_( scale ) {} // construct with an explicit scale factor
164 
166 
167  virtual
169  clone() const { // returns a newly allocated copy of this operation
170  return new ScaleAnnealerTemperatureOperation( *this );
171  }
172 
173  virtual
174  void
176  task.low_temp( 0.3 * scale_factor_ ); // low temperature handed to the task: 0.3 times the scale factor
177  task.high_temp( 100.0 * scale_factor_ ); // high temperature handed to the task: 100.0 times the scale factor
178  }
179 
180  void scale_factor( core::Real setting ) { // setter: replaces the stored scale factor
181  scale_factor_ = setting;
182  }
183 
184 private:
186 };
187 
188 ////////////////////////////////////////////////////////////////
189 ////////////////////////////////////////////////////////////////
190 ////////////////////////////////////////////////////////////////
191 
192 ///
193 /// @begin IterativeOptEDriver::IterativeOptEDriver()
194 ///
195 /// @brief
196 /// Main constructor for the IterativeOptEDriver class. Note that mpi_rank and mpi_nprocs get set even if
197 /// USEMPI is not defined (to 0 and 1 respectively). These values are then used to set MPI_rank_ and MPI_nprocs_.
198 /// Also calls the initialize_free_and_fixed_terms method.
199 /// Finally scans the free and the fixed score-type lists for a score type named "unfolded".
200 IterativeOptEDriver::IterativeOptEDriver() :
201  ligand_repack_pdbs_(),
202  ligand_repack_native_poses_(),
203  decoy_discrim_data_( 0 ),
204  ligand_discrim_data_( NULL ),
205  dG_binding_data_( NULL ),
206  ddG_bind_optE_data_( NULL ),
207  include_count_( 0 ),
208  fixed_count_( 0 ),
209  free_count_( 0 ),
210  component_weights_( n_optE_data_types, 1.0 ), // one weight per optE data type, all starting at 1.0
211 #ifdef USEMPI
212  tag_( 1 ), // message tag passed to the MPI_Send / MPI_Recv calls in this file
213 #endif
214  outer_loop_counter_( 0 ),
215  inner_loop_counter_( 1 ),
216  total_positions_( 0 ), // new params by APL for sequence entropy optimization
217  count_recovered_( 0 ), // entropy
218  aa_obs_( core::chemical::num_canonical_aas, 0 ), // entropy
219  aa_exp_( core::chemical::num_canonical_aas, 0 ), // entropy
220  aa_freq_obs_( core::chemical::num_canonical_aas, 0.0 ), // entropy
221  aa_freq_exp_( core::chemical::num_canonical_aas, 0.0 ), // entropy
222  mixing_factor_( 0.0 ),
223  outer_loop_last_sequence_recovery_rate_( 0.0 ),
224  outer_loop_seq_profile_cross_entropy_( 0.0 ), // entropy
225  inner_loop_sequence_recovery_rate_( 0.0 ),
226  using_unfolded_energy_term_( false ) // switched on further down if an "unfolded" score type is found
227 {
228  // default task factory, generates 'vanilla' PackerTasks
230 
231  // load custom TaskOperations according to an xml-like utility::tag file
232  if ( option[ optE::parse_tagfile ].user() ) {
233  using namespace core::pack::task::operation;
234  std::string tagfile_name( option[ optE::parse_tagfile ]() );
236  // else use default TaskOperation(s)
237  } else if ( ! option[ optE::design_with_minpack ] ) {
239  }
240 
241  int mpi_rank( 0 ), mpi_nprocs( 1 ); // single-process defaults; overwritten below only in a USEMPI build
242 #ifdef USEMPI
243  MPI_Comm_rank (MPI_COMM_WORLD, &mpi_rank);/* get current process id */
244  MPI_Comm_size (MPI_COMM_WORLD, &mpi_nprocs);/* get number of processes */
245 #endif
246 
248  MPI_nprocs_ = mpi_nprocs;
249 
250  // only init the refE vector1's if we're using reference energies
251  if ( ! option[ optE::dont_use_reference_energies ].user() ) { // NOTE(review): presumably .user() tests whether the flag was supplied, not its value -- confirm that is intended
255  }
256 
259 
260  // set the using unfolded boolean, by iterating over the score types in both the fixed and free lists and
261  // checking for "unfolded". checking if the emaps have non-zero weights associated with "unfolded" would also work.
262  for( vector1< ScoreType >::iterator score_type_iter = free_score_list_.begin(), end_iter = free_score_list_.end();
263  score_type_iter != end_iter; ++score_type_iter ) {
264  if ( name_from_score_type( *score_type_iter ) == "unfolded" ) {
265  TR << "IterativeOptEDriver(): setting 'using_unfolded_energy_term_' to true." << std::endl;
267  }
268  }
269  for( vector1< ScoreType >::iterator score_type_iter = fixed_score_list_.begin(), end_iter = fixed_score_list_.end(); // same check, this time over the fixed score types
270  score_type_iter != end_iter; ++score_type_iter ) {
271  if ( name_from_score_type( *score_type_iter ) == "unfolded" ) {
272  TR << "IterativeOptEDriver(): setting 'using_unfolded_energy_term_' to true." << std::endl;
274  }
275  }
276 
277 }
278 
279 
280 ///
281 /// @begin IterativeOptEDriver::~IterativeOptEDriver()
282 ///
284 
285 void // NOTE(review): a destructor has no return type, so this is a different function from the one named in the @begin tag above; its name line and body are absent from this listing
287 {
289 }
290 
291 ///
292 /// @begin IterativeOptEDriver::read_tagfile_to_taskfactory()
293 ///
294 /// @brief
295 /// Reads an XML-formatted task-operation file and builds a task factory from it.
296 /// Every task operation that TaskOperationFactory::newTaskOperations() produces for tagfile_name is appended to task_factory.
297 void
300  using namespace core::pack::task::operation;
302  TaskOperationFactory::get_instance()->newTaskOperations( tops, tagfile_name ); // fills tops from the tag file
303  for ( TaskOperationFactory::TaskOperationOPs::iterator it( tops.begin() ), itend( tops.end() ); it != itend; ++it ) {
304  task_factory->push_back( *it ); // hand each parsed operation to the factory, in the order tops holds them
305  }
306 }
307 
308 ///
309 /// @begin IterativeOptEDriver::load_pose()
310 ///
311 /// @brief
312 /// loads structure into pose - decides between silent or pdb
313 /// pose is the pose that gets filled; filename is the path of the requested PDB; ignore_centroid_input_flag, when true, keeps the in::file::centroid_input branch from being taken.
314 
315 // PTC - this is a quick and dirty function to intercept the file name intended for pose_from_pdb and retrieve it from a silent file instead
316 // it dramatically speeds up decoy discrimination (more than 70% of the time is spent on loading pdbs!)
317 // it uses the path of requested pdb to find silent file, each PDB needs to have all of its structures in its own folder (ie: 1agy/pdb_set.silent)
318 // it looks within each folder for the filename passed to optE::load_from_silent option
319 // only used in optimize_decoy_discrimination and use of optE::load_from_silent option is not exhaustively tested!
320 
321 void
322 IterativeOptEDriver::load_pose( pose::Pose & pose, std::string const & filename, bool ignore_centroid_input_flag=false )
323 {
324  if ( option[ optE::load_from_silent ].user() ) {
325  /// APL -- refactor this. Static data is unacceptable here.
326  static std::string prev_path = ""; // directory used by the previous call; the silent file is re-read only when the directory changes
328 
329  Size slash_index = filename.find_last_of("/\\");
330  std::string path = ( slash_index == std::string::npos ) ? std::string( "." ) : filename.substr(0, slash_index); // no separator: look in the current directory instead of treating the whole filename as a directory
331  std::string tag = filename.substr(slash_index+1); // last path component; npos+1 wraps to 0, so a separator-free filename is used unchanged
332  std::string silent_filename = option[ optE::load_from_silent ]; // name of the silent file inside path (no longer shadows the filename parameter)
333  TR_VERBOSE << "loading: " << tag << " from " << path << "/" << silent_filename << std::endl;
334 
335  if ( prev_path != path ) {
336  prev_path = path;
337  delete sfd;
339  sfd->read_file( path + "/" + silent_filename );
340  }
341  (*sfd)[ tag ]->fill_pose( pose ); // look the structure up by its tag and copy it into pose
342  }
343  else {
344  if ( option[ in::file::centroid_input ] && !ignore_centroid_input_flag) {
346  } else {
347  core::import_pose::pose_from_pdb( pose, filename );
348  }
349  }
350 }
351 
352 ///
353 /// @begin IterativeOptEDriver::divide_up_pdbs()
354 ///
355 /// @brief
356 /// Rank 0 sends every other rank its share of each input list; the other ranks receive their share from rank 0.
357 /// Rank 0 also keeps the first share of every list for itself, so within this method it is not purely a dispatcher.
358 /// The same split is applied to the native pdb list and, for each of the optE options tested below that the user supplied, to that option's input list. The receiving (else) branches are compiled only with USEMPI.
359 ///
360 void
362 {
363  using namespace basic::options;
364  using namespace basic::options::OptionKeys;
365 
366 
367  if ( MPI_rank_ == 0 ) {
369 
370  Size const num_pdbs_per_cpu = all_filenames.size() / MPI_nprocs_; // integer division: the base share every process gets
371  Size const nextra = all_filenames.size() - num_pdbs_per_cpu * MPI_nprocs_; // remainder; handed out one apiece to ranks 0 .. nextra-1
372 
373  Size my_njobs = ( nextra >= 1 ? 1 : 0 ) + num_pdbs_per_cpu; // rank 0's own share: it takes one of the extras whenever there are any
374  for ( Size ii = 1; ii <= my_njobs; ++ii ) {
375  native_pdbs_.push_back(all_filenames[ ii ] );
376 //#ifndef USEMPI
377  next_iteration_pdbs_.push_back(all_filenames[ ii ] );
378 //#else
379  //next_iteration_pdbs_.push_back( "workdir_" + to_string( MPI_rank_ ) + "/" + all_filenames[ ii ] );
380 //#endif
381  //TR << "divide_up_pdbs(): PROC #" << MPI_rank_ << " has native pdb " << native_pdbs_[ ii ] << std::endl;
382  }
383 
384 #ifdef USEMPI
385  //std::cout << " number of nodes " << MPI_nprocs_ << std::endl;
386  Size ii_offset = my_njobs; // index of the last entry already handed out
387  for ( Size ii = 1; ii < MPI_nprocs_; ++ii ) {
388  Size ii_njobs = ( nextra > ii ? 1 : 0 ) + num_pdbs_per_cpu; // rank ii gets an extra entry only while ii < nextra
389  MPI_Send( & ii_njobs, 1, MPI_UNSIGNED_LONG, ii, tag_, MPI_COMM_WORLD ); // first the count, then the entries; NOTE(review): assumes Size has the width of unsigned long -- confirm for all target platforms
390  for ( Size jj = ii_offset + 1; jj <= ii_offset + ii_njobs; ++jj ) {
391  send_string_to_node( ii, all_filenames[ jj ] );
392  }
393  ii_offset += ii_njobs;
394  }
395  } else {
396  Size my_njobs;
397  MPI_Recv( & my_njobs, 1, MPI_UNSIGNED_LONG, 0, tag_, MPI_COMM_WORLD, & stat_ ); // count sent by rank 0
398  native_pdbs_.reserve( my_njobs );
399  for ( Size ii = 1; ii <= my_njobs; ++ii ) {
400  native_pdbs_.push_back( receive_string_from_node( 0 ) );
401  next_iteration_pdbs_.push_back( native_pdbs_[ ii ] );
402  //next_iteration_pdbs_.push_back( "workdir_" + to_string( MPI_rank_ ) + "/" + all_filenames[ my_offset + ii ] );
403  //TR << "divide_up_pdbs(): PROC #" << MPI_rank_ << " has native pdb " << native_pdbs_[ ii ] << std::endl;
404  }
405 #endif
406  }
407 
408 #ifdef USEMPI
409  for ( Size ii = 1; ii <= native_pdbs_.size(); ++ii ) {
410  char hostname[256];
411  gethostname(hostname, sizeof(hostname)); // NOTE(review): the result does not depend on ii; this lookup could be hoisted out of the loop
412  //printf("Structure %s assigned to %s (rank = %d)\n", native_pdbs_[ ii ].c_str(), hostname, (int) MPI_rank_);
413  //fflush( stdout );
414  TR_VERBOSE << "divide_up_pdbs(): structure '" << native_pdbs_[ii] << "' assigned to " << hostname << " (rank = " << MPI_rank_ << ")" << std::endl;
415  }
416 #endif
417 
418 
419  // Decoy discrimination option processing...
420  if ( option[ optE::optimize_decoy_discrimination ].user() ) {
421  if ( MPI_rank_ == 0 ) {
423  utility::vector1< std::string > crystal_native_list;
424  std::ifstream native_and_decoy_lists( option[ optE::optimize_decoy_discrimination ]()().c_str() );
425  while ( native_and_decoy_lists ) { // read whitespace-separated triples until the stream stops being good
426  std::string native_files;
427  std::string decoy_files;
428  std::string crystal_native_file;
429  native_and_decoy_lists >> native_files >> decoy_files >> crystal_native_file;
430  if ( native_files != "" && decoy_files != "" && crystal_native_file != "" ) { // only complete triples are kept
431  file_lists.push_back( std::make_pair( native_files, decoy_files ));
432  crystal_native_list.push_back( crystal_native_file );
433  }
434  }
435 
436  Size const num_pdbs_per_cpu = file_lists.size() / MPI_nprocs_;
437  Size const nextra = file_lists.size() - num_pdbs_per_cpu * MPI_nprocs_;
438 
439  Size my_njobs = ( nextra >= 1 ? 1 : 0 ) + num_pdbs_per_cpu;
440  for ( Size ii = 1; ii <= my_njobs; ++ii ) {
441  decdisc_native_decoy_pairs_.push_back(file_lists[ ii ] );
442  decdisc_crystal_natives_.push_back( crystal_native_list[ ii ] );
443  //TR << " PROC #" << MPI_rank_ << " "<< ii << " decdiscrim: "
444  // << decdisc_native_decoy_pairs_[ ii ].first << " "
445  // << decdisc_native_decoy_pairs_[ ii ].second << " "
446  // << decdisc_crystal_natives_[ ii ] << std::endl;
447  }
448 
449 #ifdef USEMPI
450  Size ii_offset = my_njobs;
451  for ( Size ii = 1; ii < MPI_nprocs_; ++ii ) {
452  Size ii_njobs = ( nextra > ii ? 1 : 0 ) + num_pdbs_per_cpu;
453  MPI_Send( & ii_njobs, 1, MPI_UNSIGNED_LONG, ii, tag_, MPI_COMM_WORLD );
454  for ( Size jj = ii_offset + 1; jj <= ii_offset + ii_njobs; ++jj ) {
455  send_string_to_node( ii, file_lists[ jj ].first ); // three strings per entry, received in the same order in the else branch
456  send_string_to_node( ii, file_lists[ jj ].second );
457  send_string_to_node( ii, crystal_native_list[ jj ] );
458  }
459  ii_offset += ii_njobs;
460  }
461  } else {
462  Size my_njobs( 0 );
463  MPI_Recv( & my_njobs, 1, MPI_UNSIGNED_LONG, 0, tag_, MPI_COMM_WORLD, & stat_ );
464  decdisc_native_decoy_pairs_.reserve( my_njobs );
465  decdisc_crystal_natives_.reserve( my_njobs );
466  for ( Size ii = 1; ii <= my_njobs; ++ii ) {
467  std::string native_pdb_list_name = receive_string_from_node( 0 );
468  std::string decoy_pdb_list_name = receive_string_from_node( 0 );
469  decdisc_native_decoy_pairs_.push_back( std::make_pair( native_pdb_list_name, decoy_pdb_list_name ) );
470  std::string crystal_native = receive_string_from_node( 0 );
471  decdisc_crystal_natives_.push_back( crystal_native );
472 
473  //TR << " PROC #" << MPI_rank_ << " "<< ii << " decdiscrim: "
474  // << native_decoy_pairs_[ ii ].first << " "
475  // << native_decoy_pairs_[ ii ].second << " "
476  // << crystal_natives_[ ii ] << std::endl;
477  }
478 #endif
479  }
480 
481  }
482 
483  if ( option[ optE::optimize_ligand_discrimination ].user() ) { // same read / split / send / receive pattern as the decoy-discrimination section above
484  if ( MPI_rank_ == 0 ) {
486  utility::vector1< std::string > crystal_native_list;
487  std::ifstream native_and_decoy_lists( option[ optE::optimize_ligand_discrimination ]()().c_str() );
488  while ( native_and_decoy_lists ) {
489  std::string native_files;
490  std::string decoy_files;
491  std::string crystal_native_file;
492  native_and_decoy_lists >> native_files >> decoy_files >> crystal_native_file;
493  if ( native_files != "" && decoy_files != "" && crystal_native_file != "" ) {
494  file_lists.push_back( std::make_pair( native_files, decoy_files ));
495  crystal_native_list.push_back( crystal_native_file );
496  }
497  }
498 
499  Size const num_pdbs_per_cpu = file_lists.size() / MPI_nprocs_;
500  Size const nextra = file_lists.size() - num_pdbs_per_cpu * MPI_nprocs_;
501 
502  Size my_njobs = ( nextra >= 1 ? 1 : 0 ) + num_pdbs_per_cpu;
503  for ( Size ii = 1; ii <= my_njobs; ++ii ) {
504  ligand_native_decoy_pairs_.push_back(file_lists[ ii ] );
505  ligand_crystal_natives_.push_back( crystal_native_list[ ii ] );
506  //TR << " PROC #" << MPI_rank_ << " "<< ii << " lig discrim: "
507  // << ligand_native_decoy_pairs_[ ii ].first << " "
508  // << ligand_native_decoy_pairs_[ ii ].second << " "
509  // << ligand_crystal_natives_[ ii ] << std::endl;
510  }
511 
512 #ifdef USEMPI
513  Size ii_offset = my_njobs;
514  for ( Size ii = 1; ii < MPI_nprocs_; ++ii ) {
515  Size ii_njobs = ( nextra > ii ? 1 : 0 ) + num_pdbs_per_cpu;
516  MPI_Send( & ii_njobs, 1, MPI_UNSIGNED_LONG, ii, tag_, MPI_COMM_WORLD );
517  for ( Size jj = ii_offset + 1; jj <= ii_offset + ii_njobs; ++jj ) {
518  send_string_to_node( ii, file_lists[ jj ].first );
519  send_string_to_node( ii, file_lists[ jj ].second );
520  send_string_to_node( ii, crystal_native_list[ jj ] );
521  }
522  ii_offset += ii_njobs;
523  }
524  } else {
525  Size my_njobs( 0 );
526  MPI_Recv( & my_njobs, 1, MPI_UNSIGNED_LONG, 0, tag_, MPI_COMM_WORLD, & stat_ );
527  ligand_native_decoy_pairs_.reserve( my_njobs );
528  ligand_crystal_natives_.reserve( my_njobs );
529  for ( Size ii = 1; ii <= my_njobs; ++ii ) {
530  std::string native_pdb_list_name = receive_string_from_node( 0 );
531  std::string decoy_pdb_list_name = receive_string_from_node( 0 );
532  ligand_native_decoy_pairs_.push_back( std::make_pair( native_pdb_list_name, decoy_pdb_list_name ) );
533  std::string crystal_native = receive_string_from_node( 0 );
534  ligand_crystal_natives_.push_back( crystal_native );
535 
536  //TR << " PROC #" << MPI_rank_ << " "<< ii << " lig discrim: "
537  // << ligand_native_decoy_pairs_[ ii ].first << " "
538  // << ligand_native_decoy_pairs_[ ii ].second << " "
539  // << ligand_crystal_natives_[ ii ] << std::endl;
540  }
541 #endif
542  }
543 
544  }
545 
546  if ( option[ optE::optimize_ligand_rot ].user() ) { // one file name per entry
547  if ( MPI_rank_ == 0 ) {
549  std::ifstream pdb_list_file( option[ optE::optimize_ligand_rot ]()().c_str() );
550  while ( pdb_list_file ) {
551  std::string pdb_file;
552  pdb_list_file >> pdb_file;
553  if ( pdb_file != "" ) {
554  file_list.push_back( pdb_file );
555  }
556  }
557 
558  Size const num_pdbs_per_cpu = file_list.size() / MPI_nprocs_;
559  Size const nextra = file_list.size() - num_pdbs_per_cpu * MPI_nprocs_;
560 
561  Size my_njobs = ( nextra >= 1 ? 1 : 0 ) + num_pdbs_per_cpu;
562  for ( Size ii = 1; ii <= my_njobs; ++ii ) {
563  ligand_repack_pdbs_.push_back( file_list[ ii ] );
564  }
565 
566 #ifdef USEMPI
567  Size ii_offset = my_njobs;
568  for ( Size ii = 1; ii < MPI_nprocs_; ++ii ) {
569  Size ii_njobs = ( nextra > ii ? 1 : 0 ) + num_pdbs_per_cpu;
570  MPI_Send( & ii_njobs, 1, MPI_UNSIGNED_LONG, ii, tag_, MPI_COMM_WORLD );
571  for ( Size jj = ii_offset + 1; jj <= ii_offset + ii_njobs; ++jj ) {
572  send_string_to_node( ii, file_list[ jj ] );
573  }
574  ii_offset += ii_njobs;
575  }
576  } else {
577  Size my_njobs( 0 );
578  MPI_Recv( & my_njobs, 1, MPI_UNSIGNED_LONG, 0, tag_, MPI_COMM_WORLD, & stat_ );
579  ligand_repack_pdbs_.reserve( my_njobs );
580  for ( Size ii = 1; ii <= my_njobs; ++ii ) {
581  std::string pdb_name = receive_string_from_node( 0 );
582  ligand_repack_pdbs_.push_back( pdb_name );
583  }
584 #endif
585  }
586 
587  }
588 
589  if ( option[ optE::optimize_dGbinding ].user() ) { // each entry: bound pdb, unbound pdb, experimental dG
590  //TR << "divide_up_pdbs(): node " << MPI_rank_ << " reading optimize_dGbinding input file..." << std::endl;
591  if ( MPI_rank_ == 0 ) {
594  utility::io::izstream dg_data( option[ optE::optimize_dGbinding ]() );
595  while ( dg_data ) {
596  std::string bound_pdb, unbound_pdb;
597  Real dg_experimental;
598  dg_data >> bound_pdb;
599  if ( bound_pdb == "" ) break; // nothing more could be read
600  dg_data >> unbound_pdb;
601  dg_data >> dg_experimental; // NOTE(review): not checked; a truncated last record would leave dg_experimental unset -- confirm the input is always complete
602  dG_pdb_files.push_back( std::make_pair( bound_pdb, unbound_pdb ) );
603  dgs.push_back( dg_experimental );
604  }
605 
606  Size const num_dgs_per_cpu = dgs.size() / MPI_nprocs_;
607  Size const nextra = dgs.size() - num_dgs_per_cpu * MPI_nprocs_;
608 
609  //TR << "divide_up_pdbs(): node " << MPI_rank_ << " read " << dgs.size() << " dG pairs: sending " << num_dgs_per_cpu << " to slaves" << std::endl;
610 
611  Size my_njobs = ( nextra >= 1 ? 1 : 0 ) + num_dgs_per_cpu;
612  dG_bound_unbound_pairs_.reserve( my_njobs ); dG_binding_.reserve( my_njobs );
613  for ( Size ii = 1; ii <= my_njobs; ++ii ) {
614  dG_bound_unbound_pairs_.push_back( dG_pdb_files[ ii ] );
615  dG_binding_.push_back( dgs[ ii ] );
616 
617  //TR << "divide_up_pdbs(): node " << MPI_rank_ << " dG_bind: "
618  // << dG_bound_unbound_pairs_[ ii ].first << " " << dG_bound_unbound_pairs_[ ii ].second << " " << dG_binding_[ ii ] << std::endl;
619  }
620 
621 #ifdef USEMPI
622  Size ii_offset = my_njobs;
623  for ( Size ii = 1; ii < MPI_nprocs_; ++ii ) {
624  Size ii_njobs = ( nextra > ii ? 1 : 0 ) + num_dgs_per_cpu;
625  MPI_Send( & ii_njobs, 1, MPI_UNSIGNED_LONG, ii, tag_, MPI_COMM_WORLD );
626  for ( Size jj = ii_offset + 1; jj <= ii_offset + ii_njobs; ++jj ) {
627  send_string_to_node( ii, dG_pdb_files[ jj ].first );
628  send_string_to_node( ii, dG_pdb_files[ jj ].second );
629  MPI_Send( & dgs[ jj ], 1, MPI_DOUBLE, ii, tag_, MPI_COMM_WORLD ); // the experimental value travels as one MPI_DOUBLE after the two strings
630  }
631  ii_offset += ii_njobs;
632  }
633  } else {
634  Size my_njobs( 0 );
635  MPI_Recv( & my_njobs, 1, MPI_UNSIGNED_LONG, 0, tag_, MPI_COMM_WORLD, & stat_ );
636  dG_bound_unbound_pairs_.reserve( my_njobs );
637  dG_binding_.resize( my_njobs, 0.0 ); // resized rather than reserved: MPI_Recv below writes straight into element ii
638  for ( Size ii = 1; ii <= my_njobs; ++ii ) {
639  std::string bound_pdb = receive_string_from_node( 0 );
640  std::string unbound_pdb = receive_string_from_node( 0 );
641  dG_bound_unbound_pairs_.push_back( std::make_pair( bound_pdb, unbound_pdb ));
642  MPI_Recv( &dG_binding_[ ii ], 1, MPI_DOUBLE, 0, tag_, MPI_COMM_WORLD, & stat_ );
643 
644  //TR << "divide_up_pdbs(): node " << MPI_rank_ << " dG_bind: "
645  // << dG_bound_unbound_pairs_[ ii ].first << " " << dG_bound_unbound_pairs_[ ii ].second << " " << dG_binding_[ ii ] << std::endl;
646  }
647 #endif
648 
649  }
650  //TR << "Exiting divide_up_pdbs for dG bind." << std::endl;
651  }
652 
653  if ( option[ optE::optimize_ddGmutation ].user() ) { // each entry: wild-type file, mutant file, experimental ddG
654  if ( MPI_rank_ == 0 ) {
657  utility::io::izstream ddg_data( option[ optE::optimize_ddGmutation ]() );
658  while ( ddg_data ) {
659  std::string wt_file, mut_file;
660  Real ddg_experimental;
661  ddg_data >> wt_file;
662  if ( wt_file == "" ) break; // nothing more could be read
663  ddg_data >> mut_file;
664  ddg_data >> ddg_experimental;
665  ddG_mut_files.push_back( std::make_pair( wt_file, mut_file ) );
666  ddgs.push_back( ddg_experimental );
667  }
668 
669  Size const num_ddgs_per_cpu = ddgs.size() / MPI_nprocs_;
670  Size const nextra = ddgs.size() - num_ddgs_per_cpu * MPI_nprocs_;
671 
672  //TR << "Node 0 read " << ddgs.size() << " ddG pairs: sending " << num_ddgs_per_cpu << " to slaves" << std::endl;
673 
674  Size my_njobs = ( nextra >= 1 ? 1 : 0 ) + num_ddgs_per_cpu;
675  ddg_mut_wt_pairs_.reserve( my_njobs ); ddGs_.reserve( my_njobs );
676  for ( Size ii = 1; ii <= my_njobs; ++ii ) {
677  ddg_mut_wt_pairs_.push_back( ddG_mut_files[ ii ] );
678  ddGs_.push_back( ddgs[ ii ] );
679  //TR << " PROC #" << MPI_rank_ << " "<< ii << " ddGmut: "
680  // << ddg_mut_wt_pairs_[ ii ].first << " "
681  // << ddg_mut_wt_pairs_[ ii ].second << " "
682  // << ddGs_[ ii ] << std::endl;
683  }
684 
685 #ifdef USEMPI
686  Size ii_offset = my_njobs;
687  for ( Size ii = 1; ii < MPI_nprocs_; ++ii ) {
688  Size ii_njobs = ( nextra > ii ? 1 : 0 ) + num_ddgs_per_cpu;
689  MPI_Send( & ii_njobs, 1, MPI_UNSIGNED_LONG, ii, tag_, MPI_COMM_WORLD );
690  for ( Size jj = ii_offset + 1; jj <= ii_offset + ii_njobs; ++jj ) {
691  send_string_to_node( ii, ddG_mut_files[ jj ].first );
692  send_string_to_node( ii, ddG_mut_files[ jj ].second );
693  MPI_Send( & ddgs[ jj ], 1, MPI_DOUBLE, ii, tag_, MPI_COMM_WORLD );
694  }
695  ii_offset += ii_njobs;
696  }
697  } else {
698  Size my_njobs( 0 );
699  MPI_Recv( & my_njobs, 1, MPI_UNSIGNED_LONG, 0, tag_, MPI_COMM_WORLD, & stat_ );
700  ddg_mut_wt_pairs_.reserve( my_njobs );
701  ddGs_.resize( my_njobs, 0.0 ); // resized so that MPI_Recv below can write into element ii
702  for ( Size ii = 1; ii <= my_njobs; ++ii ) {
703  std::string wt_list = receive_string_from_node( 0 );
704  std::string mut_list = receive_string_from_node( 0 );
705  ddg_mut_wt_pairs_.push_back( std::make_pair( wt_list, mut_list ));
706  MPI_Recv( &ddGs_[ ii ], 1, MPI_DOUBLE, 0, tag_, MPI_COMM_WORLD, & stat_ );
707  //TR << " PROC #" << MPI_rank_ << " "<< ii << " ddGmut: "
708  // << ddg_mut_wt_pairs_[ ii ].first << " "
709  // << ddg_mut_wt_pairs_[ ii ].second << " "
710  // << ddGs_[ ii ] << std::endl;
711  //TR_VERBOSE << "divide_up_pdbs(): node " << MPI_rank_ << " has ddG wt file: '" << ddg_mut_wt_pairs_[ ii ].first << "', ddG mutant file '"
712  // << ddg_mut_wt_pairs_[ ii ].second << "' and experimental ddG: " << ddGs_[ ii ] << std::endl;
713  }
714 
715 #endif
716 
717  }
718  }
719 
720  if ( option[ optE::optimize_ddG_bind_correlation ].user() ) { // each entry: four list-file names plus an experimental ddG
721  if ( MPI_rank_ == 0 ) {
723  utility::vector1< Real > ddGs_binding;
724  utility::io::izstream ddG_bind_data( option[ optE::optimize_ddG_bind_correlation ]() );
725  while ( ddG_bind_data ) {
726  std::string wt_complexes_file, mut_complexes_file, wt_unbounds_file, mut_unbounds_file;
727  Real ddG_experimental;
728  ddG_bind_data >> wt_complexes_file; if ( wt_complexes_file == "" ) break;
729  ddG_bind_data >> mut_complexes_file;
730  ddG_bind_data >> wt_unbounds_file;
731  ddG_bind_data >> mut_unbounds_file;
732  ddG_bind_data >> ddG_experimental;
733  // not sure of a better way to make a vector out of these files
735  files.push_back( wt_complexes_file ); files.push_back( mut_complexes_file );
736  files.push_back( wt_unbounds_file ); files.push_back( mut_unbounds_file );
737  ddG_bind_files.push_back( files );
738  ddGs_binding.push_back( ddG_experimental );
739  }
740 
741  Size const num_ddGs_per_cpu = ddGs_binding.size() / MPI_nprocs_;
742  Size const nextra = ddGs_binding.size() - num_ddGs_per_cpu * MPI_nprocs_;
743 
744  Size my_njobs = ( nextra >= 1 ? 1 : 0 ) + num_ddGs_per_cpu;
745  // reserve room in the class member variables for this rank's share
746  ddG_bind_files_.reserve( my_njobs ); ddGs_binding_.reserve( my_njobs );
747  // the local variables have the same name as the class member variables, minus a trailing underscore
748  for ( Size ii = 1; ii <= my_njobs; ++ii ) {
749  // ddG_bind_files_ is a vector of vectors
750  ddG_bind_files_.push_back( ddG_bind_files[ ii ] );
751  ddGs_binding_.push_back( ddGs_binding[ ii ] );
752  }
753 
754 #ifdef USEMPI
755  Size ii_offset = my_njobs;
756  for ( Size ii = 1; ii < MPI_nprocs_; ++ii ) {
757  Size ii_njobs = ( nextra > ii ? 1 : 0 ) + num_ddGs_per_cpu;
758  MPI_Send( & ii_njobs, 1, MPI_UNSIGNED_LONG, ii, tag_, MPI_COMM_WORLD );
759  for ( Size jj = ii_offset + 1; jj <= ii_offset + ii_njobs; ++jj ) {
760  send_string_to_node( ii, ddG_bind_files[ jj ][ DDGBindOptEData::WT_COMPLEXES_LIST_FILE ] );
761  send_string_to_node( ii, ddG_bind_files[ jj ][ DDGBindOptEData::MUT_COMPLEXES_LIST_FILE ] );
762  send_string_to_node( ii, ddG_bind_files[ jj ][ DDGBindOptEData::WT_UNBOUNDS_LIST_FILE ] );
763  send_string_to_node( ii, ddG_bind_files[ jj ][ DDGBindOptEData::MUT_UNBOUNDS_LIST_FILE ] );
764  MPI_Send( & ddGs_binding[ jj ], 1, MPI_DOUBLE, ii, tag_, MPI_COMM_WORLD );
765  }
766  ii_offset += ii_njobs;
767  }
768  } else {
769  // this code is what the slave nodes will execute; basically, receive the work unit
770  Size my_njobs( 0 );
771  MPI_Recv( & my_njobs, 1, MPI_UNSIGNED_LONG, 0, tag_, MPI_COMM_WORLD, & stat_ );
772  ddG_bind_files_.reserve( my_njobs );
773  ddGs_binding_.resize( my_njobs, 0.0 ); // resized so that MPI_Recv below can write into element ii
774  for ( Size ii = 1; ii <= my_njobs; ++ii ) {
775  std::string wt_complexes_file = receive_string_from_node( 0 );
776  std::string mut_complexes_file = receive_string_from_node( 0 );
777  std::string wt_unbounds_file = receive_string_from_node( 0 );
778  std::string mut_unbounds_file = receive_string_from_node( 0 );
779 
781  files.push_back( wt_complexes_file ); files.push_back( mut_complexes_file );
782  files.push_back( wt_unbounds_file ); files.push_back( mut_unbounds_file );
783 
784  ddG_bind_files_.push_back( files );
785  MPI_Recv( & ddGs_binding_[ ii ], 1, MPI_DOUBLE, 0, tag_, MPI_COMM_WORLD, & stat_ );
786  //TR << " PROC #" << MPI_rank_ << " "<< ii << " ddGmut: "
787  // << ddg_mut_wt_pairs_[ ii ].first << " "
788  // << ddg_mut_wt_pairs_[ ii ].second << " "
789  // << ddGs_[ ii ] << std::endl;
790  TR_VERBOSE << "divide_up_pdbs(): node " << MPI_rank_ << " has "
791  << "ddG bind wt complexes file: '" << ddG_bind_files_[ ii ][ DDGBindOptEData::WT_COMPLEXES_LIST_FILE ]
792  << "', ddG bind mut complexes file '" << ddG_bind_files_[ ii ][ DDGBindOptEData::MUT_COMPLEXES_LIST_FILE ]
793  << "', ddG bind wt unbounds file '" << ddG_bind_files_[ ii ][ DDGBindOptEData::WT_UNBOUNDS_LIST_FILE ]
794  << "', ddG bind mut unbounds file '" << ddG_bind_files_[ ii ][ DDGBindOptEData::MUT_UNBOUNDS_LIST_FILE ]
795  << "' and experimental ddG bind: " << ddGs_binding_[ ii ] << std::endl;
796  }
797 #endif
798  }
799  }
800 
801 
802  if ( option[ optE::rescore::context_round ].user() ) {
803  Size context_round = option[ optE::rescore::context_round ]();
804  if ( context_round != 0 ) { // the statement guarded here is absent from this listing
806  }
807  }
808 
809 }
810 
811 
812 ///
813 /// @begin IterativeOptEDriver::collect_rotamer_energies()
814 ///
815 /// @brief
816 /// Per-outer-loop step around rotamer-energy collection (the collecting calls themselves are on lines absent from this listing -- TODO confirm).
817 /// When optE::rescore::weights was given by the user this method does not return: it finishes through exit_gracefully().
818 void
820 {
821  using namespace pack::rotamer_set;
822  using namespace basic::options;
823  using namespace basic::options::OptionKeys;
824 
825  /// Do this once per iteration through the outer loop
826  /// Make sure this happens before we abort rotamer-energy collection, if we
827  /// are using the -design_first flag.
830 
831 
832  /// Don't bother collecting rotamer optE data if we're not going to optimize
833  /// weights. Just skip straight to the design step.
834  //if ( outer_loop_counter_ == 1 && option[ optE::design_first ].user() ) {
835  // TR_VERBOSE << "collect_rotamer_energies(): design_first flag in use. leaving method" << std::endl;
836  // return;
837  //}
838  // taking this out because otherwise the rescore.log isn't made correctly (-ronj)
839 
847 
848  if ( ! option[ optE::mpi_weight_minimization ] ) {
849  if ( MPI_rank_ == 0 ) { // rank 0 and the other ranks take different branches here; both bodies are absent from this listing
851  } else {
853  }
854  } // else, keep the data on the original cpus for now; send the data after minimization completes.
855 
856  /// If we're simply rescoring the optE data for a particular weight set,
857  /// quit as soon as rotamer data has been collected and written to a file.
858  if ( option[ optE::rescore::weights ].user() ) {
860 
861  // extra option for testing sequence recovery with a weight set
862  if ( option[ optE::rescore::measure_sequence_recovery ].user() ) {
863  // optimize_weights(); // leaving this line to be extra clear; definitely don't optimize the weights!
864 
865  // score_position_data() leaves all the weights in the vars and fixed_terms vectors. need to move the values
866  // out of those containers and into the after_minimization containers because that's what the write_new()
867  // function expects the values to be in.
868  free_weights_after_minimization_ = free_parameters_; // fixed params are set in score_position_data()
869 
872  }
873 
874  exit_gracefully(); // rescoring mode stops here
875  }
876 
877 }
878 
879 void // NOTE(review): the name line is absent from this listing; presumably IterativeOptEDriver::exit_gracefully(), which collect_rotamer_energies() calls -- confirm
881 {
882  barrier(); // presumably waits until every process has reached this point -- confirm against barrier()'s definition
883 #ifdef USEMPI
884  MPI_Finalize(); // shut MPI down before leaving (USEMPI builds only)
885 #endif
886  exit( 0 ); // terminate the process with a success status; this function never returns
887 }
888 
889 
890 ///
891 /// @begin IterativeOptEDriver::setup_derived_free_and_fixed_data()
892 ///
893 /// @brief
894 /// Derives free_score_list_, fixed_score_list_ and the include_/free_/fixed_count_ counters from include_terms_ and fixed_parameters_.
895 /// A score type with a non-zero include_terms_ entry is counted in include_count_; if its fixed_parameters_ entry is zero it is also appended to free_score_list_ and counted in free_count_. Separately, every score type with a non-zero fixed_parameters_ entry is appended to fixed_score_list_ and counted in fixed_count_.
896 /// Original author's notes: include_terms_ is an EnergyMap, the score lists are vector1s of ScoreType, and the counters are Size member variables.
897 void
899 {
902 
904 
905  for( int i=1 ; i <= n_score_types ; ++i ) { // first pass: included terms, and among them the free ones
906  if( include_terms_[ ScoreType(i) ] != 0.0 ) {
907  if( fixed_parameters_[ ScoreType(i) ] == 0.0 ) { // included and not fixed, so free
908  free_score_list_.push_back( ScoreType(i) );
909  ++free_count_;
910  }
911  ++include_count_;
912  }
913  }
914 
915  for( int i = 1; i <= n_score_types; ++i ) { // second pass: fixed terms, whether or not they are in include_terms_
916  if( fixed_parameters_[ ScoreType(i) ] != 0.0 ) {
917  fixed_score_list_.push_back( ScoreType(i) );
918  ++fixed_count_;
919  }
920  }
921 
922 }
923 
924 
925 ///
926 /// @begin IterativeOptEDriver::compute_rotamer_energies_for_assigned_pdbs()
927 ///
928 /// @brief
929 /// Computes the rotamer energies for all positions for all pdbs given (in the call to get_nat_aa_opte_data()).
930 /// Also, optionally, does the same for native rotamer recovery data. Creates an unweighted score function using
931 /// the class method 'create_unweighed_scorefunction'. The scorefunction used to get the interaction energies
932 /// of the rotamers is the one that's created here.
933 ///
934 /// Note: Surface is a PackerTask option. No repacking/design is done here. Just scoring. So the surface
935 /// energies would not be included here in the optEdata. If surface was made into a ScoreType, then it could be
936 /// added. -ronj
937 /// Update: Surface score is now its own EnergyMethod. If 'surface' is detected in the ScoreFunction, then the
938 /// EnergyMethod calculates the surface energy of the pose. You don't get per-residue surface energies this way, but
939 /// you do get the total pose surface energy. -ronj
940 ///
/// @details
/// Replaces optE_data_ with a fresh OptEData on every call. On the first outer-loop iteration
/// (outer_loop_counter_ == 1) each file in native_pdbs_ is read from disk and cached in native_poses_ and
/// context_poses_ (and in rotamer_recovery_context_poses_ when optE::recover_nat_rot is set); on later
/// iterations the cached poses are reused instead of re-reading the files.
941 void
943 {
944  using namespace basic::options;
945  using namespace basic::options::OptionKeys;
946  using namespace core::pack::rotamer_set;
947 
948  if ( MPI_rank_ == 0 ) TR << "compute_rotamer_energies_for_assigned_pdbs(): entered method" << std::endl;
949 
950  optE_data_ = new OptEData; // get rid of old optEdata...
951 
 // Only rank 0 reads the weight-constraint file and adds the resulting constraint data to optE_data_.
952  if ( MPI_rank_ == 0 && option[ optE::constrain_weights ].user() ) {
954  std::string cstfilename = option[ optE::constrain_weights ]();
955  std::ifstream input( cstfilename.c_str() );
956  cst->initialize_constraints_from_file( input );
957  optE_data_->add_position_data( cst );
958  }
959 
961  if ( MPI_rank_ == 0 ) {
962  TR << "compute_rotamer_energies_for_assigned_pdbs(): created scorefxn for calculating rotamer energies" << std::endl;
963  scorefxn->show( std::cout );
964  }
965 
966  // for when unfolded term is in use... -ronj
967  // the thing to be careful about here is to create a scorefunction which has a weight for the unfolded term, but doesn't
968  // set the unfolded term's method weights. if you do that, then the unfolded energy method will return actual values.
969  // that's only meant to be used during the design steps of optE, though, not the rotamer energy collection steps.
970 
971  // do this loop for every pdb we have in the native_pdbs_ list
972  for ( Size n=1; n<= native_pdbs_.size(); ++n ) {
973  //std::string const & filename( pdbs_this_round_[n] );
974  std::string const & native_filename( native_pdbs_[n] );
975 
976  if ( option[ optE::optimize_pssm ] ) {
977  load_pssm_data( native_filename, n );
978  }
979 
980  core::pose::Pose pose, native_pose;
981 
 // First iteration: read the structure from disk and cache it. Later iterations: reuse the cached poses.
982  if ( outer_loop_counter_ == 1 ) {
983  if ( option[ in::file::centroid_input ] ) {
984  core::import_pose::centroid_pose_from_pdb( native_pose, native_filename );
985  } else {
986  core::import_pose::pose_from_pdb( native_pose, native_filename );
987  }
988  pose = native_pose;
989 
990  /// these are stored regardless of whether or not no_design is on the command line...
991  /// useful if rotamer recovery alone is being measured
992  native_poses_.push_back( native_pose );
993  TR_VERBOSE << "compute_rotamer_energies_for_assigned_pdbs(): pushing " << native_filename << " onto native_poses_ vector." << std::endl;
994  context_poses_.push_back( native_pose );
995  if ( option[ optE::recover_nat_rot ] ) rotamer_recovery_context_poses_.push_back( native_pose );
996 
997  } else {
998  pose = context_poses_[ n ];
999  native_pose = native_poses_[ n ];
1000  }
1001 
1002  TR_VERBOSE << "compute_rotamer_energies_for_assigned_pdbs(): " << node_name( MPI_rank_ ) << " calling get_opte_data for " << native_filename << std::endl;
1003 
 // Score both poses with the rotamer-energy score function before any data is gathered from them.
1004  (*scorefxn)( pose );
1005  (*scorefxn)( native_pose );
1006 
1007  utility::file::FileName natname( native_filename );
1008 
 // Native-amino-acid / PSSM data collection; natname.base() is passed along as the tag for this structure.
1009  if ( option[ optE::optimize_nat_aa ] || option[ optE::optimize_pssm ] ) {
1011  natname.base(), pose, native_pose,
1012  *scorefxn, free_score_list_, fixed_score_list_,
1013  *optE_data_ );
1014  }
1015 
1016  if ( option[ optE::optimize_nat_rot ]() ) {
1017  core::pose::Pose context_pose;
1018 
1019  /// Should we use the previously repacked pose, or the native pose to gather data from?
1020  if ( option[ optE::recover_nat_rot ] ) {
1021  context_pose = rotamer_recovery_context_poses_[ n ];
1022  } else {
1023  context_pose = native_pose;
1024  }
1025 
1026  utility::vector1<bool> include_rsd( context_pose.total_residue(), true );
1027 
 // Restrict the data to protein residues. The mask is sized from context_pose but filled from pose's residue types.
 // NOTE(review): this assumes pose and context_pose have the same number of residues -- confirm.
1028  for ( Size j(1); j <= context_pose.total_residue(); ++j ) {
1029  include_rsd[j] = pose.residue_type(j).is_protein();
1030  }
1031 
1033  natname.base(), context_pose, native_pose, include_rsd, *scorefxn,
1035  }
1036 
1037  } // now do it all over again for another pdb in the list native_pdbs_
1038 
1039 }
1040 
1041 
1042 ///
1043 /// @begin IterativeOptEDriver::load_pssm_data
1044 ///
/// @brief
/// Fills pssm_data_ for one protein. On the first outer-loop iteration the data is parsed from the file
/// "<native_filename minus its last four characters>.fasta.probs", stored in pssm_data_ and appended to
/// all_pssm_data_. On later iterations pssm_data_ is simply restored from all_pssm_data_[ which_protein ].
///
/// Each record read from the file is one character followed by chemical::num_canonical_aas numeric values;
/// a warning is printed when those values do not sum to 1 within 0.001.
///
/// @param native_filename name of the native structure file; presumably ends in a four-character
///        extension such as ".pdb" -- confirm against callers.
/// @param which_protein index into all_pssm_data_ used on iterations after the first.
1045 void
1047  std::string const & native_filename,
1048  Size const which_protein // which of the several proteins that this node is responsible for redesigning
1049 )
1050 {
1051  if ( outer_loop_counter_ == 1 ) {
1052 
 // NOTE(review): size() - 4 wraps around for names shorter than four characters -- confirm inputs.
1053  std::string native_substr = native_filename.substr( 0, native_filename.size() - 4 );
1054  std::string pssm_file_name = native_substr + ".fasta.probs";
1055  //std::cerr << "Openning PSSM File " << pssm_file_name << std::endl;
1056  std::ifstream pssm_file( pssm_file_name.c_str() );
1057 
 // Records are accumulated in a list first because their number is not known up front.
1058  std::list< std::pair< chemical::AA, utility::vector1< Real > > > pssm_data;
1060  Size linenum( 0 );
 // NOTE(review): the stream state is tested before the reads rather than after them; verify that a
 // failed read at end-of-file cannot append a spurious trailing record.
1061  while ( pssm_file ) {
1062  ++linenum;
1063  char line_aa;
1064  pssm_file >> line_aa;
1066  Real sum( 0.0 );
1067  for ( Size ii = 1; ii <= chemical::num_canonical_aas; ++ii ) {
1068  pssm_file >> pssm_prob_dist[ ii ];
1069  sum += pssm_prob_dist[ ii ];
1070  }
 // Warn only; the record is still stored below.
1071  if ( std::abs( sum - 1 ) > 0.001 ) {
1072  TR << "Warning: pssm probability distribution does not sum to 1.0: " << sum << std::endl;
1073  TR << "Problem on line " << linenum << " of " << pssm_file_name << std::endl;
1074  }
1075  pssm_data.push_back( std::make_pair( aa, pssm_prob_dist ));
1076  }
1077  pssm_data_.clear();
1078  pssm_data_.resize( pssm_data.size() );
1079 
1080  // copy to vector
1081  std::copy( pssm_data.begin(), pssm_data.end(), pssm_data_.begin() );
1082  all_pssm_data_.push_back( pssm_data_ ); // cached for reuse on later iterations
1083 
1084  if ( pssm_data_.size() == 0 ) { std::cerr << "Did not read file -- possibly not found" << std::endl; }
1085  } else {
1086  pssm_data_ = all_pssm_data_[ which_protein ];
1087  }
1088 }
1089 
1090 #ifdef USEMPI
1091 ///
1092 /// @begin IterativeOptEDriver::send_string_to_node
1093 ///
1094 /// @brief
1095 /// Takes a std::string and a destination and constructs the MPI_Send call.
1096 ///
1097 void
1098 IterativeOptEDriver::send_string_to_node( int destination, std::string const & string_to_send )
1099 {
1100  int tag( 1 );
1101  int len( string_to_send.size() );
1102  MPI_Send( &len, 1, MPI_INT, destination, tag, MPI_COMM_WORLD );
1103  MPI_Send( const_cast< char * > (string_to_send.c_str()), len, MPI_CHAR, destination, tag, MPI_COMM_WORLD );
1104 }
1105 
1106 ///
1107 /// @begin IterativeOptEDriver::receive_string_from_node
1108 ///
1109 /// @brief
1110 /// Receive a string from the node given by source. First find out how long the message is, then allocate space for it
1111 /// and actually receive the message. Returns to the calling function the string that was received.
/// Both messages are received with tag 1, matching the tag used by send_string_to_node.
1112 ///
1114 IterativeOptEDriver::receive_string_from_node( int source )
1115 {
1116  int len( 0 );
1117  int tag( 1 );
1118  MPI_Status stat;
1119  MPI_Recv( &len, 1, MPI_INT, source, tag, MPI_COMM_WORLD, & stat ); // first message: character count
1120  char * str = new char[ len + 1 ]; // one extra byte for the terminator written below
1121  str[ len ] = '\0'; // ? do I need null terminated strings?
1122  MPI_Recv( str, len, MPI_CHAR, source, tag, MPI_COMM_WORLD, & stat ); // second message: the characters themselves
1123  std::string return_string( str, len ); // built from an explicit length, so the terminator is not relied upon
1124  delete [] str;
1125  return return_string;
1126 
1127 }
1128 
1129 #endif
1130 
1131 ///
1132 /// @begin IterativeOptEDriver::send_rotamer_energies_to_master_cpu()
1133 ///
1134 /// @brief
1135 /// Used by all slave nodes; sends the rotamer energies (according to the protocol we've set up here) to the master node.
/// The whole body is compiled only under USEMPI; without it the function does nothing.
/// Message order, all sent to rank 0 with tag_:
///  1. n_score_types ints flagging which free_parameters_ entries are nonzero
///  2. n_score_types ints flagging which fixed_parameters_ entries are nonzero
///  3. the number of positions held in optE_data_
///  4. for every position: its type() as an int, followed by whatever its send_to_node() transmits
1136 ////
1137 void
1139 {
1140 #ifdef USEMPI
1141 
1142  using namespace core::pack::rotamer_set;
1143  //std::cout << " PROC #" << MPI_rank_ << " send_rotamer_energies_to_master_cpu" << std::endl;
1144 
1145  /// 1. Sanity: send a "boolean" vector of the free and fixed energy terms
1146  int * free_energy_terms = new int[ core::scoring::n_score_types ];
1147  int * fixed_energy_terms = new int[ core::scoring::n_score_types ];
 // ScoreType values are 1-based while the C arrays are 0-based, hence the ii - 1.
1148  for ( Size ii = 1; ii <= n_score_types; ++ii ) {
1149  free_energy_terms[ ii - 1 ] = (int) (free_parameters_[ (ScoreType) ii ] != 0.0);
1150  fixed_energy_terms[ ii - 1 ] = (int) (fixed_parameters_[ (ScoreType) ii ] != 0.0);
1151  }
1152  MPI_Send( free_energy_terms, n_score_types, MPI_INT, 0, tag_, MPI_COMM_WORLD );
1153  MPI_Send( fixed_energy_terms, n_score_types, MPI_INT, 0, tag_, MPI_COMM_WORLD );
1154 
1155  delete [] free_energy_terms; free_energy_terms = 0;
1156  delete [] fixed_energy_terms; fixed_energy_terms = 0;
1157 
1158  /// 2. Number of positions on which OptE data has been gathered
1159  Size n_pos = optE_data_->num_positions();
 // NOTE(review): sending a Size as MPI_UNSIGNED_LONG assumes the two have the same width -- confirm.
1160  MPI_Send( & n_pos, 1, MPI_UNSIGNED_LONG, 0, tag_, MPI_COMM_WORLD );
1161 
1162  for ( OptEPositionDataOPs::const_iterator
1163  iter = optE_data_->position_data_begin(),
1164  iter_end = optE_data_->position_data_end();
1165  iter != iter_end; ++iter ) {
1166  int position_data_type = (*iter)->type();
1167 
 // The type id goes first; presumably the receiver uses it to pick the matching data class -- confirm in the receiver.
1168  MPI_Send( & position_data_type, 1, MPI_INT, 0, tag_, MPI_COMM_WORLD );
1169  (*iter)->send_to_node( 0, tag_ );
1170  }
1171 #endif
1172 }
1173 
1174 
1175 ///
1176 /// @begin IterativeOptEDriver::collect_rotamer_energies_from_slave_cpus()
1177 ///
1178 /// @brief
1179 /// Helper method for collecting energies. Calls collect_rotamer_energies_from_slave_cpu for all CPU's being used.
/// After the per-node loop it reports the number of positions now held in optE_data_ and the sum of
/// memory_use() over all of them.
1180 ////
1182 {
1183  using namespace core::pack::rotamer_set;
1184 
 // Ranks 1 .. MPI_nprocs_ - 1 are visited; rank 0 is not part of the loop.
1185  for ( Size ii = 1; ii < MPI_nprocs_; ++ii ) {
1187  }
1188  TR << "collect_rotamer_energies_from_slave_cpus(): master node with " << optE_data_->num_positions() << " positions" << std::endl;
1189 
 // Sum the memory footprint reported by each position for the log message below.
1190  Size total( 0 );
1191  for ( OptEPositionDataOPs::const_iterator
1192  iter = optE_data_->position_data_begin(),
1193  iter_end = optE_data_->position_data_end();
1194  iter != iter_end; ++iter ) {
1195  total += (*iter)->memory_use();
1196  }
1197  TR << "collect_rotamer_energies_from_slave_cpus(): master node using " << total << " bytes for "
1198  << optE_data_->num_positions() << " positions." << std::endl;
1199 
1200 }
1201 
1202 
1203 ///
1204 /// @begin IterativeOptEDriver::collect_decoy_discrimination_data()
1205 ///
1206 /// @brief
1207 /// Collect optE data for decoy discrimination. Similar to the get_nat_aa_opte_data method.
///
/// @details
/// Three phases:
///  1. Only while decoy_discrim_data_ is still null: for every entry of decdisc_native_decoy_pairs_, read the
///     native and decoy pdb list files, load and score each structure, and file it as native or decoy through
///     add_structure_based_on_rms(). Optionally records the standard deviation of reference-score-function
///     energies over the decoys (optE::normalize_decoy_score_spread) and keeps copies of the poses for later
///     rounds (optE::repack_and_minimize_decoys).
///  2. When optE::repack_and_minimize_decoys is set and this is not the first outer-loop iteration: repack and
///     minimize copies of the stored poses and add the results as additional structures.
///  3. Always: append every entry of decoy_discrim_data_ to optE_data_.
1208 ////
1209 void
1211 {
1212  using namespace core::io::pdb;
1213  using namespace core::pack::rotamer_set;
1214  using namespace core::pose;
1215  using namespace basic::options;
1216  using namespace basic::options::OptionKeys;
1217  using namespace core::scoring;
1218 
 // Scratch buffers handed to single_structure_data_for_pose() for every structure.
1219  utility::vector1< Real > free_data( free_score_list_.size() );
1220  utility::vector1< Real > fixed_data( fixed_score_list_.size() );
1221 
1222  bool const calc_decoy_score_sd( option[ optE::normalize_decoy_score_spread ].user() );
1223 
1224 
1225  if ( decoy_discrim_data_ == 0 ) {
1226 
1227  if ( option[ optE::ramp_nativeness ] ) {
1228  if ( option[ optE::min_decoy_rms_to_native ].user() ) {
1229  PNatStructureOptEData::set_nativeness_high( option[ optE::min_decoy_rms_to_native ] );
1230  }
1231  if ( option[ optE::max_rms_from_native ].user() ) {
1232  PNatStructureOptEData::set_nativeness_low( option[ optE::max_rms_from_native ] );
1233  }
1234  }
1235 
1237 
1240 
1241  /// Collect decoy energies to compute standard deviation
1242  ScoreFunctionOP decoy_spread_reference_sfxn;
1243 
1244  if ( option[ optE::normalize_decoy_score_spread ].user() ) {
1245  /// special case if you say "SCORE12" as the weights file, then create the score function as the standard.wts + score12.wts_patch
1246  if ( option[ optE::normalize_decoy_score_spread ]() == "SCORE12" ) {
1248  } else {
1249  decoy_spread_reference_sfxn = ScoreFunctionFactory::create_score_function( option[ optE::normalize_decoy_score_spread ] );
1250  }
1251  }
1252 
1253  if ( option[ optE::repack_and_minimize_decoys ] ) {
1257  }
1258 
1259  for ( Size ii = 1; ii <= decdisc_native_decoy_pairs_.size(); ++ii ) {
1260 
1261 
1262  PNatStructureOptEDataOP structure_data = new PNatStructureOptEData;
1263  if ( option[ optE::n_top_natives_to_optimize ].user() ) {
1264  structure_data->n_top_natives_to_score( option[ optE::n_top_natives_to_optimize ] );
1265  }
1266 
1267  {//scope
1269  structure_data->tag( cryst_fname.base() ); // trim path and extension data -- beautiful utility
1270  }
1271 
1272  utility::vector1< std::string > native_pdb_names, decoy_pdb_names;
1273  //std::cout << " PROC #" << MPI_rank_ << " reading pdb lists " ;
1274  //std::cout << decdisc_native_decoy_pairs_[ ii ].first << " and ";
1275  //std::cout << decdisc_native_decoy_pairs_[ ii ].second << std::endl;
1276 
 // Read whitespace-separated file names; the empty-string test drops the failed read at end-of-file.
1277  std::ifstream native_pdblist( decdisc_native_decoy_pairs_[ ii ].first.c_str() );
1278  while ( native_pdblist ) {
1279  std::string native_pdb;
1280  native_pdblist >> native_pdb;
1281  if ( native_pdb != "" ) native_pdb_names.push_back( native_pdb );
1282  }
1283  if ( native_pdb_names.size() == 0 ) {
1284  std::cerr << "ERROR: no native structures specified in " << decdisc_native_decoy_pairs_[ ii ].first << " on node " << MPI_rank_ << std::endl;
1285  }
1286 
1287  std::ifstream decoy_pdblist( decdisc_native_decoy_pairs_[ ii ].second.c_str() );
1288  while ( decoy_pdblist ) {
1289  std::string decoy_pdb;
1290  decoy_pdblist >> decoy_pdb;
1291  if ( decoy_pdb != "" ) decoy_pdb_names.push_back( decoy_pdb );
1292  }
 // NOTE(review): this branch reports on the decoy list, yet the message text says "native" -- looks like a copy/paste slip.
1293  if ( decoy_pdb_names.size() == 0 ) {
1294  std::cerr << "ERROR: no native structures specified in " << decdisc_native_decoy_pairs_[ ii ].second << " on node " << MPI_rank_ << std::endl;
1295  }
1296 
1297  TR_VERBOSE << "collect_decoy_discrimination_data(): scoring natives and decoys of " << structure_data->tag() << std::endl;
1298 
1299  /// Collect decoy energies to compute standard deviation
1300  utility::vector1< Real > decoy_energies;
1301  if ( option[ optE::normalize_decoy_score_spread ].user() ) {
1302  decoy_energies.reserve( decoy_pdb_names.size() + native_pdb_names.size() );
1303  }
1304 
1305 
1306  core::pose::Pose crystal_native;
1307  load_pose( crystal_native, decdisc_crystal_natives_[ ii ], false );
1308  if ( option[ optE::repack_and_minimize_decoys ] ) {
1309  decdisc_xtal_natives_[ ii ] = crystal_native;
1310  }
1311 
 // Residue count of the first native; every later structure must match it or is excluded.
1312  Size first_total_residue( 0 );
1313  for ( Size jj = 1; jj <= native_pdb_names.size(); ++jj ) {
1314  //std::cout << " PROC #" << MPI_rank_ << " reading pdb: #" << jj << " " << native_pdb_names[ jj ] << std::endl;
1315  /// read the pdb into a pose
1316  core::pose::Pose pose;
1317  load_pose( pose, native_pdb_names[ jj ], false );
1318 
1319  if ( option[ optE::repack_and_minimize_decoys ] ) {
1320  decdisc_native_poses_[ ii ].push_back( pose );
1321  }
1322 
1323  if ( jj == 1 ) {
1324  structure_data->set_total_residue( pose.total_residue() );
1325  first_total_residue = pose.total_residue();
1326  } else if ( first_total_residue != pose.total_residue() ) {
1327  std::cerr << "Warning: total_residue for " << native_pdb_names[ jj ];
1328  std::cerr << "not equal to native #1 total_residue: " << first_total_residue << " vs ";
1329  std::cerr << pose.total_residue() << std::endl;
1330  std::cerr << "Excluding structure!" << std::endl;
1331  continue;
1332  }
1333 
1335  scorefxn, pose, crystal_native, free_data, fixed_data, native_pdb_names[ jj ] );
1336 
 // An intended native that was filed as a decoy still contributes to the decoy score spread.
1337  AddStatus added_native = add_structure_based_on_rms( ssd, structure_data, true /* intended native */ );
1338  if ( calc_decoy_score_sd && added_native == ADDED_STRUCTURE_OPPOSITE_AS_INTENDED ) {
1339  decoy_energies.push_back( (*decoy_spread_reference_sfxn)( pose ) );
1340  }
1341 
1342  if ( option[ optE::repack_and_minimize_input_structures ] ) {
1343  repack_and_minimize_pose( pose, weighted_sfxn );
1345  scorefxn, pose, crystal_native, free_data, fixed_data, "rpmin_0_"+native_pdb_names[ jj ] );
1346 
1347  add_structure_based_on_rms( ssd, structure_data, true /* intended native */ );
1348  }
1349  }
1350 
1351  for ( Size jj = 1; jj <= decoy_pdb_names.size(); ++jj ) {
1352 
1353  /// read the pdb into a pose
1354  core::pose::Pose pose;
1355  //std::cout << " PROC #" << MPI_rank_ << " reading pdb: #" << jj << " " << decoy_pdb_names[ jj ] << std::endl;
1356  load_pose( pose, decoy_pdb_names[ jj ], false );
1357 
1358  if ( first_total_residue != pose.total_residue() ) {
1359  std::cerr << "Warning: total_residue for " << decoy_pdb_names[ jj ];
1360  std::cerr << "not equal to native #1 total_residue: " << first_total_residue << " vs ";
1361  std::cerr << pose.total_residue() << std::endl;
1362  std::cerr << "Excluding structure!" << std::endl;
1363  continue;
1364  }
1365 
1366  if ( option[ optE::repack_and_minimize_decoys ] ) {
1367  decdisc_decoy_poses_[ ii ].push_back( pose );
1368  }
1369 
1371  scorefxn, pose, crystal_native, free_data, fixed_data, decoy_pdb_names[ jj ] );
1372 
 // Only decoys that were actually filed as decoys contribute to the decoy score spread.
1373  AddStatus added_decoy = add_structure_based_on_rms( ssd, structure_data, false /* intended native */ );
1374  if ( calc_decoy_score_sd && added_decoy == ADDED_STRUCTURE_AS_INTENDED ) {
1375  decoy_energies.push_back( (*decoy_spread_reference_sfxn)( pose ) );
1376  }
1377 
1378 
1379  if ( option[ optE::repack_and_minimize_input_structures ] ) {
1380  repack_and_minimize_pose( pose, weighted_sfxn );
1382  scorefxn, pose, crystal_native, free_data, fixed_data, "rpmin_0_" + decoy_pdb_names[ jj ] );
1383 
1384  add_structure_based_on_rms( ssd2, structure_data, false /* intended native */ );
1385  }
1386 
1387  }
 // Normalization is switched on only when the measured spread is nonzero.
1388  if ( calc_decoy_score_sd ) {
1389  Real decoy_score_sd = numeric::statistics::std_dev( decoy_energies.begin(), decoy_energies.end(), Real( 0.0 ) );
1390  if ( decoy_score_sd != 0 ) {
1391  structure_data->set_normalize_decoy_stddev( true );
1392  structure_data->set_initial_decoy_stddev( decoy_score_sd );
1393  }
1394  }
1395  decoy_discrim_data_->add_position_data( structure_data );
1396  }
1397  }
1398 
 // Later rounds: regenerate natives and decoys by repacking and minimizing copies of the stored poses.
1399  if ( option[ optE::repack_and_minimize_decoys ] && outer_loop_counter_ != 1 ) {
1400  Size count = 0;
1403 
1404  for ( OptEPositionDataOPs::const_iterator
1405  iter = decoy_discrim_data_->position_data_begin(),
1406  iter_end = decoy_discrim_data_->position_data_end();
1407  iter != iter_end; ++iter ) {
1408 
1409  utility::vector1< Pose > new_nats, new_decs;
1410  utility::vector1< Real > new_nats_scores, new_decs_scores;
1411 
 // count is the 1-based index of this entry; it is used to index the stored pose vectors below.
1412  ++count;
1413  runtime_assert( dynamic_cast< PNatStructureOptEData * > ( (*iter)() ) );
1414  PNatStructureOptEDataOP structure_data(
1415  static_cast< PNatStructureOptEData * > ( (*iter)() ) );
1416  /// Create new natives
1417  for ( Size ii = 1, iie = decdisc_native_poses_[ count ].size(); ii <= iie; ++ii ) {
1418  core::pose::Pose pose = decdisc_native_poses_[ count ][ ii ]; // work on a copy; the stored pose is left untouched
1419  repack_and_minimize_pose( pose, weighted_sfxn );
1420  new_nats.push_back( pose );
1421  new_nats_scores.push_back( ( *weighted_sfxn )( pose ) );
1423  unweighted_sfxn, pose, decdisc_xtal_natives_[ count ], free_data, fixed_data,
1424  "rpmin_nat_" + utility::to_string( outer_loop_counter_ ) );
1425  add_structure_based_on_rms( ssd, structure_data, true /* intended native */ );
1426  }
1427  /// Create new decoys
1428  for ( Size ii = 1, iie = decdisc_decoy_poses_[ count ].size(); ii <= iie; ++ii ) {
1429  core::pose::Pose pose = decdisc_decoy_poses_[ count ][ ii ]; // work on a copy; the stored pose is left untouched
1430  repack_and_minimize_pose( pose, weighted_sfxn );
1431  new_decs.push_back( pose );
1432  new_decs_scores.push_back( ( *weighted_sfxn )( pose ) );
1434  unweighted_sfxn, pose, decdisc_xtal_natives_[ count ], free_data, fixed_data ,
1435  "rpmin_dec_" + utility::to_string( outer_loop_counter_ ) );
1436  add_structure_based_on_rms( ssd, structure_data, false /* intended native */ );
1437  }
1438 
 // Optionally dump the selected new decoys into this node's work directory.
 // NOTE(review): judging by its name, arg_least_several picks the lowest-scoring entries -- confirm.
1439  if ( option[ optE::output_top_n_new_decoys ].user() ) {
1440  Size n_to_output = option[ optE::output_top_n_new_decoys ];
1441  utility::vector1< Size > top_decoy_inds( n_to_output, 0 );
1442  utility::arg_least_several( new_decs_scores, top_decoy_inds );
1443  for ( Size ii = 1; ii <= top_decoy_inds.size(); ++ii ) {
1444  new_decs[ top_decoy_inds[ ii ] ].dump_pdb( "workdir_" + to_string( MPI_rank_ ) +
1445  "/" + structure_data->tag() + "_" + to_string( outer_loop_counter_ ) + "_"
1446  + to_string( ii ) + ".pdb" );
1447  }
1448  }
1449  }
1450  }
1451 
 // Hand every decoy-discrimination entry over to optE_data_.
1452  for ( OptEPositionDataOPs::const_iterator
1453  iter = decoy_discrim_data_->position_data_begin(),
1454  iter_end = decoy_discrim_data_->position_data_end();
1455  iter != iter_end; ++iter ) {
1456  optE_data_->add_position_data( *iter );
1457  }
1458 
1459 }
1460 
1461 
1462 ///
1463 /// @begin IterativeOptEDriver::single_structure_data_for_pose()
1464 ///
/// @brief
/// Scores pose with scorefxn, copies the total energies of the free and fixed score types into the two scratch
/// vectors, and wraps them in a new SingleStructureData that also records the CA rmsd to crystal_native and
/// the given tag.
///
/// @param pose structure to score; non-const because scoring is applied to it.
/// @param crystal_native reference structure for the CA rmsd.
/// @param free_data scratch vector, overwritten at indices 1..free_score_list_.size().
/// @param fixed_data scratch vector, overwritten at indices 1..fixed_score_list_.size().
/// @param structure_tag label stored on the returned object.
/// @return the newly allocated SingleStructureData.
1468  core::pose::Pose & pose,
1469  core::pose::Pose const & crystal_native,
1470  utility::vector1< Real > & free_data, // scratch space; avoids new
1471  utility::vector1< Real > & fixed_data, // scratch space; avoids new
1472  std::string const & structure_tag
1473 ) const
1474 {
1475  (*scorefxn)( pose ); // the energies read below come from this scoring call
1476 
1477  for ( Size kk = 1; kk <= free_score_list_.size(); ++kk ) {
1478  free_data[ kk ] = pose.energies().total_energies()[ free_score_list_[ kk ] ];
1479  }
1480  for ( Size kk = 1; kk <= fixed_score_list_.size(); ++kk ) {
1481  fixed_data[ kk ] = pose.energies().total_energies()[ fixed_score_list_[ kk ] ];
1482  }
1483  SingleStructureDataOP ssd = new SingleStructureData( free_data, fixed_data );
1484  core::Real native_rms = core::scoring::CA_rmsd( crystal_native, pose );
1485  ssd->rms( native_rms );
1486  ssd->tag( structure_tag );
1487  return ssd;
1488 }
1489 
1490 
1491 ///
1492 /// @begin IterativeOptEDriver::add_structure_based_on_rms()
1493 ///
/// @brief
/// Files ssd into structure_data as either a native or a decoy, based on ssd->rms(), the caller's intent
/// (intended_native) and the optE::ramp_nativeness, optE::max_rms_from_native and
/// optE::min_decoy_rms_to_native options.
///
1494 /// @details Returns 1 if added as intended, 0 if not added at all, and -1 if added in the opposite of its intention.
/// Callers compare the result against ADDED_STRUCTURE_AS_INTENDED and ADDED_STRUCTURE_OPPOSITE_AS_INTENDED;
/// DID_NOT_ADD_STRUCTURE is returned when the structure is rejected.
/// NOTE(review): most return statements are not visible in this listing; the "/// else" markers below suggest
/// that each add_decoy/add_native call in the nested branches is followed by a return -- confirm.
1495 ///
1496 AddStatus
1499  PNatStructureOptEDataOP structure_data,
1500  bool intended_native
1501 ) const
1502 {
1503  bool const ramp_nativeness( basic::options::option[ basic::options::OptionKeys::optE::ramp_nativeness ] );
1504 
1505 
1506  if ( intended_native ) {
 // With ramp_nativeness the only test is against nativeness_high(): above it is a decoy, otherwise a native.
1507  if ( ramp_nativeness ) {
1508  if ( ssd->rms() > PNatStructureOptEData::nativeness_high() ) {
1509  structure_data->add_decoy( ssd );
1511  }
1512  /// else
1513  structure_data->add_native( ssd );
1515  }
1516 
 // Too far from the crystal structure to count as a native: file it as a decoy if it is beyond the
 // decoy threshold, otherwise drop it.
1517  if ( option[ optE::max_rms_from_native ].user() ) {
1518  if ( ssd->rms() > option[ optE::max_rms_from_native ]() ) {
 // NOTE(review): this tests the option's value, whereas the mirrored decoy branch below tests .user() -- confirm intent.
1519  if ( option[ optE::min_decoy_rms_to_native ]() ) {
1520  if ( ssd->rms() > option[ optE::min_decoy_rms_to_native ]() ) {
1521  structure_data->add_decoy( ssd );
1523  //TR << "Excluding decoy " << decoy_pdb_names[ jj ] << " with rms: " << decoy_rms << std::endl;
1524  }
1525  }
1526  return DID_NOT_ADD_STRUCTURE; // Do not count this structure as a native.
1527  }
1528  }
1529  structure_data->add_native( ssd );
1530  } else {
1531 
 // Same ramp_nativeness rule as in the intended-native branch above.
1532  if ( ramp_nativeness ) {
1533  if ( ssd->rms() > PNatStructureOptEData::nativeness_high() ) {
1534  structure_data->add_decoy( ssd );
1536  }
1537  /// else
1538  structure_data->add_native( ssd );
1540  }
1541 
1542 
 // Too close to the crystal structure to count as a decoy: file it as a native if it is within the
 // native threshold, otherwise drop it.
1543  if ( option[ optE::min_decoy_rms_to_native ].user() ) {
1544  if ( ssd->rms() < option[ optE::min_decoy_rms_to_native ]() ) {
1545  if ( option[ optE::max_rms_from_native ].user() ) {
1546  if ( ssd->rms() < option[ optE::max_rms_from_native ]() ) {
1547  structure_data->add_native( ssd );
1549  }
1550  }
1551  //TR << "Excluding decoy " << decoy_pdb_names[ jj ] << " with rms: " << decoy_rms << std::endl;
1552  return DID_NOT_ADD_STRUCTURE; // Do not treat this structure as a decoy
1553  }
1554  }
1555  //std::cout << "decoy rms: " << structure_data->tag() << " " << decoy_rms << std::endl;
1556  structure_data->add_decoy( ssd );
1557  }
1558 
1560 }
1561 
1562 
1563 ///
1564 /// @begin IterativeOptEDriver::compute_rotamers_around_ligands()
1565 ///
1566 /// @brief
1567 /// Ligand stuff.
/// For every file in ligand_repack_pdbs_: obtains the native pose (read from disk and cached in
/// ligand_repack_native_poses_ on the first outer-loop iteration, taken from that cache afterwards), scores
/// it, and builds a per-residue mask that selects protein residues lying close to the ligand. The mask is
/// built for the data-collection call that follows it.
1568 ////
1569 void
1571 {
1572  using namespace basic::options;
1573  using namespace basic::options::OptionKeys;
1574  using namespace core::pack::rotamer_set;
1575 
1577 
1578  for ( Size n=1; n <= ligand_repack_pdbs_.size(); ++n ) {
1579  std::string const & native_filename( ligand_repack_pdbs_[n] );
1580 
1581  core::pose::Pose native_pose;//, pose;
1582  if ( outer_loop_counter_ == 1 ) {
1583  core::import_pose::pose_from_pdb( native_pose, native_filename );
1584  //pose = native_pose;
1585  ligand_repack_native_poses_.push_back( native_pose );
1586  //ligand_repack_context_poses_.push_back( native_pose );
1587  } else {
1588  //pose = ligand_repack_context_poses_[ n ];
1589  native_pose = ligand_repack_native_poses_[ n ];
1590  }
1591 
1592  //(*scorefxn)( pose );
1593  (*scorefxn)( native_pose );
1594 
1595  utility::file::FileName natname( native_filename );
1596  core::pose::Pose context_pose;
1597  /// Should we use the previously repacked pose, or the native pose to gather data from?
1598  //if ( option[ optE::recover_nat_rot ] ) {
1599  // context_pose = rotamer_recovery_context_poses_[ n ];
1600  //} else {
1601  context_pose = native_pose;
1602  //}
1603 
1604  // Only include protein residues within 6A of touching the ligand
1605  utility::vector1<bool> include_rsd( context_pose.total_residue(), false );
1606  int const jump_id = context_pose.num_jump(); // assume ligand is last jump
 // is_upstream is filled by partitioning the fold tree at the ligand jump; entries left false are treated as ligand residues below.
1607  FArray1D_bool is_upstream ( context_pose.total_residue(), false );
1608  context_pose.fold_tree().partition_by_jump( jump_id, is_upstream );
1609  for(core::Size i = 1, i_end = context_pose.total_residue(); i <= i_end; ++i) {
1610  // Nothing on ligand side can move
1611  if ( ! is_upstream(i) ) continue;
1612  // on protein side, have to do distance check
1613  core::conformation::Residue const & prot_rsd = context_pose.residue(i);
1614  if( ! prot_rsd.is_protein() ) continue;
1615  for(core::Size j = 1, j_end = context_pose.total_residue(); j <= j_end; ++j) {
1616  if ( is_upstream(j) ) continue; // compare against only ligand residues
1617  core::conformation::Residue const & lig_rsd = context_pose.residue(j);
 // Include residue i as soon as any ligand heavy atom lies within (nbr_radius + 6.0) of its neighbor atom;
 // squared distances are compared, so no square root is taken.
1618  for(core::Size k = 1, k_end = lig_rsd.nheavyatoms(); k <= k_end; ++k) {
1619  double dist2 = lig_rsd.xyz(k).distance_squared( prot_rsd.xyz(prot_rsd.nbr_atom()) );
1620  double cutoff = prot_rsd.nbr_radius() + 6.0;
1621  if ( dist2 <= cutoff * cutoff ) {
1622  include_rsd[i] = true;
1623  goto END_LIGRES_LOOP; // C++ lacks multi-level break :(
1624  }
1625  }
1626  }
1627  END_LIGRES_LOOP: ; // compiler needs ; as a no-op before end of loop
1628  }
1629 
1631  natname.base(), context_pose, native_pose, include_rsd, *scorefxn,
1633  }
1634 }
1635 
1636 
1637 ///
1638 /// @begin IterativeOptEDriver::collect_ligand_discrimination_data()
1639 ///
1640 /// @brief
1641 /// Ligand stuff.
1642 ////
1643 void
1645 {
1646  using namespace core::pack::rotamer_set;
1647 
1648  if ( ligand_discrim_data_ == 0 ) {
1649 
1651 
1652  /// Refactor this
1654 
1655  //std::string scorelog_name( "workdir_" + to_string( MPI_rank_ ) + "/decdisc_scores.dat" );
1656  //std::ofstream scorelog( scorelog_name.c_str() );
1657 
1658  for ( Size ii = 1; ii <= ligand_native_decoy_pairs_.size(); ++ii ) {
1659  PNatLigPoseOptEDataOP structure_data = new PNatLigPoseOptEData;
1660 
1661  {//scope
1662  utility::file::FileName cryst_fname( ligand_crystal_natives_[ ii ] );
1663  //structure_data->tag( cryst_fname.base() ); // trim path and extension data -- beautiful utility
1664  structure_data->tag( ligand_crystal_natives_[ ii ] ); // I need full path name for debugging
1665  }
1666 
1667  utility::vector1< std::string > native_pdb_names, decoy_pdb_names;
1668  //std::cout << " PROC #" << MPI_rank_ << " reading pdb lists " ;
1669  //std::cout << native_decoy_pairs_[ ii ].first << " and ";
1670  //std::cout << native_decoy_pairs_[ ii ].second << std::endl;
1671 
1672  std::ifstream native_pdblist( ligand_native_decoy_pairs_[ ii ].first.c_str() );
1673  if( native_pdblist.bad() ) utility_exit_with_message("Cannot open file "+ligand_native_decoy_pairs_[ ii ].first);
1674  while ( native_pdblist ) {
1675  std::string native_pdb;
1676  native_pdblist >> native_pdb;
1677  if ( native_pdb != "" ) native_pdb_names.push_back( native_pdb );
1678  }
1679 
1680  std::ifstream decoy_pdblist( ligand_native_decoy_pairs_[ ii ].second.c_str() );
1681  if( decoy_pdblist.bad() ) utility_exit_with_message("Cannot open file "+ligand_native_decoy_pairs_[ ii ].second);
1682  while ( decoy_pdblist ) {
1683  std::string decoy_pdb;
1684  decoy_pdblist >> decoy_pdb;
1685  if ( decoy_pdb != "" ) decoy_pdb_names.push_back( decoy_pdb );
1686  }
1687  //std::cout << native_pdb_names.size() << " natives and " << decoy_pdb_names.size() << " decoys" << std::endl;
1688 
1689  if( native_pdb_names.size() == 0 ) {
1690  TR << "[rank " << MPI_rank_ << "] No native entries in " << ligand_native_decoy_pairs_[ ii ].first << "; skipping ligand decoy discrimination for this target." << std::endl;
1691  continue;
1692  }
1693  if( decoy_pdb_names.size() == 0 ) {
1694  TR << "[rank " << MPI_rank_ << "] No decoy entries in " << ligand_native_decoy_pairs_[ ii ].second << "; skipping ligand decoy discrimination for this target." << std::endl;
1695  continue;
1696  }
1697 
1698  core::pose::Pose crystal_native;
1699  //if ( option[ in::file::centroid_input ] ) {
1700  // core::import_pose::centroid_pose_from_pdb( crystal_native, crystal_natives_[ ii ] );
1701  //} else {
1703  //}
1704 
1705  utility::vector1< Real > free_data( free_score_list_.size() );
1706  utility::vector1< Real > fixed_data( fixed_score_list_.size() );
1707  Size first_total_residue( 0 );
1708  for ( Size jj = 1; jj <= native_pdb_names.size(); ++jj ) {
1709  //std::cout << " PROC #" << MPI_rank_ << " reading pdb: #" << jj << " " << native_pdb_names[ jj ] << std::endl;
1710  /// read the pdb into a pose
1711  core::pose::Pose pose;
1712  //if ( option[ in::file::centroid_input ] ) {
1713  // core::import_pose::centroid_pose_from_pdb( pose, native_pdb_names[ jj ] );
1714  //} else {
1715  core::import_pose::pose_from_pdb( pose, native_pdb_names[ jj ] );
1716  //}
1717 
1718  if ( jj == 1 ) {
1719  structure_data->set_total_residue( pose.total_residue() );
1720  first_total_residue = pose.total_residue();
1721  } else if ( first_total_residue != pose.total_residue() ) {
1722  std::cerr << "Warning [node " << MPI_rank_ << "]: total_residue for " << native_pdb_names[ jj ];
1723  std::cerr << " not equal to native #1 total_residue: " << first_total_residue << " vs ";
1724  std::cerr << pose.total_residue() << std::endl;
1725  continue;
1726  }
1727 
1728  ///*Real score = */(*scorefxn)( pose );
1729  //scorelog << "Decoy Discrimination NATIVE " << native_pdb_names[ jj ] << " " << score << "\n";
1730 
1731  EnergyMap emap = score_ligand_interface(*scorefxn, pose);
1732  for ( Size kk = 1; kk <= free_score_list_.size(); ++kk ) {
1733  free_data[ kk ] = emap[ free_score_list_[ kk ] ];
1734  }
1735  for ( Size kk = 1; kk <= fixed_score_list_.size(); ++kk ) {
1736  fixed_data[ kk ] = emap[ fixed_score_list_[ kk ] ];
1737  }
1738  SingleStructureDataOP ssd = new SingleStructureData( free_data, fixed_data );
1739  structure_data->add_native( ssd );
1740  //std::cout << "Adding native, size = " << structure_data->size() << std::endl;
1741  }
1742 
1743  for ( Size jj = 1; jj <= decoy_pdb_names.size(); ++jj ) {
1744 
1745  /// read the pdb into a pose
1746  core::pose::Pose pose;
1747  //std::cout << " PROC #" << MPI_rank_ << " reading pdb: #" << jj << " " << decoy_pdb_names[ jj ] << std::endl;
1748  //if ( option[ in::file::centroid_input ] ) {
1749  // core::import_pose::centroid_pose_from_pdb( pose, decoy_pdb_names[ jj ] );
1750  //} else {
1751  core::import_pose::pose_from_pdb( pose, decoy_pdb_names[ jj ] );
1752  //}
1753 
1754  if ( first_total_residue != pose.total_residue() ) {
1755  std::cerr << "Warning [node " << MPI_rank_ << "]: total_residue for " << decoy_pdb_names[ jj ];
1756  std::cerr << " not equal to native #1 total_residue: " << first_total_residue << " vs ";
1757  std::cerr << pose.total_residue() << std::endl;
1758  continue;
1759  }
1760 
1761  ///*Real score = */ (*scorefxn)( pose );
1762  //scorelog << "Decoy Discrimination DECOY " << decoy_pdb_names[ jj ] << " " << score << "\n";
1763 
1764  //if ( option[ optE::min_decoy_rms_to_native ].user() ) {
1765  // Real decoy_rms = core::scoring::CA_rmsd( crystal_native, pose );
1766  // if ( decoy_rms < option[ optE::min_decoy_rms_to_native ]() ) {
1767  // //TR << "Excluding decoy " << decoy_pdb_names[ jj ] << " with rms: " << decoy_rms << std::endl;
1768  // continue;
1769  // }
1770  //}
1771 
1772  EnergyMap emap = score_ligand_interface(*scorefxn, pose);
1773  for ( Size kk = 1; kk <= free_score_list_.size(); ++kk ) {
1774  free_data[ kk ] = emap[ free_score_list_[ kk ] ];
1775  }
1776  for ( Size kk = 1; kk <= fixed_score_list_.size(); ++kk ) {
1777  fixed_data[ kk ] = emap[ fixed_score_list_[ kk ] ];
1778  }
1779  SingleStructureDataOP ssd = new SingleStructureData( free_data, fixed_data );
1780  if ( structure_data->size() != 0 ) structure_data->add_decoy( ssd );
1781  //std::cout << "Adding decoy, size = " << structure_data->size() << std::endl;
1782  }
1783  ligand_discrim_data_->add_position_data( structure_data );
1784  }
1785  }
1786  for ( OptEPositionDataOPs::const_iterator
1787  iter = ligand_discrim_data_->position_data_begin(),
1788  iter_end = ligand_discrim_data_->position_data_end();
1789  iter != iter_end; ++iter ) {
1790  optE_data_->add_position_data( *iter );
1791  }
1792 }
1793 
1794 ///
1795 /// @begin IterativeOptEDriver::score_ligand_interface()
1796 ///
1797 /// @brief
1798 /// Computes an interface energy map for a pose.
/// @details
/// The pose is scored as given; a copy of it is scored after a RigidBodyTransMover with step size 500
/// has been applied across the pose's last jump ( pose.num_jump() ). The returned EnergyMap holds, for
/// every score type from 1 to n_score_types, the as-given energy minus the separated energy.
/// NOTE(review): presumably the ligand is the partner downstream of the last jump -- confirm against callers.
/// NOTE(review): the signature lines of this function are not present in this listing.
1799 ////
1802 {
1803  // For the plain old total score:
1804  //scorefxn(pose);
1805  //return pose.energies().total_energies();
1806 
1807  // For the interface score:
      // The highest-numbered jump is the one the translation is applied across.
1808  int const jump_id = pose.num_jump();
1809  core::pose::Pose split_pose( pose ); // make a copy
1810  protocols::rigid::RigidBodyTransMover trans_mover( split_pose, jump_id );
1811  // Default direction is to move centroids apart
1812  trans_mover.step_size(500); // make sure they're fully separated!
1813  trans_mover.apply( split_pose );
      // Score the input pose and keep its totals by value, since emap is modified below.
1814  scorefxn(pose);
1815  EnergyMap emap( pose.energies().total_energies() ); // make a copy
      // Score the separated copy; its totals are only read, so a reference is enough.
1816  scorefxn(split_pose);
1817  EnergyMap const & smap = split_pose.energies().total_energies();
1818  //std::cout << "delta_scores";
      // Subtract the separated totals term by term.
1819  for ( Size ii = 1; ii <= n_score_types; ++ii ) {
1820  ScoreType i = (ScoreType) ii;
1821  emap[i] -= smap[i];
1822  //if( emap[i] != 0 ) std::cout << " " << name_from_score_type(i) << " " << emap[i];
1823  }
1824  //std::cout << "\n";
1825  return emap;
1826 }
1827 
1828 
1829 ///
1830 /// @begin IterativeOptEDriver::collect_rotamer_energies_from_slave_cpu()
1831 ///
1832 /// @brief
1833 /// For a calling master node, collects the rotamer energies that were calculated on a slave CPU.
1834 ////
1835 void
1838 #ifdef USEMPI
1839  Size const which_cpu
1840 #else
1841  Size const
1842 #endif
1843 )
1844 {
1845 #ifdef USEMPI
1846  using namespace core::pack::rotamer_set;
1847 
1848  //std::cout << "Node 0 preparing to receive from " << which_cpu << std::endl;
1849 
1850  /// 1. Sanity: check that "boolean" vector of the free and fixed energy terms
1851  /// from the source cpu matches the boolean vectors we are expecting
1852  int * free_energy_terms = new int[ core::scoring::n_score_types ];
1853  int * fixed_energy_terms = new int[ core::scoring::n_score_types ];
1854 
1855  //std::cout << " PROC #" << MPI_rank_ << "collect_rotamer_energies_from_slave_cpu: " << which_cpu << std::endl;
1856 
1857  MPI_Recv( free_energy_terms, n_score_types, MPI_INT, which_cpu, tag_, MPI_COMM_WORLD, &stat_ );
1858  MPI_Recv( fixed_energy_terms, n_score_types, MPI_INT, which_cpu, tag_, MPI_COMM_WORLD, &stat_ );
1859 
1860  //std::cout << " PROC #" << MPI_rank_ << "received free and fixed from " << which_cpu << std::endl;
1861 
1862  for ( Size ii = 1; ii <= n_score_types; ++ii ) {
1863  if ( free_energy_terms[ ii - 1 ] != (int) ( free_parameters_[ (ScoreType) ii ] != 0.0 )) {
1864  std::cerr << "Free energy term mismatch! " << ScoreType( ii ) << " " << free_energy_terms[ ii - 1 ] << " & " << free_parameters_[ (ScoreType) ii ] << std::endl;
1865  utility_exit_with_message( "Free energy term on Node 0 does not match free energy term remotely");
1866  }
1867  if ( fixed_energy_terms[ ii - 1 ] != (int) ( fixed_parameters_[ (ScoreType) ii ] != 0.0 )) {
1868  std::cerr << "Fixed energy term mismatch! " << ScoreType( ii ) << " " << fixed_energy_terms[ ii - 1 ] << " & " << fixed_parameters_[ (ScoreType) ii ] << std::endl;
1869  utility_exit_with_message( "Free energy term on Node 0 does not match fixed energy term remotely");
1870  }
1871  }
1872 
1873  delete [] free_energy_terms; free_energy_terms = 0;
1874  delete [] fixed_energy_terms; fixed_energy_terms = 0;
1875 
1876  /// 2. Number of positions on which OptE data has been gathered remotely
1877  Size n_pos;
1878  MPI_Recv( & n_pos, 1, MPI_UNSIGNED_LONG, which_cpu, tag_, MPI_COMM_WORLD, &stat_ );
1879  //std::cout << " PROC #" << MPI_rank_ << "npos from " << which_cpu << " " << n_pos << std::endl;
1880 
1881  for ( Size ii = 1; ii <= n_pos; ++ii ) {
1882  //std::cout << "Waiting to receive position data type" << std::endl;
1883  int position_data_type;
1884  MPI_Recv( & position_data_type, 1, MPI_INT, which_cpu, tag_, MPI_COMM_WORLD, &stat_ );
1885 
1887  //std::cout << "Node 0 about to receive from node " << which_cpu << std::endl;
1888  ii_data->receive_from_node( which_cpu, tag_ );
1889 
1890  optE_data_->add_position_data( ii_data );
1891  //std::cout << "Node 0 added position data object " << std::endl;
1892  }
1893 #endif
1894 
1895 }
1896 
1897 
      // Returns the current value of the optE::n_design_cycles option.
      // NOTE(review): the signature line of this function is not present in this listing.
1899  using namespace basic::options;
1900  using namespace basic::options::OptionKeys;
1901 
1902  return option[ optE::n_design_cycles ];
1903 }
1904 
1906 
1907 
1908 ///
1909 /// @begin IterativeOptEDriver::intialize_free_and_fixed_energy_terms()
1910 ///
1911 /// @brief
1912 /// Calls the method initialize_free_and_fixed() which reads in the files free and fixed and sets the EnergyMap vectors
1913 /// free_parameters_ and fixed_parameters_. Also here the reference energies array gets init'd. Finally,
1914 /// setup_derived_free_and_fixed_data gets called.
/// @details
/// Unless the optE::dont_use_reference_energies option was set by the user, the node with MPI_rank_ == 0
/// seeds before_minimization_reference_energies_ from a hard-coded table (rpp_refs) and announces this on
/// std::cout. Other ranks leave before_minimization_reference_energies_ untouched in the visible code.
/// NOTE(review): the calls to initialize_free_and_fixed() and setup_derived_free_and_fixed_data named
/// above are not present in this listing -- confirm against the original source file.
1915 ///
1917 
1919 
1920  using namespace basic::options;
1921  using namespace basic::options::OptionKeys;
1922 
1923  if ( ! option[ optE::dont_use_reference_energies ].user() ) {
1924  if ( MPI_rank_ == 0 ) {
1925  //for ( Size ii = 1; ii <= n_score_types; ++ii ) {
1926  // if ( free_parameters_[ (ScoreType) ii ] != 0.0 ) {
1927  // free_parameters_[ (ScoreType) ii ] = optE_RG.uniform() + 0.01; // random non-zero starting point
1928  // }
1929  //}
      // Starting reference energies, one per amino acid, stored 0-based here.
      // NOTE(review): the table has exactly 20 entries while the loop below runs to
      // chemical::num_canonical_aas -- assumes that constant is 20; confirm.
1930  Real const rpp_refs[20] = {
1931  0.16, 1.7, -0.67, -0.81, 0.63, -0.17, 0.56, 0.24, -0.65, -0.1,
1932  -0.34, -0.89, 0.02, -0.97, -0.98, -0.37, -0.27, 0.29, 0.91, 0.51
1933  };
1934 
      // The destination is indexed from 1, the table from 0.
1935  for ( Size ii = 1; ii <= chemical::num_canonical_aas; ++ii ) {
1936  before_minimization_reference_energies_[ ii ] = rpp_refs[ ii - 1 ];
1937  }
1938  std::cout << "INITIALIZED before_minimization_reference_energies_ REFERENCE ENERGIES" << std::endl;
1939  }
1940  }
1942 }
1943 
1944 
1945 ///
1946 /// @begin IterativeOptEDriver::optimize_weights()
1947 ///
1948 /// @brief
1949 /// Optimizes weights using either a standard minimizer or a ParticleSwarmMinimizer (which is significantly better!).
1950 ///
1951 /// Assuming the ParticleSwarmMinimizer is used...
1952 /// Each particle traverses weight space and comes up with a set of values, then it evaluates the fitness function
1953 /// (which basically calls all of the appropriate underlying get_score() methods) to see how good the new weights are.
1954 /// The particles then update their direction and velocity and come up with a new set of weights to evaluate. This is
1955 /// done with a set number of particles and a set number of cycles. Currently using on the order of 100 particles and
1956 /// 20 cycles. (No tests have been done to determine the optimum number of particles and/or cycles, although ronj likes
1957 /// to use more particles whenever possible.)
1958 ///
1960 {
1961  using namespace basic::options;
1962  using namespace basic::options::OptionKeys;
1963  using namespace core::optimization;
1964 
1965  barrier();
1966 
1967  bool const optimize_in_parallel( option[ optE::mpi_weight_minimization ] );
1968  if ( MPI_rank_ != 0 && !optimize_in_parallel ) {
1969  return; // do nothing if we're not the master cpu AND we're not distributing weight optimization over multiple cpus.
1970  }
1971 
1972  if ( MPI_rank_ == 0 ) TR << "optimize_weights(): entered method" << std::endl;
1973 
1974  //attach_debugger();
1975 
1977  if ( option[ optE::wrap_dof_optimization ].user() )
1979 
1980  /// Skip weight optimization in the context of the native protein if the
1981  /// "design first" flag is on the command line, but only after
1982  /// free_weights_before_minimization_ has been updated.
1983  if ( outer_loop_counter_ == 1 && option[ optE::design_first ].user() ) {
1984  TR << "optimize_weights(): design_first flag in use. node " << MPI_rank_ << " leaving method." << std::endl;
1985  return;
1986  }
1987 
1988  if ( option[ optE::starting_refEs ].user() && outer_loop_counter_ == 1 ) {
1989  std::cout << "READING REFERENCE ENERGIES FROM FILE" << std::endl;
1990  std::cout << "FREE COUNT: " << free_count_ << std::endl;
1993  //std::cout << "after_minimization_reference_energies_: ";
1994  //for ( Size ii = 1; ii <= after_minimization_reference_energies_.size(); ++ii ) {
1995  // std::cout << after_minimization_reference_energies_[ ii ] << " ";
1996  //}
1997  //std::cout << std::endl;
1998  if ( free_count_ == 0 ) {
1999  TR << "optimize weights early exit" << std::endl;
2000  return; // don't bother trying to optimize the weights
2001  }
2002  }
2003 
2004  // Create an "optE data minimizer" object
2005  // Do the actual weight optimization
2006  OptEMultifuncOP opt_min_ptr = new OptEMultifunc(
2008  (int) free_count_,
2013  OptEMultifunc & opt_min( * opt_min_ptr );
2014 
2015 
2016  if ( option[ optE::fit_reference_energies_to_aa_profile_recovery ] && option[ optE::dont_use_reference_energies ] ) {
2017  utility_exit_with_message("optimize_weights(): can't do fitting of reference energies to profile recovery when 'dont_use_reference_energies' flag in use.");
2018  }
2019 
2020  if ( option[ optE::fit_reference_energies_to_aa_profile_recovery ] && !(outer_loop_counter_ == 1 && inner_loop_counter_ == 1) ) {
2021  opt_min.fix_reference_energies( true );
2022  } else if ( option[ optE::starting_refEs ].user() && outer_loop_counter_ == 1 ) {
2023  opt_min.fix_reference_energies( true );
2024  }
2025 
2026  if ( MPI_rank_ == 0 ) {
2027 
2028  clock_t starttime = clock();
2029 
2030  /// Set include terms according to current weight set
2033 
2034  //TR << " about to start getting dofs " <<std::endl;
2036  Real start_fitness = opt_min( start_dofs );
2037  Real end_fitness = 0.0;
2038 
2039  TR << "optimize_weights(): objective function start: " << start_fitness << " dofs: [ ";
2040  for ( Size ii = 1; ii <= start_dofs.size(); ++ii) { TR << F( 8,4,start_dofs[ii] ) << ", "; }
2041  TR << "]" << std::endl;
2042 
2043  MultifuncOP opt_min2;
2044 
2045  Size ndofs = start_dofs.size();
2046  if ( option[ optE::wrap_dof_optimization ].user() ) {
2047  if ( outer_loop_counter_ == 1 ) {
2049  free_score_list_, (int) free_count_,
2051  opt_min_ptr );
2052  minimizer_dofs_before_minimization_.resize( wrapped_opt_min_->n_real_dofs(), 0.0 );
2053  minimizer_dofs_after_minimization_.resize( wrapped_opt_min_->n_real_dofs(), 0.0 );
2054  minimizer_dofs_mixed_.resize( wrapped_opt_min_->n_real_dofs(), 0.0 );
2055  } else {
2056  wrapped_opt_min_->set_multifunc( opt_min_ptr );
2057  }
2058  opt_min2 = wrapped_opt_min_;
2059  ndofs = wrapped_opt_min_->n_real_dofs();
2060  } else {
2061  opt_min2 = opt_min_ptr;
2062  }
2063 
2064  if( option[ optE::optimize_starting_free_weights ] ) {
2065 
2066  using namespace core::optimization;
2067  /// High tolerance -- don't over minimize before pswarm gets to explore
2068  optimization::MinimizerOptions options( "dfpmin_armijo_nonmonotone_atol", 1, true, false, false );
2069  optimization::Minimizer minimizer( *opt_min2, options );
2070 
2071  /// Low tolerance -- drill down!
2072  optimization::MinimizerOptions options2( "dfpmin", 1e-4, true, false, false );
2073  optimization::Minimizer minimizer2( *opt_min2, options2 );
2074 
2075  /// Lowest tolerance -- drill down!
2076  optimization::MinimizerOptions options3( "dfpmin", 1e-9, true, false, false );
2077  optimization::Minimizer minimizer3( *opt_min2, options3 );
2078 
2079  // create two Multivec's (vector1 Real) that will hold the minimum and maximum weights a Particle can achieve
2080  // sometimes the particles leave the weight on fa_intra_rep as zero since the score12 weight is so low. this messes
2081  // things up down the line when sending things via MPI because only non-zero values are sent to speed up the run.
2082  // if the particles leave the weight at zero, reset it to a very small value so it's still present.
2083  // or just enforce a minimum weight of 0.0001 on the particles. -ronj
2084  Multivec min( ndofs, 0.001), max( ndofs, 5.0);
2085 
2086  ParticleSwarmMinimizer psm(min, max);
2087 
2088  // run 20 cycles of the PS algorithm, using opt_min2 as the fitness function and some number of particles
2089  // the option has a default value of 100, if the user doesn't specify it.
2090  TR_VERBOSE << "optimize_weights(): node " << MPI_rank_ << " beginning round 1 of PSO." << std::endl;
2091  ParticleOPs particles = psm.run( option[ optE::number_of_swarm_cycles ], *opt_min2, option[ optE::number_of_swarm_particles ] );
2092 
2093 // psm.print_particles( particles, "optimize_weights(): round 1" );
2094 
2095  for(core::Size i = 1; i <= particles.size(); ++i) {
2096  Real start( (*opt_min2)( particles[i]->p_ ) );
2097  minimizer.run( particles[i]->p_ );
2098  TR.Trace << "first round minimization: " << i << " start: " << start << ", end: " << (*opt_min2)( particles[ i ]->p_ ) << std::endl;
2099  }
2100  psm.run( 0, *opt_min2, particles );
2101 // psm.print_particles( particles, "optimize_weights(): round 1 post min" );
2102 
2103  TR_VERBOSE << "optimize_weights(): node " << MPI_rank_ << " beginning round 2 of PSO using [minimized] round 1 particles." << std::endl;
2104 
2105  psm.run( option[ optE::number_of_swarm_cycles ], *opt_min2, particles );
2106 // psm.print_particles( particles, "optimize_weights(): round 2" );
2107 
2108  clock_t min_starttime = clock();
2109  //for(core::Size ii = 1; ii <= particles.size(); ++ii) {
2110  for(core::Size ii = 1; ii <= 15; ++ii) { // minimizing is really slow so only do some particles!
2111  Real start( (*opt_min2)( particles[ii]->p_ ) );
2112  TR.Trace << "starting minimization of particle " << ii << std::endl;
2113  minimizer2.run( particles[ii]->p_ );
2114  TR.Trace << "second round minimization: " << ii << " start: " << F( 9,5,start ) << ", end: " << F( 9,5,(*opt_min2)( particles[ ii ]->p_ )) << std::endl;
2115  std::cout.flush();
2116  }
2117  clock_t min_stoptime = clock();
2118  TR_VERBOSE << "optimize_weights(): particle minimization took " << ((double)min_stoptime-min_starttime) / CLOCKS_PER_SEC << " seconds." << std::endl;
2119 
2120  // This will re-sort the particles and update pbest_
2121  psm.run(0, *opt_min2, particles);
2122 // psm.print_particles( particles, "optimize_weights(): round 2 post min" );
2123 
2124  ParticleOP p = particles[1];
2125  TR_VERBOSE << "optimize_weights(): best particle fitness: " << F( 9,5,-1.0 * p->fitness_pbest() ) << " dofs: [";
2126  for ( core::Size j=1; j <= p->pbest().size(); ++j ) { TR_VERBOSE << F(8,4,p->pbest()[j]) << ", "; }
2127  TR_VERBOSE << " ]" << std::endl;
2128 
2129  start_dofs = particles[1]->pbest();
2130  //TR << "Final round of gradient-based minimization, score before: " << -1 * particles[1]->fitness_pbest() << std::endl;
2131  //minimizer3.run( start_dofs ); //ronj don't use minimization when using the new unfolded state energy
2132  //TR << "Final round of gradient-based minimization, score after: " << (*opt_min2)( start_dofs ) << std::endl;
2133 
2134  end_fitness = (*opt_min2)( start_dofs );
2135 
2136  if ( ( end_fitness > start_fitness ) && option[ optE::repeat_swarm_optimization_until_fitness_improves ] ) {
2137  // try one more round with a new set of particles that have starting values closer to the start dofs
2138 
2139  // run 40 cycles of the PS algorithm, using opt_min2 as the fitness function and some number of particles
2140  // the option has a default value of 100, if the user doesn't specify it.
2141  ParticleOPs particles = psm.run( 2 * option[ optE::number_of_swarm_cycles ], *opt_min2, option[ optE::number_of_swarm_particles ],
2143 
2144 // psm.print_particles( particles, "optimize_weights(): round extra innings" );
2145 
2146  clock_t min_starttime = clock();
2147  for(core::Size ii = 1; ii <= 10; ++ii) { // minimizing is really slow so only do some of the particles
2148  Real start( (*opt_min2)( particles[ii]->p_ ) );
2149  TR_VERBOSE << "starting minimization of particle " << ii << std::endl;
2150  //minimizer2.run( particles[ii]->p_ );
2151  minimizer.run( particles[ii]->p_ );
2152  TR_VERBOSE << "second round minimization: " << ii << " start: " << F( 9,5,start ) << ", end: " << F( 9,5,(*opt_min2)( particles[ ii ]->p_ )) << std::endl;
2153  std::cout.flush();
2154  }
2155  clock_t min_stoptime = clock();
2156  TR_VERBOSE << "optimize_weights(): particle minimization took " << ((double)min_stoptime-min_starttime) / CLOCKS_PER_SEC << " seconds." << std::endl;
2157 
2158  // re-score and re-sort the particles to get the best scoring one
2159  psm.run( 0, *opt_min2, particles );
2160 // psm.print_particles( particles, "optimize_weights(): round extra innings post min" );
2161 
2162  ParticleOP p = particles[1];
2163  TR_VERBOSE << "optimize_weights(): best particle fitness: " << F( 9,5,-1.0 * p->fitness_pbest() ) << " dofs: [";
2164  for ( core::Size j=1; j <= p->pbest().size(); ++j ) { TR_VERBOSE << F(8,4,p->pbest()[j]) << ", "; }
2165  TR_VERBOSE << " ]" << std::endl;
2166 
2167  start_dofs = particles[1]->pbest();
2168  }
2169 
2170 
2171  if ( option[ optE::wrap_dof_optimization ].user() ) {
2173  /// From this point forward start_dofs needs to be the size the OptEMultifunc expects;
2174  start_dofs = wrapped_opt_min_->derived_dofs( minimizer_dofs_after_minimization_ );
2175  }
2176 
2177  } else {
2178  // somebody is crazy and not using the Particle Swarm to do weight space exploration...
2180  TR << "optimize_weights(): minimization not recommended when using unfolded state energy" << std::endl;
2181  }
2182  optimization::MinimizerOptions options( "dfpmin", 1e-9, true, false, false );
2183  optimization::Minimizer minimizer( opt_min, options );
2184  minimizer.run( start_dofs );
2185  }
2186 
2187  TR << "optimize_weights(): end: " << ( *opt_min2 )( start_dofs ) << ", dofs: [ ";
2188  for( Size ii = 1; ii <= start_dofs.size(); ++ii) { TR << F(8,4,start_dofs[ii]) << ", "; }
2189  TR << " ]" << std::endl;
2190 
2191  if ( option[ optE::wrap_dof_optimization ].user() ) {
2192  TR << "Wrapped weights after minimization" << std::endl;
2194  TR << std::endl;
2195  }
2196 
2197  test_weight_sensitivity(opt_min, start_dofs);
2198 
2199  // set the after_min refE vector
2200  if ( ! option[ optE::dont_use_reference_energies ].user() ) {
2201  //for ( Size ii = 1; ii <= chemical::num_canonical_aas; ++ii ) { // 20 should not be hardcoded here!
2202  // after_minimization_reference_energies_[ ii ] = start_dofs[ free_count_+ii ]; // save them in non-negated form
2203  //}
2204  // apparently, now, the opt_min object can be queried to get the reference energy values instead of what the
2205  // particles get out
2207  }
2208 
2209  // create an EnergyMap from the most recent set of DOFs, but make sure to reset the energies for the fixed terms to 0.
2211  for( Size ii = 1 ; ii <= fixed_score_list_.size(); ++ii ) {
2212  free_weights_after_minimization_[ fixed_score_list_[ ii ]] = 0; // reset fixed parameters
2213  }
2214 
2215  // initialize some vectors that will later be used by the PositionData print/get_score methods
2216  optimization::Multivec vars( free_count_ + after_minimization_reference_energies_.size(), 0.0 );
2217  optimization::Multivec dE_dvars( free_count_ + after_minimization_reference_energies_.size(), 0.0 );
2218 
2219  Size num_energy_dofs( free_count_ );
2220  int num_ref_dofs( after_minimization_reference_energies_.size() );
2221  int num_total_dofs( num_energy_dofs + num_ref_dofs );
2222  scoring::EnergyMap fixed_terms = fixed_parameters_;
2223  scoring::ScoreTypes score_list( free_score_list_ );
2224  scoring::ScoreTypes fixed_score_list( fixed_score_list_ );
2225 
2226  // set the vars Multivec to contain the free weights and reference weights
2227  for ( Size ii = 1; ii <= free_score_list_.size(); ++ii ) {
2228  vars[ ii ] = free_weights_after_minimization_[ free_score_list_[ ii ] ] ;
2229  TR_VERBOSE << "optimize_weights(): free weights before/after minimization_: [ " << name_from_score_type( free_score_list_[ii] ) << " ]: "
2230  << F(8,4,free_weights_before_minimization_[ free_score_list_[ ii ] ]) << " -> "
2231  << F(8,4,free_weights_after_minimization_[ free_score_list_[ ii ] ]) << std::endl;
2232  }
2233 
2234  if ( ! option[ optE::dont_use_reference_energies ].user() ) {
2235  for ( Size ii = 1; ii <= chemical::num_canonical_aas; ++ii ) {
2237  }
2238  }
2239 
2240  /// NOW write all the score data to the log. This means first grabbing the data from distant nodes,
2241  /// if that data is not on the master node already
2242  /// There's no reason, of course, why the optE data should have to live on a single CPU all at once...
2243  clock_t stoptime = clock();
2244 
2245  TR << "optimize_weights(): optimization took " << ((double) stoptime-starttime)/CLOCKS_PER_SEC << " seconds." << std::endl;
2246 
2247  // release hold of the multifunc that the wrapper wraps, but
2248  /// hold on to the wrapper itself for later reuse. No more calls to func() after this point.
2249  if ( option[ optE::wrap_dof_optimization ].user() ) {
2250  wrapped_opt_min_->set_multifunc( 0 );
2251  }
2252 
2253  /// Tell remote nodes that function evaluation is over.
2254  if ( optimize_in_parallel ) {
2255  opt_min.declare_minimization_over(); // tell remote nodes to stop waiting for func/dfuncs to evaluate
2257  }
2258 
2259  ///////////////////// Log the results after minimization is complete ////////////////////////////////
2260 
2261  /// 2. open log file.
2262  std::string logname = "logdir/minimization_dat_" + utility::to_string( outer_loop_counter_ ) + ".dat";
2263  std::ofstream outlog( logname.c_str() );
2264 
2265  /// 3. score all the position data
2266  utility::vector1< Real > cumulative_score_list(n_optE_data_types,0.0);
2267  //int ii = 1;
2268  for( OptEPositionDataOPs::const_iterator itr = optE_data_->position_data_begin(),
2269  e_itr = optE_data_->position_data_end(); itr != e_itr ; ++itr ) {
2270  (*itr)->print_score( outlog, component_weights_, vars, dE_dvars,
2271  num_energy_dofs, num_ref_dofs, num_total_dofs,
2272  fixed_terms, score_list, fixed_score_list );
2273  cumulative_score_list[(*itr)->type()] += (*itr)->get_score(
2274  component_weights_, vars, dE_dvars,
2275  num_energy_dofs, num_ref_dofs, num_total_dofs,
2276  fixed_terms, score_list, fixed_score_list );
2277  //++ii;
2278  //if ( ii % 10 == 0 ) {
2279  // //TR << ".";
2280  //}
2281  }
2282  for ( Size ii=1; ii <= n_optE_data_types; ii++) {
2283  TR << "optimize_weights(): energy component: " << OptEPositionDataFactory::optE_type_name( OptEPositionDataType( ii ))
2284  << " " << cumulative_score_list[ii] << std::endl;
2285  }
2286 
2287 
2288  /// 4. prints some extra information about the range of energies observed to the minimization data file
2289 
2290  // db //
2293  Real const faux_max( -1234 ); Real const faux_min( 1234 );
2294  for ( Size ii = 1; ii <= n_optE_data_types; ++ii ) {
2295  //for ( EnergyMap::iterator iter = rawE_min[ ii ].begin(); iter != rawE_min[ ii ].end(); ++iter ) {
2296  // *iter = faux_min;
2297  //}
2298  //for ( EnergyMap::iterator iter = rawE_max[ ii ].begin(); iter != rawE_max[ ii ].end(); ++iter ) {
2299  // *iter = faux_max;
2300  //}
2301  for ( Size jj = 1; jj <= n_score_types; ++jj ) {
2302  rawE_min[ ii ][ (ScoreType) jj ] = faux_min;
2303  rawE_max[ ii ][ (ScoreType) jj ] = faux_max;
2304  }
2305 
2306  }
2307 
2308  for( OptEPositionDataOPs::const_iterator itr = optE_data_->position_data_begin(),
2309  e_itr = optE_data_->position_data_end(); itr != e_itr ; ++itr ) {
2310  (*itr)->range( score_list, fixed_score_list,
2311  rawE_min[(*itr)->type()], rawE_max[(*itr)->type()] );
2312  }
2313 
2314  ScoreTypes free_and_fixed( score_list );
2315  for ( Size ii = 1; ii <= fixed_score_list.size(); ++ii ) {
2316  free_and_fixed.push_back( fixed_score_list[ ii ] );
2317  }
2318  std::sort(free_and_fixed.begin(), free_and_fixed.end() );
2319 
2320  for ( Size ii = 1; ii <= n_optE_data_types; ++ii ) {
2321  outlog << "DATA RANGE: ";
2323  outlog << " ";
2324  for ( Size jj = 1; jj <= free_and_fixed.size(); ++jj ) {
2325  if ( rawE_min[ ii ][ free_and_fixed[ jj ] ] > rawE_max[ ii ][ free_and_fixed[ jj ] ] ) continue;
2326  outlog << "( " << name_from_score_type( free_and_fixed[ jj ]) << ", ";
2327  outlog << rawE_min[ ii ][ free_and_fixed[ jj ] ] << ", ";
2328  outlog << rawE_max[ ii ][ free_and_fixed[ jj ] ] << " ) ";
2329  }
2330  outlog << "\n";
2331  }
2332  } else {
2333  opt_min.wait_for_remote_vars();
2334  /// once minimization has completed, send this data to the master cpu so it can output it to the log.
2335  /// There's no reason, of course, why the optE data should have to live on a single CPU all at once...
2337  }
2338  TR << "leaving optimize_weights()" << std::endl;
2339 }
2340 
2341 
/// @brief
/// Writes a one-dimensional sensitivity scan of the objective function around a set of dofs.
/// @details
/// Output goes to weightdir/sensitivity_<outer_loop_counter_>.dat. The first line records func(dofs).
/// Then, for each entry of free_score_list_, that single dof is set to 0.0, 0.1, ..., 2.0 (21 samples)
/// while all other dofs keep their input values; the change in func relative to func(dofs) is written
/// for every sample, followed by the largest increase seen ("maxDelta").
/// @param func objective function to evaluate.
/// @param dofs reference point of the scan; not modified.
/// NOTE(review): the line carrying the qualified function name is not present in this listing.
2342 void
2344  OptEMultifunc const & func,
2345  core::optimization::Multivec const & dofs
2346 ) const
2347 {
2348  std::string logname = "weightdir/sensitivity_" + utility::to_string( outer_loop_counter_ ) + ".dat";
2349  std::ofstream out( logname.c_str() );
2350  Real const minval = func(dofs);
2351  out << "Minimum function value " << minval << std::endl;
2352  // Later DOFs include the AA ref energies, so we don't want ALL of them:
2353  //for(Size dof_idx = 1; dof_idx <= dofs.size(); ++dof_idx) {
2354  for(Size dof_idx = 1; dof_idx <= free_score_list_.size(); ++dof_idx) {
2355  Real maxval = minval;
2356  out << "term_" << dof_idx << " " << free_score_list_[dof_idx];
      // Count steps with an integer: repeatedly adding 0.1 to a floating-point counter accumulates
      // rounding error, so the former "val <= 2.0" test could fail before the 2.0 sample was taken.
2357  for(Size step = 0; step <= 20; ++step) {
2358  //for(Real scale = 0.0; scale <= 2.0; scale += 0.5) {
2359  //if(scale == 1.0) continue;
2360  core::optimization::Multivec dofs_copy = dofs;
2361  //dofs_copy[dof_idx] *= scale;
2362  dofs_copy[dof_idx] = 0.1 * step; // absolute weight value: 0.0, 0.1, ..., 2.0
2363  Real const newval = func(dofs_copy);
2364  maxval = std::max(maxval, newval);
2365  out << " " << (newval - minval);
2366  }
2367  out << " maxDelta " << (maxval - minval) << std::endl;
2368  }
2369  out.close();
2370 }
2371 
/// @brief
/// Reads per-amino-acid reference energies from the weights file named by fname.
/// @details
/// The file is scanned token by token. Each "METHOD_WEIGHTS" token followed by "ref" triggers a read of
/// up to chemical::num_canonical_aas real values into the result, which starts out as all zeros.
/// If no "METHOD_WEIGHTS ref" entry is encountered, the program exits via utility_exit_with_message.
/// @return the vector of reference energies, indexed from 1.
/// NOTE(review): the signature lines of this function are not present in this listing.
2374 {
2375  utility::vector1< Real > reference_energies( chemical::num_canonical_aas, 0.0 );
2376 
2377  utility::io::izstream weight_file( fname.c_str() );
2378  bool read_refEs = false;
2379  while ( weight_file ) {
2380  std::string tag;
2381  weight_file >> tag;
2382  if ( tag == "METHOD_WEIGHTS" ) {
2383  weight_file >> tag; // "ref"
      // METHOD_WEIGHTS entries for anything other than "ref" are ignored.
2384  if ( tag != "ref" ) continue;
2385  for ( Size ii = 1; ii <= chemical::num_canonical_aas; ++ii ) {
2386  Real aa_refE;
2387  weight_file >> aa_refE;
      // Stop at the first failed extraction so that no unread value is stored.
2388  if (!weight_file) break;
2389  reference_energies[ ii ] = aa_refE;
2390  }
      // NOTE(review): this flag is set even when the loop above stopped early, so a truncated
      // "ref" line yields zeros for the missing entries without any error -- confirm this is intended.
2391  read_refEs = true;
2392  }
2393  }
2394  if ( ! read_refEs ) {
2395  utility_exit_with_message( "Failed to read METHOD_WEIGHTS from file " + fname + ". IterativeOptEDriver::read_reference_energies_from_file() " );
2396  }
2397  return reference_energies;
2398 
2399 }
2400 
2401 void
2403 {
2404  using namespace core::pack::rotamer_set;
2405  using namespace basic::options;
2406  using namespace basic::options::OptionKeys;
2407 
2408  runtime_assert( option[ optE::rescore::weights ].user() );
2409 
2410  bool const optimize_in_parallel( option[ optE::mpi_weight_minimization ] );
2411 
2412  if ( MPI_rank_ != 0 ) {
2413  //opt_min.wait_for_remote_vars();
2414  /// once minimization has completed, send this data to the master cpu so it can output it to the log.
2415  /// There's no reason, of course, why the optE data should have to live on a single CPU all at once...
2416  if ( optimize_in_parallel ) {
2418  } else {
2419  return;
2420  }
2421 
2422  } else {
2423 
2424  /// Tell remote nodes that function evaluation is over.
2425  if ( optimize_in_parallel ) {
2426  //opt_min.declare_minimization_over(); // tell remote nodes to stop waiting for func/dfuncs to evaluate
2428  }
2429 
2430  /// 1. read in the weight set and reference energies
2431  /// and initialize the Multivecs needed to call OptEPositionData::print_score()
2432  utility::vector1< Real > reference_energies( chemical::num_canonical_aas, 0.0 );
2433  EnergyMap weight_set;
2434 
2435  std::ifstream weight_file( option[ optE::rescore::weights ]()().c_str() );
2436  while ( weight_file ) {
2437  std::string tag;
2438  weight_file >> tag;
2439  if ( tag == "METHOD_WEIGHTS" ) {
2440  weight_file >> tag; // "ref"
2441  for ( Size ii = 1; ii <= chemical::num_canonical_aas; ++ii ) {
2442  Real aa_refE;
2443  weight_file >> aa_refE;
2444  reference_energies[ ii ] = aa_refE;
2445  }
2446  } else if ( ScoreTypeManager::is_score_type( tag ) ) {
2448  Real weight;
2449  weight_file >> weight;
2450  TR << "Reading weight: " << st << " " << weight << std::endl;
2451  weight_set[ st ] = weight;
2452  } else {
2453  TR << "Warning:: ignoring tag " << tag << " from " << option[ optE::rescore::weights ]()() << std::endl;
2454  }
2455 
2456  }
2457 
2458  /// The following variables are input parameters to the print_score function in optEPositionData
2461 
2462  Size num_energy_dofs( free_count_ );
2463  int num_ref_dofs( before_minimization_reference_energies_.size() );
2464  int num_total_dofs( num_energy_dofs + num_ref_dofs );
2465 
2466  scoring::EnergyMap fixed_terms;
2467  scoring::ScoreTypes score_list( free_score_list_ );
2468  scoring::ScoreTypes fixed_score_list( fixed_score_list_ );
2469 
2470  for ( Size ii = 1; ii <= score_list.size(); ++ii ) {
2471  TR_VERBOSE << "setting vars " << ii << " to " << weight_set[ free_score_list_[ ii ] ] << std::endl;
2472  vars[ ii ] = weight_set[ free_score_list_[ ii ] ];
2473  // also set the free_parameters Map in case the user isn't optimizing weights but just rescoring a weight set
2474  free_parameters_[ free_score_list_[ ii ] ] = weight_set[ free_score_list_[ ii ] ];
2475  }
2476 
2477  if ( ! option[ optE::dont_use_reference_energies ].user() ) {
2478  TR_VERBOSE << "setting reference energies ";
2479  for ( Size ii = 1; ii <= chemical::num_canonical_aas; ++ii ) {
2480  TR_VERBOSE << ii << " " << reference_energies[ ii ] << ", ";
2481  vars[ ii + free_count_ ] = reference_energies[ ii ];
2482  after_minimization_reference_energies_[ ii ] = reference_energies[ ii ];
2483  }
2484  TR_VERBOSE << std::endl;
2485  }
2486 
2487  for ( Size ii = 1; ii <= fixed_score_list_.size(); ++ii ) {
2488  TR_VERBOSE << "setting fixed term " << ii << " " << fixed_score_list_[ ii ] << " " << weight_set[ fixed_score_list_[ ii ] ] << std::endl;
2489  fixed_terms[ fixed_score_list_[ ii ] ] = weight_set[ fixed_score_list_[ ii ] ];
2490  // also set the fixed_parameters Map in case the user isn't optimizing weights but just rescoring a weight set
2491  fixed_parameters_[ fixed_score_list_[ ii ] ] = weight_set[ fixed_score_list_[ ii ] ];
2492  }
2493 
2494  /// 2. open log file.
2495  std::ofstream outlog( option[ optE::rescore::outlog ]()().c_str() );
2496  TR_VERBOSE << "log file opened, scoring position data" << std::endl;
2497 
2498  /// 3. score all the position data
2499  utility::vector1< Real > cumulative_score_list( n_optE_data_types, 0.0 );
2500  //int ii = 1;
2501  for( OptEPositionDataOPs::const_iterator itr = optE_data_->position_data_begin(), e_itr = optE_data_->position_data_end() ; itr != e_itr ; ++itr ) {
2502  (*itr)->print_score( outlog, component_weights_, vars, dE_dvars, num_energy_dofs, num_ref_dofs, num_total_dofs, fixed_terms, score_list, fixed_score_list );
2503  cumulative_score_list[(*itr)->type()] += (*itr)->get_score( component_weights_, vars, dE_dvars, num_energy_dofs, num_ref_dofs, num_total_dofs, fixed_terms, score_list, fixed_score_list );
2504  //++ii;
2505  //if ( ii % 10 == 0 ) {
2506  // //TR << ".";
2507  //}
2508  }
2509  for ( Size ii=1; ii <= n_optE_data_types; ii++) {
2510  if ( cumulative_score_list[ii] != 0 )
2511  TR_VERBOSE << "optimization function energy component: " << OptEPositionDataFactory::optE_type_name( OptEPositionDataType( ii ))
2512  << " " << cumulative_score_list[ii] << std::endl;
2513  }
2514 
2515  /// 4. close log file.
2516  /// noop -- happens automatically when the ofstream goes out of scope.
2517  }
2518 }
2519 
2520 
2521 ///
2522 /// @begin IterativeOptEDriver::write_new_scorefile()
2523 ///
2524 /// @brief
2525 /// Prepares the inner-loop weights and reference energies for the next round, passes them from rank 0 to the
     /// other ranks over MPI (USEMPI builds only), and has rank 0 write them to the file named by get_scorefile_name().
2526 ///
     /// @details
     /// Rank 0 follows one of four paths (reference-energy profile fitting on/off, wrapped dof optimizer on/off).
     /// When profile fitting is active and this is not the first inner-loop pass, only the reference energies are
     /// nudged, based on the ratio of designed to expected amino acid frequencies, and then re-centered.
     /// NOTE(review): several statements of this function (its signature and the assignments that perform the
     /// mixing, among others) are not visible in this listing; the comments here describe only the visible code.
     ///
2527 /// @remarks
2528 /// The reference energy term is automatically added to the end of the scorefile with a weight of 1, so don't put it as
2529 /// a free or fixed param or it will be counted twice. The actual reference energies per AA are varied during the
2530 /// protocol and also written out here.
     /// Both the "METHOD_WEIGHTS ref" line and the trailing "ref 1" line are skipped when the user has set
     /// optE::dont_use_reference_energies.
2531 ///
2533 {
2534  using namespace basic::options;
2535  using namespace basic::options::OptionKeys;
2536 
2537  optE_data_ = 0; /// clear memory!
2538 
2539  if ( MPI_rank_ == 0 ) {
2540 
      // alpha multiplies the before-minimization values in the mixing expressions below
2542  Real alpha = 1 - mixing_factor_;
2543 
2544  // here we begin a significant branching of write_new_scorefile
2545  // we have four possibilities: writing with and without consideration of sequence entropy and writing
2546  // with and without use of the wrapped dof minimizer
2547 
2548  if ( option[ optE::fit_reference_energies_to_aa_profile_recovery ] ) {
2549  using namespace core::chemical;
2550 
2551  // the below is too confusing and should be against mini coding guidelines (-ronj)
2552  //Real const fa_atr_weight(
2553  // fixed_parameters_[ fa_atr ] == 0.0 ?
2554  // (free_weights_inner_loop_[ fa_atr ] == 0.0 ? 1.0 : free_weights_inner_loop_[ fa_atr ] )
2555  // : fixed_parameters_[ fa_atr ] );
2556 
      // fa_atr_weight: the fixed fa_atr weight if nonzero, else the free fa_atr weight if nonzero, else 1.0.
      // It scales every step size defined below.
2557  Real fa_atr_weight;
2558  if ( fixed_parameters_[ fa_atr ] == 0.0 ) {
2559  if ( free_weights_inner_loop_[ fa_atr ] == 0.0 ) {
2560  fa_atr_weight = 1.0;
2561  } else {
2562  fa_atr_weight = free_weights_inner_loop_[ fa_atr ];
2563  }
2564  } else {
2565  fa_atr_weight = fixed_parameters_[ fa_atr ];
2566  }
2567 
2568  //// Maybe I'll add flags for these so that I can tweak them from
2569  //// the command line.
      // The first four constants are thresholds on the ratio aa_freq_obs_ / aa_freq_exp_ (or its inverse);
      // the *_step constants are the matching reference energy increments.
2570  Real const pretty_close( 1.10 );
2571  Real const way_off( 1.5 );
2572  Real const way_way_off( 3.0 );
2573  Real const nowhere_close( 10.0 );
2574  Real const biggest_step( 0.8 * fa_atr_weight );
2575  Real const bigger_step( 0.4 * fa_atr_weight );
2576  Real const big_step( 0.1 * fa_atr_weight );
2577  Real const small_step( 0.05 * fa_atr_weight );
2578  Real const tiny_step( 0.01 * fa_atr_weight );
2579 
2580  if ( inner_loop_counter_ == 1 && ( outer_loop_counter_ == 1 || free_count_ != 0 ) ) {
2581  /// all the way, baby!
2582  ///free_weights_inner_loop_ = free_weights_after_minimization_;
2583 
2584  if ( option[ optE::wrap_dof_optimization ].user() ) {
2585  /// Interpolate the DOFs, reconstruct the weight sets from the dofs.
2586  if ( ! wrapped_opt_min_ ) {
2587  utility_exit_with_message( "ERROR in IterativeOptEDriver::write_new_scorefile(); wrapped_opt_min_ is NULL");
2588  }
2589  for ( Size kk = 1; kk <= minimizer_dofs_before_minimization_.size(); ++kk ) {
2590  minimizer_dofs_mixed_[ kk ] =
2591  alpha * minimizer_dofs_before_minimization_[ kk ] +
2593  }
2596  if ( ! option[ optE::no_design ] ) {
2597  TR << "Wrapped weights round: " << outer_loop_counter_ << ", " << inner_loop_counter_ << std::endl;
2598  wrapped_opt_min_->print_dofs( minimizer_dofs_mixed_, TR );
2599  TR << std::endl;
2600  }
2601 
2603  // moving this call up to here, b/c if wrap dof not in use, the reference energies get set correctly
2604 
2605  } else {
2606 
      // Each override below replaces both mixing_factor_ and alpha, so the mixing that follows takes
      // either the before-minimization values only (alpha = 1) or the after-minimization values only (alpha = 0).
2607  if ( outer_loop_counter_ == 1 && option[ optE::design_first ].user() ) { mixing_factor_ = 0.0; alpha = 1.0; }
2608  if ( option[ optE::rescore::measure_sequence_recovery ].user() ) { mixing_factor_ = 1.0; alpha = 0.0; } // the right wts are in after_min vector
2609  if ( option[ optE::optimize_ddGmutation ].user() && ! ( option[ optE::optimize_nat_aa ].user() ) ) { mixing_factor_ = 1.0; alpha = 0.0; }
2610 
2611  for ( Size kk = 1; kk <= n_score_types; ++kk ) {
2614  }
2615 
2616  // output to the terminal the weights before and after mixing
2617  for ( Size ii = 1; ii <= n_score_types; ++ii ) {
2618  if ( free_weights_inner_loop_[ (ScoreType) ii ] != 0.0 ) {
2619  TR_VERBOSE << "write_new_scorefile(): free weights before/after mixing: [ " << name_from_score_type( (ScoreType) ii ) << " ]: "
2620  << F(8,4,free_weights_after_minimization_[ (ScoreType) ii ]) << " -> "
2621  << F(8,4,free_weights_inner_loop_[ (ScoreType) ii ]) << std::endl;
2622  }
2623  }
2624 
2625  for ( Size kk = 1; kk <= before_minimization_reference_energies_.size(); ++kk ) {
2628  }
2629 
2630  TR_VERBOSE << "write_new_scorefile(): reference energies after mixing: ";
2631  for ( Size ii = 1; ii <= num_canonical_aas; ++ii ) {
2633  }
2634  TR_VERBOSE << std::endl;
2635 
2636  // special section for the unfolded state energy term; this block of code could be removed if the wrapped multifunc
2637  // optimizer was being used, but I added this prior to APL's release of the wrapped multifunc optimizer and I'm used
2638  // to running things this way. (-ronj)
2640  }
2641 
2642  } else { // inner loop counter is not 1 or we have no other free terms besides refEs
2643  TR << "Tuning reference energies using amino acid profile recovery data: round " <<
2644  outer_loop_counter_ << " " << inner_loop_counter_ << std::endl;
2645  TR << "write_new_scorefile(): reference energies before entropy: " << inner_loop_counter_ << ": ";
2646  for ( Size ii = 1; ii <= num_canonical_aas; ++ii ) {
2648  }
2649  TR << std::endl;
2650 
2651  /// Only modify the reference energies so that the designed sequence profile
2652  /// matches the experimentally observed sequence profile.
      /// Amino acids whose expected frequency is exactly zero are left untouched, which also avoids
      /// dividing by zero in the ratios below.
2653  for ( Size ii = 1; ii <= num_canonical_aas; ++ii ) {
2654  if ( aa_freq_exp_[ ii ] != 0 ) {
2655  if ( aa_freq_obs_[ ii ] > aa_freq_exp_[ ii ] ) {
2656  // if the designed freq is greater than what's observed in nature, INCREASE the values on the reference energies
2657  if ( aa_freq_obs_[ ii ] / aa_freq_exp_[ ii ] > nowhere_close ) {
2658  reference_energies_inner_loop_[ ii ] += biggest_step;
2659  } else if ( aa_freq_obs_[ ii ] / aa_freq_exp_[ ii ] > way_way_off ) {
2660  reference_energies_inner_loop_[ ii ] += bigger_step;
2661  } else if ( aa_freq_obs_[ ii ] / aa_freq_exp_[ ii ] > way_off ) {
2662  reference_energies_inner_loop_[ ii ] += big_step;
2663  } else if ( aa_freq_obs_[ ii ] / aa_freq_exp_[ ii ] > pretty_close ) {
2664  reference_energies_inner_loop_[ ii ] += small_step;
2665  } else {
2666  reference_energies_inner_loop_[ ii ] += tiny_step; // ratio is at most pretty_close (1.10), i.e. within 10%: take a tiny step
2667  }
2668  } else {
2669  // if the designed freq is less than what's observed in nature, DECREASE the values on the reference energies
      // (this branch is also taken when the two frequencies are equal, which subtracts tiny_step)
2670  if ( aa_freq_obs_[ ii ] / aa_freq_exp_[ ii ] < ( 1.0 / nowhere_close ) ) {
2671  reference_energies_inner_loop_[ ii ] -= biggest_step;
2672  } else if ( aa_freq_obs_[ ii ] / aa_freq_exp_[ ii ] < ( 1.0 / way_way_off ) ) {
2673  reference_energies_inner_loop_[ ii ] -= bigger_step;
2674  } else if ( aa_freq_obs_[ ii ] / aa_freq_exp_[ ii ] < ( 1.0 / way_off ) ) {
2675  reference_energies_inner_loop_[ ii ] -= big_step;
2676  } else if ( aa_freq_obs_[ ii ] / aa_freq_exp_[ ii ] < ( 1.0 / pretty_close ) ) {
2677  reference_energies_inner_loop_[ ii ] -= small_step;
2678  } else {
2679  reference_energies_inner_loop_[ ii ] -= tiny_step;
2680  }
2681  }
2682  }
2683  }
2684  }
2685 
2686  TR << "write_new_scorefile(): reference energies after entropy: " << inner_loop_counter_ << ": ";
2687  for ( Size ii = 1; ii <= num_canonical_aas; ++ii ) {
2689  }
2690  TR << std::endl;
2691 
      // Re-center the reference energies: subtract their mean so that they sum to zero.
2692  Real total_refE( 0.0 );
2693  for ( Size ii = 1; ii <= num_canonical_aas; ++ii ) total_refE += reference_energies_inner_loop_[ ii ];
2694  for ( Size ii = 1; ii <= num_canonical_aas; ++ii ) reference_energies_inner_loop_[ ii ] -= total_refE / num_canonical_aas;
2695 
2696  TR << "write_new_scorefile(): ref energies after normalization: " << inner_loop_counter_ << ": ";
2697  for ( Size ii = 1; ii <= num_canonical_aas; ++ii ) { TR << F(5,2,reference_energies_inner_loop_[ ii ]) << ", "; }
2698  TR << std::endl;
2699 
2700  } else { // not fitting reference energies to a profile
2701 
2702  if ( option[ optE::wrap_dof_optimization ].user() ) {
2703  /// Interpolate the DOFs, reconstruct the weight sets from the dofs.
2704  if ( ! wrapped_opt_min_ ) {
2705  utility_exit_with_message( "ERROR in IterativeOptEDriver::write_new_scorefile(); wrapped_opt_min_ is NULL");
2706  }
2707  for ( Size kk = 1; kk <= minimizer_dofs_before_minimization_.size(); ++kk ) {
2708  minimizer_dofs_mixed_[ kk ] =
2709  alpha * minimizer_dofs_before_minimization_[ kk ] +
2711  }
2714  if ( ! option[ optE::no_design ] ) {
2715  TR << "Wrapped weights round: " << outer_loop_counter_ << ", " << inner_loop_counter_ << std::endl;
2716  wrapped_opt_min_->print_dofs( minimizer_dofs_mixed_, TR );
2717  TR << std::endl;
2718  }
2719 
2720  } else { // don't use the wrapped multifunc optimizer
2721 
      // Same overrides as in the profile-fitting branch: force the mixing to one extreme or the other.
2722  if ( outer_loop_counter_ == 1 && option[ optE::design_first ].user() ) { mixing_factor_ = 0.0; alpha = 1.0; }
2723  if ( option[ optE::rescore::measure_sequence_recovery ].user() ) { mixing_factor_ = 1.0; alpha = 0.0; } // the right wts are in after_min vector
2724  if ( option[ optE::optimize_ddGmutation ].user() && ! ( option[ optE::optimize_nat_aa ].user() ) ) { mixing_factor_ = 1.0; alpha = 0.0; }
2725 
2726  for ( Size kk = 1; kk <= n_score_types; ++kk ) {
2729  }
2730 
2731  // output to the terminal the weights before and after mixing
2732  for ( Size ii = 1; ii <= n_score_types; ++ii ) {
2733  if ( free_weights_inner_loop_[ (ScoreType) ii ] != 0.0 ) {
2734  TR_VERBOSE << "write_new_scorefile(): free weights before/after mixing: [ " << name_from_score_type( (ScoreType) ii ) << " ]: "
2735  << F(8,4,free_weights_after_minimization_[ (ScoreType) ii ]) << " -> "
2736  << F(8,4,free_weights_inner_loop_[ (ScoreType) ii ]) << std::endl;
2737  }
2738  }
2739 
2740  if ( ! option[ optE::dont_use_reference_energies ].user() ) {
2741  for ( Size kk = 1; kk <= before_minimization_reference_energies_.size(); ++kk ) {
2744  }
2745  }
2746 
2747  // special section for the unfolded state energy term; this block of code could be removed if the wrapped multifunc
2748  // optimizer was being used, but I added this prior to APL's release of the wrapped multifunc optimizer and I'm used
2749  // to running things this way. (-ronj)
2751 
2752  }
2753  }
2754 
2755 #ifdef USEMPI
2756  /// Send the weights we've just computed to the other nodes
      /// The EnergyMap is indexed from ScoreType(1); the raw buffer handed to MPI is indexed from 0.
2757  Real * free_wts = new Real[ n_score_types ];
2758  for ( Size ii = 0; ii < n_score_types; ++ii ) {
2759  free_wts[ ii ] = free_weights_inner_loop_[ ScoreType( ii + 1 ) ];
2760  }
2761 
2762  // if refE's not in use, this code will create arrays of size 0 - that's fine
2763  Size n_ref_Es = reference_energies_inner_loop_.size();
2764  Real * ref_Es = new Real[ n_ref_Es ];
2765  for ( Size ii = 0; ii < n_ref_Es; ++ii ) {
2766  ref_Es[ ii ] = reference_energies_inner_loop_[ ii + 1 ];
2767  }
2768 
      // This code runs on rank 0, so the receivers are ranks 1 .. MPI_nprocs_ - 1.
      // The message order (free weights, then count, then reference energies) matches the MPI_Recv calls
      // in the else-branch below.
      // NOTE(review): n_ref_Es is a Size sent as MPI_UNSIGNED_LONG and Real is sent as MPI_DOUBLE;
      // this assumes those typedefs are unsigned long and double -- confirm.
2769  for ( Size ii = 1; ii < MPI_nprocs_; ++ii ) {
2770  MPI_Send( free_wts, n_score_types, MPI_DOUBLE, ii, tag_, MPI_COMM_WORLD );
2771  if ( ! option[ optE::dont_use_reference_energies ].user() ) {
2772  MPI_Send( & n_ref_Es, 1, MPI_UNSIGNED_LONG, ii, tag_, MPI_COMM_WORLD );
2773  MPI_Send( ref_Es, n_ref_Es, MPI_DOUBLE, ii, tag_, MPI_COMM_WORLD );
2774  }
2775  }
2776 
2777  delete [] ref_Es; ref_Es = 0;
2778  delete [] free_wts; free_wts = 0;
2779 
      // This else-branch exists only in USEMPI builds; without USEMPI the closing brace after the #endif
      // ends the "MPI_rank_ == 0" block directly.
2780  } else {
2781  /// MPI_rank_ != 0; receive energies from the master node.
2782  Real * free_wts = new Real[ n_score_types ];
2783  MPI_Recv( free_wts, n_score_types, MPI_DOUBLE, 0, tag_, MPI_COMM_WORLD, &stat_ );
2784 
2785  for ( Size ii = 0; ii < n_score_types; ++ii ) {
2786  free_weights_inner_loop_[ ScoreType( ii + 1 ) ] = free_wts[ ii ];
2787  }
2788  delete [] free_wts; free_wts = 0;
2789 
2790  // don't bother with the reference energies if the user doesn't want them
2791  if ( ! option[ optE::dont_use_reference_energies ].user() ) {
2792 
2793  Size n_ref_Es( 0 );
2794  MPI_Recv( & n_ref_Es, 1, MPI_UNSIGNED_LONG, 0, tag_, MPI_COMM_WORLD, &stat_ );
2795  Real * ref_Es = new Real[ n_ref_Es ];
2796  MPI_Recv( ref_Es, n_ref_Es, MPI_DOUBLE, 0, tag_, MPI_COMM_WORLD, &stat_ );
2797 
2798  reference_energies_inner_loop_.resize( n_ref_Es );
2799  for ( Size ii = 0; ii < n_ref_Es; ++ii ) {
2800  reference_energies_inner_loop_[ ii + 1 ] = ref_Es[ ii ];
2801  }
2802 
2803  delete[] ref_Es; ref_Es = 0;
2804  }
2805 #endif // USEMPI
2806  }
2807 
2808  if ( MPI_rank_ == 0 ) {
2809  /// for posterity.
      /// combined_weights is the sum of the fixed weights and the free inner-loop weights.
2810  EnergyMap combined_weights( fixed_parameters_ );
2811  combined_weights += free_weights_inner_loop_;
2812 
2813  std::string scorefile_name = get_scorefile_name();
2814  // Make sure directory exists:
2815  utility::file::create_directory_recursive( utility::file::PathName(scorefile_name).parent() );
2816  std::ofstream fout( scorefile_name.c_str() );
2817 
2818  /// Ensure the score file includes soft rep if its requested.
2819  if ( option[ optE::optE_soft_rep ].user() ) {
2820  fout << "ETABLE FA_STANDARD_SOFT" << std::endl;
2821  }
2822 
2823  if ( ! option[ optE::dont_use_reference_energies ].user() ) {
2824  /// Weight file output:
2825  fout << "METHOD_WEIGHTS ref ";
2826  for ( Size ii = 1; ii <= reference_energies_inner_loop_.size(); ++ii ) {
2827  fout << reference_energies_inner_loop_[ ii ] << " ";
2828  }
2829  fout << "\n";
2830  }
2831  if ( option[ optE::no_hb_env_dependence ] ) {
2832  fout << "NO_HB_ENV_DEP\n";
2833  }
2834 
      // Only score types with a nonzero combined weight are written, one "name weight" pair per line.
2835  for( Size ii = 1; ii <= core::scoring::n_score_types; ++ii ) {
2836  if ( combined_weights[ ScoreType( ii ) ] != 0 ) {
2837  fout << name_from_score_type( ScoreType( ii ) ) << " " << combined_weights[ ScoreType( ii ) ] << "\n";
2838  }
2839  }
2840 
2841  if ( ! option[ optE::dont_use_reference_energies ].user() ) {
2842  fout << "ref 1\n"; // DONT FORGET TO USE THE REFERENCE ENERGIES YOU JUST CALCULATED!
2843  }
2844 
2845  fout.close();
2846  }
2847 
2848 }
2849 
2850 ///
2851 /// @begin IterativeOptEDriver::output_weighted_unfolded_energies
2852 ///
2853 /// @brief Multiply out the unweighted unfolded energies with the current free and fixed term weights
2854 ///
2855 /// @details
2856 /// Dots the unweighted, unfolded energies with the current free and fixed term weights and prints out the weighted
2857 /// unfolded energies through the TR / TR_VERBOSE output streams.
     /// For each canonical amino acid the dot product is then scaled by the weight of the 'unfolded' term: the free
     /// weight if it is nonzero, otherwise the fixed weight if it is nonzero, otherwise 1.0.
     /// NOTE(review): the signature and the declarations of unfE_potential and wtd_unfE are not visible in this listing.
2858 ///
2859 void
2861 
2863 
2865 
      // Fetch the raw (unweighted) unfolded-state energy map of every canonical amino acid.
2867  utility::vector1< EnergyMap > unweighted_unfolded_emap( chemical::num_canonical_aas );
2868  for ( Size aa=1; aa <= chemical::num_canonical_aas; ++aa ) {
2869  unweighted_unfolded_emap[ aa ].zero();
2870  unfE_potential.raw_unfolded_state_energymap( chemical::name_from_aa( (chemical::AA) aa ), unweighted_unfolded_emap[ aa ] );
2871  }
2872 
2873  for( Size aa = 1; aa <= chemical::num_canonical_aas; ++aa ) {
2874 
2875  Real unfolded_energy_for_one_aa = 0.0;
2876  Real weighted_unfolded_energy_for_one_aa = 0.0;
2877 
2878  // free weights first
      // (every score type is visited; only those with a nonzero free weight contribute)
2879  for( Size ii = 1; ii <= n_score_types; ++ii ) {
2880  if ( free_weights_inner_loop_[ (ScoreType) ii ] != 0.0 ) {
2881  TR_VERBOSE << "output_weighted_unfolded_energies(): adding unfolded energy for aa '" << chemical::name_from_aa( (chemical::AA) aa )
2882  << "' unweighted free '" << name_from_score_type( (ScoreType)ii ) << "' energy: "
2883  << unweighted_unfolded_emap[ aa ][ (ScoreType)ii ] << " * '" << name_from_score_type( (ScoreType)ii )
2884  << "' weight: " << free_weights_inner_loop_[ (ScoreType)ii ]
2885  << " = " << unweighted_unfolded_emap[ aa ][ (ScoreType)ii ] * free_weights_inner_loop_[ (ScoreType)ii ]
2886  << std::endl;
2887  unfolded_energy_for_one_aa += (unweighted_unfolded_emap[ aa ][ (ScoreType)ii ] * free_weights_inner_loop_[ (ScoreType)ii ]);
2888  }
2889  }
2890 
2891  // then fixed weights
      // (only the score types listed in fixed_score_list_ are visited here)
2892  for( Size ii = 1; ii <= fixed_score_list_.size(); ++ii ) {
2893  if ( fixed_parameters_[ fixed_score_list_[ ii ] ] != 0.0 ) {
2894  TR_VERBOSE << "output_weighted_unfolded_energies(): adding unfolded energy for aa '" << chemical::name_from_aa( (chemical::AA) aa )
2895  << "' unweighted fixed '" << name_from_score_type( fixed_score_list_[ ii ] ) << "' energy: "
2896  << unweighted_unfolded_emap[ aa ][ fixed_score_list_[ ii ]] << " * '"
2898  << "' weight: " << fixed_parameters_[ fixed_score_list_[ ii ] ]
2899  << " = " << unweighted_unfolded_emap[ aa ][ fixed_score_list_[ ii ]] * fixed_parameters_[ fixed_score_list_[ ii ] ]
2900  << std::endl;
2901  unfolded_energy_for_one_aa += (unweighted_unfolded_emap[ aa ][ fixed_score_list_[ ii ] ] * fixed_parameters_[ fixed_score_list_[ ii ] ]);
2902  }
2903  }
2904 
      // Scale the dot product by the weight of the 'unfolded' term itself; a nonzero free weight wins over a fixed one.
2905  if ( free_weights_inner_loop_[ unfolded ] == 0.0 && fixed_parameters_[ unfolded ] == 0.0 ) {
2906  TR << "output_weighted_unfolded_energies(): unfolded term has no weight! using 1.0 to avoid errors." << std::endl;
2907  weighted_unfolded_energy_for_one_aa = unfolded_energy_for_one_aa * 1.0;
2908  } else if ( free_weights_inner_loop_[ unfolded ] != 0.0 ) {
2909  // unfolded term weight is variable
2910  TR << "output_weighted_unfolded_energies(): weighting unfolded energy '" << unfolded_energy_for_one_aa << "' by unfolded term weight: '"
2911  << free_weights_inner_loop_[ unfolded ] << "' gives weighted unfolded energy for one aa of "
2912  << F(4,2,unfolded_energy_for_one_aa * free_weights_inner_loop_[ unfolded ]) << std::endl;
2913  weighted_unfolded_energy_for_one_aa = unfolded_energy_for_one_aa * free_weights_inner_loop_[ unfolded ];
2914  } else if ( fixed_parameters_[ unfolded ] != 0.0 ) {
2915  TR << "output_weighted_unfolded_energies(): weighting unfolded energy '" << unfolded_energy_for_one_aa << "' by unfolded term weight: '"
2916  << fixed_parameters_[ unfolded ] << "' gives weighted unfolded energy for one aa of "
2917  << F(4,2,unfolded_energy_for_one_aa * fixed_parameters_[ unfolded ]) << std::endl;
2918  weighted_unfolded_energy_for_one_aa = unfolded_energy_for_one_aa * fixed_parameters_[ unfolded ];
2919  } else {
      // NOTE(review): this branch looks unreachable -- the three conditions above already cover every
      // combination of the two weights. It is kept as a defensive fallback.
2920  TR << "output_weighted_unfolded_energies(): error with checking the weight of the unfolded term. using 1.0 to avoid errors." << std::endl;
2921  weighted_unfolded_energy_for_one_aa = unfolded_energy_for_one_aa * 1.0;
2922  }
2923 
2924  wtd_unfE[ aa ] = weighted_unfolded_energy_for_one_aa;
2925  }
2926 
      // NOTE(review): the "[ " written here is never closed in the output.
2927  TR << "output_weighted_unfolded_energies(): weighted unfoldedE by aa: [ ";
2928  for( Size aa = 1; aa <= chemical::num_canonical_aas; ++aa ) {
2929  TR << wtd_unfE[ aa ] << " ";
2930  }
2931  TR << std::endl;
2932  }
2933 }
2934 
2935 ///
2936 /// @begin IterativeOptEDriver::free_terms_energy_map_from_dofs
2937 ///
2938 /// @brief Expand the free variables into an EnergyMap keyed by score type
2939 ///
2940 /// @details This dofs Multivec is the list of weights that the OptEMultifunc
2941 /// sees; do not confuse this set of dofs with the set of dofs that the
2942 /// Minimizer and the WrappedOptEMultifunc use.
     /// The i-th score type in free_score_list_ receives dofs[ i ]; entries of dofs beyond
     /// free_score_list_.size() are not read. The visible code does not add any fixed-term weights to the
     /// returned map.
     ///
     /// @param dofs  weights, with the free-term weights first, in free_score_list_ order
     /// @return an EnergyMap holding only the free-term weights
2943 ///
2946  core::optimization::Multivec const & dofs
2947 ) const
2948 {
2949  EnergyMap return_map;
2950 
2951  // This covers the variable weights
2952  Size dof_index( 1 );
2953  for( ScoreTypes::const_iterator itr = free_score_list_.begin(),
2954  end_itr = free_score_list_.end() ;
2955  itr != end_itr ; ++itr ) {
2956  return_map[ *itr ] = dofs[ dof_index++ ];
2957  }
2958 
2959  return return_map;
2960 }
2961 
2962 
2963 ///
2964 /// @begin IterativeOptEDriver::free_weights_and_refEs_from_vars
     ///
     /// @brief Unpack a flat variable vector into free-term weights and per-amino-acid reference energies
     ///
     /// @details vars holds the free-term weights in positions 1 .. free_count_, followed by one reference
     /// energy per canonical amino acid. The reference energies are copied out only when the user has not set
     /// optE::dont_use_reference_energies.
     ///
     /// @param vars  flat vector of free weights followed by reference energies
     /// @param weights  output; overwritten with the free-term weights
     /// @param reference_energies  output; entries 1 .. num_canonical_aas are overwritten, so it must already
     ///        have at least that many elements
2965 ///
2966 void
2968  utility::vector1< Real > const & vars,
2969  core::scoring::EnergyMap & weights,
2970  utility::vector1< Real > & reference_energies
2971 ) const
2972 {
2973  // conditional needed because if reference_energies is accessed when refE's are not being optimized, errors will occur
2974  if ( ! option[ optE::dont_use_reference_energies ].user() ) {
2975  for ( Size ii = 1; ii <= chemical::num_canonical_aas; ++ii ) {
2976  reference_energies[ ii ] = vars[ free_count_ + ii ]; // save them in non-negated form
2977  }
2978  }
      // NOTE(review): the zero() call looks redundant, since weights is assigned a fresh map on the next line.
2979  weights.zero();
2980  weights = free_terms_energy_map_from_dofs( vars );
2981 }
2982 
2983 
2984 ///
2985 /// @begin IterativeOptEDriver::configure_new_scorefunction
2986 ///
2987 /// @brief
2988 /// Sets functional forms (e.g. soft-rep) but doesn't set any weights.
2989 /// If the option -optE::optE_soft_rep is specified, then an empty scorefunction with the FA_STANDARD_SOFT etable
2990 /// is returned. Another option is -optE::optE_no_protein_hack_elec. This excludes protein_protein_hack_elec in
2991 /// the scorefunction energy method options.
     /// The options -optE::no_hb_env_dependence and -optE::no_hb_env_dependence_DNA switch off use_hb_env_dep and
     /// use_hb_env_dep_DNA in the hbond options, respectively.
     /// NOTE(review): the signature, and the condition and setter of the last option block, are not visible in
     /// this listing.
     ///
     /// @return a newly allocated ScoreFunction whose energy method options reflect the flags above
2992 ///
2995 {
2996  ScoreFunctionOP scorefxn = new ScoreFunction;
      // Each block below copies the current energy method options, changes one setting, and stores the copy back.
2997  if ( option[ optE::optE_soft_rep ].user() ) {
2998  methods::EnergyMethodOptions options( scorefxn->energy_method_options() );
2999  options.etable_type( FA_STANDARD_SOFT );
3000  scorefxn->set_energy_method_options( options );
3001  }
3002  if ( option[ optE::optE_no_protein_hack_elec ]() ) {
3003  methods::EnergyMethodOptions options( scorefxn->energy_method_options() );
3004  options.exclude_protein_protein_hack_elec( true );
3005  scorefxn->set_energy_method_options( options );
3006  }
3007  if ( option[ optE::no_hb_env_dependence ] ) {
3008  methods::EnergyMethodOptions options( scorefxn->energy_method_options() );
3009  options.hbond_options().use_hb_env_dep( false );
3010  scorefxn->set_energy_method_options( options );
3011  }
3012  if ( option[ optE::no_hb_env_dependence_DNA ] ) {
3013  methods::EnergyMethodOptions options( scorefxn->energy_method_options() );
3014  options.hbond_options().use_hb_env_dep_DNA( false );
3015  scorefxn->set_energy_method_options( options );
3016  }
3018  methods::EnergyMethodOptions options( scorefxn->energy_method_options() );
3020  scorefxn->set_energy_method_options( options );
3021  }
3022  return scorefxn;
3023 }
3024 
3025 
3026 ///
3027 /// @begin IterativeOptEDriver::create_unweighted_scorefunction
3028 ///
3029 /// @brief
3030 /// Returns a score function in which every score type with a nonzero entry in include_terms_ gets that entry as
3031 /// its weight. The visible code sets no method weights for the reference energy term.
     /// NOTE(review): the signature and the statement that creates scorefxn are not visible in this listing.
3032 ///
3035 {
3037  for( int i=1 ; i <= n_score_types ; ++i ) {
3038  if ( include_terms_[ ScoreType(i) ] != 0.0 ) {
3039  scorefxn->set_weight( ScoreType(i), include_terms_[ ScoreType(i) ] );
3040  }
3041  }
3042  return scorefxn;
3043 }
3044 
3045 /// @details Construct a score function: set etable type, set weights, set reference weights.
     /// For each score type the free inner-loop weight is used when it is nonzero; otherwise the fixed weight is
     /// used when that is nonzero. The reference energies are installed as the method weights of 'ref', and the
     /// 'ref' weight is set to 1.0, unconditionally.
     /// NOTE(review): the signature and the statement that creates sfxn are not visible in this listing.
3048 {
3050  for( int i=1 ; i <= n_score_types ; ++i ) {
3051  if ( free_weights_inner_loop_[ ScoreType(i) ] != 0.0 ) {
3052  //std::cout << " PROC #" << MPI_rank_ << " include term: " << ScoreType(i) << std::endl;
3053  sfxn->set_weight( ScoreType(i), free_weights_inner_loop_[ ScoreType(i) ] );
3054  } else if ( fixed_parameters_[ ScoreType(i) ] != 0.0 ) {
3055  sfxn->set_weight( ScoreType(i), fixed_parameters_[ ScoreType(i) ] );
3056  }
3057  }
3058  // sfxn->energy_method_options().set_method_weights( ref, reference_energies_inner_loop_ );
3059  sfxn->set_method_weights( ref, reference_energies_inner_loop_ );
3060  sfxn->set_weight( ref, 1.0 );
3061 
3062  return sfxn;
3063 }
3064 
3065 
3066 ///
3067 /// @begin IterativeOptEDriver::get_scorefile_name()
3068 ///
     /// @brief Builds the path of the weights file for the current outer-loop iteration.
     ///
     /// @return "weightdir/optE_scorefile_<outer_loop_counter_>.wts"
     ///
3069 /// @remarks
3070 /// IMPORTANT IMPORTANT IMPORTANT: requires weightdir having been created before launching the program.
     /// NOTE(review): write_new_scorefile() calls create_directory_recursive() on the parent of this path before
     /// opening the file, so this requirement may be outdated for that caller -- confirm for other callers.
3071 ///
3074 {
3075  std::stringstream instream;
3076  instream << outer_loop_counter_;
3077  std::string scorefile_name = "weightdir/optE_scorefile_" + instream.str() + ".wts";
3078  return scorefile_name;
3079 }
3080 
3081 ///
3082 /// @begin IterativeOptEDriver::test_sequence_recovery()
3083 ///
3084 /// @brief
3085 /// Calls run_design on all pdbs and collects the results from slave cpus if MPI is in use.
     ///
     /// @details
     /// Calls exit_gracefully() first when optE::no_design is set. Every rank then runs design and repacking on its
     /// assigned pdbs; ranks other than 0 additionally send their sequence recovery data to the master.
     /// NOTE(review): some statements of both branches (and the signature) are not visible in this listing.
3086 ///
3088 {
3089  using namespace basic::options;
3090  using namespace basic::options::OptionKeys;
3091 
3092  if ( option[ optE::no_design ]() ) {
3093  exit_gracefully();
3094  }
3095 
3096  if ( MPI_rank_ == 0 ) {
3097  run_design_on_assigned_pdbs(); std::cout.flush();
3098  repack_assigned_pdbs(); std::cout.flush();
3101  } else {
3102  run_design_on_assigned_pdbs(); std::cout.flush();
3103  repack_assigned_pdbs(); std::cout.flush();
3104  send_sequence_recovery_data_to_master_cpu(); std::cout.flush();
3106  }
3107 }
3108 
3109 
3110 ///
3111 /// @begin IterativeOptEDriver::collect_sequence_recovery_data_from_slave_cpus()
3112 ///
3113 /// @brief
3114 /// Helper method for master node. Calls collect_recovery_data_from_slave_cpu on all slave CPUs.
     ///
     /// @details
     /// After looping over ranks 1 .. MPI_nprocs_ - 1 it reports the totals, fills aa_freq_obs_ and aa_freq_exp_
     /// (counts divided by total_positions_) and prints the per-amino-acid counts and frequencies.
     /// NOTE(review): total_positions_ is used as a divisor without a check for zero.
     /// NOTE(review): the signature, the loop body and the statement that computes
     /// inner_loop_sequence_recovery_rate_ are not visible in this listing.
3115 ///
3117 
3118  for ( Size ii = 1; ii < MPI_nprocs_; ++ii ) {
3120  }
3121 
3122  TR << "collect_sequence_recovery_data_from_slave_cpus(): collected data from " << total_positions_ << " residues: " << count_recovered_
3123  << " native amino acids recovered" << std::endl;
3125 
3126  TR << "collect_sequence_recovery_data_from_slave_cpus(): overall sequence recovery rate: " << inner_loop_sequence_recovery_rate_ << std::endl;
3127 
3128  /// This is now a good time to compute the designed frequency and the experimental frequency of the various aa's.
3129  for ( Size ii = 1; ii <= core::chemical::num_canonical_aas; ++ii ) {
3130  aa_freq_obs_[ ii ] = ((Real) aa_obs_[ ii ] ) / total_positions_;
3131  aa_freq_exp_[ ii ] = ((Real) aa_exp_[ ii ] ) / total_positions_;
3132  }
3133 
3134  TR << "amino acid counts: observed: ";
3135  for ( Size ii = 1; ii <= core::chemical::num_canonical_aas; ++ii ) {
3136  TR << core::chemical::oneletter_code_from_aa( core::chemical::AA( ii ) ) << ": " << aa_obs_[ ii ] << ", ";
3137  //if ( ii % 5 == 0 ) TR << std::endl;
3138  }
3139  TR << std::endl;
3140 
3141  TR << "amino acid counts: expected: ";
3142  for ( Size ii = 1; ii <= core::chemical::num_canonical_aas; ++ii )
3143  TR << core::chemical::oneletter_code_from_aa( core::chemical::AA( ii ) ) << ": " << aa_exp_[ ii ] << ", ";
3144  TR << std::endl;
3145 
3146 
      // cross_entropy accumulates -sum( exp_freq * log( obs_freq + 1e-5 ) ); the 1e-5 keeps the log argument
      // positive when an amino acid was never observed. Its only use, the print at the end, is commented out.
3147  Real cross_entropy( 0.0 );
3148  TR << "amino acid frequency: obs (exp):";
3149  for ( Size ii = 1; ii <= core::chemical::num_canonical_aas; ++ii ) {
3151  << ": " << ((Real) aa_obs_[ ii ] ) / total_positions_ << " (" << ((Real) aa_exp_[ ii ] ) / total_positions_<< ")";
3152  //if ( ii % 5 == 0 ) TR << std::endl;
3153  cross_entropy -= (((Real) aa_exp_[ ii ] ) / total_positions_) * std::log( ((Real) aa_obs_[ ii ] ) / total_positions_ + 1e-5 );
3154  }
3155  TR << std::endl;
3156 
3157  //TR << "Cross Entropy: " << cross_entropy << std::endl;
3158 }
3159 
3160 
3161 ///
3162 /// @begin IterativeOptEDriver::collect_sequence_recovery_data_from_slave_cpu()
     ///
     /// @brief Receives one rank's sequence recovery counts over MPI and adds them to the running totals.
     ///
     /// @details In USEMPI builds four messages are received from which_cpu, in this order: number of positions,
     /// number recovered, observed amino acid counts, expected amino acid counts. Without USEMPI the body is empty
     /// and the parameter is left unnamed.
     /// NOTE(review): the signature and the declaration of aa_counts are not visible in this listing.
     ///
     /// @param which_cpu  rank to receive from (named only in USEMPI builds)
3163 ///
3165 #ifdef USEMPI
3166  Size const which_cpu
3167 #else
3168  Size const
3169 #endif
3170 )
3171 {
3172 #ifdef USEMPI
3173  Size cpu_positions;
3174  Size cpu_recovered;
3175  MPI_Recv( & cpu_positions, 1, MPI_UNSIGNED_LONG, which_cpu, tag_, MPI_COMM_WORLD, &stat_ );
3176  MPI_Recv( & cpu_recovered, 1, MPI_UNSIGNED_LONG, which_cpu, tag_, MPI_COMM_WORLD, &stat_ );
3177 
3178  total_positions_ += cpu_positions;
3179  count_recovered_ += cpu_recovered;
3180 
3182 
3183  /// 1. Receive counts of amino acid types coming out of design (observed)
     /// The receive buffer is 0-based (iim1) while aa_obs_ / aa_exp_ are indexed from 1 (ii).
3184  MPI_Recv( aa_counts, core::chemical::num_canonical_aas, MPI_UNSIGNED_LONG, which_cpu, tag_, MPI_COMM_WORLD, &stat_ );
3185  for ( Size ii = 1, iim1 = 0; ii <= core::chemical::num_canonical_aas; ++ii, ++iim1 ) aa_obs_[ ii ] += aa_counts[ iim1 ];
3186 
3187  /// 2. Receive counts of amino acid types in the input data (expected)
3188  MPI_Recv( aa_counts, core::chemical::num_canonical_aas, MPI_UNSIGNED_LONG, which_cpu, tag_, MPI_COMM_WORLD, &stat_ );
3189  for ( Size ii = 1, iim1 = 0; ii <= core::chemical::num_canonical_aas; ++ii, ++iim1 ) aa_exp_[ ii ] += aa_counts[ iim1 ];
3190 
3191 #endif
3192 }
3193 
3194 
3195 
3196 ///
3197 /// @begin IterativeOptEDriver::collect_rotamer_recovery_data_from_slave_cpus()
3198 ///
3199 /// @brief
3200 /// Helper method for master node. Calls collect_rotamer_recovery_data_from_slave_cpu on all slave CPUs.
     ///
     /// @details
     /// Returns immediately when in::file::centroid_input is set or optE::recover_nat_rot is not set.
     /// NOTE(review): the signature, the loop body and the statement that computes
     /// inner_loop_rotamer_recovery_rate_ are not visible in this listing.
3201 ///
3203 {
3204  if ( basic::options::option[ basic::options::OptionKeys::in::file::centroid_input ] ) return;
3205  if ( ! basic::options::option[ basic::options::OptionKeys::optE::recover_nat_rot ] ) return;
3206 
3207  for ( Size ii = 1; ii < MPI_nprocs_; ++ii ) {
3209  }
      // NOTE(review): the message below says "native amino acids recovered" although the counter printed is
      // count_rotamers_recovered_.
3210  TR << "collect_rotamer_recovery_data_from_slave_cpus(): collected rotamer recovery data from " << total_rotamer_positions_
3211  << " residues: " << count_rotamers_recovered_ << " native amino acids recovered" << std::endl;
3213  TR << "collect_rotamer_recovery_data_from_slave_cpus(): rotamer recovery rate: " << inner_loop_rotamer_recovery_rate_ << std::endl;
3214 
3215 }
3216 
3217 
3218 ///
3219 /// @begin IterativeOptEDriver::collect_rotamer_recovery_data_from_slave_cpu()
     ///
     /// @brief Receives one rank's rotamer recovery counts over MPI and adds them to the running totals.
     ///
     /// @details In USEMPI builds two messages are received from which_cpu: the number of rotamer positions,
     /// then the number of rotamers recovered. Without USEMPI the body is empty and the parameter is left unnamed.
     ///
     /// @param which_cpu  rank to receive from (named only in USEMPI builds)
3220 ///
3222 #ifdef USEMPI
3223  Size const which_cpu
3224 #else
3225  Size const
3226 #endif
3227 )
3228 {
3229 #ifdef USEMPI
3230  Size cpu_rotamer_positions;
3231  Size cpu_rotamer_recovered;
3232  MPI_Recv( & cpu_rotamer_positions, 1, MPI_UNSIGNED_LONG, which_cpu, tag_, MPI_COMM_WORLD, &stat_ );
3233  MPI_Recv( & cpu_rotamer_recovered, 1, MPI_UNSIGNED_LONG, which_cpu, tag_, MPI_COMM_WORLD, &stat_ );
3234 
3235  total_rotamer_positions_ += cpu_rotamer_positions;
3236  count_rotamers_recovered_ += cpu_rotamer_recovered;
3237 
3238 #endif
3239 }
3240 
3241 
3242 ///
3243 /// @begin IterativeOptEDriver::run_design_on_assigned_pdbs()
3244 ///
3245 /// @brief
3246 /// Runs design on the pdbs assigned to this node/cpu.
3247 ///
     /// @details
     /// Visible steps:
     ///  1. For every score type, copy the non-zero inner-loop free weight -- or, if that
     ///     is zero, the non-zero fixed parameter -- into both the score function (sfxn)
     ///     and a local EnergyMap (wts_map).
     ///  2. Convert wts_map into a vector1 of Reals and hand it to the 'unfolded' energy
     ///     method via set_method_weights().
     ///  3. Unless optE::dont_use_reference_energies was given, install the inner-loop
     ///     reference energies and set the 'ref' weight to 1.0.
     ///  4. Reset the amino acid counters (zero_aa_counts()) and run the design /
     ///     recovery measurement on native_pdbs_.
     ///
     /// NOTE(review): several lines are not visible in this listing: the signature, the
     /// construction of sfxn, the condition and declaration guarding/creating
     /// wts_vector, and most of the final call (its name and several arguments).
3249 {
3250  /// NO MORE: don't read from disk, instead, create a score function
3251  /// based on the *_inner_loop weight/reference energy arrays
3252  ///ScoreFunctionOP sfxn = ScoreFunctionFactory::create_score_function( get_scorefile_name() );
3253 
3254  /// Construct a score function: set etable type, set weights, set reference weights.
3255  //ScoreFunctionOP sfxn = create_weighted_scorefunction();
3256 
3257  /// Veto'ing APL's code above because users may not want refE's; need to rewrite this section to use the new
3258  /// configure_new_scorefunction method but then sets the refE weight to zero.
3259 
     // Mirror of every weight that gets set on sfxn; consumed below to build wts_vector.
3260  EnergyMap wts_map;
3262 
     // A non-zero free weight takes precedence over the fixed parameter of the same
     // score type; types for which both are zero are left untouched.
3263  for( int i=1 ; i <= n_score_types ; ++i ) {
3264  if ( free_weights_inner_loop_[ ScoreType(i) ] != 0.0 ) {
3265  sfxn->set_weight( ScoreType(i), free_weights_inner_loop_[ ScoreType(i) ] );
3266  wts_map[ ScoreType(i) ] = free_weights_inner_loop_[ ScoreType(i) ];
3267  } else if ( fixed_parameters_[ ScoreType(i) ] != 0.0 ) {
3268  sfxn->set_weight( ScoreType(i), fixed_parameters_[ ScoreType(i) ] );
3269  wts_map[ ScoreType(i) ] = fixed_parameters_[ ScoreType(i) ];
3270  }
3271  }
3272 
3273  // Adding a special check here for the unfolded state energy term. Like the reference energy term, the unfolded
3274  // term needs to have extra weights set. These can be set by passing in a vector1 of Reals of size n_score_types
3275  // which is the weights for every score term desired in the unfolded state energy. So what needs to be done is
3276  // an EnergyMap needs to be created to hold all of the free and fixed weights - and then the emap needs to be
3277  // converted into a vector1 of Reals.
3278 
3279  // the vector of weights and the set_method_weights function will only get called if 'unfolded' is being used,
3280  // i.e. if the term has a nonzero weight during the run
3281 
3283  // obtaining a vector of Reals from an EnergyMap should probably be an EnergyMap method, but whatever.
     // NOTE(review): this loop stops at ii < n_score_types, whereas the loop above runs
     // to i <= n_score_types, so the entry for the last score type is never copied.
     // Whether that matters depends on how wts_vector is sized/initialized (declaration
     // not visible here) and on whether the last score type can carry a weight --
     // confirm, and use <= if the omission is unintended.
3285  for ( int ii=1; ii < n_score_types; ++ii ) {
3286  wts_vector[ ii ] = wts_map[ ScoreType(ii) ];
3287  }
3288  // set method weights should cause the 'unfolded' EnergyMethod object living inside the score function to be
3289  // recreated with the weights in the wts_vector. this should allow the unfolded state term to actually be
3290  // used during design (unlike in collect_rotamer_energies() where it just returns zero).
3291  sfxn->set_method_weights( scoring::unfolded, wts_vector );
3292  }
3293 
     // NOTE(review): .user() presumably reports whether the flag was given on the
     // command line rather than its boolean value -- confirm that is the intent.
3294  if ( ! option[ optE::dont_use_reference_energies ].user() ) {
3295  sfxn->set_method_weights( ref, reference_energies_inner_loop_ );
3296  sfxn->set_weight( ref, 1.0 );
3297  }
3298 
     // Only rank 0 logs, so the message appears once per run rather than once per CPU.
3299  if ( MPI_rank_ == 0 ) {
3300  TR_VERBOSE << "run_design_on_assigned_pdbs(): created scorefxn for running design" << std::endl;
3301  //sfxn->show( std::cout );
3302  }
3303 
3304  /// prep arrays so that we can recover data from
3305  zero_aa_counts();
3306 
     // NOTE(review): the name of the function called here and some of its arguments are
     // not visible in this listing; only native_pdbs_ and sfxn can be seen.
3308  native_pdbs_,
3310  sfxn,
3313  );
3314 
3315 }
3316 
3317 
3318 ///
3319 /// @begin IterativeOptEDriver::repack_assigned_pdbs()
3320 ///
     /// @brief
     /// Builds a score function from the current inner-loop weights and uses it on
     /// native_pdbs_ for the rotamer recovery measurement.
     ///
     /// @details
     /// Returns immediately when -in:file:centroid_input is set or when
     /// -optE:recover_nat_rot is not set. Weight selection is the same as in
     /// run_design_on_assigned_pdbs(): a non-zero inner-loop free weight wins over the
     /// fixed parameter; reference energies are installed with weight 1.0 unless
     /// optE::dont_use_reference_energies was given. Unlike
     /// run_design_on_assigned_pdbs(), no weights are forwarded to the 'unfolded'
     /// energy method in the visible code.
     ///
     /// NOTE(review): the signature, the statement after the guards, the construction
     /// of sfxn, and the name plus several arguments of the final call are not visible
     /// in this listing.
3322 {
3323  if ( basic::options::option[ basic::options::OptionKeys::in::file::centroid_input ] ) return;
3324  if ( ! basic::options::option[ basic::options::OptionKeys::optE::recover_nat_rot ] ) return;
3325 
3327 
3328  /// NO MORE: don't read from disk, instead, create a score function
3329  /// based on the *_inner_loop weight/reference energy arrays
3330  ///ScoreFunctionOP sfxn = ScoreFunctionFactory::create_score_function( get_scorefile_name() );
3331 
3332 
3333  /// Construct a score function: set etable type, set weights, set reference weights.
     // Free weights take precedence; score types with neither a free nor a fixed
     // non-zero value are not set here.
3335  for( int i=1 ; i <= n_score_types ; ++i ) {
3336  if ( free_weights_inner_loop_[ ScoreType(i) ] != 0.0 ) {
3337  //std::cout << " PROC #" << MPI_rank_ << " include term: " << ScoreType(i) << std::endl;
3338  sfxn->set_weight( ScoreType(i), free_weights_inner_loop_[ ScoreType(i) ] );
3339  } else if ( fixed_parameters_[ ScoreType(i) ] != 0.0 ) {
3340  sfxn->set_weight( ScoreType(i), fixed_parameters_[ ScoreType(i) ] );
3341  }
3342  }
3343 
3344  if ( ! option[ optE::dont_use_reference_energies ].user() ) {
3345  // sfxn->energy_method_options().set_method_weights( ref, reference_energies_inner_loop_ );
3346  sfxn->set_method_weights( ref, reference_energies_inner_loop_ );
3347  sfxn->set_weight( ref, 1.0 );
3348  }
3349 
     // NOTE(review): only the native_pdbs_ and sfxn arguments of this call are visible.
3351  native_pdbs_,
3353  sfxn,
3356  );
3357 
3358 }
3359 
3360 
3361 ///
3362 /// @begin IterativeOptEDriver::send_recovery_data_to_master_cpu()
3363 ///
     /// @brief
     /// Sends this CPU's sequence recovery counters and amino acid counts to rank 0.
     ///
     /// @details
     /// In a USEMPI build four messages are sent to rank 0 with tag_, in this order:
     /// total_positions_, count_recovered_, the observed amino acid counts (aa_obs_)
     /// and the expected amino acid counts (aa_exp_). The receiver has to read them in
     /// the same order. Without USEMPI the function does nothing.
     ///
     /// NOTE(review): the signature and the declaration of aa_counts are not visible in
     /// this listing; aa_counts must hold at least core::chemical::num_canonical_aas
     /// elements of a type compatible with MPI_UNSIGNED_LONG -- confirm.
3365 {
3366 #ifdef USEMPI
3367  MPI_Send( & total_positions_, 1, MPI_UNSIGNED_LONG, 0, tag_, MPI_COMM_WORLD );
3368  MPI_Send( & count_recovered_, 1, MPI_UNSIGNED_LONG, 0, tag_, MPI_COMM_WORLD );
3369 
3371  /// 1. Send counts of amino acid types coming out of design (observed)
     // aa_obs_ is read with indices 1..num_canonical_aas while the send buffer is filled
     // from index 0, hence the paired ii / iim1 counters.
3372  for ( Size ii = 1, iim1 = 0; ii <= core::chemical::num_canonical_aas; ++ii, ++iim1 ) aa_counts[ iim1 ] = aa_obs_[ ii ];
3373  MPI_Send( aa_counts, core::chemical::num_canonical_aas, MPI_UNSIGNED_LONG, 0, tag_, MPI_COMM_WORLD );
3374 
3375  /// 2. Send counts of amino acid types in the input data (expected)
     // The same buffer is reused for the expected counts.
3376  for ( Size ii = 1, iim1 = 0; ii <= core::chemical::num_canonical_aas; ++ii, ++iim1 ) aa_counts[ iim1 ] = aa_exp_[ ii ];
3377  MPI_Send( aa_counts, core::chemical::num_canonical_aas, MPI_UNSIGNED_LONG, 0, tag_, MPI_COMM_WORLD );
3378 
3379 #endif
3380 
3381 }
3382 
3383 ///
3384 /// @begin IterativeOptEDriver::send_rotamer_recovery_data_to_master_cpu()
3385 ///
     /// @brief
     /// Sends this CPU's rotamer recovery counters to rank 0.
     ///
     /// @details
     /// Returns without sending anything when -in:file:centroid_input is set or when
     /// -optE:recover_nat_rot is not set (the same guards as in
     /// collect_rotamer_recovery_data_from_slave_cpus()). In a USEMPI build it sends
     /// total_rotamer_positions_ and then count_rotamers_recovered_, each as a single
     /// MPI_UNSIGNED_LONG, to rank 0 using tag_. This matches the receive order in
     /// collect_rotamer_recovery_data_from_slave_cpu().
     ///
     /// NOTE(review): the signature line is not visible in this listing.
3387 {
3388  if ( basic::options::option[ basic::options::OptionKeys::in::file::centroid_input ] ) return;
3389  if ( ! basic::options::option[ basic::options::OptionKeys::optE::recover_nat_rot ] ) return;
3390 #ifdef USEMPI
3391  MPI_Send( & total_rotamer_positions_, 1, MPI_UNSIGNED_LONG, 0, tag_, MPI_COMM_WORLD );
3392  MPI_Send( & count_rotamers_recovered_, 1, MPI_UNSIGNED_LONG, 0, tag_, MPI_COMM_WORLD );
3393 #endif
3394 }
3395 
3396 
3397 ///
3398 /// @begin IterativeOptEDriver::decide_if_sequence_recovery_improved()
3399 ///
3400 /// @brief
3401 /// The final function call of the go() method. After all the pdbs have been designed and repacked, and the recovery
3402 /// data collected, decide if this new set of weights improved the sequence recovery.
3403 ///
     /// @details
     /// Rank 0 makes the decision and, in a USEMPI build, sends it as an int
     /// (0 = rejected, 1 = accepted) to ranks 1 .. MPI_nprocs_-1; every other rank only
     /// receives that int. When optE::fit_reference_energies_to_aa_profile_recovery is
     /// set, the decision also involves the cross entropy between the expected and
     /// observed amino acid frequencies. The bookkeeping after the decision is done on
     /// rank 0 only.
     ///
     /// @return true if the new weight set was accepted (accept_new_weight_set != 0).
     ///
     /// NOTE(review): the signature, parts of the two acceptance conditions and most of
     /// the bookkeeping statements after the decision are not visible in this listing.
3405 {
3406  using namespace basic::options;
3407  using namespace basic::options::OptionKeys;
3408 
3409  int accept_new_weight_set( 0 ); // 0 == weight set rejected; 1 == weight set accepted
3410 
3411  if ( MPI_rank_ != 0 ) {
     // Non-root ranks wait for rank 0's verdict. Without USEMPI this branch leaves
     // accept_new_weight_set at 0.
3412 #ifdef USEMPI
3413  MPI_Recv( & accept_new_weight_set, 1, MPI_INT, 0, tag_, MPI_COMM_WORLD, & stat_ );
3414 #endif
3415  } else {
3416 
3417  if ( option[ optE::fit_reference_energies_to_aa_profile_recovery ] ) {
3418 
     // Negative weight: see the comment further down about maximizing cross entropy.
3419  Real const Wcross_ent = -0.1;
3420  // typically x-entropy is ~3. typical seq. rec is ~0.30. make x-entropy 2x as important as seq. recovery
3421 
3422  if ( outer_loop_counter_ <= 2 && inner_loop_counter_ == 4 ) { /// perform 4 rounds the first AND SECOND time thru the outer loop
3423  accept_new_weight_set = 1;
3424  }
3425 
3426  // but after this, accept a weight set if the sequence recovery and the entropy improve - don't force a certain number of
3427  // inner iterations before accepting a weight set
3428  // the weight on the entropy term below decided how important entropy is relative to overall sequence recovery
3429  // the weight is negative so that we can MAXIMIZE the cross entropy (when typically you minimize cross entropy)
3430  // there's two ways to make the decision whether or not to accept a weight set
3431  // 1) add seq recovery and wtd cross entropy and accept if the sum is better
3432  // 2) accept only if seq recovery is better and wtd cross entropy is better
3433  //
3434  // Reverting my change to something new: force 3 runs of the outer loop before we accept a weight set. the entropy just
3435  // doesn't kick in until the later rounds.
3436 
     // cross_entropy = - sum over the canonical amino acids of
     //   aa_freq_exp_[ii] * log( aa_freq_obs_[ii] + 1e-5 ).
     // The 1e-5 keeps the logarithm finite when an observed frequency is zero.
3437  Real cross_entropy( 0.0 );
3438  for ( Size ii = 1; ii <= core::chemical::num_canonical_aas; ++ii ) {
3439  cross_entropy -= aa_freq_exp_[ ii ] * std::log( aa_freq_obs_[ ii ] + 1e-5 );
3440  }
3441  Real weighted_cross_entropy = cross_entropy * Wcross_ent;
3442 
3443  TR << "decide_if_sequence_recovery_improved(): inner loop recovery rate: " << inner_loop_sequence_recovery_rate_
3444  << ", cross entropy: " << cross_entropy << ", weighted cross entropy: " << weighted_cross_entropy
3445  << ", inner loop count: " << inner_loop_counter_ << std::endl;
3446 
     // Only evaluated from the third outer iteration on, and only if the forced
     // acceptance above did not already fire.
     // NOTE(review): the final clause of this condition is not visible in this listing;
     // judging by the values logged below it presumably compares the inner loop recovery
     // and weighted cross entropy against the outer loop values -- confirm.
3447  if ( outer_loop_counter_ > 2 &&
3448  /* inner_loop_counter_ > 3 && */
3449  accept_new_weight_set == 0 &&
3451  TR << "decide_if_sequence_recovery_improved(): accepting new weight set: "
3452  << "inner loop recovery rate: " << inner_loop_sequence_recovery_rate_
3453  << ", outer loop recovery rate: " << outer_loop_last_sequence_recovery_rate_
3454  << ", inner loop weighted cross entropy: " << weighted_cross_entropy
3455  << ", outer loop weighted cross entropy: " << outer_loop_seq_profile_cross_entropy_ * Wcross_ent << std::endl;
3456  accept_new_weight_set = 1;
3457  }
3458 
3459  //if ( inner_loop_counter_ == num_inner_iterations() ) {
3460  // accept_new_weight_set = 1;
3461  //}
3462  // ronj just because we hit the max number of inner iterations, don't accept it if it's worse. the inner loop will kill itself and
3463  // ronj we'll just reswarm and mix with the last good weight set.
3464 
     // Remember the (unweighted) cross entropy of the accepted weight set as the new
     // reference for later comparisons.
3465  if ( accept_new_weight_set != 0 ) {
3466  outer_loop_seq_profile_cross_entropy_ = cross_entropy;
3467  }
3468  } else {
     // NOTE(review): the condition that opens this if/else is not visible in this
     // listing; the log message suggests a comparison of the inner and outer loop
     // sequence recovery rates -- confirm.
3470  TR << "decide_if_sequence_recovery_improved(): accepting new weight set: "
3471  << "inner loop recovery rate: " << inner_loop_sequence_recovery_rate_
3472  << ", outer loop recovery rate: " << outer_loop_last_sequence_recovery_rate_
3473  << ", mixing factor: " << mixing_factor_ << std::endl;
3474  accept_new_weight_set = 1;
3475  } else {
3476  accept_new_weight_set = 0;
3477  }
3478  }
3479 
     // Broadcast the verdict; pairs with the MPI_Recv at the top of this function.
3480 #ifdef USEMPI
3481  for ( Size ii = 1; ii < MPI_nprocs_; ++ii ) {
3482  MPI_Send( & accept_new_weight_set, 1, MPI_INT, ii, tag_, MPI_COMM_WORLD );
3483  }
3484 #endif
3485  }
3486 
3487  /// if we're going to accept this weight set, prepare for the next round of sequence optimization
3488  if ( accept_new_weight_set != 0 && MPI_rank_ == 0 ) {
3489 
     // NOTE(review): the statements inside this block that update state (and the ones
     // that print the parameters between the "before"/"after" headers) are not visible
     // in this listing.
3490  if ( ! option[ optE::dont_use_reference_energies ].user() ) {
3492  }
3493 
3494  TR << "decide_if_sequence_recovery_improved(): free_parameters_ before: ";
3496  TR << std::endl;
3497 
3498  TR << "decide_if_sequence_recovery_improved(): free_parameters_ after: ";
3500  TR << std::endl;
3501 
3505 
3506  } else if ( MPI_rank_ == 0 ) {
3507  TR << "decide_if_sequence_recovery_improved(): rejected weight set:\n";
3509  TR << std::endl;
3510 
3511  // accept_new_weight_set is 0
3512  //ronj accept the refEs BUT NOT THE OTHER FREE TERMS on the last iteration to avoid redoing the same entropy work
3513  if ( inner_loop_counter_ == num_inner_iterations() && option[ optE::fit_reference_energies_to_aa_profile_recovery ] ) {
3514  if ( ! option[ optE::dont_use_reference_energies ].user() ) {
3516  }
3517  }
3518  }
3519  return accept_new_weight_set != 0;
3520 
3521 }
3522 
3523 ///
3524 /// @begin IterativeOptEDriver::go()
3525 ///
3526 /// @brief
3527 /// Main loop for the optE protocol. This is the function the apps call to do optE.
3528 ///
     /// @details
     /// After an initial barrier() and divide_up_pdbs(), each iteration visibly runs
     /// optimize_weights(), synchronizes with barrier(), and -- only when
     /// optE::optimize_nat_aa or optE::optimize_pssm is set -- synchronizes again and
     /// asks decide_if_sequence_recovery_improved() whether to leave the inner loop.
     /// TR.Debug messages announce each stage together with the
     /// (outer_loop_counter_, inner_loop_counter_) pair.
     ///
     /// NOTE(review): both loop headers and several of the calls announced by the debug
     /// messages (collect_rotamer_energies, write_new_scorefile, test_sequence_recovery)
     /// are not visible in this listing; the two closing braces before the final message
     /// indicate two nested loops.
3529 void
3531 {
3532 
3533  barrier();
3534  //intialize_free_and_fixed_energy_terms();
3535  TR << "go(): " << node_name( MPI_rank_ ) << std::endl;
3536  divide_up_pdbs();
3537 
3539  TR.Debug << "Node " << MPI_rank_ << " (" << outer_loop_counter_ << "," << inner_loop_counter_ << ") collect_rotamer_energies ..." << std::endl;
3541  TR.Debug << "Node " << MPI_rank_ << " (" << outer_loop_counter_ << "," << inner_loop_counter_ << ") optimize_weights ..." << std::endl;
3542  optimize_weights();
3544  TR.Debug << "Node " << MPI_rank_ << " (" << outer_loop_counter_ << "," << inner_loop_counter_ << ") write_new_scorefile ..." << std::endl;
     // Progress line printed by rank 0 only.
3545  if ( MPI_rank_ == 0 )
3546  TR << "go(): " << node_name( MPI_rank_ ) << " (" << outer_loop_counter_ << "," << inner_loop_counter_ << ")" << std::endl;
3548  TR.Debug << "Node " << MPI_rank_ << " (" << outer_loop_counter_ << "," << inner_loop_counter_ << ") barrier [1] ..." << std::endl;
3549  barrier();
     // Without native-aa or PSSM optimization there is no sequence recovery test, so the
     // enclosing loop is left after a single pass.
3550  if ( ! ( option[ optE::optimize_nat_aa ] || option[ optE::optimize_pssm ] ) ) { break; }
3551  TR.Debug << "Node " << MPI_rank_ << " (" << outer_loop_counter_ << "," << inner_loop_counter_ << ") test_sequence_recovery ..." << std::endl;
3553  TR.Debug << "Node " << MPI_rank_ << " (" << outer_loop_counter_ << "," << inner_loop_counter_ << ") barrier [2] ..." << std::endl;
3554  barrier();
3555  TR.Debug << "Node " << MPI_rank_ << " (" << outer_loop_counter_ << "," << inner_loop_counter_ << ") decide_if_sequence_recovery_improved ..." << std::endl;
     // Accepted weight set: stop iterating the enclosing loop.
3556  if ( decide_if_sequence_recovery_improved() ) break;
3557  }
3558  }
3559 
3560  if ( MPI_rank_ == 0 ) { TR << "go(): DONE with weight optimization." << std::endl; }
3561 }
3562 
     /// @brief
     /// Synchronization point: in a USEMPI build calls MPI_Barrier on MPI_COMM_WORLD;
     /// in every build flushes std::cout afterwards.
     /// NOTE(review): the signature line is not visible in this listing.
3563 void
3565 {
3566 #ifdef USEMPI
3567  MPI_Barrier( MPI_COMM_WORLD );
3568 #endif
     // Flush unconditionally so buffered output is written out at each barrier.
3569  std::cout.flush();
3570 }
3571 
3572 
3573 ///
3574 /// @begin IterativeOptEDriver::get_native_pdb_names
3575 ///
3576 /// @brief
3577 /// Reads in the list of pdb file names to use for the optimization. Uses basic::options::start_file which returns a vector1 of strings.
3578 /// Unfortunately, this method reads data out of the return value of start_file() which is why you have to use a listfile with the
3579 /// -s option for things to work correctly.
3580 ///
     /// @details
     /// The value returned by start_file() is used as the path of a text file; every
     /// line of that file is appended, unmodified, to the returned list.
     ///
     /// @return the collected lines (container "filenames").
     ///
     /// NOTE(review): the return type, the signature and the declaration of
     /// "filenames" are not visible in this listing.
     /// NOTE(review): there is no check that the list file could be opened, and blank
     /// lines or lines with trailing whitespace are stored as-is. If the open fails
     /// getline() fails immediately and whatever "filenames" initially holds
     /// (presumably nothing) is returned without any diagnostic -- confirm callers
     /// cope with that.
3583 {
3584  using namespace basic::options;
3585  using namespace basic::options::OptionKeys;
3586 
3587  // read list file of pdbs
3589 
3590 
3591  std::string const listfile( start_file() );
3592  std::ifstream data( listfile.c_str() );
3593  std::string line;
     // One entry per line of the list file.
3594  while ( getline( data,line ) ){
3595  filenames.push_back( line );
3596  }
3597  data.close();
3598 
3599  return filenames;
3600 }
3601 
3602 
3603 ///
3604 /// @begin IterativeOptEDriver::get_nat_aa_opte_data
3605 ///
3606 /// @brief
3607 /// This function is the heart of the optE protocol.
3608 /// For each position of the protein, we're going to create a new PackerTask that only tries all of the various amino
3609 /// acids at that position. (It also uses any packer task related flags on the command line.)
3610 /// opte_data is a container class for optE data objects. So each position is going to have a PNatAAOptEPositionData
3611 /// object. For each rotamer that's built for each position, the energies for the fixed and free energy terms are
3612 /// stored in a PNatAAOptERotamerData object which gets added to the PNatAAOptEPositionData object. So at the end
3613 /// of this function we have one optEData object which has position info for all position and rotamer data for all
3614 /// rotamers at each position.
3615 ///
     /// @param pdb_name        tag stored on every position data object and used in log messages
     /// @param pose            structure whose residues are visited and for which rotamers are built
     /// @param native_pose     structure that supplies the native amino acid at each position
     /// @param scorefxn        score function used to build rotamers and compute their one-body energy maps
     /// @param score_list      free score types; their energies fill the first vector of each rotamer record
     /// @param fixed_score_vec fixed score types; their energies fill the second vector of each rotamer record
     /// @param opte_data       output container; one position data object is appended per processed residue
     ///
     /// NOTE(review): several lines are not visible in this listing: the function name
     /// line, one statement in the design_with_minpack branch, the set-up of the
     /// unfolded state potential, the creation of the PSSM position data object, the
     /// condition selecting NestedEnergyTermPNatAAOptEPositionData, and the argument of
     /// set_neighbor_count().
3616 void
3618  std::string const & pdb_name,
3619  pose::Pose & pose,
3620  pose::Pose & native_pose,
3621  ScoreFunction const & scorefxn,
3622  ScoreTypes & score_list,
3623  ScoreTypes & fixed_score_vec,
3624  OptEData & opte_data
3625 )
3626 {
3627  using namespace basic::options;
3628  using namespace basic::options::OptionKeys;
3629  using namespace pose;
3630  using namespace pack::task;
3631  using namespace pack::rotamer_set;
3632 
3633  TR.Trace << "Getting native aa opte data for " << pdb_name << ":";
3634 
     // design_task is only used to decide which residues are designable and to set up
     // the score function for packing; the per-residue rotamers are built with a
     // separate single-residue task further down.
3635  PackerTaskOP design_task;
3636  // If desired lock the task to the starting pose to prevent changes as we go on.
3637  // Prevent the task from changing which residues are designed over the course of the runs.
3638  if(option[ optE::constant_logic_taskops_file ].user() ) {
3639  design_task = copy_native_packertask_logic( native_pose,
3640  pose,
3641  task_factory_);
3642  } else if ( option[ optE::design_with_minpack ] ) {
3643  // Don't use extra rotamers during design, but do use extra rotamers when coming up with the
3644  // rotamers used in the weight fitting step. Basically, with design_with_minpack on the command
3645  // line, command line flags that affect the packer no longer affect the design phase.
3646  design_task = task_factory_->create_task_and_apply_taskoperations( pose );
3648  ifcl_operation.apply( pose, *design_task );
3649  } else {
3650  design_task = task_factory_->create_task_and_apply_taskoperations( pose ) ;
3651  }
3652  design_task->set_bump_check( true );
3653  //design_task->or_include_current( true ); // WHOA we definitely don't want to include current.
3654 
3655  scorefxn.setup_for_packing( pose, design_task->repacking_residues(), design_task->designing_residues() );
3656 
3657  // create some unfolded state potential objects here before we go into the pose residue loop to speed things up
     // One zeroed EnergyMap per canonical amino acid, filled by the unfolded state
     // potential for that amino acid's name.
3661  e.resize( chemical::num_canonical_aas );
3662  for ( Size aa=1; aa <= chemical::num_canonical_aas; ++aa ) {
3663  e[ aa ].zero();
3664  unfE_potential.raw_unfolded_state_energymap( chemical::name_from_aa( (chemical::AA) aa ), e[ aa ] );
3665  }
3666  }
3667 
3668  // used to restrict design to one position at a time
3669  utility::vector1< bool > task_mask( pose.total_residue(), false );
     // Only incremented below; its sole consumer is the commented-out trace at the end.
3670  Size num_diffs_between_native_and_input( 0 );
3671 
3672  for ( Size resi = 1; resi <= pose.total_residue(); ++resi ) {
3673 
3674  if ( ! pose.residue(resi).is_protein() ) continue;
3675  // do not consider residues that are not designable
3676  if ( ! design_task->residue_task( resi ).being_designed() ) continue;
3677 
     // Trace the residue as "<number>.<three letter name>", preferring PDB numbering
     // when the pose carries PDB info.
3678  TR.Trace << " ";
3679  if ( pose.pdb_info() ) TR.Trace << pose.pdb_info()->number(resi);
3680  else TR.Trace << resi;
3681  TR.Trace << "." << pose.residue_type(resi).name3();
3682 
3683  // use new naive PackerTask for getting single-residue data
3684  PackerTaskOP single_residue_task = TaskFactory::create_packer_task( pose );
3685  single_residue_task->initialize_from_command_line();
     // The mask is switched on for this residue only and reset right after use so the
     // same vector can serve every iteration.
3686  task_mask[ resi ] = true;
3687  // the following turns off packing of all other residues
3688  single_residue_task->restrict_to_residues( task_mask );
3689  task_mask[ resi ] = false;
3690 
3691  PNatAAOptEPositionDataOP this_pos_data;
3692 
     // PSSM data is used only if it exists for this index and its amino acid agrees with
     // the native one; otherwise a plain position data object is used.
3693  if ( option[ optE::optimize_pssm ] && resi <= pssm_data_.size() ) {
3694  if ( pssm_data_[ resi ].first == native_pose.residue( resi ).aa() ) {
3696  data->set_pssm_probabilities( pssm_data_[ resi ].second );
3697  this_pos_data = data;
3698  } else {
3699  std::cerr << "Warning position " << resi << " in " << pdb_name << " pssm data does not match native amino acid: ";
3700  std::cerr << pssm_data_[ resi ].first << " vs " << native_pose.residue( resi ).aa() << std::endl;
3701  std::cerr << "Falling back on PNatAAOptEPositionData" << std::endl;
3702  this_pos_data = new PNatAAOptEPositionData;
3703  }
3704  } else {
     // Reaching this branch with optimize_pssm on means resi is beyond pssm_data_.
3705  if ( option[ optE::optimize_pssm ]() ) {
3706  TR << "Warning: " << pdb_name << ".fasta.probs is shorter than PDB file!\n";
3707  TR << "Falling back on PNatAAOptEPositionData for residue " << resi << std::endl;
3708  }
3709 
3710  // Create a position data object of the type that has special processing for unfolded state energy calculations.
3711  // If not for this special check, then we'll always be creating the standard position data objects and won't ever
3712  // get to the code that deals with the unfolded state energy.
3713  // Note: This special position data class is not compatible with PSSM optimization.
3715  this_pos_data = new NestedEnergyTermPNatAAOptEPositionData;
3717 
3718  } else {
3719  this_pos_data = new PNatAAOptEPositionData;
3720  }
3721  }
3722 
3723  this_pos_data->tag( pdb_name );
3724  this_pos_data->set_position( resi );
3725  this_pos_data->set_native_aa( native_pose.residue( resi ).aa() );
3726  if ( native_pose.residue( resi ).aa() != pose.residue( resi ).aa() ) {
3727  //std::cout << "native_residue # " << resi << " of " << native_pose.residue( resi ).aa() << " differs with pose residue " << pose.residue( resi ).aa() << std::endl;
3728  ++num_diffs_between_native_and_input;
3729  }
3730  this_pos_data->set_neighbor_count(
3732 
3733  graph::GraphCOP packer_neighbor_graph( pack::create_packer_graph( pose, scorefxn, single_residue_task ) );
3734 
3735  RotamerSetFactory rsf;
3736  RotamerSetOP rotset = rsf.create_rotamer_set( pose.residue( resi ) );
3737 // RotamerSetOP rotset = RotamerSetFactory::create_rotamer_set( pose.residue( resi ) );
3738 
3739  rotset->set_resid( resi );
3740  rotset->build_rotamers( pose, scorefxn, *single_residue_task, packer_neighbor_graph );
3741  scorefxn.prepare_rotamers_for_packing( pose, *rotset );
3742 
3743  // First, need a vector of energy maps
3744  utility::vector1< EnergyMap > emap_vector( rotset->num_rotamers() );
3745 
3746  // Call the new energy map fn
3747  rotset->compute_one_body_energy_maps( pose, scorefxn, *single_residue_task, packer_neighbor_graph, emap_vector );
3748 
3749  for ( Size jj = 1; jj <= rotset->num_rotamers(); ++jj ) {
     // Reference, not a copy: the fa_rep cap below modifies emap_vector[jj] in place.
3750  EnergyMap & emap_total( emap_vector[jj] );
3751 
3752  // Hacky limit for fa_rep
3753  if( emap_total[ fa_rep ] > 10.0 )
3754  emap_total[ fa_rep ] = 10.0;
3755 
3756  utility::vector1< Real > energy_info;
3757  utility::vector1< Real > fixed_energy_info;
3758 
3759  // put all the energies for the free energy terms into the 'energy_info' vector
3760  for( utility::vector1< ScoreType >::iterator score_type_iter = score_list.begin(),
3761  end_iter = score_list.end(); score_type_iter != end_iter; ++score_type_iter ) {
3762  energy_info.push_back( emap_total[ *score_type_iter ] );
3763  }
3764 
3765  // put all the energies for the fixed energy terms into the 'fixed_energy_info' vector
3766  for( utility::vector1< ScoreType >::iterator score_type_iter = fixed_score_vec.begin(),
3767  end_iter = fixed_score_vec.end(); score_type_iter != end_iter; ++score_type_iter ) {
3768  fixed_energy_info.push_back( emap_total[ *score_type_iter ] );
3769  }
3770 
3771  // the data held inside a PNatAAOptERotamerData object is the two vectors we just set above and the amino acid type
3772  PNatAAOptERotamerDataOP new_rot_line = new PNatAAOptERotamerData( (*rotset->rotamer( jj )).aa(), jj, energy_info, fixed_energy_info );
3773 
3774  // this rotamer line information gets added to the PNatAAOptEPositionData object created above
3775  this_pos_data->add_rotamer_line_data( new_rot_line );
3776  }
3777 
3778  // Now that we have rotamer data for all rotamers in the position data object, store the position data object in the
3779  // optE data container (which holds position data objects for all positions).
3780  opte_data.add_position_data( this_pos_data );
3781  }
3782  TR.Trace << std::endl;
3783 
3784  //TR << "get_nat_aa_opte_data(): num_diffs_between_native_and_input: " << num_diffs_between_native_and_input << std::endl;
3785 
3786 }
3787 
3788 ///
3789 /// @begin IterativeOptEDriver::get_nat_rot_opte_data
3790 ///
3791 /// @brief
3792 /// Similar to get_nat_aa_opte_data. See comments there for more info.
3793 ///
     /// @details
     /// For every residue that is packable in the master task, passes the B-factor
     /// filter, is flagged in include_rsd and has at least one rotamer well, one position
     /// data object is filled with the residue's amino acid, phi/psi, rotamer well
     /// counts, native chi angles and native rotamer indices. One record is then added
     /// per built rotamer, holding its rotamer indices, chi angles and the free and
     /// fixed energies. The position data object is finally appended to opte_data.
     ///
     /// @param pdb_name        used in the trace output and as the first part of each position tag
     /// @param pose            structure for which rotamers are built
     /// @param native_pose     supplies native chi angles, B-factors and the amino acid used for chi periodicity
     /// @param include_rsd     per-residue flags; residues whose flag is false are skipped
     /// @param scorefxn        score function; it is also evaluated on both poses up front
     /// @param score_list      free score types
     /// @param fixed_score_vec fixed score types
     /// @param opte_data       output container
     ///
     /// NOTE(review): include_rsd is taken by value, i.e. copied on every call, although
     /// it is only read in the visible code -- a const reference would avoid the copy
     /// (requires changing the declaration too).
     /// NOTE(review): the function name line, one statement in the design_with_minpack
     /// branch, the declaration of srlib, the creation of this_pos_data and the start
     /// of the expression creating new_rot_line are not visible in this listing.
3794 void
3796  std::string const & pdb_name,
3797  pose::Pose & pose,
3798  pose::Pose & native_pose,
3799  utility::vector1<bool> include_rsd,
3800  ScoreFunction const & scorefxn,
3801  ScoreTypes & score_list,
3802  ScoreTypes & fixed_score_vec,
3803  OptEData & opte_data
3804 )
3805 {
3806  using namespace pose;
3807  using namespace pack::task;
3808  using namespace pack::rotamer_set;
3809  using namespace pack::dunbrack;
3810  using namespace chemical;
3811 
3812  TR.Trace << "Getting native rotamer opte data for " << pdb_name << ":";
3813  PackerTaskOP packing_task;
3814  // If desired lock the task to the starting pose to prevent changes as we go on.
3815  if(option[ optE::constant_logic_taskops_file ].user() ) {
3816  packing_task = copy_native_packertask_logic( native_pose, pose, task_factory_);
3817  } else if ( option[ optE::design_with_minpack ] ) {
3818  // Don't use extra rotamers during design, but do use extra rotamers when coming up with the
3819  // rotamers used in the weight fitting step. Basically, with design_with_minpack on the command
3820  // line, command line flags that affect the packer no longer affect the design phase.
3821  packing_task = task_factory_->create_task_and_apply_taskoperations( pose );
3823  ifcl_operation.apply( pose, *packing_task );
3824  } else {
3825  packing_task = task_factory_->create_task_and_apply_taskoperations( pose ) ;
3826  }
3827 
     // Unlike get_nat_aa_opte_data(), bump checking is switched off and the task is
     // restricted to repacking.
3828  packing_task->set_bump_check( false );
3829  packing_task->restrict_to_repacking();
3830 
     // Score both poses before setting up for packing.
3831  scorefxn( native_pose );
3832  scorefxn( pose );
3833 
3834  scorefxn.setup_for_packing( pose, packing_task->repacking_residues(), packing_task->designing_residues() );
3835 
3836  utility::vector1< bool > task_mask( pose.total_residue(), false );
3837  for ( Size resi = 1; resi <= pose.total_residue(); ++resi ) {
3838  // only consider residues that the master task considers packable (via task_factory_)
3839  if ( ! packing_task->residue_task( resi ).being_packed() ) continue;
3840 
3841  //if ( residue_has_unacceptably_bad_dunbrack_energy( native_pose, resi )) continue;
3842  if ( residue_has_bad_bfactor( native_pose, resi ) ) continue;
3843 
3844  //if ( ! pose.residue(resi).is_protein() ) continue;
3845  if ( ! include_rsd[ resi ] ) continue;
3846 
3847  TR.Trace << " ";
3848  if ( pose.pdb_info() ) TR.Trace << pose.pdb_info()->number(resi);
3849  else TR.Trace << resi;
3850  TR.Trace << "." << pose.residue_type(resi).name3();
3851 
3852  // use new naive PackerTask to get data for one residue at a time
3853  PackerTaskOP task = TaskFactory::create_packer_task( pose );
     // NOTE(review): here the task is restricted before initialize_from_command_line()
     // is called, the opposite order from get_nat_aa_opte_data(); presumably equivalent,
     // confirm.
3854  task_mask[ resi ] = true;
3855  task->restrict_to_residues( task_mask );
3856  task->restrict_to_repacking();
3857  task->initialize_from_command_line();
3858  task->set_bump_check( false );
3859  //task->or_include_current( true ); // apl TO DO -- do we want to include the native rotamer?
3860  task_mask[ resi ] = false;
3861 
3862  utility::vector1< Size > rot_wells;
3863 
3864  SingleResidueDunbrackLibrary::n_rotamer_bins_for_aa( pose.residue_type( resi ).aa(), rot_wells );
3865 
     // Residues without any rotamer well are skipped.
3866  if ( rot_wells.size() == 0 ) continue;
3867 
3869  core::pack::dunbrack::RotamerLibrary::get_instance().get_rsd_library( pose.residue_type( resi ) ) );
3870 
     // The library must be a Dunbrack library; the dynamic_cast is asserted first so the
     // static_cast on the next line is known to be valid.
3871  runtime_assert( dynamic_cast< SingleResidueDunbrackLibrary const * > ( srlib() ) );
3872  SingleResidueDunbrackLibraryCAP srdlib( static_cast< SingleResidueDunbrackLibrary const * > ( srlib() ));
3873 
3874 
3876  this_pos_data->aa() = pose.residue( resi ).aa();
     // Tag format: "<pdb_name> <one letter code> <residue index>".
3877  std::string tag_to_assign =
3878  pdb_name + " " +
3879  chemical::oneletter_code_from_aa( pose.residue( resi ).aa() ) + " " +
3880  utility::to_string( resi );
3881  this_pos_data->tag( tag_to_assign );
3882  this_pos_data->phi() = pose.phi( resi );
3883  this_pos_data->psi() = pose.psi( resi );
3884  this_pos_data->set_rotamer_well_counts( rot_wells );
3885  set_aa_periodicity( this_pos_data, native_pose.residue(resi).aa() );
3886  this_pos_data->set_native_rotamer_chi( native_pose.residue(resi).chi() );
3887 
     // NOTE(review): this identity check runs only after native_pose's amino acid and
     // chi angles have already been stored above; a mismatch would be caught here but
     // not earlier.
3888  runtime_assert( native_pose.residue( resi ).aa() == pose.residue( resi ).aa() );
3889 
3890  utility::vector1< Size > nat_rot_indices;
3891  srdlib->get_rotamer_from_chi(
3892  native_pose.residue( resi ).chi(),
3893  nat_rot_indices );
     // Resize so there is exactly one index per entry of rot_wells.
3894  nat_rot_indices.resize( rot_wells.size() );
3895  this_pos_data->set_native_rotamer_index( nat_rot_indices );
3896 
3897  graph::GraphCOP packer_neighbor_graph( pack::create_packer_graph( pose, scorefxn, task ) );
3898 
3899  RotamerSetFactory rsf;
3900  RotamerSetOP rotset = rsf.create_rotamer_set( pose.residue( resi ) );
3901 // RotamerSetOP rotset = RotamerSetFactory::create_rotamer_set( pose.residue( resi ) );
3902 
3903  rotset->set_resid( resi );
3904  rotset->build_rotamers( pose, scorefxn, *task, packer_neighbor_graph );
3905  scorefxn.prepare_rotamers_for_packing( pose, *rotset );
3906 
3907  // First, need a vector of energy maps
3908  utility::vector1< EnergyMap > emap_vector( rotset->num_rotamers() );
3909 
3910  // Call the new energy map fn
3911  rotset->compute_one_body_energy_maps( pose, scorefxn, *task, packer_neighbor_graph, emap_vector );
3912 
3913  //std::cout << "Nrotamers for " << pdb_name << " " << resi << " " << rotset->num_rotamers() << " " << pose.residue_type( resi ).name() << std::endl;
3914 
3915  for ( Size jj = 1; jj <= rotset->num_rotamers(); ++jj ) {
3916  EnergyMap & emap_total( emap_vector[jj] );
3917 
3918  // Hacky limit for fa_rep
3919  if( emap_total[ fa_rep ] > 10.0 ) emap_total[ fa_rep ] = 10.0;
3920 
3921  utility::vector1< Real > free_energy_info;
3922  utility::vector1< Real > fixed_energy_info;
3923 
     // Energies of the free score types, in score_list order.
3924  for( utility::vector1< ScoreType >::iterator score_type_iter = score_list.begin(),
3925  end_iter = score_list.end() ; score_type_iter != end_iter ; ++score_type_iter ) {
3926  free_energy_info.push_back( emap_total[ *score_type_iter ] );
3927  }
3928 
     // Energies of the fixed score types, in fixed_score_vec order.
3929  for( utility::vector1< ScoreType >::iterator score_type_iter = fixed_score_vec.begin(),
3930  end_iter = fixed_score_vec.end() ; score_type_iter != end_iter ; ++score_type_iter ) {
3931  fixed_energy_info.push_back( emap_total[ *score_type_iter ] );
3932  }
3933 
     // Rotamer indices of this rotamer, obtained and resized the same way as the native
     // rotamer indices above.
3934  utility::vector1< Size > rot_index_vector;
3935  srdlib->get_rotamer_from_chi(
3936  rotset->rotamer( jj )->chi(),
3937  rot_index_vector );
3938 
3939  rot_index_vector.resize( rot_wells.size() );
3940 
3941  PNatRotOptERotamerDataOP new_rot_line =
3943  rot_index_vector,
3944  rotset->rotamer( jj )->chi(),
3945  free_energy_info,
3946  fixed_energy_info );
3947 
3948  this_pos_data->add_rotamer_line_data( new_rot_line );
3949  }
3950  // Done with rotamers for this position, store this position data object
3951  opte_data.add_position_data( this_pos_data );
3952 
3953  //TR << "Added rotamer position data for residue " << resi << " in pose " << pdb_name << std::endl;
3954 
3955  }
3956  TR.Trace << std::endl;
3957 
3958 }
3959 
3960 
3961 ///
3962 /// @begin IterativeOptEDriver::set_aa_periodicity
3963 ///
3964 /// @remarks Andrew?
3965 ///
     /// @brief
     /// Sets the native chi periodicity vector on pos_data according to the amino acid.
     ///
     /// @details
     /// The vector has one entry per chi angle handled for that amino acid. Entries are
     /// 360 except for the last chi of asp, phe, tyr (chi 2) and glu (chi 3), which get
     /// 180. For ala, gly and any amino acid not listed, pos_data is left unchanged.
     ///
     /// NOTE(review): the signature line is not visible in this listing; the body uses
     /// the names pos_data and aa.
3966 void
3968 {
3969  using namespace core::chemical;
3970  switch ( aa ) {
     // No chi periodicity is set for these two.
3971  case aa_ala: case aa_gly: break;
     // One chi, period 360.
3972  case aa_cys: case aa_ser: case aa_thr: case aa_val: {
3973  utility::vector1< Real > asym1(1, 360);
3974  pos_data->set_native_chi_periodicity( asym1 );
3975  }
3976  break;
     // Two chis; the second has period 180.
3977  case aa_asp: case aa_phe: case aa_tyr: {
3978  utility::vector1< Real > sym2( 2 ); sym2[ 1 ] = 360; sym2[ 2 ] = 180;
3979  pos_data->set_native_chi_periodicity( sym2 );
3980  }
3981  break;
     // Two chis, both period 360.
3982  case aa_his: case aa_ile: case aa_leu: case aa_asn: case aa_trp: {
3983  utility::vector1< Real > asym2(2, 360);
3984  pos_data->set_native_chi_periodicity( asym2 );
3985  }
3986  break;
     // Three chis; the third has period 180.
3987  case aa_glu: {
3988  utility::vector1< Real > sym3( 3, 360 ); sym3[ 3 ] = 180;
3989  pos_data->set_native_chi_periodicity( sym3 );
3990  }
3991  break;
3992 
     // Three chis, all period 360.
3993  case aa_met: case aa_gln: case aa_pro: {
3994  utility::vector1< Real > asym3( 3, 360 );
3995  pos_data->set_native_chi_periodicity( asym3 );
3996  }
3997  break;
     // Four chis, all period 360.
3998  case aa_arg: case aa_lys: {
3999  utility::vector1< Real > asym4( 4, 360 );
4000  pos_data->set_native_chi_periodicity( asym4 );
4001  }
     // No break after the arg/lys case: control falls through into default, which only
     // breaks, so behavior is unaffected -- but an explicit break would be safer if a
     // case is ever added in between.
4002  default:
4003  break;
4004 
4005  }
4006 }
4007 
4008 
4009 ///
4010 /// @begin IterativeOptEDriver::residue_has_unacceptably_bad_dunbrack_energy
4011 ///
4012 /// @details Precondition: pose must have been scored
4013 ///
     /// @brief
     /// Reports whether the fa_dun entry of the residue's total energies exceeds a
     /// threshold that depends on the amino acid type.
     ///
     /// @details
     /// Thresholds: 10 for cys, ser, thr, val, pro; 15 for asp, phe, his, ile, leu, asn,
     /// trp, tyr; 18 for glu, met, gln; 22 for arg, lys. ala, gly and all other types
     /// always yield false. The comparison is strict (>).
     ///
     /// @return true if the threshold for the residue's amino acid is exceeded.
     ///
     /// NOTE(review): the signature line is not visible in this listing; the body uses
     /// the names pose and resid. The only visible call to this function (in
     /// get_nat_rot_opte_data) is commented out.
4014 bool
4016 {
4017  using namespace core::chemical;
4018  using namespace core::scoring;
4019  switch ( pose.residue_type( resid ).aa() ) {
     // The break after return is unreachable.
4020  case aa_ala: case aa_gly: return false; break;
4021  case aa_cys: case aa_ser: case aa_thr: case aa_val: case aa_pro :
4022  if ( pose.energies().residue_total_energies( resid )[ fa_dun ] > 10 ) return true;
4023  break;
4024  case aa_asp: case aa_phe: case aa_his: case aa_ile: case aa_leu: case aa_asn: case aa_trp: case aa_tyr:
4025  if ( pose.energies().residue_total_energies( resid )[ fa_dun ] > 15 ) return true;
4026  break;
4027  case aa_glu: case aa_met: case aa_gln:
4028  if ( pose.energies().residue_total_energies( resid )[ fa_dun ] > 18 ) return true;
4029  break;
4030  case aa_arg: case aa_lys:
4031  if ( pose.energies().residue_total_energies( resid )[ fa_dun ] > 22 ) return true;
4032  break;
4033  default:
4034  break;
4035  }
     // Reached when the threshold was not exceeded or the type has no threshold.
4036  return false;
4037 }
4038 
4039 
4040 ///
4041 /// @begin IterativeOptEDriver::residue_has_bad_bfactor
4042 /// @brief Reports whether any atom of residue resid has a PDBInfo temperature value above 40.
4043 /// @details pose must have been read from a pdb. If the pose carries no PDBInfo, false is returned.
4044 /// @return true as soon as one atom's temperature exceeds 40; false if none does.
4045 bool
4047 {
4048  using namespace core::pose;
4049  PDBInfoCOP info = pose.pdb_info();
4050  if ( !info ) return false; // no PDBInfo attached, so there is nothing to test
4051 
4052  for ( Size ii = 1; ii <= info->natoms( resid ); ++ii ) { // 1-based walk over the atoms PDBInfo knows for this residue
4053  //std::cout << "Temperature on " << resid << " " << ii << " " << info->temperature( resid, ii ) << std::endl;
4054  if ( info->temperature( resid, ii ) > 40 ) {
4055  return true;
4056  }
4057 
4058  }
4059  return false;
4060 }
4061 
4062 ///
4063 /// @begin IterativeOptEDriver::make_simple_ssd_from_pdb()
4064 ///
4065 /// @brief
4066 /// Helper function to reduce code duplication: scores a structure with sfxn and returns its total energies as a SingleStructureData.
4067 /// @details With pretend_no_fa_rep set, fa_rep entries stay 0.0. NOTE(review): the lines declaring and loading `structure` are not visible here.
4070  core::scoring::ScoreFunctionOP sfxn, bool pretend_no_fa_rep ) const
4071 {
4072  using namespace basic::options;
4073  using namespace basic::options::OptionKeys;
4074 
4076  if ( option[ in::file::centroid_input ] ) { // branch bodies not visible here; presumably centroid vs. full-atom loading -- TODO confirm
4078  } else {
4080  }
4081 
4082  /// score this pose, create SingleStructureData.
4083  (*sfxn)( structure );
4084  utility::vector1< Real > free_data( free_score_list_.size(), 0.0 ); // one slot per free score type, initialised to 0.0
4085  utility::vector1< Real > fixed_data( fixed_score_list_.size(), 0.0 ); // one slot per fixed score type, initialised to 0.0
4086 
4087  for ( Size kk = 1; kk <= free_score_list_.size(); ++kk ) {
4088  if ( !pretend_no_fa_rep || free_score_list_[ kk ] != fa_rep ) // skip fa_rep (leaving 0.0) only when pretend_no_fa_rep is set
4089  free_data[ kk ] = structure.energies().total_energies()[ free_score_list_[ kk ] ];
4090  }
4091  for ( Size kk = 1; kk <= fixed_score_list_.size(); ++kk ) {
4092  if ( !pretend_no_fa_rep || fixed_score_list_[ kk ] != fa_rep ) // same fa_rep rule for the fixed terms
4093  fixed_data[ kk ] = structure.energies().total_energies()[ fixed_score_list_[ kk ] ];
4094  }
4095 
4096  SingleStructureDataOP ssd = new SingleStructureData( free_data, fixed_data );
4097  return ssd;
4098 }
4099 
4100 
4101 ///
4102 /// @begin IterativeOptEDriver::collect_dG_of_binding_data()
4103 ///
4104 /// @brief
4105 /// dG optimization optE data collection: adds one DGBindOptEData per bound/unbound structure pair to optE_data_.
4106 /// @details The DGBindOptEData objects are built only while dG_binding_data_ is 0; later calls re-add the stored objects.
4107 void
4109 {
4110  using namespace basic::options;
4111  using namespace basic::options::OptionKeys;
4112 
4113  if ( dG_binding_data_ == 0 ) { // first call: build and keep the data set
4114  dG_binding_data_ = new OptEData();
4116  // NOTE(review): a line is missing from this listing here; `sfxn` used below is presumably created on it -- TODO confirm
4117  bool const no_fa_rep = option[ optE::pretend_no_ddG_repulsion ]();
4118  for ( Size ii = 1; ii <= dG_bound_unbound_pairs_.size(); ++ii ) {
4119  DGBindOptEDataOP dg_data = new DGBindOptEData();
4120  dg_data->deltaG_bind( dG_binding_[ ii ] ); // dG_binding_ is indexed in parallel with dG_bound_unbound_pairs_
4121  dg_data->bound_struct( make_simple_ssd_from_pdb( dG_bound_unbound_pairs_[ ii ].first, sfxn, no_fa_rep ) ); // .first is passed as the bound structure
4122  dg_data->unbound_struct( make_simple_ssd_from_pdb( dG_bound_unbound_pairs_[ ii ].second, sfxn, no_fa_rep ) ); // .second is passed as the unbound structure
4123  dG_binding_data_->add_position_data( dg_data );
4124  }
4125  }
4126 
4127  for ( OptEPositionDataOPs::const_iterator // every call: hand each stored position-data object to optE_data_
4128  iter = dG_binding_data_->position_data_begin(),
4129  iter_end = dG_binding_data_->position_data_end();
4130  iter != iter_end; ++iter ) {
4131  optE_data_->add_position_data( *iter );
4132  }
4133 }
4134 
4135 
4136 ///
4137 /// @begin IterativeOptEDriver::collect_ddG_of_mutation_data()
4138 ///
4139 /// @brief
4140 /// ddG optimization optE data collection: builds one ddG position-data object per wt/mutant file pair and adds it to optE_data_.
4141 /// @details The per-pair data is built only while ddG_mutation_data_ is 0; every call then appends the stored entries to optE_data_.
4142 void
4144 {
4145  using namespace basic::options;
4146  using namespace basic::options::OptionKeys;
4147 
4148  if ( ddG_mutation_data_ == 0 ) {
4150 
4151  // rescore wt's less often: the three caches below are keyed by <wt file name> + <wt structure name>
4152  std::map< std::string, std::pair< SingleStructureDataOP, std::string > > structure_map; // key -> ( scored data, wt sequence )
4153  std::map< std::string, EnergyMap > structure_energy_map; // key -> raw unfolded-state energy map of the wt
4154  std::map< std::string, Energy > structure_bestfarep_map; // key -> lowest wt fa_rep seen up to that structure
4155 
4156 
4158 
4159  if ( MPI_rank_ == 0 ) {
4160  TR_VERBOSE << "collect_ddG_of_mutation_data(): created sfxn for calculating ddGs" << std::endl;
4161  sfxn->show( std::cout );
4162  }
4163 
4164  // for each line of the input ddG data file...
4165  // the first column is a text file that contains the names of wt pdbs
4166  // the second column is another text file that contains the names of mutant pdbs
4167  // the third column gets added as the experimental ddG for this mutation
4168 
4169  // this input system allows one to have multiple structures of the same exact sequence. this way, if a mutation is causing a slight clash
4170  // or maybe creating a hydrogen bond in one structure but not in another, the better structure will be used for weight optimization.
4171  // this can make a big difference in energies depending on what the weight on the fa_rep or hbond term, etc, is.
4172  for ( Size ii = 1; ii <= ddg_mut_wt_pairs_.size(); ++ii ) {
4173 
4174  DDGMutationOptEDataOP ddg_data;
4176 
4177  // Create a position data object of the type that has special processing for unfolded state energy calculations.
4178  // If not for this special check, then we'll always be creating the standard position data objects and won't ever
4179  // get to the code that deals with the unfolded state energy.
4181  ddg_data = new NestedEnergyTermDDGMutationOptEData;
4182  } else {
4183  ddg_data = new DDGMutationOptEData;
4184  }
4185 
4186  // save the experimental ddg for this wt/mut list-of-files pair
4187  ddg_data->set_experimental_ddg( ddGs_[ ii ] );
4188 
4189  utility::file::FileName wts(ddg_mut_wt_pairs_[ ii ].first);
4190  std::string file_extension = wts.ext();
4191 
4192  utility::vector1< std::string > wt_pdb_names, mut_pdb_names;
4193 
4194  bool read_silent( false ); // set to true below when EITHER the wt or the mutant file has the "out" extension; both loading loops consult it
4197 
4198  if(file_extension.compare("list") == 0){
4199 
4200  /// Read names of wt pdbs; i.e. open up the filename given in the 1st column of the ddG data file
4201  /// and read out the strings of pdbs listed there?
4202  TR << "collect_ddG_of_mutation_data(): reading file '" << wts() << "' to get list of wt pdb names." << std::endl;
4203  std::ifstream wt_pdblist( wts().c_str() );
4204  while ( wt_pdblist ) { // the stream is tested before each read, so the last (failed) read leaves wt_pdb empty and it is discarded below
4205  std::string wt_pdb;
4206  wt_pdblist >> wt_pdb;
4207  if ( wt_pdb != "" ) wt_pdb_names.push_back( wt_pdb );
4208  }
4209  }
4210  else if(file_extension.compare("out") == 0){//add in silent file capabilities
4211  read_silent=true;
4212  sfd_wt.set_filename(wts()); //for now assume binary
4213  if(!sfd_wt.read_file(wts())){ // a failed read is only reported on std::cout; execution continues
4214  std::cout << "[ERROR ERROR ERROR] did not read in silent file properly! " << wts() << std::endl;
4215  }
4216  wt_pdb_names = sfd_wt.tags();
4217  }
4218  else{
4219  //file extension not recognized
4220  std::cerr << "ERROR! file " << wts() << " has un-recognized extension " << file_extension << std::endl;
4221  utility_exit();
4222  }
4223 
4224  /// Read names of mut pdbs
4225  bool no_tag_yet_assigned( true );
4226  utility::file::FileName muts(ddg_mut_wt_pairs_[ii].second);
4227  file_extension = muts.ext();
4228 
4229  if(file_extension.compare("list") == 0){
4230  TR << "collect_ddG_of_mutation_data(): reading file '" << muts() << "' to get list of mutant pdb names." << std::endl;
4231  std::ifstream mut_pdblist( muts().c_str() );
4232  while ( mut_pdblist ) {
4233  std::string mut_pdb;
4234  mut_pdblist >> mut_pdb;
4235  if ( mut_pdb != "" ) mut_pdb_names.push_back( mut_pdb );
4236  if ( no_tag_yet_assigned ) { // NOTE(review): runs on the first loop pass whether or not mut_pdb is empty
4237  utility::file::FileName mut1( mut_pdb );
4238  ddg_data->tag( mut1.base() ); // the tag of this data point is the base name of the first token read
4239  no_tag_yet_assigned = false;
4240  }
4241  }
4242  }
4243  else if(file_extension.compare("out") == 0){
4244  read_silent=true;
4245  sfd_mut.set_filename(muts());
4246  if(!sfd_mut.read_file(muts())){ // as for the wt file: reported on std::cout, then execution continues
4247  std::cout << "[ERROR ERROR ERROR] did not read in silent file properly! " << muts() << std::endl;
4248  }
4249  mut_pdb_names = sfd_mut.tags();
4250  if( no_tag_yet_assigned ){
4251  ddg_data->tag(mut_pdb_names[1]); // NOTE(review): assumes the silent file yielded at least one tag -- confirm
4252  no_tag_yet_assigned = false;
4253  }
4254  }
4255  else{
4256  std::cerr << "ERROR! file " << muts() << " has un-recognized extension " << file_extension << std::endl;
4257  utility_exit();
4258  //file extension not recognized
4259  }
4260  std::string wt_seq, mut_seq; // wt and mutant sequences; must differ at exactly one position
4261  Real best_wt_rep( 12345678 ), best_mut_rep( 12345678 ); // large starting values; replaced below by any lower fa_rep total
4262  bool collect_best_rep( option[ optE::exclude_badrep_ddGs ].user() ); // presumably true only when the option was given explicitly -- TODO confirm
4263 
4264  for ( Size jj = 1; jj <= wt_pdb_names.size(); ++jj ) {
4265 
4266  //
4267  // since a given wt protein might have a few hundred characterized mutants, there's no point in scoring the
4268  // wild type structure for each of those hundred mutants. we can score the wt once and save that score for
4269  // all of the mutants of that structure. ingenious time-saver thanks to APL. -ronj
4270  if ( structure_map.find( wts()+wt_pdb_names[ jj ] ) == structure_map.end() ) { //wt_pdb_name not already in structure_map
4271 
4272  core::pose::Pose wt_structure;
4273  if(!read_silent){ // NOTE(review): read_silent may have been set by the mutant file rather than the wt file
4274  if ( option[ in::file::centroid_input ] ) {
4275  core::import_pose::centroid_pose_from_pdb( wt_structure, wt_pdb_names[ jj ] );
4276  } else {
4277  core::import_pose::pose_from_pdb( wt_structure, wt_pdb_names[ jj ] );
4278  }
4279  }else{
4280  core::io::silent::SilentStructOP ss = sfd_wt[wt_pdb_names[ jj ]];
4282  ss->fill_pose(wt_structure,*rsd_set); // the line defining rsd_set is not visible in this listing
4283  }
4284 
4285  /// make sure sequences match across poses...
4286  if ( jj == 1 ) {
4287  wt_seq = wt_structure.sequence();
4288  //wt_structure.dump_pdb("wt_structure.pdb");
4289  } else {
4290  if ( wt_seq != wt_structure.sequence() ) {
4291  std::cerr << "Wild type sequence inconsistent across wts " << std::endl;
4292  std::cerr << wt_seq << std::endl;
4293  std::cerr << wt_structure.sequence() << std::endl;
4294  std::cerr << "Node " << MPI_rank_ << " " << wt_pdb_names[ jj ] << " and " << wt_pdb_names[ 1 ] << std::endl;
4295  wt_structure.dump_pdb("offending_wt_sequence.pdb");
4296  utility_exit();
4297  }
4298  }
4299 
4300  /// score this pose, create SingleStructureData.
4301  utility::vector1< Real > free_data( free_score_list_.size(), 0.0 );
4302  utility::vector1< Real > fixed_data( fixed_score_list_.size(), 0.0 );
4303 
4304  (*sfxn)( wt_structure );
4305 
4306  // Adding a special check for whether or not surface scoring is in use. If yes, then we need to zero out the value for
4307  // the surface energy. It appears to be hurting the correlations for ddG rather than helping. The EnergyMethod returns
4308  // 0.0 when the residue_energy() method is called, but at the end of scoring in finalize_total_energy(), the total
4309  // surface score for the pose is calculated and placed into the pose.energies() object.
4310  wt_structure.energies().total_energies()[ scoring::surface ] = 0.0;
4311 
4312  if ( collect_best_rep ) {
4313  if ( wt_structure.energies().total_energies()[ fa_rep ] < best_wt_rep ) {
4314  best_wt_rep = wt_structure.energies().total_energies()[ fa_rep ];
4315  }
4316  structure_bestfarep_map[ wts()+wt_pdb_names[ jj ] ] = best_wt_rep; // stores the running minimum so far, not this structure's own fa_rep
4317  }
4318 
4319  for ( Size kk = 1; kk <= free_score_list_.size(); ++kk ) {
4320  if ( !option[ optE::pretend_no_ddG_repulsion ] || free_score_list_[ kk ] != fa_rep ) // fa_rep stays 0.0 when the option is on
4321  free_data[ kk ] = wt_structure.energies().total_energies()[ free_score_list_[ kk ] ];
4322  }
4323  for ( Size kk = 1; kk <= fixed_score_list_.size(); ++kk ) {
4324  if ( !option[ optE::pretend_no_ddG_repulsion ] || fixed_score_list_[ kk ] != fa_rep )
4325  fixed_data[ kk ] = wt_structure.energies().total_energies()[ fixed_score_list_[ kk ] ];
4326  }
4327  SingleStructureDataOP ssd = new SingleStructureData( free_data, fixed_data );
4328  ddg_data->add_wt( ssd );
4329  structure_map[ wts()+wt_pdb_names[ jj ] ] = std::make_pair( ssd, wt_seq ); // remember the scored wt for later wt/mutant pairs
4330 
4331  // The 'unfolded' energy term, if in use, also needs special handling. However, at this point, we don't have a
4332  // set of weights. So we can only store the unweighted energy term energies that go into the unfolded energy
4333  // here and not actually calculate a final unfolded energy that can be placed in the pose emap. This case will be
4334  // handled in the same way as PNatAA handles it, with a class that extends DDGOptEData. But, we need to set that
4335  // extra variable here. Regardless of whether the wild-type structures have already been encountered or not,
4336  // set the emap here, on this first iteration through the list of wild-type structures.
4338  if ( jj == 1 ) {
4339  EnergyMap e;
4340  unfE_potential.pose_raw_unfolded_state_energymap( wt_structure, e );
4341  (dynamic_cast< protocols::optimize_weights::NestedEnergyTermDDGMutationOptEData * >( ddg_data() ))->set_wt_unfolded_energies_emap( e );
4342  structure_energy_map[ wts()+wt_pdb_names[ jj ] ] = e;
4343  }
4344  }
4345 
4346  } else {
4347  // else, this wild-type structure (or list of wild-type structures) has already been encountered previously. So don't waste
4348  // time scoring it again. Just store the results from the previous one. But make sure to also store the best rep and
4349  // unfolded energy.
4350  if ( jj == 1 ) {
4351  wt_seq = structure_map[ wts()+wt_pdb_names[ jj ] ].second;
4352  }
4353 
4354  if ( collect_best_rep ) {
4355  best_wt_rep = structure_bestfarep_map[ wts()+wt_pdb_names[ jj ] ];
4356  }
4357 
4359  if ( jj == 1 ) {
4361  ddg_data() ))->set_wt_unfolded_energies_emap( structure_energy_map[ wts()+wt_pdb_names[ jj ] ] );
4362  }
4363  }
4364 
4365  ddg_data->add_wt( structure_map[ wts()+wt_pdb_names[ jj ] ].first );
4366 
4367  }
4368 
4369  }
4370 
4371  for ( Size jj = 1; jj <= mut_pdb_names.size(); ++jj ) { // mutants are always loaded and scored; only wts are cached
4372  core::pose::Pose mut_structure;
4373  if(!read_silent){
4374  if ( option[ in::file::centroid_input ] ) {
4375  core::import_pose::centroid_pose_from_pdb( mut_structure, mut_pdb_names[ jj ] );
4376  } else {
4377  core::import_pose::pose_from_pdb( mut_structure, mut_pdb_names[ jj ] );
4378  }
4379  }else{
4380  core::io::silent::SilentStructOP ss = sfd_mut[mut_pdb_names[ jj ]];
4381  ss->fill_pose(mut_structure,core::chemical::ChemicalManager::get_instance()->nonconst_residue_type_set(core::chemical::FA_STANDARD)); // always uses the FA_STANDARD residue type set
4382  }
4383 
4384  /// make sure sequences match across poses...
4385  if ( jj == 1 ) {
4386  mut_seq = mut_structure.sequence();
4387  } else {
4388  if ( mut_seq != mut_structure.sequence() ) {
4389  std::cerr << "Mutant sequence inconsistent across muts " << std::endl;
4390  std::cerr << mut_seq << std::endl;
4391  std::cerr << mut_structure.sequence() << std::endl;
4392  std::cerr << "Node " << MPI_rank_ << " " << mut_pdb_names[ jj ] << " and " << mut_pdb_names[ 1 ] << std::endl;
4393  mut_structure.dump_pdb("offending_mutant_structure_seq.pdb");
4394  utility_exit();
4395  }
4396  }
4397 
4398  /// score this pose, create SingleStructureData.
4399  utility::vector1< Real > free_data( free_score_list_.size(), 0.0 );
4400  utility::vector1< Real > fixed_data( fixed_score_list_.size(), 0.0 );
4401 
4402  (*sfxn)( mut_structure );
4403 
4404  // Taking out the calculation of the surface score for ddG optimization. It appears to be hurting the correlations rather
4405  // than helping.
4406  mut_structure.energies().total_energies()[ scoring::surface ] = 0.0;
4407 
4408  if ( collect_best_rep ) {
4409  if ( mut_structure.energies().total_energies()[ fa_rep ] < best_mut_rep ) {
4410  best_mut_rep = mut_structure.energies().total_energies()[ fa_rep ];
4411  }
4412  }
4413 
4414 
4415  for ( Size kk = 1; kk <= free_score_list_.size(); ++kk ) {
4416  if ( !option[ optE::pretend_no_ddG_repulsion ] || free_score_list_[ kk ] != fa_rep )
4417  free_data[ kk ] = mut_structure.energies().total_energies()[ free_score_list_[ kk ] ];
4418  }
4419  for ( Size kk = 1; kk <= fixed_score_list_.size(); ++kk ) {
4420  if ( !option[ optE::pretend_no_ddG_repulsion ] || fixed_score_list_[ kk ] != fa_rep )
4421  fixed_data[ kk ] = mut_structure.energies().total_energies()[ fixed_score_list_[ kk ] ];
4422  }
4423  SingleStructureDataOP ssd = new SingleStructureData( free_data, fixed_data );
4424  ddg_data->add_mutant( ssd );
4425 
4426  // See note above for explanation of what's needed to correctly handle the unfolded state energy
4428  if ( jj == 1 ) {
4429  EnergyMap e;
4430  unfE_potential.pose_raw_unfolded_state_energymap( mut_structure, e );
4431  (dynamic_cast< protocols::optimize_weights::NestedEnergyTermDDGMutationOptEData * >( ddg_data() ))->set_mut_unfolded_energies_emap( e );
4432  }
4433  }
4434 
4435  }
4436 
4437  /// Find the discrepant position;
4438  //TR << "collect_ddG_of_mutation_data(): looking for discrepant position between wt and mut structures." << std::endl;
4439  int discrepant_position( -1 ); // 0-based index into the sequence strings; -1 means no difference found yet
4440  if ( wt_seq.size() != mut_seq.size() ) {
4441  std::cerr << "ERROR: Wild type and mutant sequences of different length: " << std::endl;
4442  std::cerr << wt_seq << std::endl;
4443  std::cerr << mut_seq << std::endl;
4444  std::cerr << "Rank " << MPI_rank_ << " " << wt_pdb_names[ 1 ] << " vs " << mut_pdb_names[ 1 ] << std::endl;
4445  utility_exit();
4446  } else {
4447  for ( Size jj = 0; jj < wt_seq.size(); ++jj ) {
4448  if ( discrepant_position == -1 ) {
4449  if ( wt_seq[ jj ] != mut_seq[ jj ] ) {
4450  discrepant_position = jj;
4451  }
4452  } else if ( wt_seq[ jj ] != mut_seq[ jj ] ) { // a second differing position is an error
4453  std::cerr << "Error: Wild type and mutant sequences differ at more than one position: "
4454  << discrepant_position << " " << jj << std::endl;
4455  std::cerr << wt_seq << std::endl;
4456  std::cerr << mut_seq << std::endl;
4457  std::cerr << "Rank " << MPI_rank_ << " " << wt_pdb_names[ 1 ] << " vs " << mut_pdb_names[ 1 ] << std::endl;
4458  utility_exit();
4459  }
4460  }
4461  }
4462  if ( discrepant_position == -1 ) {
4463  std::cerr << "ERROR: mutant and wild type sequences must differ by at least one position"; // NOTE(review): no newline or space before the "Rank" text that follows
4464  std::cerr << "Rank " << MPI_rank_ << " " << wt_pdb_names[ 1 ] << " vs " << mut_pdb_names[ 1 ] << std::endl;
4465  utility_exit();
4466  }
4467  ddg_data->set_wt_aa( chemical::aa_from_oneletter_code( wt_seq[ discrepant_position ] ));
4468  ddg_data->set_mut_aa( chemical::aa_from_oneletter_code( mut_seq[ discrepant_position ] ));
4469 
4470  if ( ! collect_best_rep || best_mut_rep - best_wt_rep < option[ optE::exclude_badrep_ddGs ]() ) { // keep when not filtering, or when the mut-wt fa_rep gap is below the option value
4471  ddG_mutation_data_->add_position_data( ddg_data );
4472  } else { /// else, discard this position...
4473  TR << "Rank " << MPI_rank_ << " Excluding ddG data from " << ddg_mut_wt_pairs_[ ii ].second
4474  << " with mt-wt rep delta: "<< best_mut_rep << " - " << best_wt_rep
4475  << " = " << best_mut_rep - best_wt_rep << std::endl;
4476  }
4477  }
4478  } // if ddg_mutation_data_ == 0
4479 
4480  for ( OptEPositionDataOPs::const_iterator // every call: hand each stored position-data object to optE_data_
4481  iter = ddG_mutation_data_->position_data_begin(),
4482  iter_end = ddG_mutation_data_->position_data_end();
4483  iter != iter_end; ++iter ) {
4484  optE_data_->add_position_data( *iter );
4485  }
4486 }
4487 
4488 ///
4489 /// @begin IterativeOptEDriver::collect_ddG_of_binding_data()
4490 ///
4491 /// @brief
4492 /// The calculations for ddG of binding for interfaces don't really fit with the ddG stability and dG binding
4493 /// optE modes. This function loads all of the necessary structures for optimization of ddG of binding into the
4494 /// optE framework.
4495 ////
4496 void
4498 {
4499  using namespace core;
4500  using namespace basic::options;
4501  using namespace basic::options::OptionKeys;
4502 
4503  if ( ddG_bind_optE_data_ == 0 ) {
4505 
4506  // rescore wt's less often
4507  std::map< std::string, std::pair< SingleStructureDataOP, std::string > > structure_map;
4508  std::map< std::string, Energy > structure_bestfarep_map;
4509 
4511 
4512  if ( MPI_rank_ == 0 ) {
4513  TR_VERBOSE << "collect_ddG_of_binding_data(): created sfxn for calculating ddGs of binding" << std::endl;
4514  sfxn->show( std::cout );
4515  }
4516 
4517  // for each line of the input ddG data file...
4518  // the first and second columns are text files containing the names of wt and mutant complex pdbs
4519  // the third and fourth columns are text files containing the names of wt and mutant unbounded pdbs
4520  // the fifth column gets added as the experimental ddG of binding for this mutation
4521 
4522  // this input system allows one to have multiple structures of the same exact sequence. this way, if a mutation is causing a slight clash
4523  // or maybe creating a hydrogen bond in one structure but not in another, the better structure will be used for weight optimization.
4524  // this can make a big difference in energies depending on what the weight on the fa_rep or hbond term, etc, is.
4525  for ( Size ii = 1; ii <= ddG_bind_files_.size(); ++ii ) {
4526 
4527  DDGBindOptEDataOP ddg_bind_position_data = new DDGBindOptEData;
4528 
4529  // save the experimental ddg for this wt/mut list-of-files pair
4530  ddg_bind_position_data->set_experimental_ddg_bind( ddGs_binding_[ ii ] );
4531 
4532 
4537 
4538  utility::vector1< std::string > wt_complex_pdb_names, mut_complex_pdb_names, wt_unbounded_pdb_names, mut_unbounded_pdb_names;
4539 
4540  /// Read names of wt complexes pdbs
4541  TR << "collect_ddG_of_binding_data(): reading file '" << wt_complexes_list_file() << "' to get list of wt complex pdb names." << std::endl;
4542  std::ifstream wt_complex_pdblist( wt_complexes_list_file().c_str() );
4543  while ( wt_complex_pdblist ) {
4544  std::string wt_complex_pdb;
4545  wt_complex_pdblist >> wt_complex_pdb;
4546  if ( wt_complex_pdb != "" ) wt_complex_pdb_names.push_back( wt_complex_pdb );
4547  }
4548 
4549  /// Read names of mut complexes pdbs
4550  bool no_tag_yet_assigned( true );
4551 
4552  TR << "collect_ddG_of_binding_data(): reading file '" << mut_complexes_list_file() << "' to get list of mutant complex pdb names." << std::endl;
4553  std::ifstream mut_complex_pdblist( mut_complexes_list_file().c_str() );
4554  while ( mut_complex_pdblist ) {
4555  std::string mut_complex_pdb;
4556  mut_complex_pdblist >> mut_complex_pdb;
4557  if ( mut_complex_pdb != "" ) mut_complex_pdb_names.push_back( mut_complex_pdb );
4558  if ( no_tag_yet_assigned ) {
4559  utility::file::FileName mut1( mut_complex_pdb );
4560  ddg_bind_position_data->tag( mut1.base() );
4561  TR << "collect_ddG_of_binding_data(): assigned tag: '" << mut1.base() << "' to this set of ddG bind files." << std::endl;
4562  no_tag_yet_assigned = false;
4563  }
4564  }
4565 
4566  /// Read names of wt unbounded pdbs
4567  TR << "collect_ddG_of_binding_data(): reading file '" << wt_unbounds_list_file() << "' to get list of wt unbounded pdb names." << std::endl;
4568  std::ifstream wt_unbounded_pdblist( wt_unbounds_list_file().c_str() );
4569  while ( wt_unbounded_pdblist ) {
4570  std::string wt_unbounded_pdb;
4571  wt_unbounded_pdblist >> wt_unbounded_pdb;
4572  if ( wt_unbounded_pdb != "" ) wt_unbounded_pdb_names.push_back( wt_unbounded_pdb );
4573  }
4574 
4575  /// Read names of mut unbounded pdbs
4576  TR << "collect_ddG_of_binding_data(): reading file '" << mut_unbounds_list_file() << "' to get list of mutant unbounded pdb names." << std::endl;
4577  std::ifstream mut_unbounded_pdblist( mut_unbounds_list_file().c_str() );
4578  while ( mut_unbounded_pdblist ) {
4579  std::string mut_unbounded_pdb;
4580  mut_unbounded_pdblist >> mut_unbounded_pdb;
4581  if ( mut_unbounded_pdb != "" ) mut_unbounded_pdb_names.push_back( mut_unbounded_pdb );
4582  }
4583 
4584 
4585  // make sure the wt complexes and wt unbounds have the same sequence; same for the mutant
4586  // however, don't require that the wt and mutant complex (or unbounds) differ in only one position as for ddG stability
4587  std::string wt_complex_seq, mut_complex_seq;
4588  Real best_wt_complex_rep( 12345678 ), best_mut_complex_rep( 12345678 );
4589  bool filtering_bad_ddGs( option[ optE::exclude_badrep_ddGs ].user() );
4590 
4591  for ( Size jj = 1; jj <= wt_complex_pdb_names.size(); ++jj ) {
4592 
4593  // since a given wt protein might have a few hundred characterized mutants, there's no point in scoring the
4594  // wild type structure for each of those hundred mutants. we can score the wt once and save that score for
4595  // all of the mutants of that structure. ingenious time-saver thanks to APL. -ronj
4596  if ( structure_map.find( wt_complexes_list_file()+wt_complex_pdb_names[ jj ] ) == structure_map.end() ) { //wt_pdb_name not already in structure_map
4597 
4598  pose::Pose wt_complex;
4599  core::import_pose::pose_from_pdb( wt_complex, wt_complex_pdb_names[ jj ] );
4600 
4601  /// make sure sequences match across the list of poses...
4602  if ( jj == 1 ) {
4603  wt_complex_seq = wt_complex.sequence();
4604  } else {
4605  if ( wt_complex_seq != wt_complex.sequence() ) {
4606  std::cerr << "wild type complex sequence inconsistent across wts " << std::endl;
4607  std::cerr << wt_complex_seq << std::endl;
4608  std::cerr << wt_complex.sequence() << std::endl;
4609  std::cerr << "Node " << MPI_rank_ << " " << wt_complex_pdb_names[ jj ] << " and " << wt_complex_pdb_names[ 1 ] << std::endl;
4610  wt_complex.dump_pdb("offending_wt_complex_sequence.pdb");
4611  utility_exit();
4612  }
4613  }
4614 
4615  /// score this pose, create SingleStructureData.
4616  utility::vector1< Real > free_data( free_score_list_.size(), 0.0 );
4617  utility::vector1< Real > fixed_data( fixed_score_list_.size(), 0.0 );
4618 
4619  (*sfxn)( wt_complex );
4620 
4621  for ( Size kk = 1; kk <= free_score_list_.size(); ++kk )
4622  { free_data[ kk ] = wt_complex.energies().total_energies()[ free_score_list_[ kk ] ]; }
4623  for ( Size kk = 1; kk <= fixed_score_list_.size(); ++kk )
4624  { fixed_data[ kk ] = wt_complex.energies().total_energies()[ fixed_score_list_[ kk ] ]; }
4625 
4626  if ( filtering_bad_ddGs ) {
4627  if ( wt_complex.energies().total_energies()[ fa_rep ] < best_wt_complex_rep ) {
4628  best_wt_complex_rep = wt_complex.energies().total_energies()[ fa_rep ];
4629  }
4630  structure_bestfarep_map[ wt_complexes_list_file()+wt_complex_pdb_names[ jj ] ] = best_wt_complex_rep;
4631  }
4632 
4633  SingleStructureDataOP ssd = new SingleStructureData( free_data, fixed_data );
4634  ddg_bind_position_data->add_wt_complex( ssd );
4635  structure_map[ wt_complexes_list_file()+wt_complex_pdb_names[ jj ] ] = std::make_pair( ssd, wt_complex_seq );
4636 
4637  } else {
4638 
4639  // else, this wild-type structure (or list of wild-type structures) has already been encountered previously. So don't waste
4640  // time scoring it again. Just store the results from the previous one. But make sure to also store the best rep and
4641  // unfolded energy.
4642  if ( jj == 1 )
4643  wt_complex_seq = structure_map[ wt_complexes_list_file()+wt_complex_pdb_names[ jj ] ].second;
4644 
4645  if ( filtering_bad_ddGs )
4646  best_wt_complex_rep = structure_bestfarep_map[ wt_complexes_list_file()+wt_complex_pdb_names[ jj ] ];
4647 
4648  ddg_bind_position_data->add_wt_complex( structure_map[ wt_complexes_list_file()+wt_complex_pdb_names[ jj ] ].first );
4649  }
4650  }
4651 
4652  for ( Size jj = 1; jj <= mut_complex_pdb_names.size(); ++jj ) {
4653 
4654  pose::Pose mut_complex;
4655  core::import_pose::pose_from_pdb( mut_complex, mut_complex_pdb_names[ jj ] );
4656 
4657  /// make sure sequences match across poses...
4658  if ( jj == 1 ) {
4659  mut_complex_seq = mut_complex.sequence();
4660  } else {
4661  if ( mut_complex_seq != mut_complex.sequence() ) {
4662  std::cerr << "mutant complex sequence inconsistent across muts " << std::endl;
4663  std::cerr << mut_complex_seq << std::endl;
4664  std::cerr << mut_complex.sequence() << std::endl;
4665  std::cerr << "Node " << MPI_rank_ << " " << mut_complex_pdb_names[ jj ] << " and " << mut_complex_pdb_names[ 1 ] << std::endl;
4666  mut_complex.dump_pdb("offending_mutant_complex_structure_seq.pdb");
4667  utility_exit();
4668  }
4669  }
4670 
4671  /// score this pose, create SingleStructureData.
4672  utility::vector1< Real > free_data( free_score_list_.size(), 0.0 );
4673  utility::vector1< Real > fixed_data( fixed_score_list_.size(), 0.0 );
4674 
4675  (*sfxn)( mut_complex );
4676 
4677  for ( Size kk = 1; kk <= free_score_list_.size(); ++kk )
4678  { free_data[ kk ] = mut_complex.energies().total_energies()[ free_score_list_[ kk ] ]; }
4679  for ( Size kk = 1; kk <= fixed_score_list_.size(); ++kk )
4680  { fixed_data[ kk ] = mut_complex.energies().total_energies()[ fixed_score_list_[ kk ] ]; }
4681 
4682  if ( filtering_bad_ddGs ) {
4683  if ( mut_complex.energies().total_energies()[ fa_rep ] < best_mut_complex_rep ) {
4684  best_mut_complex_rep = mut_complex.energies().total_energies()[ fa_rep ];
4685  }
4686  }
4687 
4688  SingleStructureDataOP ssd = new SingleStructureData( free_data, fixed_data );
4689  ddg_bind_position_data->add_mutant_complex( ssd );
4690  }
4691 
4692  for ( Size jj = 1; jj <= wt_unbounded_pdb_names.size(); ++jj ) {
4693 
4694  if ( structure_map.find( wt_unbounds_list_file()+wt_unbounded_pdb_names[ jj ] ) == structure_map.end() ) {
4695 
4696  pose::Pose wt_unbounded;
4697  core::import_pose::pose_from_pdb( wt_unbounded, wt_unbounded_pdb_names[ jj ] );
4698 
4699  /// make sure these unbounded structure sequences match the complex sequences...
4700  /// this will also check to make sure all of the unbounded structure match in sequence
4701  //if ( wt_unbounded.sequence() != wt_complex_seq ) {
4702  // std::cerr << "wild type unbounded sequence inconsistent with complex structure " << std::endl;
4703  // std::cerr << wt_unbounded.sequence() << std::endl;
4704  // std::cerr << wt_complex_seq << std::endl;
4705  // std::cerr << "Node " << MPI_rank_ << " " << wt_unbounded_pdb_names[ jj ] << " and " << wt_complex_pdb_names[ 1 ] << std::endl;
4706  // wt_unbounded.dump_pdb("offending_wt_unbounded_sequence.pdb");
4707  // utility_exit();
4708  //}
4709 
4710  /// score this pose, create SingleStructureData.
4711  utility::vector1< Real > free_data( free_score_list_.size(), 0.0 );
4712  utility::vector1< Real > fixed_data( fixed_score_list_.size(), 0.0 );
4713 
4714  (*sfxn)( wt_unbounded );
4715 
4716  for ( Size kk = 1; kk <= free_score_list_.size(); ++kk )
4717  { free_data[ kk ] = wt_unbounded.energies().total_energies()[ free_score_list_[ kk ] ]; }
4718  for ( Size kk = 1; kk <= fixed_score_list_.size(); ++kk )
4719  { fixed_data[ kk ] = wt_unbounded.energies().total_energies()[ fixed_score_list_[ kk ] ]; }
4720 
4721  SingleStructureDataOP ssd = new SingleStructureData( free_data, fixed_data );
4722  ddg_bind_position_data->add_wt_unbounds( ssd );
4723  structure_map[ wt_unbounds_list_file()+wt_unbounded_pdb_names[ jj ] ] = std::make_pair( ssd, wt_complex_seq ); // use the wt_complex_seq here
4724 
4725  } else {
4726  ddg_bind_position_data->add_wt_unbounds( structure_map[ wt_unbounds_list_file()+wt_unbounded_pdb_names[ jj ] ].first );
4727  }
4728  }
4729 
4730  for ( Size jj = 1; jj <= mut_unbounded_pdb_names.size(); ++jj ) {
4731 
4732  pose::Pose mut_unbounded;
4733  core::import_pose::pose_from_pdb( mut_unbounded, mut_unbounded_pdb_names[ jj ] );
4734 
4735  /// make sure these unbounded structure sequences match the complex sequences...
4736  /// this will also check to make sure all of the unbounded structure match in sequence
4737  //if ( mut_unbounded.sequence() != mut_complex_seq ) {
4738  // std::cerr << "mutant unbounded sequence inconsistent with complex structure " << std::endl;
4739  // std::cerr << mut_unbounded.sequence() << std::endl;
4740  // std::cerr << mut_complex_seq << std::endl;
4741  // std::cerr << "Node " << MPI_rank_ << " " << mut_unbounded_pdb_names[ jj ] << " and " << mut_complex_pdb_names[ 1 ] << std::endl;
4742  // mut_unbounded.dump_pdb("offending_mutant_unbounded_structure_seq.pdb");
4743  // utility_exit();
4744  //}
4745 
4746  /// score this pose, create SingleStructureData.
4747  utility::vector1< Real > free_data( free_score_list_.size(), 0.0 );
4748  utility::vector1< Real > fixed_data( fixed_score_list_.size(), 0.0 );
4749 
4750  (*sfxn)( mut_unbounded );
4751 
4752  for ( Size kk = 1; kk <= free_score_list_.size(); ++kk )
4753  { free_data[ kk ] = mut_unbounded.energies().total_energies()[ free_score_list_[ kk ] ]; }
4754  for ( Size kk = 1; kk <= fixed_score_list_.size(); ++kk )
4755  { fixed_data[ kk ] = mut_unbounded.energies().total_energies()[ fixed_score_list_[ kk ] ]; }
4756 
4757  SingleStructureDataOP ssd = new SingleStructureData( free_data, fixed_data );
4758  ddg_bind_position_data->add_mutant_unbounds( ssd );
4759  }
4760 
4761 
4762  /// Report the discrepant positions
4763  TR << "collect_ddG_of_binding_data(): looking for discrepant positions between wt and mut complex structures: ";
4764  bool one_discrepancy_found( false );
4765  if ( wt_complex_seq.size() != mut_complex_seq.size() ) {
4766  std::cerr << "ERROR: Wild type and mutant complex sequences of different length: " << std::endl;
4767  std::cerr << wt_complex_seq << std::endl;
4768  std::cerr << mut_complex_seq << std::endl;
4769  utility_exit();
4770  } else {
4771  for ( Size jj = 0; jj < wt_complex_seq.size(); ++jj ) {
4772  if ( wt_complex_seq[ jj ] != mut_complex_seq[ jj ] ) {
4773  // we're going to have to store all of the mutations if the get_score method is to work correctly
4774  // but we want to keep the wt and mut and position together
4775  // vector of pairs, with first being the position, and second being a pair of wt and mut aa's
4776  ddg_bind_position_data->add_mutation(
4777  std::make_pair( jj, std::make_pair( chemical::aa_from_oneletter_code( wt_complex_seq[ jj ] ), chemical::aa_from_oneletter_code( mut_complex_seq[ jj ] ) ) ) );
4778  TR << wt_complex_seq[ jj ] << jj << mut_complex_seq[ jj ] << ", ";
4779  one_discrepancy_found = true;
4780  }
4781  }
4782  TR << std::endl;
4783  }
4784  if ( one_discrepancy_found == false ) {
4785  std::cerr << "ERROR: mutant and wild type complex sequences must differ by at least one position";
4786  utility_exit();
4787  }
4788 
4789  if ( filtering_bad_ddGs && ( best_mut_complex_rep - best_wt_complex_rep > option[ optE::exclude_badrep_ddGs ]() ) ) {
4790  TR << "Rank " << MPI_rank_ << " Excluding ddG bind data from " << ddG_bind_files_[ ii ][ DDGBindOptEData::MUT_COMPLEXES_LIST_FILE ]
4791  << " with mut-wt rep delta: "<< best_mut_complex_rep << " - " << best_wt_complex_rep
4792  << " = " << best_mut_complex_rep - best_wt_complex_rep << std::endl;
4793  } else {
4794  ddG_bind_optE_data_->add_position_data( ddg_bind_position_data );
4795  }
4796  }
4797  } // if ddG_bind_optE_data_ == 0
4798 
4799  for ( OptEPositionDataOPs::const_iterator iter = ddG_bind_optE_data_->position_data_begin(),
4800  iter_end = ddG_bind_optE_data_->position_data_end(); iter != iter_end; ++iter ) {
4801  optE_data_->add_position_data( *iter );
4802  }
4803 }
4804 
4805 
4806 ///
4807 /// @begin IterativeOptEDriver::zero_aa_counts()
4808 ///
4809 /// @brief
4810 /// Set the counts for the amino acid frequencies (observed and expected) to zero.
4811 ///
/// @details Overwrites every element of aa_obs_ and aa_exp_ with 0 and every element of
/// aa_freq_obs_ and aa_freq_exp_ with 0.0. std::fill only assigns over the existing range, so
/// the four containers keep their current sizes.
///
/// NOTE(review): listing line 4813 (the function signature and opening brace) is absent from
/// this copy of the file.
4812 void
4814  std::fill( aa_obs_.begin(), aa_obs_.end(), 0 );
4815  std::fill( aa_exp_.begin(), aa_exp_.end(), 0 );
4816  std::fill( aa_freq_obs_.begin(), aa_freq_obs_.end(), 0.0 );
4817  std::fill( aa_freq_exp_.begin(), aa_freq_exp_.end(), 0.0 );
4818 }
4819 
4820 
4821 ///
4822 /// @begin IterativeOptEDriver::measure_sequence_recovery()
4823 ///
4824 /// @details Iterates across all the native pdbs. For each one the stored native pose is copied,
/// redesigned with a design mover driven by sfxn, saved into context_poses_, and compared
/// residue-by-residue against the pre-design sequence.
///
/// @param native_pdb_names       names of the native pdbs; its size sets the number of poses processed
/// @param names_for_output_pdbs  file names used when the redesigned poses are dumped
/// @param nresidues_designed     [out] reset to 0, then incremented once per designed protein residue
/// @param nresidues_recovered    [out] reset to 0, then incremented when the designed residue has the
///                               same amino acid as the input residue
/// @return nresidues_recovered / nresidues_designed, or 0.0 if no residue was designed
///
/// @remark Also increments aa_exp_ (input amino acid) and aa_obs_ (designed amino acid) for every
/// counted residue; these are not reset here.
///
/// NOTE(review): listing lines 4827 (function name), 4830 (presumably the score function
/// parameter "sfxn"), 4865, 4870 (mover declarations) and 4929 (declaration of "ptask") are
/// absent from this copy of the file.
4825 ///
4826 Real
4828  utility::vector1< std::string > const & native_pdb_names,
4829  utility::vector1< std::string > const & names_for_output_pdbs,
4831  //std::list< core::pack::task::operation::TaskOperationOP > operation_list,
4832  Size & nresidues_designed,
4833  Size & nresidues_recovered
4834 )
4835 {
4836  using namespace basic::options;
4837  using namespace basic::options::OptionKeys;
4838  using namespace core::pack;
4839  using namespace core::pack::task;
4840  using namespace core::scoring;
4841 
4842  //sleep( MPI_rank_ );
4843  //std::cout << "NODE: " << MPI_rank_ << " with reference weight " << sfxn->weights()[ ref ] << " and refE's: ";
4844  //for ( Size ii = 1; ii <= sfxn->energy_method_options().method_weights( ref ).size(); ++ii ) {
4845  // std::cout << sfxn->energy_method_options().method_weights( ref )[ ii ] << " ";
4846  //}
4847  //std::cout << std::endl;
4848 
 // The two output counters are totals over all poses handled by this call.
4849  nresidues_designed = 0;
4850  nresidues_recovered = 0;
4851 
4852  //ScoreFunctionOP sfxn2 = ScoreFunctionFactory::create_score_function( get_scorefile_name() );
4853 
 // Work on a copy of task_factory_ so the extra operation below does not alter the member.
 // The annealer temperature scale is the current fa_atr weight divided by 0.8
 // (presumably the reference fa_atr weight -- TODO confirm).
4854  TaskFactoryOP task_factory_for_design = new TaskFactory( *task_factory_ );
4855  task_factory_for_design->push_back( new ScaleAnnealerTemperatureOperation( sfxn->weights()[ fa_atr ] / 0.8 ) );
4856 
 // Only rank 0 reports, and only when the surface term has a non-zero weight.
4857  if ( MPI_rank_ == 0 )
4858  if ( sfxn->get_weight( scoring::surface ) != 0.0 )
4859  TR << "measure_sequence_recovery(): designing with surface term, with weight: "
4860  << F(8,4, sfxn->get_weight( scoring::surface )) << std::endl;
4861 
4862  protocols::moves::MoverOP design_mover;
4863 
 // Select the design mover according to optE::design_with_minpack; both branches attach the
 // same task factory and score function.
4864  if ( option[ optE::design_with_minpack ]) {
4866  minpack_mover->task_factory( task_factory_for_design );
4867  minpack_mover->score_function( sfxn );
4868  design_mover = minpack_mover;
4869  } else {
4871  pack_mover->task_factory( task_factory_for_design );
4872  pack_mover->score_function( sfxn );
4873  design_mover = pack_mover;
4874  }
4875 
4876  for ( Size poseindex = 1; poseindex <= native_pdb_names.size(); ++poseindex ) {
4877  TR << "begin measure_sequence_recovery(): PDB: " << native_pdb_names[ poseindex ] << std::endl;
4878  /// read the pdb into a pose
4879  core::pose::Pose pose;
4880 
4881  // the native_poses_ vector1 gets set in compute_energies,
4882  // but that doesn't get called if design_first is on the command line
4883  // so either set the native_poses_ array or load the pdb correctly
4884  if ( option[ optE::design_first ].user() && outer_loop_counter_ == 1 ) {
4885  TR << "measure_sequence_recovery(): design_first in use! pushing "
4886  << native_pdb_names[poseindex] << " onto native_poses_ vector." << std::endl;
4887  core::pose::Pose native_pose;
4888  core::import_pose::pose_from_pdb( native_pose, native_pdb_names[ poseindex ] );
4889  native_poses_.push_back( native_pose );
4890  context_poses_.push_back( native_pose );
4891 
4892  if ( option[ optE::recover_nat_rot ] ) {
4893  rotamer_recovery_context_poses_.push_back( native_pose );
4894  }
4895  }
4896 
4897  // read in the native pose to do design on
4898  pose = native_poses_[ poseindex ];
4899 
4900  //if ( option[ in::file::centroid_input ] ) {
4901  // core::import_pose::centroid_pose_from_pdb( pose, native_pdb_names[ poseindex ] );
4902  //} else {
4903  // core::import_pose::pose_from_pdb( pose, native_pdb_names[ poseindex ] );
4904  //}
4905 
4906  //Real score2 = (*sfxn2)( pose );
4907  //Real score1 = (*sfxn)( pose );
4908 
4909  //if ( std::abs( score1 - score2 ) > 1e-8 ) {
4910  // std::cerr << "Score discrepancy for " << native_pdb_names[ poseindex ]
4911  // << "score1: " << score1 << " score2: " << score2 << std::endl;
4912  //}
4913 
4914  Size const nresidues_pose( pose.total_residue() );
4915 
4916  /// record original sequence (for all residues in pose)
 // This must be captured before the mover runs, because design overwrites the residues in "pose".
4917  utility::vector1< chemical::AA > full_input_sequence( nresidues_pose );
4918  for ( Size resi = 1; resi <= nresidues_pose; ++resi ) {
4919  full_input_sequence[ resi ] = pose.residue(resi).aa();
4920  }
4921 
4922  /// redesign the native pose
4923  design_mover->apply( pose );
4924 
4925  context_poses_[ poseindex ] = pose; // save this for the next round of optimization
4926 
4927  // use a 'dummy' PackerTask to determine which residues were designed in this pose
4928  // this should reflect the same PackerTask that PackRotamersMover used for design
4930  // If desired make sure the task logic is aligned to the native.
4931  if(option[ optE::constant_logic_taskops_file ].user() ) {
4932  ptask = copy_native_packertask_logic( native_poses_[ poseindex ],
4933  pose,
4934  task_factory_for_design);
4935  }
4936  else
4937  ptask = task_factory_for_design->create_task_and_apply_taskoperations( pose ) ;
4938  //PackerTaskOP task_for_design = task_factory_for_design->create_task_and_apply_taskoperations( pose );
4939  //pack_mover->task(task_for_design)
4940 
4941 
4942  /// measure seq recov
4943  for ( Size resi = 1; resi <= nresidues_pose; ++resi ) {
4944  // do not compile statistics for residues that were not designed
4945  if ( ! ptask->being_designed(resi) ) continue;
4946  if ( ! pose.residue(resi).is_protein() ) continue;
4947  ++nresidues_designed;
 // aa_exp_ is indexed by the input amino acid, aa_obs_ by the amino acid present after design.
4948  ++aa_exp_[ full_input_sequence[ resi ]];
4949  ++aa_obs_[ pose.residue(resi).aa() ];
4950  if ( full_input_sequence[ resi ] == pose.residue(resi).aa() ) ++nresidues_recovered;
4951  }
4952 
4953  /// write out new pdb for posterity
4954  if ( ! option[ optE::no_design_pdb_output ] ) {
4955  pose.dump_scored_pdb( names_for_output_pdbs[ poseindex ], *sfxn );
4956  }
4957 
4958  // print out score information for the redesign
4959  (*sfxn)( pose );
4960  print_energies( pose, sfxn, TR.Trace );
4961 
4962  }
4963 
 // Guard against dividing by zero when nothing was designed; recovery then stays 0.0.
4964  Real recovery(0.0);
4965  if ( nresidues_designed != 0 ) {
4966  recovery = ( static_cast< Real > (nresidues_recovered) ) / nresidues_designed;
4967  TR_VERBOSE << "measure_sequence_recovery(): recovery: " << recovery << std::endl;
4968  }
4969 
4970  return recovery;
4971 }
4972 
4973 
4974 ///
4975 /// @begin IterativeOptEDriver::measure_rotamer_recovery()
4976 ///
/// @brief Repacks each stored native pose and reports the fraction of repacked residues whose
/// rotamer bins all match those of the starting pose.
///
/// @param native_pdb_names    only its size is used here: it sets how many entries of
///                            native_poses_ are processed
/// @param (unnamed)           output pdb names; unused because no pdbs are written by this function
/// @param nresidues_repacked  [out] reset to 0, then incremented per counted residue
/// @param nrotamers_recovered [out] reset to 0, then incremented when every chi bin matches
/// @return nrotamers_recovered / nresidues_repacked, or 0.0 if no residue was counted
///
/// @remark Stores each repacked pose in rotamer_recovery_context_poses_[ poseindex ].
///
/// NOTE(review): listing lines 4978 (function name), 4981 (presumably the score function
/// parameter "sfxn"), 5007 (declaration of "pack_mover") and 5033 (declaration of "ptask") are
/// absent from this copy of the file.
4977 Real
4979  utility::vector1< std::string > const & native_pdb_names,
4980  utility::vector1< std::string > const & , // names_for_output_pdbs,
4982  //std::list< core::pack::task::operation::TaskOperationOP > operation_list,
4983  Size & nresidues_repacked,
4984  Size & nrotamers_recovered
4985 )
4986 {
4987  using namespace basic::options;
4988  using namespace basic::options::OptionKeys;
4989  using namespace core::pack::dunbrack;
4990 
4991  //sleep( MPI_rank_ );
4992  //std::cout << "NODE: " << MPI_rank_ << " with reference weight " << sfxn->weights()[ ref ] << " and refE's: ";
4993  //for ( Size ii = 1; ii <= sfxn->energy_method_options().method_weights( ref ).size(); ++ii ) {
4994  // std::cout << sfxn->energy_method_options().method_weights(ref )[ ii ] << " ";
4995  //}
4996  //std::cout << std::endl;
4997 
4998  nresidues_repacked = 0;
4999  nrotamers_recovered = 0;
5000 
5001  //ScoreFunctionOP sfxn2 = ScoreFunctionFactory::create_score_function( get_scorefile_name() );
5002 
 // Copy task_factory_ and append RestrictToRepacking to the copy only.
5003  using namespace core::pack::task;
5004  TaskFactoryOP task_factory_for_repacking = new TaskFactory( *task_factory_ );
5005  task_factory_for_repacking->push_back( new operation::RestrictToRepacking );
5006 
5008  pack_mover->task_factory( task_factory_for_repacking );
5009  pack_mover->score_function( sfxn );
5010 
5011  for ( Size poseindex = 1; poseindex <= native_pdb_names.size(); ++poseindex ) {
5012  /// read the pdb into a pose
 // "pose" is repacked below; "start_pose" is kept unmodified as the comparison reference.
5013  core::pose::Pose pose, start_pose;
5014  pose = native_poses_[ poseindex ];
5015  start_pose = native_poses_[ poseindex ];
5016  Size const nresidues_pose( pose.total_residue() );
5017 
5018  //Real score2 = (*sfxn2)( pose );
5019  //Real score1 = (*sfxn)( pose );
5020  //if ( std::abs( score1 - score2 ) > 1e-8 ) {
5021  // std::cerr << "Score discrepancy for " << native_pdb_names[ poseindex ] << "score1: " << score1 << " score2: " << score2 << std::endl;
5022  //}
5023 
5024  /// repack the pose
5025  pack_mover->apply( pose );
5026 
5027  rotamer_recovery_context_poses_[ poseindex ] = pose; // save this for the next round of optimization
5028 
5029  /// measure rotamer recovery
5030 
5031  // use a 'dummy' PackerTask to determine which residues were repacked in this pose
5032  // this should reflect the same PackerTask that PackRotamersMover used for repacking
5034  // If desired make sure the task logic is aligned to the native.
5035  if(option[ optE::constant_logic_taskops_file ].user() ) {
5036  ptask = copy_native_packertask_logic( native_poses_[ poseindex ],
5037  pose,
5038  task_factory_for_repacking);
5039  }
5040  else
5041  ptask = task_factory_for_repacking->create_task_and_apply_taskoperations( pose ) ;
5042 
5043  for ( Size resi = 1; resi <= nresidues_pose; ++resi ) {
5044  // do not compile statistics for residues that were not repacked
5045  if ( ! ptask->being_packed(resi) ) continue;
5046  if ( ! pose.residue(resi).is_protein() ) continue;
5047  if ( start_pose.residue(resi).nchi() == 0 ) continue; // don't count gly/ala in stats.
5048  ++nresidues_repacked;
5049 
5050  RotVector original_rotbins, repacked_rotbins;
5051  rotamer_from_chi( start_pose.residue(resi), original_rotbins );
5052  rotamer_from_chi( pose.residue(resi), repacked_rotbins );
5053 
 // A residue counts as recovered only if every bin matches. The loop bound comes from the
 // original bins and assumes repacked_rotbins has at least as many entries -- TODO confirm.
5054  bool all_chi_match( true );
5055  for ( Size chi_index = 1; chi_index <=original_rotbins.size(); ++chi_index ) {
5056  if ( original_rotbins[ chi_index ] != repacked_rotbins[ chi_index ] ) {
5057  all_chi_match = false;
5058  break;
5059  }
5060  }
5061  if ( all_chi_match ) ++nrotamers_recovered;
5062  }
5063 
5064  /// don't write out new pdb for posterity
5065  /// pose.dump_pdb( names_for_output_pdbs[ poseindex ] );
5066 
5067  }
5068 
 // Guard against dividing by zero when no residue was counted.
5069  Real recovery( 0.0 );
5070  if ( nresidues_repacked != 0 ) {
5071  recovery = ( static_cast< Real > (nrotamers_recovered) ) / nresidues_repacked;
5072  }
5073  return recovery;
5074 }
5075 
5076 
5077 
5078 ///
5079 /// @begin IterativeOptEDriver::opte_weight_mixing_factor
5080 ///
5081 Real
5082 IterativeOptEDriver::opte_weight_mixing_factor( Size outer_loop_counter, Size inner_loop_counter )
5083 {
5084  if ( outer_loop_counter == 1 ) {
5085  return 1.0;
5086  } else if ( inner_loop_counter <= 5 ) {
5087  return ( 1.0 / ( outer_loop_counter + inner_loop_counter) );
5088  } else {
5089  return 0.1;
5090  }
5091 }
5092 
5093 ///
5094 /// @begin IterativeOptEDriver::initialize_free_and_fixed()
5095 ///
5096 /// @brief
5097 /// Reads in the files specified by opt_e::free and opt_e::fixed. Figures out what ScoreType the user placed on each line of the file
5098 /// and then sets the free_parameters array with that ScoreType. If the user does not place a starting weight, a random starting
5099 /// weight is given for that type. Also sets the fixed terms in fixed_parameters. Both of these EnergyMap references that are
5100 /// passed in are actually vectors of EnergyMaps? Either way, the free and fixed params are set in this method.
5101 /// If no fixed or free files are found, then there are some hardcoded defaults that get used.
5102 ///
5103 void
5105 {
5106  using namespace basic::options;
5107  using namespace basic::options::OptionKeys;
5108 
5109  if ( option[ optE::free ].user() && option[ optE::fixed ].user() ) {
5110  utility::io::izstream input_free( option[ optE::free ]() );
5111  if( !input_free ) utility_exit_with_message("Couldn't find input file for 'free' parameters");
5112  Size free_line_number = 1;
5113  while ( input_free ) {
5115  if ( line_tokens.size() == 0 ) {
5116  // noop
5117  } else if ( line_tokens.size() == 1 && ! option[ optE::design_first ].user() ) {
5118  // free value randomized
5119  ScoreType free_score_type = ScoreTypeManager::score_type_from_name( line_tokens[ 1 ] );
5120  free_parameters[ free_score_type ] = optE_RG.uniform();
5121  } else if ( line_tokens.size() == 2 ) {
5122  ScoreType free_score_type = ScoreTypeManager::score_type_from_name( line_tokens[ 1 ] );
5123  Real free_starting_weight = utility::from_string( line_tokens[ 2 ], Real(0.0) );
5124  free_parameters[ free_score_type ] = free_starting_weight;
5125  if ( option[ optE::randomly_perturb_starting_free_weights ].user() && free_parameters[ free_score_type ] != 0.0 ) {
5126  Real perturb_range = option[ optE::randomly_perturb_starting_free_weights ]();
5127  free_parameters[ free_score_type ] += 2 * perturb_range * optE_RG.uniform() - perturb_range;
5128  if ( free_parameters[ free_score_type ] == 0.0 ) {
5129  free_parameters[ free_score_type ] = 0.0001; // correct if we should accidentally end up here.
5130  }
5131  }
5132  } else {
5133  if ( option[ optE::design_first ].user() ) {
5134  std::cerr << "\n\n";
5135  std::cerr << "Error reading weight file line: " << free_line_number << " ";
5136  for ( Size ii = 1; ii <= line_tokens.size(); ++ii ) {
5137  std::cerr << line_tokens[ ii ] << " ";
5138  }
5139  std::cerr << std::endl << "Expected exactly 2 arguments (i.e. you cannot ask for a random starting weight!) since optE:design_first flag found on command line" << std::endl;
5140  } else {
5141  std::cerr << "Error reading free weight file line: " << free_line_number << " ";
5142  for ( Size ii = 1; ii <= line_tokens.size(); ++ii ) {
5143  std::cerr << ii << ": " << line_tokens[ ii ];
5144  }
5145  std::cerr << std::endl << "Expected only 2 tokens" << std::endl;
5146  }
5147  utility_exit();
5148  }
5149  ++free_line_number;
5150  }
5151 
5152  utility::io::izstream input_fixed( option[ optE::fixed ]() );
5153  if( !input_fixed ) utility_exit_with_message("Couldn't find input file for 'fixed' parameters");
5154  Size fixed_line_number = 1;
5155  while ( input_fixed ) {
5157  if ( line_tokens.size() == 0 ) {
5158  // noop
5159  } else if ( line_tokens.size() == 2 ) {
5160  ScoreType fixed_score_type = ScoreTypeManager::score_type_from_name( line_tokens[ 1 ] );
5161  Real fixed_weight = utility::from_string( line_tokens[ 2 ], Real(0.0) );
5162  fixed_parameters[ fixed_score_type ] = fixed_weight;
5163  if ( free_parameters[ fixed_score_type ] != 0 ) {
5164  std::cerr << "Error reading free weights file. Term '" << line_tokens[ 1 ] << "' is listed as both free and fixed.";
5165  utility_exit();
5166  }
5167  } else {
5168  std::cerr << "Error reading free weight file line: " << free_line_number << " ";
5169  for ( Size ii = 1; ii <= line_tokens.size(); ++ii ) {
5170  std::cerr << ii << ": " << line_tokens[ ii ];
5171  }
5172  std::cerr << std::endl << "Expected exactly 2 tokens" << std::endl;
5173  utility_exit();
5174  }
5175  ++fixed_line_number;
5176  }
5177 
5178 /// HARD CODED DEFAULTS FOR THOSE THAT LIKE RECOMPILING
5179 
5180  } else if ( option[ in::file::centroid_input ] ) {
5181 
5182  free_parameters[ vdw ] = 1; //optE_RG.uniform();
5183  free_parameters[ pair ] = 1; //optE_RG.uniform();
5184  free_parameters[ rama ] = 1; //optE_RG.uniform();
5185  free_parameters[ p_aa_pp ] = 1; //optE_RG.uniform();
5186  free_parameters[ cenpack ] = 1; //optE_RG.uniform();
5187 
5188  fixed_parameters[ env ] = 0.4;
5189  } else {
5190  /*
5191  free_parameters[ fa_rep ] = 1.0;
5192  free_parameters[ fa_sol ] = 1.0;
5193  free_parameters[ fa_dun ] = 1.0;
5194  free_parameters[ fa_pair ] = 1.0;
5195  free_parameters[ p_aa_pp ] = 1.0;
5196  free_parameters[ hbond_bb_sc ] = 1.0;
5197  free_parameters[ hbond_sc ] = 1.0;
5198  free_parameters[ envsmooth ] = 1.0;
5199  */
5200  free_parameters[ envsmooth ] = 0.001;
5201 
5202 
5203  free_parameters[ fa_rep ] = 0.44;
5204  //free_parameters[ fa_sol_apo ] = 0.65;
5205  //free_parameters[ fa_sol_chr ] = 0.65;
5206  //free_parameters[ fa_sol_pol ] = 0.65;
5207  free_parameters[ fa_dun ] = 0.56;
5208  //free_parameters[ fa_pair ] = 0.49;
5209  free_parameters[ p_aa_pp ] = 0.64;
5210  //free_parameters[ hbond_chr_chr ] = 1.1;
5211  //free_parameters[ hbond_chr_pol ] = 1.1;
5212  //free_parameters[ hbond_pol_pol ] = 1.1;
5213 
5214  fixed_parameters[ fa_atr ] = 0.8;
5215  fixed_parameters[ hbond_sr_bb ] = 1.17;
5216  fixed_parameters[ hbond_lr_bb ] = 1.17;
5217  fixed_parameters[ dslf_ss_dst ] = 1.0;
5218  fixed_parameters[ dslf_cs_ang ] = 1.0;
5219  fixed_parameters[ dslf_ss_dih ] = 1.0;
5220  fixed_parameters[ dslf_ca_dih ] = 1.0;
5221  }
5222 
5223  if ( MPI_rank_ == 0 ) {
5224  for ( Size ii = 1; ii <= n_score_types; ++ii ) {
5225  if ( free_parameters[ (ScoreType) ii ] != 0 ) {
5226  TR_VERBOSE << "initialize_free_and_fixed(): initial free_parameters: " << name_from_score_type( (ScoreType) ii )
5227  << " " << free_parameters[ (ScoreType) ii ] << std::endl;
5228  }
5229  }
5230  //fixed_parameters[ vdw ] = 1;
5231  for ( Size ii = 1; ii <= n_score_types; ++ii ) {
5232  if ( fixed_parameters[ (ScoreType) ii ] != 0 ) {
5233  TR_VERBOSE << "initialize_free_and_fixed(): initial fixed_parameters: " << name_from_score_type( (ScoreType) ii )
5234  << " " << fixed_parameters[ (ScoreType) ii ] << std::endl;
5235  }
5236  }
5237  }
5238 
5239 }
5240 
5241 
5242 ///
5243 /// @begin IterativeOptEDriver::converged
5244 ///
5245 /// @brief
5246 /// This function is not used.
5247 ///
/// @details Compares two sets of free weights and, unless optE::dont_use_reference_energies was
/// given on the command line, two sets of reference energies.
///
/// @param free_parameters_prev     free weights from the previous iteration
/// @param free_parameters_curr     free weights from the current iteration
/// @param reference_energies_prev  reference energies from the previous iteration
/// @param reference_energies_curr  reference energies from the current iteration
/// @return false as soon as any pair of corresponding values differs by more than 0.001;
///         otherwise prints the values to TR and returns true
///
/// NOTE(review): listing line 5249 (the function name and opening parenthesis) is absent from
/// this copy of the file.
5248 bool
5250  core::scoring::EnergyMap & free_parameters_prev,
5251  core::scoring::EnergyMap & free_parameters_curr,
5252  utility::vector1< Real > const & reference_energies_prev,
5253  utility::vector1< Real > const & reference_energies_curr
5254 )
5255 {
5256  using namespace core::scoring;
5257 
 // The two reference-energy vectors are compared element by element below, so they must be the same length.
5258  if ( ! option[ optE::dont_use_reference_energies ].user() ) {
5259  runtime_assert( reference_energies_prev.size() == reference_energies_curr.size() );
5260  }
5261 
5262  for ( Size ii = 1; ii <= n_score_types; ++ii ) {
5263  if ( std::abs( free_parameters_prev[ (ScoreType) ii ] - free_parameters_curr[ (ScoreType) ii ]) > 0.001 ) {
5264  return false;
5265  }
5266  }
5267 
5268  if ( ! option[ optE::dont_use_reference_energies ].user() ) {
5269  for ( Size ii = 1; ii <= reference_energies_prev.size(); ++ii ) {
5270  if ( std::abs( reference_energies_prev[ ii ] - reference_energies_curr[ ii ]) > 0.001 ) {
5271  return false;
5272  }
5273  }
5274  }
5275 
 // Everything is within tolerance: report the weights that are non-zero in either set.
5276  TR << "Converged: " << std::endl;
5277  for ( Size ii = 1; ii <= n_score_types; ++ii ) {
5278  if ( free_parameters_prev[ (ScoreType) ii ] != 0 || free_parameters_curr[ (ScoreType) ii ] != 0)
5279  TR << name_from_score_type( ScoreType( ii ) ) << " prev " << free_parameters_prev[ (ScoreType) ii ] << " curr " << free_parameters_curr[ (ScoreType) ii ] << std::endl;
5280  }
5281 
5282  if ( ! option[ optE::dont_use_reference_energies ].user() ) {
5283  for ( Size ii = 1; ii <= reference_energies_prev.size(); ++ii ) {
5284  TR << ii << " prev " << reference_energies_prev[ ii ] << " curr " << reference_energies_curr[ ii ] << std::endl;
5285  }
5286  }
5287 
5288 
5289  return true;
5290 }
5291 
/// @brief Writes the non-zero entries of "free_parameters" to TR, one "<score type name> <value>"
/// line each, followed by one line per element of "reference_energies".
///
/// NOTE(review): listing line 5293 (the function name and parameter list) is absent from this
/// copy of the file; "free_parameters" and "reference_energies" are presumably its parameters.
5292 void
5294 {
5295  for ( Size ii = 1; ii <= n_score_types; ++ii ) {
5296  if ( free_parameters[ (ScoreType) ii ] != 0 )
5297  TR << name_from_score_type( ScoreType( ii ) ) << " " << free_parameters[ (ScoreType) ii ] << std::endl;
5298  }
 // Reference energies are labelled by their 1-based index and printed even when zero.
5299  for ( Size ii = 1; ii <= reference_energies.size(); ++ii ) {
5300  TR << "Reference energy for " << ii << " " << reference_energies[ ii ] << std::endl;
5301  }
5302 
5303 }
5304 
5305 
5306 ///
5307 /// @begin IterativeOptEDriver::setup_pdbnames_next_round
5308 ///
5309 /// @details
5310 /// This method takes the filenames from the current round and creates new filenames based on what iteration of the
5311 /// outer_loop_counter we're on. So, each iteration we want to use the newest pdbs. This method ensures the filenames
5312 /// get updated.
5313 ///
/// @param outer_loop_counter  iteration number appended to each file name
/// @param pdbs_next_round     [out] resized to match native_pdb_names and overwritten with
///                            "<base>_<outer_loop_counter>.pdb" (prefixed with
///                            "workdir_<MPI_rank_>/" in USEMPI builds)
/// @param native_pdb_names    input pdb file names
///
5314 /// @remark
5315 /// Requires workdir_{0..MPI_numprocs_ - 1 } directories already exist.
5316 ///
/// NOTE(review): listing line 5318 (the function name and opening parenthesis) is absent from
/// this copy of the file.
5317 void
5319  Size const outer_loop_counter,
5320  utility::vector1< std::string > & pdbs_next_round,
5321  utility::vector1< std::string > const & native_pdb_names
5322 )
5323 {
5324  // assumption, native_pdb_names end in ".pdb"
5325  pdbs_next_round.resize( native_pdb_names.size() );
5326  for ( Size ii = 1; ii <= native_pdb_names.size(); ++ii ) {
5327  //std::string native_substr = native_pdb_names[ ii ].substr( 0, native_pdb_names[ ii ].size() - 4 );
 // Only natfilename.base() is used when building the new name.
5328  utility::file::FileName natfilename( native_pdb_names[ ii ] );
5329 #ifndef USEMPI
5330  pdbs_next_round[ ii ] = natfilename.base() + "_" + to_string( outer_loop_counter ) + ".pdb";
5331 #else
5332  // Write to separate directories.
5333  pdbs_next_round[ ii ] = "workdir_" + to_string( MPI_rank_ ) + "/" + natfilename.base() + "_" + to_string( outer_loop_counter ) + ".pdb";
5334 #endif
5335 
5336  }
5337 }
5338 
/// @brief Repacks "pose" with a packer restricted to repacking, then minimizes it with
/// "dfpmin_armijo_nonmonotone_atol" and rescores it with sfxn.
///
/// @param pose  modified in place by the packer and the minimizer, then rescored
///
/// NOTE(review): listing lines 5340 (function name), 5342 (presumably the score function
/// parameter "sfxn"), 5348 (declaration of "packer") and 5357 (declaration of "minmover") are
/// absent from this copy of the file.
5339 void
5341  core::pose::Pose & pose,
5343 ) const
5344 {
5345  using namespace moves;
5346  using namespace core::pack::task;
5347 
 // Task operations: RestrictToRepacking, IncludeCurrent and InitializeExtraRotsFromCommandline.
5349  TaskFactoryOP factory = new TaskFactory;
5350  factory->push_back( new operation::RestrictToRepacking );
5351  factory->push_back( new operation::IncludeCurrent );
5352  factory->push_back( new operation::InitializeExtraRotsFromCommandline );
5353  packer.task_factory( factory );
5354 
5355  packer.apply( pose );
5356 
5358  minmover.min_type( "dfpmin_armijo_nonmonotone_atol" );
5359  minmover.score_function( sfxn );
5360 
5361  minmover.apply( pose );
5362 
 // Final scoring pass on the minimized pose.
5363  ( *sfxn )( pose );
5364 }
5365 
5366 
5367 ///
5368 /// @begin IterativeOptEDriver::load_component_weights()
5369 ///
5370 /// @details input file should be white-space delimited component-name/weight pairs.
/// Every component weight starts at 1.0; the file named by optE::component_weights, when that
/// option is given, overrides the listed components. An unreadable file, an unrecognized
/// component name, or a line with a token count other than 0 or 2 terminates the program.
///
/// @param component_weights  [out] resized to n_optE_data_types and filled as described above
///
/// NOTE(review): listing lines 5373 (function name) and 5388 (declaration of "line_tokens") are
/// absent from this copy of the file.
5371 ///
5372 void
5374  utility::vector1< core::Real > & component_weights
5375 )
5376 {
5377  using namespace basic::options;
5378  using namespace basic::options::OptionKeys;
5379 
5380  component_weights.resize( n_optE_data_types );
5381  std::fill( component_weights.begin(), component_weights.end(), 1.0 );
5382 
5383  if ( option[ optE::component_weights ].user() ) {
5384  utility::io::izstream input( option[ optE::component_weights ]() );
5385  if( !input ) utility_exit_with_message("Couldn't find input file for 'component_weights_file' parameters");
 // nread counts the lines that actually set a weight; line_number counts every loop pass.
5386  Size line_number = 1; Size nread = 0;
5387  while ( input ) {
5389  if ( line_tokens.size() == 0 ) {
5390  // noop
5391  } else if ( line_tokens.size() == 2 ) {
5392  if ( ! OptEPositionDataFactory::is_optE_type_name( line_tokens[ 1 ] )) {
5393  utility_exit_with_message("Error reading optE component weights file: Token 1 on line " + utility::to_string( line_number ) + " " + line_tokens[ 1 ] + " is not recognized as an optE data type" );
5394  }
5395  OptEPositionDataType component_type = OptEPositionDataFactory::optE_type_from_name( line_tokens[ 1 ] );
5396  Real component_weight = utility::from_string( line_tokens[ 2 ], Real(1.0) );
5397  component_weights[ component_type ] = component_weight;
5398  ++nread;
5399  } else {
5400  std::cerr << "Error reading optE component weights file line: " << line_number << " ";
5401  for ( Size ii = 1; ii <= line_tokens.size(); ++ii ) {
5402  std::cerr << ii << ": " << line_tokens[ ii ];
5403  }
5404  std::cerr << std::endl << "Expected exactly 2 tokens" << std::endl;
5405  utility_exit();
5406  }
5407  ++line_number;
5408  }
 // NOTE(review): the stream is known to be good before the loop, so the loop body appears to
 // run at least once and line_number is then >= 2 here. If so, the first warning below can
 // never fire and an empty file is reported as "only blank lines" -- confirm against the
 // missing tokenizing line (5388).
5409  if ( line_number == 1 ) {
5410  TR << "WARNING: read no lines from component weight file: " << option[ optE::component_weights ]() << std::endl;
5411  } else if ( nread == 0 ) {
5412  TR << "WARNING: only blank lines found in component weight file: " << option[ optE::component_weights ]() << std::endl;
5413  }
5414  }
5415 }
5416 
5417 
5418 ///
5419 /// @begin copy_native_packertask_logic
5420 ///
5421 /// @brief
5422 /// Copies the logic in the native task factory from the native_pose
5423 /// to the context pose. The context pose should be filled with a parsable
5424 /// file that does NOT restrict any residues that might be packable in the native.
///
/// @details Builds a task for context_pose from the tag file named by
/// optE::constant_logic_taskops_file, then applies a ReplicateTask operation constructed from
/// native_pose and native_taskfactory to that task.
///
/// @param context_pose        pose the returned task is created for (taken by value)
/// @param native_taskfactory  task factory handed to ReplicateTask together with native_pose
/// @return the task created for context_pose after ReplicateTask has been applied to it
///
/// NOTE(review): listing lines 5425-5426 (return type, function name and the first parameter,
/// presumably "native_pose") are absent from this copy of the file.
5427  core::pose::Pose context_pose,
5428  core::pack::task::TaskFactoryOP native_taskfactory){
5429  using namespace core::pack::task;
5430  TaskFactoryOP context_taskfactory = new TaskFactory;
5431  std::string context_tagfile( option[ optE::constant_logic_taskops_file ]() );
5432 
5433  read_tagfile_to_taskfactory(context_tagfile, context_taskfactory);
5434  PackerTaskOP context_task = context_taskfactory->create_task_and_apply_taskoperations( context_pose );
5435 
5436  // Lock the task to the starting pose
5437  operation::TaskOperationOP mimic_nat_task_op = new operation::ReplicateTask(native_pose, native_taskfactory);
5438  mimic_nat_task_op->apply( context_pose, *(context_task) );
5439 
5440  return context_task;
5441 }
5442 
5443 ///
5444 /// @begin attach_debugger
5445 ///
5446 /// @brief for parallel applications. Wait at a specific point and stay there until
5447 /// you can attach a gdb process (with the --pid <ID> flag in gdb) and internally
5448 /// modify the variable "i" to some non-zero value with a "set var i = 7" command.
5450 {
5451 #ifdef USEMPI
5452  int i = 0;
5453  char hostname[256];
5454  gethostname(hostname, sizeof(hostname));
5455  printf("PID %d on %s ready for attach\n", getpid(), hostname);
5456  fflush(stdout);
5457  while (0 == i)
5458  sleep(5);
5459 #endif
5460 }
5461 
5462 
5463 ///
5464 /// @begin IterativeOptEDriver::node_name()
5465 ///
/// @brief Returns a printable label for an MPI rank: "master node" for rank 0, otherwise
/// "slave node <rank>".
///
/// NOTE(review): listing lines 5466-5467 (return type, signature and opening brace) are absent
/// from this copy of the file; "rank" is presumably the function's only parameter.
5468 
5469  if ( rank == 0 ) {
5470  return "master node";
5471  } else {
5472  std::stringstream r;
5473  r << "slave node " << rank;
5474  return r.str();
5475  }
5476 }
5477 
5478 
5479 ///
5480 /// @begin IterativeOptEDriver::print_energies()
5481 ///
/// @brief Writes a table of weight, unweighted energy and weighted energy for a fixed list of
/// score types, followed by the sum of the weighted energies.
///
/// @details Only the score types listed in the switch below are considered, and of those only
/// the ones with a non-zero weight in sfxn are printed and summed. The unweighted values are
/// read from pose.energies().total_energies(); this function does not score the pose itself.
///
/// @param pose  pose whose stored total energies are reported
/// @param os    stream that receives the table
///
/// NOTE(review): listing lines 5483 (function name) and 5485 (presumably the score function
/// parameter "sfxn") are absent from this copy of the file.
5482 void
5484  pose::Pose & pose,
5486  std::ostream & os /* = std::cout */
5487 )
5488 {
5489  scoring::EnergyMap const & wts( sfxn->weights() );
5490  scoring::EnergyMap const & unweighted_scores( pose.energies().total_energies() );
5491 
5492  os << "---------------------------------------------------" << std::endl;
5493 
5494  // for each energy term, print the weighted energy
 // Accumulate in Real, the same type as the weights and energies, so the total keeps full precision.
5495  Real sum_weighted = 0.0;
5496  for ( int jj = 1; jj <= scoring::n_score_types; ++jj ) {
5497  Real const weight = wts[ scoring::ScoreType(jj) ];
5498 
5499  switch( scoring::ScoreType( jj ) ) {
5500  case scoring::fa_atr:
5501  case scoring::fa_rep:
5502  case scoring::fa_sol:
5503  case scoring::fa_intra_rep:
5504  case scoring::fa_pair:
5505  case scoring::hbond_sr_bb:
5506  case scoring::hbond_lr_bb:
5507  case scoring::hbond_bb_sc:
5508  case scoring::hbond_sc:
5509  case scoring::rama:
5510  case scoring::omega:
5511  case scoring::fa_dun:
5512  case scoring::ref:
5513  case scoring::p_aa_pp:
5514  case scoring::pro_close:
5515  case scoring::surface:
5516  case scoring::unfolded:
 // All listed types share this body: report the term only when its weight is non-zero.
5517  if ( weight != 0.0 ) {
5518  Real const val = unweighted_scores[ scoring::ScoreType(jj) ];
5519  //TR << A(18,scoring::ScoreType(jj)) << ": weight:" << F(5,2,weight) << ", rawE:" << F(5,2,val) << ", weightedE: " << F(5,2, weight * val ) << std::endl;
5520  os << LJ(18,ScoreType(jj)) << F(7,4,weight) << X(4) << F(10,3,val) << X(4) << F(10,3, weight * val ) << std::endl;
5521  sum_weighted += weight * val;
5522  }
5523  break;
5524  default:
5525  break;
5526  }
5527  }
5528 
5529  os << "---------------------------------------------------\n" << LJ(25, "Total weighted score: ") << X(12) << F(10,3,sum_weighted) << std::endl;
5530 
5531 }
5532 
5533 }
5534 }