Rosetta 3.5
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
PointMutScanDriver.cc
Go to the documentation of this file.
1 // -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
2 // vi: set ts=2 noet:
3 //
4 // This file is part of the Rosetta software suite and is made available under license.
5 // The Rosetta software is developed by the contributing members of the Rosetta Commons consortium.
6 // (C) 199x-2009 Rosetta Commons participating institutions and developers.
7 // For more information, see http://www.rosettacommons.org/.
8 
9 /// @file protocols/pmut_scan/PointMutScanDriver.cc
10 /// @brief A protocol that tries to find stability enhancing mutations
11 /// @author Ron Jacak (ron.jacak@gmail.com)
12 
13 // Unit headers
16 
17 //project Headers
18 #include <basic/MetricValue.hh>
19 #include <basic/Tracer.hh>
20 #include <basic/options/util.hh>
21 
22 #include <core/chemical/AA.hh>
24 // AUTO-REMOVED #include <core/init.hh>
26 
27 #include <core/graph/Graph.hh>
30 
35 
36 #include <core/pose/Pose.hh>
37 #include <core/pose/PDBInfo.hh>
40 
44 // AUTO-REMOVED #include <core/scoring/ScoreFunctionInfo.hh>
48 
53 
56 
57 // Utility Headers
58 #include <utility/file/FileName.hh>
59 
60 // Numeric Headers
61 
62 // ObjexxFCL Headers
63 #include <ObjexxFCL/format.hh>
64 
65 // C++ headers
66 #include <iostream>
67 #include <fstream>
68 #include <string>
69 
70 #ifdef USEMPI
71 /// MPI
72 #include <mpi.h>
73 #endif
74 
75 // option key includes
76 #include <basic/options/keys/run.OptionKeys.gen.hh>
77 
78 //Auto Headers
81 #include <utility/vector0.hh>
82 #include <utility/vector1.hh>
83 
84 
85 using namespace basic::options;
86 using namespace basic::options::OptionKeys;
87 
88 using namespace core;
89 using namespace core::pack::task::operation;
90 using namespace core::pack::task;
91 
92 using namespace protocols;
93 using namespace ObjexxFCL::fmt;
94 using namespace utility;
95 
96 
97 namespace protocols {
98 namespace pmut_scan {
99 
100 
101 static basic::Tracer TR("protocols.pmut_scan.PointMutScanDriver");
102 
103 
104 ///
105 /// @begin PointMutScanDriver::PointMutScanDriver
106 ///
107 /// @brief
108 /// Main constructor for the class. What all does it do?
109 ///
110 PointMutScanDriver::PointMutScanDriver( utility::vector1< std::string > & pdb_file_names, bool double_mutant_scan, std::string list_file, bool output_mutant_structures ) :
111  double_mutant_scan_( double_mutant_scan ),
112  mutants_list_file_( list_file ),
113  output_mutant_structures_( output_mutant_structures ),
114  pdb_file_names_( pdb_file_names ),
115  DDG_cutoff_(0),
116  scorefxn_(core::scoring::getScoreFunction())
117 {
118 
119 #ifdef USEMPI
120  tag_ = 1; // need to initialize the tag on all nodes to 1 or MPI_Send/_Recv calls start acting funny
121 #endif
122 
123  int mpi_rank( 0 ), mpi_nprocs( 1 );
124 #ifdef USEMPI
125  MPI_Comm_rank( MPI_COMM_WORLD, &mpi_rank );/* get current process id */
126  MPI_Comm_size( MPI_COMM_WORLD, &mpi_nprocs );/* get number of processes */
127 #endif
128 
129  MPI_rank_ = (Size)( mpi_rank );
130  MPI_nprocs_ = (Size)( mpi_nprocs );
131 
132  read_in_structures(); // all processes read in the structures
133 
134 
135  // create a scorefxn that will be used for all the mutants
136  // (to enable hpatch scoring, the command line weights file flag will have to be used)
137  // decompose bb hbond energies into pair energies
138  //
139  //scoring::ScoreFunctionOP scorefxn = scoring::getScoreFunction(); //in initialization list
140  scoring::methods::EnergyMethodOptions energymethodoptions( scorefxn_->energy_method_options() );
141  energymethodoptions.hbond_options().decompose_bb_hb_into_pair_energies( true );
142  scorefxn_->set_energy_method_options( energymethodoptions );
143 
144 }
145 
146 
147 ///
148 /// @begin PointMutScanDriver::~PointMutScanDriver
149 ///
150 /// @brief
151 /// Destructor. When built with USEMPI it calls MPI_Finalize(); nothing else is released explicitly here.
152 ///
154  //This used to be in the parent application. For consistency with most JD2-style MPI-compatible apps (which this is not), Finalize has been moved here.
155 #ifdef USEMPI
156  MPI_Finalize();
157 #endif
158 }
159 
160 
161 ///
162 /// @begin PointMutScanDriver::go()
163 ///
164 /// @brief
165 /// Entry point for the pmut_scan protocol. This is the function the app calls to do the scan.
166 ///
168 
169  clock_t entire_starttime(0);
170  if ( MPI_rank_ == 0 ) {
171  // time the protocol, doesn't include the time spent reading in input structures.
172  entire_starttime = clock();
173  }
174 
175  TR << "go(): " << node_name( MPI_rank_ ) << std::endl;
176 
177  if ( MPI_rank_ == 0 ) {
178  // set up the list of mutations that will be tried.
179  // if the user specified a list, then do just those. if not, try all possible combinations of mutants.
181  }
182 
183  barrier(); // do we really want all processes to hold here?
185 
186  barrier(); // do we really want all processes to hold here?
187  make_mutants();
188 
189  barrier();
190  if ( MPI_rank_ == 0 ) {
191  clock_t entire_stoptime = clock();
192  TR << "main(): whole protocol took " << ((double)entire_stoptime-entire_starttime) / CLOCKS_PER_SEC << " seconds" << std::endl;
193  TR << "go(): DONE with pmut scan." << std::endl;
194  }
195 
196 }
197 
198 ///
199 /// @begin PointMutScanDriver::node_name()
200 ///
202 
203  if ( rank == 0 ) {
204  return "master node";
205  } else {
206  std::stringstream r;
207  r << "slave node " << rank;
208  return r.str();
209  }
210 }
211 
212 ///
213 /// @begin PointMutScanDriver::barrier()
214 ///
215 /// Make all processes stop and wait here.
216 ///
218 
219 #ifdef USEMPI
220  MPI_Barrier( MPI_COMM_WORLD );
221 #endif
222  std::cout.flush();
223 
224 }
225 
226 
227 ///
228 /// @begin PointMutScanDriver::read_in_structures()
229 ///
230 /// @brief
231 /// Reads in the structure (or list of structures) specified on the command line to a class member variable. Create
232 /// Pose objects for all of the structures because we'll pass these out to the slave nodes later.
233 ///
234 /// NOTE: This protocol assumes that if you pass multiple structures, they are all variants of the same structure and you
235 /// want to use all of them for the ddG calculation.
236 ///
237 ///
239 
240  //
241  // read in all the PDB files into a vector of Pose objects
242  //
243  utility::vector1< std::string >::iterator input_pdb_filename, last_pdb;
244  for ( input_pdb_filename = pdb_file_names_.begin(), last_pdb = pdb_file_names_.end(); input_pdb_filename != last_pdb; ++input_pdb_filename ) {
245  pose::Pose pose;
246  core::import_pose::pose_from_pdb( pose, *input_pdb_filename );
247  input_poses_.push_back( pose );
248  }
249 
250 }
251 
252 
253 ///
254 /// @begin PointMutScanDriver::fill_mutations_list
255 ///
256 /// @brief
257 /// Determines whether the user specified a list of mutants or just wants to do a scan over all possible combinations.
258 ///
259 /// If we're doing a scan over all possible mutations:
260 ///
261 /// If we have a two residue protein, 1 and 2, there are 19 possible aa's we can mutate each residue to. Since 1 and 2
262 /// are independent, the number of possible double mutants is 19*19 = 361. It's easy to enumerate all of the possible
263 /// double mutants, but we want to come up with an efficient way of making those mutants and calculating the ddGs. The
264 /// easiest solution is to enumerate all of the possible double mutants, make that a work unit, and then distribute all
265 /// of the work units out to a large cluster. The downside to this approach is that several processors will end up making
266 /// the same mutation, at least in part. For example, the double mutant A1C A2C is similar to the mutant A1C A2D. In fact,
267 /// A1C will have to be paired not only with A2C and A2D, but also A2E, A2F and so on. It would be more efficient to make
268 /// a pose for A1C, and then go into a second for loop that tries A2C-A2W on that already mutated pose.
269 ///
270 /// What's the outermost thing this protocol has to do. For the single mutant scan, you have to try all 19 non-wt aas at
271 /// every position. That lends itself to parallelization rather easily. Each protein position is independent of the others
272 /// so you can have nres processes each testing the mutations at that position. At most, each processor will test 19 mutations.
273 /// With double_mutants, you have to fix one mutation (eg. A1C) and then try all possible other mutations at all other
274 /// positions. So, if you have nres processors, each processor will fix some position to 1 of 19 aas and then scan through
275 /// a mutant at all other positions. Let's assume we have a 10 residue protein. Position 1 will mutate to 1 of 19 aas.
276 /// For each one of those 19, we have to test 19 * 9 = 171 other mutations (at the other positions). That results in a
277 /// grand total of 3249 possibilities. And that's only residue 1's mutants! We also have to try to fix the 19 non-wt aas
278 /// for position 2 and try 19 * 8 = 152 mutations at the other locations for a total of 2888 mutations for just position
279 /// 2. 3: 19 * 19 * 7 = 2527. 4: 19 * 19 * 6 = 2166. 5: 19 * 19 * 5 = 1805. Continuing on in this fashion leads to a
280 /// grand grand total of 16245 possible double mutants in a 10 residue protein. Doing the same kind of protocol for a
281 /// 233 residue protein results in 19 * 19 * (233 * 232 / 2) = 9,757,108 possible double mutants!
282 ///
283 /// Testing ~10 million mutants even on 512 cpus could take quite a bit of time. We really need to find a way to prune
284 /// down the number of possible mutants to just the ones that will be most interesting. I definitely could change it so
285 /// that if the two mutations are more than some number of Angstroms apart, then don't bother making that mutant and
286 /// scoring. The question then becomes how often you have a stabilizing first mutant, and then find a stabilizing (better
287 /// than -0.1) second mutant on the first structure that is more than xAng away. Probably happens often.
288 ///
289 /// Another problem is that the parallelization is not balanced. Because we have directionality in the approach for
290 /// testing double mutants - for example, if we've already done 1AC 2AC we don't have to do 2AC 1AC - processor 1 which
291 /// handles all of the possible mutants at 1 and every other residue has to do far more work than the processors handling higher-numbered residues.
292 ///
293 ///
294 /// For triple mutants, assuming a 10 residue protein there would be 19 * 19 * 19 * nres(nres+1)/2, or ~377,000, possible
295 /// mutants. The 233 residue antibody: 186,983,199 possible combinations.
296 ///
297 ///
298 ///
300 
301  if ( !mutants_list_file_.empty() ) {
303  return;
304  }
305 
306  // otherwise, we're just going to do a scan over all mutations
307  // this outer for loop is over all sets of mutations: either single mutants, double mutants, triple mutants, combinations
308  // of single, double and triple mutants, etc.
309  //utility::vector1< Mutant > stabilizing_mutants;
310  //scan_for_mutations( input_poses, scorefxn, stabilizing_mutants, double_mutant_scan_ );
311 
312  Size no_double_mutants_possible = 0;
313  Size no_double_mutants_excluded_for_distance = 0;
314  Size no_double_mutants_excluded_otherwise = 0;
315  Size no_single_mutants_excluded_otherwise = 0;
316 
317 
318  // use the first structure to determine neighborship for all residues. this neighbor_graph will be used inside the
319  // nested for loops to skip mutants that are on opposite sides of the protein.
321  calculate_neighbor_table( input_poses_[1], neighbors );
322 
323  pose::Pose & pose = input_poses_[1];
324  Size n_residue = pose.n_residue();
325 
326  for ( Size resid1 = 1; resid1 <= n_residue; ++resid1 ) {
327 
328  // try every type at each position (well, except the native type at this position!)
329  for ( Size aa_enum_index_a = 1; aa_enum_index_a <= chemical::num_canonical_aas; ++aa_enum_index_a ) {
330 
331  //if ( resid1 > 1 ) { break; } // for debugging only
332 
333  if ( pose.residue( resid1 ).aa() == chemical::AA( aa_enum_index_a ) ) { continue; }
334  if ( !pose.residue_type( resid1 ).is_protein() ) { continue; }
335 
336  MutationData md1(
337  pose.residue( resid1 ).name1(),
338  oneletter_code_from_aa( chemical::AA( aa_enum_index_a ) ),
339  resid1,
340  pose.pdb_info()->number( resid1 ),
341  pose.pdb_info()->icode( resid1 ),
342  pose.pdb_info()->chain( resid1 )
343  );
344 
345  //single mutant scan
346  Mutant m;
347  m.add_mutation( md1 ); // the variable mutations is a vector of vectors!
348  if (reject_mutant(m, pose)) { //offers a chance for child classes to inject mutant selection logic
349  ++no_single_mutants_excluded_otherwise;
350  } else {
351  all_mutants_.push_back( m );
352  //TR << "fill_mutations_list(): adding mutation: " << m << std::endl;
353  }
354 
355  // only do a double mutant scan if the user asked for it
356  if ( double_mutant_scan_ ) {
357 
358  // only need to iterate over higher indexed residues. can't make two mutations at the same position!
359  for ( Size resid2 = resid1 + 1; resid2 <= n_residue; ++resid2 ) {
360 
361  // check to see if these residues are neighbors of each other. we don't want to make double mutants
362  // where the mutants are on opposite sides of the protein.
363  if ( neighbors[ resid1 ][ resid2 ] == false ) {
364  no_double_mutants_possible += 19;
365  no_double_mutants_excluded_for_distance += 19;
366  //TR << "skipping residue pair " << md1.mutation_string_PDB_numbering() << " and " << pose.pdb_info()->chain( resid2 ) << "-" << pose.pdb_info()->number( resid2 ) << pose.pdb_info()->icode( resid2 ) << " based on distance" << std::endl;
367  continue;
368  }
369 
370  // try every type at each position (well, except the native type at this position!)
371  for ( Size aa_enum_index_b = 1; aa_enum_index_b <= chemical::num_canonical_aas; ++aa_enum_index_b ) {
372 
373  if ( pose.residue( resid2 ).aa() == chemical::AA( aa_enum_index_b ) ) { continue; }
374  if ( !pose.residue_type( resid2 ).is_protein() ) { continue; }
375 
376  no_double_mutants_possible++;
377 
378  MutationData md2(
379  pose.residue( resid2 ).name1(),
380  oneletter_code_from_aa( chemical::AA( aa_enum_index_b ) ),
381  resid2,
382  pose.pdb_info()->number( resid2 ),
383  pose.pdb_info()->icode( resid2 ),
384  pose.pdb_info()->chain( resid2 )
385  );
386 
387  Mutant m;
388  m.add_mutation( md1 ); // the variable mutations is a vector of vectors!
389  m.add_mutation( md2 ); // the variable mutations is a vector of vectors!
390  if (reject_mutant(m, pose)) { //offers a chance for child classes to inject mutant selection logic
391  ++no_double_mutants_excluded_otherwise;
392  continue;
393  }
394  all_mutants_.push_back( m );
395  //TR << "fill_mutations_list(): adding mutation: " << m << std::endl;
396  }//for all residue types for resid 2
397  } // all residues resid2
398  }//if a double mutant scan
399  }//for all res types for resid 1
400  }//for all residues resid1
401 
402  if ( MPI_rank_ == 0 ) {
403  Size const single_possible = 19 * n_residue;
404  TR << "fill_mutations_list(): number single mutants possible: " << single_possible << std::endl;
405  TR << "fill_mutations_list(): number single mutants excluded otherwise: " << no_single_mutants_excluded_otherwise << std::endl;
406  if ( double_mutant_scan_ ) {
407  TR << "fill_mutations_list(): number double mutants possible: " << no_double_mutants_possible << std::endl;
408  TR << "fill_mutations_list(): number double mutants excluded for distance: " << no_double_mutants_excluded_for_distance << std::endl;
409  TR << "fill_mutations_list(): number double mutants excluded otherwise: " << no_double_mutants_excluded_otherwise << std::endl;
410  }
411  }
412 
413 }
414 
415 
416 ///
417 /// @begin PointMutScanDriver::read_mutants_list_file()
418 ///
419 /// @brief
420 /// If the user specified mutants, it reads the lines in the mutant list file and parses those lines to get mutation
421 /// data and then saves them all to the class member variable.
422 /// Needs access to a pose to translate the lines in the mutations_list file to pose numbering
423 ///
425 
426  std::ifstream data( list_file.c_str() );
427  if ( !data.good() ) {
428  utility_exit_with_message( "Unable to open mutations file: " + list_file + '\n' );
429  }
430 
431  // read in all lines in file
432  utility::vector1< std::string > mutant_file_lines;
433  std::string line;
434  while ( getline( data, line ) ) {
435  if ( line.size() < 1 || line[0] == '#' ) continue; // skip comment lines
436  mutant_file_lines.push_back( line );
437  }
438  data.close();
439 
440 
441  // iterate over all the lines
442  for ( Size ii=1; ii <= mutant_file_lines.size(); ++ii ) {
443  std::string const & line( mutant_file_lines[ ii ] );
444  std::istringstream iss( line );
445 
446  char wt_residue, mut_residue, chain;
447  std::string position_code;
448 
449  Mutant m;
450 
451  // there might be more than one mutation per line!
452  while ( iss.peek() && !iss.eof() ) {
453 
454  iss >> chain >> wt_residue >> position_code >> mut_residue;
455 
456  // check to see if an insertion code is present in the position_code string
457  // if the string is made of all digits, no icode is present
458  Size pdb_resnum; char icode = ' ';
459  std::stringstream ss;
460 
461  if ( position_code.find_first_not_of("0123456789") == std::string::npos ) {
462  icode = ' ';
463  ss << position_code;
464  ss >> pdb_resnum;
465 
466  } else {
467  for ( std::string::iterator it = position_code.begin(); it < position_code.end(); ++it ) {
468  if ( isdigit(*it) ) {
469  ss << (*it);
470  } else {
471  icode = *it; // assumes that insertion code is only 1-letter!!
472  }
473  }
474  ss >> pdb_resnum; // converts the ss buffer contents to a Size type
475  }
476 
477  // figure out what the pose residue number for this residue is
478  pose::Pose & pose = input_poses_[ 1 ];
479  Size pose_resnum = (pose.pdb_info())->pdb2pose( chain, pdb_resnum, icode );
480 
481  if ( pose.residue( pose_resnum ).name1() != wt_residue ) {
482  TR << "wt_residue: " << wt_residue << ", pdb resnum: " << pdb_resnum << ", pose resnum: " << pose_resnum
483  << ", residue at pose resnum: " << pose.residue( pose_resnum ).name1() << std::endl;
484  utility_exit_with_message("Error. Wild-type residue given in mutatons_list file does not match input structure. Please try again.");
485  }
486 
487  //TR << "Found mutation of " << wt_residue << " to " << mut_residue << " at position " << pose_resnum << " (pdb chain: '" << chain << "', resnum: '" << pdb_resnum << "', icode: '" << icode << "')" << std::endl;
488 
489  MutationData md( wt_residue, mut_residue, pose_resnum, pdb_resnum, icode, chain );
490  m.add_mutation( md ); // the variable mutations is a vector of vectors!
491 
492  } // done parsing line
493 
494  all_mutants_.push_back( m );
495 
496  } // end iterating over lines read from input file
497 
498 }
499 
500 ///
501 /// @begin PointMutScanDriver::calculate_neighbor_table
502 ///
503 /// @brief
504 /// Calculates the 10A neighbor graph using the given pose object and then sets values in a 2D array to indicate which
505 /// resids are neighbors.
506 ///
508 
509  // size the neighbors 2D table
510  neighbors.resize( pose.n_residue(), utility::vector1< bool >( pose.n_residue(), false ) );
511 
512  // PointGraph is a one-way graph, which makes it somewhat annoying for iterating over neighbors of a certain
513  // position. Only edges to higher-indexed nodes exist. So instead, make a graph which has all the edges at every
514  // node to simplify iterating over all neighboring edges.
517  core::conformation::find_neighbors( pg, 10.0 /* Angstrom cutoff */ ); // create edges
518 
519  // actually create the neighbor graph from the point graph
520  core::graph::Graph neighbor_graph( pose.n_residue() );
521  for ( Size r=1; r <= pose.total_residue(); ++r ) {
522  for ( core::conformation::PointGraph::UpperEdgeListConstIter edge_iter = pg->get_vertex(r).upper_edge_list_begin(),
523  edge_end_iter = pg->get_vertex(r).upper_edge_list_end(); edge_iter != edge_end_iter; ++edge_iter ) {
524  neighbor_graph.add_edge(r, edge_iter->upper_vertex());
525  }
526  }
527 
528  for ( Size ii=1; ii <= pose.n_residue(); ++ii ) {
529 
530  conformation::Residue const & ii_rsd( pose.residue( ii ) );
531  for ( core::graph::EdgeListConstIterator eli = neighbor_graph.get_node( ii )->const_edge_list_begin(),
532  eli_end = neighbor_graph.get_node( ii )->const_edge_list_end(); eli != eli_end; ++eli ) {
533 
534  Size nb_resnum = (*eli)->get_other_ind( ii );
535  if ( nb_resnum < ii ) { continue; } // only want higher indexed residues
536 
537  // check to see if any of the atoms on this neighboring residue "interact" with any atoms on the ii residue.
538  // our definition of interact: one sc-sc atom pair within 4.5A (BK's suggestion)
539  conformation::Residue const & jj_rsd( pose.residue( nb_resnum ) );
540 
541  for ( Size jja = jj_rsd.first_sidechain_atom(); jja <= jj_rsd.nheavyatoms(); ++jja ) {
542  conformation::Atom const & jja_atom( jj_rsd.atom( jja ) );
543  Vector const & jja_atom_xyz = jja_atom.xyz();
544 
545  for ( Size iia = ii_rsd.first_sidechain_atom(); iia <= ii_rsd.nheavyatoms(); ++iia ) {
546  conformation::Atom const & iia_atom( ii_rsd.atom( iia ) );
547  Vector const & iia_atom_xyz = iia_atom.xyz();
548 
549  if ( iia_atom_xyz.distance( jja_atom_xyz ) < 4.5 ) {
550  neighbors[ ii ][ nb_resnum ] = true; // only set the upper half of the 2D table; i.e. res1 must always be < res2
551  break;
552  }
553 
554  } // ii rsd atoms
555 
556  if ( neighbors[ ii ][ nb_resnum ] ) {
557  // already found an atom pair within 4.5A; no point in going through all the rest of jj rsd's atoms!
558  break;
559  }
560 
561  } // jj rsd atoms
562  }
563  }
564 
565 }
566 
567 
568 ///
569 /// @begin PointMutScanDriver::divide_up_mutations
570 ///
571 /// @brief
572 /// This function takes the vector of all possible mutants and splits them up as evenly as possible among all the CPUs.
573 ///
575 
576  //TR << "Node " << MPI_rank_ << ", entered method divide_up_mutations()" << std::endl;
577 
578  if ( MPI_rank_ == 0 ) {
579  //utility::vector1< Mutant > all_mutants_;
580 
581  Size const num_mutants_per_cpu = all_mutants_.size() / MPI_nprocs_;
582  Size const nextra = all_mutants_.size() - ( num_mutants_per_cpu * MPI_nprocs_ );
583 
584  Size my_njobs = ( nextra >= 1 ? 1 : 0 ) + num_mutants_per_cpu;
585  for ( Size ii = 1; ii <= my_njobs; ++ii ) {
586  mutants_list_.push_back( all_mutants_[ ii ] );
587  }
588 
589 #ifdef USEMPI
590  //TR << "divide_up_mutations(): number of nodes " << MPI_nprocs_ << std::endl;
591  Size mutant_offset = my_njobs;
592 
593  // send the other nodes their mutations lists so they know what they'll be working on
594  for ( Size node_index = 1; node_index < MPI_nprocs_; ++node_index ) {
595  Size node_njobs = ( nextra > node_index ? 1 : 0 ) + num_mutants_per_cpu;
596  MPI_Send( & node_njobs, 1, MPI_UNSIGNED_LONG, node_index, tag_, MPI_COMM_WORLD );
597 
598  for ( Size mutant_index = mutant_offset + 1; mutant_index <= mutant_offset + node_njobs; ++mutant_index ) {
599  send_mutant_data_to_node( node_index, all_mutants_[ mutant_index ] );
600  }
601  mutant_offset += node_njobs;
602  }
603 
604  } else {
605  // slave node. need to receive work order from master node.
606  Size my_njobs;
607  MPI_Recv( & my_njobs, 1, MPI_UNSIGNED_LONG, 0, tag_, MPI_COMM_WORLD, & stat_ );
608 
609  //TR << "divide_up_mutations(): received my_njobs: '" << my_njobs << "'" << std::endl;
610  mutants_list_.reserve( my_njobs );
611  for ( Size ii = 1; ii <= my_njobs; ++ii ) {
612  mutants_list_.push_back( receive_mutant_data_from_node( 0 ) );
613  }
614 #endif
615  }
616 
617 #ifdef USEMPI
618  sleep( MPI_rank_ ); // a crude way to order processes...
619  for ( Size ii = 1; ii <= mutants_list_.size(); ++ii ) {
620  char hostname[256];
621  gethostname(hostname, sizeof(hostname));
622  TR << "divide_up_pdbs(): mutation '" << mutants_list_[ ii ] << "' assigned to " << hostname << " (rank = " << MPI_rank_ << ")" << std::endl;
623  }
624 #endif
625 
626 }
627 
628 
629 #ifdef USEMPI
630 ///
631 /// @begin PointMutScanDriver::send_mutant_data_to_node
632 ///
633 /// @brief
634 /// Takes a Mutant and a destination and constructs the MPI_Send call.
635 ///
636 void PointMutScanDriver::send_mutant_data_to_node( int destination, const protocols::pmut_scan::Mutant & m ) {
637 
638  int tag( 1 );
639 
640  // each particular mutant can have one, two or more mutations associated with it, make sure to send all of them!
641  Size mutant_num_mutations = m.n_mutations();
642  //TR << "sending mutant_num_mutations: " << mutant_num_mutations << " to node " << destination << std::endl;
643  MPI_Send( & mutant_num_mutations, 1, MPI_UNSIGNED_LONG, destination, tag, MPI_COMM_WORLD );
644 
645  for ( utility::vector1< MutationData >::const_iterator iter = m.mutations_begin(); iter != m.mutations_end(); ++iter ) {
646 
647  char wt_residue = iter->wt_residue_;
648  char mut_residue = iter->mut_residue_;
649  //TR << "sending wt_residue: '" << wt_residue << "' and mut_residue: '" << mut_residue << "' to node " << destination << "." << std::endl;
650  MPI_Send( & wt_residue, 1, MPI_CHAR, destination, tag, MPI_COMM_WORLD );
651  MPI_Send( & mut_residue, 1, MPI_CHAR, destination, tag, MPI_COMM_WORLD );
652 
653  Size pose_resnum = iter->pose_resnum_;
654  Size pdb_resnum = iter->pdb_resnum_;
655  MPI_Send( & pose_resnum, 1, MPI_UNSIGNED_LONG, destination, tag, MPI_COMM_WORLD );
656  MPI_Send( & pdb_resnum, 1, MPI_UNSIGNED_LONG, destination, tag, MPI_COMM_WORLD );
657 
658  char icode = iter->icode_;
659  char chain = iter->chain_;
660  //TR << "sending icode: '" << icode << "' and chain: '" << chain << "' to node " << destination << "." << std::endl;
661  MPI_Send( & icode, 1, MPI_CHAR, destination, tag, MPI_COMM_WORLD );
662  MPI_Send( & chain, 1, MPI_CHAR, destination, tag, MPI_COMM_WORLD );
663 
664  }
665 
666 }
667 
668 ///
669 /// @begin PointMutScanDriver::receive_mutant_data_to_node
670 ///
671 /// @brief
672 /// Receive mutant data from the master node. First find out how many mutations are in this mutant and then actually
673 /// get the mutation data.
674 ///
675 Mutant PointMutScanDriver::receive_mutant_data_from_node( int source ) {
676 
677  int tag( 1 );
678  MPI_Status stat;
679 
680  Size num_mutations;
681  MPI_Recv( & num_mutations, 1, MPI_UNSIGNED_LONG, source, tag, MPI_COMM_WORLD, & stat );
682  //TR << "received mutant_num_mutations from node " << source << ": " << num_mutations << std::endl;
683 
684  Mutant m;
685  for ( Size ii = 1; ii <= num_mutations; ++ii ) {
686 
687  char wt_residue, mut_residue;
688  MPI_Recv( & wt_residue, 1, MPI_CHAR, source, tag, MPI_COMM_WORLD, & stat );
689  MPI_Recv( & mut_residue, 1, MPI_CHAR, source, tag, MPI_COMM_WORLD, & stat );
690  //TR << "received wt_residue: " << wt_residue << " and mut_residue: " << mut_residue << " from node " << source << "." << std::endl;
691 
692  Size pose_resnum = 1, pdb_resnum = 1;
693  MPI_Recv( & pose_resnum, 1, MPI_UNSIGNED_LONG, source, tag, MPI_COMM_WORLD, & stat );
694  MPI_Recv( & pdb_resnum, 1, MPI_UNSIGNED_LONG, source, tag, MPI_COMM_WORLD, & stat );
695 
696  char icode, chain;
697  MPI_Recv( & icode, 1, MPI_CHAR, source, tag, MPI_COMM_WORLD, & stat );
698  MPI_Recv( & chain, 1, MPI_CHAR, source, tag, MPI_COMM_WORLD, & stat );
699  //TR << "received icode: '" << icode << "' and chain: '" << chain << "' from node " << source << "." << std::endl;
700 
701  MutationData md( wt_residue, mut_residue, pose_resnum, pdb_resnum, icode, chain );
702  m.add_mutation( md );
703  }
704 
705  //TR << "receive_mutant_data_from_node(): received mutant '" << m << "'" << std::endl;
706 
707  return m;
708 }
709 
710 #endif
711 
712 
713 ///
714 /// @begin PointMutScanDriver::make_mutants
715 ///
716 /// @brief
717 /// Calls make_specific_mutant on all mutants assigned to this node.
718 /// The score function used for all mutants is not created here; the scorefxn_ member is initialized in the constructor.
719 ///
721 
722  utility::vector1< pose::Pose > mutant_poses( input_poses_.size() ); // this will get set in the function below
723  utility::vector1< pose::Pose > native_poses( input_poses_.size() );
724 
725  // print out a header to the terminal
726  if ( MPI_rank_ == 0 ) {
727  TR << A( "mutation" ) << X(3) << A( "mutation_PDB_numbering" ) << X(3) << A( "average_ddG" ) << X(3) << A( "average_total_energy" ) << std::endl;
728  }
729 
730  for ( Size ii=1; ii <= mutants_list_.size(); ++ii ) {
731  Mutant & m = mutants_list_[ ii ];
732 
733  //TR << "make_mutants(): making mutant: " << m << std::endl;
734 
735  // the make specific mutant function changes both the mutant and native poses. we want to start with our
736  // original starting structures each time though. so we have to copy the input poses to some working native
737  // and mutant poses vectors.
738  for ( Size ii=1; ii <= input_poses_.size(); ++ii ) {
739  mutant_poses[ ii ] = input_poses_[ ii ];
740  native_poses[ ii ] = input_poses_[ ii ];
741  }
742 
743  make_specific_mutant( mutant_poses, native_poses, m, "", "" );
744  // this will result in the Mutant object 'm' being modified, and since m is a reference, the original mutants_list_
745  // will be modified, as well.
746  }
747 
748 }
749 
750 
751 ///
752 /// @begin PointMutScanDriver::make_specific_mutant
753 ///
754 /// @brief
755 /// Function which takes in a mutation to make (either single, double or more) and calls itself recursively until the desired
756 /// structure is created. Useful for testing certain combinations of mutations (like putting two double mutants together)
757 /// without having to run an entire scan protocol that would cover those mutations.
758 ///
760  Mutant & m, std::string mutation_string, std::string mutation_string_PDB_numbering ) {
761 
762  //TR << "make_specific_mutant() called. mutant_poses.size(): " << mutant_poses.size() << ", native_poses.size(): " << native_poses.size()
763  // << ", num mutations: " << m.n_mutations() << ", mutation_string: " << mutation_string << std::endl;
764 
765 // if the mutants vector has more than one element, we have to take out the first element of the vector
766  if ( m.n_mutations() > 1 ) {
767 
768  // need to make the first mutation and call this function recursively
769  MutationData md = m.pop_mutation();
770 
771  // make the first mutation on the mutant_poses
772  for ( Size ii = 1; ii <= native_poses.size(); ++ii ) {
773 
774  // make the specific mutation, but don't do any scoring; the scorefxn is needed for packing
775  make_mutant_structure( mutant_poses[ ii ], native_poses[ ii ], md );
776 
777  }
778 
779  std::stringstream out;
780  out << mutation_string;
781  if ( mutation_string != "" ) { out << ","; }
782  out << md.mutation_string();
783  std::string updated_mutation_string = out.str();
784  out.str("");
785 
786  out << mutation_string_PDB_numbering;
787  if ( mutation_string_PDB_numbering != "" ) { out << ","; }
788  out << md.mutation_string_PDB_numbering();
789  std::string updated_mutation_string_PDB_numbering = out.str();
790 
791 
792  make_specific_mutant( mutant_poses, native_poses, m, updated_mutation_string, updated_mutation_string_PDB_numbering );
793 
794  } else {
795  // make the last mutation, calculate the ddG, and print out the results
796  MutationData md = m.pop_mutation();
797 
798  //TR << "make_specific_mutant(): making final mutation: " << md << std::endl;
799 
800  Energy sum_mutant_scores = 0.0;
801  Energy average_mutant_score = 0.0;
802 
803  Energy sum_native_scores = 0.0;
804  Energy average_native_score = 0.0;
805 
806  utility::vector1< Real > native_poses_total_energies( native_poses.size() );
807  utility::vector1< Real > mutant_poses_total_energies( native_poses.size() );
808 
809  for ( Size ii=1; ii <= native_poses.size(); ++ii ) {
810  // make the specific mutation, but don't do any scoring; the scorefxn is needed for packing
811  // send in the input_pose for the mutant. that way the mutant poses will be "returned" because mutant_poses
812  // is actually a reference!
813  make_mutant_structure( mutant_poses[ii], native_poses[ii], md );
814 
815  // score the created mutant structure
816  pose::Pose & mutant_pose = mutant_poses[ ii ];
817  Energy mutant_score = score( mutant_pose );
818  mutant_poses_total_energies[ ii ] = mutant_score;
819  sum_mutant_scores += mutant_score;
820 
821  // score the updated native structure
822  pose::Pose & native_pose = native_poses[ ii ];
823  Energy native_score = score( native_pose );
824  native_poses_total_energies[ ii ] = native_score;
825  sum_native_scores += native_score;
826 
828  std::stringstream out;
829  out << mutation_string;
830  if ( mutation_string != "" ) { out << ", "; }
831  out << md.mutation_string();
833  std::string mutant_filename = fn.base() + "." + out.str() + ".pdb";
834  mutant_pose.dump_scored_pdb( mutant_filename, *scorefxn_ );
835  }
836 
837  }
838 
839  average_mutant_score = sum_mutant_scores / mutant_poses.size();
840  average_native_score = sum_native_scores / native_poses.size();
841 
842  Real ddG_mutation = average_mutant_score - average_native_score;
843  if ( ddG_mutation > DDG_cutoff_ ) {
844  return;
845  }
846 
847  std::stringstream out;
848  out << mutation_string;
849  if ( mutation_string != "" ) { out << ","; }
850  out << md.mutation_string();
851  std::string final_mutation_string = out.str();
852 
853  out.str("");
854  out << mutation_string_PDB_numbering;
855  if ( mutation_string_PDB_numbering != "" ) { out << ","; }
856  out << md.mutation_string_PDB_numbering();
857  std::string final_mutation_string_PDB_numbering = out.str();
858 
859 
860  TR << final_mutation_string << X(3) << final_mutation_string_PDB_numbering << X(3) << F( 9,3,ddG_mutation ) << X(3) << F( 9,2,average_mutant_score ) << std::endl;
861 
862 
863  /*TR << "native poses total energies: ";
864  for ( Size ii=1; ii <= native_poses_total_energies.size(); ++ii ) {
865  TR << native_poses_total_energies[ ii ] << ", ";
866  }
867  TR << std::endl;
868  TR << "mutant poses total energies: ";
869  for ( Size ii=1; ii <= mutant_poses_total_energies.size(); ++ii ) {
870  TR << mutant_poses_total_energies[ ii ] << ", ";
871  }
872  TR << std::endl;*/
873  TR.flush_all_channels();
874 
875 
876  } // end loop over all mutants
877 
878 }
879 
880 
881 ///
882 /// @begin PointMutScanDriver::make_mutant_structure
883 ///
884 /// @brief
885 /// Given mutant and native pose references and the mutation to make, this function constructs all the necessary PackerTask
886 /// Operations and Movers to apply the mutation and repacking steps to both the mutant and native poses.
887 ///
    /// @details
    /// Both poses are modified in place. The mutant pose goes through a SequenceMover (repack, then task-aware minimization)
    /// built from mutant_tf, in which the mutated position is restricted to the single amino acid mut_aa. The native pose then
    /// goes through an equivalent SequenceMover built from native_tf, in which every position is restricted to repacking.
    /// A neighbor calculator is registered with the CalculatorFactory under a name derived from md.mutation_string() at the
    /// start of the call and removed again before returning.
    ///
    /// @param mutant_pose  pose the mutation is applied to; modified in place
    /// @param native_pose  pose the native_tf-based movers are applied to; modified in place
    /// @param md  the mutation to make; pose_resnum() and mutation_string() are read from it here
    ///
888 void PointMutScanDriver::make_mutant_structure( pose::Pose & mutant_pose, pose::Pose & native_pose, MutationData const & md ) {
889 
890  Size resid = md.pose_resnum();
892  // NOTE(review): mut_aa (used below to select the one allowed amino acid at resid) is not declared in the visible lines - confirm where it is set
893  // need to create a neighborhood by distance calculator so we can identify neighbors of the mutated residue
894  std::stringstream out;
895  out << md.mutation_string() << "_mutant_nb_calculator";
896  std::string calculator_name = out.str();
897  // NOTE(review): mutant_nb_calculator is not declared in the visible lines - presumably constructed just above; confirm
899  pose::metrics::CalculatorFactory::Instance().register_calculator( calculator_name, mutant_nb_calculator );
900 
     // ask the mutant pose for the "neighbors" metric of the calculator registered above and keep a local copy of the set
901  basic::MetricValue< std::set< Size > > mv_neighbors;
902  mutant_pose.metric( calculator_name, "neighbors", mv_neighbors );
903  std::set< Size > const neighbor_set( mv_neighbors.value() );
904 
905  //TR << "make_mutant_structure(): neighbor_set: ";
906  //for ( std::set< Size >::iterator it = neighbor_set.begin() ; it != neighbor_set.end(); it++ ) {
907  // TR << *it << ", ";
908  //}
909  //TR << std::endl;
910 
     // one task factory per pose; the operations that are the same for both are pushed onto each of them below
911  TaskFactoryOP native_tf = new TaskFactory();
912  TaskFactoryOP mutant_tf = new TaskFactory();
913 
914  // the restrict operation class (which in the end is just a TaskOperation) takes a calculator during construction. I've already
915  // created that calculator above. This operation will disable repacking and design at all positions except those in the neighborhood
916  // of the mutated position.
     // NOTE(review): nb_op is not declared in the visible lines - confirm its type and construction
918  native_tf->push_back( nb_op ); mutant_tf->push_back( nb_op );
919 
920  // extra task operations we want to also include
921  // the restrict residue to repacking ops are used to make sure that only repacking and not design is done to the residues in the neighborhood
     // NOTE(review): init_op is not declared in the visible lines - confirm its type and construction
923  native_tf->push_back( init_op ); mutant_tf->push_back( init_op );
924 
925  IncludeCurrentOP ic_op = new IncludeCurrent();
926  native_tf->push_back( ic_op ); mutant_tf->push_back( ic_op );
927 
     // NOTE(review): mutant_repack_op is used in the loop below but is not declared in the visible lines - confirm
929  RestrictResidueToRepackingOP wt_repack_op = new RestrictResidueToRepacking(); // will include one extra residue to repack
930  for ( Size ii = 1; ii <= mutant_pose.n_residue(); ++ii ) {
931  // resid is the position on the original pose. ii is the position on the copy.
932  if ( ii == resid ) {
933  // do design on this position
     // only the entry for mut_aa is set to true, so the mutant task factory keeps just that amino acid at this position
934  utility::vector1< bool > keep_canonical_aas( chemical::num_canonical_aas, false );
935  keep_canonical_aas[ mut_aa ] = true;
936  RestrictAbsentCanonicalAASOP restrict_op = new RestrictAbsentCanonicalAAS( ii, keep_canonical_aas );
937  mutant_tf->push_back( restrict_op );
938  wt_repack_op->include_residue( ii ); // for the wild type, don't design on the mutant resid - but do allow repacking
939  } else {
940  // make this position repackable only; because of the commutativity of packer task ops, only the residues that are in the neighborhood
941  // of the mutant will be allowed to repack. the restrict to neighborhood op will disallow packing at all positions not near the mutant.
942  mutant_repack_op->include_residue( ii );
943  wt_repack_op->include_residue( ii );
944  }
945  }
     // note: wt_repack_op ends up including every residue (both branches add ii); mutant_repack_op includes every residue except resid
946  native_tf->push_back( wt_repack_op );
947  mutant_tf->push_back( mutant_repack_op );
948 
949  //TR << "Finished creating all TaskOperation's and TaskFactory's. Creating MoveMap." << std::endl;
950 
     // NOTE(review): movemap is not declared in the visible lines - presumably created just above; confirm
952  std::set< core::Size >::const_iterator iter;
953  for ( iter = neighbor_set.begin(); iter != neighbor_set.end(); iter++ ) {
954  //movemap_->set_bb(i, true); // don't do any backbone minimization
955  movemap->set_chi( *iter, true ); // but do minimize the side chains
956  }
957  //movemap->show( std::cout, mutant_pose.n_residue() );
958 
959  //TR << "Movemap created... Beginning repacking/minimization of mutant pose." << std::endl;
960 
961  // create an actual PackerTask from the TaskFactory
962  pack::task::PackerTaskOP scan_task = mutant_tf->create_task_and_apply_taskoperations( mutant_pose );
963  //scan_task->num_to_be_packed();
964  //TR << "mutant packer task: " << *scan_task << std::endl; // generates a TON of output
965 
966  // now create the movers that will do the repacking and minimization
     // the min mover takes its minimizer type from the run::min_type option; 0.01 is passed as the fourth constructor argument
967  protocols::simple_moves::PackRotamersMoverOP mutant_repacker_mover = new protocols::simple_moves::PackRotamersMover( scorefxn_, scan_task, 2 ); // ndruns: 2
968  protocols::simple_moves::MinMoverOP min_mover = new protocols::simple_moves::MinMover( movemap, scorefxn_, option[ OptionKeys::run::min_type ].value(), 0.01, true ); // use nb_list: true
969  protocols::simple_moves::TaskAwareMinMoverOP task_aware_min_mover = new protocols::simple_moves::TaskAwareMinMover( min_mover, mutant_tf );
     // NOTE(review): seq_mover is not declared in the visible lines - it is reassigned to a new protocols::moves::SequenceMover further down; confirm
971  seq_mover->add_mover( mutant_repacker_mover );
972  seq_mover->add_mover( task_aware_min_mover );
973 
     // run repack followed by task-aware minimization on the mutant pose
974  seq_mover->apply( mutant_pose );
975 
976  //TR << "Beginning repacking/minimization of wt pose." << std::endl;
977 
978  // create an actual PackerTask from the TaskFactory
979  pack::task::PackerTaskOP wt_task = native_tf->create_task_and_apply_taskoperations( native_pose );
980 
981  // now create the movers that will do the repacking and minimization of the native structure
     // NOTE(review): native_pack_mover is not declared in the visible lines - presumably built from wt_task; confirm
     // min_mover, task_aware_min_mover and seq_mover are re-created here so the native pose gets its own mover instances
983  min_mover = new protocols::simple_moves::MinMover( movemap, scorefxn_, option[ OptionKeys::run::min_type ].value(), 0.01, true ); // use nb_list: true
984  task_aware_min_mover = new protocols::simple_moves::TaskAwareMinMover( min_mover, native_tf );
985  seq_mover = new protocols::moves::SequenceMover;
986  seq_mover->add_mover( native_pack_mover );
987  seq_mover->add_mover( task_aware_min_mover );
988 
     // run the same repack + minimization sequence on the native pose
989  seq_mover->apply( native_pose );
990 
991  // this needs to get recreated each time around
     // the calculator is removed from the factory and the local handle is cleared before returning
992  pose::metrics::CalculatorFactory::Instance().remove_calculator( calculator_name );
993  mutant_nb_calculator = NULL;
994 
995  return;
996 
997 } // done with make_mutant_structure
998 
999 //Virtual functions, refactored out so they can be overridden by child AlterSpecDisruptionDriver
1000 
1001 ///@brief score the pose for the purposes of determining if a mutation is "good" or not. In the base implementation, it's just a scorefunction call, but in child implementations it may be fancier (for example, calculating a binding energy instead)
1003  return (*scorefxn_)(pose);
1004 }
1005 
1006 // setters used by the unit tests only
1008  DDG_cutoff_ = threshold;
1009 }
1010 
1011 
1012 ///
1013 /// @begin PointMutScanDriver::mutants_begin
1014 ///
1015 /// @brief
1016 /// returns a const iterator to the beginning of the Mutant data member variable vector
1017 ///
1019  return all_mutants_.begin();
1020 }
1021 
1022 
1023 ///
1024 /// @begin PointMutScanDriver::mutants_end
1025 ///
1026 /// @brief
1027 /// returns a const iterator to the end of the Mutant data member variable vector
1028 ///
1030  return all_mutants_.end();
1031 }
1032 
1033 
1034 ///
1035 /// @begin PointMutScanDriver::n_mutants
1036 ///
1037 /// @brief
1038 /// returns the size of the Mutant data member variable vector
1039 ///
1041  return all_mutants_.size();
1042 }
1043 
1045  return scorefxn_;
1046 }
1047 
1048 
1049 } // namespace pmut_scan
1050 } // namespace protocols