Rosetta 3.5
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
MPIWorkPoolJobDistributor.cc
Go to the documentation of this file.
1 // -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
2 // vi: set ts=2 noet:
3 //
4 // (c) Copyright Rosetta Commons Member Institutions.
5 // (c) This file is part of the Rosetta software suite and is made available under license.
6 // (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
7 // (c) For more information, see http://www.rosettacommons.org. Questions about this can be
8 // (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
9 
10 /// @file protocols/jd2/MPIWorkPoolJobDistributor.cc
11 /// @brief implementation of MPIWorkPoolJobDistributor
12 /// @author P. Douglas Renfrew (renfrew@unc.edu)
13 
// MPI headers
#ifdef USEMPI
#include <mpi.h> //keep this first
#endif

// Unit headers
#include <protocols/jd2/MPIWorkPoolJobDistributor.hh>

// Package headers
#include <protocols/jd2/JobOutputter.hh>
#include <protocols/jd2/Job.hh>
#include <basic/message_listening/MessageListenerFactory.hh>
#include <basic/message_listening/MessageListener.hh>
#include <basic/message_listening/util.hh>


#include <protocols/moves/Mover.hh>

// Utility headers
#include <basic/Tracer.hh>
#include <basic/options/option.hh>
#include <utility/exit.hh>
#include <utility/assert.hh>
#include <utility/mpi_util.hh>

// Option headers
#include <basic/options/keys/out.OptionKeys.gen.hh>
#ifdef USEMPI
#include <basic/options/keys/jd2.OptionKeys.gen.hh>
#endif

// C++ headers
#include <ctime>   // clock(), CLOCKS_PER_SEC in slave_job_succeeded()
#include <sstream> // std::stringstream in master_go() error path
#include <string>

//Auto Headers
#include <utility/vector1.hh>
50 static basic::Tracer TR("protocols.jd2.MPIWorkPoolJobDistributor");
51 
52 namespace protocols {
53 namespace jd2 {
54 
55 using namespace basic::options;
56 using namespace basic::options::OptionKeys;
57 
58 ///@details constructor. Notice it calls the parent class! It also builds some internal variables for determining
59 ///which processor it is in MPI land.
62  npes_( 1 ),
63  rank_( 0 ),
64  current_job_id_( 0 ),
65  next_job_to_assign_( 0 ),
66  bad_job_id_( 0 ),
67  repeat_job_( false ),
68  finalize_MPI_( true )
69 {
70  // set npes and rank based on whether we are using MPI or not
71 #ifdef USEMPI
72  //npes_ = MPI::COMM_WORLD.Get_size();
73  //rank_ = MPI::COMM_WORLD.Get_rank();
74  MPI_Comm_rank( MPI_COMM_WORLD, ( int* )( &rank_ ) );
75  MPI_Comm_size( MPI_COMM_WORLD, ( int* )( &npes_ ) );
76 #else
77  utility_exit_with_message( "ERROR ERROR ERROR: The MPIWorkPoolJobDistributor will not work unless you have compiled using extras=mpi" );
78 #endif
79 }
80 
81 ///@brief dtor
82 ///WARNING WARNING! SINGLETONS' DESTRUCTORS ARE NEVER CALLED IN MINI! DO NOT TRY TO PUT THINGS IN THIS FUNCTION!
83 ///here's a nice link explaining why: http://www.research.ibm.com/designpatterns/pubs/ph-jun96.txt
85 { }
86 
87 ///@brief dummy for master/slave version
88 void
90 {
91  if ( rank_ == 0 ) {
92  master_go( mover );
93  } else {
94  slave_go( mover );
95  }
96 
97  // ideally these would be called in the dtor but the way we have the singleton pattern set up the dtors don't get
98  // called
99 #ifdef USEMPI
100  //MPI::COMM_WORLD.Barrier();
101  //MPI::Finalize();
102  MPI_Barrier( MPI_COMM_WORLD );
103  if(finalize_MPI_)
104  {
105  MPI_Finalize();
106  }
107 #endif
108 }
109 
110 
111 ///@details This is the heart of the MPIWorkPoolJobDistributor. It consists of two while loops: the job
112 ///distribution loop (JDL) and the node spin down loop (NSDL). The JDL has three functions. The first is to receive and
113 ///process messages from the slave nodes requesting new job ids. The second is to receive and process messages from the
114 ///slave nodes indicating a bad input. The third is to receive and process job_success messages from the slave nodes and
115 ///block while the slave node is writing its output. This is prevent interleaving of output in score files and silent
116 ///files. The function of the NSDL is to keep the head node alive while there are still slave nodes processing. Without
117 ///the NSDL if a slave node finished its allocated job after the head node had finished handing out all of the jobs and
118 ///exiting (a very likely scenario), it would wait indefinitely for a response from the head node when requesting a new
119 ///job id.
120 void
122 {
123 #ifdef USEMPI
124  runtime_assert( rank_ == 0 );
125 
126  int slave_data( 0 );
127  MPI_Status status;
128 
129  // set first job to assign
131 
132  // Job Distribution Loop
133  while ( next_job_to_assign_ != 0 ) {
134  TR << "Master Node: Waiting for job requests..." << std::endl;
135  //MPI::COMM_WORLD.Recv( &slave_data, 1, MPI::INT, MPI::ANY_SOURCE, MPI::ANY_TAG, status );
136  //TR << "Master Node: Received message from " << status.MPI::Status::Get_source() << " with tag " << status.MPI::Status::Get_tag() << std::endl;
137  MPI_Recv( &slave_data, 1, MPI_INT, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status);
138  TR << "Master Node: Received message from " << status.MPI_SOURCE << " with tag " << status.MPI_TAG << std::endl;
139 
140  // decide what to do based on message tag
141  //switch ( status.MPI::Status::Get_tag() ) {
142  switch ( status.MPI_TAG ) {
143  case NEW_JOB_ID_TAG:
144  //TR << "Master Node: Sending new job id " << next_job_to_assign_ << " to node " << status.MPI::Status::Get_source() << " with tag " << NEW_JOB_ID_TAG << std::endl;
145  //MPI::COMM_WORLD.Send( &next_job_to_assign_, 1, MPI::INT, status.MPI::Status::Get_source(), NEW_JOB_ID_TAG );
146  TR << "Master Node: Sending new job id " << next_job_to_assign_ << " to node " << status.MPI_SOURCE << " with tag " << NEW_JOB_ID_TAG << std::endl;
147  MPI_Send( &next_job_to_assign_, 1, MPI_INT, status.MPI_SOURCE, NEW_JOB_ID_TAG, MPI_COMM_WORLD );
149  break;
150  case BAD_INPUT_TAG:
151  //TR << "Master Node: Received job failure message for job id " << slave_data << " from node " << status.MPI::Status::Get_source() << std::endl;
152  TR << "Master Node: Received job failure message for job id " << slave_data << " from node " << status.MPI_SOURCE << std::endl;
153  bad_job_id_ = slave_data;
155  break;
156  case JOB_SUCCESS_TAG:
157  TR << "Master Node: Received job success message for job id " << slave_data << " from node " << status.MPI_SOURCE << " blocking till output is done " << std::endl;
158  MPI_Send( &next_job_to_assign_, 1, MPI_INT, status.MPI_SOURCE, JOB_SUCCESS_TAG, MPI_COMM_WORLD );
159  MPI_Recv( &slave_data, 1, MPI_INT, status.MPI_SOURCE, JOB_SUCCESS_TAG, MPI_COMM_WORLD, &status);
160  TR << "Master Node: Received job output finish message for job id " << slave_data << " from node " << status.MPI_SOURCE << std::endl;
161  break;
162  case REQUEST_MESSAGE_TAG:
163  {
164 
165  using namespace basic::message_listening;
166 
167  listener_tags listener_tag((listener_tags)slave_data);
168  MessageListenerOP listener(MessageListenerFactory::get_instance()->get_listener(listener_tag));
169 
170  std::string message_data = utility::receive_string_from_node(status.MPI_SOURCE);
171  std::string return_info="";
172  bool request_slave_data = listener->request(message_data, return_info);
173  utility::send_string_to_node(status.MPI_SOURCE, return_info);
174 
175  TR
176  << "Master Node: node '" << status.MPI_SOURCE << "' "
177  << "requests from the message listener '" << listener_tag_to_name(listener_tag) << "' "
178  << "data on '" << message_data << "', "
179  << "respond with '" << return_info << "' "
180  << (request_slave_data ? " and requests more data." : ".") << std::endl;
181 
182  if(request_slave_data){
183  message_data = utility::receive_string_from_node(status.MPI_SOURCE);
184  TR
185  << "Master Node: Received from node '" << status.MPI_SOURCE << "' "
186  << "'" << message_data << "'" << std::endl;
187  listener->receive(message_data);
188  }
189 
190  break;
191 
192  }
193  default:
194  {
195  std::stringstream err_msg;
196  err_msg
197  << "Received unrecognized mpi_tag '" << status.MPI_TAG << "' " << std::endl
198  << "\tfrom node '" << status.MPI_SOURCE << "' " << std::endl
199  << "\twith data '" << slave_data << "'";
200  utility_exit_with_message(err_msg.str());
201  }
202 
203  }
204  }
205  TR << "Master Node: Finished handing out jobs" << std::endl;
206 
207  core::Size n_nodes_left_to_spin_down( npes_ - 1 ); // don't have to spin down self
208 
209  // Node Spin Down loop
210  while ( n_nodes_left_to_spin_down > 0 ) {
211  TR << "Master Node: Waiting for " << n_nodes_left_to_spin_down << " slaves to finish jobs" << std::endl;
212  //MPI::COMM_WORLD.Recv( &slave_data, 1, MPI::INT, MPI::ANY_SOURCE, MPI::ANY_TAG, status );
213  //TR << "Master Node: Received message from " << status.MPI::Status::Get_source() << " with tag " << status.MPI::Status::Get_tag() << std::endl;
214  MPI_Recv( &slave_data, 1, MPI_INT, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status);
215  TR << "Master Node: Received message from " << status.MPI_SOURCE << " with tag " << status.MPI_TAG << std::endl;
216 
217  // decide what to do based on message tag
218  //switch ( status.MPI::Status::Get_tag() ) {
219  switch ( status.MPI_TAG ) {
220  case NEW_JOB_ID_TAG:
221  //TR << "Master Node: Sending spin down signal to node " << status.MPI::Status::Get_source() << std::endl;
222  //MPI::COMM_WORLD.Send( &next_job_to_assign_, 1, MPI::INT, status.MPI::Status::Get_source(), NEW_JOB_ID_TAG );
223  TR << "Master Node: Sending spin down signal to node " << status.MPI_SOURCE << std::endl;
224  MPI_Send( &next_job_to_assign_, 1, MPI_INT, status.MPI_SOURCE, NEW_JOB_ID_TAG, MPI_COMM_WORLD );
225  n_nodes_left_to_spin_down--;
226  break;
227  case BAD_INPUT_TAG:
228  break;
229  case JOB_SUCCESS_TAG:
230  TR << "Master Node: Received job success message for job id " << slave_data << " from node " << status.MPI_SOURCE << " blocking till output is done " << std::endl;
231  MPI_Send( &next_job_to_assign_, 1, MPI_INT, status.MPI_SOURCE, JOB_SUCCESS_TAG, MPI_COMM_WORLD );
232  MPI_Recv( &slave_data, 1, MPI_INT, status.MPI_SOURCE, JOB_SUCCESS_TAG, MPI_COMM_WORLD, &status);
233  TR << "Master Node: Received job output finish message for job id " << slave_data << " from node " << status.MPI_SOURCE << std::endl;
234  break;
235  case REQUEST_MESSAGE_TAG:
236  {
237  using namespace basic::message_listening;
238 
239  listener_tags listener_tag((listener_tags)slave_data);
240  MessageListenerOP listener(MessageListenerFactory::get_instance()->get_listener(listener_tag));
241 
242  std::string message_data = utility::receive_string_from_node(status.MPI_SOURCE);
243  std::string return_info="";
244  bool request_slave_data = listener->request(message_data, return_info);
245  utility::send_string_to_node(status.MPI_SOURCE, return_info);
246 
247  TR
248  << "Master Node: node '" << status.MPI_SOURCE << "' "
249  << "requests from the message listener '" << listener_tag_to_name(listener_tag) << "' "
250  << "data on '" << message_data << "', "
251  << "respond with '" << return_info << "' "
252  << (request_slave_data ? " and requests more data." : ".") << std::endl;
253 
254  if(request_slave_data){
255  message_data = utility::receive_string_from_node(status.MPI_SOURCE);
256  TR
257  << "Master Node: Received from node '" << status.MPI_SOURCE << "' "
258  << "'" << message_data << "'" << std::endl;
259  listener->receive(message_data);
260  }
261 
262  break;
263  }
264  default:
265  {
266  std::stringstream err_msg;
267  err_msg
268  << "Received unrecognized mpi_tag '" << status.MPI_TAG << "' " << std::endl
269  << "\tfrom node '" << status.MPI_SOURCE << "' " << std::endl
270  << "\twith data '" << slave_data << "'";
271  utility_exit_with_message(err_msg.str());
272  }
273  }
274  }
275  TR << "Master Node: Finished sending spin down signals to slaves" << std::endl;
276 #endif
277 }
278 
279 void
281 {
282  runtime_assert( !( rank_ == 0 ) );
283  go_main( mover );
284 }
285 
286 ///@brief dummy for master/slave version
289 {
290  core::Size temp( 0 );
291 
292  if ( rank_ == 0 ) {
293  temp = master_get_new_job_id();
294  } else {
295  temp = slave_get_new_job_id();
296  }
297 
298  return temp;
299 }
300 
303 {
304  using namespace basic::options;
305  using namespace basic::options::OptionKeys;
306 
307  Jobs const & jobs( get_jobs() );
308  JobOutputterOP outputter = job_outputter();
309 
310  while( next_job_to_assign_ <= jobs.size()) {
312  if ( next_job_to_assign_ > jobs.size() ) {
313  TR << "Master Node: No more jobs to assign, setting next job id to zero" << std::endl;
315  return 0;
316  } else if ( !outputter->job_has_completed( jobs[ next_job_to_assign_ ] ) ) {
317  TR << "Master Node: Getting next job to assign from list id " << next_job_to_assign_ << " of " << jobs.size() << std::endl;
318  return next_job_to_assign_; //not used by callers
319  } else if ( outputter->job_has_completed( jobs[ next_job_to_assign_ ] ) && option[ out::overwrite ].value() ) {
320  TR << "Master Node: Getting next job to assign from list, overwriting id " << next_job_to_assign_ << " of " << jobs.size() << std::endl;
321  return next_job_to_assign_; //not used by callers
322  }
323  }
324 
325  return 0; //we won't get here
326 }
327 
330 {
331 #ifdef USEMPI
332  runtime_assert( !( rank_ == 0 ) );
333 
334  if ( repeat_job_ == true ) {
335  TR << "Slave Node " << rank_ << ": Repeating job id " << current_job_id_ <<std::endl;
336  repeat_job_ = false;
337  } else {
338  TR << "Slave Node " << rank_ << ": Requesting new job id from master" <<std::endl;
339  int empty_data( 0 );
340  MPI_Status status;
341  current_job_id_ = 0;
342  //MPI::COMM_WORLD.Send( &empty_data, 1, MPI::INT, 0, NEW_JOB_ID_TAG );
343  //MPI::COMM_WORLD.Recv( &current_job_id_, 1, MPI::INT, 0, NEW_JOB_ID_TAG );
344  MPI_Send( &empty_data, 1, MPI_INT, 0, NEW_JOB_ID_TAG, MPI_COMM_WORLD );
345  MPI_Recv( &current_job_id_, 1, MPI_INT, 0, NEW_JOB_ID_TAG, MPI_COMM_WORLD, &status );
346  TR << "Slave Node " << rank_ << ": Received job id " << current_job_id_ << " from master" <<std::endl;
347  }
348 #endif
349  return current_job_id_;
350 }
351 
352 ///@brief dummy for master/slave version
353 void
355 {
356  if ( rank_ == 0 ) {
358  } else {
360  }
362 }
363 
364 void
366 {
367  runtime_assert( rank_ == 0 );
368  TR << "Master Node: Mark current job for repetition" << std::endl;
369  utility_exit_with_message( "Master Node: master_mark_current_job_id_for_repetition() should never be called" );
370 
371 }
372 
373 void
375 {
376  runtime_assert( !( rank_ == 0 ) );
377  TR << "Slave Node " << rank_ << ": Mark current job for repetition, id " << current_job_id_ << std::endl;
378  repeat_job_ = true;
379 }
380 
381 ///@brief dummy for master/slave version
382 void
384 {
385  if ( rank_ == 0 ) {
387  } else {
389  }
390 }
391 
392 void
394 {
395  //#ifdef USEMPI
396  runtime_assert( rank_ == 0 );
397 
398  Jobs const & jobs( get_jobs() );
399 
400  std::string const & bad_job_id_input_tag( jobs[ bad_job_id_ ]->input_tag() );
401 
402  TR << "Master Node: Job id " << bad_job_id_ << " failed, reporting bad input; other jobs of same input will be canceled: " << job_outputter()->output_name( jobs[ bad_job_id_ ] ) << std::endl;
403 
404  while( next_job_to_assign_ <= jobs.size() && jobs[ next_job_to_assign_ ]->input_tag() == bad_job_id_input_tag ) {
405  TR << "Master Node: Job canceled without trying due to previous bad input: " << job_outputter()->output_name( jobs[ next_job_to_assign_ ] ) << " id " << next_job_to_assign_ << std::endl;
407  }
408 
409  //iterate through for overwrite/end of vector statuses
410  --next_job_to_assign_; //master_get_new_job_id() will ++ this again first thing
412 
413  //#endif
414 }
415 
416 void
418 {
419 #ifdef USEMPI
420  runtime_assert( !( rank_ == 0 ) );
421 
422  //MPI::COMM_WORLD.Send( &current_job_id_, 1, MPI::INT, 0, BAD_INPUT_TAG );
423  MPI_Send( &current_job_id_, 1, MPI_INT, 0, BAD_INPUT_TAG, MPI_COMM_WORLD );
424 #endif
425 }
426 
427 ///@brief dummy for master/slave version
428 void
430 {
431  if ( rank_ == 0 ) {
432  master_job_succeeded( pose );
433  } else {
434  slave_job_succeeded( pose );
435  }
436 }
437 
439 {
440  finalize_MPI_ = finalize;
441 }
442 
443 void
445 {
446 #ifdef USEMPI
447  runtime_assert( rank_ == 0 );
448  TR << "Master Node: Job Succeeded" << std::endl;
449  utility_exit_with_message( "Master Node: master_job_succeeded() should never be called" );
450 #endif
451 }
452 
453 void
455 {
456 #ifdef USEMPI
457  runtime_assert( !( rank_ == 0 ) );
458 
459  if ( option[ OptionKeys::jd2::mpi_fast_nonblocking_output ].value() == true ) {
460  job_outputter()->final_pose( current_job(), pose );
461  } else {
462  int empty_data( 0 );
463  MPI_Status status;
464 
465  // send job success message to master
466  TR << "Slave Node " << rank_ << ": Finished job successfully! Sending output request to master." << std::endl;
467  MPI_Send( &current_job_id_, 1, MPI_INT, 0, JOB_SUCCESS_TAG, MPI_COMM_WORLD );
468 
469  // receive message from master that says is okay to write
470  TR << "Slave Node " << rank_ << ": Received output confirmation from master. Writing output." << std::endl;
471  MPI_Recv( &empty_data, 1, MPI_INT, 0, JOB_SUCCESS_TAG, MPI_COMM_WORLD, &status );
472  // time and write output (pdb, silent file, score file etc.)
473  clock_t starttime = clock();
474  job_outputter()->final_pose( current_job(), pose );
475  clock_t stoptime = clock();
476 
477  // send message to master that we are done outputing
478  TR << "Slave Node " << rank_ << ": Finished writing output in " << ((double) stoptime-starttime) / CLOCKS_PER_SEC << " seconds. Sending message to master" << std::endl;
479  MPI_Send( &empty_data, 1, MPI_INT, 0, JOB_SUCCESS_TAG, MPI_COMM_WORLD );
480  }
481 #endif
482 }
483 
484 
485 }//jd2
486 }//protocols