Rosetta 3.5
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
MPIArchiveJobDistributor.hh
Go to the documentation of this file.
1 // -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
2 // vi: set ts=2 noet:
3 //
4 // This file is part of the Rosetta software suite and is made available under license.
5 // The Rosetta software is developed by the contributing members of the Rosetta Commons consortium.
6 // (C) 199x-2009 Rosetta Commons participating institutions and developers.
7 // For more information, see http://www.rosettacommons.org/.
8 
9 /// @file protocols/jd2/archive/MPIArchiveJobDistributor.hh
10 /// @brief header for MPIArchiveJobDistributor - intended for continuous resampling jobs that spawn new jobs based on a pool/archive of
11 /// structures
12 /// @author Oliver Lange olange@u.washington.edu
13 
14 #ifndef INCLUDED_protocols_jd2_archive_MPIArchiveJobDistributor_hh
15 #define INCLUDED_protocols_jd2_archive_MPIArchiveJobDistributor_hh
16 
17 // Unit headers
21 // Package headers
23 #include <protocols/jd2/Job.fwd.hh>
25 
27 
28 // Utility headers
29 #include <core/types.hh>
30 #include <utility/exit.hh>
31 
32 // C++ headers
33 #include <string>
34 #include <deque>
35 
36 #include <platform/types.hh>
37 #include <core/pose/Pose.fwd.hh>
39 #include <protocols/jd2/Job.hh>
45 #include <utility/down_cast.hh>
46 #include <utility/vector1.fwd.hh>
47 #include <utility/vector1.hh>
48 #include <utility/vector1_bool.hh>
49 #include <utility/vectorL.fwd.hh>
50 #include <utility/vectorL.hh>
51 #include <utility/vectorL_Selector.hh>
52 #include <utility/vectorL_bool.hh>
53 #include <utility/pointer/ReferenceCount.fwd.hh>
54 #include <utility/pointer/ReferenceCount.hh>
55 #include <utility/pointer/access_ptr.fwd.hh>
56 #include <utility/pointer/access_ptr.hh>
57 #include <utility/pointer/owning_ptr.functions.hh>
58 #include <utility/pointer/owning_ptr.fwd.hh>
59 #include <utility/pointer/owning_ptr.hh>
60 #include <numeric/xyzVector.fwd.hh>
61 #include <algorithm>
62 #include <cassert>
63 #include <cstddef>
64 #include <iosfwd>
65 #include <limits>
66 #include <list>
67 #include <map>
68 #include <vector>
69 
70 
71 namespace protocols {
72 namespace jd2 {
73 namespace archive {
74 
75 //Archive message tags have numbers 100+
76 core::Size const BATCH_SYNC = 101;
77 core::Size const QUEUE_EMPTY = 102;
78 core::Size const ADD_BATCH = 103;
81 
82 core::Size const MPI_ARCHIVE_TAG = 12310925; //keep unique TAG to communicate between ArchiveManager and ArchiveJobDistributor
83 ///Tags used to tag messages sent by MPI functions used to decide whether a slave is requesting a new job id or
84 ///flagging a job as being a bad input
85 
86 ///@brief JobDistributor for the iterative ArchiveManager/Archive Framework
87 ///@details This job distributor is meant for running iterative jobs with the ArchiveManager/Archive Framework.
88 ///could vary greatly. In this configuration the first three nodes are dedicated processes (JobDistributor, FileBuffer, and ArchiveManager )
89 ///and the remaining CPUs form slave or worker nodes. This JD will not work at all
90 ///without MPI and the implementations of all but the interface functions have been put inside of ifdef directives.
91 ///Generally each function has a master and slave version, and the interface functions call one or the other depending
92 ///on processor rank.
93 
95 {
96 public:
97 
98  ///@brief CompletionMessage(s) are sent to the ArchiveManager whenever more than nr_notify decoys have been finished
99  //// or when the full batch is finished.
101  public:
102  CompletionMessage() : batch_id( 0 ), final( false ), bad( 0 ), good( 0 ), njobs( 0 ), msg_tag( JOB_COMPLETION ) {};
103  CompletionMessage( core::Size id, bool fi, core::Size bad_in, core::Size good_in, core::Size total_in )
104  : batch_id( id ), final( fi), bad( bad_in ),good( good_in ), njobs( total_in ), msg_tag( JOB_COMPLETION ) {};
106  : batch_id( batch_id ), final( false ), bad( 0 ), good( 0 ), njobs( 0 ), msg_tag( QUEUE_EMPTY )
107  { runtime_assert( tag == QUEUE_EMPTY ); };
109  bool final;
114  };
115 
116 protected:
118 
119  ///@brief ctor is protected; singleton pattern
121  friend class protocols::jd2::JobDistributorFactory; //ctor access
122 
///@brief interrupt hook; the body is empty, so this implementation does nothing
123  virtual void handle_interrupt() {}
124 
125 public:
126 
127  ///@brief overloaded to also start the ArchiveManager process
128  virtual
129  void
130  go( protocols::moves::MoverOP mover );
131 
132  void
134 
///@brief returns true if archive_rank() equals rank(), false otherwise
135  bool is_archive_rank() const {
136  return archive_rank() == rank();
137  }
138 
139 protected:
140  ///@brief triggered in slave if new batch_ID comes in.
141  virtual void batch_underflow();
142 
143  ///@brief act on a message, return true if message was understood
144  virtual bool process_message(
145  core::Size msg_tag,
146  core::Size slave_rank,
147  core::Size slave_job_id,
148  core::Size slave_batch_id,
149  core::Real run_time
150  );
151 
152  ///@brief overloaded to allow statistics and sending of CompletionMessages
153  virtual void mark_job_as_completed( core::Size job_id, core::Size batch_id, core::Real run_time );
154 
155  ///@brief overloaded to allow statistics and sending of CompletionMessages
156  virtual void mark_job_as_bad( core::Size job_id, core::Size batch_id );
157 
158  ///@brief overloaded to start new entries in nr_new_completed_, nr_completed_, nstruct_ and nr_bad_ ...
159  virtual void load_new_batch();
160 
161  ///@brief rank of ArchiveManager process
163  return archive_rank_;
164  }
165 
166 private:
167 
168 
169  //actually transmit a notify msg -- this should only be called out of the process_message method
170  void _notify_archive();
171 
172  ///@brief receive a new Batch from ArchiveManager
173  bool receive_batch( core::Size source_rank );
174 
175  ///@brief sync batch queue with slave node
176  void sync_batches( core::Size slave_rank );
177 
178  ///@brief send message to ArchiveManager
179  void master_to_archive( core::Size tag );
180 
181  //some statistics about completion for ArchiveManager to query
187 
188  //after how many completed decoys should we tell the Archive ?
191 
192  ///@brief add a notification (CompletionMessage) to the msg queue ...
193  // these are sent out by _notify_archive() at beginning of process_message()
194  void notify_archive( CompletionMessage const& );
195 
196  ///@brief work out if a notification should be sent (using above method)
197  void notify_archive( core::Size batch_id );
198 
199  ///@brief unsent notifications
200  std::deque< CompletionMessage > pending_notifications_;
201 
202 
204 };
205 
206 } //archive
207 } //jd2
208 } //protocols
209 
210 #endif //INCLUDED_protocols_jd2_archive_MPIArchiveJobDistributor_hh