Rosetta 3.5
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
JobDistributor.hh
Go to the documentation of this file.
1 // -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
2 // vi: set ts=2 noet:
3 //
4 // (c) Copyright Rosetta Commons Member Institutions.
5 // (c) This file is part of the Rosetta software suite and is made available under license.
6 // (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
7 // (c) For more information, see http://www.rosettacommons.org. Questions about this can be
8 // (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
9 
10 /// @file protocols/jd2/JobDistributor.hh
11 /// @brief August 2008 job distributor as planned at RosettaCon08 - Base class
12 /// @author Andrew Leaver-Fay
13 /// @author Steven Lewis smlewi@gmail.com
14 /// @author Modified by Sergey Lyskov
15 
16 #ifndef INCLUDED_protocols_jd2_JobDistributor_hh
17 #define INCLUDED_protocols_jd2_JobDistributor_hh
18 
19 // Unit headers
21 
22 // Package headers
26 
28 
29 #include <core/pose/Pose.fwd.hh>
30 
31 // Utility headers
32 #include <core/types.hh>
33 
34 #include <protocols/jd2/Job.fwd.hh>
35 #include <utility/vector1.hh>
36 #include <string>
37 
38 #ifdef WIN32
39  #include <protocols/jd2/Job.hh>
40 #endif
41 
42 namespace protocols {
43 namespace jd2 {
44 
45 ///Enforced JobDistributor destruction turned out to cause problems - calls to Tracers and/or the Options system
46 ///during destruction caused crashes if those systems had been destructed first. So this is deprecated.
47 // simple class to ensure that JobDistributor objects are destroyed at program exit.
48 // class JobDistributorDestroyer {
49 // public:
50 // JobDistributorDestroyer(JobDistributor* = 0);
51 // ~JobDistributorDestroyer();
52 // void set_job_distributor(JobDistributor* s);
53 
54 // JobDistributor* jd_;
55 // };
56 
58 
59 protected:
60  /// @brief Singleton instantiation pattern: derived classes will call the default ctor, but their ctors, too, must be
61  /// protected (and the JobDistributorFactory must be their friend).
63 
64  /// @brief MPIArchiveJobDistributor starts with an empty job-list...
65  JobDistributor( bool empty );
66 
67 private:
68  //the actual c'tor work is done here
69  void init_jd();
70 
71 public:
72 
73  static
75  get_instance();
76 
77  // NO LONGER called as a result of the static JobDistributorDestroyer object declared in JobDistributor.cc.
78  ///WARNING WARNING! SINGLETONS' DESTRUCTORS ARE NEVER CALLED IN MINI! DO NOT TRY TO PUT THINGS IN THIS FUNCTION!
79  ///here's a nice link explaining why: http://www.research.ibm.com/designpatterns/pubs/ph-jun96.txt
80  virtual
82 
83 public:
84 
85  /// @brief This may be overridden by derived classes. Default implementation invokes go_main.
86  virtual
87  void
89 
90  /// @brief invokes go, after setting JobOutputter
91  void
93 
94  /// @brief Movers may ask their controlling job distributor for information about the current job. They may also load
95  /// information into this job for later output.
96  JobOP
97  current_job() const;
98 
99  /// @brief Movers may ask their controlling job distributor for the output name as defined by the Job and JobOutputter.
101  current_output_name() const;
102 
103  /// @brief Movers (or derived classes) may ask for the JobOutputter
105  job_outputter() const;
106 
107  /// @brief Movers (or derived classes) may supply a new JobOutputter (setter counterpart of job_outputter())
108  void set_job_outputter( const JobOutputterOP &new_job_outputter );
109 
110  /// @brief JobInputter access
112  job_inputter() const;
113 
114  ///@brief should the go() function call MPI_finalize()? It probably should, this is true by default.
115  virtual
116  void mpi_finalize(bool finalize);
117 
118  /// @brief The input source for the current JobInputter.
121 
122  friend class JobDistributorFactory; // calls private ctor
123 
124  virtual void restart();
125 
127  return jobs_.size();
128  }
129 
130 
131  /// @brief integer access - which job are we on?
132  core::Size current_job_id() const;
133 
134 protected:
135  /// @brief Non-virtual get-job, run it, & output loop. This function is pretty generic and your subclass may be able
136  /// to use it. It is NOT virtual - this implementation can be shared by (at least) the simple
137  /// FileSystemJobDistributor, the MPIWorkPoolJobDistributor, and the MPIWorkPartitionJobDistributor. Do not feel that
138  /// you need to use it as-is in your class - but DO plan on implementing all its functionality!
139 
140  void
142 
143  /// Read access to private data for derived classes.
144 
145  /// @brief Jobs is the container of Job objects
146  Jobs const &
147  get_jobs() const;
148 
149  /// @brief Dedicated accessor for marking a Job in the Jobs container as completed.
150  /// Non-const access is needed to mark Jobs as completed on Master in MPI-JobDistributor;
151  // Jobs&
152  // get_jobs(); //a non-const get_jobs() was dropped in favor of this dedicated accessor
153  void mark_job_as_completed( core::Size job_id, core::Real run_time );
154 
155  void mark_job_as_bad( core::Size job_id );
156 
157 
158  /// @brief Parser access
159  ParserOP
160  parser() const;
161 
162  void begin_critical_section();
163 
164  void end_critical_section();
165 
166 
167 protected:
168  /// @brief this function updates the current_job_id_ and current_job_ fields. The boolean return states whether or not
169  ///a new job was obtained (if false, quit distributing!)
170  bool obtain_new_job( bool re_consider_current_job = false ); //if true we check if current_job is still selectable (after remove_bad_input)
171 
172  /// @brief Return 0 to signal that no available jobs remain. Otherwise return an index into the Jobs object.
173  virtual
174  core::Size
175  get_new_job_id() = 0;
176 
177  /// @brief This function is called upon a successful job completion; it has been virtualized so BOINC and MPI can delay/protect output
178  ///base implementation is just a call to the job outputter
179  virtual
180  void
181  job_succeeded( core::pose::Pose & pose, core::Real run_time );
182 
183  /// @brief This function is called when we give up on the job; it has been virtualized so BOINC and MPI can delay/protect output
184  ///base implementation is just a call to the job outputter
185  virtual
186  void
187  job_failed( core::pose::Pose & /*pose*/, bool /*will_retry*/ );
188 
189  /// @brief this function is called whenever a job "soft-fails" and needs to be retried. Generally it should ensure
190  ///that the subsequent call to obtain_new_job returns this job over again.
191  virtual
192  void
194 
195  /// @brief this function is called inside go_main if a mover returns FAIL_BAD_INPUT. Its purpose is to remove other
196  ///jobs with the same input (which will still be bad) from the available list of jobs. The default implementation is
197  ///a no-op and not all distributors are expected/able to implement this functionality, only those that can guarantee
198  ///no other jobs of that input are currently running.
199  virtual
200  void
202 
203  /// @brief Derived classes are allowed to clean up any temporary files or data relating to the current job after the
204  /// current job has completed. Called inside go_main loop. Default implementation is a no-op.
205  virtual
206  void
208 
209  /// @brief Derived classes are allowed to perform some kind of action when the job distributor runs out of jobs to
210  /// execute. Called inside go_main. Default implementation is a no-op.
211  virtual
212  void
214 
215  void
217 
218  /// @brief This function is called when a job is not yet finished and gets terminated abnormally (ctrl-c, kill etc).
219  /// When implementing it in subclasses, make sure to delete all in-progress data that your job spawned.
220  virtual void handle_interrupt() = 0;
221 
222 
223 private:
224 
228 
230  /// @brief pointer to current job. Information is somewhat duplicated with current_job_id_.
232 
233  /// @brief access into jobs_ vector indicating current job. Contains more information than current_job_ in that it can be incremented...
235 
236  /// @brief access into jobs_ vector indicating the previous job. Used with the -jd2:delete_old_poses option for deleting unnecessary poses
238 
239  //this is if NOT defined - keep this static pointer for singleton management only in non-multithreaded case
240 #ifndef MULTITHREADED
242 #endif
243  // necessary for destroying instance_ when JobDistributorDestroyer is finished.
244  //static JobDistributorDestroyer destroyer_;
245 
246  ///BATCH interface:
247  /// @details the BATCH interface of the JobDistributor is used to allow consecutive execution of a set of jobs with different flags
248  /// different batches behave like completely independent rosetta runs --- but of course a number of processes can already work on
249  /// a new batch, while others are still finishing the last jobs of the previous batch.
250  /// run from command-line with -run:batches flag1 flag2 flag3
251  /// the flag1 flag2... point to @flag1 files that are added to all other flags ( and removed at end of batch )
252  /// you can have all output in same output file or ( by redefining e.g. -out:file:silent in each batch-flag file ) in different output files
253 
254 public:
255 
256  /// @brief what is the current batch ? --- name refers to the flag-file used for this batch
258 
259  /// @brief add a new batch ( name will be interpreted as flag_file )
260  // positive id means we want to set a particular batch id ...
261  // fill-up with BOGUS_BATCH_ID
262  // if current_batch_id > id this will not have any effect... --> error?
263  virtual void add_batch( std::string const&, core::Size id = 0 );
264 
265  /// @brief what is the current batch number ? --- refers to position in batches_
267  return current_batch_id_;
268  }
269 
270 protected:
271 
272  /// @brief set current_batch_id --- eg for slave nodes in MPI framework
273  void set_batch_id( core::Size setting );
274 
275  /// @brief switch current_batch_id_ to next batch
276  virtual bool next_batch();
277 
278  /// @brief Hook called if the end of batches_ is reached via next_batch() or set_batch_id().
279  virtual void batch_underflow() {}; //intentionally empty: no action for base-class
280 
281  /// @brief called by next_batch() or set_batch_id() to switch-over and restart JobDistributor on new batch
282  virtual void load_new_batch();
283 
284  /// @brief how many batches are in our list ... this can change dynamically
286  return batches_.size();
287  }
288 
289  /// @brief Returns the name of the batch with the given id by indexing batches_ with batch_id.
290  std::string const& batch( core::Size batch_id ) {
291  return batches_[ batch_id ];
292  }
293 
294 
295 protected:
296  /// @brief Sets up the callback function that will be called when our process is about to terminate.
297  /// This will allow us to exit properly (clean up in_progress_files/tmp files if any).
298  static void setup_system_signal_handler( void (*prev_fn)(int) = jd2_signal_handler);
299 
300  /// @brief Set signal handler back to default state.
301  static void remove_system_signal_handler();
302 
303  /// @brief Default callback function for signal handling
304  static void jd2_signal_handler(int Signal);
305 
306 
307 private:
308 
309  /// @brief read -run:batches
311 
312  /// @brief current_batch or 0
314 
315  /// @brief all batches if present
317 };
318 
319 }//jd2
320 }//protocols
321 
322 #endif //INCLUDED_protocols_jd2_JobDistributor_hh