31 #include <utility/io/ozstream.hh>
34 #include <basic/Tracer.hh>
35 #include <basic/MemTracer.hh>
36 #include <basic/options/option.hh>
37 #include <utility/exit.hh>
39 #include <basic/prof.hh>
40 #include <ObjexxFCL/string.functions.hh>
43 #include <basic/options/keys/out.OptionKeys.gen.hh>
45 #include <basic/options/keys/archive.OptionKeys.gen.hh>
46 #include <basic/options/keys/run.OptionKeys.gen.hh>
52 #include <basic/prof.hh>
54 #include <utility/vector1.hh>
// Tracer object with internal linkage, used throughout this file for log
// output (tr.Trace / tr.Debug / tr.Info / tr.Warning). It is constructed with
// the string below -- presumably the tracer's channel name; confirm against
// basic/Tracer.hh.
56 static basic::Tracer
tr(
"protocols.jd2.MPIArchiveJobDistributor");
69 using namespace basic::options;
70 using namespace basic::options::OptionKeys;
78 nr_notify_( option[ OptionKeys::archive::completion_notify_frequency] ),
83 if ( option[ OptionKeys::run::test_cycles ] || option[ OptionKeys::run::dry_run ] ) {
100 mem_tr <<
"MPIArchiveJobDistributor::go" << std::endl;
103 tr.Warning <<
"Master JD starts" << std::endl;
108 tr.Warning <<
"FileBuffer starts " << std::endl;
112 tr.Warning <<
"Archive starts... " << std::endl;
116 tr.Warning <<
"send STOP to FileBuffer " << std::endl;
127 MPI_Barrier( MPI_COMM_WORLD );
131 std::cerr <<
"MPI FINALIZED closing down... " << std::endl;
132 std::cout <<
"MPI FINALIZED closing down... " << std::endl;
144 MPI_Recv( buf, 2, MPI_INT, source_rank,
MPI_JOB_DIST_TAG, MPI_COMM_WORLD, &status );
149 char *cbuf =
new char[ size+1 ];
150 MPI_Recv( cbuf, size, MPI_CHAR, source_rank,
MPI_JOB_DIST_TAG, MPI_COMM_WORLD, &status );
154 tr.Debug <<
"received STOP signal from Archive " << std::endl;
160 new_batch.assign( cbuf, size );
163 tr.Info <<
"received new batch " << new_batch <<
" with id " <<
id << std::endl;
173 PROF_START( basic::ARCHIVE_SYNC_BATCHES );
175 tr.Trace <<
"Node " <<
rank() <<
" sync batches with " << slave_rank << std::endl;
184 buf[ 0 ] = slave_batch_size;
// Receive one int from slave_rank into buf; the value is read back from
// buf[ 0 ] right afterwards. Pass the buffer itself (decays to int*) rather
// than a pointer to the array, matching the other MPI_Recv call in this file.
187 MPI_Recv( buf, 1, MPI_INT, slave_rank,
MPI_JOB_DIST_TAG, MPI_COMM_WORLD, &status );
188 slave_batch_size = buf[ 0 ];
191 tr.Trace <<
"Node " <<
rank() <<
" slave_batch_size " << slave_batch_size << std::endl;
197 nr_to_have = buf[ 0 ];
199 buf[ 0 ] = nr_to_have;
202 tr.Trace <<
"Node " <<
rank() <<
" master_batch_size " << nr_to_have << std::endl;
205 for (
Size send_id = slave_batch_size + 1; send_id <= nr_to_have; ++send_id ) {
208 tr.Trace <<
"nr_batches() " <<
nr_batches() <<
" send_id " << send_id << std::endl;
212 buf[ 0 ] =
batch( send_id ).size();
220 PROF_STOP( basic::ARCHIVE_SYNC_BATCHES );
230 Size const mpi_size( 6 );
231 int mpi_buf[ mpi_size ];
247 PROF_START( basic::MPI_JD2_WAITS_FOR_ARCHIVE );
248 tr.Debug <<
"no more batches... ask ArchiveManager if there is some more to do... wait..." << std::endl;
250 basic::show_time(
tr,
"no more batches: send QUEUE_EMPTY to archive" );
252 tr.Info <<
"wait for answer on QUEUE-EMPTY msg... send with " <<
current_batch_id() <<
" batch_id " << std::endl;
255 tr.Debug <<
"...received " << std::endl;
256 basic::show_time(
tr,
"refilled queue: received new batches after QUEUE_EMPTY" );
257 PROF_STOP( basic::MPI_JD2_WAITS_FOR_ARCHIVE );
313 tr.Debug <<
"add to notification queue " << msg.
batch_id << std::endl;
326 MPI_Request notify_request;
328 bool notify_first(
true );
334 PROF_START( basic::MPI_NOTIFY_ARCHIVE );
335 static basic::Tracer notification_tracer(
"protocols.jd2.notifications");
343 if ( !notify_first ) {
344 notification_tracer.Debug <<
"test MPI-Send completion of last JOB_COMPLETION ( batch_" << notify_buf[ 1 ] <<
" ) message...";
345 basic::show_time(
tr,
"try to send JOB_COMPLETION" );
// Test whether the previous non-blocking JOB_COMPLETION send has completed.
// MPI_Test takes the address of the MPI_Request handle (notify_request);
// the "&not" prefix had been corrupted into the single character U+00AC.
347 MPI_Test( &notify_request, &flag, &status );
349 MPI_Test_cancelled( &status, &flag2 );
350 notification_tracer.Debug << ( flag ?
"completed " :
"pending " ) << ( !flag2 ?
"/ test succeeded " :
"/ test cancelled" ) << std::endl;
355 notification_tracer.Debug <<
"send out JOB_COMPLETION " << msg.
batch_id << std::endl;
356 basic::show_time(
tr,
"send JOB_COMPLETION" );
358 notify_buf[ 0 ] = msg.msg_tag;
359 notify_buf[ 1 ] = msg.batch_id;
360 notify_buf[ 2 ] = msg.final ? 1 : 0;
361 notify_buf[ 3 ] = msg.bad;
362 notify_buf[ 4 ] = msg.good;
363 notify_buf[ 5 ] = msg.njobs;
366 notify_first =
false;
368 basic::show_time(
tr,
"finished _notify_archive" );
370 PROF_STOP( basic::MPI_NOTIFY_ARCHIVE );
378 tr.Trace <<
"notify_archive for batch: " << batch_id <<
" now " <<
nr_new_completed_[ batch_id ] <<
" decoys " << std::endl;
405 tr.Trace <<
"mark_job_as_completed " << job_id <<
" batch: " << batch_id <<
" " << run_time <<
" seconds" << std::endl;
408 runtime_assert( batch_id <=
nr_jobs_.size() );
417 runtime_assert( batch_id <=
nr_jobs_.size() );
433 nstruct_.push_back( option[ out::nstruct ] );
436 mem_tr <<
"MPIArchiveJobDistributor::load_new_batch()'ed" << std::endl;