33 #include <basic/Tracer.hh>
34 #include <basic/MemTracer.hh>
35 #include <basic/options/option.hh>
36 #include <utility/exit.hh>
40 #include <ObjexxFCL/string.functions.hh>
41 #include <utility/file/file_sys_util.hh>
44 #include <basic/options/keys/out.OptionKeys.gen.hh>
45 #include <basic/options/keys/jd2.OptionKeys.gen.hh>
46 #include <basic/options/keys/in.OptionKeys.gen.hh>
47 #include <basic/options/keys/run.OptionKeys.gen.hh>
48 #include <basic/options/keys/broker.OptionKeys.gen.hh>
49 #include <basic/options/keys/abinitio.OptionKeys.gen.hh>
52 #include <basic/options/option.cc.gen.hh>
53 #include <basic/options/option_macros.hh>
54 #include <basic/prof.hh>
65 #include <utility/io/izstream.hh>
67 #include <utility/io/ozstream.hh>
69 #if (defined WIN32) //&& (!defined WIN_PYROSETTA)
77 #include <utility/vector1.hh>
78 #include <boost/bind.hpp>
// File-local tracer used for the log output in this file (tr.Debug, tr.Info,
// tr.Warning, tr.Error). It is constructed with the channel name
// "protocols.jd2.Archive".
80 static basic::Tracer
tr(
"protocols.jd2.Archive");
88 using namespace basic::options;
89 using namespace basic::options::OptionKeys;
// Declares the option iterative::input_pool with the help text
// "read these structures into pool". The third argument is ""
// (presumably the default value -- confirm against option_macros.hh).
// Further down in this file the option is tested with .user() and its value
// is logged as the place decoys are read from.
94 NEW_OPT( iterative::input_pool,
"read these structures into pool",
"" );
// Declares the option iterative::input_pool_struct_type with the help text
// "specifies the input-silent-struct type"; third argument is "protein"
// (presumably the default value -- confirm).
95 NEW_OPT( iterative::input_pool_struct_type,
"specifies the input-silent-struct type",
"protein" );
// Seconds-based sleep() implemented on top of Sleep(), forwarding
// seconds * 1000.
// NOTE(review): this looks like a Windows-only shim (the include section has
// a `#if (defined WIN32)` guard and Win32 Sleep() takes milliseconds); the
// guard around this definition is not visible here -- confirm.
// NOTE(review): seconds * 1000 is computed in int, so very large arguments
// would overflow.
104 void sleep(
int seconds){
106 Sleep( seconds * 1000 );
111 using namespace basic::options;
112 using namespace basic::options::OptionKeys;
113 using namespace core;
// NOTE(review): the returns below look like the bodies of several small
// Batch accessors whose signatures are not visible here. Which accessor each
// return belongs to is inferred from the file names -- confirm.
//
// Batch name: "batch_" followed by lead_zero_string_of( id(), 6 )
// (presumably the id zero-padded to six digits -- confirm).
120 return "batch_" + ObjexxFCL::lead_zero_string_of(
id(), 6 );
// Batch directory: the batch name with a trailing "/". Other code in this
// file appends "BATCH_INFO" directly to dir(), which relies on that slash.
124 return batch() +
"/";
// Path "<batch>/decoys.out" (presumably silent_out()).
128 return batch() +
"/decoys.out";
// Path "<batch>/decoys.in" (presumably silent_in()).
133 return batch() +
"/decoys.in";
// Path "<batch>/score.fsc" (presumably score_file()).
139 return batch() +
"/score.fsc";
// Path "<batch>/flags" (presumably flag_file()).
143 return batch() +
"/flags";
// Path "<batch>/setup.tpb" (presumably broker_file()).
147 return batch() +
"/setup.tpb";
// Writes the batch status to `out` as a sequence of "KEY value" entries, each
// followed by `eol` (defined outside the visible lines). Entries, in order:
// ID, INPUT, NSTRUCT, RETURNED, FINISHED, CANCELLED,
// ALLOW_READING_CANCELLED_DECOYS. Boolean fields are written as "yes"/"no".
// The tag-parsing code further down in this file expects the same tag names
// in the same order, so the two must be kept in sync.
156 out <<
"ID " << id() << eol
157 <<
"INPUT " << ( has_silent_in() ?
"yes" :
"no" ) << eol
158 <<
"NSTRUCT " << nstruct() << eol
159 <<
"RETURNED " << decoys_returned() << eol
160 <<
"FINISHED " << ( has_finished() ?
"yes" :
"no" ) << eol
161 <<
"CANCELLED " << ( is_cancelled() ?
"yes" :
"no" ) << eol
162 <<
"ALLOW_READING_CANCELLED_DECOYS " << ( allow_reading_cancelled_decoys() ?
"yes" :
"no" ) << eol;
// Delegates to batch.show( out, true ).
// NOTE(review): presumably the body of the stream-insertion operator for
// Batch; the meaning of the `true` argument is not visible here.
166 batch.
show( out,
true );
// Opens "<dir()>BATCH_INFO" for writing and logs the target at debug level.
// What is written to `out` is not visible in these lines.
// NOTE(review): no check of the stream state is visible after opening.
171 utility::io::ozstream out( dir() +
"BATCH_INFO" );
172 tr.Debug <<
"write batch info " << dir() <<
"BATCH_INFO" << std::endl;
// Opens "<dir()>BATCH_INFO" for reading.
179 utility::io::izstream in( dir() +
"BATCH_INFO" );
// A stream that is not good() is reported as EXCN_Archive("cannot find ...").
180 if ( !in.good() )
throw(
EXCN_Archive(
"cannot find " + dir() +
"BATCH_INFO" ) );
// Consistency check: `this_id` (set outside the visible lines) must equal
// id(); otherwise an EXCN_Archive naming `this_batch` and batch() is thrown.
// NOTE(review): presumably this_id/this_batch hold the values from before
// the file was parsed -- confirm.
182 if ( this_id !=
id() ) {
183 throw(
EXCN_Archive(
"Inconsistency detected when reading BATCH_INFO for "+ this_batch+
" ID in BATCH_INFO is " + batch() ) );
// Throws EXCN_Archive describing a tag mismatch: the message contains the
// batch name, the expected tag and the tag actually found.
// NOTE(review): the enclosing function signature is not visible here.
189 throw(
EXCN_Archive(
"Error reading batch information for batch: "+batch.
batch()+
" expected_tag: "+expected_tag+
" found " + tag) );
// Throws EXCN_Archive describing a bad value: the message contains the batch
// name and the tag whose value was rejected.
// NOTE(review): the enclosing function signature is not visible here.
193 throw(
EXCN_Archive(
"Error reading batch information for batch: "+batch.
batch()+
" wrong value for tag: "+tag ) );
// Tag-by-tag parsing: each step compares the tag just read against
// `expected_tag`, then advances `expected_tag` to the next name. The visible
// sequence after the first comparison is INPUT, NSTRUCT, RETURNED, FINISHED,
// CANCELLED, which matches the order used when the batch status is written.
// NOTE(review): the initial value of expected_tag, the reads of `tag` and the
// bodies/else-branches of these ifs are not visible here.
202 if ( tag == expected_tag ) {
208 expected_tag =
"INPUT";
209 if ( tag == expected_tag ) {
219 expected_tag =
"NSTRUCT";
220 if ( tag == expected_tag ) {
226 expected_tag =
"RETURNED";
227 if ( tag == expected_tag ) {
233 expected_tag =
"FINISHED";
234 if ( tag == expected_tag ) {
243 expected_tag =
"CANCELLED";
244 if ( tag == expected_tag ) {
// Member initializers: the three rank members are copied from the
// like-named arguments; save_archive_time_interval_ is set to 60
// (presumably seconds, since it sits next to time()-based code -- confirm).
260 archive_rank_( archive_rank ),
261 jd_master_rank_( jd_master_rank ),
262 file_buf_rank_( file_buf_rank ),
263 save_archive_time_interval_( 60 )
// Counts the batches that are still outstanding: a batch is counted when it
// has not finished, is not cancelled and is valid().
// NOTE(review): the declaration and return of `unfinished_batches` are not
// visible in these lines.
270 for ( BatchList::const_iterator it =
batches().begin(); it !=
batches().end(); ++it ) {
271 if ( !it->has_finished() && !it->is_cancelled() && it->valid() ) ++unfinished_batches;
// Memory-tracer checkpoint (`mem_tr` is declared outside the visible lines).
283 mem_tr <<
"initialized IterativeAbrelax" << std::endl;
// Only when the user set iterative::input_pool: take its value as `decoys`
// and log that decoys are read from it into the archive. The actual read is
// not visible here.
286 if ( option[ OptionKeys::iterative::input_pool ].user() ) {
287 std::string const& decoys( option[ OptionKeys::iterative::input_pool ]() );
288 tr.Info <<
"reading decoys from " << decoys <<
" into archive " << std::endl;
// Handler for exceptions derived from utility::excn::EXCN_Base; the try
// block it belongs to and the handler body are not visible here.
296 }
catch ( utility::excn::EXCN_Base& excn ) {
// One-shot flag: the status block below is printed while it is true and the
// flag is cleared afterwards. Where it is set back to true is not visible.
307 bool print_status(
true );
// The status dump is skipped entirely unless the Debug channel is visible.
310 if ( print_status &&
tr.Debug.visible() ) {
311 tr.Debug <<
"probing for message in ArchiveManager" << std::endl;
// Reports whether `stop` is set and the current unfinished_batches() count.
312 tr.Debug <<
"\nSTATUS: " << (stop ?
"STOP send: " :
"" ) <<
" ------ unfinished_batches: " <<
unfinished_batches() << std::endl;
// The pool status itself is emitted between these two markers by code that
// is not visible here.
313 tr.Debug <<
"POOL_STATUS: " << std::endl;
315 tr.Debug <<
"END_STATUS\n\n"<< std::endl;
316 basic::show_time(
tr,
"manager main msg-loop: probe for message..." );
317 print_status =
false;
// Six-int message buffer, zero-initialised. The reads below use buf[0] as
// the message tag and buf[1..5] as its payload.
341 int buf[ 6 ]={0,0,0,0,0,0};
// NOTE(review): a failed receive is only logged in the visible code; no
// abort or retry is visible here -- confirm the following lines handle it.
345 if ( merrno != MPI_SUCCESS )
tr.Error <<
"ERROR: MPI_Recv error " << std::endl;
355 Size const msg_tag( buf[ 0 ]);
356 tr.Debug <<
"received message in ArchiveManager " << msg_tag << std::endl;
// Payload layout used for the job-completion message:
// buf[1] batch id, buf[2]==1 marks the final message,
// buf[3] bad count, buf[4] good count, buf[5] total count.
360 Size const batch_id( buf[ 1 ] );
361 bool const final( buf[ 2 ] == 1 );
362 Size const bad( buf[ 3 ] );
363 Size const good( buf[ 4 ] );
364 Size const total( buf[ 5 ] );
// NOTE(review): "receveid" is a typo in the runtime log text; left unchanged.
365 basic::show_time(
tr,
"ArchiveManager receveid job-completion..." );
366 tr.Debug <<
"ArchiveManager received JOB_COMPLETION " << batch_id <<
" " << bad <<
" " << good <<
" " << total << std::endl;
// A second branch (the QUEUE_EMPTY handling, judging by the log messages
// below) reads the batch id from buf[1] again.
371 Size const batch_id( buf[ 1 ] );
// Walk max_working_batch_id downwards past batches that are invalid or have
// finished, stopping at 0. Its initial value is not visible here.
378 while ( max_working_batch_id > 0
379 && ( !
batches_[ max_working_batch_id ].valid() ||
batches_[ max_working_batch_id ].has_finished() ) )
380 --max_working_batch_id;
// A QUEUE_EMPTY that refers to a batch id at or below the newest working
// batch is treated as outdated and only logged.
381 if ( batch_id <= max_working_batch_id ) {
382 tr.Info <<
"ArchiveManager ignored outdated QUEUE_EMPTY with batch_id " << batch_id <<
" -- already submitted " <<
batches_.size() << std::endl;
// Profiling bracket around code that is not visible here.
387 PROF_START( basic::ARCHIVE_CRITICAL_JOBSCOMPLETE );
391 PROF_STOP( basic::ARCHIVE_CRITICAL_JOBSCOMPLETE );
// Profiling bracket around the batch-generation path.
394 PROF_START( basic::ARCHIVE_GEN_BATCH );
396 tr.Info <<
"ArchiveManager received QUEUE_EMPTY" << std::endl;
397 tr.Debug <<
"JD batch_id: " << batch_id <<
" max_working_batch_id: " << max_working_batch_id << std::endl;
398 basic::show_time(
tr,
"manager main msg-loop: queue empty..." );
409 tr.Debug <<
"archive is finished ... spinning down" << std::endl;
413 PROF_STOP( basic::ARCHIVE_GEN_BATCH );
// Any message tag not handled above terminates the program with a message
// that includes the numeric tag.
418 utility_exit_with_message(
"unknown msg in ArchiveManager " + ObjexxFCL::string_of( msg_tag ) );
// Exceptions derived from EXCN_Base are logged with their message and then
// the program exits via utility_exit_with_message.
420 }
catch ( utility::excn::EXCN_Base &excn ) {
421 basic::show_time(
tr,
"Exception in main msg-loop !" );
422 tr.Error <<
"[ERROR] " << excn.msg() << std::endl;
423 tr.Error <<
"spinning down" << std::endl;
426 utility_exit_with_message(
"error detected in ArchiveManager -- spinning down" );
432 tr.Info <<
"ArchiveManager finished !!!" << std::endl;
// Time since the last save. `last_save` is a function-local static that is
// initialised on first execution; where it is updated is not visible here.
// NOTE(review): presumably compared against save_archive_time_interval_ --
// confirm.
439 static time_t last_save( time(NULL) );
440 time_t now( time( NULL ) );
441 Size const elapsedtime( now - last_save );
// Profiling bracket around code that is not visible here.
450 PROF_START( basic::ARCHIVE_JOBSCOMPLETE );
452 PROF_STOP( basic::ARCHIVE_JOBSCOMPLETE );
// Measures wall-clock time spent in the call made between `before` and
// `after` (not visible here; the log text names it the archive's idle method).
459 time_t before( time(NULL) );
461 time_t after( time( NULL ) );
// Only durations above one second are reported.
462 if ( after-before > 1 )
tr.Debug <<
"spend " << after-before <<
" seconds in archives idle method... " << std::endl;
// Pads the elapsed time up to five seconds by sleeping for the remainder.
464 if ( after-before < 5 )
sleep( (5 - ( after - before )) );
// NOTE(review): the lines below are a partial view of the routine that
// processes returned decoys of one batch; its signature, several branches
// and all closing braces are not visible, so the comments describe single
// statements only.
//
// Unpack the fields of `msg` that are used below.
473 Size batch_id( msg.batch_id );
474 bool final( msg.final );
476 Size good_decoys( msg.good );
// With run::constant_seed set, every non-final message is skipped.
480 if ( option[ run::constant_seed ] && !
final )
return;
482 tr.Debug <<
"jobs_completed for " << batch.
batch() <<
"..." <<
"already "
// The Batch object looked up outside the visible lines must carry the id
// taken from the message.
484 runtime_assert( batch.
id() == batch_id );
// Profiled section dealing with the file buffer for the batch's silent
// output file. The conditions selecting the three logged cases (not final,
// final, cancelled batch) are not visible here.
489 PROF_START( basic::ARCHIVE_BLOCK_FILE );
491 tr.Debug <<
"not final ... block file" << std::endl;
// The file is addressed as ".//" + silent_out(); the same spelling is used
// for release_file below.
493 file_buf.block_file(
".//"+batch.
silent_out() );
495 tr.Debug <<
"final ... close file " << std::endl;
500 tr.Debug <<
"returned decoys of cancelled batch.. ignore..." << std::endl;
503 PROF_STOP( basic::ARCHIVE_BLOCK_FILE );
// Profiled section that reads the decoys.
505 PROF_START( basic::ARCHIVE_READ_DECOYS );
510 tr.Debug <<
"read file " << batch.
silent_out() << std::endl;
// Probe whether the silent output file can be opened and log the result.
// The handling of the failure case is not visible here.
511 utility::io::izstream testin( batch.
silent_out() );
512 tr.Debug <<
"stream is " << ( testin.good() ?
"good " :
"bad" ) << std::endl;
513 if ( !testin.good() ) {
520 using namespace core::io::silent;
527 tr.Debug <<
"...and release file" << std::endl;
528 file_buf.release_file(
".//"+batch.
silent_out() );
531 tr.Debug <<
"found " << tags_in_file.size() <<
" decoys in " << batch.
silent_out() << std::endl;
// Empty-bodied loop: advances `iter` through tags_in_file while `ct` is
// <= batch.decoys_returned(). The starting values of iter/ct are not visible.
// NOTE(review): presumably this skips the tags that were already read in
// earlier calls; verify the <= bound against the initial value of ct.
535 iter != tags_in_file.end() && ct <= batch.
decoys_returned(); ++iter, ++ct ) { };
// Everything from `iter` to the end of the tag list is queued for reading.
538 std::copy( iter, tags_in_file.end(), std::back_inserter( tags_to_read ) );
539 if ( tags_to_read.size() ) {
541 sfd.read_file( batch.
silent_out(), tags_to_read );
// A read failure is logged as "[ignored ERROR]" with the exception message;
// the rest of the handler body is not visible here.
542 }
catch ( utility::excn::EXCN_Base& excn ) {
544 tr.Error <<
"[ignored ERROR] " << excn.msg() << std::endl;
545 tr.Error <<
"this is not the final version of " << batch.
silent_out() <<
"\n... maybe some data is still held in a cache of the filesystem..."
546 <<
" let's see if it works better the next time we have to read" << std::endl;
551 PROF_STOP( basic::ARCHIVE_READ_DECOYS );
552 tr.Debug <<
"add " << tags_to_read.size() <<
" structures to archive " << std::endl;
// Profiling bracket around the evaluation of the new decoys (not visible).
554 PROF_START( basic::ARCHIVE_EVAL_DECOYS );
565 PROF_STOP( basic::ARCHIVE_EVAL_DECOYS );
// Path taken when there was nothing new to read (condition not visible).
567 tr.Info <<
"no more decoys to read from file " << batch.
silent_out() << std::endl;
568 PROF_STOP( basic::ARCHIVE_READ_DECOYS );
// Profiling bracket around saving the archive (call not visible).
572 PROF_START( basic::SAVE_ARCHIVE );
574 PROF_STOP( basic::SAVE_ARCHIVE );
// Raises EXCN_Archive when there are no good decoys to read; the guarding
// condition is not visible here (presumably based on good_decoys -- confirm).
576 tr.Debug <<
" no good decoys to read " << std::endl;
577 throw EXCN_Archive(
"all decoys returned with FAIL_BAD_INPUT" );
// Logs the flag file of the batch that is about to be queued.
593 tr.Debug <<
"queue new batch into MPIArchiveJobDistributor " << batch.
flag_file() << std::endl;
// Two messages are prepared in `buf`; both carry the batch id in buf[1].
// buf[0] of the first one is set outside the visible lines.
598 buf[ 1 ] = batch.
id();
// In the second message buf[0] holds the length of `strbuf`
// (presumably the string payload sent next -- confirm).
606 buf[ 0 ] = strbuf.size();
607 buf[ 1 ] = batch.
id();
// Linear search through batches_ for the entry whose id() equals batch_id;
// the loop stops at the first match. What happens when nothing matches is
// not visible here.
618 for ( BatchList::iterator it =
batches_.begin(); it!=
batches_.end(); ++it) {
619 if ( it->id() == batch_id )
break;
// With run::constant_seed set, the cancel request is only logged as a
// warning (the log text gives integration tests as the reason).
626 if ( option[ OptionKeys::run::constant_seed ]() ) {
627 tr.Warning <<
"asked to cancel batch, but ignore in constant_seed mode to enable integration test" << std::endl;
630 tr.Debug <<
"cancel batch " << batch.
flag_file() << std::endl;
// Two messages are prepared in `buf`; both carry the batch id in buf[1].
// buf[0] of the first one is set outside the visible lines.
635 buf[ 1 ] = batch.
id();
// In the second message buf[0] holds the length of `strbuf`.
643 buf[ 0 ] = strbuf.size();
644 buf[ 1 ] = batch.
id();
// With jd2::mpi_nowait_for_remaining_jobs set, the program exits right here
// through utility_exit_with_message; the message states this is not an error.
662 if ( option[ OptionKeys::jd2::mpi_nowait_for_remaining_jobs ]() ) {
664 utility_exit_with_message(
"quick exit from job-distributor due to flag jd2::mpi_nowait_for_remaining_jobs --- this is not an error " );
667 tr.Debug <<
"send STOP signal to JobDistributor " << std::endl;
// A Batch constructed with id 0 is used for the stop signal; how it is sent
// is not visible here.
669 Batch stop_batch( 0 );
676 using namespace basic::options::OptionKeys;
// Snapshot of archive::reread_all_structures; it controls the reset of the
// decoys_returned counters further down.
680 bool b_reread_all_structures( option[ OptionKeys::archive::reread_all_structures ]() );
// The batch that was just read must carry the id that was expected.
688 runtime_assert( new_batch.
id() == id );
689 tr.Info <<
"found existing batch " << new_batch.
batch() << std::endl;
692 tr.Debug << new_batch << std::endl;
// A batch that raised an exception while being read is reported and ignored.
// The try/catch around these lines is not visible here.
// NOTE(review): "errorneous" is a typo in the runtime log text; left unchanged.
695 tr.Warning <<
"[ WARNING ] "+new_batch.
batch()+
" is errorneous: " + excn.msg() << std::endl;
696 tr.Warning <<
"[ WARNING ] ignoring this batch..." << std::endl;
// When re-reading everything, a batch with a non-zero decoys_returned()
// has that counter set back to 0. Statements between the test and the
// reset are not visible here.
701 if ( b_reread_all_structures ) {
702 if (
batches_[
id ].decoys_returned() ) {
706 batches_[ id ].set_decoys_returned( 0 );
// Move on to the next id with a freshly constructed Batch.
708 aBatch =
Batch( ++
id );
723 tr.Debug <<
"start new batch " << batch_id << std::endl;
// The new batch receives its id and its directory is created.
727 new_batch.
set_id( batch_id );
729 utility::file::create_directory( new_batch.dir() );
// When start decoys are supplied, the batch is flagged as having silent
// input and the decoys are iterated; the loop body is not visible here.
730 if ( start_decoys.size() ) {
731 new_batch.set_has_silent_in();
733 for ( core::io::silent::SilentStructOPs::const_iterator
734 it = start_decoys.begin(); it != start_decoys.end(); ++it ) {
// Prepare the batch's own option collection: built-in options first, then
// the options added by add_all_rosetta_options.
739 new_batch.user_options().add_built_in_options();
740 add_all_rosetta_options( new_batch.user_options() );
// nstruct for the batch is taken from the global out::nstruct option.
749 new_batch.nstruct() = basic::options::option[ basic::options::OptionKeys::out::nstruct ];
// Raised on an inconsistency while re-reading a batch; `tag` and the
// guarding condition are defined outside the visible lines.
// NOTE(review): this throw likely belongs to a different routine than the
// lines above -- the function boundary is not visible here.
754 throw(
EXCN_Archive(
"inconsistency detected when re-reading "+new_batch.
batch()+
" for " + tag) );
// NOTE(review): partial view of the routine that finalizes a batch; its
// signature, some conditions and all closing braces are not visible, so the
// comments describe single statements only.
760 using namespace basic::options::OptionKeys;
761 tr.Debug <<
"finalize_batch " << new_batch << std::endl;
// Writes a placeholder line to the batch's broker file. The condition under
// which this happens is not visible here (presumably only when no broker
// file exists yet -- confirm).
765 utility::io::ozstream broker( new_batch.
broker_file() );
766 broker <<
"# NO CLAIMERS PRESENT" << std::endl;
771 tr.Debug <<
"checking aBatch.flag_file()... " << std::endl;
// A separate option collection is set up and the batch's flag file is
// loaded into it, so the flags can be inspected below.
772 utility::options::OptionCollection batch_opts;
773 batch_opts.add_built_in_options();
774 add_all_rosetta_options( batch_opts );
776 tr.Debug <<
"load options from file" << std::endl;
777 batch_opts.load_options_from_file_exception( new_batch.
flag_file() );
// Problems while loading the flags are logged as an error; the rest of the
// handler is not visible here.
778 }
catch ( utility::excn::EXCN_Msg_Exception &excn ) {
779 tr.Error <<
"[ERROR] problems with flags in " << new_batch.
flag_file() <<
" aborting... " << std::endl;
// Each of the following user-set options triggers a warning that its value
// will be overwritten by the archive.
786 if ( batch_opts[ in::file::silent ].user() )
787 tr.Warning <<
"option -in:file:silent will be overwritten by ArchiveMaster"
788 <<
" -- control directly via class Batch" << std::endl;
789 if ( batch_opts[ out::nstruct ].user() )
790 tr.Warning <<
"option -nstruct will be overwritten by ArchiveMaster "
791 <<
"-- control directly via class Batch" << std::endl;
792 if ( batch_opts[ run::intermediate_structures ].user() )
793 tr.Warning <<
"option -run::intermediate_structures will be overwritten by ArchiveMaster "
794 <<
"-- control directly via class Batch" << std::endl;
795 if ( batch_opts[ out::file::silent ].user() )
796 tr.Warning <<
"option -out:file:silent will be overwritten by ArchiveMaster "
797 <<
"-- control directly via class Batch" << std::endl;
798 if ( batch_opts[ broker::setup ].user() )
799 tr.Warning <<
"option -broker:setup will be overwritten by ArchiveMaster "
800 <<
"-- control directly via class Batch" << std::endl;
801 if ( batch_opts[ out::file::scorefile ].user() )
802 tr.Warning <<
"option -out:file:scorefile will be overwritten by ArchiveMaster "
803 <<
"-- control directly via class Batch" << std::endl;
// The archive-controlled options are read into locals.
// NOTE(review): presumably these reads also mark the options as accessed,
// so that show_inaccessed_user_options below leaves them out -- confirm.
806 bool has_silent( batch_opts[ in::file::silent ].user() );
807 core::Size nstruct( batch_opts[ out::nstruct ]() );
808 bool intermeds( batch_opts[ run::intermediate_structures ]() );
809 std::string silent_out( batch_opts[ out::file::silent ]() );
// Joins the elements of `broker` into one string, each followed by a space.
// NOTE(review): this `broker` is a container declared outside the visible
// lines, not the ozstream of the same name seen further up.
811 std::ostringstream broker_files;
812 std::copy( broker.begin(), broker.end(), std::ostream_iterator<std::string>( broker_files,
" "));
813 std::string score_file( batch_opts[ out::file::scorefile ]() );
// The output of show_inaccessed_user_options is captured as text, logged,
// and loaded into the batch's own option collection under the label
// "USER_FLAGS".
816 std::stringstream user_flags;
817 batch_opts.show_inaccessed_user_options( user_flags );
818 tr.Debug <<
"user_options: \n" << user_flags.str() << std::endl;
821 new_batch.
user_options().load_options_from_stream( user_flags,
"USER_FLAGS" );
// Writes the archive-controlled flags to the batch's flag file. Lines
// written before the "#Archive controlled flags" header are not visible here.
836 utility::io::ozstream flag_out( new_batch.
flag_file() );
838 flag_out <<
"\n\n#Archive controlled flags" << std::endl;
839 flag_out <<
"-out:file:silent " << new_batch.
silent_out() << std::endl;
842 flag_out <<
"-out:nstruct " << new_batch.
nstruct() << std::endl;
843 flag_out <<
"-out:file:scorefile " << new_batch.
score_file() << std::endl;
// -run:intermediate_structures is written only when the batch asks for it.
846 if ( new_batch.
intermediate_structs() ) flag_out <<
"-run:intermediate_structures" << std::endl;
853 tr.Debug <<
"queue " << new_batch.
batch() <<
" " << new_batch.
flag_file() << std::endl;
859 tr.Debug <<
"\n" << std::endl;