14 #define TRDEBUG TR.Debug
38 #include <basic/options/keys/in.OptionKeys.gen.hh>
41 #include <basic/options/keys/lh.OptionKeys.gen.hh>
42 #include <basic/options/option.hh>
46 #include <basic/Tracer.hh>
48 #include <ObjexxFCL/format.hh>
50 #include <ObjexxFCL/string.functions.hh>
52 #include <numeric/random/random.hh>
54 #ifndef _WIN32 // REQUIRED FOR WINDOWS
60 #include <utility/string_util.hh>
61 #include <boost/algorithm/string.hpp>
62 #include <boost/lexical_cast.hpp>
67 #include <utility/vector1.hh>
69 #include <numeric/random/random.hh>
70 #include <numeric/random/random_permutation.hh>
72 using namespace ObjexxFCL;
73 using namespace ObjexxFCL::fmt;
78 using namespace protocols::wum;
// File-local tracer for this file's log output; the TRDEBUG macro defined
// at the top of the file expands to its Debug channel (TR.Debug).
80 static basic::Tracer
TR(
"MPI.LHR.Master");
// File-local random generator constructed with the constant 3893251
// (presumably its seed/identifier -- confirm against RandomGenerator).
82 static numeric::random::RandomGenerator
RG(3893251);
// set_defaults: copies the lh:: command-line options into the matching
// member settings, then prepares the structure library and sample weights.
// NOTE(review): several original lines (return type, closing braces, any
// else-branch) are not visible in this excerpt.
85 MPI_LoopHashRefine_Master::set_defaults(){
86 using namespace basic::options;
87 using namespace basic::options::OptionKeys;
// Each member below is initialised from the option of the same name.
88 max_loophash_per_structure_ = option[ OptionKeys::lh::max_loophash_per_structure ]();
89 batch_relax_chunks_ = option[ OptionKeys::lh::mpi_batch_relax_chunks ]();
90 batch_relax_absolute_max_ = option[ OptionKeys::lh::mpi_batch_relax_absolute_max ]();
91 outbound_wu_buffer_size_ = option[ OptionKeys::lh::mpi_outbound_wu_buffer_size ]();
92 loophash_split_size_ = option[ OptionKeys::lh::mpi_loophash_split_size ]();
93 library_expiry_time_ = option[ OptionKeys::lh::library_expiry_time ]();
94 expire_after_rounds_ = option[ OptionKeys::lh::expire_after_rounds ]();
95 mpi_master_save_score_only_ = option[ OptionKeys::lh::mpi_master_save_score_only ]();
// A non-empty mpi_resume() identifier means a previous run's state is loaded.
102 if( mpi_resume() !=
"" ){
103 TR <<
"Resuming job from IDENT: " << mpi_resume() << std::endl;
104 load_state( mpi_resume() );
// Presumably the non-resume path (the line between is not visible): load the
// command-line structures, passing max_lib_size() * master_rank().
106 load_structures_from_cmdline_into_library( max_lib_size() * master_rank() );
110 load_sample_weight();
111 TR <<
"STARTLIB: " << std::endl;
// go: logs the master's start-up, then runs the three visible processing
// steps: incoming messages, inbound work units, outbound work units.
// NOTE(review): the enclosing loop/braces are not visible in this excerpt,
// so how often these steps repeat cannot be stated from here.
118 MPI_LoopHashRefine_Master::go()
121 TR <<
"Init Master: " <<
mpi_rank() << std::endl;
124 TR <<
"Master Node: Waiting for job requests..." << std::endl;
127 TRDEBUG <<
"Master: processing msgs.." << std::endl;
128 process_incoming_msgs();
130 TRDEBUG <<
"Master: process incoming" << std::endl;
131 process_inbound_wus();
133 TRDEBUG <<
"Master: process outbound" << std::endl;
134 process_outbound_wus();
// Second message-processing call, this time with a `true` argument; its
// meaning is defined by process_incoming_msgs (not visible here) -- TODO confirm.
137 process_incoming_msgs(
true );
// process_inbound_wus: checks library expiry, then consumes the inbound
// work-unit queue and handles each returned work unit according to its type
// string ("loophasher", "resultpack", "batchrelax").
// NOTE(review): many original lines (local declarations, braces, else
// keywords, the iterator loop header) are not visible in this excerpt.
149 MPI_LoopHashRefine_Master::process_inbound_wus(){
150 using namespace protocols::loops;
// Expiry check runs on every call, before the queue is looked at.
152 check_library_expiry_dates();
153 TRDEBUG <<
"Finished checking library dates"<<std::endl;
154 if( inbound().
size() > 0 ){
155 TRDEBUG <<
"Processing inbound WUs on master.." << std::endl;
// Loop for as long as the inbound queue is non-empty.
157 while( inbound().
size() > 0 )
160 runtime_assert( next_wu );
// "waitwu" work units are skipped without further handling.
163 if ( next_wu->get_wu_type() ==
"waitwu" )
continue;
// A null structure_wu is reported together with a printout of the work unit.
169 if ( structure_wu.get() == NULL ){
170 TR <<
"Cannot save structural data for WU: " << std::endl;
171 next_wu->print( TR );
176 TRDEBUG <<
"Saving decoy store.. " << std::endl;
// --- "loophasher" results: add the run time to the loophash total and log.
179 if ( structure_wu->get_wu_type() ==
"loophasher" ){
180 totaltime_loophash() += structure_wu->get_run_time();
181 TR <<
"LoopHash return: " << decoys.
size() <<
" structs in " << structure_wu->get_run_time() <<
"s " <<
" frm " << structure_wu->last_received_from() << std::endl;
// For "loophasher" work units reached through `iter` whose extra_data_1 and
// extra_data_3 both match the returned one, blacklist the node that just
// returned this result.
184 if( (*iter)->get_wu_type() ==
"loophasher" ) {
194 if( (*iter)->extra_data_1() == structure_wu->extra_data_1() && (*iter)->extra_data_3() == structure_wu->extra_data_3() ) {
195 (*iter)->add_blacklist( structure_wu->last_received_from() );
196 TRDEBUG <<
"Added node " << structure_wu->last_received_from() <<
" to blacklist of WU " << (*iter)->id() << std::endl;
// Non-empty decoy sets are passed to add_relax_batch and counted.
202 if( decoys.
size() > 0 ){
203 add_relax_batch( decoys );
204 total_structures_ += decoys.
size();
// --- "resultpack" results: added to the library with "add_n_limit".
207 if ( structure_wu->get_wu_type() ==
"resultpack" ){
210 TR <<
"Emperor sent: " << decoys.
size() <<
" structs" << std::endl;
212 add_structures_to_library( decoys,
"add_n_limit" );
// --- "batchrelax" results: add the run time, add the decoys to the library
// and dump them, passing mpi_master_save_score_only_.
217 if ( structure_wu->get_wu_type() ==
"batchrelax" ){
220 totaltime_batchrelax_ += structure_wu->get_run_time();
222 TR <<
"BatchRelax return: " << decoys.
size() <<
" structs in " << structure_wu->get_run_time() <<
"s " <<
" frm " << structure_wu->last_received_from() << std::endl;
223 add_structures_to_library( decoys );
224 dump_structures( decoys, mpi_master_save_score_only_ );
// Presumably the fallback branch for any other type (the else is not visible).
226 TR.Error <<
"Unknown workunit received. " << std::endl;
// process_outbound_wus: when the outbound queue holds fewer work units than
// outbound_wu_buffer_size_, creates loophash work units from library
// structures that have not yet reached max_loophash_per_structure_.
// NOTE(review): the loop header over the library and several braces/else
// keywords are not visible in this excerpt.
238 MPI_LoopHashRefine_Master::process_outbound_wus(){
239 TRDEBUG <<
"Adding loophash WUs if necessary .. " << std::endl;
240 if( outbound().
size() < outbound_wu_buffer_size_ ){
// An empty central library is treated as fatal: log, then exit.
241 if ( library_central().
size() == 0 ){
242 TR.Error <<
"FATAL ERROR: library_central_ is empty! " << std::endl;
243 utility_exit_with_message(
"FATAL ERROR: library_central_ is empty! " );
// The "lhcount" energy on each structure is compared against
// max_loophash_per_structure_; below the limit, it is incremented by 1.0
// and loophash work units are created for the structure.
249 if( max_loophash_per_structure_ > (*it)->get_energy(
"lhcount"))
251 TRDEBUG <<
"Adding: " << (*it) <<
" " << (*it)->get_energy(
"lhcount") << std::endl;
252 (*it)->add_energy(
"lhcount", (*it)->get_energy(
"lhcount") + 1.0 );
253 create_loophash_WUs( *it );
// Presumably the else-branch: structures that are not below the limit are
// counted as finished.
255 finished_structures += 1;
256 TRDEBUG <<
"Already done: " << (*it) <<
" " << (*it)->get_energy(
"lhcount") << std::endl;
// Logs the finished count against the library size; when the count is at
// least the library size, logs that the starting structures are exhausted.
259 TR <<
"WARNING: " << finished_structures <<
" " << library_central().size() << std::endl;
260 if ( finished_structures >= library_central().
size() ){
261 TR <<
"WARNING: The starting structs exhausted!" << std::endl;
// NOTE(review): the function header is not visible in this excerpt; this
// fragment is presumably the body of create_loophash_WUs (called from
// process_outbound_wus) -- confirm against the full file.
// Visible behaviour: fills a pose from start_struct, fills `ss` from that pose
// and tags it, then queues one "loophasher" work unit per residue window of
// loophash_split_size_ residues.
272 runtime_assert( start_struct );
274 start_struct->fill_pose( start_pose );
283 using namespace basic::options;
284 using namespace basic::options::OptionKeys;
// `ss` is filled from the pose and receives start_struct's scores.
289 ss->fill_struct( start_pose );
290 ss->copy_scores( *start_struct );
// Tag the structure with the round, this master's MPI rank, and its current
// "score" energy stored as "parent_score".
296 ss->add_energy(
"round",
round );
297 ss->add_energy(
"masterid",
mpi_rank() );
298 ss->add_energy(
"parent_score", ss->get_energy(
"score") );
// Walk the residues in steps of loophash_split_size_.
305 for( ;start_ir< start_pose.
total_residue(); start_ir+=loophash_split_size_ )
// Window end is capped at the last residue and never precedes the start.
307 end_ir = std::min( start_ir + loophash_split_size_ - 1, start_pose.
total_residue());
308 if( end_ir < start_ir) end_ir = start_ir;
310 TRDEBUG <<
"Adding a new loophash WU: " << start_ir <<
" - " << end_ir <<
", ssid = " << ssid << std::endl;
// Each new work unit is typed "loophasher", given `ss`, has its serial data
// cleared, and is added to the outbound queue.
315 new_wu->set_wu_type(
"loophasher");
316 new_wu->decoys().add( ss);
317 new_wu->clear_serial_data();
318 outbound().add( new_wu );
321 TR <<
"Added " << count_wus <<
" loophash WUs to queue. ssid=" << ssid << std::endl;
// NOTE(review): the function header is not visible in this excerpt; this
// fragment is presumably the body of add_relax_batch (called from
// process_inbound_wus) -- confirm against the full file.
// Visible behaviour: splits start_decoys into "batchrelax" work units and
// pushes them to the front of the outbound queue.
// Nothing to do for an empty decoy set.
328 if( start_decoys.
size() == 0 )
return;
329 TR <<
"Adding relax WUs.." << start_decoys.
size() << std::endl;
// Per-work-unit batch size: integer division by `chunks`, plus one.
336 core::Size batchrelax_batchsize_ = (start_decoys.
size() / chunks) + 1;
// Keep creating work units while dcount is below the number of decoys.
338 while( dcount < start_decoys.
size() ){
340 new_wu->set_wu_type(
"batchrelax");
// Add up to batchrelax_batchsize_ structures, guarded by the dcount bound.
343 for(lcount=0; lcount < batchrelax_batchsize_; lcount++ ){
344 if ( dcount < start_decoys.
size() ){
346 TRDEBUG <<
"AddRelaxStructure: " << format_silent_struct(new_relax_structure) << std::endl;
347 new_wu->decoys().add( new_relax_structure );
// Decoys are randomly permuted before limit() is applied below (presumably
// so the cap keeps a random subset -- confirm limit() semantics).
354 numeric::random::random_permutation(new_wu->decoys().begin(), new_wu->decoys().end(),
numeric::random::RG);
// chunk_size records the size before the batch_relax_absolute_max_ limit.
357 core::Size chunk_size = new_wu->decoys().size();
358 new_wu->decoys().limit( batch_relax_absolute_max_ );
360 total_structures_relax_ += new_wu->decoys().size();
361 new_wu->clear_serial_data();
// Track the post-limit and pre-limit counts for the summary log line.
363 count_adds += new_wu->decoys().size();
364 count_adds_b4_limit += chunk_size;
// Relax work units go to the FRONT of the outbound queue.
367 outbound().push_front( new_wu );
370 TR <<
"Adding " << count_adds <<
"/" << count_adds_b4_limit <<
" structs for batchrlx. " << count_wus <<
" WUs" << std::endl;
// check_library_expiry_dates: walks library_central() looking for expired
// structures. The visible handling for one is: send it to rank 0 in a
// "getnewstruct" work unit, erase matching loophasher work units from the
// outbound queue, erase it from the library, wait for a work unit from rank 0.
// NOTE(review): the loop header start, local declarations (current_time,
// struct_time, round, ssid, ...) and braces are not visible in this excerpt.
379 MPI_LoopHashRefine_Master::check_library_expiry_dates(){
385 jt != library_central().end(); jt ++ )
387 TR.Debug <<
"Checking structure.." << std::endl;
392 bool expired =
false;
// Time check: elapsed time (as int) exceeds library_expiry_time_.
394 if( (
int(current_time) -
int(struct_time)) > (
int)library_expiry_time_ ){
396 TR <<
"Structure: " << ssid <<
" is expired: " <<
int(current_time) -
int(struct_time) <<
" > " << (
int)library_expiry_time_ << std::endl;
// Round check: only applies when expire_after_rounds_ is greater than 0.
400 if( (expire_after_rounds_ > 0) && ( round >= expire_after_rounds_ ) ){
402 TR <<
"Structure: " << ssid <<
" Round: is expired: " << round <<
" >= " << expire_after_rounds_ << std::endl;
// Increment the structure's "expire" energy by one.
412 (*jt)->add_energy(
"expire", (
core::Size)(*jt)->get_energy(
"expire") + 1);
// Send the structure to rank 0 inside a "getnewstruct" work unit.
417 getnewstruct->set_wu_type(
"getnewstruct" );
418 getnewstruct->decoys().add( (*jt) );
419 send_MPI_workunit( getnewstruct, 0 );
// Erase outbound "loophasher" work units whose extra_data_3 equals ssid;
// erase() returns the iterator used to continue.
426 if( (*iter)->get_wu_type() ==
"loophasher" && ssid == (*iter)->extra_data_3() ) {
427 TRDEBUG<<
"erasing wu" <<std::endl;
428 iter->reset_to_null();
429 TRDEBUG<<
"erasing wu from list" <<std::endl;
430 iter = outbound().erase( iter );
431 TRDEBUG<<
"erasing done" <<std::endl;
437 TR <<
"Erased " << erase_count <<
" deprecated WUs from outbound queue" << std::endl;
// Erase the structure from the library, then wait for a work unit from rank 0.
440 library_central().erase(jt);
442 TR <<
"Reported expired structure to emperor: - waiting for new structure" << std::endl;
443 receive_MPI_workunit( 0 );
444 TR <<
"Done. Restarting reporting.." << std::endl;
// jt was passed to erase() above, so iteration restarts from the beginning.
447 jt=library_central().begin();
449 TRDEBUG <<
"Library state: " << std::endl;
452 TRDEBUG <<
"end of check_library_expiry_dates" << std::endl;
// NOTE(review): the function header is not visible in this excerpt; per the
// log message this is MPI_LoopHashRefine_Master::add_structure_to_library.
// Delegates to the MPI_LoopHashRefine implementation and, when that returns
// true, reports the structure to the emperor.
460 bool result = MPI_LoopHashRefine::add_structure_to_library( pss, add_algorithm );
461 TR <<
"MPI_LoopHashRefine_Master::add_structure_to_library: " << std::endl;
462 if(result) report_structure_to_emperor( pss );
// NOTE(review): the function header is not visible in this excerpt;
// presumably a report_structure_to_emperor overload taking `ss` -- confirm.
// Puts `ss` in a "resultpack" work unit, sends it to my_emperor(), and logs it.
469 resultpack->set_wu_type(
"resultpack" );
470 resultpack->decoys().add( ss );
471 send_MPI_workunit( resultpack, my_emperor() );
472 TR <<
"Reported structure to emperor: " << format_silent_struct( ss ) << std::endl;
// NOTE(review): the function header is not visible in this excerpt;
// presumably a report_structure_to_emperor overload taking `pss` -- confirm.
// Puts `pss` in a "resultpack" work unit, sends it to my_emperor(), and logs it.
478 resultpack->set_wu_type(
"resultpack" );
479 resultpack->decoys().add( pss );
480 send_MPI_workunit( resultpack, my_emperor() );
481 TR <<
"Reported structure to emperor: " << format_silent_struct(pss) << std::endl;
// load_sample_weight: sets sample_weight_str_, either from the file named by
// the lh::sample_weight_file option or from a built-in default.
// NOTE(review): the parsing statements inside the read loop, the try block,
// the else keyword and closing braces are not visible in this excerpt.
486 MPI_LoopHashRefine_Master::load_sample_weight() {
487 using namespace basic::options;
488 using namespace basic::options::OptionKeys;
// File path: taken only when the option is active.
493 if( option[ OptionKeys::lh::sample_weight_file ].active() ) {
494 std::string pathtofile = option[ OptionKeys::lh::sample_weight_file ]();
495 std::ifstream file( pathtofile.c_str() );
// A file that cannot be opened ends the program with a message.
496 if (!file) utility_exit_with_message(
"Failed to open sample_weight file. Check path." );
// Read the file line by line.
498 while(getline( file, line ) ) {
501 std::vector < std::string > r;
// A boost::bad_lexical_cast while parsing ends the program.
509 }
catch( boost::bad_lexical_cast &) {
510 utility_exit_with_message(
"Sample weight second column can't be casted to an int.");
// A second exit path with a range-related message; its condition is not visible.
514 utility_exit_with_message(
"Sample weight second column is not an float larger than 0." );
// The number of entries in `tmp` must equal nres() of the first library
// structure.
// NOTE(review): dereferences library_central().begin() with no emptiness
// check visible here -- confirm the library is non-empty at this point.
521 std::list < std::string >
t;
522 t = utility::split_to_list(tmp);
523 if( t.size() != (*(library_central().begin()))->nres() )
524 utility_exit_with_message(
"Sample weight file either improperly formatted or does not have same number of residues as structure." );
525 TR <<
"Sample weight file successfully loaded" << std::endl;
526 sample_weight_str_ = tmp;
// Presumably the else-branch (option not active): a default is built.
528 TR <<
"Using default sample weight of 50 for every residue" << std::endl;
// Loop runs nres() - 1 times over the first library structure's residues;
// the loop body is not visible here.
530 for(
Size i = 0; i < (*(library_central().begin()))->nres() - 1; i++ ) {
533 sample_weight_str_ =
t;