5 #include <ObjexxFCL/FArray2D.hh>
8 #include <basic/Tracer.hh>
12 #include <ObjexxFCL/format.hh>
14 #include <basic/prof.hh>
16 #include <utility/io/mpistream.hh>
17 #include <utility/exit.hh>
26 #include <basic/options/option.hh>
27 #include <basic/options/after_opts.hh>
28 #include <basic/options/option_macros.hh>
35 namespace canonical_sampling{
36 namespace mc_convergence_checks {
// File-local (static) tracer for this translation unit. The tr.Debug
// statements in this file write to it; its channel name is
// "MPIBPool_ConvergenceCheck".
38 static basic::Tracer
tr(
"MPIBPool_ConvergenceCheck");
// Bring the ObjexxFCL, utility::io::mpi_stream and core::io::silent
// namespaces into scope so their names can be used unqualified below.
// NOTE(review): presumably this is what lets lead_zero_string_of and
// FArray2P resolve without qualification; confirm against the headers.
44 using namespace ObjexxFCL;
46 using namespace utility::io::mpi_stream;
47 using namespace core::io::silent;
// Shorthand alias for the double instantiation of FArray2P.
50 typedef FArray2P<double> FArray2P_double;
// Out-of-class definition of MPIBPool_RMSD::MPI_COMM_POOL (presumably a
// static data member; the class declaration is not visible here). This
// communicator handle is the output argument of the MPI_Comm_create calls
// in this file and is the communicator passed to the pool's MPI_Barrier,
// MPI_Gather, MPI_Gatherv and MPI_Bcast calls.
54 MPI_Comm protocols::canonical_sampling::mc_convergence_checks::MPIBPool_RMSD::MPI_COMM_POOL;
57 Pool_RMSD( silent_file ),
58 workers_finished_( 0 ),
65 transition_threshold_(-1),
66 new_decoys_out_(
"discovered_decoys.out" ),
67 tracer_visible_(false),
71 if( tracer_visible_ ){
72 tr.Debug <<
"finished initializing!" <<std::endl;
73 tr.Debug <<
"checking: rank " << rank_ <<
" has " <<
Pool_RMSD::size() <<
" structures in its pool " << std::endl;
120 PROF_START( basic::CHECK_COMM_SIZE );
121 MPI_Comm_rank( MPI_COMM_WORLD, (
int* )( &
rank_ ) );
122 MPI_Comm_size( MPI_COMM_WORLD, (
int* )( &
npes_ ) );
123 PROF_STOP( basic::CHECK_COMM_SIZE );
124 PROF_START( basic::INITIALIZE );
135 if (
rank_ == master_node_ ) {
142 PROF_STOP( basic::INITIALIZE );
144 PROF_START( basic::MPICOMMCREATION );
146 for(
int ii = master_node_; ii <
npes_; ii++){
151 MPI_Group pool_group, all;
156 returnval = MPI_Comm_group( MPI_COMM_WORLD, &all);
157 if ( returnval != MPI_SUCCESS ) {
158 utility_exit_with_message(
"failed in creating a new communicator!");
162 if ( returnval != MPI_SUCCESS ) {
163 utility_exit_with_message(
"failed in creating a new communicator!");
166 returnval = MPI_Comm_create( MPI_COMM_WORLD, pool_group, &MPI_COMM_POOL );
167 if ( returnval != MPI_SUCCESS ) {
168 utility_exit_with_message(
"failed in creating a new communicator!");
172 PROF_STOP( basic::MPICOMMCREATION );
179 PROF_START( basic::FARRAY_MANIPULATION );
181 PROF_STOP( basic::FARRAY_MANIPULATION );
188 if(
rank_ == master_node_ ){
194 if( optional_id_num == 0 ){
196 tr.Debug <<
"assigning a tag with value " << lead_zero_string_of(
Pool_RMSD::size(), 8 ) << std::endl;
201 tr.Debug <<
"assigning a tag with value " << lead_zero_string_of( optional_id_num, 8 ) << std::endl;
203 new_tag =
"new."+lead_zero_string_of( optional_id_num, 8 );
215 if ( num_to_send == 0 )
return;
217 PROF_START( basic::MPIBARRIER );
218 MPI_Barrier( MPI_COMM_POOL );
219 PROF_STOP( basic::MPIBARRIER );
221 if (
rank_ == master_node_ ) {
226 tr.Debug <<
"broadcasting " << num_to_send <<
" structures " << std::endl;
227 for(
core::Size ii = 0; ii < num_to_send; ii++) {
236 PROF_START( basic::COPY_COORDS );
237 int shifted_index = 0;
239 for(
core::Size ii = 0; ii < num_to_send; ii++ ) {
245 PROF_STOP( basic::COPY_COORDS );
248 PROF_START( basic::MPI_MASTER_BCAST_COORDS );
256 PROF_STOP( basic::MPI_MASTER_BCAST_COORDS );
258 if (
rank_ != master_node_ ) {
260 int num_to_receive = num_to_send;
265 tr.Debug <<
"receiving " << num_to_receive <<
" structures " << std::endl;
274 PROF_START( basic::COPY_COORDS );
277 while( num_to_receive > 0 ){
281 tr.Debug <<
"assigned a new tag to new structure " << tag_to_get <<
" " << num_to_receive << std::endl;
285 tr.Debug <<
"successfully assign array to farray " << std::endl;
289 tr.Debug <<
"successfully added structure to pool " << std::endl;
294 tr.Debug <<
"next will be accessing index " << index << std::endl;
297 PROF_STOP( basic::COPY_COORDS );
305 runtime_assert(
rank_ == master_node_ );
308 tr.Debug <<
"expecting " <<
pool_npes_ <<
" updates" << std::endl;
311 PROF_START( basic::MPI_GATHER_BARRIER );
312 MPI_Barrier( MPI_COMM_POOL );
313 PROF_STOP( basic::MPI_GATHER_BARRIER );
317 int structures_to_report = 0;
318 PROF_START( basic::MPI_SLAVE_REPORT_SIZES );
320 PROF_STOP( basic::MPI_SLAVE_REPORT_SIZES );
329 tr.Debug <<
"max_coord_size now has a value of " << max_coord_size << std::endl;
338 tr.Debug <<
"tabulating another trajectory finished! " << std::endl;
352 tr.Debug <<
"checking the contents of nodes_finished: ";
356 tr.Debug << std::endl;
357 tr.Debug <<
"about to receive the new coordinates " << max_coord_size << std::endl;
363 PROF_START( basic::MPI_SLAVE_REPORT_NEW_COORDS );
365 PROF_STOP( basic::MPI_SLAVE_REPORT_NEW_COORDS );
367 tr.Debug <<
"expecting a total size of max: " << max_coord_size << std::endl;
368 tr.Debug <<
"received all-coordinates! ";
369 for(
core::Size ii = 0; ii < max_coord_size; ii++){
372 tr.Debug << std::endl;
373 tr.Debug <<
"finished receiving all coordinated" << std::endl;
389 tr.Debug <<
" calling gather to report slave size of coords" << std::endl;
392 PROF_START( basic::MPI_GATHER_BARRIER );
393 MPI_Barrier( MPI_COMM_POOL );
394 PROF_STOP( basic::MPI_GATHER_BARRIER );
397 PROF_START( basic::MPI_SLAVE_REPORT_SIZES );
399 PROF_STOP( basic::MPI_SLAVE_REPORT_SIZES );
402 tr.Debug <<
" slave: finished calling gather! sending coordinate of size " << size_to_report << std::endl;
404 PROF_START( basic::MPI_SLAVE_REPORT_NEW_COORDS );
416 PROF_STOP( basic::MPI_SLAVE_REPORT_NEW_COORDS );
421 int num_to_report = 0;
426 PROF_START( basic::MPI_GATHER_BARRIER );
427 MPI_Barrier( MPI_COMM_POOL );
428 PROF_STOP( basic::MPI_GATHER_BARRIER );
431 tr.Debug <<
" slave: reporting no new coordinates" << std::endl;
433 PROF_START( basic::MPI_SLAVE_REPORT_SIZES );
434 MPI_Gather( &num_to_report, 1, MPI_INT, 0, 1, MPI_INT, (
pool_master_node_), MPI_COMM_POOL);
435 PROF_STOP( basic::MPI_SLAVE_REPORT_SIZES );
436 PROF_START( basic::MPI_SLAVE_REPORT_NEW_COORDS );
437 MPI_Gatherv( &empty_coords, 0, MPI_DOUBLE, &dummy, &dummy, &dummy, MPI_DOUBLE, (
pool_master_node_), MPI_COMM_POOL);
438 PROF_STOP( basic::MPI_SLAVE_REPORT_NEW_COORDS );
442 PROF_START( basic::FARRAY_MANIPULATION );
448 PROF_STOP( basic::FARRAY_MANIPULATION );
456 PROF_START( basic::FARRAY_MANIPULATION );
463 PROF_STOP( basic::FARRAY_MANIPULATION );
511 tr.Debug <<
"num trajectories finished: " <<
513 ( npes_ - master_node_ - 1 ) << std::endl;
518 tr.Debug <<
"FINISHED! num trajectories finished: " <<
520 ( npes_ - master_node_ - 1 ) << std::endl;
528 if(
rank_ != master_node_ ){
530 tr.Debug <<
"sending broadcast finalized message to master " << std::endl;
532 int size_to_report = -1;
536 PROF_START( basic::MPI_GATHER_BARRIER );
537 MPI_Barrier( MPI_COMM_POOL );
538 PROF_STOP( basic::MPI_GATHER_BARRIER );
540 PROF_START( basic::MPI_SLAVE_REPORT_SIZES );
541 MPI_Gather( &size_to_report, 1, MPI_INT, &size_to_report, 1, MPI_INT, (
pool_master_node_), MPI_COMM_POOL );
542 PROF_STOP( basic::MPI_SLAVE_REPORT_SIZES );
544 PROF_START( basic::MPI_SLAVE_REPORT_NEW_COORDS );
545 MPI_Gatherv( &empty_coords, 0, MPI_DOUBLE, &empty_size, &empty_size, &empty_size, MPI_DOUBLE, (
pool_master_node_), MPI_COMM_POOL );
546 PROF_STOP( basic::MPI_SLAVE_REPORT_NEW_COORDS );
548 int num_poses_added = 0;
551 PROF_START( basic::MPI_MASTER_BCAST_NUM_STRUCTURES_TO_ADD );
553 PROF_STOP( basic::MPI_MASTER_BCAST_NUM_STRUCTURES_TO_ADD );
555 PROF_START( basic::MPI_MASTER_BCAST_WINNING_RANKS );
557 PROF_STOP( basic::MPI_MASTER_BCAST_WINNING_RANKS );
561 PROF_START( basic::MPI_MASTER_BCAST_NEW_COMM_SIZE );
563 PROF_STOP( basic::MPI_MASTER_BCAST_NEW_COMM_SIZE );
567 tr.Debug <<
"new size is " << new_size <<
" current size: " <<
pool_npes_ << std::endl;
569 PROF_START( basic::COMM_REDUCE_SIZE );
571 PROF_STOP( basic::COMM_REDUCE_SIZE );
574 tr.Debug <<
"creating new communicator from ranks: ";
575 for(
int ii = 0; ii < new_size; ii++){
578 tr.Debug << std::endl;
614 MPI_Group new_pool_group, old_pool_group;
615 MPI_Comm dup_pool_comm;
619 for(
int ii = 0; ii < new_size; ii++ ){
621 if( (
int)(
pool_rank_) == ranks_to_include[ ii ]){
622 tr.Debug <<
"this rank " <<
pool_rank_ <<
" is designated an active node" << std::endl;
623 is_active_node =
true;
628 PROF_START( basic::MPICOMMCREATION );
629 MPI_Comm_dup( MPI_COMM_POOL, &dup_pool_comm );
630 returnval = MPI_Comm_group( dup_pool_comm, &old_pool_group );
631 assert(returnval == MPI_SUCCESS );
633 returnval = MPI_Group_incl( old_pool_group, (new_size), ranks_to_include, &new_pool_group );
634 assert(returnval == MPI_SUCCESS );
636 returnval = MPI_Comm_create( dup_pool_comm, new_pool_group, &MPI_COMM_POOL );
637 assert(returnval == MPI_SUCCESS );
639 if( is_active_node ){
641 MPI_Comm_size(MPI_COMM_POOL, (
int* )( &new_size ) );
646 tr.Debug <<
"new size of pool is " << new_size << std::endl;
649 PROF_STOP( basic::MPICOMMCREATION );
655 bool is_active =
false;
659 tr.Debug <<
"listing active ranks: ";
660 for(
int ii = 0; ii < new_size; ii++ ){
661 tr.Debug << active_nodes[ ii ] <<
" ";
663 tr.Debug << std::endl;
669 for (
int ii = 0; ii < new_size; ii++ ){
671 if ( active_nodes[ ii ] == (
int)(
pool_rank_) ) {
673 tr.Debug <<
"this node " <<
pool_rank_ <<
" is still active " << std::endl;
675 PROF_START( basic::CHECK_COMM_SIZE );
676 MPI_Comm_size(MPI_COMM_POOL, (
int* )(&
pool_npes_) );
677 MPI_Comm_rank(MPI_COMM_POOL, (
int* )(&
pool_rank_) );
678 PROF_STOP( basic::CHECK_COMM_SIZE );
680 tr.Debug <<
"master node is rank " << master_node_ << std::endl;
681 tr.Debug <<
"new size of comm is " << npes_ <<
" pool-size " <<
pool_npes_ <<
" this node now has rank " <<
rank_ <<
" and pool_rank " <<
pool_rank_ << std::endl;
682 tr.Debug <<
"double checking node_finished contents: \n";
686 tr.Debug << std::endl;
699 farray_coord_ptr_(0),
700 temp_coord_for_evaluation_(),
707 size_( num_slave_nodes )
709 set_size( num_slave_nodes );
723 tr.Debug <<
"setting the size of the transfer_buf_ to " << num_slave_nodes << std::endl;
734 size_ = num_slave_nodes;
743 tr.Debug <<
"now adding a pose with the assigned-tag: " << tag << std::endl;
760 tr.Debug <<
"about to gather coords from slaves" << std::endl;
770 tr.Debug <<
"finished gathering coordinates from slave nodes checking size of transfer_buf " <<
transfer_buf_.
size_ << std::endl;
773 PROF_START( basic::MPI_POOL_MASTER_THINKS );
777 if ( num_poses_added == 0 ){
785 tr.Debug <<
"continuing . . . " << std::endl;
792 tr.Debug <<
"index is now " << index_new_pose <<
" size is " <<
transfer_buf_.
size_ <<
" performing evaluation now. num structures in pool "
794 << num_poses_added <<
" so index is " << (
new_structures_ - num_poses_added + 1 ) << std::endl;
804 tr.Debug <<
"finished converting array to farray, index: " << (index_new_pose) << std::endl;
815 tr.Debug <<
"finished evaluating decoy against pool, index "
816 << (
new_structures_ + 1 - num_poses_added) <<
" best_rms: " << best_rmsd << std::endl;
820 tr.Debug <<
"finished eval, best rms is " << best_rmsd
821 <<
" which is greater than threshold " <<
transition_threshold_ <<
" so adding to pool " << std::endl;
826 tr.Debug <<
"finished adding pose to pool, now have " <<
new_structures_ << std::endl;
832 PROF_STOP( basic::MPI_POOL_MASTER_THINKS );
835 PROF_START( basic::MPI_MASTER_BCAST_NUM_STRUCTURES_TO_ADD );
837 PROF_STOP( basic::MPI_MASTER_BCAST_NUM_STRUCTURES_TO_ADD );
840 tr.Debug <<
"broadcasting winning ranks " << std::endl;
842 PROF_START( basic::MPI_MASTER_BCAST_WINNING_RANKS );
845 tr.Debug <<
"checking the contents of int_buf, which should contain starting indices\n";
846 for(
core::Size ii = 0; ii < num_poses_added; ii++){
849 tr.Debug << std::endl;
851 PROF_STOP( basic::MPI_MASTER_BCAST_WINNING_RANKS );
857 PROF_START( basic::CHECK_COMM_SIZE );
858 unsigned int new_size = 0;
859 for (
unsigned int ii = 0; ii <
pool_npes_; ii++ ){
868 PROF_STOP( basic::CHECK_COMM_SIZE );
869 PROF_START( basic::MPI_MASTER_BCAST_NEW_COMM_SIZE );
871 PROF_STOP( basic::MPI_MASTER_BCAST_NEW_COMM_SIZE );
873 if( new_size != pool_npes_ ){
874 PROF_START( basic::MPI_MASTER_BCAST_NEW_POOL_RANKS );
876 PROF_STOP( basic::MPI_MASTER_BCAST_NEW_POOL_RANKS );
878 tr.Debug <<
"new size is " << new_size <<
" current size is " << pool_npes_ <<std::endl;
880 PROF_START( basic::MPICOMMCREATION );
882 PROF_STOP( basic::MPICOMMCREATION );
888 tr.Debug <<
"master node finished " << std::endl;
930 PROF_STOP( basic::MPI_POOL_SLAVE_THINKS );
939 tr.Debug <<
"this node is rank " <<
rank_ <<
" pool-rank is " <<
pool_rank_ <<
" master node is rank " << master_node_ <<
" and total size is " << npes_ <<
" pool-size is " << pool_npes_ << std::endl;
946 tr.Debug <<
"best rmsd after first evaluation is " << best_rmsd <<
" threashold " << transition_threshold <<
" and index " << best_index << std::endl;
950 if( best_rmsd > transition_threshold ){
952 PROF_START( basic::FARRAY_MANIPULATION );
954 PROF_STOP( basic::FARRAY_MANIPULATION );
956 tr.Debug <<
" slave: about to report new coordinates to master!" << std::endl;
962 tr.Debug <<
" slave: about to report, no new coordinates, to master!" << std::endl;
968 int num_structures_to_add = 0;
969 PROF_START( basic::MPI_MASTER_BCAST_NUM_STRUCTURES_TO_ADD );
970 MPI_Bcast( &num_structures_to_add, 1, MPI_INT, (
pool_master_node_), MPI_COMM_POOL );
971 PROF_STOP( basic::MPI_MASTER_BCAST_NUM_STRUCTURES_TO_ADD );
974 tr.Debug <<
" expecting " << num_structures_to_add <<
" from master node " << std::endl;
976 PROF_START( basic::MPI_MASTER_BCAST_WINNING_RANKS );
978 PROF_STOP( basic::MPI_MASTER_BCAST_WINNING_RANKS );
980 tr.Debug <<
"received "<< num_structures_to_add <<
" from the master node! " << std::endl;
981 for (
core::Size ii = 0; ii < num_structures_to_add; ii++ ) {
984 tr.Debug << std::endl;
986 bool i_am_a_winning_rank =
false;
987 for(
int ii = 0; ii < num_structures_to_add; ii++ ){
990 tr.Debug <<
"I WON! I'm one of the winning ranks! " << std::endl;
992 i_am_a_winning_rank =
true;
995 tr.Debug <<
"I'm gonna add a new structure, this is it's tag: " << new_tag << std::endl;
996 best_decoy = new_tag;
997 best_decoy = new_tag;
998 PROF_START( basic::WRITE_TO_FILE );
1001 ss->fill_struct( pose, new_tag );
1004 PROF_STOP( basic::WRITE_TO_FILE );
1009 PROF_START( basic::MPI_MASTER_BCAST_NEW_COMM_SIZE );
1011 PROF_STOP( basic::MPI_MASTER_BCAST_NEW_COMM_SIZE );
1014 tr.Debug <<
"new size of pool " << new_size <<
" current_size of pool_npes_ " << pool_npes_ << std::endl;
1016 if( new_size != pool_npes_ ){
1018 PROF_START( basic::MPI_MASTER_BCAST_NEW_POOL_RANKS );
1020 PROF_STOP( basic::MPI_MASTER_BCAST_NEW_POOL_RANKS );
1022 tr.Debug <<
"creating new communicator from ranks: ";
1023 for(
int ii = 0; ii < new_size; ii++){
1026 tr.Debug << std::endl;
1030 PROF_START( basic::MPI_POOL_SLAVE_THINKS );
1032 if( !i_am_a_winning_rank && num_structures_to_add > 0 ){
1035 tr.Debug <<
"checking coords" << std::endl;
1037 tr.Debug <<std::endl;
1041 tr.Debug <<
"before update, this is my info: " << best_rmsd <<
" " << best_decoy <<
" " << best_index << std::endl;
1045 tr.Debug <<
"after 2nd eval, this is my info: " << competing_best_rmsd <<
" " << competing_best_decoy <<
" " << alt_index << std::endl;
1047 if( competing_best_rmsd < best_rmsd ) {
1048 best_rmsd = competing_best_rmsd;
1049 best_decoy = competing_best_decoy;
1054 tr.Debug <<
"best rmsd after evaluation is " << best_rmsd <<
" threashold " << transition_threshold <<
" num_structures_to_add " << num_structures_to_add <<
" pool-size " <<
Pool_RMSD::size() <<
" and index " << (
Pool_RMSD::size() - num_structures_to_add + 1 ) << std::endl;