Rosetta 3.5
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
MPIBPool_ConvergenceCheck.cc
Go to the documentation of this file.
1 #ifdef USEMPI
4 #include <core/pose/Pose.hh>
5 #include <ObjexxFCL/FArray2D.hh>
8 #include <basic/Tracer.hh>
11 #include <protocols/jd2/util.hh>
12 #include <ObjexxFCL/format.hh>
13 #include <fstream>
14 #include <basic/prof.hh>
15 
16 #include <utility/io/mpistream.hh>
17 #include <utility/exit.hh>
18 
19 
20 //SilentFileStuff
24 
25 //option stuff
26 #include <basic/options/option.hh>
27 #include <basic/options/after_opts.hh>
28 #include <basic/options/option_macros.hh>
29 
30 #include <mpi.h>
31 //
32 
33 
34 namespace protocols {
35 namespace canonical_sampling{
36 namespace mc_convergence_checks {
37 
// Tracer used for all debug/diagnostic output of the MPI pool convergence check.
static basic::Tracer tr("MPIBPool_ConvergenceCheck");

 //

// NOTE(review): file-scope "using namespace" directives are questionable style
// but are kept as-is; the rest of this translation unit depends on them.
using namespace ObjexxFCL;
using namespace core;
using namespace utility::io::mpi_stream;
using namespace core::io::silent;


// Shorthands for the ObjexxFCL array types used for coordinate transfer.
typedef FArray2P<double> FArray2P_double;
typedef FArray2D<double> FArray2D_double;
typedef FArray3D<double> FArray3D_double;

// Out-of-class definition of the static communicator shared by all pool nodes.
MPI_Comm protocols::canonical_sampling::mc_convergence_checks::MPIBPool_RMSD::MPI_COMM_POOL;
55 
/// @brief Construct the MPI-aware pool RMSD convergence checker from a silent
/// file and set up the pool communicator via initialize().
/// All counters start at zero; transition_threshold_ of -1 means "not yet set".
MPIBPool_RMSD::MPIBPool_RMSD( std::string const& silent_file ):
	Pool_RMSD( silent_file ),
	workers_finished_( 0 ),
	nodes_finished_( 0 ),
	pool_size_( 0 ),
	new_structures_( 0 ),
	rank_( 0 ),
	pool_rank_( 0 ),
	npes_( 0 ),
	transition_threshold_(-1),
	new_decoys_out_( "discovered_decoys.out" ), // default file for newly discovered decoys
	tracer_visible_(false),
	transfer_buf_()
{
	initialize();
	if( tracer_visible_ ){
		tr.Debug << "finished initializing!" <<std::endl;
		tr.Debug << "checking: rank " << rank_ << " has " << Pool_RMSD::size() << " structures in its pool " << std::endl;
	}
}
76 
77  /**
78 void
79 MPIBPool_RMSD::register_options(){
80  using namespace basic::options;
81  using namespace basic::options::OptionKeys;
82 
83  if( !options_registered_ ){
84  // NEW_OPT( bsampling::out::new_structures, "write structures above transition_threshold to this file", "discovered_decoys.out" );
85  options_registered_ = true;
86  }
87 }
88  **/
89  /**
90 void
91 MPIBPool_RMSD::set_defaults_from_cmdline(){
92  using namespace basic::options;
93  using namespace basic::options::OptionKeys;
94 
95  //runtime_assert( options_registered_ );
96  //new_decoys_out_ = option[ bsampling::out::new_structures ];
97 }
98  **/
99 
/// @brief Setter: record the filename to which newly discovered decoys are written.
/// NOTE(review): the signature line was lost in extraction; it presumably read
/// "MPIBPool_RMSD::set_discovered_out( std::string const& newout ){" — confirm
/// against the original source before compiling.
void
	new_decoys_out_ = newout;
}
104 
/// @brief Accessor for the discovered-decoys output filename.
/// NOTE(review): the signature line (presumably
/// "MPIBPool_RMSD::get_discovered_out(){") is missing from this extraction.
std::string const&
	return new_decoys_out_;
}
109 
/// @brief Query whether this node's trajectory is still active (not yet marked
/// finished). nodes_finished_ is 1-indexed, hence the pool_rank_ + 1 offset.
/// NOTE(review): the function signature line (original line 110) is missing
/// from this extraction — confirm against the original source.
	//MPI_Comm_rank( MPI_COMM_POOL, (int*) (&rank_) );
	if( tracer_visible_ ){
		tr.Debug << "checking if " << pool_rank_ << " is an active node " << !nodes_finished_[ ( pool_rank_ + 1 ) ]<< std::endl;
	}
	return !nodes_finished_[ ( pool_rank_ + 1 ) ];
}
117 
/// @brief Determine world rank/size, size the transfer buffers, and build
/// MPI_COMM_POOL from the world ranks [master_node_, npes_).
/// NOTE(review): the signature line (original line 118) and a few body lines
/// (the statement at original line 132, and the if/else bodies around original
/// lines 136-140) are missing from this extraction — restore from the original
/// source before compiling.

	PROF_START( basic::CHECK_COMM_SIZE );
	MPI_Comm_rank( MPI_COMM_WORLD, ( int* )( &rank_ ) );
	MPI_Comm_size( MPI_COMM_WORLD, ( int* )( &npes_ ) );
	PROF_STOP( basic::CHECK_COMM_SIZE );
	PROF_START( basic::INITIALIZE );
	pool_rank_ = rank_;
	pool_npes_ = npes_;

	//assume master-node is always first in active_nodes list
	pool_master_node_ = 0;

	// every rank from master_node_ upward participates in the pool
	int new_size = npes_ - master_node_;
	transfer_buf_.set_size( new_size );
	//set_defaults_from_cmdline();
	if ( rank_ == master_node_ ) {
		assert( Pool_RMSD::size() == 0 );
	} else {
	}
	PROF_STOP( basic::INITIALIZE );
	//create new MPI_COMM_WORLD based on sub-set of nodes
	PROF_START( basic::MPICOMMCREATION );
	int index = 0;
	// collect the world ranks that will belong to the pool communicator
	for(int ii = master_node_; ii < npes_; ii++){
		(transfer_buf_.int_buf1_)[ index++ ] = ii;
	}

	//initialize all num_slave dependent buffers for MPI transfers
	MPI_Group pool_group, all;
	int returnval;
	//int world_rank;
	//int new_rank;

	returnval = MPI_Comm_group( MPI_COMM_WORLD, &all);
	if ( returnval != MPI_SUCCESS ) {
		utility_exit_with_message("failed in creating a new communicator!");
	}

	returnval = MPI_Group_incl( all, (new_size), transfer_buf_.int_buf1_, &pool_group );
	if ( returnval != MPI_SUCCESS ) {
		utility_exit_with_message("failed in creating a new communicator!");
	}

	returnval = MPI_Comm_create( MPI_COMM_WORLD, pool_group, &MPI_COMM_POOL );
	if ( returnval != MPI_SUCCESS ) {
		utility_exit_with_message("failed in creating a new communicator!");
	}

	// refresh pool-relative rank/size bookkeeping for the new communicator
	update_ranks( transfer_buf_.int_buf1_, (new_size) );
	PROF_STOP( basic::MPICOMMCREATION );

	tracer_visible_ = tr.visible();
}
176 
177 
/// @brief Convert a pose's coordinates into the transfer buffer and assign a
/// fresh tag for it.
/// NOTE(review): the statement between PROF_START/PROF_STOP (original line 180,
/// presumably the pose -> FArray coordinate extraction) is missing from this
/// extraction — confirm against the original source.
void MPIBPool_RMSD::reformat( core::pose::Pose const& pose, std::string & new_tag ){
	PROF_START( basic::FARRAY_MANIPULATION );
	PROF_STOP( basic::FARRAY_MANIPULATION );
	//assign new tag based on olli's scheme
	assign_tag( new_tag, 0 );
}
185 
186  void MPIBPool_RMSD::assign_tag( std::string& new_tag, core::Size optional_id_num ){
187  //std::string jobname = protocols::jd2::current_output_name();
188  if( rank_ == master_node_ ){
189  if( tracer_visible_ ){
190  tr.Debug << "assigning a tag with value " << lead_zero_string_of( ( pool_size_ + new_structures_ ) , 8 ) << std::endl;
191  }
192  new_tag = "new."+lead_zero_string_of( pool_size_ + new_structures_, 8 ); //+".0"+"_"+jobname
193  }else{
194  if( optional_id_num == 0 ){
195  if( tracer_visible_ ){
196  tr.Debug << "assigning a tag with value " << lead_zero_string_of( Pool_RMSD::size(), 8 ) << std::endl;
197  }
198  new_tag = "new."+lead_zero_string_of( Pool_RMSD::size(), 8 ); //+".0"+"_"+jobname
199  }else{
200  if( tracer_visible_ ){
201  tr.Debug << "assigning a tag with value " << lead_zero_string_of( optional_id_num, 8 ) << std::endl;
202  }
203  new_tag = "new."+lead_zero_string_of( optional_id_num, 8 ); //+".0"+"_"+jobname
204 
205  }
206  }
207 }
208 
/// @brief Bump the count of structures discovered since startup.
/// NOTE(review): the signature line (original line 209, presumably
/// "void MPIBPool_RMSD::increment_pool_size( core::Size num_to_add ){") is
/// missing from this extraction.
	new_structures_ += num_to_add;
}
212 
213 
/// @brief Broadcast the num_to_send newest structures from the master to every
/// slave in MPI_COMM_POOL. The master first compacts the accepted structures
/// (whose start offsets are in int_buf1_) to the front of the coordinate
/// buffer; slaves then tag and insert each received structure into their pool.
/// NOTE(review): several lines are missing from this extraction: the inner
/// copy/shift assignment of the master compaction loop (original line 242),
/// the buffer-pointer and root arguments of the MPI_Bcast (original lines
/// 250 and 253), and the slave-side add-to-pool call (original line 287).
/// Restore from the original source before compiling.
void MPIBPool_RMSD::broadcast_newest_coords( int num_to_send ){
	if ( num_to_send == 0 ) return;

	PROF_START( basic::MPIBARRIER );
	MPI_Barrier( MPI_COMM_POOL );
	PROF_STOP( basic::MPIBARRIER );

	if ( rank_ == master_node_ ) {

		assert( (int)(new_structures_) >= num_to_send );
		core::Size current_size = new_structures_;
		if( tracer_visible_ ) {
			tr.Debug << "broadcasting " << num_to_send << " structures " << std::endl;
			for( core::Size ii = 0; ii < num_to_send; ii++) {
				tr.Debug << " sending coordinates starting at index " << transfer_buf_.int_buf1_[ ii ] << std::endl;
			}
		}
		// PROF_START( basic::MPI_MASTER_BCAST_NUM_STRUCTURES_TO_ADD );
		// MPI_Bcast( &num_to_send, 1, MPI_INT, (pool_master_node_), MPI_COMM_POOL );
		// PROF_STOP( basic::MPI_MASTER_BCAST_NUM_STRUCTURES_TO_ADD );
		//take newest coords and put back in farray_coord_ptr_

		PROF_START( basic::COPY_COORDS );
		int shifted_index = 0;
		//int_buf1_ contains starting indices of structures you wish to send
		for( core::Size ii = 0; ii < num_to_send; ii++ ) {
			// delete coordinates that are not being saved by shifting coordinates over to the left
			for( core::Size jj = 0; jj < (3 * transfer_buf_.nresidues_ ); jj++ ) {
				// NOTE(review): the shift assignment (original line 242) is missing here.
			}
		}
		PROF_STOP( basic::COPY_COORDS );
	} //master copy coordinates

	PROF_START( basic::MPI_MASTER_BCAST_COORDS );
	MPI_Bcast(
		// NOTE(review): the send/receive buffer argument (original line 250) is missing.
		( num_to_send * transfer_buf_.nresidues_ * 3 ),
		MPI_DOUBLE,
		// NOTE(review): the root-rank argument (original line 253) is missing.
		MPI_COMM_POOL
	);
	PROF_STOP( basic::MPI_MASTER_BCAST_COORDS );

	if ( rank_ != master_node_ ) { //slave

		int num_to_receive = num_to_send;
		// PROF_START( basic::MPI_MASTER_BCAST_NUM_STRUCTURES_TO_ADD );
		// MPI_Bcast( &num_to_receive, 1, MPI_INT, (pool_master_node_), MPI_COMM_POOL );
		// PROF_STOP( basic::MPI_MASTER_BCAST_NUM_STRUCTURES_TO_ADD );
		if ( tracer_visible_ ) {
			tr.Debug << "receiving " << num_to_receive << " structures " << std::endl;
		}
		//if( tracer_visible_ ){
		//tr.Debug << "outputting the received coordinates " << (num_to_receive*transfer_buf_.nresidues_*3) << std::endl;
		//for(core::Size ii = 0; ii < (num_to_receive*transfer_buf_.nresidues_*3); ii++) {
		//tr.Debug << transfer_buf_.farray_coord_ptr_[ ii ] << " ";
		//}
		//tr.Debug << std::endl;
		//}
		PROF_START( basic::COPY_COORDS );
		increment_pool_size( num_to_receive );
		core::Size index = 0;
		// unpack each broadcast structure: tag it, convert to FArray, add to pool
		while( num_to_receive > 0 ){
			std::string tag_to_get;
			assign_tag( tag_to_get, 0 );
			if( tracer_visible_ ){
				tr.Debug << "assigned a new tag to new structure " << tag_to_get << " " << num_to_receive << std::endl;
			}
			array_to_farray( index );
			if( tracer_visible_ ){
				tr.Debug << "successfully assign array to farray " << std::endl;
			}
			// NOTE(review): the add-to-pool call (original line 287) is missing here.
			if( tracer_visible_ ){
				tr.Debug << "successfully added structure to pool " << std::endl;
			}
			num_to_receive--;
			index += (transfer_buf_.nresidues_ * 3);
			if( tracer_visible_ ){
				tr.Debug << "next will be accessing index " << index << std::endl;
			}
		}
		PROF_STOP( basic::COPY_COORDS );
		tr.Debug << "tabulated pool size is " << new_structures_ << " and real size is " << Pool_RMSD::size() << std::endl;
	} //slave post procession
}
301 
302 
/// @brief Master side of the report cycle: gather, from every pool node, a
/// per-node status code (-1 = trajectory finished, 0 = nothing to report,
/// >0 = size of a structure to report) and then gather the announced
/// coordinates into farray_coord_ptr_ via MPI_Gatherv.
/// NOTE(review): the signature line (original line 304) and several body lines
/// are missing from this extraction: bookkeeping after a "finished" report
/// (original lines 333-334 and 340, likely incrementing workers_finished_ and
/// zeroing size_per_coords_[ii]), and the offset/counter updates for a
/// reported structure (original lines 345-346, likely setting
/// memory_offset_[ii] and incrementing transfer_buf_.size_). Restore from the
/// original source before compiling.
void
	runtime_assert( rank_ == master_node_ );

	if( tracer_visible_ ){
		tr.Debug << "expecting " << pool_npes_ << " updates" << std::endl;
	}

	PROF_START( basic::MPI_GATHER_BARRIER );
	MPI_Barrier( MPI_COMM_POOL );
	PROF_STOP( basic::MPI_GATHER_BARRIER );

	//find out wether node has structure to report or finished trajectory
	//return in size_per_coord: -1 finished, 0 no structure, nresidues_ a structure to report
	int structures_to_report = 0;
	PROF_START( basic::MPI_SLAVE_REPORT_SIZES );
	MPI_Gather( &structures_to_report, 1, MPI_INT, transfer_buf_.size_per_coords_, 1, MPI_INT, (pool_master_node_), MPI_COMM_POOL );
	PROF_STOP( basic::MPI_SLAVE_REPORT_SIZES );

	core::Size max_coord_size = 0;
	//core::Size num_added = 0;
	transfer_buf_.size_ = 0;

	for ( core::Size ii = 0; ii < ( pool_npes_ ); ii++){
		if ( tracer_visible_ ) {
			tr.Debug << "rank " << ii << " reports a size of " << transfer_buf_.size_per_coords_[ ii ] << std::endl;
			tr.Debug << "max_coord_size now has a value of " << max_coord_size << std::endl;
		}
		//read out gathered information - finished?, structure? etc.
		if ( transfer_buf_.size_per_coords_[ ii ] < 0 ){ //finished ?

			// NOTE(review): statements at original lines 333-334 missing here.
			nodes_finished_[ ( ii + 1) ] = true;

			if ( tracer_visible_ ){
				tr.Debug << "tabulating another trajectory finished! " << std::endl;
			}
			// NOTE(review): statement at original line 340 missing here.
			transfer_buf_.memory_offset_[ ii ] = 0;

		} else if ( transfer_buf_.size_per_coords_[ ii ] > 0 ) { //not finished --- structure to report ?
			transfer_buf_.winning_ranks_[ transfer_buf_.size_ ] = ii; //save winning rank
			// NOTE(review): statements at original lines 345-346 missing here.
		}
		max_coord_size += transfer_buf_.size_per_coords_[ ii ];
	}

	if( tracer_visible_ ){
		tr.Debug << "checking the contents of nodes_finished: ";
		for(core::Size ii = 0; ii < pool_npes_; ii++){
			tr.Debug << nodes_finished_[ (ii+1) ] << " ";
		}
		tr.Debug << std::endl;
		tr.Debug << "about to receive the new coordinates " << max_coord_size << std::endl;
	}


	///now receive all coordinates of announced structures
	double tmp;
	PROF_START( basic::MPI_SLAVE_REPORT_NEW_COORDS );
	MPI_Gatherv( &tmp, 0, MPI_DOUBLE, transfer_buf_.farray_coord_ptr_, transfer_buf_.size_per_coords_, transfer_buf_.memory_offset_, MPI_DOUBLE, (pool_master_node_), MPI_COMM_POOL );
	PROF_STOP( basic::MPI_SLAVE_REPORT_NEW_COORDS );
	if( tracer_visible_ ){
		tr.Debug << "expecting a total size of max: " << max_coord_size << std::endl;
		tr.Debug << "received all-coordinates! ";
		for(core::Size ii = 0; ii < max_coord_size; ii++){
			tr.Debug << transfer_buf_.farray_coord_ptr_[ ii ] << " ";
		}
		tr.Debug << std::endl;
		tr.Debug << "finished receiving all coordinated" << std::endl;
	}
}
376 
377 
/// @brief Slave side of the report cycle when this node HAS a new structure:
/// flatten the candidate coordinates into the transfer array, gather its size
/// to the master, then send the coordinates via MPI_Gatherv (the master is the
/// only receiver, so the receive-side arguments are dummies on this rank).
/// NOTE(review): the signature line (original line 379), the computation of
/// size_to_report (original lines 381-382), the Gatherv send-buffer argument
/// (original line 406), and the root-rank argument (original line 413) are
/// missing from this extraction — restore from the original source.
void

	//double* array_xyz = new double[ size_to_report ];
	farray_to_array( 0 ); //prof statement in function call

	int dummy;

	if(tracer_visible_){
		tr.Debug << " calling gather to report slave size of coords" << std::endl;
	}

	PROF_START( basic::MPI_GATHER_BARRIER );
	MPI_Barrier( MPI_COMM_POOL );
	PROF_STOP( basic::MPI_GATHER_BARRIER );


	PROF_START( basic::MPI_SLAVE_REPORT_SIZES );
	MPI_Gather( &size_to_report, 1, MPI_INT, transfer_buf_.size_per_coords_, 1, MPI_INT, (pool_master_node_), MPI_COMM_POOL );
	PROF_STOP( basic::MPI_SLAVE_REPORT_SIZES );

	if( tracer_visible_ ){
		tr.Debug << " slave: finished calling gather! sending coordinate of size " << size_to_report << std::endl;
	}
	PROF_START( basic::MPI_SLAVE_REPORT_NEW_COORDS );
	MPI_Gatherv(
		// NOTE(review): send-buffer argument (original line 406) is missing here.
		size_to_report,
		MPI_DOUBLE,
		&dummy,
		&dummy,
		&dummy,
		MPI_DOUBLE,
		// NOTE(review): root-rank argument (original line 413) is missing here.
		MPI_COMM_POOL
	);
	PROF_STOP( basic::MPI_SLAVE_REPORT_NEW_COORDS );
}
418 
/// @brief Slave side of the report cycle when this node has NOTHING to report:
/// participate in the collective calls with a size of 0 and an empty payload
/// so the master's MPI_Gather / MPI_Gatherv do not deadlock.
/// NOTE(review): the signature line (original line 420) is missing from this
/// extraction — restore from the original source before compiling.
void
	int num_to_report = 0;
	int dummy;
	double empty_coords;


	PROF_START( basic::MPI_GATHER_BARRIER );
	MPI_Barrier( MPI_COMM_POOL );
	PROF_STOP( basic::MPI_GATHER_BARRIER );

	if( tracer_visible_ ){
		tr.Debug << " slave: reporting no new coordinates" << std::endl;
	}
	PROF_START( basic::MPI_SLAVE_REPORT_SIZES );
	MPI_Gather( &num_to_report, 1, MPI_INT, 0, 1, MPI_INT, (pool_master_node_), MPI_COMM_POOL);
	PROF_STOP( basic::MPI_SLAVE_REPORT_SIZES );
	PROF_START( basic::MPI_SLAVE_REPORT_NEW_COORDS );
	MPI_Gatherv( &empty_coords, 0, MPI_DOUBLE, &dummy, &dummy, &dummy, MPI_DOUBLE, (pool_master_node_), MPI_COMM_POOL);
	PROF_STOP( basic::MPI_SLAVE_REPORT_NEW_COORDS );
}
440 
/// @brief Flatten temp_coord_for_evaluation_ (3 x nresidues_) into the flat
/// transfer array, starting at the given offset.
/// NOTE(review): the signature line (original line 441) and the element-copy
/// assignment inside the inner loop (original line 445) are missing from this
/// extraction — restore from the original source before compiling.
	PROF_START( basic::FARRAY_MANIPULATION );
	for (int ii = 1; ii <= transfer_buf_.temp_coord_for_evaluation_.u1(); ii++ ) {
		for (int jj = 1; jj <= transfer_buf_.temp_coord_for_evaluation_.u2(); jj++ ) {
			// NOTE(review): copy assignment (original line 445) missing here.
		}
	}
	PROF_STOP( basic::FARRAY_MANIPULATION );
}
450 
/// @brief Convenience overload: flatten one structure starting at index.
/// NOTE(review): the signature line (original line 451) is missing from this
/// extraction — restore from the original source before compiling.
	farray_to_array( index, 1 );
}
454 
/// @brief Unpack the flat transfer array (starting at the given offset) back
/// into temp_coord_for_evaluation_ (3 x nresidues_).
/// NOTE(review): the signature line (original line 455) and the element-copy
/// assignment inside the inner loop (original line 459) are missing from this
/// extraction — restore from the original source before compiling.
	PROF_START( basic::FARRAY_MANIPULATION );
	for ( int ii = 1; ii <= transfer_buf_.temp_coord_for_evaluation_.u1(); ii++ ) {
		for( int jj = 1; jj <= transfer_buf_.temp_coord_for_evaluation_.u2(); jj++ ) {
			// NOTE(review): copy assignment (original line 459) missing here.
			//tr.Debug << "outputting useful debug info: " << transfer_buf_.farray_coord_ptr_[ index - 1 ] << " " << transfer_buf_.temp_coord_for_evaluation_( ii, jj ) << std::endl;
		}
	}
	PROF_STOP( basic::FARRAY_MANIPULATION );
}
465 
/// @brief Convenience overload: unpack one structure starting at index.
/// NOTE(review): the signature line (original line 466) is missing from this
/// extraction — restore from the original source before compiling.
	array_to_farray( index, 1 );
}
469 
470  /**
471 void MPIBPool_RMSD::farray_to_array( FArray2D<double> const& farray_xyz, double xyz[] ){
472  int index = 0;
473  if( tracer_visible_ ){
474  tr.Debug << "converting farray to array: u1: " << farray_xyz.u1() << " u2: " << farray_xyz.u2() << " " << std::endl;
475  }
476  for( int i = 1; i <= farray_xyz.u1(); i++ ){
477  for( int j = 1; j <= farray_xyz.u2(); j++ ){
478  xyz[ index++ ] = farray_xyz( i, j );
479  //tr.Debug << farray_xyz( i, j ) << " ";
480  }
481  }
482  //tr.Debug << std::endl;
483 }
484  **/ //not needed anymore. just access memory directly
485 
486  /**
487 void MPIBPool_RMSD::array_to_farray( FArray2D<double>& farray_xyz, double xyz[] ){
488 
489  assert( transfer_buf_.nresidues_ > 0 );
490  //farray_xyz.redimension( 3, nresidues_, 0.0 );
491  tr.Debug << "converting array to farray dimensions: " << farray_xyz.u1() << " " << farray_xyz.u2() << " " << std::endl;
492  int index = 0;
493  for(core::Size i = 1; i <= 3; i++ ){
494  for(core::Size j = 1; j <= transfer_buf_.nresidues_; j++ ){
495  farray_xyz( i, j ) = xyz[ index++ ];
496  //tr.Debug << farray_xyz( i, j ) << " ";
497  }
498  }
499  // tr.Debug << std::endl;
500 }
501  **/
502 
503 
/// @brief Setter for the RMSD threshold above which a structure counts as new.
/// NOTE(review): the signature line (original line 504, presumably
/// "void MPIBPool_RMSD::set_transition_threshold( core::Real threshold ){")
/// is missing from this extraction.
	transition_threshold_ = threshold;
}
507 
/// @brief Return true once every slave trajectory (npes_ - master_node_ - 1
/// workers) has reported that it is finished; false otherwise.
/// NOTE(review): the signature line (original line 508, presumably
/// "bool MPIBPool_RMSD::workers_finished(){") is missing from this extraction.
	if ( workers_finished_ < ( npes_ - master_node_ - 1 ) ){
		if ( tracer_visible_ ){
			tr.Debug << "num trajectories finished: " <<
				workers_finished_ << " needed: " <<
				( npes_ - master_node_ - 1 ) << std::endl;
		}
		return false;
	}else{
		if( tracer_visible_ ){
			tr.Debug << "FINISHED! num trajectories finished: " <<
				workers_finished_ << " needed: " <<
				( npes_ - master_node_ - 1 ) << std::endl;
		}
		return true;
	}

}
526 
/// @brief Slave-side shutdown: report a size of -1 ("trajectory finished") to
/// the master, then keep participating in the master's broadcast/comm-rebuild
/// collectives for one final round so the remaining nodes do not deadlock.
/// NOTE(review): the signature line (original line 527) is missing from this
/// extraction — restore from the original source before compiling.
	if( rank_ != master_node_ ){
		if( tracer_visible_ ){
			tr.Debug << "sending broadcast finalized message to master " << std::endl;
		}
		int size_to_report = -1; // -1 signals "this trajectory is finished"
		int empty_size = 0;
		double empty_coords;

		PROF_START( basic::MPI_GATHER_BARRIER );
		MPI_Barrier( MPI_COMM_POOL );
		PROF_STOP( basic::MPI_GATHER_BARRIER );

		PROF_START( basic::MPI_SLAVE_REPORT_SIZES );
		MPI_Gather( &size_to_report, 1, MPI_INT, &size_to_report, 1, MPI_INT, (pool_master_node_), MPI_COMM_POOL );
		PROF_STOP( basic::MPI_SLAVE_REPORT_SIZES );

		PROF_START( basic::MPI_SLAVE_REPORT_NEW_COORDS );
		MPI_Gatherv( &empty_coords, 0, MPI_DOUBLE, &empty_size, &empty_size, &empty_size, MPI_DOUBLE, (pool_master_node_), MPI_COMM_POOL );
		PROF_STOP( basic::MPI_SLAVE_REPORT_NEW_COORDS );

		int num_poses_added = 0;
		int new_size = 0;

		// receive (and discard) this round's accepted-structure broadcasts
		PROF_START( basic::MPI_MASTER_BCAST_NUM_STRUCTURES_TO_ADD );
		MPI_Bcast( &num_poses_added, 1, MPI_INT, (pool_master_node_), MPI_COMM_POOL );
		PROF_STOP( basic::MPI_MASTER_BCAST_NUM_STRUCTURES_TO_ADD );

		PROF_START( basic::MPI_MASTER_BCAST_WINNING_RANKS );
		MPI_Bcast( transfer_buf_.int_buf1_, num_poses_added, MPI_INT, (pool_master_node_), MPI_COMM_POOL );
		PROF_STOP( basic::MPI_MASTER_BCAST_WINNING_RANKS );

		broadcast_newest_coords( num_poses_added );

		// take part in shrinking the communicator to the still-active ranks
		PROF_START( basic::MPI_MASTER_BCAST_NEW_COMM_SIZE );
		MPI_Bcast( &new_size, 1, MPI_INT, pool_master_node_, MPI_COMM_POOL );
		PROF_STOP( basic::MPI_MASTER_BCAST_NEW_COMM_SIZE );

		assert( new_size < pool_npes_ );
		if( tracer_visible_ ){
			tr.Debug << "new size is " << new_size << " current size: " << pool_npes_ << std::endl;
		}
		PROF_START( basic::COMM_REDUCE_SIZE );
		MPI_Bcast( transfer_buf_.int_buf1_, new_size, MPI_INT, pool_master_node_, MPI_COMM_POOL );
		PROF_STOP( basic::COMM_REDUCE_SIZE );

		if( tracer_visible_ ){
			tr.Debug << "creating new communicator from ranks: ";
			for(int ii = 0; ii < new_size; ii++){
				tr.Debug << transfer_buf_.int_buf1_[ ii ] << " ";
			}
			tr.Debug << std::endl;
		}

		create_comm( transfer_buf_.int_buf1_, new_size );

		//DEBUG
		/**
		std::string debug_posedump = ".debug_posedump.out";
		std::ofstream debug;
		std::ostringstream q;
		q << rank_;
		debug.open((q.str() + debug_posedump).c_str(),std::fstream::app);

		for( core::Size ii = 1; ii <= Pool_RMSD::size(); ii++ ) {
			FArray2D<double> tmp;
			std::string tag = Pool_RMSD::get_tag( ii );
			Pool_RMSD::get( ii, tmp );
			//write to file
			debug << tag << " BEGIN " << std::endl;
			for( core::Size f_index_i = 1; f_index_i <= tmp.u2(); f_index_i++ ){
				for( core::Size f_index_j = 1; f_index_j <= tmp.u1(); f_index_j++ ){
					debug << tmp( f_index_j, f_index_i ) << " ";
				}
				debug << std::endl;
			}
			debug << tag << " END " << std::endl;
		}
		**/
		//DEBUG

	}
}
610 
611 //
612  void MPIBPool_RMSD::create_comm( int ranks_to_include[], int new_size ){
613  int returnval;
614  MPI_Group new_pool_group, old_pool_group;
615  MPI_Comm dup_pool_comm;
616  //tr.Debug << "creating a duplicate communicator from ranks: " << std::endl;
617  bool is_active_node = false;
618  nodes_finished_.resize( new_size );
619  for(int ii = 0; ii < new_size; ii++ ){
620  nodes_finished_[ ii + 1 ] = false;
621  if( (int)(pool_rank_) == ranks_to_include[ ii ]){
622  tr.Debug << "this rank " << pool_rank_ << " is designated an active node" << std::endl;
623  is_active_node = true;
624  }
625  //tr.Debug << ranks_to_include[ ii ] << " ";
626  }
627  //tr.Debug << std::endl;
628  PROF_START( basic::MPICOMMCREATION );
629  MPI_Comm_dup( MPI_COMM_POOL, &dup_pool_comm );
630  returnval = MPI_Comm_group( dup_pool_comm, &old_pool_group );
631  assert(returnval == MPI_SUCCESS );
632  //tr.Debug << "created comm-group based on old pool" << std::endl;
633  returnval = MPI_Group_incl( old_pool_group, (new_size), ranks_to_include, &new_pool_group );
634  assert(returnval == MPI_SUCCESS );
635  //tr.Debug << " created new group based on trajs that are still active " << std::endl;
636  returnval = MPI_Comm_create( dup_pool_comm, new_pool_group, &MPI_COMM_POOL );
637  assert(returnval == MPI_SUCCESS );
638  //tr.Debug << "created new comm based on this new group " << std::endl;
639  if( is_active_node ){
640  update_ranks( ranks_to_include, new_size );
641  MPI_Comm_size(MPI_COMM_POOL, ( int* )( &new_size ) );
642  ///tr.Debug << "successfully created new com. checking size: " << new_size << std::endl;
643  //transfer_buf_.set_size( new_size );
644  //runtime_assert( transfer_buf_.size_ > pool_npes_ );
645  if( tracer_visible_ ){
646  tr.Debug << "new size of pool is " << new_size << std::endl;
647  }
648  }
649  PROF_STOP( basic::MPICOMMCREATION );
650  }
651 
652 
653 ///@detail update the rank of the pool to the MPI_COMM_POOL relative rank
654 void MPIBPool_RMSD::update_ranks( int const active_nodes[], int new_size ){
655  bool is_active = false;
656 
657  //debug output
658  if( tracer_visible_ ){
659  tr.Debug << "listing active ranks: ";
660  for( int ii = 0; ii < new_size; ii++ ){
661  tr.Debug << active_nodes[ ii ] << " ";
662  }
663  tr.Debug << std::endl;
664  }
665 
666  transfer_buf_.set_size( new_size );
667  //figure out if this rank take parts in the POOL
668  nodes_finished_.resize( new_size, false );
669  for ( int ii = 0; ii < new_size; ii++ ){
670  //nodes_finished_[ ii + 1 ] = false;
671  if ( active_nodes[ ii ] == (int)(pool_rank_) ) {
672  if( tracer_visible_ ){
673  tr.Debug << "this node " << pool_rank_ << " is still active " << std::endl;
674  }
675  PROF_START( basic::CHECK_COMM_SIZE );
676  MPI_Comm_size(MPI_COMM_POOL, ( int* )(&pool_npes_) );
677  MPI_Comm_rank(MPI_COMM_POOL, ( int* )(&pool_rank_) );
678  PROF_STOP( basic::CHECK_COMM_SIZE );
679  if ( tracer_visible_ ){
680  tr.Debug << "master node is rank " << master_node_ << std::endl;
681  tr.Debug << "new size of comm is " << npes_ << " pool-size " << pool_npes_ << " this node now has rank " << rank_ << " and pool_rank " << pool_rank_ << std::endl;
682  tr.Debug << "double checking node_finished contents: \n";
683  for(core::Size ii = 1; ii <= nodes_finished_.size(); ii++){
684  tr.Debug << nodes_finished_[ii] << " ";
685  }
686  tr.Debug << std::endl;
687  }
688  runtime_assert( transfer_buf_.size_ >= pool_npes_ );
689  return;
690  }
691  }
692 }
693 
/// @brief Default-construct an empty TransferBuffer: all raw-array pointers
/// null, no residues, zero size — safe to destroy (delete[] of null is a no-op).
/// NOTE(review): the constructor signature line (original line 694, presumably
/// "TransferBuffer::TransferBuffer():") is missing from this extraction.
	memory_offset_(0),
	size_per_coords_(0),
	int_buf1_(0),
	winning_ranks_(0),
	farray_coord_ptr_(0),
	temp_coord_for_evaluation_(),
	coords_(),
	size_(0),
	nresidues_(0)
{}
705 
706 TransferBuffer::TransferBuffer( core::Size num_slave_nodes ):
707  size_( num_slave_nodes )
708 {
709  set_size( num_slave_nodes );
710 }
711 
/// @brief Release all heap arrays owned by the buffer. Safe for the
/// default-constructed case because those pointers start as null.
/// NOTE(review): the destructor signature line (original line 712, presumably
/// "TransferBuffer::~TransferBuffer(){") is missing from this extraction.
	delete [] memory_offset_;
	delete [] size_per_coords_;
	delete [] int_buf1_;
	delete [] winning_ranks_;
	delete [] farray_coord_ptr_;
}
719 
/// @brief (Re)allocate all per-slave transfer arrays for num_slave_nodes
/// slaves and record the new size. coords_/temp_coord_for_evaluation_ are
/// dimensioned from the current nresidues_.
/// NOTE(review): previously-allocated arrays are NOT delete[]d before the
/// "new" below, so calling set_size() more than once (both initialize() and
/// update_ranks() call it) leaks the old buffers. A safe fix requires first
/// guaranteeing every constructor null-initializes these pointers — flagging
/// rather than changing behavior here.
void
TransferBuffer::set_size( int num_slave_nodes ){
	if( tr.visible() ){
		tr.Debug << "setting the size of the transfer_buf_ to " << num_slave_nodes << std::endl;
	}
	memory_offset_ = new int[ num_slave_nodes ];
	size_per_coords_ = new int[ num_slave_nodes ];
	int_buf1_ = new int[ num_slave_nodes ];
	winning_ranks_ = new int[ num_slave_nodes ];
	//runtime_assert( nresidues_ > 0 && num_slave_nodes > 0);
	coords_ = ObjexxFCL::FArray3D<double>( 3, nresidues_, num_slave_nodes, 0.0 );
	temp_coord_for_evaluation_ = ObjexxFCL::FArray2D<double>( 3, nresidues_, 0.0 );
	//farray_coord_ptr_ = coords_.get_pointer_to_data();
	farray_coord_ptr_ = new double[ 3 * nresidues_ * num_slave_nodes ];
	size_ = num_slave_nodes;
}
736 
737 
/// @brief Tag the current candidate structure and add it to the pool, then
/// bump the discovered-structure counter.
/// NOTE(review): the signature lines (original lines 739-740) and the head of
/// the add call plus its leading arguments (original lines 745-747) are
/// missing from this extraction — restore from the original source.
void
	assign_tag( tag, 0 );
	if( tracer_visible_ ){
		tr.Debug << "now adding a pose with the assigned-tag: " << tag << std::endl;
	}
	// NOTE(review): the add call's opening and first arguments (original
	// lines 745-747) are missing here; only the trailing "tag" argument remains.
		tag
	);
	increment_pool_size( 1 );
}
752 
753 
/// @brief Master-node main loop: until every worker reports finished, gather
/// candidate structures from the slaves, decide which enter the pool (the
/// first candidate of a round always does; later ones only if their best RMSD
/// against the pool exceeds transition_threshold_), broadcast the accepted
/// structures back to all slaves, and shrink MPI_COMM_POOL whenever some
/// trajectories have finished.
/// NOTE(review): the signature line (original line 754) and several body
/// statements are missing from this extraction (see inline notes) — restore
/// from the original source before compiling.

	while( !workers_finished() ){

		//using broadcasting
		if ( tracer_visible_ ){
			tr.Debug << "about to gather coords from slaves" << std::endl;
		}

		//utility::vector1<FArray2D_double> new_poses; //becomes transfer_buf_.coords
		//utility::vector1<int> rank_of_pose_added; //becomes transfer_buf_.winning_ranks

		//figure out how many slave report structures and which workers are finished
		// NOTE(review): statement at original line 767 missing here — presumably
		// the master-side gather call collecting slave reports.

		if( tracer_visible_ ){
			tr.Debug << "finished gathering coordinates from slave nodes checking size of transfer_buf " << transfer_buf_.size_ << std::endl;
		}

		PROF_START( basic::MPI_POOL_MASTER_THINKS );
		//figure out which structures are going into the masters pool, and which are discarded
		core::Size num_poses_added = 0;
		for ( core::Size index_new_pose = 0; index_new_pose < transfer_buf_.size_; index_new_pose++) {
			if ( num_poses_added == 0 ){ //first will always go in!
				std::string new_tag;
				//put coordinates in temp Farray
				array_to_farray( 0 );
				// NOTE(review): statement at original line 781 missing here —
				// presumably the call adding this first structure to the pool.
				transfer_buf_.int_buf1_[ num_poses_added ] = 0; //offset for first structure
				num_poses_added++;
				if( tracer_visible_ ){
					tr.Debug << "continuing . . . " << std::endl;
				}
				continue;

			}
			//for all other structure evaluate if there is already a similar one in pool.
			if( tracer_visible_ ){
				tr.Debug << "index is now " << index_new_pose << " size is " << transfer_buf_.size_ << " performing evaluation now. num structures in pool "
					<< new_structures_ << " num added in-between "
					<< num_poses_added << " so index is " << (new_structures_ - num_poses_added + 1 ) << std::endl;
			}

			core::Real best_rmsd;
			std::string best_decoy;
			if( tracer_visible_ ){
				tr.Debug << "about to convert array to farray, index: " << (index_new_pose*transfer_buf_.nresidues_ * 3 ) << std::endl;
			}
			array_to_farray( index_new_pose * transfer_buf_.nresidues_ * 3 );
			if( tracer_visible_ ){
				tr.Debug << "finished converting array to farray, index: " << (index_new_pose) << std::endl;
			}
			// NOTE(review): the head of this call (original line 806, presumably
			// a Pool_RMSD::evaluate(...) on temp_coord_for_evaluation_) is missing.
				best_decoy,
				best_rmsd,
				(new_structures_ + 1 - num_poses_added )
			);

			//best_rmsd = transition_threshold_ + 1;

			if( tracer_visible_ ){
				tr.Debug << "finished evaluating decoy against pool, index "
					<< (new_structures_ + 1 - num_poses_added) << " best_rms: " << best_rmsd << std::endl;
			}
			if ( best_rmsd > transition_threshold_ ) {
				if ( tracer_visible_ ) {
					tr.Debug << "finished eval, best rms is " << best_rmsd
						<< " which is greater than threshold " << transition_threshold_ << " so adding to pool " << std::endl;
				}
				// NOTE(review): statement at original line 823 missing here —
				// presumably the call adding this accepted structure to the pool.
				transfer_buf_.int_buf1_[ num_poses_added ] = ( index_new_pose * 3 * transfer_buf_.nresidues_ ); //starting index of saved structure in array
				if( tracer_visible_ ){
					tr.Debug << "finished adding pose to pool, now have " << new_structures_ << std::endl;
				}
				num_poses_added++;
			}
		}

		PROF_STOP( basic::MPI_POOL_MASTER_THINKS );

		//broadcast the ranks whose structure got accepted
		PROF_START( basic::MPI_MASTER_BCAST_NUM_STRUCTURES_TO_ADD );
		MPI_Bcast( &num_poses_added, 1, MPI_INT, (pool_master_node_), MPI_COMM_POOL ); //int* ranks_with_accepted_poses
		PROF_STOP( basic::MPI_MASTER_BCAST_NUM_STRUCTURES_TO_ADD );

		if( tracer_visible_ ){
			tr.Debug << "broadcasting winning ranks " << std::endl;
		}
		PROF_START( basic::MPI_MASTER_BCAST_WINNING_RANKS );
		MPI_Bcast( transfer_buf_.winning_ranks_, num_poses_added, MPI_INT, (pool_master_node_), MPI_COMM_POOL ); //int* ranks_with_accepted_poses
		if( tracer_visible_ ){
			tr.Debug << "checking the contents of int_buf, which should contain starting indices\n";
			for(core::Size ii = 0; ii < num_poses_added; ii++){
				tr.Debug << transfer_buf_.int_buf1_[ ii ] << " ";
			}
			tr.Debug << std::endl;
		}
		PROF_STOP( basic::MPI_MASTER_BCAST_WINNING_RANKS );
		broadcast_newest_coords( num_poses_added );

		//after broadcast newest coords, int_buf1_ not needed anymore and can be overwritten in the next section

		//if some trajectories finished but not others, we need to re-create a comm with appropriate size
		PROF_START( basic::CHECK_COMM_SIZE );
		unsigned int new_size = 0;
		for ( unsigned int ii = 0; ii < pool_npes_; ii++ ){
			if ( !nodes_finished_[ (ii + 1) ] ) {
				transfer_buf_.int_buf1_[ new_size ] = ii;
				new_size++;
				if( tracer_visible_ ){
					tr.Debug << "still active node: " << transfer_buf_.int_buf1_[ new_size - 1 ] << std::endl;
				}
			}
		}
		PROF_STOP( basic::CHECK_COMM_SIZE );
		PROF_START( basic::MPI_MASTER_BCAST_NEW_COMM_SIZE );
		MPI_Bcast( &new_size, 1, MPI_INT, pool_master_node_, MPI_COMM_POOL );
		PROF_STOP( basic::MPI_MASTER_BCAST_NEW_COMM_SIZE );

		if( new_size != pool_npes_ ){
			PROF_START( basic::MPI_MASTER_BCAST_NEW_POOL_RANKS );
			MPI_Bcast( transfer_buf_.int_buf1_, new_size, MPI_INT, (pool_master_node_), MPI_COMM_POOL ); //here's where we re-create the mpi_pool_comm
			PROF_STOP( basic::MPI_MASTER_BCAST_NEW_POOL_RANKS );
			if( tracer_visible_ ){
				tr.Debug << "new size is " << new_size << " current size is " << pool_npes_ <<std::endl;
			}
			PROF_START( basic::MPICOMMCREATION );
			create_comm( transfer_buf_.int_buf1_, new_size );
			PROF_STOP( basic::MPICOMMCREATION );
		}

	}
	//check num_to_update. if non-zero, broadcast numm
	if( tracer_visible_ ){
		tr.Debug << "master node finished " << std::endl;
	}

	//DEBUG
	/**
	std::string master_debug_posedump = "master_debug_posedump.out";
	std::ofstream debug_master;
	debug_master.open(master_debug_posedump.c_str(),std::fstream::app);

	for( core::Size ii = 1; ii <= Pool_RMSD::size(); ii++ ) {
		FArray2D<double> tmp;
		std::string tag = Pool_RMSD::get_tag( ii );
		Pool_RMSD::get( ii, tmp );
		//write to file
		debug_master << tag << " BEGIN " << std::endl;
		for( core::Size f_index_i = 1; f_index_i <= tmp.u2(); f_index_i++ ){
			for( core::Size f_index_j = 1; f_index_j <= tmp.u1(); f_index_j++ ){
				debug_master << tmp( f_index_j, f_index_i ) << " ";
			}
			debug_master << std::endl;
		}
		debug_master << tag << " END " << std::endl;
	}
	**/
	//DEBUG

	return;
}
916 
917 
918 
/// @brief True on the designated master node, false on slaves.
/// NOTE(review): the signature line (original line 919) is missing from this
/// extraction — restore from the original source before compiling.
	return rank_ == master_node_;
}
922 
// Slave-side evaluate-and-add: scores `pose` against the shared decoy pool,
// reports to the pool master, learns which ranks' structures were accepted
// this round, and synchronizes the pool/communicator state. Returns the index
// of the pool member nearest to `pose`; `best_decoy`/`best_rmsd` are out-params
// for that member's tag and RMSD.
// NOTE(review): the function's signature line (doxygen 923) and several
// interior lines (934, 959, 964, 999, 1002, 1034) were dropped by the Doxygen
// extraction; hedged notes mark each gap below.
924  core::pose::Pose const& pose,
925  std::string& best_decoy,
926  core::Real& best_rmsd,
927  core::Real transition_threshold
928  ){
929  tr << "size of pool is " << Pool_RMSD::size() << std::endl;
930  PROF_STOP( basic::MPI_POOL_SLAVE_THINKS );
931  //bool use_broadcasting = true;
// The caller-supplied threshold must match the one this object was set up with.
932  assert(transition_threshold == transition_threshold_);
933  if( transfer_buf_.nresidues_ == 0 ){
// NOTE(review): elided line (doxygen 934) -- presumably caches
// pose.total_residue() into transfer_buf_.nresidues_ on first use; the assert
// in the else-branch below is consistent with that. Confirm in full source.
935  }else{
936  assert( transfer_buf_.nresidues_ == pose.total_residue() );
937  }
938  if( tracer_visible_ ){
939  tr.Debug << "this node is rank " << rank_ << " pool-rank is " << pool_rank_ << " master node is rank " << master_node_ << " and total size is " << npes_ << " pool-size is " << pool_npes_ << std::endl;
940  }
// A slave's pool rank lies strictly between the pool master and the pool size.
941  assert(pool_rank_ > pool_master_node_ && pool_rank_ < pool_npes_);
942  //tr.Debug << "node is rank " << rank_ << " out of " << npes_ << std::endl;
943  core::Size best_index;
// First scan: nearest member of the current pool (fills best_decoy/best_rmsd).
944  best_index = Pool_RMSD::evaluate( pose, best_decoy, best_rmsd );
945  if( tracer_visible_ ){
// NOTE(review): "threashold" is a typo in this runtime log string (also at
// doxygen 1054); left untouched here since changing it alters output.
946  tr.Debug << "best rmsd after first evaluation is " << best_rmsd << " threashold " << transition_threshold << " and index " << best_index << std::endl;
947  }
948 
949  std::string new_tag;
// No pool member within the threshold: pack this pose's coordinates into the
// transfer buffer (reformat) and report them to the master as a candidate.
950  if( best_rmsd > transition_threshold ){
951  //FArray2D_double coords( 3, pose.total_residue() , 0.0 );
952  PROF_START( basic::FARRAY_MANIPULATION );
953  reformat( pose, new_tag );
954  PROF_STOP( basic::FARRAY_MANIPULATION );
955  if( tracer_visible_ ){
956  tr.Debug << " slave: about to report new coordinates to master!" << std::endl;
957  }
958 
// NOTE(review): elided line (doxygen 959) -- the actual report-to-master call
// for the new-coordinates case; confirm in full source.
960  }else{
961  if( tracer_visible_ ){
962  tr.Debug << " slave: about to report, no new coordinates, to master!" << std::endl;
963  }
// NOTE(review): elided line (doxygen 964) -- the report-to-master call for the
// no-new-coordinates case; confirm in full source.
965  }
966 
967  int new_size = 0;
968  int num_structures_to_add = 0;
// Master broadcasts how many candidate structures were accepted this round.
969  PROF_START( basic::MPI_MASTER_BCAST_NUM_STRUCTURES_TO_ADD );
970  MPI_Bcast( &num_structures_to_add, 1, MPI_INT, (pool_master_node_), MPI_COMM_POOL );
971  PROF_STOP( basic::MPI_MASTER_BCAST_NUM_STRUCTURES_TO_ADD );
972 
973  if( tracer_visible_ ){
974  tr.Debug << " expecting " << num_structures_to_add << " from master node " << std::endl;
975  }
// ...then the pool-ranks whose structures were accepted (the "winning" ranks).
976  PROF_START( basic::MPI_MASTER_BCAST_WINNING_RANKS );
977  MPI_Bcast( transfer_buf_.winning_ranks_, num_structures_to_add, MPI_INT, (pool_master_node_), MPI_COMM_POOL );
978  PROF_STOP( basic::MPI_MASTER_BCAST_WINNING_RANKS );
979  if( tracer_visible_ ){
980  tr.Debug << "received "<< num_structures_to_add << " from the master node! " << std::endl;
// NOTE(review): core::Size (unsigned) compared against int
// num_structures_to_add -- signed/unsigned mismatch; benign here since the
// broadcast count is non-negative, but worth cleaning up.
981  for ( core::Size ii = 0; ii < num_structures_to_add; ii++ ) {
982  tr.Debug << "winning rank: " << transfer_buf_.winning_ranks_[ ii ] << "\n";
983  }
984  tr.Debug << std::endl;
985  }
986  bool i_am_a_winning_rank = false;
// If this rank is among the winners, its pose joins the pool: best_rmsd
// becomes 0 (it matches itself), a pool tag is assigned, and the decoy is
// appended to the discovered-decoys silent file.
987  for( int ii = 0; ii < num_structures_to_add; ii++ ){
988  if( (int)(pool_rank_) == transfer_buf_.winning_ranks_[ ii ] ){
989  if( tracer_visible_ ){
990  tr.Debug << "I WON! I'm one of the winning ranks! " << std::endl;
991  }
992  i_am_a_winning_rank = true;
993  best_rmsd = 0.0;
994  assign_tag( new_tag, ( Pool_RMSD::size() + ii ) );
995  tr.Debug << "I'm gonna add a new structure, this is it's tag: " << new_tag << std::endl;
996  best_decoy = new_tag;
// NOTE(review): duplicate of the previous line -- redundant assignment; the
// fix (deleting one copy) belongs in a code change, not this annotation pass.
997  best_decoy = new_tag;
998  PROF_START( basic::WRITE_TO_FILE );
// NOTE(review): elided lines (doxygen 999 and 1002) -- presumably the
// declarations of `ss` (a silent struct) and `sfd` (a SilentFileData);
// confirm in full source.
1000 
1001  ss->fill_struct( pose, new_tag );
1003  sfd.write_silent_struct( *ss, new_decoys_out_, false );
1004  PROF_STOP( basic::WRITE_TO_FILE );
1005  }
1006  }
1007 
// Collective: every rank receives the newly accepted coordinates; the pool
// size is incremented inside this call (per its trailing comment).
1008  broadcast_newest_coords( num_structures_to_add ); //automatically increments pool_size appropriately
1009  PROF_START( basic::MPI_MASTER_BCAST_NEW_COMM_SIZE );
1010  MPI_Bcast( &new_size, 1, MPI_INT, pool_master_node_, MPI_COMM_POOL );
1011  PROF_STOP( basic::MPI_MASTER_BCAST_NEW_COMM_SIZE );
1012 
1013  if( tracer_visible_ ){
1014  tr.Debug << "new size of pool " << new_size << " current_size of pool_npes_ " << pool_npes_ << std::endl;
1015  }
// If membership changed, receive the new rank list and rebuild the pool
// communicator from it.
1016  if( new_size != pool_npes_ ){
1017 
1018  PROF_START( basic::MPI_MASTER_BCAST_NEW_POOL_RANKS );
1019  MPI_Bcast( transfer_buf_.int_buf1_, new_size, MPI_INT, (pool_master_node_), MPI_COMM_POOL );
1020  PROF_STOP( basic::MPI_MASTER_BCAST_NEW_POOL_RANKS );
1021  if( tracer_visible_ ){
1022  tr.Debug << "creating new communicator from ranks: ";
1023  for(int ii = 0; ii < new_size; ii++){
1024  tr.Debug << transfer_buf_.int_buf1_[ ii ] << " ";
1025  }
1026  tr.Debug << std::endl;
1027  }
1028  create_comm( transfer_buf_.int_buf1_, new_size );
1029  }
1030  PROF_START( basic::MPI_POOL_SLAVE_THINKS );
1031 
// Non-winners re-score against only the newly appended pool members and keep
// whichever match is closer than their first-scan result.
1032  if( !i_am_a_winning_rank && num_structures_to_add > 0 ){ //update rms info and nearest cluster info
1033  FArray2D_double coords( 3, pose.total_residue(), 0.0 );
// NOTE(review): elided line (doxygen 1034) -- presumably fills `coords` from
// `pose` before the partial re-evaluation below; confirm in full source.
1035  tr.Debug << "checking coords" << std::endl;
1036 
1037  tr.Debug <<std::endl;
1038  core::Real competing_best_rmsd= -1;
1039  std::string competing_best_decoy;
1040  if( tracer_visible_ ){
1041  tr.Debug << "before update, this is my info: " << best_rmsd << " " << best_decoy << " " << best_index << std::endl;
1042  }
// Second scan starts at the first newly added member, so only new structures
// are compared; alt_index is relative to that starting offset.
1043  core::Size alt_index = Pool_RMSD::evaluate( coords, competing_best_decoy, competing_best_rmsd, ( Pool_RMSD::size() - num_structures_to_add + 1 ) );
1044  if( tracer_visible_ ){
1045  tr.Debug << "after 2nd eval, this is my info: " << competing_best_rmsd << " " << competing_best_decoy << " " << alt_index << std::endl;
1046  }
1047  if( competing_best_rmsd < best_rmsd ) {
1048  best_rmsd = competing_best_rmsd;
1049  best_decoy = competing_best_decoy;
// Convert the offset-relative index back to an absolute pool index.
1050  best_index = alt_index + Pool_RMSD::size() - num_structures_to_add;
1051  }
1052  }
1053  if( tracer_visible_ ){
1054  tr.Debug << "best rmsd after evaluation is " << best_rmsd << " threashold " << transition_threshold << " num_structures_to_add " << num_structures_to_add << " pool-size " << Pool_RMSD::size() << " and index " << ( Pool_RMSD::size() - num_structures_to_add + 1 ) << std::endl;
1055  }
1056 
1057  return best_index;
1058 }
1059 
1060 
1061 } //mc_convergence_checks
1062 } //canonical_sampling
1063 } //protocols
1064 #else
1065 #endif