Rosetta 3.5
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
MPIMultiCommJobDistributor.cc
Go to the documentation of this file.
1 // -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
2 // vi: set ts=2 noet:
3 //
4 // (c) Copyright Rosetta Commons Member Institutions.
5 // (c) This file is part of the Rosetta software suite and is made available under license.
6 // (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
7 // (c) For more information, see http://www.rosettacommons.org. Questions about this can be
8 // (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
9 
10 /// @file protocols/jd2/MPIMultiCommJobDistributor.cc
11 /// @brief implementation of MPIMultiCommJobDistributor
12 /// @author Oliver Lange olange@u.washington.edu
13 /// @details Freely based on the MPIWorkPoolJobDistributor from Doug
14 
15 // MPI headers
16 #ifdef USEMPI
17 #include <mpi.h> //keep this first
18 #endif
19 
20 // Unit headers
22 
23 // Package headers
25 #include <protocols/jd2/Job.hh>
26 
27 #include <protocols/moves/Mover.hh>
28 
30 #include <utility/io/ozstream.hh> //to toggle MPI rerouting
31 
32 // Utility headers
33 #include <basic/Tracer.hh>
34 #include <basic/options/option.hh>
35 #include <utility/exit.hh>
36 // AUTO-REMOVED #include <utility/assert.hh>
37 
38 // Option headers
39 #include <basic/options/keys/out.OptionKeys.gen.hh>
40 #include <basic/options/keys/jd2.OptionKeys.gen.hh>
41 
42 // C++ headers
43 #include <string>
44 
45 // ObjexxFCL headers
46 #include <ObjexxFCL/string.functions.hh>
47 
48 #include <utility/vector1.hh>
49 
50 
51 static basic::Tracer tr("protocols.jd2.MPIMultiCommJobDistributor");
52 
53 namespace protocols {
54 namespace jd2 {
55 
56 using namespace core;
57 
58 
59 using namespace basic::options;
60 using namespace basic::options::OptionKeys;
61 
///@details Constructor. Note that it delegates to the parent-class constructor
///(which performs the base MPI setup) and then partitions the available ranks
///into sub-communicators of sub_size ranks each -- this determines which
///communication group, if any, this processor belongs to in MPI land.
65  setup_sub_communicators( sub_size );
66 }
67 
// Partition the non-master ranks [min_client_rank(), n_rank()) into n_comm_
// disjoint groups of exactly sub_size ranks each (integer division: leftover
// "overhang" ranks are assigned to no group and end up with sub_rank_ == -1).
69  n_comm_ = ( n_rank()-min_client_rank() ) / sub_size;
70  tr.Debug << " can allocate " << n_comm_ << " communication groups " << std::endl;
71  tr.Debug << " n_rank: " << n_rank() << " sub_size: " << sub_size << std::endl;
// Toward the master job distributor, each whole sub-communicator counts as a
// single worker.
72  set_n_worker( n_comm_ );
73 #ifdef USEMPI
// Not enough processes to form even one full group -- fail hard and loudly.
74  if ( n_comm_ < 1 ) {
75  tr.Error << "requested sub-communicators of size " << sub_size << " but only " << n_rank() << " processes are available " << std::endl;
76  utility_exit_with_message( "cannot run with requested size of sub-partition" );
77  }
78 
79  MPI_Group world_group;
80  MPI_Comm_group(MPI_COMM_WORLD, &world_group );
// communicator_handle_ == 0 means "this rank belongs to no sub-communicator";
// the loop below overwrites it (with a 1-based group index) when this rank is
// found inside one of the groups.
81  communicator_handle_ = 0;
82  Size i_rank = min_client_rank();
83  mpi_groups_.resize( n_comm_, MPI_GROUP_NULL );
84  mpi_communicators_.resize( n_comm_, MPI_COMM_NULL );
85  for ( Size i_comm = 1; i_comm <= n_comm_; ++i_comm ) {
86  mpi_ranks_.push_back( new int[ sub_size ] ); //delete never called because singleton class
// Fill this group's rank list with the next sub_size consecutive world ranks,
// remembering the group index when our own rank appears in it.
87  for ( Size i = 0; i < sub_size; ++i, ++i_rank ) {
88  mpi_ranks_.back()[ i ] = i_rank;
89  if ( i_rank == rank() ) {
90  communicator_handle_ = i_comm;
91  }
92  }
93  MPI_Group_incl( world_group, sub_size, mpi_ranks_.back(), &(mpi_groups_[ i_comm ]) );
94  // MPI_Comm_create is collective over MPI_COMM_WORLD:
95  /// Note that the call is to be executed by all processes in comm,
96  /// even if they do not belong to the new group. This call applies only to intra-communicators.
97  MPI_Comm_create( MPI_COMM_WORLD, mpi_groups_[ i_comm ], &(mpi_communicators_[ i_comm ]) );
98  }
99 
// Sanity: masters keep handle 0; clients either got no group (0) or a valid
// 1-based index into mpi_communicators_.
100  runtime_assert( rank() < min_client_rank() || communicator_handle_ == 0 || communicator_handle_ <= mpi_communicators_.size() );
// Clients that landed in a group learn their rank within it; everyone else
// (masters and overhang ranks) is marked with sub_rank_ == -1.
101  if ( rank() >= min_client_rank() && communicator_handle_ ) {
102  MPI_Comm_rank( mpi_communicators_[ communicator_handle_ ], &sub_rank_ );
103  } else {
104  sub_rank_ = -1;
105  }
106 #endif //USEMPI
107 }
108 
///@details The master job distributes job-ids as usual. Sub-masters (sub_rank_ 0)
/// obtain a new job from the master; all processes within a single communication
/// context then work on the same job-id, shared via MPI_Bcast. Overhang ranks
/// that belong to no group get 0, which signals spin-down.
113  if ( rank() < min_client_rank() ) {
// Master-side ranks: plain parent behavior, no group coordination needed.
114  return Parent::get_new_job_id();
115  } else {
116 #ifdef USEMPI
117  int new_job_id( -1 );
// Only the sub-master of each group asks the master for a fresh job.
118  if ( sub_rank_ == 0 ) {
119  new_job_id = Parent::get_new_job_id(); //this sets batch_id()
120  }
// Every rank inside a group (sub-master included) takes part in the broadcast.
121  if ( sub_rank_ >= 0 ) {
122  //communicate new job and batch ids to group-members...
123  runtime_assert( communicator_handle_ && communicator_handle_ <= mpi_communicators_.size() );
// Pack job-id and batch-id into one buffer so a single Bcast suffices.
124  int mpi_buf[ 2 ];
125  mpi_buf[ 0 ] = new_job_id;
126  mpi_buf[ 1 ] = current_batch_id();
127  MPI_Bcast( mpi_buf, 2, MPI_INT, 0, mpi_communicators_[ communicator_handle_ ] );
128  new_job_id = mpi_buf[ 0 ];
// Non-sub-master group members adopt the batch-id the sub-master obtained.
129  if ( sub_rank_ > 0 ) set_batch_id( mpi_buf[ 1 ] );
130  runtime_assert( new_job_id >= 0 );
131  return new_job_id;
132  }
133 #endif
134  } //e.g. overhang processes that didn't fit into any of the sub-groups -- send spin-down
135  return 0;
136 }
137 
///@brief dummy for master/slave version
///@details Only the group's sub-master (sub_rank() == 0) -- or a rank outside
/// any group (sub_rank() < 0) -- reports success upstream, so each job is
/// reported exactly once per communication group.
139 void
141  if ( sub_rank() <= 0 ) {
142  Parent::job_succeeded( pose, run_time);
143  }
144 }
145 
///@brief dummy for master/slave version
///@details Mirrors job_succeeded(): only the sub-master (or an ungrouped rank)
/// forwards the failure to the parent distributor, avoiding duplicate reports
/// from the other members of the communication group.
147 void
149  if ( sub_rank() <= 0 ) {
150  Parent::job_failed( pose, retry);
151  }
152 }
153 
154 #ifdef USEMPI
///@brief Accessor for the sub-communicator this process belongs to.
///@details Only valid on ranks that were assigned a group in
/// setup_sub_communicators(); the asserts reject calls from the master or from
/// unassigned "overhang" ranks (communicator_handle_ == 0). The handle is a
/// 1-based index into mpi_communicators_.
155 MPI_Comm const& MPIMultiCommJobDistributor::current_mpi_comm() {
156  runtime_assert( communicator_handle_ );
157  runtime_assert( communicator_handle_ <= mpi_communicators_.size() );
158  return mpi_communicators_[ communicator_handle_ ];
159 }
160 #endif
161 
162 }//jd2
163 }//protocols