Rosetta 3.5
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
MPI_Master.cc
Go to the documentation of this file.
1 // -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
2 // vi: set ts=2 noet:
3 //
4 // (c) Copyright Rosetta Commons Member Institutions.
5 // (c) This file is part of the Rosetta software suite and is made available under license.
6 // (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
7 // (c) For more information, see http://www.rosettacommons.org. Questions about this can be
8 // (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
9 
10 /// @file protocols/elscripts/MPI_Master.cc
11 /// @brief The MPI_Master role in elscripts, handles trajectories, generating workunits, processing of results
12 /// @author Ken Jung
13 
14 #if defined (USEBOOSTMPI) && defined (USELUA)
16 
17 #include <boost/bind.hpp>
18 #include <boost/function.hpp>
19 #include <boost/lexical_cast.hpp>
20 
21 #ifdef USEBOOSTSERIALIZE
22 #include <boost/archive/binary_oarchive.hpp>
23 #endif
24 
26 
27 #include <basic/options/option.hh>
28 #include <basic/options/keys/els.OptionKeys.gen.hh>
29 
30 #include <utility/lua/LuaIterator.hh>
31 
32 #include <utility/Factory.hh>
36 
37 #include <basic/Tracer.hh>
38 
39 // elscripts master
40 namespace protocols {
41 namespace elscripts {
42 
43 static basic::Tracer TR("protocols.elscripts.MPI_Master");
44 
45 void lregister_MPI_Master( lua_State * lstate ) {
46  lregister_Master( lstate );
47  luabind::module(lstate, "protocols")
48  [
49  luabind::namespace_("elscripts")
50  [
51  luabind::class_<MPI_Master, Master>("MPI_Master")
52  ]
53  ];
54 }
55 
56 MPI_Master::MPI_Master( boost::mpi::communicator world, std::vector<int> slaves, int num_trajectories, boost::uint64_t mem_limit, boost::uint64_t reserved_mem, boost::uint64_t reserved_mem_multiplier) :
57  world_(world),
58  last_status_sweep_time_( boost::posix_time::microsec_clock::universal_time() - boost::posix_time::minutes(100)),
59  slaves_( slaves ),
60  Master( num_trajectories, mem_limit, reserved_mem, reserved_mem_multiplier) {
61  using namespace basic::options;
62  using namespace basic::options::OptionKeys;
63 
64 
65  // endpoint uses this function to get role-wide memory usage
66  boost::function< boost::uint64_t ()> ref_available_mem = boost::bind( &protocols::elscripts::MPI_Master::available_mem, this );
67 
68  slave_comm_ = protocols::wum2::EndPointSP( new protocols::wum2::MPI_EndPoint( world, ref_available_mem ));
69 
70  // initializing inputterstream
71  inputterstream_.reset ( new protocols::inputter::InputterStream (
72  inputter_rank(),
73  // num of masters
74  (option[OptionKeys::els::num_traj]() / option[OptionKeys::els::traj_per_master]() ) +
75  (!( option[OptionKeys::els::num_traj]() % option[OptionKeys::els::traj_per_master]() == 0 ))
76  ) );
77 
78  lregister_MPI_Master(lstate_);
79  luabind::globals(lstate_)["master"] = this;
80  luabind::globals(lstate_)["rank"] = world_.rank();
81 
82  instantiate_inputterstream();
83 }
84 
85 void MPI_Master::go(){
86  using namespace utility::lua;
87 
88  // create functors to call back functions for automated endpoint processing
89  boost::function<void ( protocols::wum2::StatusResponse, int )> ref_slave_listen_wu_sendrecv = boost::bind( &protocols::wum2::EndPoint::listen_wu_sendrecv, slave_comm_, _1, _2);
90  boost::function<bool ( protocols::wum2::StatusResponse )> ref_slave_initiate_wu_sendrecv = boost::bind( &protocols::wum2::EndPoint::initiate_wu_sendrecv, slave_comm_, _1);
91 // boost::function<void ( protocols::wum2::StatusResponse, int )> ref_pool_listen_wu_sendrecv = boost::bind( &protocols::wum2::EndPoint::listen_wu_sendrecv, pool_comm_, _1, _2);
92  // boost::function<bool ( protocols::wum2::StatusResponse )> ref_pool_initiate_wu_sendrecv = boost::bind( &protocols::wum2::EndPoint::initiate_wu_sendrecv, pool_comm_, _1);
93 
94  // setting intial sweep time such that the first sweep will always occur immediately
95 
96  // entering main loop
97  while( 1 ) {
98  mpicounter_++;
99 
100  // pool_comm_->check_and_act_clearcommand();
101  //pool_comm_->check_and_act_status_request( f );
102 
103  // only send status requests as often as the shortest_wu would take
104  if ( ( boost::posix_time::microsec_clock::universal_time() - last_status_sweep_time_) > boost::posix_time::seconds(basic::options::option[basic::options::OptionKeys::els::shortest_wu]())){
105  for( int i = 0; i < slaves_.size(); i++ ) {
106  if( ! slave_comm_->has_open_status( slaves_[i] ) ) {
107  // only send status requests to slaves that have responded
108  // if they havent responded yet, spamming them with more messages won't help
109  // probably should retry a few times after a timeout
110  // removing slave from slave list is unecessary, but we do lose workunits sent there
111  slave_comm_->send_status_request( slaves_[i] );
112  }
113  }
114  last_status_sweep_time_ = boost::posix_time::microsec_clock::universal_time();
115  }
116 
117  fill_trajectories();
118 
119  if( mpicounter_ >= 10000000 ) {
120  // hardcoded, only try and generate initial workunits every 60 seconds
121  if ( ( boost::posix_time::microsec_clock::universal_time() - last_generate_initial_wu_time_) > boost::posix_time::seconds( 60 )){
122 
123  // generate initial wu
124  // user must be aware of memory limits if they use their own function
125  int m = luaL_dostring ( lstate_, "loop_every()" );
126  if( m == 1) {
127  TR << "calling lua function loop_every() failed. Error is:" << std::endl;
128  TR << lua_tostring(lstate_, -1) << std::endl;
129  std::exit(9);
130  }
131  last_generate_initial_wu_time_ = boost::posix_time::microsec_clock::universal_time();
132  }
133  mpicounter_ = 0;
134  }
135 
136  // send/recv WU from slaves
137  slave_comm_->act_on_status_response( ref_slave_initiate_wu_sendrecv );
138 
139  // send/recv WU from pool
140  //pool_comm_->check_and_act_status_request( ref_pool_listen_wu_sendrecv );
141 
142  // process slave results
143  protocols::wum2::WorkUnitSP wu = slave_comm_->inq().pop_front();
144  protocols::wum2::WorkUnit_ElScriptsSP castattempt = boost::dynamic_pointer_cast<protocols::wum2::WorkUnit_ElScripts> (wu);
145  if( castattempt != 0 ) {
146  traj_idx_ = castattempt->trajectory_idx();
147  std::string wuname = castattempt->name();
148  // export new variables to lua for use in the lua fxn
149  luabind::globals(lstate_)["traj_env"][traj_idx_]["pipemap"] = castattempt->pipemap().lock();
150  luabind::globals(lstate_)["traj_env"][traj_idx_]["state"] = castattempt->state().lock();
151 
152  // calling proceed fxn
153  // also increment wu_done here to save overhead of calling into lua vm
154  std::string action = "(\n"
155  "do\n"
156  " local _ENV = traj_env[)" + boost::lexical_cast<std::string>(traj_idx_) + "(]\n"
157  " if wu_done.)"+wuname+"( == nil then\n"
158  " wu_done.)"+wuname+"( = 0\n"
159  " end\n"
160  " wu_done.)"+wuname+"( = wu_done.)"+wuname+"( + 1\n"
161  " els_setenv(_ENV)\n"
162  " els.workunits.)"+wuname+"DELIM(.proceed_on_master()\n"
163  "end\n"
164  ")DELIM";
165  int err = luaL_dostring ( lstate_, action.c_str() );
166  if( err == 1) {
167  TR << "Calling lua function for workunit " << wuname << " proceed_on_master fxn failed. Error is:" << std::endl;
168  TR << lua_tostring(lstate_, -1) << std::endl;
169  std::exit(9);
170  }
171  luabind::globals(lstate_)["traj_env"][traj_idx_]["pipemap"] = luabind::nil;
172  luabind::globals(lstate_)["traj_env"][traj_idx_]["state"] = luabind::nil;
173  lua_gc(lstate_, LUA_GCCOLLECT, 0);
174  }
175 
176  // process pool results
177  /*WorkUnitSP wu = pool_comm_->inq_popfront();
178  WorkUnit_RequestStructSP castattempt = boost::dynamic_pointer_cast<WorkUnit_RequestStruct> (wu);
179  if( castattempt != 0 ) {
180  trajectories[cast_attempt->traj_idx()] = cast_attempt->replacement_pose();
181  }
182  */
183 
184  // cleans up any completed mpi::reqs and their buffers
185  slave_comm_->cleanup_reqs();
186  //pool_comm_->cleanup_reqs();
187  }
188 }
189 
190 /*void MPI_Master::request_pool_structures( std::vector < int > needs_replace ) {
191  for( int i = 0; i < needs_replace.size(); i++ ) {
192  request_pool_structure( i );
193  }
194 }
195 
196 void MPI_Master::request_pool_structure( int traj_idx ) {
197  protocols::wum2::WorkUnitSP request_struct_wu( new protocols::wum2::WorkUnit_RequestStruct() );
198  pool_comm_.push_back( request_struct_wu );
199 }*/
200 
201 } //elscripts
202 } //protocols
203 #endif