16 #include <utility/io/mpistream.hh>
17 #include <utility/exit.hh>
19 #include <utility/file/file_sys_util.hh>
22 #include <basic/Tracer.hh>
23 #include <basic/MemTracer.hh>
28 #include <utility/vector1.hh>
35 using namespace utility::io::mpi_stream;
// Static tracer object for this file. All tr.Debug / tr.Info / tr.Warning output
// visible in this excerpt is written through it; the string literal is the name
// the tracer is constructed with.
37 static basic::Tracer
tr(
"protocols.jd2.MpiFileBuffer");
// Tail of a constructor's member-initializer list (the signature is outside this
// excerpt): buffer_rank_ is taken from file_buf_rank, bSlaveCanOpenFile_ and
// bKeepFilesAlive_ start as true, and seconds_to_keep_files_alive_ starts at 100.
41 : buffer_rank_( file_buf_rank ),
43 bSlaveCanOpenFile_( true ),
44 bKeepFilesAlive_( true ),
45 seconds_to_keep_files_alive_( 100 ) {
// Query this process's rank in MPI_COMM_WORLD and write it to my_rank.
// NOTE(review): other visible code reads my_rank_ (trailing underscore); how
// my_rank relates to that member is not visible here — confirm.
51 MPI_Comm_rank (MPI_COMM_WORLD, &my_rank );
// Receive `size` characters from rank `slave` into a temporary heap buffer and
// copy exactly `size` of them into `line`. The buffer is allocated with one
// extra byte, but the visible lines never read or write that byte.
// NOTE(review): no delete[] for cbuf is visible in this excerpt — confirm the
// buffer is released after the assign, otherwise every call leaks size+1 bytes.
64 char *cbuf =
new char[ size+1 ];
66 MPI_Recv( cbuf, size, MPI_CHAR, slave, MPI_STREAM_TAG, MPI_COMM_WORLD, &stat );
67 line.assign( cbuf, size );
// Fragment of a garbage-collection pass over a GarbageList (loop header and
// several statements are outside this excerpt).
// Current time; below it is compared against the time stored in each entry.
82 time_t
const now( time(NULL) );
85 tr.Debug <<
"garbage collection active..." << std::endl;
// Separate iterator to the entry under inspection.
// NOTE(review): presumably so the loop iterator `it` can move on before this
// entry is erased — the erase itself is not visible here, confirm.
87 GarbageList::iterator to_erase( it );
// As used here, an entry's `first` is the channel id and `second` is a time
// value that is subtracted from `now` and reported as seconds.
89 tr.Debug <<
"marked " << to_erase->first <<
" " << now-to_erase->second <<
" seconds ago." << std::endl;
91 int channel( to_erase->first );
// The channel's buffer has no open slaves left: log it (the closing
// statements themselves are outside this excerpt).
100 if ( !buf->has_open_slaves() ) {
101 tr.Debug <<
"channel "<< channel
102 <<
" has no more open slaves... and has not been touched again --- close via garbage collector" << std::endl;
105 mem_tr <<
"closed_channel" << std::endl;
// NOTE(review): an unconditional runtime_assert( false ) directly precedes the
// "has open slaves again" message. The surrounding branch structure is not
// visible, so confirm whether that message can ever be reached.
107 runtime_assert(
false );
108 tr.Debug <<
"channel " << to_erase->first <<
" has open slaves again ... not closed, remove from closing list" << std::endl;
// Receive one 4-int message header from any rank. As read below: buf[0] is the
// sending rank, buf[2] the message type and buf[3] the channel id. buf[1] is
// not read in the visible lines.
124 MPI_Recv( buf, 4, MPI_INT,
MPI_ANY_SOURCE, MPI_STREAM_TAG, MPI_COMM_WORLD, &stat );
125 Size const msg_type( buf[ 2 ] );
127 Size const slave( buf[ 0 ] );
128 Size const channel_id( buf[ 3 ] );
// Dispatch on the message type. The bodies of several branches are outside
// this excerpt, so only the visible statements are described.
// OPEN / OPEN_APPEND: open a channel for `filename`; the third argument is true
// exactly when the message type is MPI_STREAM_OPEN_APPEND.
129 if ( msg_type == MPI_STREAM_OPEN || msg_type == MPI_STREAM_OPEN_APPEND ) {
133 open_channel( slave, filename, msg_type == MPI_STREAM_OPEN_APPEND, file_status );
134 }
else if ( msg_type == MPI_STREAM_SEND ) {
138 }
else if ( msg_type == MPI_STREAM_CLOSE ) {
140 }
else if ( msg_type == MPI_STREAM_FLUSH ) {
141 tr.Debug <<
"MPI_STREAM_FLUSH received" << std::endl;
149 }
else if ( msg_type == MPI_STREAM_FILE_EXIST ) {
// Answer the asking rank with a single int: 1 if `exist` is true, else 0.
154 int iexist( exist ? 1 : 0 );
155 MPI_Send( &iexist, 1, MPI_INT, slave, MPI_STREAM_TAG, MPI_COMM_WORLD );
// Close-file request: after logging, answer the asking rank with a single int,
// 1 if `success` is true, else 0. (The branch condition and the close call
// that sets `success` are not visible here.)
159 tr.Debug <<
"received MPI_CLOSE_FILE " <<filename<<
" from Node" << slave << std::endl;
162 int closed = success ? 1 : 0;
163 MPI_Send( &closed, 1, MPI_INT, slave, MPI_STREAM_TAG, MPI_COMM_WORLD );
// Per its message text this handles an unrecognised message type by exiting;
// the enclosing else is not visible in this excerpt.
165 utility_exit_with_message(
"unknown msg-id received in MpiFileBuffer.cc");
// Send a 4-int message to the buffer rank.
// NOTE(review): this passes &buf while other visible sends pass buf. The two
// are the same address only if buf is declared as an array; the declaration is
// outside this excerpt — confirm.
181 MPI_Send( &buf, 4, MPI_INT,
buffer_rank_, MPI_STREAM_TAG, MPI_COMM_WORLD );
// Fragment handling a block request for `filename` coming from rank
// `from_node`. First look the file up in open_files_.
187 Filenames::const_iterator iter =
open_files_.find( filename );
// File found: take its channel id from the table entry, require a non-null
// buffer, and call block( from_node ) on it. The debug message is emitted
// after block() returns.
191 channel = iter->second;
193 runtime_assert( buf );
194 buf->block( from_node );
195 tr.Debug <<
"block released... for file " << filename << std::endl;
// File not in open_files_ (per the message text): a warning is logged.
197 tr.Warning <<
"file " << filename <<
" is not known to MpiFileBuffer " << std::endl;
// Send `status` back to the requesting rank as a single int.
// NOTE(review): how `status` is set in each case is not visible here.
200 tr.Debug <<
"send blocking confirmation... " << filename << std::endl;
201 MPI_Send( &status, 1, MPI_INT, from_node, MPI_STREAM_TAG, MPI_COMM_WORLD );
// Requesting side of a block: send the 4-int header to the buffer rank, then
// wait for a single-int reply from that rank.
215 MPI_Send(buf, 4, MPI_INT,
buffer_rank_, MPI_STREAM_TAG, MPI_COMM_WORLD );
217 tr.Debug <<
"wait for confirmation of block " << std::endl;
// NOTE(review): the reply is received through &buf although the send above
// uses buf and the check below reads buf[ 0 ]. This is only equivalent if buf
// is an array (declaration not visible) — confirm.
219 MPI_Recv( &buf, 1, MPI_INT, buffer_rank_, MPI_STREAM_TAG, MPI_COMM_WORLD, &stat );
// A reply of 1 is logged as confirmed; the second message is presumably the
// else branch (not visible).
220 if ( buf[ 0 ] == 1 ) {
221 tr.Debug <<
"block confirmed... " << std::endl;
224 tr.Debug <<
"block not accepted ... " << std::endl;
// Fragment of a remote close request. It must not run on the buffer rank itself.
232 runtime_assert( buffer_rank_ !=
my_rank_ );
// Header slot 1 carries the length of the filename that is sent right after
// the header. (The other header slots are filled outside this excerpt.)
235 buf[ 1 ] = filename.size();
// Send the 4-int header, then the filename characters, to the buffer rank.
// NOTE(review): the const_cast is presumably needed because the MPI_Send
// prototype in use takes a non-const buffer pointer — confirm.
240 MPI_Send(buf, 4, MPI_INT, buffer_rank_, MPI_STREAM_TAG, MPI_COMM_WORLD );
241 MPI_Send(const_cast<char*> (filename.data()), filename.size(), MPI_CHAR,
buffer_rank_, MPI_STREAM_TAG, MPI_COMM_WORLD );
// Wait for the buffer rank's single-int answer (see the &buf / buf remark:
// equivalent only if buf is an array, declaration not visible).
244 MPI_Recv( &buf, 1, MPI_INT, buffer_rank_, MPI_STREAM_TAG, MPI_COMM_WORLD, &stat );
246 if ( buf[ 0 ] == 1 ) {
247 tr.Debug <<
"remote close confirmed... " << std::endl;
249 tr.Debug <<
"close not accepted ... " << std::endl;
// Result is true for any non-zero answer, although only an answer of exactly 1
// is logged as confirmed above.
251 return buf[ 0 ] != 0;
// Fragment of a release request for `filename`. It must not run on the buffer
// rank itself.
263 runtime_assert( buffer_rank_ !=
my_rank_ );
// Header slot 1 carries the filename length; the header is then sent to the
// buffer rank. (Filling of the other slots and sending of the filename are
// outside this excerpt.)
267 buf[ 1 ] = filename.size();
271 tr.Debug <<
"release file " << filename << std::endl;
272 MPI_Send(buf, 4, MPI_INT, buffer_rank_, MPI_STREAM_TAG, MPI_COMM_WORLD );
// Per the message text, a release for a file that is not blocked is ignored;
// the condition guarding this message is not visible here.
277 tr.Debug << filename <<
" is not blocked... release ignored " << std::endl;
// Look `filename` up in open_files_. Further down (intermediate lines not
// visible) a channel is reported as being removed from the garbage-collector
// list; the removal statement itself is outside this excerpt.
283 Filenames::const_iterator iter =
open_files_.find( filename );
292 tr.Debug <<
"remove channel " << channel <<
" from garbage-collector list " << std::endl;
// Fragment that opens a channel for `filename` on behalf of rank `slave`.
// NOTE(review): presumably the body of open_channel — the signature is outside
// this excerpt.
300 tr.Debug <<
"open mpi-channel from slave-node " << slave <<
" for file: " << filename << std::endl;
301 Filenames::const_iterator iter =
open_files_.find( filename );
// File already registered: reuse its channel id and report MPI_SUCCESS_APPEND.
304 channel = iter->second;
306 runtime_assert( buf );
308 tr.Debug <<
"channel exists already: " << channel << std::endl;
309 status = MPI_SUCCESS_APPEND;
// New file: register filename -> channel in open_files_. The assert requires
// channel to stay below 2147483647.
// NOTE(review): presumably because channel is sent as an MPI_INT below — confirm.
315 runtime_assert( channel < 2147483647 );
316 open_files_.insert( Filenames::value_type( filename, channel ) );
318 tr.Debug <<
"new channel established: " << channel << std::endl;
// Answer the requesting rank with two ints: the channel id and the status.
321 send_buf[ 0 ] = channel;
322 send_buf[ 1 ] = status;
323 MPI_Send( &send_buf, 2, MPI_INT, slave, MPI_STREAM_TAG, MPI_COMM_WORLD );
// For status MPI_SUCCESS_NEW: receive one more 4-int header from the same rank
// and require it to be an MPI_STREAM_SEND whose sender field equals `slave`.
// `header` is then logged and stored as a line in the new channel's buffer.
// NOTE(review): how `header` is received is outside this excerpt, and
// channel_id is declared here but not used in the visible lines.
324 if ( status == MPI_SUCCESS_NEW ) {
327 MPI_Recv( buf, 4, MPI_INT, slave, MPI_STREAM_TAG, MPI_COMM_WORLD, &stat );
328 Size const msg_type( buf[ 2 ] );
330 Size const slave_id( buf[ 0 ] );
331 Size const channel_id( buf[ 3 ] );
332 tr.Debug <<
"header? : received: " << buf[ 0 ] <<
" " << buf[ 1 ] <<
" " << buf[ 2 ] <<
" " << buf[ 3 ] << std::endl;
333 runtime_assert( msg_type == MPI_STREAM_SEND && slave_id == slave );
336 tr.Debug << header << std::endl;
337 open_buffers_[ channel ]->store_line( slave, channel, header );
// Store `line` for (slave, channel) in the channel's buffer, which must be
// non-null.
352 runtime_assert( buf );
353 buf->store_line( slave, channel, line );
// When the value returned by length( slave ) exceeds 5e6, log that the
// autoflush threshold was hit.
// NOTE(review): the flush call itself is not visible in this excerpt, and the
// unit of length() is only implied by the "5 MB" message text — confirm.
354 if (buf->length(slave) > 5e6) {
355 tr.Info <<
"autoflush threshold (5 MB) triggered for slave " << slave <<
" channel: " << channel << std::endl;
// Fragment of a flush for (slave, channel_id): log the request and require a
// non-null buffer. The lookup of `buf` and the flush call are outside this
// excerpt.
361 tr.Debug <<
"flush channel for slave " << slave <<
" channel: " << channel_id << std::endl;
363 runtime_assert( buf );
// Fragment of closing channel `channel_id` for rank `slave`. The buffer must be
// non-null; the log line reports its current number of open slaves and the
// number of entries in open_buffers_.
369 runtime_assert( buf );
371 tr.Debug <<
"close channel "<< channel_id <<
" for slave " << slave
372 <<
" currently " << buf->nr_open_slaves() <<
" open slave buffers; open files: " <<
open_buffers_.size() << std::endl;
// Messages emitted when the channel is closed completely; the condition
// guarding them and the closing statements are outside this excerpt.
374 tr.Debug <<
"channel has no more open slaves... close completely now" << std::endl;
376 mem_tr <<
"closed_channel" << std::endl;
// NOTE(review): this has_open_slaves() check appears after the "close
// completely" messages in the original line order; the branch layout is not
// visible, so confirm how the two relate.
378 if ( !buf->has_open_slaves() ) {
383 tr.Debug <<
"currently " <<
open_buffers_.size() <<
" open buffers" << std::endl;
// Log a remote close-file request for `filename` together with this process's
// rank (my_rank_). The request itself is outside this excerpt.
388 tr.Debug <<
"remote close file " << filename <<
" on node " <<
my_rank_ << std::endl;
// Fragment of closing `filename`, which is associated with `channel_id`.
393 tr.Debug <<
"close file " << filename <<
" with channel_id " << channel_id << std::endl;
// Non-const lookup of the file in open_files_.
// NOTE(review): presumably non-const so the entry can be erased — the erase is
// not visible here.
405 Filenames::iterator file_iter =
open_files_.find( filename );
// NOTE(review): likely bug. If channel_id is an integral type (other visible
// declarations are `Size const channel_id`), `"literal" + channel_id` is
// pointer arithmetic on the string literal, not concatenation: the message is
// truncated by channel_id characters, and reading past the literal is
// undefined behaviour for large ids. Convert the id to a string before
// appending it. The declaration of channel_id in this function is outside this
// excerpt — confirm its type.
410 utility_exit_with_message(
"illegal attempt to delete file with non-existant channel_id " + channel_id );
// Return a newly heap-allocated WriteFileSFB constructed from filename,
// channel, append and status.
// NOTE(review): who owns and deletes the returned object is not visible in
// this excerpt — confirm at the call site.
415 return new WriteFileSFB( filename, channel, append, status );