33 #include <ObjexxFCL/format.hh>
34 #include <ObjexxFCL/string.functions.hh>
36 #include <utility/string_util.hh>
42 #include <basic/Tracer.hh>
52 #include <basic/options/option_macros.hh>
54 #include <utility/vector1.hh>
62 using namespace basic::options;
63 using namespace basic::options::OptionKeys;
// Declares the option noesy_weights::tolerances with three default values
// (0.04, 0.03, 0.3). Per its help text these are the fallback tolerances for
// [indirect_H, direct_H, label] when a peak file carries no #TOLERANCE line;
// the header-reading code in this file indexes the option with TOL_H_DIRECT
// and TOL_LABEL when a parsed tolerance is still 0.0.
// NOTE(review): NEW_OPT3 presumably comes from basic/options/option_macros.hh
// and the enclosing scope is not visible in this excerpt -- confirm.
65 NEW_OPT3( noesy_weights::tolerances,
"if no #TOLERANCE is found use this for [indirect_H, direct_H, label ]", 0.04, 0.03, 0.3 );
// Tracer object used throughout this file for diagnostic output
// (tr.Trace, tr.Debug, tr.Info, tr.Warning, tr.Error); it is constructed
// with the channel name "protocols.noesy_assign.io".
70 static basic::Tracer
tr(
"protocols.noesy_assign.io");
73 namespace noesy_assign {
76 PeakFileFormat::PeakFileFormat() :
95 std::ostringstream line_end;
96 using namespace ObjexxFCL::fmt;
107 os << ObjexxFCL::fmt::RJ( 6, ct ) <<
" ";
121 if (
info1_->has_label() ) {
122 atom_names.push_back( ObjexxFCL::lowercased(
info1_->label_atom_type() ) );
123 tolerances.push_back(
info1_->label_tolerance() );
126 if (
info2_->has_label() ) {
127 atom_names.push_back( ObjexxFCL::uppercased(
info2_->label_atom_type() ) );
128 tolerances.push_back(
info2_->label_tolerance() );
132 atom_names.push_back( ObjexxFCL::uppercased(
info2_->main_atom() ) );
133 tolerances.push_back(
info2_->proton_tolerance() );
136 atom_names.push_back( ObjexxFCL::lowercased(
info1_->main_atom() ) );
137 tolerances.push_back(
info1_->proton_tolerance() );
139 os <<
"# Number of dimensions " << dim << std::endl;
140 os <<
"#FILENAME " <<
filename() << std::endl;
141 std::string format_str =
"xeasy" + utility::to_string( dim ) +
"D";
142 os <<
"#FORMAT " << format_str << std::endl;
144 for (
Size ct = 1; ct <= atom_names.size(); ct++ ) {
145 os <<
"#INAME " << ct <<
" " << atom_names[ ct ] << std::endl;
146 if ( atom_names[ ct ].
size()>1 ) {
147 cyana_str +=
"["+atom_names[ct]+
"]";
149 cyana_str += atom_names[ ct ];
152 if (
info1_->fold_proton_resonance().is_folded() ) {
153 os <<
"#FOLD "<< dim <<
" " <<
info1_->fold_proton_resonance().start() <<
" " <<
info1_->fold_proton_resonance().end() << std::endl;
155 if (
info1_->fold_label_resonance().is_folded() ) {
156 os <<
"#FOLD "<< 1 <<
" " <<
info1_->fold_label_resonance().start() <<
" " <<
info1_->fold_label_resonance().end() << std::endl;
159 if (
info2_->fold_proton_resonance().is_folded() ) {
160 os <<
"#FOLD "<< dim-1 <<
" " <<
info2_->fold_proton_resonance().start() <<
" " <<
info2_->fold_proton_resonance().end() << std::endl;
162 if (
info2_->fold_label_resonance().is_folded() ) {
163 os <<
"#FOLD "<< 2 <<
" " <<
info2_->fold_label_resonance().start() <<
" " <<
info2_->fold_label_resonance().end() << std::endl;
165 os <<
"#MAX_NOE_DIST " <<
info1_->max_noe_distance() << std::endl;
166 os <<
"#CYANAFORMAT " << cyana_str << std::endl;
168 for (
Size ct = 1; ct <= tolerances.size(); ct++ ) {
169 os << ObjexxFCL::fmt::RJ( 8, tolerances[ ct ] );
185 if (
info1_->has_label() ) {
213 using namespace ObjexxFCL;
223 bool HN_column_labels(
false );
226 bool simnoesy(
false );
228 std::istringstream line_stream;
229 if ( next_line.size() ) {
241 tr.Trace <<
"header line: " << line << std::endl;
242 line_stream.str( line );
245 if ( tag[0]!=
'#' ) { next_line = line; }
247 if ( line.find(
"Number of dimensions" ) != std::string::npos ) {
248 line_stream >> tag >> tag >> tag >> dim;
249 atom_names.resize( dim,
"" );
250 tolerances.resize( dim, 0.0 );
251 }
else if ( tag ==
"#FILENAME" ) {
255 }
else if ( tag ==
"#FORMAT" ) {
257 line_stream >> format;
258 std::string expected_format =
"xeasy" + utility::to_string( dim ) +
"D";
259 if ( format != expected_format ) {
260 tr.Warning <<
"[WARNING] Format inconsistent: " << expected_format <<
" found in file: " << format << std::endl;
262 }
else if ( tag ==
"#INAME" ) {
265 line_stream >> index;
266 if ( !line_stream.good() ) {
267 throw utility::excn::EXCN_BadInput(
" problem reading peak file, column index and atom-name expected after key-word #INAME ");
270 if ( name ==
"HN" || name ==
"HC" || name ==
"H1" || name ==
"1H" ) HN_column_labels =
true;
271 if ( name ==
"H1" ) name =
"H";
272 if ( name ==
"1H" ) name =
"HN";
273 if ( name ==
"C13" || name ==
"13C" ) name =
"C";
274 if ( name ==
"N15" || name ==
"15N" ) name =
"N";
275 if ( name ==
"SIM" ) name =
"NC";
276 if ( name ==
"sim" ) name =
"nc";
277 if ( name ==
"CN" ) name =
"NC" ;
278 if ( name ==
"cn" ) name =
"nc" ;
279 if ( atom_names.size() < index ) {
280 tr.Error <<
"only " << dim <<
"D format; but " << index <<
" index found in #INAME line " << line << std::endl;
283 if ( name ==
"nc" || name ==
"NC" ) { simnoesy =
true; }
284 if ( atom_names[ index ] !=
"" )
tr.Warning <<
"found index" << index <<
"in two different #INAME lines "<< std::endl;
285 atom_names[ index ] = name;
286 }
else if ( tag ==
"#CYANAFORMAT" ) {
287 line_stream >> cyana_string;
290 }
else if ( tag ==
"#FOLD" ) {
294 line_stream >> fold_dim >> start >>
end;
295 fold_starts[ fold_dim ]=
start;
296 fold_ends[ fold_dim ]=
end;
297 }
else if ( tag ==
"#IGNORE_NEGATIVE_INTENSITY" ) {
299 }
else if ( tag ==
"#MAX_NOE_DIST" ) {
300 if ( max_noe_dist < 0.01 ) {
301 tr.Warning <<
"MAX_NOE_DIST flag in peak-file ignored because of 0.0 in -noesy::calibration::max_noe_dist" << std::endl;
303 line_stream >> max_noe_dist;
305 }
else if ( tag ==
"#TOLERANCE" ) {
306 for (
Size i = 1; i <= dim; i++ ) {
309 tolerances[ i ] = val;
312 }
else if ( tag[0] ==
'#' ) {
313 tr.Warning <<
"[PeakFileFormat]: ignore header line: " << line << std::endl;
316 tr.Error <<
"reading line as header: " << line << std::endl
317 <<
"expect TOLERANCE as last header entry " << std::endl;
322 tr.Debug <<
"finished with header found " << dim <<
" dimensions" << std::endl;
323 using namespace basic::options;
324 using namespace basic::options::OptionKeys;
325 Real const default_tolerance_h( (dim < 4) ?
329 Real const default_tolerance_H( (dim < 3) ?
331 option[ noesy_weights::tolerances ][ TOL_H_DIRECT ]
334 throw utility::excn::EXCN_BadInput(
" problem reading peak file, no or incomplete header ");
340 if ( HN_column_labels && cyana_string !=
"none" ) {
341 tr.Info <<
"use CYANA string to work out column order" << std::endl;
343 for (
Size i = 1; i<=dim; i++ ) {
344 if ( HN_column_labels && cyana_string !=
"none" ) {
345 if ( simnoesy )
throw utility::excn::EXCN_BadInput(
"cannot use HN for protons when SimNOESY ( i.e., NC for label atom name");
346 atom_names[ i ]=cyana_string[ i-1 ];
348 if ( atom_names[ i ] ==
"h"
349 || ( HN_column_labels && atom_names[ i ]==
"H" )
350 || ( atom_names[ i ]==
"H" && i==2 && dim==2 &&
col2proton_[ 1 ] == 1 )
353 if ( tolerances[ i ]==0.0 ) tolerances[ i ]=default_tolerance_h;
355 info2_ =
new CrossPeakInfo( uppercased( atom_names[ i ] ),
"", max_noe_dist, tolerances[ i ], 0.0 );
357 info2_->set_proton( uppercased( atom_names[ i ] ), tolerances[ i ] );
359 }
else if ( atom_names[ i ] ==
"H" || ( HN_column_labels && atom_names[ i ]==
"HN" ) || ( HN_column_labels && atom_names[ i ]==
"HC" )) {
361 if ( HN_column_labels ) atom_names[ i ] =
"h";
362 if ( tolerances[ i ]==0.0 ) tolerances[ i ]=default_tolerance_H;
366 info1_->set_proton( atom_names[ i ], tolerances[ i ] );
368 }
else if ( atom_names[ i ] ==
"c" || atom_names[ i ] ==
"n" || atom_names[ i ] ==
"nc" ) {
371 if ( tolerances[ i ]==0.0 ) tolerances[ i ]= option[ noesy_weights::tolerances ][
TOL_LABEL ];
373 info2_ =
new CrossPeakInfo(
"", uppercased( atom_names[ i ] ), max_noe_dist, 0.0, tolerances[ i ] );
375 info2_->set_label( uppercased( atom_names[ i ] ), tolerances[ i ] );
377 }
else if ( atom_names[ i ] ==
"C" || atom_names[ i ] ==
"N" || atom_names[ i ] ==
"NC" ) {
380 if ( tolerances[ i ]==0.0 ) tolerances[ i ]= option[ noesy_weights::tolerances ][
TOL_LABEL ];
384 info1_->set_label( atom_names[ i ], tolerances[ i ] );
388 for (
Size i = 1; i<=dim; i++ ) {
390 info->set_folding_window( fold_starts[ i ], fold_ends[ i ],
col2islabel_[ i ] );
394 throw utility::excn::EXCN_BadInput(
" problem reading peak file, no or errorenous header ");
397 tr.Debug <<
" cross-peak infos: " << *
info1_ <<
" and " << *
info2_ << std::endl;
401 tr.Debug <<
" check if we need to swap columns " << std::endl;
403 for (
Size i=1; i<=3; i++ ) {
406 if ( ct_col_1 == 1 ) {
407 for (
Size i=1; i<=3; i++ ) {
424 runtime_assert(
ncol >=2 &&
ncol <= 4 );
427 }
else if (
ncol == 3 ) {
432 cp->set_info( 1,
info1_ );
433 cp->set_info( 2,
info2_ );
435 if ( !next_line.size() ) {
436 getline( is, next_line );
438 tr.Trace <<
" next_line: " << next_line << std::endl;
439 std::istringstream line_stream( next_line );
443 if ( !line_stream.good() ) {
447 cp->set_peak_id(
id );
453 line_stream >> tag; line_stream >> tag;
459 line_stream >> tag; line_stream >> tag;
467 for (
Size icol=1; icol<=
ncol; ++icol ) {
477 runtime_assert( cp.
has_label( iproton ) );
489 for (
Size icol=1; icol<=
ncol; ++icol ) {
493 if ( !is_label ) val = cp.
proton( iproton ).
freq();
495 runtime_assert( cp.
has_label( iproton ) );
498 os << ObjexxFCL::fmt::F( 8, 3, val ) <<
" ";
505 val = val < 0 ? -val : val;
521 if ( !getline( rest_is, line ) )
return;
522 tr.Trace <<
"rest_of_line: --" << line <<
"--" << std::endl;
527 if ( !getline( is, line ) ) {
529 std::ostringstream errstr;
531 throw EXCN_FileFormat(
"expected assignment value for " + errstr.str() );
535 std::istringstream line_stream( line );
536 new_peak_line = line;
538 std::istringstream line_stream_test_new_peak( line );
542 line_stream_test_new_peak >> dummyI >> dummyR >> dummyR >> dummyR >> dummyI >> tag;
543 if ( tag ==
"U" )
return;
546 std::istringstream line_stream_test_new_peak( line );
548 line_stream_test_new_peak >> tag;
549 if ( tag[0]==
'#' )
return;
552 std::istringstream line_stream_test_new_peak( line );
554 line_stream_test_new_peak >> tag1 >> tag2 >> tag3;
555 if ( tag3.find(
".") != std::string::npos )
return;
559 for (
Size icol=1; icol<=
ncol; ++icol ) {
562 if ( !line_stream ) {
567 std::ostringstream errstr;
569 throw EXCN_FileFormat(
"expected assignment value for " + errstr.str() );
571 if ( first )
tr.Trace <<
" read assignments: ";
574 tr.Trace << val <<
" ";
576 line_stream.setstate( std::ios_base::failbit );
581 tr.Trace << std::endl;
584 for (
Size icol=1; icol<=
ncol; ++icol ) {
585 Size val = vals[ icol ];
588 Size index = iproton + ( is_label ? 2 : 0 );
589 runtime_assert( !is_label || cp.
has_label( iproton ) );
590 reorder[ index ] = val;
592 tr.Trace <<
" add assignment " << std::endl;
597 if ( !line_stream.good() ) {
601 if ( tag ==
"#VC" ) {
606 }
else weight += 1.0;
607 if ( !line_stream.good() )
break;
623 os << std::endl <<
" ";
628 for (
Size icol=1; icol<=
ncol(); ++icol ) {
637 if ( !is_label ) os << pa.
atom( iproton ) <<
" ";
640 os << ObjexxFCL::fmt::RJ( 6, val ) <<
" ";
647 os <<
"#VC " << ObjexxFCL::fmt::F( 5, 3, val );
653 Size assignments_written( 0 );
657 CrossPeak::PeakAssignments::const_iterator best_VC_it = cp.
assignments().end();
658 for ( CrossPeak::PeakAssignments::const_iterator it = cp.
assignments().begin(); it != cp.
assignments().end(); ++it ) {
659 Real val( (*it)->normalized_peak_volume() );
660 if ( val > bestVC ) {
668 ++assignments_written;
672 for ( CrossPeak::PeakAssignments::const_iterator it = cp.
assignments().begin(); it != cp.
assignments().end(); ++it ) {
673 Real val( (*it)->normalized_peak_volume() );
675 ++assignments_written;
680 if ( assignments_written == 1 ) os << line_end;
695 os <<
"\ninfo1: " <<
info1_->label_atom_type() <<
" " <<
info1_->proton_tolerance() <<
" " <<
info1_->label_tolerance();
696 os <<
"\ninfo2: " <<
info2_->label_atom_type() <<
" " <<
info2_->proton_tolerance() <<
" " <<
info2_->label_tolerance();