21 #include <basic/Tracer.hh>
23 #include <utility/exit.hh>
24 #include <utility/string_util.hh>
25 #include <utility/io/izstream.hh>
26 #include <utility/LexicographicalIterator.hh>
30 #include <utility/vector1.hh>
34 static basic::Tracer
tr(
"protocols.enzdes.EnzdesLoopsFile");
38 namespace match_enzdes_util {
48 targ_res_(0), num_interactions_(1),
49 dis_(NULL), loop_ang_(NULL), targ_ang_(NULL),
50 loop_dih_(NULL), targ_dih_(NULL), lt_dih_(NULL)
66 bool block_end(
false);
71 tr <<
"Error: end of file reached before RES_CONTACT_END tag was found." << std::endl;
76 getline( data, line );
78 tokens.clear(); tokens.push_back(
"");
82 if( tokens.size() < 1 )
continue;
84 if( tokens[1] ==
"RES_CONTACT_END" ){
105 tr <<
"targ_atom_names ";
109 tr <<
"loopres_atom_names ";
143 using namespace toolbox::match_enzdes_util;
145 bool to_return(
false);
149 if( ( tokens.size() < 2 ) || (tokens[2] ==
"" ) ){
150 tr <<
"Error when processing res_interactions block. Line containing " << tokens[1] <<
" seems to have no useable data." << std::endl;
155 if( tokens[1] ==
"target_res" ){
159 else if( tokens[1] ==
"num_contacts" ){
171 else if( tokens[1] ==
"targ_atom_names" ){
176 else if( tokens[1] ==
"targ_base_atom_names" ){
181 else if( tokens[1] ==
"targ_base2_atom_names" ){
186 else if( tokens[1] ==
"loopres_atom_names" ){
191 else if( tokens[1] ==
"loopres_base_atom_names" ){
196 else if( tokens[1] ==
"loopres_base2_atom_names" ){
201 else if( tokens[1] ==
"distance" ){
202 if( tokens.size() < 3 ){
tr <<
"too little information given for distance_LT." << std::endl;
205 if( tokens.size() == 3 )
dis_ =
new GeomSampleInfo( atof( tokens[2].c_str() ), atof( tokens[3].c_str() ), generic_force_K, 0.0 );
206 else dis_ =
new GeomSampleInfo( atof( tokens[2].c_str() ), atof( tokens[3].c_str() ), atof( tokens[4].c_str() ), 0.0 );
209 else if( tokens[1] ==
"angle_loop" ){
210 if( tokens.size() < 3 ){
tr <<
"too little information given for angle_loop." << std::endl;
213 if( tokens.size() == 3 )
loop_ang_ =
new GeomSampleInfo( atof( tokens[2].c_str() ), atof( tokens[3].c_str() ), generic_force_K, generic_period );
214 else loop_ang_ =
new GeomSampleInfo( atof( tokens[2].c_str() ), atof( tokens[3].c_str() ), atof( tokens[4].c_str() ), generic_period );
217 else if( tokens[1] ==
"angle_targ" ){
218 if( tokens.size() < 3 ){
tr <<
"too little information given for angle_targ." << std::endl;
221 if( tokens.size() == 3 )
targ_ang_ =
new GeomSampleInfo( atof( tokens[2].c_str() ), atof( tokens[3].c_str() ), generic_force_K, generic_period );
222 else targ_ang_ =
new GeomSampleInfo( atof( tokens[2].c_str() ), atof( tokens[3].c_str() ), atof( tokens[4].c_str() ), generic_period );
225 else if( tokens[1] ==
"dih_loop" ){
226 if( tokens.size() < 3 ){
tr <<
"too little information given for dih_loop." << std::endl;
229 if( tokens.size() == 3 )
loop_dih_ =
new GeomSampleInfo( atof( tokens[2].c_str() ), atof( tokens[3].c_str() ), generic_force_K, generic_period );
230 else if( tokens.size() == 4 )
loop_dih_ =
new GeomSampleInfo( atof( tokens[2].c_str() ), atof( tokens[3].c_str() ), atof( tokens[4].c_str() ), generic_period );
231 else loop_dih_ =
new GeomSampleInfo( atof( tokens[2].c_str() ), atof( tokens[3].c_str() ), atof( tokens[4].c_str() ), atof( tokens[5].c_str() ) );
234 else if( tokens[1] ==
"dih_targ" ){
235 if( tokens.size() < 3 ){
tr <<
"too little information given for dih_loop." << std::endl;
238 if( tokens.size() == 3 )
targ_dih_ =
new GeomSampleInfo( atof( tokens[2].c_str() ), atof( tokens[3].c_str() ), generic_force_K, generic_period );
239 else if( tokens.size() == 4 )
targ_dih_ =
new GeomSampleInfo( atof( tokens[2].c_str() ), atof( tokens[3].c_str() ), atof( tokens[4].c_str() ), generic_period );
240 else targ_dih_ =
new GeomSampleInfo( atof( tokens[2].c_str() ), atof( tokens[3].c_str() ), atof( tokens[4].c_str() ), atof( tokens[5].c_str() ) );
243 else if( tokens[1] ==
"dih_LT" ){
244 if( tokens.size() < 3 ){
tr <<
"too little information given for dih_loop." << std::endl;
247 if( tokens.size() == 3 )
lt_dih_ =
new GeomSampleInfo( atof( tokens[2].c_str() ), atof( tokens[3].c_str() ), generic_force_K, generic_period );
248 else if( tokens.size() == 4 )
lt_dih_ =
new GeomSampleInfo( atof( tokens[2].c_str() ), atof( tokens[3].c_str() ), atof( tokens[4].c_str() ), generic_period );
249 else lt_dih_ =
new GeomSampleInfo( atof( tokens[2].c_str() ), atof( tokens[3].c_str() ), atof( tokens[4].c_str() ), atof( tokens[5].c_str() ) );
254 if( !to_return )
tr <<
"Unspecified error when processing res_interactions block line containing " << tokens[1] << std::endl;
261 resA_(false), cst_block_(0)
270 tokens.push_back(
"");
272 bool cst_line_encountered(
false), block_end(
false);
278 tr <<
"Error: end of file reached before CST_TARGET_END tag was found." << std::endl;
284 getline( data, line );
287 tokens.clear(); tokens.push_back(
"");
291 if( tokens.size() < 1 )
continue;
293 if( tokens[1] ==
"CST_TARGET_END" ){
297 else if( tokens[1] ==
"cst_target" ){
301 if( tokens[3] ==
"A" )
resA_ =
true;
302 else if( tokens[3] ==
"B" )
resA_ =
false;
304 tr <<
"When specifing a cst_target line, the 3rd element has to be either A or B, corresponding to which of the two residues in the cstfile is desired." << std::endl;
308 cst_line_encountered =
true;
317 if( !cst_line_encountered ){
318 tr <<
"Did not find 'cst_target' line in CST block." << std::endl;
324 tr <<
"An explicit residue number has been specified in a CST_TARGET block" << std::endl;
345 loop_start_(0), loop_end_(0),
346 loop_start_pdb_(0), loop_end_pdb_(0),
347 loop_start_pdb_chain_(
' '), loop_end_pdb_chain_(
' '),
348 pose_numb_(false), pdb_numb_(false),
349 min_length_(0), max_length_(0),
350 preserve_buried_contacts_(false), contact_buried_problematic_res_(false)
359 utility::io::izstream & data
368 tokens.push_back(
"");
370 bool loop_end(
false );
372 bool min_length_tag_found(
false), max_length_tag_found(
false);
379 tr <<
"Error: end of file reached before LOOP_END tag was found." << std::endl;
385 getline( data, line );
387 tokens.clear(); tokens.push_back(
"");
391 if( tokens.size() < 1 )
continue;
395 if( tokens[1] ==
"LOOP_END" ) loop_end =
true;
397 else if( tokens[1] ==
"start" ) {
402 }
else if( tokens[1] ==
"stop" ) {
407 }
else if( tokens[1] ==
"pdb_start" ) {
413 }
else if( tokens[1] ==
"pdb_stop" ) {
419 }
else if( tokens[1] ==
"min_length" ){
421 min_length_tag_found =
true;
424 else if( tokens[1] ==
"max_length" ){
426 max_length_tag_found =
true;
429 else if( tokens[1] ==
"ss_string" ){
433 else if( tokens[1] ==
"ss_blueprint" ){
434 for(
core::Size i = 2; i <= tokens.size(); ++i ) ss_blueprints.push_back( tokens[i] );
437 else if( tokens[1] ==
"CST_TARGET_BEGIN" ){
443 tr <<
"Error occured when processing a CST_TARGET_BEGIN block." << std::endl;
449 else if( tokens[1] ==
"RES_CONTACT_BEGIN" ){
454 tr <<
"Error occured when processing a RES_CONTACT_BEGIN block." << std::endl;
459 else if( tokens[1] ==
"instruction" ){
471 if( !min_length_tag_found ){
473 tr <<
"No min_length tag found, min_length_ set to " <<
min_length_ <<
"." << std::endl;
475 if( !max_length_tag_found ){
477 tr <<
"No max_length tag found, max_length_ set to " <<
max_length_ <<
"." << std::endl;
490 if( report )
tr <<
"file didn't seem to specify loop start" << std::endl;
495 if( report )
tr <<
"file didn't seem to specify loop stop" << std::endl;
500 if( report )
tr <<
"specify loop start and stop by PDB or pose numbering" << std::endl;
505 if( report )
tr <<
"file specified illegal minimum loop length (min_length tag) of 0" << std::endl;
510 if( report )
tr <<
"file specified illegal maximum loop length (max_length tag) of 0" << std::endl;
515 if( report )
tr <<
"specified start of loop is after stop of loop. go work at mcdonalds." << std::endl;
520 tr <<
"min_length of loop is larger than max_length of loop. go work at mcdonalds" << std::endl;
525 if( report )
tr <<
"min length of loop is too short, has to be at least 4." << std::endl;
531 if(
ss_strings_[i].length() <
min_length_ )
tr <<
"WARNING: secondary structure string " << i <<
", " <<
ss_strings_[i] <<
", specified in the enzdes loops file is shorter than min_length for the loop. This overrides the specified min_length." << std::endl;
533 if(
ss_strings_[i].length() >
max_length_ )
tr <<
"WARNING: secondary structure string " << i <<
", " <<
ss_strings_[i] <<
", specified in the enzdes loops file is longer than max_length for the loop. This overrides the specified max_length." << std::endl;
555 if( *ble_it ==
"" )
continue;
560 if( ( (min_max_strings.size() != 1 ) && (min_max_strings.size() != 2 ) ) || ( (*ble_it)[1] !=
'(' ) ){
561 utility_exit_with_message(
"SS_blueprint "+ *ble_it +
" could not be understood when trying to generate ss_strings from it.");
564 blueprint_element_ss_chars.push_back( (*ble_it)[0] );
566 if( min_max_strings.size() == 1 ){
567 blueprint_element_num_lengths.push_back(1);
568 blueprint_element_min_lengths.push_back( (
core::Size) atoi( min_max_strings[1].c_str() ) );
574 blueprint_element_num_lengths.push_back( ble_max_length - ble_min_length + 1);
575 blueprint_element_min_lengths.push_back( ble_min_length );
578 num_combos = num_combos * blueprint_element_num_lengths[ blueprint_element_num_lengths.size() ];
580 core::Size num_elements( blueprint_element_num_lengths.size() );
582 core::Size too_long_strings(0), too_short_strings(0);
584 utility::LexicographicalIterator lex( blueprint_element_num_lengths );
585 std::set< std::string > observed_ss_strings;
587 while( !lex.at_end() ){
591 for(
core::Size i = 1; i <= num_elements; ++i){
592 length_this_string += lex[i]+blueprint_element_min_lengths[i] - 1;
607 for(
core::Size i = 1; i <= num_elements; ++i){
609 core::Size length_this_element( lex[i]+blueprint_element_min_lengths[i] - 1 );
610 for(
core::Size j = 1; j <= length_this_element; ++j ){
611 ss_string.push_back( blueprint_element_ss_chars[i] );
615 if( observed_ss_strings.find( ss_string ) == observed_ss_strings.end() ){
617 observed_ss_strings.insert( ss_string );
619 else redundant_strings++;
624 tr <<
"SS blueprint string " << ss_blueprint <<
" led to the following " <<
ss_strings_.size() - num_previous_ss_strings <<
" secondary structure strings out of a total of " << num_combos <<
" possible combinations: " << std::endl;
628 tr << std::endl <<
"A total of " << too_long_strings <<
" ss_strings were ignored because they were too long, a total of " << too_short_strings <<
" ss_strings were ignored because they were too short, and a total of " << redundant_strings <<
" ss_strings were ignored because they were redundant." << std::endl;
644 utility::io::izstream data( filename.c_str() );
645 std::istringstream line_stream;
650 std::cerr <<
"ERROR:: Unable to open enzdes loops file: "
651 << filename << std::endl;
654 tr.Info <<
"reading enzdes loops from " << filename <<
" ..." << std::endl;
658 while( !data.eof() ) {
663 line_stream.str(line);
666 if( key ==
"LOOP_BEGIN" ){
669 tr <<
"reading loop block " << counted_loops <<
"... " << std::endl;
673 if( el->read_loops_file_block( data ) ){
677 tr <<
"Data read: start(" << el->start() <<
"), stop(" << el->stop() <<
"), min_length(" << el->min_length() <<
"),";
678 tr <<
" max_length(" << el->max_length() <<
") " << std::endl;
679 tr << el->ss_strings().size() <<
" secstruct strings, " << el->res_interactions().size() <<
" res interactions" << std::endl;
680 tr <<
" ... done reading block " << counted_loops <<
"." << std::endl;
684 tr <<
"Error when reading file " << filename <<
". Block " << counted_loops <<
" or the info therein was corrupted." << std::endl;