Rosetta 3.5
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
AllowedSeqposForGeomCst.cc
Go to the documentation of this file.
1 // -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
2 // vi: set ts=2 noet:
3 // :noTabs=false:tabSize=4:indentSize=4:
4 //
5 // (c) Copyright Rosetta Commons Member Institutions.
6 // (c) This file is part of the Rosetta software suite and is made available under license.
7 // (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
8 // (c) For more information, see http://www.rosettacommons.org. Questions about this can be
9 // (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
10 
11 /// @file protocols/toolbox/match_enzdes_util/AllowedSeqposForGeomCst.cc
12 /// @brief Implementation of AllowedSeqposForGeomCst, which stores the sequence positions allowed for each geometric constraint
13 /// @author Florian Richter, flosopher@gmail.com, mar 2012
14 
15 
16 /// unit headers
18 
19 ///project headers
20 #include <basic/options/option.hh>
21 #include <basic/options/keys/match.OptionKeys.gen.hh>
22 
24 #include <core/pose/Pose.hh>
25 #include <core/types.hh>
26 
27 #include <basic/Tracer.hh>
28 
29 //utility headers
30 #include <utility/io/izstream.hh>
31 #include <utility/pointer/ReferenceCount.hh>
32 #include <utility/string_util.hh>
33 #include <utility/vector1.fwd.hh>
34 
35 // C++ headers
36 #include <sstream>
37 
38 namespace protocols {
39 namespace toolbox {
40 namespace match_enzdes_util {
41 
// File-local tracer; the functions in this file stream their progress output to it.
42 static basic::Tracer TR( "protocols.toolbox.match_enzdes_util.AllowedSeqposForGeomCst" );
43 
/// @brief Constructor taking ready-made position lists.
/// @param seqpos_for_geomcst one list of sequence positions per geometric
/// constraint; used to initialize the seqpos_for_geomcst_ member.
// NOTE(review): the declarator line of this constructor is not present in this
// rendering of the file; only the parameter list and initializer are visible.
45  utility::vector1< utility::vector1< Size > > const & seqpos_for_geomcst )
46  : seqpos_for_geomcst_( seqpos_for_geomcst )
47 {}
48 
50 
52 
53 
/// @brief Return the list of allowed sequence positions for geometric
/// constraint number geomcst.
54 /// @details this function supports two behaviours
55 /// 1. the same list for every geomcst. this means that if
56 /// the size of seqpos_for_geomcst_ == 1 and a bigger number
57 /// than 1 is passed in, it will return the first element
58 /// 2. different lists for every geomcst. this means that
59 /// the size of seqpos_for_geomcst_ is > 1, but if a larger
60 /// number than the vector size is passed in, it's unclear
61 /// what to do, so we exit
/// Indexing is 1-based: the shared list is read from element [1].
// NOTE(review): geomcst == 0 is not rejected. When seqpos_for_geomcst_ holds
// zero or more than one list, a zero argument falls through to
// seqpos_for_geomcst_[ 0 ] -- confirm how vector1 treats index 0.
64 
// Shared-list mode: a single stored list answers every query.
65  if( seqpos_for_geomcst_.size() == 1 ) return seqpos_for_geomcst_[1];
66 
// Per-constraint mode: asking beyond the stored lists is a fatal error.
67  else if( geomcst > seqpos_for_geomcst_.size() ) utility_exit_with_message("Asking for seqpos list of geomcst "+utility::to_string( geomcst )+", but lists only exist for "+utility::to_string( seqpos_for_geomcst_.size() )+" geomcst.");
68 
69  return seqpos_for_geomcst_[ geomcst ];
70 }
71 
72 
73 /// @details This function reads one of two files from the command line
74 /// depending on which options the user has provided. These files define
75 /// the set of residues on the scaffold to consider as launch-points
76 /// for the geometric constraints (e.g. to consider as part of the
77 /// catalytic core in an enzyme). The file meanings and formats are below.
78 ///
79 /// 1. A list of residue id's to consider for all of the geometric constraints.
80 /// Such a file can be generated for a scaffold and then used along side any
81 /// enzyme-design .cst file. It is scaffold dependent and constraint-file
82 /// independent. The file should list the residue indexes for the scaffold
83 /// on one or more lines. The file format does not support comments.
84 /// Residue id's start counting at 1; the input pdb resids are ignored.
85 /// (It's best to renumber your scaffold resids starting from 1 to avoid confusion)
86 ///
87 /// Example.
88 /// <begin file>
89 /// 104 106 108 109 117 118 137 139 143 144 36 6 85 87 88 89 91 92 97
90 /// <end file>
91 ///
92 /// 2. A list for each geometric constraint of the residues to consider.
93 /// Such a file allows the user to focus on particular residues for certain
94 /// geometric constraints for a particular scaffold. Such a file
95 /// depends on both the scaffold and the match constraint file and cannot
96 /// be generalized across either multiple scaffolds or multiple constraint files.
97 /// The first line of the file begins with N_CST, followed by the number of geometric
98 /// constraints. This must match the number of geometric constraints in the .cst file.
99 /// On each subsequent line, the geometric constraint ID is given, followed by
100 /// a colon and then followed by all of the residue ID's that should be considered
101 /// for that geometric constraint. Each geometric constraint must appear on one
102 /// line in the file, though they may be listed in any order. The file format does
103 /// not support comments.
104 /// flo jan 2010: it is also possible to specify that all positions in the scaffold
105 /// can be used for a certain constraint. see the example for cst 4 below
106 /// Example.
107 /// <begin file>
108 /// N_CST 4
109 /// 1: 9
110 /// 3: 9
111 /// 2: 6 7 9 11 12 14 15 17 18 21 22 23 25 26 38 40 43 46 47 49 53 54 57 60 61
112 /// 4: all
113 /// <end file>
114 ///
/// @brief Fill seqpos_for_geomcst_ from the file named by either
/// -match::scaffold_active_site_residues (one list shared by all constraints)
/// or -match::scaffold_active_site_residues_for_geomcsts (one list per
/// constraint). Exits via utility_exit_with_message on conflicting flags or
/// on the malformed-input cases checked below. If neither flag was given,
/// seqpos_for_geomcst_ is left empty.
// NOTE(review): the declarator line (function name and the `pose` parameter
// used below) is not present in this rendering of the file.
115 void
117 {
118  using namespace basic::options;
119  using namespace basic::options::OptionKeys::match;
120 
// The two input files are mutually exclusive.
121  if ( option[ scaffold_active_site_residues ].user() && option[ scaffold_active_site_residues_for_geomcsts ].user() ) {
122  utility_exit_with_message( "Conflicting scaffold build point defintion: both "
123  "the -match::scaffold_active_site_residues <filename> flag\n"
124  "and the -match::scaffold_active_site_residues_for_geomcsts <filename> flag were found on the command line" );
125  }
126 
// Discard any lists held from construction or an earlier call.
127  seqpos_for_geomcst_.clear();
128 
// ---- Mode 1: a single residue list shared by every geometric constraint ----
129  if ( option[ scaffold_active_site_residues ].user() ) {
130  //share_build_points_for_geomcsts_ = true;
// Exactly one list is stored; the accessor returns it for any constraint id.
131  seqpos_for_geomcst_.resize( 1 );
132 
133  std::string filename = option[ scaffold_active_site_residues ];
134  utility::io::izstream istr( filename.c_str() );
// Residue ids are gathered in a list first and copied into the vector below.
135  std::list< Size > upstream_build_resids;
136  TR << "Reading match::scaffold_active_stie_residues " << filename << ":";
// Read whitespace-separated residue ids until the stream stops being usable.
// NOTE(review): only bad() is tested; assuming izstream follows std::istream
// state semantics, a non-numeric token would set failbit and end the loop
// silently with the remainder of the file ignored -- confirm.
137  while ( istr ) {
138  Size resid( 0 );
139  istr >> resid;
// A resid of 0 is treated as "nothing read" and skipped.
140  if ( ! istr.bad() && resid != 0 ) {
141  TR << " " << resid;
142  upstream_build_resids.push_back( resid );
143  }
144 
145  }
146  if ( upstream_build_resids.empty() ) {
147  utility_exit_with_message( "Failed to read any scaffold active site residues from file " + filename + " specified by the flag match::scaffold_active_stie_residues" );
148  }
149 
150  TR << std::endl;
151  //generic_pose_build_resids_.resize( upstream_build_resids.size() );
// Copy the collected ids, in file order, into the single shared list.
152  seqpos_for_geomcst_[1].resize( upstream_build_resids.size() );
153  std::copy( upstream_build_resids.begin(), upstream_build_resids.end(), seqpos_for_geomcst_[1].begin() );
154  }
// ---- Mode 2: a separate residue list for each geometric constraint ----
155  else if (option[ scaffold_active_site_residues_for_geomcsts ].user() ) {
156  //share_build_points_for_geomcsts_ = false;
157 
158  std::string filename = option[ scaffold_active_site_residues_for_geomcsts ];
159  utility::io::izstream istr( filename.c_str() );
// NOTE(review): upstream_build_resids is declared but never used in this branch.
160  std::list< Size > upstream_build_resids;
161  TR << "Reading match::scaffold_active_site_residues_for_geomcsts " << filename << std::endl;
// NOTE(review): ncsts has no initializer and only bad() is checked after it is
// read below, so a failed extraction may leave it unset -- confirm.
162  std::string ncsts_string; Size ncsts;
// Header line: the literal token N_CST followed by the number of constraints.
163  if ( ! istr.good() ) {
164  utility_exit_with_message( "Could not read first line from match::scaffold_active_site_residues_for_geomcsts " + filename );
165  }
166  istr >> ncsts_string;
167  if ( istr.bad() ) {
168  utility_exit_with_message( "Failed to read N_CST field in first line from match::scaffold_active_site_residues_for_geomcsts " + filename );
169  }
170  if ( ! istr.good() ) {
171  utility_exit_with_message( "Unexpected end of file after reading N_CST field in first line from match::scaffold_active_site_residues_for_geomcsts " + filename );
172  }
173  if ( ncsts_string != "N_CST" ) {
174  utility_exit_with_message( "Failed to read N_CST field in first line from match::scaffold_active_site_residues_for_geomcsts " + filename );
175  }
176  istr >> ncsts;
177  if ( istr.bad() ) {
178  utility_exit_with_message( "Failed to read the number of geometric constraints in first line from match::scaffold_active_site_residues_for_geomcsts " + filename );
179  }
180  //if ( ncsts != enz_input_data_->mcfi_lists_size() ) {
181  // utility_exit_with_message( "#geometric constraints disagreement between "
182  // "match::scaffold_active_site_residues_for_geomcsts " + filename +
183  // " (" + utility::to_string(ncsts) + ") and Enz .cst file: " + option[ geometric_constraint_file ]()() +
184  // " (" + utility::to_string(enz_input_data_->mcfi_lists_size()) + ")" );
185  //}
186  //per_cst_pose_build_resids_.resize( ncsts );
// One (initially empty) list per declared constraint.
187  seqpos_for_geomcst_.resize( ncsts );
188 
// Consume whatever is left of the header line.
189  std::string finish_the_line;
190  istr.getline( finish_the_line );
// data_read_for_cst[ i ] holds the file line on which constraint i was listed;
// 0 means "not seen yet". Used for duplicate and missing-entry detection.
191  utility::vector1< Size > data_read_for_cst( ncsts, 0 );
// Line 1 was the N_CST header, so constraint lines start at 2.
192  Size linenum = 2;
193  while ( istr ) {
194  Size geomcst_id( 0 );
195  istr >> geomcst_id;
// End of file with nothing extracted: normal termination.
196  if ( istr.eof() && geomcst_id == 0 ) break;
197 
198  if ( istr.bad() ) {
199  utility_exit_with_message( "Reading line " + utility::to_string( linenum ) + " of " + filename + ". Failed to read the geometric constraint id at the beginning of the line." );
200  }
// NOTE(review): only the upper bound is checked. If geomcst_id is still 0 here
// (e.g. a non-numeric token before end of file), the next statement indexes
// data_read_for_cst[ 0 ] -- confirm how vector1 treats index 0.
201  if ( geomcst_id > ncsts ) {
202  utility_exit_with_message( "Reading line " + utility::to_string( linenum ) + " of " + filename + ". Geometric constraint id at the beginning of the line is out of range: "
203  + utility::to_string( geomcst_id ) + " > " + utility::to_string( ncsts ) );
204  }
// Each constraint may be listed only once.
205  if ( data_read_for_cst[ geomcst_id ] != 0 ) {
206  utility_exit_with_message( "Reading line " + utility::to_string( linenum ) + " of " + filename + ". Residue list for geometric constraint #"
207  + utility::to_string( geomcst_id ) + " appeared already on line " + utility::to_string( data_read_for_cst[ geomcst_id ] ) );
208  }
209  data_read_for_cst[ geomcst_id ] = linenum;
// The next whitespace-delimited token must be exactly ":".
210  std::string colon;
211  istr >> colon;
212  if ( colon != ":" ) {
213  utility_exit_with_message( "Reading line " + utility::to_string( linenum ) + " of " + filename + ". Failed to read the colon (:) separating the geometric constraint id from the rest of the resids." );
214  }
215  TR << std::endl << geomcst_id << " :";
// The first token after the colon is either the keyword ALL/all or the first
// residue id, so it is read as a string and interpreted afterwards.
216  std::string first_token;
217  istr >> first_token;
218  if( istr.bad() ) utility_exit_with_message( "Apparently there are no residues listed for geom cst " + utility::to_string( geomcst_id ) + " in file " + filename +".");
219  if( ( first_token == "ALL" ) || (first_token == "all" ) ){
// "all" needs a pose to enumerate; every position whose residue reports
// is_protein() is added for this constraint.
220  if( !pose ) utility_exit_with_message("AllowedSeqposForGeomCst requested to use all build pos for a certain constraint, but no pose passed into function, can't generate list.");
221  TR << "All pose positions requested, using ";
222  for( core::Size seqpos = 1; seqpos <= pose->total_residue(); ++seqpos ){
223  if( pose->residue(seqpos).is_protein() ){
224  seqpos_for_geomcst_[geomcst_id ].push_back( seqpos );
225  TR << " " << seqpos;
226  }
227  }
// Anything following "all" on the same line is read and discarded.
228  istr.getline( finish_the_line );
229  } else {
// Convert the first token to a residue id; 0 (including a failed conversion
// that leaves the initial 0) is rejected.
230  Size first_resid(0);
231  std::istringstream firststr( first_token );
232  firststr >> first_resid;
233  if( first_resid != 0) { //&& (first_resid <= upstream_pose_->total_residue() )){
234  seqpos_for_geomcst_[geomcst_id].push_back( first_resid );
235  TR << " " << first_resid;
236  } else {
237  utility_exit_with_message("Bad first residue listed for geomcst " + utility::to_string( geomcst_id ) + " in file " + filename +": " + first_token);
238  }
// Parse the remaining residue ids from the rest of this line only.
239  istr.getline( finish_the_line );
240  if( finish_the_line != ""){
241  std::istringstream isstr( finish_the_line );
242  while ( isstr.good() ) {
243  Size resid( 0 );
244  isstr >> resid;
245  if ( isstr.eof() && resid == 0 ) break;
// NOTE(review): a parse failure on a std::istringstream sets failbit, not
// badbit, so the error branch below is likely unreachable and a non-integer
// token would be stored as whatever value resid holds -- confirm.
246  if ( ! isstr.bad() ) {
247  //if ( resid > 0 && resid <= upstream_pose_->total_residue() ) {
248  TR << " " << resid;
249  seqpos_for_geomcst_[ geomcst_id ].push_back( resid );
250  //}
251  //else if ( resid > upstream_pose_->total_residue() ){
252  //std::cerr << std::endl << "ERROR parsing line fragment: " << finish_the_line << std::endl;
253  //utility_exit_with_message( "Reading line " + utility::to_string( linenum ) + " of " + filename + ". Requested upstream build resid of " + utility::to_string(resid) + " exceeds the number of residues in the pose ( " + utility::to_string( upstream_pose_->total_residue() ) + ")" );
254  //} else {
255  //std::cerr << std::endl << "ERROR parsing line fragment: " << finish_the_line << std::endl;
256  //utility_exit_with_message( "Reading line " + utility::to_string( linenum ) + " of " + filename + ". Failed to read an integer." );
257  //}
258  } else {
259  std::cerr << std::endl << "ERROR parsing line fragment: " << finish_the_line << std::endl;
260  utility_exit_with_message( "Reading line " + utility::to_string( linenum ) + " of " + filename + ". Only integers may be included." );
261  }
262  } //while loop over line
263  }//if finish_the_line has stuff in it
264  }//if all pos else
265  ++linenum;
266  } //loop over lines
267  TR << std::endl;
// Every constraint from 1 to ncsts must have appeared; report all missing ones
// on std::cerr before exiting.
268  bool any_absent( false );
269  for ( Size ii = 1; ii <= ncsts; ++ii ) {
270  if ( data_read_for_cst[ ii ] == 0 ) {
271  std::cerr << "ERROR reading " << filename << ": did not find residue list for constraint # " << ii << std::endl;
272  any_absent = true;
273  }
274  }
275  if ( any_absent ) {
276  utility_exit_with_message( "Failed to read a residue list for one or more constraints" );
277  }
278  }
280 
281 
282 
283 }
284 }
285 }