Rosetta 3.5
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
BetaTurnDetectionFeatures.cc
Go to the documentation of this file.
1 // -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
2 // vi: set ts=2 noet:
3 // :noTabs=false:tabSize=4:indentSize=4:
4 //
5 // (c) Copyright Rosetta Commons Member Institutions.
6 // (c) This file is part of the Rosetta software suite and is made available under license.
7 // (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
8 // (c) For more information, see http://www.rosettacommons.org. Questions about this can be
9 // (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
10 
11 /// @file protocols/features/BetaTurnDetectionFeatures.cc
12 /// @brief Detect beta-turns in a pose and report them to the beta_turns table of a features database
13 /// @author Brian D. Weitzner (brian.weitzner@gmail.com)
14 
15 // Unit Headers
17 
18 // Project Headers
19 #include <core/pose/Pose.hh>
20 #include <core/pose/util.hh>
21 
22 //External
23 #include <boost/uuid/uuid.hpp>
24 #include <boost/uuid/uuid_io.hpp>
25 
26 // Platform Headers
30 #include <core/types.hh>
32 
33 // Numeric Headers
34 #include <numeric/HomogeneousTransform.hh>
35 
36 // Utility Headers
37 #include <utility/excn/Exceptions.hh>
38 #include <utility/vector1.hh>
39 #include <utility/sql_database/DatabaseSessionManager.hh>
40 #include <utility/tag/Tag.hh>
41 
42 // Basic Headers
43 #include <basic/options/option.hh>
44 #include <basic/options/keys/inout.OptionKeys.gen.hh>
45 #include <basic/database/sql_utils.hh>
46 
47 #include <basic/database/schema_generator/PrimaryKey.hh>
48 #include <basic/database/schema_generator/ForeignKey.hh>
49 #include <basic/database/schema_generator/Column.hh>
50 #include <basic/database/schema_generator/Schema.hh>
51 
52 
53 // External Headers
54 #include <cppdb/frontend.h>
55 
56 // Boost Headers
57 #include <boost/foreach.hpp>
58 #define foreach BOOST_FOREACH
59 
60 // C++ Headers
61 #include <algorithm>
62 #include <map>
63 #include <sstream>
64 #include <string>
65 
66 namespace protocols{
67 namespace features{
68 
69 using std::string;
70 using std::stringstream;
71 using std::transform;
72 using std::endl;
73 using std::map;
74 using basic::database::safely_write_to_database;
75 using basic::database::safely_prepare_statement;
76 using core::Size;
77 using core::SSize;
78 using core::Real;
79 using core::pose::Pose;
84 using numeric::HomogeneousTransform;
85 using numeric::xyzVector;
86 using utility::sql_database::sessionOP;
87 using utility::excn::EXCN_Msg_Exception;
88 using utility::vector1;
90 using cppdb::statement;
91 using cppdb::result;
92 
94  FeaturesReporter(), beta_turn_length( 3 ), beta_turn_distance_cutoff( 7.0 )
95 {}
96 
98  FeaturesReporter(), beta_turn_length( 3 ), beta_turn_distance_cutoff( 7.0 )
99 {}
100 
102 
103 string
104 BetaTurnDetectionFeatures::type_name() const { return "BetaTurnDetectionFeatures"; }
105 
106 void
108  sessionOP db_session
109 ) const {
110  write_beta_turns_table_schema(db_session);
111 }
112 
113 void
115  sessionOP db_session
116 ) const {
117  using namespace basic::database::schema_generator;
118 
119  Column struct_id("struct_id", new DbUUID());
120  Column residue_begin("residue_begin", new DbInteger());
121  Column turn_type("turn_type", new DbText());
122 
123  Columns primary_key_columns;
124  primary_key_columns.push_back(struct_id);
125  primary_key_columns.push_back(residue_begin);
126  PrimaryKey primary_key(primary_key_columns);
127 
128  Columns foreign_key_columns;
129  foreign_key_columns.push_back(struct_id);
130  foreign_key_columns.push_back(residue_begin);
131  vector1< std::string > reference_columns;
132  reference_columns.push_back("struct_id");
133  reference_columns.push_back("resNum");
134  ForeignKey foreign_key(foreign_key_columns, "residues", reference_columns, true);
135 
136  Schema table("beta_turns", primary_key);
137  table.add_foreign_key(foreign_key);
138  table.add_column(turn_type);
139 
140  table.write(db_session);
141 }
142 
145  utility::vector1<std::string> dependencies;
146  dependencies.push_back("ResidueFeatures");
147  return dependencies;
148 }
149 
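150 /// @details
151 /// A beta-turn candidate is a window of beta_turn_length + 1 consecutive residues.
152 /// Every residue in the window must be relevant, be a protein residue and lie on the same chain, and the window must pass the CA-CA distance cutoff, in order for the turn to be stored.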
153 Size
155  Pose const & pose,
156  vector1< bool > const & relevant_residues,
157  boost::uuids::uuid struct_id,
158  sessionOP db_session
159 ){
160  string beta_turns_stmt_string = "INSERT INTO beta_turns (struct_id, residue_begin, turn_type) VALUES (?,?,?);";
161  statement beta_turns_stmt(
162  safely_prepare_statement(beta_turns_stmt_string, db_session));
163 
164  for(SSize begin=1; begin <= SSize( pose.total_residue() - beta_turn_length ); ++begin){
165  Size end = begin + beta_turn_length;
166 
167  if ( !residue_range_is_relevant( relevant_residues, begin, end ) || !residue_range_is_protein( pose, begin, end ) || !all_turn_residues_are_on_the_same_chain( pose, begin ) || !beta_turn_present( pose, begin ) )
168  {
169  continue;
170  }
171 
172  // Add stuff to database
173  beta_turns_stmt.bind(1,struct_id);
174  beta_turns_stmt.bind(2,begin);
175  beta_turns_stmt.bind(3, beta_turn_type( pose, begin ) );
176  basic::database::safely_write_to_database( beta_turns_stmt );
177 
178  }
179  return 0;
180 }
181 
183 {
184  // It pisses me off that C++ works this way, but it does. Sergey promises this line will only ever be executed once.
185  static map< string, string > * conformation_to_turn_type = 0;
186 
187  if ( conformation_to_turn_type == 0 )
188  {
189  conformation_to_turn_type = new map< string, string >;
190 
191  // Turn types will be notated thusly: TurnXX[_NUMERAL], where XX is the Ramachandran hash of residues 2 and 3,
192  // _NUMERAL will be present if the turn is a classically recognized turn type, a trailing "p" stands for prime.
193  // (e.g. a Type I turn will be annotated "TurnAA_I")
194 
195  ( *conformation_to_turn_type )[ "AA" ] = "TurnAA_I";
196  ( *conformation_to_turn_type )[ "AB" ] = "TurnAB_VIII";
197  ( *conformation_to_turn_type )[ "AL" ] = "TurnAL_IX";
198  ( *conformation_to_turn_type )[ "AE" ] = "TurnAE";
199 
200  ( *conformation_to_turn_type )[ "BA" ] = "TurnBA";
201  ( *conformation_to_turn_type )[ "BB" ] = "TurnBB";
202  ( *conformation_to_turn_type )[ "BL" ] = "TurnBL_II";
203  ( *conformation_to_turn_type )[ "BE" ] = "TurnBE";
204 
205  ( *conformation_to_turn_type )[ "LA" ] = "TurnLA_IXp";
206  ( *conformation_to_turn_type )[ "LB" ] = "TurnLB";
207  ( *conformation_to_turn_type )[ "LL" ] = "TurnLL_Ip";
208  ( *conformation_to_turn_type )[ "LE" ] = "TurnLE_VIIIp";
209 
210  ( *conformation_to_turn_type )[ "EA" ] = "TurnEA_IIp";
211  ( *conformation_to_turn_type )[ "EB" ] = "TurnEB";
212  ( *conformation_to_turn_type )[ "EL" ] = "TurnEL";
213  ( *conformation_to_turn_type )[ "EE" ] = "TurnEE";
214 
215  // Well characterized turn types with Cis residues
216  ( *conformation_to_turn_type )[ "Ba" ] = "TurnCis3_VIa";
217  ( *conformation_to_turn_type )[ "Bb" ] = "TurnCis3_VIb";
218 
219  // Other possible Cis conformations
220  ( *conformation_to_turn_type )[ "xX" ] = "TurnCis2";
221  ( *conformation_to_turn_type )[ "xx" ] = "TurnCis2Cis3";
222  ( *conformation_to_turn_type )[ "Xx" ] = "TurnCis3other";
223  }
224  return *conformation_to_turn_type;
225 }
226 
228 {
229  // It pisses me off that C++ works this way, but it does. Sergey promises this line will only ever be executed once.
230  static vector1< string > * valid_ramachandran_hashes = 0;
231 
232  if ( valid_ramachandran_hashes == 0 )
233  {
234  valid_ramachandran_hashes = new vector1< string >;
235  valid_ramachandran_hashes->resize( number_of_ramachandran_hashes );
236 
237  ( *valid_ramachandran_hashes )[ A ] = "A";
238  ( *valid_ramachandran_hashes )[ B ] = "B";
239  ( *valid_ramachandran_hashes )[ L ] = "L";
240  ( *valid_ramachandran_hashes )[ E ] = "E";
241  }
242 
243  return *valid_ramachandran_hashes;
244 }
245 
247 {
248  Size chain = pose.residue( first_residue ).chain();
249  for ( Size residue_number = first_residue + 1; residue_number <= first_residue + beta_turn_length; ++residue_number )
250  {
251  if ( pose.residue( first_residue ).chain() != chain )
252  {
253  return false;
254  }
255  }
256  return true;
257 }
258 
259 bool BetaTurnDetectionFeatures::residue_range_is_relevant( vector1< bool > const & relevant_residues, Size range_begin, Size range_end ) const
260 {
261  for ( Size current_residue = range_begin; current_residue <= range_end; ++current_residue )
262  {
263  if ( current_residue > relevant_residues.size() || !relevant_residues[ current_residue ] )
264  {
265  return false;
266  }
267  }
268  return true;
269 }
270 
271 bool BetaTurnDetectionFeatures::residue_range_is_protein( Pose const & pose, Size range_begin, Size range_end ) const
272 {
273  for ( Size current_residue = range_begin; current_residue <= range_end; ++current_residue )
274  {
275  if ( !pose.residue( current_residue ).is_protein() )
276  {
277  return false;
278  }
279  }
280  return true;
281 }
282 
283 
284 bool BetaTurnDetectionFeatures::beta_turn_present( Pose const & pose, Size first_residue ) const
285 {
286  return ( pose.residue( first_residue ).xyz( "CA" ) - pose.residue( first_residue + beta_turn_length ).xyz( "CA" ) ).norm() <= beta_turn_distance_cutoff;
287 }
288 
289 string const & BetaTurnDetectionFeatures::beta_turn_type( Pose const & pose, Size first_residue ) const
290 {
291  string rama_hash = determine_ramachandran_hash( pose, first_residue );
292  validate_ramachandran_hash( rama_hash );
293  return get_conformation_to_turn_type_map().find( rama_hash )->second;
294 }
295 
296 
297 string BetaTurnDetectionFeatures::determine_ramachandran_hash( Pose const & pose, Size first_residue ) const
298 {
299  string rama_hash = "";
300  for ( Size residue_number = first_residue + 1; residue_number < first_residue + beta_turn_length; ++residue_number )
301  {
302  rama_hash += determine_ramachandran_hash_for_residue_with_dihedrals( pose.phi( residue_number ), pose.psi( residue_number ), pose.omega( residue_number ) );
303  }
304  return rama_hash;
305 }
306 
307 /// @brief For the purposes of classifying beta-turns, Ramachandran space has been hashed into four large areas.
308 /// In most turns the dihedral angles are not close to the boundaries as defined, so this provides a simple
309 /// way of accurately classifying beta-turns.
310 ///
311 /// @details The four regions of space are defined as:
312 /// A: phi <= 0, -100 < psi <= 50
313 /// B: phi <= 0, psi > 50 OR psi <= -100
314 /// L: phi > 0, -50 < psi <= 100
315 /// E: phi > 0, psi > 100 OR psi <= -50
316 ///
317 /// Note: In the case of a Cis peptide plane, the lowercase letter for the hash will be returned.
318 ///
319 /// Pictorial representation of Ramachandran hashing used for beta-turn classification:
320 ///
321 ///            |----------------------|
322 ///            |          |           |
323 ///            |    B     |     E     |
324 ///            |          |===========| 100
325 ///         50 |==========|           |
326 ///   p        |          |     L     |
327 ///   s      0 |--- A ----------------|
328 ///   i        |          |           |
329 ///            |          |===========| -50
330 ///       -100 |==========|           |
331 ///            |          |     E     |
332 ///            |    B     |           |
333 ///            |----------------------|
334 ///         -180          0          180
335 ///                      phi
337 {
338  string rama_hash;
339  if ( phi <= 0. )
340  {
341  if ( psi > -100. && psi <= 50. )
342  {
343  rama_hash = get_valid_ramachandran_hashes()[ A ];
344  }
345  else
346  {
347  rama_hash = get_valid_ramachandran_hashes()[ B ];
348  }
349  }
350  else
351  {
352  if ( psi > -50. && psi <= 100. )
353  {
354  rama_hash = get_valid_ramachandran_hashes()[ L ];
355  }
356  else
357  {
358  rama_hash = get_valid_ramachandran_hashes()[ E ];
359  }
360  }
361 
362  // Return the lower case letter for the hash for Cis peptide planes
363  if ( omega > -90 && omega <= 90 )
364  {
365  transform( rama_hash.begin(), rama_hash.end(), rama_hash.begin(), ::tolower );
366  }
367  return rama_hash;
368 }
369 
371 {
372  if ( ! get_conformation_to_turn_type_map().count( rama_hash ) )
373  {
374  string cis_trans_hash = "";
375 
376  for ( string::const_iterator it = rama_hash.begin(); it != rama_hash.end(); ++it )
377  {
378  bool cis_peptide_bond = islower( * it );
379  string single_residue_rama_hash( 1, toupper( * it ) );
380 
381  if ( ! get_valid_ramachandran_hashes().contains( single_residue_rama_hash ) )
382  {
383  throw EXCN_Msg_Exception( "The Ramachandran hash '" + rama_hash + "' contains '" + string( 1, * it ) + ",' which is not valid. " +
384  "Valid Ramachandran hashes are 'A', 'B', 'L' and 'E' for trans peptide bonds, and 'a', 'b', 'l' and 'e' for cis peptide bonds."
385  );
386  }
387  cis_trans_hash += cis_peptide_bond ? "x" : "X";
388  }
389 
390  if ( ! get_conformation_to_turn_type_map().count( cis_trans_hash ) )
391  {
392  throw EXCN_Msg_Exception( "The Ramachandran hash '" + rama_hash +
393  "' is not recognized as a valid beta-turn type. " +
394  "The attempt to create a generic hash based on the omega dihedral angle resulted in '" +
395  cis_trans_hash + ",' which is also not recognized as a valid beta-turn type."
396  );
397  }
398 
399  rama_hash = cis_trans_hash;
400  }
401 }
402 
403 } // namespace features
404 } // namespace protocols