Rosetta 3.5
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
P_AA.cc
Go to the documentation of this file.
1 // -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
2 // vi: set ts=2 noet:
3 //
4 // (c) Copyright Rosetta Commons Member Institutions.
5 // (c) This file is part of the Rosetta software suite and is made available under license.
6 // (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
7 // (c) For more information, see http://www.rosettacommons.org. Questions about this can be
8 // (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
9 
10 /// @file core/scoring/P_AA.cc
11 /// @brief Amino acid probability arrays and functions
12 /// @author Stuart G. Mentzer (Stuart_Mentzer@objexx.com)
13 /// @author Andrew Leaver-Fay -- porting Stuarts code
14 
15 // Unit headers
16 #include <core/scoring/P_AA.hh>
17 
18 // Project headers
19 #include <basic/database/open.hh>
20 #include <basic/options/option.hh>
21 #include <basic/options/keys/score.OptionKeys.gen.hh>
22 #include <basic/options/keys/corrections.OptionKeys.gen.hh>
23 #include <basic/options/keys/OptionKeys.hh>
24 
26 #include <core/id/TorsionID.hh>
27 
28 // Numeric headers
29 #include <numeric/conversions.hh>
30 #include <numeric/numeric.functions.hh>
31 #include <numeric/interpolation/periodic_range/full/interpolation.hh>
32 #include <numeric/interpolation/periodic_range/half/interpolation.hh>
33 
34 #include <numeric/interpolation/spline/Bicubic_spline.hh>
35 
36 // ObjexxFCL headers
37 // AUTO-REMOVED #include <ObjexxFCL/FArray1D.hh>
38 #include <ObjexxFCL/FArray2D.hh>
39 #include <ObjexxFCL/format.hh>
40 
41 // Utility headers
42 #include <utility/io/izstream.hh>
43 
44 // C++ headers
45 #include <cassert>
46 
47 #include <utility/vector1.hh>
48 
49 
50 
51 namespace core {
52 namespace scoring {
53 
54 
55 /// @brief Amino acid probability array: P(aa)
56 //Probability_AA P_AA;
57 
58 /// @brief Amino acid conditional probability wrt number of neighbors array: P(aa|neighbors)
59 //Probability_AA_n P_AA_n;
60 
61 /// @brief Amino acid conditional probability wrt (phi,psi) array: P(aa|phi,psi)
62 //Probability_AA_pp P_AA_pp;
63 
64 
65 /// @brief ctor -- Initialize the amino acid probability data structures
/// @details Loads, in this order, the P(aa), P(aa|neighbors) and P(aa|phi,psi) tables.
/// @note NOTE(review): the declarator line (listing line 66) is absent from this listing.
67 {
68  read_P_AA(); // run first: with bicubic interpolation enabled, read_P_AA_pp() divides by P_AA_ entries
69  read_P_AA_n();
70  read_P_AA_pp();
71 }
72 
74 
75 
76 /// @brief Read the amino acid probability file into P_AA_
77 ///
78 /// @note Only the keys present in the file are given entries
/// @note Range and normalization checks below are assert()s only, so they are compiled out when NDEBUG is defined
/// @note NOTE(review): the declarator line (listing line 80) is absent from this listing
79 void
81 {
82  using namespace core::chemical;
83 
84  // Read the probability file and load the array
85  std::string id;
86  Probability probability, probability_sum( 0.0 );
87  utility::io::izstream stream;
88  basic::database::open( stream, "scoring/score_functions/P_AA_pp/P_AA" );
89 
90  P_AA_.resize( num_canonical_aas ); // one slot per canonical amino acid, indexed by AA value
91 
92  while ( stream ) {
93  using namespace ObjexxFCL::fmt;
// Record layout as parsed here: 3 columns of residue name, 1 skipped column, 9 columns of probability;
// the trailing bare skip presumably discards the remainder of the line -- confirm against ObjexxFCL::fmt
94  stream >> bite( 3, id ) >> skip( 1 ) >> bite( 9, probability ) >> skip;
95  if ( stream ) { // only use the record if every field above was extracted
96  assert( ( probability >= Probability( 0.0 ) ) && ( probability <= Probability( 1.0 ) ) );
97  AA aa = aa_from_name( id );
98  assert( ( aa >= 1 ) && ( aa <= num_canonical_aas ) );
99  probability_sum += probability; // accumulated for the normalization check after the loop
100  P_AA_[ aa ] = probability;
101  } //! ADD INPUT ERROR HANDLING
102  }
103  stream.close();
104 
105  // Check probabilities sum to ~ 1
106  assert( numeric::eq_tol( probability_sum, Probability( 1.0 ), Probability( .0001 ), Probability( .0001 ) ) );
107 }
108 
109 
110 /// @brief Read the amino acid conditional probability wrt (neighbors) file into P_AA_n_
111 ///
112 /// @note Only the keys present in the file are given entries
113 /// @note The file entries can be in any order
/// @note Input validation is assert()-only; the per-n normalization check is additionally fenced by NDEBUG
/// @note NOTE(review): the declarator line (listing line 115) and listing line 126 are absent from this
///       listing; line 126 presumably sizes the outer P_AA_n_ container before it is indexed below -- confirm
114 void
116 {
117  using namespace core::chemical;
118 
119  // Read the probability file and load the array
120  std::string id;
121  int n; // Number of neighbors
122  Probability probability;
123  utility::io::izstream stream;
124  basic::database::open( stream, "scoring/score_functions/P_AA_pp/P_AA_n" );
125 
127  for ( Size ii = 1; ii <= chemical::num_canonical_aas; ++ii ) P_AA_n_[ ii ].resize( 14 ); // one slot per supported neighbor count (1..14)
128 
129  while ( stream ) {
130  using namespace ObjexxFCL::fmt;
// Record layout as parsed here: 3 columns of residue name, 1 skipped, 2 columns of neighbor count,
// 1 skipped, 9 columns of probability
131  stream >> bite( 3, id ) >> skip( 1 ) >> bite( 2, n ) >> skip( 1 ) >> bite( 9, probability ) >> skip;
132  if ( stream ) { // only use the record if every field above was extracted
133  assert( ( n >= 1 ) && ( n <= 14 ) ); // Support n in [1,14]
134  assert( ( probability >= Probability( 0.0 ) ) && ( probability <= Probability( 1.0 ) ) );
135  AA aa = aa_from_name( id );
136  assert( ( aa >= 1 ) && ( aa <= num_canonical_aas ) );
137  //AminoAcidKey const & key( AminoAcidKeys::key( id ) );
138  P_AA_n_[ aa ][ n ] = probability;
139  } //! ADD INPUT ERROR HANDLING
140  }
141  stream.close();
142 
143 #ifndef NDEBUG
144  // Check probabilities sum to ~ 1 for each (n)
145  for ( int n = 1; n <= 14; ++n ) {
146  Probability probability_sum( 0.0 );
147  for ( Probability_AA_n::ConstIterator i = P_AA_n_.begin(), e = P_AA_n_.end(); i != e ; ++i ) {
148  probability_sum += (*i)[ n ]; // sum over amino acids at a fixed neighbor count
149  }
150  assert( numeric::eq_tol( probability_sum, Probability( 1.0 ), Probability( .0001 ), Probability( .0001 ) ) );
151  }
152 #endif
153 }
154 
155 
156 /// @brief Read the amino acid conditional probability wrt (phi,psi) file into P_AA_pp_
157 ///
158 /// @note Only the keys present in the file are given entries
159 /// @note The file entries can be in any order
160 /// @note Missing entries for a present key are assigned zero
/// @details The table named by the p_aa_pp option is stored as one 36x36 grid (10-degree bins) per
/// canonical amino acid. When use_bicubic_interpolation is set, a periodic bicubic spline of
/// -log( P(aa|phi,psi) / P(aa) ) is additionally trained per amino acid, which reads P_AA_.
/// @note NOTE(review): the declarator line (listing line 162) and listing lines 185 and 245 are absent
///       from this listing; they presumably size P_AA_pp_ and P_AA_pp_energy_splines_ before the loops
///       that index them -- confirm
161 void
163 {
164  using namespace core::chemical;
165  using namespace basic::options;
166  using namespace basic::options::OptionKeys::score;
167  using namespace basic::options::OptionKeys::corrections::score;
168  typedef FArray2D_Probability::IR IR; // Index range type
169 
170  // Read the probability file and load the array
171  Angle phi, psi;
172  std::string id;
173  Probability probability;
174  utility::io::izstream stream;
175 
176  // search in the local directory first
177  stream.open( option[ p_aa_pp ] );
178  // then database
179  if ( !stream.good() ) {
180  stream.close();
181  basic::database::open( stream, option[ p_aa_pp ] );
182  }
183 
184  if ( !stream.good() ) utility_exit_with_message( "Unable to open p_aa_pp map!" );
186  for ( Size ii = 1; ii <= chemical::num_canonical_aas; ++ii ) {
187  P_AA_pp_[ ii ].dimension( IR( 0, 35 ), IR( 0, 35 ), Probability( 0.0 ) ); // zero-based 36x36 grid, zero-filled
188  }
189 
190  while ( stream ) {
191  using namespace ObjexxFCL::fmt;
192 
// Record layout as parsed here: 4 columns phi, 1 skipped, 4 columns psi, 1 skipped,
// 3 columns residue name, 17 skipped, 7 columns probability
193  stream >> bite( 4, phi ) >> skip( 1 ) >> bite( 4, psi ) >> skip( 1 )
194  >> bite( 3, id ) >> skip( 17 ) >> bite( 7, probability ) >> skip;
195 
196  if ( ( stream ) ) {
197  assert( ( phi >= Angle( -180.0 ) ) && ( phi <= Angle( 180.0 ) ) );
198  assert( ( psi >= Angle( -180.0 ) ) && ( psi <= Angle( 180.0 ) ) );
199  assert( ( probability >= Probability( 0.0 ) ) && ( probability <= Probability( 1.0 ) ) );
200 
201  AA aa = aa_from_name( id );
202  assert( ( aa >= 1 ) && ( aa <= num_canonical_aas ) );
203 
204  if ( option[ p_aa_pp_nogridshift ] ) {
// Unshifted grid: bin index is the angle rounded to the nearest multiple of 10, wrapped into 0..35
205  int const i_phi( numeric::mod( 36 + numeric::nint( phi / Angle( 10.0 ) ), 36 ) );
206  int const i_psi( numeric::mod( 36 + numeric::nint( psi / Angle( 10.0 ) ), 36 ) );
207 
208  if ( probability == Probability( 0.0 ) ) probability = 1e-6; // presumably keeps the later -log() finite
209  P_AA_pp_[ aa ]( i_phi, i_psi ) = probability;
210  }
211  else {
// Shifted grid: half a bin is subtracted before rounding, then the index is wrapped into 0..35
212  int const i_phi( numeric::mod( 36 + numeric::nint( ( phi / Angle( 10.0 ) ) - Angle( 0.5 ) ), 36 ) );
213  int const i_psi( numeric::mod( 36 + numeric::nint( ( psi / Angle( 10.0 ) ) - Angle( 0.5 ) ), 36 ) );
214 
215  if ( probability == Probability( 0.0 ) ) probability = .001; //! Hack from rosetta++ except leave .001 entries alone
216  P_AA_pp_[ aa ]( i_phi, i_psi ) = probability;
217  }
218 
219  } //! ADD INPUT ERROR HANDLING
220  }
221  stream.close();
222 
223 //! P_AA_pp file is NOT a proper distribution: Some (phi,psi) bins have total probabilities of zero
224 //! This test must be left off until the file distribution is made proper or adapted to the file semantics
225 //#ifndef NDEBUG
226 // // Check probabilities sum to ~ 1 for each (phi,psi)
227 // for ( int i_phi = 0; i_phi <= 35; ++i_phi ) {
228 // for ( int i_psi = 0; i_psi <= 35; ++i_psi ) {
229 // Probability probability_sum( 0.0 );
230 // for ( Probability_AA_pp::ConstIterator i = P_AA_pp.begin(), e = P_AA_pp.end(); i != e ; ++i ) {
231 // probability_sum += (*i)( i_phi, i_psi );
232 // }
233 // assert( numeric::eq_tol( probability_sum, Probability( 1.0 ), Probability( .001 ), Probability( .001 ) ) );
234 // }
235 // }
236 //#endif
237 
238 
239 
240  if ( basic::options::option[ basic::options::OptionKeys::corrections::score::use_bicubic_interpolation ] ) {
241 
242  // Now prepare the bicubic spline
243  using namespace numeric;
244  using namespace numeric::interpolation::spline;
246  for ( Size ii = 1; ii <= chemical::num_canonical_aas; ++ii ) {
247  BicubicSpline paappEspline;
248  MathMatrix< Real > energy_vals( 36, 36 );
249  for ( Size jj = 0; jj < 36; ++jj ) {
250  for ( Size kk = 0; kk < 36; ++kk ) {
251  energy_vals( jj, kk ) = -std::log( P_AA_pp_[ ii ]( jj, kk ) /P_AA_[ ii ] ); // the spline interpolates energies, not probabilities
252  }
253  }
254  BorderFlag periodic_boundary[2] = { e_Periodic, e_Periodic };
255  Real start_vals[2];
256  if ( basic::options::option[ basic::options::OptionKeys::corrections::score::p_aa_pp_nogridshift ] ) {
257  start_vals[0] = start_vals[1] = 0.0; // if this flag is on, then the PAAPP table is not shifted and aligns with the ten-degree boundaries.
258  } else {
259  start_vals[0] = start_vals[1] = 5.0; // otherwise, the grid is shifted by five degrees.
260  }
261  Real deltas[2] = {10.0, 10.0}; // grid is 10 degrees wide
262  bool lincont[2] = {false,false}; //meaningless argument for a bicubic spline with periodic boundary conditions
263  std::pair< Real, Real > unused[2];
264  unused[0] = std::make_pair( 0.0, 0.0 );
265  unused[1] = std::make_pair( 0.0, 0.0 );
266  paappEspline.train( periodic_boundary, start_vals, deltas, energy_vals, lincont, unused );
267  P_AA_pp_energy_splines_[ ii ] = paappEspline; // stored by value, one spline per amino acid
268  }
269  }
270 }
271 
272 
273 /// @brief Probability energies from P(aa|phi,psi)
/// @details Returns zero for residues beyond num_canonical_aas, for termini and for virtual residues.
/// Otherwise evaluates -log( P(aa|phi,psi) / P(aa) ) at the residue's mainchain torsions 1 and 2,
/// using the bicubic spline when use_bicubic_interpolation is set and bilinear interpolation
/// (full or half periodic range, chosen by p_aa_pp_nogridshift) when it is not.
/// @note NOTE(review): the declarator line (listing line 275) is absent from this listing.
274 Energy
276 {
277  using namespace core::chemical;
278  using numeric::conversions::degrees; // not referenced in the visible body
279 
280  AA const aa( res.aa()); //! Need to decide if/how/where to exclude NCAAs
281  if ( aa > chemical::num_canonical_aas ) return 0.0;
282 
283  if ( ! res.is_terminus() && ! res.is_virtual_residue() )//ToDo Also exclude chainbreaks
284  { // Probabilities for this amino acid are present in files and it is not a terminus
285  Angle const phi( res.mainchain_torsion( 1 ) );
286  Angle const psi( res.mainchain_torsion( 2 ) );
287  if ( basic::options::option[ basic::options::OptionKeys::corrections::score::use_bicubic_interpolation ] ) {
288  return P_AA_pp_energy_splines_[ aa ].F( phi, psi ); // the spline already stores -log( P(aa|phi,psi) / P(aa) )
289  } else {
290  if ( basic::options::option[ basic::options::OptionKeys::corrections::score::p_aa_pp_nogridshift ] ) { // the format of p_aa_pp changed from using i*10+5 to i*10 as grid
291  using numeric::interpolation::periodic_range::full::bilinearly_interpolated;
292  return -std::log( bilinearly_interpolated( phi, psi, Angle( 10.0 ), 36, P_AA_pp_[ aa ] ) / P_AA_[ aa ] );
293  } else {
294  using numeric::interpolation::periodic_range::half::bilinearly_interpolated;
295  return -std::log( bilinearly_interpolated( phi, psi, Angle( 10.0 ), 36, P_AA_pp_[ aa ] ) / P_AA_[ aa ] );
296  }
297  }
298  } else { // Probabilities for this amino acid aren't present in files or it is a terminus
299  return Energy( 0.0 );
300  }
301 }
302 
303 
304 /// @brief Probability energies from P(aa|phi,psi): Low level calculation for non-terminus position
305 Energy
306 P_AA::P_AA_pp_energy( chemical::AA const aa, Angle const phi, Angle const psi ) const
307 {
308  using numeric::interpolation::periodic_range::half::bilinearly_interpolated;
309 
310  if ( aa <= chemical::num_canonical_aas ) {
311  // Probabilities for this amino acid are present in files
312  if ( basic::options::option[ basic::options::OptionKeys::corrections::score::p_aa_pp_nogridshift ] ) { // the format of p_aa_pp changed from using i*10+5 to i*10 as grid
313  return -std::log( numeric::interpolation::periodic_range::full::bilinearly_interpolated( phi, psi, Angle( 10.0 ), 36, P_AA_pp_[ aa ] ) / P_AA_[ aa ] );
314  }
315  else {
316  //return -std::log( bilinearly_interpolated( phi, psi, Angle( 10.0 ), 36, P_AA_pp_[ aa ] ) / P_AA_[ aa ] );
317  numeric::MathVector< Real > args(2);
318  args(0) = phi;
319  args(1) = psi;
320  return P_AA_pp_energy_splines_[ aa ].F( args );
321  }
322  } else { // Probabilities for this amino acid aren't present in files or it is a terminus
323  return Energy( 0.0 );
324  }
325 }
326 
327 ////////////////////////////////////////////////////////////////////////////////
/// @brief Derivative of the P(aa|phi,psi) energy of a residue with respect to one backbone torsion
/// @details Returns zero unless the residue is a canonical, non-terminal, non-virtual residue and
/// tor_id is backbone torsion 1 (phi) or 2 (psi). With bilinear interpolation the result is
/// -( 1 / p ) * dp/dtorsion, i.e. the derivative of -log( p / P_AA_[ aa ] ); the constant
/// P_AA_[ aa ] drops out. With bicubic interpolation the spline's own partial derivative is returned.
/// @note NOTE(review): the return-type and name lines (listing lines 328-329) are absent from this listing.
330  conformation::Residue const & res,
331  id::TorsionID const & tor_id
332 ) const
333 {
334 
335  using namespace core::chemical;
336  using numeric::conversions::degrees; // not referenced in the visible body
337  using numeric::interpolation::periodic_range::half::bilinearly_interpolated;
338 
339  AA const aa( res.aa() ); //! Need to decide if/how/where to exclude NCAAs
340  if ( aa > chemical::num_canonical_aas )
341  return 0.0;
342 
343  /// APL ARGH!!! MAGIC NUMBERS!!!
344  Size const phi_id = 1;
345  Size const psi_id = 2;
346 
// NOTE(review): the last conjunct below is joined with bitwise '&' rather than '&&'; if both operands
// are bool the result is the same, but is_virtual_residue() is then always evaluated -- confirm intent
347  if ( ! res.is_terminus() && ( tor_id.type() == id::BB && (tor_id.torsion() == phi_id || tor_id.torsion() == psi_id )) & ! res.is_virtual_residue() ) {
348  //ToDo Also exclude chainbreaks
349  // Probabilities for this amino acid are present in files and it is not a terminus
350  Angle const phi( res.mainchain_torsion( phi_id ));
351  Angle const psi( res.mainchain_torsion( psi_id ));
352  Probability dp_dphi( 0.0 ), dp_dpsi( 0.0 ); // filled in by the bilinear interpolation calls below
353 
// In every switch below the default branch cannot be reached: the enclosing condition already
// restricts tor_id.torsion() to phi_id or psi_id
354  if ( basic::options::option[ basic::options::OptionKeys::corrections::score::use_bicubic_interpolation ] ) {
355  switch ( tor_id.torsion() ) {
356  case phi_id :
357  return P_AA_pp_energy_splines_[ aa ].dFdx( phi, psi );
358  case psi_id :
359  return P_AA_pp_energy_splines_[ aa ].dFdy( phi, psi );
360  default :
361  return EnergyDerivative( 0.0 );
362  }
363  }else {
364  if ( basic::options::option[ basic::options::OptionKeys::corrections::score::p_aa_pp_nogridshift ] ) { // the format of p_aa_pp changed from using i*10+5 to i*10 as grid
365  Probability const interp_p = numeric::interpolation::periodic_range::full::bilinearly_interpolated( phi, psi, Angle( 10.0 ), 36, P_AA_pp_[ aa ], dp_dphi, dp_dpsi );
366  //Energy Paa_ppE = -std::log( interp_p / P_AA_[ aa ] );
367  switch ( tor_id.torsion() ) {
368  case phi_id :
369  return /*dlog_Paa_dphi = */ -( 1.0 / interp_p ) * dp_dphi; break; // the break after return is unreachable
370  case psi_id :
371  return /*dlog_Paa_dpsi = */ -( 1.0 / interp_p ) * dp_dpsi; break;
372  default :
373  return EnergyDerivative( 0.0 );
374  }
375  } else {
376  Real const interp_p = bilinearly_interpolated( phi, psi, Angle( 10.0 ), 36, P_AA_pp_[ aa ], dp_dphi, dp_dpsi ); // half-range variant, via the using-declaration above
377  switch ( tor_id.torsion() ) {
378  case phi_id :
379  return /*dlog_Paa_dphi = */ -( 1.0 / interp_p ) * dp_dphi; break;
380  case psi_id :
381  return /*dlog_Paa_dpsi = */ -( 1.0 / interp_p ) * dp_dpsi; break;
382  default :
383  return EnergyDerivative( 0.0 );
384  }
385  }
386  }
387  } else { // Probabilities for this amino acid aren't present in files or it is a terminus
388  return EnergyDerivative( 0.0 );
389  }
390 }
391 
392 
393 ///@brief Probability energies for P(aa)
394 ///
395 ///@remarks No derivative function since there are no degrees of freedom to vary for a P_AA energy like for P_AA_pp.
///@return -log( P_AA_[ aa ] ) for the residue's amino acid, or zero when it lies beyond num_canonical_aas
///@note NOTE(review): the declarator line and opening brace (listing line 397) are absent from this listing.
396 Energy
398 
399  using namespace core::chemical;
400 
401  AA const aa( res.aa()); //! Need to decide if/how/where to exclude NCAAs
402  if ( aa > chemical::num_canonical_aas )
403  return 0.0;
404 
405  return -std::log( P_AA_[ aa ] );
406 }
407 
408 
409 } // namespace scoring
410 } // namespace core
411