Rosetta 3.5
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
PairEPotential.cc
Go to the documentation of this file.
1 // -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
2 // vi: set ts=2 noet:
3 //
4 // (c) Copyright Rosetta Commons Member Institutions.
5 // (c) This file is part of the Rosetta software suite and is made available under license.
6 // (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
7 // (c) For more information, see http://www.rosettacommons.org. Questions about this can be
8 // (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
9 
10 /// @file core/scoring/PairEPotential.cc
11 /// @brief pairE knowledge-based potential class
12 /// @author Stuart G. Mentzer (Stuart_Mentzer@objexx.com)
13 /// @author Kevin P. Hinshaw (KevinHinshaw@gmail.com)
14 /// @author Original function authors of attributed functions noted below
15 /// @author Andrew Leaver-Fay (leaverfa@email.unc.edu)
16 
17 // Project headers
18 #include <core/chemical/AA.hh>
21 #include <basic/database/open.hh>
22 #include <basic/options/option.hh>
23 
24 // ObjexxFCL headers
25 
26 // Utility headers
27 #include <utility/exit.hh>
28 #include <utility/io/izstream.hh>
29 
30 // Numeric headers
31 #include <numeric/numeric.functions.hh>
32 #include <numeric/interpolation/interpolation.hh>
33 #include <numeric/xyzVector.hh>
34 // AUTO-REMOVED #include <numeric/xyzVector.io.hh>
35 
36 // option key includes
37 
38 #include <basic/options/keys/packing.OptionKeys.gen.hh>
39 #include <basic/options/keys/corrections.OptionKeys.gen.hh>
40 
41 #include <utility/vector1.hh>
42 #include <ObjexxFCL/format.hh>
43 
44 
45 
46 
47 namespace core {
48 namespace scoring {
49 
// Constructor initializer list and body for the pairE potential.
// NOTE(review): the constructor's signature line is absent from this listing (the embedded
// numbering jumps from 49 to 51) -- restore it from the upstream file before compiling.
// Work done here, in order:
//   1. set the five scalar parameters below;
//   2. open the pdb_pair_stats_fine database file and load it into pair_corr_;
//   3. if the packing option use_electrostatic_repulsion is set, overwrite the
//      like-charge (asp/glu and lys/arg) entries for distance bins 1-3.
51  pair_score_min_sep_( 1 ), // from pdbstatistics_pack
52  pair_score_cb_thresh_( 16 ),
53  pair_score_bin_range_( 1.5 ),
54  pair_score_bin_base_( 3.0 ),
55  max_bin_( 3 ) // APL from 3 to 2; defines range from 0 A to 6 A instead of all the way out to 7.5 A.
// NOTE(review): the comment on max_bin_ speaks of lowering the value from 3 to 2, yet the
// initializer is 3 -- confirm which one is intended.
56 {
57  //initialize all the data
58  // Constants
59  int const max_aa( 20 ); // Only set up for CAAs: NCAAs will share values via "cat_key()" lookup
60 
61  // Open the residue pair statistics file
62  utility::io::izstream stream;
63  basic::database::open( stream, "scoring/score_functions/PairEPotential/pdb_pair_stats_fine" );
64 
65  // Dimension/allocate the pair_corr array
// Extents are (max_aa, max_aa, 2, 2, 5); every element starts at TableProbability( 0.0 ).
66  pair_corr_.dimension( max_aa, max_aa, 2, 2, 5, TableProbability( 0.0 ) );
67 
68  // Read the file and assign the array elements
// One record = five integer indices (aa1, aa2, e1, e2, r12_bin) plus one probability.
// The indices are requested with width 2 and the probability with width 12 through the
// ObjexxFCL::fmt bite/skip manipulators (fixed-width column semantics assumed -- TODO confirm).
69  int aa1, aa2, e1, e2, r12_bin;
70  TableProbability pair_probability;
71  while ( stream ) {
72  using namespace ObjexxFCL::fmt;
73  stream
74  >> bite( 2, aa1 ) >> skip( 1 )
75  >> bite( 2, aa2 ) >> skip( 1 )
76  >> bite( 2, e1 ) >> skip( 1 )
77  >> bite( 2, e2 ) >> skip( 1 )
78  >> bite( 2, r12_bin ) >> skip( 1 )
79  >> bite( 12, pair_probability ) >> skip;
80 
81  //iwd A few entries have 0 observations, which leads to a likelihood ratio of 0,
82  //iwd which causes INF and NAN values when you take the log of it.
83  //iwd It's also unrealistic, as the value is just due to statistics of small numbers.
84  //iwd This (conservative) replacement is the minimum (non-zero) probability in the file.
// NOTE(review): this clamp runs before the `if ( stream )` test below, so after a failed read
// it operates on whatever pair_probability currently holds; the result is only stored
// into pair_corr_ when the stream is still good.
85  TableProbability const min_prob = 0.05;
86  if( pair_probability < min_prob ) pair_probability = min_prob;
87 
88  if ( stream ) {
89  pair_corr_( aa1, aa2, e1, e2, r12_bin ) = pair_probability;
90  }
91  }
92  stream.close();
93 
94 
95  //bk One interesting property of the pair statistics is that they favor unlike charges being
96  //bk near each other, but only very close range interactions between like charges are
97  //bk disfavored. This is presumably because like charged residues frequently come together to
98  //bk bind charged ligands. When performing protein design in the absence of extra ligands, it is
99  //bk therefore probably more correct to penalize like charges being near each other.
100  //bk The following section makes it unfavorable to have like charged residues near each other
101  //bk if the -use_electrostatic_repulsion flag is turned on. The penalty is set to be roughly equal
102  //bk but opposite to the favorable energy given to unlike charges.
103  { // Electrostatic repulsion
104  //using namespace core::conformation::amino::AminoAcidKeys;
105  using namespace core::chemical;//conformation;
106  using namespace basic::options;
107  using namespace OptionKeys::packing;
108 
// For both neighbour-count bins of each residue (e1, e2 in {1,2}) the like-charge pairs
// asp/asp, asp/glu, glu/asp, glu/glu, lys/lys, arg/arg, arg/lys and lys/arg are set to
// 0.3 / 0.5 / 0.75 in distance bins 1 / 2 / 3, replacing the values read from the file.
109  if ( option[ use_electrostatic_repulsion ] ) {
110  for ( int e1 = 1; e1 <= 2; ++e1 ) {
111  for ( int e2 = 1; e2 <= 2; ++e2 ) {
112  pair_corr_( aa_asp, aa_asp, e1, e2, 1 ) = 0.3; // 3-4.5 angstroms
113  pair_corr_( aa_asp, aa_asp, e1, e2, 2 ) = 0.5; // 4.5-6 angstroms
114  pair_corr_( aa_asp, aa_asp, e1, e2, 3 ) = 0.75; // 6-7.5 angstroms
115 
116  pair_corr_( aa_asp, aa_glu, e1, e2, 1 ) = 0.3;
117  pair_corr_( aa_asp, aa_glu, e1, e2, 2 ) = 0.5;
118  pair_corr_( aa_asp, aa_glu, e1, e2, 3 ) = 0.75;
119 
120  pair_corr_( aa_glu, aa_asp, e1, e2, 1 ) = 0.3;
121  pair_corr_( aa_glu, aa_asp, e1, e2, 2 ) = 0.5;
122  pair_corr_( aa_glu, aa_asp, e1, e2, 3 ) = 0.75;
123 
124  pair_corr_( aa_glu, aa_glu, e1, e2, 1 ) = 0.3;
125  pair_corr_( aa_glu, aa_glu, e1, e2, 2 ) = 0.5;
126  pair_corr_( aa_glu, aa_glu, e1, e2, 3 ) = 0.75;
127 
128  pair_corr_( aa_lys, aa_lys, e1, e2, 1 ) = 0.3;
129  pair_corr_( aa_lys, aa_lys, e1, e2, 2 ) = 0.5;
130  pair_corr_( aa_lys, aa_lys, e1, e2, 3 ) = 0.75;
131 
132  pair_corr_( aa_arg, aa_arg, e1, e2, 1 ) = 0.3;
133  pair_corr_( aa_arg, aa_arg, e1, e2, 2 ) = 0.5;
134  pair_corr_( aa_arg, aa_arg, e1, e2, 3 ) = 0.75;
135 
136  pair_corr_( aa_arg, aa_lys, e1, e2, 1 ) = 0.3;
137  pair_corr_( aa_arg, aa_lys, e1, e2, 2 ) = 0.5;
138  pair_corr_( aa_arg, aa_lys, e1, e2, 3 ) = 0.75;
139 
140  pair_corr_( aa_lys, aa_arg, e1, e2, 1 ) = 0.3;
141  pair_corr_( aa_lys, aa_arg, e1, e2, 2 ) = 0.5;
142  pair_corr_( aa_lys, aa_arg, e1, e2, 3 ) = 0.75;
143  }
144  }
145  }
146  }
147 }
148 
/// @brief Predicate on a residue `rsd`: true only when it is a protein residue that is
/// also polar or aromatic.
// NOTE(review): the line holding the function name and parameter list is missing from this
// listing (the embedded numbering jumps from 149 to 151); the body reads a residue named `rsd`.
149 bool
151 {
152  return ( (rsd.is_polar() || rsd.is_aromatic() ) && rsd.is_protein() );
153 }
154 
/// @brief Convenience overload: returns the pair energy for res1/res2 without exposing the
/// likelihood ratios.
/// @param res1, res2  the two residues being scored
/// @param res1_num_10A_neighbors, res2_num_10A_neighbors  neighbour counts forwarded unchanged
/// @return whatever the seven-argument pair_term_energy overload returns
// NOTE(review): the line holding the function name is missing from this listing (the embedded
// numbering jumps from 155 to 157).
155 Energy
157  conformation::Residue const & res1,
158  int res1_num_10A_neighbors,
159  conformation::Residue const & res2,
160  int res2_num_10A_neighbors
161 ) const
162 {
// Throw-away sinks for the three ratio out-parameters; they are never read afterwards.
163  Probability temp1, temp2, temp3;
164  return pair_term_energy( res1, res1_num_10A_neighbors, res2, res2_num_10A_neighbors, temp1, temp2, temp3 );
165 }
166 
/// @brief Pair energy of two residues: -log of a likelihood ratio interpolated between two
/// neighbouring distance bins of pair_corr_.
/// @param res1, res2  the residues; the asserts below require distinct sequence positions and
///        polar-or-aromatic residues
/// @param res1_num_10A_neighbors, res2_num_10A_neighbors  neighbour counts, each reduced to a
///        two-valued bin by comparison with pair_score_cb_thresh_
/// @param[out] pair_lhood_ratio       interpolated ratio whose -log is returned
/// @param[out] pair_lhood_ratio_high  upper interpolation end-point
/// @param[out] pair_lhood_ratio_low   lower interpolation end-point
/// @return Energy( 0.0 ) on any of the early-exit paths, otherwise -std::log( pair_lhood_ratio )
/// @note On every early-exit path the three out-parameters are left unwritten.
// NOTE(review): the line holding the function name is missing from this listing (the embedded
// numbering jumps from 167 to 169).
167 Energy
169  conformation::Residue const & res1,
170  int res1_num_10A_neighbors,
171  conformation::Residue const & res2,
172  int res2_num_10A_neighbors,
173  Probability & pair_lhood_ratio,
174  Probability & pair_lhood_ratio_high,
175  Probability & pair_lhood_ratio_low
176 ) const
177 {
178  //using namespace pdbstatistics_pack; // Various constants and tables
179  //using namespace pdbstatistics_pack::pdbstatistics; // Energy lookup tables
180  using namespace basic::options;
181  using namespace core::chemical; //conformation;
182  using namespace basic::options::OptionKeys;
183  //using namespace core::conformation::amino::AminoAcidKeys;
184  using numeric::abs_difference;
185  using numeric::interpolation::interpolated;
186 
187  assert( res1.seqpos() != res2.seqpos() ); // Only call for distinct residues
188  assert( res1.is_polar() || res1.is_aromatic() );
189  assert( res2.is_polar() || res2.is_aromatic() ); // Only for polar or aromatic residues: Caller does exclusion (prevents call overhead)
190  //assert( pair_corr_.I1() == pair_corr_.I2() ); // First 2 index ranges should match //this is a silly assert
191 
192 // if ( !is_protein(aa1) || !is_protein(aa2) ) ) return; //dr okay for dupes but not other nnaa where we won't have this info //! Change to an NCAA exclusion????
193 
194  //jk option to suppress computing pair term for histidine (numbers are skewed due to metal-binding sites)
// Early exit 1: both residues are his and corrections::score::no_his_his_pairE is set.
195  if ( ( option[ corrections::score::no_his_his_pairE ] ) &&
196  ( ( res1.aa() == aa_his ) &&
197  ( res2.aa() == aa_his ) ) )
198  {
199  return Energy( 0.0 );
200  }
201 
// Early exit 2: one residue is his and the other is asp or glu (either order) and
// corrections::score::no_his_DE_pairE is set.
202  if ( ( option[ corrections::score::no_his_DE_pairE ] ) &&
203  ((( res1.aa() == aa_his ) &&
204  ( res2.aa() == aa_asp || res2.aa() == aa_glu ) ) ||
205  ( ( res1.aa() == aa_asp || res1.aa() == aa_glu ) &&
206  ( res2.aa() == aa_his ))) )
207  {
208  return Energy( 0.0 );
209  }
210 
211 
212 
// Early exit 3: sequence-separation filter, only evaluated when pair_score_min_sep_ exceeds 1.
213  if ( pair_score_min_sep_ > 1 ) { // Short-circuit for speed
214  if ( res1.polymeric_sequence_distance( res2 ) < pair_score_min_sep_ ) return Energy( 0.0 );
215  }
216 
217  // Amino acid indexes for lookup
// Early exit 4: either amino-acid index lies beyond the first extent of pair_corr_.
218  int const pair_corr_n_AA( pair_corr_.u1() );
219  AA const aa1n( res1.aa() );
220  if ( aa1n > pair_corr_n_AA ) return Energy( 0.0 ); // Unsupported amino acid
221  AA const aa2n( res2.aa() );
222  if ( aa2n > pair_corr_n_AA ) return Energy( 0.0 ); // Unsupported amino acid
223 
224  // Number of neighbor bins
// Bin 1 when the count is at most pair_score_cb_thresh_, bin 2 otherwise.
225  int const e1( ( res1_num_10A_neighbors <= pair_score_cb_thresh_ ) ? 1 : 2 );
226  int const e2( ( res2_num_10A_neighbors <= pair_score_cb_thresh_ ) ? 1 : 2 );
227 
228  // Action center distance
229  Distance const r12( res1.actcoord().distance( res2.actcoord() ) );
230  // If r12 == 0, one of two things has happened:
231  // (1) You're comparing a residue to itself -- don't.
232  // (2) The residues actcoords aren't being updated and are (0,0,0)
233  // -- see ResidueType.requires_actcoord() and the appropriate .params files.
234  assert( r12 > 0 );
235 
236  // 1. Get the lower of the two bin averages bracketing r12 and set r12_bin to this value.
237  // Consider 'bin average' to exist halfway through bin's range.
238  //
239  // 2. Find the difference between the low_bin average and the actual value of r12 in bin units
240  //
241  //int const max_bin( 3 ); //apl should this be hard coded here?
242 
// NOTE(review): the first argument of this std::max call is missing from the listing (the
// embedded numbering jumps from 243 to 245). Presumably it is r12 measured from
// pair_score_bin_base_ in units of pair_score_bin_range_ -- confirm against the upstream file.
243  Distance const r12_bin_real( std::max(
245  Distance( 0.5 ) ) ); // First bin is [ 3 A, 4.5 A ] but we use it to represent r12 down to 0 A
// r12_bin is the rounded bin position (numeric::nint, presumably nearest-integer), floored at 1.
246  int const r12_bin( std::max( numeric::nint( r12_bin_real ), 1 ) );
// Early exit 5: the bin lies beyond max_bin_.
247  if ( r12_bin > max_bin_ ) return Energy( 0.0 );
// Offset of r12_bin_real from ( r12_bin - 0.5 ); asserted to lie in [0, 1].
248  Distance const r12_alpha( r12_bin_real - ( r12_bin - Distance( 0.5 ) ) );
249  assert( ( r12_alpha >= Distance( 0.0 ) ) && ( r12_alpha <= Distance( 1.0 ) ) );
250 
251  // Set the low and high bin averages for interpolation
252 
// Low end-point: mean of the table entry and its mirror (residue roles swapped) at r12_bin.
253  pair_lhood_ratio_low =
254  ( pair_corr_(aa1n,aa2n,e1,e2,r12_bin) + pair_corr_(aa2n,aa1n,e2,e1,r12_bin) ) * Probability( 0.5 );
255 
// High end-point: 1.0 in the last bin; equal to the low end-point when r12_bin_real sits at
// its 0.5 floor; otherwise the same mirrored mean taken at r12_bin + 1.
256  pair_lhood_ratio_high = r12_bin == max_bin_ ? Probability( 1.0 ) :
257  ( r12_bin_real == Distance( 0.5 ) ? pair_lhood_ratio_low :
258  ( pair_corr_(aa1n,aa2n,e1,e2,r12_bin+1) + pair_corr_(aa2n,aa1n,e2,e1,r12_bin+1) ) * Probability( 0.5 ) );
259 
260  //std::cout << "pairE potential: residues " << res1.seqpos() << " & " << res2.seqpos() << " with pairE = ";
261  //std::cout << -std::log( interpolated( r12_alpha, pair_lhood_ratio_low, pair_lhood_ratio_high ) ) << std::endl;
262 
263 
264  // Return the energy
265  pair_lhood_ratio = interpolated( r12_alpha, pair_lhood_ratio_low, pair_lhood_ratio_high );
266  return -std::log( pair_lhood_ratio );
267 }
268 
269 
/// @brief Pair energy of res1/res2 together with a distance derivative written to dpairE_dr.
/// @param res1, res2  the two residues being scored
/// @param res1_num_10A_neighbors, res2_num_10A_neighbors  neighbour counts forwarded unchanged
/// @param[out] dpairE_dr  set to -(1/ratio) * (high - low) / pair_score_bin_range_
/// @return the energy returned by the seven-argument pair_term_energy overload
// NOTE(review): the line holding the function name is missing from this listing (the embedded
// numbering jumps from 270 to 272).
270 Energy
272  conformation::Residue const & res1,
273  int res1_num_10A_neighbors,
274  conformation::Residue const & res2,
275  int res2_num_10A_neighbors,
276  EnergyDerivative & dpairE_dr
277 ) const
278 {
// All three ratios start at 1.0: if pair_term_energy returns without writing them, the
// expression below evaluates to zero (high - low == 0) with no division by zero.
279  Probability pair_lhood_ratio( 1.0 ), pair_lhood_ratio_low( 1.0 ), pair_lhood_ratio_high( 1.0 );
280 
281  Energy pairE = pair_term_energy( res1, res1_num_10A_neighbors, res2, res2_num_10A_neighbors,
282  pair_lhood_ratio, pair_lhood_ratio_high, pair_lhood_ratio_low);
283 
// Derivative of -log( ratio ); assumes the interpolation is linear between the two
// end-points across one bin of width pair_score_bin_range_ -- TODO confirm.
284  dpairE_dr = -(1/pair_lhood_ratio) *
285  ( pair_lhood_ratio_high - pair_lhood_ratio_low ) / pair_score_bin_range_;
286 
287  return pairE;
288 
289 }
290 
291 }
292 }