Rosetta 3.5
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
EnvPairPotential.cc
Go to the documentation of this file.
1 // -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
2 // vi: set ts=2 noet:
3 //
4 // (c) Copyright Rosetta Commons Member Institutions.
5 // (c) This file is part of the Rosetta software suite and is made available under license.
6 // (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
7 // (c) For more information, see http://www.rosettacommons.org. Questions about this can be
8 // (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
9 
10 /// @file core/scoring/methods/EnvPairPotential.cc
11 /// @brief Statistically derived rotamer pair potential class implementation
12 /// @author Phil Bradley
13 /// @author Andrew Leaver-Fay
14 
15 
16 // Unit headers
18 
19 // Package headers
20 #include <core/scoring/Energies.hh>
22 
23 // Project headers
24 #include <core/chemical/AA.hh>
27 #include <basic/database/open.hh>
28 #include <core/pose/Pose.hh>
30 #include <basic/datacache/BasicDataCache.hh>
31 #include <basic/prof.hh>
32 // AUTO-REMOVED #include <core/scoring/TwelveANeighborGraph.hh>
33 // Utility headers
34 #include <utility/io/izstream.hh>
35 
36 #include <utility/vector1.hh>
37 
38 
39 
40 // just for debugging
41 //#include <ObjexxFCL/format.hh>
42 
43 // C++
44 
45 
46 namespace core {
47 namespace scoring {
48 
49 /// @details Copy constructors must copy all data, not just some...
// Copies the three per-residue neighbor-count arrays (fcen6_, fcen10_, fcen12_) from src.
// CacheableData() in the initializer list is presumably the base-class constructor -- confirm.
// NOTE(review): this text is an extracted listing. The signature line (listing line 50) and
// one body statement (listing line 56) are absent here; restore them from the original file
// before relying on this block.
51  CacheableData()
52 {
53  fcen6_ = src.fcen6_;
54  fcen10_ = src.fcen10_;
55  fcen12_ = src.fcen12_;
57 }
58 
59 void
// Sizes the three fcen arrays to pose.total_residue() and resets every entry to the value a
// residue contributes for itself: 1.0 for fcen6 and fcen10, 0.0 for fcen12.
// NOTE(review): the signature line (listing line 60) is absent from this extracted listing.
// The call `cenlist.initialize( pose )` elsewhere in this file suggests this is the
// CenListInfo initialize method taking a pose -- confirm against the original file.
61 {
62  Size const nres( pose.total_residue() );
63 
 // The 0.0 fill value given to resize() is overwritten for every element by the
 // std::fill calls below, so only the new length matters here.
64  fcen6_.resize( nres, 0.0 );
65  fcen10_.resize( nres, 0.0 );
66  fcen12_.resize( nres, 0.0 );
67 
68  std::fill( fcen6_.begin(), fcen6_.end(), 1.0 ); // 1 because a residue is w/i 6 A of itself
69  std::fill( fcen12_.begin(), fcen12_.end(), 0.0 ); // 0 because a residue is not between 6 and 12 A of itself
70  std::fill( fcen10_.begin(), fcen10_.end(), 1.0 ); // 1 because a residue is w/i 10A of itself
71 }
72 
// Constructor initializer list and body: sets the squared-distance cutoffs and the
// interpolation padding constants, then loads the env, cbeta-density, pair and cenpack
// tables from the database.
// NOTE(review): the constructor header (listing line 73) is absent from this extracted listing.
// All distance constants below are squared distances (144.0 == 12 * 12, and so on).
74  cen_dist_cutoff2( 144.0 ),
75 
76  cen_dist6sqr_( 6 * 6 ),
77  cen_dist10sqr_( 10 * 10 ),
78  cen_dist12sqr_( 12 * 12 ),
79 
80  //cems transition regions between environment bins
81  //cems transition is from +/- sqrt(36+pad6) +/- sqrt(100+pad10) etc
82  cen_dist5_pad( 0.5 ),
83  cen_dist6_pad( 0.6 ),
84  cen_dist7_pad( 0.65 ),
85  cen_dist10_pad( 1.0 ),
86  cen_dist12_pad( 1.2 ),
87 
 // *_pad_plus = r^2 + pad: the upper edge of the transition region around each squared radius.
 // Note that 56.25 is 7.5 * 7.5, so the boundary labelled "7" is placed at 7.5 squared.
88  cen_dist5_pad_plus ( cen_dist5_pad + 25.0 ),
89  cen_dist6_pad_plus( cen_dist6_pad + 36.0 ),
90  cen_dist7_pad_plus ( cen_dist7_pad + 56.25 ),
91  cen_dist10_pad_plus( cen_dist10_pad + 100.0 ),
92  cen_dist12_pad_plus( cen_dist12_pad + 144.0 ),
93 
 // *_pad_minus = pad - r^2 (a negative number): adding it to a squared distance d2 gives
 // d2 - r^2 + pad, the offset of d2 from the lower edge of the transition region.
 // No "6" variant is initialized here.
94  cen_dist5_pad_minus ( cen_dist5_pad - 25.0 ),
95  cen_dist7_pad_minus ( cen_dist7_pad - 56.25 ),
96  cen_dist10_pad_minus( cen_dist10_pad - 100.0 ),
97  cen_dist12_pad_minus( cen_dist12_pad - 144.0 ),
98 
 // *_pad_hinv = 1 / (2 * pad): scales an offset inside the 2*pad-wide transition region
 // to the range [0,1].
99  cen_dist5_pad_hinv ( 0.5 / cen_dist5_pad ),
100  cen_dist6_pad_hinv ( 0.5 / cen_dist6_pad ),
101  cen_dist7_pad_hinv ( 0.5 / cen_dist7_pad ),
102  cen_dist10_pad_hinv( 0.5 / cen_dist10_pad ),
103  cen_dist12_pad_hinv( 0.5 / cen_dist12_pad ),
104 
105  cen_dist_cutoff_12_pad( cen_dist_cutoff2 + cen_dist12_pad )
106 {
107  // load the data
108  Size const max_aa( 20 ); // just the standard aa's for now
109  Size const env_log_table_size( 40 );
110  Size const pair_log_table_size( 5 );
111  Size const cbeta_den_table_size( 45 );
112  Size const cenpack_log_table_size( 120 );
113 
 // tag, line and aa are scratch variables reused by every table loader below;
 // aa is declared without an initial value.
114  std::string tag,line;
115  chemical::AA aa;
116 
 // env_log.txt: every line is read as the tag "ENV_LOG:", an amino-acid identifier, then
 // env_log_table_size values. The loop reads until getline fails, so the number of
 // lines is not checked against max_aa.
 // NOTE(review): values are stored into env_log_(aa,i) before l.fail() is tested, so a
 // malformed line can index the table with a stale or unset aa before the error exit.
117  { // env_log
118  env_log_.dimension( max_aa, env_log_table_size );
119 
120  utility::io::izstream stream;
121  basic::database::open( stream, "scoring/score_functions/EnvPairPotential/env_log.txt" );
122  while ( getline( stream, line ) ) {
123  std::istringstream l(line);
124  l >> tag >> aa;
125  for ( Size i=1; i<= env_log_table_size; ++i ){
126  l >> env_log_(aa,i);
127  }
128  if ( l.fail() || tag != "ENV_LOG:" ) utility_exit_with_message("bad format for scoring/score_functions/EnvPairPotential/env_log.txt");
129  }
130  }
131 
 // cbeta_den.txt: exactly two lines are consumed, "CBETA_DEN6:" followed by "CBETA_DEN12:",
 // each carrying cbeta_den_table_size values.
132  { // cbeta_den_6/12
133  cbeta_den6_.dimension( cbeta_den_table_size );
134  cbeta_den12_.dimension( cbeta_den_table_size );
135 
136  utility::io::izstream stream;
137  basic::database::open( stream, "scoring/score_functions/EnvPairPotential/cbeta_den.txt" );
138 
139  { // den6
140  getline( stream, line );
141  std::istringstream l(line);
142  l >> tag;
143  for ( Size i=1; i<= cbeta_den_table_size; ++i ){
144  l >> cbeta_den6_(i);
145  }
146  if ( l.fail() || tag != "CBETA_DEN6:" ) utility_exit_with_message("bad format for scoring/score_functions/EnvPairPotential/cbeta_den.txt");
147  }
148 
149  { // den12
150  getline( stream, line );
151  std::istringstream l(line);
152  l >> tag;
153  for ( Size i=1; i<= cbeta_den_table_size; ++i ){
154  l >> cbeta_den12_(i);
155  }
156  if ( l.fail() || tag != "CBETA_DEN12:" ) utility_exit_with_message("bad format for scoring/score_functions/EnvPairPotential/cbeta_den.txt");
157  }
158  }
159 
160 
 // pair_log.txt: pair_log_table_size * max_aa lines, ordered by distance bin j and then by
 // amino acid k. Each line is "PAIR_LOG:", the bin index, the amino acid, then max_aa values.
 // NOTE(review): the ordering check on aa is an assert placed before the l.fail() test; if
 // this is the standard assert macro it disappears when NDEBUG is defined, leaving only the
 // bin-index and tag checks in such builds.
161  { // pair_log
162  pair_log_.dimension( pair_log_table_size, max_aa, max_aa );
163 
164  utility::io::izstream stream;
165  basic::database::open( stream, "scoring/score_functions/EnvPairPotential/pair_log.txt" );
166  for ( Size j=1; j<= pair_log_table_size; ++j ) {
167  for ( Size k=1; k<= max_aa; ++k ) {
168  getline( stream, line );
169  std::istringstream l(line);
170  Size jj;
171  l >> tag >> jj >> aa;
172  assert( Size(aa) == k );
173  for ( Size i=1; i<= max_aa; ++i ) {
174  l >> pair_log_(j,aa,i);
175  }
176  if ( l.fail() || jj != j || tag != "PAIR_LOG:" ) utility_exit_with_message("bad format for scoring/score_functions/EnvPairPotential/pair_log.txt");
177  }
178  }
179  }
180 
 // cenpack_log.txt: cenpack_log_table_size lines, each "CENPACK_LOG:", the bin index, one value.
181  { // cenpack_log
182  cenpack_log_.dimension( cenpack_log_table_size ); //sequence independent
183 
184  utility::io::izstream stream;
185  basic::database::open( stream, "scoring/score_functions/EnvPairPotential/cenpack_log.txt" );
186  for ( Size j=1; j<= cenpack_log_table_size; ++j ) {
187  getline( stream, line );
188  std::istringstream l(line);
189  Size jj;
190  l >> tag >> jj;
191  l >> cenpack_log_(j);
192  if ( l.fail() || jj != j || tag != "CENPACK_LOG:" ) utility_exit_with_message("bad format for scoring/score_functions/EnvPairPotential/cenpack_log.txt");
193  }
194  }
195 }
196 
197 /// @brief fill the cenlist using interpolation
198 /// @details
199 ///cems--------------------------------------------------------------------------
200 /// interpolation notes --Historically we have broken the
201 /// centroid density statistics into three bins: i) pairs
202 /// less than 6 angstroms ii) pairs less than 10 angstroms ems
203 /// iii) and pairs between 6 and 12 angstroms the resulting
204 /// abruptness in the scoring functions due to the arbitrary radius
205 /// cutoffs has caused some problems during gradient minimization.
206 /// therefore this was replaced with an interpolated binning
207 /// schema as follows: When a pairwise distance lies within "+/-
208 /// dr" of the bin boundary (6,10,12) then partial credit is given
209 /// to the enclosing bins. So for example, if fgap=0.5 angstroms, and
210 /// a pair radius were 6.4 angstroms, then a fractional count is
211 /// given to BOTH the "less-than-6" bin AND to the
212 /// "between-6-and-10" bin. The sum of these fractions always adds to
213 /// one. So that we don't have to re-do the statistics we
214 /// currently use we want to keep "fgap" small. ideally fgap
215 /// should be large compared to the search algorithm step size, and
216 /// larger than the expected roundoff error in any refold
217 /// operation, and otherwise as small as possible. Also we want
218 /// to cleverly choose the interpolation function so that the average
219 /// number of counts getting into the bins is the same as under
220 /// the old schema. As long as dr is small then we can use either
221 /// r+/-fgap or alternatively r^2+/-fgap^2 and this will be
222 /// approximately satisfied. since the squared form is easier to work
223 /// with we will use this. in the code below the fgap^2 term is called
224 /// a _pad, and we allow for different pad_sizes on the three radii.
225 ///cems--------------------------------------------------------------------------
/// @param cenlist  per-residue fcen6/fcen10/fcen12 accumulators; the entries for both residues are incremented
/// @param res1     index of the first residue of the pair
/// @param res2     index of the second residue of the pair
/// @param cendist  despite its name this is compared only against squared-distance thresholds
///                 (r^2 + pad), and the caller visible in this file passes edge->square_distance()
// NOTE(review): the signature line (listing line 227) is absent from this extracted listing;
// the call `fill_cenlist( cenlist, i, j, cendist )` elsewhere in this file uses this parameter order.
226 void
228  CenListInfo & cenlist,
229  Size const res1,
230  Size const res2,
231  Real const cendist
232 ) const
233 {
234 
 // Precondition: the pair lies inside the outermost padded cutoff (144 + pad12).
235  assert( cendist <= cen_dist12_pad_plus );
236 
237  /*
238  // If we should ever need the integer "cenX" arrays and not
239  // the floating-point fcen arrays
240  if ( cendist <= cen_dist10sqr_ ) {
241  if ( cendist <= cen_dist6sqr_ ) {
242  cenlist.cen6(res1) += 1;
243  cenlist.cen6(res2) += 1;
244  } else {
245  cenlist.cen12(res1) += 1;
246  cenlist.cen12(res2) += 1;
247  }
248  cenlist.cen10(res1) += 1;
249  cenlist.cen10(res2) += 1;
250  } else {
251  cenlist.cen12(res1) += 1;
252  cenlist.cen12(res2) += 1;
253  }
254  */
255 
256  // compute arrays needed for C-beta energy function
257  Real const one( 1.0 );
258 
259  // NOTE: *_hinv is negative of vdw.cc version and positive in structure.cc version.
260  // We are using positive hinvs.
 // Each interp below is ( upper_edge - cendist ) / ( 2 * pad ), capped at 1: it is 1 at or
 // below r^2 - pad and falls linearly to 0 at r^2 + pad.
261  if ( cendist <= cen_dist10_pad_plus ) {
262  Real interp = std::min( ( cen_dist10_pad_plus - cendist ) * cen_dist10_pad_hinv, one );
263  cenlist.fcen10(res1) += interp;
264  cenlist.fcen10(res2) += interp;
265  }
266 
267  if ( cendist <= cen_dist6_pad_plus ) { // its sort of a "6" and not so much a "12"
268  Real interp = std::min( ( cen_dist6_pad_plus - cendist ) * cen_dist6_pad_hinv, one );
269 
 // The fraction not credited to the "6" bin goes to the "12" bin, so the two
 // contributions of this pair sum to one.
270  cenlist.fcen6(res1) += interp;
271  cenlist.fcen6(res2) += interp;
272  cenlist.fcen12(res1) += 1.0 - interp;
273  cenlist.fcen12(res2) += 1.0 - interp;
274 
275  } else { // then its sort of a "12" but definitely not a "6"
276 
277  Real interp = std::min( ( cen_dist12_pad_plus - cendist ) * cen_dist12_pad_hinv, one );
278 
279  cenlist.fcen12(res1) += interp;
280  cenlist.fcen12(res2) += interp;
281  }
282 }
283 
284 void
// Clamps every residue's accumulated counts: fcen6 to below 45, fcen10 to below 31, and
// fcen12 to the range [1, 45).
// The scoring code elsewhere in this file truncates these values to int and uses them
// (and, for fcen6/fcen12, the value plus one) as table indices, so the upper clamps keep
// fcen6/fcen12 lookups within the 45-entry cbeta tables.
// NOTE(review): the signature line (listing line 285) is absent from this extracted listing;
// the call `truncate_cenlist_values( cenlist )` elsewhere in this file shows the argument.
286 {
287  for ( Size ii = 1; ii <= cenlist.size(); ++ii ) {
288  if ( cenlist.fcen6(ii) >= 45.0 ) cenlist.fcen6(ii) = 44.9999;
289  if ( cenlist.fcen10(ii) >= 31.0 ) cenlist.fcen10(ii) = 30.9999;
 // fcen12 starts at 0 for an isolated residue, so it also needs a lower clamp.
290  if ( cenlist.fcen12(ii) < 1 ) {
291  cenlist.fcen12(ii) = 1;
292  } else if ( cenlist.fcen12(ii) >= 45.0 ) {
293  cenlist.fcen12(ii) = 44.9999;
294  }
295  }
296 }
297 
298 
299 //////////////////////////////////////////////////////////////////////////////////////
// Fills the pose's cenlist (fcen6/fcen10/fcen12 per residue) from the energy graph, unless
// the cenlist is already flagged as calculated. Non-protein residues are skipped on both
// sides of a pair.
// NOTE(review): the signature line (listing line 301) and the first line of the inner
// for-loop header (listing line 324) are absent from this extracted listing.
300 void
302  pose::Pose & pose
303 ) const
304 {
305  // basic::ProfileThis doit( basic::ENERGY_ENVPAIR_POTENTIAL );
306 
307  CenListInfo & cenlist( nonconst_cenlist_from_pose( pose ));
308 
309  /// Energy graph contains edges for all residue pairs with
310  /// centroids w/i cen_dist_cutoff_12_pad
311  EnergyGraph const & energy_graph( pose.energies().energy_graph() );
312  Size const nres( energy_graph.num_nodes() );
313 
314  /// calculate the cenlist info only if it has not been calculated since the last score evaluation
315  if ( !cenlist.calculated() ) {
316 
317  // ensure that cenlist has pose.total_residue() elements in case the pose has
318  // changed its sequence length since the last cenlist update
319  cenlist.initialize( pose );
320 
 // Only upper edges are walked, so each pair is visited once. The loop stops before
 // nres, presumably because the last node can have no upper edges -- confirm.
321  for ( Size i = 1; i < nres; ++i ) {
322  conformation::Residue const & rsd1 ( pose.residue(i) );
323  if ( !rsd1.is_protein() ) continue;
325  iru = energy_graph.get_node(i)->const_upper_edge_list_begin(),
326  irue = energy_graph.get_node(i)->const_upper_edge_list_end();
327  iru != irue; ++iru ) {
328  EnergyEdge const * edge( static_cast< EnergyEdge const *> (*iru) );
329  Size const j( edge->get_second_node_ind() );
330  conformation::Residue const & rsd2 ( pose.residue(j) );
331  if ( !rsd2.is_protein() ) continue;
332 
 // cendist holds the edge's squared distance, matching the squared cutoffs.
333  Real const cendist = edge->square_distance();
334 
335  // compute arrays needed for C-beta energy function
336  // first do a coarse grain reality check on centroid separations
337  if ( cendist <= cen_dist_cutoff_12_pad ) {
338  fill_cenlist( cenlist, i, j, cendist );
339  }
340  }
341  }
342 
 // Clamp the sums into table range, then mark the cenlist as up to date.
343  truncate_cenlist_values( cenlist );
344  cenlist.calculated() = true;
345  }
346 
347 }
348 
349 void
// Clears the cenlist's "calculated" flag so that the next call of the routine that tests
// `!cenlist.calculated()` recomputes the per-residue counts.
// NOTE(review): the signature line (listing line 350) is absent from this extracted
// listing; the body uses a non-const `pose`.
351 {
352  CenListInfo & cenlist( nonconst_cenlist_from_pose( pose ));
353  cenlist.calculated() = false;
354 }
355 
356 ////////////////////////////////////////////////////////////////////////////////////
// Looks up the environment score and the two C-beta density scores for one residue from
// the cenlist stored in the pose. All three outputs are set to 0.0 for non-protein residues.
// pose        pose whose cached cenlist supplies fcen6/fcen10/fcen12
// rsd         residue to score; its seqpos() selects the cenlist entry
// env_score   out: env_log_ entry for the residue's aa and the integer part of fcen10
// cb_score6   out: cbeta_den6_ linearly interpolated at fcen6
// cb_score12  out: cbeta_den12_ linearly interpolated at fcen12
// NOTE(review): the signature line (listing line 358) is absent from this extracted listing.
357 void
359  pose::Pose const & pose,
360  conformation::Residue const & rsd,
361  Real & env_score,
362  Real & cb_score6,
363  Real & cb_score12
364 ) const
365 {
366  //using ObjexxFCL::fmt::F; // debugging
367  //using ObjexxFCL::fmt::I;
368  // basic::ProfileThis doit( basic::ENERGY_ENVPAIR_POTENTIAL );
369 
370  CenListInfo const & cenlist( cenlist_from_pose( pose ));
371 
372  int const position ( rsd.seqpos() );
373 
 // The cenlist is read for every residue, before the protein check below.
374  Real const fcen6 ( cenlist.fcen6( position) );
375  Real const fcen10 ( cenlist.fcen10(position) );
376  Real const fcen12 ( cenlist.fcen12(position) );
377 
378  if ( rsd.is_protein() ) {
379 
 // No interpolation for the environment term: fcen10 is truncated to its integer part.
380  env_score = env_log_( rsd.aa(), static_cast< int >( fcen10 ) );
381 
382  // interp1 rounds down to nearest (non-negative) integer.
383  int interp1 = static_cast< int >( fcen6 );
384  // note cen6 is always at least 1.0
385 
386  // fraction remainder after nearest lower integer is removed
387  Real interp2 = fcen6 - interp1;
388 
389  // use interp2 to linearly interpolate the two nearest bin values
 // interp1+1 is a valid index only while fcen6 stays below the table size.
390  cb_score6 =
391  ( ( 1.0 - interp2 ) * cbeta_den6_( interp1 ) +
392  ( interp2 ) * cbeta_den6_( interp1+1 ) );
393 
394  interp1 = static_cast< int >( fcen12 );
395  // note cen12 is always at least 1.0 -- this is in fact false for fcen12
396  interp2 = fcen12 - interp1;
397  cb_score12 =
398  ( ( 1.0 - interp2 ) * cbeta_den12_( interp1 ) +
399  ( interp2 ) * cbeta_den12_( interp1+1 ) );
400 
401  //std::cout << "eval_env_cbeta: " << I(4,rsd.seqpos()) << F(9,3,fcen6) << F(9,3,fcen10) << F(9,3,fcen12) <<
402  // F(9,3,env_score) << F(9,3,cb_score6) << F(9,3,cb_score12) << ' ' << rsd.name() << std::endl;
403  //std::cout << "fcen6( " << position << " ) = " << fcen6 << " fcen10( " << position << " ) " << fcen10 << " fcen12( " << position << " ) = ";
404  //std::cout << fcen12 << " "; //<< std::endl;
405  // " interp1: " << interp1 << " interp2: " << interp2 << std::endl;
406 
407 
408  } else { // amino acid check
409  env_score = 0.0;
410  cb_score6 = 0.0;
411  cb_score12 = 0.0;
412  }
413 }
414 
415 
416 ///////////////////////////////////////////////////////////////////////////////////////////////
417 
// Computes the residue-pair term and the centroid-packing term for one residue pair.
// rsd1, rsd2            the two residues; both outputs stay 0.0 unless both are protein
// cendist               compared against squared thresholds (r^2 + pad) for the pair term and
//                       passed through sqrt() for the cenpack bin, so it is a squared distance
// pair_contribution     out: pair_log_ interpolated between adjacent distance bins
// cenpack_contribution  out: cenpack_log_ entry for the 0.1-wide distance bin
// Both outputs are also left at 0.0 when the pair is rejected by the disulfide test or is
// within 8 positions in sequence.
// NOTE(review): the signature line (listing line 419) and the end of the disulfide
// condition (listing line 440) are absent from this extracted listing.
418 void
420  conformation::Residue const & rsd1,
421  conformation::Residue const & rsd2,
422  Real const cendist,
423  Real & pair_contribution,
424  Real & cenpack_contribution
425 ) const
426 {
427  // basic::ProfileThis doit( basic::ENERGY_ENVPAIR_POTENTIAL );
428 
429  pair_contribution = 0.0;
430  cenpack_contribution = 0.0;
431 
432  if ( !rsd1.is_protein() || !rsd2.is_protein() ) return;
433 
434  chemical::AA const aa1( rsd1.aa() );
435  chemical::AA const aa2( rsd2.aa() );
436 
437  //CAR no pair score if a disulfide
438  if ( aa1 == chemical::aa_cys && aa2 == chemical::aa_cys &&
439  rsd1.is_bonded( rsd2 ) && rsd1.polymeric_sequence_distance( rsd2 ) > 1 &&
441 
442  // no pair score for residues closer than 9 in sequence
443  if ( rsd1.polymeric_sequence_distance( rsd2 ) /* j - i */ <= 8 ) return;
444 
445  //$$$ we now try to find which bin the pair distance lies in
446  //$$$ I note this could in principle be calculated and updated
447  //$$$ just like cen_dist is if there is a need for speed.
448  //$$$ this function interpolates between bins.
449  //$$$ An important(!) requirement on pair_log is that the
450  //$$$ value should approach zero as the radius increases.
451  //$$$ this fact permits us not to have to compute and score pairs are larger
452  //$$$ than cen_dist > cutoff.
453 
454  int icon = 5;
455  Real interp2( 0.0 );
456 
 // Pick the bin from the largest padded threshold that cendist exceeds. In each branch
 // interp2 = ( cendist - r_next^2 + pad_next ) / ( 2 * pad_next ), the position of
 // cendist inside the transition region at the bin's upper boundary.
457  if ( cendist > cen_dist10_pad_plus ) {
458  icon = 4;
459  interp2 = ( cendist + cen_dist12_pad_minus ) * cen_dist12_pad_hinv;
460  } else {
461  if ( cendist > cen_dist7_pad_plus ) {
462  icon = 3;
463  interp2 = ( cendist + cen_dist10_pad_minus ) * cen_dist10_pad_hinv;
464  } else {
465  if ( cendist > cen_dist5_pad_plus ) {
466  icon = 2;
467  interp2 = ( cendist + cen_dist7_pad_minus ) * cen_dist7_pad_hinv;
468  } else {
469  icon = 1;
470  interp2 = ( cendist + cen_dist5_pad_minus ) * cen_dist5_pad_hinv;
471  }
472  }
473  }
 // Below the transition region the offset is negative: use the lower bin only.
474  if ( interp2 < 0.0 ) interp2 = 0.0;
475 
476  // note in theory this will never happen but in practice round off
477  // error can cause problem
478  if ( interp2 > 1.0 ) interp2 = 1.0;
479 
480  // handle last bin specially since icon+1 would be past array end
481  // pb note -- I don't think icon will ever be 5 here, wonder if it has always been this way?
 // Every branch above assigns icon a value from 1 to 4, so the else arm below is not
 // reached by the code visible here.
482  if ( icon != 5 ) {
483  pair_contribution =
484  ( ( 1.0f - interp2 ) * pair_log_( icon , aa1, aa2 ) +
485  ( interp2 ) * pair_log_( icon+1, aa1, aa2 ) );
486  } else {
487  pair_contribution = ( 1.0f - interp2 ) * pair_log_( icon , aa1, aa2 );
488  }
489 
490 
491  // Adding a term that should help reproduce pairwise correlation function between centroids
492  // as observed in the PDB.
493  int cendist_bin = static_cast <int> ( sqrt( cendist ) * 10 + 1); //Binned with 0.1 A width.
494 
 // Clamp to the 1..120 index range of cenpack_log_.
495  if (cendist_bin > 120) cendist_bin = 120;
496  if (cendist_bin < 1) cendist_bin = 1;
497 
498  cenpack_contribution = cenpack_log_( cendist_bin );
499 
500  return;
501 }
502 
503 /// @details Pose must already contain a cenlist object or this method will fail.
/// Returns a const reference to the object stored in the pose's data cache under
/// CacheableDataType::CEN_LIST_INFO, cast to CenListInfo without a run-time type check.
// NOTE(review): the signature line (listing line 505) is absent from this extracted listing.
504 CenListInfo const &
506 {
507  using namespace core::pose::datacache;
508  return *( static_cast< CenListInfo const * >( pose.data().get_const_ptr( CacheableDataType::CEN_LIST_INFO )() ));
509 
510 }
511 
512 /// @details Either returns a non-const reference to the cenlist object already stored
513 /// in the pose, or creates a new cenlist object, places it in the pose, and returns
514 /// a non-const reference to it.
// NOTE(review): three lines are absent from this extracted listing: the signature (listing
// line 516), the condition that opens the first branch (listing line 520), and a statement
// between the allocation and the final return (listing line 525). Presumably they are the
// has-data test and the call that stores the new object in the pose -- confirm against the
// original file.
515 CenListInfo &
517 {
518  // ////using core::pose::datacache::CacheableDataType::CEN_LIST_INFO;
519 
521  return *( static_cast< CenListInfo * >( pose.data().get_ptr( core::pose::datacache::CacheableDataType::CEN_LIST_INFO )() ));
522  }
523  // else
524  CenListInfoOP cenlist = new CenListInfo;
526  return *cenlist;
527 
528 }
529 
530 
531 
532 }
533 }