Rosetta 3.5
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
sasapack.cc
Go to the documentation of this file.
1 // -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:f;rm-trailing-spaces:t -*-
2 // vi: set ts=2 noet:
3 //
4 // (c) Copyright Rosetta Commons Member Institutions.
5 // (c) This file is part of the Rosetta software suite and is made available under license.
6 // (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
7 // (c) For more information, see http://www.rosettacommons.org. Questions about this can be
8 // (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
9 
10 /// @file sasapack.cc
11 /// @brief Per-residue sasapack and avge scores, each measured against a per-amino-acid piecewise polynomial of the residue's SASA
12 /// @author Phil Bradley (pbradley@fhcrc.org)
13 
14 // libRosetta headers
16 
17 #include <core/id/AtomID.hh>
18 #include <core/id/AtomID_Map.hh>
20 #include <core/scoring/sasa.hh>
23 #include <core/scoring/Energies.hh>
24 #include <core/pose/Pose.hh>
25 #include <core/pose/util.hh>
26 #include <core/pose/util.tmpl.hh>
29 
30 #include <basic/Tracer.hh>
31 #include <basic/database/open.hh>
32 
33 #include <utility/io/izstream.hh>
34 #include <utility/vector1.hh>
35 #include <utility/vector1.functions.hh>
36 
37 #include <ObjexxFCL/string.functions.hh>
38 #include <ObjexxFCL/format.hh>
39 
// Tracer constructed with the name "protocols.sasa_scores.sasapack"; the coefficient-loading code below
// writes its "polyval_err" diagnostics through TR.Trace.
40 static basic::Tracer TR( "protocols.sasa_scores.sasapack" );
41 
42 
43 namespace protocols {
44 namespace sasa_scores {
45 
46 using namespace core;
47 using namespace core::pose;
48 using namespace core::conformation;
49 using namespace core::chemical;
50 using namespace core::scoring;
51 using utility::vector1;
52 //using namespace basic::options;
53 
55 
56 using namespace std;
57 using namespace ObjexxFCL::fmt;
58 
/// @brief A single polynomial in one variable, evaluated through operator().
/// The coefficients are held in coeffs_, which the friend stream extractor below fills in.
/// NOTE(review): this listing skips the line of the private section where coeffs_ is presumably
/// declared -- confirm against the original file.
59 class Poly {
60 public:
/// Evaluate the polynomial at x.
61  Real
62  operator()( Real const x ) const;
63 
/// Degree of the polynomial: number of stored coefficients minus one.
/// NOTE(review): with no coefficients stored this computes 0 - 1; if Size is unsigned that wraps -- confirm.
64  Size
65  degree() const { return coeffs_.size() -1; }
66 
/// Stream extractor; declared a friend so that it can write coeffs_ directly.
67  friend
68  std::istream & operator >>( std::istream & is, Poly & p );
69 
70 private:
72 
73 
74 };
75 
76 ///////
77 Real
78 Poly::operator()( Real const x ) const
79 {
80  runtime_assert( !coeffs_.empty() );
81 
82  Size const degree( coeffs_.size() - 1 );
83 
84  Real polyval(0.0);
85  Real x_raised( 1.0 );
86  for ( Size i=0; i<= degree; ++i ) {
87  polyval += coeffs_[ degree+1-i ] * x_raised;
88  x_raised *= x;
89  }
90  return polyval;
91 }
92 
93 std::istream & operator >>( std::istream & is, Poly & p )
94 {
95  Size degree(0);
96  string tmp1,tmp2,tmp3;
97  is >> tmp1 >> tmp2 >> degree >> tmp3;
98  if ( is.fail() || tmp1 != "POLY" || tmp2 != "DEGREE" || tmp3 != "COEFFS_HI2LO" || degree > 10000 ) {
99  is.setstate( std::ios_base::failbit );
100  p.coeffs_.clear();
101  return is;
102  }
103  p.coeffs_.resize( degree+1 );
104  for ( Size i=1; i<=degree+1; ++i ) {
105  is >> p.coeffs_[i];
106  }
107  if ( is.fail() ) {
108  p.coeffs_.clear();
109  return is;
110  }
111  return is;
112 }
113 
114 ///////////////////////////////////////////////////////////////////////////////////////////////////////////////
115 
/// @brief A piecewise polynomial: a list of Poly pieces, each paired with an [xmin, xmax] interval.
/// The member functions defined below use three parallel containers, xmins_, xmaxs_ and polys_.
/// NOTE(review): this listing skips the lines of the private section where those members are
/// presumably declared -- confirm against the original file.
116 class PPoly {
117 public:
/// Evaluate at x using the first piece whose interval contains x.
118  Real
119  operator()( Real const x ) const;
120 
/// Stream extractor; declared a friend so that it can write the private containers directly.
121  friend
122  std::istream & operator >>( std::istream & is, PPoly & p );
123 
/// Largest degree() over all stored pieces.
124  Size
125  max_degree() const;
126 
127 private:
131 
132 };
133 
134 Real
135 PPoly::operator()( Real const x ) const
136 {
137  for ( Size ii=1; ii<= xmins_.size(); ++ii ) {
138  if ( x >= xmins_[ii] && x <= xmaxs_[ii] ) return polys_[ii](x);
139  }
140  utility_exit_with_message("PPoly::operator() x out of range: "+ObjexxFCL::string_of(x)); // probably a nicer way to handle this...
141  return 0.0;
142 }
143 
/// @brief Return the largest degree() among the polynomials in polys_; 0 when polys_ is empty.
/// NOTE(review): this listing skips the line between the return type and the opening brace, which
/// presumably reads "PPoly::max_degree() const" (the member declared in class PPoly) -- confirm.
144 Size
146 {
147  Size md(0);
// polys_ is indexed from 1
148  for ( Size ii=1; ii<= polys_.size(); ++ii ) {
149  md = max( md, polys_[ii].degree() );
150  }
151  return md;
152 }
153 
154 std::istream & operator >>( std::istream & is, PPoly & pp )
155 {
156  string tmp1,tmp2;
157  Size npoly;
158  is >> tmp1 >> tmp2 >> npoly;
159  if ( is.fail() || tmp1 != "PPOLY" || tmp2 != "NPOLY" ) {
160  is.setstate( std::ios_base::failbit );
161  pp.polys_.clear();
162  return is;
163  }
164  for ( Size i=1; i<= npoly; ++i ) {
165  Poly p;
166  Real xmin,xmax;
167  is >> tmp1 >> xmin >> tmp2 >> xmax >> p;
168  if ( is.fail() || tmp1 != "XMIN" || tmp2 != "XMAX" ) break;
169  pp.polys_.push_back(p);
170  pp.xmins_.push_back( xmin );
171  pp.xmaxs_.push_back( xmax );
172  }
173  if ( is.fail() || pp.polys_.size() != npoly ) {
174  is.setstate( std::ios_base::failbit );
175  pp.polys_.clear(); pp.xmins_.clear(); pp.xmaxs_.clear();
176  return is;
177  }
178  return is;
179 }
180 
181 ///////////////////////////////////////////////////////////////////////////////
/// @brief Load the per-amino-acid piecewise polynomials and average SASA values from the database
/// file "scoring/sasa_scores/sasapack_datafile_v1.txt".
/// @param polys  cleared, resized to num_canonical_aas, then filled by amino-acid index from
///               the PPOLY_SASAPACK lines
/// @param avg_sasa14s  cleared, resized to num_canonical_aas, then filled by amino-acid index from
///                     the AVG_SASA lines
/// @details After reading, each of the first 20 polynomials is re-evaluated at the PPOLYVAL_SASAPACK
/// sample points stored for its max_degree(), and the mean squared difference is asserted to be
/// below 1e-3.
/// NOTE(review): this listing skips the line between "void" and the parameter list that carries the
/// function name; the log message below and a later call taking ( polys, avg_sasa14s ) suggest
/// load_sasapack_polynomial_coefficients -- confirm against the original file.
182 void
184  vector1< PPoly > & polys,
185  Reals & avg_sasa14s
186  )
187 {
188  polys.clear();
189  polys.resize( num_canonical_aas );
190  avg_sasa14s.clear();
191  avg_sasa14s.resize( num_canonical_aas );
192 
193  utility::io::izstream data;
194  string const datafile( "scoring/sasa_scores/sasapack_datafile_v1.txt" ); // TO DO: make this an option
195  basic::database::open( data, datafile );
196 
// sample points keyed by (amino acid, degree); used only by the consistency check after the read loop
197  map< std::pair< AA, Size >, vector1< std::pair< Real, Real > > > all_polyvals;
198 
199  string line;
200  bool found_avg_sasa( false );
// The first token of each line selects how the rest is parsed; lines with any other tag are ignored.
201  while ( getline( data,line ) ) {
202  string linetag, name1;
203  Size degree;
204  istringstream is(line );
205  is >> linetag;
206  if ( linetag == "PPOLY_SASAPACK" ) {
// name1[0] is the one-letter amino-acid code; degree is read but only the parsed PPoly is stored
207  PPoly p;
208  is >> name1 >> degree >> p;
209  runtime_assert( !is.fail() );
210  AA const aa( aa_from_oneletter_code( name1[0] ) );
211  runtime_assert( aa >= aa_ala && aa <= num_canonical_aas );
212  polys[ aa ] = p;
213  } else if ( linetag == "PPOLYVAL_SASAPACK" ) {
// an (x,y) sample of the polynomial; NOTE(review): the stream state is not checked on this branch
214  Real x,y;
215  is >> name1 >> degree >> x >> y;
216  std::pair< AA, Size > const p( make_pair( aa_from_oneletter_code( name1[0] ), degree ) );
217  all_polyvals[p].push_back( make_pair(x,y) );
218  } else if ( linetag == "AVG_SASA" ) {
// the token between the amino-acid code and the mean is read into linetag and not used afterwards
219  Real mean;
220  is >> name1 >> linetag >> mean;
221  found_avg_sasa = true;
222  AA const aa( aa_from_oneletter_code( name1[0] ) );
223  runtime_assert( aa >= aa_ala && aa <= num_canonical_aas );
224  avg_sasa14s[ aa ] = mean;
225  }
226  }
// at least one AVG_SASA line must have been seen
227  runtime_assert( found_avg_sasa );
// Consistency check: recompute each stored sample from the loaded polynomial.
228  for ( Size i=1; i<= 20; ++i ) {
229  AA const aa = AA(i);
230  PPoly const & p( polys[i] );
231  Size const degree( p.max_degree() );
232  cout << "load_sasapack_polynomial_coefficients: using degree " << degree << " polynomial for " << aa <<
233  " datafile: " << datafile << endl; /// NOTE: cout
// operator[] on the map yields an empty vector when no samples were stored for this key; count then stays 0
// and err stays 0.0, so the assertion below passes
234  vector1< std::pair< Real, Real > > const polyvals( all_polyvals[ make_pair( aa, degree ) ] );
235  Size count(0);
236  Real err( 0.0 );
237  for ( Size ii=1; ii<= polyvals.size(); ++ii ) {
238  Real const x( polyvals[ii].first ), expected_y( polyvals[ii].second ), recomputed_y( p(x) );
239  err += ( expected_y - recomputed_y ) * ( expected_y - recomputed_y );
240  ++count;
241  }
// mean squared error per sample point
242  if ( count ) err /= count;
243  TR.Trace << "polyval_err: " << aa << ' ' << degree << " npoints: " << I(4,count) << " err-per-point: " <<
244  F(9,3,err) << endl;
245  runtime_assert( err < 1e-3 );
246  }
247  data.close();
248 }
249 
250 
251 
252 ///////////////////////////////////////////////////////////////////////////////
/// @brief Load the per-amino-acid piecewise polynomials and average SASA values from the database
/// file "scoring/sasa_scores/avge_datafile_score12prime_v1.txt".
/// @param polys  cleared, resized to num_canonical_aas, then filled by amino-acid index from
///               the PPOLY_NORME lines
/// @param avg_sasa14s  cleared, resized to num_canonical_aas, then filled by amino-acid index from
///                     the AVG_SASA lines
/// @details After reading, each of the first 20 polynomials is re-evaluated at the PPOLYVAL_NORME
/// sample points stored for its max_degree(), and the mean squared difference is asserted to be
/// below 1e-3.
/// NOTE(review): this listing skips the line between "void" and the parameter list that carries the
/// function name; the log message below and a later call taking ( polys, avg_sasa14s ) suggest
/// load_avge_polynomial_coefficients -- confirm against the original file.
253 void
255  vector1< PPoly > & polys,
256  Reals & avg_sasa14s
257  )
258 {
259  polys.clear();
260  polys.resize( num_canonical_aas );
261  avg_sasa14s.clear();
262  avg_sasa14s.resize( num_canonical_aas );
263 
264  utility::io::izstream data;
265  string const datafile( "scoring/sasa_scores/avge_datafile_score12prime_v1.txt" );// TO DO: make this an option
266  basic::database::open( data, datafile );
267 
// sample points keyed by (amino acid, degree); used only by the consistency check after the read loop
268  map< std::pair< AA, Size >, vector1< std::pair< Real, Real > > > all_polyvals;
269 
270  string line;
271  bool found_avg_sasa( false );
// The first token of each line selects how the rest is parsed; lines with any other tag are ignored.
272  while ( getline( data,line ) ) {
273  string linetag, name1;
274  Size degree;
275  istringstream is(line );
276  is >> linetag;
277  if ( linetag == "PPOLY_NORME" ) {
// name1[0] is the one-letter amino-acid code; degree is read but only the parsed PPoly is stored
278  PPoly p;
279  is >> name1 >> degree >> p;
280  runtime_assert( !is.fail() );
281  AA const aa( aa_from_oneletter_code( name1[0] ) );
282  runtime_assert( aa >= aa_ala && aa <= num_canonical_aas );
283  polys[ aa ] = p;
284  } else if ( linetag == "PPOLYVAL_NORME" ) {
// an (x,y) sample of the polynomial; NOTE(review): the stream state is not checked on this branch
285  Real x,y;
286  is >> name1 >> degree >> x >> y;
287  std::pair< AA, Size > const p( make_pair( aa_from_oneletter_code( name1[0] ), degree ) );
288  all_polyvals[p].push_back( make_pair(x,y) );
289  } else if ( linetag == "AVG_SASA" ) {
// the token between the amino-acid code and the mean is read into linetag and not used afterwards
290  Real mean;
291  is >> name1 >> linetag >> mean;
292  found_avg_sasa = true;
293  AA const aa( aa_from_oneletter_code( name1[0] ) );
294  runtime_assert( aa >= aa_ala && aa <= num_canonical_aas );
295  avg_sasa14s[ aa ] = mean;
296  }
297  }
// at least one AVG_SASA line must have been seen
298  runtime_assert( found_avg_sasa );
299 
// Consistency check: recompute each stored sample from the loaded polynomial.
300  for ( Size i=1; i<= 20; ++i ) {
301  AA const aa = AA(i);
302  PPoly const & p( polys[i] );
303  Size const degree( p.max_degree() );
304  cout << "load_avge_polynomial_coefficients: using degree " << degree << " polynomial for " << aa <<
305  " datafile: " << datafile << endl; /// NOTE: cout
// operator[] on the map yields an empty vector when no samples were stored for this key; count then stays 0
// and err stays 0.0, so the assertion below passes
306  vector1< std::pair< Real, Real > > const polyvals( all_polyvals[ make_pair( aa, degree ) ] );
307  Size count(0);
308  Real err( 0.0 );
309  for ( Size ii=1; ii<= polyvals.size(); ++ii ) {
310  Real const x( polyvals[ii].first ), expected_y( polyvals[ii].second ), recomputed_y( p( x ) );
311  err += ( expected_y - recomputed_y ) * ( expected_y - recomputed_y );
312  ++count;
313  }
// mean squared error per sample point
314  if ( count ) err /= count;
315  TR.Trace << "polyval_err: " << aa << ' ' << degree << " npoints: " << I(4,count) << " err-per-point: " <<
316  F(9,3,err) << endl;
317  runtime_assert( err < 1e-3 );
318  }
319  data.close();
320 }
321 
322 
323 
324 ///////////////////////////////////////////////////////////////////////////////
325 /// NOTE: this does not include the probe radius in the sasa value, hence somewhat specialized for sasapack
/// @brief Thin wrapper around calc_per_atom_sasa with a fixed set of options.
/// @param probe_radius  forwarded to calc_per_atom_sasa
/// @param pose  the structure whose SASA is computed
/// @param rsd_sasa  output, forwarded to calc_per_atom_sasa; callers in this file index it by residue number
/// @details The per-atom result (atom_sasa) is a local and is discarded on return.
/// NOTE(review): this listing skips the line between "void" and the parameter list that carries the
/// function name; calls elsewhere in this file taking ( probe radius, pose, Reals ) suggest
/// compute_residue_sasas_for_sasa_scores -- confirm against the original file.
326 void
328  Real const probe_radius,
329  Pose const & pose,
330  Reals & rsd_sasa
331  )
332 {
// every option is off except use_lj_radii
333  bool const use_big_polar_H( false ), use_naccess_sasa_radii( false ), expand_polar_radii( false ),
334  include_probe_radius_in_atom_radii( false ), use_lj_radii( true ); // NOTE -- LJ RADII
// passed through to calc_per_atom_sasa; expand_polar_radii is false, hence the "unused" in the name
335  Real const polar_expansion_radius_unused( 1.0 );
336 
337  id::AtomID_Map< Real > atom_sasa;
338  id::AtomID_Map< bool > atom_subset;
339  atom_subset.clear(); // unnecessary?
// presumably sizes atom_subset to the pose and sets every entry to true, i.e. all atoms selected -- confirm
340  core::pose::initialize_atomid_map( atom_subset, pose, true );
341 
342  calc_per_atom_sasa( pose, atom_sasa, rsd_sasa, probe_radius, use_big_polar_H,
343  atom_subset, use_naccess_sasa_radii, expand_polar_radii, polar_expansion_radius_unused,
344  include_probe_radius_in_atom_radii, use_lj_radii );
345 
346 }
347 
348 
349 
350 ///////////////////////////////////////////////////////////////////////////////////////////////////////////////
351 /// NOTE: for this to make any sense it's critical that the same scorefxn be used in scoring decoys and natives
352 ///
/// @brief Compute, for each counted residue, the difference between its (partial) weighted energy and the
/// value a per-amino-acid polynomial predicts from the residue's SASA at probe radius 1.4.
/// @param pose_in  input structure; it is copied, and the copy is what gets scored
/// @param residue_avge  output, sized to pose_in.total_residue(); normE minus the polynomial value for
///                      counted residues, 0.0 for skipped ones
/// @param residue_normsasa  output, same size; SASA minus avg_sasa14s for the residue's amino acid,
///                          0.0 for skipped ones
/// @param average_avge  output; mean of residue_avge over counted residues (0.0 if none were counted)
/// @param average_normsasa  output; mean of residue_normsasa over counted residues (0.0 if none)
/// @details Skipped residues: non-protein, lower or upper terminus, and cysteines with the DISULFIDE
/// variant. The score function and the polynomial tables are function-local statics set up on first call.
/// NOTE(review): this listing skips the line between "void" and the parameter list, so the function's
/// name is not visible here.
353 void
355  Pose const & pose_in,
356  Reals & residue_avge,
357  Reals & residue_normsasa,
358  Real & average_avge, // over all residues that were counted
359  Real & average_normsasa
360  )
361 {
// all five flags are true, so each of these terms is subtracted from the residue energy below
362  bool const ignore_gly_paa( true ), ignore_pro_close( true ), ignore_omega( true ), ignore_fa_dun( true ),
363  ignore_fa_rep( true );
364 
// created once, on the first call, and reused afterwards
365  static ScoreFunctionOP fa_scorefxn( 0 );
366 
367  if ( !fa_scorefxn ) {
368  fa_scorefxn = ScoreFunctionFactory::create_score_function( "score12prime" );
369  if ( pose::symmetry::is_symmetric( *fa_scorefxn ) ) {
370  /// seems like residue energies may be messed up in the symmetric case, for intra-monomer interactions
371  ///
// replace it with a plain ScoreFunction copy-constructed from it, and assert the copy is not symmetric
372  fa_scorefxn = new ScoreFunction( *fa_scorefxn );
373  runtime_assert( !pose::symmetry::is_symmetric( *fa_scorefxn ) );
374  }
375  }
376 
377 
378  residue_avge.clear(); residue_avge.resize( pose_in.total_residue(), 0.0 );
379  residue_normsasa.clear(); residue_normsasa.resize( pose_in.total_residue(), 0.0 );
380 
// polynomial tables are loaded on the first call only
381  static vector1< PPoly > polys;
382  static Reals avg_sasa14s;
383  if ( polys.empty() ) {
384  load_avge_polynomial_coefficients( polys, avg_sasa14s );
385  }
386 
// work on a copy so that rescoring does not touch the caller's pose
387  Pose pose( pose_in );
388 
// NOTE(review): this listing skips one line here (between the pose copy and the SASA computation) -- confirm
// what it does against the original file
390 
391  /// need to compute sasa
392  Real const probe_radius( 1.4 );
393  Reals rsd_sasa;
394  compute_residue_sasas_for_sasa_scores( probe_radius, pose, rsd_sasa );
395 
396  (*fa_scorefxn)( pose ); // rescore with the same energy function used in fitting coeffs
397 
398  EnergyMap const & wts( fa_scorefxn->weights() );
399 
400  average_avge = average_normsasa = 0.0;
401 
// number of residues that contribute to the averages
402  Size count(0);
403  for ( Size i=1; i<= pose.total_residue(); ++i ) {
404  Residue const & rsd( pose.residue(i) );
405  if ( !rsd.is_protein() ) continue;
406  if ( rsd.is_lower_terminus() || rsd.is_upper_terminus() ) continue;
407  if ( rsd.aa() == aa_cys && rsd.has_variant_type( chemical::DISULFIDE ) ) continue; // avge ==> 0.0 for disulfs
408  EnergyMap const & rsd_energies( pose.energies().residue_total_energies(i) );
// weighted sum of this residue's energy terms
409  Real const total_energy( rsd_energies.dot( fa_scorefxn->weights() ) );
410 
// remove the weighted contribution of each ignored term
411  Real normE( total_energy );
412  if ( ignore_gly_paa ) normE -= rsd_energies[ p_aa_pp ] * wts[ p_aa_pp ];
413  if ( ignore_pro_close ) normE -= rsd_energies[ pro_close ] * wts[ pro_close ];
414  if ( ignore_omega ) normE -= rsd_energies[ omega ] * wts[ omega ];
415  if ( ignore_fa_dun ) normE -= rsd_energies[ fa_dun ] * wts[ fa_dun ];
416  if ( ignore_fa_rep ) normE -= rsd_energies[ fa_rep ] * wts[ fa_rep ];
417 
// NOTE(review): polys and avg_sasa14s hold num_canonical_aas entries; assumes rsd.aa() is within that range
418  Real const sasa14( rsd_sasa[i] ), expected_normE( polys[ rsd.aa() ]( sasa14 ) );
419 
420  residue_avge [i] = normE - expected_normE;
421  residue_normsasa[i] = sasa14 - avg_sasa14s[ rsd.aa() ];
422 
423  average_avge += residue_avge [ i ];
424  average_normsasa += residue_normsasa[ i ];
425  ++count;
426  }
427 
// averages stay 0.0 when no residue was counted
428  if ( count ) {
429  average_avge /= count;
430  average_normsasa /= count;
431  }
432 }
433 
434 
435 
436 
437 ///////////////////////////////////////////////////////////////////////////////
/// @brief Compute, for each counted residue, the difference between its SASA at probe radius 0.5 and the
/// value a per-amino-acid polynomial predicts from its SASA at probe radius 1.4.
/// @param pose  the structure to evaluate
/// @param residue_sasapack  output, sized to pose.total_residue(); actual minus expected small-probe SASA
///                          for counted residues, 0. for skipped ones
/// @param residue_normsasa  output, same size; big-probe SASA minus avg_sasa14s for the residue's amino
///                          acid, 0. for skipped ones
/// @param average_sasapack  output; mean of residue_sasapack over counted residues (0.0 if none were counted)
/// @param average_normsasa  output; mean of residue_normsasa over counted residues (0.0 if none)
/// @details Skipped residues: non-protein, lower or upper terminus, and cysteines with the DISULFIDE
/// variant. The polynomial tables are function-local statics loaded on first call.
/// NOTE(review): this listing skips the line between "void" and the parameter list, so the function's
/// name is not visible here.
438 void
440  Pose const & pose,
441  Reals & residue_sasapack,
442  Reals & residue_normsasa,
443  Real & average_sasapack,
444  Real & average_normsasa
445  )
446 {
// polynomial tables are loaded on the first call only
447  static vector1< PPoly > polys;
448  static Reals avg_sasa14s;
449  if ( polys.empty() ) {
450  load_sasapack_polynomial_coefficients( polys, avg_sasa14s );
451  }
452 
453  //// need to compute sasas
454  Real const big_probe_radius( 1.4 ), small_probe_radius( 0.5 );
455  Reals rsd_sasa_big_probe, rsd_sasa_small_probe;
456  compute_residue_sasas_for_sasa_scores( big_probe_radius, pose, rsd_sasa_big_probe );
457  compute_residue_sasas_for_sasa_scores( small_probe_radius, pose, rsd_sasa_small_probe );
458 
459  residue_sasapack.clear(); residue_sasapack.resize( pose.total_residue(), 0. );
460  residue_normsasa.clear(); residue_normsasa.resize( pose.total_residue(), 0. );
461 
462  average_sasapack = average_normsasa = 0.0;
463 
// number of residues that contribute to the averages
464  Size count(0);
465  for ( Size i=1; i<= pose.total_residue(); ++i ) {
466  Residue const & rsd( pose.residue(i) );
467  if ( !rsd.is_protein() ) continue;
468  if ( rsd.is_lower_terminus() || rsd.is_upper_terminus() ) continue;
469  if ( rsd.aa() == aa_cys && rsd.has_variant_type( chemical::DISULFIDE ) ) continue; // sasapack ==> 0.0 for disulfs
470 
471  Real const actual_sasa14( rsd_sasa_big_probe[i] ), actual_sasa5( rsd_sasa_small_probe[i] );
// the polynomial maps the big-probe SASA to the expected small-probe SASA
// NOTE(review): polys and avg_sasa14s hold num_canonical_aas entries; assumes rsd.aa() is within that range
472  Real const expected_sasa5( polys[ rsd.aa() ]( actual_sasa14 ) );
473  residue_sasapack[ i ] = actual_sasa5 - expected_sasa5;
474  residue_normsasa[ i ] = actual_sasa14 - avg_sasa14s[ rsd.aa() ];
475 
476  average_sasapack += residue_sasapack[i];
477  average_normsasa += residue_normsasa[i];
478  ++count;
479  }
// averages stay 0.0 when no residue was counted
480  if ( count ) {
481  average_sasapack /= count;
482  average_normsasa /= count;
483  }
484 }
485 
486 
487 
488 } // ns sasa_scores
489 } // ns protocols