Rosetta 3.5
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
NMerPSSMEnergy.cc
Go to the documentation of this file.
1 // -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
2 // vi: set ts=2 noet:
3 //
4 // (c) Copyright Rosetta Commons Member Institutions.
5 // (c) This file is part of the Rosetta software suite and is made available under license.
6 // (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
7 // (c) For more information, see http://www.rosettacommons.org. Questions about this can be
8 // (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
9 
10 /// @file core/scoring/methods/NMerPSSMEnergy.cc
11 /// @brief NMer PSSM energy method implementation
12 /// @author Andrew Leaver-Fay (aleaverfay@gmail.com)
13 
14 // Unit headers
17 
18 // Package headers
20 // AUTO-REMOVED #include <core/scoring/methods/EnergyMethodOptions.hh>
21 
22 // Project headers
23 #include <core/pose/Pose.hh>
24 #include <core/chemical/AA.hh>
27 
28 // C++ Headers
29 #include <string>
30 #include <vector>
31 
32 // Utility Headers
33 #include <utility/io/izstream.hh>
34 #include <utility/string_util.hh>
35 
36 #include <basic/options/option.hh>
37 #include <basic/options/keys/score.OptionKeys.gen.hh>
38 
39 #include <utility/vector1.hh>
40 
41 static basic::Tracer TR( "core.scoring.methods.NMerPSSMEnergy" );
42 
43 namespace core {
44 namespace scoring {
45 namespace methods {
46 
47 
48 /// @details This must return a fresh instance of the NMerPSSMEnergy class,
49 /// never an instance already in use
53 ) const {
54  return new NMerPSSMEnergy;
55 }
56 
59  ScoreTypes sts;
60  sts.push_back( nmer_pssm );
61  return sts;
62 }
63 
64 void
65 NMerPSSMEnergy::nmer_length( Size const nmer_length ){
67  //nmer residue energy is attributed to position 1
69 }
70 
71 void
72 NMerPSSMEnergy::gate_pssm_scores( bool const gate_pssm_scores ){
74 }
75 
76 void
77 NMerPSSMEnergy::nmer_pssm_scorecut( Real const nmer_pssm_scorecut ){
79 }
80 
81 void
83 {
84  using namespace basic::options;
85  NMerPSSMEnergy::nmer_length( option[ OptionKeys::score::nmer_ref_seq_length ]() );
86  NMerPSSMEnergy::gate_pssm_scores( option[ OptionKeys::score::nmer_pssm_scorecut ].user() );
87  NMerPSSMEnergy::nmer_pssm_scorecut( option[ OptionKeys::score::nmer_pssm_scorecut ]() );
88 }
89 
92 {
95 }
96 
99 {
100  //TODO: make this an argument of the function call
102 
103  all_nmer_pssms_.clear();
104  for( Size ipssm = 1; ipssm <= all_nmer_pssms_in.size(); ++ipssm ){
105  std::map< chemical::AA, utility::vector1< core::Real > > nmer_pssm;
106  std::map< chemical::AA, utility::vector1< core::Real > > const nmer_pssm_in( all_nmer_pssms_in[ ipssm ] );
107  //copy contents of input into new copy
108  for( std::map< chemical::AA, utility::vector1< Real > >::const_iterator it = nmer_pssm_in.begin(); it != nmer_pssm_in.end(); ++it ) {
109  nmer_pssm.insert( *it );
110  }
111  //append new copy to our cleared instance
112  all_nmer_pssms_.push_back( nmer_pssm );
113  }
114 }
115 
117 
119 
120  using namespace basic::options;
121 
122  TR << "checking for NMerPSSMEnergy PSSM list" << std::endl;
123 
124  //check for pssm list file
125  if ( option[ OptionKeys::score::nmer_pssm_list ].user() ) {
126  std::string const pssm_list_fname( option[ OptionKeys::score::nmer_pssm_list ] );
127  NMerPSSMEnergy::read_nmer_pssm_list( pssm_list_fname );
128  }
129  //use single pssm file
130  if( option[ OptionKeys::score::nmer_pssm ].user() ){
131  std::string const pssm_fname( option[ OptionKeys::score::nmer_pssm ] );
132  NMerPSSMEnergy::read_nmer_pssm( pssm_fname );
133  }
134 }
135 
136 //read energy table list
137 void NMerPSSMEnergy::read_nmer_pssm_list( std::string const pssm_list_fname ) {
138  TR << "reading NMerPSSMEnergy list from " << pssm_list_fname << std::endl;
139  utility::io::izstream in_stream( pssm_list_fname );
140  if (!in_stream.good()) {
141  utility_exit_with_message( "[ERROR] Error opening NMerPSSMEnergy list file" );
142  }
143  //now loop over all names in list
144  std::string pssm_fname;
145  while( getline( in_stream, pssm_fname ) ){
146  utility::vector1< std::string > const tokens( utility::split( pssm_fname ) );
147  //skip comments
148  if( tokens[ 1 ][ 0 ] == '#' ) continue;
149  NMerPSSMEnergy::read_nmer_pssm( pssm_fname );
150  }
151 }
152 
153 // Load one PSSM (amino acid x sequence-position scores) from pssm_fname
154 // PSSM format is 1 AA per line w/ nmer_length_ score vals
// and append it to all_nmer_pssms_.
// Lines are split on spaces and tabs; a line whose first token starts with '#'
// is skipped. Otherwise the first character of the first token is the amino
// acid code and the remaining tokens are the per-position scores.
// Exits via utility_exit_with_message if the file cannot be opened, if an
// amino acid appears twice, or if a line does not hold nmer_length_ + 1 tokens.
155 void NMerPSSMEnergy::read_nmer_pssm( std::string const pssm_fname ) {
156 
157  TR << "reading NMerPSSMEnergy scores from " << pssm_fname << std::endl;
158  utility::io::izstream in_stream( pssm_fname );
159  if (!in_stream.good()) {
160  utility_exit_with_message( "[ERROR] Error opening NMerPSSMEnergy file" );
161  }
162 
  // scores for this file, keyed by amino acid
163  std::map< chemical::AA, utility::vector1< core::Real > > nmer_pssm;
164  std::string line;
165  while( getline( in_stream, line) ) {
166  utility::vector1< std::string > const tokens( utility::string_split_multi_delim( line, " \t" ) );
  // NOTE(review): tokens[ 1 ] is read without checking that tokens is non-empty;
  // presumably a blank line yields no tokens -- confirm against string_split_multi_delim.
167  //skip comments
168  if( tokens[ 1 ][ 0 ] == '#' ) continue;
169  char const char_aa( tokens[ 1 ][ 0 ] );
  // NOTE(review): `aa` is used below but its declaration is not visible in this
  // listing; presumably it is derived from char_aa -- verify against the full file.
  // NOTE(review): this message says "ref energy database" while the next one says
  // "PSSM database"; both refer to the same pssm_fname.
171  if( nmer_pssm.count( aa ) ) utility_exit_with_message( "[ERROR] NMer ref energy database file "
172  + pssm_fname + " has double entry for aa " + char_aa );
  // one token for the amino acid plus one score per nmer position
173  if( tokens.size() != nmer_length_ + 1 ) utility_exit_with_message( "[ERROR] NMer PSSM database file "
174  + pssm_fname + " has wrong number entries at line " + line
175  + "\n\tfound: " + utility::to_string( tokens.size() ) + " expected: " + utility::to_string( Size( nmer_length_ + 1 ) ) + "\nNote: Whitespace delimited!" );
176  utility::vector1< Real > seqpos_scores( nmer_length_, 0.0 );
  // token ival holds the score for nmer position ival - 1
177  for( Size ival = 2; ival <= tokens.size(); ++ival ){
  // atof reports no errors: a token that is not a number is stored as 0.0
178  Real const score( atof( tokens[ ival ].c_str() ) );
179  seqpos_scores[ ival - 1 ] = score;
180  }
181  nmer_pssm[ aa ] = seqpos_scores;
182  }
183  all_nmer_pssms_.push_back( nmer_pssm );
184 }
185 
186 
189 {
190  return new NMerPSSMEnergy( all_nmer_pssms_ );
191 }
192 
193 
194 // Adds the PSSM score of residue rsd to emap[ nmer_pssm ].
195 // For each loaded PSSM the score is summed over every nmer frame that overlaps rsd's seqpos,
196 // using the PSSM column that matches the residue's position within that frame.
197 // With gating on, a frame only contributes if its whole-frame PSSM score is at least nmer_pssm_scorecut_.
// After all PSSMs are processed, emap[ nmer_pssm ] is divided by the number of PSSMs.
// NOTE(review): the line carrying this method's qualified name is absent from this listing.
198 void
200  conformation::Residue const & rsd,
201  pose::Pose const & pose,
202  EnergyMap & emap
203 ) const
204 {
205  using namespace chemical;
206 
  // nothing to score when no PSSM has been loaded
207  if( all_nmer_pssms_.empty() ) return;
208  Size const seqpos( rsd.seqpos() );
209  //over each pssm
210  Size const n_pssms( all_nmer_pssms_.size() );
211  for( Size ipssm = 1; ipssm <= n_pssms; ++ipssm ){
  // NOTE(review): this copy-constructs the whole map on every call; a const reference would avoid the copy
212  std::map< chemical::AA, utility::vector1< core::Real > > const this_nmer_pssm( all_nmer_pssms_[ ipssm ] );
213  if( this_nmer_pssm.empty() ) continue; //this really shouldn't happen, but just in case
214  //calc nmer's score for this pssm
215  Real rsd_energy( 0.0 );
216  chemical::AA const rsd_aa( pose.residue( seqpos ).aa() );
217 
218  //loop effective p1 seqpos over all overlapping positions
219  //need chain begin, end so dont run off end of sequence in multi-chain poses
220  Size chain_begin( pose.conformation().chain_begin( pose.chain( seqpos ) ) );
221  Size chain_end( pose.conformation().chain_end( pose.chain( seqpos ) ) );
  // first frame start: nmer_length_ - 1 positions before seqpos, clamped to the chain start
  // NOTE(review): if Size is unsigned (presumably -- confirm), seqpos - nmer_length_ + 1 wraps
  // when seqpos + 1 < nmer_length_, the comparison is then false and the frame loop never runs
222  Size p1_seqpos_begin( seqpos - nmer_length_ + 1 < chain_begin ? chain_begin : seqpos - nmer_length_ + 1 );
223  //will we run off end if we start p1 at seqpos?
  // NOTE(review): chain_end - nmer_length_ + 1 has the same wrap risk when the chain is shorter than nmer_length_
224  Size p1_seqpos_end( seqpos + nmer_length_ - 1 > chain_end ? chain_end - nmer_length_ + 1 : seqpos );
225  //loop over each frame beginning
226  for( Size p1_seqpos = p1_seqpos_begin; p1_seqpos <= p1_seqpos_end; ++p1_seqpos ){
227  //get pssm index of seqpos in this p1 frame
228  Size rsd_iseq_nmer( seqpos - p1_seqpos + 1 );
229  //now go ahead and get pssm energy from this_nmer_pssm
  // NOTE(review): find() is dereferenced without an end() check; the gating loop below
  // tests count( aa ) first, this lookup does not
230  Real rsd_energy_this_nmer( this_nmer_pssm.find( rsd_aa )->second[ rsd_iseq_nmer ] );
231 
232  //skip this part if not doing gating
233  if( gate_pssm_scores_ ){
  // total PSSM score of the whole frame that starts at p1_seqpos
234  Real energy( 0.0 );
235  for( Size iseq_nmer = 1; iseq_nmer <= nmer_length_; ++iseq_nmer ){
236  Size iseq_pose( iseq_nmer + p1_seqpos - 1 );
237  //bail if we fall off end of chain
238  if( iseq_pose > chain_end ) break;
239  chemical::AA const aa( pose.residue( iseq_pose ).aa() );
240  //skip if aa not in this pssm
241  if( !this_nmer_pssm.count( aa ) ) continue;
242  Real this_rsd_energy( this_nmer_pssm.find( aa )->second[ iseq_nmer ] );
243  energy += this_rsd_energy;
244  }
245  //gate energy at pssm_scorecut, thus ignoring low-scoring nmers
246  //skip rsd_energy accumulation if total pssm score is low enough
247  if( energy < nmer_pssm_scorecut_ ) continue;
248  }
249  rsd_energy += rsd_energy_this_nmer;
250  }
251  //add sum of all frames' rsd energies into emap
252  emap[ nmer_pssm ] += rsd_energy;
253  }
254  //normalize energy by number of pssms used
255  //otherwise avg scores would become huge if we use lots of pssms instead of just 1
  // NOTE(review): this divides the whole emap entry, so any value already stored in
  // emap[ nmer_pssm ] on entry is scaled as well -- confirm callers pass a zeroed entry
256  emap[ nmer_pssm ] /= n_pssms;
257  return;
258 }
259 
260 
261 Real
263  id::DOF_ID const &,
264  id::TorsionID const &,
265  pose::Pose const &,
266  ScoreFunction const &,
267  EnergyMap const &
268 ) const
269 {
270  return 0.0;
271 }
272 
273 /// @brief NMerPSSMEnergy is context independent; indicates that no
274 /// context graphs are required
275 void
277 {}
280 {
281  return 1; // Initial versioning
282 }
283 } // methods
284 } // scoring
285 } // core
286