Rosetta 3.5
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
RamaScore.cc
Go to the documentation of this file.
1 // -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
2 // vi: set ts=2 noet:
3 //
4 // (c) Copyright Rosetta Commons Member Institutions.
5 // (c) This file is part of the Rosetta software suite and is made available under license.
6 // (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
7 // (c) For more information, see http://www.rosettacommons.org. Questions about this can be
8 // (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
9 
10 /// @file protocols/frag_picker/scores/RamaScore.cc
11 /// @brief Ramachandran surface scores for fragment picking
12 /// @author Robert Vernon (rvernon@u.washington.edu)
13 
15 
16 // type headers
17 #include <core/types.hh>
18 
19 // package headers
24 
25 // option key includes
26 #include <basic/options/option.hh>
27 #include <basic/options/keys/OptionKeys.hh>
28 #include <basic/options/keys/frags.OptionKeys.gen.hh>
29 
30 // utility headers
31 #include <basic/database/open.hh>
32 #include <utility/io/izstream.hh>
33 #include <utility/io/ozstream.hh>
34 #include <ObjexxFCL/string.functions.hh>
35 
36 namespace protocols {
37 namespace frag_picker {
38 namespace scores {
39 
40 using namespace basic::options;
41 using namespace basic::options::OptionKeys;
42 
/// @brief Constructor for the case where no secondary-structure prediction is passed in.
/// @details Forwards priority, lowest_acceptable_value and use_lowest to CachingScoringMethod
/// under the name "RamaScore", stores the query sequence and the prediction name, prints the
/// query and its length to std::cout, and points query_ss_ at default_ss after setting the
/// fractions (1.0, 1.0, 1.0) at every query position.
/// NOTE(review): the embedded listing numbers skip lines 46 and 56, so the declaration of
/// default_ss (and whatever line 56 held) is not visible here -- confirm against the original file.
43 RamaScore::RamaScore(Size priority, Real lowest_acceptable_value, bool use_lowest, std::string & fastaQuery, std::string prediction_name ) :
44  CachingScoringMethod(priority, lowest_acceptable_value, use_lowest, "RamaScore"), query_(fastaQuery),prediction_name_(prediction_name) {
45 
    // Diagnostic print of the query string and its length.
47  std::cout << "QUERY" << fastaQuery << " " << query_.size() << std::endl;
    // Grow default_ss by the query length before filling it in.
48  default_ss->extend(query_.size());
49 
    // Same three fractions at every position, i.e. no position is treated differently.
50  for ( Size i = 1; i <= query_.size(); ++i ) {
51  default_ss->set_fractions(i, 1.0, 1.0, 1.0 );
52  }
53 
54  query_ss_ = default_ss;
55 
57 }
58 
59 
/// @brief Constructor for the case where the caller supplies a secondary-structure prediction.
/// @details Forwards priority, lowest_acceptable_value and use_lowest to CachingScoringMethod
/// under the name "RamaScore" and stores the query sequence, the prediction object and the
/// prediction name in the corresponding members.
/// NOTE(review): the embedded listing numbers skip line 63, so any statement in the
/// constructor body is not visible here -- confirm against the original file.
60 RamaScore::RamaScore(Size priority, Real lowest_acceptable_value, bool use_lowest, std::string & fastaQuery, core::fragment::SecondaryStructureOP query_prediction, std::string prediction_name ) :
61  CachingScoringMethod(priority, lowest_acceptable_value, use_lowest, "RamaScore"), query_(fastaQuery), query_ss_(query_prediction), prediction_name_(prediction_name)
62 {
64 }
65 
/// @brief Builds the per-position score tables stored in sequence_rama_tables_.
/// @details Returns immediately if sequence_rama_tables_ already has more than one entry.
/// Otherwise, for each query position i from 2 to query_.size()-1 it:
///  1. reads the helix/strand/loop fractions of query_ss_ at i (all three set to 1.0 if they sum to 0),
///  2. picks a table name from the residue letter ("c" becomes "dc"), with the next letter
///     appended when that letter is "P",
///  3. sums the count files "sampling/fragpicker_rama_tables/<H|E|L>_<aa_type>.counts",
///     each weighted by its fraction, into temp[i],
///  4. optionally rescales the 37x37 table so its sum equals option frags::rama_norm,
///  5. replaces every entry with 1 / (1 + exp(C + B*log(entry))),
///  6. optionally writes the table to "res<i>_<aa_type>.rama_table".
/// Finally temp is assigned to sequence_rama_tables_.
/// NOTE(review): the embedded listing numbers skip line 67 (the function name and parameter
/// list) and line 73 (presumably the declaration of temp) -- confirm against the original file.
66 void
68 {
69 
70  // skip if static data is already set up
71  if (sequence_rama_tables_.size() > 1) return;
72 
74 
    // Only positions 2 .. size-1 get a table.
    // NOTE(review): if Size is unsigned, query_.size() -1 wraps around for an empty query_ -- confirm
    // that query_ can never be empty when this runs.
75  for( Size i = 2; i <= query_.size() -1; ++i ) {
76  std::string curr_aa, next_aa, aa_type, ss_type;
77  utility::vector1< core::Real > ss_weight( 3, 0 );
78  utility::vector1< std::string > ss_types( 3, "" );
79 
80  ss_weight[1] = query_ss_->helix_fraction(i);
81  ss_weight[2] = query_ss_->strand_fraction(i);
82  ss_weight[3] = query_ss_->loop_fraction(i);
83 
    // No information at this position: weight the three classes equally.
84  if ( ( ss_weight[1] + ss_weight[2] + ss_weight[3]) == 0) {
85  ss_weight[1] = 1.0;
86  ss_weight[2] = 1.0;
87  ss_weight[3] = 1.0;
88  }
89 
90  ss_types[1] = "H";
91  ss_types[2] = "E";
92  ss_types[3] = "L";
93 
    // NOTE(review): assumes query_ is a 0-based string, so query_[i-1] is the i-th residue
    // and query_[i] is the residue after it -- confirm the type of query_.
94  curr_aa = query_[i-1];
95  next_aa = query_[i];
96 
97  //(C) is a non-disulfide cysteine, and (c) is a disulfide cysteine
98  // but then the database needs to refer to them as "C" and "dc" because macs
99  // ignore capitalization.
100  if ( curr_aa == "c" ) {
101  curr_aa = "dc";
102  }
103 
104  //There are 42 amino acid types.
105  // 21 for the normal set w. cysteines split into non-disulfide (C) and disulfide (c)
106  // 21 for the above set but where the next residue is proline
107  if ( next_aa == "P" ) {
108  aa_type = curr_aa+next_aa;
109  } else {
110  aa_type = curr_aa;
111  }
112 
113  //The ramachandran fragment score is a sigmoid function based on sequence and secondary structure specific phi/psi counts
114  //However secondary structure is at this point just a weighted probability, so first we have to combine the
115  //sequence specific H, E & L counts to create a secondary structure weighted ramachandran table.
116  //(note: the counts here were extracted from the vall and have been gaussian smoothed to blur out the noise)
117  for( Size s = 1; s <= 3; ++s ) {
    // Classes with zero weight contribute nothing, so their file is never opened.
118  if ( ss_weight[s] > 0.0 ) {
119  //std::string db_location("/work/rvernon/fragpicking_tests/vall/final/"+ss_types[s]+"_"+aa_type+".counts");
120  std::string db_location("sampling/fragpicker_rama_tables/"+ss_types[s]+"_"+aa_type+".counts");
121  utility::io::izstream table_file;
122  basic::database::open(table_file, db_location);
123  //table_file.open(db_location);
124 
125  std::string line;
126 
127  while ( getline(table_file, line) ) {
128  if ( line.length() != 0 ) {
129  std::istringstream line_stream(line);
130  Size x, y;
131  float count;
132 
    // Each non-empty line is read as "x y count"; x and y are shifted by one to index the table.
    // NOTE(review): the extraction result is not checked, so a malformed line would leave
    // x, y or count uninitialized, and x+1 / y+1 are not range-checked here.
133  line_stream >> x >> y >> count;
134  temp[i][x+1][y+1] += ss_weight[s] * count;
135  //std::cout << "HEYO " << i << " " << x << " " << y << " " << ss_weight[s] << " " << count << std::endl;
136  }
137  }
138  }
139  }
140 
    // The output file is only opened when the write_rama_tables option was given by the user.
141  utility::io::ozstream outtable;
142  if (option[frags::write_rama_tables].user()) {
143  std::string res = ObjexxFCL::string_of( i );
144  outtable.open("res"+res+"_"+aa_type+".rama_table");
145  }
146 
147  //Post-Normalization Multiplier (puts things back roughly into the magnitude of the raw counts).
148  //If zero then don't bother normalizing, just use raw counts
149  float const A( option[frags::rama_norm] );
150  if (A > 0.0) {
151  Real total(0.0);
152  for( Size x = 1; x <= 37; ++x ) {
153  for( Size y = 1; y <= 37; ++y ) {
154  total += temp[i][x][y];
155  }
156  }
    // A zero total would make the division below meaningless, so it is treated as an error.
157  runtime_assert( total != 0.0 );
158  for( Size x = 1; x <= 37; ++x ) {
159  for( Size y = 1; y <= 37; ++y ) {
160  temp[i][x][y] = (temp[i][x][y] / total) * A;
161  }
162  }
163  }
164 
165  float const C( option[frags::rama_C] ); //default 0.0 <- Sigmoid inflection point adjuster
166  float const B( option[frags::rama_B] ); //default 1.0 <- Sigmoid slope adjuster
167  //Now we convert the count tables into sigmoid function score tables. The score goes from 1 (no counts) to
168  //0 (many counts). Because this is a sigmoid there is a sharp transition between 1 and 0, this transition
169  //takes place at an arbitrary point defined by me. It can be changed by adding in a constant to the final exp.
170  for( Size x = 1; x <= 37; ++x ) {
171  for( Size y = 1; y <= 37; ++y ) {
    // The tiny offset keeps an empty bin from reaching std::log as exactly zero.
172  temp[i][x][y] += 0.000000000000000000000000001;
173  temp[i][x][y] = std::log(temp[i][x][y]);
174  temp[i][x][y] = 1.0 / ( 1 + std::exp( C + B*temp[i][x][y] ) );
175 
176  if (option[frags::write_rama_tables].user()) {
177  float xf( static_cast< float >( x ));
178  float yf( static_cast< float >( y ));
    // Table indices 1..37 are written out as -175, -165, ..., 185.
179  outtable << ((xf-1)*10)-175 << " " << ((yf-1)*10)-175 << " " << temp[i][x][y] << std::endl;
180  }
181  }
182  //This blank line is so I can plot the tables in gnuplot. Don't judge me! -rv
183  if (option[frags::write_rama_tables].user()) outtable << std::endl;
184  }
185  }
186 
187  sequence_rama_tables_ = temp;
188 
189 }
190 
191 void RamaScore::do_caching(VallChunkOP current_chunk) {
192 
193  std::string & tmp = current_chunk()->chunk_key();
194  if (tmp.compare(cached_scores_id_) == 0)
195  return;
196  cached_scores_id_ = tmp;
197 
198  Size query_sequence_length = query_.size();
199 
200  utility::vector1< utility::vector1< Real > > temp( current_chunk->size(),
201  utility::vector1< Real > (query_sequence_length, 0 ) );
202 
203  runtime_assert( query_sequence_length > 0 );
204 
205  for (Size r = 2; r <= query_sequence_length - 1; ++r) {
206 
207  for (Size i = 1; i <= current_chunk->size(); ++i) {
208  VallResidueOP res = current_chunk->at(i);
209 
210  Real phi = res->phi();
211  Real psi = res->psi();
212 
213  //Frigging vall...
214  if ( phi > 180 ) phi = -180 + (phi - 180);
215  if ( psi > 180 ) psi = -180 + (psi - 180);
216  if ( phi < -180 ) phi = 180 + (phi + 180);
217  if ( psi < -180 ) psi = 180 + (psi + 180);
218 
219  Size i_phi = static_cast< Size > (((phi + 180)/10)+1);
220  Size i_psi = static_cast< Size > (((psi + 180)/10)+1);
221 
222  runtime_assert( (i_phi >= 1) && (i_phi <= 37) );
223  runtime_assert( (i_psi >= 1) && (i_psi <= 37) );
224 
225  temp[i][r] = sequence_rama_tables_[r][i_phi][i_psi];
226  }
227  }
228 
229  scores_ = temp;
230 }
231 
233 }
234 
// Hands the fragment and score map to cached_score() and returns its result unchanged.
// NOTE(review): the embedded listing numbers skip line 235, so the signature of the
// enclosing function is not visible here -- confirm against the original file.
236  return cached_score( fragment, scores );
237 }
238 
240  FragmentScoreMapOP scores) {
    // Averages the cached per-residue scores over the fragment, stores the average in the
    // score map under id_, and returns false only when use_lowest_ is set and the average
    // is below lowest_acceptable_value_.
    // NOTE(review): the embedded listing numbers skip line 239, so the first line of this
    // signature (return type, name, first parameter) is not visible here.
241 
242 
    // Refresh the cache when it currently holds scores for a different chunk.
243  std::string & tmp = fragment->get_chunk()->chunk_key();
244  if (tmp.compare(cached_scores_id_) != 0)
245  do_caching(fragment->get_chunk());
246 
247 
248  Real totalScore = 0.0;
249 
    // Walk the fragment in step through the vall and the query: offset i-1 from each start index.
250  for (Size i = 1; i <= fragment->get_length(); i++) {
251 // runtime_assert(fragment->get_first_index_in_vall() + i - 1 <= scores_.size());
252 // runtime_assert(fragment->get_first_index_in_query() + i - 1 <= scores_[1].size());
253 
    // This Real tmp shadows the std::string & tmp declared above.
254  Real tmp = scores_[fragment->get_first_index_in_vall() + i - 1]
255  [fragment->get_first_index_in_query() + i - 1];
256  totalScore += tmp;
257  }
258 
259  //std::cout << "TOTALSCORE " << totalCount << " " << totalScore << " " << totalScore / (Real) fragment->get_length();
    // NOTE(review): a fragment of length 0 would divide by zero here -- confirm that cannot occur.
260  totalScore /= (Real) fragment->get_length();
261 
262  scores->set_score_component(totalScore, id_);
263  if ((totalScore < lowest_acceptable_value_) && (use_lowest_ == true))
264  return false;
265  return true;
266 }
267 
269  FragmentScoreMapOP, std::ostream&)
270 {
    // Stub: the unnamed parameters are never used and the function always returns true.
    // NOTE(review): the embedded listing numbers skip line 268, so the first line of this
    // signature is not visible here -- confirm against the original file.
271  return true;
272 }
273  //
274  // return true;
275  //}
276 
277 
278  //bool RamaScore::describe_score(FragmentCandidateOP f,
279  // FragmentScoreMapOP empty_map, std::ostream& out) {
280  //
281  // return true;
282  //}
283 
284 
286 
287 } // scores
288 } // frag_picker
289 } // protocols
290 
291