Rosetta 3.5
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
CSScore.cc
Go to the documentation of this file.
1 // -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
2 // vi: set ts=2 noet:
3 //
4 // This file is part of the Rosetta software suite and is made available under license.
5 // The Rosetta software is developed by the contributing members of the Rosetta Commons consortium.
6 // (C) 199x-2009 Rosetta Commons participating institutions and developers.
7 // For more information, see http://www.rosettacommons.org/.
8 
9 /// @file protocols/frag_picker/scores/CSScore.cc
10 /// @brief Object that scores a fragment by target-observed/vall-predicted chemical shift distances
11 /// @author Robert Vernon rvernon@u.washington.edu
12 
14 
20 // AUTO-REMOVED #include <protocols/frag_picker/VallProvider.hh>
21 #include <utility/io/ozstream.hh>
22 
23 
24 // option key includes
25 #include <basic/options/option.hh>
26 #include <basic/options/keys/OptionKeys.hh>
27 #include <basic/options/keys/in.OptionKeys.gen.hh>
28 #include <basic/options/keys/frags.OptionKeys.gen.hh>
29 
30 // AUTO-REMOVED #include <basic/prof.hh>
31 
32 // Boost
33 #include <boost/tuple/tuple.hpp>
34 
35 // project headers
36 #include <basic/Tracer.hh>
37 
39 #include <utility/vector1.hh>
40 
41 
42 namespace protocols {
43 namespace frag_picker {
44 namespace scores {
45 
46 using namespace basic::options;
47 using namespace basic::options::OptionKeys;
48 
// File-local (static) tracer registered under the channel name given below.
// The code in this file writes its diagnostic messages to trCSScore.Debug.
49 static basic::Tracer trCSScore(
50  "protocols.frag_picker.scores.CSScore");
51 
52 
53 //CSScore Constructor
54 // The Talos file reader is passed in as an object, and secondary shifts are calculated during CSScore construction
55 // (Secondary shifts are shift deviations from random coil, where random coil values are defined according to
56 // the combination of atom type, residue type, previous residue type, and next residue type.
57 CSScore::CSScore(Size priority, Real lowest_acceptable_value, bool use_lowest,
58  CSTalosIO& reader) :
59  CachingScoringMethod(priority, lowest_acceptable_value, use_lowest, "CSScore")
60 {
61 
62  //outfile_ = utility::io::ozstream tmp("allcomparisons.out");
63  //outfile_.open("allcomparisons.out");
64 
65  trCSScore.Debug << "READING SHIFTS!" << std::endl;
66  CS2ndShift secondary_shift_calculator(reader, true);
67  trCSScore.Debug << "SHOULD BE DONE WRITING 2nd SHIFTS" << std::endl;
68 
69  target_shifts_ = secondary_shift_calculator.shifts();
70 }
71 
72 //Residue-Residue scores are cached for current vall chunk
73 //This is where the CSScore equation lives
74 void CSScore::do_caching(VallChunkOP current_chunk) {
75 
76  ////clip_factor is used to define the maximum shift difference
77  ////larger differences are adjusted down to clip_factor*v_shift
78  ////(the new score is sigmoidal, so clip factors are not required)
79  ////ONLY USED IN THE OLD MFR VERSION OF THE SCORE
80  //Real const clip_factor(3.0);
81 
82  //bool vall_data(false);
83  trCSScore.Debug << "caching CS score for " << current_chunk->get_pdb_id()
84  << " of size " << current_chunk->size() << std::endl;
85 
86  //Check to see if the cache needs to be recalculated
87  std::string & tmp = current_chunk()->chunk_key();
88  if (tmp.compare(cached_scores_id_) == 0)
89  return;
90  cached_scores_id_ = tmp;
91 
92  //Initialize empty 2D table, vall-length x target-length
93  Size query_sequence_length = target_shifts_.size();
94  std::pair< Real, Real > empty(0,0);
95  utility::vector1< utility::vector1< std::pair< Real, Real> > > temp( current_chunk->size(),
96  utility::vector1<std::pair< Real, Real> > (query_sequence_length, empty ) );
97  //runtime_assert( target_shifts_.size() > 0 );
98 
99  //SIGMOID CONSTANTS - Should be set in constructor, not command line flags
100  Real a( option[frags::sigmoid_cs_A]() ); // default = 4
101  Real b( option[frags::sigmoid_cs_B]() ); // default = 5
102 
103 
104  //Loop logic is "For each target x vall residue comparison, sum up total of
105  //all shift differences"
106  for (Size r = 1; r <= target_shifts_.size(); ++r) {
108  for (Size i = 1; i <= current_chunk->size(); ++i) {
109  Real tmp = 0.0;
110  Real count = 0.0;
111  for (Size d = 1; d <= query_residue_shifts.size(); ++d) {
112 
113  //q_shift_type is target atom type, q_shift is that atom's secondary shift
114  // 1 = N
115  // 2 = HA (HA3 for Gly)
116  // 3 = C
117  // 4 = CA
118  // 5 = CB (HA2 for Gly)
119  // 6 = HN
120  Size q_shift_type(query_residue_shifts[d].first);
121  Real q_shift(query_residue_shifts[d].second);
122 
123  //v_shift is the vall atom's secondary shift, v_sigma is the average deviation
124  //on v_shifts for that type of atom at the vall residue's specific phi/psi location
125  // (Think of v_shift as a phi/psi dependent and atom type dependent weight constant)
126  VallResidueOP res = current_chunk->at(i);
127 
128  if ( res->secondary_shifts().size() < q_shift_type*2 ) {
129  trCSScore.Debug << "Chunk has not enough secondary shifts to perform this query at position "
130  << i << " " << std::endl
131  << "pdb_id: " << current_chunk->get_pdb_id() << std::endl
132  << "chain_id: " << current_chunk->get_chain_id() << std::endl
133  << "sequence: " << current_chunk->get_sequence() << std::endl;
134  continue;
135  }
136 
137  Real v_shift(res->secondary_shifts()[(q_shift_type*2)-1]);
138  //q_shift_type*2-1 because the array of 12 numbers goes shift1, sigma1, shift2, sigma2...
139  Real v_sigma(res->secondary_shifts()[ q_shift_type*2 ]);
140 
141  //v_sigma is only 0.0 for atoms that don't exist in the vall. CB on glycine, for example.
142  if (v_sigma > 0.0) {
143 
144  Real sig_diff(std::abs((q_shift - v_shift) / v_sigma ));
145  Real sigmoid_diff( 1 / ( 1 + exp((-a*sig_diff)+b) ) );
146 
147  tmp += sigmoid_diff;
148  count += 1;
149  //vall_data = true; set but never used ~Labonte
150 
151 
152  //THIS IS WHAT THE ORIGINAL CSROSETTA CS SCORE FUNCTION LOOKED LIKE:
153  //Real c1_weight(1.0); //Reweight hydrogen and nitrogen values by 0.9
154  //if ((q_shift_type == 1) || (q_shift_type == 6)) {// or (q_shift_type == 3)) {
155  // c1_weight = 0.9;
156  //}
157  //Real diff(q_shift - v_shift);
158  //if ( std::abs(diff) > (clip_factor*v_sigma) ) {
159  // diff = clip_factor*v_sigma;
160  //}
161  //tmp += c1_weight*(diff/v_sigma)*(diff/v_sigma);
162  }
163  }
164 
165  //Arbitrarily high score for vall residues that don't have any CS data
166  //(ie: residues immediately adjacent to missing density or termini)
167  if ( ( count == 0 ) && ( query_residue_shifts.size() != 0 ) ) {
168  tmp = 9999.9;
169  } else {
170  //Sigma6: scores don't use /N_shifts to normalize
171  // This reweights each residue based on its number of shifts instead
172  // so that over gaps in the data the CS score decreases in power and other scores
173  // can take over.
174  if ( count != 0 )
175  tmp = ( tmp / count ) * query_residue_shifts.size();
176  }
177 
178  temp[i][r].first = tmp;
179  temp[i][r].second = count;
180  }
181  }
182 
183  //runtime_assert(vall_data == true);//Make sure the vall had some chemical shift data in it
184 
185  scores_ = temp;
186 
187  trCSScore.Debug << "caching CS score for " << current_chunk->get_pdb_id()
188  << " of size " << current_chunk->size()
189  << ". The matrix is: "<<scores_.size()<<" x "<<scores_[1].size()<<std::endl;
190 }
191 
193  return cached_score( fragment, scores );
194 }
195 
197 
// NOTE(review): the signature line of this function is not present in this
// listing. The body uses `fragment` and `scores`, and the call on the preceding
// function body passes exactly those two names to cached_score(), so this is
// presumably the body of cached_score - confirm against the repository copy.
//
// Scores one fragment from the cached residue-vs-residue table: sums the cached
// entries along the fragment, divides the summed score by the fragment length,
// stores the result in the score map under id_, and returns false only when the
// result is below lowest_acceptable_value_ while use_lowest_ is true.
198  std::string & tmp = fragment->get_chunk()->chunk_key();
199 
// Rebuild the cache when the fragment comes from a chunk other than the cached one.
// do_caching() performs the same key comparison and already stores the key in
// cached_scores_id_, so the assignment below repeats that store.
200  if (tmp.compare(cached_scores_id_) != 0) {
201  do_caching(fragment->get_chunk());
202  cached_scores_id_ = tmp;
203  }
204 
205  //Size offset_q = fragment->get_first_index_in_query() - 1;
206  //Size offset_v = fragment->get_first_index_in_vall() - 1;
207 
// totalCount is accumulated in the loop below but is not read afterwards.
208  Real totalScore = 0.0;
209  Real totalCount = 0.0;
210 
// Walk the fragment: position i maps to row (first vall index + i - 1) and
// column (first query index + i - 1) of scores_.
211  for (Size i = 1; i <= fragment->get_length(); i++) {
// Bounds checks; the column check reads the width of the first row, scores_[1].
212  runtime_assert(fragment->get_first_index_in_vall() + i - 1 <= scores_.size());
213  runtime_assert(fragment->get_first_index_in_query() + i - 1 <= scores_[1].size());
214 
215 
// This pair named tmp shadows the std::string reference tmp declared above.
216  std::pair< Real, Real> tmp = scores_[fragment->get_first_index_in_vall() + i - 1]
217  [fragment->get_first_index_in_query() + i - 1];
218 
219  //tmp.first is the score for that residue comparison
220  //tmp.second is the number of chemical shifts
221 
222  totalScore += tmp.first;
223  totalCount += tmp.second;
224  }
225 
226 // runtime_assert( totalScore != NULL );
227 
// Average the summed score over the fragment length.
228  totalScore /= (Real) fragment->get_length();
229 
230  scores->set_score_component(totalScore, id_);
231 
// Reject the fragment only when the threshold is enabled and the score is below it.
232  if ((totalScore < lowest_acceptable_value_) && (use_lowest_ == true))
233  return false;
234  return true;
235 }
236 
238 }
239 
241  Real lowest_acceptable_value, bool use_lowest, FragmentPickerOP //picker
242  , std::string // line
243 ) {
// NOTE(review): the first line of this signature (return type, function name
// and the `priority` parameter used below) is not present in this listing -
// confirm against the repository copy.
//
// Factory body: builds a CSScore from the file named by the in::file::talos_cs
// option. The FragmentPickerOP and std::string parameters are unnamed (their
// names are commented out) and therefore unused here.
244 
// .user() presumably reports whether the option was supplied by the user - TODO confirm.
245  if (option[in::file::talos_cs].user()) {
// The reader is constructed from the option's value and passed to the CSScore constructor.
246  CSTalosIO in(option[in::file::talos_cs]());
247  // in.write(std::cerr);
248  return (FragmentScoringMethodOP) new CSScore(priority,
249  lowest_acceptable_value, use_lowest,in);
250  }
251 
// Reached only when the option test above fails.
252  utility_exit_with_message(
253  "Can't read CS data. Provide a chemical shifts file in TALOS format.");
254 
// Presumably never reached if utility_exit_with_message does not return - TODO confirm.
255  return NULL;
256 }
257 
258 } // scores
259 } // frag_picker
260 } // protocols