Rosetta 3.5
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
AmbigCSScore.cc
Go to the documentation of this file.
1 // -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
2 // vi: set ts=2 noet:
3 //
4 // This file is part of the Rosetta software suite and is made available under license.
5 // The Rosetta software is developed by the contributing members of the Rosetta Commons consortium.
6 // (C) 199x-2009 Rosetta Commons participating institutions and developers.
7 // For more information, see http://www.rosettacommons.org/.
8 
9 /// @file protocols/frag_picker/scores/AmbigCSScore.cc
10 /// @brief Object that scores a fragment by target-observed/vall-predicted chemical shift distances
11 /// @author Robert Vernon rvernon@u.washington.edu
12 
14 
18 // AUTO-REMOVED #include <protocols/frag_picker/scores/FragmentCrmsd.hh>
19 // AUTO-REMOVED #include <protocols/frag_picker/scores/FragmentScoreManager.hh>
22 // AUTO-REMOVED #include <protocols/frag_picker/VallProvider.hh>
23 #include <utility/io/ozstream.hh>
24 
25 
26 // option key includes
27 // AUTO-REMOVED #include <core/init.hh>
28 #include <basic/options/option.hh>
29 #include <basic/options/keys/OptionKeys.hh>
30 #include <basic/options/keys/in.OptionKeys.gen.hh>
31 #include <basic/options/keys/frags.OptionKeys.gen.hh>
32 
33 // AUTO-REMOVED #include <basic/prof.hh>
34 
35 // Boost
36 #include <boost/tuple/tuple.hpp>
37 
38 // project headers
39 #include <basic/Tracer.hh>
40 
42 #include <utility/vector1.hh>
43 
44 
45 namespace protocols {
46 namespace frag_picker {
47 namespace scores {
48 
49 using namespace basic::options;
50 using namespace basic::options::OptionKeys;
51 
52 static basic::Tracer trAmbigCSScore(
53  "protocols.frag_picker.scores.AmbigCSScore");
54 
55 
56 //AmbigCSScore Constructor
57 // The Talos file reader is passed in as an object, and secondary shifts are calculated during AmbigCSScore construction
58 // (Secondary shifts are shift deviations from random coil, where random coil values are defined according to
59 // the combination of atom type, residue type, previous residue type, and next residue type.
60 AmbigCSScore::AmbigCSScore(Size priority, Real lowest_acceptable_value, bool use_lowest,
61  CSTalosIO& readerA, CSTalosIO& readerB) :
62  CachingScoringMethod(priority, lowest_acceptable_value, use_lowest, "AmbigCSScore")
63 {
64 
65  //outfile_ = utility::io::ozstream tmp("allcomparisons.out");
66  //outfile_.open("allcomparisons.out");
67 
68  trAmbigCSScore << "READING SHIFTS!" << std::endl;
69  CS2ndShift secondary_shift_calculatorA(readerA, false);
70  CS2ndShift secondary_shift_calculatorB(readerB, false);
71  trAmbigCSScore << "SHOULD BE DONE WRITING 2nd SHIFTS" << std::endl;
72 
73  target_Ashifts_ = secondary_shift_calculatorA.shifts();
74  target_Bshifts_ = secondary_shift_calculatorB.shifts();
75 }
76 
77 //Residue-Residue scores are cached for current vall chunk
78 //This is where the AmbigCSScore equation lives
80 
81  //ONLY USED IN THE OLD VERSION OF THE SCORE
82  //clip_factor is used to define the maximum shift difference
83  //larger differences are adjusted down to clip_factor*v_shift
84  //Real const clip_factor(3.0);
85 
86  //bool vall_data(false);
87  trAmbigCSScore << "caching CS score for " << current_chunk->get_pdb_id()
88  << " of size " << current_chunk->size() << std::endl;
89 
90  //Check to see if the cache needs to be recalculated
91  std::string & tmp = current_chunk()->chunk_key();
92  if (tmp.compare(cached_scores_id_) == 0)
93  return;
94  cached_scores_id_ = tmp;
95 
96  //Initialize empty 2D table, vall-length x target-length
97  Size query_sequence_length = target_Ashifts_.size();
98  runtime_assert(query_sequence_length == target_Bshifts_.size());
99  std::pair< Real, Real > empty(0,0);
100  utility::vector1< utility::vector1< std::pair< Real, Real> > > temp( current_chunk->size(),
101  utility::vector1<std::pair< Real, Real> > (query_sequence_length, empty ) );
102  runtime_assert( target_Ashifts_.size() > 0 );
103 
104  //SIGMOID CONSTANTS - Should be set in constructor, not command line flags
105  Real a( option[frags::sigmoid_cs_A]() ); // default = 4
106  Real b( option[frags::sigmoid_cs_B]() ); // default = 5
107 
108  //Loop logic is "For each target x vall residue comparison, sum up total of
109  //all shift differences"
110  for (Size r = 1; r <= target_Ashifts_.size(); ++r) {
113 
114  if (query_residue_shiftsA.size() != query_residue_shiftsB.size()) {
115  utility_exit_with_message("ERROR: -in::file::ambig_talos_cs_A file does not have the same number of shifts as -in::file::ambig_talos_cs_B file, check your formatting, aside from the shifts themselves the files must be identical");
116  }
117 
118  for (Size i = 1; i <= current_chunk->size(); ++i) {
119  Real tmp = 0.0;
120  Real count = 0.0;
121  for (Size d = 1; d <= query_residue_shiftsA.size(); ++d) {
122 
123  //q_shift_type is target atom type, q_shift is that atom's secondary shift
124  Size q_shift_typeA(query_residue_shiftsA[d].first);
125  Real q_shiftA(query_residue_shiftsA[d].second);
126 
127  Size q_shift_typeB(query_residue_shiftsB[d].first);
128  Real q_shiftB(query_residue_shiftsB[d].second);
129 
130  if (q_shift_typeA != q_shift_typeB) {
131  utility_exit_with_message("ERROR: -in::file::ambig_talos_cs_A file does not match -in::file::ambig_talos_cs_B file, check your formatting, aside from the shifts themselves the files must be identical, even the order matters");
132  }
133 
134  //v_shift is the vall atom's secondary shift, v_sigma is the average deviation
135  //on v_shifts for that type of atom at the vall residue's specific phi/psi location
136  // (Think of v_shift as a phi/psi dependent and atom type dependent weight constant)
137  VallResidueOP res = current_chunk->at(i);
138  Real v_shift(res->secondary_shifts()[(q_shift_typeA*2)-1]);
139  Real v_sigma(res->secondary_shifts()[ q_shift_typeA*2 ]);
140 
141  //v_sigma is only 0.0 for atoms that don't exist in the vall. CB on glycine, for example.
142  if (v_sigma > 0.0) {
143 
144  Real sig_diffA(std::abs((q_shiftA - v_shift) / v_sigma ));
145  Real sig_diffB(std::abs((q_shiftB - v_shift) / v_sigma ));
146 
147  //Always use the lowest diff of the two.
148  if ( sig_diffB < sig_diffA ) {
149  sig_diffA = sig_diffB;
150  }
151 
152  Real sigmoid_diff( 1 / ( 1 + exp((-a*sig_diffA)+b) ) );
153 
154 
155  tmp += sigmoid_diff;
156  count += 1;
157  //vall_data = true; set but never used ~Labonte
158 
159 
160  //THIS IS WHAT THE ORIGINAL CSROSETTA CS SCORE FUNCTION LOOKED LIKE:
161  //Real c1_weight(1.0); //Reweight hydrogen and nitrogen values by 0.9
162  //if ((q_shift_type == 1) || (q_shift_type == 6)) {// or (q_shift_type == 3)) {
163  // c1_weight = 0.9;
164  //}
165  //Real diff(q_shift - v_shift);
166  //if ( std::abs(diff) > (clip_factor*v_sigma) ) {
167  // diff = clip_factor*v_sigma;
168  //}
169  //tmp += c1_weight*(diff/v_sigma)*(diff/v_sigma);
170  }
171  }
172 
173  //Arbitrarily high score for vall residues that don't have any CS data
174  //(ie: residues immediately adjacent to missing density or termini)
175  if ( ( count == 0 ) && ( query_residue_shiftsA.size() != 0 ) ) {
176  tmp = 9999.9;
177  } else {
178  //Sigma6: scores don't use /N_shifts to normalize
179  // This reweights each residue based on its number of shifts instead
180  // so that over gaps in the data the CS score decreases in power and other scores
181  // can take over.
182  if ( count != 0 )
183  tmp = ( tmp / count ) * query_residue_shiftsA.size();
184  }
185 
186  temp[i][r].first = tmp;
187  temp[i][r].second = count;
188  }
189  }
190 
191  //runtime_assert(vall_data == true);//Make sure the vall had some chemical shift data in it
192 
193  scores_ = temp;
194 
195  trAmbigCSScore << "caching CS score for " << current_chunk->get_pdb_id()
196  << " of size " << current_chunk->size()
197  << ". The matrix is: "<<scores_.size()<<" x "<<scores_[1].size()<<std::endl;
198 }
199 
201  return cached_score( fragment, scores );
202 }
203 
205 
206  std::string & tmp = fragment->get_chunk()->chunk_key();
207 
208  if (tmp.compare(cached_scores_id_) != 0) {
209  do_caching(fragment->get_chunk());
210  cached_scores_id_ = tmp;
211  }
212 
213  //Size offset_q = fragment->get_first_index_in_query() - 1;
214  //Size offset_v = fragment->get_first_index_in_vall() - 1;
215 
216  Real totalScore = 0.0;
217  Real totalCount = 0.0;
218 
219  for (Size i = 1; i <= fragment->get_length(); i++) {
220  runtime_assert(fragment->get_first_index_in_vall() + i - 1 <= scores_.size());
221  runtime_assert(fragment->get_first_index_in_query() + i - 1 <= scores_[1].size());
222 
223 
224  std::pair< Real, Real> tmp = scores_[fragment->get_first_index_in_vall() + i - 1]
225  [fragment->get_first_index_in_query() + i - 1];
226 
227  //tmp.first is the score for that residue comparison
228  //tmp.second is the number of chemical shifts
229 
230  totalScore += tmp.first;
231  totalCount += tmp.second;
232  }
233 
234 // runtime_assert( totalScore != NULL );
235 
236  scores->set_score_component(totalScore, id_);
237 
238  if ((totalScore < lowest_acceptable_value_) && (use_lowest_ == true))
239  return false;
240  return true;
241 }
242 
244 }
245 
247  Real lowest_acceptable_value, bool use_lowest, FragmentPickerOP //picker
248  , std::string // line
249 ) {
250 
251  if (option[in::file::ambig_talos_cs_A].user() &&
252  option[in::file::ambig_talos_cs_B].user()) {
253  CSTalosIO inA(option[in::file::ambig_talos_cs_A]());
254  CSTalosIO inB(option[in::file::ambig_talos_cs_B]());
255  inA.write(std::cerr);
256  inB.write(std::cerr);
257  return (FragmentScoringMethodOP) new AmbigCSScore(priority,
258  lowest_acceptable_value,
259  use_lowest,inA,inB);
260  }
261 
262  utility_exit_with_message(
263  "Can't read ambiguous CS data. Provide two chemical shifts file in TALOS format using flags -in::file::ambig_talos_cs_A and in::file::ambig_talos_cs_B");
264 
265  return NULL;
266 }
267 
268 } // scores
269 } // frag_picker
270 } // protocols