Rosetta 3.5
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
CS2ndShift.cc
Go to the documentation of this file.
1 // -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
2 // vi: set ts=2 noet:
3 //
4 // This file is part of the Rosetta software suite and is made available under license.
5 // The Rosetta software is developed by the contributing members of the Rosetta Commons consortium.
6 // (C) 199x-2009 Rosetta Commons participating institutions and developers.
7 // For more information, see http://www.rosettacommons.org/.
8 
9 /// @file protocols/frag_picker/CS2ndShift.cc
10 /// @brief Secondary chemical shift calculation for the fragment picker.
11 /// @author Dominik Gront (dgront@chem.uw.edu.pl)
12 
13 // unit headers
15 // package headers
16 
17 // disulfide compatibility
18 #include <basic/options/option.hh>
19 #include <basic/options/keys/OptionKeys.hh>
20 #include <basic/options/keys/in.OptionKeys.gen.hh>
22 
23 #include <basic/database/open.hh>
24 
25 // project headers
26 #include <basic/Tracer.hh>
27 
28 // utility headers
29 #include <core/types.hh>
30 
31 #include <string>
32 #include <map>
33 #include <utility/io/izstream.hh>
34 #include <utility/exit.hh>
35 
36 #include <utility/vector1.hh>
37 
38 
39 namespace protocols {
40 namespace frag_picker {
41 
// Bring the option-system names into scope; `option` and `in::fix_disulf` are used unqualified
// further down in this file.
42 using namespace basic::options;
43 using namespace basic::options::OptionKeys;
44 
// Tracer with internal linkage; the Debug/Info output in this file is written through it.
45 static basic::Tracer tr("protocols.frag_picker.SecondaryShiftCalculator");
46 
/// @brief Builds the per-residue list of secondary chemical shifts for the sequence held by input_data.
/// @details For every sequence position and for each of the six shift types registered below
/// (N, HA, C, CA, CB, HN) an offset is summed from the randcoil and rcadj tables for the residue
/// itself, the rcprev table for the preceding residue (if any) and the rcnext table for the
/// following residue (if any). The secondary shift is the measured shift minus that offset.
/// Accepted shifts are appended to res_shifts, which is pushed onto secondary_shifts_ once per residue.
/// @param input_data supplies the sequence (get_sequence) and the measured shifts (has_atom / get_shift);
///        it is queried with seqpos+1 while the sequence string is indexed with seqpos.
/// @param use_sslimit when true, a shift is kept only if it lies inside the [min,max] window read from
///        sslimit.tab; when false the window comparison is bypassed.
/// NOTE(review): the declarations of shift_types and res_shifts are not visible in this listing
/// (the embedded line numbering skips 59 and 73) — confirm against the real source file.
47 CS2ndShift::CS2ndShift(CSTalosIO & input_data, bool use_sslimit) {
48  //Change to database files!
// Each offset table maps a residue character to (column name -> value).
49  std::map<char,std::map<std::string,Real> > rcprev(CS2ndShift::read_adjust_table("external/SPARTA+/tab/rcprev.tab") );
50  std::map<char,std::map<std::string,Real> > rcnext(CS2ndShift::read_adjust_table("external/SPARTA+/tab/rcnext.tab") );
51  std::map<char,std::map<std::string,Real> > rcadj(CS2ndShift::read_adjust_table("external/SPARTA+/tab/rcadj.tab") );
52  std::map<char,std::map<std::string,Real> > randcoil(CS2ndShift::read_adjust_table("external/SPARTA+/tab/randcoil.tab") );
53 
// sslimit maps residue character -> (column name -> pair); the pair is used below as (min, max).
54  std::map<char,std::map<std::string,std::pair< Real, Real > > > sslimit(CS2ndShift::read_sslimit_table("external/SPARTA+/tab/sslimit.tab") );
55 
56  //Recalculate Values:
// The sequence is passed through also_check_fix_disulf before use.
57  std::string const sequence( also_check_fix_disulf( input_data.get_sequence() ) );
58 
60 
// Each entry pairs a numeric id (stored as shift.first for accepted shifts) with the column name
// used for table lookups and for querying input_data.
61  shift_types.push_back(std::make_pair(1,"N"));//N
62  shift_types.push_back(std::make_pair(2,"HA"));//HA but HA3 for Gly
63  shift_types.push_back(std::make_pair(3,"C"));//C
64  shift_types.push_back(std::make_pair(4,"CA"));//CA
65  shift_types.push_back(std::make_pair(5,"CB"));//CB but HA2 for Gly
66  shift_types.push_back(std::make_pair(6,"HN"));//HN
67 
68  //utility::vector1< utility::vector1< std::pair< Size, Real > > > secondary_shifts_;//( utility::vector1< Real > ( 0, 0 ) );
69 
70 
// seqpos is a 0-based index into the sequence string.
71  for ( Size seqpos = 0; seqpos < sequence.length(); seqpos++ ) {
72  //tr << "CALC_SECONDARY " << seqpos << " ";
74 
// shift_types is indexed from 1 to size() inclusive.
75  for ( Size st_i = 1; st_i <= shift_types.size(); st_i++ ) {
76 
77  Real offset(0);
78 
79  std::pair<Size,std::string> shift_type(shift_types[st_i]);
80 
// shift is only assigned on the acceptance path near the end of this loop body.
81  std::pair< Size, Real > shift;
82  //static_cast< std::string > ( "HA" );
83  static std::string const HA( "HA" );
84  static std::string const CB( "CB" );
85  static std::string const HA2( "HA2" );
86  static std::string const HA3( "HA3" );
87 
// 9999 is a placeholder; shift_value is only consumed when has_shift has been set to true.
88  Real shift_value(9999);
89  bool has_shift(false);
90 
// Glycine special case: for both the HA and the CB shift type the offset is taken from the HA
// columns and the measured value is the mean of the HA2 and HA3 shifts.
91  if ((( shift_type.second == HA ) || ( shift_type.second == CB)) && (sequence[seqpos] == 'G')) {
92 
// NOTE(review): the find() results here and below are dereferenced without comparing against end();
// a residue character or column name missing from a table would be undefined behaviour — confirm
// that the tables cover every character the sequence can contain.
93  offset = randcoil.find(sequence[seqpos])->second.find(HA)->second
94  +rcadj.find(sequence[seqpos])->second.find(HA)->second;
95 
// Neighbour corrections are skipped at the first and last sequence positions.
96  if (seqpos != 0) {
97  offset = offset + rcprev.find(sequence[seqpos-1])->second.find(HA)->second;
98  }
99  if (seqpos != sequence.length()-1) {
100  offset = offset + rcnext.find(sequence[seqpos+1])->second.find(HA)->second;
101  }
102 
// Both HA2 and HA3 must be present for a glycine shift to be computed.
103  if ( input_data.has_atom(seqpos+1,HA2) && input_data.has_atom(seqpos+1,HA3) ) { // && (shift_type.second != CB) ) {
104  //shift.first = shift_type.first;
105  //shift.second = (input_data.get_shift(seqpos+1,HA2) + input_data.get_shift(seqpos+1,HA3))/2 - offset;
106 
107  shift_value = (input_data.get_shift(seqpos+1,HA2) + input_data.get_shift(seqpos+1,HA3))/2 - offset;
108  has_shift = true;
109  //res_shifts.push_back( shift );
110 
// Note that this trace prints the sum of HA2 and HA3, not the mean used for shift_value.
111  tr.Debug << "CALC_SECONDARY_G " << (seqpos+1) << " " << sequence[seqpos] << " " << input_data.get_shift(seqpos+1, HA2) + input_data.get_shift(seqpos+1,HA3) << " " << offset << " " << shift_type.second << " " << shift_value << std::endl;
112  }
113 
114  } else {
115 
// General case: same offset construction, using the column named after the current shift type.
116  offset = randcoil.find(sequence[seqpos])->second.find(shift_type.second)->second
117  +rcadj.find(sequence[seqpos])->second.find(shift_type.second)->second;
118 
119  if (seqpos != 0) {
120  offset = offset + rcprev.find(sequence[seqpos-1])->second.find(shift_type.second)->second;
121  }
122  if (seqpos != sequence.length()-1) {
123  offset = offset + rcnext.find(sequence[seqpos+1])->second.find(shift_type.second)->second;
124  }
125 
126  if ( input_data.has_atom(seqpos+1,shift_type.second) ) {
127  //shift.first = shift_type.first;
128 
129  //shift.second = input_data.get_shift(seqpos+1,shift_type.second) - offset;
130 
131  shift_value = input_data.get_shift(seqpos+1,shift_type.second) - offset;
132  has_shift = true;
133  //res_shifts.push_back( shift );
134 
135  tr.Debug << "CALC_SECONDARY_A " << (seqpos+1) << " " << sequence[seqpos] << " " << input_data.get_shift(seqpos+1, shift_type.second) << " " << offset << " " << shift_type.second << " " << shift_value << std::endl;
136  }
137 
138  }
139 
// NOTE(review): a shift is only stored when sslimit has an entry for this residue AND this shift
// type; that gate applies even when use_sslimit is false, so shifts without a limit entry are
// dropped without any message — confirm this is intended.
140  if (has_shift == true) {
141  if (sslimit.count(sequence[seqpos]) == 1) {
142  if (sslimit.find(sequence[seqpos])->second.count(shift_type.second) == 1) {
143 
144  Real min( sslimit.find(sequence[seqpos])->second.find(shift_type.second)->second.first );
145  Real max( sslimit.find(sequence[seqpos])->second.find(shift_type.second)->second.second );
146 
147  // If use_sslimit == false, always accept
148  if ( ((shift_value >= min) && ( shift_value <= max )) || ( !use_sslimit) ) {
149  shift.first = shift_type.first;
150  shift.second = shift_value;//(input_data.get_shift(seqpos+1,HA2) + input_data.get_shift(seqpos+1,HA3))/2 - offset;
151  res_shifts.push_back( shift );
152 
153  tr.Debug << "USING_2ND_SHIFT: " << seqpos+1 << " " << sequence[seqpos] << "_aa" << " " << shift_type.second << " " << shift.second << std::endl;
154  } else {
// Rejected value: report which bound was violated. shift has not been assigned on this path, so
// the shift.second printed below is the default-constructed value rather than the rejected shift.
155  if (shift_value < min) {
156  tr.Debug << "SHIFT OUTLIER REMOVED: " << seqpos+1 << " " << sequence[seqpos] << " " << shift.second << " " << shift_value << " Limit: " << min << std::endl;
157  } else {
158  tr.Debug << "SHIFT OUTLIER REMOVED: " << seqpos+1 << " " << sequence[seqpos] << " " << shift.second << " " << shift_value << " Limit: " << max << std::endl;
159  }
160  }
161  }
162  }
163  }//end: if (has_shift == true)
164 
165  }
// One res_shifts entry is appended per sequence position.
166  secondary_shifts_.push_back( res_shifts );
167  }
168 }
169 
/// @brief Parses a column-oriented table file into a map: row character -> (column name -> value).
/// @details The header is scanned for a VARS line (column names) and a FORMAT line (end of header);
/// each following line contributes one row keyed by a single character.
/// NOTE(review): the line carrying the function name and parameter list is missing from this listing
/// (the embedded numbering jumps from 170 to 172). The body reads a variable named file_name, and the
/// return type matches how the constructor uses read_adjust_table — presumably this is
/// CS2ndShift::read_adjust_table; confirm against the real source.
170 std::map< char, std::map<std::string,Real> >
172 
173  std::map< char, std::map<std::string,Real> > file_data_map;
174  utility::vector1<std::string> column_names_;
175 
176 
// The path is resolved through basic::database::full_name before opening.
177  utility::io::izstream data(basic::database::full_name(file_name.c_str()));
178  tr.Info << "read CS adjustment data from " << file_name << std::endl;
// NOTE(review): the message below says "talos file" although this reads an adjustment table —
// looks like copied text; left unchanged because it is runtime output.
179  if (!data)
180  utility_exit_with_message("[ERROR] Unable to open talos file: "
181  + file_name);
182 
183  std::string line;
184  std::string keyword;
185  std::string entry;
186  std::string junk;
187 
// Header phase: read lines until one whose first token is FORMAT.
// NOTE(review): the stream state is not checked inside this loop; if the file ends before a FORMAT
// line is seen, the loop does not appear to terminate — confirm.
188  bool header_done = false;
189  while (!header_done) {
190  getline(data, line);
191  std::istringstream line_stream(line);
192  line_stream >> keyword;
193 
194  if (keyword == "VARS") {
// Two tokens after VARS are discarded; the remaining tokens are the column names.
195  line_stream >> junk >> junk >> entry;
196 
197  while (!line_stream.eof()) {
// A column called "H" is stored under the name "HN".
198  if (entry == "H") {
199  entry = "HN";
200  }
201  column_names_.push_back(entry);
202  line_stream >> entry;
203  }
204 
// The loop above exits once reading a token reaches end of line, so the token held in entry at
// that point is appended here.
205  if (entry == "H") {
206  entry = "HN";
207  }
208  column_names_.push_back(entry);
209  }
210 
211  if (keyword == "FORMAT") {
212  header_done = true;
// One further line after FORMAT is read and discarded.
213  getline(data,line);
214  }
215  }
216 
// Data phase: one row per line.
// NOTE(review): the loop is driven by eof(), so a final line without a trailing newline would be
// read by getline but never processed — confirm the table files end with a newline.
217  getline(data,line);
218  while (!data.eof()) {
219  std::istringstream line_stream(line);
220 
221  char aa;
222 
// The first token of the row is discarded; the next non-whitespace character becomes the row key.
// NOTE(review): aa is not initialised, so a blank line would insert a row under an indeterminate key.
223  line_stream >> junk >> aa;
224 
225  std::map< std::string ,Real > linemap;
// column_names_ is indexed from 1; one value is read per column, staying 0.0 if extraction fails.
226  for ( Size i = 1; i <= column_names_.size(); i++ ){
227  Real offset(0.0);
228  line_stream >> offset;
229  linemap.insert(std::make_pair(column_names_[i], offset));
230  }
// std::map::insert keeps the existing entry when the key is already present, so the first row
// seen for a given character wins.
231  file_data_map.insert(std::make_pair(aa,linemap));
232 
233  getline(data,line);
234  }
235 
236  return file_data_map;
237 }
238 
/// @brief Parses a limits table into a map: row character -> (column name -> (min, max)).
/// @details The header is scanned for a VARS line and a FORMAT line; each following line supplies
/// one (min, max) pair per column, and a pair is stored only when both values are below 1000.
/// NOTE(review): the line carrying the function name and parameter list is missing from this listing
/// (the embedded numbering jumps from 239 to 241). The body reads a variable named file_name, and the
/// return type matches how the constructor uses read_sslimit_table — presumably this is
/// CS2ndShift::read_sslimit_table; confirm against the real source.
239 std::map<char,std::map<std::string,std::pair< core::Real, core::Real > > >
241 
242  std::map<char,std::map<std::string,std::pair< core::Real, core::Real > > > file_data_map;
243 
244  utility::vector1<std::string> column_names_;
245 
246 
// The path is resolved through basic::database::full_name before opening.
247  utility::io::izstream data(basic::database::full_name(file_name.c_str()));
248  tr.Info << "read CS sslimit data from " << file_name << std::endl;
// NOTE(review): the message below says "talos file" although this reads the sslimit table —
// left unchanged because it is runtime output.
249  if (!data)
250  utility_exit_with_message("[ERROR] Unable to open talos file: "
251  + file_name);
252 
253  std::string line;
254  std::string keyword;
255  std::string entry;
256  std::string junk;
257 
258  std::string sub_entry;
259 
// Header phase: read lines until one whose first token is FORMAT.
// NOTE(review): the stream state is not checked inside this loop; if the file ends before a FORMAT
// line is seen, the loop does not appear to terminate — confirm.
260  bool header_done = false;
261  while (!header_done) {
262  getline(data, line);
263  std::istringstream line_stream(line);
264  line_stream >> keyword;
265 
266  if (keyword == "VARS") {
// Two tokens after VARS are discarded. Header tokens are then consumed in pairs: the first of each
// pair is kept in entry, the second is discarded.
267  line_stream >> junk >> junk >> entry >> junk;
268 
269  while (!line_stream.eof()) {
// The column name is the part of the token before its first underscore.
270  Size mid = entry.find_first_of('_');
271  sub_entry = entry.substr(0,mid);
272  column_names_.push_back(sub_entry);
273 
274  line_stream >> entry >> junk;
275  }
276 
// The loop above exits once reading reaches end of line, so the token held in entry at that
// point is appended here.
277  Size mid = entry.find_first_of('_');
278  sub_entry = entry.substr(0,mid);
279  column_names_.push_back(sub_entry);
280  }
281 
282  if (keyword == "FORMAT") {
283  header_done = true;
// One further line after FORMAT is read and discarded.
284  getline(data,line);
285  }
286  }
287 
// Data phase: one row per line.
// NOTE(review): the loop is driven by eof(), so a final line without a trailing newline would be
// read by getline but never processed — confirm the table file ends with a newline.
288  getline(data,line);
289  while (!data.eof()) {
290  std::istringstream line_stream(line);
291 
292  char aa;
293 
// The first token of the row is discarded; the next non-whitespace character becomes the row key.
294  line_stream >> junk >> aa;
295 
296  std::map< std::string , std::pair< Real, Real> > linemap;
// column_names_ is indexed from 1; two values (min, then max) are read per column.
297  for ( Size i = 1; i <= column_names_.size(); i++ ){
298  Real min(0.0), max(0.0);
299  line_stream >> min >> max;
300 
// Pairs where either value is 1000 or more are skipped — presumably a "no limit" sentinel in the
// table; verify against the data file.
301  if ( (min < 1000) && (max < 1000) ) {
302  tr.Debug << "READ_SSLIST " << column_names_[i] << " " << aa << " " << min << " " << max << std::endl;
303  linemap.insert(std::make_pair(column_names_[i], std::make_pair(min,max)));
304  }
305  }
// std::map::insert keeps the existing entry when the key is already present, so the first row
// seen for a given character wins.
306  file_data_map.insert(std::make_pair(aa,linemap));
307 
308  getline(data,line);
309  }
310 
311  return file_data_map;
312 }
313 
314 //TODO: move this with rest of disulfide code
// NOTE(review): the return type and signature lines of this function are missing from this listing
// (the embedded numbering jumps from 314 to 317). The body modifies and returns a string named
// instring, and the constructor assigns the result of also_check_fix_disulf to a std::string —
// presumably this is that function; confirm against the real source.
//
// When the in::fix_disulf option was set by the user, each residue pair listed in the disulfide
// file is checked to be 'C' or 'c' in instring and is then rewritten to lowercase 'c'.
// Without the option the string is returned unchanged.
317  if (option[in::fix_disulf].user()) {
318 
319  core::io::raw_data::DisulfideFile ds_file( option[ in::fix_disulf ]() );
320 
321  utility::vector1< std::pair<Size,Size> > disulfides_in_file;
322 
// Fills disulfides_in_file with the residue pairs read from the file.
323  ds_file.disulfides(disulfides_in_file);
324 
325  for ( Size i = 1; i <= disulfides_in_file.size(); ++i ) {
326 
327  Size l = disulfides_in_file[i].first;
328  Size u = disulfides_in_file[i].second;
329 
// The second residue number of a pair must be strictly greater than the first.
330  if ( u <= l ) {
331  utility_exit_with_message("[ERROR] Disulfide File Format: res2 must be > res1");
332  }
333 
// Residue numbers are used as l-1 / u-1 when indexing instring.
// NOTE(review): neither index is checked against instring's length, and l == 0 would wrap around
// because Size is used for the subtraction — confirm the file is validated elsewhere.
334  if ( !((instring[l-1] == 'C') || (instring[l-1] == 'c'))
335  || !((instring[u-1] == 'C') || (instring[u-1] == 'c')) ) {
336  utility_exit_with_message("[ERROR] -fix_disulf residues do not map to cysteines in talos file");
337  }
338 
// Both partners are marked with lowercase 'c'.
339  instring[l-1] = 'c';
340  instring[u-1] = 'c';
341  }
342  }
343 
344  return instring;
345 }
346 
347 } // frag_picker
348 } // protocols