Rosetta 3.5
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
SecondaryStructure.cc
Go to the documentation of this file.
1 // -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
2 // vi: set ts=2 noet:
3 //
4 // (c) Copyright Rosetta Commons Member Institutions.
5 // (c) This file is part of the Rosetta software suite and is made available under license.
6 // (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
7 // (c) For more information, see http://www.rosettacommons.org. Questions about this can be
8 // (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
9 
10 /// @file
11 /// @brief secondary structure will hold statistics about secondary structure predictions
12 /// sources can be from
13 /// - fragments
14 /// - psipred files ? other stuff
15 ///
16 /// @detailed
17 /// from converting conformation_pairings.cc of rosetta++ into mini
18 ///
19 ///
20 ///
21 /// @author Oliver Lange
22 
23 
24 // Unit Headers
26 
27 // Package Headers
28 
29 // Project Headers
30 #include <core/types.hh>
31 #include <core/pose/Pose.hh>
32 
33 #include <core/fragment/FragSet.hh>
35 #include <core/fragment/Frame.hh>
37 
38 // Utility headers
39 #include <basic/Tracer.hh>
40 #include <utility/io/izstream.hh>
41 
42 // ObjexxFCL Headers
43 #include <ObjexxFCL/FArray1D.hh>
44 #include <ObjexxFCL/format.hh>
45 
48 #include <utility/vector1.hh>
49 
50 
51 //// C++ headers
52 //#include <cstdlib>
53 //#include <string>
54 //#include <vector>
55 static basic::Tracer tr("core.fragment");
56 
57 namespace core {
58 namespace fragment {
59 
60 /// @details Auto-generated virtual destructor
62 
63 using namespace ObjexxFCL;
64 using namespace ObjexxFCL::fmt;
65 
66 void SecondaryStructure::compute_fractions( core::fragment::FragSet const& frags, bool bJustCenterResidue /*default false */ ) {
67  using namespace core::fragment;
68  Size frag_nres = frags.max_pos();
69  if ( total_residue_ < frag_nres ) total_residue_ = frag_nres;
70  tr.Info << "compute strand/loop fractions for " << total_residue_ << " residues... " << std::endl;
71  if ( total_residue_ == 0 ) utility_exit_with_message( "no fragment to compute secondary structure ");
72 
73  strand_fraction_.dimension( total_residue_, 0.0 );
74  loop_fraction_.dimension( total_residue_, 0.0 );
75 
76  //Note: Confidence is rarely used, and defaults to zero unless set otherwise -rv
77  confidence_.dimension( total_residue_, 0.0 );
78 
79  FArray1D_int count( total_residue_, 0 ); //keep track how many entries for each residue
80 
81  strand_fraction_(1) = 0.0;
82  strand_fraction_(total_residue_) = 0.0;
83 
84  for ( FragID_Iterator it=frags.begin(), eit=frags.end(); it!=eit; ++it ) { //carefully checked that I don't change FrameData
85  Size central_residue( static_cast< Size > (it->frame().length()/2.0+0.5) );
86  Size loop_start = bJustCenterResidue ? central_residue : 1;
87  Size loop_end = bJustCenterResidue ? central_residue : it->frame().length();
88  for ( Size fpos = loop_start; fpos <= loop_end; ++fpos ) {
89  char const ss = it->fragment().secstruct( fpos );
90  Size pos = it->frame().seqpos( fpos );
91  if ( ss == 'E' || ss == 'L' || ss == 'H' ) {
92  ++count( pos );
93  if ( ss == 'E' ) strand_fraction_( pos ) += 1.0;
94  else if ( ss == 'L' ) loop_fraction_( pos ) += 1.0;
95  } else {
96  tr.Warning << "found invalid secondary structure assignment in fragment data: " << ss << std::endl;
97  }
98  }
99  }
100 
101  for ( Size pos = 1; pos <= total_residue_; pos++ ) {
102  if ( count( pos ) ) {
103  strand_fraction_(pos) /= count ( pos );
104  loop_fraction_(pos) /= count ( pos );
105  } else {
106  loop_fraction_( pos ) = 1.0;
107  strand_fraction_( pos ) = 0.0;
108  }
109  }
110 }
111 
112 ///@brief returns regions (in loop-class format) that belong to contiguous pieces of ss-structure
113 //loops::Loops SecondaryStructure::compute_ss_regions( core::Real max_loop_frac, core::Size min_length ) const {
114 // Size start( 0 );
115 // Size last( 0 );
116 // Size max_gap( 2 );
117 // loops::Loops ss_regions;
118 // for ( Size pos = 1; pos <= total_residue(); ++pos ) {
119 // if ( loop_fraction( pos ) <= max_loop_frac ) {
120 // if ( !start ) {
121 // start = pos;
122 // last = pos - 1;
123 // }
124 // if ( last + max_gap < pos ) {
125 // if ( last - start >= min_length ) {
126 // ss_regions.add_loop( start, last );
127 // }
128 // start=0;
129 // }
130 // last = pos;
131 // }
132 // }
133 // return ss_regions;
134 //}
135 
136 
138  core::Real max = loop_fraction( pos );
139  char ss = 'L';
140  if ( max < strand_fraction( pos ) ) {
141  max = strand_fraction_( pos );
142  ss = 'E';
143  }
144  if ( max < helix_fraction( pos ) ) {
145  ss = 'H';
146  }
147  return ss;
148 }
149 
150 ///@detail read from file
152  utility::io::izstream data( fn );
153  if ( !data ) {
154  tr.Fatal << "can't secondary structure file!!!" << fn << std::endl;
155  data.close();
156  utility::exit( EXIT_FAILURE, __FILE__, __LINE__);
157  }
158 
159  std::string line;
160  getline( data, line); //ignore header
161  std::istringstream line_stream( line );
162  std::string dummy;
163 
164  //read number of residues
165  line_stream >> dummy >> dummy >> dummy >> dummy >> total_residue_;
166 
167  //dimension arrays
168  loop_fraction_.dimension( total_residue_, 0.0 );
169  strand_fraction_.dimension( total_residue_, 0.0 );
170 
171  while ( getline( data, line ) ) {
172  std::istringstream line_stream( line );
173  // a=i, b=j, c=orientation(1 or 2), d=pleating(1 or 2)
174  int pos;
175  core::Real ef,hf,lf;
176  line_stream >> pos >> ef >> hf >> lf;
177 
178  if ( line_stream.fail() ) {
179  tr.Warning << "parse error: " << line << std::endl;
180  continue;
181  }
182 
183  loop_fraction_( pos ) = lf;
184  strand_fraction_( pos ) = ef;
185  if ( std::abs( helix_fraction( pos ) - hf ) > 0.01 ) {
186  tr.Warning << "inconsistency in secondary structure file at position "
187  << pos << " H ( read ) = " << hf << " 1.0-L-E ( expected ) "
188  << helix_fraction( pos ) << std::endl;
189  }
190  }
191 
192  tr.flush();
193 }
194 
196  // pose::set_ss_from_phi_psi( pose );
197  total_residue_ = pose.total_residue();
198  strand_fraction_.dimension( total_residue_, 0.0 );
199  loop_fraction_.dimension( total_residue_, 0.0 );
200  for ( Size pos = 1; pos<=pose.total_residue(); pos++ ) {
201  char ss = pose.secstruct( pos );
202  if ( ss == 'L' ) loop_fraction_( pos ) += 1.0;
203  if ( ss == 'E' ) strand_fraction_( pos ) += 1.0;
204  }
205 }
206 
208 
209  runtime_assert(components.size() == weights.size());
210  Size size = components[1]->total_residue_;
211  for(Size i=2;i<=components.size();i++)
212  runtime_assert(components[i]->total_residue_ == size);
213  total_residue_ = size;
214  strand_fraction_.dimension( size, 0.0 );
215  loop_fraction_.dimension( size, 0.0 );
216  for ( Size pos = 1; pos <= size; pos++ ) {
217  Real pE = 0.0;
218  Real pH = 0.0;
219  Real pL = 0.0;
220  for(Size i=1;i<=components.size();i++) {
221  pE += components[i]->strand_fraction(pos) * weights[i];
222  pH += components[i]->helix_fraction(pos) * weights[i];
223  pL += components[i]->loop_fraction(pos) * weights[i];
224  }
225  set_fractions(pos,pH,pE,pL);
226  }
227 }
228 
229 
230 ///@detail write to stream ( opposite from read_from_file )
231 void SecondaryStructure::show( std::ostream& os ) const {
232  using namespace fmt;
233  int const width( 10 );
234  os << A( width, "pos") << A( width, "E" ) << A( width, "H" ) << A( width, "L" ) << I( width, 4, total_residue() ) << std::endl;
235  for ( Size i = 1; i<= total_residue(); i++ ) {
236  os << I( width, 4, i)
237  << F( width, 4, strand_fraction( i ) )
238  << F( width, 4, helix_fraction( i ) )
239  << F( width, 4, loop_fraction( i ) )
240  << std::endl;
241  }
242 }
243 
244 
246  utility::io::izstream data( filename );
247  if ( !data ) {
248  tr.Fatal << "can't secondary structure file!!!" << filename << std::endl;
249  data.close();
250  utility::exit( EXIT_FAILURE, __FILE__, __LINE__);
251  }
252  read_psipred_ss2( data );
253 }
254 
255 void SecondaryStructure::read_psipred_ss2( std::istream& data ) {
256 
257  std::string line;
258  getline( data, line); //ignore header
259 
260  //dimension arrays
261  Size total_reserved( 500 );
262  loop_fraction_.dimension( total_reserved, 0.0 );
263  strand_fraction_.dimension( total_reserved, 0.0 );
264  Size last_pos(0);
265  while ( getline( data, line ) ) {
266  if ( line.size() == 0 ) continue;
267  if ( line[ 0 ] == '#' ) continue;
268  std::istringstream line_stream( line );
269  // a=i, b=j, c=orientation(1 or 2), d=pleating(1 or 2)
270  Size pos;
271  std::string aa, secstruct_letter;
272  core::Real ef,hf,lf;
273  line_stream >> pos >> aa >> secstruct_letter >> lf >> hf >> ef;
274 
275  //fill up missing residues until pos with 0.33 0.33 0.33 probabilities:
276  if ( line_stream.fail() ) {
277  tr.Warning << "parse error: " << line << std::endl;
278  continue;
279  }
280  // renormalize so that probabilities sum to 1.0
281  Real const total( lf + hf + ef );
282  lf /= total;
283  hf /= total;
284  ef /= total;
285 
286  if ( pos > total_reserved ) {
287  total_reserved+=400;
288  loop_fraction_.redimension( total_reserved );
289  strand_fraction_.redimension( total_reserved );
290  }
291  for ( last_pos = last_pos+1; last_pos<pos; last_pos++ ) {
292  loop_fraction_( last_pos ) = 1.0/3;
293  strand_fraction_( last_pos ) = 1.0/3;
294  }
295 
296  if ( total_residue_ < pos ) total_residue_ = pos;
297  loop_fraction_( pos ) = lf;
298  strand_fraction_( pos ) = ef;
299 
300  if ( std::abs( helix_fraction( pos ) - hf ) > 0.01 ) {
301  tr.Warning << "inconsistency in secondary structure file at position "
302  << pos << " H ( read ) = " << hf << " 1.0-L-E ( expected ) "
303  << helix_fraction( pos ) << std::endl;
304  }
305  last_pos = pos;
306  }
307 
308  tr.flush();
309 }
310 
312  std::ostream & os,
313  std::string const & sequence
314 ) const {
315  int const width( 10 );
316  os << "# PSIPRED VFORMAT (PSIPRED V2.5 by David Jones)\n\n";
317  for ( Size i = 1; i<= total_residue(); i++ ) {
318  char ss = secstruct( i );
319  if ( ss == 'L' ) ss = 'C'; //for psipred
320  os << I( width, 4, i);
321  os << A( 2, ( sequence.size() >= i ) ? sequence[ i - 1 ] : 'X' )
322  << A( 2, ss )
323  << F( width, 3, loop_fraction( i ) )
324  << F( width, 3, helix_fraction( i ) )
325  << F( width, 3, strand_fraction( i ) )
326  << std::endl;
327  }
328 }
329 
331  utility::io::izstream data( filename );
332  if ( !data ) {
333  tr.Fatal << "can't secondary structure file!!!" << filename << std::endl;
334  data.close();
335  utility::exit( EXIT_FAILURE, __FILE__, __LINE__);
336  }
337  read_talos_ss( data );
338 }
339 
340 void SecondaryStructure::read_talos_ss( std::istream& data ) {
341 
342  std::string line;
343  bool header_done(false);
344 
345  while (!header_done && !data.eof()) {
346  getline( data, line); //ignore header
347  std::istringstream line_stream( line );
348  std::string keyword;
349  line_stream >> keyword;
350 
351  if (keyword == "FORMAT") {
352  header_done = true;
353  }
354  }
355 
356  //dimension arrays
357  Size total_reserved( 500 );
358  loop_fraction_.dimension( total_reserved, 0.0 );
359  strand_fraction_.dimension( total_reserved, 0.0 );
360  confidence_.dimension( total_reserved, 0.0 );
361  Size last_pos( 0 );
362  while ( getline( data, line ) ) {
363  if ( line.size() == 0 ) continue;
364  if ( line[ 0 ] == '#' ) continue;
365  std::istringstream line_stream( line );
366  // a=i, b=j, c=orientation(1 or 2), d=pleating(1 or 2)
367  Size pos;
368  std::string aa, junk, secstruct_letter;
369  core::Real ef,hf,lf, confidence;
370 
371  //confidence is currently being thrown out, may want to bear that in mind
372  line_stream >> pos >> aa >> junk >> junk >> hf >> ef >> lf >> confidence >> secstruct_letter;
373 
374  if ( line_stream.fail() ) {
375  tr.Warning << "parse error: " << line << std::endl;
376  continue;
377  }
378  // renormalize so that probabilities sum to 1.0
379  Real const total( lf + hf + ef );
380  lf /= total;
381  hf /= total;
382  ef /= total;
383 
384  if ( pos > total_reserved ) {
385  total_reserved+=400;
386  loop_fraction_.redimension( total_reserved );
387  strand_fraction_.redimension( total_reserved );
388  confidence_.redimension( total_reserved );
389  }
390  if ( total_residue_ < pos ) total_residue_ = pos;
391  loop_fraction_( pos ) = lf;
392  strand_fraction_( pos ) = ef;
393  confidence_( pos ) = confidence;
394 
395  if ( std::abs( helix_fraction( pos ) - hf ) > 0.01 ) {
396  tr.Warning << "inconsistency in secondary structure file at position "
397  << pos << " H ( read ) = " << hf << " 1.0-L-E ( expected ) "
398  << helix_fraction( pos ) << std::endl;
399  }
400  for ( last_pos = last_pos+1; last_pos<pos; last_pos++ ) {
401  loop_fraction_( last_pos ) = 1.0/3;
402  strand_fraction_( last_pos ) = 1.0/3;
403  }
404  }
405 
406  tr.flush();
407 }
408 
409 void
411  if ( nres > total_residue_ ) {
412  loop_fraction_.dimension( nres );
413  strand_fraction_.dimension( nres );
414  for ( Size pos=total_residue_+1; pos<=nres; ++pos ) {
415  loop_fraction_( pos ) = 1.0;
416  strand_fraction_( pos ) = 0.0;
417  }
418  total_residue_=nres;
419  }
420 }
421 
422 
423 } // core
424 } // fragment