Rosetta 3.5
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
RMSVallData.hh
Go to the documentation of this file.
1 // -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
2 // vi: set ts=2 noet:
3 //
4 // (c) Copyright Rosetta Commons Member Institutions.
5 // (c) This file is part of the Rosetta software suite and is made available under license.
6 // (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
7 // (c) For more information, see http://www.rosettacommons.org. Questions about this can be
8 // (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
9 
10 #ifndef INCLUDED_protocols_frags_RMSVallData_hh
11 #define INCLUDED_protocols_frags_RMSVallData_hh
12 
13 
14 // Rosetta Headers
15 
16 #include <core/types.hh>
18 #include <core/fragment/Frame.hh>
20 
21 #include <protocols/frags/heap.hh>
22 
23 // utility headers
24 #include <utility/vector1.hh>
25 
26 // ObjexxFCL and utility headers
27 #include <ObjexxFCL/FArray1A.hh>
28 #include <utility/io/izstream.hh>
29 
30 #include <numeric/model_quality/rms.hh>
31 #include <numeric/random/random.fwd.hh>
32 #include <utility/exit.hh>
33 
34 // C++ Headers
35 #include <string>
36 
37 //Auto using namespaces
38 namespace ObjexxFCL { } using namespace ObjexxFCL; // AUTO USING NS
39 //Auto using namespaces end
40 
41 
42 namespace protocols {
43 namespace frags {
44 
45 using core::Real;
46 using core::Size;
47 
48 class RMSVallData {
49 public:
50  /// Default constructor: loads no data.
    /// Reserves room for 100000 entries in sequence_, secstruct_, phi_, psi_
    /// and omega_, and sets the three exclude_* filter flags to false.
    /// NOTE(review): the constructor's declarator line is absent from this
    /// listing (the embedded numbering jumps from 50 to 52).
    /// NOTE(review): X_ is filled by add_line() but is not reserved here --
    /// confirm whether that omission is intentional.
52  {
53  Size const big_size( 100000 ); // rough guess
54  sequence_.reserve( big_size );
55  secstruct_.reserve( big_size );
56  phi_.reserve( big_size );
57  psi_.reserve( big_size );
58  omega_.reserve( big_size );
    // all residue filters used by the fragment search start switched off
59  exclude_gly = false;
60  exclude_pro = false;
61  exclude_cys_peptides = false;
62  }
63 
64  /// Constructor from an input vall database file.
    /// Performs the same reservation and flag initialisation as the default
    /// constructor, then loads the data by calling read_file( filename ).
    /// NOTE(review): the constructor's declarator line is absent from this
    /// listing (the embedded numbering jumps from 64 to 66); the body uses a
    /// name `filename`, so it presumably takes the file name as a parameter.
    /// NOTE(review): X_ is filled by add_line() but is not reserved here --
    /// confirm whether that omission is intentional.
66  {
67  Size const big_size( 100000 ); // rough guess
68  // prevent lots of redimensioning as we read file? does this even matter?
69  sequence_.reserve( big_size );
70  secstruct_.reserve( big_size );
71  phi_.reserve( big_size );
72  psi_.reserve( big_size );
73  omega_.reserve( big_size );
    // all residue filters used by the fragment search start switched off
74  exclude_gly = false;
75  exclude_pro = false;
76  exclude_cys_peptides = false;
77 
    // read_file() appends the entries and finishes by calling shrink()
78  read_file( filename );
79  }
80 
81  /// Removes excess storage capacity to minimize memory usage.
    /// Calls shrink() on sequence_, secstruct_, phi_, psi_ and omega_.
    /// NOTE(review): the line carrying this function's name is absent from the
    /// listing (numbering jumps from 82 to 84); read_file() calls it as shrink().
    /// NOTE(review): X_ is not shrunk here although add_line() grows it in step
    /// with the other containers -- confirm whether that is intentional.
82  void
84  {
85  sequence_.shrink();
86  secstruct_.shrink();
87  phi_.shrink();
88  psi_.shrink();
89  omega_.shrink();
90  }
91 
92  // Read entries from the vall database file "filename" and append them via add_line().
    //
    // Behaviour visible in this body:
    //  - exits through utility_exit_with_message if the stream cannot be opened;
    //  - a file name containing "trimmed" is treated as a new format, which is
    //    reported on std::cerr as unsupported and ends the process with exit(1);
    //  - otherwise each line is parsed at fixed character offsets:
    //    offset 6 = sequence char, offset 8 = secondary-structure char,
    //    offsets 25/34/43 = x/y/z, offsets 52/61/70 = phi/psi/omega
    //    (each number read with a field width of at most 9 characters);
    //  - after the loop the stream is closed and shrink() trims spare capacity.
    //
    // NOTE(review): the signature line is absent from this listing (numbering
    // jumps from 93 to 95); the body uses `filename` as a std::string-like value.
    // NOTE(review): std::cerr, std::sscanf and exit are used, but <iostream>,
    // <cstdio> and <cstdlib> are not among the includes visible in this file.
93  void
95  utility::io::izstream data ( filename );
96  if ( !data ) {
97  utility_exit_with_message( "cant open vall file: " + filename );
98  }
99 
100  //////////////////////////////////////////////
101  // this file parsing should be very fast since
102  // the vall is enormous
103  bool const new_format( filename.find( "trimmed" ) != std::string::npos );
104 
105  if ( new_format ) {
106  std::cerr << "Unsupported format for RMSVallData!!!\n";
107  exit(1);
108  } else {
109  char line[250];
110  float phi,psi,omega, x,y,z;
111  char seq,ss;
112  while ( data ) {
113  data.getline( line, 250 );
     // NOTE(review): if izstream follows std::istream semantics, a final line
     // with no trailing newline sets eof here and is dropped without being
     // parsed -- confirm against the vall file format.
114  if ( data.eof() ) break;
115 
     // NOTE(review): none of the sscanf return values are checked and `line`
     // is never cleared, so a line shorter than the offsets below is parsed
     // from leftover buffer contents, and a failed conversion silently keeps
     // the value from the previous iteration (uninitialised on the first one).
116  std::sscanf( line+6 , "%1c", &seq);
117  std::sscanf( line+8 , "%1c", &ss);
118 
119  std::sscanf( line+25 , "%9f", &x);
120  std::sscanf( line+34 , "%9f", &y);
121  std::sscanf( line+43 , "%9f", &z);
122 
123  std::sscanf( line+52, "%9f", &phi);
124  std::sscanf( line+61, "%9f", &psi);
125  std::sscanf( line+70, "%9f", &omega);
126 
     // the float values are converted to Real by add_line's parameters
127  add_line( seq, ss, x, y, z, phi, psi, omega );
128  }
129  }
130  data.close();
131 
132  // remove excess capacity
133  shrink();
134 
135  }
136 
137  /// Append one already-parsed Vall entry to the stored data (no file I/O happens here).
     /// @param sq  sequence character, appended to sequence_
     /// @param ss  secondary-structure character, appended to secstruct_
     /// @param x,y,z  coordinates, appended to X_ as one numeric::xyzVector<Real>
     /// @param ph,ps,om  values appended to phi_, psi_ and omega_ respectively
     /// Every container grows by exactly one element per call, so an index
     /// refers to the same entry in all of them.
138  void
139  add_line( const char sq, const char ss,
140  const Real x, const Real y, const Real z,
141  const Real ph, const Real ps, const Real om ) {
142  sequence_.push_back( sq );
143  secstruct_.push_back( ss );
144 
145  X_.push_back( numeric::xyzVector<Real>( x, y, z ) );
146 
147  phi_.push_back( ph );
148  psi_.push_back( ps );
149  omega_.push_back( om );
150  }
151 
     // Read-only accessors: each returns a const reference to the corresponding
     // member container, so no copy is made.
     /// per-entry sequence characters
152  utility::vector1< char > const & sequence () const { return sequence_; }
     /// per-entry secondary-structure characters
153  utility::vector1< char > const & secstruct() const { return secstruct_; }
154 
     /// per-entry coordinates (get_frags describes the template it compares these with as CA coords)
155  utility::vector1< numeric::xyzVector< Real > > const & X() const {return X_;}
156 
     /// per-entry phi, psi and omega values
157  utility::vector1< Real > const & phi () const {return phi_;}
158  utility::vector1< Real > const & psi () const {return psi_;}
159  utility::vector1< Real > const & omega() const {return omega_;}
160 
161  /// Number of entries currently stored (one per Vall line passed to add_line), taken from sequence_.size().
     /// NOTE(review): the container size is implicitly converted to int on return.
162  int size() const { return sequence_.size(); }
163 
164  // Pick fragments for a single residue position from the vall database and add them to `frame`.
     //
     // The search runs in three stages:
     //  1. every window of templ.size() consecutive entries is filtered and
     //     scored by its number of sequence mismatches against pref_seq; the
     //     windows are pushed into a heap sized bucket1_size;
     //  2. bucket1_size windows are extracted from that heap and re-scored by
     //     rms_wrapper against the centred template (plus optional noise),
     //     then pushed into a second heap sized nfrags;
     //  3. nfrags windows are extracted from the second heap, turned into
     //     torsion fragments and handed to frame->add_fragment().
     //
     // Parameters:
     //  nfrags      number of fragments extracted in stage 3
     //  templ       coordinates to align to; its size sets the fragment length
     //  pref_seq    preferred sequence; asserted to have one char per position
     //  force_ss    when 'H', 'E' or 'L', windows containing an entry with a
     //              different secstruct char are rejected; other values
     //              disable this filter
     //  frame       receives the fragments
     //  randomness  scale of the uniform random term added to each RMS value
     //  oversample  stage-1 heap size is ceil(oversample*nfrags); a value <= 0
     //              makes it the number of windows instead
     //
     // NOTE(review): the line carrying the function name is absent from this
     // listing (numbering jumps from 165 to 167), as are listing lines 308 and
     // 310 inside the last loop.
     // NOTE(review): std::ceil, std::abs, assert and std::cerr are used, but
     // <cmath>, <cassert> and <iostream> are not among the visible includes.
165  void
167  Size const nfrags,
168  utility::vector1< numeric::xyzVector< core::Real > > const & templ, // CA coords we wish to align to
169  std::string const &pref_seq,
170  char const force_ss,
171  core::fragment::FrameOP & frame,
172  core::Real randomness = 0.0,
173  core::Real oversample = 5.0
174  ) const {
175  Size const frag_size( templ.size() );
176  assert( frag_size == pref_seq.length() );
177 
178  // reset heaps
179  Size const my_size( size() );
180  Size bucket1_size( (Size)std::ceil(oversample*nfrags) );
     // NOTE(review): if Size is unsigned, my_size - frag_size + 1 wraps around
     // when frag_size > my_size + 1 (here and in the scan loop bound below).
181  if (oversample<=0) { bucket1_size = my_size - frag_size + 1; }
182  FArray1D_int heap( bucket1_size + 2 );
183  FArray1D_float coheap( bucket1_size + 2 );
184  protocols::frags::heap_init( heap, coheap, bucket1_size );
185 
186  // center template
     // tmpl_pos is a 3 x frag_size array holding the template coordinates with
     // their mean (tmpl_com) subtracted.
     // NOTE(review): frag_size == 0 would divide by zero below.
187  ObjexxFCL::FArray2D< core::Real > tmpl_pos( 3,frag_size ), tgt_pos( 3,frag_size );
188  numeric::xyzVector< core::Real > tmpl_com(0,0,0);
189  for ( int i = 1; i <= (int)frag_size; ++i ) {
190  numeric::xyzVector< core::Real > const &x_i = templ[i];
191  tmpl_com += x_i;
192  for ( int k = 0; k < 3; ++k ) tmpl_pos(k+1,i) = x_i[k];
193  }
194  tmpl_com /= frag_size;
195  for (int i=1; i<=(int)frag_size; ++i)
196  for ( int k = 0; k < 3; ++k ) tmpl_pos(k+1,i) -= tmpl_com[k];
197 
198  // set up other tmp store data
     // NOTE(review): ww is not referenced again in this function.
199  ObjexxFCL::FArray1D< numeric::Real > ww( frag_size, 1.0 );
200  //ObjexxFCL::FArray2D< numeric::Real > uu( 3, 3, 0.0 );
201  //numeric::Real ctx;
202 
     // ---- stage 1: scan every window and rank it by sequence mismatches ----
203  for ( Size vall_pos=1; vall_pos <= my_size - frag_size + 1; ++vall_pos ) {
204  // score this position
205 
206  bool bad_frag( false );
207 
208  Real score(0.0); // bigger is worse
209  numeric::xyzVector< core::Real > tgt_com(0,0,0);
210  int seq_score = 0;
211 
212  for ( Size k=0; k< frag_size; ++k ) {
213  Real const phi ( phi_ [ vall_pos+k ] );
214  Real const psi ( psi_ [ vall_pos+k ] );
215  Real const omega( omega_ [ vall_pos+k ] );
216  char const seq ( sequence_ [ vall_pos+k ] );
217  char const ss ( secstruct_[ vall_pos+k ] );
     // Reject the whole window as soon as one entry fails a filter:
     // near-zero phi, near-zero psi and omega together, an excluded G or P,
     // |omega| < 90 when exclude_cys_peptides is set, or a secstruct char
     // that differs from a forced 'H'/'E'/'L'.
     // presumably the near-zero torsions mark missing data -- TODO confirm
218  if ( ( std::abs( phi ) < 0.01 ) ||
219  ( std::abs( psi ) + std::abs( omega ) < 0.01 ) ||
220  ( seq == 'G' && exclude_gly ) ||
221  ( seq == 'P' && exclude_pro ) ||
222  ( std::abs( omega ) < 90.0 && exclude_cys_peptides ) ||
223  ( ss != force_ss && (force_ss == 'H' || force_ss == 'E' || force_ss == 'L') ) ) {
224  bad_frag = true;
225  break;
226  }
227 
     // count positions whose residue differs from the preferred sequence
228  if ( seq != pref_seq[k] ) seq_score++;
229 
230  numeric::xyzVector< core::Real > const &xx( X_[ vall_pos+k ] );
231  tgt_com += xx;
232  for ( int j = 0; j < 3; ++j ) tgt_pos(j+1,k+1) = xx[j];
233  }
234 
235  if ( bad_frag ) continue;
236 
237  // center tgt
     // NOTE(review): the centred tgt_pos is not used in this stage (the RMS
     // call below is commented out); stage 2 recomputes it.
238  tgt_com /= frag_size;
239  for (int i=1; i<=(int)frag_size; ++i)
240  for ( int j = 0; j < 3; ++j ) tgt_pos(j+1,i) -= tgt_com[j];
241 
242  // score == rms (?)
243  //float rms_out=999.0;
244  //rms_out = numeric::model_quality::rms_wrapper( frag_size, tgt_pos, tmpl_pos); set but never used ~Labonte
245 
246  // the stage-1 score is the sequence mismatch count only
247  score = seq_score;
248 
249  // insert into heap, with negative score! since bigger==better for heaps
     // NOTE(review): err is written by heap_insert but never inspected here.
250  bool err;
251  protocols::frags::heap_insert( heap, coheap, vall_pos, -score, err ); // ?? out-of-date
252  }
253 
254  // from the bucket1_size candidates kept on sequence score, choose the best nfrags by RMS
255  FArray1D_int rmsheap( nfrags + 2 );
256  FArray1D_float rmscoheap( nfrags + 2 );
257  protocols::frags::heap_init( rmsheap, rmscoheap, nfrags ); // ?? out-of-date
258 
259  Size exact_matches(0);
260  Real worst_score(999), best_score(999);
261 
     // ---- stage 2: re-score the stage-1 candidates by RMS to the template ----
     // NOTE(review): the loop always extracts bucket1_size items; if fewer
     // windows passed the stage-1 filters, heap_extract presumably reports err,
     // which is only checked by assert (compiled out under NDEBUG), leaving
     // vall_pos and score uninitialised -- confirm heap semantics.
262  for ( Size nn = bucket1_size; nn >=1; --nn ) {
263  bool err;
264  int vall_pos;
265  float score;// heaps use float!!!
266 
267  protocols::frags::heap_extract( heap, coheap, vall_pos, score, err); // ?? out-of-date
268  assert( !err );
269 
     // a stored score of 0 means no sequence mismatches were counted
270  if ( score == 0 ) ++exact_matches;
271 
272  // grab XYZs
273  numeric::xyzVector< core::Real > tgt_com(0,0,0);
274  for ( Size k=0; k< frag_size; ++k ) {
275  numeric::xyzVector< core::Real > const &xx( X_[ vall_pos+k ] );
276  tgt_com += xx;
277  for ( int j = 0; j < 3; ++j ) tgt_pos(j+1,k+1) = xx[j];
278  }
279 
280  // center tgt
281  tgt_com /= frag_size;
282  for (int i=1; i<=(int)frag_size; ++i)
283  for ( int j = 0; j < 3; ++j ) tgt_pos(j+1,i) -= tgt_com[j];
284 
285  // score == rms (?)
286  float rms_out=999.0;
287  rms_out = numeric::model_quality::rms_wrapper( frag_size, tgt_pos, tmpl_pos);
288 
289  // make things (a bit) randomized
290  rms_out += randomness * numeric::random::uniform();
291 
292  // place in heap B
293  protocols::frags::heap_insert( rmsheap, rmscoheap, vall_pos, -rms_out, err ); // ?? out-of-date
294  }
295 
296 
     // ---- stage 3: turn the nfrags RMS-selected windows into fragments ----
297  for ( Size nn = nfrags; nn >=1; --nn ) {
298  bool err;
299  int vall_pos;
300  float score;// heaps use float!!!
301 
302  protocols::frags::heap_extract( rmsheap, rmscoheap, vall_pos, score, err); // ?? out-of-date
303  assert( !err );
304 
     // The first extraction is recorded as worst_score and the last as
     // best_score (scores were stored negated, hence the sign flip).
     // NOTE(review): with nfrags == 1 only the first branch runs, so
     // best_score keeps its initial value of 999.
305  if ( nn == nfrags ) worst_score = -score;
306  else if ( nn == 1 ) best_score = -score;
307 
     // NOTE(review): listing lines 308 and 310 are absent from this
     // extraction; they presumably declare current_fragment and begin the
     // res_torsions initialisation completed on line 311.
309  for ( Size k=0; k< frag_size; ++k ) {
311  new core::fragment::BBTorsionSRFD( 3 ,secstruct_[ vall_pos + k ], sequence_ [ vall_pos+k ] ) ); // 3 protein torsions
312  res_torsions->set_torsion ( 1, phi_ [ vall_pos + k ] ); // ugly numbers 1-3, but pose.set_phi also uses explicit numbers
313  res_torsions->set_torsion ( 2, psi_ [ vall_pos + k ] );
314  res_torsions->set_torsion ( 3, omega_ [ vall_pos + k ] );
315  res_torsions->set_secstruct ( secstruct_[ vall_pos + k ] );
316  current_fragment->add_residue( res_torsions );
317  }
318  // the fragment has to be marked valid explicitly;
319  // this is done only when it holds one residue per template position
320  if (current_fragment->size() == frag_size) current_fragment->set_valid();
321 
322  // goes at the beginning
     // a fragment that the frame refuses is treated as a fatal error
323  if ( !frame->add_fragment( current_fragment ) ) {
324  std::cerr << "Incompatible fragment!" << std::endl;
325  utility::exit( EXIT_FAILURE, __FILE__, __LINE__);
326  }
327  } // nn
328 
329 
     // one-line summary of the search, written to std::cerr
330  std::cerr << "rms-frags: " <<
331  nfrags << " of " << bucket1_size <<
332  " exact_matches: " << exact_matches <<
333  " best_score: " << best_score <<
334  " worst_score: " << worst_score << std::endl;
335 
336  }
337 
338 private:
341 
346 
350 };
351 
352 } // ns frags
353 } // ns protocols
354 
355 #endif