Rosetta 3.5
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Dssp.cc
Go to the documentation of this file.
1 // -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
2 // vi: set ts=2 noet:
3 //
4 // (c) Copyright Rosetta Commons Member Institutions.
5 // (c) This file is part of the Rosetta software suite and is made available under license.
6 // (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
7 // (c) For more information, see http://www.rosettacommons.org. Questions about this can be
8 // (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
9 
10 /// @brief
11 /// @detailed
12 ///
13 ///
14 ///
15 /// @author olange: ported from original bblum-rosetta++ version $
16 
17 // Unit Headers
19 
20 // Package Headers
21 
22 // Project Headers
24 #include <core/pose/Pose.hh>
25 #include <core/id/NamedAtomID.hh>
26 
27 // ObjexxFCL Headers
28 // AUTO-REMOVED #include <ObjexxFCL/FArray1A.hh>
29 #include <ObjexxFCL/FArray1D.hh>
30 #include <ObjexxFCL/FArray2D.hh>
31 // AUTO-REMOVED #include <ObjexxFCL/FArray3P.hh>
32 
33 
34 // Utility headers
35 #include <utility/vector1.fwd.hh>
36 // AUTO-REMOVED #include <utility/io/izstream.hh>
37 #include <utility/exit.hh>
38 #include <basic/Tracer.hh>
39 
40 // #include <basic/options/option.hh>
41 // #include <basic/options/keys/OptionKeys.hh>
42 
43 // numeric headers
44 #include <numeric/xyzVector.hh>
45 // AUTO-REMOVED #include <numeric/xyz.functions.hh>
46 
47 //// C++ headers
48 #include <cstdlib>
49 #include <string>
50 // AUTO-REMOVED #include <list>
51 #include <vector>
52 #include <iostream>
53 
54 #include <core/id/AtomID.hh>
56 #include <utility/vector1.hh>
57 
58 
59 
60 static basic::Tracer tr("core.scoring.dssp");
61 
62 using namespace core;
63 using namespace basic;
64 using namespace ObjexxFCL;
65 //using namespace basic::options;
66 
67 namespace core {
68 namespace scoring {
69 namespace dssp {
70 
71 
72 Dssp::Dssp( core::pose::Pose const& pose ) {
73  pair_set_ = NULL;
74  total_residue_ = 0;
75  for ( Size i = 1; i <= pose.total_residue(); i++ ) {
76  if ( pose.residue(i).is_protein() ) total_residue_++;
77  }
78  compute( pose );
79 }
80 
81 Dssp::~Dssp() {}
82 
83 //////////////////////////////////////////////////////////////////////////////
84 /// @begin fill_hbond_bb_pair_score_
85 ///
86 /// @brief Populates the hbond_bb_pair_score_ array
87 ///
88 /// @detailed
89 /// Uses the rosetta hydrogen bond energies computed in hbonds::hbond_set
90 /// to fill the hbond_bb_pair_score_ array. The hbond_set data structure
91 /// must already be populated, by (e.g.) full atom scoring.
92 /// Entry (i,j) is the backbone
93 /// hydrogen bond energy between residues i (acceptor) and j (donor).
94 ///
95 /// @global_read
96 /// hbonds::hbond_set
97 ///
98 /// @global_write
99 /// dssp_ns::hbond_bb_pair_score_
100 ///
101 /// @remarks
102 ///
103 /// @references
104 ///
105 /// @author olange: ported from original bblum-rosetta++ version $
106 ///
107 /// @last_modified
108 //////////////////////////////////////////////////////////////////////////////
109 /*
110  void
111  Dssp::fill_hbond_bb_pair_score_() {
112  using namespace hbonds;
113 
114  hbond_bb_pair_score_.dimension(misc::total_residue_, misc::total_residue_);
115  hbond_bb_pair_score_ = 0.0f;
116  Size dres, ares, type;
117  float enrg;
118 
119  for( Size i = 1; i <= hbond_set.nhbonds(); ++i ) {
120  dres = hbond_set.hbdon_res(i);
121  ares = hbond_set.hbact_res(i);
122  enrg = hbond_set.hbenergies(i);
123  // first index is always acceptor (carbonyl group), second is donor (amide group)
124  type = hbond_set.hbtype(i);
125  if(type == SRBB_HBTYPE || type == LRBB_HBTYPE)
126  hbond_bb_pair_score_(ares, dres) = enrg;
127  }
128  }
129 */
130 
131 
132 // //////////////////////////////////////////////////////////////////////////////
133 // void
134 // copy_the_relevant_atoms( FArray3D_float & full_coord_dest,
135 // FArray3D_float & full_coord_src){
136 
137 
138 // for (Size i = 1; i <= total_residue_; i++ ){
139 
140 // Size const natoms( aaproperties_pack::natoms( res(i), res_variant(i) ) );
141 
142 // for ( Size j=1; j<= natoms; ++j ) {
143 // for ( Size k = 1; k<= 3; ++k ) {
144 // full_coord_dest( k, j, i ) = full_coord_src( k, j, i );
145 // }
146 // }
147 
148 // }
149 
150 // }
151 
152 //////////////////////////////////////////////////////////////////////////////
153 /// @begin fill_hbond_bb_pair_score__dssp
154 ///
155 /// @brief Populates the hbond_bb_pair_score_ array with dssp energies
156 ///
157 /// @detailed
158 /// Uses hydrogen bond energies computed a la dssp to fill the
159 /// hbond_bb_pair_score_ array. Entry (i,j) is the backbone
160 /// hydrogen bond energy between residues i (acceptor) and j (donor).
161 ///
162 /// @global_read
163 /// misc::Sizes::total_residue_
164 /// misc::current_pose::Eposition
165 /// misc::current_pose::full_coord
166 /// misc::Sizes::res
167 /// misc::current_pose::res_variant
168 /// aaproperties_pack::properties_per_aa_aav::HNpos
169 /// aaproperties_pack::properties_per_aa_aav::HNpos
170 /// param_aa::aa_pro
171 /// termini_ns::is_N_terminus
172 /// termini_ns::is_C_terminus
173 ///
174 /// @global_write
175 /// dssp_ns::hbond_bb_pair_score_
176 ///
177 /// @remarks
178 ///
179 /// @references
180 ///
181 /// @author olange: ported from original bblum-rosetta++ version $
182 ///
183 /// @last_modified
184 //////////////////////////////////////////////////////////////////////////////
185 void
186 fill_hbond_bb_pair_score_dssp( pose::Pose const& pose, ObjexxFCL::FArray2D_float &hbond_bb_pair_score ) {
187  Size const total_residue( pose.total_residue() );
188 
189  //initialize FArray
190  hbond_bb_pair_score.dimension(total_residue, total_residue);
191  hbond_bb_pair_score = 0.0f;
192 
193  // FArray3D_float full_coord_backup( 3, param::MAX_ATOM(), total_residue );
194  // copy_the_relevant_atoms( full_coord_backup, full_coord );
195 
196  // copy_position_to_fullcoord(Eposition,full_coord,total_residue);
197  // put_nh(full_coord, 1, total_residue, res, res_variant, is_N_terminus, is_C_terminus, 0.0f);
198  // initialize_fullcoord_array(Eposition, full_coord, total_residue, res, res_variant);
199 
200  // float C[3], O[3], N[3], H[3], rON, rCH, rOH, rCN, E, dist, total;
201  for ( Size i = 1; i <= total_residue; ++i ) {
202  if (pose.residue(i).aa() == core::chemical::aa_vrt) continue;
203  for ( Size j = 1; j <= total_residue; ++j ) {
204  if ( !pose.residue(i).is_protein() ) continue;
205  if ( !pose.residue(j).is_protein() ) continue;
206  if ( i == j || i - j == 1 || j - i == 1 ) continue;
207  //chu skip non-protein residues
208  if ( !(pose.residue(i).is_protein() && pose.residue(j).is_protein()) ) continue;
209 
210  if (pose.residue(j).aa() == core::chemical::aa_vrt) continue;
211 
212  //ignore if CA-CA > 10A
213  id::NamedAtomID CA1("CA",i );
214  id::NamedAtomID CA2("CA",j );
215  if ( pose.xyz( CA1 ).distance( pose.xyz( CA2 ) ) > 10.0 ) continue;
216 
217  //get C at i
218  PointPosition pC;
219  pC = pose.xyz( id::NamedAtomID( "C", i ) );
220 
221  //get O at i
222  PointPosition pO;
223  pO = pose.xyz( id::NamedAtomID( "O", i ) );
224 
225  //get N at j
226  PointPosition pN;
227  pN = pose.xyz( id::NamedAtomID( "N", j ) );
228 
229  Real rON, rCN;
230  rON = pO.distance( pN );
231  rCN = pC.distance( pN );
232 
233  // Size HN1_pos = HNpos( res(j), res_variant(j) );
234 
235  chemical::ResidueType const& rtj ( pose.residue_type ( j ) );
236  runtime_assert( rtj.has( "N" ) );
237  for ( Size hatom = rtj.attached_H_begin( rtj.atom_index( "N" ) ), ehatom = rtj.attached_H_end( rtj.atom_index( "N" ) );
238  hatom <= ehatom; ++hatom ) {
239 
240  Real rOH, rCH;
241  PointPosition pH;
242  pH = pose.xyz( id::AtomID( hatom, j ) );
243  rOH = pH.distance( pO );
244  rCH = pH.distance( pC );
245  Real E = 27.888f * ( 1.0f / rON + 1.0f / rCH - 1.0f / rOH - 1.0f / rCN);
246  //tr.Trace << "rOH " << rOH << " rCH " << rCH << " rCN " << rCN << " rON " << rON << " rNH " << distance( pN, pH ) << std::endl;
247  //tr.Trace << "hbond energy for H " << hatom << " respair " << i << " " << j << " " << E << std::endl;
248  if ( E < hbond_bb_pair_score(i, j) ) {
249  hbond_bb_pair_score(i, j) = E;
250  break;
251  }
252  }
253  }
254  }
255  //copy_the_relevant_atoms( full_coord, full_coord_backup);
256 }
257 
258 void
259 Dssp::dssp( FArray1_char &secstruct ) {
260  for( Size i = 1; i <= total_residue_; i++)
261  secstruct(i) = dssp_secstruct_(i);
262 }
263 
264 void
265 Dssp::dssp_featurizer( FArray1_char &secstruct ) {
266  for( Size i = 1; i <= total_residue_; i++ ) {
267  if( dssp_secstruct_(i) == 'H' || dssp_secstruct_(i) == 'G' || dssp_secstruct_(i) == 'I' ) {
268  secstruct(i) = 'H';
269  } else if(dssp_secstruct_(i) == 'B' || dssp_secstruct_(i) == 'E') {
270  // see if paired on one or both sides
271  secstruct(i) = pair_set_->featurizer_state(i);
272  } else secstruct(i) = 'L';
273  }
274 }
275 
276 //////////////////////////////////////////////////////////////////////////////
277 /// @begin dssp_reduced
278 ///
279 /// @brief Reduces to E/H/L secondary structure alphabet
280 ///
281 /// @detailed
282 /// This function simply reduces dssp's secondary structure alphabet (which
283 /// includes 3- and 5-turn helices and various kinds of loop, and
284 /// differentiates lone beta bridges from extended beta strand pairings)
285 /// Sizeo the standard E/H/L alphabet, as follows:
286 /// G,H,I --> H
287 /// E,B --> E
288 /// S,T,blank --> L
289 ///
290 /// @global_read
291 /// Sizes::total_residue_
292 ///
293 /// @global_write
294 ///
295 /// @remarks
296 ///
297 /// @references
298 ///
299 /// @author olange: ported from original bblum-rosetta++ version $
300 ///
301 /// @last_modified
302 //////////////////////////////////////////////////////////////////////////////
303 void
304 Dssp::dssp_reduced( FArray1_char &secstruct ) {
305  for( Size i = 1; i <= total_residue_; i++ ) {
306  if( dssp_secstruct_(i) == 'H' || dssp_secstruct_(i) == 'G' || dssp_secstruct_(i) == 'I' ) {
307  secstruct(i) = 'H';
308  } else if(dssp_secstruct_(i) == 'B' || dssp_secstruct_(i) == 'E') {
309  secstruct(i) = 'E';
310  } else secstruct(i) = 'L';
311  }
312 }
313 
314 void
315 Dssp::dssp_reduced() {
316  dssp_reduced( dssp_secstruct_ );
317 }
318 
319 //////////////////////////////////////////////////////////////////////////////
320 /// @begin Dssp::compute
321 ///
322 /// @brief Runs dssp, calculating per-residue secondary structure.
323 ///
324 /// @detailed
325 /// dssp is a standard algorithm for per-residue secondary structure analysis.
326 /// It has the following alphabet:
327 /// H: 4-turn helix
328 /// B: beta bridge
329 /// E: extended beta strand
330 /// G: 3-turn helix
331 /// I: 5-turn helix
332 /// T: helix-like loop (some nearby hbonds)
333 /// S: loop with high curvature
334 /// : loop
335 /// Most of these determinations are made on the basis of hydrogen bonds,
336 /// with torsion angles disregarded. The designations are reported with priority
337 /// given to categories higher in the list (e.g. if a residue has both high-
338 /// curvature (S) and helical hydrogen bonds (H), it will be reported as H).
339 /// Experiments indicate that this function agrees almost completely with
340 /// the true dssp results (run on dumped pdbs). Slight differences can perhaps
341 /// be chalked up to unequal placement of hydrogens.
342 ///
343 /// @global_read
344 /// Sizes::total_residue_
345 /// dssp_ns::hbond_bb_pair_score_
346 ///
347 /// @global_write
348 /// dssp_ns::hbond_bb_pair_score_
349 ///
350 /// @remarks
351 ///
352 /// @references
353 ///
354 /// @author olange: ported from original bblum-rosetta++ version $
355 ///
356 /// @last_modified nobu
357 //////////////////////////////////////////////////////////////////////////////
358 void
359 Dssp::compute( pose::Pose const& pose ) {
360 
361  float dssp_hbond_threshold = -0.5;
362 
363  dssp_secstruct_.dimension(total_residue_);
364 
365 
366  fill_hbond_bb_pair_score_dssp( pose, hbond_bb_pair_score_ ); // fills hbond_bb_pair_score_ array
367 
368  // Initialize to all loops
369  for ( Size i = 1; i <= total_residue_; i++ ) dssp_secstruct_(i) = ' ';
370 
371  bool helix;
372 
373  // Record all 5-turn helices (I)
374  if( total_residue_ > 5 ) {
375  for ( Size i=1; i <= total_residue_-5; i++ ) {
376  if ( hbond_bb_pair_score_(i, i + 5) < dssp_hbond_threshold ) {
377  helix = i < total_residue_ - 5 &&
378  hbond_bb_pair_score_(i + 1, i + 6) < dssp_hbond_threshold;
379  for ( Size j = 1; j < 6; j++) {
380  if ( helix )
381  dssp_secstruct_(i + j) = 'I';
382  else if ( j < 5 && dssp_secstruct_(i + j) == ' ' )
383  dssp_secstruct_(i + j) = 'T';
384  }
385  }
386  }
387  }
388  // Record all 3-turn helices (G)
389  if( total_residue_ > 3 ) {
390  for ( Size i = 1; i <= total_residue_ - 3; i++ ) {
391  if ( hbond_bb_pair_score_(i, i + 3) < dssp_hbond_threshold ) {
392  helix = i < total_residue_ - 3 &&
393  hbond_bb_pair_score_(i + 1, i + 4) < dssp_hbond_threshold;
394  for ( Size j = 1; j < 4; j++ ) {
395  if ( helix )
396  dssp_secstruct_(i+j) = 'G';
397  else if ( j < 3 && dssp_secstruct_(i+j) == ' ' )
398  dssp_secstruct_(i+j) = 'T';
399  }
400  }
401  }
402  }
403 
404  // Record all strands (B and E)
405  pair_set_ = new StrandPairingSet( hbond_bb_pair_score_, dssp_hbond_threshold, pose );
406 
407  for ( Size i = 1; i <= total_residue_; i++ ) {
408  char state = pair_set_->dssp_state(i);
409  if ( state != ' ' ) dssp_secstruct_(i) = state;
410  }
411 
412  // Record all 4-turn helices (H)
413  if( total_residue_ > 4 ) {
414  for ( Size i = 1; i <= total_residue_ - 4; i++ ) {
415  if ( hbond_bb_pair_score_(i, i + 4) < dssp_hbond_threshold ) {
416  helix = i < total_residue_ - 4 &&
417  hbond_bb_pair_score_(i + 1, i + 5) < dssp_hbond_threshold;
418  for ( Size j = 1; j < 5; j++ ) {
419  if ( helix )
420  /*
421  && torsion_bin(phi(i+j), psi(i+j), omega(i+j)) != 'A')
422  // only allow helix chunks in which every residue has bin A
423  */
424  dssp_secstruct_(i + j) = 'H';
425  else if(j < 4 && dssp_secstruct_(i + j) == ' ')
426  dssp_secstruct_(i + j) = 'T';
427  }
428  }
429  }
430  }
431 
432  // Record all tight turns (S), only if still a loop
433  if( total_residue_ > 2 ) {
434  for ( Size i = 3; i <= total_residue_ - 2; i++ ) {
435  if ( !pose.residue(i-2).is_protein() ) continue;
436  if ( !pose.residue(i ).is_protein() ) continue;
437  if ( !pose.residue(i+2).is_protein() ) continue;
438  if (pose.residue(i-2).aa() == core::chemical::aa_vrt) continue;
439  if (pose.residue(i ).aa() == core::chemical::aa_vrt) continue;
440  if (pose.residue(i+2).aa() == core::chemical::aa_vrt) continue;
441  if ( dssp_secstruct_(i) == ' ' ) {
442  Vector const v1 ( pose.xyz( id::NamedAtomID("CA",i ) ) - pose.xyz( id::NamedAtomID("CA",i-2 ) ) );
443  Vector const v2 ( pose.xyz( id::NamedAtomID("CA",i+2 ) ) - pose.xyz( id::NamedAtomID("CA",i ) ) );
444  Real dot = angle_of( v1, v2 );
445  if ( dot < .34202014 )
446  dssp_secstruct_(i) = 'S';
447  }
448  }
449  }
450 }
451 
452 bool
453 Dssp::paired(Size res1, Size res2, bool antiparallel) {
454  return pair_set_->paired(res1, res2, antiparallel);
455 }
456 
457 
458 void
459 Dssp::insert_ss_into_pose( core::pose::Pose & pose ) {
460  compute( pose );
461  dssp_reduced( dssp_secstruct_ );
462  for ( core::Size i = 1; i <= total_residue_;/*pose.total_residue();*/ ++i ) {
463  pose.set_secstruct( i, dssp_secstruct_(i) );
464  }
465 }
466 
467 
468 char
469 Dssp::get_dssp_secstruct( core::Size resid ) {
470  return dssp_secstruct_( resid );
471 }
472 
474 Dssp::get_dssp_secstruct() {
475  dssp_reduced( dssp_secstruct_ );
476  std::string sequence;
477  for ( core::Size i = 1; i <= total_residue_;/*pose.total_residue();*/ ++i ) {
478  sequence += dssp_secstruct_( i );
479  }
480  return sequence;
481 }
482 
483 float
484 Dssp::bb_pair_score( Size res1, Size res2 )
485 {
486  return hbond_bb_pair_score_( res1, res2 );
487 }
488 
489 } //dssp
490 } //scoring
491 } //core