Rosetta 3.5
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
SurfacePotential.cc
Go to the documentation of this file.
1 // -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
2 // vi: set ts=2 noet:
3 // :noTabs=false:tabSize=4:indentSize=4:
4 //
5 // (c) Copyright Rosetta Commons Member Institutions.
6 // (c) This file is part of the Rosetta software suite and is made available under license.
7 // (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
8 // (c) For more information, see http://www.rosettacommons.org. Questions about this can be
9 // (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
10 
11 /// @file src/core/scoring/SurfacePotential.cc
12 /// @brief Class which reads the residue hydrophobic ASA database file and calculates surface residue energies.
13 /// @author Ron Jacak
14 
16 
17 #include <basic/database/open.hh>
18 #include <basic/Tracer.hh>
19 
20 #include <core/id/AtomID.hh>
21 #include <core/id/AtomID_Map.hh>
22 
24 // AUTO-REMOVED #include <core/chemical/AtomTypeSet.hh>
25 
29 // AUTO-REMOVED #include <core/conformation/symmetry/util.hh>
30 
31 
35 
37 
38 #include <core/pose/Pose.hh>
39 #include <core/pose/PDBInfo.hh>
41 #include <core/pose/util.hh>
42 
43 // AUTO-REMOVED #include <core/scoring/sasa.hh>
44 #include <core/scoring/Energies.hh>
46 
47 #include <core/types.hh>
48 
49 // ObjexxFCL Headers
50 
51 // Numeric Headers
52 
53 // Utility Headers
54 #include <utility/io/izstream.hh>
55 
56 // C++ Headers
57 #include <sstream>
58 
59 //Auto Headers
60 #include <utility/vector1.hh>
61 //Auto Headers
64 #include <core/pose/util.tmpl.hh>
65 
66 
67 
68 
69 namespace core {
70 namespace pack {
71 namespace interaction_graph {
72 
73 //#define FILE_DEBUG 1
// When the define above is enabled, the #ifdef FILE_DEBUG sections in this file are compiled in;
// they print the parsed database tables and per-residue / per-patch intermediate values.
74 
// Tracer channel used for the diagnostic output written by this file.
75 static basic::Tracer TR("core.pack.interaction_graph.SurfacePotential");
76 
83 
87 
88 /// @brief set initial value as no instance
89 SurfacePotential* SurfacePotential::instance_( 0 );
90 
91 
92 /// @brief static function to get the instance of (pointer to) this singleton class
94  if ( instance_ == 0 )
96  return instance_;
97 }
98 
99 /// @brief private constructor to guarantee the singleton
103 
105 }
106 
107 
108 /// @brief Reads in the database file which contains average residue hydrophobic accessible surface areas.
///
/// Each record is parsed as a residue-type name followed by five Real values (lte10 ... lte24 below).
/// The five values are expanded into a 24-entry row, indexed from 1 by neighbor count, and stored in
/// res_to_average_hASA_ under the AA value returned by chemical::aa_from_name().
110 
111  utility::io::izstream residue_hASA_ifstream;
112  basic::database::open( residue_hASA_ifstream, "scoring/score_functions/SurfacePotential/average_hASA_by_res_and_neighbor.txt" );
113 
114  std::string str_restype;
115  Real lte10_asa = 0.0;
116  Real lte13_asa = 0.0;
117  Real lte16_asa = 0.0;
118  Real lte20_asa = 0.0;
119  Real lte24_asa = 0.0;
120 
122 
 // NOTE(review): eof() is tested before the extraction and the extraction result is never checked. If the
 // final read fails (e.g. trailing whitespace at the end of the file) the values left over from the previous
 // record appear to be stored once more - presumably harmless here because the same row is rewritten; confirm.
123  while ( !residue_hASA_ifstream.eof() ) {
124  residue_hASA_ifstream >> str_restype >> lte10_asa >> lte13_asa >> lte16_asa >> lte20_asa >> lte24_asa;
125 
126  // store the mean hASA values in a vector for faster lookup
127  // This way, rather than constructing a key each time we need to get the information on a particular AA type
128  // we can just index right into the vector. We waste some memory on storing the same values in the vector
129  // but this class is a Singleton and it's only 48 floats and the size of the vectors.
130  chemical::AA aa_type = chemical::aa_from_name( str_restype );
131  res_to_average_hASA_[ aa_type ].resize( 24, 0.0 ); // we have average hASA values for up to 24 nbs
132 
 // each of the five file columns covers a contiguous range of neighbor counts
133  for ( Size ii=1; ii <= 10; ++ii ) { // for nbs 1-10
134  res_to_average_hASA_[ aa_type ][ ii ] = lte10_asa;
135  }
136  for ( Size ii=11; ii <= 13; ++ii ) { // for nbs 11-13
137  res_to_average_hASA_[ aa_type ][ ii ] = lte13_asa;
138  }
139  for ( Size ii=14; ii <= 16; ++ii ) { // for nbs 14-16
140  res_to_average_hASA_[ aa_type ][ ii ] = lte16_asa;
141  }
142  for ( Size ii=17; ii <= 20; ++ii ) { // for nbs 17-20
143  res_to_average_hASA_[ aa_type ][ ii ] = lte20_asa;
144  }
145  for ( Size ii=21; ii <= 24; ++ii ) { // for nbs 21-24
146  res_to_average_hASA_[ aa_type ][ ii ] = lte24_asa;
147  }
148 
149  }
150 
151 #ifdef FILE_DEBUG
152  // quick check to make sure we stored the right things
153  for ( Size ii=1; ii <= chemical::num_canonical_aas; ++ii ) {
154  TR << chemical::name_from_aa( (chemical::AA)ii ) << ": [ ";
155  for ( Size jj=1; jj <= res_to_average_hASA_[ ii ].size(); ++jj ) {
156  TR << res_to_average_hASA_[ ii ][ jj ] << ", ";
157  }
158  TR << "]" << std::endl;
159  }
160 #endif
161 
162 }
163 
164 
165 /// @brief Reads in the database file which contains the scores for a distribution of patch sizes.
166 ///
167 /// @details
168 /// Not assuming any particular length to the database file so that if I want to increase
169 /// the maximum of the distribution or shrink it, the vector will dynamically resize to what it needs
170 /// to be.
171 ///
/// The number of rows is open-ended, but every row is read as one leading Real (amount_hASA, which is
/// read and then not stored) followed by exactly num_fields (20) scores. Each row is appended to
/// hASA_to_score_ as a vector1 indexed from 1.
173 
174  utility::io::izstream hASA_score_ifstream;
175  basic::database::open( hASA_score_ifstream, "scoring/score_functions/SurfacePotential/surface_score.txt" );
176 
177  Real amount_hASA = 0.0;
178  Real score = 0.0;
179  Size num_fields = 20; // keep scores for nb counts of 1, 2, 3 ... x (where x is # fields)
180 
 // NOTE(review): eof() is tested before the extraction and the extraction results are never checked. If the
 // last read attempt fails (e.g. trailing newline in the file) one extra row is still pushed below - confirm
 // whether hASA_to_score_ is expected to carry that additional trailing entry.
181  while ( !hASA_score_ifstream.eof() ) {
182  hASA_score_ifstream >> amount_hASA;
183 
184  utility::vector1< Real > v( num_fields, 0.0 );
185  for ( Size ii=1; ii <= num_fields; ++ii ) {
186  hASA_score_ifstream >> score;
187  v[ ii ] = score;
188  }
189  hASA_to_score_.push_back( v );
190  }
191 
192 #ifdef FILE_DEBUG
193  TR << "patch energies: [ " << std::endl;
 // outer container is walked from index 0, rows from index 1
 // NOTE(review): the literal 25 is presumably the bin width (cf. SURFACE_SCORE_BIN_SIZE used for lookups) - confirm
194  for ( Size ii=0; ii < hASA_to_score_.size(); ++ii ) {
195  TR << "patch size: " << ii * 25 << " [ ";
196  for ( Size jj=1; jj <= hASA_to_score_[ ii ].size(); ++jj ) {
197  TR << hASA_to_score_[ ii ][ jj ] << ", ";
198  }
199  TR << "]" << std::endl;
200  }
201  TR << "]" << std::endl;
202 #endif
203 }
204 
205 
206 /// @brief Reads in the database file for the hpatch score, yet another version of the surface energy.
207 ///
208 /// @details
209 /// Not assuming any particular length to the database file so that if I want to increase
210 /// the maximum of the distribution or shrink it, the vector will dynamically resize to what it needs
211 /// to be.
212 ///
/// Every record is read as two Reals: patch_area (read and then not stored) and score. Only the score is
/// appended to patcharea_to_score_, so the position of an entry in that vector is what identifies its bin.
214 
215  utility::io::izstream hpatch_score_ifstream;
216  basic::database::open( hpatch_score_ifstream, "scoring/score_functions/SurfacePotential/hpatch_score.txt" );
217 
218  Real patch_area = 0.0;
219  Real score = 0.0;
220 
 // NOTE(review): eof() is tested before the extraction and the extraction result is never checked. If the
 // last read attempt fails (e.g. trailing newline in the file) the previous score is pushed a second time -
 // confirm whether that extra trailing entry is expected.
221  while ( !hpatch_score_ifstream.eof() ) {
222  hpatch_score_ifstream >> patch_area >> score;
223  patcharea_to_score_.push_back( score );
224  }
225 
226 #ifdef FILE_DEBUG
227  TR << "patch area scores: [ " << std::endl;
228  for ( Size ii=0; ii < patcharea_to_score_.size(); ++ii ) {
229  TR << patcharea_to_score_[ ii ] << ", ";
230  }
231  TR << "]" << std::endl;
232 #endif
233 
234 }
235 
236 
237 ///
238 /// @begin SurfacePotential::average_residue_hASA
239 ///
240 /// @brief
241 /// Returns the average hydrophobic ASA (hASA) stored in res_to_average_hASA_ for the given residue type and number of
/// neighbors. Residue types beyond chemical::num_canonical_aas yield 0.0. num_nbs must not exceed
/// BURIED_RESIDUE_NO_HSASA_CUTOFF; that precondition is only checked by an assert.
242 ///
244 
245  // the diagnostics below are compiled in only when FILE_DEBUG is defined; the assert after them is a separate, assertion-only check
246 #ifdef FILE_DEBUG
247  if ( num_nbs > BURIED_RESIDUE_NO_HSASA_CUTOFF ) {
248  std::cout << "Number of neighbors (" << num_nbs << ") outside bounds in SurfacePotential::average_residue_hASA." << std::endl;
249  }
250  if ( aa_type > chemical::num_canonical_aas ) {
251  std::cout << "aatype (" << aa_type << ") outside of canonical 20 in SurfacePotential::average_residue_hASA." << std::endl;
252  }
253 #endif
254  assert( num_nbs <= BURIED_RESIDUE_NO_HSASA_CUTOFF );
 // non-canonical residue types have no table row; report no hydrophobic area for them
255  if ( aa_type > chemical::num_canonical_aas ) { return 0.0; }
256  return res_to_average_hASA_[ aa_type ][ num_nbs ];
257 
258 }
259 
260 ///
261 /// @begin SurfacePotential::hASA_patch_energy
262 ///
263 /// @brief
264 /// Returns the energy for a given patch size. The calling function must ensure that an out-of-bounds error will not occur.
/// The row of hASA_to_score_ is selected by patch_area / SURFACE_SCORE_BIN_SIZE truncated to Size, and the
/// entry within that row by num_nbs. patch_area above MAX_PATCH_SURFACE_AREA is only caught by an assert.
265 ///
267 
268 #ifdef FILE_DEBUG
269  if ( patch_area > MAX_PATCH_SURFACE_AREA ) {
270  std::cout << "patch_area (" << patch_area << ") greater than MAX_PATCH_SURFACE_AREA in SurfacePotential::hASA_patch_energy." << std::endl;
271  }
272 #endif
273  assert( patch_area <= MAX_PATCH_SURFACE_AREA );
 // the cast truncates, so every area inside one bin maps to the same row
274  return hASA_to_score_[ (Size)(patch_area / SURFACE_SCORE_BIN_SIZE ) ][ num_nbs ];
275 }
276 
277 
278 ///
279 /// @begin SurfacePotential::hpatch_score
280 ///
281 /// @brief
282 /// Returns the score for a given patch size. The calling function must ensure that an out-of-bounds error will not occur.
/// The entry of patcharea_to_score_ is selected by patch_area / HPATCH_SCORE_BIN_SIZE truncated to Size.
/// patch_area above MAX_HPATCH_AREA is only caught by an assert.
283 ///
285 
286 #ifdef FILE_DEBUG
287  if ( patch_area > MAX_HPATCH_AREA ) {
288  std::cout << "patch_area (" << patch_area << ") greater than MAX_HPATCH_AREA in SurfacePotential::hpatch_score." << std::endl;
289  }
290 #endif
291  assert( patch_area <= MAX_HPATCH_AREA );
 // the cast truncates, so every area inside one bin maps to the same entry
292  return patcharea_to_score_[ (Size)(patch_area / HPATCH_SCORE_BIN_SIZE) ];
293 }
294 
295 
296 ///
297 /// @begin SurfacePotential::compute_residue_surface_energy
298 ///
299 /// @brief
300 /// Calculates the surface energy for a single residue within a Pose object. Used only by the RotamerSet_::compute_one_body_energy_maps
301 /// function (which, in turn, is only used by the optE protocol). Nowhere else in mini is this function used.
302 ///
304  Size resid, utility::vector1< Size > num_neighbors_ ) {
305 
306  // our definition of surface residue is that the residue has fewer than 16 neighbors
307  if ( !( pose.energies().residue_neighbors_updated() ) || num_neighbors_[ resid ] > SURFACE_EXPOSED_CUTOFF ) {
308  emap[ scoring::surface ] = 0.0;
309  return;
310  }
311 
312  // add this residues hASA to the total
313  // this residue must have <= SURFACE_EXPOSED_CUTOFF number of neighbors to get here
314  Real total_hASA = 0.0;
315  total_hASA += average_residue_hASA( rsd.aa(), num_neighbors_[ resid ] );
316 #ifdef FILE_DEBUG
317  TR << rsd.aa() << resid << " (hASA: " << average_residue_hASA( rsd.aa(), num_neighbors_[ resid ] ) << ", nbs: " << num_neighbors_[ resid ] << ") neighbors... ";
318 #endif
319 
320  // now add the hASA of all se residues within xA (nbr_atom - nbr_atom) distance. Use the nbr_atom coordinates of the passed
321  // in (being considered) rotamer, not of the wild type sequence rotamer. In a small percent of the cases, using the wild type
322  // nbr_atom will give a different count than when using the new rotamer nbr_atom position.
323  Real distanceBetweenAtoms = 0.0;
324  for ( Size res2_position = 1; res2_position < pose.total_residue(); ++res2_position ) {
325 
326  if ( resid == res2_position ) { continue; }
327  conformation::Residue const & rsd2 = pose.residue( res2_position );
328 
329  distanceBetweenAtoms = rsd.xyz( rsd.nbr_atom() ).distance( rsd2.xyz( rsd2.nbr_atom() ) );
330 
331  // first, have to check again if these two residues are neighbors
332  if ( distanceBetweenAtoms <= INTERACTION_RADIUS ) {
333 
334  // and finally, check to make sure it's surface exposed
335  if ( num_neighbors_[ res2_position ] > BURIED_RESIDUE_NO_HSASA_CUTOFF ) {
336  continue; // if it has so many neighbors it probably doesn't add anything to the hSASA (and it would cause out-of-bounds errors below)
337  }
338  // passed all checks
339  total_hASA += average_residue_hASA( rsd2.aa(), num_neighbors_[ res2_position ] );
340 #ifdef FILE_DEBUG
341  TR << rsd2.aa() << res2_position << " " << average_residue_hASA( rsd2.aa(), num_neighbors_[ res2_position ] ) << ", ";
342 #endif
343  }
344  }
345 #ifdef FILE_DEBUG
346  TR << std::endl;
347 #endif
348 
349  // now that we know how many surface-exposed neighbors res1 has, get the surface energy for that value
350  if ( total_hASA > MAX_PATCH_SURFACE_AREA ) {
352  } else {
353  emap[ scoring::surface ] = hASA_patch_energy( total_hASA, num_neighbors_[ resid ] );
354  }
355 
356 #ifdef FILE_DEBUG
357  TR << "compute_residue_surface_energy: calculated total_hASA: " << total_hASA << ", surface energy: " << emap[ scoring::surface ] << std::endl;
358 #endif
359 
360 }
361 
362 
363 ///
364 /// @begin compute_pose_surface_energy
365 ///
366 /// @brief
367 /// helper method for computing surface score. in the optE protocol we don't care about the total vs. residue
368 /// level surface scores. so just call that function but discard the values for those variables.
369 ///
/// @param pose             pose to evaluate
/// @param surface_energy_  output: total surface energy as computed by the three-argument overload
370 void SurfacePotential::compute_pose_surface_energy( pose::Pose const & pose, Real & surface_energy_ ) {
371 
 // per-residue energies are required by the three-argument overload but are not needed by this caller
373  utility::vector1< Real > res_level_energies_;
374 
375  compute_pose_surface_energy( pose, surface_energy_, res_level_energies_ );
376 
378 }
379 
380 void SurfacePotential::compute_pose_surface_energy( pose::Pose const & pose, Real & total_surface_energy_,
381  utility::vector1< Real > & residue_surface_energy_ ) {
382 
383  // the pose has to have been scored at this point for this method to work. since I can't force a score eval here,
384  // return 0.0 for everything if it hasn't been.
385  if ( ! pose.energies().residue_neighbors_updated() ) {
386  total_surface_energy_ = 0.0;
387  residue_surface_energy_.clear();
388  return;
389  }
390 
391  // resize the per-residue surface energy vector
392  residue_surface_energy_.clear();
393  residue_surface_energy_.resize( pose.n_residue(), 0.0 );
394 
395  utility::vector1< Size > num_neighbors_( pose.n_residue(), 0 );
396 
397  // first, we need to init the num neighbors array (either using the tenA nb graph or by counting manually)
398  for ( core::Size res1_position = 1; res1_position <= pose.n_residue(); ++res1_position ) {
399  core::scoring::TenANeighborGraph const & tenA_neighbor_graph( pose.energies().tenA_neighbor_graph() );
400 
401  // set the number of neighbors vector for later output
402  num_neighbors_[ res1_position ] = tenA_neighbor_graph.get_node( res1_position )->num_neighbors_counting_self();
403  }
404 
405  // need symmetry info to ignore the duplicated units
407  if( core::pose::symmetry::is_symmetric( pose ) ) {
408  symm_info = (dynamic_cast<const core::conformation::symmetry::SymmetricConformation &>(pose.conformation()).Symmetry_Info());
409  }
410 
411  // now, we have to loop over all residues and find exposed residues (we can use the num neighbors array to determine
412  // which ones are surface exposed)
413 
414  for ( core::Size res1_position = 1; res1_position <= pose.n_residue(); ++res1_position ) {
415 
416  if( pose.residue( res1_position ).aa() > core::chemical::num_canonical_aas ) continue;
417  if( symm_info && !symm_info->bb_is_independent(res1_position) ) continue;
418 
419  // reset the counter
420  Real total_hASA = 0.0;
421 
422  // our definition of surface residue is that the residue has fewer than 16 neighbors. we only assign surface
423  // scores to residues with fewer than this number of nbs. but when getting the patch area, neighboring residues
424  // can have more nbs than this and still contribute to patch area.
425  if ( num_neighbors_[ res1_position ] > SURFACE_EXPOSED_CUTOFF ) {
426  continue;
427  }
428 
429  // passed the surface-exposed check...
430 
431  total_hASA += average_residue_hASA( pose.residue( res1_position ).aa(), num_neighbors_[ res1_position ] );
432 #ifdef FILE_DEBUG
433  TR << pose.residue( res1_position ).aa() << res1_position
434  << " (hASA: " << average_residue_hASA( pose.residue( res1_position ).aa(), num_neighbors_[ res1_position ] )
435  << ", nbs: " << num_neighbors_[ res1_position ] << ") neighbors... ";
436 #endif
437 
438  //TR << "Neighbors of residue " << pose.residue( res1_position ).name3() << " " << res1_position << " include " << std::endl;
439 
440  // for every Edge in the neighbor graph, figure out if that residue is surface exposed
441  //for ( core::graph::EdgeListConstIterator eli = tenA_neighbor_graph.get_node( res1_position )->const_edge_list_begin(),
442  // eli_end = tenA_neighbor_graph.get_node( res1_position )->const_edge_list_end(); eli != eli_end; ++eli ) {
443 
444  // save the value to simplify code ahead
445  //int res2_position = (*eli)->get_other_ind( res1_position );
446 
447  // get the other node for this edge, so pass in the res1 node to this method
448  //TR << pose.residue( res2_position ).name3() << " " << res2_position << std::endl;
449 
450  conformation::Residue const & rsd1 = pose.residue( res1_position );
451  Real distanceBetweenAtoms = 0.0;
452 
453  for ( Size res2_position = 1; res2_position < pose.total_residue(); ++res2_position ) {
454  if( pose.residue( res2_position ).aa() > core::chemical::num_canonical_aas ) continue;
455  if( symm_info && !symm_info->bb_is_independent(res2_position) ) continue;
456 
457  if ( res2_position == res1_position ) { continue; }
458  conformation::Residue const & rsd2 = pose.residue( res2_position );
459 
460  distanceBetweenAtoms = rsd1.xyz( rsd1.nbr_atom() ).distance( rsd2.xyz( rsd2.nbr_atom() ) );
461 
462  // first, have to check again if these two residues are neighbors
463  if ( distanceBetweenAtoms <= INTERACTION_RADIUS ) {
464 
465  // ok, is it surface-exposed, too?
466  if ( num_neighbors_[ res2_position ] > BURIED_RESIDUE_NO_HSASA_CUTOFF ) {
467  continue; // if it has so many neighbors it probably doesn't add anything to the hSASA (and it would cause out-of-bounds errors below)
468  }
469  // passed all checks
470  total_hASA += average_residue_hASA( rsd2.aa(), num_neighbors_[ res2_position ] );
471 #ifdef FILE_DEBUG
472  TR << rsd2.aa() << res2_position << " " << average_residue_hASA( rsd2.aa(), num_neighbors_[ res2_position ] ) << ", ";
473 #endif
474  }
475  }
476 #ifdef FILE_DEBUG
477  TR << std::endl;
478 #endif
479 
480  // now that we know how many surface-exposed neighbors res1 has, get the surface energy for that value
481  if ( total_hASA > MAX_PATCH_SURFACE_AREA ) {
482  residue_surface_energy_[ res1_position ] = MAX_SURFACE_ENERGY;
483  } else {
484  residue_surface_energy_[ res1_position ] = hASA_patch_energy( total_hASA, num_neighbors_[ res1_position ] );
485  }
486 
487 #ifdef FILE_DEBUG
488  TR << "compute_pose_surface_energy: calculated residue total_hASA: " << total_hASA << ", surface energy: "
489  << residue_surface_energy_[ res1_position ] << std::endl;
490 #endif
491 
492  } // end second res1 loop
493 
494 
495  total_surface_energy_ = 0.0;
496  for ( Size ii=1; ii < residue_surface_energy_.size(); ++ii ) {
497  total_surface_energy_ += residue_surface_energy_[ii];
498  }
499 
500 #ifdef FILE_DEBUG
501  TR << "compute_pose_surface_energy: calculated surface energy: " << total_surface_energy_ << ", residue_surfaceE: [ ";
502  for ( Size ii=1; ii <= residue_surface_energy_.size(); ++ii ) {
503  TR << residue_surface_energy_[ ii ] << ", ";
504  }
505  TR << "]" << std::endl;
506 #endif
507 
508 }
509 
510 
/// @brief
/// Convenience overload: runs the four-argument compute_pose_hpatch_score() on the pose and returns only the
/// total score. The per-patch score map and the atoms-per-patch map are filled into locals and discarded.
511 Real
513  pose::Pose const & pose
514 )
515 {
 // left uninitialized here; the four-argument overload assigns it before accumulating into it
516  core::Real total_hpatch_score;
517  std::map< core::Size, std::pair< core::Real, core::Real > > patch_scores;
518  std::map< Size,utility::vector1< id::AtomID > > atoms_in_patches;
519 
520  compute_pose_hpatch_score( pose, total_hpatch_score, patch_scores, atoms_in_patches );
521  return total_hpatch_score;
522 }
523 
524 
525 ///
526 /// @begin compute_pose_hpatch_score
527 ///
528 /// @brief
529 /// Uses the src/core/pack/interaction_graph/RotamerDots classes to determine exact SASAs and then uses a graph-based
530 /// approach to find all the exposed hydrophobic patches on the surface of the pose. Uses the scores in the file
531 /// scoring/score_functions/SurfacePotential/hpatch_score.txt to assign a score to each patch and puts the score into the passed in Real.
532 ///
533 /// Note: If a Pose object has overlapping atoms anywhere, then this function will fail with the following error:
534 ///
535 /// sin_cos_range ERROR: nan is outside of [-1,+1] sin and cos value legal range
536 /// sin_cos_range ERROR: nan is outside of [-1,+1] sin and cos value legal range
537 /// ERROR:: Exit from: src/numeric/trig.functions.hh line: 117
538 ///
539 ///
541  pose::Pose const & pose,
542  core::Real & total_hpatch_score_,
543  std::map< core::Size, std::pair< core::Real, core::Real > > & patch_scores_,
544  std::map< Size,utility::vector1< id::AtomID > > & atoms_in_patches_
545 )
546 {
547 
548  using namespace core;
549  using namespace core::id;
550  using namespace core::pack::interaction_graph;
551 
552  // an atomID map is needed for the calc_per_atom_sasa method; it stores the actual calculated sasa for every atom
554  core::pose::initialize_atomid_map( atom_sasa, pose, (core::Real)0.0 ); // initialize to 0.0 for "not computed"
555 
558 
559  for ( Size ii = 1; ii <= pose.total_residue(); ++ii ) {
560  rdots[ii] = new RotamerDots( pose.residue(ii).clone(), true /* exclude H's */, true /* use expanded polar atom radii */);
561  invdots[ii] = new InvRotamerDots();
562  }
563 
564  // PointGraph is a one-way graph, which makes it somewhat annoying for iterating over neighbors of a certain
565  // position. Only edges to higher-indexed nodes exist. So instead, make a graph which has all the edges at every
566  // node to simplify iterating over all neighboring edges.
569 
570  Distance const max_pair_radius = pose::pose_max_nbr_radius( pose );
571  Distance const max_ep_radius = rdots[1]->max_atom_radius() + 2 * RotamerDots::probe_radius_;
572  core::conformation::find_neighbors<core::conformation::PointGraphVertexData,core::conformation::PointGraphEdgeData>( pg, max_pair_radius + max_pair_radius + max_ep_radius /* Angstrom cutoff */ ); //create edges
573 
574  // increment the self and residue-residue overlap for each residue
575  for ( Size ii = 1; ii <= pose.total_residue(); ++ ii ){
576  rdots[ ii ]->increment_self_overlap();
577  for ( core::conformation::PointGraph::UpperEdgeListConstIter edge_iter = pg->get_vertex( ii ).upper_edge_list_begin(),
578  edge_end_iter = pg->get_vertex(ii ).upper_edge_list_end(); edge_iter != edge_end_iter; ++edge_iter ) {
579  Size jj = edge_iter->upper_vertex();
580  rdots[ ii ]->increment_both( *rdots[ jj ] );
581  }
582  }
583 
584  // we need to know how many heavy atoms are in the pose before we can construct the disjoint sets object
585  Size heavyatom_count = 0;
586 
587  for ( Size ii=1; ii <= pose.total_residue(); ++ii ) {
588  conformation::Residue const & rsd = pose.residue( ii );
589  heavyatom_count += rsd.nheavyatoms();
590  }
591 
592  graph::DisjointSets ds( heavyatom_count );
593  utility::vector1< id::AtomID > ds_index_2_atomid( heavyatom_count );
594 
595  // create an AtomID map that will convert an atom in some residue into a DisjointSets index
596  id::AtomID_Map< Size > atom_2_ds_index;
597  atom_2_ds_index.resize( pose.total_residue() );
598  Size ds_index = 1;
599 
600  for ( Size ii=1; ii <= pose.total_residue(); ++ii ) {
601  conformation::Residue const & rsd = pose.residue( ii );
602  atom_2_ds_index.resize( ii, rsd.nheavyatoms(), 0 );
603  for ( Size jj=1; jj <= rsd.nheavyatoms(); ++jj ) {
604  id::AtomID const atomid( jj, ii );
605  atom_2_ds_index[ atomid ] = ds_index;
606  ds_index_2_atomid[ ds_index ] = atomid;
607  ds_index++;
608  }
609  }
610 
611  // now iterate over all residues of the pose and find all intra- and inter-residue atom-atom overlaps
612  for ( Size ii = 1; ii <= pose.total_residue(); ++ii ) {
613  invdots[ ii ]->setup_from_rotamer_dots( *rdots[ ii ] );
614  }
615 
616 #ifdef FILE_DEBUG
617  TR << "compute_pose_hpatch_score(): finding intra- and inter-residue overlaps" << std::endl;
618 #endif
619 
620  std::string carbon_atom = "C", sulfur_atom = "S";
621  for ( Size ii = 1; ii <= pose.total_residue(); ++ii ) {
622 
623  // don't try to find overlaps with non-protein residues; this can cause problems.
624  if ( ! pose.residue( ii ).is_protein() ) continue;
625 
626  // 1. self overlap; iterate over all heavyatoms of residue ii
627  for ( Size iia = 1, iia_end = pose.residue_type(ii).nheavyatoms(); iia <= iia_end; ++iia ) {
628 
629  // check if iia atom is buried with expanded polar atoms
630  if ( rdots[ ii ]->get_atom_sasa( iia ) == 0 )
631  continue;
632 
633  // check if jj atom is a hydrophobic atom
634  std::string const & iia_elem = pose.residue_type(ii).atom_type( iia ).element();
635  if ( iia_elem != carbon_atom && iia_elem != sulfur_atom ) // doing char comparison is much faster than string comparison
636  continue;
637 
638  Real const iia_rad = rdots[ ii ]->get_atom_radius( iia ) + RotamerDots::probe_radius_;
639  Vector const & iia_xyz = rdots[ ii ]->rotamer()->xyz( iia );
640 
641  // only have to iterate over higher-indexed heavyatoms of iia
642  for ( Size iib = iia+1; iib <= iia_end; ++iib ) {
643 
644  // check if kk atom is buried with expanded polar atoms
645  if ( rdots[ ii ]->get_atom_sasa( iib ) == 0 )
646  continue;
647 
648  // check if iib atom is a hydrophobic atom
649  std::string const & iib_elem = pose.residue_type( ii ).atom_type( iib ).element();
650  if ( iib_elem != carbon_atom && iib_elem != sulfur_atom ) // doing char comparison is much faster than string comparison
651  continue;
652 
653  Real const iib_rad = rdots[ ii ]->get_atom_radius( iib ) + RotamerDots::probe_radius_;
654  Vector const & iib_xyz = rdots[ ii ]->rotamer()->xyz( iib );
655 
656  if ( iia_xyz.distance_squared( iib_xyz ) < (iia_rad + iib_rad) * (iia_rad + iib_rad) ) {
657 #ifdef FILE_DEBUG
658  /*conformation::Residue const & ii_rsd = pose.residue( ii );
659  TR << "compute_pose_hpatch_score(): overlapping atom pair: "
660  << ii_rsd.aa() << " " << ii << "/" << utility::trim( ii_rsd.atom_name( iia ) ) << " - "
661  << ii_rsd.aa() << " " << ii << "/" << utility::trim( ii_rsd.atom_name( iib ) ) << std::endl;*/
662 #endif
663 
664  if ( invdots[ ii ]->atom_overlap_is_exposed( iia, iib ) ) {
665  Size iia_dsid = atom_2_ds_index( AtomID( iia, ii ) );
666  Size iib_dsid = atom_2_ds_index( AtomID( iib, ii ) );
667 
668  // if the two atoms aren't in the same connected component, call union on them
669  if ( ds.ds_find( iia_dsid ) != ds.ds_find( iib_dsid ) ) {
670  ds.ds_union( iia_dsid, iib_dsid );
671  }
672  }
673 
674  }
675  }
676  } // end for loop for self overlap
677 
678  /// 2. upper neighbors of ii, we'll use jj
679  for ( core::conformation::PointGraph::UpperEdgeListConstIter edge_iter = pg->get_vertex( ii ).upper_edge_list_begin(),
680  edge_end_iter = pg->get_vertex( ii ).upper_edge_list_end(); edge_iter != edge_end_iter; ++edge_iter ) {
681 
682  Size jj = edge_iter->upper_vertex();
683  // don't try to find overlaps with non-protein residues; this can cause problems.
684  if ( ! pose.residue( jj ).is_protein() ) continue;
685 
686  for ( Size iia = 1; iia <= pose.residue_type( ii ).nheavyatoms(); ++iia ) {
687 
688  if ( rdots[ ii ]->get_atom_sasa( iia ) == 0 )
689  continue;
690 
691  std::string const & iia_elem = pose.residue_type( ii ).atom_type( iia ).element();
692  if ( iia_elem != carbon_atom && iia_elem != sulfur_atom )
693  continue;
694 
695  Real const iia_rad = rdots[ ii ]->get_atom_radius( iia ) + RotamerDots::probe_radius_;
696  Vector const & iia_xyz = rdots[ ii ]->rotamer()->xyz( iia );
697 
698  for ( Size jja = 1; jja <= pose.residue_type( jj ).nheavyatoms(); ++jja ) {
699 
700  if ( rdots[ jj ]->get_atom_sasa( jja ) == 0 )
701  continue;
702 
703  std::string const & jja_elem = pose.residue_type( jj ).atom_type( jja ).element();
704  if ( jja_elem != carbon_atom && jja_elem != sulfur_atom )
705  continue;
706 
707  Real const jja_rad = rdots[ jj ]->get_atom_radius( jja ) + RotamerDots::probe_radius_;
708  Vector const & jja_xyz = rdots[ jj ]->rotamer()->xyz( jja );
709 
710  if ( iia_xyz.distance_squared( jja_xyz ) < (iia_rad+jja_rad) * (iia_rad+jja_rad) ) {
711 #ifdef FILE_DEBUG
712  /*conformation::Residue const & ii_rsd = pose.residue( ii );
713  conformation::Residue const & jj_rsd = pose.residue( jj );
714  TR << "compute_pose_hpatch_score(): overlapping atom pair: "
715  << ii_rsd.aa() << " " << ii << "/" << utility::trim( ii_rsd.atom_name( iia ) ) << " - "
716  << jj_rsd.aa() << " " << jj << "/" << utility::trim( jj_rsd.atom_name( jja ) ) << std::endl;*/
717 #endif
718  if ( invdots[ ii ]->atom_overlap_is_exposed( iia, *invdots[ jj ], jja ) ) {
719 
720  Size iia_dsid = atom_2_ds_index( AtomID( iia, ii ) );
721  Size jja_dsid = atom_2_ds_index( AtomID( jja, jj ) );
722 
723  // if the two atom aren't in the same connected component, call union on them
724  if ( ds.ds_find( iia_dsid ) != ds.ds_find( jja_dsid ) ) {
725  ds.ds_union( iia_dsid, jja_dsid );
726  }
727  }
728 #ifdef FILE_DEBUG
729  /*else {
730  conformation::Residue const & ii_rsd = pose.residue( ii );
731  conformation::Residue const & jj_rsd = pose.residue( jj );
732  TR << "compute_pose_hpatch_score(): overlapping atom pair, but overlap is buried: "
733  << ii_rsd.aa() << " " << ii << "/" << utility::trim( ii_rsd.atom_name( iia ) ) << " - "
734  << jj_rsd.aa() << " " << jj << "/" << utility::trim( jj_rsd.atom_name( jja ) ) << std::endl;
735  }*/
736 #endif
737 
738  }
739  }
740  }
741  } // end for loop over upper neighbors of ii
742 
743  } // end for loop over all residues
744 
745  // resize the per-patch score map
746  patch_scores_.clear();
747  total_hpatch_score_ = 0.0; // just in case the value was not init'd
748 
749  // tally up the patch area of all the connected components
750  std::map< Size, utility::vector1< Size > > sets = ds.sets();
751  std::map< Size, utility::vector1< Size > >::iterator it;
752 
753  core::Real patch_area = 0.0;
754  for ( it = sets.begin() ; it != sets.end(); it++ ) {
755  std::ostringstream strstream;
756  patch_area = 0.0;
757 
758  // only score patches with 4 or more atoms in them. without this filter, the small patches worsen the ability of
759  // the total score to discriminate rosetta decoys from natives. this filter can still cause some really small
760  // patches to get scores, but there aren't that many of those.
761  if ( (*it).second.size() < 4 ) {
762  continue;
763  }
764 
765 #ifdef FILE_DEBUG
766  TR << "representative: " << (*it).first << " => atoms in CC: [ ";
767 #endif
768  utility::vector1< id::AtomID > atoms( (*it).second.size() );
769  for ( Size ii=1; ii <= (*it).second.size(); ++ii ) {
770  id::AtomID const & atomid = ds_index_2_atomid[ (*it).second[ii] ];
771  atoms[ ii ] = atomid;
772 #ifdef FILE_DEBUG
773  core::conformation::Residue const & rsd = pose.residue( atomid.rsd() );
774  if ( pose.pdb_info() ) {
775  if ( pose.pdb_info()->chain( atomid.rsd() ) != ' ' ) {
776  TR << pose.pdb_info()->chain( atomid.rsd() ) << "/";
777  }
778  TR << pose.pdb_info()->number( atomid.rsd() ) << "/" << utility::trim( rsd.atom_name( atomid.atomno() ) ) << " + ";
779  } else {
780  TR << rsd.seqpos() << "/" << utility::trim( rsd.atom_name( atomid.atomno() ) ) << " + ";
781  }
782 #endif
783  patch_area += rdots[ atomid.rsd() ]->get_atom_sasa( atomid.atomno() );
784  }
785  atoms_in_patches_[ (*it).first ] = atoms;
786  atoms.clear();
787 
788 #ifdef FILE_DEBUG
789  TR << "], patch_area: " << patch_area;
790 #endif
791 
792  Real score = 0.0;
793  if ( patch_area > MAX_HPATCH_AREA ) {
794  score = MAX_HPATCH_SCORE;
795  } else {
796  score = hpatch_score( patch_area );
797  }
798 
799 #ifdef FILE_DEBUG
800  TR << ", hpatch_score: " << score << std::endl;
801 #endif
802  total_hpatch_score_ += score;
803 
804  patch_scores_[ (*it).first ] = std::make_pair( score, patch_area );
805  }
806 
807 //#ifdef FILE_DEBUG
808  // output all of the scores on a single line
809  TR << "calculated total hpatch score: " << total_hpatch_score_ << ", individual patch scores: [ ";
810  std::map< Size, std::pair< Real, Real > >::iterator scores_iter;
811  for ( scores_iter = patch_scores_.begin(); scores_iter != patch_scores_.end(); scores_iter++ ) {
812  TR << (*scores_iter).second.first << ", ";
813  }
814  TR << "]" << std::endl;
815 //#endif
816 
817  // iterate over the connected components again, but this time output only patches greater than or equal to 250A2
818  // by using a score cutoff.
819  for ( it = sets.begin() ; it != sets.end(); it++ ) {
820  std::ostringstream strstream;
821 
822  std::map< core::Size, std::pair< core::Real, core::Real > >::iterator ps_it = patch_scores_.find( (*it).first );
823  if ( ps_it == patch_scores_.end() ) { continue; } // this shouldn't happen though
824  Real score = (*ps_it).second.first;
825  if ( score < 4.00 ) { continue; }
826 
827  TR << "large patch, hpatch_score: " << score << ", PyMOL expression: select p" << (*it).first << ", ";
828  for ( Size ii=1; ii <= (*it).second.size(); ++ii ) {
829  id::AtomID const & atomid = ds_index_2_atomid[ (*it).second[ii] ];
830 
831  // output PDB numbering if there's a PDBInfo object present
832  core::conformation::Residue const & rsd = pose.residue( atomid.rsd() );
833  if ( pose.pdb_info() ) {
834  if ( pose.pdb_info()->chain( atomid.rsd() ) != ' ' ) {
835  TR << pose.pdb_info()->chain( atomid.rsd() ) << "/";
836  }
837  TR << pose.pdb_info()->number( atomid.rsd() ) << "/" << utility::trim( rsd.atom_name( atomid.atomno() ) ) << " + ";
838  } else {
839  TR << rsd.seqpos() << "/" << utility::trim( rsd.atom_name( atomid.atomno() ) ) << " + ";
840  }
841  }
842 
843  TR << std::endl;
844  }
845 
846  return;
847 
848 }
849 
850 } // namespace interaction_graph
851 } // namespace pack
852 } // namespace core
853