Rosetta 3.5
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
find_neighbors.hh
Go to the documentation of this file.
1 // -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
2 // vi: set ts=2 noet:
3 //
4 // (c) Copyright Rosetta Commons Member Institutions.
5 // (c) This file is part of the Rosetta software suite and is made available under license.
6 // (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
7 // (c) For more information, see http://www.rosettacommons.org. Questions about this can be
8 // (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
9 
10 /// @file core/conformation/find_neighbors.hh
11 /// @brief Sets up the residue neighbor information
12 /// @author Stuart G. Mentzer (Stuart_Mentzer@objexx.com)
13 /// @author Andrew Leaver-Fay (aleaverfay@gmail.com)
14 ///
15 /// @remarks Thanks to Will Sheffler for his ideas on refining this and extending it to atom neighbors
16 /// @remarks Adapting libRosetta code for generalized neighbor detection
17 
18 #ifndef INCLUDED_core_conformation_find_neighbors_hh
19 #define INCLUDED_core_conformation_find_neighbors_hh
20 
21 // Package Headers
24 #include <core/types.hh>
27 
28 
29 
30 // Numeric headers
31 #include <numeric/numeric.functions.hh>
32 #include <numeric/xyzTriple.hh>
33 #include <numeric/xyzVector.hh>
34 
35 #include <numeric/geometry/hashing/xyzStripeHashWithMeta.hh>
36 
37 // ObjexxFCL headers
38 //#include <ObjexxFCL/KeyFArray1D.hh>
39 //#include <ObjexxFCL/KeyFArray2D.hh>
40 #include <ObjexxFCL/FArray3D.hh>
41 
42 // Utility headers
43 //#include <utility/pointer/access_ptr.hh>
44 
45 // boost headers
46 #include <boost/unordered_map.hpp>
47 
48 // C++ headers
49 #include <cassert>
50 #include <cmath>
51 #include <cstdlib>
52 #include <limits>
53 #include <map>
54 #include <vector>
55 
56 #include <utility/vector1.hh>
57 #include <basic/options/keys/score.OptionKeys.gen.hh>
58 #include <basic/prof.hh>
59 
60 
61 namespace core {
62 namespace conformation {
63 
64 
65 // add the following to top of file:
66 /// @brief uses default boost::hash combine to hash Cubes
/// @details Folds the x, y and z components of a CubeKey, in that order, into one
/// seed (starting from 0) with boost::hash_combine. In the part of the file visible
/// here the functor is only mentioned by the commented-out boost::unordered_map
/// variants of the Cubes typedef.
/// @note NOTE(review): std::unary_function is deprecated since C++11 and removed in
/// C++17; confirm the target language standard before building with a newer compiler.
67 struct DefaultCubeHash : std::unary_function< CubeKey, std::size_t > {
68  // use std::size_t instead of core::Size just to be
69  // consistent with boost::hash types
70 
71  /// @brief return hash value given CubeKey
72  std::size_t operator()( CubeKey const & key ) const {
73  std::size_t seed = 0;
74  boost::hash_combine( seed, key.x() );
75  boost::hash_combine( seed, key.y() );
76  boost::hash_combine( seed, key.z() );
77  return seed;
78  }
79 };
80 
81 
82 
83 // Find neighbors and place them in a graph
/// @brief Picks a neighbor-detection routine for point_graph and runs it with neighbor_cutoff.
/// @details The branches are tested in this order:
///  - STRIPEHASH, or AUTOMATIC with the score::find_neighbors_stripehash option set: find_neighbors_stripe
///  - THREEDGRID, or AUTOMATIC with the score::find_neighbors_3dgrid option set: find_neighbors_3dgrid
///  - NAIVE, or fewer than N_POINTS_BREAK_EVEN vertices: find_neighbors_naive
///  - anything else: find_neighbors_octree
/// The vertex-count test in the third branch is not conditioned on the strategy, so a
/// caller that reaches it with a non-NAIVE strategy still gets the naive routine for
/// small graphs.
/// @param neighbor_cutoff distance cutoff forwarded unchanged to the chosen routine
/// @param strategy requested strategy; defaults to AUTOMATIC
/// @note NOTE(review): this listing omits original lines 86-87 (presumably the function
/// name and the point_graph parameter); confirm against the real header.
84 template <class Vertex, class Edge>
85 void
88  core::Real neighbor_cutoff,
89  Strategy strategy = AUTOMATIC
90 )
91 {
92  // the naive and the octree strategies are equally fast
93  // when detecting neighbors for 150 points.
94  core::Size const N_POINTS_BREAK_EVEN = 150;
95 
96  // PROF_START( basic::TEST2 );
97 
98  if ( strategy == STRIPEHASH || ( strategy == AUTOMATIC && basic::options::option[ basic::options::OptionKeys::score::find_neighbors_stripehash ]() )) {
99  find_neighbors_stripe( point_graph, neighbor_cutoff );
100  } else if ( strategy == THREEDGRID || ( strategy == AUTOMATIC && basic::options::option[ basic::options::OptionKeys::score::find_neighbors_3dgrid ]() )) {
101  find_neighbors_3dgrid( point_graph, neighbor_cutoff );
102  } else if ( strategy == NAIVE || point_graph->num_vertices() < N_POINTS_BREAK_EVEN ) {
103  find_neighbors_naive( point_graph, neighbor_cutoff );
104  } else { // Use automatic or an octree strategy
105  find_neighbors_octree( point_graph, neighbor_cutoff, strategy );
106  }
107  // PROF_STOP( basic::TEST2 );
108 
109 }
110 
/// @brief All-pairs neighbor detection: tests every vertex pair ( ii, jj ) with ii < jj.
/// @details For each pair the squared distance between the two vertices' xyz() positions
/// is compared with neighbor_cutoff squared; pairs at or below the cutoff get an edge
/// ( ii, jj ) whose Edge is constructed from the squared distance.
/// @param neighbor_cutoff distance cutoff (squared internally before comparison)
/// @note NOTE(review): this listing omits original lines 113-114 (presumably the function
/// name and the point_graph parameter); confirm against the real header.
111 template <class Vertex, class Edge>
112 void
115  core::Real neighbor_cutoff
116 )
117 {
118  // Constants
119  core::Size const n_points( point_graph->num_vertices() );
 // NOTE(review): this zero check is subsumed by the n_points <= 1 check below.
120  if ( n_points == 0 ) return;
121  core::Real neighbor_cutoff_sq = neighbor_cutoff * neighbor_cutoff;
122 
123  // Exclusion checks
124  if ( n_points <= 1 ) return; // Nothing to do
125 
126  // Naive method: O( R^2 ) for R residues but faster for small, compact conformations
127  for ( core::Size ii = 1; ii <= n_points; ++ii ) {
128  PointPosition const & ii_pos( point_graph->get_vertex(ii).data().xyz() );
 // Start at ii + 1 so each unordered pair is visited once, lower index first
129  for ( core::Size jj = ii + 1; jj <= n_points; ++jj ) {
130  core::Real const d_sq( ii_pos.distance_squared( point_graph->get_vertex(jj).data().xyz() ) ); // Using member version of distance_squared to work around GCC 3.4.5 ADL bug
131  if ( d_sq <= neighbor_cutoff_sq ) {
132  // Add neighbor link
133  point_graph->add_edge( ii, jj, Edge( d_sq ) );
134  }
135  }
136  }
137 }
138 
/// @brief Visitor that records a graph edge for a pair reported by the stripe hash.
/// @details visit() ignores both coordinate arguments and uses the two meta values
/// ( vm, cm ) as vertex indices: an edge ( vm, cm ) built from d_sq is added only when
/// vm < cm, so a pair reported in both orders is added once.
/// @note NOTE(review): this listing omits original lines 140-142 (presumably the struct
/// header, the point_graph member and the constructor name). vm and cm arrive as Real
/// and are passed straight to add_edge; confirm the intended index conversion.
139 template <class Vertex, class Edge>
143  graph::UpperEdgeGraph<Vertex, Edge> & point_graph_in
144  ) : point_graph(point_graph_in) {}
145  void visit(
146  numeric::xyzVector<Real> const & /*v*/, Real const & vm,
147  numeric::xyzVector<Real> const & /*c*/, Real const & cm, Real const & d_sq
148  ){
149  if( vm < cm ) point_graph.add_edge( vm, cm, Edge( d_sq ) );
150  }
151 };
152 
/// @brief Neighbor detection through numeric::geometry::hashing::xyzStripeHashWithMeta.
/// @details Copies every vertex position into a vector1, builds the stripe hash from
/// neighbor_cutoff and those positions, then queries the hash once per point with an
/// AddEdgeVisitor bound to the graph; the point's own index is passed as its meta value.
/// @param neighbor_cutoff distance cutoff handed to the hash constructor
/// @note NOTE(review): this listing omits original lines 155-156 (presumably the function
/// name and the point_graph parameter) and line 164 (presumably the declaration of
/// `dummy`); confirm against the real header.
153 template <class Vertex, class Edge>
154 void
157  core::Real neighbor_cutoff
158 ){
159  core::Size const n_points( point_graph->num_vertices() );
160  if ( n_points <= 1 ) return; // Nothing to do
 // NOTE(review): prints the point count to stdout on every call; looks like leftover
 // debug output, and <iostream> is not among the includes visible in this listing.
161  std::cout << n_points << std::endl;
162  utility::vector1<PointPosition> pts(n_points);
163  for( core::Size ii = 1; ii <= n_points; ++ii ) pts[ii] = point_graph->get_vertex(ii).data().xyz();
165  numeric::geometry::hashing::xyzStripeHashWithMeta<Real> hash(neighbor_cutoff,pts,dummy);
166  AddEdgeVisitor<Vertex,Edge> visitor(*point_graph);
167  for( core::Size ii = 1; ii <= n_points; ++ii ) {
168  hash.visit(pts[ii],ii,visitor);
169  }
170 }
171 
172 /// @brief Finds the residue neighbors efficiently using an octree-like spatial sort
173 /// @remarks
174 /// @li The "octree" algorithm isn't a real octree since we don't want/need the tree structure:
175 /// we are only interested in one distance cutoff criterion
176 /// @li The octree algorithm is O( R log R ) for R residues vs. O( R^2 ) for the naive algorithm
177 /// @li The octree algorithm seems to be faster for R >= 150 or so: this may come down as it gets refined
178 /// @li This is an initial implementation of a more scalable neighbor detection algorithm: it will be
179 /// further tuned as more test cases are run. This type of algorithm may be of greater benefit
180 /// for atom neighbor detection because the numbers involved are greater and the neighbor regions
181 /// encompass a smaller fraction of the other atoms in typical structures.
182 /// @li The spatial sorting is essentially an octree method but with the important distinction that
183 /// it doesn't have a full tree structure: That would change the log R search complexity to
184 /// log C where C is the number of cubes in the whole bounding box, which could be as much as O( R^3 )
185 /// ( O( log C ) == O( log R ) would still be true but the constant would be 3x worse). Instead
186 /// we let the map build a tree of just the active cubes, keeping the depth minimal. If we needed
187 /// to access the parent meta-cubes we would need the full octree, but we don't here.
188 /// @li The use of std::map to hold the octree should be compared against hash maps and other methods.
/// @param neighbor_cutoff distance cutoff; also used as the cube side length (side_factor is 1)
/// @param strategy when the bounding box holds fewer than 27 cubes and strategy compares
/// less than OCTREE, the work is delegated to find_neighbors_naive
/// @note NOTE(review): this listing omits original lines 191-192 (presumably the function
/// name and the point_graph parameter); confirm against the real header.
189 template <class Vertex, class Edge>
190 void
193  core::Real neighbor_cutoff,
194  Strategy strategy
195 )
196 {
197  //using namespace residue_neighbor_strategy;
198 
199  using numeric::min;
200  using numeric::max;
201  using numeric::square;
202 
203 
204  // Types
205  typedef numeric::xyzTriple< core::Size > CubeDim; // Cube dimensions
206  //typedef numeric::xyzTriple< core::Size > CubeKey; // Cube index-triple key
207  typedef utility::vector1< PointPosition > Points;
208  typedef std::vector< core::Size > PointIDs;
209  //typedef std::map< CubeKey, PointIDs, std::less< CubeKey >, boost::pool_allocator< std::pair< CubeKey const, PointIDs > > > Cubes;
210  typedef std::map< CubeKey, PointIDs > Cubes;
211 
212  /// Andrew Ban's boost version
213  // within find_neighbors() change the Cubes typedef to:
214  ///typedef boost::unordered_map< CubeKey, PointIDs, DefaultCubeHash > Cubes; // uncomment to use boost version
215 
216  // Constants
217  core::Size const n_points( point_graph->num_vertices() );
218 
219  core::Real neighbor_cutoff_sq( neighbor_cutoff*neighbor_cutoff);
220 
221  //local copy
222  Points points( n_points );
223  for ( core::Size ii = 1; ii <= n_points; ++ii ) { points[ ii ] = point_graph->get_vertex( ii ).data().xyz(); }
224 
225  // Exclusion checks
226  if ( n_points <= 1 ) return; // Nothing to do
227 
228  // Use automatic or an octree strategy
229 
230  // Bounding box of residue cutoff positions
231  PointPosition bbl( points[ 1 ] ), bbu( bbl ); // Lower and upper corners of bounding box
232  for ( core::Size ii = 2; ii <= n_points; ++ii ) {
233  bbl.min( points[ ii ] );
234  bbu.max( points[ ii ] );
235  }
236 
237  core::Size const epsilon_multiplier( 10 ); // Increase this if assert failures hit in finding a point's cube
238  core::Real const epsilon( epsilon_multiplier * std::numeric_limits< core::Real >::epsilon() );
239  bbl -= epsilon; // Expand bounding box to assure all points get assigned cubes in it
240  bbu += epsilon;
241 
242  // Set cube size and dimensions within bounding box
243  core::Size const side_factor( 1 ); // 1 factor => Check <= 27 adjacent cubes // 2 factor => Check <= 8 adjacent cubes
244  // Might gain some speed by replacing max_residue_pair_cutoff below with the max cutoff for pairs present
245  core::Real const side( side_factor * neighbor_cutoff );
246  assert( side > core::Real( 0 ) );
247  core::Real const side_inv( core::Real( 1 ) / side );
248  CubeDim const cube_dim( // Cube dimensions
249  core::Size( std::ceil( ( bbu.x() - bbl.x() ) * side_inv ) ), // Test that ceil values == core::Size values
250  core::Size( std::ceil( ( bbu.y() - bbl.y() ) * side_inv ) ),
251  core::Size( std::ceil( ( bbu.z() - bbl.z() ) * side_inv ) )
252  );
253  // We rounded up the number of cubes in each dimension
254  // We use cubes of exactly side x side x side dimensions
255  // We treat the (0,0,0) cube as touching bbl at its low corner
256  // The "highest" cube generally extends beyond bbu
257  // We call this the expanded bounding box
258 
259  // Number of potential cubes in expanded bounding box (we don't create them all)
260  core::Size const n_cube( cube_dim.x() * cube_dim.y() * cube_dim.z() );
261 
262  // Find upper Residue neighbors of each residue
 // NOTE(review): `strategy < OCTREE` depends on the declaration order of the Strategy
 // enumerators, which is not visible in this listing.
263  if ( ( n_cube < core::Size( 27 ) ) && ( strategy < OCTREE ) ) { // Naive strategy //! Tune the n_cube threshold based on more real-world trials
264 
265  // Naive method: O( R^2 ) for R residues but faster for small, compact conformations
266  find_neighbors_naive<Vertex, Edge>( point_graph, neighbor_cutoff );
267 
268  } else { // Octree O( R log R ) strategy
269 
270  // Add residues to bounding box cube tree: Only cubes with residues are added
271  Cubes cubes; /// STL MAP cubes
272 
273  // take a look at boost doc and implementation on more info to
274  // set number of buckets/loadfactor, briefly e.g.:
275 
276  // Andrew Ban's Boost version
277  // init at least 128 buckets, boost::unordered_map uses
278  // prime number sequence for bucket growth
279  //Cubes cubes( 128 ); // uncomment to use boost version
280  // change max load factor to 4.0
281  //cubes.max_load_factor( 4.0 ); // uncomment to use boost version
282 
283  for ( core::Size i = 1; i <= n_points; ++i ) {
284  //AminoAcid & res( p[ i ] );
285  PointPosition const pp( points[ i ]); //( res.neighbor_graph_position() );
286 
287  // Find the residue's cube: Cube coords are indexed from 0 to cube_dim -1
288  CubeKey const cube_key(
289  core::Size( ( pp.x() - bbl.x() ) * side_inv ),
290  core::Size( ( pp.y() - bbl.y() ) * side_inv ),
291  core::Size( ( pp.z() - bbl.z() ) * side_inv )
292  );
293 
294  // Check that it is within the expanded bounding box
295  assert( cube_key.x() < cube_dim.x() );
296  assert( cube_key.y() < cube_dim.y() );
297  assert( cube_key.z() < cube_dim.z() );
298 
299  // Add the point's position to the cube's collection
300  cubes[ cube_key ].push_back( i ); // Creates the cube if it doesn't exist yet
301  }
302 
303  // Find upper neighbors
304  core::Real const D_ZERO( 0 );
305  for ( core::Size i = 1; i <= n_points; ++i ) {
306  //AminoAcid & res( p[ i ] );
307  PointPosition const pp( points[ i ]); //( res.neighbor_graph_position() );
308  //AminoAcidKey const & res_key( res.cat_key() );
309  //core::Size const res_number( res.number() );
310 
311  // Find the residue's cube indexes
312  core::Size const icx( core::Size( ( pp.x() - bbl.x() ) * side_inv ) );
313  core::Size const icy( core::Size( ( pp.y() - bbl.y() ) * side_inv ) );
314  core::Size const icz( core::Size( ( pp.z() - bbl.z() ) * side_inv ) );
315 
316  // Get cube-relative position (for fast cube exclusion tests)
317  core::Real const cx( pp.x() - ( bbl.x() + ( icx * side ) ) );
318  core::Real const cy( pp.y() - ( bbl.y() + ( icy * side ) ) );
319  core::Real const cz( pp.z() - ( bbl.z() + ( icz * side ) ) );
320 
321  // Check its cube and adjacent cubes (<= all 27 of them with side_factor==1)
 // max( ic, 1 ) - 1 yields 0 when ic is 0 without ever evaluating ic - 1 on a zero index
 // (presumably core::Size is unsigned -- TODO confirm); the upper bound is clamped to
 // the last cube index, cube_dim - 1.
322  for ( core::Size ix = max( icx, core::Size( 1 ) ) - 1, ixe = min( icx + 1, cube_dim.x() - 1 ); ix <= ixe; ++ix ) {
323  for ( core::Size iy = max( icy, core::Size( 1 ) ) - 1, iye = min( icy + 1, cube_dim.y() - 1 ); iy <= iye; ++iy ) {
324  for ( core::Size iz = max( icz, core::Size( 1 ) ) - 1, ize = min( icz + 1, cube_dim.z() - 1 ); iz <= ize; ++iz ) {
325  Cubes::iterator const ic( cubes.find( CubeKey( ix,iy, iz ) ) );
326  if ( ic != cubes.end() ) { // Cube exists
 // Per axis: no contribution when the candidate cube shares the point's index;
 // otherwise the squared gap from the point to the face of its own cube that
 // borders the candidate ( c - side toward a higher index, c toward a lower one ).
 // The sum is compared with the squared cutoff before any point in the cube is tested.
327  if ( // This test gave a ~10% speedup in trials
328  ( ix != icx ? square( cx - ( ix > icx ? side : D_ZERO ) ) : D_ZERO ) +
329  ( iy != icy ? square( cy - ( iy > icy ? side : D_ZERO ) ) : D_ZERO ) +
330  ( iz != icz ? square( cz - ( iz > icz ? side : D_ZERO ) ) : D_ZERO )
331  <= neighbor_cutoff_sq )
332  { // Max cutoff sphere intersects this cube so check each residue in it
333  for ( PointIDs::iterator ia = ic->second.begin(), iae = ic->second.end(); ia != iae; ++ia ) {
334  core::Size const j( *ia );
335  if ( i < j ) { // It is an upper neighbor
336  core::Real const d_sq( pp.distance_squared( points[ j ] ) );
337  if ( d_sq <= neighbor_cutoff_sq )
338  {
339  point_graph->add_edge( i, j, Edge( d_sq ) );
340  }
341  //if ( d_sq < residue_neighbor_count_cutoff_sq ) { // Add to neighbor counts
342  // res.increment_n_neighbor();
343  // resu.increment_n_neighbor();
344  //}
345  }
346  }
347  }
348  }
349  }
350  }
351  }
352 
353  }
354  }
355 }
356 
357 /// @brief Create a 3D grid of points. O(N^3). For "spherical" conformations, Theta(N). Speeds neighbor detection
358 /// in abinitio by a factor of 2. Definition: Spherical = span of x,y and z all O(N**1/3). Note circularity.
359 /// Addendum: if the 3D grid used a list of point indices instead of a vector, then this would be Theta(N) for
360 /// spherical conformations; however, with a vector, this is O(NlgN). With the additional assumption that
361 /// each cube contains O(1) points, then this implementation is O(N). Such an assumption is unnecessary
362 /// in the list implementation.
363 /// @details Shameless code duplication below based on Stuart's
364 /// stl-map-based-neighbor-detection code. Note that the FArray3D is an index-from-1
365 /// data structure.
/// @param neighbor_cutoff distance cutoff; also used as the cube side length (side_factor is 1)
/// @note Unlike the map-based routine this one allocates a dense FArray3D covering every
/// cube of the expanded bounding box, and it has no cube-level exclusion test: every
/// point in each adjacent cube is distance-checked.
/// @note NOTE(review): this listing omits original lines 368-369 (presumably the function
/// name and the point_graph parameter); confirm against the real header.
366 template <class Vertex, class Edge>
367 void
370  core::Real neighbor_cutoff
371 )
372 {
373  using numeric::min;
374  using numeric::max;
375  using numeric::square;
376 
377 
378  // Types
379  typedef numeric::xyzTriple< core::Size > CubeDim; // Cube dimensions
380  typedef numeric::xyzTriple< core::Size > CubeKey; // Cube index-triple key
381  typedef utility::vector1< PointPosition > Points;
382  typedef std::vector< core::Size > PointIDs;
383  typedef ObjexxFCL::FArray3D< PointIDs > Cubes; // The 3D array that will be indexed into. Indexed from 1, not 0.
384 
385  // Constants
386  core::Size const n_points( point_graph->num_vertices() );
387 
388  core::Real neighbor_cutoff_sq( neighbor_cutoff*neighbor_cutoff);
389 
390  //local copy
391  Points points( n_points );
392  for ( core::Size ii = 1; ii <= n_points; ++ii ) { points[ ii ] = point_graph->get_vertex( ii ).data().xyz(); }
393 
394  // Exclusion checks
395  if ( n_points <= 1 ) return; // Nothing to do
396 
397  // Use automatic or an octree strategy
398 
399  // Bounding box of residue cutoff positions
400  PointPosition bbl( points[ 1 ] ), bbu( bbl ); // Lower and upper corners of bounding box
401  for ( core::Size ii = 2; ii <= n_points; ++ii ) {
402  bbl.min( points[ ii ] );
403  bbu.max( points[ ii ] );
404  }
405 
406  core::Size const epsilon_multiplier( 10 ); // Increase this if assert failures hit in finding a point's cube
407  core::Real const epsilon( epsilon_multiplier * std::numeric_limits< core::Real >::epsilon() );
408  bbl -= epsilon; // Expand bounding box to assure all points get assigned cubes in it
409  bbu += epsilon;
410 
411  // Set cube size and dimensions within bounding box
412  core::Size const side_factor( 1 ); // 1 factor => Check <= 27 adjacent cubes // 2 factor => Check <= 8 adjacent cubes
413  // Might gain some speed by replacing max_residue_pair_cutoff below with the max cutoff for pairs present
414  core::Real const side( side_factor * neighbor_cutoff );
415  assert( side > core::Real( 0 ) );
416  core::Real const side_inv( core::Real( 1 ) / side );
417  CubeDim const cube_dim( // Cube dimensions
418  core::Size( std::ceil( ( bbu.x() - bbl.x() ) * side_inv ) ), // Test that ceil values == core::Size values
419  core::Size( std::ceil( ( bbu.y() - bbl.y() ) * side_inv ) ),
420  core::Size( std::ceil( ( bbu.z() - bbl.z() ) * side_inv ) )
421  );
422  // We rounded up the number of cubes in each dimension
423  // We use cubes of exactly side x side x side dimensions
424  // We treat the (1,1,1) cube as touching bbl at its low corner
425  // The "highest" cube generally extends beyond bbu
426  // We call this the expanded bounding box
427 
428  // Add residues to bounding box cube tree: Only cubes with residues are added
 // NOTE(review): the comment above was inherited from the map-based routine; here the
 // array constructed below has an entry for every cube, occupied or not.
429 
430  /// NOT THREAD SAFE -- Static variable below would avoid allocation and deallocation costs of the 3D array.
431  /// Does not seem to offer any appreciable speed advantages.
432  //static Cubes cubes;
433  ///cubes.dimension( cube_dim.x(), cube_dim.y(), cube_dim.z() );
434 
435  /// keep track of the non-empty voxels so we can delete them later. The cubes array must be empty
436  /// at the beginning of neighbor detection.
437  //utility::vector1< core::Size > nonempty_cube_indices;
438 
439  /// Thread safe version; potentially more expensive than the non-thread-safe version,
440  /// but has not proven so in experimentation.
441  Cubes cubes( cube_dim.x(), cube_dim.y(), cube_dim.z() );
442 
443  for ( core::Size i = 1; i <= n_points; ++i ) {
444  PointPosition const pp( points[ i ]);
445 
446  // Find the residue's cube: Cube coords are indexed from 1 to cube_dim.
447  CubeKey const cube_key(
448  core::Size( ( pp.x() - bbl.x() ) * side_inv ) + 1,
449  core::Size( ( pp.y() - bbl.y() ) * side_inv ) + 1,
450  core::Size( ( pp.z() - bbl.z() ) * side_inv ) + 1
451  );
452 
453  // Check that it is within the expanded bounding box
454  assert( cube_key.x() <= cube_dim.x() );
455  assert( cube_key.y() <= cube_dim.y() );
456  assert( cube_key.z() <= cube_dim.z() );
457 
458  // Add the point's position to the cube's collection
459  //cubes[ cube_key ].push_back( i ); // Creates the cube if it doesn't exist yet
460  core::Size i_index = cubes.index( cube_key.x(), cube_key.y(), cube_key.z() );
461  if ( cubes[ i_index ].size() == 0 ) {
462  /// In the statically-allocated version, the cubes object must be emptied
463  /// at the conclusion of neighbor detection; keep track of those cubes which
464  /// have some entry to avoid the expense of traversing the whole cubes object
465  /// later.
466  //nonempty_cube_indices.push_back( i_index );
467 
468  /// In the thread-safe version, guess that any cube with 1 point contained inside
469  /// it will likely contain several. Allocate a bit of space now. O(NlgN) if the points
470  /// are not well distributed.
471  cubes[ i_index ].reserve( 10 );
472  }
473  cubes[ i_index ].push_back( i );
474  ///std::cout << "Cube " << i_index << " for residue " << i << " at coordinate: (" << pp.x() << "," << pp.y() <<"," << pp.z() << ")" << std::endl;
475  }
476 
477  // Find upper neighbors
478  //core::Real const D_ZERO( 0 );
479  for ( core::Size i = 1; i <= n_points; ++i ) {
480  //AminoAcid & res( p[ i ] );
481  PointPosition const pp( points[ i ]);
482 
483  // Find the residue's cube indexes
484  core::Size const icx( core::Size( ( pp.x() - bbl.x() ) * side_inv ) + 1 );
485  core::Size const icy( core::Size( ( pp.y() - bbl.y() ) * side_inv ) + 1 );
486  core::Size const icz( core::Size( ( pp.z() - bbl.z() ) * side_inv ) + 1 );
487 
488  // Check its cube and adjacent cubes (<= all 27 of them with side_factor==1)
 // Indices are 1-based here, so the lower bound is clamped to 1 via max( ic, 2 ) - 1
 // and the upper bound to cube_dim.
489  for ( core::Size ix = max( icx, core::Size( 2 ) ) - 1, ixe = min( icx + 1, cube_dim.x() ); ix <= ixe; ++ix ) {
490  for ( core::Size iy = max( icy, core::Size( 2 ) ) - 1, iye = min( icy + 1, cube_dim.y() ); iy <= iye; ++iy ) {
491  for ( core::Size iz = max( icz, core::Size( 2 ) ) - 1, ize = min( icz + 1, cube_dim.z() ); iz <= ize; ++iz ) {
492 
493  //Cubes::iterator const ic( cubes.find( CubeKey( ix,iy, iz ) ) );
494  core::Size cube_index = cubes.index( ix, iy, iz );
495 
496  ///std::cout << "Searching for neighbors of point " << i << " in cube [" << ix << "," << iy << "," << iz << ") index: " << cube_index << std::endl;
497 
498  if ( cubes[ cube_index ].size() != 0 ) { // Cube exists
499  for ( PointIDs::iterator ia = cubes[ cube_index ].begin(), iae = cubes[ cube_index ].end(); ia != iae; ++ia ) {
500  core::Size const j( *ia );
501  ///std::cout << "point " << j << " found " << std::endl;
502  if ( i < j ) { // It is an upper neighbor
503  core::Real const d_sq( pp.distance_squared( points[ j ] ) );
504  if ( d_sq <= neighbor_cutoff_sq )
505  {
506  point_graph->add_edge( i, j, Edge( d_sq ) );
507  }
508  //if ( d_sq < residue_neighbor_count_cutoff_sq ) { // Add to neighbor counts
509  // res.increment_n_neighbor();
510  // resu.increment_n_neighbor();
511  //}
512  }
513  }
514  }
515  }
516  }
517  }
518 
519  }
520 
521  /// Only necessary in the non-thread-safe version
522  /// before returning, empty the cubes array so it's ready for the next round
523  //for ( core::Size ii = 1; ii <= nonempty_cube_indices.size(); ++ii ) {
524  // cubes[ nonempty_cube_indices[ ii ] ].clear();
525  //}
526 }
527 
/// @brief Picks one of the *_restricted neighbor-detection routines and runs it with
/// neighbor_cutoff and residue_selection.
/// @details The branches are tested in this order:
///  - THREEDGRID, or AUTOMATIC with the score::find_neighbors_3dgrid option: find_neighbors_3dgrid_restricted
///  - NAIVE, or fewer than N_POINTS_BREAK_EVEN vertices: find_neighbors_naive_restricted
///  - anything else: find_neighbors_octree_restricted
/// There is no stripe-hash branch here, so a STRIPEHASH request falls through to the
/// naive/octree choice.
/// @param neighbor_cutoff distance cutoff forwarded unchanged to the chosen routine
/// @param residue_selection per-vertex flags forwarded unchanged to the chosen routine
/// @param strategy requested strategy; defaults to AUTOMATIC
/// @note NOTE(review): this listing omits original lines 530-531 (presumably the function
/// name and the point_graph parameter); confirm against the real header.
528 template <class Vertex, class Edge>
529 void
532  core::Real neighbor_cutoff,
533  utility::vector1< bool > const & residue_selection,
534  Strategy strategy = AUTOMATIC
535 )
536 {
537  // the naive and the octree strategies are equally fast
538  // when detecting neighbors for 150 points.
539  core::Size const N_POINTS_BREAK_EVEN = 150;
540 
541  // PROF_START( basic::TEST2 );
542 
 // NOTE(review): the option is used here without the trailing call operator, whereas the
 // unrestricted dispatcher reads the same option with `()`; confirm both forms are equivalent.
543  if ( strategy == THREEDGRID || ( strategy == AUTOMATIC && basic::options::option[ basic::options::OptionKeys::score::find_neighbors_3dgrid ] )) {
544  find_neighbors_3dgrid_restricted<Vertex,Edge>( point_graph, neighbor_cutoff, residue_selection );
545  } else if ( strategy == NAIVE || point_graph->num_vertices() < N_POINTS_BREAK_EVEN ) {
546  find_neighbors_naive_restricted<Vertex,Edge>( point_graph, neighbor_cutoff, residue_selection );
547  } else { // Use automatic or an octree strategy
548  find_neighbors_octree_restricted<Vertex,Edge>( point_graph, neighbor_cutoff, residue_selection, strategy );
549  }
550  // PROF_STOP( basic::TEST2 );
551 }
552 
/// @brief All-pairs neighbor detection limited to pairs that involve a selected vertex.
/// @details The outer loop visits only vertices flagged in residue_selection. For each
/// such ii the inner loop scans every jj and skips jj only when it is itself selected
/// and jj <= ii. Selected-selected pairs are therefore tested once (lower index first),
/// while every unselected jj is tested against each selected ii regardless of order.
/// @param neighbor_cutoff distance cutoff (squared internally before comparison)
/// @param residue_selection per-vertex flags, indexed like the graph's vertices
/// @note NOTE(review): add_edge can be called with ii > jj when jj is unselected; confirm
/// the graph accepts edges given in that order.
/// @note NOTE(review): this listing omits original lines 555-556 (presumably the function
/// name and the point_graph parameter); confirm against the real header.
553 template <class Vertex, class Edge>
554 void
557  core::Real neighbor_cutoff,
558  utility::vector1< bool > const & residue_selection
559 )
560 {
561  // Constants
562  core::Size const n_points( point_graph->num_vertices() );
 // NOTE(review): this zero check is subsumed by the n_points <= 1 check below.
563  if ( n_points == 0 ) return;
564  core::Real neighbor_cutoff_sq = neighbor_cutoff * neighbor_cutoff;
565 
566  // Exclusion checks
567  if ( n_points <= 1 ) return; // Nothing to do
568 
569  // Naive method: O( R^2 ) for R residues but faster for small, compact conformations
570  for ( core::Size ii = 1; ii <= n_points; ++ii ) {
571  if ( !residue_selection[ ii ] ) continue;
572  PointPosition const & ii_pos( point_graph->get_vertex(ii).data().xyz() );
573  for ( core::Size jj = 1; jj <= n_points; ++jj ) {
 // Skips ii itself and selected lower-indexed points (that pair was handled when the
 // outer loop was at jj); unselected points are never skipped.
574  if ( residue_selection[ jj ] && jj <= ii ) continue;
575  core::Real const d_sq( ii_pos.distance_squared( point_graph->get_vertex(jj).data().xyz() ) ); // Using member version of distance_squared to work around GCC 3.4.5 ADL bug
576  if ( d_sq <= neighbor_cutoff_sq ) {
577  // Add neighbor link
578  point_graph->add_edge( ii, jj, Edge( d_sq ) );
579  }
580  }
581  }
582 }
583 
584 /// @brief Finds the residue neighbors efficiently using an octree-like spatial sort
/// @details Same cube-map scheme as the unrestricted octree routine, except that only
/// points flagged in residue_selection are used as query points. All points, selected
/// or not, are binned into cubes, and a candidate j is distance-tested when i < j or
/// when j is not selected.
/// @param neighbor_cutoff distance cutoff; also used as the cube side length (side_factor is 1)
/// @param residue_selection per-vertex flags, indexed like the graph's vertices
/// @param strategy when the bounding box holds fewer than 27 cubes and strategy compares
/// less than OCTREE, the work is delegated to find_neighbors_naive_restricted
/// @note NOTE(review): add_edge can be called with i > j when j is unselected; confirm
/// the graph accepts edges given in that order.
/// @note NOTE(review): this listing omits original lines 587-588 (presumably the function
/// name and the point_graph parameter); confirm against the real header.
585 template <class Vertex, class Edge>
586 void
589  core::Real neighbor_cutoff,
590  utility::vector1< bool > const & residue_selection,
591  Strategy strategy
592 )
593 {
594  //using namespace residue_neighbor_strategy;
595 
596  using numeric::min;
597  using numeric::max;
598  using numeric::square;
599 
600 
601  // Types
602  typedef numeric::xyzTriple< core::Size > CubeDim; // Cube dimensions
603  //typedef numeric::xyzTriple< core::Size > CubeKey; // Cube index-triple key
604  typedef utility::vector1< PointPosition > Points;
605  typedef std::vector< core::Size > PointIDs;
606  //typedef std::map< CubeKey, PointIDs, std::less< CubeKey >, boost::pool_allocator< std::pair< CubeKey const, PointIDs > > > Cubes;
607  typedef std::map< CubeKey, PointIDs > Cubes;
608 
609  /// Andrew Ban's boost version
610  // within find_neighbors() change the Cubes typedef to:
611  ///typedef boost::unordered_map< CubeKey, PointIDs, DefaultCubeHash > Cubes; // uncomment to use boost version
612 
613  // Constants
614  core::Size const n_points( point_graph->num_vertices() );
615 
616  core::Real neighbor_cutoff_sq( neighbor_cutoff*neighbor_cutoff);
617 
618  //local copy
619  Points points( n_points );
620  for ( core::Size ii = 1; ii <= n_points; ++ii ) { points[ ii ] = point_graph->get_vertex( ii ).data().xyz(); }
621 
622  // Exclusion checks
623  if ( n_points <= 1 ) return; // Nothing to do
624 
625  // Use automatic or an octree strategy
626 
627  // Bounding box of residue cutoff positions
628  PointPosition bbl( points[ 1 ] ), bbu( bbl ); // Lower and upper corners of bounding box
629  for ( core::Size ii = 2; ii <= n_points; ++ii ) {
630  bbl.min( points[ ii ] );
631  bbu.max( points[ ii ] );
632  }
633 
634  core::Size const epsilon_multiplier( 10 ); // Increase this if assert failures hit in finding a point's cube
635  core::Real const epsilon( epsilon_multiplier * std::numeric_limits< core::Real >::epsilon() );
636  bbl -= epsilon; // Expand bounding box to assure all points get assigned cubes in it
637  bbu += epsilon;
638 
639  // Set cube size and dimensions within bounding box
640  core::Size const side_factor( 1 ); // 1 factor => Check <= 27 adjacent cubes // 2 factor => Check <= 8 adjacent cubes
641  // Might gain some speed by replacing max_residue_pair_cutoff below with the max cutoff for pairs present
642  core::Real const side( side_factor * neighbor_cutoff );
643  assert( side > core::Real( 0 ) );
644  core::Real const side_inv( core::Real( 1 ) / side );
645  CubeDim const cube_dim( // Cube dimensions
646  core::Size( std::ceil( ( bbu.x() - bbl.x() ) * side_inv ) ), // Test that ceil values == core::Size values
647  core::Size( std::ceil( ( bbu.y() - bbl.y() ) * side_inv ) ),
648  core::Size( std::ceil( ( bbu.z() - bbl.z() ) * side_inv ) )
649  );
650  // We rounded up the number of cubes in each dimension
651  // We use cubes of exactly side x side x side dimensions
652  // We treat the (0,0,0) cube as touching bbl at its low corner
653  // The "highest" cube generally extends beyond bbu
654  // We call this the expanded bounding box
655 
656  // Number of potential cubes in expanded bounding box (we don't create them all)
657  core::Size const n_cube( cube_dim.x() * cube_dim.y() * cube_dim.z() );
658 
659  // Find upper Residue neighbors of each residue
 // NOTE(review): `strategy < OCTREE` depends on the declaration order of the Strategy
 // enumerators, which is not visible in this listing.
660  if ( ( n_cube < core::Size( 27 ) ) && ( strategy < OCTREE ) ) { // Naive strategy //! Tune the n_cube threshold based on more real-world trials
661 
662  // Naive method: O( R^2 ) for R residues but faster for small, compact conformations
663  find_neighbors_naive_restricted<Vertex, Edge>( point_graph, neighbor_cutoff,residue_selection );
664 
665  } else { // Octree O( R log R ) strategy
666 
667  // Add residues to bounding box cube tree: Only cubes with residues are added
668  Cubes cubes; /// STL MAP cubes
669 
670  // take a look at boost doc and implementation on more info to
671  // set number of buckets/loadfactor, briefly e.g.:
672 
673  // Andrew Ban's Boost version
674  // init at least 128 buckets, boost::unordered_map uses
675  // prime number sequence for bucket growth
676  //Cubes cubes( 128 ); // uncomment to use boost version
677  // change max load factor to 4.0
678  //cubes.max_load_factor( 4.0 ); // uncomment to use boost version
679 
680  for ( core::Size i = 1; i <= n_points; ++i ) {
681  //AminoAcid & res( p[ i ] );
682  PointPosition const pp( points[ i ]); //( res.neighbor_graph_position() );
683 
684  // Find the residue's cube: Cube coords are indexed from 0 to cube_dim -1
685  CubeKey const cube_key(
686  core::Size( ( pp.x() - bbl.x() ) * side_inv ),
687  core::Size( ( pp.y() - bbl.y() ) * side_inv ),
688  core::Size( ( pp.z() - bbl.z() ) * side_inv )
689  );
690 
691  // Check that it is within the expanded bounding box
692  assert( cube_key.x() < cube_dim.x() );
693  assert( cube_key.y() < cube_dim.y() );
694  assert( cube_key.z() < cube_dim.z() );
695 
696  // Add the point's position to the cube's collection
697  cubes[ cube_key ].push_back( i ); // Creates the cube if it doesn't exist yet
698  }
699 
700  // Find upper neighbors
701  core::Real const D_ZERO( 0 );
702  for ( core::Size i = 1; i <= n_points; ++i ) {
703  if ( !residue_selection[ i ] ) continue;
704  //AminoAcid & res( p[ i ] );
705  PointPosition const pp( points[ i ]); //( res.neighbor_graph_position() );
706  //AminoAcidKey const & res_key( res.cat_key() );
707  //core::Size const res_number( res.number() );
708 
709  // Find the residue's cube indexes
710  core::Size const icx( core::Size( ( pp.x() - bbl.x() ) * side_inv ) );
711  core::Size const icy( core::Size( ( pp.y() - bbl.y() ) * side_inv ) );
712  core::Size const icz( core::Size( ( pp.z() - bbl.z() ) * side_inv ) );
713 
714  // Get cube-relative position (for fast cube exclusion tests)
715  core::Real const cx( pp.x() - ( bbl.x() + ( icx * side ) ) );
716  core::Real const cy( pp.y() - ( bbl.y() + ( icy * side ) ) );
717  core::Real const cz( pp.z() - ( bbl.z() + ( icz * side ) ) );
718 
719  // Check its cube and adjacent cubes (<= all 27 of them with side_factor==1)
 // max( ic, 1 ) - 1 yields 0 when ic is 0 without ever evaluating ic - 1 on a zero index
 // (presumably core::Size is unsigned -- TODO confirm); the upper bound is clamped to
 // the last cube index, cube_dim - 1.
720  for ( core::Size ix = max( icx, core::Size( 1 ) ) - 1, ixe = min( icx + 1, cube_dim.x() - 1 ); ix <= ixe; ++ix ) {
721  for ( core::Size iy = max( icy, core::Size( 1 ) ) - 1, iye = min( icy + 1, cube_dim.y() - 1 ); iy <= iye; ++iy ) {
722  for ( core::Size iz = max( icz, core::Size( 1 ) ) - 1, ize = min( icz + 1, cube_dim.z() - 1 ); iz <= ize; ++iz ) {
723  Cubes::iterator const ic( cubes.find( CubeKey( ix,iy, iz ) ) );
724  if ( ic != cubes.end() ) { // Cube exists
 // Per axis: no contribution when the candidate cube shares the point's index;
 // otherwise the squared gap from the point to the face of its own cube that
 // borders the candidate ( c - side toward a higher index, c toward a lower one ).
725  if ( // This test gave a ~10% speedup in trials
726  ( ix != icx ? square( cx - ( ix > icx ? side : D_ZERO ) ) : D_ZERO ) +
727  ( iy != icy ? square( cy - ( iy > icy ? side : D_ZERO ) ) : D_ZERO ) +
728  ( iz != icz ? square( cz - ( iz > icz ? side : D_ZERO ) ) : D_ZERO )
729  <= neighbor_cutoff_sq )
730  { // Max cutoff sphere intersects this cube so check each residue in it
731  for ( PointIDs::iterator ia = ic->second.begin(), iae = ic->second.end(); ia != iae; ++ia ) {
732  core::Size const j( *ia );
733  if ( i < j || !residue_selection[ j ] ) { // Upper neighbor, or any point that is not itself selected
734  core::Real const d_sq( pp.distance_squared( points[ j ] ) );
735  if ( d_sq <= neighbor_cutoff_sq )
736  {
737  point_graph->add_edge( i, j, Edge( d_sq ) );
738  }
739  //if ( d_sq < residue_neighbor_count_cutoff_sq ) { // Add to neighbor counts
740  // res.increment_n_neighbor();
741  // resu.increment_n_neighbor();
742  //}
743  }
744  }
745  }
746  }
747  }
748  }
749  }
750  }
751  }
752 }
753 
754  /// @brief Create a 3D grid of points. O(N^3). For "spherical" conformations, Theta(N). Speeds neighbor detection
755  /// in abinitio by a factor of 2. Definition: Spherical = span of x,y and z all O(N**1/3). Note circularity.
756  /// Addendum: if the 3D grid used a list of point indices instead of a vector, then this would be Theta(N) for
757  /// spherical conformations; however, with a vector, this is O(NlgN). With the additional assumption that
758  /// each cube contains O(1) points, then this implementation is O(N). Such an assumption is unnecessary
759  /// in the list implementation.
///
/// @details Selection-restricted variant. Every point is binned into a 3D array of cubes whose side is
/// side_factor * neighbor_cutoff (side_factor is 1 here), but only points whose residue_selection entry
/// is true are used as search origins. For each origin the cubes at index offsets -1..+1 along each axis
/// (clamped to the grid) are scanned, and an edge carrying the squared distance is added for every
/// accepted point whose squared distance is <= neighbor_cutoff squared.
///
/// @param neighbor_cutoff Distance cutoff; squared once and compared against squared distances. Also
/// determines the cube side length, so it must be > 0 (asserted below).
/// @param residue_selection Per-point flags, indexed with the same 1-based indices as the graph vertices.
///
/// NOTE(review): the listing numbers jump from 761 to 764, so the lines carrying the function name and
/// (presumably) the point_graph parameter used throughout the body are missing from this excerpt.
760 template <class Vertex, class Edge>
761 void
764  core::Real neighbor_cutoff,
765  utility::vector1< bool > const & residue_selection
766 )
767 {
768  using numeric::min;
769  using numeric::max;
770  using numeric::square; // not referenced in this function body
771 
772 
773  // Types
774  typedef numeric::xyzTriple< core::Size > CubeDim; // Cube dimensions
775  typedef numeric::xyzTriple< core::Size > CubeKey; // Cube index-triple key
776  typedef utility::vector1< PointPosition > Points;
777  typedef std::vector< core::Size > PointIDs;
778  typedef ObjexxFCL::FArray3D< PointIDs > Cubes; // The 3D array that will be indexed into. Indexed from 1, not 0.
779 
780  // Constants
781  core::Size const n_points( point_graph->num_vertices() );
782 
783  core::Real neighbor_cutoff_sq( neighbor_cutoff*neighbor_cutoff);
784 
785  // Local copy of every vertex position (made before the size check below)
786  Points points( n_points );
787  for ( core::Size ii = 1; ii <= n_points; ++ii ) { points[ ii ] = point_graph->get_vertex( ii ).data().xyz(); }
788 
789  // Exclusion checks
790  if ( n_points <= 1 ) return; // Nothing to do
791 
792  // Use automatic or an octree strategy
793 
794  // Bounding box of residue cutoff positions
795  PointPosition bbl( points[ 1 ] ), bbu( bbl ); // Lower and upper corners of bounding box
796  for ( core::Size ii = 2; ii <= n_points; ++ii ) {
797  bbl.min( points[ ii ] );
798  bbu.max( points[ ii ] );
799  }
800 
801  core::Size const epsilon_multiplier( 10 ); // Increase this if assert failures hit in finding a point's cube
802  core::Real const epsilon( epsilon_multiplier * std::numeric_limits< core::Real >::epsilon() );
803  bbl -= epsilon; // Expand bounding box to assure all points get assigned cubes in it
804  bbu += epsilon;
805 
806  // Set cube size and dimensions within bounding box
807  core::Size const side_factor( 1 ); // 1 factor => Check <= 27 adjacent cubes // 2 factor => Check <= 8 adjacent cubes
808  // Might gain some speed by replacing max_residue_pair_cutoff below with the max cutoff for pairs present
809  core::Real const side( side_factor * neighbor_cutoff );
810  assert( side > core::Real( 0 ) );
811  core::Real const side_inv( core::Real( 1 ) / side );
812  CubeDim const cube_dim( // Cube dimensions
813  core::Size( std::ceil( ( bbu.x() - bbl.x() ) * side_inv ) ), // Test that ceil values == core::Size values
814  core::Size( std::ceil( ( bbu.y() - bbl.y() ) * side_inv ) ),
815  core::Size( std::ceil( ( bbu.z() - bbl.z() ) * side_inv ) )
816  );
817  // We rounded up the number of cubes in each dimension
818  // We use cubes of exactly side x side x side dimensions
819  // We treat the (1,1,1) cube as touching bbl at its low corner
820  // The "highest" cube generally extends beyond bbu
821  // We call this the expanded bounding box
822 
823  // Add residues to bounding box cube tree: Only cubes with residues are added
824 
825  /// NOT THREAD SAFE -- Static variable below would avoid allocation and deallocation costs of the 3D array.
826  /// Does not seem to offer any appreciable speed advantages.
827  //static Cubes cubes;
828  ///cubes.dimension( cube_dim.x(), cube_dim.y(), cube_dim.z() );
829 
830  /// keep track of the non-empty voxels so we can delete them later. The cubes array must be empty
831  /// at the beginning of neighbor detection.
832  //utility::vector1< core::Size > nonempty_cube_indices;
833 
834  /// Thread safe version; potentially more expensive than the non-thread-safe version,
835  /// but has not proven so in experimentation.
836  Cubes cubes( cube_dim.x(), cube_dim.y(), cube_dim.z() );
837 
// Binning pass: every point is placed in a cube, whether or not it is selected, so that
// unselected points can still be found as neighbors of selected ones.
838  for ( core::Size i = 1; i <= n_points; ++i ) {
839  PointPosition const pp( points[ i ]);
840 
841  // Find the residue's cube: Cube coords are indexed from 1 to cube_dim.
842  CubeKey const cube_key(
843  core::Size( ( pp.x() - bbl.x() ) * side_inv ) + 1,
844  core::Size( ( pp.y() - bbl.y() ) * side_inv ) + 1,
845  core::Size( ( pp.z() - bbl.z() ) * side_inv ) + 1
846  );
847 
848  // Check that it is within the expanded bounding box
849  assert( cube_key.x() <= cube_dim.x() );
850  assert( cube_key.y() <= cube_dim.y() );
851  assert( cube_key.z() <= cube_dim.z() );
852 
853  // Add the point's position to the cube's collection
854  //cubes[ cube_key ].push_back( i ); // Creates the cube if it doesn't exist yet
// Convert the (x,y,z) cube key to the array's linear index once and reuse it below.
855  core::Size i_index = cubes.index( cube_key.x(), cube_key.y(), cube_key.z() );
856  if ( cubes[ i_index ].size() == 0 ) {
857  /// In the statically-allocated version, the cubes object must be emptied
858  /// at the conclusion of neighbor detection; keep track of those cubes which
859  /// have some entry to avoid the expense of traversing the whole cubes object
860  /// later.
861  //nonempty_cube_indices.push_back( i_index );
862 
863  /// In the thread-safe version, guess that any cube with 1 point contained inside
864  /// it will likely contain several. Allocate a bit of space now. O(NlgN) if the points
865  /// are not well distributed.
866  cubes[ i_index ].reserve( 10 );
867  }
868  cubes[ i_index ].push_back( i );
869  ///std::cout << "Cube " << i_index << " for residue " << i << " at coordinate: (" << pp.x() << "," << pp.y() <<"," << pp.z() << ")" << std::endl;
870  }
871 
872  // Find upper neighbors
873  //core::Real const D_ZERO( 0 );
874  for ( core::Size i = 1; i <= n_points; ++i ) {
875  //AminoAcid & res( p[ i ] );
// Only selected points act as search origins.
876  if ( !residue_selection[ i ] ) continue;
877  PointPosition const pp( points[ i ]);
878 
879  // Find the residue's cube indexes
880  core::Size const icx( core::Size( ( pp.x() - bbl.x() ) * side_inv ) + 1 );
881  core::Size const icy( core::Size( ( pp.y() - bbl.y() ) * side_inv ) + 1 );
882  core::Size const icz( core::Size( ( pp.z() - bbl.z() ) * side_inv ) + 1 );
883 
884  // Check its cube and adjacent cubes (<= all 27 of them with side_factor==1)
// max( ic, 2 ) - 1 equals max( ic - 1, 1 ): the lower bound is clamped to the first cube without
// ever evaluating ic - 1 for ic == 1 (presumably to avoid wrap-around of an unsigned core::Size).
// The upper bound is clamped to cube_dim, the last valid 1-based cube index.
885  for ( core::Size ix = max( icx, core::Size( 2 ) ) - 1, ixe = min( icx + 1, cube_dim.x() ); ix <= ixe; ++ix ) {
886  for ( core::Size iy = max( icy, core::Size( 2 ) ) - 1, iye = min( icy + 1, cube_dim.y() ); iy <= iye; ++iy ) {
887  for ( core::Size iz = max( icz, core::Size( 2 ) ) - 1, ize = min( icz + 1, cube_dim.z() ); iz <= ize; ++iz ) {
888 
889  //Cubes::iterator const ic( cubes.find( CubeKey( ix,iy, iz ) ) );
890  core::Size cube_index = cubes.index( ix, iy, iz );
891 
892  ///std::cout << "Searching for neighbors of point " << i << " in cube [" << ix << "," << iy << "," << iz << ") index: " << cube_index << std::endl;
893 
894  if ( cubes[ cube_index ].size() != 0 ) { // Cube exists
895  for ( PointIDs::iterator ia = cubes[ cube_index ].begin(), iae = cubes[ cube_index ].end(); ia != iae; ++ia ) {
896  core::Size const j( *ia );
897  ///std::cout << "point " << j << " found " << std::endl;
// Accept j when it has the higher index (each selected pair is handled once), or when j is
// unselected: an unselected j is never a search origin, so this is the only place the pair can be
// recorded. i itself is always rejected here because i is selected and i < i is false.
// NOTE(review): in the unselected case j may be smaller than i, so add_edge receives ( i, j )
// with i > j -- confirm that add_edge accepts its arguments in that order.
898  if ( i < j || !residue_selection[ j ]) { // It is an upper neighbor
899  core::Real const d_sq( pp.distance_squared( points[ j ] ) );
900  if ( d_sq <= neighbor_cutoff_sq )
901  {
902  point_graph->add_edge( i, j, Edge( d_sq ) );
903  }
904  //if ( d_sq < residue_neighbor_count_cutoff_sq ) { // Add to neighbor counts
905  // res.increment_n_neighbor();
906  // resu.increment_n_neighbor();
907  //}
908  }
909  }
910  }
911  }
912  }
913  }
914  }
915 
916  /// Only necessary in the non-thread-safe version
917  /// before returning, empty the cubes array so it's ready for the next round
918  //for ( core::Size ii = 1; ii <= nonempty_cube_indices.size(); ++ii ) {
919  // cubes[ nonempty_cube_indices[ ii ] ].clear();
920  //}
921 
922 }
923 
/// @brief Return the index of the closest vertex found in the upper-edge list of vertex node_id.
/// @details First runs find_neighbors over the whole point graph with the given cutoff and strategy,
/// then scans the upper-edge list of node_id for the edge with the smallest dsq().
/// @param node_id Index of the query vertex.
/// @param neighbor_cutoff Forwarded unchanged to find_neighbors.
/// @param strategy Forwarded unchanged to find_neighbors.
/// @return upper_vertex() of the edge with the smallest dsq(); node_id itself when the upper-edge list
/// holds no edge with dsq() below 99999999.9.
///
/// NOTE(review): the listing numbers jump from 924 to 928, so the lines carrying the return type, the
/// function name and (presumably) the point_graph parameter are missing from this excerpt.
924 template <class Vertex, class Edge>
928  core::Size node_id,
929  core::Real neighbor_cutoff,
930  Strategy strategy
931 )
932 {
// Populate the graph's edges; the search below only reads what this call recorded.
933  find_neighbors<Vertex,Edge>(point_graph,neighbor_cutoff,strategy);
// query_vertex is declared by value, so the loop below iterates over this local object.
934  graph::UEVertex<Vertex,Edge> query_vertex = point_graph->get_vertex(node_id);
935  typename graph::UEVertex<Vertex,Edge>::UpperEdgeListIter query_it; //sometimes I really don't like C++...
// Sentinel "larger than any real squared distance"; node_id doubles as the "nothing found" result.
936  core::Real min_d_squared = 99999999.9;
937  core::Size min_node_index = node_id;
// NOTE(review): only the upper-edge list is scanned; presumably that list holds only neighbors with
// an index above node_id, in which case closer lower-indexed neighbors are never considered -- confirm.
938  for(query_it = query_vertex.upper_edge_list_begin(); query_it != query_vertex.upper_edge_list_end(); ++query_it)
939  {
940  core::Real d_squared = query_it->data().dsq();
// Strict comparison: among equally distant edges the first one encountered is kept.
941  if(d_squared < min_d_squared)
942  {
943  min_d_squared = d_squared;
944  min_node_index = query_it->upper_vertex();
945  }
946  }
947  return min_node_index;
948 }
949 
/// @brief Pairwise neighbor search that uses only the points listed in non_surface_ranges as origins.
/// @details For every index jj inside each inclusive [first, second] range, the squared distance to
/// every vertex kk in 1..num_vertices() is computed, except that a kk with is_surface[ kk ] == false is
/// skipped when kk <= jj. An edge carrying the squared distance is added, with the smaller index passed
/// first, whenever the squared distance is <= neighbor_cutoff squared.
/// @param neighbor_cutoff Distance cutoff; squared once and compared against squared distances.
/// @param non_surface_ranges Inclusive (first, second) index ranges of the points used as origins.
/// @param is_surface Per-point flags, indexed with the same 1-based indices as the graph vertices.
///
/// NOTE(review): the listing numbers jump from 951 to 954, so the lines carrying the function name and
/// (presumably) the point_graph parameter are missing from this excerpt.
950 template <class Vertex, class Edge>
951 void
954  core::Real neighbor_cutoff,
955  utility::vector1< std::pair< Size, Size > > const & non_surface_ranges,
956  utility::vector1< bool > const & is_surface
957 )
958 {
959  //std::cout<<"finding neighbors...../n";
960  // Constants
961  core::Size const n_points( point_graph->num_vertices() );
962  if ( n_points == 0 ) return; // already covered by the n_points <= 1 check below
963  core::Real neighbor_cutoff_sq = neighbor_cutoff * neighbor_cutoff;
964 
965  // Exclusion checks
966  if ( n_points <= 1 ) return; // Nothing to do
967 
968  // Naive method: O( R^2 ) for R residues but faster for small, compact conformations
969  //std::cout<<"Protein Start Location:"<<conformation.num_jump();
970  for ( Size ii = 1; ii <= non_surface_ranges.size(); ++ii ) {
971  for ( Size jj = non_surface_ranges[ ii ].first, jjend = non_surface_ranges[ ii ].second; jj <= jjend; ++jj ) {
972  PointPosition const & jj_pos( point_graph->get_vertex(jj).data().xyz() );
973  for ( Size kk = 1; kk <= n_points; ++kk ) {
// A kk that is not flagged as surface is paired with jj only when kk > jj; a surface kk is always
// tested, whatever its index.
// NOTE(review): if a jj inside a range were itself flagged in is_surface, kk == jj would pass this
// filter and add an edge from jj to itself with d_sq == 0 -- presumably the ranges never contain
// surface points; confirm against the caller.
974  if ( ! is_surface[ kk ] && kk <= jj ) continue;
975  Real const d_sq( jj_pos.distance_squared( point_graph->get_vertex( kk ).data().xyz() ) );
976  if ( d_sq <= neighbor_cutoff_sq ) {
// Pass the smaller index first, since a surface kk may lie below jj.
977  Size lower = kk < jj ? kk : jj;
978  Size upper = kk < jj ? jj : kk;
979  point_graph->add_edge( lower, upper, Edge( d_sq ) );
980  //point_graph->add_edge( jj, kk, Edge( d_sq ) );
981  }
982  }
983  }
984  }
985 }
986 
987 
988 } //conformation
989 } //core
990 
991 #endif