Rosetta 3.5
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
ShapeComplementarityCalculator.cc
Go to the documentation of this file.
1 // -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
2 // vi: set ts=2 noet:
3 //
4 // (c) Copyright Rosetta Commons Member Institutions.
5 // (c) This file is part of the Rosetta software suite and is made available under license.
6 // (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
7 // (c) For more information, see http://www.rosettacommons.org. Questions about this can be
8 // (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
9 
10 /// @file core/scoring/sc/ShapeComplementarityCalculator.cc
11 /// @brief Implementation of the Shape Complementarity Calculator
12 /// @detailed Lawrence & Coleman shape complementarity calculator (based on CCP4's sc)
13 /// @author Luki Goldschmidt <luki@mbi.ucla.edu>
14 
15 /// This code was ported from the original Fortran code found in CCP4:
16 /// Sc (Version 2.0): A program for determining Shape Complementarity
17 /// Copyright Michael Lawrence, Biomolecular Research Institute
18 /// 343 Royal Parade Parkville Victoria Australia
19 ///
20 /// This version contains support for GPU acceleration on OpenCL-
21 /// capable devices, which provides a 10-25x speed up over the CPU-only code
22 /// using a regular desktop video card with 4 processors (32 cores).
23 /// Build with scons option extras=opencl to enable GPU support.
24 
25 #ifndef INCLUDED_core_scoring_sc_ShapeComplementarityCalculator_cc
26 #define INCLUDED_core_scoring_sc_ShapeComplementarityCalculator_cc
27 
28 // Project Headers
29 #include <core/types.hh>
30 #include <core/pose/Pose.hh>
32 #include <core/kinematics/Jump.hh>
34 
39 
40 #include <numeric/xyzVector.hh>
41 #include <numeric/NumericTraits.hh>
42 
43 // Utility headers
44 #include <utility/vector1.hh>
45 #include <utility/exit.hh>
46 #include <utility/io/izstream.hh>
47 #include <basic/Tracer.hh>
48 #include <basic/database/open.hh>
49 
50 // C headers
51 #include <stdio.h>
52 
53 // C++ headers
54 #include <iostream>
55 #include <ostream>
56 #include <fstream>
57 #include <vector>
58 #include <map>
59 #include <string>
60 
61 #define UPPER_MULTIPLE(n,d) (((n)%(d)) ? (((n)/(d)+1)*(d)) : (n))
62 
63 static basic::Tracer TR("core.scoring.sc.ShapeComplementarityCalculator");
64 
65 using namespace core;
66 
67 namespace core {
68 namespace scoring {
69 namespace sc {
70 
71 ////////////////////////////////////////////////////////////////////////////
72 // Public class functions
73 ////////////////////////////////////////////////////////////////////////////
74 
75 /// @begin ShapeComplementarityCalculator::ShapeComplementarityCalculator()
76 /// @brief
77 /// ShapeComplementarityCalculator constructor, initializes default settings
78 
81 {
82 }
83 
85 {
86 }
87 
88 /// @begin ShapeComplementarityCalculator::CalcSc()
89 /// @brief
90 /// Run the SC calculation on Pose and return just the sc statistic or -1 on error
91 /// @detailed
92 /// This is a static function and can be called without instantiating ShapeComplementarityCalculator.
93 /// The jump_id is used to partition the pose into two molecular surfaces; the first jump (1)
94 /// is used if no jump_id is explicitly specified. Those desiring more control as to what residues
95 /// make up either surface should use the AddResidue() or even add_atom() function instead.
96 /// Setting quick to true will perform a much faster calculation (~5-10 times faster) at the expense
97 /// of accuracy (about 0.05 units).
98 ///
99 /// Example:
100 /// core::Real sc = core::scoring::sc::ShapeComplementarityCalculator::CalcSc( pose );
102 {
104 
105  if(quick)
106  sc.settings.density = 5;
107 
108  if(sc.Calc(pose, jump_id))
109  return sc.GetResults().sc;
110  else
111  return -1;
112 }
113 
114 /// @begin ShapeComplementarityCalculator::Calc()
115 /// @brief Run the SC calculation on a Pose, partitioned by jump_id
116 /// @detailed
117 /// This non-static function requires an instance of the ShapeComplementarityCalculator class.
118 /// The jump_id is used to partition the pose into two molecular surfaces. To control what
119 /// residues make up either surface, use the AddResidue() or even add_atom() function instead.
120 /// Returns true on success. Results are retrieved with GetResults().
121 ///
122 /// Example:
123 /// core::scoring::sc::ShapeComplementarityCalculator calc;
124 /// core::Real sc;
125 /// if(calc.Calc( pose ))
126 /// sc = calc.GetResults().sc;
128 {
129  if( jump_id > pose.num_jump() || jump_id <= 0) {
130  TR.Error << "Jump ID out of bounds (pose has " << pose.num_jump() << " jumps)" << std::endl;
131  return 0;
132  }
133 
134  return MolecularSurfaceCalculator::Calc(pose, jump_id);
135 }
136 
137 /// @begin ShapeComplementarityCalculator::Calc
138 /// @brief Run the SC calculation for previously defined molecules (via AddResidue or add_atom calls)
139 /// @detailed
140 /// This function should be called after the residues / atoms making up the two molecular surfaces
141 /// have been explicitly defined.
142 /// Returns true on success.
143 
145 {
146 #ifdef USEOPENCL
147  gpuInit();
148 #endif
149 
150  try {
151 
152  basic::gpu::Timer timer(TR.Debug);
153 
154  run_.results.valid = 0;
155 
156  if(run_.atoms.empty())
157  throw ShapeComplementarityCalculatorException("No atoms defined");
158  if(!run_.results.surface[0].nAtoms)
159  throw ShapeComplementarityCalculatorException("No atoms defined for molecule 1");
160  if(!run_.results.surface[1].nAtoms)
161  throw ShapeComplementarityCalculatorException("No atoms defined for molecule 2");
162 
163  // Determine and assign the attention numbers for each atom
165 
167 
168  if(!run_.dots[0].size() || !run_.dots[1].size())
169  {
170  throw ShapeComplementarityCalculatorException("No molecular dots generated!");
171  }
172 
173  // Cut away the periphery of each surface
174  TR.Debug << "Trimming peripheral band, " << settings.band << "A range" << std::endl;
175 
176  std::vector<DOT const *> trimmed_dots[2];
177  for(int i = 0; i < 2; ++i) {
178  run_.results.surface[i].trimmedArea = TrimPeripheralBand(run_.dots[i], trimmed_dots[i]);
179  if(!trimmed_dots[i].size())
180  throw ShapeComplementarityCalculatorException("No molecular dots for surface %d", i);
181  run_.results.surface[i].nTrimmedDots = trimmed_dots[i].size();
182  run_.results.surface[i].nAllDots = run_.dots[i].size();
183  }
184 
185  // Compute distance arrays and histograms for each surface
186  TR.Debug << "Computing surface separation and vectors" << std::endl;
187 
188  CalcNeighborDistance(0, trimmed_dots[0], trimmed_dots[1]);
189  CalcNeighborDistance(1, trimmed_dots[1], trimmed_dots[0]);
190 
191  run_.results.surface[2].d_mean = (run_.results.surface[0].d_mean + run_.results.surface[1].d_mean) / 2;
192  run_.results.surface[2].d_median = (run_.results.surface[0].d_median + run_.results.surface[1].d_median) / 2;
193  run_.results.surface[2].s_mean = (run_.results.surface[0].s_mean + run_.results.surface[1].s_mean) / 2;
194  run_.results.surface[2].s_median = (run_.results.surface[0].s_median + run_.results.surface[1].s_median) / 2;
195 
196  run_.results.surface[2].nAtoms = (run_.results.surface[0].nAtoms + run_.results.surface[1].nAtoms);
197  run_.results.surface[2].nBuriedAtoms = (run_.results.surface[0].nBuriedAtoms + run_.results.surface[1].nBlockedAtoms);
198  run_.results.surface[2].nBlockedAtoms = (run_.results.surface[0].nBuriedAtoms + run_.results.surface[1].nBuriedAtoms);
199  run_.results.surface[2].nAllDots = (run_.results.surface[0].nAllDots + run_.results.surface[1].nAllDots);
200  run_.results.surface[2].nTrimmedDots = (run_.results.surface[0].nTrimmedDots + run_.results.surface[1].nTrimmedDots);
201  //run_.results.surface[2].nBuriedDots = (run_.results.surface[0].nBuriedDots + run_.results.surface[1].nBuriedDots);
202  //run_.results.surface[2].nAccessibleDots = (run_.results.surface[0].nAccessibleDots + run_.results.surface[1].nAccessibleDots);
203  run_.results.surface[2].trimmedArea = (run_.results.surface[0].trimmedArea + run_.results.surface[1].trimmedArea);
204 
205  run_.results.sc = run_.results.surface[2].s_median;
206  run_.results.distance = run_.results.surface[2].d_median;
207  run_.results.area = run_.results.surface[2].trimmedArea;
208  run_.results.valid = 1;
209 
210  return 1;
211 
213  TR.Error << "Failed: " << e.error << std::endl;
214  }
215 
216  return 0;
217 }
218 
219 ////////////////////////////////////////////////////////////////////////////
220 // Protected class functions
221 ////////////////////////////////////////////////////////////////////////////
222 
223 // Determine and assign the attention numbers for each atom
225 {
226  std::vector<Atom>::iterator pAtom1, pAtom2;
227 
228  for(pAtom1 = run_.atoms.begin(); pAtom1 < run_.atoms.end(); ++pAtom1) {
229  // find nearest neighbour in other molecule
230  ScValue dist_min = 99999.0, r;
231  for(pAtom2 = run_.atoms.begin(); pAtom2 < run_.atoms.end(); ++pAtom2) {
232  if(pAtom1->molecule == pAtom2->molecule)
233  continue;
234  r = pAtom1->distance(*pAtom2);
235  if(r < dist_min)
236  dist_min = r;
237  }
238 
239  // check if within separator distance
240  if(dist_min >= settings.sep)
241  {
242  TR.Debug << "Atom ATTEN_BLOCKER: " << pAtom1->natom << std::endl;
243  // too _far_ away from other molecule, blocker atom only
244  pAtom1->atten = ATTEN_BLOCKER;
245  ++run_.results.surface[pAtom1->molecule].nBlockedAtoms;
246  }
247  else
248  {
249  // potential interface or neighbouring atom
250  pAtom1->atten = ATTEN_BURIED_FLAGGED;
251  ++run_.results.surface[pAtom1->molecule].nBuriedAtoms;
252  }
253  }
254 
255  return 1;
256 }
257 
258 // SC molecular dot trimming, vector dot product calculation and statistics
259 // Trim dots and retain only the peripheral band
260 
262  std::vector<DOT> const &sdots,
263  std::vector<DOT const *> &trimmed_dots)
264 {
265  ScValue area = 0;
266 
267  if(sdots.empty())
268  return 0.0;
269 
270 #ifdef USEOPENCL
271  if(settings.gpu) {
272  area = gpuTrimPeripheralBand(sdots, trimmed_dots);
273  } else {
274 #endif
275 
276  // Loop over one surface
277  // If a point is buried then see if there is an accessible point within distance band
278 
279  for(std::vector<DOT>::const_iterator idot = sdots.begin(); idot < sdots.end(); ++idot) {
280  DOT const &dot = *idot;
281  // Paralelleizable kernel function
282  if(dot.buried && TrimPeripheralBandCheckDot(dot, sdots)) {
283  area += dot.area;
284  trimmed_dots.push_back(&dot);
285  }
286  }
287 
288 #ifdef USEOPENCL
289  }
290 #endif
291 
292  return area;
293 }
294 
295 // Test a dot against a set of dots for collision
296 // NOTE: ~75% of time is spent in this function
298  DOT const &dot,
299  std::vector<DOT> const &sdots)
300 {
301  // Caching of r2 only brings 0.5% speed boost
302  ScValue r2 = pow(settings.band, 2);
303 
304  for(std::vector<DOT>::const_iterator idot2 = sdots.begin(); idot2 < sdots.end(); ++idot2) {
305  DOT const &dot2 = *idot2;
306  if(&dot == &dot2)
307  continue;
308  if(dot2.buried)
309  continue;
310  if(dot.coor.distance_squared(dot2.coor) <= r2)
311  return 0;
312  }
313  return 1;
314 }
315 
316 ////////////////////////////////////////////////////////////////////////////
317 // Calculate separation distance and normal vector dot product (shape)
318 // distributions, mean and median of molecular surface
319 ////////////////////////////////////////////////////////////////////////////
320 
322  int const molecule,
323  std::vector<DOT const*> const &my_dots,
324  std::vector<DOT const*> const &their_dots)
325 {
326  std::map<int,int> dbins; // Distance bins
327  std::map<int,int> sbins; // Vector dot product bins (sc)
328  ScValue norm_sum = 0.0, distmin_sum = 0.0;
329  int ibin;
330  ScValue total = 0.0;
331 
332  if(my_dots.empty() || their_dots.empty())
333  return 0;
334 
335  for(std::vector<DOT const*>::const_iterator idot = my_dots.begin();
336  idot < my_dots.end(); ++idot) {
337  //if((*idot)->buried)
338  // run_.results.surface[molecule].nBuriedDots++;
339  //else
340  // run_.results.surface[molecule].nAccessibleDots++;
341  total += (*idot)->area;
342  }
343 
344 #ifdef USEOPENCL
345  std::vector<DOT const*> neighbors;
346  std::vector<DOT const*>::const_iterator iNeighbor;
347 
348  if(settings.gpu) {
349  gpuFindClosestNeighbors(my_dots, their_dots, neighbors);
350  iNeighbor = neighbors.begin();
351  }
352 #endif
353 
354  for(std::vector<DOT const*>::const_iterator idot = my_dots.begin();
355  idot < my_dots.end(); ++idot)
356  {
357  DOT const &dot1 = **idot;
358 
359  ScValue distmin, r;
360  DOT const *neighbor = NULL;
361 
362 #ifdef USEOPENCL
363  if(settings.gpu)
364  {
365  neighbor = *iNeighbor++;
366  }
367  else
368  {
369  neighbor = CalcNeighborDistanceFindClosestNeighbor(dot1, their_dots);
370  }
371 #else
372  neighbor = CalcNeighborDistanceFindClosestNeighbor(dot1, their_dots);
373 #endif
374 
375 
376  if(!neighbor)
377  continue;
378 
379  // having looked at all possible neighbours now accumulate stats
380  distmin = neighbor->coor.distance(dot1.coor);
381  distmin_sum += distmin;
382  // decide which bin to put it into and then add to distance histogram
383  ibin = (int)(distmin / settings.binwidth_dist);
384  ++dbins[ibin];
385 
386  //work out dot product
387  r = dot1.outnml.dot(neighbor->outnml);
388 
389  // weight dot product
390  // cpjx I think the weighting factor is the denominator 2 below?
391  // cpjx r = r * exp( - (distmin**2) / 2.)
392  r = r * exp( - pow(distmin, 2) * settings.weight );
393  // rounding errors a problem, so ensure abs(r) <1
394  r = MIN(0.999, MAX(r, -0.999));
395  norm_sum += r;
396 
397  // left_trunc ScValue to int ibin
398  // otherwise: (int)-0.9 = 0.
399  r /= settings.binwidth_norm;
400  if(r >= 0)
401  ibin = (int)r;
402  else
403  ibin = (int)r -1;
404  ++sbins[ibin];
405  }
406 
407  // Determine the last distance bin that has anything in it
408  // Accumulate percentages and area from all filled distance bins
409  ScValue abin, cumarea =0, cumperc = 0, perc, c;
410  ScValue rleft =0, rmedian =0;
411  std::map<int,int>::const_iterator it;
412 
413  TR.Trace << std::endl;
414  TR.Trace << "Distance between surfaces D(" << (molecule+1) << "->" << (molecule+1)%2+1 << "):" << std::endl;
415  TR.Trace << "From - To\tArea\tCum. Area\t%\tCum. %" << std::endl;
416 
417  for(it = dbins.begin(); it != dbins.end(); ++it) {
418  abin = total * (it->second) / my_dots.size();
419  cumarea += abin;
420  perc = abin * 100 / total;
421  c = cumperc + perc;
422  if(cumperc <= 50 && c >= 50) {
423  rleft = (it->first) * settings.binwidth_dist;
424  rmedian = rleft + (50 - cumperc) * settings.binwidth_dist / ( c - cumperc );
425  }
426  cumperc = c;
427 
428  #ifndef WIN32
429  if(TR.Trace.visible()) {
430  char buf[128];
431 
432  snprintf(buf, sizeof(buf),
433  "%.2f - %.2f\t%.1f\t%.1f\t%.1f\t%.1f",
434  (ScValue)it->first * settings.binwidth_dist,
435  (ScValue)it->first * settings.binwidth_dist + settings.binwidth_dist,
436  abin, cumarea,
437  perc, cumperc);
438 
439  TR.Trace << buf << std::endl;
440  }
441  #endif
442  }
443 
444  run_.results.surface[molecule].d_mean = distmin_sum / my_dots.size();
445  run_.results.surface[molecule].d_median = rmedian;
446 
447  TR.Trace << std::endl;
448  TR.Trace << "Surface complementarity S(" << (molecule+1) << "->" << (molecule+1)%2+1 << "):" << std::endl;
449  TR.Trace << "From - To\tNumber\t%\tCumm. %" << std::endl;
450 
451  cumperc = 0;
452  for(it = sbins.begin(); it != sbins.end(); ++it) {
453  perc = (ScValue)(it->second) * 100 / my_dots.size();
454  c = cumperc + perc;
455  if(cumperc <= 50 && c >= 50) {
456  rleft = (ScValue)(it->first) * settings.binwidth_norm;
457  rmedian = rleft + (50 - cumperc) * settings.binwidth_norm / ( c - cumperc );
458  }
459  cumperc = c;
460 
461  #ifndef WIN32
462  if(TR.Trace.visible()) {
463  char buf[128];
464  snprintf(buf, sizeof(buf),
465  "%.2f - %.2f\t%d\t%.1f\t%.1f",
466  (ScValue)-it->first * settings.binwidth_norm - settings.binwidth_norm,
467  (ScValue)-it->first * settings.binwidth_norm,
468  it->second, perc, cumperc);
469  TR.Trace << buf << std::endl;
470  }
471  #endif
472  }
473 
474  run_.results.surface[molecule].s_mean= -norm_sum / my_dots.size();
475  run_.results.surface[molecule].s_median = -rmedian;
476 
477  return 1;
478 }
479 
480 // Find closest neighbor dot for a given dot
481 // NOTE: ~20% of time is spent in this function
483  DOT const &dot1,
484  std::vector<DOT const*> const &their_dots)
485 {
486  ScValue distmin = 999999.0, d;
487  DOT const *neighbor = NULL;
488 
489  // Loop over the entire surface: find and flag neighbour of each point
490  // that we're interested in and store nearest neighbour pointer
491 
492  for(std::vector<DOT const*>::const_iterator idot2 = their_dots.begin();
493  idot2 < their_dots.end(); ++idot2) {
494  DOT const &dot2 = **idot2;
495  if(!dot2.buried)
496  continue;
497  d = dot2.coor.distance_squared(dot1.coor);
498  if(d <= distmin) {
499  distmin = d;
500  neighbor = &dot2;
501  }
502  }
503  return neighbor;
504 }
505 
506 ////////////////////////////////////////////////////////////////////////
507 // GPU SUPPORT FUNCTIONS
508 
509 #ifdef USEOPENCL
510 
511 core::Real inline ShapeComplementarityCalculator::GetTimerMs(clock_t &start)
512 {
513  clock_t now = clock();
514  core::Real d = (now - start)/(CLOCKS_PER_SEC/1000);
515  return d;
516 }
517 
518 void ShapeComplementarityCalculator::gpuInit()
519 {
520  if(gpu.use()) {
521  if(TR.Debug.visible())
522  gpu.profiling(1);
523  if(gpu.Init())
524  settings.gpu = 1;
525  }
526  if(settings.gpu_threads < 32)
527  settings.gpu_threads = gpu.device().threads;
528  gpu.RegisterProgram("gpu/sc.cl");
529 }
530 
531 ShapeComplementarityCalculator::ScValue ShapeComplementarityCalculator::gpuTrimPeripheralBand(
532  std::vector<DOT> const &dots,
533  std::vector<DOT const*> &trimmed_dots)
534 {
535  using namespace basic::gpu;
536 
537  int n, nBur, nAcc;
538  int threads;
539  ScValue area = 0;
540  clock_t timer;
541 
542  threads = MIN(512, settings.gpu_threads);
543  n = dots.size();
544  timer = clock();
545 
546  // Host and device (GPU) memory pointers for dot coordinates and results
547  float4 *hAccDotCoords, *phAccDotCoords;
548  float4 *hBurDotCoords, *phBurDotCoords;
549  char *hDotColl;
550 
551  hAccDotCoords = new float4[UPPER_MULTIPLE(n, threads)];
552  hBurDotCoords = new float4[UPPER_MULTIPLE(n, threads)];
553  hDotColl = new char[UPPER_MULTIPLE(n, threads)];
554 
555  if(!hAccDotCoords || !hBurDotCoords || !hDotColl)
556  throw ShapeComplementarityCalculatorException("Out of host memory!");
557 
558  // Make GPU copy of (x, y, z) buried and accessible coordinates
559  phAccDotCoords = hAccDotCoords;
560  phBurDotCoords = hBurDotCoords;
561  for(std::vector<DOT>::const_iterator idot = dots.begin();
562  idot < dots.end(); ++idot) {
563 #ifdef SC_PRECISION_REAL
564  if(idot->buried) {
565  phBurDotCoords->x = idot->coor.x();
566  phBurDotCoords->y = idot->coor.y();
567  phBurDotCoords->z = idot->coor.z();
568  ++phBurDotCoords;
569  } else {
570  phAccDotCoords->x = idot->coor.x();
571  phAccDotCoords->y = idot->coor.y();
572  phAccDotCoords->z = idot->coor.z();
573  ++phAccDotCoords++;
574  }
575 #else
576  // Quick copy
577  if(idot->buried)
578  *phBurDotCoords++ = *((float4*)&idot->coor.x());
579  else
580  *phAccDotCoords++ = *((float4*)&idot->coor.x());
581 #endif
582  }
583  nBur = phBurDotCoords - hBurDotCoords;
584  nAcc = phAccDotCoords - hAccDotCoords;
585 
586  // Run kernel on GPU
587  float r2 = pow(settings.band, 2);
588  if(!gpu.ExecuteKernel("TrimPeripheralBand", nBur, threads, 32,
589  GPU_IN, UPPER_MULTIPLE(nAcc, threads) * sizeof(*hAccDotCoords), hAccDotCoords,
590  GPU_INT, nAcc,
591  GPU_IN, UPPER_MULTIPLE(nBur, threads) * sizeof(*hBurDotCoords), hBurDotCoords,
592  GPU_OUT, UPPER_MULTIPLE(nBur, threads) * sizeof(*hDotColl), hDotColl,
593  GPU_FLOAT, r2,
594  NULL)) {
595  throw ShapeComplementarityCalculatorException("Failed to launch GPU kernel TrimPeripheralBand!");
596  }
597 
598  // Make a new list of dots that have no collisions
599  char *p = hDotColl;
600  for(std::vector<DOT>::const_iterator idot = dots.begin();
601  idot < dots.end(); ++idot) {
602  DOT const &dot1 = *idot;
603  if(!idot->buried)
604  continue;
605  if(!*p++) {
606  area += dot1.area;
607  trimmed_dots.push_back(&dot1);
608  }
609  }
610 
611  delete hAccDotCoords;
612  delete hBurDotCoords;
613  delete hDotColl;
614 
615  TR.Debug << "Peripheral trimming GPU processing time: " << gpu.lastKernelRuntime() << " ms kernel, " << GetTimerMs(timer) << " ms total" << std::endl;
616 
617  return area;
618 }
619 
620 int ShapeComplementarityCalculator::gpuFindClosestNeighbors(
621  std::vector<DOT const*> const &my_dots,
622  std::vector<DOT const*> const &their_dots,
623  std::vector<DOT const*> &neighbors)
624 {
625  using namespace basic::gpu;
626 
627  int nMyDots, nTheirDots, nNeighbors;
628  int threads;
629  clock_t timer;
630 
631  timer = clock();
632  threads = MIN(512, settings.gpu_threads);
633 
634  // Memory pointers for my and their dot coordinate arrays, CPU and GPU
635  float4 *hMyDotCoords, *phMyDotCoords;
636  float4 *hTheirDotCoords, *phTheirDotCoords;
637 
638  // Dot point pointer map
639  DOT const **hTheirDots, **phTheirDots;
640 
641  // Neighbor ID memory pointers
642  ::uint *hNeighbors;
643 
644  nMyDots = my_dots.size();
645  nTheirDots = their_dots.size();
646  nNeighbors = nMyDots;
647 
648  hMyDotCoords = new float4 [UPPER_MULTIPLE(nMyDots, threads)];
649  hTheirDotCoords = new float4 [UPPER_MULTIPLE(nTheirDots, threads)];
650  hTheirDots = new DOT const * [UPPER_MULTIPLE(nTheirDots, threads)];
651  hNeighbors = new ::uint[UPPER_MULTIPLE(nNeighbors, threads)];
652 
653  // Make GPU copy of (x, y, z) dot coordinates for my dots
654  phMyDotCoords = hMyDotCoords;
655  for(std::vector<DOT const*>::const_iterator idot = my_dots.begin(); idot < my_dots.end(); ++idot) {
656  phMyDotCoords->x = (*idot)->coor.x();
657  phMyDotCoords->y = (*idot)->coor.y();
658  phMyDotCoords->z = (*idot)->coor.z();
659  ++phMyDotCoords;
660  }
661  nMyDots = phMyDotCoords - hMyDotCoords;
662 
663  // Make GPU copy of (x, y, z) dot coordinates for their dots and keep a map
664  phTheirDotCoords = hTheirDotCoords;
665  phTheirDots = hTheirDots;
666  for(std::vector<DOT const*>::const_iterator idot = their_dots.begin(); idot < their_dots.end(); ++idot) {
667  if(!(*idot)->buried)
668  continue;
669  phTheirDotCoords->x = (*idot)->coor.x();
670  phTheirDotCoords->y = (*idot)->coor.y();
671  phTheirDotCoords->z = (*idot)->coor.z();
672  ++phTheirDotCoords;
673  *phTheirDots++ = *idot;
674  }
675  nTheirDots = phTheirDotCoords - hTheirDotCoords;
676 
677  // Run kernel on GPU
678  if(!gpu.ExecuteKernel("FindClosestNeighbor", nMyDots, threads, 32,
679  GPU_IN, UPPER_MULTIPLE(nMyDots, threads) * sizeof(*hMyDotCoords), hMyDotCoords,
680  GPU_IN, UPPER_MULTIPLE(nTheirDots, threads) * sizeof(*hTheirDotCoords), hTheirDotCoords,
681  GPU_INT, nTheirDots,
682  GPU_OUT, UPPER_MULTIPLE(nNeighbors, threads) * sizeof(*hNeighbors), hNeighbors,
683  NULL)) {
684  throw ShapeComplementarityCalculatorException("Failed to launch GPU kernel FindClosestNeighbor!");
685  }
686 
687  for(int i = 0; i < nNeighbors; ++i)
688  neighbors.push_back( hTheirDots[hNeighbors[i]] );
689 
690  delete hMyDotCoords;
691  delete hTheirDotCoords;
692  delete hTheirDots;
693  delete hNeighbors;
694 
695  TR.Debug << "Find Neighbors GPU processing time: " << gpu.lastKernelRuntime() << " ms kernel, " << GetTimerMs(timer) << " ms total" << std::endl;
696 
697  return 1;
698 }
699 
700 #endif // USEOPENCL
701 
702 // The End
703 ////////////////////////////////////////////////////////////////////////////
704 
705 } // namespace sc
706 } // namespace scoring
707 } // namespace core
708 
709 #endif // INCLUDED_core_scoring_sc_ShapeComplementarityCalculator_cc
710 
711 // END //