Rosetta 3.5
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Matcher.cc
Go to the documentation of this file.
1 // -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
2 // vi: set ts=2 noet:
3 // :noTabs=false:tabSize=4:indentSize=4:
4 //
5 // (c) Copyright Rosetta Commons Member Institutions.
6 // (c) This file is part of the Rosetta software suite and is made available under license.
7 // (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
8 // (c) For more information, see http://www.rosettacommons.org. Questions about this can be
9 // (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
10 
11 /// @file protocols/match/Matcher.cc
12 /// @brief
13 /// @author Alex Zanghellini (zanghell@u.washington.edu)
14 /// @author Andrew Leaver-Fay (aleaverfay@gmail.com), porting to mini
15 
16 // Unit headers
18 
19 // Package headers
20 #include <protocols/match/Hit.hh>
25 
34 // AUTO-REMOVED #include <protocols/match/downstream/GeometrySecMatchRPE.hh>
35 // AUTO-REMOVED #include <protocols/match/downstream/ScoringSecMatchRPE.hh>
37 
40 
45 
46 // AUTO-REMOVED #include <protocols/match/downstream/LigandConformer.hh> // REQUIRED FOR WINDOWS
47 
48 // Project headers
50 //#include <protocols/toolbox/match_enzdes_util/EnzConstraintParameters.hh>
53 
54 #include <core/pose/Pose.hh>
55 
56 #include <basic/Tracer.hh>
57 
58 // AUTO-REMOVED #include <core/scoring/ScoringManager.hh>
61 // AUTO-REMOVED #include <core/pack/dunbrack/SemiRotamericSingleResidueDunbrackLibrary.tmpl.hh>
64 
65 // Numeric headers
66 #include <numeric/random/random.hh>
67 
68 // Utility headers
69 #include <utility/LexicographicalIterator.hh>
70 #include <utility/OrderedTuple.hh>
71 #include <utility/string_util.hh>
72 
73 // C++ headers
74 #include <map>
75 // AUTO-REMOVED #include <fstream>
76 #include <string>
77 
78 #include <utility/vector1.hh>
79 
80 //Auto Headers
82 
83 namespace protocols {
84 namespace match {
85 
86 static basic::Tracer TR( "protocols.match.Matcher" );
87 static numeric::random::RandomGenerator RG(6527);
88 
89 /// Construction and Destruction
// NOTE(review): the constructor's signature line and one body line are absent
// from this listing (the embedded numbering skips 90 and 99), so only the
// visible member initializers are described here.
// Initial state: same_build_resids_for_all_csts_ is true, the geometric
// constraint count is zero, and the remaining boolean members start as false.
91  same_build_resids_for_all_csts_( true ),
92  n_geometric_constraints_( 0 ),
93  read_gridlig_file_( false ),
94  use_input_sc_( false ),
95  dynamic_grid_refinement_( false ),
96  output_matches_as_singular_downstream_positioning_( false ),
97  check_potential_dsbuilder_incompatibility_( false )
98 {
100 }
101 
103 
// Stores a newly allocated copy of `pose` in upstream_pose_.
// NOTE(review): the signature line is absent from this listing; this is
// presumably set_upstream_pose, which is called elsewhere in the file with a
// pose argument -- confirm against the header.
105 {
106  upstream_pose_ = new core::pose::Pose( pose );
107 }
108 
// Stores a newly allocated copy of `pose` in downstream_pose_ and records
// `orientation_atoms` in downstream_orientation_atoms_.
// Exactly three orientation atoms must be supplied (enforced by runtime_assert).
// NOTE(review): the line carrying the function name is absent from this listing.
110  core::pose::Pose const & pose,
111  utility::vector1< core::id::AtomID > orientation_atoms
112 )
113 {
114  runtime_assert( orientation_atoms.size() == 3 );
115 
116  downstream_pose_ = new core::pose::Pose( pose );
117  downstream_orientation_atoms_ = orientation_atoms;
118 }
119 
// Replaces pose_build_resids_ with `resids`, sorts it, and empties
// per_cst_build_resids_.
// NOTE(review): the function-name line (121) and one body line (123) are
// absent from this listing, so any additional state change made there is not
// documented here.
120 void
122 {
124  pose_build_resids_ = resids;
125  std::sort( pose_build_resids_.begin(), pose_build_resids_.end() ); // keep sorted
126  per_cst_build_resids_.resize( 0 );
127 }
128 
// Records the build residues for a single geometric constraint (`cst_id`) and
// refreshes pose_build_resids_ so that it holds the sorted, duplicate-free
// union of its previous contents and the residues given here.
// Requires n_geometric_constraints_ to have been set to a positive value first.
// NOTE(review): the function-name line and body lines 134 and 137 are absent
// from this listing; line 137 sits inside the "first call" branch below and
// presumably sizes per_cst_build_resids_ -- confirm against the real source.
130  Size cst_id,
131  utility::vector1< Size > const & resids
132 )
133 {
135  runtime_assert( n_geometric_constraints_ > 0 ); // n_geometric_constraints_ must be set first
// First per-constraint call: discard any globally specified build residues.
136  if ( per_cst_build_resids_.size() == 0 ) {
138  pose_build_resids_.clear();
139  }
140  per_cst_build_resids_[ cst_id ] = resids;
141  std::sort( per_cst_build_resids_[ cst_id ].begin(), per_cst_build_resids_[ cst_id ].end() );
142 
143  /// keep track of the individual resids that are built from by any geometric constraint
144  std::list< Size > uniq_resids;
145  for ( Size ii = 1; ii <= pose_build_resids_.size(); ++ii ) {
146  uniq_resids.push_back( pose_build_resids_[ ii ] );
147  }
148  for ( Size ii = 1; ii <= per_cst_build_resids_[ cst_id ].size(); ++ii ) {
149  uniq_resids.push_back( per_cst_build_resids_[ cst_id ][ ii ] );
150  }
151 
// sort() followed by unique() leaves one copy of each residue id, in order.
152  uniq_resids.sort();
153  uniq_resids.unique();
154 
155  pose_build_resids_.resize( uniq_resids.size() );
156  std::copy( uniq_resids.begin(), uniq_resids.end(), pose_build_resids_.begin() );
157 
158 }
159 
160 
161 
// Stores `n_constraints` in n_geometric_constraints_.
// NOTE(review): the signature line and most of the body (listing lines
// 166-177) are absent here; only the two commented-out std::fill calls
// survive, so whatever per-constraint containers are sized in the missing
// lines is not documented.
163 {
164  n_geometric_constraints_ = n_constraints;
165 
170  //std::fill( representative_downstream_algorithm_.begin(), representative_downstream_algorithm_.end(), 0 );
174  //std::fill( downstream_builders_.begin(), downstream_builders_.end(), 0 );
178 }
179 
// Registers `restype` as an upstream residue type for geometric constraint
// `cst_id`: a BuildSet for the residue type is added to that constraint's
// ProteinUpstreamBuilder and its index is stored in build_set_id_for_restype_
// under the residue type's name.
// The two assert() calls check that the residue type is a canonical amino
// acid and that it has not already been registered for this constraint.
// NOTE(review): the function-name line, the `restype` parameter line (182) and
// the line that creates `prot_sc_builder` inside the first branch (191) are
// absent from this listing.
181  Size cst_id,
183 )
184 {
185  /// ASSUMPTION: matching from protein sidechain
186  assert( restype->aa() <= core::chemical::num_canonical_aas );
187  assert( build_set_id_for_restype_[ cst_id ].find( restype->name() ) == build_set_id_for_restype_[ cst_id ].end() );
188 
189 
// No builder exists yet for this constraint: configure one and store it.
190  if ( ! upstream_builders_[ cst_id ] ) {
192  /// default to dunbrack sampler
193  prot_sc_builder->set_sampler( new upstream::DunbrackSCSampler );
194  prot_sc_builder->set_use_input_sidechain( use_input_sc_ );
195  upstream_builders_[ cst_id ] = prot_sc_builder;
196  }
197 
198  upstream::BuildSet build_set;
199  build_set.set_residue_type( restype, restype->aa() == core::chemical::aa_gly ); // HACK gly means backbone only
200 
201  runtime_assert( dynamic_cast< upstream::ProteinUpstreamBuilder * > ( upstream_builders_[ cst_id ].get() ) );
202 
203  upstream::ProteinUpstreamBuilderOP prot_sc_builder( dynamic_cast< upstream::ProteinUpstreamBuilder * > ( upstream_builders_[ cst_id ].get() ));
204  prot_sc_builder->add_build_set( build_set );
// The new build set is the last one added, so n_build_sets() is its index.
205  build_set_id_for_restype_[ cst_id ][ restype->name() ] = prot_sc_builder->n_build_sets();
206 
207 }
208 
// Sets the sampling strategy `strat` for chi angle `chi` on the build set that
// constraint `cst_id` holds for `restype`.
// Exits via runtime_assert if the residue type has not been registered for the
// constraint or if the constraint's upstream builder is not a
// ProteinUpstreamBuilder.
// NOTE(review): the function-name line and the `restype` parameter line (211)
// are absent from this listing.
210  Size cst_id,
212  Size chi,
213  upstream::SampleStrategyData const & strat
214 )
215 {
216  runtime_assert( build_set_id_for_restype_[ cst_id ].find( restype->name() )
217  != build_set_id_for_restype_[ cst_id ].end() );
218 
219  //Size build_set_id = build_set_id_for_restype_[ cst_id ][ restype->name() ];
220  runtime_assert( dynamic_cast< upstream::ProteinUpstreamBuilder * > ( upstream_builders_[ cst_id ].get() ) );
221  upstream::ProteinUpstreamBuilderOP prot_sc_builder( dynamic_cast< upstream::ProteinUpstreamBuilder * > ( upstream_builders_[ cst_id ].get() ));
222 
223  upstream::BuildSet & build_set = prot_sc_builder->build_set( restype );
224 
225  build_set.set_sample_strategy_for_chi( chi, strat );
226 }
227 
// Sets `fa_dun_cutoff` on the build set that constraint `cst_id` holds for
// `restype`.
// NOTE(review): the preconditions here use assert() whereas the neighbouring
// chi-sample-strategy setter uses runtime_assert for the same checks; with
// standard assert these checks disappear when NDEBUG is defined -- confirm
// that this is intended.
// NOTE(review): the function-name line and the `restype` parameter line (231)
// are absent from this listing.
228 void
230  Size cst_id,
232  core::Real fa_dun_cutoff
233 )
234 {
235  assert( build_set_id_for_restype_[ cst_id ].find( restype->name() )
236  != build_set_id_for_restype_[ cst_id ].end() );
237 
238  assert( dynamic_cast< upstream::ProteinUpstreamBuilder * > ( upstream_builders_[ cst_id ].get() ) );
239 
240  upstream::ProteinUpstreamBuilderOP prot_sc_builder( dynamic_cast< upstream::ProteinUpstreamBuilder * > ( upstream_builders_[ cst_id ].get() ));
241  upstream::BuildSet & build_set = prot_sc_builder->build_set( restype );
242  build_set.set_fa_dun_cutoff( fa_dun_cutoff );
243 }
244 
245 
// Adds one set of external-geometry samples (`exgeom`, identified by
// `exgeom_id`) to the ClassicMatchAlgorithm attached to the build set that
// constraint `cst_id` holds for `restype`, creating and registering that
// algorithm first if the build set has none.
// The six sample lists of `exgeom` are written to the tracer before any work
// is done.
// Requires exactly three upstream launch atoms and three downstream atoms, and
// a residue type already registered for the constraint.
// NOTE(review): several lines are absent from this listing: the function-name
// line, the `restype` and `exgeom` parameter lines (248, 251), the start of
// the statement at 301 (its visible arguments suggest it obtains `ds_builder`,
// which is used at the end) and the line that creates `match_algorithm` (314).
247  Size cst_id,
249  utility::vector1< std::string > const & upstream_launch_atoms,
250  utility::vector1< core::id::AtomID > const & downstream_3atoms,
252  Size const exgeom_id,
253  bool enumerate_ligand_rotamers /* = false */,
254  bool catalytic_bond /*= false */
255 )
256 {
// Report every sample value, one tracer line per degree of freedom.
257  TR << " Adding Classical Match Algorithm with geometry samples: " << std::endl;
258  TR << " tor_U3D1:";
259  for ( Size ii = 1; ii <= exgeom.n_tor_U3D1_samples(); ++ii ) {
260  TR << " " << exgeom.tor_U3D1_samples()[ ii ];
261  }
262  TR << std::endl;
263 
264  TR << " ang_U2D1:";
265  for ( Size ii = 1; ii <= exgeom.n_ang_U2D1_samples(); ++ii ) {
266  TR << " " << exgeom.ang_U2D1_samples()[ ii ];
267  }
268  TR << std::endl;
269 
270  TR << " dis_U1D1:";
271  for ( Size ii = 1; ii <= exgeom.n_dis_U1D1_samples(); ++ii ) {
272  TR << " " << exgeom.dis_U1D1_samples()[ ii ];
273  }
274  TR << std::endl;
275 
276  TR << " tor_U2D2:";
277  for ( Size ii = 1; ii <= exgeom.n_tor_U2D2_samples(); ++ii ) {
278  TR << " " << exgeom.tor_U2D2_samples()[ ii ];
279  }
280  TR << std::endl;
281 
282  TR << " ang_U1D2:";
283  for ( Size ii = 1; ii <= exgeom.n_ang_U1D2_samples(); ++ii ) {
284  TR << " " << exgeom.ang_U1D2_samples()[ ii ];
285  }
286  TR << std::endl;
287 
288  TR << " tor_U1D3:";
289  for ( Size ii = 1; ii <= exgeom.n_tor_U1D3_samples(); ++ii ) {
290  TR << " " << exgeom.tor_U1D3_samples()[ ii ];
291  }
292  TR << std::endl;
293 
294 
295  runtime_assert( upstream_launch_atoms.size() == 3 );
296  runtime_assert( downstream_3atoms.size() == 3 );
297 
298  runtime_assert( build_set_id_for_restype_[ cst_id ].find( restype->name() )
299  != build_set_id_for_restype_[ cst_id ].end() );
300 
302  cst_id, restype, upstream_launch_atoms,
303  downstream_3atoms, enumerate_ligand_rotamers, catalytic_bond );
304 
305  //Size build_set_id = build_set_id_for_restype_[ cst_id ][ restype->name() ];
// The runtime_assert verifies the dynamic type, so the static_cast that
// follows it is applied only to a pointer already checked.
306  runtime_assert( dynamic_cast< upstream::ProteinUpstreamBuilder * > (
307  upstream_builders_[ cst_id ].get() ) );
308  upstream::ProteinUpstreamBuilderOP prot_sc_builder(
309  static_cast< upstream::ProteinUpstreamBuilder * > ( upstream_builders_[ cst_id ].get() ));
310 
311  upstream::BuildSet & build_set = prot_sc_builder->build_set( restype );
312 
// First sample set for this build set: attach a new algorithm and record it
// in the per-constraint, representative and global algorithm containers.
313  if ( ! build_set.has_algorithm() ) {
315  match_algorithm->set_residue_type( restype );
316  build_set.set_downstream_algorithm( match_algorithm );
317 
318  downstream_algorithms_[ cst_id ].push_back( match_algorithm );
319  representative_downstream_algorithm_[ cst_id ] = match_algorithm;
320  all_downstream_algorithms_.push_back( match_algorithm );
321  }
322 
323  runtime_assert( dynamic_cast< downstream::ClassicMatchAlgorithm * > ( & build_set.algorithm() ) );
324  downstream::ClassicMatchAlgorithm & algorithm( static_cast< downstream::ClassicMatchAlgorithm & > (build_set.algorithm() ) );
325 
326  algorithm.add_external_geom_sampler(
327  exgeom,
328  exgeom_id,
329  upstream_launch_atoms[ 1 ],
330  upstream_launch_atoms[ 2 ],
331  upstream_launch_atoms[ 3 ],
332  ds_builder
333  );
334 
335 }
336 
337 /// Initialize a secondary matcher object based on the
338 /// geometry from the upstream to the downstream residue types.
// For constraint `geom_cst_id`, makes sure the build set for
// `candidate_restype` carries a SecondaryMatcherToUpstreamResidue algorithm
// targeting `target_geom_cst_id`, adds `target_restype` to it, and attaches a
// residue-pair evaluator produced by SecMatchEvaluatorFactory.
// Requires three candidate and three target atom ids, and a candidate residue
// type already registered for the constraint.
// NOTE(review): the function-name line and the `mcfi` parameter line (346) are
// absent from this listing.
340  Size geom_cst_id,
341  Size target_geom_cst_id,
342  core::chemical::ResidueTypeCOP candidate_restype,
343  core::chemical::ResidueTypeCOP target_restype,
344  utility::vector1< Size > const & candidate_atids,
345  utility::vector1< Size > const & target_atids,
347  std::string SecMatchStr,
348  core::pose::Pose const & upstream_pose
349 )
350 {
351  using namespace downstream;
352 
353  runtime_assert( candidate_atids.size() == 3 );
354  runtime_assert( target_atids.size() == 3 );
355 
356  runtime_assert( build_set_id_for_restype_[ geom_cst_id ].find( candidate_restype->name() )
357  != build_set_id_for_restype_[ geom_cst_id ].end() );
358 
359  runtime_assert( dynamic_cast< upstream::ProteinUpstreamBuilder * > (
360  upstream_builders_[ geom_cst_id ].get() ) );
361  upstream::ProteinUpstreamBuilderOP prot_sc_builder(
362  static_cast< upstream::ProteinUpstreamBuilder * > ( upstream_builders_[ geom_cst_id ].get() ));
363 
364  upstream::BuildSet & build_set = prot_sc_builder->build_set( candidate_restype );
365 
// Create and register the secondary-match algorithm only once per build set.
366  if ( ! build_set.has_algorithm() ) {
367  SecondaryMatcherToUpstreamResidueOP secondary_match_algorithm
368  = new SecondaryMatcherToUpstreamResidue( geom_cst_id );
369  build_set.set_downstream_algorithm( secondary_match_algorithm );
370  secondary_match_algorithm->set_target_geomcst_id( target_geom_cst_id );
371  downstream_algorithms_[ geom_cst_id ].push_back( secondary_match_algorithm );
372  representative_downstream_algorithm_[ geom_cst_id ] = secondary_match_algorithm;
373  all_downstream_algorithms_.push_back( secondary_match_algorithm );
374  }
375 
376  runtime_assert( dynamic_cast< downstream::SecondaryMatcherToUpstreamResidue * > ( & build_set.algorithm() ) );
377  downstream::SecondaryMatcherToUpstreamResidue & algorithm( static_cast< downstream::SecondaryMatcherToUpstreamResidue & > (build_set.algorithm() ) );
378 
379  algorithm.add_target_restype( target_restype );
380 
381  //Old code: replaced by the SecMatchResiduePairEvaluatorOP (SRPE) code below.
382  //SRPE is the parent of ScoringSecMatchRPE and GeometrySecMatchRPE.
383  /*
384  GeometrySecMatchRPEOP geom_evaluator = new GeometrySecMatchRPE( *mcfi, target_atids, candidate_atids );
385  for ( Size ii = 1; ii <= geom_evaluator->atom_geom_rpes().size(); ++ii ) {
386  TR << " Upstream 2ndary Match: " << geom_evaluator->atom_geom_rpes()[ ii ]->print( candidate_restype, target_restype ) << std::endl;
387  }
388  algorithm.add_evaluator_for_target_restype( target_restype, geom_evaluator, mcfi->index() );
389  */
390 
391  //Author:Kui Chan 101409
392  //Description: added score term evaluator and combine with geometrySecMatchRPE
393  SecMatchResiduePairEvaluatorOP secMatch_evaluator
394  = SecMatchEvaluatorFactory::create_SecMatchResiduePairEvaluatorOP( *mcfi, target_atids, candidate_atids,
395  SecMatchStr, upstream_pose );
396  algorithm.add_evaluator_for_target_restype( target_restype, secMatch_evaluator, mcfi->index() );
397  //END Kui
398 }
399 
// For constraint `geom_cst_id`, makes sure the build set for
// `candidate_restype` carries a SecondaryMatcherToDownstreamResidue algorithm
// for `downstream_restype` and attaches a residue-pair evaluator produced by
// SecMatchEvaluatorFactory.
// When the algorithm is first created and `catalytic_bond` is true, four
// catalytic atoms are recorded on it: candidate atoms 2 and 1 followed by
// target atoms 1 and 2.
// Requires three candidate and three target atom ids, and a candidate residue
// type already registered for the constraint.
// NOTE(review): the function-name line and parameter lines 407 and 409 are
// absent from this listing; the body uses `mcfi` and `upstream_pose`, which
// are presumably declared on those lines -- confirm against the header.
400 void
402  Size geom_cst_id,
403  core::chemical::ResidueTypeCOP candidate_restype,
404  core::chemical::ResidueTypeCOP downstream_restype,
405  utility::vector1< Size > const & candidate_atids,
406  utility::vector1< Size > const & target_atids,
408  std::string SecMatchStr,
410  bool catalytic_bond
411 )
412 {
413  using namespace downstream;
414 
415  runtime_assert( candidate_atids.size() == 3 );
416  runtime_assert( target_atids.size() == 3 );
417 
418  runtime_assert( build_set_id_for_restype_[ geom_cst_id ].find( candidate_restype->name() )
419  != build_set_id_for_restype_[ geom_cst_id ].end() );
420 
421  runtime_assert( dynamic_cast< upstream::ProteinUpstreamBuilder * > (
422  upstream_builders_[ geom_cst_id ].get() ) );
423  upstream::ProteinUpstreamBuilderOP prot_sc_builder(
424  static_cast< upstream::ProteinUpstreamBuilder * > ( upstream_builders_[ geom_cst_id ].get() ));
425 
426  upstream::BuildSet & build_set = prot_sc_builder->build_set( candidate_restype );
427 
// Create and register the secondary-match algorithm only once per build set.
428  if ( ! build_set.has_algorithm() ) {
429  SecondaryMatcherToDownstreamResidueOP secondary_match_algorithm
430  = new SecondaryMatcherToDownstreamResidue( upstream_pose_, geom_cst_id );
431  build_set.set_downstream_algorithm( secondary_match_algorithm );
432  secondary_match_algorithm->set_downstream_restype( downstream_restype );
433  downstream_algorithms_[ geom_cst_id ].push_back( secondary_match_algorithm );
434  representative_downstream_algorithm_[ geom_cst_id ] = secondary_match_algorithm;
435  all_downstream_algorithms_.push_back( secondary_match_algorithm );
436  if ( catalytic_bond ) {
437  utility::vector1< core::Size > catalytic_atoms(4,0);
438  catalytic_atoms[ 1 ] = candidate_atids[ 2 ];
439  catalytic_atoms[ 2 ] = candidate_atids[ 1 ];
440  //target = downstream
441  catalytic_atoms[ 3 ] = target_atids[ 1 ];
442  catalytic_atoms[ 4 ] = target_atids[ 2 ];
443  secondary_match_algorithm->set_catalytic_atoms( catalytic_atoms );
444  }
445  }
446 
447  runtime_assert( dynamic_cast< downstream::SecondaryMatcherToDownstreamResidue * > ( & build_set.algorithm() ) );
448  downstream::SecondaryMatcherToDownstreamResidue & algorithm( static_cast< downstream::SecondaryMatcherToDownstreamResidue & > (build_set.algorithm() ) );
449 
450 /*
451  GeometrySecMatchRPEOP geom_evaluator = new GeometrySecMatchRPE( *mcfi, target_atids, candidate_atids );
452  for ( Size ii = 1; ii <= geom_evaluator->atom_geom_rpes().size(); ++ii ) {
453  TR << " Downstream 2ndary Match: " << geom_evaluator->atom_geom_rpes()[ ii ]->print( candidate_restype, downstream_restype ) << std::endl;
454  }
455 
456  algorithm.add_evaluator( geom_evaluator, mcfi->index() );
457 */
458  //Author:Kui Chan 101409
459  //Description: added score term evaluator and combine with geometrySecMatchRPE
460  SecMatchResiduePairEvaluatorOP secMatch_evaluator
461  = SecMatchEvaluatorFactory::create_SecMatchResiduePairEvaluatorOP( *mcfi, target_atids, candidate_atids,
462  SecMatchStr, upstream_pose );
463  algorithm.add_evaluator( secMatch_evaluator, mcfi->index() );
464  //End Kui
465 
466 }
467 
468 
// Writes the lower and upper corners of the bounding box `bb` to the tracer.
// NOTE(review): the signature line (470) and the first body line (472) are
// absent from this listing; going by the log message the missing statement
// presumably stores `bb` as the occupied-space bounding box -- confirm.
469 void
471 {
473  TR << "Set occupied space bounding box: Lower (";
474  TR << bb.lower().x() << ", ";
475  TR << bb.lower().y() << ", ";
476  TR << bb.lower().z() << ") Upper (";
477  TR << bb.upper().x() << ", ";
478  TR << bb.upper().y() << ", ";
479  TR << bb.upper().z() << ")" << std::endl;
480 }
481 
// Four small setters for euclidean_bin_widths_ and euler_bin_widths_.
// NOTE(review): all four signature lines are absent from this listing, so the
// function names and parameter types are not visible. The first two bodies
// assign from a parameter named `width`, and their commented-out std::fill
// calls suggest one value applied to every dimension; the last two assign from
// a parameter named `widths` -- confirm both against the header.

// Assigns `width` to euclidean_bin_widths_.
483 {
484  //std::fill( euclidean_bin_widths_.begin(), euclidean_bin_widths_.end(), width );
485  euclidean_bin_widths_ = width;
486 }
487 
// Assigns `width` to euler_bin_widths_.
489 {
490  //std::fill( euler_bin_widths_.begin(), euler_bin_widths_.end(), width );
491  euler_bin_widths_ = width;
492 }
493 
// Assigns `widths` to euclidean_bin_widths_.
495 {
496  euclidean_bin_widths_ = widths;
497 }
498 
// Assigns `widths` to euler_bin_widths_.
500 {
501  euler_bin_widths_ = widths;
502 }
503 
/// @brief Sets the general overlap tolerance on the bump grid to `permitted_overlap`.
/// @details The upstream pose must already be set (enforced by runtime_assert).
// NOTE(review): the body of the `if` below (listing line 509) is absent from
// this listing; it presumably creates bb_grid_ when none exists yet, since
// bb_grid_ is dereferenced unconditionally right after -- confirm.
504 void Matcher::set_bump_tolerance( Real permitted_overlap )
505 {
506  runtime_assert( upstream_pose_.get() );
507 
508  if ( ! bb_grid_ ) {
510  }
511  bb_grid_->set_general_overlap_tolerance( permitted_overlap );
512 }
513 
// Configures this object from a MatcherTask. The visible steps are: copy the
// task's upstream pose, take over the task's use_input_sc setting, call
// initialize_from_file with the task's enzyme input data, then, for every
// geometric constraint, pass the `native` flag of each constraint-file entry
// to that constraint's ProteinUpstreamBuilder.
// NOTE(review): many lines are absent from this listing (the function-name
// line and lines 520-524, 527, 529-530, 532, 535-537, 541, 546-547, 559,
// 573-575 and 577), which leaves empty branches and unmatched braces below.
// The active-site, bounding-box and downstream-output handling in those lines
// is therefore not documented here.
514 void
516  MatcherTask const & mtask
517 )
518 {
519  set_upstream_pose( *mtask.upstream_pose() );
525 
526  /// active site definition
528  if ( read_gridlig_file_ ) {
531  } else {
533  }
534 
538  }
539 
540  use_input_sc_ = mtask.use_input_sc();
542 
543  initialize_from_file( * mtask.enz_input_data(), mtask );
544 
545  for ( Size ii = 1; ii <= n_geometric_constraints_; ++ii ) {
548 
549  //Kui Native 110809
550  runtime_assert( dynamic_cast< upstream::ProteinUpstreamBuilder * > ( upstream_builders_[ ii ].get() ) );
551  upstream::ProteinUpstreamBuilderOP prot_sc_builder(
552  static_cast< upstream::ProteinUpstreamBuilder * > ( upstream_builders_[ ii ].get() ));
553 
554  toolbox::match_enzdes_util::MatchConstraintFileInfoListCOP constraint_list = (mtask.enz_input_data())->mcfi_list( ii );
555  utility::vector1< core::chemical::ResidueTypeCOP > const & upres( constraint_list->upstream_restypes() );
556 
557  for ( Size jj = 1; jj <= upres.size(); ++jj ) {
558 
560  constraint_list->mcfis_for_upstream_restype( upres[ jj ] ));
561 
// set_native_flag is called once per entry on the same builder, so the value
// from the last entry visited is the last one passed in.
562  for ( Size kk = 1; kk <= jj_mcfis.size(); ++kk ) {
563  //TR << "ii:" << ii << " jj:" << jj << " kk:" << kk << " native:" <<jj_mcfis[ kk ]->native() << std::endl;
564  prot_sc_builder->set_native_flag(jj_mcfis[ kk ]->native());
565  }
566  }
567  //Kui Native 110809
568  }
569 
570  /// Should we use the match_dspos1 output pathway?
571  /// TEMP -- No valid MatchEvaluator yet exists for the match_dspos1 struct; do not use
572  /// the match-by-single-downstream-positioning code until one comes online.
576  for ( Size ii = 1; ii <= mtask.geom_csts_downstream_output().size(); ++ii ) {
578  }
579  }
580 }
581 
582 /// @details Inside the CST::BEGIN blocks, The following ALGORITHM_INFO:: match input data
583 /// may be provided to give additional data to the matcher. Rotamer building
584 /// instructions for particular geometric constraints may be given.
585 /// Here is an example of the kinds of geometric data. Each CHI_STRATEGY line is appropriate
586 /// on its own, but they are not appropriate together. Lines beginning with "#" are comments
587 /// describing the meaning of each of the lines.
588 ///
589 /// ALGORITHM_INFO:: match
590 /// # If your upstream residue includes a proton chi (e.g. SER/THR/TYR) but the geometry
591 /// # of the downstream partner does not depend on the location of the proton, then
592 /// # use the following line to avoid enumerating rotamers that differ only in their proton coordinate.
593 /// IGNORE_UPSTREAM_PROTON_CHI
594 ///
595 /// # Secondary Matching:
596 /// # You can activate secondary matching, a continuous version of the classic discrete algorithm,
597 /// # by adding the line
598 /// SECONDARY_MATCH: DOWNSTREAM
599 ///
600 /// #or
601 /// SECONDARY_MATCH: UPSTREAM_CST 2
602 ///
603 /// # which instead of building its own hits and hashing them examines the hits generated in
604 /// # previous rounds for compatibility with the geometry that you're seeking.
605 /// # When performing secondary matching, it is not required that you specify all 6 degrees
606 /// # of freedom. If you specify only a subset, the geometry of only the subset you've specified
607 /// # will be examined (e.g. if you don't care about torsion_AB, don't specify it
608 /// # in the CST::BEGIN/CST::END block).
609 /// # You can perform secondary matching to the ligand (the downstream target) or to an upstream
610 /// # residue whose geometry was constructed by an earlier geometric constraint. In the example
611 /// # above, the geometric constraint pointed to is #2.
612 /// # The first geometric constraint cannot use secondary matching, it must always use the discrete
613 /// # classic match algorithm. (A later geometric constraint may of course perform secondary
614 /// # matching to the hits produced by the first geometric constraint. At that point the first
615 /// # constraint is playing the same role in upstream matching as the ligand plays in downstream matching)
616 /// # END Secondary Matching comments.
617 ///
618 /// # Below: chi sample strategies -- "1" is for chi-1
619 /// # These are the traditional ex?::level options ( ? == 1, 2, 3 or 4 )
620 /// CHI_STRATEGY:: CHI 1 EX_ONE_STDDEV
621 /// CHI_STRATEGY:: CHI 1 EX_ONE_HALF_STEP_STDDEV
622 /// CHI_STRATEGY:: CHI 1 EX_TWO_FULL_STEP_STDDEVS
623 /// CHI_STRATEGY:: CHI 1 EX_TWO_HALF_STEP_STDDEVS
624 /// CHI_STRATEGY:: CHI 1 EX_FOUR_HALF_STEP_STDDEVS
625 /// CHI_STRATEGY:: CHI 1 EX_THREE_THIRD_STEP_STDDEVS
626 /// CHI_STRATEGY:: CHI 1 EX_SIX_QUARTER_STEP_STDDEVS
627 ///
628 /// # Below: The "AA" field, followed by the 3-letter AA code gives the sub-specification for
629 /// # a block that contains multiple amino acids. If this block allowed both an ASN and a GLN, then
630 /// # the first line would apply to GLN rotamers only and the second line to ASN rotamers only.
631 /// # "AA" fields may be included with
632 /// # any of the CHI_STRATEGY options listed here.
633 /// CHI_STRATEGY:: AA GLN CHI 2 EX_SIX_QUARTER_STEP_STDDEVS
634 /// CHI_STRATEGY:: AA ASN CHI 2 EX_ONE_STDDEV
635 ///
636 /// # Everything below: additional chi sample strategies unique to the matcher.
637 ///
638 /// CHI_STRATEGY:: CHI 1 STEP_WITHIN_SD_RANGE STEP 3.0
639 /// # the above line says "step by 3 degrees within a single standard deviation
640 /// # to the left and right of the mean for chi 1"
641 ///
642 /// CHI_STRATEGY:: CHI 1 STEP_WITHIN_SD_RANGE STEP 3.0 SD_RANGE 2.5
643 /// # the above line says "step by 3 degrees within 2.5 standard deviations to
644 /// # the left and the right of the mean for chi 1"
645 ///
646 /// CHI_STRATEGY:: CHI 2 AA HIS NON_ROTAMERIC_CHI_EXPANSION N_SAMPLES 3 REQUISIT_PROBABILITY 0.10
647 /// # the line above says that "for histidine chi 2 (which is non-rotameric in the 2010 Dunbrack library)
648 /// # take 3 extra samples to the left and right of the median chi value inside the non-rotameric chi well
649 /// # for rotamer wells that have a probability at least 0.1". The 2010 Dunbrack library must be active.
650 /// # (the -dun10 flag should be on the command line).
651 ///
652 /// CHI_STRATEGY:: CHI 2 AA HIS NON_ROTAMERIC_CHI_EXPANSION N_SAMPLES 3
653 /// # the line above is similar to the previous command, but leaves off the REQUISIT_PROBABILITY
654 /// # which sets a default REQUISIT_PROBABILITY of 2 / #chi-bins. For HIS, this is
655 /// # 2 / 36th, since there are 12 bins for chi 2 and 3 bins for chi1 (36 chi bins total).
656 ///
657 ///
658 /// ALGORITHM_INFO::END
659 ///
662  MatcherTask const & mtask
663 )
664 {
665  //std::cout << "APL DEBUG Matcher.cc::initialize_from_file begin" << std::endl;
666  using namespace toolbox::match_enzdes_util;
667 
668  runtime_assert( upstream_pose_ );
669  runtime_assert( downstream_pose_ );
670  TR << "Matcher::initialize_from_file: n geometric constraints: " << enz_data.mcfi_lists_size() << std::endl;
671 
673 
674  for ( Size ii = 1; ii <= n_geometric_constraints_; ++ii ) {
675  TR << "Begin constraint" << ii << std::endl;
676 
677  MatchConstraintFileInfoListCOP constraint_list = enz_data.mcfi_list( ii );
678 
679  utility::vector1< core::chemical::ResidueTypeCOP > const & upres( constraint_list->upstream_restypes() );
680 
681  for ( Size jj = 1; jj <= upres.size(); ++jj ) {
682  add_upstream_restype_for_constraint( ii, upres[ jj ] );
683 
685  constraint_list->mcfis_for_upstream_restype( upres[ jj ] ));
686  TR << " Upstream residue type " << upres[ jj ]->name() << " for geometric constraint #" << ii << std::endl;
687 
688  for ( Size kk = 1; kk <= jj_mcfis.size(); ++kk ) {
689 
690  //Author: Kui Chan Date:101309
691  std::string SecMatchStr("");
692 
693  bool secondary_matching( false );
694  bool secondary_match_upstream_residue( false );
695  Size secondary_match_upstream_geomcst_id( 0 );
696 
697  utility::vector1< bool > chi_sample_data_in_file( upres[ jj ]->nchi(), false );
698  //utility::vector1< SampleStrategyData > file_chi_sample_data( upres[ jj ]->nchi() );
699 
700  /// Process ALGORITHM_INFO data
701  std::map< std::string, utility::vector1< std::string > > const &
702  alg_info( jj_mcfis[ kk ]->algorithm_inputs() );
703  if ( alg_info.find( "match" ) != alg_info.end() ) {
704  utility::vector1< std::string > const & info( alg_info.find( "match" )->second );
705 
706  /// Line by line. Currently, there are no checks for consistency across multiple lines.
707  for ( Size ll = 1; ll <= info.size(); ++ll ) {
708  std::string llstr = info[ ll ];
709  std::istringstream llstream( llstr );
710  std::string first;
711  llstream >> first;
712  if( first == "MAX_DUNBRACK_ENERGY" ){
713  core::Real cutoff;
714  llstream >> cutoff;
715  TR << "Setting dunbrack energy cutoff for restype " << upres[jj]->name() << " in constraint " << ii << " to " << cutoff << "." << std::endl;
716  set_fa_dun_cutoff_for_constraint( ii, upres[jj], cutoff );
717  }
718  else if ( first == "IGNORE_UPSTREAM_PROTON_CHI" ) {
719  /// iterate across the proton chi, set their sample strategy to no_samples.
721  for ( Size mm = 1; mm <= upres[ jj ]->n_proton_chi(); ++mm ) {
722  Size mmchi = upres[ jj ]->proton_chi_2_chi( mm );
723  if ( chi_sample_data_in_file[ mmchi ] ) {
724  TR << " WARNING:: Already encountered chi sampling strategy for proton chi " << mmchi << " and will NOT ignore this proton chi" << std::endl;
725  } else {
726  set_sample_startegy_for_constraint( ii, upres[ jj ], mmchi, nosamps );
727  TR << " ALGORITHM_INFO:: match -- Ignoring proton chi for " << upres[ jj ]->name() << " chi # " << mmchi << std::endl;
728  chi_sample_data_in_file[ mmchi ] = true;
729  }
730  }
731  } else if ( first == "CHI_STRATEGY::" ) {
732  std::string chi_string;
733  llstream >> chi_string;
734  if ( llstream.bad() ) {
735  utility_exit_with_message( "Expected 'CHI' or 'AA' following CHI_STRATEGY:: on line '" + llstr + "'" );
736  }
737  if ( chi_string == "AA" ) {
738  std::string aa3;
739  llstream >> aa3;
740  //std::map< std::string, core::chemical::AA >::const_iterator iter = core::chemical::name2aa().find( aa3 );
741  //if ( iter == core::chemical::name2aa().end() ) {
742  // utility_exit_with_message( "Expected amino acid 3-letter code following 'CHI_STRATEGY:: AA ' but read " + aa3 );
743  //}
744  //core::chemical::AA aa = *iter;
746  bool aa_matches_any = false;
747  for ( Size mm = 1; mm <= upres.size(); ++mm ) {
748  if ( upres[ mm ]->aa() == aa ) {
749  aa_matches_any = true;
750  break;
751  }
752  }
753  if ( ! aa_matches_any ) {
754  std::cerr << "ERROR: amino acid " << aa3 << " on line\n" << llstr << "\nis not accessible for this geometric constraint." << std::endl;
755  std::cerr << "Available amino acids:";
756  for ( Size mm = 1; mm <= upres.size(); ++mm ) {
757  std::cerr << " " << upres[ mm ]->name();
758  }
759  std::cerr << std::endl;
760  utility_exit_with_message( "Amino acid restriction in CHI_STRATEGY:: block is invalid" );
761  }
762  if ( aa != upres[ jj ]->aa() ) {
763  //TR << " Ignoring line '" << llstr << "' in processing amino acid " << upres[ jj ]->name() << std::endl;
764  continue;
765  }
766 
767  llstream >> chi_string;
768  }
769 
770  if ( chi_string != "CHI" ) {
771  utility_exit_with_message( "Expected 'CHI' following CHI_STRATEGY:: on line '" + llstr + "'" );
772  }
773  Size which_chi;
774  llstream >> which_chi;
775  if ( which_chi > upres[ jj ]->nchi() ) {
776  TR << "WARNING: Ignoring rotamer sampling strategy data for chi # "
777  << which_chi
778  << " for residue type " << upres[ jj ]->name() << " because it only has " << upres[ jj ]->nchi() << " chi angles." << std::endl;
779  continue;
780  }
781  if ( chi_sample_data_in_file[ which_chi ] ) {
782  TR << " WARNING:: Repeat chi info for chi " << which_chi << " is being ignored!" << std::endl;
783  TR << " WARNING:: Ignoring: '" << llstr << "'" << std::endl;
784  continue;
785  }
786  chi_sample_data_in_file[ which_chi ] = true;
787  if ( llstream.bad() ) {
788  utility_exit_with_message( "Error parsing CHI_STRATEGY. Expected an "
789  "integer following 'CHI_STRATEGY::' on line '" + llstr );
790  }
791  std::string strategy;
792  llstream >> strategy;
793  if ( core::pack::task::is_rot_sample_name( strategy ) ) {
796  stratdat.set_sample_level( sample_level );
797  set_sample_startegy_for_constraint( ii, upres[ jj ], which_chi, stratdat );
798 
799  TR << " ALGORITHM_INFO:: match -- chi sampling strategy " << strategy << " for chi # " << which_chi << std::endl;
800 
801  } else if ( strategy == "STEP_WITHIN_SD_RANGE" ) {
802  /// PARSE SD_RANGE
803  Real step_size( 1.0 ), sd_range( 1.0 );
804  std::string step, range;
805  llstream >> step;
806  if ( step != "STEP" ) {
807  std::cerr << "ERROR:: Bad line in CHI_STRATEGY: \"" << llstr << "\"" << std::endl;
808  utility_exit_with_message( "While parsing CHI_STRATEGY:: STEP_WITHIN_SD_RANGE, expected \"STEP\" after \"STEP_WITHIN_SD_RANGE\"" );
809  }
810  if ( llstream.bad() ) {
811  std::cerr << "ERROR:: Bad line in CHI_STRATEGY: \"" << llstr << "\"" << std::endl;
812  utility_exit_with_message( "While parsing CHI_STRATEGY:: STEP_WITHIN_SD_RANGE, unexpected EOF" );
813  }
814  llstream >> step_size;
815  if ( llstream.bad() ) {
816  std::cerr << "ERROR:: Bad line in CHI_STRATEGY: \"" << llstr << "\"" << std::endl;
817  utility_exit_with_message( "While parsing CHI_STRATEGY:: STEP_WITHIN_SD_RANGE. Could not read step size" );
818  }
819  if ( step_size == 0.0 ) {
820  std::cerr << "ERROR:: Bad line in CHI_STRATEGY: \"" << llstr << "\"" << std::endl;
821  utility_exit_with_message( "While parsing CHI_STRATEGY:: STEP_WITHIN_SD_RANGE. Invalid step size of 0.0" );
822  }
823  llstream >> range;
824  if ( range != "" ) {
825  if ( range != "SD_RANGE" ) {
826  std::cerr << "ERROR:: Bad line in CHI_STRATEGY: \"" << llstr << "\"" << std::endl;
827  utility_exit_with_message( "While parsing CHI_STRATEGY:: STEP_WITHIN_SD_RANGE. Expected to read SD_RANGE to specify the standard deviation range" );
828  }
829  llstream >> sd_range;
830  if ( llstream.bad() ) {
831  std::cerr << "ERROR:: Bad line in CHI_STRATEGY: \"" << llstr << "\"" << std::endl;
832  utility_exit_with_message( "While parsing CHI_STRATEGY:: STEP_WITHIN_SD_RANGE. Could not read standard-deviation range" );
833  }
834  if ( sd_range <= 0.0 ) {
835  std::cerr << "ERROR:: Bad line in CHI_STRATEGY: \"" << llstr << "\"" << std::endl;
836  utility_exit_with_message( "While parsing CHI_STRATEGY:: STEP_WITHIN_SD_RANGE. Invalid standard-deviation range (must be positive). Read: " + utility::to_string( sd_range ) );
837  }
838  } else {
839  /// Implicit sd_range of 1.0
840  sd_range = 1.0;
841  }
844  stratdat.set_step_size( step_size );
845  stratdat.set_sd_range( sd_range );
846  set_sample_startegy_for_constraint( ii, upres[ jj ], which_chi, stratdat );
847 
848  TR << " ALGORITHM_INFO:: match -- chi sampling strategy STEP_WITHIN_SD_RANGE with step_size= " << step_size << " degrees across " << sd_range << " standard deviations for chi # " << which_chi << std::endl;
849  } else if ( strategy == "NON_ROTAMERIC_CHI_EXPANSION" ) {
850  Size npossiblerots( 0 );
851  /// Expand the number of rotamers for a non-rotameric chi.
852  /// 1st check to make sure that this chi is nonrotameric!
853  {/// SCOPE
854 
855  using namespace core::scoring;
856  using namespace core::pack::dunbrack;
858  SingleResidueRotamerLibraryCAP res_rotlib( rotlib.get_rsd_library( *upres[ jj ] ) );
859 
860  if ( res_rotlib != 0 ) {
861 
863  dynamic_cast< SingleResidueDunbrackLibrary const * >
864  ( res_rotlib.get() ));
865 
866  if ( dun_rotlib == 0 ) {
867  utility_exit_with_message( "Failed to retrieve a Dunbrack rotamer library for AA: " +
868  utility::to_string( upres[ jj ]->aa() ) + " named " + upres[ jj ]->name() );
869  }
870  npossiblerots = dun_rotlib->n_rotamer_bins();
871  if ( npossiblerots == 0 ) {
872  std::cerr << "Error while reading line: " << llstr << std::endl;
873  utility_exit_with_message( "Rotamer library for " + upres[ jj ]->name() + " says it contains no rotamers" );
874  }
875  if ( which_chi != dun_rotlib->nchi() ) {
876  utility_exit_with_message( "Cannot treat chi " + utility::to_string( which_chi ) +
877  " on residue " + upres[ jj ]->name() +
878  " as non-rotameric since there are " + utility::to_string( dun_rotlib->nchi() ) +
879  " chi in the library, and the last chi is the only one that could be non-rotameric" );
880  }
881  bool failed_cast = false;
882  switch ( dun_rotlib->nchi() ) {
883  case 2: {
886  ( dun_rotlib.get() );
887  failed_cast = sr2 == 0;
888  } break;
889  case 3: {
892  ( dun_rotlib.get() );
893  failed_cast = sr3 == 0;
894  } break;
895  default: {
896  utility_exit_with_message( "While parsing CHI_STRATEGY::NON_ROTAMERIC_CHI_EXPANSION\n"
897  "All semi-rotameric libraries have 2 or 3 chi, but the library for "+
898  upres[ jj ]->name() + " has " + utility::to_string( dun_rotlib->nchi() ) + " chi." );
899  }
900  }
901  if ( failed_cast ) {
902  utility_exit_with_message( "While parsing CHI_STRATEGY::NON_ROTAMERIC_CHI_EXPANSION\n"
903  "Failed to find a semi-rotameric rotamer library for " + upres[ jj ]->name() +
904  " (Did you forget the -dun10 flag?)\n"
905  "The following amino acids define semi-rotameric rotamer libraries: DEFHNQ" );
906  }
907  } else {
908  utility_exit_with_message( "While parsing CHI_STRATEGY::NON_ROTAMERIC_CHI_EXPANSION\n"
909  "Failed to find a rotamer library for " + upres[ jj ]->name() );
910  }
911  } // scope to check we're looking at a semi-rotameric rotamer library.
912  std::string nsamps;
913  Size nsamples;
914  if ( ! llstream.good() ) {
915  std::cerr << "Error reading line: " << llstr << std::endl;
916  utility_exit_with_message( "While parsing CHI_STRATEGY::NON_ROTAMERIC_CHI_EXPANSION\n"
917  "Expected to read N_SAMPLES after NON_ROTAMERIC_CHI_EXPANSIONbut reached an end of line");
918  }
919  llstream >> nsamps;
920  if ( nsamps != "N_SAMPLES" ) {
921  std::cerr << "Error reading line: " << llstr << std::endl;
922  utility_exit_with_message( "While parsing CHI_STRATEGY::NON_ROTAMERIC_CHI_EXPANSION\n"
923  "Expected to read N_SAMPLES after NON_ROTAMERIC_CHI_EXPANSION but found '" + nsamps + "'");
924  }
925  llstream >> nsamples;
926  if ( !llstream ) {
927  std::cerr << "Error reading line: " << llstr << std::endl;
928  utility_exit_with_message( "While parsing CHI_STRATEGY::NON_ROTAMERIC_CHI_EXPANSION\n"
929  "Expected to read an integer following N_SAMPLES but could not" );
930  }
931  std::string minprob;
932  Real minprobability( 2.0 / npossiblerots );
933  llstream >> minprob;
934  if ( minprob != "" ) {
935  if ( minprob != "REQUISIT_PROBABILITY" ) {
936  std::cerr << "Error reading line: " << llstr << std::endl;
937  utility_exit_with_message( "While parsing CHI_STRATEGY::NON_ROTAMERIC_CHI_EXPANSION\n"
938  "Expected end-of-line or 'REQUISIT_PROBABILITY' after reading the number of samples, but found '" + minprob + "'" );
939  }
940  if ( ! llstream.good() ) {
941  std::cerr << "Error reading line: " << llstr << std::endl;
942  utility_exit_with_message( "While parsing CHI_STRATEGY::NON_ROTAMERIC_CHI_EXPANSION\n"
943  "Expected to read a probability following the 'REQUISIT_PROBABILITY' string but found an end-of-line" );
944  }
945  llstream >> minprobability;
946  if ( llstream.bad() ) {
947  std::cerr << "Error reading line: " << llstr << std::endl;
948  utility_exit_with_message( "While parsing CHI_STRATEGY::NON_ROTAMERIC_CHI_EXPANSION\n"
949  "Expected to read a probability following the 'REQUISIT_PROBABILITY' string but could not" );
950  }
951  }
952  // OK -- Now set the chi-sample strategy.
955  stratdat.set_n_samples_per_side_of_nrchi_bin( nsamples );
956  stratdat.set_nrchi_prob_minimum_for_extra_samples( minprobability );
957  set_sample_startegy_for_constraint( ii, upres[ jj ], which_chi, stratdat );
958  TR << " ALGORITHM_INFO:: match -- chi sampling strategy NON_ROTAMERIC_CHI_EXPANSION with nsteps= " << nsamples << " for rotamers with a probability better than " << minprobability << std::endl;
959  } else {
960  utility_exit_with_message( "While parsing CHI_STRATEGY:: unsupported sample strategy: " + strategy + " for chi " + utility::to_string( which_chi ) );
961  }
962  } else if ( first == "SECONDARY_MATCH:" ) {
963  if ( ii == 1 ) {
964  std::cerr << "ERROR Reading line " << llstr << " " << " for geometric constraint " << ii << std::endl;
965  utility_exit_with_message( "Seconary matching cannot be chosen for the first geometric constraint!" );
966  }
967  if ( ! llstream ) {
968  utility_exit_with_message( "While parsing SECONDARY_MATCH: line, exptected to read 'UPSTREAM_CST <int>' but reached an end-of-line" );
969  }
970  std::string second;
971  llstream >> second;
972  if ( second != "UPSTREAM_CST" && second != "DOWNSTREAM" ) {
973  std::cerr << "Error reading line: " << llstr << std::endl;
974  utility_exit_with_message( "While parsing SECONDARY_MATCH: line, expected 'UPSTREAM_CST' or 'DOWNSTREAM' but encountered '" + second +"'." );
975  }
976  if ( second == "UPSTREAM_CST" ) {
977  Size cst_id;
978  llstream >> cst_id;
979  if ( ! llstream ) {
980  std::cerr << "Error reading line: " << llstr << std::endl;
981  utility_exit_with_message( "While parsing SECONDARY_MATCH: line, read 'UPSTREAM_CST' and expected to read an integer following, but did not find one." );
982  }
983  if ( cst_id < 1 || cst_id >= ii ) {
984  std::cerr << "Error reading line: '" << llstr << "' for geometric constraint " << ii << std::endl;
985  utility_exit_with_message( "Secondary match algorithm requested to an upstream residue "
986  "produced by an invalid geometric constraint " + utility::to_string( cst_id ) +
987  "\nThe geometric constraint ID must be less than the current geometric constraint id"
988  "and greater than 0" );
989  }
990  secondary_match_upstream_residue = true;
991  secondary_match_upstream_geomcst_id = cst_id;
992  }
993  secondary_matching = true;
994 
995  //Author: Kui Chan Date:101309
996  //Description: secondary scoring by score term(s).
997  } else if ( first == "SCORING_SECMATCH::" ){
998  if( !secondary_matching ){
999  utility_exit_with_message( "SCORING_SECMATCH line detected without previous SECONDARY_MATCH specifier.");
1000  }
1001  SecMatchStr += llstr + "\n";
1002 
1003  } else {
1004  utility_exit_with_message( "While parsing ALGORITHM:: match data. Command '" + first +"' not supported on line '" + llstr + "'");
1005  }
1006  }
1007  } else {
1008  TR << " Did not locate ALGORITHM_INFO:: match for constraint " << ii << std::endl;
1009  }
1010 
1011 
1012  Size const upstream_id = jj_mcfis[ kk ]->upstream_res();
1013  Size const downstream_id = jj_mcfis[ kk ]->downstream_res();
1014 
1016  if ( !secondary_matching ) {
1017  exgs = jj_mcfis[ kk ]->create_exgs();
1018  if ( exgs.get() == 0 ) {
1019  utility_exit_with_message( "ERROR: could not define external geometry between upstream and downstream residues. All 6 parameters must be defined." );
1020  }
1021  }
1022 
1023  if ( jj_mcfis[ kk ]->allowed_restypes( downstream_id ).size() == 0 ) {
1024  utility_exit_with_message( "Input file lists no residue types for template residue " +
1025  utility::to_string( downstream_id ) + " for geometric constraint " +
1026  utility::to_string( ii ) + ". There must be at least one." );
1027  }
1028 
1029  for ( Size ll = 1; ll <= jj_mcfis[ kk ]->allowed_restypes( downstream_id ).size(); ++ll ) {
1030 
1031  core::chemical::ResidueTypeCOP ll_downres( jj_mcfis[ kk ]->allowed_restypes( downstream_id )[ ll ] );
1032 
1033  utility::vector1< std::string > upstream_launch_atoms( 3 );
1034  //utility::vector1< std::string > downstream_launch_atoms( 3 ); /// TEMP HACK. Assume single ligand downstream
1035  utility::vector1< core::id::AtomID > downstream_3atoms( 3 );
1036  for ( Size mm = 1; mm <= 3; ++mm ) downstream_3atoms[ mm ].rsd() = 1;
1037 
1038 
1039  utility::vector1< utility::vector1< Size > > up_ats( 3 ), down_ats( 3 );
1040  for ( Size mm = 1; mm <= 3; ++mm ) up_ats[ mm ] = jj_mcfis[ kk ]->template_atom_inds( upstream_id, mm, *upres[ jj ] );
1041  for ( Size mm = 1; mm <= 3; ++mm ) down_ats[ mm ] = jj_mcfis[ kk ]->template_atom_inds( downstream_id, mm, *ll_downres );
1042 
1043  runtime_assert( up_ats[ 1 ].size() == up_ats[ 2 ].size() );
1044  runtime_assert( up_ats[ 1 ].size() == up_ats[ 3 ].size() );
1045 
1046  utility::vector1< Size > up_down_ncombs( 2 );
1047  up_down_ncombs[ 1 ] = up_ats[ 1 ].size();
1048  up_down_ncombs[ 2 ] = down_ats[ 2 ].size();
1049 
1050  utility::LexicographicalIterator lex( up_down_ncombs );
1051 
1052  while ( ! lex.at_end() ) {
1053  for ( Size mm = 1; mm <= 3; ++mm ) upstream_launch_atoms[ mm ] = upres[ jj ]->atom_name( up_ats[ mm ][ lex[ 1 ] ] );
1054  for ( Size mm = 1; mm <= 3; ++mm ) downstream_3atoms[ mm ].atomno() = down_ats[ mm ][ lex[ 2 ] ];
1055 
1056  TR << " " << upres[ jj ]->name() << " " << ll_downres->name() << std::endl;
1057  TR << " ";
1058  TR << " U3: " << upstream_launch_atoms[ 3 ];
1059  TR << " U2: " << upstream_launch_atoms[ 2 ];
1060  TR << " U1: " << upstream_launch_atoms[ 1 ];
1061  TR << " D1: " << ll_downres->atom_name( down_ats[ 1 ][ lex[ 2 ] ] );
1062  TR << " D2: " << ll_downres->atom_name( down_ats[ 2 ][ lex[ 2 ] ] );
1063  TR << " D3: " << ll_downres->atom_name( down_ats[ 3 ][ lex[ 2 ] ] );
1064  TR << std::endl;
1065 
1066  if ( secondary_matching ) {
1067  utility::vector1< Size > candidate_atids( 3 );
1068  utility::vector1< Size > target_atids( 3 );
1069  for ( Size nn = 1; nn <= 3; ++nn ) {
1070  candidate_atids[ nn ] = up_ats[ nn ][ lex[ 1 ]];
1071  target_atids[ nn ] = down_ats[ nn ][ lex[ 2 ]];
1072  }
1073  if ( secondary_match_upstream_residue ) {
1075  ii, secondary_match_upstream_geomcst_id, upres[ jj ],
1076  ll_downres, candidate_atids, target_atids,
1077  jj_mcfis[ kk ], SecMatchStr, *upstream_pose_ );
1078  } else {
1080  ii, upres[ jj ], ll_downres,
1081  candidate_atids, target_atids, jj_mcfis[ kk ], SecMatchStr, *upstream_pose_,
1082  jj_mcfis[kk]->is_covalent() );
1083  }
1084  } else {
1086  ii, upres[ jj ], upstream_launch_atoms, downstream_3atoms, *exgs,
1087  jj_mcfis[ kk ]->index(),
1088  mtask.enumerate_ligand_rotamers(),
1089  jj_mcfis[kk]->is_covalent() );
1090  }
1091  ++lex;
1092  } //lex loop
1093  } // ll loop over downstream residue types
1094  } //kk loop over mcfis
1095  } //jj loop over restypes for constraint
1096  } //ii loop over geometric constraints
1097 
1098  //std::cout << "APL DEBUG Matcher.cc::initialize_from_file end" << std::endl;
1099 } //initialize_from_file function
1100 
1101 
1102 /// @brief Main worker function
/// @details Returns false immediately when initialize_scaffold_build_points() fails;
/// otherwise the return value is whatever generate_hits() reports.
/// NOTE(review): this listing omits the signature (listing line 1103) and the statements
/// that sat between the debug markers below (listing lines 1107, 1109, 1111, 1113), so
/// the intermediate initialization steps cannot be described from here -- confirm
/// against the real file.
1104 {
1105  if( !initialize_scaffold_build_points() ) return false;
1106  //std::cout << "APL DEBUG Matcher.cc::find_hits 1" << std::endl;
1108  //std::cout << "APL DEBUG Matcher.cc::find_hits 2" << std::endl;
1110  //std::cout << "APL DEBUG Matcher.cc::find_hits 3" << std::endl;
1112  //std::cout << "APL DEBUG Matcher.cc::find_hits 4" << std::endl;
1114  //std::cout << "APL DEBUG Matcher.cc::find_hits 5" << std::endl;
1115 
1116  return generate_hits();
1117 }
1118 
1119 
1120 
/// @brief Drives match enumeration through the object named processor.
/// @details processor.begin_processing() is called first and processor.end_processing()
/// last. In between, the representative downstream algorithm of every geometric
/// constraint is asked to prepare_for_match_enumeration(), and all_downstream_builders_
/// is scanned until one builder reports hits_potentially_incompatible().
/// NOTE(review): the signature (listing line 1122), the statement executed when an
/// incompatible builder is found (1131), the conditions guarding the message and the
/// if/else below (1136-1137) and both enumeration calls (1140, 1142) are missing from
/// this listing -- consult the real file before relying on this description.
1121 void
1123 {
1124  processor.begin_processing();
1125  for ( Size ii = 1; ii <= n_geometric_constraints_; ++ii ) {
1126  representative_downstream_algorithm_[ ii ]->prepare_for_match_enumeration( *this );
1127  }
// Stop scanning at the first builder that reports potentially incompatible hits.
1128  for( std::list< downstream::DownstreamBuilderOP >::const_iterator ds_it( all_downstream_builders_.begin() ),
1129  ds_end( all_downstream_builders_.end()); ds_it != ds_end; ++ds_it ){
1130  if( (*ds_it)->hits_potentially_incompatible() ){
1132  break;
1133  }
1134  }
1135 
1138  TR << "Potential DownstreamBuilder hit incompatibilities have been detected. All possible hit combinations will be enumerated. This overrides the MatcherTask instruction to output matches with only a single downstream position." << std::endl;
1139  }
1141  } else {
1143  }
1144  processor.end_processing();
1145 }
1146 
1147 /*utility::vector1< std::list< Hit > > const &
1148 Matcher::hits() const
1149 {
1150  return hits_;
1151 }
1152 */
1153 
/// @brief Accessor: returns the element of all_build_points_ stored at position index.
/// NOTE(review): the return type and signature lines are missing from this listing.
1156 {
1157  return all_build_points_[ index ];
1158 }
1159 
1160 
/// @brief Accessor: returns the upstream_pose_ member.
/// NOTE(review): the return type and signature lines are missing from this listing.
1163 {
1164  return upstream_pose_;
1165 }
1166 
/// @brief Accessor: returns the downstream_pose_ member.
/// NOTE(review): the return type and signature lines are missing from this listing.
1169 {
1170  return downstream_pose_;
1171 }
1172 
/// @brief Accessor: returns the entry of upstream_builders_ for geometric constraint cst_id.
/// NOTE(review): the return type and signature lines are missing from this listing.
1175 {
1176  return upstream_builders_[ cst_id ];
1177 }
1178 
/// @brief Returns the first downstream builder registered for geometric constraint
/// geom_cst, or 0 when the list for that constraint is empty.
/// NOTE(review): the return type and signature lines are missing from this listing.
1181 {
1182  if ( downstream_builders_[ geom_cst ].empty() ) {
1183  return 0;
1184  } else {
1185  return *(downstream_builders_[ geom_cst ].begin());
1186  }
1187 }
1188 
/// @brief Returns, by value, a freshly built list of DownstreamAlgorithmCOP holding every
/// element of downstream_algorithms_[ cst_id ] in its original order.
/// NOTE(review): the function name line (listing line 1190) is missing from this listing.
1189 std::list< downstream::DownstreamAlgorithmCOP >
1191 {
1192  std::list< downstream::DownstreamAlgorithmCOP > dsalgs;
// The stored list holds DownstreamAlgorithmOP elements; each is pushed into the COP list.
1193  for ( std::list< downstream::DownstreamAlgorithmOP >::const_iterator
1194  iter = downstream_algorithms_[ cst_id ].begin(),
1195  iter_end = downstream_algorithms_[ cst_id ].end();
1196  iter != iter_end; ++iter ) {
1197  dsalgs.push_back( *iter );
1198  }
1199  return dsalgs;
1200 }
1201 
/// @brief Accessor: returns the entry of representative_downstream_algorithm_ for cst_id.
/// NOTE(review): the return type and signature lines are missing from this listing.
1204 {
1205  return representative_downstream_algorithm_[ cst_id ];
1206 }
1207 
1208 
/// @brief Read-only access to the hit list stored for one geometric constraint.
/// @param cst_id index into hits_ selecting the geometric constraint
/// @return const reference to hits_[ cst_id ]
1209 Matcher::HitList const &
1210 Matcher::hits( Size cst_id ) const
1211 {
1212  return hits_[ cst_id ];
1213 }
1214 
1215 
/// @brief Accessor: returns the occ_space_hash_ member.
/// NOTE(review): the signature lines (including the opening brace) are missing from this listing.
1218  return occ_space_hash_;
1219 }
1220 
/// @brief Accessor: returns the entry of per_constraint_build_points_ for cst_id.
/// NOTE(review): the return type and signature lines are missing from this listing.
1223 {
1224  return per_constraint_build_points_[ cst_id ];
1225 }
1226 
1227 
/// @brief Accessor: returns the element of all_build_points_ stored at position index.
/// NOTE(review): the body matches an earlier accessor in this file; presumably this is
/// the other const/non-const overload -- the signature lines are missing, so confirm.
1230 {
1231  return all_build_points_[ index ];
1232 }
1233 
/// @brief Accessor: returns the entry of upstream_builders_ for geometric constraint cst_id.
/// NOTE(review): the body matches an earlier accessor in this file; presumably this is
/// the other const/non-const overload -- the signature lines are missing, so confirm.
1236 {
1237  return upstream_builders_[ cst_id ];
1238 }
1239 
/// @brief Reports whether any geometric constraint is flagged in geomcst_is_upstream_only_.
/// @return true as soon as one of the entries 1..n_geometric_constraints_ is set,
/// false when none is.
/// NOTE(review): the function name line (listing line 1241) is missing from this listing.
1240 bool
1242 {
1243  for( core::Size i =1; i<= n_geometric_constraints_; ++i){
1244  if( geomcst_is_upstream_only_[i] ) return true;
1245  }
1246  return false;
1247 }
1248 
/// @brief Returns the first downstream builder registered for geometric constraint cst_id.
/// @details Unlike the variant that returns 0 for an empty list, this one enforces a
/// non-empty list with runtime_assert before dereferencing begin().
/// NOTE(review): the return type and signature lines are missing from this listing.
1251 {
1252  runtime_assert( ! downstream_builders_[ cst_id ].empty() );
1253  return *(downstream_builders_[ cst_id ].begin());
1254 }
1255 
/// @brief Accessor: returns a const reference to the full list of downstream builders
/// stored for geometric constraint cst_id.
/// NOTE(review): the function name line (listing line 1257) is missing from this listing.
1256 std::list< downstream::DownstreamBuilderOP > const &
1258 {
1259  return downstream_builders_[ cst_id ];
1260 }
1261 
/// @brief Accessor: returns a const reference to the list of downstream algorithms
/// stored for geometric constraint cst_id (no copy is made).
/// NOTE(review): the function name line (listing line 1263) is missing from this listing.
1262 std::list< downstream::DownstreamAlgorithmOP > const &
1264 {
1265  return downstream_algorithms_[ cst_id ];
1266 }
1267 
1268 
/// @brief Accessor: returns the occ_space_hash_ member.
/// NOTE(review): the body matches an earlier accessor in this file; presumably this is
/// the other const/non-const overload -- the signature lines are missing, so confirm.
1271  return occ_space_hash_;
1272 }
1273 
/// @brief Returns the begin() iterator of the hit list for geometric constraint geom_cst_id.
/// NOTE(review): the return type and signature lines are missing from this listing.
1276 {
1277  return hits_[ geom_cst_id ].begin();
1278 }
1279 
/// @brief Returns the end() iterator of the hit list for geometric constraint geom_cst_id.
/// NOTE(review): the return type and signature lines are missing from this listing.
1282 {
1283  return hits_[ geom_cst_id ].end();
1284 }
1285 
/// @brief Removes one hit from the hit list of a geometric constraint.
/// @param dsalg the downstream algorithm requesting the deletion
/// @param geom_cst_id_for_hit index into hits_ of the list that holds the hit
/// @param iter position of the hit to erase within that list
/// @details When dsalg belongs to a different geometric constraint than the list being
/// edited, a runtime_assert requires dsalg's constraint id to be the larger of the two,
/// and the change is recorded via note_primary_change_to_geom_csts_hitlist().
/// NOTE(review): the function name line (listing line 1287) is missing from this listing.
1286 void
1288  downstream::DownstreamAlgorithm const & dsalg,
1289  Size geom_cst_id_for_hit,
1290  HitListIterator const & iter
1291 )
1292 {
1293  hits_[ geom_cst_id_for_hit ].erase( iter );
1294  if ( geom_cst_id_for_hit != dsalg.geom_cst_id() ) {
1295  /// A downstream algorithm is making a primary modification to
1296  /// the hit list for some other geometric constraint.
1297 
1298  // ASSUMPTION: hits for geom_cst i may not depend on hits for geom_cst j, for j > i.
1299  // Under this assumption, the downstream algorithm for geom_cst i is unable to
1300  // direct the deletion of hits from geom_cst j. Maybe the downstream algorithm
1301  // for geom_cst j deletes its own hits during a call to respond_to_peripheral_hitlist_change,
1302  // as a result of round i hits disappearing; however, round i may not delete round j's hits
1303  // directly.
1304  runtime_assert( dsalg.geom_cst_id() > geom_cst_id_for_hit );
1305 
1306  note_primary_change_to_geom_csts_hitlist( geom_cst_id_for_hit );
1307  }
1308 }
1309 
1310 
/// @brief Walks geometric constraints 1..n_geometric_constraints_ in order.
/// @return false when the per-constraint processing triggers the early exit inside the
/// loop, true when the loop finishes for every constraint.
/// NOTE(review): the signature (listing line 1311) and the per-constraint calls plus the
/// early-exit condition (listing lines 1315-1317) are missing from this listing.
1312  //std::cout << "APL DEBUG Matcher.cc::generate_hits() begin" << std::endl;
1313  for ( Size ii = 1; ii <= n_geometric_constraints_; ++ii ) {
1314  //std::cout << "APL DEBUG Matcher.cc::generate_hits() ii=" << ii << std::endl;
1318  //std::cout << "APL DEBUG Matcher.cc::generate_hits() early exit ii=" << ii << std::endl;
1319  return false;
1320  }
1321  }
1322  //std::cout << "APL DEBUG Matcher.cc::generate_hits() end" << std::endl;
1323  return true;
1324 }
1325 
/// @brief Fills per_constraint_build_points_[ cst_id ] with the build points to use for
/// one geometric constraint.
/// @details Two paths are visible: the first copies every entry of all_build_points_;
/// the second keeps only the entries whose pose_build_resids_ value matches the next
/// pending value of per_cst_build_resids_[ cst_id ].
/// NOTE(review): the signature (listing line 1326) and the condition choosing between
/// the two paths (1329) are missing from this listing.
1327 {
1328 
1330  per_constraint_build_points_[ cst_id ].reserve( all_build_points_.size() );
1331  for ( Size ii = 1; ii <= all_build_points_.size(); ++ii ) {
1332  /// if ( logic ) goes here to decide if all build points
1333  /// are appropriate for a particular geometric constraint.
1334  per_constraint_build_points_[ cst_id ].push_back( all_build_points_[ ii ] );
1335  }
1336  } else {
1337  per_constraint_build_points_[ cst_id ].reserve( per_cst_build_resids_[ cst_id ].size() );
1338 
1339  /// "merge sort" inspired algorithm; advance through two sorted lists.
// counter tracks the next entry of per_cst_build_resids_[ cst_id ] still to be matched.
// NOTE(review): element 1 is read before any size check, so this path assumes the
// per-constraint list is non-empty -- confirm callers guarantee that.
1340  Size counter = 1;
1341  for ( Size ii = 1; ii <= pose_build_resids_.size(); ++ii ) {
1342  if ( per_cst_build_resids_[ cst_id ][ counter ] == pose_build_resids_[ ii ] ) {
1343  per_constraint_build_points_[ cst_id ].push_back( all_build_points_[ ii ] );
1344  ++counter;
1345  if ( counter > per_cst_build_resids_[ cst_id ].size() ) break;
1346  }
1347  }
1348  }
1349 
1350 }
1351 
1352 //Author: Kui Chan
1353 //access function to pose_build_resids_
1354 //Reason: Use to update the SecondaryMatcherToUpstreamResidue hit.second()
// NOTE(review): the return type and signature lines (listing lines 1355-1356) are
// missing from this listing; the visible body simply returns the member.
1357  return pose_build_resids_;
1358 }
1359 
/// @brief Builds hits for geometric constraint cst_id and appends them to hits_[ cst_id ].
/// @details The representative downstream algorithm for cst_id produces the hits via
/// build_hits_at_all_positions( *this ); they are then spliced onto the end of the
/// stored list, which leaves the local list empty.
/// NOTE(review): the signature (listing line 1360) and the statement that follows the
/// "primary-change" comment below (1384) are missing from this listing.
1361 {
1362 
1363  /* Putting this here temporarily
1364  if ( cst_id != 1 ) {
1365  /// Greedy matching: only accept hits that could lead to a match.
1366  /// Greedy matching begins after round 1.
1367  for ( std::list< downstream::DownstreamBuilderOP >::const_iterator
1368  iter = all_downstream_builders_.begin(),
1369  iter_end = all_downstream_builders_.end();
1370  iter != iter_end; ++iter ) {
1371  (*iter)->set_occupied_space_hash( occ_space_hash_ );
1372  }
1373  for ( std::list< DownstreamAlgorithmOP >::const_iterator
1374  iter = all_downstream_algorithms_.begin(),
1375  iter_end = all_downstream_algorithms_.end();
1376  iter != iter_end; ++iter ) {
1377  (*iter)->set_occupied_space_hash( occ_space_hash_ );
1378  }
1379 
1380  }*/
1381 
1382  /// At the conclusion of hit generation, there will be new hits for this geometric constraint;
1383  /// put this constraint ID at the front of the list of geom-csts to trigger primary-change responses.
1385 
1386  std::list< Hit > hits = representative_downstream_algorithm_[ cst_id ]->build_hits_at_all_positions( *this );
1387  hits_[ cst_id ].splice( hits_[ cst_id ].end(), hits );
1388 
1389 }
1390 
/// @brief Post-processes the hit lists after hits were generated for constraint cst_id.
/// @details For every entry of geom_csts_with_primary_hitlist_modificiations_, that
/// constraint's representative algorithm responds to the primary change, then the
/// algorithms of constraints 1..cst_id (except the one just handled) respond to the
/// peripheral change, and the constraint's primary-modification flag is cleared.
/// When cst_id is the last constraint or no hits remain for it, every downstream builder's
/// occupied-space hash and occ_space_hash_ itself are set to 0.
/// @return true when hits_[ cst_id ] is non-empty.
/// NOTE(review): the signature (listing line 1392) and one statement inside the loop
/// (1414, between clearing the flag and advancing the iterator) are missing from this
/// listing.
1391 bool
1393 {
1394 
1395  /// During the primary and peripheral hit-list prunings, new elements may be pushed-back
1396  /// into the hit-lists-modified-by-primary-deletion list.
1397  for ( std::list< Size >::iterator iter = geom_csts_with_primary_hitlist_modificiations_.begin();
1398  iter != geom_csts_with_primary_hitlist_modificiations_.end(); /* no increment */ ) {
1399 
1400  representative_downstream_algorithm_[ *iter ]->respond_to_primary_hitlist_change( *this, cst_id );
1401 
1402  /// There is an implicit dependency DAG between geometric constraints:
1403  /// round i cannot be dependent on round j, if j > i.
1404  /// Therefore a very simple "topological sort" may be performed offline to come up with
1405  /// the appropriate order in which to update peripheral hits: go from 1 to n.
1406  for ( Size jj = 1; jj <= cst_id; ++jj ) {
1407  if ( jj == *iter ) continue;
1408  representative_downstream_algorithm_[ jj ]->respond_to_peripheral_hitlist_change( *this );
1409  }
1410 
// The successor is captured before the current element is dealt with, so the loop can
// continue from it.
1411  std::list< Size >::iterator iter_next( iter );
1412  ++iter_next;
1413  geom_cst_has_primary_modification_[ *iter ] = false;
1415  iter = iter_next;
1416  }
1417 
1418  //if there are no hits in the list for this constraint, we can abort preemptively
1419  bool hits_found( hits_[cst_id].size() != 0 );
1420 
1421  if ( ( cst_id == n_geometric_constraints_ ) || (!hits_found) ) {
1422  /// We're completely done with hit generation; clean up.
1423  /// Delete the occ_space_hash_
1424  for ( std::list< downstream::DownstreamBuilderOP >::const_iterator
1425  iter = all_downstream_builders_.begin(),
1426  iter_end = all_downstream_builders_.end();
1427  iter != iter_end; ++iter ) {
1428  (*iter)->set_occupied_space_hash( 0 );
1429  }
1430  occ_space_hash_ = 0;
1431  }
1432  return hits_found;
1433 }
1434 
1435 /// @details this function returns false in case
1436 /// there are no build points for a certain cst.
1437 /// this can happen if MPMs in the MatcherTask
1438 /// ruled out all user defined build points
/// On success, all_build_points_ is resized to pose_build_resids_.size() and one entry
/// is filled in per build residue; every residue index is asserted to be no larger than
/// upstream_pose_->n_residue(). A warning is written to TR before each false return.
/// NOTE(review): the signature (listing line 1440), the condition selecting between the
/// two emptiness checks (1443) and the start of the assignment in the final loop (1462)
/// are missing from this listing.
1439 bool
1441 {
1442  runtime_assert( upstream_pose_.get() );
1444  if( pose_build_resids_.size() == 0 ) {
1445  TR << "WARNING: No build points were set in the matcher, could not initialize scaffold build points." << std::endl;
1446  return false;
1447  }
1448  }
1449  else{
// Per-constraint mode: every constraint needs at least one build residue.
1450  for( core::Size i =1; i <= per_cst_build_resids_.size(); ++i ){
1451  if( per_cst_build_resids_[i].size() == 0 ){
1452  TR << "WARNING: No build points for geomcst " << i << " were set in the matcher, could not initialize scaffold build points." << std::endl;
1453  return false;
1454  }
1455  }
1456  }
1457 
1458  all_build_points_.resize( pose_build_resids_.size() );
1459  for ( Size ii = 1; ii <= pose_build_resids_.size(); ++ii ) {
1460  runtime_assert_msg( pose_build_resids_[ ii ] <= upstream_pose_->n_residue(),
1461  "pos file contains position outside of valid range.");
1463  upstream_pose_->residue( pose_build_resids_[ ii ] ), ii );
1464  }
1465  return true;
1466 }
1467 
1468 
/// @brief Builds the backbone bump grid and one bump grid per scaffold residue, then
/// hands bb_grid_ to every downstream builder, downstream algorithm and upstream builder.
/// @details Requires upstream_pose_ to be set (runtime_assert). Each residue's grid is
/// filled by fill_grid_with_backbone_heavyatom_spheres(), OR-ed into bb_grid_ and stored
/// in original_scaffold_residue_bump_grids_. The elapsed clock() time is reported on TR.
/// NOTE(review): the signature (listing line 1470), the creation of bb_grid_ when it is
/// unset (1478), the creation of each per-residue grid resbgop (1484) and the opening
/// line of the final loop over upstream_builders_ (1504) are missing from this listing.
1469 void
1471 {
1472  runtime_assert( upstream_pose_.get() );
1473 
1474  clock_t starttime = clock();
1475  TR << "Initializing BumpGrids... " << std::endl;
1476 
1477  if ( ! bb_grid_ ) {
1479  }
1480 
1481  /// This code is fixed-backbone only... it needs to be expanded.
1482  original_scaffold_residue_bump_grids_.resize( upstream_pose_->total_residue() );
1483  for ( Size ii = 1; ii <= upstream_pose_->total_residue(); ++ii ) {
1485  fill_grid_with_backbone_heavyatom_spheres( upstream_pose_->residue( ii ), *resbgop );
1486  bb_grid_->or_with( *resbgop );
1487  original_scaffold_residue_bump_grids_[ ii ] = resbgop;
1488  }
1489 
1490 
1491  // Inform everyone of the grid
1492  for ( std::list< downstream::DownstreamBuilderOP >::const_iterator
1493  iter = all_downstream_builders_.begin(),
1494  iter_end = all_downstream_builders_.end();
1495  iter != iter_end; ++iter ) {
1496  (*iter)->set_bb_grid( bb_grid_ );
1497  }
1498  for ( std::list< downstream::DownstreamAlgorithmOP >::const_iterator
1499  iter = all_downstream_algorithms_.begin(),
1500  iter_end = all_downstream_algorithms_.end();
1501  iter != iter_end; ++iter ) {
1502  (*iter)->set_bb_grid( bb_grid_ );
1503  }
1505  iter = upstream_builders_.begin(),
1506  iter_end = upstream_builders_.end();
1507  iter != iter_end; ++iter ) {
1508  (*iter)->set_bb_grid( bb_grid_ );
1509  }
1510 
1511  TR << "...done" << std::endl;
1512  clock_t stoptime = clock();
1513  TR << " TIMING: Bump grids took " << ((double) stoptime - starttime )/CLOCKS_PER_SEC << " seconds to compute" << std::endl;
1514 }
1515 
/// @brief Sets up active_site_grid_, either from a gridlig file or from a list of
/// (residue index, radius) pairs on the upstream pose.
/// @details With read_gridlig_file_ set, the grid is initialized from gridlig_fname_.
/// Otherwise the bin width is set to 0.25, the grid is first enlarged to cover the volume
/// around each listed residue and then filled within each radius; afterwards the grid is
/// passed to all downstream builders and algorithms, and every atom in
/// downstream_atoms_required_inside_active_site_ is registered with every builder.
/// NOTE(review): as listed, the "inform everyone" loops sit inside the else branch only,
/// so nothing is told about the grid on the gridlig-file path -- confirm this is intended.
/// NOTE(review): the signature (listing line 1517), both statements creating
/// active_site_grid_ (1527, 1536), the iterator initializers of the two residue loops
/// (1540-1541, 1549-1550) and the initializer of atid_iter (1577) are missing from this
/// listing.
1516 void
1518 {
1519  /* TEMP
1520  if ( downstream_atoms_required_inside_active_site_.empty() ) return;
1521 
1522  if ( upstream_resids_and_radii_defining_active_site_.empty() ) {
1523  utility_exit_with_message( "ERROR: Active site undefined, yet downstream atoms are required to be in active site" );
1524  }*/
1525 
1526  if ( read_gridlig_file_ ) {
1528  active_site_grid_->initialize_from_gridlig_file( gridlig_fname_ );
1529 
1530  /*Bool3DGridKinemageWriter writer;
1531  writer.set_write_facets( true );
1532  std::ofstream ostr( "active_site_gridlig.kin" );
1533  writer.set_line_color( "green" );
1534  writer.write_grid_to_kinemage( ostr, "act_site", active_site_grid_->grid() );*/
1535  } else {
1537  active_site_grid_->set_bin_width( 0.25 ); /// Same resolution as the bump grid!
1538 
// First pass: grow the grid so it covers the volume around every listed residue.
1539  for ( std::list< std::pair< Size, Real > >::const_iterator
1542  iter != iter_end; ++iter ) {
1543  runtime_assert( iter->first <= upstream_pose_->total_residue() );
1544  active_site_grid_->enlargen_to_capture_volume_within_radius_of_residue(
1545  upstream_pose_->residue( iter->first ), iter->second );
1546  }
1547 
// Second pass: mark the space within each residue's radius.
1548  for ( std::list< std::pair< Size, Real > >::const_iterator
1551  iter != iter_end; ++iter ) {
1552  active_site_grid_->or_within_radius_of_residue(
1553  upstream_pose_->residue( iter->first ), iter->second );
1554  }
1555 
1556  // Inform everyone of the grid
1557  for ( std::list< downstream::DownstreamBuilderOP >::const_iterator
1558  iter = all_downstream_builders_.begin(),
1559  iter_end = all_downstream_builders_.end();
1560  iter != iter_end; ++iter ) {
1561  (*iter)->set_active_site_grid( active_site_grid_ );
1562  }
1563  for ( std::list< downstream::DownstreamAlgorithmOP >::const_iterator
1564  iter = all_downstream_algorithms_.begin(),
1565  iter_end = all_downstream_algorithms_.end();
1566  iter != iter_end; ++iter ) {
1567  (*iter)->set_active_site_grid( active_site_grid_ );
1568  }
1569 
1570  /// Inform downstream builders of their atoms that must be contained inside the
1571  /// active site grid.
1572  for ( std::list< downstream::DownstreamBuilderOP >::const_iterator
1573  iter = all_downstream_builders_.begin(),
1574  iter_end = all_downstream_builders_.end();
1575  iter != iter_end; ++iter ) {
1576  for ( std::list< core::id::AtomID >::const_iterator
1578  atid_iter_end = downstream_atoms_required_inside_active_site_.end();
1579  atid_iter != atid_iter_end; ++atid_iter ) {
1580  (*iter)->require_atom_to_reside_in_active_site( *atid_iter );
1581  }
1582  }
1583 
1584  /*std::cout << "Writing active-site kinemage" << std::endl;
1585 
1586  Bool3DGrid copy_active = active_site_grid_->grid();
1587  copy_active.subtract( bb_grid_->grid( C_ALA ) );
1588 
1589  Bool3DGridKinemageWriter writer;
1590  writer.set_write_facets( true );
1591  std::ofstream ostr( "active_site_grid.kin" );
1592  writer.set_line_color( "green" );
1593  writer.write_grid_to_kinemage( ostr, "act_site", copy_active );
1594  */
1595  }
1596 }
1597 
/// @brief Configures occ_space_hash_ and initializes it.
/// @details The bounding box, the xyz bin widths and the Euler bin widths are copied
/// from occ_space_bounding_box_, euclidean_bin_widths_ and euler_bin_widths_ before
/// initialize() is called.
/// NOTE(review): the signature (listing line 1599) and the statement that creates
/// occ_space_hash_ (1601) are missing from this listing.
1598 void
1600 {
1602  occ_space_hash_->set_bounding_box( occ_space_bounding_box_ );
1603  occ_space_hash_->set_xyz_bin_widths( euclidean_bin_widths_ );
1604  occ_space_hash_->set_euler_bin_widths( euler_bin_widths_ );
1605 
1606  occ_space_hash_->initialize();
1607 
1608 }
1609 
/// @brief Records, for every geometric constraint, the upstream_only() answer of its
/// representative downstream algorithm in geomcst_is_upstream_only_.
/// NOTE(review): the function name line (listing line 1611) is missing from this listing.
1610 void
1612 {
1613  for ( Size ii = 1; ii <= n_geometric_constraints_; ++ii ) {
1614  geomcst_is_upstream_only_[ ii ] = representative_downstream_algorithm_[ ii ]->upstream_only();
1615  }
1616 }
1617 
/// @brief Creates a downstream builder for one geometric constraint, registers it in
/// downstream_builders_[ cst_id ] and all_downstream_builders_, and returns it.
/// @param cst_id geometric constraint the builder belongs to
/// @param upstream_launch_atoms three atom names on the upstream residue (size asserted)
/// @param downstream_3atoms three atom ids on the downstream residue; each must refer to
/// residue 1 and to an atom number within downstream_pose_->residue( 1 ).natoms()
/// @param enumerate_ligand_rotamers false selects the rigid_builder path, true the
/// ligand_rotamer_builder path
/// @param catalytic_bond when true, a bond between the first upstream launch atom and
/// the first downstream atom is put in bond_list and a count-pair generator (crossover
/// 3) is passed to initialize_upstream_residue()
/// @details The catalytic-bond setup is duplicated verbatim in both paths (see the
/// "Refactor this!" remark below).
/// NOTE(review): the return type and name (listing lines 1618-1619), the restype
/// parameter (1621), and the declarations of builder, rigid_builder,
/// ligand_rotamer_builder, bond_list and cpgen (1651, 1654, 1667, 1674, 1688, 1714, 1721)
/// are missing from this listing.
1620  Size const cst_id,
1622  utility::vector1< std::string > const & upstream_launch_atoms,
1623  utility::vector1< core::id::AtomID > const & downstream_3atoms,
1624  bool enumerate_ligand_rotamers,
1625  bool catalytic_bond
1626 )
1627 {
1628  runtime_assert( upstream_launch_atoms.size() == 3 );
1629  runtime_assert( downstream_3atoms.size() == 3 );
1630 
1631  runtime_assert( build_set_id_for_restype_[ cst_id ].find( restype->name() )
1632  != build_set_id_for_restype_[ cst_id ].end() );
1633 
1634  //Size build_set_id = build_set_id_for_restype_[ cst_id ][ restype->name() ];
// The dynamic_cast is used only to assert the builder's type; the static_cast below
// then performs the actual conversion.
1635  runtime_assert( dynamic_cast< upstream::ProteinUpstreamBuilder * > ( upstream_builders_[ cst_id ].get() ) );
1636  upstream::ProteinUpstreamBuilderOP prot_sc_builder( static_cast< upstream::ProteinUpstreamBuilder * > ( upstream_builders_[ cst_id ].get() ));
1637 
1638  upstream::BuildSet & build_set = prot_sc_builder->build_set( restype );
1639 
1640  runtime_assert( build_set.has_restype() );
1641 
1642  /// Only supports rigid-ligand builders for now... This code will expand in the future.
1643  runtime_assert( downstream_pose_ );
1644  runtime_assert( downstream_pose_->total_residue() == 1 );
1645 
1646  for ( Size ii = 1; ii <= 3; ++ii ) {
1647  runtime_assert( downstream_3atoms[ ii ].rsd() == 1 );
1648  runtime_assert( downstream_3atoms[ ii ].atomno() <= downstream_pose_->residue( 1 ).natoms() );
1649  }
1650 
1652  if ( ! enumerate_ligand_rotamers ) {
1653 
1655  rigid_builder->ignore_h_collisions( true );
1656  rigid_builder->initialize_from_residue(
1657  downstream_3atoms[ 1 ].atomno(),
1658  downstream_3atoms[ 2 ].atomno(),
1659  downstream_3atoms[ 3 ].atomno(),
1660  downstream_orientation_atoms_[ 1 ].atomno(),
1661  downstream_orientation_atoms_[ 2 ].atomno(),
1662  downstream_orientation_atoms_[ 3 ].atomno(),
1663  downstream_pose_->residue(1) );
1664 
1665  if ( catalytic_bond ) {
1666  using namespace core::scoring::etable::count_pair;
1668 
1669  Size upstream_atom_id = build_set.restype().atom_index( upstream_launch_atoms[ 1 ] );
1670  Size downstream_atom_id = downstream_3atoms[ 1 ].atomno();
1671 
1672  bond_list.push_back( std::make_pair( upstream_atom_id, downstream_atom_id ) );
1673 
1675  build_set.restype(),
1676  downstream_pose_->residue_type(1),
1677  bond_list );
1678  /// Unclear what the xover value should be... 3 ignores collisions for
1679  /// atoms that are 3 bonds apart
1680  cpgen->set_crossover( 3 );
1681 
1682  rigid_builder->initialize_upstream_residue( & build_set.restype(), cpgen );
1683  } else {
1684  rigid_builder->initialize_upstream_residue( & build_set.restype() );
1685  }
1686  builder = rigid_builder;
1687  } else {
1689  ligand_rotamer_builder->ignore_h_collisions( true );
1690  ligand_rotamer_builder->initialize_from_residue(
1691  downstream_3atoms[ 1 ].atomno(),
1692  downstream_3atoms[ 2 ].atomno(),
1693  downstream_3atoms[ 3 ].atomno(),
1694  downstream_orientation_atoms_[ 1 ].atomno(),
1695  downstream_orientation_atoms_[ 2 ].atomno(),
1696  downstream_orientation_atoms_[ 3 ].atomno(),
1697  downstream_pose_->residue(1) );
1698 
1699  //note: if the relevant downstream atoms have been set,
1700  //this means that the downstream conformer builder should split
1701  //up its rotamer library accordingly
1702  if( relevant_downstream_atoms_.size() != 0 ){
1703  //std::cerr << "determining redundant conformers " << std::endl;
1704  utility::vector1< core::Size > relevant_atom_indices;
1705  for( core::Size i = 1; i <= relevant_downstream_atoms_.size(); ++i){
1706  relevant_atom_indices.push_back( relevant_downstream_atoms_[i].atomno() );
1707  }
1708  ligand_rotamer_builder->determine_redundant_conformer_groups( relevant_atom_indices );
1709  }
1710 
1711  /// Refactor this!
1712  if ( catalytic_bond ) {
1713  using namespace core::scoring::etable::count_pair;
1715 
1716  Size upstream_atom_id = build_set.restype().atom_index( upstream_launch_atoms[ 1 ] );
1717  Size downstream_atom_id = downstream_3atoms[ 1 ].atomno();
1718 
1719  bond_list.push_back( std::make_pair( upstream_atom_id, downstream_atom_id ) );
1720 
1722  build_set.restype(),
1723  downstream_pose_->residue_type(1),
1724  bond_list );
1725  /// Unclear what the xover value should be... 3 ignores collisions for
1726  /// atoms that are 3 bonds apart
1727  cpgen->set_crossover( 3 );
1728 
1729  ligand_rotamer_builder->initialize_upstream_residue( & build_set.restype(), cpgen );
1730  } else {
1731  ligand_rotamer_builder->initialize_upstream_residue( & build_set.restype() );
1732  }
1733 
1734  builder = ligand_rotamer_builder;
1735  }
1736 
1737  downstream_builders_[ cst_id ].push_back( builder );
1738  all_downstream_builders_.push_back( builder );
1739  return builder;
1740 }
1741 
1742 
1743 /// @details Subsample all the available hits for a single voxel in 6D to select
1744 /// 10 or fewer hits for each geometric constraint. If there are already 10 or
1745 /// fewer hits for geom-cst ii, then it takes them all. Otherwise, it selects
1746 /// up to 10 representatives for each upstream conformation. For example,
1747 /// if all the hits in this voxel come from a single rotamer of the upstream hit,
1748 /// then exactly 10 upstream hits will be chosen. The purpose of this code is
1749 /// to speed up match enumeration when it might otherwise get very bogged down
1750 /// by the combinatorics.
/// Note that the cap is applied per distinct upstream_hit, so a geometric constraint
/// whose hits come from several upstream hits can keep more than 10 hits in total,
/// and that no subsampling is performed at all when dynamic_grid_refinement_ is set.
/// @param hit_vectors  hit_vectors[ ii ][ jj ] points at the jj'th hit for geometric constraint ii.
/// @param n_hits_per_geomcst  On input, the number of hits to consider for each geometric
/// constraint; on output, the number of representatives stored in reps[ ii ].
/// @param reps  Output: reps[ ii ] receives the indices into hit_vectors[ ii ] of the selected hits.
1751 void
// NOTE(review): listing lines 1752 (the function-name line) and 1755 (the declaration of the
// `reps` parameter) are absent from this listing; the call site in this file invokes this
// routine as select_hit_representatives( hit_vectors, n_hits_per_geomcst, reps ).
1753  utility::vector1< utility::vector1< Hit const * > > const & hit_vectors,
1754  utility::vector1< Size > & n_hits_per_geomcst,
1756 ) const
1757 {
1758 
1759  for ( Size ii = 1; ii <= n_geometric_constraints_; ++ii ) {
1760  if ( ! dynamic_grid_refinement_ && n_hits_per_geomcst[ ii ] > 10 ) { // 10 is arbitrary
1761  Size max_hits_per_us_hit(10); // 10 is arbitrary -- consider a smaller number here
1762 
     // Group the hit indices (jj) by the upstream_hit each hit reduces to.
1763  std::map< upstream_hit, std::set< Size > > us_hit_map;
1764 
1765  for( Size jj = 1; jj <= n_hits_per_geomcst[ ii ]; ++jj ) {
1766  upstream_hit this_us_hit( *hit_vectors[ii][jj] );
1767 
1768  std::map< upstream_hit, std::set< Size > >::iterator hitmap_it(us_hit_map.find( this_us_hit ));
1769  if( hitmap_it == us_hit_map.end() ){
     // first hit seen for this upstream hit: start a new index set
1770  std::set< Size > hits_this_us_hit;
1771  hits_this_us_hit.insert( jj );
1772  us_hit_map.insert( std::pair< upstream_hit, std::set< Size > >( this_us_hit, hits_this_us_hit ) );
1773  }
1774  else{
1775  hitmap_it->second.insert( jj );
1776  }
1777  } //loop over all hits
1778 
1779  //Size unique_us_hits = us_hit_map.size();
     // Rebuild the representative list from scratch, recounting as we go.
1780  n_hits_per_geomcst[ ii ] = 0;
1781  reps[ii].clear();
1782  for( std::map< upstream_hit, std::set< Size> >::const_iterator map_it( us_hit_map.begin() ), map_end( us_hit_map.end() );
1783  map_it != map_end; ++map_it ){
     // Keep the first max_hits_per_us_hit indices (in std::set order, i.e. the lowest indices)
     // recorded for this upstream hit.
1784  Size counter(0);
1785  for( std::set< Size >::const_iterator set_it( map_it->second.begin() ), set_end( map_it->second.end() );
1786  set_it != set_end; ++set_it ){
1787  ++counter;
1788  ++n_hits_per_geomcst[ ii ];
1789  reps[ii].push_back( *set_it );
1790  if( counter >= max_hits_per_us_hit ) break;
1791  }
1792  }
1793  } else { // n_hits_per_geomcst[ ii ] <= 10, or dynamic_grid_refinement_ is on: keep every hit
     // identity mapping: representative jj is hit jj
1794  reps[ ii ].resize( n_hits_per_geomcst[ ii ] );
1795  for ( Size jj = 1; jj <= n_hits_per_geomcst[ ii ]; ++jj ) {
1796  reps[ ii ][ jj ] = jj;
1797  }
1798  }
1799  }
1800 }
1801 
1801 
1802 /// @brief Returns false if all non-upstream-only hits are compatible.
1803 /// Returns true if any non-upstream-only hits are incompatible, and increments
1804 /// the lexicographical iterator at the most-significant dimension possible
/// @param m1  The (partial) match whose upstream_hits are tested pairwise.
/// @param lex  The iterator over hit combinations; advanced via continue_at_dimension()
/// exactly once, and only when an incompatibility is found.
/// @param processor  Supplies the upstream collision filter (up_coll_filt()) used for the
/// hard-sphere check.
/// @return true as soon as the first incompatible pair is found (lex has then already been
/// advanced and m1 is stale, so the caller must rebuild the match); false if every pair passed.
1805 bool
// NOTE(review): listing line 1806 (the function-name line) is absent from this listing; the
// call site in this file invokes this routine as
// check_non_upstream_only_hit_incompatibility( m1, lex, processor ).
1807  match_dspos1 const & m1,
1808  utility::LexicographicalIterator & lex,
1809  output::MatchProcessor const & processor
1810 ) const
1811 {
1812  /// Before descending into the secondary matches, check that none of the non-upstream-only
1813  /// hits are incompatible with each other. Once we're iterating over the upstream-only
1814  /// hits, we'll assume that all the non-upstream-only hits are compatible with each other.
1815  /// Structure this loop so that the "earliest" incompatibility is found.
1816  /// ii will iterate from 2 to n_geometric_constraints_ and look at whether
1817  /// it is incompatible with any of the hits from 1 to ii-1; if it is,
1818  /// then we can increment the lexicographical iterator at position ii.
1819  /// This guarantees the most-significant dimension producing an incompatibility
1820  /// will be advanced, skipping the largest possible number of (incompatible)
1821  /// hit combinations.
1822  for ( Size ii = 2; ii <= n_geometric_constraints_; ++ii ) {
1823  if ( geomcst_is_upstream_only_[ ii ] ) continue; // ignore upstream-only hits
1824 
1825  Size ii_bp( m1.upstream_hits[ ii ].scaffold_build_id() );
1826  for ( Size jj = 1; jj < ii; ++jj ) {
1827  if ( geomcst_is_upstream_only_[ jj ] ) continue; // ignore upstream-only hits
1828 
1829  Size jj_bp( m1.upstream_hits[ jj ].scaffold_build_id() );
     // Check 1: the two scaffold build points themselves must be compatible.
1830  if ( ! all_build_points_[ ii_bp ]->compatible( *all_build_points_[ jj_bp ]) ) {
1831  lex.continue_at_dimension( ii );
1832  //std::cout << "Incompatible at 1153 " << ii << " " << jj << " " << ii_bp << " " << jj_bp << std::endl;
     // Bug fix: report the incompatibility right away, exactly as the two checks below do.
     // Without this return the lex has been advanced but m1 still describes the old
     // combination, so the remaining checks could advance the lex a second time, or the
     // function could return false and let the caller process (and then step past) a
     // combination it never actually validated.
     return true;
1833  }
     // Check 2: the upstream builders must agree that the two hits can coexist.
1834  if ( ! upstream_builders_[ ii ]->compatible( fake_hit( m1.upstream_hits[ ii ] ),
1835  *all_build_points_[ ii_bp ], *upstream_builders_[ jj ],
1836  fake_hit( m1.upstream_hits[ jj ] ), *all_build_points_[ jj_bp ] )) {
1837  lex.continue_at_dimension( ii );
1838  //std::cout << "Incompatible at 1159 " << ii << " " << jj << " " << ii_bp << " " << jj_bp << std::endl;
1839  return true;
1840  }
     // Check 3: hard-sphere filter between the two upstream hits.
1841  if( !processor.up_coll_filt()->passes_hardsphere_filter( ii, jj, fake_hit( m1.upstream_hits[ ii ] ), fake_hit( m1.upstream_hits[ jj ] ) ) ){
1842  lex.continue_at_dimension( ii );
1843  return true;
1844  }
1845  }
1846  }
1847  //std::cout << "Compatible at 1377" << std::endl;
1848  return false; // no incompatibility
1849 }
1850 
1850 
1851 
1852 /// @brief very similar to above function, the difference being
1853 /// that it checks whether all the downstream builders agree that
1854 /// their hits are compatible with each other
/// @param m  The match whose hits m[ ii ] / m[ jj ] are compared pairwise.
/// @param lex  Advanced with continue_at_dimension( ii ) when hits ii and jj are incompatible.
/// @return true if an incompatible pair was found (lex has been advanced), false otherwise.
/// Only the first downstream builder registered for each geometric constraint is consulted.
1855 bool
// NOTE(review): listing line 1856 (the function-name line) is absent from this listing;
// presumably Matcher::check_downstream_hit_incompatibility( -- confirm against the header.
1857  match const & m,
1858  utility::LexicographicalIterator & lex
1859 ) const
1860 {
1861  for ( Size ii = 2; ii <= n_geometric_constraints_; ++ii ) {
1862  if ( geomcst_is_upstream_only_[ ii ] ) continue; // ignore upstream-only hits
1863  if( downstream_builders_[ii].size() == 0 ) continue; // can't check compatibility if there are no downstream builders
1864 
1865  for ( Size jj = 1; jj < ii; ++jj ) {
1866  if ( geomcst_is_upstream_only_[ jj ] ) continue; // ignore upstream-only hits
1867  if( downstream_builders_[jj].size() == 0 ) continue; // can't check compatibility if there are no downstream builders
     // ask geomcst ii's first downstream builder whether its hit can coexist with
     // the hit produced by geomcst jj's first downstream builder
1868  if( ! (*(downstream_builders_[ii].begin()))->compatible( m[ii], **(downstream_builders_[jj].begin()),m[jj] ) ){
1869  lex.continue_at_dimension( ii );
1870  return true;
1871  }
1872  } // jj loop
1873  } //ii loop
1874  return false;
1875 }
1876 
1876 
1877 /// @details returns false if all upstream-only hits in a particular combination
1878 /// are compatible with each other and with the non-upstream-only hits. Returns
1879 /// true if any are incompatible. Ignores compatibility between non-upstream-only hits.
1880 /// If there is an incompatibility, the upstream_only_hit_iterators are advanced.
1881 /// In the event that this increment beyond the incompatible upstream-only-hit combinations
1882 /// advances the most-significant upstream-only geometric-constraint's iterator
1883 /// to its list end, then this function sets the value of
1884 /// last_upstream_only_geomcst_advanced to zero.
1885 /// update flo nov 10:
1886 /// this function now also checks whether upstream only csts clash with any of the downstream
1887 /// objects. it's faster to do this here than in the filters
/// @param m  The fully assembled match being tested.
/// @param upstream_only_hits  Per-geomcst hit lists for the upstream-only geometric constraints;
/// used to detect when an iterator has reached the end of its list.
/// @param upstream_only_hit_iterators  Current position in each upstream-only hit list;
/// incremented in place when an incompatibility is found.
/// @param last_upstream_only_geomcst_advanced  Output: the index of the last geomcst whose
/// iterator was incremented, or 0 if the increment ran every upstream-only iterator it touched
/// to its list end.
/// @param processor  Supplies the up_down_filt() and up_coll_filt() hard-sphere filters.
1888 bool
// NOTE(review): listing line 1889 (the function-name line) is absent from this listing;
// presumably Matcher::check_upstream_only_hit_incompatibility( -- confirm against the header.
1890  match const & m,
1891  utility::vector1< HitPtrListCOP > const & upstream_only_hits,
1892  utility::vector1< std::list< Hit const * >::const_iterator > & upstream_only_hit_iterators,
1893  Size & last_upstream_only_geomcst_advanced,
1894  output::MatchProcessor const & processor
1895 ) const
1896 {
1897  /// Determine if any of the secondary hits are incompatible and therefore
1898  /// could not produce a match.
1899  /// If we do find an incompatibility, then "continue" at the most-significant
1900  /// dimension.
1901  bool incompatible( false ), outstanding_list_increment( false );
1902  for ( Size ii = 2; ii <= n_geometric_constraints_; ++ii ) {
1903  Size ii_bp( m[ ii ].scaffold_build_id() );
1904  for ( Size jj = 1; jj < ii; ++jj ) {
1905  Size jj_bp( m[ jj ].scaffold_build_id() );
1906 
1907  /// We have already checked for compatibility between non-upstream-only matches.
1908  if ( ! geomcst_is_upstream_only_[ ii ] && ! geomcst_is_upstream_only_[ jj ] ) continue;
1909 
     // The checks below are chained on !incompatible, so the first failing check
     // short-circuits the rest.
1910  if ( ! all_build_points_[ ii_bp ]->compatible( *all_build_points_[ jj_bp ] )) {
1911  incompatible = true;
1912  }
1913  if ( ! incompatible && ! upstream_builders_[ ii ]->compatible(
1914  m[ ii ], *all_build_points_[ ii_bp ],
1915  *upstream_builders_[ jj ], m[ jj ], *all_build_points_[ jj_bp ] )) {
1916  incompatible = true;
1917  }
1918 
1919  //clash check
1920  //better here than later to alleviate combinatorics
     // upstream-only hit ii vs. the downstream object of a primary-hit-generating geomcst jj
1921  if( !incompatible && geomcst_is_upstream_only_[ii] && representative_downstream_algorithm_[jj]->generates_primary_hits() ){
1922  if( !processor.up_down_filt()->passes_hardsphere_filter( ii, jj, m[ii], m[jj] ) ) incompatible = true;
1923  }
1924 
     // upstream vs. upstream clash; at least one of ii/jj is upstream-only at this point
     // because of the `continue` above
1925  if( !incompatible && ( geomcst_is_upstream_only_[ii] || geomcst_is_upstream_only_[jj] ) ){
1926  if( !processor.up_coll_filt()->passes_hardsphere_filter( ii, jj, m[ii], m[jj] ) ) incompatible = true;
1927  }
1928  //clash check over
1929 
1930  if ( incompatible ) {
1931  /// Increment the most-significant geom-cst to alleviate the incompatibility
1932  /// either ii or jj represents an upstream-only geom-cst.
1933  /// if it's not ii, then we start our incrementing at jj.
     // Walk downward from that geomcst over the upstream-only geomcsts: increment one
     // iterator, and keep going to the next lower upstream-only geomcst only while the
     // iterator just incremented hit the end of its list. (Size is presumably unsigned;
     // the kk >= 1 test ends the loop when kk reaches 0.)
1934  for ( Size kk = ( ! geomcst_is_upstream_only_[ ii ] ? jj : ii ); kk >= 1; --kk ) {
1935  if ( geomcst_is_upstream_only_[ kk ] ) {
1936  ++upstream_only_hit_iterators[ kk ];
1937  last_upstream_only_geomcst_advanced = kk;
1938  if ( upstream_only_hit_iterators[ kk ] == upstream_only_hits[ kk ]->val().end() ) {
1939  outstanding_list_increment = true;
1940  } else {
1941  outstanding_list_increment = false;
1942  break;
1943  }
1944  }
1945  }
1946  //std::cout << "Incompatible at 1222 " << ii << " " << jj << std::endl;
1947  if ( outstanding_list_increment ) {
1948  // indicate to the calling function that we've visited all of the upstream-only hit combinations.
1949  last_upstream_only_geomcst_advanced = 0;
1950  }
1951  return true;
1952  } // if incompatible
1953  }
1954  }
1955  //std::cout << "Compatible at 1231" << std::endl;
1956  return false;
1957 }
1958 
1958 
1959 /// @details same as above, but operating on a match_dspos1: the upstream hits are read from
/// m1.upstream_hits (wrapped with fake_hit() where a Hit is required), and the
/// upstream/downstream clash check is made only against the geometric constraint recorded in
/// m1.originating_geom_cst_for_dspos, using full_hit( m1 ) as the downstream hit.
/// @param m1  The match (with a single downstream position) being tested.
/// @param upstream_only_hits  Per-geomcst hit lists for the upstream-only geometric constraints.
/// @param upstream_only_hit_iterators  Current position in each upstream-only hit list;
/// incremented in place when an incompatibility is found.
/// @param last_upstream_only_geomcst_advanced  Output: the index of the last geomcst whose
/// iterator was incremented, or 0 if the increment ran every upstream-only iterator it touched
/// to its list end.
/// @param processor  Supplies the up_down_filt() and up_coll_filt() hard-sphere filters.
/// @return true if an incompatibility was found (iterators advanced), false otherwise.
1960 bool
// NOTE(review): listing line 1961 (the function-name line) is absent from this listing;
// presumably Matcher::check_upstream_only_hit_incompatibility( -- confirm against the header.
1962  match_dspos1 const & m1,
1963  utility::vector1< HitPtrListCOP > const & upstream_only_hits,
1964  utility::vector1< std::list< Hit const * >::const_iterator > & upstream_only_hit_iterators,
1965  Size & last_upstream_only_geomcst_advanced,
1966  output::MatchProcessor const & processor
1967 ) const
1968 {
1969  /// Determine if any of the secondary hits are incompatible and therefore
1970  /// could not produce a match.
1971  /// If we do find an incompatibility, then "continue" at the most-significant
1972  /// dimension.
1973  bool incompatible( false ), outstanding_list_increment( false );
1974  for ( Size ii = 2; ii <= n_geometric_constraints_; ++ii ) {
1975  Size ii_bp( m1.upstream_hits[ ii ].scaffold_build_id() );
1976  for ( Size jj = 1; jj < ii; ++jj ) {
1977  Size jj_bp( m1.upstream_hits[ jj ].scaffold_build_id() );
1978 
1979  /// We have already checked for compatibility between non-upstream-only matches.
1980  if ( ! geomcst_is_upstream_only_[ ii ] && ! geomcst_is_upstream_only_[ jj ] ) continue;
1981 
     // The checks below are chained on !incompatible, so the first failing check
     // short-circuits the rest.
1982  if ( ! all_build_points_[ ii_bp ]->compatible( *all_build_points_[ jj_bp ] )) {
1983  incompatible = true;
1984  }
1985  if ( ! incompatible && ! upstream_builders_[ ii ]->compatible(
1986  fake_hit( m1.upstream_hits[ ii ] ), *all_build_points_[ ii_bp ],
1987  *upstream_builders_[ jj ], fake_hit( m1.upstream_hits[ jj ] ), *all_build_points_[ jj_bp ] )) {
1988  incompatible = true;
1989  }
1990 
1991  //upstream downstream clash check
1992  //better here than later to alleviate combinatorics
     // only the geomcst that defines the downstream position carries a downstream hit here
1993  if( !incompatible && geomcst_is_upstream_only_[ii] && (jj == m1.originating_geom_cst_for_dspos) ){
1994  if( !processor.up_down_filt()->passes_hardsphere_filter( ii, jj, fake_hit(m1.upstream_hits[ii]), full_hit(m1) ) ) incompatible = true;
1995  }
1996 
     // upstream vs. upstream clash; at least one of ii/jj is upstream-only at this point
     // because of the `continue` above
1997  if( !incompatible && ( geomcst_is_upstream_only_[ii] || geomcst_is_upstream_only_[jj] ) ){
1998  if( !processor.up_coll_filt()->passes_hardsphere_filter( ii, jj, fake_hit( m1.upstream_hits[ii]), fake_hit(m1.upstream_hits[jj]) ) ) incompatible = true;
1999  }
2000  //clash check over
2001 
2002  if ( incompatible ) {
2003  /// Increment the most-significant geom-cst to alleviate the incompatibility
2004  /// either ii or jj represents an upstream-only geom-cst.
2005  /// if it's not ii, then we start our incrementing at jj.
     // Walk downward over the upstream-only geomcsts: increment one iterator, and keep
     // going to the next lower upstream-only geomcst only while the iterator just
     // incremented hit the end of its list.
2006  for ( Size kk = ( ! geomcst_is_upstream_only_[ ii ] ? jj : ii ); kk >= 1; --kk ) {
2007  if ( geomcst_is_upstream_only_[ kk ] ) {
2008  ++upstream_only_hit_iterators[ kk ];
2009  last_upstream_only_geomcst_advanced = kk;
2010  if ( upstream_only_hit_iterators[ kk ] == upstream_only_hits[ kk ]->val().end() ) {
2011  outstanding_list_increment = true;
2012  } else {
2013  outstanding_list_increment = false;
2014  break;
2015  }
2016  }
2017  }
2018  //std::cout << "Incompatible at 1222 " << ii << " " << jj << std::endl;
2019  if ( outstanding_list_increment ) {
2020  // indicate to the calling function that we've visited all of the upstream-only hit combinations.
2021  last_upstream_only_geomcst_advanced = 0;
2022  }
2023  return true;
2024  } // if incompatible
2025  }
2026  }
2027  //std::cout << "Compatible at 1231" << std::endl;
2028  return false;
2029 }
2030 
2030 
2031 /// @details returns true if more upstream-only hit combinations remain,
2032 /// and false if there are no upstream-only hit combinations remaining
/// @param upstream_only_hits  Per-geomcst hit lists for the upstream-only geometric constraints.
/// @param starting_point  Index of the geometric constraint at which to begin incrementing;
/// the scan proceeds from here down to 1.
/// @param upstream_only_hit_iterators  Current position in each upstream-only hit list;
/// incremented in place.
/// @param last_upstream_only_geomcst_advanced  Output: index of the last upstream-only geomcst
/// whose iterator was incremented (left untouched if none was).
/// NOTE(review): an iterator that reaches its list end is not reset here; presumably the
/// caller re-initializes it -- confirm against the enumeration loop.
2033 bool
// NOTE(review): listing line 2034 (the function-name line) is absent from this listing;
// presumably Matcher::increment_upstream_only_hit_combination( -- confirm against the header.
2035  utility::vector1< HitPtrListCOP > const & upstream_only_hits,
2036  Size starting_point,
2037  utility::vector1< std::list< Hit const * >::const_iterator > & upstream_only_hit_iterators,
2038  Size & last_upstream_only_geomcst_advanced
2039 ) const
2040 {
     // Odometer-style increment over the upstream-only geomcsts, from starting_point downward:
     // stop at the first iterator that is still inside its list after being incremented.
2041  for ( Size ii = starting_point; ii >= 1; --ii ) {
2042  if ( geomcst_is_upstream_only_[ ii ] ) {
2043  ++upstream_only_hit_iterators[ ii ];
2044  last_upstream_only_geomcst_advanced = ii;
2045  if ( upstream_only_hit_iterators[ ii ] != upstream_only_hits[ ii ]->val().end() ) {
2046  return true; // more upstream-only-hit-combinations remain
2047  }
2048  }
2049  }
2050  return false; // we did not find an upstream-only geom-cst that was not at the end of its hit list.
2051 }
2052 
2052 
2053 
2054 /// @details This needs a major refactoring.
/// Top-level driver for match enumeration: builds a HitNeighborFinder for every
/// non-upstream-only geometric constraint, splits the hits of geometric constraint 1 into
/// connected components, and for each component gathers the neighboring hits of the other
/// constraints, optionally refines the grid, and hands the hit subsets to
/// process_matches_all_hit_combos_for_hit_subsets(). Per-component statistics are summed
/// and reported through the tracer at the end.
2055 void
// NOTE(review): listing line 2056 (the function name and parameter list) is absent from this
// listing; the body uses a variable `processor`, presumably an output::MatchProcessor
// parameter declared there -- confirm against the header.
2057 {
2058 
2059  /// TEMP! These variables need to be incremented by the process_matches_all_hit_combos_given_subsets routine
2060  //core::Size num_potential_matches(0), num_sent_to_proc(0),num_non_up_only_incompatible(0),num_up_only_incompatible(0), num_considered_muliple_origins(0), all_lex_states(0),num_ds_hit_incompatible(0), num_empty_uplist(0);
2061  MatcherOutputStats output_stats;
2062 
     // One finder per geometric constraint; entries for upstream-only constraints are left
     // default-constructed and never queried below.
2063  utility::vector1< HitNeighborFinder > finders( hits_.size() );
2064  for ( Size ii = 1; ii <= hits_.size(); ++ii ) {
2065  if ( geomcst_is_upstream_only_[ ii ] ) continue; // don't create HitNeighborFinders for upstream-only geometric constraints.
2066  finders[ii].set_bounding_box( occ_space_bounding_box_ );
2067  finders[ii].set_xyz_bin_widths( euclidean_bin_widths_ );
2068  finders[ii].set_euler_bin_widths( euler_bin_widths_ );
2069  finders[ii].initialize();
2070  finders[ii].add_hits( hits_[ ii ] );
2071  }
     // Connected components are taken from finder 1 only, which relies on geometric
     // constraint 1 not being upstream-only (otherwise finder 1 was never set up above).
2072  clock_t starttime = clock();
2073  utility::vector1< std::list< Hit const * > > hit_ccs = finders[ 1 ].connected_components();
2074  clock_t stoptime = clock();
2075  TR << "Found " << hit_ccs.size() << " connected component" << ( hit_ccs.size() != 1 ? "s" : "" ) << " in the hit neighbor graph in " << ((double) stoptime - starttime ) / CLOCKS_PER_SEC << " seconds." << std::endl;
2076  //TR << "CONNECTED COMPONENTS: " << hit_ccs.size() << std::endl;
2077  // -- this doesn't work -- don't turn it on -- #pragma omp parallel for
2078  for ( Size ii = 1; ii <= hit_ccs.size(); ++ii ) {
2079  //Size n_combos = hit_ccs[ ii ].size();
2080  //TR << "CC " << ii << " num neighbors: 1: " << hit_ccs[ ii ].size();
     // NOTE(review): listing line 2081, which presumably declares ii_neighbor_hits, is absent
     // from this listing.
2082  ii_neighbor_hits[ 1 ] = hit_ccs[ ii ]; // convenience: copy the list of hits in this CC.
2083  for ( Size jj = 2; jj <= hits_.size(); ++jj ) {
2084  if ( geomcst_is_upstream_only_[ jj ] ) continue; // no hits for upstream-only geometric constraints
2085  ii_neighbor_hits[ jj ] = finders[ jj ].neighbor_hits( hit_ccs[ ii ] );
2086  }
2087 
     // Start from the member bin widths; dynamic refinement may overwrite these per-component
     // copies and replace the hit lists with a subsample.
2088  Vector ii_euclidean_bin_widths( euclidean_bin_widths_ ), ii_euler_bin_widths( euler_bin_widths_ );
2089  if ( dynamic_grid_refinement_ ) {
2090  ii_neighbor_hits = refine_grid_and_subsample_for_hit_subsets( ii_euclidean_bin_widths, ii_euler_bin_widths, ii_neighbor_hits );
2091  }
2092 
2093  /// Create the hit hasher and then proceed to enumerate all match combos.
     // NOTE(review): listing lines 2095 and 2098 (further hit_hasher configuration calls,
     // presumably) are absent from this listing.
2094  HitHasher hit_hasher;
2096  hit_hasher.set_xyz_bin_widths( ii_euclidean_bin_widths );
2097  hit_hasher.set_euler_bin_widths( ii_euler_bin_widths );
2099  hit_hasher.initialize();
2100 
2101  MatcherOutputStats ii_outstats = process_matches_all_hit_combos_for_hit_subsets( processor, hit_hasher, ii_neighbor_hits );
2102  output_stats += ii_outstats;
2103 
2104  /// Iterate across all 64 definitions of the origin to find all matches for this connected component.
2105  }
2106 
2107  TR << "Match enumeration statistics: ";
2108  TR << " num_potential_matches: " << output_stats.num_potential_matches <<
2109  "; all_lex_states " << output_stats.all_lex_states <<
2110  "; num_considered_muliple_origins " << output_stats.num_considered_muliple_origins <<
2111  "; num_non_up_only_incompatible " << output_stats.num_non_up_only_incompatible <<
2112  "; num_ds_hit_incompatible " << output_stats.num_ds_hit_incompatible <<
2113  "; num_up_only_incompatible " << output_stats.num_up_only_incompatible <<
2114  "; num_sent_to_proc " << output_stats.num_sent_to_proc << std::endl;
2115  TR << output_stats.num_empty_uplist << " empty uplists were observed." << std::endl;
2116 
2117 }
2118 
2118 
2121  Vector & good_euclidean_bin_widths,
2122  Vector & good_euler_bin_widths,
2123  utility::vector1< std::list< Hit const * > > const & neighbor_hits
2124 ) const
2125 {
2126  Size const acceptable( 500000 ); // "acceptible" must be larger than "take_it"
2127  Size const take_it( 300000 );
2128  Size const bare_minimum( 20 );
2129 
2130  Size n_combos = predict_n_matches_for_hit_subsets( euclidean_bin_widths_, euler_bin_widths_, neighbor_hits, take_it );
2131  Size const initial_n_combos = n_combos;
2132 
2133  if ( n_combos > take_it ) {
2136 
2137  Size n_combos = predict_n_matches_for_hit_subsets( euclidean_bin_widths_, euler_bin_widths_, good_subsamples, take_it );
2138  Size last_n_combos = n_combos;
2139  TR << "subsampling would produce " << n_combos << " matches down from " << initial_n_combos << " matches." << std::endl;
2140 
2141  Size count_refinement( 0 );
2142  bool found_acceptible( false );
2143  while ( n_combos > take_it ) {
2144 
2145  Vector test_euclidean_bin_widths( good_euclidean_bin_widths ), test_euler_bin_widths( good_euler_bin_widths );
2146  test_euclidean_bin_widths *= 0.75;
2147  test_euler_bin_widths *= 0.75;
2148  subsamples = subsample_hits( test_euclidean_bin_widths, test_euler_bin_widths, neighbor_hits ); // subsample the ORIGINAL set of hits
2149  n_combos = predict_n_matches_for_hit_subsets( test_euclidean_bin_widths, test_euler_bin_widths, good_subsamples, take_it );
2150 
2151  TR << "Grid refinement #" << count_refinement + 1 << " predicts " << n_combos << " matches." << std::endl;
2152 
2153  if ( n_combos > take_it ) {
2154  good_euclidean_bin_widths = test_euclidean_bin_widths;
2155  good_euler_bin_widths = test_euler_bin_widths;
2156  good_subsamples = subsamples;
2157  last_n_combos = n_combos;
2158  ++count_refinement;
2159  if ( n_combos < acceptable ) found_acceptible = true;
2160  } else if ( ! found_acceptible && n_combos > bare_minimum ) {
2161  // Imagine a case where we had 2 Billion matches from the last round, and 20 matches in this round.
2162  // go ahead and take this refinement
2163  good_euclidean_bin_widths = test_euclidean_bin_widths;
2164  good_euler_bin_widths = test_euler_bin_widths;
2165  good_subsamples = subsamples; // expensive copy, avoid if we just made the grid smaller
2166  last_n_combos = n_combos;
2167  ++count_refinement;
2168  } else if ( ! found_acceptible ) {
2169  // num combos < bare_minimum; try one last time with a slightly larger grid size:
2170  // NOTE we don't want to iterate through the outer while loop another time after we make this
2171  // refinement or we could get stuck in an infinite loop.
2172  Vector test_euclidean_bin_widths2( good_euclidean_bin_widths ), test_euler_bin_widths2( good_euler_bin_widths );
2173  test_euclidean_bin_widths2 *= 0.9;
2174  test_euler_bin_widths2 *= 0.9;
2175  subsamples = subsample_hits( test_euclidean_bin_widths2, test_euler_bin_widths2, neighbor_hits ); // subsample the ORIGINAL set of hits
2176  n_combos = predict_n_matches_for_hit_subsets( test_euclidean_bin_widths2, test_euler_bin_widths2, subsamples, take_it );
2177 
2178  TR << "(Partial) Grid refinement #" << count_refinement + 1 << " predicts " << n_combos << " matches." << std::endl;
2179  if ( n_combos > bare_minimum ) {
2180  good_euclidean_bin_widths = test_euclidean_bin_widths2;
2181  good_euler_bin_widths = test_euler_bin_widths2;
2182  good_subsamples = subsamples;
2183  last_n_combos = n_combos;
2184  ++count_refinement;
2185  } else {
2186  TR << "Failed to refine grid to acceptible number of matches" << std::endl;
2187  }
2188  break; // we must exit this loop now.
2189  } else {
2190  TR << "Too fine a grid: " << n_combos << " matches predicted." << std::endl;
2191  }
2192  }
2193  TR << "Dynamic grid refinement predicts " << last_n_combos << " matches (down from an initial number of " << initial_n_combos << " matches) after " << count_refinement << " grid refinements" << std::endl;
2194 
2195  return good_subsamples;
2196  } else {
2197  return neighbor_hits;
2198  }
2199 
2200  return neighbor_hits;
2201 }
2202 
/// @brief Subsample a set of hits, keeping one randomly chosen hit per geometric constraint,
/// per half-width bin, per upstream build point.
/// @details The hits of every non-upstream-only geometric constraint are hashed with bin
/// widths half as large as the ones given. Within each occupied bin, the hits of a geometric
/// constraint are grouped by the value of hit->first()[ 1 ] (treated here as the upstream
/// build point), and a single hit is drawn at random from each group.
/// @param euclidean_bin_widths  Euclidean bin widths; halved before hashing.
/// @param euler_bin_widths  Euler-angle bin widths; halved before hashing.
/// @param neighbor_hits  Per-geometric-constraint lists of hits to subsample.
/// @return The selected hits, indexed by geometric constraint.
/// NOTE(review): listing lines 2203-2204 (presumably the return type and the function-name
/// line) are absent from this listing; callers in this file invoke this routine as
/// subsample_hits( ... ).
2205  Vector const & euclidean_bin_widths,
2206  Vector const & euler_bin_widths,
2207  utility::vector1< std::list< Hit const * > > const & neighbor_hits
2208 ) const
2209 {
2210  /// Take 1 hit per geomcst per halfbin
     // NOTE(review): listing line 2211, which presumably declares `subsamples`, is absent
     // from this listing.
2212 
2213  Vector half_width_euclid = 0.5 * euclidean_bin_widths;
2214  Vector half_width_euler = 0.5 * euler_bin_widths;
2215 
2216 
     // NOTE(review): listing lines 2218 and 2221 (further hit_hasher configuration calls,
     // presumably) are absent from this listing.
2217  HitHasher hit_hasher;
2219  hit_hasher.set_xyz_bin_widths( half_width_euclid );
2220  hit_hasher.set_euler_bin_widths( half_width_euler );
2222  hit_hasher.initialize();
2223 
     // Hash every hit using only the first hash (index 1) of the hit hasher.
2224  for ( Size ii = 1; ii <= n_geometric_constraints_; ++ii ) {
2225  if ( geomcst_is_upstream_only_[ ii ] ) continue;
2226  for ( std::list< Hit const * >::const_iterator
2227  hit_iter = neighbor_hits[ ii ].begin(),
2228  hit_iter_end = neighbor_hits[ ii ].end();
2229  hit_iter != hit_iter_end; ++hit_iter ) {
2230  hit_hasher.insert_hit( 1, ii, *hit_iter );
2231  }
2232  }
2233 
     // Visit each occupied bin and pick the representatives for each geometric constraint.
2234  for ( HitHasher::HitHash::const_iterator halfbin_iter = hit_hasher.hit_hash_begin( 1 );
2235  halfbin_iter != hit_hasher.hit_hash_end( 1 ); ++halfbin_iter ) {
2236  HitHasher::MatchSet const & matches = halfbin_iter->second;
2237  for ( Size ii = 1; ii <= n_geometric_constraints_; ++ii ) {
2238  if ( geomcst_is_upstream_only_[ ii ] ) continue;
2239  if ( matches[ ii ].begin() == matches[ ii ].end() ) continue;
2240 
2241  // pick one hit at random for each hit coming from a different upstream build point
2242  // first -- check if there are multiple upstream build points.
     // build_point == 0 doubles as "not yet set", which presumes valid build-point ids are nonzero
2243  Size build_point = 0;
2244  bool all_same( true );
2245  for ( std::list< Hit const * >::const_iterator
2246  hit_iter = matches[ ii ].begin(), hit_iter_end = matches[ ii ].end();
2247  hit_iter != hit_iter_end; ++hit_iter ) {
2248  if ( build_point == 0 ) {
2249  build_point = (*hit_iter)->first()[ 1 ];
2250  } else if ( build_point != (*hit_iter)->first()[ 1 ] ) {
2251  all_same = false;
2252  break;
2253  }
2254  }
2255  if ( all_same ) {
     // fast path: a single build point in this bin, so no grouping map is needed
2256  Size len = matches[ ii ].size();
2257  if ( len == 1 ) {
2258  subsamples[ ii ].push_back( * matches[ ii ].begin() );
2259  } else {
2260  // pick a random number between 0 and len-1 and increment through the list of hits
2261  // that many times to pick a single random hit.
     // NOTE(review): this stays inside the list only if RG.uniform() is strictly less than 1 -- confirm
2262  std::list< Hit const * >::const_iterator hit_iter = matches[ ii ].begin();
2263  Size nsteps = static_cast< Size > ( RG.uniform() * len );
2264  for ( Size jj = 1; jj <= nsteps; ++jj ) ++hit_iter;
2265  subsamples[ ii ].push_back( * hit_iter );
2266  }
2267  } else {
2268  /// insert the hits into an STL map based on the build position; then grab one hit at random from each
2269  std::map< Size, std::list< Hit const * > > buildpos_hitmap;
2270  for ( std::list< Hit const * >::const_iterator
2271  hit_iter = matches[ ii ].begin(), hit_iter_end = matches[ ii ].end();
2272  hit_iter != hit_iter_end; ++hit_iter ) {
2273  buildpos_hitmap[ (*hit_iter)->first()[ 1 ] ].push_back( *hit_iter );
2274  }
2275  for ( std::map< Size, std::list< Hit const * > >::const_iterator
2276  builditer = buildpos_hitmap.begin(), builditer_end = buildpos_hitmap.end();
2277  builditer != builditer_end; ++builditer ) {
2278  Size len = builditer->second.size();
2279  if ( len == 1 ) {
2280  subsamples[ ii ].push_back( * builditer->second.begin() );
2281  } else {
2282  // pick a random number between 0 and len-1 and increment through the list of hits
2283  // that many times to pick a single random hit.
2284  std::list< Hit const * >::const_iterator hit_iter = builditer->second.begin();
2285  Size nsteps = static_cast< Size > ( RG.uniform() * len );
2286  for ( Size jj = 1; jj <= nsteps; ++jj ) ++hit_iter;
2287  subsamples[ ii ].push_back( * hit_iter );
2288  }
2289 
2290  }
2291  }
2292  }
2293  }
2294  return subsamples;
2295 }
2296 
2296 
/// @brief Estimate the number of matches (hit combinations) that a set of hits would produce.
/// @details First forms a cheap upper estimate: the product of the hit counts of all
/// non-upstream-only geometric constraints, accumulated in log space. If that estimate is
/// below accuracy_threshold it is returned (plus one); otherwise the hits are handed to a
/// MatchCounter configured with the given bin widths, and its count is returned.
/// @param euclidean_bin_widths  Euclidean bin widths given to the MatchCounter.
/// @param euler_bin_widths  Euler-angle bin widths given to the MatchCounter.
/// @param neighbor_hits  Per-geometric-constraint hit lists; the list for geometric
/// constraint 1 must not be empty (asserted).
/// @param accuracy_threshold  Estimates below this value are returned without running the
/// MatchCounter.
/// @return 0 if any non-upstream-only geometric constraint (from the second on) has no hits;
/// otherwise the estimate described above.
/// NOTE(review): listing lines 2297-2298 (presumably the return type and the function-name
/// line) are absent from this listing; callers in this file invoke this routine as
/// predict_n_matches_for_hit_subsets( ... ) and store the result in a Size.
2299  Vector const & euclidean_bin_widths,
2300  Vector const & euler_bin_widths,
2301  utility::vector1< std::list< Hit const * > > const & neighbor_hits,
2302  Size accuracy_threshold
2303 ) const
2304 {
2305  assert( ! neighbor_hits[ 1 ].empty() );
2306 
2307  /// 1. First approximation: just add the log of the number of hits in all bins.
2308  /// If this is less than the log of the accuracy threshold, then don't bother computing a more accurate estimate.
2309  Real log_n_combos = std::log( (double) neighbor_hits[ 1 ].size() );
2310  TR << "predict_n_matches_for_hit_subsets with #hits for each geomcst: " << neighbor_hits[ 1 ].size();
2311  for ( Size ii = 2; ii <= n_geometric_constraints_; ++ii ) {
2312  if ( ! geomcst_is_upstream_only_[ ii ] ) {
2313  Size ii_nhits = neighbor_hits[ ii ].size();
2314  if ( ii_nhits == 0 ) { TR << std::endl; return 0; } // Quit now, no hits for this non-upstream-only geometric constraint
2315  log_n_combos += std::log( (double) ii_nhits ) ;
2316  TR << " " << ii_nhits;
2317  }
2318  //std::cout << "blah 1 " << std::endl;
2319  }
2320  TR << std::endl;
     // the +1 compensates for truncation toward zero in the cast
2321  if ( log_n_combos < std::log( (double) accuracy_threshold ) ) return static_cast< Size > ( exp( log_n_combos )) + 1; // good enough approximation
2322 
2323 
     /// 2. More accurate estimate: count the matches with a MatchCounter.
2324  MatchCounter match_counter;
2325  match_counter.set_bounding_box( occ_space_bounding_box_ );
2326  match_counter.set_xyz_bin_widths( euclidean_bin_widths );
2327  match_counter.set_euler_bin_widths( euler_bin_widths );
2328 
     // The MatchCounter only knows about the non-upstream-only geometric constraints, so they
     // are renumbered consecutively: first count them, then feed each one its hits under its
     // new index.
2329  Size count_non_upstream_only_hits( 0 );
2330  for ( Size ii = 1; ii <= n_geometric_constraints_; ++ii ) if ( ! geomcst_is_upstream_only_[ ii ] ) ++count_non_upstream_only_hits;
2331  match_counter.set_n_geometric_constraints( count_non_upstream_only_hits );
2332 
2333  match_counter.initialize();
2334 
2335  count_non_upstream_only_hits = 0;
2336  for ( Size ii = 1; ii <= n_geometric_constraints_; ++ii ) {
2337  if ( ! geomcst_is_upstream_only_[ ii ] ) {
2338  ++count_non_upstream_only_hits;
2339  match_counter.add_hits( count_non_upstream_only_hits, neighbor_hits[ ii ] );
2340  }
2341  }
2342  return match_counter.count_n_matches(); // the heavy lifting happens inside here
2343 }
2344 
2344 
2345 /// This loop iterates across all 64 origin definitions (loop "ii"), inserts the "neighbor hits" into
2346 /// each of the hashes. It then iterates across all the bins in the hash map (loop "iter"), and
2347 /// then enumerates all combinations of hits (loop "lex") for the non-upstream-only geometric constraints.
2348 /// For each combination of hits (a partial match if there are any upstream-only geometric constraints),
2349 /// it then retrieves all upstream-only hits, and iterates across all combinations of upstream-only
2350 /// hits for this match (loop "true"). Then, for every fully-constructed match,
2353  output::MatchProcessor & processor,
2354  HitHasher & hit_hasher,
2355  utility::vector1< std::list< Hit const * > > const & neighbor_hits
2356 ) const
2357 {
2358  /// TEMP: declare these variables here -- eventually, return them
2359  ///core::Size num_potential_matches(0), num_sent_to_proc(0),
2360  ///num_non_up_only_incompatible(0),num_up_only_incompatible(0),
2361  ///num_considered_muliple_origins(0), all_lex_states(0),
2362  ///num_ds_hit_incompatible(0), num_empty_uplist(0);
2363  MatcherOutputStats ostats;
2364 
2365  //MatchOutputTracker tracker; // APL new space saving algorithm does not require the match output tracker!
2368  match_dspos1 m1( n_geometric_constraints_ ); // for finding upstream-only hits
2369 
2370  for ( Size ii = 1; ii <= 64; ++ii ) {
2371 
2372  /// Initialize the hit-hasher's hash map with the ii'th definition of the origin.
2373  /// Significant memory savings by deleting the ii'th hash map at the conclusion
2374  /// of the ii'th iteration, instead of having all 64 hash maps in memory at the same time.
2375  for ( Size jj = 1; jj <= n_geometric_constraints_; ++jj ) {
2376  if ( ! geomcst_is_upstream_only_[ jj ] ) {
2377  for ( std::list< Hit const * >::const_iterator iter = neighbor_hits[ jj ].begin(),
2378  iter_end = neighbor_hits[ jj ].end(); iter != iter_end; ++iter ) {
2379  hit_hasher.insert_hit( ii, jj, (*iter) );
2380  //hit_scaff_build_pts[ jj ][ iter->first()[ 1 ] ] = true;
2381  }
2382  }
2383  /// else { noop }, do not hash upstream-only hits; such hits are enumerated separately
2384  }
2385 
2386  for ( HitHasher::HitHash::const_iterator
2387  iter = hit_hasher.hit_hash_begin( ii ),
2388  iter_end = hit_hasher.hit_hash_end( ii );
2389  iter != iter_end; ++iter ) {
2390 
2391  /// MATCHES!
2392  HitHasher::MatchSet const & match_set( iter->second );
2393 
2394  utility::vector1< Size > n_hits_per_geomcst( n_geometric_constraints_ );
2396  // representative hits, if we select a subset of hits for each geometric constraint
2397  // enumerating all combinations of hits can be very very expensive.
2399  /// hits from upstream-only downstream algorithms
2402 
2403  /// First check that there is at least one hit per geometric constraint;
2404  /// go ahead and initialize the hit_vectors array at the same time.
2405  bool any_size_zero( false );
2406  for ( Size jj = 1; jj <= n_geometric_constraints_; ++jj ) {
2407  if ( ! geomcst_is_upstream_only_[ jj ] ) {
2408  n_hits_per_geomcst[ jj ] = match_set[ jj ].size();
2409  any_size_zero |= n_hits_per_geomcst[ jj ] == 0;
2410  hit_vectors[ jj ].resize( n_hits_per_geomcst[ jj ] );
2411  std::copy( match_set[ jj ].begin(), match_set[ jj ].end(), hit_vectors[ jj ].begin());
2412  } else {
2413  n_hits_per_geomcst[ jj ] = 1; // for the lex, indicate there's only a single value for geomcst jj
2414  }
2415  }
2416  if ( any_size_zero ) continue; // no matches possible in this voxel.
2417 
2418  select_hit_representatives( hit_vectors, n_hits_per_geomcst, reps );
2419 
2420  /// Prepare to iterate across all combinations of hits.
2421  utility::LexicographicalIterator lex( n_hits_per_geomcst );
2422  ostats.all_lex_states += lex.num_states_total();
2423 
2424  while ( ! lex.at_end() ) {
2425  ++ostats.num_potential_matches;
2426 
2427  /// Assemble the (partial) match
2428  for ( Size jj = 1; jj <= n_geometric_constraints_; ++jj ) {
2429  if ( ! geomcst_is_upstream_only_[ jj ] ) {
2430  mlite[ jj ] = hit_vectors[ jj ][ reps[jj][lex[ jj ]] ];
2431  m[ jj ] = *(hit_vectors[ jj ][ reps[jj][lex[ jj ]] ] );
2432  m1.upstream_hits[ jj ].copy_hit( m[ jj ] );
2433  }
2434  else mlite[jj] = NULL; //null pointer to ensure output tracking works
2435  }
2436 
2437  /// if any of the non-upstream-only hits are incompatible, increment the lex
2438  /// and proceed to the next combination of hits
2439  if ( check_non_upstream_only_hit_incompatibility( m1, lex, processor ) ){
2441  continue;
2442  }
2445  ostats.num_ds_hit_incompatible++;
2446  continue;
2447  }
2448 
2449  numeric::geometry::hashing::Bin6D lower_halfbin_spanned( 1 );
2450  for ( Size jj = 1; jj <= n_geometric_constraints_; ++jj ) {
2451  if ( ! geomcst_is_upstream_only_[ jj ] ) {
2452  numeric::geometry::hashing::Bin6D hit_halfbin = hit_hasher.binner( ii ).halfbin6( m[ jj ].second() );
2453  for ( Size kk = 1; kk <= 6; ++kk ) {
2454  lower_halfbin_spanned[ kk ] *= hit_halfbin[ kk ]; // once it goes to zero, it stays at zero.
2455  }
2456  }
2457  }
2458  bool go_to_next_match( false );
2459  for ( Size jj = 1; jj <= 6; ++jj ) {
2460  if ( lower_halfbin_spanned[ jj ] != 0 ) {
2461  /// We've already seen this match or we'll see it again in a later context; don't process now.
2462  go_to_next_match = true;
2463  break;
2464  }
2465  }
2466  if ( go_to_next_match ) {
2467  ++lex;
2468  continue;
2469  }
2470 
2471  /*if( tracker.match_has_been_output( mlite ) ){
2472  ostats.num_considered_muliple_origins++;
2473  ++lex;
2474  continue;
2475  }
2476  tracker.note_output_match( mlite );*/
2477  /// Now descend into the upstream-only hits (and if there are none, output the singular
2478  /// match combination from the set of non-upstream-only hits)
2479  // "inner lex" traversed here -- we're enumerating all the combinations of upstream-only hits
2480  Size last_upstream_only_geomcst_advanced( 0 );
2481  while ( true ) {
2482  if ( last_upstream_only_geomcst_advanced != 0 ) {
2483  Size const lgca = last_upstream_only_geomcst_advanced; // brevity
2484  mlite[ lgca ] = *upstream_only_hit_iterators[ lgca ];
2485  m[ lgca ] = **upstream_only_hit_iterators[ lgca ];
2486  m1.upstream_hits[ lgca ].copy_hit( m[ lgca ] );
2487  }
2488  Size empty_hitlist_id( 0 );
2489  for ( Size jj = last_upstream_only_geomcst_advanced + 1; jj <= n_geometric_constraints_; ++jj ) {
2490  if ( geomcst_is_upstream_only_[ jj ] ) {
2491  upstream_only_hits[ jj ] = representative_downstream_algorithm_[ jj ]->
2492  hits_to_include_with_partial_match( m1 );
2493  if ( upstream_only_hits[ jj ] == 0 ) {
2494  empty_hitlist_id = jj;
2495  break;
2496  }
2497  upstream_only_hit_iterators[ jj ] = upstream_only_hits[ jj ]->val().begin();
2498  mlite[ jj ] = *upstream_only_hit_iterators[ jj ];
2499  m[ jj ] = **upstream_only_hit_iterators[ jj ];
2500  m1.upstream_hits[ jj ].copy_hit( m[ jj ] );
2501  last_upstream_only_geomcst_advanced = jj;
2502  }
2503  }
2504  if ( empty_hitlist_id != 0 ) {
2505  ostats.num_empty_uplist++;
2506  // advance the upstream_only iterators;
2507  if( ! increment_upstream_only_hit_combination( upstream_only_hits, empty_hitlist_id - 1,
2508  upstream_only_hit_iterators, last_upstream_only_geomcst_advanced ) ) break;
2509  continue;
2510  }
2511 
2513  m, upstream_only_hits, upstream_only_hit_iterators,
2514  last_upstream_only_geomcst_advanced, processor ) ) {
2515  // we have an incompatibility; break if we're at the end of the upstream-only hit combos
2516  ostats.num_up_only_incompatible++;
2517  if ( last_upstream_only_geomcst_advanced == 0 ) break;
2518  continue;
2519  }
2520 
2521  ostats.num_sent_to_proc++;
2522  processor.process_match( m );
2523 
2525  upstream_only_hits,
2527  upstream_only_hit_iterators,
2528  last_upstream_only_geomcst_advanced )) {
2529  break;
2530  }
2531  } //while (true ), iteration over upstream only hits
2532  ++lex;
2533  } // while ( ! lex.at_end() )
2534  } //iteration over hit hashes
2535 
2536  hit_hasher.clear_hash_map( ii );
2537  } //loop over 64 definitions of 6D hash origin
2538 
2539  TR << "Processed " << ostats.num_sent_to_proc << " matches" << std::endl;
2540  return ostats;
2541 }
2542 
/// @brief Enumerate matches in which one geometric constraint (each constraint flagged in
/// output_match_dspos1_for_geomcst_, taken in turn) contributes its full hit while every other
/// non-upstream-only constraint contributes only an upstream-rotamer representative hit.
/// Upstream-only constraints are enumerated separately in an inner loop.
/// @param processor  receives, via process_match(), each match the MatchOutputTracker has
/// not already recorded.
/// @note The line carrying this function's name is not visible in this listing; the commented-out
/// tracer message below suggests process_matches_where_one_geomcst_defines_downstream_location
/// -- TODO confirm.
2543 void
2545  output::MatchProcessor & processor
2546 ) const
2547 {
 // Map key: ( scaffold_build_id, upstream_conf_id ); mapped value: the hit chosen to
 // stand in for every hit that shares that upstream rotamer.
2548  typedef utility::fixedsizearray1< Size, 2 > Size2;
2549  typedef utility::OrderedTuple< Size2 > Size2Tuple;
2550  typedef std::map< Size2Tuple, Hit const * > UpstreamRotamerRepresentativeMap;
2551  typedef UpstreamRotamerRepresentativeMap::const_iterator UpRotRepMapConstIterator;
2552 
2554 
 // For each non-upstream-only geometric constraint, keep a pointer to the first hit
 // encountered for each distinct upstream rotamer.
2555  for ( Size ii = 1; ii <= n_geometric_constraints_; ++ii ) {
2556  if ( ! geomcst_is_upstream_only_[ ii ] ) {
2557  for ( std::list< Hit >::const_iterator hititer = hits_[ ii ].begin(),
2558  hititer_end = hits_[ ii ].end(); hititer != hititer_end; ++hititer ) {
2559  Size2 rotid;
2560  rotid[ 1 ] = hititer->scaffold_build_id();
2561  rotid[ 2 ] = hititer->upstream_conf_id();
2562  UpRotRepMapConstIterator representative = upstream_rotamer_representatives[ ii ].find( rotid );
2563  if ( representative == upstream_rotamer_representatives[ ii ].end() ) {
2564  upstream_rotamer_representatives[ ii ][ rotid ] = & (*hititer);
2565  }
2566  }
2567  }
2568  }
2569 
2571 
2572 
2573  TR << "Begining match enumeration:" << std::endl;
2574  for ( Size ii = 1; ii <= n_geometric_constraints_; ++ii ) {
2575  TR << "Geometric constraint " << ii << " produced " << hits_[ ii ].size() << " hits" << std::endl;
2576  }
2577 
2578  TR << "Begining examination of each of the 64 definitions of the 6D-hash-grid origin:" << std::endl;
2579  //TR << "process_matches_where_one_geomcst_defines_downstream_location" << std::endl;
2580 
 // One HitNeighborFinder per entry of hits_, each configured with the same bounding box
 // and bin widths and then loaded with that entry's hits.
2581  utility::vector1< HitNeighborFinder > finders( hits_.size() );
2582  for ( Size ii = 1; ii <= hits_.size(); ++ii ) {
2583  finders[ii].set_bounding_box( occ_space_bounding_box_ );
2584  finders[ii].set_xyz_bin_widths( euclidean_bin_widths_ );
2585  finders[ii].set_euler_bin_widths( euler_bin_widths_ );
2586  finders[ii].initialize();
2587  finders[ii].add_hits( hits_[ ii ] );
2588  }
 // Connected components are taken from the finder for geometric constraint 1 only; the
 // remaining finders are queried below for the hits neighboring each component.
2589  utility::vector1< std::list< Hit const * > > hit_ccs = finders[ 1 ].connected_components();
2590  TR << "CONNECTED COMPONENTS: " << hit_ccs.size() << std::endl;
2591  for ( Size ii = 1; ii <= hit_ccs.size(); ++ii ) {
 // n_combos: product of the per-constraint hit counts (upstream-only constraints excluded);
 // in the visible code it is only reported in the diagnostic output.
2592  Size n_combos = hit_ccs[ ii ].size();
2593  std::cout << "CC " << ii << " num neighbors: 1: " << hit_ccs[ ii ].size();
2595  ii_neighbor_hits[ 1 ] = hit_ccs[ ii ]; // convenience: copy the list of hits in this CC.
2596  for ( Size jj = 2; jj <= hits_.size(); ++jj ) {
2597  ii_neighbor_hits[ jj ] = finders[ jj ].neighbor_hits( hit_ccs[ ii ] );
2598  Size jj_nhits = ii_neighbor_hits[ jj ].size();
2599  if ( ! geomcst_is_upstream_only_[ jj ] ) n_combos *= jj_nhits;
2600  std::cout << " " << jj << "= " << jj_nhits ;
2601  }
2602  std::cout << " ncombos: " << n_combos << std::endl;
2603 
2604  HitHasher hit_hasher;
2608 
2610  hit_hasher.initialize();
2611 
 // The tracker is created once per connected component, so duplicate suppression
 // spans all 64 origin definitions examined for this component.
2612  MatchOutputTracker tracker;
2613  //match m( n_geometric_constraints_ );
2616 
2617  /// The match lite where each hit but one has been replaced by its
2618  /// upstream-rotamer-representative pointer. This match-lite prevents repetition
2619  /// of the match_dspos1 outputs.
2620  match_lite mliteprime( n_geometric_constraints_ );
2621 
2622 
 // jj indexes the 64 definitions of the 6D hash-grid origin.
2623  for ( Size jj = 1; jj <= 64; ++jj ) {
 // NOTE(review): the hasher is filled from the complete hits_[ kk ] list on every pass
 // over a connected component; in the visible code ii_neighbor_hits only feeds the
 // printed counts above. Confirm whether the per-component neighbor hits were meant
 // to be hashed here instead.
2624  for ( Size kk = 1; kk <= n_geometric_constraints_; ++kk ) {
2625  if ( ! geomcst_is_upstream_only_[ kk ] ) {
2626  for ( std::list< Hit >::const_iterator iter = hits_[ kk ].begin(), iter_end = hits_[ kk ].end();
2627  iter != iter_end; ++iter ) {
2628  hit_hasher.insert_hit( jj, kk, & (*iter) );
2629  hit_scaff_build_pts[ kk ][ iter->first()[ 1 ] ] = true;
2630  }
2631  } /// else noop, do not hash upstream-only hits; such hits are enumerated separately
2632  }
2633 
2634  for ( HitHasher::HitHash::const_iterator
2635  iter = hit_hasher.hit_hash_begin( jj ),
2636  iter_end = hit_hasher.hit_hash_end( jj );
2637  iter != iter_end; ++iter ) {
2638 
2639  /// MATCHES!
2640  HitHasher::MatchSet const & match_set( iter->second );
2641 
2642  utility::vector1< Size > n_hits_per_geomcst( n_geometric_constraints_ );
2644  //utility::vector1< utility::vector1< Size > > reps( n_geometric_constraints_ ); //representatives
2645 
2646  utility::vector1< Size > n_unique_upstream_rots( n_geometric_constraints_ );
2648  /// hits from upstream-only downstream algorithms
2651 
 // A match needs at least one hit from every non-upstream-only constraint; stop
 // scanning this match set as soon as one constraint has none.
2652  bool any_size_zero( false );
2653  for ( Size kk = 1; kk <= n_geometric_constraints_; ++kk ) {
2654  if ( ! geomcst_is_upstream_only_[ kk ] ) {
2655  n_hits_per_geomcst[ kk ] = match_set[ kk ].size();
2656  if ( n_hits_per_geomcst[ kk ] == 0 ) {
2657  any_size_zero = true;
2658  break;
2659  }
2660  hit_vectors[ kk ].resize( n_hits_per_geomcst[ kk ] );
2661  std::copy( match_set[ kk ].begin(), match_set[ kk ].end(), hit_vectors[ kk ].begin());
2662 
 // Replace every hit by its upstream-rotamer representative, then sort() + unique()
 // the pointer list so each representative is kept exactly once.
2663  std::list< Hit const * > hit_representatives;
2664  for ( Size ll = 1; ll <= match_set[ kk ].size(); ++ll ) {
2665  Size2 upstream_rotamer;
2666  upstream_rotamer[ 1 ] = hit_vectors[ kk ][ ll ]->scaffold_build_id();
2667  upstream_rotamer[ 2 ] = hit_vectors[ kk ][ ll ]->upstream_conf_id();
2668  hit_representatives.push_back( upstream_rotamer_representatives[ kk ][ upstream_rotamer ] );
2669  }
2670  hit_representatives.sort();
2671  hit_representatives.unique();
2672  n_unique_upstream_rots[ kk ] = hit_representatives.size();
2673  unique_hit_representatives[ kk ].resize( n_unique_upstream_rots[ kk ] );
2674  std::copy( hit_representatives.begin(), hit_representatives.end(), unique_hit_representatives[ kk ].begin() );
2675  } else {
2676  n_unique_upstream_rots[ kk ] = 1; // to mollify the lex
2677  }
2678  }
2679  if ( any_size_zero ) continue;
2680 
2681  utility::vector1< Size > n_hits_to_enumerate( n_geometric_constraints_ );
2682 
 // kk is the constraint whose full hit is used for the match; only constraints flagged
 // in output_match_dspos1_for_geomcst_ are considered.
2683  for ( Size kk = 1; kk <= n_geometric_constraints_; ++kk ) {
2684  if ( ! output_match_dspos1_for_geomcst_[ kk ] ) continue;
2685  runtime_assert( ! geomcst_is_upstream_only_[ kk ] ); // THIS DOESN'T BELONG HERE. FIND IT A HOME!
2687 
2688  /// For the geometric constraint being examined, construct the counts
2689  /// of unique matches where the location of the downstream partner is
2690  /// irrelevant for all other geometric constraints -- only the rotamer id of the
2691  /// upstream residue is relevant.
2692  for ( Size ll = 1; ll <= n_geometric_constraints_; ++ll ) {
2693  if ( kk == ll ) {
2694  n_hits_to_enumerate[ ll ] = n_hits_per_geomcst[ ll ];
2695  } else {
2696  n_hits_to_enumerate[ ll ] = n_unique_upstream_rots[ ll ];
2697  }
2698  }
2699 
2700  utility::LexicographicalIterator lex( n_hits_to_enumerate );
2701 
2702  while ( ! lex.at_end() ) {
2703  /// Assemble the match
2704  for ( Size ll = 1; ll <= n_geometric_constraints_; ++ll ) {
2705  if ( ! geomcst_is_upstream_only_[ ll ] ) {
2706  if ( ll == kk ) {
 // Constraint kk supplies the complete hit, including the downstream
 // conformation id and the downstream position stored in m1.
2707  mlite[ ll ] = hit_vectors[ ll ][ lex[ll] ];
2708  //m[ ll ] = *(hit_vectors[ ll ][ lex[ll] ] );
2709  m1.upstream_hits[ ll ] = upstream_hit( *(hit_vectors[ ll ][ lex[ll] ] ));
2710  m1.downstream_conf_id = hit_vectors[ ll ][ lex[ll] ]->downstream_conf_id();
2711  m1.dspos = hit_vectors[ ll ][ lex[ll] ]->second();
2712  } else {
 // Every other constraint contributes only its representative hit.
2713  mlite[ ll ] = unique_hit_representatives[ ll ][ lex[ll] ];
2714  //m[ ll ] = *(unique_hit_representatives[ ll ][ lex[ll] ]);
2715  m1.upstream_hits[ ll ].copy_hit( *(unique_hit_representatives[ ll ][ lex[ll] ]) );
2716  }
2717  }
2718  }
2719 
2720  /// if any of the non-upstream-only hits are incompatible, increment the lex
2721  /// and proceed to the next combination of hits
 // No ++lex on this path: the check receives lex and presumably advances it itself
 // when it reports an incompatibility -- confirm against its definition.
2722  if ( check_non_upstream_only_hit_incompatibility( m1, lex, processor ) ) continue;
2723 
2724  /// Now descend into the upstream-only hits (and if there are none, output the singular
2725  /// match combination from the set of non-upstream-only hits)
2726  // "inner lex" traversed here -- we're enumerating all the combinations of upstream-only hits
2727  Size last_upstream_only_geomcst_advanced( 0 );
2728  while ( true ) {
2729  if ( last_upstream_only_geomcst_advanced != 0 ) {
2730  Size const lgca = last_upstream_only_geomcst_advanced; // brevity
2731  mlite[ lgca ] = *upstream_only_hit_iterators[ lgca ];
2732  //m[ lgca ] = **upstream_only_hit_iterators[ lgca ];
2733  m1.upstream_hits[ lgca ].copy_hit( *mlite[ lgca ] );
2734  }
2735  Size empty_hitlist_id( 0 );
 // This inner kk shadows the outer loop's kk for the duration of the for statement.
2736  for ( Size kk = last_upstream_only_geomcst_advanced + 1; kk <= n_geometric_constraints_; ++kk ) {
2737  if ( geomcst_is_upstream_only_[ kk ] ) {
2738  upstream_only_hits[ kk ] = representative_downstream_algorithm_[ kk ]->
2739  hits_to_include_with_partial_match( m1 );
2740  if ( upstream_only_hits[ kk ] == 0 ) {
2741  empty_hitlist_id = kk;
2742  break;
2743  }
2744  upstream_only_hit_iterators[ kk ] = upstream_only_hits[ kk ]->val().begin();
2745  mlite[ kk ] = *upstream_only_hit_iterators[ kk ];
2746  //m[ kk ] = **upstream_only_hit_iterators[ kk ];
2747  m1.upstream_hits[ kk ].copy_hit( *mlite[ kk ] );
2748  last_upstream_only_geomcst_advanced = kk;
2749  }
2750  }
 // NOTE(review): the return value of increment_upstream_only_hit_combination is
 // ignored here and control falls through to the code below. The preceding
 // enumeration routine in this file breaks when that call returns false and
 // otherwise continues -- confirm whether the same handling was intended here.
2751  if ( empty_hitlist_id != 0 ) {
2752  // advance the upstream_only iterators;
2753  increment_upstream_only_hit_combination( upstream_only_hits, empty_hitlist_id - 1,
2754  upstream_only_hit_iterators, last_upstream_only_geomcst_advanced );
2755  }
2756 
2758  m1, upstream_only_hits, upstream_only_hit_iterators,
2759  last_upstream_only_geomcst_advanced, processor ) ) {
2760  // we have an incompatibility; break if we're at the end of the upstream-only hit combos
2761  if ( last_upstream_only_geomcst_advanced == 0 ) break;
2762  continue;
2763  }
2764 
2765  /// If we've already seen this match for a previous value of jj (some alternate
2766  /// definition of the hasher origin) then avoid outputting it a second time.
2767  if ( ! tracker.match_has_been_output( mlite ) ) {
2768  /// Record that we have seen this combination of hits.
2769  tracker.note_output_match( mlite );
2770  processor.process_match( m1 );
2771  }
2772 
2774  upstream_only_hits,
2776  upstream_only_hit_iterators,
2777  last_upstream_only_geomcst_advanced )) {
2778  break;
2779  }
2780  }
2781  ++lex;
2782  }
2783  }
2784  }
2785 
 // Discard the hash map for this origin definition, then print a progress dot
 // (with a newline after every 20th).
2786  hit_hasher.clear_hash_map( jj );
2787  std::cout << "." << std::flush;
2788  if ( jj % 20 == 0 ) std::cout << std::endl;
2789  }
2790  }
2791  std::cout << std::endl;
2792 }
2793 
2794 
/// @brief Register geomcst_id in geom_csts_with_primary_hitlist_modificiations_ unless it has
/// already been registered.
/// @note The signature line is not visible in this listing; geomcst_id is used below as an
/// index into geom_cst_has_primary_modification_.
2795 void
2797 {
 // The boolean flag guards the push_back so each geomcst_id is appended at most once.
2798  if ( ! geom_cst_has_primary_modification_[ geomcst_id ] ) {
2799  geom_csts_with_primary_hitlist_modificiations_.push_back( geomcst_id );
2800  geom_cst_has_primary_modification_[ geomcst_id ] = true;
2801  }
2802 }
2803 
2804 /*
2805  core::pose::PoseOP upstream_pose_;
2806  core::pose::PoseOP downstream_pose_;
2807 
2808  utility::vector1< Size > pose_build_resids_;
2809  utility::vector1< ScaffoldBuildPointOP > all_build_points_;
2810  utility::vector1< utility::vector1< ScaffoldBuildPointOP > > per_constraint_build_points_;
2811 
2812  utility::vector1< std::list< Hit > > hits_;
2813 
2814  Size n_geometric_constraints_;
2815  utility::vector1< UpstreamBuilderOP > upstream_builders_;
2816  utility::vector1< std::map< std::string, Size > > build_set_id_for_restype_;
2817  utility::vector1< utility::vector1< DownstreamAlgorithmOP > > downstream_algorithms_;
2818 
2819  utility::vector1< DownstreamBuilderOP > all_downstream_builders_;
2820  utility::vector1< DownstreamAlgorithmOP > all_downstream_algorithms_;
2821 
2822  BumpGridOP bb_grid_;
2823  utility::vector1< BumpGridOP > original_scaffold_residue_bump_grids_;
2824 
2825  OccupiedSpaceHashOP occ_space_hash_;
2826 
2827 */
2828 
2829 }
2830 }