Rosetta 3.5
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
MatcherTask.cc
Go to the documentation of this file.
1 // -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
2 // vi: set ts=2 noet:
3 // :noTabs=false:tabSize=4:indentSize=4:
4 //
5 // (c) Copyright Rosetta Commons Member Institutions.
6 // (c) This file is part of the Rosetta software suite and is made available under license.
7 // (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
8 // (c) For more information, see http://www.rosettacommons.org. Questions about this can be
9 // (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
10 
11 /// @file protocols/match/MatcherTask.cc
12 /// @brief
13 /// @author Andrew Leaver-Fay (aleaverfay@gmail.com)
14 
15 // Unit headers
17 
18 // Project headers
24 
26 
29 // AUTO-REMOVED #include <core/chemical/ResidueTypeSet.hh>
30 
32 
33 #include <basic/options/option.hh>
34 // AUTO-REMOVED #include <basic/options/keys/enzdes.OptionKeys.gen.hh>
35 #include <basic/options/keys/packing.OptionKeys.gen.hh>
36 #include <basic/options/keys/match.OptionKeys.gen.hh>
37 
38 #include <core/pose/Pose.hh>
40 #include <core/pose/PDBInfo.hh>
42 
43 #include <basic/Tracer.hh>
44 
45 /// Numeric headers
46 #include <numeric/constants.hh>
47 #include <numeric/xyz.functions.hh>
48 
49 // Utility headers
50 #include <utility/string_util.hh>
51 #include <utility/io/izstream.hh>
52 
53 // C++ headers
54 #include <sstream>
55 
56 #include <utility/vector1.hh>
57 
58 
59 namespace protocols {
60 namespace match {
61 
62 static basic::Tracer TR( "protocols.match.MatcherTask" );
63 
65  enumerate_ligand_rotamers_( false ),
66  only_enumerate_non_match_redundant_ligand_rotamers_( false ),
67  ignore_cmdline_for_build_points_(false),
68  share_build_points_for_geomcsts_( true ),
69  gridlig_active_site_definition_( false ),
70  occ_space_bounding_box_( Vector( 0.0 ), Vector( 0.0 ) ),
71  euclidean_bin_widths_( 1.0 ),
72  euler_bin_widths_( 10.0 ),
73  permitted_overlap_( 0.0 ),
74  use_input_sc_( false ),
75  dynamic_grid_refinement_( false ),
76  consolidate_matches_( true ),
77  n_to_output_per_group_( 5 ),
78  grouper_name_( "SameChiBinComboGrouper" ),
79  evaluator_name_( "DownstreamRMSEvaluator" ),
80  output_writer_name_( "PDB" ),
81  output_file_name_( "output_matches.kin" ),
82  grouper_ds_rmsd_(1.5),
83  output_matchres_only_(true),
84  filter_upstream_residue_collisions_( false ),
85  filter_upstream_collisions_by_score_( false ),
86  upstream_residue_collision_tolerance_( 1.0 ),
87  upstream_residue_collision_score_cutoff_( 1.0 ),
88  upstream_residue_collision_Wfa_atr_( 1.0 ),
89  upstream_residue_collision_Wfa_rep_( 1.0 ),
90  upstream_residue_collision_Wfa_sol_( 1.0 ),
91  filter_upstream_and_downstream_residue_collisions_( false ),
92  filter_upstream_and_downstream_collisions_by_score_( false ),
93  upstream_downstream_atom_collision_tolerance_( 1.0 ),
94  upstream_downstream_residue_collision_score_cutoff_( 1.0 ),
95  upstream_downstream_residue_collision_Wfa_atr_( 1.0 ),
96  upstream_downstream_residue_collision_Wfa_rep_( 1.0 ),
97  upstream_downstream_residue_collision_Wfa_sol_( 1.0 ),
98  define_match_by_single_downstream_positioning_( false )
99 {}
100 
102  ReferenceCount()
103 {
104  (*this) = other;
105 }
106 
107 MatcherTask const &
109 {
110  if ( this != & rhs ) {
162 
163  }
164  return *this;
165 }
166 
168 
169 void
171  core::pose::Pose const & input_pose
172 )
173 {
174  upstream_pose_ = new core::pose::Pose( input_pose );
175  utility::vector1< std::string > path = utility::string_split( input_pose.pdb_info()->name(), '/' );
176  upstream_pose_name_ = utility::string_split( path[ path.size() ], '.' )[1];
177 }
178 
179 void
181  core::pose::Pose const & input_pose
182 )
183 {
184  downstream_pose_ = new core::pose::Pose( input_pose );
185 
186  if ( downstream_orientation_atoms_.size() != 0 ) {
188  }
189 }
190 
191 void
193  core::pose::Pose const & input_pose,
194  utility::vector1< core::id::AtomID > const & orientation_atoms
195 )
196 {
197  downstream_pose_ = new core::pose::Pose( input_pose );
198  downstream_orientation_atoms_ = orientation_atoms;
199 
201 }
202 
203 void
205  utility::vector1< core::id::AtomID > const & orientation_atoms
206 )
207 {
208  downstream_orientation_atoms_ = orientation_atoms;
209  if ( downstream_pose_ ) {
211  }
212 }
213 
214 void
217 }
218 
219 void
222 }
223 
224 void
226 {
229  } else {
230  for ( Size ii = 1; ii <= per_cst_pose_build_resids_.size(); ++ii ) {
231  per_cst_pose_build_resids_[ ii ] = resids;
232  }
233  }
234 }
235 
236 
237 void
239 {
240 
241  runtime_assert( enz_input_data_ );
242  runtime_assert( upstream_pose_ );
243  bool switch_to_per_cst_resids_necessary( share_build_points_for_geomcsts_ );
244 
245  for( core::Size i =1; i<= enz_input_data_->num_mcfi_lists(); ++i ){
246 
247  std::map< std::string, utility::vector1< std::string > > const & alg_map = (*(enz_input_data_->mcfi_list( i )->mcfi( 1 ))).algorithm_inputs();
248  std::map< std::string, utility::vector1< std::string > >::const_iterator map_it( alg_map.find( "match_positions" ) );
249  if( map_it == alg_map.end() ) continue;
250 
251  // silly: this could mean that different positions are used for each cst, so we might have to switch
252  if( switch_to_per_cst_resids_necessary ){
254  switch_to_per_cst_resids_necessary = false;
255  per_cst_pose_build_resids_.resize( enz_input_data_->num_mcfi_lists() );
258  }
259 
260  for( core::Size mpm_string =1; mpm_string <= map_it->second.size(); ++mpm_string ){
262  tokens.push_back(""); //weird util vect1 copy behavior makes this necessary
263  tokens = utility::split( map_it->second[mpm_string] );
264  MatchPositionModifierCOP mpm( create_match_position_modifier( tokens[1], i, tokens ) );
265  if( !mpm ) utility_exit_with_message("Could not create a MatchPositionModifier based on name "+tokens[1]+".");
266  per_cst_pose_build_resids_[i] = mpm->modified_match_positions( per_cst_pose_build_resids_[ i ], *upstream_pose_, this );
267  }
268 
269  TR << "Match position modifiers changed match positions for geomcst " << i << " to the following: " << std::endl;
270  for( core::Size newpos = 1; newpos <= per_cst_pose_build_resids_[i].size(); ++newpos ) TR << per_cst_pose_build_resids_[i][newpos] << "+";
271  TR << std::endl;
272 
273  } //loop over all mcfi lists
274 }
275 
276 void
278 {
279  enumerate_ligand_rotamers_ = setting;
280 }
281 
282 void
284 {
286 }
287 
288 
290  Size n_geometric_constraints
291 )
292 {
295  per_cst_pose_build_resids_.resize( n_geometric_constraints );
296 }
297 
298 /// @brief Set the build point id's for a particular geometric constraint
299 void
301  Size geom_cst_id,
302  utility::vector1< Size > const & resids
303 )
304 {
305  runtime_assert( ! share_build_points_for_geomcsts_ ); // use_different_build_points_for_each_geometric_constraint() must be called first
306  runtime_assert( geom_cst_id <= per_cst_pose_build_resids_.size() );
307  per_cst_pose_build_resids_[ geom_cst_id ] = resids;
308 }
309 
310 void
312 {
314  TR << "overriding res/radius active site definition by setting gridlig file " << file_name << std::endl;
315  }
317  gridlig_fname_ = file_name;
318 }
319 
320 void
322 {
324 }
325 
326 void
328  Size resid,
329  Real radius
330 )
331 {
333  TR << "overriding gridlig_active_site_definition_ by appending res/radius pair: " << resid << " " << radius << std::endl;
335  }
336  upstream_resids_and_radii_defining_active_site_.push_back( std::make_pair( resid, radius ) );
337 }
338 
339 void
341  core::id::AtomID atid
342 )
343 {
345 }
346 
347 
348 void
350 {
352 }
353 
354 void
356 {
357  euclidean_bin_widths_ = width;
358 }
359 
360 void
362 {
363  euler_bin_widths_ = width;
364 }
365 
366 void
368 {
370 }
371 
372 void
374 {
375  using namespace basic::options;
376  using namespace basic::options::OptionKeys::match;
377 
378  /// 1. Read the ligand grid header file and initialize the boundary for the active site.
379  /// 2. Initialize the euclid and euler bin widths
380  /// 3. Note the location of the output file
381  /// 4. Read in the EnzConstraintIO file.
382  /// 5. Read in the build position list
383 
384  /// 1. liggrid
386 
387  euclidean_bin_widths_ = option[ euclid_bin_size ];
388  euler_bin_widths_ = option[ euler_bin_size ];
389 
391 
393 
395 
396  //note: this function only does something if the input pose
397  //already contains a partial match
399 
400  /// bump tolerance
401  permitted_overlap_ = option[ bump_tolerance ];
402 
403  use_input_sc_ = option[ OptionKeys::packing::use_input_sc ];
404  dynamic_grid_refinement_ = option[ OptionKeys::match::dynamic_grid_refinement ];
405 
407 
408  consolidate_matches_ = option[ OptionKeys::match::consolidate_matches ];
409  n_to_output_per_group_ = option[ output_matches_per_group ];
410 
414 
416 
417  enumerate_ligand_rotamers_ = option[ OptionKeys::match::enumerate_ligand_rotamers ];
418  only_enumerate_non_match_redundant_ligand_rotamers_ = option[ OptionKeys::match::only_enumerate_non_match_redundant_ligand_rotamers ];
419 }
420 
421 void
423 {
424  consolidate_matches_ = setting;
425 }
426 
428 {
430 }
431 
433 {
434  assert( filter_upstream_residue_collisions_ ); // must set filter_upstream_residue_collisions_ first!
436 }
437 
439 {
441 }
442 
444 {
446 }
447 
449 {
451 }
452 
454 {
456 }
457 
459 {
461 }
462 
464 {
466 }
467 
469 {
471 }
472 
474 {
476 }
477 
479 {
481 }
482 
484 {
486 }
487 
489 {
491 }
492 
494 {
496 }
497 
499 {
501 }
502 
504 {
505  n_to_output_per_group_ = setting;
506 }
507 /// @brief Append filter_name to filter_names_, the list of filter names stored on this task.
508 void
509 MatcherTask::add_filter( std::string const & filter_name )
510 {
511  filter_names_.push_back( filter_name );
512 }
513 
514 void
516 {
517  grouper_name_ = setting;
518 }
519 
520 void
522 {
523  evaluator_name_ = setting;
524 }
525 
526 void
528 {
529  output_writer_name_ = setting;
530 }
531 
532 void
534 {
535  output_file_name_ = setting;
536 }
537 
538 
539 void
541 {
542  enz_input_data_ = data;
544 }
545 
548 {
549  return upstream_pose_;
550 }
551 
554 {
555  return downstream_pose_;
556 }
557 
560 {
562 }
563 
564 bool
566 {
568 }
569 
570 bool
572 {
574 }
575 
576 
579 {
582  } else {
583  runtime_assert( cst_id <= per_cst_pose_build_resids_.size() );
584  return per_cst_pose_build_resids_[ cst_id ];
585  }
586 }
587 
588 std::map< core::Size, core::Size > const &
590  return upstream_only_geom_cst_;}
591 
592 /// @brief Define the active site through a gridlig file (true), or by listing residue/radii pairs (false)?
593 bool
595 {
597 }
598 
599 /// @brief Accessor for the file name containing the active-site definition in gridlig format
600 std::string const &
602 {
603  runtime_assert( gridlig_active_site_definition_ );
604  return gridlig_fname_;
605 }
606 
607 
608 std::list< std::pair< MatcherTask::Size, MatcherTask::Real > > const &
610 {
611  runtime_assert( ! gridlig_active_site_definition_ );
613 }
614 
615 std::list< core::id::AtomID > const &
617 {
619 }
620 
623 {
625 }
626 
627 
630 {
632 }
633 
636 {
637  return euclidean_bin_widths_;
638 }
639 
642 {
643  return euler_bin_widths_;
644 }
645 
648 {
649  return permitted_overlap_;
650 }
651 
652 bool
654  return use_input_sc_;
655 }
656 
657 bool
659 {
661 }
662 
663 bool
665 {
666  return consolidate_matches_;
667 }
668 
671 {
672  return n_to_output_per_group_;
673 }
674 
675 
676 std::list< std::string > const &
678 {
679  return filter_names_;
680 }
681 
682 std::string const &
684 {
685  return upstream_pose_name_;
686 }
687 
688 std::string const &
690 {
691  return cstfile_name_;
692 }
693 
694 std::string const &
696 {
697  return grouper_name_;
698 }
699 
700 std::string const &
702 {
703  return evaluator_name_;
704 }
705 
706 std::string const &
708 {
709  return output_writer_name_;
710 }
711 
712 std::string const &
714 {
715  return output_file_name_;
716 }
717 
720 {
721  return grouper_ds_rmsd_;
722 }
723 
724 
725 bool
727 {
728  return output_matchres_only_;
729 }
730 
733 {
735 }
736 
739 {
740  return enz_input_data_;
741 }
742 
744 {
746 }
747 
749 {
751 }
752 
755 {
757 }
758 
760 {
762 }
763 
765 {
767 }
768 
770 {
772 }
773 
775 {
777 }
778 
780 {
782 }
783 
785 {
787 }
788 
790 {
792 }
793 
796 {
798 }
799 
802 {
804 }
805 
808 {
810 }
811 
814 {
816 }
817 
820 {
822 }
823 
824 
825 void
827 {
828  /// STUBBED OUT
829 }
830 
831 
832 /// @details This function reads a file specified on the command line through the
833 /// -match::grid_boundary <filename> flag. The "grid boundary" specifies a bounding
834 /// box inside which the third orientation atom of the downstream partner must lie
835 /// for a downstream conformation to become a match. The orientation atoms are
836 /// described in the match::orientation_atoms flag description.
837 /// The format for the file describing this bounding box is the first four lines
838 /// of the gridlig file format from rosetta++. The same file may be listed as
839 /// both the grid_boundary file and the active_site_definition_by_gridlig file.
840 /// Though the active_site_definition_by_gridlig flag is not required for the matcher to
841 /// work, the grid_boundary flag is necessary.
842 ///
843 /// Format:
844 /// The gridlig format has been co-opted. This file specifies voxels for a region of 3D space.
845 /// Though the occupied space bounding box does not require a discretization for its
846 /// description, this gridlig file format does.
847 ///
848 /// Gridlig format
849 /// The gridlig format specifies a discretization of euclidean space, and then gives
850 /// the 0 and 1 values for each voxel.
851 /// The first line in the gridlig file is "NAME: gridlig"
852 /// The second line describes the coordinate of the lower corner:
853 /// "BASE: <xcoord> <ycoord> <zcoord>"
854 /// The third line describes the number of voxels in each dimension:
855 /// "SIZE: <xcount> <ycount> <zcount>"
856 /// The fourth line describes the edge length of a voxel in each dimension:
857 /// "LENGTH: <xlength> <ylength> <zlength>"
858 /// Only the first four lines of the gridlig file are read. Additional lines
859 /// are ignored.
860 /// Example:
861 /// <begin file>
862 /// NAME: gridlig
863 /// BASE: 27.712 15.827 39.155
864 /// SIZE: 39 39 34
865 /// LENGTH: 0.500 0.500 0.500
866 /// <end file>
867 ///
868 /// NOTE: The discretization of the occupied space bounding box has no impact
869 /// on the hash size when detecting that two hits match. The
870 /// -match::euclid_bin_size <Real> flag alone controls that behavior.
871 void
873 {
874  using namespace basic::options;
875  using namespace basic::options::OptionKeys::match;
876 
877  if ( option[ grid_boundary ].user() ) {
878 
879  std::string filename = option[ grid_boundary ];
880  utility::io::izstream istr( filename.c_str() );
881  std::string name, liggrid;
882  istr >> name >> liggrid;
883  runtime_assert( name == "NAME:" || name == "@NAME:" ); // for some odd reason, the scaffold library has extra "@"'s.
884  runtime_assert( liggrid == "gridlig" );
885  std::string base;
886  Real xbase( 0.0 ), ybase( 0.0 ), zbase( 0.0 );
887  istr >> base;
888  runtime_assert( base == "BASE:" );
889  istr >> xbase; runtime_assert( ! istr.bad() );
890  istr >> ybase; runtime_assert( ! istr.bad() );
891  istr >> zbase; runtime_assert( ! istr.bad() );
892 
894  istr >> size;
895  runtime_assert( size == "SIZE:" );
896  Size xsize( 0 ), ysize( 0 ), zsize( 0 );
897  istr >> xsize; runtime_assert( ! istr.bad() );
898  istr >> ysize; runtime_assert( ! istr.bad() );
899  istr >> zsize; runtime_assert( ! istr.bad() );
900 
901  runtime_assert( xsize != 0 );
902  runtime_assert( ysize != 0 );
903  runtime_assert( zsize != 0 );
904 
905  std::string length;
906  istr >> length;
907  runtime_assert( length == "LENGTH:");
908  Real xwidth( 0.0 ), ywidth( 0.0 ), zwidth( 0.0 );
909 
910  istr >> xwidth; runtime_assert( ! istr.bad() );
911  istr >> ywidth; runtime_assert( ! istr.bad() );
912  istr >> zwidth; runtime_assert( ! istr.bad() );
913 
914  runtime_assert( xwidth != 0 );
915  runtime_assert( ywidth != 0 );
916  runtime_assert( zwidth != 0 );
917 
918  Vector lower_corner( xbase, ybase, zbase );
919  Vector upper_corner( lower_corner );
920  upper_corner.x() += xwidth * xsize;
921  upper_corner.y() += ywidth * ysize;
922  upper_corner.z() += zwidth * zsize;
923  occ_space_bounding_box_.set_lower( lower_corner );
924  occ_space_bounding_box_.set_upper( upper_corner );
925  }
926  else{
927  if( !upstream_pose_ ) utility_exit_with_message( "Grid boundary defintion file must be specified with the -match::grid_boundary <filename> flag, or the upstram_pose_ must be set in the MatcherTask." );
928 
929  TR << "WARNING WARNING WARNING: no grid file specified for option -grid_boundary. The bounding grid will be generated from the dimensions of the pose. This is experimental at the moment." << std::endl;
930 
931  Real lowx(upstream_pose_->residue(1).xyz(1).x());
932  Real lowy(upstream_pose_->residue(1).xyz(1).y());
933  Real lowz(upstream_pose_->residue(1).xyz(1).z());
934  Real highx(lowx), highy(lowy), highz(lowz);
935 
936  for( Size i =1; i <= upstream_pose_->total_residue(); ++i ){
937  core::conformation::Residue const & cur_res( upstream_pose_->residue( i ) );
938 
939  if( !cur_res.is_protein() ) continue;
940  utility::vector1< Size > const & bb_atoms( cur_res.type().all_bb_atoms() );
941 
942  for( Size j =1; j <= bb_atoms.size(); ++j){
943  Real curx( cur_res.xyz( bb_atoms[j] ).x() ), cury( cur_res.xyz( bb_atoms[j] ).y() ), curz( cur_res.xyz( bb_atoms[j] ).z() );
944 
945  if( curx < lowx ) lowx = curx;
946  else if( curx > highx ) highx = curx;
947  if( cury < lowy ) lowy = cury;
948  else if( cury > highy ) highy = cury;
949  if( curz < lowz ) lowz = curz;
950  else if( curz > highz ) highz = curz;
951  }
952  }
953  //lowx -= padding, lowy -= padding, lowz -= padding;
954  //highx += padding, highy += padding, highz += padding;
955 
956 
957  Vector lower_corner( lowx , lowy, lowz );
958  Vector upper_corner( highx, highy, highz );
959  TR << "Bounding box lower corner set to (" << lower_corner.x() << "," << lower_corner.y() << "," << lower_corner.z() << "), upper corner set to to (" << upper_corner.x() << "," << upper_corner.y() << "," << upper_corner.z() << ")." << std::endl;
960  occ_space_bounding_box_.set_lower( lower_corner );
961  occ_space_bounding_box_.set_upper( upper_corner );
962  }
963 }
964 
965 /// @details As of May 2012 (flo), the bulk of this function has been moved
966 /// to the AllowedSeqposForGeomCst file.
967 void
969 {
970  using namespace basic::options;
971  using namespace basic::options::OptionKeys::match;
972 
973  if ( ! option[ scaffold_active_site_residues ].user() && ! option[ scaffold_active_site_residues_for_geomcsts ].user() && ( generic_pose_build_resids_.size() == 0 ) && (per_cst_pose_build_resids_.size() == 0 ) ) {
974  utility_exit_with_message( "Scaffold build point defintion file must be specified with either "
975  "the -match::scaffold_active_site_residues <filename> flag\n"
976  "or the -match::scaffold_active_site_residues_for_geomcsts <filename> flag" );
977  }
978 
981 
982  //now go through and initialize the MatcherTask data from the file reader
983  //first check whether there's actually any positions to go from
984  if( scaf_seqpos.num_seqpos_lists() == 0 ) utility_exit_with_message("Apparently match positions couldn't get read from file...");
985 
986  else if( scaf_seqpos.num_seqpos_lists() == 1 ){
989  for( core::Size i = 1; i <= scaf_seqpos.seqpos_for_geomcst( 1 ).size(); ++i ){
990  if( scaf_seqpos.seqpos_for_geomcst( 1 )[i] <= upstream_pose_->total_residue() ){
991  if( upstream_pose_->residue_type( scaf_seqpos.seqpos_for_geomcst( 1 )[i] ).is_protein() ) generic_pose_build_resids_.push_back( scaf_seqpos.seqpos_for_geomcst( 1 )[i] );
992  }
993  else std::cerr << "Warning: seqpos number " << scaf_seqpos.seqpos_for_geomcst( 1 )[i] << " is larger than size of pose, ignoring. Check if you're using the right match posfile.";
994 
995  }
996  }
997  else{
998  if( scaf_seqpos.num_seqpos_lists() != enz_input_data_->mcfi_lists_size() ) utility_exit_with_message("#geometric constraints disagreement between file given for option match::scaffold_active_site_residues_for_geomcsts having " + utility::to_string(scaf_seqpos.num_seqpos_lists() ) + "constraitns and Enz .cst file: " + option[ geometric_constraint_file ]()() +
999  " having (" + utility::to_string(enz_input_data_->mcfi_lists_size()) + ") constraints." );
1000 
1003  per_cst_pose_build_resids_.resize( scaf_seqpos.num_seqpos_lists() );
1004 
1005  for( core::Size i =1; i <= scaf_seqpos.num_seqpos_lists(); ++i ){
1006  for( core::Size j = 1; j <= scaf_seqpos.seqpos_for_geomcst( i ).size(); ++j ){
1007  if( scaf_seqpos.seqpos_for_geomcst( i )[j] <= upstream_pose_->total_residue() ){
1008  if( upstream_pose_->residue_type( scaf_seqpos.seqpos_for_geomcst( i )[j] ).is_protein() ) per_cst_pose_build_resids_[i].push_back( scaf_seqpos.seqpos_for_geomcst( i )[j] );
1009  }
1010  else std::cerr << "Warning: seqpos number " << scaf_seqpos.seqpos_for_geomcst( i )[j] << " is larger than size of pose, ignoring. Check if you're using the right match posfile.";
1011  }
1012  }
1013  }
1014 }
1015 
1016 /// @details go through all of the remarks in the pose and see if any of them specify
1017 /// already existing catalytic interactions
1018 void
1020 {
1021  if( !upstream_pose_ ) return;
1022 
1023  core::pose::PDBInfoCOP pose_pdbinfo = upstream_pose_->pdb_info();
1024 
1025  core::pose::Remarks const & pose_remarks = pose_pdbinfo->remarks();
1026 
1027  bool change_build_point_lists( false );
1028  bool switch_to_different_build_points_required( share_build_points_for_geomcsts_ );
1029  utility::vector1< Size > generic_pose_build_resids_copy( generic_pose_build_resids_ );
1030  std::set< Size > cst_ids_already_present;
1031  Size n_geometric_constraints( enz_input_data_->mcfi_lists_size() );
1032 
1033  // Loop over all the remarks in pdb file
1034  for( std::vector< core::pose::RemarkInfo >::const_iterator remark_it = pose_remarks.begin(); remark_it != pose_remarks.end(); remark_it++){
1035 
1036  std::string resA_type(""), resB_type("");
1037  int resA_num(0), resB_num(0);
1038  Size cst_block(0);
1039  std::string resA_chain ,resB_chain;
1040  core::Size ex_geom_id;
1041  if( protocols::toolbox::match_enzdes_util::split_up_remark_line(remark_it->value, resA_chain,
1042  resA_type, resA_num, resB_chain, resB_type,
1043  resB_num, cst_block, ex_geom_id) ){
1044 
1045  if( !change_build_point_lists ) {
1046  change_build_point_lists = true;
1047  if( switch_to_different_build_points_required ) use_different_build_points_for_each_geometric_constraint( n_geometric_constraints );
1048  }
1049 
1050  Size upstream_seqpos( pose_pdbinfo->pdb2pose(resB_chain.c_str()[0],resB_num) );
1051  utility::vector1< Size > res_id;
1052  runtime_assert( upstream_pose_->residue_type( upstream_seqpos ).is_protein() );
1053  TR << "An interaction for geometric constraint " << cst_block << " already seems to be present in the pose at seqpos " << upstream_seqpos << ". Matching for this geomcst will only be done at this position." << std::endl;
1054  res_id.push_back( upstream_seqpos );
1056  cst_ids_already_present.insert( cst_block );
1057  }
1058  }
1059  if( change_build_point_lists){
1060  //it is likely that the downstream object is part of the upstream_pose_
1061  //in this case, bc the starting structure already contained a partial match.
1063 
1064  if( switch_to_different_build_points_required ){
1065  for( Size i = 1; i <= n_geometric_constraints; ++i){
1066  if( cst_ids_already_present.find( i ) == cst_ids_already_present.end() ){
1067  set_original_scaffold_build_points_for_geometric_constraint( i, generic_pose_build_resids_copy );
1068  }
1069  }
1070  }
1071  }
1072 } //set_active_site_residue_list_to_preexisting_partial_match()
1073 
1074 void
1076 {
1077  //right now this function only works for ligand downstream objects
1078  if( (downstream_pose_->total_residue() != 1 ) ) utility_exit_with_message("Can't remove a downstream pose containing more than one residue from the upstream pose.");
1079 
1080  utility::vector1< Size > seqpos_to_remove;
1081  for( core::Size i = 1; i<= upstream_pose_->conformation().num_chains(); ++i ){
1082  core::Size chain_begin( upstream_pose_->conformation().chain_begin( i ) );
1083  if( (upstream_pose_->conformation().chain_end( i ) - chain_begin == 0 ) &&
1084  downstream_pose_->residue_type(1).name3() == upstream_pose_->residue_type( chain_begin ).name3() ){
1085  seqpos_to_remove.push_back( chain_begin );
1086  }
1087  }
1088  if( seqpos_to_remove.size() == 0 ) return;
1089 
1090  core::pose::PoseOP mod_up_pose = new core::pose::Pose( *upstream_pose_ );
1091  for( core::Size i = seqpos_to_remove.size(); i >= 1; --i){
1092  TR << "Removing seqpos " << seqpos_to_remove[i] << " from upstream_pose_ because it is part of the downstream object." << std::endl;
1093  mod_up_pose->conformation().delete_residue_slow( seqpos_to_remove[i] );
1094  }
1095  upstream_pose_ = mod_up_pose;
1096 }
1097 
1098 void
1100 {
1101  using namespace basic::options;
1102  using namespace basic::options::OptionKeys::match;
1103 
1104  using namespace protocols::toolbox::match_enzdes_util;
1105  using namespace core::chemical;
1106  /// create a local non-const version of the input data.
1108  & ChemicalManager::get_instance()->nonconst_residue_type_set( FA_STANDARD ));
1109  if ( ! option[ geometric_constraint_file ].user() ) {
1110  utility_exit_with_message( "Option match::geometric_constraint_file must be specified on the command line" );
1111  }
1112  cstfile_name_ = option[ geometric_constraint_file ]();
1113 
1114  enz_input_data->read_enzyme_cstfile( cstfile_name_ );
1117 
1118 }
1119 
1120 
1121 /// @details note: for the time being, this function sets all the residue numbers of
1122 /// the AtomIDs in relevant_downstream_atoms_ to 1, i.e. it will only function properly
1123 /// for cases where the downstream pose consists of one residue only (i.e. ligands )
1124 void
1126 {
1127  runtime_assert( enz_input_data_ );
1129  std::set< core::id::AtomID > seen_atoms;
1130 
1131  for( core::Size i = 1; i <= enz_input_data_->num_mcfi_lists(); ++i ){
1132 
1133  //make sure this is not upstream matching
1134  bool upstream_matching( false);
1135  std::map< std::string, utility::vector1< std::string > > const &
1136  alg_info( enz_input_data_->mcfi_list( i )->mcfi( 1 )->algorithm_inputs() );
1137  if ( alg_info.find( "match" ) != alg_info.end() ) {
1138  utility::vector1< std::string > const & info( alg_info.find( "match" )->second );
1139  for ( Size ll = 1; ll <= info.size(); ++ll ) {
1140  std::string llstr = info[ ll ];
1141  std::istringstream llstream( llstr );
1142  std::string first, second;
1143  llstream >> first >> second;
1144  if( first == "SECONDARY_MATCH:" && second == "UPSTREAM_CST" ){
1145  core::Size target_id(0);
1146  llstream >> target_id;
1147  upstream_matching = true;
1148  upstream_only_geom_cst_.insert( std::pair< core::Size, core::Size >( i, target_id ));
1149  break;
1150  }
1151  }
1152  if( upstream_matching ) continue;
1153  }
1154  //upstream matching check over
1155 
1156  for( core::Size j = 1; j <= enz_input_data_->mcfi_list( i )->num_mcfis(); ++j){
1158  core::Size ds_matchres( cur_mcfi->downstream_res() );
1159  core::chemical::ResidueTypeCOP ds_restype( cur_mcfi->allowed_restypes( ds_matchres )[1] );
1160 
1161  for( core::Size k = 1; k <= 3; ++k){
1162 
1163  for( core::Size l = 1; l <= cur_mcfi->template_atom_inds( ds_matchres, k, *ds_restype ).size(); ++l ){
1164  core::id::AtomID this_at( cur_mcfi->template_atom_inds( ds_matchres, k, *ds_restype )[l], 1 );
1165  if( seen_atoms.find( this_at ) == seen_atoms.end() ){
1166  seen_atoms.insert( this_at );
1167  relevant_downstream_atoms_.push_back( this_at );
1168  //std::cerr << " atom " << cur_mcfi->template_atom_inds( ds_matchres, k, *ds_restype )[l] << " is relevant for matcher" << std::endl;
1169  }
1170  } //loop over template atom inds
1171  } //loop over template atoms
1172  } //loop over mcifs
1173  } //loop over mcfi lists
1174 }
1175 
1176 void
1178 {
1179  using namespace basic::options;
1180  using namespace basic::options::OptionKeys::match;
1181 
1182  if ( option[ orientation_atoms ].user() ) {
1183  if ( downstream_pose_ ) {
1184  utility::vector1< std::string > names = option[ orientation_atoms ];
1185  if ( names.size() != 3 ) {
1186  std::cerr << "ERROR: expected exactly three atom names for the downstream pose, but read ";
1187  std::cerr << names.size() << " from the command line." << std::endl;
1188  for ( Size ii = 1; ii <= names.size(); ++ii ) {
1189  std::cerr << names[ ii ] << " ";
1190  }
1191  std::cerr << std::endl;
1192  utility_exit_with_message( "Invalid use of the flag -match::orientation_atoms" );
1193  }
1194 
1195  if ( downstream_pose_->total_residue() != 1 ) {
1196  std::cerr << "ERROR: Cannot use the flag -match::orientation_atoms if the downstream pose has multiple residues" << std::endl;
1197  utility_exit_with_message( "Invalide use of the flag -match::orientation_atoms" );
1198  }
1199  utility::vector1< Size > ats( 3 );
1200  for ( Size ii = 1; ii <= 3; ++ii ) {
1201  if ( ! downstream_pose_->residue( 1 ).has( names[ ii ] ) ) {
1202  std::cerr << "Could not find atom named '" << names[ ii ] << "' in residue " << downstream_pose_->residue( 1 ).name() << std::endl;
1203  utility_exit_with_message ("Unrecognized atom name listed in flag -match::orientation_atoms" );
1204  } else {
1205  ats[ ii ] = downstream_pose_->residue( 1 ).atom_index( names[ ii ] );
1206  downstream_orientation_atoms_[ ii ] = core::id::AtomID( ats[ ii ], 1 );
1207  }
1208  }
1209 
1210  Real angle = numeric::constants::d::radians_to_degrees * numeric::angle_radians(
1211  downstream_pose_->residue( 1 ).xyz( ats[ 1 ] ),
1212  downstream_pose_->residue( 1 ).xyz( ats[ 2 ] ),
1213  downstream_pose_->residue( 1 ).xyz( ats[ 3 ] ) );
1214  if ( angle > 175 ) {
1215  std::cerr << "ERROR: The three atoms used to define the orientation of the downstream partner are nearly co-linear: angle= " << angle << "\n";
1216  std::cerr << "Please choose atoms with an angle that will not be so numerically sensitive." << std::endl;
1217  utility_exit_with_message( "Nearly-colinear points chosen as orientation atoms of the downstream partner" );
1218  }
1219 
1220  } else {
1221  std::cout << "Warning, Downstream Pose has not been set in MatcherTask when initialize_from_command_line\n";
1222  std::cout << "was invoked. The parameters in the command-line flag -match::orientation_atoms are being ignored" << std::endl;
1223  }
1224  }
1225 
1226 }
1227 
1228 /// @details This function reads three types of files relating to the active site.
1229 /// The file formats are described below.
1230 /// 1. A file listing the atom names of those atoms in the downstream partner
1231 /// which are required to be in the active site -- conformations of the downstream
1232 /// partner in which any of these atoms lie outside of the active site are excluded.
1233 /// This file format is only valid if the downstream partner is a single residue
1234 /// (e.g. a ligand).
1235 /// 2. A file defining the active site geometry by listing residue indices of the
1236 /// scaffold and, for each residue i, a reach, r_i (a radius). A point in space is
1237 /// within the active site if it is within r_i of any atom of residue i.
1238 /// 3. A "gridlig" file from rosetta++ describing a box in space whose voxels are labeled
1239 /// with 0's and 1's; voxels with 1's are part of the active site, those with 0's are
1240 /// not.
1241 ///
1242 /// File formats:
1243 /// 1. Active site atoms.
1244 /// The file lists on one or more lines the atom names for all atoms that are required
1245 /// to be present in the active site. The file format does not support comments.
1246 /// Example:
1247 /// <begin file>
1248 /// C7 O2 O3 C22 C23 N5
1249 /// <end file>
1250 ///
1251 /// 2. Residue/reach pairs.
1252 /// Each line of the file should list a residue index (where residue numbering starts
1253 /// from 1) for a residue on the scaffold and a reach (a radius).
1254 /// Example.
1255 /// <begin file>
1256 /// 10 5.5
1257 /// 12 7.2
1258 /// 13 7.0
1259 /// 23 10.0
1260 /// 25 7.0
1261 /// 58 5.5
1262 /// 60 7.0
1263 /// <end file>
1264 ///
1265 /// 3. Gridlig format
1266 /// The gridlig format specifies a discretization of euclidean space, and then gives
1267 /// the 0 and 1 values for each voxel.
1268 /// The first line in the gridlig file is "NAME: gridlig"
1269 /// The second line describes the coordinate of the lower corner:
1270 /// "BASE: <xcoord> <ycoord> <zcoord>"
1271 /// The third line describes the number of voxels in each dimension:
1272 /// "SIZE: <xcount> <ycount> <zcount>"
1273 /// The fourth line describes the length of each dimension describing a voxel:
1274 /// "LENGTH: <xlength> <ylength> <zlength>"
1275 /// The subsequent lines are arranged in blocks sweeping through the grid in
1276 /// "row-major" order. Each line has zcount space-separated 0's and 1's. Lines
1277 /// are grouped into ycount blocks; a single empty line separates blocks.
1278 /// Example:
1279 /// <begin file>
1280 /// NAME: gridlig
1281 /// BASE: 27.712 15.827 39.155
1282 /// SIZE: 39 39 34
1283 /// LENGTH: 0.500 0.500 0.500
1284 /// 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
1285 /// 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
1286 /// 0 0 0 0 0 0 0 0 1 1 1 1 1 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
1287 /// 0 0 0 0 0 0 0 0 1 1 1 1 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1
1288 /// <the file contains 1563 lines total>
// Reads the active-site flags from the match option namespace:
// required_active_site_atom_names, active_site_definition_by_residue and
// active_site_definition_by_gridlig. Any inconsistency or unreadable input
// terminates the program through utility_exit_with_message.
// NOTE(review): this listing was extracted from rendered documentation and
// some original lines are absent (the embedded numbering skips 1290, 1313,
// 1321, 1349-1350 and 1360). The function's qualified name and a few
// statements/conditions are therefore not visible here.
1289 void
1291 {
1292  using namespace basic::options;
1293  using namespace basic::options::OptionKeys::match;
1294 
  // --- Part 1: names of downstream atoms that must lie in the active site.
1295  if ( option[ required_active_site_atom_names ].user() ) {
  // The atom names are looked up on residue 1 only, so the downstream pose
  // has to consist of exactly one residue.
1296  if ( downstream_pose_->total_residue() != 1 ) {
1297  utility_exit_with_message( "Flag match::required_active_site_atom_names may only be used when the downstream pose is a single residue" );
1298  }
  // Requiring atoms to be inside the active site is meaningless unless one
  // of the two active-site definitions is also supplied.
1299  if (
1300  ! option[ active_site_definition_by_residue ].user() &&
1301  ! option[ active_site_definition_by_gridlig ].user() ) {
1302  utility_exit_with_message( "Flag match::required_active_site_atom_names must be used in combination with\n" "either the match::active_site_definition_by_residue flag or the match::active_site_definition_by_gridlig flag" );
1303  }
1304  std::string filename = option[ required_active_site_atom_names ];
1305  utility::io::izstream istr( filename.c_str() );
  // Read whitespace-separated atom names until the stream stops being usable
  // or an empty token is extracted.
1306  while ( istr ) {
1307  std::string atname;
1308  istr >> atname;
1309  if ( ! istr.bad() ) {
1310  if ( atname == "" ) break;
  // Every name must be an atom of the (single) downstream residue.
1311  if ( downstream_pose_->residue( 1 ).has( atname ) ) {
1312  TR << "Requiring downstream atom '" << atname << "' to reside in the scaffold's active site" << std::endl;
  // NOTE(review): the beginning of this statement (line 1313) is missing
  // from the listing; only the AtomID argument built from the atom's index
  // on residue 1 is visible.
1314  core::id::AtomID( downstream_pose_->residue( 1 ).atom_index( atname ), 1 ));
1315  } else {
1316  std::cerr << "ERROR reading required_active_site_atom_names: atom named '" << atname << "' is not an atom of " << downstream_pose_->residue( 1 ).name() << std::endl;
1317  utility_exit_with_message( "Problem reading required_active_site_atom_names file " + filename );
1318  }
1319  }
1320  }
  // NOTE(review): the condition guarding this exit (line 1321) is missing
  // from the listing; judging by the message it presumably tests that no
  // atom names were read -- confirm against the real source.
1322  utility_exit_with_message( "No active site atoms read; empty required_active_site_atom_names file " + filename );
1323  }
1324  }
1325 
  // --- Part 2: active site defined by (scaffold residue index, radius) pairs.
1326  if ( option[ active_site_definition_by_residue ].user() ) {
  // The residue-based and gridlig-based definitions are mutually exclusive.
1327  if ( option[ active_site_definition_by_gridlig ].user() ) {
1328  std::cerr << "ERROR: found incompatible flags active_site_definition_by_residue and active_site_definition_by_gridlig on the command line" << std::endl;
1329  utility_exit_with_message( "Ambiguous command line." );
1330  }
1331  std::string filename = option[ active_site_definition_by_residue ];
1332  utility::io::izstream istr( filename.c_str() );
  // Each iteration reads one "<resid> <radius>" pair.
1333  while ( istr ) {
1334  Size resid; Real radius;
1335  istr >> resid;
  // NOTE(review): resid is range-checked before the stream state is tested
  // on line 1340, so after a failed extraction this compares a value that
  // was not successfully read -- confirm this ordering is intended.
1336  if ( resid > upstream_pose_->total_residue() ) {
1337  std::cerr << "ERROR reading active_site_definition: residue " << resid << " exceeds the number of residues in the scaffold pose." << std::endl;
1338  utility_exit_with_message( "Problem reading active_site_definition " + filename );
1339  }
  // Stop once the stream is no longer good (e.g. end of file).
1340  if ( ! istr.good() ) break;
1341  istr >> radius;
  // NOTE(review): bad() reports only badbit; a radius that fails to parse
  // sets failbit instead and would not be caught here -- confirm.
1342  if ( istr.bad() ) {
1343  std::cerr << "Error reading active_site_definition: expected to read a radius after reading resid " << resid << std::endl;
1344  utility_exit_with_message( "Problem reading active_site_definition " + filename );
1345  }
1346  TR << "Including sphere of radius " << radius << " surrounding scaffold residue " << resid << " in active site definition" << std::endl;
1347  upstream_resids_and_radii_defining_active_site_.push_back( std::make_pair( resid, radius ));
1348  }
  // NOTE(review): the condition(s) opening the block below (lines 1349-1350)
  // are missing from the listing.
1351  /// No matches are possible because the active site is undefined, yet
1352  /// there are atoms required to reside in the active site. Quit.
1353  std::cerr << "ERROR reading active_site_definition: no active site residues were read!" << std::endl;
1354  utility_exit_with_message( "Problem reading active_site_definition " + filename );
1355  }
1356  }
1357  }
1358 
  // --- Part 3: active site defined by a gridlig file. Only the file name is
  // recorded in the visible code.
1359  if ( option[ active_site_definition_by_gridlig ].user() ) {
  // NOTE(review): line 1360 is missing from the listing.
1361  gridlig_fname_ = option[ active_site_definition_by_gridlig ]();
1362  }
1363 }
1364 
// Copies the upstream-residue collision filter settings from the match
// option namespace into this object. Nothing is read unless the
// filter_colliding_upstream_residues option evaluates to true.
// NOTE(review): this listing was extracted from rendered documentation; the
// function's name line (1366) and the statements on lines 1372, 1374 and 1380
// are missing, so any flags set there are not visible here.
1365 void
1367 {
1368  using namespace basic::options;
1369  using namespace basic::options::OptionKeys::match;
1370 
1371  if ( option[ OptionKeys::match::filter_colliding_upstream_residues ] ) {
  // A user-supplied score cutoff selects the score-based settings: the cutoff
  // plus the Wfa_atr / Wfa_rep / Wfa_sol values are taken from the options.
1373  if ( option[ OptionKeys::match::upstream_residue_collision_score_cutoff ].user() ) {
1375  upstream_residue_collision_score_cutoff_ = option[ OptionKeys::match::upstream_residue_collision_score_cutoff ];
1376  upstream_residue_collision_Wfa_atr_ = option[ OptionKeys::match::upstream_residue_collision_Wfa_atr ];
1377  upstream_residue_collision_Wfa_rep_ = option[ OptionKeys::match::upstream_residue_collision_Wfa_rep ];
1378  upstream_residue_collision_Wfa_sol_ = option[ OptionKeys::match::upstream_residue_collision_Wfa_sol ];
1379  } else {
  // Otherwise a collision tolerance is used: the dedicated option when the
  // user set it, else the general bump_tolerance option.
1381  if ( option[ OptionKeys::match::upstream_residue_collision_tolerance ].user() ) {
1382  upstream_residue_collision_tolerance_ = option[ OptionKeys::match::upstream_residue_collision_tolerance ];
1383  } else {
1384  upstream_residue_collision_tolerance_ = option[ OptionKeys::match::bump_tolerance ];
1385  }
1386  }
1387  }
1388 }
1389 
// Copies the upstream/downstream collision filter settings from the match
// option namespace into this object. Nothing is read unless the
// filter_upstream_downstream_collisions option evaluates to true.
// NOTE(review): this listing was extracted from rendered documentation; the
// function's name line (1391) and the statements on lines 1397, 1399 and 1405
// are missing, so any flags set there are not visible here.
1390 void
1392 {
1393  using namespace basic::options;
1394  using namespace basic::options::OptionKeys::match;
1395 
1396  if ( option[ OptionKeys::match::filter_upstream_downstream_collisions ] ) {
  // A user-supplied score cutoff selects the score-based settings: the cutoff
  // plus the Wfa_atr / Wfa_rep / Wfa_sol values are taken from the options.
1398  if ( option[ OptionKeys::match::updown_residue_collision_score_cutoff ].user() ) {
1400  upstream_downstream_residue_collision_score_cutoff_ = option[ OptionKeys::match::updown_residue_collision_score_cutoff ];
1401  upstream_downstream_residue_collision_Wfa_atr_ = option[ OptionKeys::match::updown_residue_collision_Wfa_atr ];
1402  upstream_downstream_residue_collision_Wfa_rep_ = option[ OptionKeys::match::updown_residue_collision_Wfa_rep ];
1403  upstream_downstream_residue_collision_Wfa_sol_ = option[ OptionKeys::match::updown_residue_collision_Wfa_sol ];
1404  } else {
  // Otherwise an atom collision tolerance is used: the dedicated
  // updown_collision_tolerance option when the user set it, else the general
  // bump_tolerance option.
1406  if ( option[ OptionKeys::match::updown_collision_tolerance ].user() ) {
1407  upstream_downstream_atom_collision_tolerance_ = option[ OptionKeys::match::updown_collision_tolerance ];
1408  } else {
1409  upstream_downstream_atom_collision_tolerance_ = option[ OptionKeys::match::bump_tolerance ];
1410  }
1411  }
1412  }
1413 }
1414 
// Copies the match output options (output format, grouper name, grouper
// downstream RMSD, output_matchres_only) from the option system, validates
// the geom_csts_downstream_output list, and then branches on how a match is
// to be defined. Invalid input terminates the program through
// utility_exit_with_message.
// NOTE(review): this listing was extracted from rendered documentation; the
// function's name line (1416) and the statements on lines 1430, 1450, 1452,
// 1458 and 1460 are missing, so the bodies of the final if/else branches are
// not visible here.
1415 void
1417 {
1418 
1419  using namespace basic::options;
1420  using namespace basic::options::OptionKeys::match;
1421 
1422  output_writer_name_ = option[ OptionKeys::match::output_format ];
1423  grouper_name_ = option[ OptionKeys::match::match_grouper ];
1424 
1425  grouper_ds_rmsd_ = option[ OptionKeys::match::grouper_downstream_rmsd ];
1426  output_matchres_only_ = option[ OptionKeys::match::output_matchres_only ];
1427 
  // The option holds ints; each entry is validated below before being stored
  // as core::Size.
1428  utility::vector1< int > tempvec = option[ OptionKeys::match::geom_csts_downstream_output ];
1429  if( tempvec.size() == 0 ) utility_exit_with_message("Bad user input: empty vector specified for option -match::geom_csts_downstream_output.");
1431 
  // Upper bound for a valid geometric-constraint id.
1432  core::Size num_geom_csts( enz_input_data_->num_mcfi_lists() );
1433 
  // Accept only ids in the range [1, num_geom_csts].
1434  for( core::Size i = 1; i <= tempvec.size(); ++i ){
1435 
  // NOTE(review): the wording of the user-facing message below is
  // inappropriate; rewording it requires a code change, not a comment change.
1436  if( tempvec[ i ] < 1 ){
1437  utility_exit_with_message("Retarded user input. Output for geom cst with id smaller than 1 requested.");
1438  }
1439 
  // The cast to Size is safe here because values below 1 were rejected above.
1440  if( (Size) tempvec[ i ] > num_geom_csts ){
1441  utility_exit_with_message("Bad user input. Output for geom cst with id higher than the number of total geomcsts requested.");
1442  }
1443 
1444  geom_csts_downstream_output_.push_back( (core::Size ) tempvec[ i ] );
1445  }
1446 
  // Explicit request to define a match by a single downstream positioning:
  // this requires exactly one geom cst selected for downstream output.
1447  if ( option[ OptionKeys::match::define_match_by_single_downstream_positioning ] ) {
1448  if( geom_csts_downstream_output_.size() != 1 ) utility_exit_with_message("Bad user input: values specified for options -match::define_match_by_single_downstream_positioning and -match::geom_csts_downstream_output are incompatible.");
  // NOTE(review): both branch bodies (lines 1450 and 1452) are missing from
  // the listing.
1449  if ( ! option[ OptionKeys::match::consolidate_matches ] ) {
1451  } else {
1453  }
1454  } else {
  // Without the explicit flag, the single-downstream-positioning treatment is
  // still announced when the output format is "PDB", matches are not
  // consolidated, and exactly one geom cst is output.
1455  if ( option[ OptionKeys::match::output_format ]() == "PDB" &&
1456  ! option[ OptionKeys::match::consolidate_matches ] && (geom_csts_downstream_output_.size() == 1 ) ) {
1457  TR << "MatcherTask initialised to treat matches as defined by single downstream positioning." << std::endl;
  // NOTE(review): the statements on lines 1458 and 1460 are missing from the
  // listing.
1459  } else {
1461  }
1462  }
1463 }
1464 
1465 }
1466 }
1467