// -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
// vi: set ts=2 noet:
// :noTabs=false:tabSize=4:indentSize=4:
//
// (c) Copyright Rosetta Commons Member Institutions.
// (c) This file is part of the Rosetta software suite and is made available under license.
// (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
// (c) For more information, see http://www.rosettacommons.org. Questions about this can be
// (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.

/// @file   protocols/match/MatcherTask.cc
/// @brief
/// @author Andrew Leaver-Fay (aleaverfay@gmail.com)

// Unit headers
#include <protocols/match/MatcherTask.hh>

// Project headers
#include <protocols/enzdes/EnzConstraintIO.hh>

#include <core/chemical/ChemicalManager.hh>
#include <core/chemical/ResidueType.hh>
#include <core/chemical/ResidueTypeSet.hh>

#include <core/conformation/Residue.hh>

#include <core/options/option.hh>
#include <core/options/keys/enzdes.OptionKeys.gen.hh>
#include <core/options/keys/packing.OptionKeys.gen.hh>
#include <core/options/keys/match.OptionKeys.gen.hh>

#include <core/pose/Pose.hh>

#include <core/util/Tracer.hh>

/// Numeric headers
#include <numeric/constants.hh>
#include <numeric/xyz.functions.hh>

// Utility headers
#include <utility/string_util.hh>
#include <utility/io/izstream.hh>

// C++ headers
#include <sstream>

namespace protocols {
namespace match {

static core::util::Tracer TR( "protocols.match.MatcherTask" );

MatcherTask::MatcherTask() :
	share_build_points_for_geomcsts_( true ),
	gridlig_active_site_definition_( false ),
	occ_space_bounding_box_( Vector( 0.0 ), Vector( 0.0 ) ),
	euclidean_bin_widths_( 1.0 ),
	euler_bin_widths_( 10.0 ),
	permitted_overlap_( 0.0 ),
	use_input_sc_( false ),
	consolidate_matches_( true ),
	n_to_output_per_group_( 5 ),
	grouper_name_( "SameChiBinComboGrouper" ),
	evaluator_name_( "DownstreamRMSEvaluator" ),
	output_writer_name_( "PDB" ),
	output_file_name_( "output_matches.kin" ),
	output_matchres_only_(true),
	filter_upstream_residue_collisions_( false ),
	upstream_residue_collision_tolerance_( 1.0 ),
	upstream_residue_collision_score_cutoff_( 1.0 ),
	upstream_residue_collision_Wfa_atr_( 1.0 ),
	upstream_residue_collision_Wfa_rep_( 1.0 ),
	upstream_residue_collision_Wfa_sol_( 1.0 ),
	define_match_by_single_downstream_positioning_( false )
{}

MatcherTask::MatcherTask( MatcherTask const & other ) :
	ReferenceCount()
{
	(*this) = other;
}

MatcherTask const &
MatcherTask::operator = ( MatcherTask const & rhs )
{
	if ( this != & rhs ) {
		upstream_pose_ = rhs.upstream_pose_;
		downstream_pose_ = rhs.downstream_pose_;
		downstream_orientation_atoms_ = rhs.downstream_orientation_atoms_;
		share_build_points_for_geomcsts_ = rhs.share_build_points_for_geomcsts_;
		generic_pose_build_resids_ = rhs.generic_pose_build_resids_;
		per_cst_pose_build_resids_ = rhs.per_cst_pose_build_resids_;
		gridlig_active_site_definition_ = rhs.gridlig_active_site_definition_;
		gridlig_fname_ = rhs.gridlig_fname_;
		upstream_resids_and_radii_defining_active_site_ =
			rhs.upstream_resids_and_radii_defining_active_site_;
		downstream_atoms_required_inside_active_site_ =
			rhs.downstream_atoms_required_inside_active_site_;
		occ_space_bounding_box_ = rhs.occ_space_bounding_box_;
		euclidean_bin_widths_ = rhs.euclidean_bin_widths_;
		euler_bin_widths_ = rhs.euler_bin_widths_;
		permitted_overlap_ = rhs.permitted_overlap_;
		use_input_sc_ = rhs.use_input_sc_;
		consolidate_matches_ = rhs.consolidate_matches_;
		n_to_output_per_group_ = rhs.n_to_output_per_group_;
		filter_names_ = rhs.filter_names_;
		grouper_name_ = rhs.grouper_name_;
		evaluator_name_ = rhs.evaluator_name_;
		output_writer_name_ = rhs.output_writer_name_;
		output_file_name_ = rhs.output_file_name_;
		output_matchres_only_ = rhs.output_matchres_only_;
		geom_csts_downstream_output_ = rhs.geom_csts_downstream_output_;
		enz_input_data_ = rhs.enz_input_data_;
		filter_upstream_residue_collisions_ = rhs.filter_upstream_residue_collisions_;
		upstream_residue_collision_tolerance_ = rhs.upstream_residue_collision_tolerance_;
		upstream_residue_collision_score_cutoff_ = rhs.upstream_residue_collision_score_cutoff_;
		upstream_residue_collision_Wfa_atr_ = rhs.upstream_residue_collision_Wfa_atr_;
		upstream_residue_collision_Wfa_rep_ = rhs.upstream_residue_collision_Wfa_rep_;
		upstream_residue_collision_Wfa_sol_ = rhs.upstream_residue_collision_Wfa_sol_;
		define_match_by_single_downstream_positioning_ = rhs.define_match_by_single_downstream_positioning_;
	}
	return *this;
}

MatcherTask::~MatcherTask() {}

void
MatcherTask::set_upstream_pose(
	core::pose::Pose const & input_pose
)
{
	upstream_pose_ = new core::pose::Pose( input_pose );
}

void
MatcherTask::set_downstream_pose(
	core::pose::Pose const & input_pose
)
{
	downstream_pose_ = new core::pose::Pose( input_pose );

	if ( downstream_orientation_atoms_.size() != 0 ) {
		validate_downstream_orientation_atoms();
	}
}

void
MatcherTask::set_downstream_pose(
	core::pose::Pose const & input_pose,
	utility::vector1< core::id::AtomID > const & orientation_atoms
)
{
	downstream_pose_ = new core::pose::Pose( input_pose );
	downstream_orientation_atoms_ = orientation_atoms;

	validate_downstream_orientation_atoms();
}

void
MatcherTask::set_downstream_orientation_atoms(
	utility::vector1< core::id::AtomID > const & orientation_atoms
)
{
	downstream_orientation_atoms_ = orientation_atoms;
	if ( downstream_pose_ ) {
		validate_downstream_orientation_atoms();
	}
}

void
MatcherTask::clear_downstream_orientation_atoms() {
	downstream_orientation_atoms_.clear();
}


void
MatcherTask::set_original_scaffold_build_points( utility::vector1< Size > const & resids )
{
	if ( share_build_points_for_geomcsts_ ) {
		generic_pose_build_resids_ = resids;
	} else {
		for ( Size ii = 1; ii <= per_cst_pose_build_resids_.size(); ++ii ) {
			per_cst_pose_build_resids_[ ii ] = resids;
		}
	}
}

void MatcherTask::use_different_build_points_for_each_geometric_constraint(
	Size n_geometric_constraints
)
{
	share_build_points_for_geomcsts_ = false;
	per_cst_pose_build_resids_.clear();
	per_cst_pose_build_resids_.resize( n_geometric_constraints );
}

/// @brief Set the build point id's for a particular geometric constraint
void
MatcherTask::set_original_scaffold_build_points_for_geometric_constraint(
	Size geom_cst_id,
	utility::vector1< Size > const & resids
)
{
	runtime_assert( ! share_build_points_for_geomcsts_ ); // use_different_build_points_for_each_geometric_constraint() must be called first
	runtime_assert( geom_cst_id <= per_cst_pose_build_resids_.size() );
	per_cst_pose_build_resids_[ geom_cst_id ] = resids;
}


void
MatcherTask::define_active_site_from_gridlig_file( std::string const & file_name )
{
	if ( ! gridlig_active_site_definition_ && ! upstream_resids_and_radii_defining_active_site_.empty() ) {
		TR << "overriding res/radius active site definition by setting gridlig file " << file_name << std::endl;
	}
	gridlig_active_site_definition_ = true;
	gridlig_fname_ = file_name;
}

void
MatcherTask::define_active_site_from_residue_radii_list()
{
	gridlig_active_site_definition_ = false;
}

void
MatcherTask::append_upstream_resiue_as_defining_active_site(
	Size resid,
	Real radius
)
{
	if ( gridlig_active_site_definition_ ) {
		TR << "overriding gridlig_active_site_definition_ by appending res/radius pair: " << resid << " " << radius << std::endl;
		gridlig_active_site_definition_ = false;
	}
	upstream_resids_and_radii_defining_active_site_.push_back( std::make_pair( resid, radius ) );
}

void
MatcherTask::append_downstream_atom_to_active_site_required_list(
	core::id::AtomID atid
)
{
	downstream_atoms_required_inside_active_site_.push_back( atid );
}


void
MatcherTask::set_occupied_space_bounding_box( BoundingBox const & bb )
{
	occ_space_bounding_box_ = bb;
}

void
MatcherTask::set_hash_euclidean_bin_width( Real width )
{
	euclidean_bin_widths_ = width;
}

void
MatcherTask::set_hash_euler_bin_width(     Real width )
{
	euler_bin_widths_ = width;
}

void
MatcherTask::set_permitted_overlap( Real permitted_overlap )
{
	permitted_overlap_ = permitted_overlap;
}

void
MatcherTask::initialize_from_command_line()
{
	using namespace core::options;
	using namespace core::options::OptionKeys::match;

	/// 1. Read the ligand grid header file and initialize the boundary for the active site.
	/// 2. Initialize the euclid and euler bin witdhs
	/// 3. Note the location of the output file
	/// 4. Read in the EnzConstraintIO file.
	/// 5. Read in the build position list

	/// 1. liggrid
	initialize_occupied_space_bounding_box_from_command_line();

	euclidean_bin_widths_ = option[ euclid_bin_size ];
	euler_bin_widths_ = option[ euler_bin_size ];

	initialize_enzdes_input_data_from_command_line();

	initialize_scaffold_active_site_residue_list_from_command_line();

	/// bump tolerance
	permitted_overlap_ = option[ bump_tolerance ];

	use_input_sc_ = option[ OptionKeys::packing::use_input_sc ];

	initialize_orientation_atoms_from_command_line();

	consolidate_matches_ = option[ OptionKeys::match::consolidate_matches ];
	n_to_output_per_group_ = option[ output_matches_per_group ];

	initialize_active_site_definition_from_command_line();
	initialize_upstream_residue_collision_filter_data_from_command_line();

	initialize_output_options_from_command_line();
}

void
MatcherTask::consolidate_matches( bool setting )
{
	consolidate_matches_ = setting;
}

void MatcherTask::filter_upstream_residue_collisions( bool setting )
{
	filter_upstream_residue_collisions_ = setting;
}

void MatcherTask::filter_upstream_collisions_by_score( bool setting )
{
	assert( filter_upstream_residue_collisions_ ); // must set filter_upstream_residue_collisions_ first!
	filter_upstream_collisions_by_score_ = setting;
}

void MatcherTask::upstream_residue_collision_tolerance( Real setting )
{
	upstream_residue_collision_tolerance_ = setting;
}

void MatcherTask::upstream_residue_collision_score_cutoff( Real setting )
{
	upstream_residue_collision_score_cutoff_ = setting;
}

void MatcherTask::upstream_residue_collision_Wfa_atr( Real setting )
{
	upstream_residue_collision_Wfa_atr_ = setting;
}

void MatcherTask::upstream_residue_collision_Wfa_rep( Real setting )
{
	upstream_residue_collision_Wfa_rep_ = setting;
}

void MatcherTask::upstream_residue_collision_Wfa_sol( Real setting )
{
	upstream_residue_collision_Wfa_sol_ = setting;
}


void MatcherTask::define_match_by_single_downstream_positioning( bool setting )
{
	define_match_by_single_downstream_positioning_ = setting;
}

void MatcherTask::n_to_output_per_group( Size setting )
{
	n_to_output_per_group_ = setting;
}

void
MatcherTask::add_filter( std::string const & filter_name )
{
	filter_names_.push_back( filter_name );
}

void
MatcherTask::grouper_name( std::string const & setting )
{
	grouper_name_ = setting;
}

void
MatcherTask::evaluator_name( std::string const & setting )
{
	evaluator_name_ = setting;
}

void
MatcherTask::output_writer_name( std::string const & setting )
{
	output_writer_name_ = setting;
}

void
MatcherTask::output_file_name( std::string const & setting )
{
	output_file_name_ = setting;
}


void
MatcherTask::set_enz_input_data( enzdes::EnzConstraintIOCOP data )
{
	enz_input_data_ = data;
}

core::pose::PoseCOP
MatcherTask::upstream_pose() const
{
	return upstream_pose_;
}

core::pose::PoseCOP
MatcherTask::downstream_pose() const
{
	return downstream_pose_;
}

utility::vector1< core::id::AtomID > const &
MatcherTask::downstream_orientation_atoms() const
{
	return downstream_orientation_atoms_;
}

utility::vector1< core::Size > const &
MatcherTask::upstream_pose_build_resids_for_geometric_constraint( Size cst_id ) const
{
	if ( share_build_points_for_geomcsts_ ) {
		return generic_pose_build_resids_;
	} else {
		runtime_assert( cst_id <= per_cst_pose_build_resids_.size() );
		return per_cst_pose_build_resids_[ cst_id ];
	}
}

/// @brief Define the active site through a gridlig file (true), or by listing residue/radii paris (false)?
bool
MatcherTask::gridlig_active_site_definition() const
{
	return gridlig_active_site_definition_;
}

/// @brief Accessor for the file name containing the active-site definition in gridlig format
std::string const &
MatcherTask::gridlig_file_name() const
{
	runtime_assert( gridlig_active_site_definition_ );
	return gridlig_fname_;
}


std::list< std::pair< MatcherTask::Size, MatcherTask::Real > > const &
MatcherTask::upstream_resids_and_radii_defining_active_site() const
{
	runtime_assert( ! gridlig_active_site_definition_ );
	return upstream_resids_and_radii_defining_active_site_;
}

std::list< core::id::AtomID > const &
MatcherTask::downstream_atoms_required_inside_active_site() const
{
	return downstream_atoms_required_inside_active_site_;
}


MatcherTask::BoundingBox const &
MatcherTask::occ_space_bounding_box() const
{
	return occ_space_bounding_box_;
}

MatcherTask::Vector
MatcherTask::euclidean_bin_widths() const
{
	return euclidean_bin_widths_;
}

MatcherTask::Vector
MatcherTask::euler_bin_widths() const
{
	return euler_bin_widths_;
}

MatcherTask::Real
MatcherTask::permitted_overlap() const
{
	return permitted_overlap_;
}

bool
MatcherTask::use_input_sc() const {
	return use_input_sc_;
}

bool
MatcherTask::consolidate_matches() const
{
	return consolidate_matches_;
}

MatcherTask::Size
MatcherTask::n_to_output_per_group() const
{
	return n_to_output_per_group_;
}


std::list< std::string > const &
MatcherTask::filter_names() const
{
	return filter_names_;
}

std::string const &
MatcherTask::grouper_name() const
{
	return grouper_name_;
}

std::string const &
MatcherTask::evaluator_name() const
{
	return evaluator_name_;
}

std::string const &
MatcherTask::output_writer_name() const
{
	return output_writer_name_;
}

std::string const &
MatcherTask::output_file_name() const
{
	return output_file_name_;
}

bool
MatcherTask::output_matchres_only() const
{
	return output_matchres_only_;
}

utility::vector1< core::Size > const &
MatcherTask::geom_csts_downstream_output() const
{
	return geom_csts_downstream_output_;
}

enzdes::EnzConstraintIOCOP
MatcherTask::enz_input_data() const
{
	return enz_input_data_;
}

bool MatcherTask::filter_upstream_residue_collisions() const
{
	return filter_upstream_residue_collisions_;
}

bool MatcherTask::filter_upstream_collisions_by_score() const
{
	return filter_upstream_collisions_by_score_;
}

MatcherTask::Real
MatcherTask::upstream_residue_collision_tolerance() const
{
	return upstream_residue_collision_tolerance_;
}

MatcherTask::Real MatcherTask::upstream_residue_collision_score_cutoff() const
{
	return upstream_residue_collision_score_cutoff_;
}

MatcherTask::Real MatcherTask::upstream_residue_collision_Wfa_atr() const
{
	return upstream_residue_collision_Wfa_atr_;
}

MatcherTask::Real MatcherTask::upstream_residue_collision_Wfa_rep() const
{
	return upstream_residue_collision_Wfa_rep_;
}

MatcherTask::Real MatcherTask::upstream_residue_collision_Wfa_sol() const
{
	return upstream_residue_collision_Wfa_sol_;
}

bool MatcherTask::define_match_by_single_downstream_positioning() const
{
	return define_match_by_single_downstream_positioning_;
}


void
MatcherTask::validate_downstream_orientation_atoms() const
{

}


/// @details This function reads a file specified on the command line through the
/// -match::grid_boundary <filename> flag.  The "grid boundary" specifies a bounding
/// box inside which the third orientation atom of the downstream partner must lie
/// for a downstream conformation to become a match.  The orientation atoms are
/// described in the match::orientation_atoms flag description.
/// The format for the file describing this bounding box is the first four lines
/// of the gridlig file format from rosetta++.  The same file may be listed as
/// both the grid_boundary file and the active_site_definition_by_gridlig file.
/// Though the active_site_definition_by_gridlig flag is not required for the matcher to
/// work, the grid_boundary flag is necessary.
///
/// Format:
/// The gridlig format has been coopted.  This file specifies voxels for a region of 3D.
/// Though the occupied space bounding box does not require a discritization for its
/// description, this gridlig file format does.
///
/// Gridlig format
/// The gridlig format specifies a discretization of euclidean space, and then gives
/// the 0 and 1 values for each voxel.
/// The first line in the liggrid file is "NAME: gridlig"
/// The second line describes the coordinate of the lower corner:
/// "BASE: <xcoord> <ycoord> <zcoord>
/// The third line describes the number of voxels in each dimension:
/// "SIZE: <xcount> <ycount> <zcount>
/// The fourth line describes the length of each dimension describing a voxel:
/// "LENGTH: <xlength> <ylength> <zlength>
/// Only the first four lines of the gridlig file are read.  Additional lines
/// are ignored.
/// Example:
/// <begin file>
/// NAME: gridlig
/// BASE:   27.712   15.827   39.155
/// SIZE:   39   39   34
/// LENGTH:    0.500    0.500    0.500
/// <end file>
///
/// NOTE: The discritization of the occupied space bounding box has no impact
/// on the hash size when detecting that two hits match.  The
/// -match::euclid_bin_size <Real> flag alone controls that behavior.
void
MatcherTask::initialize_occupied_space_bounding_box_from_command_line()
{
	using namespace core::options;
	using namespace core::options::OptionKeys::match;

	if ( ! option[ grid_boundary ].user() ) {
		utility_exit_with_message( "Grid boundary defintion file must be specified with the -match::grid_boundary <filename> flag" );
	}
	std::string filename = option[ grid_boundary ];
	utility::io::izstream istr( filename.c_str() );
	std::string name, liggrid;
	istr >> name >> liggrid;
	runtime_assert( name == "NAME:" || name == "@NAME:" ); // for some odd reason, the scaffold library has extra "@"'s.
	runtime_assert( liggrid == "gridlig" );
	std::string base;
	Real xbase( 0.0 ), ybase( 0.0 ), zbase( 0.0 );
	istr >> base;
	runtime_assert( base == "BASE:" );
	istr >> xbase; runtime_assert( ! istr.bad() );
	istr >> ybase; runtime_assert( ! istr.bad() );
	istr >> zbase; runtime_assert( ! istr.bad() );

	std::string size;
	istr >> size;
	runtime_assert( size == "SIZE:" );
	Size xsize( 0 ), ysize( 0 ), zsize( 0 );
	istr >> xsize; runtime_assert( ! istr.bad() );
	istr >> ysize; runtime_assert( ! istr.bad() );
	istr >> zsize; runtime_assert( ! istr.bad() );

	runtime_assert( xsize != 0 );
	runtime_assert( ysize != 0 );
	runtime_assert( zsize != 0 );

	std::string length;
	istr >> length;
	runtime_assert( length == "LENGTH:");
	Real xwidth( 0.0 ), ywidth( 0.0 ), zwidth( 0.0 );

	istr >> xwidth; runtime_assert( ! istr.bad() );
	istr >> ywidth; runtime_assert( ! istr.bad() );
	istr >> zwidth; runtime_assert( ! istr.bad() );

	runtime_assert( xwidth != 0 );
	runtime_assert( ywidth != 0 );
	runtime_assert( zwidth != 0 );

	Vector lower_corner( xbase, ybase, zbase );
	Vector upper_corner( lower_corner );
	upper_corner.x() += xwidth * xsize;
	upper_corner.y() += ywidth * ysize;
	upper_corner.z() += zwidth * zsize;
	occ_space_bounding_box_.set_lower( lower_corner );
	occ_space_bounding_box_.set_upper( upper_corner );
}

/// @details This function reads one of two files from the command line
/// depending on which options the user has provided.  These files define
/// the set of residues on the scaffold to consider as launch-points
/// for the geometric constraints (e.g. to consider as part of the
/// the catalytic core in an enzyme). The file meanings and formats are below.
///
/// 1. A list of residue id's to consider for all of the geometric constraints.
/// Such a file can be generated for a scaffold and then used along side any
/// enzyme-design .cst file.  It is scaffold dependent and constraint-file
/// independent.  The file should list the residue indexes for the scaffold
/// on one or more lines.  The file format does not support comments.
/// Residue id's start counting at 1; the input pdb resids are ignored.
/// (It's best to renumber your scaffold resids starting from 1 to avoid confusion)
///
/// Example.
/// <begin file>
/// 104 106 108 109 117 118 137 139 143 144 36 6 85 87 88 89 91 92 97
/// <end file>
///
/// 2. A list for each geometric constraint of the residues to consider.
/// Such a file allows the user to focus on particular residues for certain
/// geometric constraints for a particular scaffold.  Such a file should be
/// depends on both the scaffold and the match constraint file and cannot
/// be generalized across either multiple scaffolds or multiple constraint files.
/// The first line of the file begins with N_CST, followed by the number of geometric
/// contraints.  This must match the number of geometric constraints in the .cst file.
/// On each subsequent line, the geometric constraint ID is given, followed by
/// a colon and then followed by all of the residue ID's that should be considered
/// for that geometric constraint.  Each geometric constraint must appear on one
/// line in the file, though they may be listed in any order.  The file format does
/// not support comments.
/// Example.
/// <begin file>
/// N_CST 3
/// 1: 9
/// 3: 9
/// 2: 6 7 9 11 12 14 15 17 18 21 22 23 25 26 38 40 43 46 47 49 53 54 57 60 61
/// <end file>
///
void
MatcherTask::initialize_scaffold_active_site_residue_list_from_command_line()
{
	using namespace core::options;
	using namespace core::options::OptionKeys::match;

	if ( ! option[ scaffold_active_site_residues ].user() && ! option[ scaffold_active_site_residues_for_geomcsts ].user()  ) {
		utility_exit_with_message( "Scaffold build point defintion file must be specified with either "
			"the -match::scaffold_active_site_residues <filename> flag\n"
			"or the -match::scaffold_active_site_residues_for_geomcsts <filename> flag" );
	}

	if ( option[ scaffold_active_site_residues ].user() && option[ scaffold_active_site_residues_for_geomcsts ].user()  ) {
		utility_exit_with_message( "Conflicting scaffold build point defintion: both "
			"the -match::scaffold_active_site_residues <filename> flag\n"
			"and the -match::scaffold_active_site_residues_for_geomcsts <filename> flag were found on the command line" );
	}

	if ( option[ scaffold_active_site_residues ].user() ) {
		share_build_points_for_geomcsts_ = true;

		std::string filename = option[ scaffold_active_site_residues ];
		utility::io::izstream istr( filename.c_str() );
		std::list< Size > upstream_build_resids;
		TR << "Reading match::scaffold_active_stie_residues " << filename << ":";
		while ( istr ) {
			Size resid;
			istr >> resid;
			if ( istr.good() ) {
				TR << " " << resid;
				upstream_build_resids.push_back( resid );
			}

		}
		if ( upstream_build_resids.empty() ) {
			utility_exit_with_message( "Failed to read any scaffold active site residues from file " + filename + " specified by the flag match::scaffold_active_stie_residues" );
		}

		TR << std::endl;
		generic_pose_build_resids_.resize( upstream_build_resids.size() );
		std::copy( upstream_build_resids.begin(), upstream_build_resids.end(), generic_pose_build_resids_.begin() );
	} else {
		share_build_points_for_geomcsts_ = false;

		std::string filename = option[ scaffold_active_site_residues_for_geomcsts ];
		utility::io::izstream istr( filename.c_str() );
		std::list< Size > upstream_build_resids;
		TR << "Reading match::scaffold_active_site_residues_for_geomcsts " << filename << std::endl;
		std::string ncsts_string; Size ncsts;
		if ( ! istr.good() ) {
			utility_exit_with_message( "Could not read first line from match::scaffold_active_site_residues_for_geomcsts " + filename );
		}
		istr >> ncsts_string;
		if ( ! istr.good() ) {
			utility_exit_with_message( "Failed to read N_CST field in first line from match::scaffold_active_site_residues_for_geomcsts " + filename );
		}
		if ( ncsts_string != "N_CST" ) {
			utility_exit_with_message( "Failed to read N_CST field in first line from match::scaffold_active_site_residues_for_geomcsts " + filename );
		}
		istr >> ncsts;
		if ( ! istr.good() ) {
			utility_exit_with_message( "Failed to read the number of geometric constraints in first line from match::scaffold_active_site_residues_for_geomcsts " + filename );
		}
		if ( ncsts != enz_input_data_->mcfi_lists_size() ) {
			utility_exit_with_message( "#geometric constraints disagreement between "
				"match::scaffold_active_site_residues_for_geomcsts " + filename +
				" (" + utility::to_string(ncsts) + ") and Enz .cst file: " + option[ geometric_constraint_file ]()() +
				" (" + utility::to_string(enz_input_data_->mcfi_lists_size()) + ")" );
		}
		per_cst_pose_build_resids_.resize( ncsts );

		std::string finish_the_line;
		istr.getline( finish_the_line );
		utility::vector1< Size > data_read_for_cst( ncsts, 0 );
		Size linenum = 2;
		while ( istr ) {
			Size geomcst_id;
			istr >> geomcst_id;
			if ( istr.eof() ) break;

			if ( ! istr.good() ) {
				utility_exit_with_message( "Reading line " + utility::to_string( linenum ) + " of " + filename + ". Failed to read the geometric constraint id at the beginning of the line." );
			}
			if ( geomcst_id > ncsts ) {
				utility_exit_with_message( "Reading line " + utility::to_string( linenum ) + " of " + filename + ". Geometric constraint id at the beginning of the line is out of range: "
					+ utility::to_string( geomcst_id ) + " > " +  utility::to_string( ncsts ) );
			}
			if ( data_read_for_cst[ geomcst_id ] != 0 ) {
				utility_exit_with_message( "Reading line " + utility::to_string( linenum ) + " of " + filename + ". Residue list for geometric constraint #"
					+ utility::to_string( geomcst_id ) + " appeared already on line " +  utility::to_string( data_read_for_cst[ geomcst_id ] ) );
			}
			data_read_for_cst[ geomcst_id ] = linenum;
			std::string colon;
			istr >> colon;
			if ( colon != ":" ) {
				utility_exit_with_message( "Reading line " + utility::to_string( linenum ) + " of " + filename + ". Failed to read the colon (:) separating the geometric constraint id from the rest of the resids." );
			}
			TR << std::endl << geomcst_id << " :";
			istr.getline( finish_the_line );
			std::istringstream isstr( finish_the_line );
			while ( isstr.good() ) {
				Size resid( 0 );
				isstr >> resid;
				if ( ! isstr.bad() ) {
					if ( resid > 0 && resid <= upstream_pose_->total_residue() ) {
						TR << " " << resid;
						per_cst_pose_build_resids_[ geomcst_id ].push_back( resid );
					} else if ( resid > upstream_pose_->total_residue() ){
						std::cerr << std::endl << "ERROR parsing line fragment: " << finish_the_line << std::endl;
						utility_exit_with_message( "Reading line " + utility::to_string( linenum ) + " of " + filename + ". Requested upstream build resid of " + utility::to_string(resid) + " exceeds the number of residues in the pose ( " + utility::to_string( upstream_pose_->total_residue() ) + ")"  );
					} else {
						std::cerr << std::endl << "ERROR parsing line fragment: " << finish_the_line << std::endl;
						utility_exit_with_message( "Reading line " + utility::to_string( linenum ) + " of " + filename + ". Failed to read an integer."  );
					}
				} else {
					std::cerr << std::endl << "ERROR parsing line fragment: " << finish_the_line << std::endl;
					utility_exit_with_message( "Reading line " + utility::to_string( linenum ) + " of " + filename + ". Only integers may be included." );
				}
			}
			++linenum;
		}
		TR << std::endl;
		bool any_absent( false );
		for ( Size ii = 1; ii <= ncsts; ++ii ) {
			if ( data_read_for_cst[ ii ] == 0 ) {
				std::cerr << "ERROR reading " << filename << ": did not find residue list for constraint # " << ii << std::endl;
				any_absent = true;
			}
		}
		if ( any_absent ) {
			utility_exit_with_message( "Failed to read a residue list for one or more constraints" );
		}
	}
}

void
MatcherTask::initialize_enzdes_input_data_from_command_line()
{
	using namespace core::options;
	using namespace core::options::OptionKeys::match;

	using namespace protocols::enzdes;
	using namespace core::chemical;
	/// create a local non-const version of the input data.
	EnzConstraintIOOP enz_input_data = new EnzConstraintIO(
		& ChemicalManager::get_instance()->nonconst_residue_type_set( FA_STANDARD ));
	if ( ! option[ geometric_constraint_file ].user() ) {
		utility_exit_with_message( "Option match::geometric_constraint_file must be specified on the command line" );
	}
	std::string fname( option[ geometric_constraint_file ]() );

	enz_input_data->read_enzyme_cstfile( fname );
	enz_input_data_ = enz_input_data;

}

void
MatcherTask::initialize_orientation_atoms_from_command_line()
{
	using namespace core::options;
	using namespace core::options::OptionKeys::match;

	if ( option[ orientation_atoms ].user() ) {
		if ( downstream_pose_ ) {
			utility::vector1< std::string > names = option[ orientation_atoms ];
			if ( names.size() != 3 ) {
				std::cerr << "ERROR: expected exactly three atom names for the downstream pose, but read ";
				std::cerr << names.size() << " from the command line." << std::endl;
				for ( Size ii = 1; ii <= names.size(); ++ii ) {
					std::cerr << names[ ii ] << " ";
				}
				std::cerr << std::endl;
				utility_exit_with_message( "Invalid use of the flag -match::orientation_atoms" );
			}

			if ( downstream_pose_->total_residue() != 1 ) {
				std::cerr << "ERROR: Cannot use the flag -match::orientation_atoms if the downstream pose has multiple residues" << std::endl;
				utility_exit_with_message( "Invalide use of the flag -match::orientation_atoms" );
			}
			utility::vector1< Size > ats( 3 );
			for ( Size ii = 1; ii <= 3; ++ii ) {
				if ( ! downstream_pose_->residue( 1 ).has( names[ ii ] ) ) {
					std::cerr << "Could not find atom named '" << names[ ii ] << "' in residue " << downstream_pose_->residue( 1 ).name() << std::endl;
					utility_exit_with_message ("Unrecognized atom name listed in flag -match::orientation_atoms" );
				} else {
					ats[ ii ] = downstream_pose_->residue( 1 ).atom_index( names[ ii ] );
					downstream_orientation_atoms_[ ii ] = core::id::AtomID( ats[ ii ], 1 );
				}
			}

			Real angle = numeric::constants::d::radians_to_degrees * numeric::angle_radians(
				downstream_pose_->residue( 1 ).xyz( ats[ 1 ] ),
				downstream_pose_->residue( 1 ).xyz( ats[ 2 ] ),
				downstream_pose_->residue( 1 ).xyz( ats[ 3 ] ) );
			if ( angle > 175 ) {
				std::cerr << "ERROR: The three atoms used to define the orientation of the downstream partner are nearly co-linear: angle= " << angle << "\n";
				std::cerr << "Please choose atoms with an angle that will not be so numerically sensitive." << std::endl;
				utility_exit_with_message( "Nearly-colinear points chosen as orientation atoms of the downstream partner" );
			}

		} else {
			std::cout << "Warning, Downstream Pose has not been set in MatcherTask when initialize_from_command_line\n";
			std::cout << "was invoked.  The parameters in the command-line flag -match::orientation_atoms are being ignored" << std::endl;
		}
	}

}

/// @details This function reads three types of files relating to the active site.
/// The file formats are described below.
/// 1. A file listing the atom names of those atoms in the downstream partner
/// which are required to be in the active site -- conformations of the downstream
/// partner in which any of these atoms lie outside of the active site are excluded.
/// This file format is only valid if the downstream partner is a single residue
/// (e.g. a ligand).
/// 2. A file defining the active site geometry by listing residue indices of the
/// scaffold and, for each residue i, a reach, r_i (a radius).  A point in space is
/// within the active site it is within r_i of any atom of residue i.
/// 3. A "gridlig" file from rosetta++ where a box in space whose voxels are labeled
/// with 0's and 1's; voxels with 1's are part of the active site, those with 0's are
/// not.
///
/// File formats:
/// 1.  Active site atoms.
/// The file lists on one or more lines the atom names for all atoms that are required
/// to be present in the active site.  The file format does not support comments.
/// Example:
/// <begin file>
/// C7 O2 O3 C22 C23 N5
/// <end file>
///
/// 2.  Residue/reach pairs.
/// Each line of the file should list a residue index (where residue numbering starts
/// from 1) for a residue on the scaffold and a reach (a radius).
/// Example.
/// <begin file>
/// 10 5.5
/// 12 7.2
/// 13 7.0
/// 23 10.0
/// 25 7.0
/// 58 5.5
/// 60 7.0
/// <end file>
///
/// 3. Gridlig format
/// The gridlig format specifies a discretization of euclidean space, and then gives
/// the 0 and 1 values for each voxel.
/// The first line in the liggrid file is "NAME: gridlig"
/// The second line describes the coordinate of the lower corner:
/// "BASE: <xcoord> <ycoord> <zcoord>
/// The third line describes the number of voxels in each dimension:
/// "SIZE: <xcount> <ycount> <zcount>
/// The fourth line describes the length of each dimension describing a voxel:
/// "LENGTH: <xlength> <ylength> <zlength>
/// The subsequent lines are arranged in blocks sweeping through the grid in
/// "row-major" order.  Each line has zcount space-separated 0's and 1's.  Lines
/// are grouped into ycount blocks; a single empty line separates blocks.
/// Example:
/// <begin file>
/// NAME: gridlig
/// BASE:   27.712   15.827   39.155
/// SIZE:   39   39   34
/// LENGTH:    0.500    0.500    0.500
/// 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
/// 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
/// 0 0 0 0 0 0 0 0 1 1 1 1 1 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
/// 0 0 0 0 0 0 0 0 1 1 1 1 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1
/// <the file contains 1563 lines total>
void
MatcherTask::initialize_active_site_definition_from_command_line()
{
	using namespace core::options;
	using namespace core::options::OptionKeys::match;

	if ( option[ required_active_site_atom_names ].user() ) {
		if ( downstream_pose_->total_residue() != 1 ) {
			utility_exit_with_message( "Flag match::required_active_site_atom_names may only be used when the downstream pose is a single residue" );
		}
		if (
				! option[ active_site_definition_by_residue ].user() &&
				! option[ active_site_definition_by_gridlig ].user() ) {
			utility_exit_with_message( "Flag match::required_active_site_atom_names must be used in combination with\n" "either the match::active_site_definition_by_residue flag or the match::active_site_definition_by_gridlig flag" );
		}
		std::string filename = option[ required_active_site_atom_names ];
		utility::io::izstream istr( filename.c_str() );
		while ( istr ) {
			std::string atname;
			istr >> atname;
			if ( istr.good() ) {
				if ( downstream_pose_->residue( 1 ).has( atname ) ) {
					TR << "Requiring downstream atom '" << atname << "' to reside in the scaffold's active site" << std::endl;
					downstream_atoms_required_inside_active_site_.push_back(
						core::id::AtomID( downstream_pose_->residue( 1 ).atom_index( atname ), 1 ));
				} else {
					std::cerr << "ERROR reading required_active_site_atom_names: atom named '" << atname << "' is not an atom of " << downstream_pose_->residue( 1 ).name() << std::endl;
					utility_exit_with_message( "Problem reading required_active_site_atom_names file " + filename );
				}
			}
		}
		if ( downstream_atoms_required_inside_active_site_.empty() ) {
			utility_exit_with_message( "No active site atoms read; empty required_active_site_atom_names file " + filename );
		}
	}

	if ( option[ active_site_definition_by_residue ].user() ) {
		if ( option[ active_site_definition_by_gridlig ].user() ) {
			std::cerr << "ERROR: found incompatible flags active_site_definition_by_residue and active_site_definition_by_gridlig on the command line" << std::endl;
			utility_exit_with_message( "Ambiguous command line." );
		}
		std::string filename = option[ active_site_definition_by_residue ];
		utility::io::izstream istr( filename.c_str() );
		while ( istr ) {
			Size resid; Real radius;
			istr >> resid;
			if ( resid > upstream_pose_->total_residue() ) {
				std::cerr << "ERROR reading active_site_definition: residue " << resid << " exceeds the number of residues in the scaffold pose." << std::endl;
				utility_exit_with_message( "Problem reading active_site_definition " + filename );
			}
			if ( ! istr.good() ) break;
			istr >> radius;
			if ( ! istr.good() ) break;
			TR << "Including sphere of radius " << radius << " surrounding scaffold residue " << resid << " in active site definition" << std::endl;
			upstream_resids_and_radii_defining_active_site_.push_back( std::make_pair( resid, radius ));
		}
		if ( upstream_resids_and_radii_defining_active_site_.empty() ) {
			if ( ! downstream_atoms_required_inside_active_site_.empty() ) {
				/// No matches possible as the active site is undefined, yet the
				/// there are atoms required to reside in the active site.  Quit.
				std::cerr << "ERROR reading active_site_definition: no active site residues were read!" << std::endl;
				utility_exit_with_message( "Problem reading active_site_definition " + filename );
			}
		}
	}

	if ( option[ active_site_definition_by_gridlig ].user() ) {
		gridlig_active_site_definition_ = true;
		gridlig_fname_ = option[ active_site_definition_by_gridlig ]();
	}
}

void
MatcherTask::initialize_upstream_residue_collision_filter_data_from_command_line()
{
	using namespace core::options;
	using namespace core::options::OptionKeys::match;

	if ( option[ OptionKeys::match::filter_colliding_upstream_residues ] ) {
		filter_upstream_residue_collisions_ = true;
		if ( option[ OptionKeys::match::upstream_residue_collision_score_cutoff ].user() ) {
			filter_upstream_collisions_by_score_ = true;
			upstream_residue_collision_score_cutoff_ = option[ OptionKeys::match::upstream_residue_collision_score_cutoff ];
			upstream_residue_collision_Wfa_atr_ = option[ OptionKeys::match::upstream_residue_collision_Wfa_atr ];
			upstream_residue_collision_Wfa_rep_ = option[ OptionKeys::match::upstream_residue_collision_Wfa_rep ];
			upstream_residue_collision_Wfa_sol_ = option[ OptionKeys::match::upstream_residue_collision_Wfa_sol ];
		} else {
			filter_upstream_collisions_by_score_ = false;
			if ( option[ OptionKeys::match::upstream_residue_collision_tolerance ].user() ) {
				upstream_residue_collision_tolerance_ = option[ OptionKeys::match::upstream_residue_collision_tolerance ];
			} else {
				upstream_residue_collision_tolerance_ = option[ OptionKeys::match::bump_tolerance ];
			}
		}
	}
}

void
MatcherTask::initialize_output_options_from_command_line()
{

	using namespace core::options;
	using namespace core::options::OptionKeys::match;

	if(  option[ OptionKeys::match::output_format ].user() ){
		output_writer_name_ = option[ OptionKeys::match::output_format ];
	}

	output_matchres_only_ =  option[ OptionKeys::match::output_matchres_only ];

	utility::vector1< int > tempvec = option[ OptionKeys::match::geom_csts_downstream_output ];

	core::Size num_geom_csts( enz_input_data_->num_mcfi_lists() );

	for( core::Size i = 1; i <= tempvec.size(); ++i ){

		if( tempvec[ i ]  < 1 ){
			utility_exit_with_message("Retarded user input. Output for geom cst with id smaller than 1 requested.");
		}

		if( (Size) tempvec[ i ] > num_geom_csts ){
			utility_exit_with_message("Bad user input. Output for geom cst with id higher than the number of total geomcsts requested.");
		}

		geom_csts_downstream_output_.push_back( (core::Size ) tempvec[ i ] );
	}

	if ( option[ OptionKeys::match::define_match_by_single_downstream_positioning ] ) {
		if ( ! option[ OptionKeys::match::consolidate_matches ] ) {
			define_match_by_single_downstream_positioning_ = true;
		} else {
			define_match_by_single_downstream_positioning_ = false;
		}
	} else {
		if ( option[ OptionKeys::match::output_format ]() == "PDB" &&
				! option[ OptionKeys::match::consolidate_matches ] ) {
			define_match_by_single_downstream_positioning_ = true;
		} else {
			define_match_by_single_downstream_positioning_ = false;
		}
	}
}
/*
private:

	core::pose::PoseOP upstream_pose_;

	core::pose::PoseOP downstream_pose_;
	utility::vector1< core::id::AtomID > downstream_orientation_atoms_;

	utility::vector1< Size > pose_build_resids_;


	BoundingBox occ_space_bounding_box_;
	Vector euclidean_bin_widths_;
	Vector euler_bin_widths_;

	Real permitted_overlap_;

	bool consolidate_matches_; /// MatchConsolidator vs MatchOutputter
	std::list< std::string > filter_names_;
	std::string grouper_name_;
	std::string evaluator_name_;
	std::string outputter_name_;

	enzdes::EnzConstraintIOOP enz_input_data_;
};
*/

}
}

