// -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
// vi: set ts=2 noet:
// :noTabs=false:tabSize=4:indentSize=4:
//
// (c) Copyright Rosetta Commons Member Institutions.
// (c) This file is part of the Rosetta software suite and is made available under license.
// (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
// (c) For more information, see http://www.rosettacommons.org. Questions about this can be
// (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.

/// @file
/// @brief Utilities for modifying and utilizing Residues and other core::chemical classes.


// Unit headers
#include <core/chemical/util.hh>

// Package Headers
#include <core/chemical/ResidueTypeSet.hh>
#include <core/chemical/AA.hh>
#include <core/chemical/ResidueType.hh>
#include <core/chemical/VariantType.hh>
// Commented by inclean daemon #include <core/chemical/residue_io.hh>
#include <core/chemical/ChemicalManager.hh>
#include <core/chemical/disulfide_util.hh>

// Project Headers
#include <core/types.hh>
#include <core/pose/Pose.hh>
#include <core/conformation/Residue.hh>
#include <core/conformation/ResidueFactory.hh>
#include <core/conformation/util.hh>
#include <core/id/AtomID_Map.Pose.hh>
#include <core/scoring/Energies.hh>
#include <core/scoring/constraints/ConstraintSet.hh>

#include <core/pack/rotamer_trials.hh>
#include <core/pack/task/PackerTask.hh>
#include <core/pack/task/TaskFactory.hh>
#include <core/pack/pack_rotamers.hh>
#include <protocols/moves/MinMover.hh>
#include <core/kinematics/MoveMap.hh>

#include <core/options/option.hh>
#include <core/options/keys/run.OptionKeys.gen.hh>
#include <core/util/Tracer.hh>

// Utility Headers
#include <utility/vector1.hh>
// Commented by inclean daemon #include <utility/io/izstream.hh>

// Numeric Headers

// C++ headers
#include <utility>

namespace core {
namespace chemical {

/// Tracer shared by every function in this file; constructed with the channel name "core.chemical".
static util::Tracer tr("core.chemical");

// NOTE(review): this file is already inside namespace core, so this directive looks redundant — confirm before removing.
using namespace core;
// Presumably what lets the code below use Residue, ResidueOP and ResidueFactory unqualified — verify against the conformation headers.
using namespace core::conformation;

/// @brief Swap the residue at @p seqpos of @p pose for a newly created residue of type @p new_rsd_type.
/// @details Helper for residue replacement / residue-type switching. A residue is created from
/// @p new_rsd_type, passed together with the residue currently at @p seqpos to
/// conformation::copy_residue_coordinates_and_rebuild_missing_atoms(), and then installed with
/// Pose::replace_residue().
/// @note These functions should probably move to pose/util.cc.
/// @note Per the original author's note, new_rsd->fill_missing_atoms is called if the new residue has
/// atoms that the old one doesn't; that happens inside the copy helper and is not visible here.
void
replace_pose_residue_copying_existing_coordinates(
	pose::Pose & pose,
	Size const seqpos,
	ResidueType const & new_rsd_type
)
{
	// Fetch the current occupant before anything is created or replaced.
	Residue const & current_rsd( pose.residue( seqpos ) );

	ResidueOP replacement_rsd( ResidueFactory::create_residue( new_rsd_type ) );
	conformation::copy_residue_coordinates_and_rebuild_missing_atoms(
		current_rsd, *replacement_rsd, pose.conformation() );

	// NOTE(review): the trailing 'false' is presumably the orient-backbone flag — confirm against Pose::replace_residue.
	pose.replace_residue( seqpos, *replacement_rsd, false );
}



/// @brief Swap the residue at @p seqpos of @p conformation for a newly created residue of type @p new_rsd_type.
/// @details Conformation-level twin of replace_pose_residue_copying_existing_coordinates(). A residue
/// is created from @p new_rsd_type, passed together with the residue currently at @p seqpos to
/// conformation::copy_residue_coordinates_and_rebuild_missing_atoms(), and then installed with
/// Conformation::replace_residue().
/// @note These functions should probably move to pose/util.cc.
/// @note Per the original author's note, new_rsd->fill_missing_atoms is called if the new residue has
/// atoms that the old one doesn't; that happens inside the copy helper and is not visible here.
void
replace_conformation_residue_copying_existing_coordinates(
	conformation::Conformation & conformation,
	Size const seqpos,
	ResidueType const & new_rsd_type
)
{
	// Fetch the current occupant before anything is created or replaced.
	Residue const & current_rsd( conformation.residue( seqpos ) );

	ResidueOP replacement_rsd( ResidueFactory::create_residue( new_rsd_type ) );
	conformation::copy_residue_coordinates_and_rebuild_missing_atoms(
		current_rsd, *replacement_rsd, conformation );

	// NOTE(review): the trailing 'false' is presumably the orient-backbone flag — confirm against Conformation::replace_residue.
	conformation.replace_residue( seqpos, *replacement_rsd, false );
}



///////////////////////////////////////////////////////////////////////////////
/// @brief Construct a variant of an existing pose residue.
/// @details E.g. make a terminus variant, and replace the original in the pose. The new type is
/// obtained from the residue's own ResidueTypeSet via get_residue_type_with_variant_added().
/// @note The replacement goes through replace_pose_residue_copying_existing_coordinates(), which
/// copies any atoms in common between the old and new residues and rebuilds the others.
void
add_variant_type_to_pose_residue(
	pose::Pose & pose,
	VariantType const & variant_type,
	Size const seqpos
)
{
	Residue const & current_rsd( pose.residue( seqpos ) );

	// Ask the residue's own type set for the matching type with the variant added.
	ResidueType const & variant_rsd_type(
		current_rsd.residue_type_set().get_residue_type_with_variant_added( current_rsd.type(), variant_type ) );

	replace_pose_residue_copying_existing_coordinates( pose, seqpos, variant_rsd_type );
}

// this belongs in conformation/util

///////////////////////////////////////////////////////////////////////////////
/// @brief Construct a non-variant of an existing pose residue.
/// @details E.g. remove a terminus variant, and replace the original in the pose. The new type is
/// obtained from the residue's own ResidueTypeSet via get_residue_type_with_variant_removed().
/// @note The replacement goes through replace_pose_residue_copying_existing_coordinates(), which
/// copies any atoms in common between the old and new residues and rebuilds the others.
void
remove_variant_type_from_pose_residue(
	pose::Pose & pose,
	VariantType const & variant_type,
	Size const seqpos
)
{
	Residue const & current_rsd( pose.residue( seqpos ) );

	// Ask the residue's own type set for the matching type with the variant stripped.
	ResidueType const & stripped_rsd_type(
		current_rsd.residue_type_set().get_residue_type_with_variant_removed( current_rsd.type(), variant_type ) );

	replace_pose_residue_copying_existing_coordinates( pose, seqpos, stripped_rsd_type );
}

///////////////////////////////////////////////////////////////////////////////
/// @brief Construct a variant of an existing conformation residue.
/// @details E.g. make a terminus variant, and replace the original in the conformation. The new
/// type is obtained from the residue's own ResidueTypeSet via get_residue_type_with_variant_added().
/// @note The replacement goes through replace_conformation_residue_copying_existing_coordinates(),
/// which copies any atoms in common between the old and new residues and rebuilds the others.
void
add_variant_type_to_conformation_residue(
	conformation::Conformation & conformation,
	VariantType const & variant_type,
	Size const seqpos
)
{
	Residue const & current_rsd( conformation.residue( seqpos ) );

	// Ask the residue's own type set for the matching type with the variant added.
	ResidueType const & variant_rsd_type(
		current_rsd.residue_type_set().get_residue_type_with_variant_added( current_rsd.type(), variant_type ) );

	replace_conformation_residue_copying_existing_coordinates( conformation, seqpos, variant_rsd_type );
}

///////////////////////////////////////////////////////////////////////////////
/// @brief Construct a non-variant of an existing conformation residue.
/// @details E.g. remove a terminus variant, and replace the original in the conformation. The new
/// type is obtained from the residue's own ResidueTypeSet via get_residue_type_with_variant_removed().
/// @note The replacement goes through replace_conformation_residue_copying_existing_coordinates(),
/// which copies any atoms in common between the old and new residues and rebuilds the others.
void
remove_variant_type_from_conformation_residue(
	conformation::Conformation & conformation,
	VariantType const & variant_type,
	Size const seqpos
)
{
	Residue const & current_rsd( conformation.residue( seqpos ) );

	// Ask the residue's own type set for the matching type with the variant stripped.
	ResidueType const & stripped_rsd_type(
		current_rsd.residue_type_set().get_residue_type_with_variant_removed( current_rsd.type(), variant_type ) );

	replace_conformation_residue_copying_existing_coordinates( conformation, seqpos, stripped_rsd_type );
}

///////////////////////////////////////////////////////////////////////////////
/// @brief Add the LOWER_TERMINUS variant to the residue at @p seqpos of @p pose.
/// @details Convenience wrapper that forwards to add_variant_type_to_pose_residue().
void
add_lower_terminus_type_to_pose_residue( pose::Pose & pose, Size const seqpos )
{
	add_variant_type_to_pose_residue( pose, LOWER_TERMINUS, seqpos );
}

///////////////////////////////////////////////////////////////////////////////
/// @brief Remove the LOWER_TERMINUS variant from the residue at @p seqpos of @p pose.
/// @details Convenience wrapper that forwards to remove_variant_type_from_pose_residue().
void
remove_lower_terminus_type_from_pose_residue( pose::Pose & pose, Size const seqpos )
{
	remove_variant_type_from_pose_residue( pose, LOWER_TERMINUS, seqpos );
}

///////////////////////////////////////////////////////////////////////////////
/// @brief Add the UPPER_TERMINUS variant to the residue at @p seqpos of @p pose.
/// @details Convenience wrapper that forwards to add_variant_type_to_pose_residue().
void
add_upper_terminus_type_to_pose_residue( pose::Pose & pose, Size const seqpos )
{
	add_variant_type_to_pose_residue( pose, UPPER_TERMINUS, seqpos );
}

///////////////////////////////////////////////////////////////////////////////
/// @brief Remove the UPPER_TERMINUS variant from the residue at @p seqpos of @p pose.
/// @details Convenience wrapper that forwards to remove_variant_type_from_pose_residue().
void
remove_upper_terminus_type_from_pose_residue( pose::Pose & pose, Size const seqpos )
{
	remove_variant_type_from_pose_residue( pose, UPPER_TERMINUS, seqpos );
}

///////////////////////////////////////////////////////////////////////////////
/// @brief Add the LOWER_TERMINUS variant to the residue at @p seqpos of @p conformation.
/// @details Convenience wrapper that forwards to add_variant_type_to_conformation_residue().
void
add_lower_terminus_type_to_conformation_residue( conformation::Conformation & conformation, Size const seqpos )
{
	add_variant_type_to_conformation_residue( conformation, LOWER_TERMINUS, seqpos );
}

///////////////////////////////////////////////////////////////////////////////
/// @brief Remove the LOWER_TERMINUS variant from the residue at @p seqpos of @p conformation.
/// @details Convenience wrapper that forwards to remove_variant_type_from_conformation_residue().
void
remove_lower_terminus_type_from_conformation_residue( conformation::Conformation & conformation, Size const seqpos )
{
	remove_variant_type_from_conformation_residue( conformation, LOWER_TERMINUS, seqpos );
}

///////////////////////////////////////////////////////////////////////////////
/// @brief Add the UPPER_TERMINUS variant to the residue at @p seqpos of @p conformation.
/// @details Convenience wrapper that forwards to add_variant_type_to_conformation_residue().
void
add_upper_terminus_type_to_conformation_residue( conformation::Conformation & conformation, Size const seqpos )
{
	add_variant_type_to_conformation_residue( conformation, UPPER_TERMINUS, seqpos );
}

///////////////////////////////////////////////////////////////////////////////
/// @brief Remove the UPPER_TERMINUS variant from the residue at @p seqpos of @p conformation.
/// @details Convenience wrapper that forwards to remove_variant_type_from_conformation_residue().
void
remove_upper_terminus_type_from_conformation_residue( conformation::Conformation & conformation, Size const seqpos )
{
	remove_variant_type_from_conformation_residue( conformation, UPPER_TERMINUS, seqpos );
}

////////////////////////////////////////////////////////////////////////////////
/// @brief Translate a (possibly annotated) one-letter sequence into a list of residue types.
/// @details Given a protein sequence where each character represents an amino
/// acid, and a ResidueTypeSet, return the residue types that match the
/// sequence. NOTE: making residue types from a fully annotated sequence is
/// supported, that is, for each residue variant or ligand which cannot be
/// deduced from the one-letter code directly, a [] is added directly following
/// the one-letter code containing the residue's fullname, for example
/// K[lys_p:NtermProteinFull]ADFGCH[HIS_D]QNVE[glu_p:CtermProteinFull]Z[ZN].
/// This allows a pose to be constructed with full features from a silent output
/// file, such as with distinguished HIS tautomers, various chain termini and
/// cutpoint variants etc. Currently not working with disulfide variant CYD, but
/// this is on the to-do list.
/// @param sequence_in  plain or annotated one-letter sequence; its first character is taken
///                     verbatim as a one-letter code (it is not checked for '[' or ']').
/// @param residue_set  set queried through name_map() for annotated residues and through
///                     aa_map() for plain one-letter codes.
/// @param auto_termini when true, the first / last un-annotated residue must be a
///                     LOWER_TERMINUS / UPPER_TERMINUS type; when false, un-annotated
///                     polymer residues must carry neither terminus variant.
/// @return One entry per one-letter code, in sequence order; empty for an empty sequence.
/// @note Exits via utility_exit_with_message() if no residue type fits an un-annotated position.
ResidueTypeCAPs residue_types_from_sequence(
	std::string const & sequence_in,
	chemical::ResidueTypeSet const & residue_set,
	bool const auto_termini /* true */
)
{
	ResidueTypeCAPs requested_types;

	if ( sequence_in.empty() ) return requested_types;

	// deal with the sequence read in; any non-standard protein AA name including lig should be put within a bracket[]
	// following the one-letter AA character. X for aa_vrt and Z for aa_unk
	std::string fullname;
	utility::vector1< std::string > fullname_list; // a vector of non-standard full names (1-based)
	std::vector< Size > oneletter_to_fullname_index; // per one-letter code (0-based); zero means no fullname given

	// we start with the first character in sequence and that should be a standard AA.
	std::string one_letter_sequence = sequence_in.substr( 0,1 );
	Size last_index = 0; // zero means the most recent one-letter code has no fullname specified in brackets (so far).
	bool in_bracket = false; // whether we are currently scanning a fullname inside brackets.

	for ( Size seqpos = 1; seqpos < sequence_in.length(); ++seqpos ) {
		char const aa = sequence_in[ seqpos ];

		// note that a full-name aa will also have its one-letter code present e.g. C[CYS]
		// hence the one-letter count is not messed up
		if ( aa == '[' ) { // bracket starts, turn on flag and reset fullname string
			in_bracket = true;
			fullname = "";
		} else if ( aa == ']' ) { // bracket ends, save fullname and remember its index
			in_bracket = false;
			fullname_list.push_back( fullname );
			last_index = fullname_list.size();
		} else if ( in_bracket ) { // in bracket, collect fullname one char at a time
			fullname += aa;
		} else { // outside bracket: a new one-letter code starts, so the previous one is complete
			one_letter_sequence += aa;
			oneletter_to_fullname_index.push_back( last_index );
			last_index = 0;
		}
	} // finish reading in the whole sequence.

	// the last one-letter code has not been flushed by the loop above
	oneletter_to_fullname_index.push_back( last_index );

	if ( in_bracket ) {
		// The partial name was never stored, so the residue falls back to its one-letter code.
		tr.Warning << "residue_types_from_sequence(): unterminated '[' in sequence " << sequence_in
			<< "; ignoring incomplete residue name '" << fullname << "'" << std::endl;
	}

	tr.Debug << "one_letter: " << one_letter_sequence << std::endl;
	tr.Debug << "seq_in: " << sequence_in << std::endl;

	// pick one residue type per one-letter code
	Size const nres( one_letter_sequence.length() );
	for ( Size seqpos = 1; seqpos <= nres; ++seqpos ) {
		char const aa = one_letter_sequence[ seqpos-1 ]; // string indexing is zero-based!
		AA const my_aa = aa_from_oneletter_code( aa );

		// is there an annotated fullname defined for this one-letter code?
		Size const index = oneletter_to_fullname_index[ seqpos-1 ];
		if ( index ) { // fullname defined: get the type directly from name_map
			// The next call requires reference -> CAP because ResidueTypeSet's
			// methods are not yet consistent in handing out ref vs CAP.
			requested_types.push_back( &residue_set.name_map( fullname_list[ index ] ) );
		} else {
			// use aa_map to find list of possible ResidueTypes
			ResidueTypeCAPs const & rsd_type_list( residue_set.aa_map( my_aa ) );
			// for non-annotated sequence, assume single chain for now
			bool const is_lower_terminus( auto_termini && ( seqpos == 1 ) );
			bool const is_upper_terminus( auto_termini && ( seqpos == nres ) );
			bool const is_terminus( is_lower_terminus || is_upper_terminus ); // redundant, but for convenience

			Size best_index = 0;
			// iterate over rsd_types, pick the first acceptable one.
			for ( Size j = 1; j <= rsd_type_list.size(); ++j ) {
				ResidueType const & rsd_type( *(rsd_type_list[ j ]) );

				bool const is_polymer( rsd_type.is_polymer() );
				Size const nvariants = rsd_type.variant_types().size();
				// a polymer terminus needs at least one variant ...
				if ( is_polymer && ( is_terminus && ( nvariants == 0 ) ) ) continue;
				// ... and polymer types must match the requested terminus state exactly
				if ( is_polymer && ( is_lower_terminus != rsd_type.has_variant_type( LOWER_TERMINUS ) ||
						is_upper_terminus != rsd_type.has_variant_type( UPPER_TERMINUS ) ) ) continue;

				best_index = j;
				break;
			}
			if ( !best_index ) utility_exit_with_message( " can't find residue type at pos " + ObjexxFCL::string_of(seqpos) +
				" in sequence " + sequence_in );
			// add the ResidueTypeCAP
			requested_types.push_back( rsd_type_list[ best_index ] );
		}

		tr.Trace << "residue_types_from_sequence():  seqpos: " << seqpos << " aa " << aa << " " << my_aa << std::endl;

	} // for seqpos

	return requested_types;
}


////////////////////////////////////////////////////////////////////////////////
/// @brief Rebuild @p pose from a (possibly annotated) one-letter sequence.
/// @details Given a Pose, a protein sequence where each character represents an
/// amino acid, and a ResidueTypeSet, give the Pose a conformation of covalently
/// linked residues that match the sequence. NOTE: making a pose from a fully
/// annotated sequence is supported, that is, for each residue variant or ligand
/// which cannot be deduced from the one-letter code directly, a [] is added
/// directly following the one-letter code containing the residue's fullname, e.g.
/// K[lys_p:NtermProteinFull]ADFGCH[HIS_D]QNVE[glu_p:CtermProteinFull]Z[ZN].
/// This allows a pose to be constructed with full features from a silent output
/// file, such as with distinguished HIS tautomers, various chain termini and
/// cutpoint variants etc. Currently not working with disulfide variant CYD, but
/// this is on the to-do list.
/// @param pose         cleared and then refilled residue by residue.
/// @param sequence_in  plain or annotated sequence, see residue_types_from_sequence().
/// @param residue_set  set the residue types are taken from.
/// @param auto_termini forwarded unchanged to residue_types_from_sequence().
/// @note A residue is appended by jump when it is a LOWER_TERMINUS type, when it is
/// aa_unk / aa_vrt, or when the residue before it was aa_unk / aa_vrt; every other
/// residue is appended by bond.
void make_pose_from_sequence(
	pose::Pose & pose,
	std::string const & sequence_in,
	chemical::ResidueTypeSet const & residue_set,
	bool const auto_termini /* true */
)
{
	typedef core::Size Size;

	// grab residue types
	ResidueTypeCAPs requested_types = residue_types_from_sequence( sequence_in, residue_set, auto_termini );
	assert( annotated_to_oneletter_sequence( sequence_in ).length() == requested_types.size() );

	// clear the pose
	pose.clear();

	// make the pose
	bool jump_to_next = false; // true while the previously appended residue was aa_unk / aa_vrt
	for ( Size i = 1, ie = requested_types.size(); i <= ie; ++i ) {
		// grab the new residue
		ResidueType const & rsd_type = *requested_types[ i ];
		core::conformation::ResidueOP new_rsd( conformation::ResidueFactory::create_residue( rsd_type ) );

		// yab 20090219: The following error check was in the original
		// code prior to the split into residue_types_from_sequence()
		// and this function, but it doesn't appear to be triggerable
		// because ResidueFactory always returns a residue.  I leave it
		// in for now, but consider taking it out.
		if ( !new_rsd ) {
			tr.Error << "cannot create a residue that matches the residue type "
				<< rsd_type.name1() << " " << rsd_type.name() << " at position " << i << std::endl;
			utility_exit_with_message( "make_pose_from_sequence fails\n" );
		}

		tr.Trace << "make_pose_from_sequence():  seqpos: " << i << " " << new_rsd->aa() << std::endl;

		bool const is_lower_terminus( rsd_type.has_variant_type( LOWER_TERMINUS ) );
		bool const is_unk_or_vrt( new_rsd->aa() == aa_unk || new_rsd->aa() == aa_vrt );

		// do the actual append
		if ( is_lower_terminus || is_unk_or_vrt || jump_to_next ) {
			if ( is_unk_or_vrt ) {
				// fpd: an unknown residue / X used to be treated as fatal unless it was the last
				// residue, because continuing could seg-fault (test sequence from the original
				// note: KPAFGTNQEDYASYIXNGIIK). The problem is that the residue following the X
				// should be connected by a jump as well. It should be of LOWER_TERMINUS variant
				// type, but if not, we recover and spit out a warning for now.
				// Same thing for ligands???
				jump_to_next = true;
			} else if ( jump_to_next ) {
				jump_to_next = false;
				if ( !is_lower_terminus ) {
					tr.Warning << "Residue following X or Z is _not_ a lower terminus type!  Continuing ..." << std::endl;
				}
			}
			pose.append_residue_by_jump( *new_rsd, 1, "", "", true ); // each time this happens, a new chain should be started
		} else {
			pose.append_residue_by_bond( *new_rsd, true );
		}
	}

	tr.Debug << "sequence in pose: " << pose.sequence() << std::endl;
	tr.Debug << "annotated seq: " << pose.annotated_sequence() << std::endl;

} // make_pose_from_sequence

////////////////////////////////////////////////////////////////////////////////
/// @brief Overload of make_pose_from_sequence() that takes the name of the residue type set.
/// @details Does the same job as the ResidueTypeSet overload, but looks the set up by
/// @p type_set_name through the ChemicalManager first. Made for PyRosetta.
/// @note olange: DON'T DUPLICATE CODE sid! --- the duplication was removed by calling the
/// original make_pose_from_sequence().
void make_pose_from_sequence(
	pose::Pose & pose,
	std::string const & sequence_in,
	std::string const & type_set_name,
	//chemical::ResidueTypeSet const & residue_set,
	bool const auto_termini /* true */
) {
	// Resolve the named set and delegate to the ResidueTypeSet overload.
	make_pose_from_sequence(
		pose,
		sequence_in,
		*ResidueTypeSetCAP( ChemicalManager::get_instance()->residue_type_set( type_set_name ) ),
		auto_termini
	);
}


////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// @brief Strip the bracketed residue names out of an annotated sequence.
/// @details Copies every character of @p annotated_seq that is not part of a "[...]"
/// annotation, e.g. "K[lys_p:NtermProteinFull]AH[HIS_D]" becomes "KAH".
/// @param annotated_seq plain or annotated one-letter sequence.
/// @return The sequence with all annotations removed.
/// @note Brackets are not validated: an unterminated '[' drops the rest of the string,
/// and a ']' seen outside an annotation is copied through unchanged.
std::string annotated_to_oneletter_sequence(
	std::string const & annotated_seq
) {
	std::string oneletter_seq;
	oneletter_seq.reserve( annotated_seq.length() ); // the result can never be longer than the input

	bool in_annotation( false );
	for ( std::string::const_iterator it = annotated_seq.begin(), it_end = annotated_seq.end(); it != it_end; ++it ) {
		char const c( *it );
		// The order matters: '[' is dropped because the flag flips before the copy,
		// ']' is dropped because the flag flips back only after the copy.
		if ( c == '[' ) in_annotation = true;
		if ( !in_annotation ) oneletter_seq += c;
		if ( c == ']' ) in_annotation = false;
	}

	return oneletter_seq;
}


////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// @brief Re-express every residue of @p pose in the residue type set named @p type_set_name.
/// @details The function allows a pose to use a different residue_type_set to represent all its residues,
/// such as from fullatom residues to centroid residues, or vice versa. During the switch, corresponding atoms
/// will be copied. Redundant atoms will be removed (in the case from fullatom to centroid) and missing atoms
/// will be built by ideal geometry (in the case from centroid to fullatom).
/// @param pose          pose to convert in place; its Energies are cleared first.
/// @param type_set_name name handed to ChemicalManager::residue_type_set() to find the target set.
/// @note Residues whose type already belongs to a set with that name are skipped with a warning.
/// @note Exits via utility_exit_with_message() if no matching type is found for a residue.
/// @note If the resulting pose is fullatom and the run::rebuild_disulf option is set, the disulfides
/// reported by disulfide_bonds() are rebuilt at the end.
void
switch_to_residue_type_set(
	pose::Pose & pose,
	std::string const & type_set_name
)
{
	using namespace core::chemical;
	using namespace core::conformation;
	using utility::vector1;

	//SML 04/06/09
	//Energies object is not properly "aware" of typeset changes, and can attempt to score your pose with an incompatible
	//scorefunction if you go FA->CEN (or vice versa) and access the Energies without rescoring.
	//So, we'll eject the Energies to be safe!
	pose.energies().clear();

	// retrieve proper residue_type_set
	ResidueTypeSetCAP target_residue_type_set( ChemicalManager::get_instance()->residue_type_set( type_set_name ) );

	// loop each position and find new type that matches from the new type set
	for ( Size i=1; i<= pose.total_residue(); ++i ) {
		Residue const & rsd( pose.residue(i) );
		// in future we may have a conformation using mixed type set, so check this by residue
		std::string const & current_type_set_name ( rsd.type().residue_type_set().name() ); // database_directory() );
		if ( current_type_set_name == type_set_name ) {
			tr.Warning << "switch_to_residue_type_set: residue " << i << " already in " << type_set_name
				<< " residue_type_set" << std::endl;
			continue;
		}

		// find the replacement among the target set's types that share this residue's three-letter name
		ResidueOP new_rsd( 0 );
		if ( ( rsd.aa() == aa_unk ) || ( rsd.name().substr(0,5) == "HIS_D" ) ) {
			// ligand or metal ions are all defined as "UNK" AA, so check a rsdtype with same name
			// for HIS_D tautomer, we want to keep its tautomer state
			ResidueTypeCAPs const & rsd_types( target_residue_type_set->name3_map( rsd.name3() ) );
			for ( Size j=1; j<=rsd_types.size(); ++j ) {
				ResidueType const & new_rsd_type( *rsd_types[j] );
				if ( rsd.type().name() == new_rsd_type.name() ) {
					new_rsd = ResidueFactory::create_residue( new_rsd_type, rsd, pose.conformation() );
					break;
				}
			}
		} else {
			// for a normal AA/DNA/RNA residue, now look for a rsdtype with same variants
			ResidueTypeCAPs const & rsd_types( target_residue_type_set->name3_map( rsd.name().substr(0,3) ) );
			for ( Size j=1; j<= rsd_types.size(); ++j ) {
				ResidueType const & new_rsd_type( *rsd_types[j] );
				if ( rsd.type().variants_match( new_rsd_type ) ) {
					new_rsd = ResidueFactory::create_residue( new_rsd_type, rsd, pose.conformation() );
					break;
				}
			}
		}

		if ( ! new_rsd ) {
			tr.Error  << "can not find a residue type that matches the residue " << rsd.name()
				<< " at position " << i << std::endl;
			utility_exit_with_message( "switch_to_residue_type_set fails\n" );
		}

		// switch to corresponding residue type in the new set.
		if ( !rsd.is_protein() ) {
			// rethink this logic, phil
			tr.Debug << "trying to preserve existing coords for non-protein residue: " << rsd.seqpos() << ' ' << rsd.name() << std::endl;
			core::conformation::copy_residue_coordinates_and_rebuild_missing_atoms( rsd, *new_rsd, pose.conformation() );
		}
		// NOTE(review): the trailing 'false' is presumably the orient-backbone flag — confirm against Pose::replace_residue.
		pose.replace_residue( i, *new_rsd, false );
	}

	// After a CEN->FA transition, rebuild the disulfides
	if ( pose.is_fullatom() && options::option[ core::options::OptionKeys::run::rebuild_disulf ]() ) {
		vector1< std::pair< Size, Size > > disulfides;
		disulfide_bonds( pose, disulfides );

		if ( disulfides.size() > 0 ) {
			// Setup Packer & Minimizer
			pack::task::PackerTaskOP task = pack::task::TaskFactory::create_packer_task( pose );
			task->initialize_from_command_line().or_include_current( true );
			task->restrict_to_repacking();

			kinematics::MoveMapOP mm( new kinematics::MoveMap );
			mm->set_bb( false );

			// Set up each residue individually
			for ( Size i(1); i <= pose.total_residue(); ++i ) {
				Residue const & res( pose.residue(i) );
				if ( !res.is_protein() ) continue;

				// Determine if i is part of disulfides
				bool is_disulf = false;
				for ( vector1< std::pair< Size, Size > >::const_iterator
						disulf( disulfides.begin() ), end_disulf( disulfides.end() );
						disulf != end_disulf; ++disulf ) {
					if ( i == disulf->first || i == disulf->second ) {
						is_disulf = true;
						break;
					}
				}

				if ( is_disulf ) {
					// repack & minimize disulfides
					mm->set_chi( i, true );
				} else {
					// Other residues are unchanged
					task->nonconst_residue_task(i).prevent_repacking();
				}
			}

			// Rebuild disulfides
			// NOTE(review): the two NULL arguments are passed through as in the original; their meaning
			// is defined by rebuild_disulfide() and is not visible here — confirm.
			chemical::rebuild_disulfide( pose, disulfides, task, NULL, mm, NULL );
		}
	}

} // switch_to_residue_type_set





} // namespace chemical
} // namespace core
