// -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
// vi: set ts=2 noet:
//
// (c) Copyright Rosetta Commons Member Institutions.
// (c) This file is part of the Rosetta software suite and is made available under license.
// (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
// (c) For more information, see http://www.rosettacommons.org. Questions about this can be
// (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.

/// @file   core/pose/PDBInfo.hh
/// @brief  pose information so it's not loose in the pose
/// @author Steven Lewis
/// @author Yih-En Andrew Ban (yab@u.washington.edu)


#ifndef INCLUDED_core_pose_PDBInfo_HH
#define INCLUDED_core_pose_PDBInfo_HH

#include <core/pose/PDBInfo.fwd.hh>

// Unit headers
#include <core/pose/Pose.fwd.hh>

// type headers
#include <core/types.hh>

// Project headers
#include <core/conformation/Conformation.fwd.hh>
#include <core/conformation/signals/ConnectionEvent.fwd.hh>
#include <core/conformation/signals/IdentityEvent.fwd.hh>
#include <core/conformation/signals/LengthEvent.fwd.hh>
// AUTO-REMOVED #include <core/id/AtomID.hh>

#include <core/pose/PDBPoseMap.hh>

#include <core/io/pdb/file_data.hh>


// Utility headers
#include <utility/exit.hh>
#include <utility/pointer/ReferenceCount.hh>
// AUTO-REMOVED #include <utility/vector1.hh>

// C++ Headers
#include <algorithm>
#include <cassert>
#include <map>

namespace core {
namespace pose {


///@brief info about an atom in a unrecognized res (not in pose, but we want to remember it)
class UnrecognizedAtomRecord {

public:

	UnrecognizedAtomRecord(
		Size res_num,
		std::string res_name,
		std::string atom_name,
		numeric::xyzVector<Real> coords,
		Real temp
	) : res_num_(res_num),
		 res_name_(res_name),
		 atom_name_(atom_name),
	 	 coords_(coords),
		 temp_(temp)
	{}

	inline core::Size               const & res_num()   const { return res_num_; }
	inline std::string              const & res_name()  const { return res_name_; }
	inline std::string              const & atom_name() const { return atom_name_; }
	inline numeric::xyzVector<Real> const & coords()    const { return coords_; }
	inline Real                     const & temp()      const { return temp_; }

private:

	Size res_num_;
	std::string res_name_, atom_name_;
	numeric::xyzVector<Real> coords_;
	Real temp_;

};


/// @brief maintains pdb residue & atom information inside a Pose
/// @details Upon creation of new residue records, e.g. when calling the
///  constructors without 'init' or appending/prepending residues, the
///  chain id for the new records will be set to a character, currently
///  '^', denoting "empty record".  This character may be looked up by
///  calling the static method PDBInfo::empty_record().
/// @remarks Class implementation is biased towards simplicity and fast lookup.
///  Residue/atom information are kept in vectors.  An internally maintained
///  PDBPoseMap provides mapping from pdb -> pose residue numbering.  This
///  causes residue mutators to be a bit more expensive due to map updates,
///  but this is ok because they are typically called sparingly.  Accessors
///  and mutators have overloaded method convention, while special mutators
///  use .set_* convention.
class PDBInfo : public utility::pointer::ReferenceCount {


public: // typedefs


	typedef utility::pointer::ReferenceCount Super;

	typedef core::Size Size;
	typedef core::Real Real;
	typedef std::string String;
	typedef core::io::pdb::Remarks Remarks;


private: // forward declarations


	struct AtomRecord;
	struct ResidueRecord;


private: // typedefs


	typedef utility::vector1< AtomRecord > AtomRecords;
	typedef utility::vector1< ResidueRecord > ResidueRecords;


private: // structs


	/// @brief per-atom PDB bookkeeping, stored inside a ResidueRecord
	struct AtomRecord {
		/// @brief is heterogen atom (default: false)
		bool isHet;
		/// @brief alternate location character (default: blank)
		char altLoc;
		/// @brief occupancy (default: 1.0)
		Real occupancy;
		/// @brief temperature factor (default: 0.0)
		Real temperature;

		/// @brief default constructor: non-heterogen, blank alternate
		///  location, occupancy 1.0, temperature 0.0
		AtomRecord() :
			isHet( false ),
			altLoc( ' ' ),
			occupancy( 1.0 ),
			temperature( 0.0 )
		{}
	};


	/// @brief per-residue PDB bookkeeping plus the atom records it owns
	struct ResidueRecord {
		/// @brief chain id (default: the "empty record" character)
		char chainID;
		/// @brief residue sequence number (default: 0)
		int resSeq;
		/// @brief insertion code (default: blank)
		char iCode;
		/// @brief vector of AtomRecord
		/// @details sized the same as number of atoms for a given instance
		///  of core::conformation::Residue
		AtomRecords atomRec;

		/// @brief default constructor: chain id is PDBInfo::empty_record(),
		///  sequence number 0, blank insertion code, no atom records
		ResidueRecord() :
			chainID( PDBInfo::empty_record() ),
			resSeq( 0 ),
			iCode( ' ' )
		{}
	};


public: // constructors


	/// @brief default constructor, obsolete is *true*
	PDBInfo();


	/// @brief size constructor (ensure space for 'n' residue records),
	///  obsolete is *true*
	PDBInfo( Size const n );


	/// @brief Pose constructor (ensures space for residue and atom records
	///  relative to Pose)
	/// @param[in] pose  Pose
	/// @param[in] init  if true (default), then residue records are initialized
	///  using Pose residue numbering and chains of the Residues in the
	///  Conformation, and obsolete is set to false; otherwise obsolete is true
	PDBInfo(
		Pose const & pose,
		bool init = true
	);


	/// @brief copy constructor
	PDBInfo( PDBInfo const & info );


public: // destructor


	/// @brief default destructor
	virtual ~PDBInfo();


public: // assignment


	/// @brief copy assignment
	PDBInfo &
	operator =( PDBInfo const & info );


public: // observer interface


	/// @brief is this PDBInfo currently observing a conformation?
	/// @return the Conformation being observed, otherwise NULL
	core::conformation::Conformation const *
	is_observing();


	/// @brief attach to Conformation and begin observation
	void
	attach_to( core::conformation::Conformation & conf );


	/// @brief detach from Conformation and stop observation
	/// @remarks takes no arguments because PDBInfo can only observe one
	///  Conformation at a time
	void
	detach_from();


	/// @brief update when connection to Conformation is changed
	void
	on_connection_change( core::conformation::signals::ConnectionEvent const & event );


	/// @brief update atom records when residue identity changes in Conformation
	void
	on_identity_change( core::conformation::signals::IdentityEvent const & event );


	/// @brief update residue and atom records when length changes in Conformation,
	///  obsoletes PDBInfo
	void
	on_length_change( core::conformation::signals::LengthEvent const & event );


public: // obsolescence


	/// @brief report whether this PDBInfo is flagged obsolete (needs updating)
	/// @details The flag is provided for user convenience.  Setting it will
	///  forcibly turn off pdb numbering when dumping pdbs.
	/// @return current value of the obsolete flag
	/// @note NOTE(review): earlier notes say the flag is not used within the
	///  class, while on_length_change() is documented as obsoleting
	///  PDBInfo -- confirm which is current.
	inline bool obsolete() const { return obsolete_; }


	/// @brief flag (or unflag) this PDBInfo as obsolete
	/// @param[in] flag  new value of the obsolete flag
	/// @details The flag is provided for user convenience.  Setting it will
	///  forcibly turn off pdb numbering when dumping pdbs.
	inline void obsolete( bool flag ) { obsolete_ = flag; }


public: // state


	/// @brief number of residue records currently held
	/// @return size of the internal residue record vector
	inline Size nres() const { return residue_rec_.size(); }


	/// @brief number of atom records held for one residue
	/// @param[in] res  residue in pose numbering
	/// @return size of that residue's atom record vector
	inline
	Size
	natoms( Size const res ) const
	{
		AtomRecords const & atoms = residue_rec_[ res ].atomRec;
		return atoms.size();
	}


	/// @brief resize for 'n' residue records
	/// @param[in] n  number of residue records
	/// @details Leaves atom record state inconsistent.  Atom records for
	///  remaining residues are untouched while new residues have no atom
	///  records, so make sure to call one of the resize_atom_records()
	///  overloads afterwards if necessary.
	/// @warning Do not use this method for ins/del of residues, as it leaves
	///  the data state inconsistent.  See append_res/prepend_res/delete_res
	///  for that type of functionality.
	void
	resize_residue_records( Size const n );


	/// @brief ensure 'n' available atom records for particular residue
	/// @param[in] res  residue
	/// @param[in] n  number of atoms
	/// @param[in] zero  if true, zero the atom records for this residue
	void
	resize_atom_records(
		Size const res,
		Size const n,
		bool const zero = true
	);


	/// @brief ensure 'n' available atom records for every residue
	/// @param[in] n  number of atoms
	/// @param[in] zero  if true (default), zero the atom records
	void
	resize_atom_records(
		Size const n,
		bool const zero = true
	);


	/// @brief update number of atom records with respect to atoms in Pose
	/// @details Number of internally available atom records will be adjusted
	///  to match number of atoms within each residue in Pose.  Only newly
	///  created records will be zeroed, any existing records are untouched.
	void
	resize_atom_records( Pose const & pose );


	/// @brief tighten memory usage
	void
	tighten_memory();


	/// @brief chain id character that marks an "empty record"
	/// @return the marker character, currently '^'
	inline static char empty_record() { return '^'; }


public: // pdb-wide accessors/mutators


	/// @brief name of the pdb/structure
	/// @return const reference to the stored name
	inline String const & name() const { return name_; }


	/// @brief replace the name of the pdb/structure
	/// @param[in] s  new name
	inline void name( String const & s ) { name_ = s; }


	/// @brief model tag, used for multi-model pdbs
	/// @return const reference to the stored tag
	inline String const & modeltag() const { return modeltag_; }


	/// @brief replace the model tag, used for multi-model pdbs
	/// @param[in] tag  new model tag
	inline void modeltag( String const & tag ) { modeltag_ = tag; }


	/// @brief read-only view of the pdb remarks
	/// @return const reference to the stored Remarks
	inline Remarks const & remarks() const { return remarks_; }


	/// @brief writable view of the pdb remarks
	/// @return non-const reference to the stored Remarks
	/// @note direct access to the remarks vector is allowed because its
	///  state is independent of the rest of PDBInfo, and it is much more
	///  convenient for the user
	inline Remarks & remarks() { return remarks_; }


	/// @brief replace the pdb remarks wholesale
	/// @param[in] in  Remarks to copy into this object
	inline void remarks( Remarks const & in ) { remarks_ = in; }


public: // single residue accessors


	/// @brief chain id recorded for a residue
	/// @param[in] res  index into the residue records
	/// @return const reference to the stored chain id
	inline
	char const &
	chain( Size const res ) const
	{
		ResidueRecord const & record = residue_rec_[ res ];
		return record.chainID;
	}


	/// @brief pdb residue sequence number recorded for a residue
	/// @param[in] res  index into the residue records
	/// @return const reference to the stored sequence number
	inline
	int const &
	number( Size const res ) const
	{
		ResidueRecord const & record = residue_rec_[ res ];
		return record.resSeq;
	}


	/// @brief insertion code recorded for a residue
	/// @param[in] res  index into the residue records
	/// @return const reference to the stored insertion code
	inline
	char const &
	icode( Size const res ) const
	{
		ResidueRecord const & record = residue_rec_[ res ];
		return record.iCode;
	}


	/// @brief look up the pose residue number for a pdb
	///  (chain, number, insertion code) triple
	/// @param[in] chain  chain id
	/// @param[in] res  pdb residue numbering
	/// @param[in] icode  insertion code, blank by default
	/// @return pose numbering for residue, returns 0 if not found
	/// @details The lookup is delegated to the internally maintained
	///  PDBPoseMap.
	inline
	Size
	pdb2pose( char const chain, int const res, char const icode = ' ' ) const
	{
		Size const pose_res = pdb2pose_.find( chain, res, icode );
		return pose_res;
	}


	/// @brief translates the pose number to pdb numbering string
	/// for use in PyRosetta.
	/// @param[in] res pose residue number
	/// @return pdb string containing chainID and number
	String
	pose2pdb( Size const res ) const;


public: // single residue mutators


	/// @brief set chain id for residue
	/// @remarks chain id should not be the empty record character, currently '^'
	void
	chain(
		Size const res,
		char const chain_id
	);


	/// @brief set pdb residue sequence number
	void
	number(
		Size const res,
		int const pdb_res
	);


	/// @brief set insertion code for residue
	void
	icode(
		Size const res,
		char const ins_code
	);


	/// @brief set chain/pdb/insertion code for residue simultaneously
	/// @note convenience method; more efficient than doing each individually
	///  due to map updates
	/// @param[in] res  residue in pose numbering
	/// @param[in] chain_id  pdb chain id
	/// @param[in] pdb_res  residue in pdb numbering
	/// @param[in] ins_code  pdb insertion code
	void
	set_resinfo(
		Size const res,
		char const chain_id,
		int const pdb_res,
		char const ins_code = ' '
	);


public: // atom accessors


	/// @brief heterogen flag recorded for an atom
	/// @param[in] res  residue number
	/// @param[in] atom_index  atom index within instance of core::conformation::Residue
	/// @return const reference to the stored flag
	/// @note currently Rosetta's pdb file output treats the value of .is_het() as an
	///  override -- if this is set to true, the record will be hetatm, otherwise it
	///  will decide by Residue type (the usual default)
	inline
	bool const &
	is_het( Size const res, Size const atom_index ) const
	{
		AtomRecord const & atom = residue_rec_[ res ].atomRec[ atom_index ];
		return atom.isHet;
	}


	/// @brief alternate location character recorded for an atom
	/// @param[in] res  residue number
	/// @param[in] atom_index  atom index within instance of core::conformation::Residue
	/// @return const reference to the stored character
	inline
	char const &
	alt_loc( Size const res, Size const atom_index ) const
	{
		AtomRecord const & atom = residue_rec_[ res ].atomRec[ atom_index ];
		return atom.altLoc;
	}


	/// @brief occupancy recorded for an atom
	/// @param[in] res  residue number
	/// @param[in] atom_index  atom index within instance of core::conformation::Residue
	/// @return const reference to the stored occupancy
	inline
	Real const &
	occupancy( Size const res, Size const atom_index ) const
	{
		AtomRecord const & atom = residue_rec_[ res ].atomRec[ atom_index ];
		return atom.occupancy;
	}


	/// @brief temperature factor recorded for an atom
	/// @param[in] res  residue number
	/// @param[in] atom_index  atom index within instance of core::conformation::Residue
	/// @return const reference to the stored temperature factor
	inline
	Real const &
	temperature( Size const res, Size const atom_index ) const
	{
		AtomRecord const & atom = residue_rec_[ res ].atomRec[ atom_index ];
		return atom.temperature;
	}


public: // atom mutators


	/// @brief store the heterogen flag for an atom
	/// @param[in] res  residue number
	/// @param[in] atom_index  atom index within instance of core::conformation::Residue
	/// @param[in] flag  boolean indicating heterogen status
	/// @note currently Rosetta's pdb file output treats the value of .is_het() as an
	///  override -- if this is set to true, the record will be hetatm, otherwise it
	///  will decide by Residue type (the usual default)
	inline
	void
	is_het( Size const res, Size const atom_index, bool const flag )
	{
		AtomRecord & atom = residue_rec_[ res ].atomRec[ atom_index ];
		atom.isHet = flag;
	}


	/// @brief store the alternate location character for an atom
	/// @param[in] res  residue number
	/// @param[in] atom_index  atom index within instance of core::conformation::Residue
	/// @param[in] loc  alternate location character
	inline
	void
	alt_loc( Size const res, Size const atom_index, char const loc )
	{
		AtomRecord & atom = residue_rec_[ res ].atomRec[ atom_index ];
		atom.altLoc = loc;
	}


	/// @brief store the occupancy for an atom
	/// @param[in] res  residue number
	/// @param[in] atom_index  atom index within instance of core::conformation::Residue
	/// @param[in] occ  occupancy
	inline
	void
	occupancy( Size const res, Size const atom_index, Real const occ )
	{
		AtomRecord & atom = residue_rec_[ res ].atomRec[ atom_index ];
		atom.occupancy = occ;
	}


	/// @brief store the temperature factor for an atom
	/// @param[in] res  residue number
	/// @param[in] atom_index  atom index within instance of core::conformation::Residue
	/// @param[in] t  temperature
	inline
	void
	temperature( Size const res, Size const atom_index, Real const t )
	{
		AtomRecord & atom = residue_rec_[ res ].atomRec[ atom_index ];
		atom.temperature = t;
	}


public: // residue accessors en masse


	/// @brief read-only access to the internally maintained PDBPoseMap
	/// @return const reference to the map
	inline PDBPoseMap const & pdb2pose() const { return pdb2pose_; }


public: // residue mutators en masse


	/// @brief set all residue chain IDs to a single character
	void set_chains( char const id );


	/// @brief set residue chain IDs from some char container, iterator version
	/// @param[in] begin  iterator to the first chain id to copy
	/// @param[in] end  iterator one past the last chain id to copy
	/// @details Chain ids are written to consecutive residue records starting
	///  at the first one; the PDBPoseMap is rebuilt once after all writes.
	///  The iterators are compared with operator<, so they must support it
	///  (e.g. random access iterators or pointers).
	/// @warning This function does not check at run time if number of elements
	///  within span of iterators exceeds number of residues currently defined
	///  in object.  User must ensure this independently.  Builds with asserts
	///  enabled stop before the first write past the last residue record.
	template< typename CharIterator >
	inline
	void
	set_chains(
		CharIterator const & begin,
		CharIterator const & end
	)
	{
		ResidueRecords::iterator rr = residue_rec_.begin();

		for ( CharIterator i = begin; i < end; ++i, ++rr ) {
			// check the destination BEFORE writing through it, so an overrun
			// is caught before memory is touched rather than after
			assert( rr < residue_rec_.end() );
			rr->chainID = *i;
		}

		rebuild_pdb2pose();
	}


	/// @brief set residue chain IDs from a whole char container,
	///  e.g. utility::vector1
	/// @param[in] c  container holding one chain id per residue record
	/// @details Container must have const .size(), .begin() and .end()
	///  iterator access methods.  After the size checks, the work is handed
	///  to the iterator overload.
	/// @warning Size is checked first (assert, then a run-time check); a
	///  container whose size does not match the number of residues currently
	///  defined in object will cause failure.
	template< typename CharContainer >
	inline void set_chains( CharContainer const & c )
	{
		// debug-build check
		assert( residue_rec_.size() == c.size() );
		// run-time check
		check_residue_records_size( c.size() );

		set_chains( c.begin(), c.end() );
	}


	/// @brief set pdb residue numbering from some int container, iterator version
	/// @param[in] begin  iterator to the first residue number to copy
	/// @param[in] end  iterator one past the last residue number to copy
	/// @details Numbers are written to consecutive residue records starting
	///  at the first one; the PDBPoseMap is rebuilt once after all writes.
	///  The iterators are compared with operator<, so they must support it
	///  (e.g. random access iterators or pointers).
	/// @warning This function does not check at run time if number of elements
	///  within span of iterators exceeds number of residues currently defined
	///  in object.  User must ensure this independently.  Builds with asserts
	///  enabled stop before the first write past the last residue record.
	template< typename IntIterator >
	inline
	void
	set_numbering(
		IntIterator const & begin,
		IntIterator const & end
	)
	{
		ResidueRecords::iterator rr = residue_rec_.begin();

		for ( IntIterator i = begin; i < end; ++i, ++rr ) {
			// check the destination BEFORE writing through it, so an overrun
			// is caught before memory is touched rather than after
			assert( rr < residue_rec_.end() );
			rr->resSeq = *i;
		}

		rebuild_pdb2pose();
	}


	/// @brief set pdb residue numbering from a whole int container,
	///  e.g. utility::vector1
	/// @param[in] c  container holding one residue number per residue record
	/// @details Container must have const .size(), .begin() and .end()
	///  iterator access methods.  After the size checks, the work is handed
	///  to the iterator overload.
	/// @warning Size is checked first (assert, then a run-time check); a
	///  container whose size does not match the number of residues currently
	///  defined in object will cause failure.
	template< typename IntContainer >
	inline void set_numbering( IntContainer const & c )
	{
		// debug-build check
		assert( residue_rec_.size() == c.size() );
		// run-time check
		check_residue_records_size( c.size() );

		set_numbering( c.begin(), c.end() );
	}


	/// @brief set insertion codes from some char container, iterator version
	/// @param[in] begin  iterator to the first insertion code to copy
	/// @param[in] end  iterator one past the last insertion code to copy
	/// @details Codes are written to consecutive residue records starting
	///  at the first one; the PDBPoseMap is rebuilt once after all writes.
	///  The iterators are compared with operator<, so they must support it
	///  (e.g. random access iterators or pointers).
	/// @warning This function does not check at run time if number of elements
	///  within span of iterators exceeds number of residues currently defined
	///  in object.  User must ensure this independently.  Builds with asserts
	///  enabled stop before the first write past the last residue record.
	template< typename CharIterator >
	inline
	void
	set_icodes(
		CharIterator const & begin,
		CharIterator const & end
	)
	{
		ResidueRecords::iterator rr = residue_rec_.begin();

		for ( CharIterator i = begin; i < end; ++i, ++rr ) {
			// check the destination BEFORE writing through it, so an overrun
			// is caught before memory is touched rather than after
			assert( rr < residue_rec_.end() );
			rr->iCode = *i;
		}

		rebuild_pdb2pose();
	}


	/// @brief set insertion codes from a whole char container,
	///  e.g. utility::vector1
	/// @param[in] c  container holding one insertion code per residue record
	/// @details Container must have const .size(), .begin() and .end()
	///  iterator access methods.  After the size checks, the work is handed
	///  to the iterator overload.
	/// @warning Size is checked first (assert, then a run-time check); a
	///  container whose size does not match the number of residues currently
	///  defined in object will cause failure.
	template< typename CharContainer >
	inline void set_icodes( CharContainer const & c )
	{
		// debug-build check
		assert( residue_rec_.size() == c.size() );
		// run-time check
		check_residue_records_size( c.size() );

		set_icodes( c.begin(), c.end() );
	}


	/// @brief copy a section from another PDBInfo
	/// @param[in] input_info the PDBInfo to copy from
	/// @param[in] copy_from the first residue position in input_info to copy
	/// @param[in] copy_to the final residue position in input_info to copy
	/// @param[in] start_from the first residue position in this PDBInfo to
	///  copy into
	void
	copy(
		PDBInfo const & input_info,
		Size const copy_from,
		Size const copy_to,
		Size const start_from
	);


public: // residue insertion/deletion


	/// @brief append residue records after given residue number
	/// @param[in] res  residue to append after (in internal/pose numbering)
	/// @param[in] natoms  number of atoms in type of appended residue
	/// @param[in] n    number of residue records to append
	void
	append_res(
		Size const res,
		Size const natoms,
		Size const n = 1
	);


	/// @brief prepend residue records before given residue number
	/// @param[in] res  residue to prepend before (in internal/pose numbering)
	/// @param[in] natoms  number of atoms in type of prepended residue
	/// @param[in] n    number of residue records to prepend (default 1)
	void
	prepend_res(
		Size const res,
		Size const natoms,
		Size const n = 1
	);


	/// @brief "replace" residue record for given residue number
	/// @details Leaves information in residue record untouched, but resizes
	///  and zeroes atom records for the residue.
	/// @param[in] res residue to replace
	/// @param[in] natoms number of atoms in type of residue
	void
	replace_res(
		Size const res,
		Size const natoms
	);


	/// @brief delete 'n' residue records starting from given residue
	/// @param[in] res  residue to start deleting from (in internal/pose numbering)
	/// @param[in] n    number of residue records to delete
	void
	delete_res(
		Size const res,
		Size const n = 1
	);


	// added by sheffler
	/// @brief records kept for atoms that were not read into the pose
	/// @return const reference to the vector of UnrecognizedAtomRecord
	inline
	utility::vector1< UnrecognizedAtomRecord > const &
	get_unrecognized_atoms() const
	{
		return unrecognized_atoms_;
	}


	/// @brief stored count of unrecognized atoms
	/// @return const reference to the counter
	core::Size const & get_num_unrecognized_atoms() const { return num_unrecognized_atoms_; }


	/// @brief stored count of unrecognized residues
	/// @return const reference to the counter
	inline core::Size const & get_num_unrecognized_res() const { return num_unrecognized_res_; }


	/// @brief name stored for an unrecognized residue
	/// @param[in] i  key into the unrecognized residue number -> name map
	/// @return const reference to the stored name
	/// @warning 'i' must be a key already present in the map.  This is
	///  verified by assert only, so builds without asserts perform no check.
	inline
	std::string const &
	get_unrecognized_res_name( core::Size const & i ) const {
		std::map< core::Size, std::string >::const_iterator const entry =
			unrecognized_res_num2name_.find( i );
		// find() returns end() for an unknown key; end() must not be dereferenced
		assert( entry != unrecognized_res_num2name_.end() );
		return entry->second;
	}


	/// @brief size stored for an unrecognized residue
	/// @param[in] i  key into the unrecognized residue number -> size map
	/// @return const reference to the stored size
	/// @warning 'i' must be a key already present in the map.  This is
	///  verified by assert only, so builds without asserts perform no check.
	inline
	core::Size const &
	get_unrecognized_res_size( core::Size const & i ) const {
		std::map< core::Size, core::Size >::const_iterator const entry =
			unrecognized_res_size_.find( i );
		// find() returns end() for an unknown key; end() must not be dereferenced
		assert( entry != unrecognized_res_size_.end() );
		return entry->second;
	}


	// added by sheffler
	/// @brief remembers info about atoms not read into the pose
	void
	add_unrecognized_atom(
		Size resnum,
		std::string resname,
		std::string atomname,
		numeric::xyzVector<Real> coords,
		Real temp
	);


private: // methods


	/// @brief if size of residue records != passed value, fail fast
	/// @note This is meant to be used only for en masse methods, not individual
	///  residue/atom methods
	void
	check_residue_records_size( Size const size ) const;


	/// @brief rebuilds PDBPoseMap from scratch
	void
	rebuild_pdb2pose();


private: // data


	/// @brief indicates object is out of sync with reference (e.g. parent Pose)
	/// @details control boolean prevents PDB emitter access if info out of sync
	bool obsolete_;


	/// @brief name of pdb/structure
	String name_;


	/// @brief model tag for multi-model pdbs
	String modeltag_;


	/// @brief pdb remarks
	Remarks remarks_;


	/// @brief residue records in internal rosetta numbering from 1 .. n
	ResidueRecords residue_rec_;


	/// @brief maps PDB chain,residue -> internal residue numbering
	PDBPoseMap pdb2pose_;


	/// @brief Conformation being observed, NULL if not attached
	core::conformation::Conformation const * conf_;


	// added by sheffler
	/// @brief information about unrecognized residues
	utility::vector1< UnrecognizedAtomRecord > unrecognized_atoms_;
	std::map< core::Size, std::string > unrecognized_res_num2name_;
	std::map< core::Size, core::Size >  unrecognized_res_size_;
	core::Size num_unrecognized_res_;
	core::Size num_unrecognized_atoms_;


}; //end class PDBInfo


} // namespace pose
} // namespace core


#endif //INCLUDED_core_pose_PDBInfo_HH
