// -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
// vi: set ts=2 noet:
//
// (c) Copyright Rosetta Commons Member Institutions.
// (c) This file is part of the Rosetta software suite and is made available under license.
// (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
// (c) For more information, see http://www.rosettacommons.org. Questions about this can be
// (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.

/// @file src/protocols/enzdes/enzdes_util.cc
/// @brief a bunch of utility functions used in enzdes
/// @author Florian Richter, floric@u.washington.edu


// Unit headers
#include <protocols/enzdes/enzdes_util.hh>

#include <core/scoring/ScoreFunction.fwd.hh>

// Project headers
#include <core/conformation/Residue.hh>
#include <core/chemical/ResidueTypeSet.fwd.hh>
#include <core/pose/Pose.hh>
#include <core/util/datacache/BasicDataCache.hh>
#include <core/pose/datacache/CacheableDataType.hh>
#include <core/util/datacache/CacheableString.hh>
#include <core/pack/task/TaskFactory.hh> //task shit
#include <core/pack/task/PackerTask.hh>
#include <core/pack/task/IGEdgeReweightContainer.hh>
#include <core/pack/rotamer_set/RotamerSetOperation.hh>


// Utility Headers
#include <utility/pointer/ReferenceCount.hh>
#include <utility/string_util.hh>
#include <set>


//boost
//#include <boost/regex.hpp> //regular expressions

// C++ Headers

namespace protocols {
namespace enzdes {
namespace enzutil{

// File-local tracer, constructed with the tag "protocols.enzdes.enzdes_util".
// Functions in this file stream their diagnostic messages into it.
static core::util::Tracer tr("protocols.enzdes.enzdes_util");

/// @brief Replace the residue at res_pos with new_res and put every atom back
/// at the coordinates the atom of the same name had before the replacement.
/// @details The coordinates of all atoms of the residue currently at res_pos
/// are recorded by atom name, the residue is replaced through
/// Pose::replace_residue, and each atom of the residue now at res_pos is set
/// to the recorded coordinates. If the residue at res_pos then has an atom
/// whose name was not present before, an error is written to std::cerr and
/// utility::exit is called with EXIT_FAILURE.
/// @param pose     pose that is modified in place
/// @param new_res  residue that is placed at res_pos
/// @param res_pos  sequence position of the residue to replace
void
replace_residue_keeping_all_atom_positions(
	core::pose::Pose & pose,
	core::conformation::Residue new_res,
	core::Size res_pos
)
{
	typedef std::map< std::string, core::PointPosition > NameToXYZ;

	// snapshot of the outgoing residue: atom name -> coordinates
	NameToXYZ saved_coords;
	core::Size const n_old_atoms = pose.residue( res_pos ).natoms();
	for ( core::Size atom = 1; atom <= n_old_atoms; ++atom ) {
		core::conformation::Residue const & old_res = pose.residue( res_pos );
		saved_coords.insert( NameToXYZ::value_type( old_res.atom_name( atom ), old_res.xyz( atom ) ) );
	}

	// swap in the new residue
	pose.replace_residue( res_pos, new_res, true );

	// move every atom of the new residue back onto the saved coordinates
	for ( core::Size atom = 1; atom <= pose.residue( res_pos ).natoms(); ++atom ) {

		NameToXYZ::const_iterator const saved = saved_coords.find( pose.residue( res_pos ).atom_name( atom ) );

		if ( saved != saved_coords.end() ) {
			pose.set_xyz( core::id::AtomID( atom, res_pos ), saved->second );
			continue;
		}

		// the new residue has an atom the old one did not have: nothing to restore from
		std::cerr << "ERROR: when trying to make constraint covalent, atom " << pose.residue(res_pos).atom_name(atom) << " was not found for residue " << pose.residue(res_pos).name3() << " at position " << res_pos << std::endl;
		utility::exit( EXIT_FAILURE, __FILE__, __LINE__);
	}

} // replace_residue_keeping_all_atom_positions



/// @brief Extend runs of consecutive true values in the_vector that are
/// shorter than min_number_continuous_trues by switching neighbouring false
/// elements to true.
/// @details First all runs of consecutive trues ("regions") are collected as
/// (first index, last index) pairs. Every region shorter than the requested
/// minimum is then grown outwards, one element to the left and one to the
/// right per step, never beyond the gap that separates it from the
/// neighbouring region or from the end of the vector. Growth stops early once
/// the region, counted together with the neighbour it has run into, spans at
/// least min_number_continuous_trues elements. Elements are only ever changed
/// from false to true.
///
/// Exits through utility_exit_with_message if the requested minimum is larger
/// than the vector, or if the vector contains no true element.
/// @param the_vector                   1-indexed vector that is modified in place
/// @param min_number_continuous_trues  requested minimum length of a true run
void
make_continuous_true_regions_in_bool_vector(
	utility::vector1< bool > & the_vector,
	core::Size const min_number_continuous_trues
)
{

	if(  min_number_continuous_trues > the_vector.size() ){
		utility_exit_with_message("ridiculous. continuous region is requested to be longer than the actual vector. go play in traffic.\n");
	}

	// (first, last) index of every run of consecutive trues, in order of appearance
	utility::vector1< std::pair< core::Size, core::Size > > continuous_regions;

	bool in_cont_region = false;
	core::Size last_cont_begin(0);

	for( core::Size i = 1; i <= the_vector.size(); ++i){

		if( ( ! in_cont_region ) && (the_vector[ i ]==true) ) {
			last_cont_begin = i;
			in_cont_region = true;
			//tr << "cont region begin at " << i << ", ";
		}

		if ( in_cont_region && ( the_vector[ i ]==false ) ){
			continuous_regions.push_back( std::pair< core::Size, core::Size > ( last_cont_begin, (i - 1) ));
			in_cont_region = false;
			//tr << "cont_region end at " << (i-1) << " with last_cont_begin "<< last_cont_begin << std::endl;
		}
	}

	// A region that reaches the last element is never followed by a false, so
	// the loop above cannot close it. Record it here; otherwise a vector ending
	// in true would lose its last region (or be reported as having no trues).
	if( in_cont_region ){
		continuous_regions.push_back( std::pair< core::Size, core::Size > ( last_cont_begin, the_vector.size() ));
	}

	if( continuous_regions.size() == 0 ){
		utility_exit_with_message("The passed in vector doesn't have a single element set to true.\n");
	}

	for( core::Size j = 1; j<= continuous_regions.size(); ++j){
		std::pair< core::Size, core::Size > & cur_region = continuous_regions[j];

		//tr << "processing cont region between " << cur_region.first << " and " << cur_region.second << std::endl;
		if( cur_region.second - cur_region.first + 1 < min_number_continuous_trues ){

			//we need to do something
			core::Size to_fill = min_number_continuous_trues - ( cur_region.second - cur_region.first + 1 );

			// upper bound on the number of elements added on each side
			core::Size to_fill_each_side = (to_fill / 2) + 1;

			// left_gap / right_gap: number of elements between this region and its
			// neighbour (or the respective end of the vector).
			// prev_region_first / next_region_second: outer boundary of the neighbouring
			// region, defaulting to the ends of the vector when there is no neighbour.
			core::Size left_gap(0), right_gap(0);
			core::Size prev_region_first(1), next_region_second( the_vector.size () );

			if( j == 1 ) left_gap = cur_region.first - 1;
			else {
				left_gap = cur_region.first - continuous_regions[ j - 1 ].second - 1;
				prev_region_first = continuous_regions[ j - 1 ].first;
			}

			if( j == continuous_regions.size() ) right_gap = the_vector.size() - cur_region.second;
			else {
				right_gap = continuous_regions[ j + 1 ].first - cur_region.second - 1;
				next_region_second = continuous_regions[ j + 1 ].second;
			}

			//tr << "left gap is " << left_gap << " right is " << right_gap << std::endl;
			// provisional new boundaries, overwritten below when a gap is used up
			// or the loop stops early
			core::Size cur_region_tmp_first = cur_region.first - to_fill_each_side;
			core::Size cur_region_tmp_second = cur_region.second + to_fill_each_side;

			for( core::Size k = 1; k <= to_fill_each_side; ++k ){

				// --- grow to the left, as long as the left gap is not used up ---
				if( k <= left_gap ) {
					the_vector[ cur_region.first - k ] = true;
					//tr << (cur_region.first - k) << "set true , ";

					if( k == left_gap)  {
						// left gap completely filled
						if( j == 1 )cur_region_tmp_first = 1;

						else cur_region_tmp_first = continuous_regions[ j -1 ].second + 1;


						// counted from the start of the previous region, is the stretch long enough?
						if( ((cur_region.second + k - 1 ) - prev_region_first + 1) >= min_number_continuous_trues){
							cur_region_tmp_second = cur_region.second + k - 1;
							//tr << "breaking because of left gap ";
							break;
						}
					}
				}
				else if ( ((cur_region.second + k - 1 ) - prev_region_first + 1) >= min_number_continuous_trues){
					cur_region_tmp_second = cur_region.second + k - 1;
					break;
				}

				// --- grow to the right, as long as the right gap is not used up ---
				if( k <= right_gap ) {
					the_vector[ cur_region.second + k ] = true;
					//tr << (cur_region.second + k) << "set true , ";

					if( k == right_gap ) {
						// right gap completely filled
						if( j == continuous_regions.size() ) cur_region_tmp_second = the_vector.size();
						else cur_region_tmp_second = continuous_regions[ j + 1 ].first - 1;

						// counted up to the end of the next region, is the stretch long enough?
						if( (next_region_second - (cur_region.first - k ) + 1 ) >= min_number_continuous_trues){
							cur_region_tmp_first =  (cur_region.first - k );
							//tr << "breaking because of right gap ";
							break;
						}
					}
				}
				else if(  (next_region_second - (cur_region.first - k ) + 1 ) >= min_number_continuous_trues){
					cur_region_tmp_first =  (cur_region.first - k );
					break;
				}
			}

			// store the grown boundaries so that later regions see the updated neighbour
			cur_region.first = cur_region_tmp_first;
			cur_region.second = cur_region_tmp_second;
			//tr << std::endl;
		} //if we need to fill up this region

	}
	//tr << " making vector continuous over." << std::endl;

} // make_continuous_true_regions_in_bool_vector


/// @brief Build a new PackerTask for pose that reproduces the settings of orig_task.
/// @details The new task comes from TaskFactory::create_packer_task and is
/// initialized from the command line. For every residue the rotamer
/// operations and rotamer set operations of orig_task are appended, and the
/// packing state is carried over:
///  - not being packed in orig_task   -> prevent_repacking
///  - packed but not designed         -> restrict_to_repacking
///  - designed                        -> restricted to the amino acids of the
///                                       residue types allowed in orig_task
/// Finally, if orig_task has IGEdgeReweights, each reweighter is added to the
/// new task. Exits through utility_exit_with_message if orig_task and pose
/// differ in their number of residues.
/// @param pose       pose the new task is created for
/// @param orig_task  task whose settings are copied
/// @return the newly created task
core::pack::task::PackerTaskOP
recreate_task(
	core::pose::Pose const & pose,
	core::pack::task::PackerTask const & orig_task
)
{

	using namespace core::pack::task;

	if ( orig_task.total_residue() != pose.total_residue() ) {
		utility_exit_with_message("old task and pose don't have same number of residues.");
	}

	PackerTaskOP new_task = TaskFactory::create_packer_task( pose );
	new_task->initialize_from_command_line();

	core::Size const nres = pose.total_residue();
	for ( core::Size seqpos = 1; seqpos <= nres; ++seqpos ) {

		ResidueLevelTask const & old_res_task = orig_task.residue_task( seqpos );

		// carry over rotamer operations ...
		core::pack::rotamer_set::RotamerOperations::const_iterator const rot_op_end = old_res_task.rotamer_operations().end();
		for ( core::pack::rotamer_set::RotamerOperations::const_iterator rot_op = old_res_task.rotamer_operations().begin(); rot_op != rot_op_end; ++rot_op ) {
			new_task->nonconst_residue_task( seqpos ).append_rotamer_operation( *rot_op );
		}

		// ... and rotamer set operations
		for ( core::pack::rotamer_set::RotSetOperationListIterator rotset_op = old_res_task.rotamer_set_operation_begin(); rotset_op != old_res_task.rotamer_set_operation_end(); ++rotset_op ) {
			new_task->nonconst_residue_task( seqpos ).append_rotamerset_operation( *rotset_op );
		}

		// residues that are not packed at all
		if ( !old_res_task.being_packed() ) {
			new_task->nonconst_residue_task( seqpos ).prevent_repacking();
			continue;
		}

		// residues that are repacked but not designed
		if ( !old_res_task.being_designed() ) {
			new_task->nonconst_residue_task( seqpos ).restrict_to_repacking();
			continue;
		}

		// designed residues: allow exactly the amino acids allowed in the original task
		// NOTE(review): allowed_aas has num_canonical_aas entries and is indexed by
		// aa() of every allowed residue type; presumably only canonical amino acids
		// show up here -- confirm.
		utility::vector1< bool > allowed_aas( core::chemical::num_canonical_aas, false );
		for ( ResidueLevelTask::ResidueTypeCAPListConstIter restype = old_res_task.allowed_residue_types_begin(); restype != old_res_task.allowed_residue_types_end(); ++restype ) {
			allowed_aas[ (*restype)->aa() ] = true;
		}
		new_task->nonconst_residue_task( seqpos ).restrict_absent_canonical_aas( allowed_aas );
	}

	// set_IGEdgeReweights() is deliberately called inside the loop only, so it is
	// never invoked when the original task has no reweighters to copy
	if ( orig_task.IGEdgeReweights() ) {
		for ( utility::vector1< IGEdgeReweighterOP >::const_iterator reweighter = orig_task.IGEdgeReweights()->reweighters_begin(); reweighter != orig_task.IGEdgeReweights()->reweighters_end(); ++reweighter ) {
			new_task->set_IGEdgeReweights()->add_reweighter( *reweighter );
		}
	}

	return new_task;
} // recreate_task


/// @brief Assemble a "MATCH TEMPLATE ... MATCH MOTIF ..." remark line.
/// @details Resulting layout:
///   MATCH TEMPLATE <chainA> <resA> <posA> MATCH MOTIF <chainB> <resB> <posB>  <cst_block>  <ex_geom_id>
/// where posA / posB are the sequence positions converted to strings and passed
/// through utility::add_spaces_right_align with a width argument of 4.
/// @param chainA,resA,seqposA  chain, residue name and position of the template residue
/// @param chainB,resB,seqposB  chain, residue name and position of the motif residue
/// @param cst_block            number written after the motif position
/// @param ex_geom_id           number written last
/// @return the assembled line
std::string
assemble_remark_line(
	std::string chainA,
	std::string resA,
	int seqposA,
	std::string chainB,
	std::string resB,
	int seqposB,
	core::Size cst_block,
	core::Size ex_geom_id
)
{
	std::string template_pos = utility::to_string( seqposA );
	utility::add_spaces_right_align( template_pos, 4 );

	std::string motif_pos = utility::to_string( seqposB );
	utility::add_spaces_right_align( motif_pos, 4 );

	std::string remark( "MATCH TEMPLATE " );

	// template half
	remark += chainA;
	remark += " ";
	remark += resA;
	remark += " ";
	remark += template_pos;

	// motif half
	remark += " MATCH MOTIF ";
	remark += chainB;
	remark += " ";
	remark += resB;
	remark += " ";
	remark += motif_pos;

	// the two trailing numbers are each preceded by two spaces
	remark += "  ";
	remark += utility::to_string( cst_block );
	remark += "  ";
	remark += utility::to_string( ex_geom_id );

	return remark;

} // assemble_remark_line


/// @brief Split a "MATCH TEMPLATE ... MATCH MOTIF ..." remark line into its fields.
/// @details Expected whitespace separated layout (the one produced by
/// assemble_remark_line):
///   <word> TEMPLATE chainA resA seqposA <word> <word> chainB resB seqposB cst_block [ex_geom_id]
/// The second token has to be "TEMPLATE", otherwise the line is rejected
/// without touching the output arguments. ex_geom_id is optional and is set
/// to 1 when it is missing or cannot be read.
/// @param line                     the line to parse
/// @param[out] chainA,resA,seqposA chain, residue name and position of the template residue
/// @param[out] chainB,resB,seqposB chain, residue name and position of the motif residue
/// @param[out] cst_block           number following the motif position
/// @param[out] ex_geom_id          last number on the line, 1 if absent
/// @return true if all mandatory fields could be read, false otherwise. On a
/// false return after the TEMPLATE tag was seen, some output arguments may
/// already have been overwritten.
bool
split_up_remark_line(
	std::string line,
	std::string & chainA,
	std::string & resA,
	int & seqposA,
	std::string & chainB,
	std::string & resB,
	int & seqposB,
	core::Size & cst_block,
	core::Size & ex_geom_id
){

	std::istringstream line_stream( line );
	std::string buffer, tag;

	line_stream >> buffer >> tag;
	if( tag != "TEMPLATE" ) return false;

	// the two tokens between seqposA and chainB ("MATCH MOTIF") are skipped
	line_stream >> chainA >> resA >> seqposA >> buffer >> buffer;
	line_stream >> chainB >> resB >> seqposB >> cst_block;

	// fail() instead of !good(): good() is also false when the last token was
	// read successfully but ended exactly at the end of the line (eofbit), which
	// would reject valid lines that carry no ex_geom_id and no trailing whitespace.
	if( line_stream.fail() ){
		tr << "ERROR when trying to split up pdb remark line. Not all fields seem to have been specified." << std::endl;
		return false;
	}

	// optional last field: only fall back to 1 if the extraction itself failed,
	// so a value that happens to be the last characters of the line is kept
	if( !( line_stream >> ex_geom_id ) ) ex_geom_id = 1;

	return true;
}  // split_up_remark_line


/// @brief Chain a list of sequence mappings into a single composite mapping.
/// @details The result starts out as a copy of smaps[1]; smaps[2] ... smaps[n]
/// are then folded in one after the other, in order, through the two-argument
/// overload of combine_sequence_mappings. For an empty input a
/// default-constructed SequenceMapping is returned.
/// @param smaps  mappings to combine, applied sequentially
/// @return newly allocated composite mapping
core::sequence::SequenceMappingOP
combine_sequence_mappings(
	utility::vector1< core::sequence::SequenceMapping > const & smaps
){

	using namespace core::sequence;

	SequenceMappingOP combined = new SequenceMapping();

	// nothing to combine: hand back the empty mapping
	if ( smaps.empty() ) return combined;

	*combined = smaps[1];

	core::Size next = 2;
	while ( next <= smaps.size() ) {
		combine_sequence_mappings( *combined, smaps[ next ] );
		++next;
	}

	return combined;

} // combine_sequence_mappings



/// @brief Fold smap_to_add into smap, in place.
/// @details For every position j of smap with a non-zero entry, smap[j]
/// becomes smap_to_add[ smap[j] ]. Entries that are 0 stay 0, and entries
/// larger than smap_to_add.size1() are set to 0.
/// @param smap         mapping that is updated in place
/// @param smap_to_add  mapping that is applied on top of smap
void
combine_sequence_mappings(
	core::sequence::SequenceMapping & smap,
	core::sequence::SequenceMapping const & smap_to_add )
{

	for ( core::Size pos = 1; pos <= smap.size1(); ++pos ) {

		core::Size const intermediate = smap[ pos ];

		// 0 entries are left as they are
		if ( intermediate == 0 ) continue;

		if ( intermediate > smap_to_add.size1() ) {
			// points past the end of the second mapping
			smap[ pos ] = 0;
		} else {
			smap[ pos ] = smap_to_add[ intermediate ];
		}
	}
}


/// @brief Extract a pdb-code-like token from the output tag stored in the pose.
/// @details The string cached under CacheableDataType::JOBDIST_OUTPUT_TAG is
/// split at '_'. A part counts as a pdb code candidate if
///  - it is exactly 4 characters long,
///  - its first character is a digit,
///  - its remaining three characters are not all digits, and
///  - its remaining three characters are either all digits/uppercase letters
///    or all digits/lowercase letters (no mixed case).
/// This is a hand-written replacement for a regular expression along the lines
/// of \d([0-9a-z]{3}|[0-9A-Z]{3}); no regex library is used here.
///
/// If several candidates are found, a warning is written to the tracer and
/// the first one is returned. If none is found, a warning is written to
/// std::cerr and "N/A" is returned.
/// @param pose  pose whose data cache holds the output tag
/// @return the first candidate found, or "N/A"
std::string
get_pdb_code_from_pose_tag( core::pose::Pose const & pose ){
	using namespace core::pose::datacache;

	// NOTE(review): the pointer returned by get_const_ptr is dereferenced without
	// a check; presumably callers guarantee the tag has been set -- confirm.
	std::string outtag = pose.data().get_const_ptr< core::util::datacache::CacheableString >( CacheableDataType::JOBDIST_OUTPUT_TAG )->str();

	utility::vector1< std::string > pdb_matches;

	std::vector< std::string > tagparts = utility::string_split( outtag, '_' );

	for( std::vector< std::string >::const_iterator it = tagparts.begin(); it != tagparts.end(); ++it){

		if( it->size() != 4 ) continue;

		// non-const copy: the is_* helpers take a char *
		std::string cand_str = *it;

		if( !is_digit( &cand_str[0] ) ) continue;

		// classify the three trailing characters
		bool all_digits = true;
		bool all_upper_or_digit = true;
		bool all_lower_or_digit = true;
		for( core::Size pos = 1; pos <= 3; ++pos ){
			bool const digit = is_digit( &cand_str[pos] );
			all_digits = all_digits && digit;
			all_upper_or_digit = all_upper_or_digit && ( digit || is_uppercase_letter( &cand_str[pos] ) );
			all_lower_or_digit = all_lower_or_digit && ( digit || is_lowercase_letter( &cand_str[pos] ) );
		}

		// four digits are not accepted as a pdb code
		if( all_digits ) continue;

		if( all_upper_or_digit || all_lower_or_digit ) pdb_matches.push_back( cand_str );
	}

	if( pdb_matches.size() == 0 ){
		std::cerr << "protocols/enzdes/enzdes_util: WARNING: string " << outtag << " does not seem to contain a pdb code, returning N/A. " << std::endl;
		return "N/A";
	}

	if( pdb_matches.size() > 1 ){
		tr << "WARNING WARNING: in tag " << outtag << ", more than 1 pdbcode like pattern has been identified. assuming the first one (" << pdb_matches[1] << ") is the correct one." << std::endl;
	}

	return pdb_matches[1];

}


/// @brief Check whether the character cha points to is one of '0' ... '9'.
/// @param cha  pointer to the character to test; only cha[0] is read
/// @return true for a decimal digit, false for anything else
bool
is_digit( char * cha )
{
	// the decimal digits are guaranteed to be contiguous and ascending in the
	// execution character set, so a range check covers exactly '0' ... '9'
	char const c = cha[0];
	return ( c >= '0' ) && ( c <= '9' );
}


/// @brief Check whether the character cha points to is one of 'A' ... 'Z'.
/// @param cha  pointer to the character to test; only cha[0] is read
/// @return true for one of the 26 uppercase letters, false for anything else
bool
is_uppercase_letter( char * cha)
{
	static char const uppercase_letters[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";

	char const c = cha[0];
	// the scan stops before the terminating '\0', so '\0' itself never matches
	for ( char const * letter = uppercase_letters; *letter != '\0'; ++letter ) {
		if ( *letter == c ) return true;
	}
	return false;
}


/// @brief Check whether the character cha points to is one of 'a' ... 'z'.
/// @param cha  pointer to the character to test; only cha[0] is read
/// @return true for one of the 26 lowercase letters, false for anything else
bool
is_lowercase_letter( char * cha)
{
	static char const lowercase_letters[] = "abcdefghijklmnopqrstuvwxyz";

	char const c = cha[0];
	// the scan stops before the terminating '\0', so '\0' itself never matches
	for ( char const * letter = lowercase_letters; *letter != '\0'; ++letter ) {
		if ( *letter == c ) return true;
	}
	return false;
}


} //enzutil
} //enzdes
} //protocols
