// -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
// vi: set ts=2 noet:
//
// This file is made available under the Rosetta Commons license.
// See http://www.rosettacommons.org/license
// (C) 199x-2007 University of Washington
// (C) 199x-2007 University of California Santa Cruz
// (C) 199x-2007 University of California San Francisco
// (C) 199x-2007 Johns Hopkins University
// (C) 199x-2007 University of North Carolina, Chapel Hill
// (C) 199x-2007 Vanderbilt University

/// @file   epigraft_io.cc
/// @brief  Common I/O routines for epigraft.
/// @author Bill Schief (schief@u.washington.edu)
/// @author Yih-En Andrew Ban (yab@u.washington.edu)

// unit headers
#include <epigraft/epigraft_io.hh>

// package headers
#include <epigraft/epigraft_types.hh>
#include <epigraft/LoopInfo.hh>
#include <epigraft/match/MatchResult.hh>

// Rosetta headers
#include <files_paths.h>

// ObjexxFCL headers
#include <ObjexxFCL/string.functions.hh>

// utility headers
#include <utility/file/FileName.hh>
#include <utility/file/file_sys_util.hh>
#include <utility/io/izstream.hh>
#include <utility/io/ozstream.hh>
#include <utility/vector1.hh>

// Platform headers
#include <platform/types.hh>

// C++ headers
#include <fstream>
#include <iostream>
#include <map>
#include <set>
#include <sstream>
#include <string>
#include <vector>

// platform specific
#ifdef _WIN32
	#include <direct.h>
#else
	#include <dirent.h>
	#include <sys/types.h>
	#include <sys/stat.h>
#endif // _WIN32

namespace epigraft {


/// @brief strip the pdb extension from a filename
/// @details everything from the last occurrence of ".pdb" onwards is removed,
///  so "x.pdb" and "x.pdb.gz" both yield "x"
/// @param[in] filename the name to process
/// @return the truncated name, or a copy of 'filename' if it contains no ".pdb"
std::string
base_pdb_filename(
	std::string const & filename
)
{
	std::string::size_type const suffix_start = filename.rfind( ".pdb" );
	return ( suffix_start == std::string::npos ) ? filename : filename.substr( 0, suffix_start );
}


/// @brief create a directory together with any missing parent directories
/// @details The path is split on platform::file::PATH_SEPARATOR.  Every
///  cumulative prefix (each one terminated by a separator) is visited from
///  the outermost inwards and created unless utility::file::file_exists()
///  already reports it.
/// @param[in] path directory path to create
/// @note the result of the underlying mkdir call is not checked
/// @warning Doesn't do path separator conversion!  Make sure you do this before calling the function.
void
recursively_create_directory(
	std::string const & path
)
{
	std::istringstream path_stream( path );
	std::string element; // current path component
	std::string prefix;  // path accumulated so far, always ends in a separator

	while ( getline( path_stream, element, platform::file::PATH_SEPARATOR ) ) {
		prefix += element;
		prefix += platform::file::PATH_SEPARATOR;

		if ( utility::file::file_exists( prefix ) ) {
			continue; // nothing to do for this level
		}

#ifdef _WIN32
		_mkdir( prefix.c_str() );
#else
		mkdir( prefix.c_str(), 0777 );
#endif // _WIN32
	}
}


/// @brief create the directory hierarchy that a file lives in
/// @details the path portion of the filename is obtained through
///  utility::file::FileName and handed to recursively_create_directory()
/// @param[in] filename_with_path filename including its full path
void
recursively_create_directory_for_filename(
	std::string const & filename_with_path
)
{
	recursively_create_directory( utility::file::FileName( filename_with_path ).path() );
}


/// @brief list the entries of a directory
/// @param[in] dirpath the name of the directory
/// @param[in] include_hidden whether to include entries whose names begin
///  with '.' (hidden files/dirs in unix); default false
/// @return entry names (without the directory prefix) in the order readdir
///  delivers them
/// @note there is no special handling of "." and "..": they are dropped only
///  because their names start with '.', so they are returned when
///  include_hidden is true and readdir reports them
/// @note aborts through utility::exit if the directory cannot be opened
std::vector< std::string >
dir(
	std::string const & dirpath,
	bool const include_hidden
)
{
	DIR * const handle = opendir( dirpath.c_str() );
	if ( handle == NULL ) {
		// fail fast
		utility::exit( __FILE__, __LINE__, "ERROR: Couldn't open directory: " + dirpath );
	}

	std::vector< std::string > names;

	for ( dirent * entry = readdir( handle ); entry != NULL; entry = readdir( handle ) ) {
		bool const is_hidden = ( entry->d_name[ 0 ] == '.' );
		if ( include_hidden || !is_hidden ) {
			names.push_back( entry->d_name );
		}
	}

	closedir( handle );

	return names;
}


/// @brief count the entries in a directory
/// @details simply the number of names returned by dir() for the same arguments
/// @param[in] dirpath the name of the directory
/// @param[in] include_hidden whether to count entries whose names begin
///  with '.' (hidden files/dirs in unix); default false
/// @return number of entries found
std::size_t
count_n_files(
	std::string const & dirpath,
	bool const include_hidden
)
{
	std::vector< std::string > const entries = dir( dirpath, include_hidden );
	return entries.size();
}


/// @brief load scaffold filenames
void
load_scaffold_filenames(
	std::string const & filename,
	utility::vector1< std::string > & v
)
{
	utility::io::izstream infile ( filename );
	if ( !infile ) {
		std::stringstream ss;
		ss << "load_scaffold_filenames: cannot open file " << filename << std::endl;
		utility::exit( __FILE__, __LINE__, ss.str() );
	}

	std::string line;
	while ( getline( infile, line ) ) {

		ObjexxFCL::strip_whitespace(line);
		if ( line == "" ) {
			continue;
		}

		std::istringstream line_stream( line );

		// grab each scaffold filename
		std::string scaffold_filename;
		line_stream >> scaffold_filename;
		v.push_back( scaffold_filename );
	}

	if ( v.size() == 0 ) {
		utility::exit( __FILE__, __LINE__, "load_scaffold_filenames: no filenames, list is empty!" );
	}

	// report to user
	std::cout << "load_scaffold_filenames: read " << v.size() << " scaffold pdb filenames" << std::endl;
}


/// @brief load epitope ranges
// TODO: clean this up in terms of the user specified superposition residues?
void
load_epitope_subranges(
	std::string const & filename,
	utility::vector1< LoopInfo > & v,
	std::set< Integer > & user_specified_superposition_residues,
	bool use_SS_align
)
{
	utility::io::izstream infile( filename );
	if ( !infile ) {
		std::ostringstream ss;
		ss << "load_epitope_ranges: cannot open file " << filename << std::endl;
		utility::exit( __FILE__, __LINE__, ss.str() );
	}

	bool found_superposition_residues = false;

	Integer id = -1;
	bool is_primary = true; //vds by default, any loop can be the "primary" loop, unless you specify otherwise
	std::string dummy;
	std::string line;
	while ( getline( infile, line ) ) {

		ObjexxFCL::strip_whitespace(line);
		if ( line == "" ) {
			continue;
		}

		// TODO: clean this up!
		if ( use_SS_align ) {

			if ( line.substr( 0, 23 ) == "superposition_residues:" ) {
				Integer residue;
				utility::vector1< std::string > entries;
				split_string( line, entries ); // grab all columns from input file
				for ( utility::vector1< std::string >::const_iterator entry = entries.begin(), past_last_entry = entries.end(); entry != past_last_entry; ++entry ) {
					// TODO: write a set of custom iterators or functors for these operations?
					std::istringstream( *entry ) >> residue;
					user_specified_superposition_residues.insert( residue );
				}

				found_superposition_residues = true;
			}

		}

		if ( line.substr( 0, 5 ) == "loop:" ) { // new epitope
			utility::vector1< String > loop_entry;
			split_string( line, loop_entry );
			std::istringstream( loop_entry[ 2 ] ) >> id;
			is_primary = true; // by default allow loop to be used in primary matching (find_singleton...)
			if ( loop_entry.size() == 3 && loop_entry[ 3 ] == "disallow_primary" ) {
				is_primary = false;
			}

			// look for number of residues
			std::string full_range_line;
			getline( infile, full_range_line );

			if ( full_range_line.substr( 0, 11 ) != "full_range:" ) { // not found, throw error
				std::ostringstream ss;
				ss << "Error! load_epitope_ranges: missing full_range for loop " << id << std::endl;
				ss << "                            instead got: " << full_range_line << std::endl;
				utility::exit( __FILE__, __LINE__, ss.str() );
			}

			Integer full_range_begin, full_range_end;
			std::istringstream full_range_stream( full_range_line );
			full_range_stream >> dummy >> full_range_begin >> full_range_end;

			// we have enough information to create LoopInfo
			LoopInfo l_info( id, ResidueRange( full_range_begin, full_range_end ), is_primary );

			// look for number of ranges
			std::string nranges_line;
			getline( infile, nranges_line );

			if ( nranges_line.substr( 0, 8 ) != "nranges:" ) { // not found, throw error
				std::stringstream ss;
				ss << "Error! load_epitope_ranges: missing nranges for loop " << id << std::endl;
				ss << "                            instead got: " << nranges_line << std::endl;
				utility::exit( __FILE__, __LINE__, ss.str() );
			}

			Integer nranges = -1;
			std::istringstream nranges_stream( nranges_line );
			nranges_stream >> dummy >> nranges;

			// now cycles through all specified ranges
			for ( Integer i = 1; i <= nranges; ++i ) {
				std::string range_line;
				getline( infile, range_line );

				if (range_line.substr( 0, 6 ) != "range:" ) { // not found, throw error
					std::ostringstream ss;
					ss << "Error! load_epitope_ranges: malformed range for loop " << id << std::endl;
					ss << "                            instead got: " << range_line << std::endl;
					utility::exit( __FILE__, __LINE__, ss.str() );
				}

				Integer a, b; // native epitope subrange
				std::istringstream range_stream( range_line );
				range_stream >> dummy >> a >> b;

				// add range to current LoopInfo
				l_info.add_native_subrange( ResidueRange( a, b ) );
			}

			// we're done, add LoopInfo to vector
			v.push_back( l_info );

		} // end new epitope
	} // end master while getline


	// close input file
	infile.close();

	// report to user
	std::cout << "load_epitope_ranges: read " << v.size() << " loops as follows:" << std::endl;
	for ( Integer i = 1, ie = v.size(); i <= ie; i++ ) {
		std::cout << v[ i ].to_string();
	}

	if ( use_SS_align ) {
		if ( found_superposition_residues ) {

			std::cout << "using specific superposition residues for SS_align as follows:" << std::endl;
			for ( std::set< Integer >::const_iterator ri = user_specified_superposition_residues.begin(), rie = user_specified_superposition_residues.end(); ri != rie; ++ri ) {
				std::cout << " " << *ri;
			}
			std::cout << std::endl;

		} else {
			utility::exit( __FILE__, __LINE__, "SS_align specified but no specific superposition residues in input file!" );
		}
	}
}


/// @brief check that the loop definitions and the epitope structure cover
///  exactly the same residues
/// @details The residues spanned by the full native range of every loop are
///  compared, in both directions, against the pdb residue numbers of every
///  position in the pose.  The first mismatch found is reported on std::cerr
///  and ends the check.
/// @param[in] loops loop definitions
/// @param[in] epitope pose of the epitope
/// @return true if the two residue sets are identical, false otherwise
/// @note looks up pdb information via pdb info in pose
bool
epitope_and_epitope_subranges_one_to_one(
	utility::vector1< LoopInfo > const & loops,
	Pose & epitope
)
{
	typedef std::set< Integer > ResidueSet;

	// residues named by the loop definitions
	ResidueSet from_loops;
	for ( utility::vector1< LoopInfo >::const_iterator loop = loops.begin(); loop != loops.end(); ++loop ) {
		Integer const first = loop->full_native_range().begin();
		Integer const last = loop->full_native_range().end();
		for ( Integer res = first; res <= last; ++res ) {
			from_loops.insert( res );
		}
	}

	// residues actually present in the pose
	ResidueSet from_pose;
	Integer const n_res = epitope.total_residue();
	for ( Integer pos = 1; pos <= n_res; ++pos ) {
		from_pose.insert( epitope.pdb_info().pdb_res_num( pos ) );
	}

	// every specified residue has to exist in the input
	for ( ResidueSet::const_iterator r = from_loops.begin(); r != from_loops.end(); ++r ) {
		if ( from_pose.find( *r ) == from_pose.end() ) {
			std::cerr << "WARNING: residue " << *r << " specified in epitope loop ranges but does not exist in input structure!" << std::endl;
			return false;
		}
	}

	// every input residue has to have been specified
	for ( ResidueSet::const_iterator r = from_pose.begin(); r != from_pose.end(); ++r ) {
		if ( from_loops.find( *r ) == from_loops.end() ) {
			std::cerr << "WARNING: residue " << *r << " found in input structure but was not in specified epitope loop ranges!" << std::endl;
			return false;
		}
	}

	return true;
}


/// @brief open an output file, appending to it if it already exists
/// @details Existence is probed by trying to open the file for reading.  If
///  that succeeds the output stream is opened with open_append(), otherwise
///  with open().
/// @param[in] filename path of the file, used exactly as given
/// @param[out] outfile the stream to open
/// @note files_paths::score_path is not prepended to 'filename'; callers
///  that want the file in the score directory must pass the full path.
/// @note aborts through utility::exit if the stream is not good after opening
void
open_output_file(
	std::string const & filename,
	utility::io::ozstream & outfile
)
{
	// does the file already exist?
	// TODO: is there another way to do this other than opening the file?
	utility::io::izstream infile ( filename );

	if ( infile ) {
		infile.close();
		outfile.open_append( filename );
	} else {
		outfile.open( filename );
	}

	if ( ! outfile.good() ) {
		// report the path that was actually attempted
		std::stringstream ss;
		ss << "open_output_file: cannot open " << filename << "!";
		utility::exit( __FILE__, __LINE__, ss.str() );
	}
}


/// @brief load old format match results as input
/// @note  file format:
/// @note  direc  initial_align_sys  native_loop_begin  native_loop_end  loop_subrange_begin  loop_subrange_end  scaffold_range_begin  scaffold_range_end  filename
void
load_old_format_match_results(
	std::string const & filename,
	std::map< std::string, utility::vector1< epigraft::match::MatchResult > > & results
)
{
	using namespace epigraft::match;
	using epigraft::match::align::AlignmentSystem;

	utility::io::izstream infile( filename );
	if ( !infile ) {
		std::stringstream ss;
		ss << "load_old_format_match_results: cannot open file " << filename << std::endl;
		utility::exit( __FILE__, __LINE__, ss.str() );
	}

	std::string direc;
	std::string initial_align_sys;
	Integer native_loop_begin;
	Integer native_loop_end;
	Integer loop_subrange_begin;
	Integer loop_subrange_end;
	Integer scaffold_range_begin;
	Integer scaffold_range_end;
	std::string pdb_filename;

	Integer counter = 0;
	std::string line;
	while ( getline( infile, line ) ) {

		// skip blank lines
		ObjexxFCL::strip_whitespace(line);
		if ( line == "" ) {
			continue;
		}

		std::istringstream iss( line );

		iss >> direc >> initial_align_sys >> native_loop_begin >> native_loop_end
		    >> loop_subrange_begin >> loop_subrange_end >> scaffold_range_begin
		    >> scaffold_range_end >> pdb_filename;

		// figure out initial alignment system
		std::string system_name;
		AlignmentSystem::SystemType system_type = AlignmentSystem::ENDPOINT;
		if ( direc == "n2c" ) {
			system_name = "N2C_" + initial_align_sys;
			system_type = AlignmentSystem::N2C;
		} else if ( direc == "c2n" ) {
			system_name = "C2N_" + initial_align_sys;
			system_type = AlignmentSystem::C2N;
		} else if ( initial_align_sys == "S") {
			system_name = initial_align_sys;
			system_type = AlignmentSystem::SUPERPOSITION;
		} else if ( initial_align_sys == "E" ) {
			system_name = initial_align_sys;
			system_type = AlignmentSystem::ENDPOINT;
		} else {
			// shouldn't be here
			std::stringstream ss;
			ss << "load_match_results: file " << filename << " has malformed directionalty or alignment system" << std::endl;
			utility::exit( __FILE__, __LINE__, ss.str() );
		}

		if ( results.find( pdb_filename ) == results.end() ) {
			results[ pdb_filename ] = utility::vector1< MatchResult >();
		}

		MatchResult match_result;
		match_result.components.push_back( MatchComponent(
		                                     -1, // old format doesn't have loop id, so we store -1!
		                                     ResidueRange( loop_subrange_begin, loop_subrange_end ),
		                                     ResidueRange( native_loop_begin, native_loop_end ),
		                                     ResidueRange( scaffold_range_begin, scaffold_range_end )
		                                   )
		                                 );
		match_result.system_name = system_name;
		match_result.system_type = system_type;
		results[ pdb_filename ].push_back( match_result );
		counter++;
	}

	// close input file
	infile.close();

	// report to user
	std::cout << "load_old_format_match_results: read " << counter << " records from " << results.size() << " pdb files" << std::endl;
}


} // namespace epigraft
