// -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
// vi: set ts=2 noet:
//  CVS information:
//  $Revision: 14282 $
//  $Date: 2007-04-16 17:28:18 -0700 (Mon, 16 Apr 2007) $
//  $Author: tex $


// Rosetta Headers
#include "aaproperties_pack.h"
#include "after_opts.h"
#include "barcode_stats_classes.h"
#include "cenlist.h"
#include "current_pose.h"
#include "force_barcode.h"
#include "fragments.h"
#include "fragments_ns.h"
#include "fragments_pose.h"
#include "homolog_distances.h"
#include "initialize.h" // native routines
#include "jumping_pairings.h"
#include "jumping_util.h"
#include "make_pdb.h"
#include "misc.h"
#include "pose.h"
#include "pose_io.h"
#include "pose_rms.h"
#include "pose_vdw.h"
#include "random_numbers.h"
#include "read_aaproperties.h"
#include "read_paths.h"
#include "refold.h"
#include "score.h" // score_set_new_pose
#include "silent_input.h"
#include "structure.h"
#include "util_vector.h"
#include "pose_idealize.h"
#include "vdw.h"

// ObjexxFCL Headers
#include <ObjexxFCL/FArray1D.hh>
#include <ObjexxFCL/FArray2D.hh>
#include <ObjexxFCL/FArray3D.hh>
#include <ObjexxFCL/FArray3Dp.hh>
#include <ObjexxFCL/FArray4D.hh>
#include <ObjexxFCL/formatted.io.hh>
#include <ObjexxFCL/string.functions.hh>

// Numeric Headers
#include <numeric/all.fwd.hh>
#include <numeric/xyzVector.hh>

// Utility Headers
#include <utility/basic_sys_util.hh>
#include <utility/io/izstream.hh>

// C++ Headers
#include <algorithm>
#include <cmath>
#include <cassert>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <string>
#include <utility>
#include <vector>



using namespace silent_io;

///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////

namespace contact_stats {
	bool output_fasta( false );
	bool output_all_contacts( false );
	std::string contact_file;
	std::string pdbfile;
	std::string silent_file_name;
	std::string distance_pdf_filename( "null" );
	std::string native_filename;
	char chain;

	int max_contact_dist( 20 ); // maximum distance in angstroms for homolog cen_dist scoring
	bool have_distance_pdf( false );
	FArray3D_float dist_log_probs;
}

void
get_centroid_contact_options() {

	using namespace contact_stats;
	if ( truefalseoption("pdbfile") ) {
		pdbfile = stringafteroption("pdbfile");
	}

	if ( truefalseoption("silent_file") ) {
		silent_file_name = stringafteroption("silent_file");
	}

	if ( truefalseoption( "chain" ) ) {
		std::string chain = stringafteroption("chain");
	}

	if ( truefalseoption("contacts_file") ) {
		contact_file = stringafteroption("contacts_file");
	} else if ( truefalseoption("pdbfile") ) {
		contact_file = pdbfile + ".contacts";
	} else {
		contact_file = silent_file_name + ".contacts";
	}

	if ( truefalseoption("output_all_contacts") ) {
		output_all_contacts = true;
	}

	if ( truefalseoption("output_fasta") ) {
		output_fasta = true;
	}

	if ( truefalseoption("distance_pdf_filename") ) {
		distance_pdf_filename = stringafteroption("distance_pdf_filename");
	}
	if ( truefalseoption("n") ) {
		native_filename = stringafteroption("n");
	}
} // get_centroid_contact_options()

void
get_distances() {
	static bool init( false );
	if ( !init ) {
		get_centroid_contact_options();
	}

	pose_ns::Pose current_pose;
	pose_from_pdb( current_pose, contact_stats::pdbfile, true, true, true, contact_stats::chain, false, true );

	// open filename for printing contacts
	std::string filename = contact_stats::pdbfile + ".homolog_distances";
	std::ofstream c_stream( filename.c_str() );
	if ( ! c_stream.is_open() ) {
		std::cerr << "Open failed for file: " << contact_stats::contact_file << std::endl;
		utility::exit( EXIT_FAILURE, __FILE__, __LINE__ );
	}

	// atom types:
	//	4 - O
	//  5 - CB
	//  6 - CG
	std::vector< std::pair< std::string, std::string > > atom_pairs_wanted;
	atom_pairs_wanted.push_back( std::make_pair( "O", "O"   ) );
	atom_pairs_wanted.push_back( std::make_pair( "CB", "CB" ) );
	atom_pairs_wanted.push_back( std::make_pair( "CB", "O"  ) );
	atom_pairs_wanted.push_back( std::make_pair( "O", "CB"  ) );
	atom_pairs_wanted.push_back( std::make_pair( "CG", "O"  ) );
	atom_pairs_wanted.push_back( std::make_pair( "O", "CG"  ) );

	// full_coord is indexed by [1,2,3] for xyz coordinates, then by atom_number,
	// and lastly by sequence position.
	const FArray3D_float & full_coord( current_pose.full_coord() );
	std::vector< std::pair< std::string, std::string > >::iterator atom_pair;
	for ( int i = 1; i <= misc::total_residue - 1; ++i ) {
		for ( int j = i + 1; j <= misc::total_residue; ++j ) {
			for ( atom_pair = atom_pairs_wanted.begin();
						atom_pair < atom_pairs_wanted.end();
						atom_pair++ ) {
				//const int aa1  = current_pose.res(i);
				//const int aa2  = current_pose.res(j);
				char resname1 = misc::chars::residue1(i);
				char resname2 = misc::chars::residue1(j);

				int atom_index1 = get_atom_index( resname1, atom_pair->first  );
				int atom_index2 = get_atom_index( resname2, atom_pair->second );

				if ( atom_index1 != -1 && atom_index2 != -1 ) {
					numeric::xyzVector_double w( &full_coord( 1, atom_index1, i ) );
	    		numeric::xyzVector_double v( &full_coord( 1, atom_index2, j ) );
					float dist = distance( w, v );
					c_stream	<< i << '\t' << j << '\t' << dist << '\t'
									<< resname1 << ',' << resname2 << '\t'
									<< atom_pair->first << ',' << atom_pair->second
									<< std::endl;
				}
			}
		}
	}
	c_stream.close();

	if ( contact_stats::output_fasta ) {
		std::string seqfile = contact_stats::contact_file + ".fasta";
		std::ofstream seq_stream( seqfile.c_str() );
		for ( int i = 1; i <= misc::total_residue; i++ ) {
			seq_stream << misc::chars::residue1(i);
		}
		seq_stream << std::endl;
		seq_stream.close();
	}
}

int
get_atom_index(
	char residue,
	std::string atom_name
) {
	using namespace std;
	static map< char, int > cbeta_indices;
	static map< char, int > cgamma_indices;
	static map< char, int > oxygen_indices;

	static bool init( false );
	if ( !init ) {
		cbeta_indices['A']  =  5;
		cbeta_indices['C']  =  5;
		cbeta_indices['D']  =  5;
		cbeta_indices['E']  =  5;
		cbeta_indices['F']  =  5;
		cbeta_indices['G']  = -1; // glycine doesn't have a c-beta
		cbeta_indices['H']  =  5;
		cbeta_indices['I']  =  5;
		cbeta_indices['K']  =  5;
		cbeta_indices['L']  =  5;
		cbeta_indices['M']  =  5;
		cbeta_indices['N']  =  5;
		cbeta_indices['P']  =  5;
		cbeta_indices['Q']  =  5;
		cbeta_indices['R']  =  5;
		cbeta_indices['S']  =  5;
		cbeta_indices['T']  =  5;
		cbeta_indices['V']  =  5;
		cbeta_indices['W']  =  5;
		cbeta_indices['Y']  =  5;
		
		cgamma_indices['A'] =  1; // alanine doesn't have a c-gamma
		cgamma_indices['C'] =  1;
		cgamma_indices['D'] =  1;
		cgamma_indices['E'] =  1;
		cgamma_indices['F'] =  1;
		cgamma_indices['G'] = -1; // glycine doesn't have a c-gamma
		cgamma_indices['H'] =  1;
		cgamma_indices['I'] =  1;
		cgamma_indices['K'] =  1;
		cgamma_indices['L'] =  1;
		cgamma_indices['M'] =  1;
		cgamma_indices['N'] =  1;
		cgamma_indices['P'] =  1;
		cgamma_indices['Q'] =  1;
		cgamma_indices['R'] =  1;
		cgamma_indices['S'] =  1; // serine doesn't have a c-gamma
		cgamma_indices['T'] =  1;
		cgamma_indices['V'] =  1;
		cgamma_indices['W'] =  1;
		cgamma_indices['Y'] =  1;

		init = true;
	} // if ( !init )
	if ( atom_name =="CB" ) {
		return cbeta_indices[residue];
	} else if ( atom_name == "CG" ) {
		return cgamma_indices[residue];
	} else if ( atom_name == "O" ) {
		return 4; // oxygen always has an atom-number of four
	}

	return -1;
}

void
centroid_contacts() {
	pose_ns::Pose current_pose;
	pose_from_pdb( current_pose, contact_stats::pdbfile, true, true, true, contact_stats::chain, false, true );
	pose_update_cendist( current_pose );

	// open filename for printing contacts
	std::ofstream c_stream( contact_stats::contact_file.c_str() );
	if ( ! c_stream.is_open() ) {
		std::cerr << "Open failed for file: " << contact_stats::contact_file << std::endl;
		utility::exit( EXIT_FAILURE, __FILE__, __LINE__ );
	}

	const FArray2D_float & cendist( current_pose.get_2D_score( pose_ns::CENDIST ) );
	for ( int i = 1; i <= misc::total_residue - 1; ++i ) {
		for ( int j = i + 1; j <= misc::total_residue; ++j ) {

			float true_dist = std::sqrt( cendist( i, j ) ); // cendist is dist^2

			if ( contact_stats::output_all_contacts ) {
				c_stream	<< i << '\t' << j << '\t' << true_dist << '\t'
									<< misc::chars::residue1(i) << ',' << misc::chars::residue1(j)
									<< std::endl;
			} else {
			}
		}
	}
	c_stream.close();

	std::string seqfile = contact_stats::contact_file + ".fasta";
	std::ofstream seq_stream( seqfile.c_str() );
	for ( int i = 1; i <= misc::total_residue; i++ ) {
		seq_stream << misc::chars::residue1(i);
	}
	seq_stream << std::endl;

}

void
get_per_residue_homolog_scores(
	pose_ns::Pose  & my_pose,
	FArray1D_float & homolog_residue_scores
) {
	using namespace contact_stats;

	if ( !have_distance_pdf ) {
		initialize_distance_pdf();
		have_distance_pdf = true;
	}

	int nres = misc::total_residue;
	if ( nres == -1 ) { nres = 500; }
	homolog_residue_scores.dimension( nres, 0.0f );

	const FArray3D_float & full_coord( my_pose.full_coord() );
	for ( int i = 1; i <= misc::total_residue - 1; ++i ) {
		for ( int j = i + 1; j <= misc::total_residue; ++j ) {
			// use cbetas rather than centroids!
			int cb_atom_index = 5; // cbeta atom index in full_coord
			if ( my_pose.res(i) != 'G' && my_pose.res(j) != 'G' ) {
				numeric::xyzVector_double w( &full_coord( 1, cb_atom_index, i ) );
				numeric::xyzVector_double v( &full_coord( 1, cb_atom_index, j ) );
				float true_dist = distance( w, v );

				if ( true_dist < max_contact_dist && true_dist > 2 ) {
					float local_score = calc_resi_resj_score( "CBCB", i, j, true_dist );
					homolog_residue_scores( i ) += local_score;
					homolog_residue_scores( j ) += local_score;
				}
			}

			// try using oxygens!
			int o_atom_index = 4; // oxygen atom index in full_coord
			numeric::xyzVector_double w( &full_coord( 1, o_atom_index, i ) );
			numeric::xyzVector_double v( &full_coord( 1, o_atom_index, j ) );
			float true_dist = distance( w, v );

			if ( true_dist < max_contact_dist && true_dist > 2 ) {
				float local_score = calc_resi_resj_score( "OO", i, j, true_dist );
				homolog_residue_scores( i ) += local_score;
				homolog_residue_scores( j ) += local_score;
			}
		} // for ( int j = i + 1; j <= misc::total_residue; ++j )
	} // for ( int i = 1; i <= misc::total_residue - 1; ++i )
} // get_per_residue_homolog_scores

void
misc_homolog_score(
	float & homolog_score
) {
	using namespace contact_stats;
	if ( !have_distance_pdf ) {
		initialize_distance_pdf();
		have_distance_pdf = true;
	}
	pose_ns::Pose my_pose;
	pose_from_misc( my_pose, true, false, true );
	pose_homolog_score( my_pose, homolog_score );
	//const FArray2D_float & cendist( cenlist_ns::dists::cendist );
	//homolog_score = 0;

	//for ( int i = 1; i <= misc::total_residue - 1; ++i ) {
	//	for ( int j = i + 1; j <= misc::total_residue; ++j ) {
	//		float true_dist = std::sqrt( cendist( i, j ) ); // cendist is dist^2
	//		int rounded_dist = (int) true_dist;
	//		//if ( rounded_dist > 0 && rounded_dist < max_contact_dist ) {
	//		//	homolog_score += dist_log_probs( i, j, rounded_dist );
	//		//}

	//		if ( true_dist < max_contact_dist && true_dist > 4 ) {
	//			float local_score = calc_resi_resj_score( "OO", i, j, true_dist );
	//			homolog_score += local_score;
	//		}
	//	}
	//}

	homolog_score = homolog_score * -1;
}

void
homolog_rescore() {
	// read in the distance pdf for this structure
	using namespace contact_stats;

	get_centroid_contact_options();
	//read_distance_pdf( contact_stats::distance_pdf_filename );
	pose_ns::Pose current_decoy;
	pose_ns::Pose native_pose;
	//pose_from_pdb( native_pose, native_filename, true, false );
	pose_from_pdb(
		native_pose, native_filename, true, false, false, '-', false, true
	);

	if ( truefalseoption("idealize_native") ) {
		idealize_pose( native_pose );
	}

	// score the native_pose, print out that score to stdout
	float native_h_score;
	pose_homolog_score( native_pose, native_h_score );
	std::cout << "NATIVE HOMOLOG SCORE = " << native_h_score << std::endl;

	Silent_file_data silent_data( contact_stats::silent_file_name );

	std::string output_file = contact_stats::silent_file_name + ".homolog_score";
	std::ofstream c_stream( output_file.c_str() );
	if ( ! c_stream.is_open() ) {
		std::cerr << "Open failed for file: " << output_file << std::endl;
		utility::exit( EXIT_FAILURE, __FILE__, __LINE__ );
	}

	//get_distance_hist();

	// iterate over the structures in the silent-file
	c_stream	<< "total_score" << '\t'
						<< "homol_score" << '\t'
						<< "CA_rmsd"	<< '\t'
						<< "CA_maxsub" << '\t'
						<< "tag"  << '\t'
						<< std::endl;

	for ( Silent_file_data::const_iterator it = silent_data.begin(),
	 it_end = silent_data.end(); it != it_end; ++it) {
		std::string tag = it->first;
		//Silent_structure const * structure = it->second;
		it->second->fill_pose( current_decoy );
		pose_update_cendist( current_decoy );

		float h_score = 0;
		float total_score = it->second->total_score;

		int ca_maxsub  = CA_maxsub( native_pose, current_decoy );
		float ca_rmsd  = CA_rmsd( native_pose, current_decoy );
		pose_homolog_score( current_decoy, h_score );
		c_stream  << total_score << '\t'
							<< h_score << '\t'
							<< ca_rmsd  << '\t'
							<< ca_maxsub << '\t'
							<< tag
							<< std::endl;
	} // iterate over the structures in the silent-file
	c_stream.close();
} // homolog_rescore

void
get_distance_hist() {
	using namespace contact_stats;

	get_centroid_contact_options();
	pose_ns::Pose current_decoy;

	Silent_file_data silent_data( contact_stats::silent_file_name );

	FArray3D_int distance_hist( misc::total_residue, misc::total_residue, misc::total_residue, 0 );
	int nstruct = 0;
	for ( Silent_file_data::const_iterator it = silent_data.begin(),
	 it_end = silent_data.end(); it != it_end; ++it) {
		std::string tag = it->first;
		//Silent_structure const * structure = it->second;
		it->second->fill_pose( current_decoy );
		pose_update_cendist( current_decoy );


		const FArray2D_float & cendist(
			current_decoy.get_2D_score( pose_ns::CENDIST )
		);

		for ( int i = 1; i < current_decoy.total_residue(); i++ ) {
			for ( int j = i + 1; j <= current_decoy.total_residue(); j++ ) {
				int rounded_dist = (int) ( std::sqrt(cendist(i,j)) + 0.5 );
				distance_hist( i, j, rounded_dist )++;	
			}
		}
		nstruct++;
	} // iterate over the structures in the silent-file

	std::string output_file = contact_stats::silent_file_name + ".distances";
	std::ofstream c_stream( output_file.c_str() );
	if ( ! c_stream.is_open() ) {
		std::cerr << "Open failed for file: " << output_file << std::endl;
		utility::exit( EXIT_FAILURE, __FILE__, __LINE__ );
	}

	c_stream	<< "resi"	<< '\t'
						<< "resj"	<< '\t'
						<< "dist" << '\t'
						<< "freq" << std::endl;
	for ( int i = 1; i < misc::total_residue; i++ ) {
		for ( int j = i+1; j <= misc::total_residue; j++ ) {
			for ( int d = 1; d <= 20; d++ ) {
				float freq = (float) distance_hist( i, j, d ) / nstruct;
				c_stream	<< i		<< '\t'
									<< j		<< '\t'
									<< d		<< '\t'
									<< freq	<< std::endl;
			}
		}
	}
	c_stream.close();
} // get_distance_hist

void read_distance_pdf(
	std::string pdf_file_name
) {

	using namespace contact_stats;
	std::ifstream input( pdf_file_name.c_str() );
	int resi, resj;
	int nres = misc::total_residue;
	if ( nres == -1 ) {
		nres = 500;
	}
	float log_prob;
	int dist;

	if ( !input.is_open() ) {
		std::cerr << "STOP: error opening file " << pdf_file_name << std::endl;
		utility::exit( EXIT_FAILURE, __FILE__, __LINE__);
	}

	dist_log_probs.dimension( nres, nres, max_contact_dist , 0.0f );
	// ignore the first line
	std::string line;
	getline( input, line );
	while( getline(input,line) ) {
		std::istringstream line_str( line );
		if ( line[0] != '#' ) {
			line_str >> resi >> resj >> dist >> log_prob;
			dist_log_probs( resi, resj, dist ) = (float) log_prob;
		}
		//std::cout << "(" << resi << "," << resj << "," << dist << ") = " << log_prob << std::endl;
	}

	have_distance_pdf = true;
}

int get_bin_index(
	std::string variable_name,
	float value
) {
	static bool init( false );

	using namespace std;
	map< string, vector< float > > bounds;

	if ( !init ) {
		std::ifstream input( "bounds.txt" );
		if ( ! input.is_open() ) {
			std::cerr << "error opening file bounds.txt!" << std::endl;
			utility::exit( EXIT_FAILURE, __FILE__, __LINE__ );
		}
		
		string line, temp, bound_name;
		while ( getline(input,line) ) {
			istringstream line_str( line );
	  	line_str	>> bound_name;
			vector< float > my_vector;
			while ( line_str >> temp ) {
				my_vector.push_back( atof( temp.c_str() ) );
				//std::cout << bound_name << ": " << atof( temp.c_str() ) << std::endl;
			}

			bounds[ bound_name ] = my_vector;
		}
	} // if ( !init )

	//std::cout << "FINDING BIN FOR " << variable_name
	//					<< " of " << value << std::endl;
	vector< float > my_vector = bounds[ variable_name ];
	for ( unsigned int i = 1; i <= my_vector.size(); i++ ) {
		//std::cout << "my_vector[" << i << "] = " << my_vector[i-1]
		//					<< std::endl;
		if ( my_vector[i-1] > value ) {
			//std::cout << "value = " << value
			//					<< ", bin = " << i
			//					<< ", variable_name = " << variable_name
			//					<< std::endl;
			
			return i;
		}
	}

	return my_vector.size() + 1;
} // int get_bin_index


namespace homolog_stats {
	// goofy way of dealing with groups of three parameters:
	// stick them each in their own FArray and manage the
	// indexing yourself. Eventually these should be replaced by
	// a smooth function fitting for a continuous set of parameters
	// based on the data, but for now just manage these myself.
	int MAX_BINS = 15;
	FArray4D_float param_a( MAX_BINS, MAX_BINS, MAX_BINS, MAX_BINS );
	FArray4D_float param_b( MAX_BINS, MAX_BINS, MAX_BINS, MAX_BINS );
	FArray4D_float param_c( MAX_BINS, MAX_BINS, MAX_BINS, MAX_BINS );
	FArray3D_float bg_param_a( MAX_BINS, MAX_BINS, MAX_BINS );
	FArray3D_float bg_param_b( MAX_BINS, MAX_BINS, MAX_BINS );
	FArray3D_float bg_param_c( MAX_BINS, MAX_BINS, MAX_BINS );
	FArray3D_float bg_param_d( MAX_BINS, MAX_BINS, MAX_BINS );

	FArray2D_float native_distances;

	float min_evalue( 0 );
	float max_evalue( 100 );

	bool debug( false );
	bool score_oxygens( false );
	
	using namespace std;
	// this map is a bit confusing, here's the structure:
	// respair = std::make_pair( resi, resj );
	// map[ respair ][ atom_type ][ param_name ] = parameter;
	map< pair< int, int >, map< string, map< string, vector< float > > > > params;
}

void
pose_homolog_score(
	pose_ns::Pose & my_pose,
	float & homolog_score
) {
	using namespace contact_stats;
	using namespace homolog_stats;

	if ( !have_distance_pdf ) {
		initialize_distance_pdf();
		have_distance_pdf = true;
	}

	homolog_score = 0;

	const FArray3D_float & full_coord( my_pose.full_coord() );
	for ( int i = 1; i <= misc::total_residue - 1; ++i ) {
		for ( int j = i + 1; j <= misc::total_residue; ++j ) {
			// use cbetas rather than centroids!
			int cb_atom_index = 5; // cbeta atom index in full_coord
			if ( my_pose.res(i) != 'G' && my_pose.res(j) != 'G' ) {
				numeric::xyzVector_double w( &full_coord( 1, cb_atom_index, i ) );
				numeric::xyzVector_double v( &full_coord( 1, cb_atom_index, j ) );
				float true_dist = distance( w, v );

				if ( true_dist < max_contact_dist && true_dist > 2 ) {
					float local_score = calc_resi_resj_score( "CBCB", i, j, true_dist );
					homolog_score += local_score;
				}
			}

			// try using oxygens!
			if ( score_oxygens ) {
				int o_atom_index = 4; // oxygen atom index in full_coord
				numeric::xyzVector_double w( &full_coord( 1, o_atom_index, i ) );
				numeric::xyzVector_double v( &full_coord( 1, o_atom_index, j ) );
				float true_dist = distance( w, v );

				if ( true_dist < max_contact_dist && true_dist > 2 ) {
					float local_score = calc_resi_resj_score( "OO", i, j, true_dist );
					homolog_score += local_score;
				}
			}
		}
	}

	homolog_score = homolog_score * -1;
}



void
read_homolog_distribution_parameters() {
	// read in parameters for the background distribution
	std::string fn = "bgfit.txt";
	std::ifstream input( fn.c_str() );
	if ( ! input.is_open() ) {
		std::cerr << "error opening file " << fn << "!" << std::endl;
		utility::exit( EXIT_FAILURE, __FILE__, __LINE__ );
	}

	double a, b, c, d, e_value, pred_dist, seq_score, avg_d_gap;
	std::string line;
	getline( input, line ); // ignore first line, contains header information
	while ( getline(input,line) ) {
	  std::istringstream line_str( line );
	  line_str	>> a >> b >> c >> d >> e_value
							>> pred_dist >> seq_score >> avg_d_gap
							>> skip();
		int bin2 = get_bin_index( "pred_dist", pred_dist );
		int bin3 = get_bin_index( "avg_d_gap", avg_d_gap );
		int bin4 = get_bin_index( "seq_score", seq_score );
		homolog_stats::bg_param_a( bin2, bin3, bin4 ) = a;
		homolog_stats::bg_param_b( bin2, bin3, bin4 ) = b;
		homolog_stats::bg_param_c( bin2, bin3, bin4 ) = c;
		homolog_stats::bg_param_d( bin2, bin3, bin4 ) = d;
		//std::cout << a << '\t' << b << '\t' << c << '\t' << d << std::endl;
	}
	input.close();

	fn = "fit.txt";
	input.open( fn.c_str() );
	if ( ! input.is_open() ) {
		std::cerr << "error opening file " << fn << "!" << std::endl;
		utility::exit( EXIT_FAILURE, __FILE__, __LINE__ );
	}

	getline( input, line ); // ignore first line, contains header information
	while ( getline(input,line) ) {
	  std::istringstream line_str( line );
	  line_str	>> a >> b >> c >> e_value
							>> pred_dist >> seq_score >> avg_d_gap
							>> skip();
		int bin1 = get_bin_index( "e_value",   e_value );
		int bin2 = get_bin_index( "pred_dist", pred_dist );
		int bin3 = get_bin_index( "avg_d_gap", avg_d_gap );
		int bin4 = get_bin_index( "seq_score", seq_score );

		// Verify that this is an informative prediction. The idea behind this is
		// is that if you predict a distance d, your inferred prediction should be
		// more likely at distance d than the background (i.e, the resi_resj_score
		// should be positive). The score_check is actually evaluated at 0.5
		// angstroms away from d, and the check is that it's greater than 1.0,
		// implying two things:
		//	- the prediction should stay positive for a while around the predicted
		//    distance.
		//  - the prediction should be positive enough to be a little more
		//    convervative in our acceptance of predictions.
		double a0, b0, c0, d0;
		get_bg_coefficients( pred_dist, avg_d_gap, seq_score, a0, b0, c0, d0 );
		double score_check1 =
			calc_resi_resj_score( pred_dist, pred_dist, a, b, c, a0, b0, c0, d0 );
		double score_check2 =
			calc_resi_resj_score( pred_dist + 0.5, pred_dist, a, b, c, a0, b0, c0, d0 );
		//std::cout << "score_check = " << score_check << std::endl

		if (
			score_check1 > 1.0 &&
			score_check2 > 1.0 &&
			score_check1 > score_check2 ) {

			homolog_stats::param_a( bin1, bin2, bin3, bin4 ) = a;
			homolog_stats::param_b( bin1, bin2, bin3, bin4 ) = b;
			homolog_stats::param_c( bin1, bin2, bin3, bin4 ) = c;
		}
	}
	input.close();

}

void
dump_distance_pdf() {
	static int count = 0;
	std::stringstream ss;
	ss << count;
	std::string fn = "distance_pdf." + ss.str();
	count++;
	std::ofstream dump( fn.c_str() );
	if ( ! dump.is_open() ) {
		std::cerr << "Open failed for file: " << "distance_pdf" << std::endl;
		utility::exit( EXIT_FAILURE, __FILE__, __LINE__ );
	}

	using namespace homolog_stats;
	using namespace contact_stats;
	dump	<< "i"						<< '\t'
				<< "j" 						<< '\t'
				<< "dist"					<< '\t'
				<< "log_prob"			<< '\t'
				<< "native_dist"	<< '\t'
				<< std::endl;
	for ( int i = 1; i < misc::total_residue; i++ ) {
		for ( int j = i+1; j <= misc::total_residue; j++ ) {
			for ( int d = 4; d <= 20; d++ ) {
				dump	<< i << '\t' << j << '\t' << d << '\t'
							<< dist_log_probs( i, j, d ) << std::endl;
			}
		}
	}

	dump.close();

} // dump_distance_pdf()


inline void get_coefficients (
	double e_value,
	double pred_dist,
	double avg_d_gap,
	double seq_score,
	double & a_out,
	double & b_out,
	double & c_out
) {
	int bin1 = get_bin_index( "e_value", e_value );
	int bin2 = get_bin_index( "pred_dist", pred_dist );
	int bin3 = get_bin_index( "avg_d_gap", avg_d_gap );
	int bin4 = get_bin_index( "seq_score", seq_score );

	a_out = homolog_stats::param_a( bin1, bin2, bin3, bin4 );
	b_out = homolog_stats::param_b( bin1, bin2, bin3, bin4 );
	c_out = homolog_stats::param_c( bin1, bin2, bin3, bin4 );

	//std::cout << e_value   << '\t' << bin1 << '\t'
	//					<< pred_dist << '\t' << bin2 << '\t'
	//					<< avg_d_gap << '\t' << bin3 << '\t'
	//					<< seq_score << '\t' << bin4 << '\t'
	//					<< a_out << '\t'
	//					<< b_out << '\t'
	//					<< c_out << '\t'
	//					<< std::endl;

}

inline void get_bg_coefficients (
	double pred_dist,
	double avg_d_gap,
	double seq_score,
	double & a_out,
	double & b_out,
	double & c_out,
	double & d_out
) {
	int bin2 = get_bin_index( "pred_dist", pred_dist );
	//int bin3 = 1;
	//int bin4 = 1;
	int bin3 = get_bin_index( "avg_d_gap", avg_d_gap );
	int bin4 = get_bin_index( "seq_score", seq_score );


	a_out = homolog_stats::bg_param_a( bin2, bin3, bin4 );
	b_out = homolog_stats::bg_param_b( bin2, bin3, bin4 );
	c_out = homolog_stats::bg_param_c( bin2, bin3, bin4 );
	d_out = homolog_stats::bg_param_d( bin2, bin3, bin4 );
}

void
make_homolog_scored_pdbs() {

	using namespace std;
	using namespace pose_ns;

	Pose mypose;
	string listfile( stringafteroption("l") );
	string line, pdb_filename;

	int nres = misc::total_residue;
	if ( nres == -1 ) { nres = 500; }
	FArray1D_float homolog_residue_scores( nres, 0.0f );

	utility::io::izstream input;
	input.open( listfile );
	if ( !input ) std::cout << "Can't open " << input.filename() << std::endl;

	getline( input, line ); // ignore first line, contains header information
	while ( getline(input,line) ) {
	  istringstream line_str( line );
	  line_str >> pdb_filename >> skip();

		float homolog_score = 0;
		pose_from_pdb( mypose, pdb_filename, true, false );
		pose_homolog_score( mypose, homolog_score );

		get_per_residue_homolog_scores( mypose, homolog_residue_scores );
		string new_filename = pdb_filename + ".homolog_score";
		mypose.dump_pdb( new_filename );

		std::cout << "score(" << pdb_filename << ") = "
							<< homolog_score << std::endl;
		//for ( int i = 1; i < 50; i++ ) {
		//	std::cout << "score(i) = " << homolog_residue_scores(i) << std::endl;
		//}

		alter_bfactor_pdbfile( new_filename, homolog_residue_scores );
	}
} // make_homolog_scored_pdbs()

// try to fake reading a distance pdf from a .12 prediction file
void
initialize_distance_pdf() {
	using namespace homolog_stats;

	if ( truefalseoption("score_oxygens") ) {
		score_oxygens = true;
	}

	std::string fn( stringafteroption("distance_pdf_filename") );
	std::ifstream input( fn.c_str() );
	//utility::io::izstream & input( open_data_file( fn ) );
	if ( ! input.is_open() ) {
		std::cerr << "error opening file " << fn << "!" << std::endl;
		utility::exit( EXIT_FAILURE, __FILE__, __LINE__ );
	}

	if ( truefalseoption( "max_evalue" ) ) {
		max_evalue = realafteroption( "max_evalue" );
	}
	if ( truefalseoption( "min_evalue" ) ) {
		min_evalue = realafteroption( "min_evalue" );
	}
	if ( truefalseoption( "debug" ) ) { homolog_stats::debug = true; }

	read_homolog_distribution_parameters();
	int nres = misc::total_residue;
	if ( nres == -1 ) { nres = 500; }
	contact_stats::dist_log_probs.dimension( nres, nres, contact_stats::max_contact_dist, 0.0f );


	int resi, resj;
	double e_value, prob, avg_d_gap, seq_score, pred_dist, nat_dist;
	std::string atom_type_i, atom_type_j;
	std::string query_id, template_id, line;
	std::string last_template_id = "test";

	getline( input, line ); // ignore first line, contains header information
	while ( getline(input,line) ) {
	  std::istringstream line_str( line );
	  line_str	>> e_value >> prob
							>> avg_d_gap >> seq_score
							>> resi >> resj
							>> atom_type_i >> atom_type_j
							>> pred_dist >> nat_dist
							>> query_id >> template_id
							>> skip();
		// conditions for skipping this resi,resj combination
		if ( pred_dist > 20 ) { continue; }
		if ( std::abs( resi - resj ) < 10 ) { continue; }
		if ( e_value > max_evalue ) { continue; }
		if ( e_value < min_evalue ) { continue; }

		// get coefficients for distribution and bg.
		double a, b, c, a0, b0, c0, d0;
		get_coefficients( e_value, pred_dist, avg_d_gap, seq_score, a, b, c );
		//get_bg_coefficients( pred_dist, a0, b0, c0, d0 );
		get_bg_coefficients( pred_dist, avg_d_gap, seq_score, a0, b0, c0, d0 );
		std::pair< int, int > respair = std::make_pair( resi, resj );	
		std::string atom_type = atom_type_i + atom_type_j;
		params[ respair ][ atom_type ][ "a" ].push_back( a );
		params[ respair ][ atom_type ][ "b" ].push_back( b );
		params[ respair ][ atom_type ][ "c" ].push_back( c );
		params[ respair ][ atom_type ][ "a0" ].push_back( a0 );
		params[ respair ][ atom_type ][ "b0" ].push_back( b0 );
		params[ respair ][ atom_type ][ "c0" ].push_back( c0 );
		params[ respair ][ atom_type ][ "d0" ].push_back( d0 );
		params[ respair ][ atom_type ][ "pred_dist" ].push_back( pred_dist );
		params[ respair ][ atom_type ][ "seq_score" ].push_back( seq_score );

		for ( int dist = 4; dist <= 20; dist++ ) {
			float this_score = calc_resi_resj_score( dist, pred_dist, a, b, c, a0, b0, c0, d0 );
			if ( resi > nres || resj > nres ) {
				continue;
			}
			contact_stats::dist_log_probs( resi, resj, dist ) += (float) this_score;
		}

		if ( homolog_stats::debug ) {
			if ( last_template_id != template_id ) {
				dump_distance_pdf();
				last_template_id = template_id;
			}
		}
	} // while ( getline(input,line )

	dump_distance_pdf();
} // void initialize_distance_pdf


double
calc_resi_resj_score(
	double x, // distance to be evaluated
	double pred_dist, // predicted distance from template
	double a,  double b,  double c, // coefficients for prediction
	double a0, double b0, double c0, double d0 // coefficients for background
) {
	// E(r) = Dens(obs) / Dens(bg)
	// Density calculations are carried out using a mixture of a Gaussian
	// and an Exponential distribution:
	// Density(a,b,c)	= (1-c) * gaussian_component + c * exponential_component
	// gaussian_component    = ( 2 * a * sqrt( 2 * pi ) ) * e ^ ( -1/2 * a * r^2 )
	// exponential_component = b * exp( -1/2 * b * r )
	// The background calculations have a fourth parameter d0, which is the
	// anchor for the Gaussian and exponential distributions.

	using namespace std;
	using namespace homolog_stats;

	// Return 0 if there is any of the coefficients are 0, that indicates missing
	// data.
	if ( a == 0 || b == 0 || c == 0 ||
			 a0 == 0 || b0 == 0 || c0 == 0 || d0 == 0 ) {
		return 0;
	}

	double r = abs( x - pred_dist );

	double sqrt_2pi = 2.50662721600161f;
	double gaussian_inferred =
		(1-c)  * (1/(2*a *sqrt_2pi)) * exp( -0.5 *a *r*r);
	double exponential_inferred = c  * b *exp(-1 * b *r);
	double inferred_score = gaussian_inferred + exponential_inferred;

	double bg_anchor = abs( r - d0 ); // anchor of the bg dist
	double gaussian_background =
		(1-c0) * (1/(2*a0*sqrt_2pi)) * exp(-0.5*a0*bg_anchor*bg_anchor);
	double exponential_background = c0 * b0*exp(-1 * b0*bg_anchor);
	double background_score = gaussian_background + exponential_background;

	// ignore this score if there is a negative probability here (probably
	// an artifact of function fitting).
	if ( inferred_score < 0 )   { return 0; }
	if ( background_score < 0 ) { return 0; }
	double score = log( inferred_score ) - log( background_score );

	return score;
}

double
calc_resi_resj_score(
	std::string atom_type,
	int resi,
	int resj,
	double dist // distance to be evaluated
) {

	double score = 0;
	double a, b, c, a0, b0, c0, d0, pred_dist;
	using namespace std;
	using namespace homolog_stats;

	pair< int, int > respair = make_pair( resi, resj );
	int param_count = params[ respair ][ atom_type ][ "a" ].size();

	// Iterarate over all sets of coefficients for these residues. Add up the
	// score using the other calc_resi_resj_score function.
	for ( int i = 0; i < param_count; i++ ) {
		a  = params[ respair ][ atom_type ][ "a"  ][ i ];	
		b  = params[ respair ][ atom_type ][ "b"  ][ i ];	
		c  = params[ respair ][ atom_type ][ "c"  ][ i ];	
		a0 = params[ respair ][ atom_type ][ "a0" ][ i ];	
		b0 = params[ respair ][ atom_type ][ "b0" ][ i ];	
		c0 = params[ respair ][ atom_type ][ "c0" ][ i ];	
		d0 = params[ respair ][ atom_type ][ "d0" ][ i ];	
		pred_dist = params[ respair ][ atom_type ][ "pred_dist" ][ i ];
		
		score += calc_resi_resj_score( dist, pred_dist, a, b, c, a0, b0, c0, d0 );
	}

	return score;
}

// FIX ME!
double
calc_resi_resj_deriv(
	double r,	// radius between two residues
	double a,  double b,  double c,
	double a0, double b0, double c0
) {
	//E'(r) = ( 2are^(-ar^2) - bce^(-cr) ) / ( e(-ar^2) + be^(-cr) )
	//      - ( -2a0re^(-a0r^2) - b0c0e^(-c0r) ) / ( e^-(a0r^2) + b0e^(-c0r) )
	using namespace std;
	double score =
		( 2*a*r*exp( -1*a*r*r )  - b*c*exp( -1*c*r ) ) /
		( exp( -1*a*r*r )        + b*exp(-1*c*r) ) -
		( -2*a0*r*exp( -a0*r*r ) - b0*c0*exp( -1*c0*r ) ) /
		( exp( -1*a0*r*r )       + b0*exp( -1*c0*r ) );
	return score;
}

void get_homolog_jumps(
	pose_ns::Pose & my_pose
) {
	using namespace pose_ns;

	static bool init( false );
	static std::vector< Jump > jump_list;
	static std::vector< int > jump_start_list;
	static std::vector< int > jump_stop_list;

	// If we haven't initialized reading from the -jumps_file yet, do it now.
	// This block reads a list of jumps from a file and puts them into jump_list.
	if ( !init ) {
		// setup reading from a .jumps file later.
		std::string jumps_file = stringafteroption("jumps_file");
		std::ifstream jump_in( jumps_file.c_str() );
		if ( ! jump_in.is_open() ) {
			std::cerr << "error opening jumps_file! bug tex about this!" << std::endl;
			utility::exit( EXIT_FAILURE, __FILE__, __LINE__ );
		}

		int q_resi, q_resj, t_resi, t_resj;
		float pred_dist, nat_dist;
		std::string line, e_value, template_id, query_id, last_template_id, last_query_id;
		Pose template_pose;

  	getline( jump_in, line ); // ignore first line, contains header information
  	while ( getline(jump_in,line) ) {
  	  std::istringstream line_str( line );
  	  line_str	>> e_value >> q_resi >> q_resj
								>> t_resi >> t_resj
								>> pred_dist >> nat_dist
								>> query_id >> template_id;


			if ( template_id != last_template_id ) {
				pose_from_pdb( template_pose, template_id, true, false );
				last_template_id = template_id;
			}

			FArray3D_float const & Epos( template_pose.Eposition() );

			Jump jump( Epos(1, 1, t_resi), Epos(1, 1, t_resj) );
			jump_list.push_back( jump );
			jump_start_list.push_back( q_resi );
			jump_stop_list.push_back(  q_resj );
			//std::cout << "jump : " << jump << std::endl;
		}
	} // if ( !init )


	Fold_tree fold_tree;
	// pick a random jump from the jump_list, apply it to my_pose.
	// Only adding one jump at this time.
	int njumps = jump_list.size();
	int index  = static_cast<int>( ran3() * njumps );
	Jump random_jump = jump_list       [ index ];
	int jump_point1  = jump_start_list [ index ];
	int jump_point2  = jump_stop_list  [ index ];
	int cut_point    = intafteroption("cut_point");

	//std::cout << "picking jump at index " << index << std::endl;
	//std::cout << random_jump << std::endl;
	//std::cout << "jump_point1 = "	<< jump_point1 << ", "
	//					<< "jump_point2 = " << jump_point2 << std::endl;

	Fold_tree f = my_pose.fold_tree();
	f.new_jump( jump_point1, jump_point2, cut_point );
	my_pose.set_fold_tree( f );
	my_pose.set_jump( 1, random_jump );

	// don't allow fragment insertions at these points
	my_pose.set_allow_bb_move( jump_point1, false );
	my_pose.set_allow_bb_move( jump_point2, false );
	//std::cout << "successfully set jump!" << std::endl;
}

// like calc_resi_resj_score, but summed over all atoms at this position.
double
get_resi_resj_score(
	int resi,
	int resj,
	FArray3D_float full_coord
) {
	using namespace homolog_stats;

	float score = 0;
	float max_contact_dist = 20;
	// use cbetas rather than centroids!
	int cb_atom_index = 5; // cbeta atom index in full_coord
	numeric::xyzVector_double w( &full_coord( 1, cb_atom_index, resi ) );
	numeric::xyzVector_double v( &full_coord( 1, cb_atom_index, resj ) );
	float true_dist = distance( w, v );

	if ( true_dist < max_contact_dist && true_dist > 2 ) {
		float local_score = calc_resi_resj_score( "CBCB", resi, resj, true_dist );
		score += local_score;
	}

	// try using oxygens!
	int o_atom_index = 4; // oxygen atom index in full_coord
	numeric::xyzVector_double j( &full_coord( 1, o_atom_index, resi ) );
	numeric::xyzVector_double t( &full_coord( 1, o_atom_index, resj ) );
	true_dist = distance( j, t );

	if ( true_dist < max_contact_dist && true_dist > 2 ) {
		float local_score = calc_resi_resj_score( "OO", resi, resj, true_dist );
		score += local_score;
	}

	return score;
} // get_resi_resj_score

///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
