// -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
// vi: set ts=2 noet:
//  CVS information:
//  $Revision: 14694 $
//  $Date: 2007-05-02 10:41:14 -0700 (Wed, 02 May 2007) $
//  $Author: tex $


// Rosetta Headers
#include "fragments_pose.h"
#include "after_opts.h"
#include "angles.h"
#include "barcode_stats.h"
#include "chuck.h"
#include "cluster_fragments.h"
#include "current_pose.h"
#include "files_paths.h"
#include "fragments.h"
#include "fragments_ns.h"
#include "fragments_pose.h"
#include "gunn.h"
#include "jumping_ns.h"
#include "jumping_pairings.h"
#include "pose.h"
#include "maps_ns.h"
#include "misc.h"
#include "param.h"
#include "param_aa.h"
#include "pose_io.h"
#include "random_numbers.h"
#include "recover.h"
#include "refold.h"
#include "score.h"
#include "silent_input.h"
#include "ssblocks.h"
#include "vall_data.h"
#include "pdb.h"

// ObjexxFCL Headers
#include <ObjexxFCL/DimensionExpressions.hh>
#include <ObjexxFCL/FArray1D.hh>
#include <ObjexxFCL/FArray2D.hh>
#include <ObjexxFCL/FArray3D.hh>
#include <ObjexxFCL/FArray4D.hh>
#include <ObjexxFCL/formatted.io.hh>
#include <ObjexxFCL/string.functions.hh>

// C++ Headers
#include <map>
#include <vector>
#include <algorithm>
#include <cstdlib>
#include <fstream>
#include <iostream>


//Utility Headers
#include <utility/basic_sys_util.hh>
#include <utility/io/izstream.hh>
#include <utility/io/ozstream.hh>

void
cluster_fragments() {
	using namespace fragments::frag_pointer;
	using namespace fragments;
	using namespace pose_ns;

	static int  nres( 100 );
	static int  size( 3 );
	static int  top_N_frags( 200 );
	static bool init( false );
	static bool debug( false );
	static bool native_exists( false );
	static std::string output_file( "fragment_rms" );
	std::string native_pdb_fn( "junk" );

	if ( !init ) {
		if ( truefalseoption("debug") )  { debug = true; }
		if ( truefalseoption("nres") )   { nres = intafteroption("nres"); }
		if ( truefalseoption("size") )   { size = intafteroption("size"); }
		if ( truefalseoption("nfrags") ) { top_N_frags = intafteroption("nfrags"); }
		if ( truefalseoption("output_file") ) {
			output_file = stringafteroption("output_file");
		}
		std::string frag_name( stringafteroption("frag_name") );
		read_fragments_simple( frag_name, nres );

		if ( truefalseoption("n") ) {
			native_pdb_fn = stringafteroption("n");
			native_exists = true;
		}
	}

	int const size_bin ( get_index_by_frag_size(size) );

	// create a sequence of the appropriate length
	std::string sequence;
	for ( int i = 1; i <= size; ++i ) {
		sequence = sequence + 'A';
	}

	Pose pose1, pose2, pose3, native_pose;
	// create pose1 and pose2 using the sequence created above.
	pose1.simple_fold_tree( size );
	pose2.simple_fold_tree( size );
	pose3.simple_fold_tree( size );
	pose1.set_sequence( sequence );
	pose2.set_sequence( sequence );
	pose3.set_sequence( sequence );
	if ( native_exists ) {
		pose_from_pdb( native_pose, native_pdb_fn, false, false );
	}


	// keep track of the rmsd between fragments
	FArray3D_float fragment_rmsd( nres, top_N_frags, top_N_frags );
	FArray2D_float native_rmsd( nres, top_N_frags );

	for ( int position = 1; position < ( nres - size + 1 ); ++position ) {
		std::cout << "calculating fragment RMS for position " << position << std::endl;
		// iterate over all pairwise combinations of nn_num
		for ( int a = 1; a <= top_N_frags; ++a ) {
			for ( int b = a + 1; b <= top_N_frags; ++b ) {
				for ( int i = 1; i <= size; ++i ) {
					pose1.set_phi       (i, align_phi   (position,a,i-1,size_bin) );
					pose1.set_psi       (i, align_psi   (position,a,i-1,size_bin) );
					pose1.set_omega     (i, align_omega (position,a,i-1,size_bin) );

					pose2.set_phi       (i, align_phi   (position,b,i-1,size_bin) );
					pose2.set_psi       (i, align_psi   (position,b,i-1,size_bin) );
					pose2.set_omega     (i, align_omega (position,b,i-1,size_bin) );
				} // for ( int i = 0; i < size; ++i )

				// trigger a refold with copy_to_misc()
				pose1.copy_to_misc();
				pose2.copy_to_misc();

				if ( debug ) {
					std::string fn2 = string_of(position) + "_" + string_of(b) + "_test.pdb";
					pose2.dump_pdb( fn2 );
				}

				// calculate Calpha RMSD between pose1 and pose2.
				float rmsd = CA_rmsd( pose1, pose2 );
				fragment_rmsd( position, a, b ) = rmsd;
				fragment_rmsd( position, b, a ) = rmsd;
			} // for ( int b = a + 1; b < top_N_frags; ++b )
			if ( debug ) {
				std::string fn1 = string_of(position) + "_" + string_of(a) + "_test.pdb";
				pose1.dump_pdb( fn1 );
			}
			if ( native_exists ) {
  			steal_angles( native_pose, pose3, position, size );
				pose3.copy_to_misc();
   			float n_rmsd = CA_rmsd( pose1, pose3 );
				native_rmsd( position, a ) = n_rmsd;
			}
		} // for ( int a = 0; a < top_N_frags; ++a )
	} // for ( int position = 1; position <= ( nres - size + 1 ); position++ )

	// print the fragment-fragment rmsds out to a file
	std::ofstream output( output_file.c_str() );

	output	<< "position"  << '\t'
					<< "frag1_pos" << '\t'
					<< "frag2_pos" << '\t'
					<< "rmsd";
	if ( native_exists ) { output << '\t' <<  "frag1_native_rmsd"; }
	output << std::endl;

	for ( int position = 1; position <= ( nres - size + 1 ); position++ ) {
		for ( int i = 1; i < top_N_frags; ++i ) {
			for ( int j = i + 1; j <= top_N_frags; ++j ) {
				float rmsd   = fragment_rmsd( position, i, j );
				float n_rmsd = native_rmsd( position, i );
				output	<< position << '\t' << i << '\t' << j << '\t'
								<< rmsd;
				if ( native_exists ) {
					output << '\t' << n_rmsd;
				}
				output << std::endl;
			}
		}
	}
	output.close();

	// Use agglomerative average-linkage hierarchical clustering to cluster
	// fragments based on pairwise rmsd between fragments.
	FArray2D_float fragment_weights;
	for ( int position = 1; position <= ( nres - size + 1 ); position++ ) {
		FArray1D_int cluster_ids( -1 );
		for ( int i = 1; i < top_N_frags; ++i ) {
			cluster_ids( i ) = i;
		}
	}

	// reweight fragments based on the sum of the rmsd scores (lower total rmsd
	// should mean a better score).
	//output	<< "position" << '\t'
	//				<< "frag_pos" << '\t'
	//				<< "weight"   << '\t';
	//if ( native_exists ) { output << '\t' << "n_rmsd"; }
	//output	<< "size"			<< std::endl;

	//for ( int position = 1; position <= ( nres - size + 1 ); position++ ) {
	//	// calculate a total per-position rmsd
	//	float total_rmsd = 0;
	//	for ( int i = 1; i <= top_N_frags; ++i ) {
	//		for ( int j = 1; j <= top_N_frags; ++j ) {
	//			if ( i != j ) {
	//				total_rmsd += fragment_rmsd( position, i, j );
	//			}
	//		}
	//	}
	//
	//	// For each fragment, divide it's rmsd to other fragments in the position
	//	// by total_rmsd.
	//	for ( int i = 1; i <= top_N_frags; ++i ) {
	//		float i_rmsd = 0;
	//		for ( int j = 1; j <= top_N_frags; ++j ) {
	//			if ( i != j ) {
	//				i_rmsd += fragment_rmsd( position, i, j );
	//			}
	//		}

	//		output	<< position << '\t'
	//						<< i << '\t'
	//						<< 1 / (i_rmsd / total_rmsd) << '\t';
	//		if ( native_exists ) {
	//			output << native_rmsd( position, i ) << '\t';
	//		}
	//		output	<< size
	//						<< std::endl;
	//
	//	} // for ( int i = 1; i <= top_N_frags; ++i )
	//} // for ( int position = 1; position <= ( nres - size + 1 ); position++ )
} // void cluster_fragments()

void
fragment_quality() {
	using namespace fragments::frag_pointer;
	using namespace fragments;
	using namespace pose_ns;

	static int  nres( 100 );
	static int  size( 3 );
	static int  top_N_frags( 200 );
	static bool debug( false );
	// fragments with an RMSD better than this are considered "good"
	static float rmsd_threshold(1.5);
	static std::string output_file( "fragment_rms" );
	std::string native_pdb_fn( "junk" );
	Pose pose1, pose2, pose3, native_pose;

	if ( truefalseoption("debug") )  { debug = true; }
	if ( truefalseoption("size") )   { size = intafteroption("size"); }
	if ( truefalseoption("nfrags") ) { top_N_frags = intafteroption("nfrags"); }
	if ( truefalseoption("output_file") ) {
		output_file = stringafteroption("output_file");
	}

	if ( truefalseoption("rmsd_threshold") ) {
		rmsd_threshold = realafteroption("rmsd_threshold");
	}

	native_pdb_fn = stringafteroption("n");
	pose_from_pdb( native_pose, native_pdb_fn, false, false, false, '-', false, true );
	nres = misc::total_residue;
	int const ind_CA=2;
	Pdb_info const& native_info = native_pose.pdb_info();
	for ( int i = 1; i <= nres; i++ ) {
		std::cout << "occupancy of native pdb at residue" << native_info.pdb_res_num()(i) << " is " <<  native_info.occupancy()(ind_CA,i) << std::endl;
	}

	std::string frag_name( stringafteroption("frag_name") );
	read_fragments_simple( frag_name, nres );

	int const size_bin ( get_index_by_frag_size(size) );

	// create a sequence of the appropriate length
	std::string sequence;
	for ( int i = 1; i <= size; ++i ) {
		sequence = sequence + 'A';
	}

	// create pose1 and pose2 using the sequence created above.
	pose1.simple_fold_tree( size );
	pose3.simple_fold_tree( size );
	pose1.set_sequence( sequence );
	pose3.set_sequence( sequence );

	// keep track of the rmsd of fragments to native
	FArray2D_float native_rmsd( nres, top_N_frags );

	for ( int position = 1; position <= ( nres - size   ); ++position ) {
		// define subset to compute RMSD on
		FArray1D_bool subset(size);
		for ( int si = 1; si <= size; ++si ) {
			subset(si) = native_info.occupancy()(ind_CA,position+si-1);
		}

		std::cout << "calculating fragment RMS for position "
							<< position << std::endl;
		// iterate over all pairwise combinations of nn_num
		for ( int a = 1; a <= top_N_frags; ++a ) {
			for ( int i = 1; i <= size; ++i ) {
				pose1.set_phi       (i, align_phi   (position,a,i-1,size_bin) );
				pose1.set_psi       (i, align_psi   (position,a,i-1,size_bin) );
				pose1.set_omega     (i, align_omega (position,a,i-1,size_bin) );
			}

			// trigger a refold with copy_to_misc()
			pose1.copy_to_misc();
			steal_angles( native_pose, pose3, position, size );
			pose3.copy_to_misc();
			float n_rmsd = CA_rmsd_by_subset( pose1, pose3, subset );
			native_rmsd( position, a ) = n_rmsd;

			if ( debug ) {
				std::string fn1 =
					string_of(position) + "_" + string_of(a) + "_test.pdb";
				pose1.dump_pdb( fn1 );
				if ( a == 1 ) { // only dump the native fragment for the first position
					std::string fn2 =
						"n" + string_of(position) + "_" + string_of(a) + "_test.pdb";
					pose3.dump_pdb( fn2 );
				}
			}
		} // for ( int a = 0; a < top_N_frags; ++a )
	} // for ( int position = 1; position <= ( nres - size + 1 ); position++ )

	// print the fragment-fragment rmsds out to a file
	std::ofstream output( output_file.c_str() );

	FArray1D_int quality_count( nres, 0 );
	output	<< "position" << '\t'
					<< "frag_n" << '\t'
					<< "CA_rmsd"  << '\t'
					<< std::endl;

	int n_good = 0;
	int total_good = 0;
	for ( int position = 1; position <= ( nres - size + 1 ); position++ ) {
		for ( int i = 1; i <= top_N_frags; ++i ) {
			float n_rmsd = native_rmsd( position, i );
			output	<< position << '\t'
							<< i << '\t'
							<< n_rmsd
							<< std::endl;
			if ( n_rmsd <= rmsd_threshold ) {
				quality_count( position )++;
			}
		} // for ( int i = 1; i <= top_N_frags; ++i )

		if ( quality_count(position) > 0 ) {
			n_good++;
		}
		total_good += quality_count( position );
	} // for ( int position = 1; position <= ( nres - size + 1 ); position++ )

	float percent_good     = (float) n_good / nres;
	float avg_frag_quality = (float) total_good / nres;
	std::cout << "n_good = " << n_good << std::endl;
	std::cout << "total_good = " << total_good << std::endl;
	std::cout << "Percentage of positions with at least one quality fragment: "
						<< percent_good << std::endl;
	std::cout << "Average number of quality fragments per position: "
						<< avg_frag_quality << std::endl;
	output.close();
	std::cerr << "you got to this point!" << std::endl;
} // void fragment_quality()



// takes a native structure, a starting point and a length arguments and chops
// it up into a fragment using only phi, psi and omega angles. Returns a pose
// that represents the native fragment at this position.
void steal_angles(
	pose_ns::Pose & from_pose,
	pose_ns::Pose & into_pose,
	int start,
	int length
) {
	using namespace pose_ns;

	// create pose1 and pose2 using the sequence created above.
	//fragment.simple_fold_tree( size );
	//fragment.set_sequence( sequence );

	// copy the angles from the native pose to the fragment_pose
	for ( int i = start; i <= length+start-1; ++i ) {
		int frag_index = i - start + 1;
		into_pose.set_phi(   frag_index, from_pose.phi(i) );
		into_pose.set_psi(   frag_index, from_pose.psi(i) );
		into_pose.set_omega( frag_index, from_pose.omega(i) );

		//	std::cout << "grabbing native fragment, "
		//					<< "i = " << i << ", phi = "
		//					<< from_pose.phi(i) << std::endl;
	}
}

void
cluster_by_maxsub() {
	using namespace std;
	using namespace silent_io;
	using namespace pose_ns;

	string silent_file_name = stringafteroption("s");
	Silent_file_data silent_data1( silent_file_name );
	Silent_file_data silent_data2( silent_file_name );
	int n = silent_data1.size();
	FArray2D_float sim_matrix( n, n, 0.0f );

	vector< string > tags;

	Pose pose1, pose2;

	int i = 1, j;
	Silent_file_data::const_iterator it1, it2;
	for ( it1 = silent_data1.begin(); it1 != silent_data1.end(); ++it1 ) {
		j = 1;
		for ( it2 = silent_data2.begin(); it2 != silent_data2.end(); ++it2 ) {
			if ( i < j ) {
				it1->second->fill_pose( pose1 );
				it2->second->fill_pose( pose2 );
				int maxsub = CA_maxsub( pose1, pose2 );
				sim_matrix( i, j ) = maxsub;
				//sim_matrix( j, i ) = maxsub;
			}
			j++;
		}
		i++;
		tags.push_back( it1->first );
		std::cout << "finished with row " << i << std::endl;
	} // for ( it1 = silent_data1.begin(); it1 != silent_data1.end(); ++it1 )

	FArray1D_int clusters = cluster_by_similarity_matrix( sim_matrix, n );
	cout << "Results of clustering:" << endl;
	cout	<< "index" << '\t'
				<< "tag" << '\t'
				<< "cluster" << '\t' << endl;
	for ( int k = 1; k <= n; k++ ) {
		cout << k << '\t' << tags[k-1] << '\t' << clusters[k] << endl;
	}
	cout << endl;
} // cluster_by_maxsub()

void
cluster_test() {
	FArray2D_float sim_matrix( 3, 3 );

	sim_matrix( 1, 2 ) = 1;
	sim_matrix( 1, 3 ) = 4;
	sim_matrix( 2, 3 ) = 10;
	sim_matrix( 2, 1 ) = 1;
	sim_matrix( 3, 1 ) = 4;
	sim_matrix( 3, 2 ) = 10;
	cluster_by_similarity_matrix( sim_matrix, 3 );
}

FArray1D_int
cluster_by_similarity_matrix( FArray2D_float sim_matrix, int count ) {
	float min_sim( 25 );
	if ( truefalseoption("min_cluster_sim") ) {
		min_sim = realafteroption("min_cluster_sim");
	} else {
		min_sim = static_cast< int >(misc::total_residue / 2);
	}

	FArray1D_int cluster_identity =
		cluster_by_similarity_matrix( sim_matrix, count, min_sim );
	return cluster_identity;
}

// Cluster a matrix of similar objects by complete linkage clustering. Returns
// a one-dimensional FArray of integers in which the indices of the similarity
// matrix contain the cluster id of each element from the sim_matrix.
FArray1D_int
cluster_by_similarity_matrix(
	FArray2D_float sim_matrix,
	int count,
	float min_sim_threshold
) {
	using namespace std;

	CompleteLinkageSimilarity metric;

	vector< vector< float > > cluster_matrix;
	cluster_matrix.resize( count );
	for ( int i = 1; i <= count; i++ ) {
		vector< float > myvec;
		myvec.resize( count );
		cluster_matrix[i-1] = myvec;
		for ( int j = 1; j <= count; j++ ) {
			cluster_matrix[i-1][j-1] = sim_matrix( i, j );
		}
	}

	FArray1D_int cluster_membership( count, 0 );
	vector< vector< int > > clusters;
	for ( int i = 1; i <= count; i++ ) {
		vector< int > myvec;
		myvec.push_back( i ); // each cluster is a neighbor to itself
		clusters.push_back( myvec );
	}

	bool done( false );
	while( !done ) {
		// find the maximum similarity in the matrix
		int p = 0;
		int q = 0;
		float max_sim( 0.0f );
		for ( unsigned int i = 0; i < clusters.size(); i++ ) {
			for ( unsigned int j = i + 1; j < clusters.size(); j++ ) {
				if ( max_sim <= cluster_matrix[i][j] ) {
					p = i;
					q = j;
					max_sim = cluster_matrix[i][j];
				}
			}
		}

		// check termination condition
		if ( max_sim < min_sim_threshold ) {
			done = true;
		}

		if ( !done ) {
			// join the two entries into the same cluster
			join_clusters( clusters[p], clusters[q] );

			// delete qth cluster, as its members are now in cluster p
			clusters.erase( clusters.begin() + q );
			int new_size = clusters.size();
			for ( unsigned int i = 0; i < cluster_matrix.size(); i++ ) {
				cluster_matrix[i].resize( new_size );
			}
			cluster_matrix.resize( new_size );

			// update similarity matrix by recalculating distances between cluster p
			// and all other clusters.
			for ( unsigned int i = 0; i < clusters.size(); i++ ) {
				for ( unsigned int j = i + 1; j < clusters.size(); j++ ) {
					float dist =
						//calc_cluster_similarity( clusters[i], clusters[j], sim_matrix );
						metric.calc_cluster_similarity( clusters[i], clusters[j], sim_matrix );
					cluster_matrix[i][j] = dist;
					cluster_matrix[j][i] = dist;
				}
			}
		} // if ( !done )
	} // while( !done )

	for ( unsigned int i = 0; i < clusters.size(); i++ ) {
		for ( unsigned int j = 0; j < clusters[i].size(); j++ ) {
			cluster_membership( clusters[i][j] + 1 ) = i;
			cout	<< "cluster_membership[" << clusters[i][j]
						<< "] = " << i << endl; }
		cout << endl;
	}

	return cluster_membership;
} // cluster_by_similarity_matrix()

void
join_clusters(
	std::vector< int > & cluster1,
	std::vector< int > & cluster2
) {

	int n = cluster2.size();
	for ( int i = 0; i < n; i++ ) {
		cluster1.push_back( cluster2[i] );
	}
}

float
calc_cluster_similarity(
	std::vector< int > & cluster1,
	std::vector< int > & cluster2,
	FArray2D_float sim_matrix
) {
	// complete-linkage clustering. Return the maximum distance
	// (or minimum similarity) between the two clusters.
	float min_sim( 10000 );
	for ( unsigned int i = 0; i < cluster1.size(); i++ ) {
		for ( unsigned int j = 0; j < cluster2.size(); j++ ) {
			if ( cluster1[i] != cluster2[j] ) {
				float sim = sim_matrix( cluster1[i] + 1, cluster2[j] + 1 );
				if ( sim <= min_sim ) {
					min_sim = sim;
				}
			}
		}
	}
	return min_sim;
}

void
print_matrix(
	FArray2D_float sim_matrix,
	int count
) {
	using namespace std;

	for ( int i = 1; i <= count; i++ ) {
		for ( int j = i+1; j <= count; j++ ) {
			cout << '\t' << sim_matrix( i, j );
		}
		cout << endl;
	}
}

void
print_vector_matrix (
	std::vector< std::vector< int > > matrix
) {
	using namespace std;
	int n = matrix.size();
	for ( int i = 0; i < n; i++ ){
		for ( int j = i + 1; j < n; j++ ) {
			cout << '\t' << matrix[i][j];
		}
		cout << endl;
	}
}
