// -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
// vi: set ts=2 noet:
//  CVS information:
//  $Revision: 23432 $
//  $Date: 2008-06-24 16:25:52 +0300 (Tue, 24 Jun 2008) $
//  $Author: yab $


// Rosetta Headers
#include "maxsub.h"
#include "cenlist.h"
#include "design.h"
#include "docking.h"
#include "files_paths.h"
#include "initialize.h"
#include "misc.h"
#include "native.h"
#include "param.h"
#include "param_aa.h"
#include "rms.h"

// ObjexxFCL Headers
#include <ObjexxFCL/FArray1Da.hh>
#include <ObjexxFCL/FArray2Da.hh>
#include <ObjexxFCL/FArray3D.hh>
#include <ObjexxFCL/FArray3Da.hh>
//#include <ObjexxFCL/formatted.o.hh>

// C++ Headers
#include <cmath>
#include <cstdlib>


////////////////////////////////////////////////////////////////////////////////
/// @begin maxsub_native
///
/// @brief
///  run maxsub on the C-alphas of the passed coordinates against the native
///
/// @detailed
/// cems 2001.
/// this is the main rosetta entry point for this function.
/// it is a wrapper for maxsub: it reduces the input coordinates to just the
/// calphas (atom index 2), converts them to double precision and compares
/// them to those of the native. Residues whose native_occupancy(2,i) is not
/// positive are left out of the comparison.
/// it returns the number of aligned residues, the rms of these and the
/// log of the e-value of the comparison.
///
/// nali, rms and logeval are all returned as zero, without calling maxsub,
/// when
///  - there is no native, or
///  - files_paths::multi_chain is set and neither design::dna_interface nor
///    files_paths::antibody_modeler is set, or
///  - no residue has a positive native C-alpha occupancy (maxsub divides by
///    the number of points, so it must not be called with zero points; this
///    mirrors the natoms <= 0 check in maxsub_partial).
///
/// @param  x - [in] - coordinates, indexed (xyz, atom, residue)
/// @param  nali - [out] - number of aligned residues
/// @param  rms - [out] - rms reported by maxsub, narrowed to float
/// @param  logeval - [out] - natural log of the e-value reported by maxsub
///
/// @global_read
///  total_residue, native_occupancy, native_ca (via the misc / native
///  namespaces), files_paths::multi_chain, files_paths::antibody_modeler,
///  design::dna_interface
///
/// @global_write
///
/// @remarks
///
/// @references
///
/// @authors
///
/// @last_modified
/////////////////////////////////////////////////////////////////////////////////
void
maxsub_native(
	FArray3DB_float const & x,
	int & nali,
	float & rms,
	float & logeval
)
{
	using namespace misc;
	using namespace native;

	// "nothing to compare" answer; every early exit below returns these values
	nali = 0;
	rms = 0.0f;
	logeval = 0.0f;

	if ( !get_native_exists() || ( files_paths::multi_chain && !design::dna_interface && !files_paths::antibody_modeler ) ) {
		return;
	}

	// work arrays are only allocated once we know a comparison will be attempted
	FArray2D_double xp( 3, total_residue ); // calphas of x
	FArray2D_double xe( 3, total_residue ); // calphas of the native

	// pack the residues that have native C-alpha occupancy into columns 1..n_points
	int n_points = 0;
	for ( int i = 1; i <= total_residue; ++i ) {
		if ( native_occupancy( 2, i ) <= 0.0 ) continue;
		++n_points;
		for ( int k = 1; k <= 3; ++k ) {
			xe(k,n_points) = native_ca(k,i);
			xp(k,n_points) = x(k,2,i); // calphas
		}
	}

	// maxsub computes smax/nsup; with zero points that is 0/0, so stop here
	if ( n_points <= 0 ) {
		return;
	}

	double mxrms,mxpsi,mxzscore,mxscore,mxeval;
	maxsub(n_points,xe,xp,mxrms,mxpsi,nali,mxzscore,mxeval,mxscore);

	rms = static_cast< float >( mxrms ); // double to float conversion
	logeval = static_cast< float >( std::log(mxeval) );

}

//------------------------------------------------------------------------------
////////////////////////////////////////////////////////////////////////////////
/// @begin maxsub_partial
///
/// @brief
///   wrapper for maxsub that allows input of pdbs with missing residues.
///
/// @detailed
///   cems 2001. pb mod 2004
///   Only residues flagged as present in both occ1 and occ2 take part. Their
///   C-alpha coordinates (atom index 2) are copied, in residue order, into
///   double precision arrays that are handed to maxsub.
///   If no residue is present in both structures, nali, rms and logeval are
///   set to zero and maxsub is not called.
///
/// @param  nres - [in] - number of residues in each input
/// @param  x1 - [in] - first coordinate set, dimensioned (3,MAX_POS,nres)
/// @param  x2 - [in] - second coordinate set, dimensioned (3,MAX_POS,nres)
/// @param  occ1 - [in] - per-residue presence flags for x1
/// @param  occ2 - [in] - per-residue presence flags for x2
/// @param  nali - [out] - number of aligned residues reported by maxsub
/// @param  rms - [out] - rms reported by maxsub, narrowed to float
/// @param  logeval - [out] - natural log of the e-value reported by maxsub
///
/// @global_read
///   param::MAX_POS
///
/// @global_write
///
/// @remarks
///
/// @references
///
/// @authors
///
/// @last_modified
/////////////////////////////////////////////////////////////////////////////////
void
maxsub_partial(
	const int nres,
	FArray3Da_float x1,
	FArray3Da_float x2,
	FArray1Da_bool occ1,
	FArray1Da_bool occ2,
	int & nali,
	float & rms,
	float & logeval
)
{
	// give the argument arrays their shapes
	x1.dimension(3,param::MAX_POS,nres);
	x2.dimension(3,param::MAX_POS,nres);
	occ1.dimension(nres);
	occ2.dimension(nres);

	int const ca_index = 2; // superimpose on the C-alpha

	// packed double precision copies of the shared C-alphas
	FArray2D_double xp( 3, nres );
	FArray2D_double xe( 3, nres );

	int n_shared = 0;
	for ( int ires = 1; ires <= nres; ++ires ) {
		// skip residues that are missing from either structure
		if ( !( occ1(ires) && occ2(ires) ) ) continue;

		++n_shared;
		for ( int dim = 1; dim <= 3; ++dim ) {
			xe(dim,n_shared) = x1(dim,ca_index,ires);
			xp(dim,n_shared) = x2(dim,ca_index,ires);
		}
	}

	// nothing in common: report an empty alignment
	if ( n_shared <= 0 ) {
		nali = 0;
		rms = 0.0;
		logeval = 0.0;
		return;
	}

	// outputs of maxsub; only the rms and the e-value are passed on
	double fit_rms, fit_psi, fit_zscore, fit_score, fit_evalue;
	maxsub(n_shared,xe,xp,fit_rms,fit_psi,nali,fit_zscore,fit_evalue,fit_score);

	rms = fit_rms; // double to float conversion
	logeval = std::log(fit_evalue);
}

//------------------------------------------------------------------------------


////////////////////////////////////////////////////////////////////////////////
/// @begin maxsub
///
/// @brief
///  this function was adapted and improved by cem strauss from an
///  original template provided by angel ortiz.
///
/// @detailed
///     Here applies a modification of Dani Fischer's heuristic algorithm
///     for finding the largest subset of residues for superimposition within
///     a threshold. The part that restrains the secondary structure
///     matching needs to be changed.
///
///     As implemented here:
///      1) every window of 7 consecutive points is used in turn as a seed
///         and fitted with rmsfitca3;
///      2) points whose squared distance after the fit is within the current
///         threshold are added to the aligned set, the threshold being raised
///         in 1 angstrom steps while it is below 7 angstroms;
///      3) the largest set whose rms is <= 4.0 is kept, ties in size being
///         resolved in favour of the lower rms;
///      4) the kept set is refitted with rmsfitca2. If fewer than 2 points
///         were kept, all points are fitted instead.
///     psi, score, zscore and evalue are then derived from the kept set.
///
///     If nsup <= 0 there is nothing to superimpose: rms, psi, nali, zscore
///     and score are returned as 0 and evalue as 1 (so that log(evalue) is 0),
///     and none of the fitting routines is called.
///
/// @param  nsup - [in] - number of points; not assigned by this function
/// @param  xe - [in/out] - 3*nsup coordinates (x,y,z per point); passed to the
///          fitting routines, which per the comments below reorient them
/// @param  xp - [in/out] - 3*nsup coordinates (x,y,z per point); as for xe
/// @param  rms - [out] - rms returned by the final rmsfitca2 call
/// @param  psi - [out] - 100 * (size of best aligned set) / nsup
/// @param  nali - [out] - number of points with weight 1 in the final set
/// @param  zscore - [out] - (psi - mean) / sd, using the fitted mean and sd below
/// @param  evalue - [out] - extreme-value estimate from zscore, floored at 2.650E-14
/// @param  score - [out] - sum over aligned points of 1/(1+(d/4.0)^2)
///
/// @global_read
///
/// @global_write
///  whatever clear_rms / add_rms / rmsfitca3 / rmsfitca2 keep as shared state
///
/// @remarks
///  The member count 'last' is captured once per seed, before the threshold
///  loop. Once any point has been added for that seed, the refit therefore
///  runs on every remaining threshold step and the "advance the threshold"
///  branch is no longer taken.
///  NOTE(review): the in-loop comment suggests a per-iteration check was
///  intended - confirm before changing, it is left as it was.
///
/// @references
///
/// @authors
///
/// @last_modified
/////////////////////////////////////////////////////////////////////////////////
void
maxsub(
	int & nsup,
	FArray1Da_double xe,
	FArray1Da_double xp,
	double & rms,
	double & psi,
	int & nali,
	double & zscore,
	double & evalue,
	double & score
)
{
	// Nothing to superimpose. Without this check psi below is 0/0 and the
	// mean/sd formulas raise zero to a negative power.
	if ( nsup <= 0 ) {
		rms = 0.0;
		psi = 0.0;
		nali = 0;
		zscore = 0.0;
		evalue = 1.0; // log(evalue) == 0 for the callers
		score = 0.0;
		return;
	}

	xe.dimension( 3*nsup );
	xp.dimension( 3*nsup );

//     --- Vector alignment

	double const rmstol = 4.0; // rms limit for accepting a set; also the score distance scale
	int const seed_length = 7; // a heptapeptide is used as seed

	FArray1D_double xp0( 3*nsup ); // untouched copy of xp
	FArray1D_double xe0( 3*nsup ); // untouched copy of xe
	FArray1D_double wmax( nsup ); // 1.0 / 0.0 membership of the best set found
	FArray1D_bool logical_w( nsup ); // membership of the set being grown

//------------------------------------------------------------------------------
//     First selects the residues allowed to be superimposed
//------------------------------------------------------------------------------
	// need to pass in xp and xe, nsup = total_res

	int nr = nsup; // local copy handed to rmsfitca2

	for ( int i = 1; i <= nsup*3; ++i ) {
		xe0(i) = xe(i);
		xp0(i) = xp(i);
	}
//------------------------------------------------------------------------------
//     Now apply Fischer's maxsub algorithm. A heptapeptide is used.
//     (Angel's variation of only seeding at points with matched secondary
//     structure is not active here: every window is used as a seed.)
//------------------------------------------------------------------------------
	int smax = 0; // size of the best set so far
	for ( int i = 1; i <= nsup; ++i ) { // wmax holds one weight per point, so nsup is the right bound
		wmax(i) = 0.0;
	}
	double rmsmax = 1000.0; // rms of the best set so far

	for ( int i = 1; i <= nsup-seed_length+1; ++i ) {

		clear_rms();

		// build up a seed segment of length seed_length starting at i
		int lmax = 0; // number of members of the current set
		for ( int j = 1; j <= nsup; ++j ) {
			bool const in_seed = ( j >= i && j <= i+seed_length-1 );
			logical_w(j) = in_seed;
			if ( in_seed ) {
				add_rms(j,xp0,xe0);
				++lmax;
			}
		}

		// find initial alignment using seed
		// rmsfitca3 rotates all of the residues but the rotation only aligns the
		// residues pushed into add_rms.
		rmsfitca3(nsup,xp0,xp,xe0,xe,rms);

		// next we iterate the following algorithm
		//  1) using current alignment find all atoms within a threshold t of being superimposed
		//  2) add these close ones to set to be aligned
		//  3) align using the current set, then reorient all atoms.
		//  4) increment t by a small amount.
		//  5) repeat this until threshold = 7 angstroms.
		double t = 0.0;
		int const last = lmax; // seed size; see @remarks in the header
		while ( t < 7.0 ) {

			t += 1.0;
			 // increment threshold by one angstrom. (note this is tied to int(min_d))
			double const t2 = t*t; // t squared
			double min_d2 = 8.0*8.0; // smallest squared distance among points still outside
			for ( int n = 1; n <= nsup; ++n ) {
				if ( !logical_w(n)) {
					int k = 3*(n-1);
					double const xpek1 = xp(k+1) - xe(k+1);
					double const xpek2 = xp(k+2) - xe(k+2);
					double const xpek3 = xp(k+3) - xe(k+3);
					double const d2 =
					 ( xpek1 * xpek1 ) + ( xpek2 * xpek2 ) + ( xpek3 * xpek3 );
					 // squared distance
					if ( d2 <= t2 ) { // is this atom within threshold?
						add_rms(n,xp0,xe0); // if so, add to list
						logical_w(n) = true; // set membership flag
						++lmax; // keep a count of members
					} else {
						// if not below threshold, then find the closest atom
						if ( d2 <= min_d2 ) min_d2 = d2;
					}
				}
			}
			// if the set has grown beyond the seed, refit; otherwise jump the
			// threshold to the (truncated) distance of the closest outside atom
			if ( lmax != last ) {
				rmsfitca3(nsup,xp0,xp,xe0,xe,rms);
			} else {
				t = static_cast< int >(std::sqrt(min_d2)); // advance the threshold
			}
		}

		// keep this set if it is larger than the best so far and within the
		// rms tolerance, or if it is the same size with a lower rms
		bool const larger_and_ok = (lmax > smax) && (rms <= rmstol);
		bool const same_size_better_rms = (lmax == smax) && (rms < rmsmax);
		if ( larger_and_ok || same_size_better_rms ) {
			smax = lmax;
			for ( int n = 1; n <= nsup; ++n ) {
				wmax(n) = logical_w(n) ? 1.0 : 0.0;
			}
			rmsmax = rms;
		}
	}

//------------------------------------------------------------------------------
//     --- Confirm final superimposition.
//     --- first, compile regions without indels. Report rms
//------------------------------------------------------------------------------

	if ( smax > 1 ) {
		rmsfitca2(nr,xp,xe,wmax,smax,rms);
		 // side effect sets xpc,zpc etc... via common
	} else {
		// if smax is less than 2 then basically we failed to find an alignment
		//  to make the best of a bad situation we simply revert to aligning all of the residues
		for ( int i = 1; i <= nr; ++i ) {
			wmax(i) = 1.0;
		}
		rmsfitca2(nr,xp,xe,wmax,nr,rms);
		 // side effect sets xpc,zpc etc... via common
	}

	psi = ( static_cast< double >( smax ) / nsup ) * 100.0;

//------------------------------------------------------------------------------
//     --- Transform PSI to Levitt & Gerstein Sstr
//     --- NEED TO BE REMOVED
//------------------------------------------------------------------------------

//     compute score without gaps

	score = 0.0;
	nali  = 0;

	for ( int i = 1; i <= nsup; ++i ) {
		if ( wmax(i) == 1.0 ) { // exact compare is fine: only 0.0 or 1.0 is ever stored
			double d = 0.0;
			int k = 3*(i-1);
			for ( int j = 1; j <= 3; ++j ) {
				double const xpekj = xp(k+j) - xe(k+j);
				d += xpekj * xpekj;
			}
			d = std::sqrt(d) / rmstol;
			score += 1.0 / ( 1.0 + ( d * d ) );
			++nali;
		}
	}

//------------------------------------------------------------------------------
//     --- Report probabilities for random matches.
//     --- Average and standard deviation of PSI as a function of the number
//     --- of points, from the EV fitting using N > 70:
//     --- m(L) = 747.29 * L**(-0.7971)
//     --- s(L) = 124.99 * L**(-0.6882)
//------------------------------------------------------------------------------

	double const psi_mean = 747.29 * std::pow( static_cast< double >( nr ), -0.7971 );
	double const psi_sd = 124.99 * std::pow( static_cast< double >( nr ), -0.6882 );
	zscore = (psi-psi_mean)/psi_sd;
//     extreme-value estimate. (The earlier gaussian estimate
//     0.5*erfcc(zscore/sqrt(2)) was always overwritten by this one, so it is
//     no longer computed.)
	double const znew = 0.730*((1.2825755*zscore)+0.5772);
	evalue = 1.0-std::exp(-std::exp(-znew));
//     due to numerical errors, the e-value is cutoff at 2.650E-14
	if ( evalue < 2.650E-14 ) evalue = 2.650E-14;

}

//////////////////////////////////////////////////////////////////////////////
/// @begin erfcc
///
/// @brief
///  polynomial-in-exponent approximation to the complementary error function
///
/// @detailed
///  With z = |x| and t = 1/(1+z/2), evaluates
///   t * exp( -z*z - 1.26551223 + t*P(t) )
///  where P is a degree 8 polynomial, and reflects the result as
///  2 - value for negative x.
///
/// @param  x - [in] - argument
///
/// @return
///  the approximation described above
///
/// @global_read
///
/// @global_write
///
/// @remarks
///
/// @references
///  (C) Copr. 1986-92 Numerical Recipes Software
///
/// @authors
///
/// @last_modified
////////////////////////////////////////////////////////////////////////////////
double
erfcc( double x )
{
	// coefficients of P(t), lowest order first
	static double const coef[] = {
		1.00002368, .37409196, .09678418, -.18628806, .27886807,
		-1.13520398, 1.48851587, -.82215223, .17087277
	};
	int const n_coef = sizeof( coef ) / sizeof( coef[0] );

	double const z = std::abs(x);
	double const t = 1.0/(1.0+0.5*z);

	// Horner evaluation, starting from the highest order coefficient
	double poly = coef[ n_coef - 1 ];
	for ( int k = n_coef - 2; k >= 0; --k ) {
		poly = coef[k] + t*poly;
	}

	double const upper = t*std::exp(-z*z-1.26551223+t*poly);

	// the fit is for x >= 0; negative arguments use the reflection
	return ( x < 0.0 ) ? 2.0 - upper : upper;
}

////////////////////////////////////////////////////////////////////////////////
/// @begin COMAS
///
/// @brief
///    Calculate the weighted center of the selected atoms and shift the
///    coordinates so that this center is at the origin
///
/// @detailed
///    The center is sum(WT(i)*coord(i)) / sum(WT(i)) over the first NAT atoms.
///    It is returned in XC,YC,ZC and subtracted from each of the first NAT
///    coordinate triples of C.
///
/// @param  C - [in/out] - coordinates, x,y,z for atom i at C(3*i-2),C(3*i-1),C(3*i)
/// @param  WT - [in] - weight of each atom
/// @param  NAT - [in] - number of atoms
/// @param  XC - [out] - x of the weighted center
/// @param  YC - [out] - y of the weighted center
/// @param  ZC - [out] - z of the weighted center
///
/// @global_read
///
/// @global_write
///
/// @remarks
///    NOTE(review): there is no check for the weights summing to zero; the
///    reciprocal of the sum is taken as is.
///
/// @references
///
/// @authors
///
/// @last_modified
/////////////////////////////////////////////////////////////////////////////////
void
COMAS(
	FArray1Da_double C,
	FArray1Da_double WT,
	int NAT,
	double & XC,
	double & YC,
	double & ZC
)
{
	C.dimension( star );
	WT.dimension( star );

	// weighted coordinate sums and the total weight
	double wsum_x = 0.0;
	double wsum_y = 0.0;
	double wsum_z = 0.0;
	double wsum = 0.0;

	for ( int atom = 1; atom <= NAT; ++atom ) {
		int const base = 3*( atom - 1 ); // offset of this atom's x,y,z in C
		double const w = WT(atom);

		wsum_x += C(base+1) * w;
		wsum_y += C(base+2) * w;
		wsum_z += C(base+3) * w;
		wsum += w;
	}

	double const inv_wsum = 1.0/wsum;
	XC = wsum_x*inv_wsum;
	YC = wsum_y*inv_wsum;
	ZC = wsum_z*inv_wsum;

	// move the center to the origin
	for ( int atom = 1; atom <= NAT; ++atom ) {
		int const base = 3*( atom - 1 );
		C(base+1) -= XC;
		C(base+2) -= YC;
		C(base+3) -= ZC;
	}
}

////////////////////////////////////////////////////////////////////////////////
/// @begin fraction_native_contacts
///
/// @brief
///  compare the centroid contacts of the current structure with the native's
///
/// @detailed
///  Every pair of residues i,j with j >= i+3, where both residues are protein
///  or nonnatural amino acids, is examined.
///  The pair is a native contact if the squared distance between the native
///  centroids is below 64.0, and a decoy contact if cendist(i,j) is below 64.0
///  (presumably cendist holds squared distances as well - TODO confirm).
///  With
///    nati = number of native contacts
///    same = native contacts that are also decoy contacts
///    diff = decoy contacts that are not native contacts
///  the value returned is (same - diff) / nati.
///
/// @param
///
/// @return  fraction native contacts as defined above; 0 if there is no
///          native or the native has no contacts
///
/// @global_read
///  total_residue, res, cendist, native_centroid (via the misc / param_aa /
///  cenlist_ns / native namespaces)
///
/// @global_write
///
/// @remarks
///
/// @references
///
/// @authors  mj 03/28/05
///
/// @last_modified
/////////////////////////////////////////////////////////////////////////////////
float
fraction_native_contacts()
{
	using namespace misc;
	using namespace param_aa;
	using namespace cenlist_ns;
	using namespace native;

	// nothing to compare against without a native
	if ( !get_native_exists() ) return float( 0 );

	int n_native = 0; // contacts in the native
	int n_shared = 0; // contacts in both native and decoy
	int n_decoy_only = 0; // contacts in the decoy only

	for ( int i = 1; i <= total_residue-3; ++i ) {
		if ( !( is_protein(res(i)) || is_nonnatural(res(i)) ) ) continue;

		// partners at least three positions further along the chain
		for ( int j = i+3; j <= total_residue; ++j ) {
			if ( !( is_protein(res(j)) || is_nonnatural(res(j)) ) ) continue;

			bool const decoy_contact = ( cendist(i,j) < 64.0 );

			// squared distance between the native centroids of i and j
			float const nativedist(
			 (native_centroid(1,i) - native_centroid(1,j)) * (native_centroid(1,i) - native_centroid(1,j))
			 + (native_centroid(2,i) - native_centroid(2,j)) * (native_centroid(2,i) - native_centroid(2,j))
			 + (native_centroid(3,i) - native_centroid(3,j)) * (native_centroid(3,i) - native_centroid(3,j)));
			bool const native_contact = ( nativedist < 64.0 );

			if ( native_contact ) ++n_native;
			if ( decoy_contact ) {
				if ( native_contact ) {
					++n_shared;
				} else {
					++n_decoy_only;
				}
			}
		}
	}

	// no native contacts: avoid dividing by zero
	if ( n_native <= 0 ) return float( 0 );

	return float( n_shared - n_decoy_only ) / float( n_native );
}
