// -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
// vi: set ts=2 noet:
//  CVS information:
//  $Revision: 23432 $
//  $Date: 2008-06-24 16:25:52 +0300 (Tue, 24 Jun 2008) $
//  $Author: yab $


// Rosetta Headers
#include "maxsub_threshold.h"
#include "design.h"
#include "docking.h"
#include "files_paths.h"
#include "initialize.h"
#include "maxsub.h"
#include "misc.h"
#include "native.h"
#include "param.h"
#include "rms.h"

// ObjexxFCL Headers
#include <ObjexxFCL/FArray1Da.hh>
#include <ObjexxFCL/FArray2Da.hh>
#include <ObjexxFCL/FArray3Da.hh>
#include <ObjexxFCL/Fmath.hh>
//#include <ObjexxFCL/formatted.o.hh>

// C++ Headers
#include <cmath>
//#include <iostream>


////////////////////////////////////////////////////////////////////////////////
/// @begin maxsub_native_threshold
///
/// @brief
/// MaxSub comparison of a structure's calphas against the native calphas,
/// using a caller-supplied rmsd threshold.
///
/// @detailed
/// this is the main rosetta entry point for this function.
/// it is a wrapper for maxsub_threshold: the calpha coordinates of the
/// structure passed in and of the native are copied into double precision
/// arrays for residues 1..total_residue, maxsub_threshold is run on them with
/// rmsd_threshold as its rmstol, and the number of aligned residues, the rms
/// of these and the natural log of the e-value of the comparison are returned.
///
/// when no native exists, or for a multi-chain run that is neither a dna
/// interface nor an antibody modeler run, no comparison is made: nali, rms
/// and logeval are set to zero and final_alignment is left untouched.
/// cems 2001.
///
/// @param  x - [in] - coordinates, dimensioned here as (3,5,MAX_RES); only
///         atom slot 2 of each residue (the calpha) is read
/// @param  nali - [out] - number of aligned residues (0 if no comparison)
/// @param  rms - [out] - rms reported by maxsub_threshold, narrowed to float
///         (0 if no comparison)
/// @param  logeval - [out] - natural log of the e-value reported by
///         maxsub_threshold (0 if no comparison)
/// @param  rmsd_threshold - [in] - forwarded to maxsub_threshold as rmstol
/// @param  final_alignment - [out] - dimensioned here as (MAX_RES); filled in
///         by maxsub_threshold, not written when no comparison is made
///
/// @global_read
/// total_residue, native_ca and MAX_RES (made visible by the using
/// directives for misc, native and param), files_paths::multi_chain,
/// files_paths::antibody_modeler, design::dna_interface
///
/// @global_write
///
/// @remarks
/// distance_tolerance is not passed on, so maxsub_threshold runs with
/// whatever default its declaration provides.
///
/// @references
///
/// @authors
///
/// @last_modified
/////////////////////////////////////////////////////////////////////////////////
void
maxsub_native_threshold(
	FArray3Da_float x,
	int & nali,
	float & rms,
	float & logeval,
	float const rmsd_threshold,
	FArray1Da_bool final_alignment
)
{
	using namespace misc;
	using namespace native;
	using namespace param;

	x.dimension( 3, 5, MAX_RES() );
	final_alignment.dimension( MAX_RES() );

	// double precision calpha buffers handed to maxsub_threshold
	FArray2D_double model_ca( 3, MAX_RES()() );
	FArray2D_double reference_ca( 3, MAX_RES()() );

	// values reported back by maxsub_threshold; only rms and e-value are kept
	double fit_rms = 0.0;
	double fit_psi = 0.0;
	double fit_zscore = 0.0;
	double fit_score = 0.0;
	double fit_evalue = 0.0;

	// multi-chain runs are only compared for dna interface / antibody modeling
	bool const multi_chain_unsupported =
	 files_paths::multi_chain && !design::dna_interface && !files_paths::antibody_modeler;

	if ( !get_native_exists() || multi_chain_unsupported ) {
		nali = 0;
		rms = 0.0;
		logeval = 0.0;
		return;
	}

	// float -> double copy of the calphas of every residue
	for ( int res = 1; res <= total_residue; ++res ) {
		for ( int axis = 1; axis <= 3; ++axis ) {
			reference_ca(axis,res) = native_ca(axis,res);
			model_ca(axis,res) = x(axis,2,res); // calphas
		}
	}

	double const rmstol = rmsd_threshold;
	maxsub_threshold(total_residue,reference_ca,model_ca,fit_rms,fit_psi,nali,
	 fit_zscore,fit_evalue,fit_score,rmstol,final_alignment);

	rms = static_cast< float >( fit_rms ); // double to float conversion
	logeval = static_cast< float >( std::log( fit_evalue ) );
}

////////////////////////////////////////////////////////////////////////////////
/// @begin maxsub
///
/// @brief
///
/// @detailed
/// Here applies a modification of Dani Fischer's heuristic algorithm
/// for finding the largest subset of residues for superimposition within
/// a threshold. The part that restraints the secondary structure
/// matching needs to be changed.
///
/// At this point, the algorithm works as follows: first, the residue
/// assignment is done on the basis of the global secondary structure
/// similarity. Then, with this assignment ofr residue pairs, the
/// heuristic procedure of Fisher is used.
///
/// this function was adapted and improved  by cem strauss from an
/// original template provided by angel ortiz.
///
/// @param  nsup - [in/out]? -
/// @param  xe - [in/out]? -
/// @param  xp - [in/out]? -
/// @param  rms - [in/out]? -
/// @param  psi - [in/out]? -
/// @param  nali - [in/out]? -
/// @param  zscore - [in/out]? -
/// @param  evalue - [in/out]? -
/// @param  score - [in/out]? -
/// @param  rmstol - [in/out]? -
/// @param  final_alignment - [in/out]? - output the final alignment
///
/// @global_read
///
/// @global_write
///
/// @remarks
///
/// @references
///
/// @authors
///
/// @last_modified
/////////////////////////////////////////////////////////////////////////////////
void
maxsub_threshold(
	int const nsup,
	FArray1Da_double xe,
	FArray1Da_double xp,
	double & rms,
	double & psi,
	int & nali,
	double & zscore,
	double & evalue,
	double & score,
	double const rmstol,
	FArray1Da_bool final_alignment, // output the final alignment
	double distance_tolerance // default = 0.0
)
{
	xe.dimension( 3*nsup );
	xp.dimension( 3*nsup );
	final_alignment.dimension( nsup );


	int const maxres = { 3000 };
//	int const maxlen = { 2 * maxres };
//	int const maxfrag = { 100 };
//	double const angmax = { 60.0 };

	double znew;
	int k, nr;
	double rmsmax, t, t2, d, am, as;
	int lmax,smax,l;

	FArray1D_double xp0( 3*maxres );
	FArray1D_double xe0( 3*maxres );
	FArray1D_double wmax( maxres );

	FArray1D_bool logical_w( maxres );

	int last;
	double min_d2, d2, distance_increment;

	if( distance_tolerance == -1.0 )
		distance_tolerance = ( rmstol * 7.0 ) / 4.0; // first guess, extrapolating from 4 -> 7

	distance_increment = distance_tolerance / 7.0; // maintain same number of cycles


//------------------------------------------------------------------------------
// First selects the residues allowed to be superimposed
//------------------------------------------------------------------------------
	// need to pass in xp and xe, nsup = total_res

	nr = nsup;
	l = 7;

	for ( int i = 1; i <= nsup; ++i ) {
		final_alignment(i) = false; // not aligned, initially
	}

	for ( int i = 1; i <= nsup*3; ++i ) {
		xe0(i) = xe(i);
		xp0(i) = xp(i);
	}

//------------------------------------------------------------------------------
// Now apply Fishers's maxsub algorithm. An heptapeptide is used.
// The algorithm is modified in that only pairs of residues with
// similar local secondary structures are allowed to be used as
// seed residues to superimpose.
//------------------------------------------------------------------------------
	smax = 0;
	for ( int i = 1; i <= nsup; ++i ) { // should this not be 1,3*nsup?
		// w(i) = 0.0;
		wmax(i) = 0.0;
	}
	rmsmax = 1000;

	for ( int i = 1; i <= nsup-l+1; ++i ) {

		clear_rms();
		// if ( matched(i) ) { // this line is Angel's variation on Danni Fischer's method.
		// only seed at points with matched SS.

		// build up a seed segment of length l
		lmax = 0;
		for ( int j = 1; j <= nsup; ++j ) {
			if ( j >= i && j <= i+l-1 ) { // could do this without if statement
				add_rms(j,xp0,xe0);
				logical_w(j) = true; // w(j) = 1.0;
				++lmax;
			} else {
				logical_w(j) = false; // w(j) = 0.0;
			}
		}


		// find initial alignment using seed
		// rmsfitca3 rotates all of the residues but the rotation only alligns the
		// residues pushed into add_rms.
		rmsfitca3(nsup,xp0,xp,xe0,xe,rms);

		// next we iterate the following algorithm
		//  1) using current allignment find all atoms within a threshold t of being superimposed
		//  2) add these close ones to set to be aligned
		//  3) aling using the current set, then reorient all atoms.
		//  4) increment t by a small amount.
		//  5) repeat this until theshold = 7 angstroms.
		t = 0.0;
		while ( t < distance_tolerance ) { // for ( int m = 1; m <= 7; ++m ) { // not anymore (pb)
			last = lmax; //pb: bugfix, was outside the loop

			t += distance_increment;
			//t += 1.0; // increment threshold by one angstrom. (note this is ties to int(min_d))
			t2 = t*t; // t squared
			min_d2 = square( distance_tolerance + 1 );
			// t = float(m) // *7.0/float(7) // huh? must be a relic???
			for ( int n = 1; n <= nsup; ++n ) {
				if ( ! logical_w(n) ) { // if ( w(n) == 0.0 ) {
					k = 3*(n-1);
					d2 =
					 square( xp(k+1) - xe(k+1) ) +
					 square( xp(k+2) - xe(k+2) ) +
					 square( xp(k+3) - xe(k+3) ); // squared distance
					if ( d2 <= t2 ) { // is this atom within threshold?
						add_rms(n,xp0,xe0); // if so, add to list
						logical_w(n) = true; // w(n) = 1.0 // set membership flag
						++lmax; // keep a count of members
					} else { // if not below threshold, then find the closest atom
						if ( d2 <= min_d2 ) min_d2 = d2; // min_d = min(d,min_d)
					}
				}
			}
			// check if we added any residues on this iteration.
			// if not then 1) we dont need to refit the calphas 2) we can advance threshold level
			if ( lmax != last ) {
				rmsfitca3(nsup,xp0,xp,xe0,xe,rms);
			} else {

//				std::cout << SS( i ) << "skipping" << SS( t ) <<
//				 SS( static_cast< int >( min_d ) ) << SS( lmax ) << SS( min_d ) << std::endl;
//				t = static_cast< int >( std::sqrt( min_d2 ) ); // advance the threshold
				t = std::sqrt( min_d2 ); // advance the threshold (took out the int)
			}
		}

		// huh? logic here is confusing.
		if ( ( lmax > smax ) && ( rms <= rmstol ) ) {
			smax = lmax;
			for ( int n = 1; n <= nsup; ++n ) {
				if ( logical_w(n) ) {
					wmax(n) = 1.0;
				} else {
					wmax(n) = 0.0;
				}
				// wmax(n) = w(n);
			}
			rmsmax = rms;
		} else if ( ( lmax == smax ) && ( rms < rmsmax ) ) {

			smax = lmax;
			for ( int n = 1; n <= nsup; ++n ) {
				if ( logical_w(n) ) {
					wmax(n) = 1.0;
				} else {
					wmax(n) = 0.0;
				}
				// wmax(n) = w(n);
			}
			rmsmax = rms;

		}
	}

//------------------------------------------------------------------------------
// --- Confirm final superimposition.
// --- first, compile regions without indels. Report rms
//------------------------------------------------------------------------------

	if ( smax > 1 ) {
		rmsfitca2(nr,xp,xe,wmax,smax,rms);
		 // side effect sets xpc,zpc etc... via common
		for ( int i = 1; i <= nsup; ++i ) {
			if ( wmax(i) > 0.0 ) {
				final_alignment(i) = true;
			}
		}

	} else {
		// if smax is less than 2 then basically we failed to find an allignement
		//  to make the best of a bad situation we simply revert to alligning all of the residue
		for ( int i = 1; i <= nr; ++i ) {
			wmax(i) = 1.0;
		}
		rmsfitca2(nr,xp,xe,wmax,nr,rms);
		 // side effect sets xpc,zpc etc... via common
	}

//	std::cout << "RMS = " << F( 7, 3, rms ) << " SMAX = " << I( 5, smax ) <<
//	 std::endl << std::endl;
	psi = ( static_cast< double >( smax ) / nsup ) * 100.0;

//------------------------------------------------------------------------------
// --- Transform PSI to Levitt & Gerstein Sstr
// --- NEED TO BE REMOVED
//------------------------------------------------------------------------------

// compute score without gaps

	score = 0.0;
	nali  = 0;

	for ( int i = 1; i <= nsup; ++i ) {

		if ( wmax(i) == 1.0 ) {
			d = 0.0;
			k = 3*(i-1);
			for ( int j = 1; j <= 3; ++j ) {
				d += square( xp(k+j) - xe(k+j) );

			}
			d = std::sqrt(d);
			score += 1.0 / ( 1.0 + square( d / rmstol ) );

			++nali;
		}
	}

//------------------------------------------------------------------------------
// --- All done. Report match statistics
//------------------------------------------------------------------------------

//------------------------------------------------------------------------------
// --- Report probabilities for random matches.
// --- These are preliminary values for the average and standard deviation
// --- of PSI as a function of NORM. These values are:
// --- m(L) = 759.31 * L**(-0.7545)
// --- s(L) = 393.32 * L**(-0.9009)
//------------------------------------------------------------------------------

// am = 759.31 * std::pow( norm, -0.7545 );
// as = 393.32 * std::pow( norm, -0.9009 );
// am = 695.41 * std::pow( norm, -0.7278 );
// as = 340.00 * std::pow( norm, -0.9045 );
// EV fitting, using N>70
	am = 747.29 * std::pow( static_cast< double >( nr ), -0.7971 );
	as = 124.99 * std::pow( static_cast< double >( nr ), -0.6882 );
	zscore = (psi-am)/as;
// this is the gaussian approach. Actually, extreme-value is more adequate
	evalue = 0.5 * erfcc(zscore/std::sqrt(2.0));
// here it is the EV approach
	znew   = 0.730*((1.2825755*zscore)+0.5772);
	evalue = 1.0-std::exp(-std::exp(-znew));
// due to numerical errors, the e-value is cutoff at 2.650E-14
	if ( evalue < 2.650E-14 ) evalue = 2.650E-14;

}
