// -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
// vi: set ts=2 noet:
//
// (c) Copyright Rosetta Commons Member Institutions.
// (c) This file is part of the Rosetta software suite and is made available under license.
// (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
// (c) For more information, see http://www.rosettacommons.org. Questions about this can be
// (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.

/// @file   core/optimization/Minimizer.cc
/// @brief  Minimizer class
/// @author Phil Bradley


// Unit headers
#include <core/optimization/LineMinimizer.hh>
#include <core/optimization/Minimizer.hh>
#include <core/optimization/GA_Minimizer.hh>

#include <core/util/Tracer.hh>

// AUTO-REMOVED #include <ObjexxFCL/ObjexxFCL.hh>
#include <ObjexxFCL/FArray2D.hh>
// AUTO-REMOVED #include <ObjexxFCL/Fmath.hh>
#include <utility/exit.hh>

#include <core/options/option.hh>
#include <core/options/keys/optimization.OptionKeys.gen.hh>


// C++ headers
#include <cmath>
//#include <cstdlib>
// #include <cstdio>
#include <iostream>
#include <algorithm>
#ifdef WIN32
#include <functional>
#endif

//Auto using namespaces
namespace ObjexxFCL { } using namespace ObjexxFCL; // AUTO USING NS
//Auto using namespaces end




namespace core {
namespace optimization {

	// File-local tracer; the routines in this file send their warnings to TR.Warning.
	// NOTE(review): the channel is named "core.optimization.LineMinimizer" although the
	// @file tag of this file is core/optimization/Minimizer.cc -- confirm that sharing the
	// LineMinimizer channel name is intentional before renaming it.
	static util::Tracer TR( "core.optimization.LineMinimizer" );

	/// @brief Build a minimizer from the function to minimize and the options to use.
	/// @param func_in     used to initialize the func_ member
	/// @param options_in  used to initialize the options_ member
	Minimizer::Minimizer( Multifunc & func_in, MinimizerOptions const & options_in )
	: func_( func_in ), options_( options_in )
	{
	}


	/////////////////////////////////////////////////////////////////////////////
	/// @brief Minimize func_ with the algorithm selected by options_.min_type().
	///
	/// See @ref minimization_overview "Minimization overview and concepts" for details.
	///
	/// @param phipsi_inout  starting position on entry; overwritten with the final
	///                      position once the selected routine has finished
	/// @return func_ evaluated at the final position
	/// @note Calls utility_exit_with_message when the minimization type is not one of
	///       the names handled below.
	Real
	Minimizer::run(
		Multivec & phipsi_inout
	)
	{
		std::string const min_type( options_.min_type() );

		// Work on a private copy; the caller's vector is overwritten only at the end.
		Multivec dofs( phipsi_inout );

		// Filled in by the individual routines through their output argument. It is not
		// used for the return value, which is re-evaluated from func_ below.
		Real final_value;

		// Both convergence criteria are built from the same tolerance option; the
		// "*_atol" minimization types use the absolute test, the others the fractional one.
		DFPMinConvergedFractional relative_test( options_.minimize_tolerance() );
		DFPMinConvergedAbsolute absolute_test( options_.minimize_tolerance() );

		if ( min_type == "linmin" ) {
			// dfunc fills the derivative vector, which is handed to linmin as its search vector
			Multivec dE_ddofs( phipsi_inout );
			func_.dfunc( dofs, dE_ddofs );
			linmin( dofs, dE_ddofs, final_value );

		} else if ( min_type == "GA" ) {
			GA_Minimizer ga_minimizer( func_, options_ );
			ga_minimizer.run( dofs, options_.max_iter() );

		// --- dfpmin with the built-in line minimizer ---
		} else if ( min_type == "dfpmin" ) {
			dfpmin( dofs, final_value, relative_test );
		} else if ( min_type == "dfpmin_atol" ) {
			dfpmin( dofs, final_value, absolute_test );

		// --- dfpmin_armijo driven by an Armijo line search (second ctor argument false) ---
		} else if ( min_type == "dfpmin_armijo" ) {
			LineMinimizationAlgorithmOP line_search( new ArmijoLineMinimization( func_, false, dofs.size() ) );
			dfpmin_armijo( dofs, final_value, relative_test, line_search );
		} else if ( min_type == "dfpmin_armijo_atol" ) {
			LineMinimizationAlgorithmOP line_search( new ArmijoLineMinimization( func_, false, dofs.size() ) );
			dfpmin_armijo( dofs, final_value, absolute_test, line_search );

		// --- dfpmin_armijo driven by an Armijo line search (second ctor argument true) ---
		} else if ( min_type == "dfpmin_armijo_nonmonotone" ) {
			LineMinimizationAlgorithmOP line_search( new ArmijoLineMinimization( func_, true, dofs.size() ) );
			dfpmin_armijo( dofs, final_value, relative_test, line_search );
		} else if ( min_type == "dfpmin_armijo_nonmonotone_atol" ) {
			LineMinimizationAlgorithmOP line_search( new ArmijoLineMinimization( func_, true, dofs.size() ) );
			dfpmin_armijo( dofs, final_value, absolute_test, line_search );

		// --- dfpmin_armijo driven by a strong-Wolfe line search ---
		} else if ( min_type == "dfpmin_strong_wolfe" ) {
			LineMinimizationAlgorithmOP line_search( new StrongWolfeLineMinimization( func_, false, dofs.size() ) );
			dfpmin_armijo( dofs, final_value, relative_test, line_search );
		} else if ( min_type == "dfpmin_strong_wolfe_atol" ) {
			LineMinimizationAlgorithmOP line_search( new StrongWolfeLineMinimization( func_, false, dofs.size() ) );
			dfpmin_armijo( dofs, final_value, absolute_test, line_search );

		} else {
			utility_exit_with_message("unknown type of minimization '"+min_type+"'");
		}

		// hand the solution back and report the function value there
		phipsi_inout = dofs;
		return func_( dofs );
	}

	////////////////////////////////////////////////////////////////////////
	// Convergence test stuff
	////////////////////////////////////////////////////////////////////////

	/// @brief Fractional (relative) convergence test on two successive function values.
	/// @param Fnew  function value after the latest step
	/// @param Fold  function value before the latest step
	/// @return true when 2*|Fnew-Fold| <= tolerance*( |Fnew| + |Fold| + eps )
	bool
	DFPMinConvergedFractional::operator()(
		Real Fnew,
		Real Fold
	)
	{
		// same inequality as documented above, written with the threshold on the left
		return ( tolerance*( std::abs( Fnew ) + std::abs( Fold ) + eps ) >=
			2.0f*std::abs( Fnew - Fold ) );
	}

	/// @brief Absolute convergence test on two successive function values.
	/// @param Fnew  function value after the latest step
	/// @param Fold  function value before the latest step
	/// @return true when |Fnew-Fold| <= tolerance
	bool
	DFPMinConvergedAbsolute::operator()(
		Real Fnew,
		Real Fold
	)
	{
		return ( tolerance >= std::abs( Fnew - Fold ) );
	}


	/////////////////////////////////////////////////////////////////////////////
	// Skeleton algorithm for multivariate optimization
	/////////////////////////////////////////////////////////////////////////////
	/// @brief Repeatedly line-minimize _func from current_position until _converged
	/// accepts two successive function values or the iteration cap is reached.
	///
	/// @param current_position  starting point; passed to _line_min on every iteration
	/// @return the function value returned by the last call to _line_min
	///
	/// @note The iteration cap is max( 200, problem_size/10 ). When it is reached a
	/// warning is written to TR.Warning and the last function value is returned.
	/// @note The call that would refresh the search direction (_get_direction()) is
	/// commented out, so the same descent_direction object is handed to _line_min on
	/// every iteration.
	Real JJH_Minimizer::run(
		Multivec & current_position
	)
	{
		int const problem_size( current_position.size() );
		int const max_iter( std::max( 200, problem_size/10 ));

		// reset storage variables for descent direction updates
		_get_direction.initialize();

		// Get the starting function value and derivative
		Multivec descent_direction( problem_size, 0.0);
		Real current_value( _func( current_position ) );
		_func.dfunc( current_position, descent_direction );
		// Negate the derivative in place to obtain the initial search direction
		std::transform( descent_direction.begin(), descent_direction.end(),
										descent_direction.begin(), std::negate<Real>() );

		// Iterate to convergence, at most max_iter times
		for ( int iter = 1; iter <= max_iter; ++iter ) {
			Real const previous_value( current_value );
			current_value = _line_min( current_position, descent_direction );

			if ( _converged( current_value, previous_value ) ) return current_value;

//			descent_direction = _get_direction();
		}

		// The original message lacked the space before "iterations" ("200iterations").
		TR.Warning << "WARNING: Minimization has exceeded " << max_iter << " iterations but has not converged!" << std::endl;
		return current_value;
	}


	/// @brief Convenience overload of dfpmin that chooses the iteration cap itself.
	/// @details Forwards to the four-argument Minimizer::dfpmin with a cap of
	/// max( 200, N/10 ), where N is the number of entries in P.
	/// @param P              forwarded unchanged
	/// @param FRET           forwarded unchanged
	/// @param converge_test  forwarded unchanged
	void
	Minimizer::dfpmin(
		Multivec & P,
		Real & FRET,
		ConvergenceTest & converge_test
	) const
	{
		int const ndofs( P.size() );
		Minimizer::dfpmin( P, FRET, converge_test, std::max( 200, ndofs/10 ) );
	}

	/////////////////////////////////////////////////////////////////////////////
	/////////////////////////////////////////////////////////////////////////////
	// now cut and paste from minimize.cc
	/////////////////////////////////////////////////////////////////////////////
	/////////////////////////////////////////////////////////////////////////////

	/////////////////////////////////////////////////////////////////////////////
	/// @brief Quasi-Newton minimization of func_ that uses a BrentLineMinimization
	/// object for every line search.
	///
	/// @details HESSIN starts as the identity matrix and the first search vector XI is
	/// the negated derivative vector. Each iteration
	///   -# asks func_.abort_min(P) whether to stop early,
	///   -# line-minimizes along XI,
	///   -# returns if converge_test accepts the new and previous function values,
	///   -# otherwise re-evaluates the function and its derivatives, updates HESSIN from
	///      XI and the change in the derivative vector, and computes the next XI as
	///      -HESSIN*G.
	///
	/// @param P              starting point, passed to the line minimizer on every iteration
	/// @param FRET           receives the most recent function value. It is initialised to
	///                       the starting value, so it is defined even when the routine
	///                       stops before the first line minimization.
	/// @param converge_test  called as converge_test( new_value, previous_value )
	/// @param ITMAX          maximum number of iterations
	///
	/// @note A warning is written to TR.Warning both on an abort_min-triggered stop and
	/// when ITMAX iterations pass without convergence.
	void
	Minimizer::dfpmin(
		Multivec & P,
		Real & FRET,
		ConvergenceTest & converge_test,
		int const ITMAX
	) const
	{

		int const N( P.size() );

		// Grab a line minimizer
		BrentLineMinimization test_brent( func_, N );
		LineMinimizationAlgorithm* line_min = &test_brent;

		// should get rid of these FArrays
		FArray2D< Real > HESSIN( N, N, 0.0 );
		Multivec XI( N );   // search vector
		Multivec G( N );    // derivatives at the current point
		Multivec DG( N );   // change in G between iterations (reused as scratch below)
		Multivec HDG( N );  // HESSIN * DG

		// get function and its gradient
		Real FP = func_(P);
		func_.dfunc(P,G);

		// Previously FRET was left unassigned if abort_min fired on the first iteration
		// (or if ITMAX < 1); report the starting value in that case.
		FRET = FP;

		for ( int i = 1; i <= N; ++i ) {
			HESSIN(i,i) = 1.0;
			XI[i] = -G[i];
		}

		for ( int ITER = 1; ITER <= ITMAX; ++ITER ) {

			// Christophe added the following to allow premature end of minimization
			// I probably need to do the same with line_min
			bool abort(func_.abort_min(P));
			if(abort){
				TR.Warning << "WARNING: ABORTING MINIMIZATION TRIGGERED BY abort_min" << std::endl;
				return;
			}
			// End Christophe modifications

			// note that linmin modifies XI; afterward XI is the actual (vector)
			// step taken during linmin
			FRET = (*line_min)( P, XI );

			// check for convergence
			if ( converge_test( FRET, FP ) ) {
//				std::cout << "Called line minimization " << line_min->_num_linemin_calls << std::endl;
				return;
			}

			FP = FRET;
			// remember the old derivatives so the change can be formed below
			for ( int i = 1; i <= N; ++i ) {
				DG[i] = G[i];
			}

			// get function and its gradient
			FRET = func_(P);
			func_.dfunc(P,G);
			for ( int i = 1; i <= N; ++i ) {
				DG[i] = G[i]-DG[i];
			}
			for ( int i = 1; i <= N; ++i ) {
				HDG[i] = 0.0;
				for ( int j = 1; j <= N; ++j ) {
					HDG[i] += HESSIN(i,j)*DG[j];
				}
			}

			// scalar products needed by the HESSIN update
			Real FAC, FAE, FAD;
			FAC = 0.;
			FAE = 0.;
			for ( int i = 1; i <= N; ++i ) {
				FAC += DG[i]*XI[i];
				FAE += DG[i]*HDG[i];
			}
			// guard the reciprocals against exact zeros
			if ( FAC != 0.0 ) FAC = 1.0/FAC;
			if ( FAE != 0.0 ) {
				FAD = 1./FAE;
			} else {
				FAD = 0.;
			}
			// DG is reused here as the vector for the last term of the update
			for ( int i = 1; i <= N; ++i ) {
				DG[i] = FAC*XI[i] - FAD*HDG[i];
			}
			for ( int i = 1; i <= N; ++i ) {
				for ( int j = 1; j <= N; ++j ) {
					HESSIN(i,j) += FAC*XI[i]*XI[j] - FAD*HDG[i]*HDG[j] + FAE*DG[i]*DG[j];
				}
			}
			// next search vector: XI = -HESSIN * G
			for ( int i = 1; i <= N; ++i ) {
				XI[i] = 0.;
				for ( int j = 1; j <= N; ++j ) {
					XI[i] -= HESSIN(i,j)*G[j];
				}
			}
		}
		TR.Warning << "WARNING: DFPMIN MAX CYCLES " << ITMAX << " EXCEEDED, BUT FUNC NOT CONVERGED!" << std::endl;

//		std::cout << "Called line minimization " << line_min->_num_linemin_calls << std::endl;

	} // dfpmin
	////////////////////////////////////////////////////////////////////////

	////////////////////////////////////////////////////////////////////////
	// dfpmin Armijo
	////////////////////////////////////////////////////////////////////////


	/// @brief Quasi-Newton minimization of func_ driven by a caller-supplied line
	/// minimization object (used with inexact line searches, see the HOPT note below).
	///
	/// @details HESSIN starts as the identity matrix and the first search vector XI is
	/// the negated derivative vector. Before each line search the routine stores the
	/// directional derivative XI.G in line_min->_deriv_sum and the largest remembered
	/// function value in line_min->_func_to_beat. After the line search it tests
	/// convergence, refreshes the derivatives, conditionally updates HESSIN and
	/// computes the next XI as -HESSIN*G.
	///
	/// @param P              starting point; per the comment at the line-search call,
	///                       it is returned as the new point
	/// @param FRET           receives the most recent function value computed here
	/// @param converge_test  called as converge_test( new_value, previous_value )
	/// @param line_min       line minimization object; its public members
	///                       _deriv_sum, _func_to_beat and (for nonmonotone searches)
	///                       _last_accepted_step are written by this routine
	///
	/// @note Unlike dfpmin, this routine does not call func_.abort_min().
	/// @note A warning is written to TR.Warning when ITMAX iterations pass without
	/// the routine returning.
	void
	Minimizer::dfpmin_armijo(
		Multivec & P,
		Real & FRET,
		ConvergenceTest & converge_test,
		LineMinimizationAlgorithmOP line_min
	) const
	{

		int const N( P.size() );
		// function-local static: the option is read once, on the first call, and the
		// same cap is reused by every later call
		static int ITMAX( core::options::option[ core::options::OptionKeys::optimization::dfpmin_max_cycles ]() );  // default 200
		// threshold on |change in function value| used to detect a stalled line search
		Real const EPS( 1.E-5 );

		FArray2D< Real > HESSIN( N, N, 0.0 );
		Multivec XI( N );   // search vector; after a line search, the change that was made
		Multivec G( N );    // derivatives at the current point
		Multivec DG( N );   // previous derivatives, then the change in G, then scratch
		Multivec HDG( N );  // HESSIN * DG

		// A nonmonotone line search is compared against the largest of the last three
		// function values; a monotone one only against the last value.
		int const prior_func_memory_size( line_min->nonmonotone() ? 3 : 1 );
		Multivec prior_func_memory( prior_func_memory_size );

		if( line_min->nonmonotone() ) line_min->_last_accepted_step = 0.005;

	// When inexact line search is used, HESSIN need not remain positive definite, so
	// additional safeguard must be added to ensure XI is a desc. direction (or inexact
	// line search would fail).  Two options for safeguards are implemented below:
	//    HOPT = 1  resets HESSIN to a multiple of identity when XI is not a desc. direction.
	//    HOPT = 2  leaves HESSIN unchanged if stepsize XMIN fails Wolfe's condition
	//              for ensuring new HESSIN to be positive definite.
	// HOPT is a compile-time constant, so the HOPT == 1 code paths below are never taken.
	int const HOPT( 2 );

	// get function and its gradient
 	int NF = 1;  		// number of func evaluations (only referenced by the commented-out diagnostics)
	Real prior_func_value = func_(P);
	func_.dfunc(P,G);

	// Start the prior function memory storage
	int func_memory_filled( 1 );
	prior_func_memory[ 1 ] = prior_func_value;

	// initial HESSIN = identity, initial search vector = -G
	for ( int i = 1; i <= N; ++i ) {
		HESSIN(i,i) = 1.0;
		XI[i] = -G[i];
	}

	Real FAC, FAE, FAD, FAF;

	for ( int ITER = 1; ITER <= ITMAX; ++ITER ) {

		// directional derivative along XI plus the max-abs and Euclidean norms of G
		line_min->_deriv_sum = 0.0;
		Real Gmax = 0.0;
		Real Gnorm = 0.0;

		for ( int i = 1; i <= N; ++i ) {
			line_min->_deriv_sum += XI[i]*G[i];
			Gnorm += G[i]*G[i];
			if ( std::abs( G[i] ) > Gmax ) {
				Gmax=std::abs( G[i] );
			}
		}

		Gnorm = std::sqrt(Gnorm);

		// the value to beat is the largest of the remembered function values
		line_min->_func_to_beat = prior_func_memory[ 1 ];
		for( int i = 2 ; i <= func_memory_filled ; ++i ) {
			if( line_min->_func_to_beat < prior_func_memory[ i ] ) {
				line_min->_func_to_beat = prior_func_memory[ i ];
			}
		}

		// P is returned as new pt, and XI is returned as the change.
		FRET = (*line_min)( P, XI );

		// std::cout << "N= " << N << " ITER= " << ITER << " #F-eval= " << NF << " maxG= " << SS( Gmax ) << " Gnorm= " << SS( Gnorm ) << " step= " << SS( line_min->_last_accepted_step ) << " func= " << SS( FRET ) << std::endl;

		// The function-value test alone is not enough to stop: the largest derivative
		// component (measured before this line search) must also be <= 1.0.
		if ( converge_test( FRET, prior_func_value ) )
		{
			//$$$   std::cout << "dfpmin called linmin " << linmin_count << " times" << std::endl;
			if (Gmax<=1.0)
			{

				//std::cout << "N= " << N << " ITER= " << ITER << " #F-eval= " << NF << " maxG= " << SS( Gmax ) << " Gnorm= " << SS( Gnorm ) << " step= " << SS( line_min->_last_accepted_step ) << " func= " << SS( FRET ) << " time= " << SS( get_timer("dfpmin") ) << std::endl;

//				std::cout << "Called line minimization " << line_min->_num_linemin_calls << std::endl;
				return;
			}
			else
			{
				// Derivatives are still large; if the function value barely moved,
				// treat the line search as failed.
				if (std::abs(FRET-prior_func_value)<=EPS)
				{
					// NOTE(review): XInorm is the sum of squares of XI (no sqrt is taken),
					// whereas Gnorm above is a square root -- confirm the mix is intended.
					Real XInorm = 0.0;
					for ( int i = 1; i <= N; ++i )
					{
						XInorm += XI[i]*XI[i];
					}
					// give up if the pre-search directional derivative was clearly negative
					if ( line_min->_deriv_sum < -1e-3*Gnorm*XInorm )
					{

//						std::cout << "Failed line search while large _deriv_sum, quit! N= " << N << " ITER= " << ITER << " #F-eval= " << NF << " maxG= " << SS( Gmax ) << " Gnorm= " << SS( Gnorm ) << " step= " << SS( line_min->_last_accepted_step ) << " func= " << SS( FRET ) /*<< " time= " << SS( get_timer("dfpmin") )*/ << std::endl;

//						std::cout << "Called line minimization " << line_min->_num_linemin_calls << std::endl;
						return;
					}
					// Not convergence yet. Reinitialize HESSIN to a diagonal matrix & update direction XI.
					// This requires G to be correctly the gradient of the function.

					TR.Warning << ":( reset HESSIN from failed line search" << std::endl;

					// zero the off-diagonal entries, keep the diagonal (floored at 0.01),
					// and rebuild XI and the directional derivative from it
					line_min->_deriv_sum = 0.0;
					for ( int i = 1; i <= N; ++i ) {
						for ( int j = 1; j < i; ++j ) {
							HESSIN(i,j) = 0.0;
						}
						for ( int j = i+1; j <= N; ++j ) {
							HESSIN(i,j) = 0.0;
						}
						if ( HESSIN(i,i) < 0.01 ) HESSIN(i,i) = 0.01;
						XI[i] = -HESSIN(i,i)*G[i];
						line_min->_deriv_sum += XI[i]*G[i];
					}

					// second attempt along the diagonal-scaled direction
					FRET = (*line_min)( P, XI );

//					std::cout << "Failed line search again, quit! N= " << N << " ITER= " << ITER << " #F-eval= " << NF << " maxG= " << SS( Gmax ) << " Gnorm= " << SS( Gnorm ) << " step= " << SS( line_min->_last_accepted_step ) << " func= " << SS( FRET ) /*<< " time= " << SS( get_timer("dfpmin") )*/ << std::endl;

					// still no progress: stop
					if (std::abs(FRET-prior_func_value)<=EPS)
					{
//						std::cout << "Called line minimization " << line_min->_num_linemin_calls << std::endl;
						return;
					}
				}
			}
		}

		prior_func_value = FRET;

		// Update memory of function calls: append while there is room, otherwise
		// shift the stored values down by one and overwrite the last slot
		if( func_memory_filled < prior_func_memory_size ) {
			func_memory_filled++;
		} else {
			for( int i = 1 ; i < func_memory_filled ; ++ i ) {
				prior_func_memory[ i ] = prior_func_memory[ i + 1 ];
			}
		}
		prior_func_memory[ func_memory_filled ] = prior_func_value;

		// keep the old derivatives so the change can be formed below
		for ( int i = 1; i <= N; ++i ) {
			DG[i] = G[i];
		}

		// Some line minimization algorithms require a curvature
		// check that involves the derivative before they accept a
		// move - in these cases we don't need to recalculate
		if( line_min->provide_stored_derivatives() ) {
			line_min->fetch_stored_derivatives( G );
		} else {
			FRET = func_(P);
			func_.dfunc(P,G);
		}

		NF++;

		// _deriv_sum: XI . (old derivatives); DRVNEW: XI . (new derivatives)
		line_min->_deriv_sum = 0.0;						//needed if HOPT = 2
		Real DRVNEW = 0.0;						//needed if HOPT = 2
		for ( int i = 1; i <= N; ++i )
		{
			line_min->_deriv_sum += XI[i]*DG[i];			//needed if HOPT = 2
			DRVNEW += XI[i]*G[i];			//needed if HOPT = 2
			DG[i] = G[i]-DG[i];
		}

//		if ( line_min->_last_accepted_step = 0.0 ) {
//			std::cout << " line_min->_last_accepted_step = 0.0! " << std::endl;	//diagnostic
//		}

		// With HOPT == 2 HESSIN is only updated when the new directional derivative
		// exceeds 0.95 times the old one (the safeguard described at the HOPT declaration).
		if ( HOPT == 1 || DRVNEW > 0.95*line_min->_deriv_sum )
		{			//needed if HOPT = 2

			for ( int i = 1; i <= N; ++i )
			{
				HDG[i] = 0.0;
				for ( int j = 1; j <= N; ++j )
				{
					HDG[i] += HESSIN(i,j)*DG[j];
				}
			}
			FAC = 0.0;
			FAE = 0.0;
			FAF = 0.0;
			for ( int i = 1; i <= N; ++i )
			{
				FAC += DG[i]*XI[i];
				FAE += DG[i]*HDG[i];
				FAF += DG[i]*DG[i];
			}
			// NOTE(review): unlike the corresponding code in dfpmin, these divisions are
			// not guarded against zero denominators -- confirm the condition above is
			// meant to rule that out.
			FAF = FAC/FAF;
			FAC = 1.0/FAC;
			FAD = 1.0/FAE;
			// DG is reused here as the vector for the last term of the update
			for ( int i = 1; i <= N; ++i )
			{
				DG[i] = FAC*XI[i] - FAD*HDG[i];
			}
			for ( int i = 1; i <= N; ++i )
			{
				for ( int j = 1; j <= N; ++j )
				{
					HESSIN(i,j) += FAC*XI[i]*XI[j] - FAD*HDG[i]*HDG[j] + FAE*DG[i]*DG[j];
				}
			}

		}									//needed if HOPT = 2

		// next search vector: XI = -HESSIN * G
		for ( int i = 1; i <= N; ++i ) {
			XI[i] = 0.0;
			for ( int j = 1; j <= N; ++j ) {
				XI[i] -= HESSIN(i,j)*G[j];
			}
		}

		// never executed while HOPT is fixed at 2
		if ( HOPT == 1 )
		{

			DRVNEW=0.0;
			for ( int i = 1; i <= N; ++i ) {
				DRVNEW += XI[i]*G[i];
			}

			// If direc. deriv >0, reset the Hessian inverse estimate
			if (DRVNEW > -EPS) {

//				std::cout << "reset hessin; dirdg=" << SS( line_min->_deriv_sum ) << std::endl;

				if (FAF<0.01) FAF=0.01;
				for ( int i = 1; i <= N; ++i ) {
					for ( int j = 1; j <= N; ++j ) {
						HESSIN(i,j) = 0;
					}
					HESSIN(i,i) = FAF;
					XI[i] = -FAF*G[i];
				}
			}

		}

	}

	TR.Warning << "WARNING: DFPMIN (Armijo) MAX CYCLES " << ITMAX << " EXCEEDED, BUT FUNC NOT CONVERGED!" << std::endl;

//	std::cout << "Called line minimization " << line_min->_num_linemin_calls << std::endl;
	return;
}





	/////////////////////////////////////////////////////////////////////////////
	/// @brief Run a single line minimization of func_ with a BrentLineMinimization object.
	/// @param P     point handed to the line minimizer
	/// @param XI    search vector handed to the line minimizer
	/// @param FRET  receives the value returned by the line minimizer
	void
	Minimizer::linmin(
		Multivec & P,
		Multivec & XI,
		Real & FRET
		) const
	{
		// a fresh line minimizer is built for each call, sized from P
		BrentLineMinimization brent_search( func_, P.size() );
		FRET = brent_search( P, XI );
	}

} // namespace optimization
} // namespace core
