// -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
// vi: set ts=2 noet:
//
// This file is made available under the Rosetta Commons license.
// See http://www.rosettacommons.org/license
// (C) 199x-2007 University of Washington
// (C) 199x-2007 University of California Santa Cruz
// (C) 199x-2007 University of California San Francisco
// (C) 199x-2007 Johns Hopkins University
// (C) 199x-2007 University of North Carolina, Chapel Hill
// (C) 199x-2007 Vanderbilt University

/// @file   fragment_assembly.cc
/// @brief  simple fragment assembly routines, primarily for screening
/// @brief  of shorter unknown sequences that may be somewhat unstructured
/// @author Yih-En Andrew Ban (yab@u.washington.edu)


// unit headers
#include <epigraft/prediction/fragment_assembly.hh>

// package headers
#include <epigraft/design/fragment_functions.hh>

// rosetta headers
#include <aa_name_conversion.h>
#include <fragments.h>
#include <fragments_ns.h>
#include <fragments_pose.h>
#include <jumping_util.h>
#include <pose.h>
#include <score.h>

// C++ headers
#include <sstream>


namespace epigraft {
namespace prediction {


/// @brief   pick 9-mer, 3-mer and 1-mer fragments for a sequence using vall
/// @details Which fragment sizes are picked depends on the sequence length:
///          9-mers need more than 8 residues, 3-mers more than 2 residues and
///          1-mers at least one residue.  3-mers are also picked whenever
///          1-mers are wanted, because the 1-mers are built from the 3-mers.
///          The global fragment arrays are reset on every call, including
///          calls that end up returning false.
/// @param[in] sequence  sequence to pick fragments for
/// @param[in] secondary_structure  secondary structure string; must have the
///            same length as 'sequence' (only checked by assert)
/// @param[in] n_fragments  fragment count handed to the vall picker and to
///            the final top-N selection
/// @warning remember to update max res by e.g. pose_update_MAX_RES() call
///          before calling this routine
/// @return  true if fragments were picked, false if the sequence is too short
///          (i.e. empty) for every fragment size
bool
pick_fragments(
	String const & sequence,
	String const & secondary_structure,
	Integer const & n_fragments
)
{
	assert( sequence.length() == secondary_structure.length() );

	Integer const n_residues = sequence.length();

	// a fragment size is enabled only when the sequence is strictly longer
	// than its threshold; the trailing numbers are annotations carried over
	// from the original code
	Integer const threshold_9mer = 8; // 16
	Integer const threshold_3mer = 2; // 6
	Integer const threshold_1mer = 0; // 3

	bool const want_9mers = n_residues > threshold_9mer;
	bool const want_3mers = n_residues > threshold_3mer;
	bool const want_1mers = n_residues > threshold_1mer;

	// the global fragment arrays are wiped before deciding whether anything
	// can be picked at all
	fragments::reset_fragment_arrays_used_by_Vall();

	if ( !want_9mers && !want_3mers && !want_1mers ) {
		return false;
	}

	// bigbin information is not used: pass an all-'.' placeholder string
	// together with a zero weight
	String const bigbin_placeholder( sequence.length(), '.' );
	Real const w_sequence = 1.0;
	Real const w_secondary_structure = 1.0;
	Real const w_bigbin = 0.0;

	if ( want_9mers ) {
		// 9-mers over residues 1..n_residues
		// booleans: exclude gly, pro, cis-peptide
		get_vall_frags(
			sequence, secondary_structure, bigbin_placeholder,
			w_sequence, w_secondary_structure, w_bigbin,
			1, n_residues,
			9, n_fragments,
			true, true, true,
			3
		);
	}

	if ( want_3mers || want_1mers ) {
		// 3-mers over residues 1..n_residues; also required as the source
		// of the 1-mers below
		// booleans: exclude gly, pro, cis-peptide
		get_vall_frags(
			sequence, secondary_structure, bigbin_placeholder,
			w_sequence, w_secondary_structure, w_bigbin,
			1, n_residues,
			3, n_fragments,
			true, true, true,
			2
		);
	}

	if ( want_1mers ) {
		// derive the 1-mers from the 3-mers picked above
		epigraft::design::build_1mer_from_3mer( n_residues );
	}

	// keep the top 'n_fragments' fragments for each size
	choose_frag_set_top_N_frags( n_fragments );

	return true;
}


/// @brief approximate the structure of a sequence by Monte Carlo fragment
/// @brief insertion, scored with VDW and Ramachandran terms only (no
/// @brief compaction terms)
/// @param[in] sequence  sequence string; each character is converted to a
///            residue type via num_from_res1()
/// @param[in,out] pose  rebuilt as a simple fold tree of the sequence length
///            and, on return, set to the Monte Carlo object's low pose
/// @note  terminates through utility::exit() when pick_fragments() reports
///        that no fragments were picked
/// @note  prints acceptance statistics to std::cout
void
fragment_assemble(
	String const & sequence,
	Pose & pose
)
{
	using namespace pose_ns;

	Integer const n_residues = sequence.length();

	// set up the pose: fold tree, residue identities, starting fragment
	pose.simple_fold_tree( n_residues );
	Integer aa; // residue type, written by num_from_res1() before each use
	for ( Integer seqpos = 1; seqpos <= n_residues; ++seqpos ) {
		num_from_res1( sequence.at( seqpos - 1 ), aa ); // string is 0-based, pose is 1-based
		pose.set_res( seqpos, aa );
	}
	insert_init_frag( pose, 1, n_residues );

	// array resize must precede fragment picking (see warning on pick_fragments)
	pose_update_MAX_RES( pose );

	// no secondary structure is known, so an all-'.' placeholder is used
	String const placeholder_ss( n_residues, '.' );
	if ( !pick_fragments( sequence, placeholder_ss ) ) {
		utility::exit( __FILE__, __LINE__, "ERROR: epigraft::prediction::fragment_assemble(), no fragments picked!" );
	}

	// score function: VDW plus a lightly weighted Ramachandran term
	Score_weight_map weights;
	weights.set_weight( VDW, 1.0 );
	weights.set_weight( RAMACHANDRAN, 0.1 );

	// additional scoring setup
	score_set_cst_mode(3);

	// trajectory length scales with the sequence length
	Integer const n_cycles = n_residues * 100;

	// score once, then hand the pose to the Monte Carlo object
	pose.score( weights );
	Monte_carlo mc( pose, weights, 1.0 );

	// every cycle attempts one insertion per fragment size, in this order
	// NOTE(review): pick_fragments() skips 9-mers for sequences of <= 8
	// residues and 3-mers for <= 2 residues, yet all three sizes are always
	// attempted here -- confirm choose_offset_frag() copes with that.
	Integer const fragment_sizes[] = { 9, 3, 1 };
	Integer accepted[] = { 0, 0, 0 };
	Integer const frag_offset = 0;

	for ( Integer cycle = 1; cycle <= n_cycles; ++cycle ) {
		for ( int k = 0; k < 3; ++k ) {
			choose_offset_frag( fragment_sizes[ k ], pose, 1, n_residues, frag_offset );
			if ( mc.boltzmann( pose ) ) {
				++accepted[ k ];
			}
		}
	}

	// recover low
	pose = mc.low_pose();

	// report: 'total trials' is the number of cycles, each of which made one
	// attempt per fragment size
	std::cout << "fragment assembly statistics:" << std::endl
	          << "   total trials  = " << n_cycles << std::endl
	          << "   9-mer accepts = " << accepted[ 0 ] << std::endl
	          << "   3-mer accepts = " << accepted[ 1 ] << std::endl
	          << "   1-mer accepts = " << accepted[ 2 ] << std::endl;

}


/// @brief do fragment assembly for mper
void
mper()
{
	// init pose
	Pose pose;

	// define MPER sequence
	String const consensus_sequence( "LLALDKWASLWNWFDITNWLWYIKI" );
	String const mper_sequence( consensus_sequence );

	// number of structures
	Integer const n_structures = 100;

	std::cout << "* MPER assembly" << std::endl;
	std::cout << "* generating " << n_structures << " structures" << std::endl;

	for ( Integer i = 1; i <= n_structures; ++i ) {
		// do fragment assembly
		fragment_assemble( mper_sequence, pose );

		std::ostringstream ss;
		ss << "MPER" << '_' << i << ".pdb";
		pose.dump_pdb( ss.str() );
	}

}


} // prediction
} // epigraft
