// -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
// vi: set ts=2 noet:
//  CVS information:
//  $Revision: 23432 $
//  $Date: 2008-06-24 16:25:52 +0300 (Tue, 24 Jun 2008) $
//  $Author: yab $


// Rosetta Headers
#include "map_sequence.h"
#include "after_opts.h"
#include "dock_loop_ensemble.h"
#include "docking_ns.h"
#include "files_paths.h"
#include "filters.h"
#include "fragment_class.h"
#include "fragments.h"
#include "fragments_pose.h"
#include "fragments_ns.h"
#include "fullatom.h"
#include "initialize.h"
#include "jumping_util.h"
#include "jumping_loops.h"
#include "jumping_pairings.h" // get_loop_fraction
#include "jumping_diagnostics.h"
#include "misc.h" // damn
#include "param.h" // MAX_POS
#include "param_aa.h" // MAX_POS
#include "pose.h"
#include "pose_io.h"
#include "pose_rms.h"
#include "random_numbers.h"
#include "read_aa_ss.h"
#include "score.h"
#include "ssblocks.h" // charlie's heap stuff
#include "timer.h"
#include "util_vector.h"

// ObjexxFCL Headers
#include <ObjexxFCL/FArray1D.hh>
#include <ObjexxFCL/FArray2D.hh>
#include <ObjexxFCL/FArray3D.hh>
#include <ObjexxFCL/FArray4D.hh>
//#include <ObjexxFCL/Fmath.hh>
//#include <ObjexxFCL/Time_Date.hh>
#include <ObjexxFCL/formatted.o.hh>

// C++ Headers
#include <cmath>
#include <cstdlib>
#include <iostream>
#include <sstream>
#include <fstream>



//Utility Headers
#include <utility/basic_sys_util.hh>
#include <utility/io/izstream.hh>

/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//
// map_misc_coords
//
// Threads the target sequence (read with read_aa) onto the starting
// structure and leaves the resulting model in the misc arrays.
//
// assumes that the starting structure is in the misc arrays
//
// fa_input        -- if true, side-chains are recycled from the starting
//                    structure at aligned positions of sequence identity and
//                    the remaining positions are repacked
// coords_init     -- forwarded to pose_from_misc
// ideal_structure -- forwarded to pose_from_misc
//
// returns TRUE on success
// returns FALSE (with the pose flag restored) if the sequence mapping cannot
// be set up, if no target residue is aligned to the starting structure, or
// if map_pose_sequence fails
//
// stops the program if the query is not defined or the target sequence
// cannot be read
//
// backbone geometry of starting structure is idealized at loop-modelling
// positions, otherwise it is preserved
//

bool
map_misc_coords(
	bool const fa_input,
	bool const coords_init,
	bool const ideal_structure
)
{
	using namespace pose_ns;

	// remember the caller's pose flag so every exit path can restore it
	bool const save_pose_flag( pose_flag() );
	set_pose_flag( true );

	// create a pose containing the starting structure
	Pose start_pose;

	pose_from_misc( start_pose, fa_input && coords_init, ideal_structure,
									coords_init );

	int const start_nres( start_pose.total_residue() );

	assert( files_paths::idealized_structure == start_pose.ideal_backbone() );

	//if (!coords_init ) start_pose.refold();

	// get the start and target sequences:
	std::string start_sequence, target_sequence;
	for ( int i=1; i<= start_nres; ++i ) {
		start_sequence = start_sequence + param_aa::aa_name1( start_pose.res(i));
	}
	{ // scope
		if (!files_paths::query_defined ) {
			std::cout << "STOP: need query-defined to map input sequences!!" <<
				std::endl;
			utility::exit( EXIT_FAILURE, __FILE__, __LINE__);
		}
		bool fail;
		read_aa( fail );
		if ( fail ) {
			std::cout << "STOP: need fasta-file to map input sequences!!" <<
				std::endl;
			utility::exit( EXIT_FAILURE, __FILE__, __LINE__);
		}
		// the target sequence is collected from misc::residue1 after read_aa
		for ( int i=1; i<= misc::total_residue; ++i ) {
			target_sequence = target_sequence + misc::residue1(i);
		}
	}
	int const target_nres( target_sequence.size() );

	// read the align file and setup the mapping
	FArray1D_int mapping( target_nres );
	if (!setup_start_mapping( target_sequence, start_sequence, mapping ) ) {
		// this could just mean that one of the starting structures
		// was screwed up, so don't STOP just return false
		std::cout << "WARNING: failed to setup the sequence mapping!" << std::endl;
		set_pose_flag( save_pose_flag );
		return false;
	}
	// keep the untouched mapping: map_pose_sequence works on its own copy, and
	// the original is needed below for the rmsd report and side-chain copying
	FArray1D_int start_mapping( mapping );

	// mapping is a mapping from the target_sequence to the start_sequence
	// constructed from the alignment information in the align file

	// trim cterminal template residues if necessary:
	// cgap counts the unaligned (-1) target positions at the C-terminus. The
	// scan is bounded so that an alignment with no aligned position at all
	// cannot walk off the front of the array.
	int cgap(0);
	while ( cgap < target_nres && mapping(target_nres-cgap) == -1 ) ++cgap;
	if ( cgap == target_nres ) {
		// nothing to thread onto: treat like any other bad starting structure
		std::cout << "WARNING: no target residue is aligned to the starting " <<
			"structure!" << std::endl;
		set_pose_flag( save_pose_flag );
		return false;
	}
	// template position of the last aligned target residue
	int const template_nres ( mapping(target_nres-cgap) );

	// make our simulation pose, copy torsions,coords from start_pose
	Pose pose;
	pose.simple_fold_tree( template_nres );
	pose.copy_segment( template_nres, start_pose, 1, 1 );

	// now map the sequence:
	bool const ok = map_pose_sequence( pose, target_sequence, mapping );

	if ( !ok ) {
		// failure
		set_pose_flag( save_pose_flag );
		return false;
	}

	// check final rmsd over aligned region:
	std::cout << "map_misc_coords:: rmsd to start over aligned regions: " <<
		CA_rmsd_by_mapping( pose, start_pose, start_mapping ) << std::endl;


	if ( fa_input ) {
		// copy sidechains from starting structure
		pose.set_fullatom_flag( true, false ); // repack_rotamers = false
		FArray1D_bool allow_repack( target_nres, true );
		for ( int i=1; i<= target_nres; ++i ) {
			if ( start_mapping(i) != -1 ) {
				int const start_pos( start_mapping(i) );
				int const aa ( pose.res        (i) );
				int const aav( pose.res_variant(i) );
				if ( aa == start_pose.res( start_pos ) ) {
					// identical residue type: keep the starting side-chain and
					// exclude this position from the repack below
					allow_repack(i) = false;
					pose.copy_sidechain( i, aa, aav,
															 start_pose.full_coord()(1,1,start_pos) );
				}
			}
		}
		pose.repack( allow_repack, false ); // no rots exist
	}

	// ensure final pose is in the misc arrays!
	pose.copy_to_misc();

	// restore rosetta fullatom state (should be false in initialize_start )
	if ( fa_input ) set_fullatom_flag( false );
	assert( !get_fullatom_flag() );

	// reset pose_flag to initial state
	set_pose_flag( save_pose_flag );
	return true; // success!
}

///////////////////////////////////////////////////////////////////////////////
// map_pose_sequence
//
// Converts `pose` (which carries one sequence) into a pose that carries
// target_sequence. start_mapping(i) is the pose residue aligned to target
// residue i, or -1 if target residue i is unaligned.
//
// Steps: mutate aligned positions, build the terminal loops if the termini
// are not already in register, then call build_random_loop until the working
// mapping is the identity.
//
// returns true on success, false if loop building gave up
// stops the program if the fragment-file length differs from the target
// length, if the pose has jumps, or if the final sequence check fails
//
// see setup_start_mapping for an example of how the mapping is constructed
// from a fasta-format align file
//

bool
map_pose_sequence(
	pose_ns::Pose & pose,
	std::string const & target_sequence,
	const FArray1D_int & start_mapping
)
{
	using namespace pose_ns;

	int const n_target( target_sequence.size() );

	// fragments are indexed by target position, so the lengths must agree
	if ( fragments::fragments_nres != n_target ) {
		std::cout << "map_pose_sequence: target sequence should match the " <<
			"the fragment file (at least in length!) frag_nres= " <<
			fragments::fragments_nres << " target_nres= " << n_target <<
			std::endl;
		utility::exit( EXIT_FAILURE, __FILE__, __LINE__);
	}

	// only jump-free poses are supported
	if ( pose.num_jump() > 0 ) {
		std::cout << "SORRY: sequence mapping is only implemented for " <<
			"simple-tree poses right now. You could set a simple tree and " <<
			"call calc_bonds..." <<	std::endl;
		utility::exit( EXIT_FAILURE, __FILE__, __LINE__);
	}

	// untouched copy of the incoming pose, used for the rmsd report at the end
	Pose reference_pose;
	reference_pose = pose;

	// the loop builders edit the map in place, so work on a private copy
	FArray1D_int working_map( start_mapping );

	/////////////////////////////////////////////////////
	// aligned regions: switch each aligned pose residue to the target identity
	for ( int tpos=1; tpos<= n_target; ++tpos ) {
		int const template_pos( working_map(tpos) );
		if ( template_pos == -1 ) continue;
		pose.set_res( template_pos, aa2int( target_sequence[tpos-1] ) );
	}

	///////////////////////
	// terminal loops: needed when the first target residue is not aligned to
	// pose residue 1, or the last target residue is unaligned
	bool const termini_in_register(
		working_map(1) == 1 && working_map(n_target) != -1 );
	if ( !termini_in_register ) {
		build_terminal_loops( pose, target_sequence, working_map );
	}

	///////////////////////
	// internal loops: keep rebuilding until target and pose numbering coincide
	FArray1D_int identity_map( n_target );
	for ( int k=1; k<= n_target; ++k ) identity_map(k) = k;

	while ( working_map != identity_map ) {
		if ( !build_random_loop( pose, target_sequence, working_map ) ) {
			// failed to close
			break;
		}
	}

	if ( working_map != identity_map ) {
		std::cout << "WARNING:: map_pose_sequence failed" << std::endl;
		return false;
	}

	// sanity check: the pose must now spell out the target sequence
	for ( int tpos=1; tpos<= n_target; ++tpos ) {
		if ( pose.res(tpos) != aa2int ( target_sequence[tpos-1] ) ) {
			// this should never happen; dump both sequences before stopping
			std::cout << "STOP! failure in map_pose_sequence!!" << std::endl;
			for ( int row=1; row<= n_target; ++row ) {
				int const pose_aa( pose.res(row) );
				char const pose_code( param_aa::aa_name1(pose_aa));
				char const target_code( target_sequence[row-1] );
				int const target_aa( aa2int( target_code ));
				std::cout << "pose_res,target_res: " << row << ' ' <<
					pose_aa << ' ' << pose_code << ' ' <<
					target_aa << ' ' << target_code << std::endl;
			}
			utility::exit( EXIT_FAILURE, __FILE__, __LINE__);
		}
	}

	// report how far the aligned region moved from the incoming pose
	std::cout << "map_pose_sequence:: rmsd to start over aligned regions: " <<
		CA_rmsd_by_mapping( pose, reference_pose, start_mapping ) << std::endl;

	return true; // success!
}

/////////////////////////////////////////////////////////////////
// setup_start_mapping
//
// Reads the alignment file named after the -map_sequence option and fills
// `mapping` so that mapping(i) is the start-sequence position aligned to
// target position i, or -1 if target position i is aligned to a gap.
// Positions are 1-based.
//
// two acceptable formats -- tags should be unique
//
// single-line:
// ALIGN <alignment> tag
//
// or aligned fasta:
// >tag
// <alignment>
//
// . or - are acceptable gap characters
//
// if a tag appears more than once, the later alignment replaces the earlier
// one (a warning is printed)
//
// if the target_sequence or start_sequence appears twice in the alignment,
// need a way to distinguish the correct one.
//
// either one should have the tag "TARGET" or "target"
// or the other one should have the tag "START" or "start"
//
// returns TRUE on success
// returns FALSE if either sequence cannot be found in the file or the two
// alignment rows differ in length
// stops the program if the file cannot be opened
//
// `mapping` must have room for target_sequence.size() entries
//

bool
setup_start_mapping(
	std::string const & target_sequence,
	std::string const & start_sequence,
	FArray1D_int & mapping
)
{
	assert( truefalseoption("map_sequence") );
	std::string filename; // no more Fstrings!
	stringafteroption( "map_sequence", "doh!", filename );

	utility::io::izstream data( filename.c_str() );
	if (!data.good() ) {
		std::cout << "STOP: Unable to open the sequence mapping file: " <<
			filename <<	"\nThe filename should follow the -map_sequence flag" <<
			std::endl;
		utility::exit( EXIT_FAILURE, __FILE__, __LINE__);
	}

	// peek at the first line to decide on the file format, then rewind
	std::string line;
	getline( data, line );
	data.seek_beg();

	// tag -> alignment row (gaps normalized to '-')
	typedef std::map< std::string, std::string > Align_map;
	Align_map align_map;

	if ( !line.empty() && line[0] == '>' ) { // assume aligned fasta file
		std::string tag;
		while ( getline( data,line ) ) {
			if ( !line.empty() && line[0] == '>' ) {
				line.erase(0,1);
				tag = line;
				if ( tag == "TARGET" ) tag = "target";
				if ( tag == "START" ) tag = "start";
				if ( align_map.count( tag ) ) {
					std::cout << "WARNING:: duplicate tags in the align-file; replacing" <<
						" old data!" << std::endl;
					// do what the warning says: without this the new rows would be
					// appended to the old alignment instead of replacing it
					align_map.erase( tag );
				}
			} else {
				// an alignment row may span several lines: accumulate them
				std::replace( line.begin(), line.end(), '.', '-' );
				align_map[tag] += line;
			}
		}
	} else { // ALIGN file format
		while ( getline(data,line) ) {
			std::istringstream line_stream( line );
			std::string line_tag,align,tag;
			line_stream >> line_tag >> align >> tag;
			if ( line_tag != "ALIGN" || line_stream.fail() ) {
				// malformed lines are reported and skipped
				std::cout << "align_file parse error: " << line << std::endl;
				continue;
			}
			std::replace( align.begin(), align.end(), '.', '-' );
			if ( tag == "TARGET" ) tag = "target";
			if ( tag == "START" ) tag = "start";
			if ( align_map.count( tag ) ) {
				std::cout << "WARNING:: duplicate tags in the align-file; replacing" <<
					" old data!" << std::endl;
			}
			align_map[tag] = align;
		}
	}

	// everything has been read; close here so the early returns below do not
	// leave the stream open
	data.close();

	// find the target_sequence, start_sequence
	// a row matches if it equals the sequence once gaps are removed; the first
	// match wins unless a later match carries the explicit "target"/"start" tag
	std::string target_tag, start_tag;

	for ( Align_map::const_iterator it=align_map.begin(), it_end = align_map.end();
				it != it_end; ++it ) {
		std::string const & tag( it->first );
		std::string const & align( it->second );
		int const L( align.size() );
		std::string seq;
		for ( int i=0; i<L; ++i ) {
			if ( align[i] != '-' ) seq += align[i];
		}
		if ( seq == target_sequence && target_tag != "target" &&
				 ( target_tag.size() == 0 || tag == "target" ) ) {
			target_tag = tag;
		}
		if ( seq == start_sequence && start_tag != "start" &&
				 ( start_tag.size() == 0 || tag == "start" ) ) {
			start_tag = tag;
		}
	}

	if ( start_tag.size() < 1 || target_tag.size() < 1 ) {
		std::cout << "WARNING: cant find target or start sequences in mapping file" <<
			std::endl;
		return false;
	}

	std::string const & target_align( align_map[ target_tag ] );
	std::string const &  start_align( align_map[  start_tag ] );
	int const L( target_align.size() );

	if ( L != int(start_align.size() ) ) {
		std::cout << "WARNING: align-length mismatch in mapping file: " << L << ' ' <<
			start_align.size() << std::endl;
		return false;
	}

	{ // debuggin
		// print the pairwise alignment, dropping columns that are gaps in both
		std::string al1,al2;
		for ( int i=0; i<L; ++i ) {
			char s1( target_align[i] );
			char s2(  start_align[i] );
			if ( s1 != '-' || s2 != '-' ) {
				al1 += s1;
				al2 += s2;
			}
		}
		std::cout << "target_align: " << al1 << "\n start_align: " << al2 << std::endl;
	}

	// walk the alignment columns, counting residues seen so far in each row
	for ( int i=0, target_pos=0, start_pos=0; i< L; ++i ) {
		if ( start_align[i] != '-' ) {
			++start_pos;
		}
		if ( target_align[i] != '-' ) {
			++target_pos;
			if ( start_align[i] != '-' ) {
				mapping( target_pos ) = start_pos;
			} else {
				mapping( target_pos ) = -1;
			}
		}
		// at the last column both counters must equal the sequence lengths
		assert( i < L-1 ||
						( target_pos == int( target_sequence.size()) &&
							 start_pos == int(  start_sequence.size()) ) );
	}

	return true;
}

///////////////////////////////////////////////////////////////////////////
// get_map_sequence_alignment_length
//
// Returns the length (in alignment columns, gaps included) of the first
// alignment row in the file named after the -map_sequence option.
//
// needed by initialize_query to safely set a big MAX_RES
// value that will prevent re-sizing during the loop-building
// simulation
//
// stops the program if the file cannot be opened or, for the ALIGN format,
// if the first line cannot be parsed
//
int
get_map_sequence_alignment_length()
{
	assert( truefalseoption("map_sequence") );
	std::string filename; // no more Fstrings!
	stringafteroption( "map_sequence", "doh!", filename );

	utility::io::izstream data( filename.c_str() );
	if (!data.good() ) {
		std::cout << "STOP: Unable to open the sequence mapping file: " << filename <<
			"\nThe filename should follow the -map_sequence flag" << std::endl;
		utility::exit( EXIT_FAILURE, __FILE__, __LINE__);
	}

	// sniff the format from the first line, rewind, then read that line again
	// (both formats start by consuming it)
	std::string line, alignment;
	getline( data, line );
	bool const fasta_format( line[0] == '>' );
	data.seek_beg();
	getline( data, line );

	if ( fasta_format ) {
		// the ">" header was just consumed; gather rows up to the next header
		while ( getline( data,line ) && line[0] != '>' ) {
			alignment += line;
		}
	} else {
		// single-line format: ALIGN <alignment> tag
		std::istringstream fields( line );
		std::string keyword,tag;
		fields >> keyword >> alignment >> tag;
		bool const parsed( keyword == "ALIGN" && !fields.fail() );
		if ( !parsed ) {
			std::cout << "align_file parse error: " << line << std::endl;
			std::cout << "unable to parse alignment length!" << std::endl;
			std::cerr << "align_file parse error: " << line << std::endl;
			std::cerr << "unable to parse alignment length!" << std::endl;
			utility::exit( EXIT_FAILURE, __FILE__, __LINE__);
		}
	}

	data.close();

	int const alignment_length( alignment.size() );
	return alignment_length;
}

///////////////////////////////////////////////////////////////////////////
// build_terminal_loops
//
// Rebuilds the unaligned N- and C-terminal stretches of the target and
// renumbers the pose so that the first aligned target residue sits at its
// target position.
//
// pose            -- in: pose carrying the template numbering
//                    out: replaced by the rebuilt pose
// target_sequence -- full target sequence
// mapping         -- in: target position -> pose position, -1 if unaligned
//                    out: updated in place for the new pose numbering
//
// NOTE(review): the two gap-counting loops have no bound, so a mapping with
// no aligned position at all would index outside the array -- callers are
// presumably expected to rule that out; confirm
//
void
build_terminal_loops(
	pose_ns::Pose & pose,
	std::string const & target_sequence,
	FArray1D_int & mapping
)
{
	using namespace pose_ns;

	Pose new_pose;

	int const target_nres ( target_sequence.size() );

	// ngap / cgap: number of unaligned (-1) target positions at the N- and
	// C-terminus
	int ngap(0);
	while ( mapping(ngap+1) == -1 ) ++ngap;
	int cgap(0);
	while ( mapping(target_nres-cgap) == -1 ) ++cgap;

	std::cout << "build_terminal_loops: ngap= " << ngap << " cgap= " << cgap <<
		std::endl;

	// offset: new pose position minus old pose position for aligned residues
	// (the first aligned target residue moves from mapping(ngap+1) to ngap+1)
	int const offset ( ngap + 1 - mapping(ngap+1) );
	// new length: last aligned residue in the new numbering plus the C-term gap
	int const new_nres ( mapping(target_nres-cgap) + cgap + offset );
	int const old_nres ( pose.total_residue() );

	new_pose.simple_fold_tree( new_nres );
	// put init torsion angles into the termini we will be rebuilding:
	if ( ngap ) insert_init_frag( new_pose, 1, ngap );
	if ( cgap ) insert_init_frag( new_pose, new_nres-cgap+1, cgap );

	// fill in target sequence at termini:
	// tmp_mapping maps non-terminal new-pose positions back to old-pose
	// positions; terminal positions stay at -1
	FArray1D_int tmp_mapping( new_nres, -1 ); // for debugging
	for ( int i=1; i<= new_nres; ++i ) {
		if ( i <= ngap || new_nres-i+1 <= cgap ) {
			// N-terminal positions coincide with target numbering; C-terminal
			// positions are counted back from the end of the target
			int const pos( ( i <= ngap ) ? i : target_nres - new_nres + i);
			new_pose.set_res( i, aa2int( target_sequence[ pos-1 ] ) );
		} else {
			tmp_mapping( i ) = i - offset;
			assert( tmp_mapping(i) >= 1 && tmp_mapping(i) <= old_nres );
		}
	}

	// copy the middle of the protein from the pose
	// (arguments presumably: length, source pose, first destination residue,
	// first source residue -- confirm against Pose::copy_segment)
	new_pose.copy_segment( new_nres - ngap - cgap, pose, ngap+1,
												 mapping(ngap+1) );

	// now we have torsion angles at the termini and both torsions and
	// coordinates in the middle
	//
	// this call to score will trigger a refold()
	new_pose.score( score1 );

	{ // debugging
		// the copied middle segment should not have moved relative to the old pose
		float const rmsd( CA_rmsd_by_mapping( new_pose, pose, tmp_mapping ) );
		if ( rmsd > 0.01 ) {
			std::cout << "WARNING: bad terminal-loop-refold rmsd!!" << std::endl;
			assert( false ); // die in the debug build
		}
	}


	if ( ngap==0 && cgap==0 ) {
		// all we had to do was shift the sequence
		pose = new_pose;
	} else {
		// have to do some modelling
		// monte carlo
		Score_weight_map weight_map;
		setup_score_weight_map( weight_map, score1 );
		Monte_carlo mc ( new_pose, weight_map, 2.0 ); // temp = 2.0
		// 25 fragment-insertion attempts per missing terminal residue
		int const cycles ( 25 * ( ngap + cgap ) );

		// fragment moves:
		// choose_offset_frag inserts a fragment at a randomly selected
		// position in a given window of the pose
		//
		// it allows for an offset between the pose numbering and the
		// numbering of the fragment file, necessary in this case
		// since we are modeling a pose that does not have the target
		// sequence/length yet.
		for ( int i=1; i<= cycles; ++i ) {
			int size(3), loop_begin, loop_end, frag_offset;
			// alternate between the termini when both have gaps (N-terminus on
			// even cycles); otherwise always work on the one that has a gap
			if ( ( ngap != 0 && i%2 == 0 ) || cgap == 0) { // nterminal
				assert ( ngap > 0 );
				loop_begin = 1;
				loop_end = ngap;
				frag_offset = 0;
			} else { // cterminal
				assert ( cgap > 0 );
				loop_begin = new_nres-cgap+1;
				loop_end = new_nres;
				// C-terminal pose positions differ from target positions by this amount
				frag_offset = target_nres - new_nres;
			}
			// insert fragment:
			choose_offset_frag( size, new_pose, loop_begin, loop_end, frag_offset );

			// this call to boltzmann automatically scores the pose
			// which in turn will trigger a refold since we inserted new
			// torsion angles
			mc.boltzmann( new_pose );
		}

		// recover the low pose
		pose = mc.low_pose();
	}

	// setup new mapping:
	// recall: int const offset ( ngap + 1 - mapping(ngap+1) );
	FArray1D_int old_mapping ( mapping );

	// terminal gap positions are now aligned one-to-one with the rebuilt
	// termini; interior aligned positions are shifted by offset; interior
	// unaligned positions stay at -1
	for ( int i=1,cdist,last_mapping=0; i<= target_nres; ++i ) {
		cdist = target_nres-i+1; // dist to c-term
		if ( i <= ngap ) {
			mapping(i) = i;
		} else if ( cdist <= cgap ) {
			mapping(i) = new_nres-cdist+1;
		} else if ( old_mapping(i) == -1 ) {
			mapping(i) = -1;
		} else {
			mapping(i) = old_mapping(i) + offset;
		}
		if ( mapping(i) != -1 ) {
			// aligned positions must stay in range and strictly increasing
			assert ( mapping(i) >= 1 && mapping(i) <= new_nres &&
							 mapping(i) > last_mapping );
			last_mapping = mapping(i);
		}
	}
}


// build_random_loop
//
// Picks one internal insertion or deletion of the alignment at random,
// rebuilds it with rebuild_loop and updates `mapping` to the new pose
// numbering.
//
// this routine is called repeatedly until mapping is the identity map
//
// pose            -- in/out: pose being converted to the target sequence
// target_sequence -- full target sequence
// mapping         -- in/out: target position -> pose position, -1 if
//                    unaligned; only modified when the loop was rebuilt
//
// returns false if loop-building failed, true otherwise
//
// assumes that we have already rebuilt terminal missing density? YES
// stops the program if it runs into terminal missing density
//

bool
build_random_loop(
	pose_ns::Pose & pose,
	std::string const & target_sequence,
	FArray1D_int & mapping
)
{
	int const target_nres ( target_sequence.size() );

	// these describe the location of the region to be remodelled:
	// will be filled in by the logic below
	int target_begin(0), target_end(0), template_begin(0), template_end(0);

	pose.score( score0 );
	std::cout << "build_random_loop:: start VDW score: " <<
		pose.get_0D_score ( pose_ns::VDW ) << std::endl;

	//////////////////////////
	// choose a loop randomly:
	//////////////////////////

	// target_begin == 0 means "nothing selected yet"
	while ( target_begin == 0 ) {
		int pos ( static_cast< int >( target_nres * ran3() + 1 ) );
		if ( mapping(pos) == -1 ) {
			// landed inside an insertion: back up to the aligned residue before it
			while ( pos > 1 && mapping(pos) == -1 ) --pos;
			if ( mapping(pos) == -1 ) break; // unaligned N-terminus
		}
		assert ( mapping(pos) != -1 );
		// walk forward to the end of the current gap-free aligned stretch
		while ( pos< target_nres &&
						mapping(pos+1) != -1 &&
						mapping(pos+1) == mapping(pos)+1 ) ++pos;
		if ( pos == target_nres ) continue; // try again

		if ( mapping(pos+1) == -1 ) {
			// insertion in target
			target_begin = pos;
			template_begin = mapping(pos);
			++pos;
			while ( pos<target_nres && mapping(pos) == -1 ) ++pos;
			if ( mapping(pos) == -1 ) {
				// unaligned all the way to the C-terminus: this is terminal
				// missing density, not an internal loop. Clear the selection so
				// the check below stops instead of passing target_end == 0 on to
				// rebuild_loop.
				target_begin = 0;
				template_begin = 0;
				break;
			}
			target_end = pos;
			template_end = mapping(pos);
		} else {
			// deletion in target
			assert (mapping(pos+1) != mapping(pos)+1 );
			target_begin = pos;
			target_end = pos+1;
			template_begin = mapping(pos);
			template_end = mapping(pos+1);
		}
	}

	if ( target_begin == 0 ) {
		// debug
		std::cout << "STOP: build_random_loop:: first remodel terminal missing " <<
			"density"	<< std::endl;
		utility::exit( EXIT_FAILURE, __FILE__, __LINE__);
	}


	///////////////////////////////
	// now rebuild the loop segment
	///////////////////////////////

	int const old_template_nres ( pose.total_residue() );

	bool const ok = rebuild_loop( pose, target_sequence, target_begin,
		target_end, template_begin, template_end );

	if ( !ok ) return false; // give up on this decoy!


	/////////////////////
	// update the mapping
	FArray1D_int old_mapping( mapping );

	// residues after loop are shifted by distance equal to the
	// difference in the loop lengths:
	int const offset ( ( target_end   - target_begin ) -
										 ( template_end - template_begin ) );
	int const new_template_nres ( pose.total_residue() );
	assert ( new_template_nres == old_template_nres + offset );

	for ( int i=1,last_mapping=0; i<= target_nres; ++i ) {
		if ( i<= target_begin ) {
			// before the loop: unchanged
			mapping(i) = old_mapping(i);
		} else if ( i >= target_end ) {
			// after the loop: aligned positions shift, unaligned ones stay -1
			if ( old_mapping(i) == -1 ) {
				mapping(i) = -1;
			} else {
				mapping(i) = old_mapping(i) + offset;
			}
		} else {
			// inside the loop: now aligned one-to-one with the rebuilt residues
			mapping(i) = template_begin + i - target_begin;
		}

		if ( mapping(i) != -1 ) {
			// debug
			assert ( mapping(i) >= 1 && mapping(i) <= new_template_nres &&
							 mapping(i) > last_mapping );
			last_mapping = mapping(i);
		}
	}
	assert ( mapping(target_begin) == template_begin &&
					 mapping(target_end) == template_begin+target_end-target_begin );

	return true;

}


///////////////////////////////////////////////////////////
// rebuild_loop
//
// Replaces the template segment between init_template_begin and
// init_template_end by a loop with the target sequence and length, trying
// progressively larger loop windows until a closed loop is found.
//
// pose                -- in: current model; out: replaced by the best closed
//                        model on success, untouched on failure
// target_sequence     -- full target sequence
// init_target_begin,
// init_target_end     -- target residues that bound the loop
// init_template_begin,
// init_template_end   -- pose residues that bound the loop
//
// returns true if we succeeded, false if no loop window could be closed
//
// *_begin, *_end are the residues that bound the loop, ie
// the loop goes from *_begin+1 --> *_end-1
//
// PB: 06-20-05 sequence mapping logic is tricky: only change the
// sequence in the range of the initial loop
//
bool
rebuild_loop(
	pose_ns::Pose & pose,
	std::string const & target_sequence,
	int const init_target_begin,
	int const init_target_end,
	int const init_template_begin,
	int const init_template_end
)
{
	using namespace pose_ns;

	/////////
	// params
	int const min_target_loop_size ( 6 );
	int const min_template_loop_size ( 4 );
	bool const verbose( false );

	// for choosing the best loop, and jumping out once we've seen
	// enough good ones:
	pose.score( score3 );
	float const start_vdw_score ( pose.get_0D_score( VDW ) );
	float const really_bad_score ( 10000.0 );
	float best_score ( really_bad_score );
	int good_loop_count (0);

	// set once a closed loop has been copied into pose
	bool loop_closed( false );

	int const target_nres ( target_sequence.size() );
	int const template_nres ( pose.total_residue() );

	// local pose copy:
	Pose loop_pose, best_pose, start_loop_pose;

	int const init_target_loop_size ( init_target_end - init_target_begin - 1);
	int const init_template_loop_size ( init_template_end - init_template_begin - 1);
	// change in pose length caused by rebuilding the loop
	int const delta_size ( init_target_loop_size - init_template_loop_size );
	int const new_template_nres ( template_nres + delta_size );

	std::cout << "rebuild_loop" << init_target_begin << ' ' << init_target_end <<
		' ' << target_sequence.substr( init_target_begin-1,
																	 init_target_loop_size+2 ) << std::endl;

	// setup the start-loop-pose
	// this will have the target sequence inside the loop
	{
		int const new_template_end( init_template_end + delta_size );
		int const cutpoint( init_template_begin + init_target_loop_size/2 );

		// fill the fold_tree:
		start_loop_pose.one_jump_tree( new_template_nres, init_template_begin,
																	 new_template_end, cutpoint );

		// copy segments before and after the loop:
		start_loop_pose.copy_segment( init_template_begin, pose, 1, 1 );
		start_loop_pose.copy_segment( template_nres - init_template_end +1,
																	pose, new_template_end, init_template_end );

		// residues strictly inside the initial loop get the target identity
		for ( int i=1; i<=init_target_loop_size; ++i ) {
			int const template_pos( init_template_begin+i );
			int const   target_pos(   init_target_begin+i );
			char const target_aa( target_sequence[ target_pos-1 ] );
			start_loop_pose.set_res( template_pos, aa2int( target_aa ) );
		}
		if ( init_target_loop_size > 0 ) {
			insert_init_frag( start_loop_pose, init_template_begin+1,
												init_target_loop_size );
		}
	} // scope


	// loop over a range of possible loop positions
	// we will break out of these loops once we find a closed loop that scores ok
	//
	// the smallest window tried respects both minimum sizes and is never
	// smaller than the initial loop
	for ( int target_loop_size = std::max(
		 min_target_loop_size, std::max( min_template_loop_size + delta_size,
		 init_target_loop_size ) );
				target_loop_size < target_nres - 2;
				++target_loop_size ) {

		// wiggle room for loop location: it has to contain
		//  [ init_target_begin, init_target_end ]
		int const pad ( target_loop_size - init_target_loop_size );

		// sort the window positions by loop content in frags
		// go from highest to lowest:
		const FArray1D_float & loop_fraction ( get_loop_fraction( target_nres ) );
		typedef std::list< std::pair< float, int > > Float_int_list;
		Float_int_list window_list;
		for ( int ii=0; ii<= pad; ++ii ) {
			// should be the same as the code in the next loop
			int const target_begin   ( init_target_begin   - pad + ii );
			int const template_begin ( init_template_begin - pad + ii );
			int const target_end     ( init_target_end   + ii );
			int const template_end   ( init_template_end + ii );
			int const new_template_end  ( template_end + delta_size );
			// skip windows that would run off either end
			if ( target_begin   < 1 || target_end > target_nres ||
					 template_begin < 1 || new_template_end > new_template_nres ) continue;
			float f (0);
			for ( int i=target_begin+1; i<= target_end-1; ++i ) {
				f += loop_fraction(i);
			}
			window_list.push_back( std::make_pair( f, ii ) );
		}
		window_list.sort();
		window_list.reverse();

		for ( Float_int_list::const_iterator w_it=window_list.begin(),
						w_it_end = window_list.end(); w_it != w_it_end; ++w_it ) {
			int const ii ( w_it->second );
			int const target_begin   ( init_target_begin   - pad + ii );
			int const template_begin ( init_template_begin - pad + ii );
			//int const target_end     ( init_target_end   + ii );
			int const template_end   ( init_template_end + ii );

			int const template_loop_size ( template_end - template_begin - 1);
			assert ( template_loop_size >= min_template_loop_size );

			int const cutpoint ( template_begin + target_loop_size / 2 );
			int const new_template_end  ( template_end + delta_size );

			/////////////////////
			// setup the new pose
			loop_pose = start_loop_pose;

			// setting the tree triggers recalculation of the jumps
			loop_pose.one_jump_tree( new_template_nres, template_begin,
															 new_template_end, cutpoint );

			// reset torsions and bonds in the loop region
			insert_init_frag( loop_pose, template_begin+1, target_loop_size );
			loop_pose.insert_ideal_bonds( template_begin+1, new_template_end-1 );

			/////////////
			// debugging: check rms between start pose and loop pose
			// setup a mapping from the loop_pose back to the start pose
			FArray1D_int loop_pose_mapping( new_template_nres );
			for ( int i=1; i<= new_template_nres; ++i ) {
				if ( i<= template_begin ) loop_pose_mapping( i ) = i;
				else if ( i< new_template_end ) loop_pose_mapping( i ) = -1;
				else loop_pose_mapping(i) = i - delta_size;
			}
			float const rmsd( CA_rmsd_by_mapping( loop_pose, pose, loop_pose_mapping ) );
			std::cout << "loop-rmsd: " << rmsd << std::endl;
			assert( rmsd < 0.01 );

			//////////////////////////////////////
			// collect a list of closed fragments:
			std::vector< Fragment > fragment_list;

			///////////////////////////////////////////////////////////////////
			// close with scored fragment insertions
			{ // scope
				// setup the score function:
				Score_weight_map weight_map;
				weight_map.set_weight( VDW, 1.0 );
				weight_map.set_weight( ENV, 1.0 );
				weight_map.set_weight( PAIR, 1.0 );
				weight_map.set_weight( CHAINBREAK, 1.0 );
				weight_map.set_weight( CHAINBREAK_OVERLAP, 1.0 );

				// difference between target numbering and pose numbering here
				int const frag_offset ( target_begin - template_begin );
				bool const do_ccd_moves( target_loop_size >= 10);

				start_timer("scored_frag_close");

				scored_frag_close( weight_map, loop_pose, template_begin+1,
					new_template_end-1, 3, frag_offset, 100, 20*target_loop_size,
					do_ccd_moves, fragment_list );

				std::cout << "scored_frag_close:: found " << fragment_list.size() <<
					" frags time= " << get_timer("scored_frag_close") << std::endl;
			}

			//////////////////////////////////////////////////////////////////////////
			// try fragment closure w/o score, just chainbreak deviations
			// here we assume that fragments have been read in for the target sequence
			if ( target_loop_size <= 10 ) {
				int const frag_offset ( target_begin - template_begin );
				int const cycles1(400), cycles2(10*target_loop_size),
					big_num_fragments(100), little_num_fragments(50),
					frag_close_ccd_cycles(50);

				start_timer("frag_close");

				frag_close( loop_pose, template_begin+1, target_loop_size, cutpoint,
					frag_offset, cycles1, cycles2, big_num_fragments, little_num_fragments,
					frag_close_ccd_cycles, fragment_list );

				std::cout << "frag_close:: now found " << fragment_list.size() <<
					" frags time= " << get_timer("frag_close") << std::endl;
			}

			//////////////////////////////////////////////////
			// if we found any closed fragments, try them out:
			if ( fragment_list.size() > 0 ) {
				loop_pose.simple_fold_tree( new_template_nres );
				for ( std::vector< Fragment >::const_iterator it = fragment_list.begin(),
								it_end = fragment_list.end(); it != it_end; ++it ) {
					it->insert( loop_pose, template_begin+1 );
					//loop_pose.refold();
					loop_pose.score( score3 );
					float const map_rmsd( CA_rmsd_by_mapping( loop_pose, pose,
																										loop_pose_mapping) );
					if ( map_rmsd > 0.1 ) {
						std::cout << "WARNING!!! high loop-closure rmsd: " << map_rmsd <<
							std::endl;
					}
					if ( verbose ) {
						std::cout << "noloop refold: map_rmsd,score,env,pair,vdw " <<
							F(9,3,map_rmsd ) << ' ' <<
							loop_pose.get_0D_score( SCORE ) << ' ' <<
							loop_pose.get_0D_score( ENV ) << ' ' <<
							loop_pose.get_0D_score( PAIR ) << ' ' <<
							loop_pose.get_0D_score( VDW ) << std::endl;
					}

					float const score ( loop_pose.get_0D_score ( SCORE ) );
					float const vdw_score ( loop_pose.get_0D_score ( VDW ) );
					// "good" = does not add noticeably to the starting clash score
					if ( vdw_score < start_vdw_score + 0.5 ) {
						++good_loop_count;
					}

					if ( best_score > score ) {
						best_score = score;
						best_pose = loop_pose;
					}
				}
			}

			if ( good_loop_count > 3 ) {
				// skip out of the slide-window loop if we've seen enough non-clashing
				// loops; just an optimization
				break;
			}
		} // slide loop

		// best_score only drops below its initial value once a closed fragment
		// has been scored
		if ( best_score < really_bad_score-1 ) {
			std::cout << "closed the loop!!! score,vdw,old_vdw " <<
				best_pose.get_0D_score ( SCORE ) << ' ' <<
				best_pose.get_0D_score ( VDW ) << ' ' <<
				pose.get_0D_score ( VDW ) << std::endl;
			pose = best_pose;
			loop_closed = true;
			break;
		}
	} // loop size

	if ( !loop_closed ) {
		// pose is unchanged here; reporting success would make the caller shift
		// its mapping as if the loop had been rebuilt
		std::cout << "WARNING: rebuild_loop failed to close the loop" << std::endl;
	}

	return loop_closed;
}


///////////////////////////////////////////////////////////////////////
// Monte Carlo fragment sampling of a loop, followed by ccd closure.
//
// Works on a private copy of pose_in; the only output is fragment_list,
// which is appended to. Each of the cycles1 rounds:
//   - re-randomizes the loop [loop_begin,loop_end] with insert_random_frags
//   - runs cycles2 fragment moves (choose_offset_frag), each one passed to
//     the Monte_carlo object built from weight_map at temperature 2.0;
//     if do_ccd_moves is set, steps in the second half of the round may
//     additionally attempt ccd_moves
//   - recovers the low pose of the round and runs fast_ccd_loop_closure
//   - appends the loop's phi/psi/omega/secstruct to fragment_list when both
//     fdev and bdev reported by the closure are within the threshold (0.01)
//
// Exits the program if no fold-tree cutpoint lies in [loop_begin-1,loop_end].
// In BOINC builds the function returns at round 20 if no round has reached
// the fdev threshold yet, unless the skip_best_fdev_cutoff option is set.

void
scored_frag_close(
	pose_ns::Score_weight_map const & weight_map,
	pose_ns::Pose const & pose_in,
	int const loop_begin,
	int const loop_end,
	int const frag_size,
	int const frag_offset,
	int const cycles1,
	int const cycles2,
	bool const do_ccd_moves,
	std::vector< Fragment > & fragment_list
)
{
	using namespace pose_ns;

	float const ccd_threshold( 0.01);
	int const loop_size ( loop_end - loop_begin + 1 );

	// private working copy: pose_in is never modified
	Pose pose;
	pose = pose_in;

	// monte carlo bookkeeping, temperature = 2.0
	Monte_carlo mc( pose, weight_map, 2.0 );

	// locate the cutpoint belonging to this loop; if several cutpoints fall
	// inside the window, the last one listed is used
	int ncut;
	const FArray1D_int & cuts( pose.fold_tree().get_fold_tree_cutpoint( ncut ) );
	int cutpoint( 0 );
	for ( int ic = 1; ic <= ncut; ++ic ) {
		int const cut( cuts(ic) );
		if ( cut >= loop_begin-1 && cut <= loop_end ) cutpoint = cut;
	}
	if ( cutpoint == 0 ) {
		std::cout << "STOP: scored_frag_close:: cant find cutpoint\n";
		utility::exit( EXIT_FAILURE, __FILE__, __LINE__);
	}

	// running minima over all rounds, used for reporting and the BOINC cutoff
	float best_fdev( 1000.0 );
	float best_score( 1000.0 );

#ifdef BOINC
	bool const skip_best_fdev_cutoff( truefalseoption("skip_best_fdev_cutoff") );
#endif

	for ( int round = 1; round <= cycles1; ++round ) {
		// progress report every 10th round
		if ( round%10 == 0 ) {
			std::cout << "scored_frag_close( "
				<< I(5,loop_begin) << I(5,loop_end)
				<< I(5,cycles1) << I(5,cycles2)
				<< " ) cycle= " << I(5,round)
				<< " total-closed-frags: " << I(5,fragment_list.size() )
				<< ' ' << best_fdev << ' ' << best_score << std::endl;
		}

		// fresh random start for this round
		insert_random_frags( frag_size, pose, loop_begin, loop_end, frag_offset );
		mc.reset( pose ); // calls score

		for ( int step = 1; step <= cycles2; ++step ) {
			choose_offset_frag( frag_size, pose, loop_begin, loop_end, frag_offset );
			mc.boltzmann( pose );

			// closure moves are only tried in the 2nd half of the round
			if ( !do_ccd_moves || step <= cycles2/2 ) continue;

			// the later the step, the more often this test passes
			// (presumably ran3() is uniform on [0,1) -- confirm)
			if ( ran3() * cycles2 < step ) {
				ccd_moves( loop_size, pose, loop_begin, loop_end, cutpoint );
				mc.boltzmann( pose );
			}
		}

		// continue from the lowest-scoring pose seen in this round
		pose = mc.low_pose();
		float const round_score( pose.get_0D_score( SCORE ) );
		if ( round_score < best_score ) best_score = round_score;

		// try to close the chain break at the cutpoint
		float fdev, bdev, torsion_delta, rama_delta;
		fast_ccd_loop_closure( pose,
			loop_begin, loop_end, cutpoint, 100, ccd_threshold,
			true, 2.0, 10, 50, 75,
			fdev, bdev, torsion_delta, rama_delta);
		if ( fdev < best_fdev ) best_fdev = fdev;

#ifdef BOINC
		// optimization, jump out if threshold not met after 20 cycles
		if (!skip_best_fdev_cutoff && round == 20 && best_fdev > ccd_threshold) return;
#endif

		// only loops closed on both sides of the cutpoint are kept
		if ( !( fdev <= ccd_threshold && bdev <= ccd_threshold ) ) continue;

		// save this fragment
		Fragment closed( loop_size );
		for ( int k = 1; k <= loop_size; ++k ) {
			int const pos( loop_begin + k - 1 );
			closed.phi       ( k ) = pose.phi       ( pos );
			closed.psi       ( k ) = pose.psi       ( pos );
			closed.omega     ( k ) = pose.omega     ( pos );
			closed.secstruct ( k ) = pose.secstruct ( pos );
		}
		fragment_list.push_back( closed );
	}
}

///////////////////////////////////////////////////////////
// Builds loop conformations from fragments and appends the closed ones to
// fragment_list.
//
// Outline:
//  1. cycles1 independent trajectories on a mini-pose made of the loop plus
//     one flanking residue on each side: random fragment insertion followed
//     by cycles2 zero-temperature fragment moves that only accept a lower
//     overlap deviation at the cutpoint. The torsions of every trajectory
//     are stored and the trajectory is ranked in big_heap.
//  2. big_num_fragments entries are extracted from big_heap, ccd-closed and
//     ranked in little_heap.
//  3. little_num_fragments entries are extracted from little_heap, ccd-closed
//     once more, and appended to fragment_list if both fdev and bdev are
//     within the closure threshold (0.01).
//
// Heap keys are the negated scores; presumably the heaps therefore retain
// the entries with the smallest scores -- confirm against heap_insert.
//
// Parameters:
//  pose                  source of the flanking residues; also handed to
//                        dle_intra_clash_check in docking loop mode
//  begin, size           first residue and length of the loop in pose
//  cutpoint              must lie in [begin-1, begin+size-1], otherwise the
//                        program exits
//  frag_offset           add this to begin, get begin in fragment sequence
//  cycles1, cycles2      number of trajectories / moves per trajectory
//  big_num_fragments     number of entries taken from big_heap (step 2)
//  little_num_fragments  number of entries taken from little_heap (step 3)
//  ccd_cycles            forwarded to fast_ccd_loop_closure
//  fragment_list         output, appended to
//
// The function keeps its heaps and the fragment store in function-local
// static arrays that are re-initialized / overwritten on every call.
//
// aroop: although it is not proper to put mode specific stuff
//        in a function, I would have to perform a lot of
//        code rot otherwise. The mode specific parts are guarded by
//        docking::dle_loops_flag and are invoked by loop building
//        for docking mode dle.cc. In that mode the first heap is
//        sorted by intra-clash score, i.e. the clashes encountered
//        when a built loop is placed back into the rest of the
//        protein. The second heap is then sorted by overlap deviation.

void
frag_close(
	const pose_ns::Pose & pose,
	int const begin,
	int const size,
	int const cutpoint,
	int const frag_offset, // add this to begin, get begin in fragment sequence
	int const cycles1,
	int const cycles2,
	int const big_num_fragments,
	int const little_num_fragments,
	int const ccd_cycles,
	std::vector< Fragment > & fragment_list // output
	)
{
	using namespace pose_ns;

	// for sorting the frags:
	// big_heap sorts the fragments we'll ccd close (big_num_fragments)
	// little_heap sorts the fragments we'll return ( ie copy to the fragment_list: little_num_fragments)
	static FArray1D_int big_heap,little_heap;
	static FArray1D_float big_coheap,little_coheap;
	if ( int (big_coheap.size1()) < big_num_fragments+2 ) {
		int const pad ( 10 );
		big_heap  .dimension( big_num_fragments+2+pad ); // have to add +2, I think
		big_coheap.dimension( big_num_fragments+2+pad );
	}
	if ( int (little_coheap.size1()) < little_num_fragments+2 ) {
		int const pad ( 10 );
		little_heap  .dimension( little_num_fragments+2+pad ); // have to add +2, I think
		little_coheap.dimension( little_num_fragments+2+pad );
	}
	// setup the heap
	heap_init( big_heap, big_coheap, big_num_fragments );
	heap_init( little_heap, little_coheap, little_num_fragments );

	// store all the fragments: slot c1 holds the result of trajectory c1
	static FArray1D< Fragment > frag_array;
	if ( int(frag_array.size1()) < cycles1 ) {
		int const pad ( 10 );
		frag_array.dimension( cycles1+pad );
		// calls the default constructor for Fragment ==> sizes are all 0
	}

	// debug:
	if ( cutpoint < begin-1 || cutpoint > begin+size-1 ) {
		std::cout << "STOP: frag_close:: want cutpoint to be contained in the loop\n";
		utility::exit( EXIT_FAILURE, __FILE__, __LINE__);
	}

	////////////////////////////////////////////////
	// setup a mini-pose for refolding, book-keeping
	// residue 1 and residue nres are the fixed flanking residues, the loop
	// occupies residues 2 .. nres-1
	Pose loop_pose, best_pose;
	int const nres ( size + 2 );
	int const loop_pose_cutpoint ( cutpoint - begin + 2 );
	loop_pose.one_jump_tree( nres, 1, nres, loop_pose_cutpoint );
	loop_pose.copy_segment( nres, pose, 1, begin-1 );
	//loop_pose.jumps_from_position();
	loop_pose.set_allow_bb_move( 1, false );
	loop_pose.set_allow_bb_move( nres, false );

	// more params:
	int const loop_pose_frag_offset ( frag_offset + begin - 2 );
	int const loop_begin ( 2);      // first loop residue inside loop_pose
	int const loop_end ( nres-1);   // last loop residue inside loop_pose
	int const frag_size (3);
	int const ncut ( 1);
	// for ccd closure:
	bool const rama_check( true );
	float fdev,bdev,torsion_delta,rama_delta,threshold(0.01),max_rama_delta(2.0);

	std::cout << "frag_close( " << I(5,begin) << I(5,begin+size-1) <<
		I(5,cycles1) << I(5,cycles2) << "): ";
	for ( int c1=1; c1<= cycles1; ++c1 ) {
		if ( c1%10 == 0 ) std::cout << "." << std::flush;
		float best_dev ( 1000.0 );
		// try again
		insert_random_frags( frag_size, loop_pose, loop_begin, loop_end,
												 loop_pose_frag_offset );

		for ( int c2=1; c2<= cycles2; ++c2 ) {
			// insert random frag
			choose_offset_frag( frag_size, loop_pose, loop_begin, loop_end,
													loop_pose_frag_offset );

			// calculate overlap. loop_pose.Eposition() triggers a refold

			float const dev ( calc_overlap_dev( loop_pose_cutpoint, ncut,
				loop_pose.Eposition(), loop_pose.get_overlap_Eposition( 1 )));

			// accept? this is a 0 temperature simulation
			if ( dev < best_dev ) {
				best_dev = dev;
				best_pose = loop_pose;
			} else {
				loop_pose = best_pose;
			}
		}

		if(docking::dle_loops_flag) {
			// aroop: want to place only closed loops into heap
			fast_ccd_loop_closure( best_pose,
														 loop_begin, loop_end,
														 loop_pose_cutpoint,
														 ccd_cycles, threshold,
														 rama_check, max_rama_delta,
														 10,50,75,
														 fdev, bdev, torsion_delta, rama_delta);
		}
		else {
			// insert into big_heap, ranked by overlap deviation
			bool err;
			heap_insert( big_heap, big_coheap, c1, -best_dev, err );
		}

		// copy into the frag-array
		Fragment & f ( frag_array(c1) );
		f.dimension( size );
		for ( int k=1; k<= size; ++k ) {
			// loop is offset by 1 inside loop_pose
			f.phi       ( k ) = best_pose.phi       ( k+1 );
			f.psi       ( k ) = best_pose.psi       ( k+1 );
			f.omega     ( k ) = best_pose.omega     ( k+1 );
			f.secstruct ( k ) = best_pose.secstruct ( k+1 );
		}

		if(docking::dle_loops_flag) {
			// aroop check loop in context of the rest of the monomer
			float intra_clash_score = dle_intra_clash_check( pose, f, begin, size, cutpoint);

			// insert into big_heap based on sorted intra clash scores
			// intra clash scores are obtained by placing the built loop
			// back into the rest of the framework protein
			bool err;
			heap_insert( big_heap, big_coheap, c1, -intra_clash_score, err );
		}
	}
	std::cout << std::endl;

	// now ccd-close the top frags
	for ( int ii=1; ii<= big_num_fragments; ++ii ) {
		bool err( false );
		int c1( 0 );
		float dev( 0.0 ), dev2; // dev2: returns overlap deviation at cutpoint
		heap_extract( big_heap, big_coheap, c1, dev, err);
		// nothing was extracted (presumably the heap holds fewer than
		// big_num_fragments entries): c1 does not name a stored fragment
		if ( err ) continue;

		frag_array(c1).insert( best_pose, 2 );

		fast_ccd_loop_closure( best_pose,
													 loop_begin, loop_end,
													 loop_pose_cutpoint,
													 ccd_cycles, threshold,
													 rama_check, max_rama_delta,
													 10,50,75,
													 fdev, bdev, torsion_delta, rama_delta);

		dev2 = calc_overlap_dev( loop_pose_cutpoint, ncut, best_pose.Eposition(), best_pose.get_overlap_Eposition( 1 ) );

		// both branches report through the same err, so that the check below
		// always reflects the outcome of the little_heap insertion
		if(docking::dle_loops_flag) {
			// insert into little_heap based on deviation at cutpoint
			heap_insert( little_heap, little_coheap, c1, -dev2, err );
		}
		else {
			heap_insert( little_heap, little_coheap, c1, -fdev-bdev, err );
		}

		if ( !err ) {
			// the fragment made it into little_heap: replace the stored
			// torsions by the ccd-closed ones
			Fragment & f ( frag_array(c1) );
			f.dimension( size );
			for ( int k=1; k<= size; ++k ) {
				// loop is offset by 1 inside loop_pose
				f.phi       ( k ) = best_pose.phi       ( k+1 );
				f.psi       ( k ) = best_pose.psi       ( k+1 );
				f.omega     ( k ) = best_pose.omega     ( k+1 );
				f.secstruct ( k ) = best_pose.secstruct ( k+1 );
			}
		}
	}

	for ( int i=1; i<= little_num_fragments; ++i ) {
		int c1( 0 );
		float dev( 0.0 );
		bool err( false );
		// extract from heap:
		heap_extract( little_heap, little_coheap, c1, dev, err);
		// nothing was extracted: c1 does not name a stored fragment
		if ( err ) continue;

		// insert into best_pose
		frag_array(c1).insert( best_pose, 2 );

		// another round of frag_closure
		fast_ccd_loop_closure( best_pose,
													 loop_begin, loop_end,
													 loop_pose_cutpoint,
													 ccd_cycles, threshold,
													 rama_check, max_rama_delta,
													 10,50,75,
													 fdev, bdev, torsion_delta, rama_delta);

		if ( fdev <= threshold && bdev <= threshold ) {
			// NOTE(review): dev is not read after this assignment; the call is
			// kept because Eposition() may trigger a refold of best_pose
			dev = calc_overlap_dev( loop_pose_cutpoint, ncut,
															best_pose.Eposition(),
															best_pose.get_overlap_Eposition( 1 ) );

			Fragment f;
			f.dimension( size );
			for ( int k=1; k<= size; ++k ) {
				// loop is offset by 1 inside loop_pose
				f.phi       ( k ) = best_pose.phi       ( k+1 );
				f.psi       ( k ) = best_pose.psi       ( k+1 );
				f.omega     ( k ) = best_pose.omega     ( k+1 );
				f.secstruct ( k ) = best_pose.secstruct ( k+1 );
			}
			fragment_list.push_back( f );
		}
	}
}

///////////////////////////////////////////////////////////
// Deviation between the coordinates in Eposition and the stored overlap
// coordinates around a cutpoint.
//
// Compares residues cutpoint and cutpoint+1 of Eposition with entries 0 and 1
// of overlap_Eposition for cut number ncut, using atom slots 1, 2 and 4
// (N, CA, C). Returns sqrt( sum of squared coordinate differences / 6 ),
// i.e. the sum is divided by the 2 x 3 atoms compared.
float
calc_overlap_dev(
	int const cutpoint,
	int const ncut,
	FArray3DB_float const & Eposition,
	FArray4D_float const & overlap_Eposition
)
{
	// atom slots taking part in the comparison: N, CA, C
	int const n_bb_atoms( 3 );
	int const bb_atoms[ n_bb_atoms ] = { 1, 2, 4 };

	float sum_sq( 0.0 );
	for ( int offset = 0; offset < 2; ++offset ) {
		int const rsd( cutpoint + offset );
		for ( int a = 0; a < n_bb_atoms; ++a ) {
			int const atom( bb_atoms[a] );
			// only atom slots up to param::MAX_POS are considered
			if ( atom > param::MAX_POS ) continue;
			for ( int xyz = 1; xyz <= 3; ++xyz ) {
				float const delta( Eposition(xyz,atom,rsd) -
					overlap_Eposition(xyz,atom,offset,ncut) );
				sum_sq += delta*delta;
			}
		}
	}

	float const dev( sqrt( sum_sq/6 ) ); // per-atom rms (original comment: Angstroms/rsd)
	return dev;
}

