// -*- mode:c++;tab-width:1;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
// vi: set ts=2 noet:
//  CVS information:
//  $Revision: 15655 $
//  $Date: 2007-06-26 10:16:38 -0700 (Tue, 26 Jun 2007) $
//  $Author: ashworth $

// c++ headers
#include <list>
#include <sstream>
#include <vector>
#include <algorithm> // std::max

// Rosetta headers
#include "DnaPose.h"
#include "dna.h"
#include "dna_ns.h"

#include "aa_name_conversion.h" // num_from_res1
#include "after_opts.h" // truefalseoption
#include "CorrelatedSimAnnealer.h" // use_CorrelatedSimAnnealer flag
#include "decoystats.h" // decoystats_store_decoy
#include "design_structure.h" // get_optE
#include "files_paths.h" // output_file, include_inputchi
#include "InteractionGraphBase.h"
#include "loop_class.h" // Loops
#include "loops_ns.h" // loops_ns
#include "loop_relax.h" // choose_cutpoint
#include "minimize.h" // minimize_*
#include "nblist.h" // set_use_nblist
#include "pose_loops.h" // pose_refine_loops_with_ccd
#include "make_pdb.h" // several
#include "PackerTask.h" // class definition
#include "pack.h" // pack_rotamers
#include "param.h" // MAX_RES, MAX_AUTH_AA
#include "param_aa.h" // aa_name3, is_DNA
#include "param_pack.h" // gen_born flag
#include "pdb.h" // pdb_res_num
#include "pose_io.h" // fullatom_nonideal_initialized_pose_from_misc
#include "RotamerSet.h" // class definition
#include "score.h" // setup_score_weight_map, score12
#include "smallmove.h" // set_smallmove_size

// Utility Headers
#include <utility/basic_sys_util.hh> // utility::exit
#include <utility/io/all.hh> // utility::io::orstream
#include <ObjexxFCL/string.functions.hh> // lead_zero_string_of()

// ObjexxFCL headers
#include <ObjexxFCL/FArray1Ds.hh>

////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
// @begin
// Encapsulation of Rosetta standard protein-DNA interface design protocols
//
// @brief
// Add or append to member functions to implement new protocols (more subclassing may be appropriate).
//
// 'using namespace' declarations strictly avoided for clarity.
//
// @authors
// ashworth
//
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////

////////////////////////////////////////////////////////////////////////////////
// @begin DnaPose::init
//
// @brief
// First-time setup without an explicit input pose: pose_ is filled by
// pose_from_misc(), then the shared setup in init2() is run.
//
// @detailed
// May only be called on an object that is not yet initialized (asserted).
// NOTE(review): the meaning of the three boolean arguments passed to
// pose_from_misc() is not visible in this file -- confirm against its
// declaration before changing them.
//
// @authors
// ashworth
//
////////////////////////////////////////////////////////////////////////////////
void
DnaPose::init()
{
	assert( !initialized_ );
	pose_from_misc( pose_, true, false, true );
	init2();
}

////////////////////////////////////////////////////////////////////////////////
// @begin DnaPose::init
//
// @brief
// First-time setup from a caller-supplied pose: the pose is copied into pose_,
// then the shared setup in init2() is run.
//
// @param pose
// structure to copy; the argument itself is not modified
//
// @detailed
// May only be called on an object that is not yet initialized (asserted).
//
// @authors
// ashworth
//
////////////////////////////////////////////////////////////////////////////////
void
DnaPose::init( pose_ns::Pose const & pose )
{
	assert( !initialized_ );
	pose_ = pose;
	init2();
}

////////////////////////////////////////////////////////////////////////////////
// @begin DnaPose::init2
//
// @brief
// Setup shared by both init() overloads.  Expects pose_ to be filled already.
//
// @detailed
// In order: finds basepairs (filling dna_info_), runs unclash_phosphate() on
// the pose, records the reference sequence (optionally overridden from a
// resfile when the "ref_resfile" option is set), picks the root name used for
// output files, and finally marks the object as initialized.
//
// @authors
// ashworth
//
////////////////////////////////////////////////////////////////////////////////
void
DnaPose::init2()
{
	assert( !initialized_ );

	find_basepairs( pose_, dna_info_, true );

	// phosphates missing from the input must be built in; this attempts to
	// 'repack' them
	unclash_phosphate( pose_ );

	int nres( pose_.total_residue() );

	// the reference sequence defaults to the sequence of the input structure
	ref_seq_.clear();
	int seqpos( 1 );
	while ( seqpos <= nres ) {
		ref_seq_.push_back( pose_.res( seqpos ) );
		++seqpos;
	}

	// ...but a (presumably different) one can be read in instead
	if ( truefalseoption( "ref_resfile" ) ) {
		ref_seq_from_resfile( ref_seq_, pose_.res(), nres );
	}

	// root for file names: pdbout unless it still holds "des", in which case
	// output_file is used
	if ( files_paths::pdbout != "des" ) {
		infile_root_ = files_paths::pdbout;
	} else {
		infile_root_ = files_paths::output_file;
	}

	initialized_ = true;
}

////////////////////////////////////////////////////////////////////////////////
// @begin DnaPose::mutate_bases
//
// @brief
// Mutate DNA nucleotides in pose_ and keep dna_info_ in step with the changes.
//
// @param mutseq
// requested mutations; each entry supplies a forward position/type and,
// if it is paired, a reverse position/type
//
// @detailed
// For every entry the matching record in dna_info_ decides how the mutation is
// applied:
//  - record unpaired: only the forward base is changed
//  - record paired:   forward and reverse bases are both (re)set.  The reverse
//    type comes from the request when the request is paired, otherwise the
//    current reverse type is kept (non-complementary mutations are allowed).
// A paired request whose reverse position disagrees with dna_info_ is a fatal
// error (message on std::cerr, then utility::exit).
//
// @authors
// ashworth
//
////////////////////////////////////////////////////////////////////////////////
void
DnaPose::mutate_bases( DnaSeqInfo const & mutseq )
{
	std::vector< DnaPosInfo >::const_iterator request( mutseq.begin() );
	for ( ; request != mutseq.end(); ++request ) {

		int const fwd_pos( request->fwdpos() );
		int const fwd_type( request->fwdtype() );

		// bookkeeping record for this position in the main sequence info
		DnaPosInfo & record( dna_info_[fwd_pos] );

		// simple case first: nothing on the other strand to worry about
		if ( !record.paired() ) {
			record.set_type( fwd_type );
			mutate_base( pose_, fwd_pos, fwd_type );
			continue;
		}

		// paired: work out what the reverse base should become
		int rvs_type(0);
		if ( !request->paired() ) {
			// keep reverse type the same, in case non-complement mutation desired
			// (forcing complementation should be made option-dependent)
			rvs_type = record.rvstype();
		} else {
			int const requested_rvs_pos( request->rvspos() );
			if ( requested_rvs_pos != record.rvspos() ) {
				std::cerr << "Error!  Mismatched basepair index." << std::endl;
				utility::exit( EXIT_FAILURE, __FILE__, __LINE__ );
			}
			rvs_type = request->rvstype();
		}

		record.set_type( fwd_type, rvs_type );
		mutate_base( pose_, fwd_pos, fwd_type );
		mutate_base( pose_, record.rvspos(), rvs_type );
	}
}

////////////////////////////////////////////////////////////////////////////////
// @begin DnaPose::design
//
// @brief
// Main method for conventional protein-DNA interface design.
// To be refactored as new methods are incorporated.
//
// @authors
// ashworth
//
////////////////////////////////////////////////////////////////////////////////
void
DnaPose::design()
{
	if ( !initialized_ ) init();
	if ( opt_.decoystats ) set_decoystats_flag( true );
	std::cout << std::endl << "*** Starting DNA design ***" << std::endl;

	PackerTask task( pose_ );
	// make_output_file is false for local output handling
	FArray1D_bool allow_repack( param::MAX_RES(), true );
	task.set_task( "design", false, allow_repack, true );
	task.setup_residues_to_vary();
	task.set_nloop(1);
	find_dna_interface( task, 0., 0. );

	if ( opt_.dna_bb_moves ) {
		int const ndruns_save( design::ndruns );
		design::ndruns = 1;
/*
		// (using a separate PackerTask /may/ be unnecessarily paranoid)
		PackerTask task2( pose_ );
		// make_output_file is false for local output handling
		FArray1D_bool allow_repack( param::MAX_RES(), true );
		task2.set_task( "design", false, allow_repack, true );
		task2.setup_residues_to_vary();
		task2.set_nloop(1);
		find_dna_interface( task2, 0., 0. );
		pack_rotamers( pose_, task2 );
*/
		pack_rotamers( pose_, task );
		bb_moves();
		// output the new structure and quit (useful if short jobs are necessary)
		if ( opt_.no_design ) { output_pdb(); return; }
		//ja consider iterative bb_moves/sidechain redesign protocol here...
		design::ndruns = ndruns_save;
	}

	// local emulation of pack_rotamers() wrapper, to preserve interaction graph
	RotamerSet rotset;
	pack::InteractionGraphBase * ig = NULL;

	// build rotamers, compile energies
	// this rotamer_set and ig will include multiple DNA sequence options (via DNA rotamers) if the 'ALLNA' resfile flag is used
	FArray1D_float ligE1b;
	FArray1D_int cri( param::MAX_RES() );
	pack_rotamers_setup( task, rotset, ig, pose_, cri, ligE1b );

//// done setting up the packer ////

	// sequences to be designed against
	std::list< DnaSeqInfo > seqs;
	// add placeholder in case there is no specified target sequence, for the loop
	DnaSeqInfo emptyseq; seqs.push_back( emptyseq );

	if ( opt_.dna_scan ) {
		seqs.clear();
//		if ( opt_.one_seq_from_ig ) { // special case if sub-sequence from common ig file
//			seqs.push_back( dna_info_.target_set( opt_.userseq ) );
//		} else {
			DnaSeqInfo seq;
			std::vector<int> positions( dna_info_.design_positions() );
			all_dna_combinations( 0, positions, seq, seqs );
//			if ( opt_.userseq != "" ) start_scan_at( opt_.userseq_, seqs );
//		}

	} else if ( opt_.userseq != "" ) {
		// a single target sequence was passed in via the command line
		seqs.clear();
		seqs.push_back( dna_info_.target_set( opt_.userseq ) );
	}

	if ( opt_.verbose ) {
		std::cout << "Sequences to be designed:" << std::endl;
		debug_print_seqs( seqs );
	}

	std::ostringstream numstring;
	if ( opt_.design_by_base ) {
		numstring << pdb::pdb_res_num( dna_info_.design_positions()[0] );
	}
	if ( numstring.str() != "" ) infile_root_ += "_" + numstring.str();

	backup_pose_ = pose_;
	for ( std::list< DnaSeqInfo >::iterator seq( seqs.begin() );
	 seq != seqs.end(); ++seq ) {

		// does own handling of ndruns in order to control output (call pack_rotamers_run multiple times with ndruns of 1)
		for ( int run(0); run < design::ndruns; ++run ) {

			outfile_root_ = infile_root_; // refresh outfile root
			if ( !seq->empty() ) outfile_root_ += "_" + seq->str();

			std::cout << ">>> Run #" << I( 4, run + opt_.runindex_offset ) <<
			 std::endl;

			// refresh starting structure
			if ( seq != seqs.begin() ) pose_ = backup_pose_;

			if ( !opt_.no_design ) {
				std::vector<int> rot_to_pack;

				if ( !seq->empty() ) {
					// target sequence given, fix dna sequence from DNA rotamers
					restrict_dna_rotamers( rotset, *seq, rot_to_pack );

				} else if ( design::active_rotamer_options.dna_rotamers_exist ) {
				// DNA sequence varied by simulated annealing/monte carlo rotamer search
					pack::use_CorrelatedSimAnnealer = true;
					// this increases the number of DNA rotamers to x ratio of the total
					rot_to_pack = set_DNA_rotamer_ratio( rotset, 0.05 );
					// for safety: the packer will need an external up-to-date copy of the basepair array to correlate DNA rotamer choices
					dna_variables::basepairs = dna_info_;
				}

				// the rot_to_pack array should be ignored by the packer if empty
				pack_rotamers_run( task, rotset, ig, pose_, cri, ligE1b, rot_to_pack );

				// output structure before doing further analysis/remediation
				if ( opt_.dna_minimize ) minimize( false );
				output_pdb( run );
			}

			if ( !opt_.probe_specificity && !opt_.revert && !opt_.smm ) continue;

			// derive sequence map from the current structure, in case packing changed the DNA sequence.  (Could call find_basepairs instead to be extra safe)
			dna_info_.update_from_res( pose_.res() );
			DnaSeqInfo targetseq( dna_info_.design_set() );
			std::string targetstr( targetseq.str() );

			if ( opt_.verbose ) {
				std::cout << "DNA is " << targetstr << " after design." << std::endl;
			}

			float spec(0);
			if ( opt_.probe_specificity ) {
				// measure the specificity of the standard design
				spec = probe_dna_specificity( task, rotset, ig, cri, ligE1b,
				 targetseq, pose_.res(), 9999. );
			}

			if ( !opt_.revert && !opt_.smm ) continue;

			pose_ns::Score_weight_map wm;
			setup_score_weight_map( wm, score12 );
			float des_energy( pose_.score(wm) );

			bool changed(false);
			// revert mutated amino acids if they weren't specific
			if ( opt_.revert ) {
				changed = revert( task, rotset, ig, cri, ligE1b, targetseq, spec );
				// update design energy
				des_energy = pose_.score(wm);
			}
			// proceed to single-amino-acid mutational scanning for specificity
			if ( opt_.smm ) {
				changed = single_mutant_multistate( task, rotset, ig, cri, ligE1b,
				 targetseq, des_energy ) || changed;
			}
			if ( changed ) {
				if ( opt_.dna_minimize ) minimize( false );
				output_pdb( run );
				if ( opt_.probe_specificity ) {
					spec = probe_dna_specificity( task, rotset, ig, cri, ligE1b,
					                              targetseq, pose_.res(), 9999. );
					std::cout << "Final specificity: " << F(4,1,100*spec) << std::endl;
				}
			}

		} // runs
	} // target sequences
	delete ig; // IMPORTANT
}

////////////////////////////////////////////////////////////////////////////////
// @begin minimize
//
// @brief
// Minimizes input structure in pose_ns::Pose, then makes a PDB through design_output()
//
// @authors
// Ashworth
//
////////////////////////////////////////////////////////////////////////////////
void
DnaPose::minimize(
	bool const output_structure,
	int const run
)
{

//	std::cout << "pre_minim score: " << pose_.score( score12 ) <<
//	 std::endl;
//	pose_.dump_pdb( "pre_minim.pdb" );

	pose_.set_allow_bb_move( false );
	pose_.set_allow_chi_move( false );
//	std::cout << "Minimizing sidechains at residues:\n";

	for ( std::list< ResInfo >::iterator it( interface_list_.begin() );
        it != interface_list_.end(); ++it ) {

		pose_.set_allow_chi_move( it->pos, true );
//		std::cout << " " << pdb::pdb_res_num( it->first );
	}

	// DOES NOT YET INCLUDE DNA STATISTICAL POTENTIAL
//	std::cout << "M" << std::flush;
	pose_.main_minimize( score12, "dfpmin" );

	// why is the following not equal to bk_tot ???
//	std::cout << "post_minim score: " << pose_.score( score12 ) <<
//	 std::endl;

	if ( output_structure ) {
		// WARN: writes pose_ns::Pose to global arrays
		output_pdb( run );
	}
}

////////////////////////////////////////////////////////////////////////////////
// @begin probe_dna_specificity
//
// @brief
// Probe the dna specificity of a structure by doing fixed-backbone repacks
// against single-mutant competitors.
//
// @param task, rotset, ig, cri, ligE1b
// packer state, passed straight through to analyze_specificity()
//
// @param targetseq
// the target DNA sequence; its single_mutant_list() supplies the sequences
// that are packed
//
// @param sequence_ids
// residue types to pack with (may differ from pose_.res() if mutations are
// to be tested)
//
// @param anchor_val
// passed to calculate_specificity() together with opt_.Boltz_temp
//
// @return
// the value computed by calculate_specificity()
//
// @detailed
// pose_ is copied up front and put back after packing, so the caller's
// structure is unchanged on return.
//
// @authors
// ashworth
//
////////////////////////////////////////////////////////////////////////////////
float
DnaPose::probe_dna_specificity(
	PackerTask & task,
	RotamerSet const & rotset,
	pack::InteractionGraphBase * ig,
	FArray1D_int & cri,
	FArray1D_float & ligE1b,
	DnaSeqInfo const & targetseq,
	FArray1DB_int const & sequence_ids,
	float const anchor_val
)
{
	std::list< DnaSeqInfo > const competitors( targetseq.single_mutant_list() );

	// packing below overwrites pose_; keep the current design safe
	pose_ns::Pose saved_pose;
	saved_pose = pose_;

	std::list< NamedFloat > scores(
		analyze_specificity( task, rotset, ig, cri, ligE1b, competitors,
		                     sequence_ids )
	);

	pose_ = saved_pose;

	// analyze_specificity() returns scores in the order of the sequence list,
	// so here the first element is the target sequence
	NamedFloat target( scores.front() );

	// this normalizes multiple-bp data sets to be comparable with single-bp
	// ones, by increasing the proportion of the target to 1/4 of the population
	// (one extra target per extra bp, because there are three competitors per
	// bp)
	if ( opt_.normalize_Boltzmann && target.name.size() > 1 ) {
		scores.insert( scores.end(), target.name.size() - 1, target );
	}

	return calculate_specificity( scores, target.name, anchor_val,
	                              opt_.Boltz_temp );
}

////////////////////////////////////////////////////////////////////////////////
// @begin analyze_specificity
//
// @brief
// Pack the input structure vs. a list of dna sequence competitors to assess
// its specificity.  Returns a list of sequence scores; specificity is
// calculated by a separate function.
//
// @param task, rotset, ig, cri, ligE1b
// packer state handed to pack_rotamers_run()
//
// @param seqs
// DNA sequences to pack against
//
// @param sequence_ids
// base residue types onto which each DNA sequence is applied; can be
// different than pose.res if mutations are desired
//
// @return
// one NamedFloat per entry of seqs, in the same order: the sequence string and
// the lowest score12 energy seen over opt_.num_packs repacks (9999.0 if no
// repack scored lower, e.g. when num_packs is not positive)
//
// @detailed
// pose_ is overwritten by the packing (and by minimization if
// opt_.dna_minimize is set); callers that need it must save it themselves.
// Verbose progress goes to std::cerr.
//
// @authors
// ashworth
//
////////////////////////////////////////////////////////////////////////////////
std::list< NamedFloat > const
DnaPose::analyze_specificity(
	PackerTask & task,
	RotamerSet const & rotset,
	pack::InteractionGraphBase * ig,
	FArray1D_int & cri,
	FArray1D_float & ligE1b,
	std::list< DnaSeqInfo > const & seqs,
	// the following can be different than pose.res if mutations are desired
	FArray1DB_int const & sequence_ids
)
{
	pose_ns::Score_weight_map wm;
	std::list< NamedFloat > seq_scorelist;

	std::list< DnaSeqInfo >::const_iterator seq( seqs.begin() );
	for ( ; seq != seqs.end(); ++seq ) {

		std::string const label( seq->str() );
		if ( opt_.verbose ) std::cerr << "seq " << label << std::endl;

		FArray1D_int fixed_sequence( seq->res_from_seq( sequence_ids ) );

		// best of num_packs repacks (important: lots of noise in repacking)
		float lowest( 9999.0 );
		for ( int trial = 0; trial < opt_.num_packs; ++trial ) {

			// the rot_to_pack vector here can specify both protein and DNA sequence
			// (as long as rotamers exist for the specified sequence)
			pack_rotamers_run( task, rotset, ig, pose_, cri, ligE1b,
			                   rotset.rot_to_pack_single_sequence( fixed_sequence ) );

			if ( opt_.dna_minimize ) minimize( false );

			setup_score_weight_map( wm, score12 );
			float const energy( pose_.score(wm) );
			if ( opt_.verbose ) std::cerr << label << F(10,2,energy) << "  ";

			lowest = std::min( lowest, energy );
		}
		if ( opt_.verbose ) std::cerr << std::endl;

		seq_scorelist.push_back( NamedFloat( label, lowest ) );
	}
	return seq_scorelist;
}

////////////////////////////////////////////////////////////////////////////////
// @begin revert
//
// @brief
// Revert and repack designed mutations back to the original sequence
// ('wildtype') to analyze their contribution to the energy and specificity of
// the designed sequence.  A remedy for overmutation that is fundamentally
// different than the '-favor_native_residue' option.
//
// @param task, rotset, ig, cri, ligE1b
// packer state, used for the specificity probes and for packing an accepted
// reversion
//
// @param targetdna
// target DNA sequence against which specificity is probed
//
// @param orig_spec
// specificity of the incoming design; the starting point for comparisons
//
// @return
// true if at least one reversion was kept (pose_ changed), false otherwise
//
// @detailed
// Candidate positions are the design positions of interface_list_ whose
// residue differs from ref_seq_.  Each cycle shuffles the candidates and tries
// them one at a time; the first reversion whose specificity stays above
// (current specificity - 0.02) is kept, packed into pose_, and a new cycle
// starts.  The search stops when a cycle reaches the end of the candidate list
// without keeping anything, or after as many cycles as there are candidates.
// Appends "_rev" to outfile_root_ whether or not anything is reverted.
//
// @authors
// ashworth
//
////////////////////////////////////////////////////////////////////////////////
bool
DnaPose::revert(
	PackerTask & task,
	RotamerSet const & rotset,
	pack::InteractionGraphBase * ig,
	FArray1D_int & cri,
	FArray1D_float & ligE1b,
	DnaSeqInfo const & targetdna,
	float const orig_spec
)
{
	outfile_root_ += "_rev";

	std::cout << "Testing reversions to wildtype." << std::endl
	          << "Starting sequence: ";

	std::vector<int> positions;
	for ( std::list< ResInfo >::iterator it( interface_list_.begin() );
	      it != interface_list_.end(); ++it ) {
		if ( it->design != true ) continue;
		// output current sequence for designing positions
		int const pos(it->pos);
		int const aa( pose_.res(pos) );
		std::cout << I( 3, pdb::pdb_res_num(pos) ) << "-"
		          << param_aa::aa_name3( aa );
		// add mutated positions to reversion list (ref_seq_ is 0-based, sequence
		// positions are 1-based)
		if ( aa != ref_seq_[pos-1] ) {
			positions.push_back( it->pos );
			std::cout << "*";
		}
		std::cout << ", ";
	}
	std::cout << std::endl;
	if ( positions.size() == 0 ) {
		std::cout << "No mutant positions to revert." << std::endl; return false;
	}
	std::cout << "Starting specificity: " << F(4,1,orig_spec*100) << std::endl;

	// amount of specificity and/or energy considered insignificant
	float const spec_thresh( 0.02 ); // 2%
//	float const ener_thresh( 5.0 ); // rosettas

	std::string const targetstr( targetdna.str() );
	std::cout << "Target sequence is " << targetstr << std::endl;

	// for total number positions or until done reverting
	unsigned const maxcycles( positions.size() );
	unsigned cycle(0);
	pose_ns::Pose origpose;
	float curr_spec( orig_spec );

	while ( cycle < maxcycles ) {
		// random order, for fairness
		std::random_shuffle( positions.begin(), positions.end() );
		// over all positions until revertant found
		int mutpos(0);
		// preserve starting pose to simplify bookkeeping
		origpose = pose_;

		for ( std::vector<int>::const_iterator pos( positions.begin() );
		      pos != positions.end(); ++pos ) {

			mutpos = *pos;
			int const mut_aa( pose_.res(mutpos) ), rev_aa( ref_seq_[mutpos-1] );
			if ( mut_aa != rev_aa ) { // skip already reverted

				// start from reference pose to simplify bookkeeping for previous pos
				if ( pos != positions.begin() ) pose_ = origpose;

				std::cout << "Trying " << pdb::pdb_res_num(mutpos) << " from "
				          << param_aa::aa_name3(mut_aa) << " to "
				          << param_aa::aa_name3(rev_aa) << std::endl;

				// revert to original at pos
				FArray1D_int mutseq( pose_.res() );
				mutseq(mutpos) = rev_aa;

				float const spec( probe_dna_specificity( task, rotset, ig, cri, ligE1b,
				                  targetdna, mutseq, 9999. ) );

				// break out if this was a successful reversion (tracked by 'mutpos')
				if ( spec > ( curr_spec - spec_thresh ) ) {
					std::cout << " -- kept reversion, starting new round." << std::endl;
					curr_spec = spec;
					break;
				}
			}
			// no successful reversions and end of position list
			// (only reached when the position just visited was skipped or rejected)
			if ( mutpos == positions.back() ) {
				std::cout << "No (more) reversions maintain specificity." << std::endl;
				pose_ = origpose; // important, undoes reversion at last pos
				// 'cycle' counts kept reversions so far
				return ( cycle != 0 ); // made changes to main pose_?
			}
		} // end position loop

		pose_ = origpose; // restore unchanged reference pose
		// pack the reversion
		FArray1D_int ssres( pose_.res() );
		ssres(mutpos) = ref_seq_[mutpos-1];

		pack_rotamers_run( task, rotset, ig, pose_, cri,
		 ligE1b, rotset.rot_to_pack_single_sequence(ssres) );

		if ( opt_.dna_minimize ) minimize( false );

		++cycle;
	}
	return true;
}

////////////////////////////////////////////////////////////////////////////////
// @begin single_mutant_multistate
//
// @brief
// Designs for specificity against competitor sequences, one amino acid at a
// time.  Continues one mutation at a time until no more changes are found that
// optimize specificity.
//
// @param task, rotset, ig, cri, ligE1b
// packer state, used for the specificity probes and for packing an accepted
// mutation
//
// @param targetdna
// target DNA sequence against which specificity is probed
//
// @param ref_energy
// reference energy; opt_.ms_offset is added to it to give the anchor value
// used in every specificity calculation here
//
// @return
// true if at least one mutation was accepted and packed into pose_, false if
// the very first cycle found nothing that improves specificity
//
// @detailed
// Looks for single amino acid mutations that optimize specificity toward a
// target DNA sequence using a multistate approach.  This can be done as either
// an enumerative (deterministic) procedure, or a stochastic one.
// One cycle is defined as:
//
// Scheme I (opt_.smm_exhaustive; deterministic, slow, high-fidelity): all
// protein amino acid types are tested at every design position.  The absolute
// best mutation is kept.
//
// Scheme II (stochastic, fast, low-fidelity): The search order of design
// positions is randomized.  For each position, mutations to each amino acid
// type are tried in random order.  The first mutation that increases
// specificity is kept.  This approach yields faster, more diverse, and lower
// quality results (designed for running many short jobs in parallel).
//
// Mutations are packed versus a list of DNA sequence competitors.  Specificity
// is taken as the Boltzmann specificity, including an adjustable reference
// energy in the denominator to anchor affinity.  Efficiently uses single
// master RotamerSet and InteractionGraph, by switching sequence states via DNA
// rotamers and rot_to_pack vectors.
//
// Appends "_smm" to outfile_root_ whether or not anything is mutated.
//
// @authors
// ashworth
//
////////////////////////////////////////////////////////////////////////////////
bool
DnaPose::single_mutant_multistate(
	PackerTask & task,
	RotamerSet const & rotset,
	pack::InteractionGraphBase * ig,
	FArray1D_int & cri,
	FArray1D_float & ligE1b,
	DnaSeqInfo const & targetdna,
	float const ref_energy
)
{
	outfile_root_ += "_smm";

	std::string const targetstr( targetdna.str() );
	float const target_energy( ref_energy + opt_.ms_offset );
	std::cout << "Starting single-mutant multistate design:" << std::endl
	          << "\tTarget sequence is " << targetstr << std::endl
	          << "\tms_offset is " << F(5,2,opt_.ms_offset) << std::endl
	          << "\tTarget energy is " << F(7,2,target_energy) << std::endl;

	// set of designable positions
	std::vector<int> des_positions;
	for ( std::list< ResInfo >::const_iterator it( interface_list_.begin() );
	      it != interface_list_.end(); ++it ) {
		if ( it->design == true ) des_positions.push_back( it->pos );
	}

	std::cout << "Starting sequence:";
	for ( std::vector<int>::const_iterator pos( des_positions.begin() );
	      pos != des_positions.end(); ++pos ) {
		std::cout << I( 4, pdb::pdb_res_num(*pos) ) << "-"
		          << param_aa::aa_name3( pose_.res(*pos) ) << ", ";
	}
	std::cout << std::endl;

	// set of amino acid choices
	std::vector<int> aa_set;
	for ( int i(1); i <= param::MAX_AA(); ++i ) {
		if ( param_aa::is_protein(i) ) aa_set.push_back(i);
	}

	// get initial specificity, taking anchor value into account
	float curr_spec( probe_dna_specificity( task, rotset, ig, cri, ligE1b,
	 targetdna, pose_.res(), target_energy ) );

	unsigned const num_des( des_positions.size() );
	unsigned const maxcycles( std::max( num_des, unsigned(10) ) );
	unsigned cycle(0);
	int lastpos(0);
	pose_ns::Pose origpose;
	while ( cycle < maxcycles ) {
		// note: incremented up front, so inside the loop body 'cycle' is the
		// 1-based number of the cycle in progress
		++cycle;

		std::cout << "Cycle " << cycle << ": starting specificity (w/ anchor) " <<
		 F(3,0,100*curr_spec) << std::endl;

		// save copy of original pose to avoid some mutational bookeeping
		origpose = pose_;

		// best mutation found at each position visited this cycle
		std::map< int, NamedFloat > opt_muts;

		if ( !opt_.smm_exhaustive ) {
			// random order for fairness and stochasticity
			std::random_shuffle( des_positions.begin(), des_positions.end() );
		}
		//// position loop ////
		for ( std::vector<int>::iterator pos( des_positions.begin() );
		      pos != des_positions.end(); ++pos ) {
			// track previous position so as not to repeat the same one twice
			// (possible with randomized/incomplete position search)
			if ( *pos == lastpos ) continue;
			lastpos = *pos;

			pose_ = origpose; // restore original pose to erase changes at last pos
			int const seqpos(*pos);
			int const aa_orig( pose_.res(seqpos) ); // remember the starting aa
			std::string const aa_orig_name( param_aa::aa_name3(aa_orig) );
			std::cout << "Packing at position " << pdb::pdb_res_num(seqpos) <<
			 " (presently " << aa_orig_name << ")" << std::endl;

			FArray1D_int mutant_sequence( pose_.res() );
			NamedFloat opt_spec( "XXX", 0. );

			// random order of amino acids, if not exhaustively searching all aa's
			if ( !opt_.smm_exhaustive ) {
				std::random_shuffle( aa_set.begin(), aa_set.end() );
			}
			//// amino acid loop ////
			for ( std::vector<int>::const_iterator aa( aa_set.begin() );
			      aa != aa_set.end(); ++aa ) {

				std::string const aa_name( param_aa::aa_name3(*aa) );
				std::cout << A( 3, aa_name ) << " " << std::flush;

				// this array instructs probe_specificity() to make mutations
				mutant_sequence(seqpos) = *aa;

				float const spec( probe_dna_specificity( task, rotset, ig, cri, ligE1b,
				 targetdna, mutant_sequence, target_energy ) );

				if ( spec > opt_spec.val ) {
					opt_spec.name = aa_name; opt_spec.val = spec;
					if ( !opt_.smm_exhaustive && ( spec > curr_spec ) ) {
						// if doing a non-exhaustive search,
						// take the first mutation that increases specificity
						break;
					}
				}
			} // end amino acid loop
			opt_muts[seqpos] = opt_spec;
			if ( !opt_.smm_exhaustive && ( opt_spec.val > curr_spec ) ) {
				// if doing a non-exhaustive search,
				// take the first mutation that increases specificity
				break;
			}
		} // end position loop

		// review optimal mutations, find best
		// (mutpos stays 0 if no position was visited or nothing scored above 0)
		NamedFloat new_mut( "XXX", 0. );
		int mutpos(0);
		for ( std::map<int,NamedFloat>::const_iterator it_mut( opt_muts.begin() );
		      it_mut != opt_muts.end(); ++it_mut ) {

			if ( it_mut->second.val > new_mut.val ) {
				mutpos = it_mut->first;
				new_mut = it_mut->second;
			}
			std::cout << "Optimal mutation at pos " <<
			 pdb::pdb_res_num(it_mut->first) << " was " << it_mut->second.name <<
			 ": " << F(3,0,100*it_mut->second.val) << std::endl;
		}

		// stop if there is no candidate at all (mutpos of 0 is not a sequence
		// position and must not be used as an index) or if the best candidate
		// does not beat the current specificity
		if ( mutpos == 0 || !( new_mut.val > curr_spec ) ) {
			std::cout << "No mutations exist which further optimize specificity" <<
			 std::endl;
			pose_ = origpose; // restore starting pose
			// every earlier cycle ended by accepting a mutation, so the main pose_
			// was changed exactly when this is not the first cycle
			return ( cycle > 1 );
		}

		// mutation exists that increases specificity
		int opt_aa( origpose.res(mutpos) ); // default revert to original
		curr_spec = new_mut.val;
		num_from_res3( new_mut.name, opt_aa );
		std::cout << "Mutation to " << new_mut.name << " at position " <<
		 I(4,pdb::pdb_res_num(mutpos)) << " increases specificity to " <<
		 F(3,2,new_mut.val) << std::endl;

		// restore unaltered pose
		pose_ = origpose;
		// pack the mutation
		FArray1D_int ssres( pose_.res() );
		ssres(mutpos) = opt_aa;

		pack_rotamers_run( task, rotset, ig, pose_, cri, ligE1b,
		                   rotset.rot_to_pack_single_sequence(ssres) );

		if ( opt_.dna_minimize ) minimize( false );

		// sort temporarily for printing status
		if ( !opt_.smm_exhaustive ) std::sort( des_positions.begin(),
		                                       des_positions.end() );

		std::cout << "Cycle " << cycle << " ending sequence:";
		for ( std::vector<int>::const_iterator pos( des_positions.begin() );
		      pos != des_positions.end(); ++pos ) {
			std::cout << I( 4, pdb::pdb_res_num(*pos) ) << "-"
			          << param_aa::aa_name3( pose_.res(*pos) ) << ", ";
		}
		for ( std::vector<DnaPosInfo>::const_iterator bp( targetdna.begin() );
		      bp != targetdna.end(); ++bp ) {
			int const pos( bp->fwdpos() );
			std::cout << I( 4, pdb::pdb_res_num(pos) ) << "-"
			          << param_aa::aa_name3( pose_.res(pos) ) << ", ";
		}
		std::cout << std::endl;

	} // end outer cycles

	if ( cycle == maxcycles ) {
		std::cout << "Maximum number of cycles (" << maxcycles << ") reached." <<
		 std::endl;
	}
	return true;
}

////////////////////////////////////////////////////////////////////////////////
/// @begin bb_moves
///
/// @brief
/// Do small backbone moves in the protein-DNA interface
///
/// @detailed
/// This is intended to introduce minor backbone flexibility into the protein
/// backbone wherever redesign is taking place.
///
/// This code is modeled from loop_refinment with necessary changes for DNA.
/// Jump constraint method as in pose_jjh_loops.
///
/// Steps: group the residues of interface_list_ into contiguous segments
/// (padded by a couple of residues on each side), add one jump per segment to
/// the fold tree of pose_, configure the minimizer and scoring, then call
/// pose_refine_loops_with_ccd().
///
/// Does nothing (apart from a message) if interface_list_ is empty.
///
/// Global state: param_pack::gen_born and loops_ns::fast are changed for the
/// duration of the call and restored afterwards.  The minimizer settings,
/// small-move size, constraint mode, nblist flag and loops_ns::fix_natsc set
/// here are NOT restored, and the fold tree of pose_ keeps the added jumps.
///
/// NOTE(review): segment bounds are start/end position -/+ the spread without
/// clamping to the valid residue range -- confirm that interface residues
/// never sit within two residues of a terminus.
///
/// @authors
/// ashworth
///
////////////////////////////////////////////////////////////////////////////////
void
DnaPose::bb_moves()
{
	// front() below needs at least one interface residue; bail out before any
	// global setting has been touched
	if ( interface_list_.empty() ) {
		std::cout << "\nNo interface residues: skipping backbone moves\n";
		return;
	}

	std::cout << "\nStarting backbone moves\n";

	// gen_born is too slow for the many scoring steps
	bool const gen_born_save = param_pack::gen_born;
	param_pack::gen_born = false;
	bool const fast_save = loops_ns::fast;
	loops_ns::fast = true;

	pose_ns::Loops move_segments;

	// look through interface_list for continuous regions to move
	// assumes interface_list is in ascending numerical order (should be)
	int start_pos( interface_list_.front().pos ), last_pos( start_pos );
	char last_chain( pdb::res_chain(start_pos) );
	// n: largest gap still treated as one segment; s: padding ("spread") added
	// on both sides of a segment
	int n(6), s(2);
	// careful to avoid overlap
	assert( n >= 3 * s );

	// figure out loop segments based on designing residues
	for ( std::list< ResInfo >::const_iterator rsd( interface_list_.begin() );
	      rsd != interface_list_.end(); ++rsd ) {

		// upon large sequence discontinuity, chain change, or reaching the final
		// list entry, store the last stretch as a Loop
		if ( rsd->pos > last_pos + n || pdb::res_chain(rsd->pos) != last_chain ||
		     (*rsd) == interface_list_.back() ) {

			int const seg_start( start_pos - s );
			int seg_end( last_pos + s );
			// include final pos in last segment, unless isolated
			if ( (*rsd) == interface_list_.back() && rsd->pos <= last_pos + n ) {
				seg_end = rsd->pos + s;
			}
			// "random" cutpoints (biased toward ends)
			int const cut( choose_cutpoint( pose_, seg_start, seg_end ) );
			move_segments.add_loop( seg_start, seg_end, cut, 0, false );

			std::cout << "segment added: " << pdb::pdb_res_num(seg_start) << "-(" <<
			 pdb::pdb_res_num(cut) << ")-" << pdb::pdb_res_num(seg_end) <<
			 " Chain " << last_chain << std::endl;

			start_pos = rsd->pos;
		}
		last_pos = rsd->pos;
		last_chain = pdb::res_chain( rsd->pos );
	}

	// confine changes to moving segments using "jump" constraints
	for ( pose_ns::Loops::const_iterator segment( move_segments.begin() );
	      segment != move_segments.end(); ++segment ) {

		pose_ns::Fold_tree f( pose_.fold_tree() );
		// jump between the residues flanking the segment, cut inside it
		f.new_jump( segment->start()-1, segment->stop()+1, segment->cut() );
		pose_.set_fold_tree( f ); // regenerate atom tree with new folding rules
	}
	pose_.set_allow_jump_move( false );

	// minimizer settings: these settings copied from loop_relax
	// modifiable as necessary to optimize for DNA interface
	minimize_exclude_sstype( false, false );
	minimize_set_vary_phipsi( true );
	minimize_set_vary_chi( true );
	minimize_set_vary_omega( false ); // NOT omega, but everything else
	minimize_set_vary_rb_angle( true );
	minimize_set_vary_rb_trans( true );
	minimize_set_tolerance( 0.005 );
	minimize_set_local_min( false, 0 ); // all non-move-list rsds minimized
	set_smallmove_size( 2.0, 2.0, 3.0 );
	score_set_cst_mode( 3 );
	set_use_nblist( false );

	pose_ns::Score_weight_map wm;
	// will want DNA potentials in here eventually
	setup_score_weight_map( wm, score12 );
	wm.set_weight( pose_ns::CHAINBREAK, 1.0 );
	wm.set_weight( pose_ns::CHAINBREAK_OVERLAP, 1.0 );
	wm.set_weight( pose_ns::CST_SCORE, 1.0 );

	loops_ns::fix_natsc = true; // don't repack the neighbors of moving residues

	// callers are expected to have set ndruns to 1 already; builds without
	// asserts fall back to forcing it
	assert( design::ndruns == 1 );
	if ( design::ndruns != 1 ) {
		std::cout << "\nPointless to try ndruns > 1 during loop_refinement!\n";
		std::cout << "Forcing ndruns = 1." << std::endl;
		design::ndruns = 1;
	}
	// the business
	pose_refine_loops_with_ccd( pose_, wm, move_segments );

	param_pack::gen_born = gen_born_save;
//	pose_flag_ns::pose_flag_setting = pose_flag_orig;
	loops_ns::fast = fast_save;

}

////////////////////////////////////////////////////////////////////////////////
// @begin DnaPose::configure_design_behavior
//
// @brief
// Review the resfile options, make hard DNA mutations if necessary
//
// @authors
// ashworth
//
////////////////////////////////////////////////////////////////////////////////
void
DnaPose::configure_design_behavior(
	PackerTask const & task
)
{
	// Translates the per-position resfile flags held by the task into the
	// design/minimize switches of the tracked DNA positions, and applies any
	// direct base substitutions requested through the PIKBA flag.
	//   task - packer task providing the resfile flags and the design map

	// becomes true once any position has had its base type replaced
	bool need_hard_mutation( false );

	int const npos( task.total_residue() );
	for ( int pos(1); pos <= npos; ++pos ) {

		// only positions known to dna_info_ are configured here
		if ( !dna_info_.contains( pos ) ) continue;

		// PackerTask accessor to resfile flags
		std::string const flag( task.get_resfile_option( pos ) );

		// DESBA/PIKBA/ALLNA/PIKNA switch on both design and minimization;
		// VARYB switches on minimization only; anything else switches both off
		bool const pick_base( flag == "PIKBA" );
		bool const designable( pick_base || ( flag == "DESBA" ) ||
		 ( flag == "ALLNA" ) || ( flag == "PIKNA" ) );
		bool const minimizable( designable || ( flag == "VARYB" ) );

		dna_info_[pos].set_design( designable );
		dna_info_[pos].set_minimize( minimizable );

		if ( !pick_base ) continue;

		// PIKBA (non-rotameric): adopt the first DNA type that the design map
		// allows at this position, together with its partner type
		// NOTE(review): the upper bound is exclusive, so the type numbered
		// param::MAX_AA() itself is never examined - confirm this is intended
		for ( int na(1); na < param::MAX_AA(); ++na ) {
			if ( param_aa::is_DNA(na) && task.get_designmap().get(pos,na) ) {
				dna_info_[pos].set_type( na, na_partner(na) );
				need_hard_mutation = true;
				break;
			}
		}
	}

	// make direct dna substitutions
	if ( need_hard_mutation ) mutate_bases( dna_info_ );
}

////////////////////////////////////////////////////////////////////////////////
// @begin DnaPose::find_dna_interface
//
// @brief
// This function identifies protein residues that interface with DNA and
// controls their automatic packing behavior.
//
// @authors
// ashworth, duarte, havranek
//
////////////////////////////////////////////////////////////////////////////////
void
DnaPose::find_dna_interface(
	PackerTask & task,
	float d2_vicinity,
	float d2_contact
)
{
	// Records, for every residue/nucleotide pair, whether the two are close and
	// whether they are in contact; then, for residues under automatic control,
	// adjusts the task's design map from those findings. Finally rebuilds
	// interface_list_ from the resulting design map.
	//   task        - packer task whose design map is read and modified
	//   d2_vicinity - threshold passed to base_proximity(); 0 selects 100.0
	//   d2_contact  - threshold passed to argrot_base_specific_neighbor();
	//                 0 selects 14.0
	if ( !initialized_ ) init();

	std::cout << "\nFinding DNA or RNA Interface.  The behavior of " <<
	 "AUTOM/AUTOP-flagged residues will be set by automatic interface " <<
	 "detection of contact to ";
	std::cout << ( opt_.design_by_base ? "designed bases.\n" : "any DNA.\n" );

	// a threshold of zero requests the built-in default
	if ( d2_vicinity == 0. ) d2_vicinity = 100.0; // 10 Angstrom c-beta cutoff
	if ( d2_contact == 0. ) d2_contact = 14.0; // 3.74 Angstrom atom-atom cutoff

	std::cout << "Vicinity threshold is " << F(4,1,d2_vicinity) <<
	 " (c-beta to base distance)" << std::endl;
	std::cout <<  "Contact threshold is " << F(3,1,d2_contact) <<
	 " (rotamer all-atom to base)" << std::endl;
	std::cout << "DNA z-axis cutoff is " << F(3,1,opt_.dz_cutoff) <<
	 " (helical distance to sidechain origin)" << std::endl;

	int const nres( pose_.total_residue() );

	dna_neighbors_.dimension( nres );

	// additional setup based on resfile settings for DNA
	// (mutates bases when the PIKBA resfile flag is present)
	configure_design_behavior( task );

	// classify each residue against every nucleotide
	for ( int res1(1); res1 <= nres; ++res1 ) {
		int aa( pose_.res(res1) );
		bool makes_contact( false ); // contacts a qualifying base
		bool is_nearby( false ); // close to a qualifying base, without contact

		for ( int dres(1); dres <= nres; ++dres ) {
			int daa( pose_.res(dres) );
			if ( !param_aa::is_DNA(daa) ) continue;

			dna_neighbors_(res1,dres).close = base_proximity( aa, daa,
			 pose_.full_coord()(1,1,res1), pose_.full_coord()(1,1,dres),
			 d2_vicinity );
			if ( !dna_neighbors_(res1,dres).close ) continue;

			// every nucleotide qualifies when the whole interface is considered;
			// with design_by_base only nucleotides flagged for design qualify
			bool const qualifies( !opt_.design_by_base ||
			 ( dna_info_.contains(dres) && dna_info_[dres].design() ) );

			// check z-axis displacement to dres before trying arginine sweep
			float dz = z_axis_check(
				pose_.full_coord()(1,aaproperties_pack::first_scatom(aa,1),res1),
				pose_.res(dres), pose_.full_coord()(1,1,dres)
			);

			if ( qualifies && dz < opt_.dz_cutoff ) {
				dna_neighbors_(res1,dres).contact =
					argrot_base_specific_neighbor( res1, dres, pose_, d2_contact );
			}

			// NOTE(review): when the sweep above is skipped, the value read
			// below is whatever this entry already held - confirm that
			// dimension() leaves entries in a known state
			if ( !dna_neighbors_(res1,dres).contact ) {
				// just repack
				if ( qualifies ) is_nearby = true;
				continue;
			}

			// contacts bases: redesign
			if ( opt_.design_by_base ) {
				if ( qualifies ) makes_contact = true;
			} else if ( design::chain_limit ) {
				makes_contact = ( pdb::res_chain(res1) == design::chain_choose );
			} else {
				makes_contact = true;
			}
		}

		// set packing behavior in the design map based on dna vicinity and
		// contact, but only if flagged for automatic behavior (AUTOP/AUTOM flags)
		std::string message;
		if ( design::automatic_behavior(res1) ) {
			if ( makes_contact ) {
				// NOTE(review): this branch only chooses a message; no design-map
				// call is made here - confirm the removal is performed elsewhere
				message = dna_variables::no_sidechain(res1) ?
				 "is missing a sidechain - removing from repack" :
				 "will not be restricted";

			} else if ( is_nearby ) {
				// disable every type other than the current one
				for ( int atype = 1; atype <= param::MAX_AUTH_AA; ++atype ) {
					if ( atype != aa ) task.get_designmap().disable( res1, atype );
				}
				message = "will be restricted to native amino acid";

			} else {
				task.get_designmap().fix_completely( res1 );
			}
		} else if ( task.get_designmap().repack_residue(res1) ) {
			if ( !param_aa::is_DNA(aa) ) {
				message = "has user-defined behavior";
			} else if ( task.get_designmap().num_allowed_aa(res1) > 1 ) {
				message = "is a rotameric nucleotide";
			} else {
				message = "is a hydrated nucleotide";
			}
		} else if ( dna_info_.contains(res1) &&
		          ( dna_info_[res1].design() || dna_info_[res1].minimize() ) ) {
			message = "is a static DNA design position";
		}

		// output residue info
		if ( !message.empty() ) {
			std::cout << param_aa::aa_name3(aa) << I( 4, pdb::pdb_res_num(res1) ) <<
			 " " << pdb::res_chain(res1) << " " << message << std::endl;
		}
	}
	std::cout << '\n';

	// persistent list of residues set for repack/redesign, for future reference
	interface_list_.clear();

	for ( int pos(1); pos <= nres; ++pos ) {
		int const type( pose_.res(pos) );
		if ( !task.get_designmap().repack_residue( pos ) ) continue;
		if ( !param_aa::is_protein(type) ) continue;

		// a position counts as designed when any type other than its current
		// one is still enabled in the design map
		bool redesign( false );
		for ( int aa(1); aa <= param::MAX_AUTH_AA && !redesign; ++aa ) {
			redesign = ( task.get_designmap().get(pos,aa) && aa != type );
		}
		interface_list_.push_back( ResInfo( pos, type, redesign ) );
	}
}


////////////////////////////////////////////////////////////////////////////////
// @begin DnaPose::output_pdb
//
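// @brief
// Scores the current pose and writes its coordinates, followed by scoring and
// protein-DNA interface information, to a numbered (optionally .gz) pdb file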
// @authors
// ashworth
//
////////////////////////////////////////////////////////////////////////////////
void
DnaPose::output_pdb( int const run )
{
	// compose filename
	std::string fullname( files_paths::pdb_out_path + outfile_root_ );
	fullname += "_" + lead_zero_string_of( run+opt_.runindex_offset, 4 ) + ".pdb";
	if ( files_paths::output_pdb_gz ) fullname += ".gz";

	utility::io::ozstream outf( fullname );

	if ( !outf ) {
		std::cout << "trouble opening output pdbfile " << fullname << std::endl;
		utility::exit( EXIT_FAILURE, __FILE__, __LINE__ );
	}

	// fill in global arrays (read by make_pdb() and related functions)
	pose_.copy_to_misc();
	dna_variables::interface_list = interface_list_;
	dna_variables::basepairs = dna_info_;

	score_set_new_pose(); // ?
	score12(); // score the structure (also updates many global arrays)
	decoystats_store_decoy();

	// write information to file (make_pdb.cc)
	output_fullcoord_pdb( outf );
	output_pdb_stats( outf, true );
	output_chi( outf );
	output_torsion_angles( outf );

	// DNA-specific (and class-specific) information
	if ( opt_.all_interface_stats ) {
		for ( int prt(1); prt <= pose_.total_residue(); ++prt ) {
			for ( int dna(1); dna <= pose_.total_residue(); ++dna ) {
				if ( dna_neighbors_(prt,dna).close ) {
					interface_list_.push_back( ResInfo( prt, true ) );
					break;
				}
			}
		}
	}
	output_moltenres( outf );  // note molten residues

	// the following depend on global arrays (misc, hbonds, scores...)

	// get protein-DNA hbond table
	dna_hbond_info( outf, interface_list_ );
	// some base-specific energies
	output_NA_base_specific_energy( outf, dna_info_ );
	// protein-DNA ddG calc
	if ( opt_.ddG ) dna_ddG( outf, interface_list_ );

	outf.close();

	if ( opt_.verbose ) std::cerr << fullname << " written" << std::endl;
}

////////////////////////////////////////////////////////////////////////////////
// @begin DnaPose::output_moltenres
//
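// @brief
// Writes the lists of packed, designed and mutated interface positions, and of
// designed/minimized DNA positions, to the given output stream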
// @authors
// ashworth
//
////////////////////////////////////////////////////////////////////////////////
void
DnaPose::output_moltenres(
	utility::io::orstream & outf
	)
{
	// Reports which positions were varied: the entries of interface_list_ sorted
	// into packed / designed / mutated, followed by the DNA positions taken from
	// dna_info_. Each position is written as its pdb residue number and chain.
	//   outf - stream that receives the report
	outf << "\nResidues varied in this design:" << '\n';

	std::list< std::string > packed, designed, mutated;

	for ( std::list< ResInfo >::iterator rsd( interface_list_.begin() ),
	 rsd_end( interface_list_.end() ); rsd != rsd_end; ++rsd ) {
		int pos( rsd->pos );
		std::string const label(
		 I( 4, pdb::pdb_res_num(pos) ) + A( 2, pdb::res_chain(pos) ) );

		// not flagged for design -> packed; flagged and now of a different type
		// than recorded -> mutated; flagged and unchanged -> designed
		std::list< std::string > * bucket( &packed );
		if ( rsd->design == true ) {
			bucket = ( pose_.res(pos) != rsd->type ) ? &mutated : &designed;
		}
		bucket->push_back( label );
	}

	output_res_list( "PackRes", outf, packed );
	output_res_list( "DesignRes", outf, designed );
	output_res_list( "Mutated", outf, mutated );

	// DNA positions: those designed or minimized, or all of them when
	// all_interface_stats is set
	std::list< std::string > des_dna;

	for ( std::vector< DnaPosInfo >::const_iterator na( dna_info_.begin() );
	      na != dna_info_.end(); ++na ) {

		if ( !( na->design() || na->minimize() || opt_.all_interface_stats ) ) {
			continue;
		}

		int fwd( na->fwdpos() );
		des_dna.push_back( I(4,pdb::pdb_res_num(fwd))+A(2,pdb::res_chain(fwd)) );

		// the reverse-strand position is listed only for paired entries
		if ( na->paired() ) {
			int rvs( na->rvspos() );
			des_dna.push_back( I(4,pdb::pdb_res_num(rvs))+A(2,pdb::res_chain(rvs)) );
		}
	}
	output_res_list( "DNA_des", outf, des_dna );
}
