// -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
// vi: set ts=2 noet:
//  CVS information:
//  $Revision: 13124 $
//  $Date: 2007-02-27 14:15:47 -0800 (Tue, 27 Feb 2007) $
//  $Author: bblum $


// Rosetta Headers
#include "featurizer.h"
#include "after_opts.h"
#include "barcode_stats.h"
#include "cenlist.h"
#include "current_pose.h"
#include "decoystats.h" // rsd_exposed_sasa (int const rsd)
#include "dssp.h"
#include "featurizer_classes.h"
#include "files_paths.h"
#include "fullatom_energies.h" // per-residue energies
#include "fullatom_energy.h" // calculate full-atom energy for neighbors info
#include "fullatom_sasa.h" // calc_per_atom_sasa()
#include "force_barcode.h" //definitions of Secstruct, BigBin, BBlum, DSSP states
#include "hbonds.h" //For LRHB and SRHB energies.
#include "misc.h"
#include "orient_rms.h"
#include "pack.h"
#include "packing_measures.h"
#include "pose.h"
#include "pose_io.h"
#include "pose_rms.h"
#include "pose_vdw.h"
#include "rotamer_functions.h"
#include "score.h"
#include "silent_input.h"
#include "structure.h"
#include "symmetry_info.h"
#include "template_pack.h" // neighbors info
#include "utility/io/ozstream.hh"

// C++ Headers
#include <cstdlib>

///////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
/// @begin featurizer
///
/// @brief Generates feature strings for decoys from silent files
///
/// @detailed
/// Generates, for each input .out file, an output file of feature values.
/// Each line in the output file corresponds to a single decoy.
/// Features currently include torsion angle ramachandran bins, secondary
/// structure, beta strand pairings and register shifts, rotamers, neighbor
/// counts, per-residue energy terms, Will's packing measure, energy, rms,
/// and several kinds of contacts, including beta strand pairing contacts.
/// Indicate which features are requested via command line flags (e.g.
/// -torsion_feats for torsion features).
/// Additional flags:
/// -pair_feats_bulge: for register shifts, look for beta bulges.  If this
/// flag is not present, a typical register shift value will be, e.g., E4
/// for Even pleating and a register shift of 4.  If the strand pairing also
/// includes some pairings for register shift 5 (i.e. there is a beta bulge)
/// then the register shift is reported that has the most residues paired.
/// if -pair_feats_bulge is present, however, the register shift is reported
/// as E4O5 (or E4E5, depending on pleating).
/// -filter_contact_feats <prob_cutoff>
/// If present, only keep contacts which show sufficient variation in at least
/// one decoy set, or that capture energy variation.  The value following the
/// flag is used as the probability cutoff.  Otherwise, all contacts
/// which are present in any decoys at all are kept as features.
///
/// Pairing features
/// The register shift is with reference to the two residues indicated in
/// the register shift feature's name.  For example, if the feature is named
/// Pair_25_45_A, a register shift of 0 indicates that residues 25 and 45 are
/// paired (and the pairing is antiparallel).  A register shift of 5 should be
/// divided evenly between the two residue numbers, with preference to the
/// first, so residues 25+3 = 28 and 45+2 = 47 are paired.  For parallel
/// strands, register shifts SUBTRACT from the higher residue number, so
/// for Pair_25_45_P, a register shift of 5 indicates that residues 25+3=28
/// and 45-2 = 43 are paired.  Pleating of E indicates that the pleating
/// should be identified as "2" in the jumping file; pleating of O indicates
/// the pleating is "1" (odd).
///
/// Beta contact features
/// Each feature is named Bct_<res1>_<res2>_<A|P>_<pleating>, where pleating
/// is 1 or 2.
/// B: the pair is present with that pleating.
/// X: absent.
/// Bct_25_35_A_1 is an antiparallel pairing (first number smaller).
/// Bct_35_25_P_1 is a parallel pairing (second number smaller).
///
/// @global_read
///
/// @global_write
///
/// @remarks
///
/// @references
///
/// @authors
///
/// @last_modified
////////////////////////////////////////////////////////////////////////////////
void
featurize()
{
	using namespace pose_ns;
	using namespace packing_ns;
	using namespace fullatom_energies;
	using namespace cenlist_ns;
	using namespace structure::BOUNDARY;
	using namespace featurizer_ns;
	using namespace barcode_stats_ns;
	using namespace silent_io;
	using namespace dssp_ns;

	if(!truefalseoption("infiles")) {
		std::cout << "ERROR: no input .out files\n";
		return;
	}
	std::string inputs = stringafteroption("infiles");

	std::string featfile;
	std::string topofile, topodatafile, topopairfile, topopairdatafile;
	std::string datasetfile;
	std::string dprefix = inputs + ".decoys";
	std::string nprefix = inputs + ".natives";
	featfile = inputs + ".features";
	topofile = inputs + ".topologies";
	topodatafile = inputs + ".topodata";
	topopairfile = inputs + ".pairings";
	topopairdatafile = inputs + ".pairdata";
	utility::io::ozstream names(featfile);
	bool merge = truefalseoption("merge_feature_files");

	bool tor_feats = truefalseoption("torsion_feats");
	FArray2D_char tor;
	bool ss_feats = truefalseoption("ss_feats");
	FArray2D_char ss;
	bool dssp_feats = truefalseoption("dssp_feats");
	FArray2D_char dss;
	bool rss_feats = truefalseoption("rss_feats");
	FArray2D_char rss;
	bool frag_ss_feats = truefalseoption("frag_ss_feats");
	FArray2D_char fss;
	int rot_feats = 0;
	if(truefalseoption("rotamer_feats"))
		rot_feats = intafteroption("rotamer_feats");
	FArray3D_char rot;
	bool res_enrg_feats = truefalseoption("res_enrg_feats");
	FArray2D_float resenrg;
	bool enrg_feat = truefalseoption("enrg_feat");
	FArray1D_float enrg;
	bool pair_feats = truefalseoption("pair_feats");
	bool pair_feats_bulge = truefalseoption("pair_feats_bulge");
	bool numadj_feats = truefalseoption("numadj_feats");
	FArray2D_float numadj;
	bool sasa_feats = truefalseoption("sasa_feats");
	FArray2D_float sasa;
	bool bsasa_feats = truefalseoption("bsasa_feats");
	bool rms_feat = truefalseoption("rms_feat");
	FArray1D_float rms;
	bool name_feat = truefalseoption("name_feat");
	bool contact_feats = truefalseoption("contact_feats");
	FArray2D_char ct;
	bool fa_contact_feats = truefalseoption("fa_contact_feats");
	FArray2D_char fact;
	bool beta_contact_feats = truefalseoption("beta_contact_feats");
	bool topo_feat = truefalseoption("topo_feat");
	bool topo_pair_feats = truefalseoption("topo_pair_feats");
	if(topo_pair_feats)
		topo_feat = true;
	std::list<StrandPairingSet> topologies;
	std::list<std::list<int> > topocodes;
	std::list<StrandPairing> pairings;
	std::vector<int> decoy_topo_ids;
	bool barcode_feat = truefalseoption("barcode_input");
	FArray2D_char bct_1;
	FArray2D_char bct_2;
	float prob_cutoff = 0.0;
	if(truefalseoption("filter_contact_feats"))
		prob_cutoff = realafteroption("filter_contact_feats");

	int user_res1 = 0, user_res2 = 0, user_orient = 0;
	if(truefalseoption("pair_feat_res1")) {
		user_res1 = intafteroption("pair_feat_res1");
		user_res2 = intafteroption("pair_feat_res2");
		if(user_res1 > user_res2) {
			user_orient = 2;
			int temp = user_res1;
			user_res1 = user_res2;
			user_res2 = temp;
		} else
			user_orient = 1;
	}

	bool fullatom = fa_contact_feats || numadj_feats || sasa_feats || bsasa_feats || rot_feats > 0 || res_enrg_feats;

	bool test_new_ss = truefalseoption("test_new_ss");
	if(test_new_ss) {
		name_feat = tor_feats = ss_feats = rss_feats = dssp_feats = frag_ss_feats = res_enrg_feats = enrg_feat = pair_feats = numadj_feats = sasa_feats = bsasa_feats = rms_feat = contact_feats = fa_contact_feats = beta_contact_feats = fullatom = false;
		rot_feats = 0;
	}

	StrandPairing::BIG_BULGE_LIMIT = 6;
	StrandPairing::SMALL_BULGE_LIMIT = 4;


	Pose native_pose;
	pose_from_pdb( native_pose, files_paths::protein_name+".pdb", false, false );

	int nres = native_pose.total_residue();


	// loop over decoys once to get raw counts
	int decoy_counter(0);
	int total_decoys;
	Pose pose;
	Pose bestpose;
	ProteinSasa sasinator;
	set<int> worst_res;
	float min_score = 1000.0;
	float low_score_cutoff;
	float dec_score;
	Pairing_list feature_list;
	Contact_list fa_contact_list;
	Contact_list cen_contact_list;
	Contact_list beta_contact_list_1;
	Contact_list beta_contact_list_2;
	dssp_ns::DSSP dssp;

	// raw counts of decoy pairings:
	FArray2D_int pairing_count;

	// counts of fullatom pairings.
	FArray4D_int fa_contact_count;
	FArray4D_int cen_contact_count;
	FArray4D_int beta_pairing_count_1;
	FArray4D_int beta_pairing_count_2;

	std::string filename;
	Silent_file_data * silent_data;
	int dataset = 0;
	int decoyset = 0;
	int nativeset = 0;
	bool initialized = false;
	// loop once to determine which features to include
	if(pair_feats || fa_contact_feats || beta_contact_feats || bsasa_feats || topo_feat) {
		utility::io::izstream infiles( inputs);
		while(infiles.getline(filename)) {
			if(filename[0] != '*') { // not native
				silent_data = new Silent_file_data( filename, fullatom );
				if(!initialized) {
					initialized = true;
					nres = silent_data->nres();
					// raw counts of decoy pairings:
					pairing_count.dimension(nres, nres);

					// counts of fullatom contacts.
					fa_contact_count.dimension( nres, nres, 4, 2 );
					// counts of centroid contacts
					cen_contact_count.dimension( nres, nres, 2, 2 );
					beta_pairing_count_1.dimension( nres, nres, 2, 2 );
					beta_pairing_count_2.dimension( nres, nres, 2, 2 );
				}

				decoy_counter = 0;
				pairing_count = 0;
				fa_contact_count = 0;
				cen_contact_count = 0;
				beta_pairing_count_1 = 0;
				beta_pairing_count_2 = 0;
				total_decoys = silent_data->size();
				std::vector<float> decoy_scores(total_decoys);
				for(Silent_file_data::const_iterator decoy_iter = silent_data->begin();
						decoy_iter != silent_data->end();
						decoy_iter++, decoy_counter++)
					decoy_scores[decoy_counter] = decoy_iter->second->total_score;

				std::sort(decoy_scores.begin(), decoy_scores.end());

				low_score_cutoff = decoy_scores[total_decoys / 40]; // MAGIC: .025 quantile


				decoy_counter = 0;
				for(Silent_file_data::const_iterator decoy_iter = silent_data->begin();
						decoy_iter != silent_data->end();
						decoy_iter++, decoy_counter++) {
					if ( (decoy_counter)%500 == 0 ) 	std::cout << "Featurizing; decoy count: " <<
						decoy_counter << std::endl;
					decoy_iter->second->fill_pose( pose );


					if(bsasa_feats && decoy_iter->second->total_score < min_score) {
						decoy_iter->second->fill_pose( bestpose );
						min_score = decoy_iter->second->total_score;
					}

					score_reset_current_pose();
					if(fa_contact_feats) {
						//				fullatom_energy_full();

						//call pose.score( score12 ) to fill global array
						pose.score( score12 );
					} else {
						score_set_current_pose( pose );
					}

					if(fa_contact_feats) {
						for ( int i = 1; i<= nres; ++i )
							for ( int j = 1; j<= nres; ++j ) {
								// MAGIC: arbitrary bins, more or less
								if ( i < j && atr_pair(i,j) < 0.0 ) {
									if ( atr_pair(i,j) <= -1.5 ) {
										++fa_contact_count(i,j,4,1);
										if(decoy_iter->second->total_score<= low_score_cutoff)
											++fa_contact_count(i,j,4,2);
									} else if ( atr_pair(i,j) <= -.75 ) {
										++fa_contact_count(i,j,3,1);
										if(decoy_iter->second->total_score <= low_score_cutoff)
											++fa_contact_count(i,j,2,2);
									} else {
										++fa_contact_count(i,j,2,1);
										if(decoy_iter->second->total_score <= low_score_cutoff)
											++fa_contact_count(i,j,2,2);
									}
								} else {
									++fa_contact_count(i,j,1,1);
									if(decoy_iter->second->total_score <= low_score_cutoff)
										++fa_contact_count(i,j,1,2);
								}
							}
					}
					if(contact_feats) {
						pose_update_cendist(pose);
						for ( int i = 1; i<= nres; ++i )
							for ( int j =1; j<= nres; ++j ) {
								// MAGIC: 8 angstroms is cutoff point
								if ( cendist(i,j) <= 64.0 && i < j ) {
									++cen_contact_count(i,j,2,1);
									if(decoy_iter->second->total_score <= low_score_cutoff)
										++cen_contact_count(i,j,2,2);
								} else {
									++cen_contact_count(i,j,1,1);
									if(decoy_iter->second->total_score <= low_score_cutoff)
										++cen_contact_count(i,j,1,2);
								}
							}
					}
					if(beta_contact_feats || pair_feats || topo_feat)
						dssp.compute();
					if(beta_contact_feats) {
						for ( int i = 1; i<= nres; ++i )
							for ( int j = 1; j<= nres; ++j ) {
								if ( i != j && dssp.paired(i, j, i < j) && get_pleating(i,j) == 1 ) {
									++beta_pairing_count_1(i,j,2,1);
									if(decoy_iter->second->total_score <= low_score_cutoff)
										++beta_pairing_count_1(i,j,2,2);
								} else {
									++beta_pairing_count_1(i,j,1,1);
									if(decoy_iter->second->total_score <= low_score_cutoff)
										++beta_pairing_count_1(i,j,1,2);
								}
							}
						for ( int i = 1; i<= nres; ++i )
							for ( int j = 1; j<= nres; ++j ) {
								if ( i != j && dssp.paired(i, j, i < j) && get_pleating(i,j) == 2 ) {
									++beta_pairing_count_2(i,j,2,1);
									if(decoy_iter->second->total_score <= low_score_cutoff)
										++beta_pairing_count_2(i,j,2,2);
								} else {
									++beta_pairing_count_2(i,j,1,1);
									if(decoy_iter->second->total_score <= low_score_cutoff)
										++beta_pairing_count_2(i,j,1,2);
								}
							}
					}
					if(pair_feats) {
						for ( int i = 1; i<= nres; ++i ) {
							for ( int j =1; j<= nres; ++j ) {
								if ( dssp.paired(i,j,i < j) ) {
									++pairing_count(i,j);
								}
							}
						}
					}
					if(topo_feat) {
						topologies.push_back(*dssp.strand_pairing_set());
//						if(!dssp.strand_pairing_set()->check_pleat())
//							std::cout << "TROUBLE DECOY: " << *dssp.strand_pairing_set();
					}
				}

				if(topo_feat) {
					std::cout << topologies.size() << " topos\n";
					/*decoy_topo_ids = */merge_topologies(topologies);
					std::cout << "After merge: " << topologies.size() << " topos\n";
					/*decoy_topo_ids = */merge_topologies(topologies);
					std::cout << "After 2nd merge: " << topologies.size() << " topos\n";
					/*decoy_topo_ids = */merge_topologies(topologies);
					std::cout << "After 3rd merge: " << topologies.size() << " topos\n";

					if(topo_pair_feats) {
            topocodes = extract_strand_pairings(topologies, pairings);
//					std::cout << pairings.size() << " pairings.";
//					for(std::list<StrandPairing>::iterator it = pairings.begin();
//							it != pairings.end();
//							it++)
//						std::cout << *it << std::endl;
					}

				}

				float KL_cutoff;
				//float entropy_cutoff;
				KL_cutoff = 10.0;
				if(fa_contact_feats)
					identify_contact_features( nres, fa_contact_count, 4, fa_contact_list, prob_cutoff, KL_cutoff ); // MAGIC entropy and KL cutoffs
				if(contact_feats)
					identify_contact_features( nres, cen_contact_count, 2, cen_contact_list, prob_cutoff, KL_cutoff );
				if(beta_contact_feats) {
					identify_contact_features( nres, beta_pairing_count_1, 2, beta_contact_list_1, prob_cutoff, KL_cutoff );
					identify_contact_features( nres, beta_pairing_count_2, 2, beta_contact_list_2, prob_cutoff, KL_cutoff );
//					if(winnow_contacts)
//						winnow_beta_contact(nres, silent_data, beta_contact_list_1, beta_contact_list_2, beta_pairing_count_1, beta_pairing_count_B)
				}

				if(pair_feats) {
					Pairing_list local_list;

					// identify register features
					// fill in using pairing_count/total_decoys
					identify_pairing_features( nres, pairing_count, decoy_counter,	local_list );

					// merge register features found for this data set with all
					// register features found so far
					merge_pairing_feature_list(feature_list, local_list);
				}

				delete silent_data;
			}
			decoyset++;
		} // end of choosing features
		infiles.close();
// add user-specified pairing feature into feature_list
		if(user_res1 > 0)
			add_user_pairing_feature(nres, feature_list, user_res1, user_res2, user_orient);
		if(decoyset == 0) {
			std::cout << "ERROR: no decoy input files\n";
			return;
		}
	}

	if(bsasa_feats) {
		score_reset_current_pose();
		score_set_current_pose(bestpose);
		sasinator.compute_atom_bsasa_score();
		worst_res = sasinator.worst_hole_neighbors();
	}

	// loop again, do actual featurizing, output data
	utility::io::izstream infiles2( inputs);
	utility::io::ozstream out;
	dataset = 0;
	decoyset = 0;
	nativeset = 0;
	while(infiles2.getline(filename)) {
		if(filename[0] == '*') {
			datasetfile = nprefix + lead_zero_string_of(nativeset, 2);
			nativeset++;
			filename = filename.substr(1);
		} else {
			datasetfile = dprefix + lead_zero_string_of(decoyset, 2);
			decoyset++;
		}

		silent_data = new Silent_file_data( filename, fullatom );
		if(!initialized) {
			initialized = true;
			nres = silent_data->nres();

			pairing_count.dimension(nres, nres);
			fa_contact_count.dimension( nres, nres, 4, 2 );
			cen_contact_count.dimension( nres, nres, 2, 2 );
			beta_pairing_count_2.dimension( nres, nres, 2, 2 );
			beta_pairing_count_1.dimension( nres, nres, 2, 2 );
		}

		out.open(datasetfile + ".tmp");

		pairing_count = 0;
		decoy_counter = 0;

		for(Silent_file_data::const_iterator decoy_iter = silent_data->begin();
				decoy_iter != silent_data->end();
				decoy_iter++, decoy_counter++) {

			if ( (decoy_counter+1)%500 == 0 ) 	std::cout << "decoy count: " <<
				decoy_counter +1 << std::endl;


			decoy_iter->second->fill_pose( pose );

			score_reset_current_pose();
			if(fa_contact_feats || numadj_feats || sasa_feats || res_enrg_feats) {
				//				fullatom_energy_full();

				//call pose.score( score12 ) to fill global array
				dec_score = pose.score( score12 );
			} else {
				//				pose.score( score6 );
				score_set_current_pose( pose ); // copies pose to misc
				dec_score = decoy_iter->second->total_score;
			}
			if(ss_feats || rss_feats || dssp_feats || beta_contact_feats || pair_feats || topo_feat)
				dssp.compute();
			if(fa_contact_feats) {
				for(Contact_list::iterator it = fa_contact_list.begin();
						it != fa_contact_list.end();
						it++) {
					if(dataset == 0 && decoy_counter == 0)
						names << "FAct_" << it->getRes1() << '_' << it->getRes2() << ' ';
					if(atr_pair(it->getRes1(),it->getRes2()) <= -1.5)
						out << " A";
					else if(atr_pair(it->getRes1(), it->getRes2()) <= -.75)
						out << " B";
					else if(atr_pair(it->getRes1(), it->getRes2()) < 0)
						out << " C";
					else
						out << " X";
				}
			}
			if(contact_feats) {
				pose_update_cendist(pose);
				for(Contact_list::iterator it = cen_contact_list.begin();
						it != cen_contact_list.end();
						it++) {
					if(dataset == 0 && decoy_counter == 0)
						names << "Cct_" << it->getRes1() << '_' << it->getRes2() << ' ';
					if(cendist(it->getRes1(),it->getRes2()) <= 64.0)
						out << " c8";
					else
						out << " X";
				}
			}
					
			if(topo_feat) {
				if(dataset == 0 && decoy_counter == 0) {
					names << "Topo ";
					utility::io::ozstream topo(topofile);
					int top = 1;
					for(std::list<StrandPairingSet>::iterator it = topologies.begin();
							it != topologies.end();
							it++, top++)
						topo << "Topo_" << top << ' ' << *it;
					topo.close();

					if(topo_pair_feats) {

						utility::io::ozstream topodata(topodatafile);
						utility::io::ozstream topopair(topopairfile);
						utility::io::ozstream topopairdata(topopairdatafile);
						for(std::list<std::list<int> >::iterator it = topocodes.begin();
								it != topocodes.end();
								it++) {
							for(std::list<int>::iterator mit = it->begin();
									mit != it->end();
									mit++)
								topodata << *mit + 1 << ' ';
							topodata << std::endl;
						}
					  topodata.close();

						int par = 1;
						for(std::list<StrandPairing>::iterator it = pairings.begin();
								it != pairings.end();
								it++, par++) {
							topopair << "Pairing " << par << ": " << *it << std::endl;
							std::vector<BetaPair> pairs = it->get_beta_pairs();
							topopairdata << (pairs[0].orientation == 1 ? 'A' : 'P');
							for(std::vector<BetaPair>::iterator bit = pairs.begin();
									bit != pairs.end();
									bit++)
								topopairdata << ' ' << bit->res1 << ' ' << bit->res2 << ' ' << bit->pleating;
							topopairdata << std::endl;
							names << "TPair_" << par << ' ';
						}
						topopair.close();
						topopairdata.close();
					}
				}
				int i = 1;
				out << ' ';
				bool found = false;
				for(std::list<StrandPairingSet>::iterator it = topologies.begin();
						it != topologies.end();
						it++, i++) {
					if(it->merge(*dssp.strand_pairing_set())) {
						if(found)
							out << ',';
						out << i;
						found = true;
					}
				}
				if(!found)
					out << 'X';

				if(topo_pair_feats) {
					StrandPairing dit1;
					for(std::list<StrandPairing>::iterator it = pairings.begin();
							it != pairings.end();
							it++) {
						bool merged = false;
						for(std::list<StrandPairing>::iterator dit = dssp.strand_pairing_set()->pairings.begin();
								dit != dssp.strand_pairing_set()->pairings.end();
								dit++) {
							if(dit->merge(*it) && it->merge(*dit)) {
								out << " P";
								merged = true;
								break;
							}
						}
						if(!merged)
							out << " X"; 
					}
				}
			}
			if(beta_contact_feats) {
				Contact_list::iterator it1 = beta_contact_list_1.begin(), it2 = beta_contact_list_2.begin();
				while(it1 != beta_contact_list_1.end() || it2 != beta_contact_list_2.end()) {
					int pleating;
					if(it1 == beta_contact_list_1.end())
						pleating = 2;
					else if(it2 == beta_contact_list_2.end())
						pleating = 1;
					else {
						if(it1->getRes1() < it2->getRes1() || (it1->getRes1() == it2->getRes1() && it1->getRes2() < it2->getRes2()))
							pleating = 1;
						else
							pleating = 2;
					}
					int i1 = pleating == 1 ? it1->getRes1() : it2->getRes1();
					int i2 = pleating == 1 ? it1->getRes2() : it2->getRes2();

					if(dataset == 0 && decoy_counter == 0)
						names << "Bct_" << i1 << '_' << i2 << '_' << (i1 < i2 ? 'A' : 'P') << '_' << pleating << ' ';
					if(dssp.paired(i1, i2 , i1 < i2) && get_pleating(i1, i2) == pleating)
						out << " B";
					else
						out << " X";
					if(pleating == 1)
						it1++;
					else
						it2++;
				}
			}

			// pose name
			if(name_feat) {
				out << " \"" << decoy_iter->first << '\"';
				if (dataset == 0 && decoy_counter == 0)
					names << "Name ";
			}

			if(barcode_feat) {
		    for(std::map< std::string, barcode_classes::feature >::iterator it = decoy_iter->second->barcodes.feature_begin();
						 it != decoy_iter->second->barcodes.feature_end();
						 it++)
					out << ' ' << it->second;
				if (dataset == 0 && decoy_counter == 0)
					for(std::map< std::string, barcode_classes::feature >::iterator it = decoy_iter->second->barcodes.feature_begin();
							it != decoy_iter->second->barcodes.feature_end();
							it++)
					  names << "BC" << it->first << ' ';
			}

			// backbone torsion bin
			if(tor_feats) {
				for ( int i=1; i<= nres; ++i ) {
					out << ' ' << torsion2big_bin( pose.phi(i), pose.psi(i), pose.omega(i) );
					if(dataset == 0 && decoy_counter == 0)
						names << "Tor_" << i << ' ';
				}
			}

				// ss
			if(frag_ss_feats) {
				for ( int i=1; i<= nres; ++i ) {
					out << ' ' << pose.secstruct(i);
					if (dataset == 0 && decoy_counter == 0)
						names << "FragSS_" << i << ' ';
				}
			}

				// chi rotamer bin
			if(rot_feats > 0) {
				for ( int i=1; i<= nres; ++i ) {
					int const aa ( pose.res(i));
					int const aav( pose.res_variant(i));
					FArray1D_float chi( param::MAX_CHI );
					FArray1D_int rot( param::MAX_CHI );
					int const nchi( aaproperties_pack::nchi( aa,aav));
					for ( int k=1; k<= min(rot_feats,nchi); ++k ) {
						chi(k) = pose.chi(k,i);
					}
					rotamer_from_chi( chi, aa, rot );
					for(int k=1; k <= min(nchi, rot_feats); ++k) {
						if(dataset == 0 && decoy_counter == 0 )
							names << "Rot_" << i << '_' << k << ' ';
						out << " r" << rot(k);
					}
				}
			}

      if(dssp_feats) {
				FArray1D_char dssp_secstruct(pose.secstruct());
				dssp.dssp(dssp_secstruct);
				for ( int i=1; i<= nres; ++i ) {
					char c = dssp_secstruct(i);
					if(c == ' ')
            c = 'L';
          out << ' ' << c;
					if (dataset == 0 && decoy_counter == 0)
						names << "DSSP_" << i << ' ';
				}
			}

			if(rss_feats) {
				FArray1D_char rsecstruct(pose.secstruct());
				dssp.dssp_reduced(rsecstruct);
				for ( int i=1; i<= nres; ++i ) {
          out << ' ' << rsecstruct(i);
					if (dataset == 0 && decoy_counter == 0)
						names << "RSS_" << i << ' ';
        }
      }

			if(ss_feats) {
				FArray1D_char fsecstruct(pose.secstruct());
				dssp.dssp_featurizer(fsecstruct);
				for ( int i=1; i<= nres; ++i ) {
					out << ' ' << fsecstruct(i);
					if (dataset == 0 && decoy_counter == 0)
						names << "SS_" << i << ' ';
				}
			}

			if(test_new_ss) {
				std::string ssname = "ss" + lead_zero_string_of(dataset,2) + '_' + lead_zero_string_of(decoy_counter, 4);
				utility::io::ozstream ssout(ssname + ".ss");
				FArray1D_char fixed_secstruct(pose.secstruct());
				//FArray1D_char bc_secstruct(pose.secstruct());
				FArray1D_char dssp_secstruct(pose.secstruct());
				dssp.dssp_reduced(fixed_secstruct);
				//dssp_ben(bc_secstruct);
				dssp.dssp(dssp_secstruct);
				for ( int i=1; i<= nres; ++i )
					ssout << pose.secstruct(i);
				ssout << std::endl;
				for ( int i=1; i<= nres; ++i )
					ssout << fixed_secstruct(i);
				ssout << std::endl;
				for ( int i=1; i<= nres; ++i )
					ssout << dssp_secstruct(i);
				ssout << std::endl;
				ssout.close();

				pose.dump_pdb(ssname+".pdb");
				utility::io::ozstream hb(ssname + ".hb");
				for(int i1 = 1; i1 <= nres; i1++) {
					for(int i2 = 1; i2 <= nres; i2++)
						hb << dssp.hbond_bb_pair_score(i1,i2) << ' ';
					hb << std::endl;
				}
				hb.close();
			}


			if(pair_feats) {
				// register features:
				// now decoy_pairing_count is filled, generate features from raw
				// pairings
				//for each pairing feature active for this decoy
				//figure out the register shift
				// TODO: determine exact threshold
				int maxpairing, maxpairing2;
				int total;
				int bestshift, bestshift2;
				int i1, i2;
				for(Pairing_list::iterator it = feature_list.begin(),
						it_end = feature_list.end(); it != it_end; it++) {
					bestshift = 1000;
					bestshift2 = 1000;
					maxpairing = 0;
					maxpairing2 = 0;
					int g_max_i1=0, g_max_i2=0, g_max2_i1=0, g_max2_i2=0;
					for(int s = it->shiftmin(); s <= it->shiftmax(); s++) {
						total = 0;
						bool paired = false;
						int best_centrality = 1000, centrality;
						int max_i1=0, max_i2=0;
						for(it->get_shift_start_corner(i1, i2, s);
								it->contains_square(i1,i2);
								it->shift_move(i1, i2)) {
							paired = dssp.paired(i1, i2, it->antiparallel());
							total += paired ? 1 : 0;
							if(paired) {
								centrality = abs(i1 - (it->center1() + ((s - it->shiftmin()) + 1)/2));
								if(centrality < best_centrality) {
									max_i1 = i1;
									max_i2 = i2;
									best_centrality = centrality;
								}
							}
						}
						// Keep track of best two register shifts seen (for beta bulge)
						if(total > maxpairing ) {
							bestshift2 = bestshift;
							bestshift = s - it->shiftmin();
							maxpairing2 = maxpairing;
							maxpairing = total;
							g_max2_i1 = g_max_i1;
							g_max_i1 = max_i1;
							g_max2_i2 = g_max_i2;
							g_max_i2 = max_i2;
						} else if(total > maxpairing2) {
							bestshift2 = s - it->shiftmin();
							maxpairing2 = total;
							g_max2_i1 = max_i1;
							g_max2_i2 = max_i2;
						}
					}

					if (dataset == 0 && decoy_counter == 0) {
						names << "Pair_" << it->center1() << '_' << it->center2() << '_' << it->o() << ' ';
					}
					if(bestshift != 1000) {
						char pleating, pleating2;
						pleating = (get_pleating(g_max_i1,g_max_i2) + g_max_i1 - (it->center1() + (bestshift + 1)/2)) % 2 ? 'O':'E';
						if(pair_feats_bulge && bestshift2 != 1000) {
							pleating2 = (get_pleating(g_max2_i1,g_max2_i2) + g_max2_i1 - (it->center1() + (bestshift2 + 1)/2)) % 2 ? 'O':'E';
							if(bestshift < bestshift2)
								out << ' ' << pleating << bestshift << pleating2 << bestshift2;
							else
								out << ' ' << pleating2 << bestshift2 << pleating << bestshift;
						} else
							out << ' ' << pleating << bestshift;
					}
					else
						out << " X";
				}
			}

			if(enrg_feat) {
				out << ' ' << dec_score;
				if (dataset == 0 && decoy_counter == 0)
					names << "enrg ";
			}

			if(rms_feat) {
				out << ' ' << CA_rmsd(pose, native_pose);
				if (dataset == 0 && decoy_counter == 0)
					names << "rmsd ";
			}

			if(numadj_feats) {
				//eval full-atom energy score then steal neighbors info from global array
				for ( int i=1; i<= nres; ++i ) {
					out << ' ' << template_pack::neighbors(i);
					if (dataset == 0 && decoy_counter == 0)
						names << "numAdj_" << i << " ";
				}

			}
				//copy stuff from calc_per_atom_sasa and decoy_stat_score_ to set up rsd.sasa array and compute sasa_fraction
			if(sasa_feats) {
				FArray1D_float rsd_sasa( param::MAX_RES()() );
				FArray2D_float atom_sasa( param::MAX_ATOM()(), param::MAX_RES()() );
				float sasa_fraction;

				// initialize the per-atom logicals for calculating rms-like scores
				calc_per_atom_sasa( atom_sasa, rsd_sasa, 1.4f, false, false );
				// water probe radius, real sasa,small polar H
				for ( int i = 1; i <= misc::total_residue; ++i ) {
					int const aa ( misc::res(i));
					sasa_fraction = std::max( 0.0f, std::min( 1.0f, rsd_sasa(i) / rsd_exposed_sasa(aa) ) );
					out << " " << sasa_fraction;
					if (dataset == 0 && decoy_counter == 0)
						names << "sasa_" << i << " ";
				}
			}

			if(bsasa_feats) {
				sasinator.compute_atom_bsasa_score();
				if(dataset == 0 && decoy_counter == 0)
					names << "bsasa" << ' ';
				out << ' ' << sasinator.bsasa_score_weighted_log();
				for ( set<int>::iterator it = worst_res.begin();
						it != worst_res.end();
						it++ ) {
					if(dataset == 0 && decoy_counter == 0)
						names << "resBsasa_" << *it << ' ';
					out << ' ' << sasinator.res_bsasa_score(*it);
				}
			}

			//get per-residue energy from fullatom_energies.h
			if(res_enrg_feats) {
				for ( int i=1; i<= nres; ++i ) {
					out << " " << fullatom_energies::resenergy(i);
					if (dataset == 0 && decoy_counter == 0)
						names << "resEnrg_" << i << " ";
				}
			}
			out << '\n';
		}

		if(dataset == 0) {
			names << '\n';
			names.close();
		}
		out.close();

		dataset++;
		delete silent_data;
		if( !merge ) {
			std::string command = "cat " + featfile + " " + datasetfile + ".tmp > " + datasetfile;
			std::system(command.c_str());
			command = "rm " + datasetfile + ".tmp";
			std::system(command.c_str());
		}
	}
	infiles2.close();

	if(merge) {
		utility::io::izstream infiles3( inputs);
		dataset = 0;
		decoyset = 0;
		nativeset = 0;
		std::string dcommand = "cat " + featfile;
		std::string ncommand = "cat " + featfile;
		while(infiles3.getline(filename)) {
			if(filename[0] == '*') {
				datasetfile = nprefix + lead_zero_string_of(nativeset, 2);
				nativeset++;
				filename = filename.substr(1);
				ncommand += " " + datasetfile + ".tmp";
			} else {
				datasetfile = dprefix + lead_zero_string_of(decoyset, 2);
				decoyset++;
				dcommand += " " + datasetfile + ".tmp";
			}
		}
		dcommand += " > " + dprefix + "00";
		ncommand += " > " + nprefix + "00";
		infiles3.close();
		std::system(dcommand.c_str());
		std::system(ncommand.c_str());
		dcommand = "rm -f " + dprefix + "*.tmp";
		ncommand = "rm -f " + nprefix + "*.tmp";
		std::system(dcommand.c_str());
		std::system(ncommand.c_str());
	}
}

///////////////////////////////////////////////////////////////////////////////
void
identify_pairing_features(
	int const total_residue,
	FArray2D_int const & pairing_count,
	int const total_decoys,
	featurizer_ns::Pairing_list & feature_list
)
{
	using namespace featurizer_ns;
	const int  LONG_SMEAR(4);
	const int SHORT_SMEAR(2);
	const float MIN_FREQ = 0.001;

	// first smear the counts, then take local maxima
	typedef std::vector<std::pair<int,int> > Pair_list;

	std::map<int,Pair_list> smearList;
	std::map<int,std::vector<float> > smearWeight;
	for ( int o=1; o<= 2; ++o ) { // o=1: antiparallel, o=2: parallel
		Pair_list l;
		std::vector<float> w;
		for ( int i=-20; i<= 20; ++i ) {
			for ( int j=-20; j<=20; ++j ) {
				int a ( i - j );
				int b ( i + j );
				if(o == Beta_feature::PARALLEL) {
					int temp = a;
					a = b;
					b = temp;
				}
				if (  -LONG_SMEAR <= a && a <=  LONG_SMEAR &&
						 -SHORT_SMEAR <= b && b <= SHORT_SMEAR ) {
					l.push_back( std::make_pair( i,j) );
					float x = (float)a/(float)LONG_SMEAR;
					float y = (float)b/(float)SHORT_SMEAR;
					w.push_back( 2.0 - x*x - y*y );
				}
			}
		}
		smearList[o] = l;
		smearWeight[o] = w;
	} // o=1,2

	// now calculate the smeared frequencies, store anti-parallel and parallel
	// in same matrix: i<j ==> anti i>j ==> parallel
	FArray2D_float freq( total_residue, total_residue, 0.0);

	for ( int o=1; o<= 2; ++o ) {
		const Pair_list::iterator it_start = smearList[o].begin();
		const Pair_list::iterator it_stop  = smearList[o].end();
		const std::vector<float>::iterator wit_start = smearWeight[o].begin();
		const std::vector<float>::iterator wit_stop = smearWeight[o].end();
		for ( int i=1; i<= total_residue; ++i) {
			for ( int j=i+1; j<= total_residue; ++j ) {
		    std::vector<float>::iterator wit = wit_start;
				for ( Pair_list::iterator it = it_start; it != it_stop && wit != wit_stop; it++, wit++ ) {
					const int ii = i + it->first;
					const int jj = j + it->second;
					const float ww = *wit;
					if ( jj>ii && ii >= 1 && jj <= total_residue ) {
						if ( o==1 ) {
							freq(i,j) += ww * ( float( pairing_count( ii,jj ) ) / total_decoys);
						} else {
							freq(j,i) += ww * ( float( pairing_count( jj,ii ) ) / total_decoys);
						}
					}
				}
			}
		}
//		std::cout << freq;
	}

	// identify features, sort by frequencies:
	// typedef std::list< std::pair< float, Pairing_feature > > Pairing_sorter;
	Pairing_sorter pairing_sorter;

	for ( int o=1; o<=2 ; ++o ) {
		for ( int i=1; i<= total_residue; ++i) {
			for ( int j=i+1; j <= total_residue; ++j ) {
				const float f( (o==1) ? freq(i,j) : freq(j,i) );
				if ( f < MIN_FREQ ) continue;
				if ( local_max( i,j,o,freq ) ) {
					const float f( (o==1) ? freq(i,j) : freq(j,i) );
					pairing_sorter.push_back( std::make_pair( f, Pairing_feature( i,j,total_residue,o,1 ) ) );
				}
			}
		}
	}

	pairing_sorter.sort();
	pairing_sorter.reverse();

	// Iterate over all features, determine how far the rectangles extend in
	// each direction
	// TODO: determine what this constant should be.
	const float MIN_EXTENT_THRES = 0.05;
	for( Pairing_sorter::iterator it = pairing_sorter.begin(), it_end = pairing_sorter.end(); it != it_end; it++ ) {
	  bool changed = true;
		float total;
		int i1, i2, side_length;
		// Circle around the four directions and grow the rectangle in the given
		// direction if the average weight along the new edge is high enough
		while(changed) {
			changed = false;
			for(int dir = 0; dir < 4; dir++) {
				if(!it->second.grow(dir))
					continue;
		    total = 0.0;
				side_length = 0;
				for(it->second.get_start_corner(i1, i2, dir);
						  it->second.contains_square(i1,i2);
					  it->second.dir_move(i1, i2, dir)) {
					float const f( it->second.antiparallel() ? freq(i1,i2) : freq(i2,i1) );
					total += f;
					side_length++;
				}
				if(total / side_length < MIN_EXTENT_THRES) {
					it->second.shrink(dir);
				} else {
				  changed = true;
				}
			}
		}
		merge_pairing_feature(feature_list, it->second);
	}
}

///////////////////////////////////////////////////////////////////////////////
/// @brief Feeds every feature of @p other, in order, through
/// merge_pairing_feature() so that it ends up merged into @p result.
void merge_pairing_feature_list(featurizer_ns::Pairing_list &result, featurizer_ns::Pairing_list &other) {
	using namespace featurizer_ns;
	Pairing_list::iterator cur = other.begin();
	while ( cur != other.end() ) {
		merge_pairing_feature( result, *cur );
		++cur;
	}
}

///////////////////////////////////////////////////////////////////////////////
/// @brief Inserts feature @p f into @p result, absorbing matching entries first.
///
/// @detailed
/// While some element of @p result has a match() score against @p f above
/// MATCH_CUTOFF, that element is merged into @p f and removed from the list.
/// The scan restarts from the beginning after every merge, because the grown
/// @p f may now match elements that were skipped earlier. Finally the
/// accumulated @p f (a by-value copy of the caller's feature) is appended.
void merge_pairing_feature(featurizer_ns::Pairing_list &result, featurizer_ns::Pairing_feature f) {
	using namespace featurizer_ns;
	float const MATCH_CUTOFF = 0.0;
	for ( ;; ) {
		// locate the first entry that matches the current state of f
		Pairing_list::iterator hit = result.begin();
		while ( hit != result.end() && !( hit->match( f ) > MATCH_CUTOFF ) ) {
			++hit;
		}
		if ( hit == result.end() ) break; // nothing left to absorb

		f.merge( *hit );
		result.erase( hit );
	}
	result.push_back( f );
}


///////////////////////////////////////////////////////////////////////////////
void
identify_contact_features(
	int const total_residue,
	FArray4D_int const & pairing_count, // pairing_count(i,j,k,l) is the number of decoys
	 // in which the contact distance between i and j is in bin k. l indexes which
   // set of decoys we're talking about: 1: all, 2: low energy.
	int const num_bins,
	featurizer_ns::Contact_list & contact_list,
	float prob_cutoff,
	float KL_cutoff
) {
  using namespace featurizer_ns;
	FArray2D_float entropy(total_residue, total_residue, 0.0);
	FArray2D_float prob_present(total_residue, total_residue, 0.0);
	int total_decoys = 0, low_decoys = 0;
	for(int k=1; k <= num_bins; k++) {
		total_decoys += pairing_count(1,1,k,1);
		low_decoys += pairing_count(1,1,k,2);
	}

	for(int i=1; i <= total_residue; i++)
		for(int j=1; j <= total_residue; j++)
			for(int k=1; k <= num_bins; k++) {
				if(pairing_count(i,j,k,1) > 0) {
					float p = (float)pairing_count(i,j,k,1)/(float)total_decoys;
					entropy(i,j) -= p * log(p);
					if(k > 1) // typically first bin is "not present"
						prob_present(i,j) += p;
				}
			}

	float plow = (float)low_decoys/(float)total_decoys;
	float entlow = -plow * log(plow) - (1-plow) * log(1-plow);
	FArray2D_float KL(total_residue, total_residue, 0.0);
	for(int i=1; i <= total_residue; i++)
		for(int j=1; j <= total_residue; j++) {
			for(int k=1; k <= num_bins; k++) {
				if(pairing_count(i,j,k,2) != 0) {
					float q = (float)pairing_count(i,j,k,1)/(float)total_decoys;
					float p = (float)pairing_count(i,j,k,2)/(float)low_decoys;
					KL(i,j) += p * log(p/q);
				}
			}
			KL(i,j) /= min(entropy(i,j), entlow);
		}


	// Eliminate contacts we've already seen
	for(Contact_list::iterator it = contact_list.begin();
			it != contact_list.end();
			it++) {
		prob_present(it->getRes1(),it->getRes2()) = -1.0;
	}



	for(int i=1; i <= total_residue; i++) {
		for(int j=1; j <= total_residue; j++) {
			if(prob_present(i,j) > 0.0) {
				//std::cout << '(' << i << ',' << j << ')' << " prob: " << prob_present(i,j) << " KL: " << KL(i,j) << std::endl;
				if(prob_present(i,j) > prob_cutoff  || KL(i,j) > KL_cutoff)
					contact_list.push_back(Contact_feature(i,j));
			}
		}
	}
}

/////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////

/////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////
// rhiju
// Some extra functions that will eventually make use of
// Ben Blum's featurizer, but
// output is somewhat compressed for use in
// my scripts.
/////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
/// @brief Assigns residue @p i of @p pose to one of three chi1 bins.
/// @return 3 if -120 < chi1 < 0, 2 if chi1 > 120 or chi1 < -120, otherwise 1.
///         A residue without chi angles is treated as chi1 = 0 and lands in bin 1.
int get_chibin( pose_ns::Pose & pose, int const i){
	using namespace aaproperties_pack;
	int const n_chi_angles( nchi( pose.res(i), pose.res_variant(i) ) );

	float chi1( 0.0 );
	if ( n_chi_angles > 0 ) {
		chi1 = pose.chi( 1, i );
	}

	// the two tested ranges are disjoint, so the order of the checks is irrelevant
	if ( chi1 > -120.0 && chi1 < 0.0 ) return 3;
	if ( chi1 > 120.0 || chi1 < -120.0 ) return 2;
	return 1;
}
///////////////////////////////////////////////////////////////////////////////
/// @brief Value of the "just_pair" option; looked up on the first call and cached afterwards.
bool get_just_pair_flag(){
	// function-local static: the initializer runs only the first time through
	static bool const just_pair( truefalseoption("just_pair") );
	return just_pair;
}
///////////////////////////////////////////////////////////////////////////////
/// @brief Value of the "report_chi" option; looked up on the first call and cached afterwards.
bool get_report_chi_flag(){
	// function-local static: the initializer runs only the first time through
	static bool const report_chi( truefalseoption("report_chi") );
	return report_chi;
}

///////////////////////////////////////////////////////////////////////////////
/// @brief Value of the "use_actual_centroids" option; looked up on the first call and cached afterwards.
bool get_use_actual_centroids_flag(){
	// function-local static: the initializer runs only the first time through
	static bool const use_actual_centroids( truefalseoption("use_actual_centroids") );
	return use_actual_centroids;
}

///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////
// centroid info
/// @brief Writes a sparse feature description of one pose to the given streams.
///
/// @detailed
/// Every candidate feature is assigned a running index (scorecount). The index
/// is advanced for each candidate whether or not the feature is present in this
/// pose, so the same index refers to the same feature for every pose that is
/// processed with the same options and residue count.
///
/// Streams:
///  - dataout: one line "count index value" per feature that is present, plus
///    the INPUT_SCORE line at the start and eleven score lines at the end.
///  - svmout:  one line per pose: INPUT_SCORE followed by " index:1" for each
///    present feature. The trailing score terms are NOT written here.
///  - mapout:  written only during the first call (guarded by the static
///    initResMap); one line per feature index describing what it means.
///    The stream is closed at the end of that first call.
///
/// Feature groups, in index order: residue-pair contacts (one of three modes
/// chosen by the just_pair / report_chi flags), DSSP beta pairings split by
/// orientation and pleating, per-residue secondary structure, big-bin torsion
/// state, DSSP state, BBlum state, neighbor count (1..MAX_NEIGHBORS), and
/// finally eleven centroid score terms (zeros when the pose is full-atom).
///
/// Side effects visible here: the pose is rescored with score4 weights, its
/// centroid distances are updated, it is copied to misc, and for centroid
/// poses HB_SRBB / HB_LRBB are stored back into the pose.
///
/// @param pose     pose to analyse (modified, see above)
/// @param dataout  long-format feature table
/// @param svmout   sparse "index:value" line for this pose
/// @param mapout   feature-index map; only used on the first call
/// @param count    identifier of this pose, echoed in every dataout line
void get_centroid_information( pose_ns::Pose & pose, std::ofstream & dataout,
															 std::ofstream & svmout, std::ofstream & mapout, int const count)
{
  using namespace pose_ns;



	// false until the first call has finished; while false, every feature
	// index is also described in mapout.
	static bool initResMap = { false };

	Score_weight_map w( score4 );
	pose.score( w );

	//Distance matrix.
	pose_update_cendist( pose ); //may not be necessary if computing VDW anyway.
	// Compared below against a squared cutoff and square-rooted on output, so
	// the entries are used as squared distances.
	FArray2D_float const & cendist( pose.get_2D_score( CENDIST ) );

	int const total_residue = pose.total_residue();
	// Running feature index; shared by dataout, svmout and mapout.
	int scorecount( 0 );
	// Address of a reference, hence never null; the "symm_info &&" tests below
	// are therefore always true.
	Symmetry_info const * symm_info( & pose.symmetry_info() );

	///////////////////////////////////////////
	// Allow one degree of freedom that will
	// allow the mean energy to go up and down.
	///////////////////////////////////////////
	dataout << count << " " << ++scorecount << " " <<
		pose.get_extra_score( "INPUT_SCORE" ) << std::endl;
	svmout << pose.get_extra_score( "INPUT_SCORE" );

	using namespace cenlist_ns;

	// NOTE(review): Epos is a by-value copy that is not referenced anywhere
	// below; presumably removable -- confirm Eposition() has no side effect
	// that later code relies on.
	FArray3D_float const Epos( pose.Eposition() );
	//	int const num_jump = pose.fold_tree().get_num_jump();
	// Squared 8.0 cutoff, matching the squared values in cendist.
	float const CENDISTCUTOFF = 8.0 * 8.0;

	bool const just_pair = get_just_pair_flag();
	bool const report_chi = get_report_chi_flag();

	if (report_chi && !pose.fullatom()){
		std::cout << "STOP:: Report chi requires -fa_input flag " << std::endl;
    utility::exit( EXIT_FAILURE, __FILE__, __LINE__);
	}

	///////////////////////////////////////////////
	// Report residue pairs with centroid-centroid
	// distances less than 8.0 Angstroms.
	///////////////////////////////////////////////
	if (just_pair ){
		// This is for docking runs -- look only at intermolecular
		// contacts.
		// Pairs are (ii <= cutpoint of jump 1, jj > cutpoint).
		int const cutpoint = pose.fold_tree().cutpoint_by_jump( 1 );
		for ( int ii=1; ii<= cutpoint; ++ii ) {
			for ( int jj=cutpoint+1; jj<= total_residue; ++jj) {
				scorecount++;
				if (cendist(ii,jj) < CENDISTCUTOFF){
					dataout << count << " "  << scorecount << " " <<
						std::sqrt( cendist(ii,jj) ) << std::endl;
					svmout << " " << scorecount << ":" << 1.0 ;
				}
				if (!initResMap) mapout << scorecount << " " << ii << " " << jj << " CONTACT" << std::endl;
			}
		}
	}
	else if (report_chi) {
		//		int chi_ii,chi_jj;

		//Subdivide by chi1 rotamer.
		// Each residue pair expands into 3x3 candidate features, one per
		// (chi1 bin of ii, chi1 bin of jj) combination; only the combination
		// matching the actual bins can be reported.
		assert( pose.fullatom() );
		for ( int ii=1; ii<= total_residue; ++ii ) {
			int const chibin_ii = get_chibin( pose, ii);
			for ( int chiloop_ii=1; chiloop_ii<=3; ++chiloop_ii){

				for ( int jj=ii+1; jj<= total_residue; ++jj) {
					int const chibin_jj = get_chibin( pose, jj);
					for ( int chiloop_jj=1; chiloop_jj<=3; ++chiloop_jj){

						scorecount++;
						if (cendist(ii,jj) < CENDISTCUTOFF && chibin_ii==chiloop_ii && chibin_jj==chiloop_jj){
							dataout << count << " "  << scorecount << " " << std::sqrt( cendist(ii,jj) ) << std::endl;
							svmout << " " << scorecount << ":" << 1.0 ;
						}
						// The map lists "virtual residue" numbers 3*(res-1)+bin.
						if (!initResMap) mapout << scorecount << " " << 3*(ii-1)+chiloop_ii << " " << 3*(jj-1)+chiloop_jj << " CONTACT" << std::endl;
					}
				}
			}
		}
	} else{
		//Default. Just a simple loop over contacts.
		for ( int ii=1; ii<= total_residue; ++ii ) {

			//Symmetry info!
			// Skipped residues do not consume a feature index.
			if ( pose.pseudo( ii ) || (symm_info && !symm_info->bb_independent( ii )) ) continue;

			for ( int jj=ii+1; jj<= total_residue; ++jj) {

				if (pose.pseudo( jj ) ) continue;
				scorecount++;

				if (cendist(ii,jj) < CENDISTCUTOFF){
					dataout << count << " "  << scorecount << " " << std::sqrt( cendist(ii,jj) ) << std::endl;
					svmout << " " << scorecount << ":" << 1.0 ;
				}
				if (!initResMap) mapout << scorecount << " " << ii << " " << jj << " 8.0 CONTACT" << std::endl;
			}
		}
	}


	//Follow bblum's example.
	////////////////////////////////
	//DSSP-based Beta Pairings!
	////////////////////////////////

	//There are four NxN matrices, for parallel/antiparallel, and the two
	// possible "pleatings".
	pose.copy_to_misc(); //Just in case.
	dssp_ns::DSSP dssp;
	dssp.compute();

	// o: orientation (1 = antiparallel, see below), p: pleating.
	for (int o = 1; o <= 2; o++ ){
		for ( int p = 1; p <= 2; ++p ) { //"Pleating"
			for ( int i = 1; i <= total_residue; ++i ) {

				//Symmetry info!
				if (pose.pseudo( i ) || ( symm_info && !symm_info->bb_independent( i )) ) continue;

				for ( int j = i+1; j <= total_residue; ++j ) {

					if (pose.pseudo( j ) ) continue;
					scorecount++;

					bool const antiparallel = (o==1);
					if ( dssp.paired(i,j,antiparallel) &&
							 p == get_pleating(i,j) ){
						dataout << count << " " << scorecount << " " << 1.0  << std::endl;
						svmout << " " << scorecount << ":" << 1.0;
					}

					if (!initResMap) mapout << scorecount << " " << i << " " << j << " " <<
														 o << "_" << p << " BETA_PAIR" << std::endl;

				} //j
			} // i
		} //p
	} //o


	// Counts the per-residue feature classes emitted below; only written to
	// mapout.
	int feature_type( 0 );

	/////////////////////////////////
	// Fragment secondary structure
	/////////////////////////////////
	for (int j = 1; j <= Secstruct::NUM_STATES; j++ ){

		feature_type++;

		for ( int i=1; i<= total_residue; ++i ) {

			//Symmetry info!
			if ( pose.pseudo( i ) || (symm_info && !symm_info->bb_independent( i )) ) continue;
			scorecount++;

			if ( get_secstruct_state( pose.secstruct( i ) ) == j ) {
				dataout << count << " " << scorecount << " " << 1.0  << std::endl;
				svmout << " " << scorecount << ":" << 1.0;
			}

			if (!initResMap) mapout << scorecount << " " << i << " " << feature_type << " " <<
												 Secstruct::StateChar(j) << " SS" << std::endl;
		}

	}


	///////////////////////////////
	// backbone torsion bin
	///////////////////////////////
	for (int j = 1; j <= BigBin::NUM_STATES; j++ ){

		feature_type++;

		for ( int i=1; i<= total_residue; ++i ) {

			//Symmetry info!
			if ( pose.pseudo( i ) || (symm_info && !symm_info->bb_independent( i )) ) continue;
			scorecount++;

			if ( get_big_bin_state( pose.phi(i), pose.psi(i), pose.omega(i) ) == j) {
				dataout << count << " " << scorecount << " " << 1.0  << std::endl;
				svmout << " " << scorecount << ":" << 1.0;
			}

			if (!initResMap) mapout << scorecount << " " << i << " " << feature_type << " " <<
												 BigBin::StateChar(j) << " BIG_BIN" << std::endl;
		}

	}


	//DSSP
	// Sized with misc::total_residue, which was refreshed by copy_to_misc()
	// above.
	FArray1D_char dssp_secstruct( misc::total_residue );
	dssp.dssp( dssp_secstruct );

	for (int j = 1; j <= DSSP::NUM_STATES; j++ ){

		feature_type++;

		for ( int i=1; i<= total_residue; ++i ) {

			//Symmetry info!
			if ( pose.pseudo( i ) || (symm_info && !symm_info->bb_independent( i )) ) continue;
			scorecount++;

			if ( get_dssp_state( dssp_secstruct(i) ) == j) {
				dataout << count << " " << scorecount << " " << 1.0  << std::endl;
				svmout << " " << scorecount << ":" << 1.0;
			}

			if (!initResMap) mapout << scorecount << " " << i << " " << feature_type << " " <<
												 DSSP::StateChar(j) << " DSSP" << std::endl;
		}

	}

	//BBLUM type features.
	FArray1D_char bblum_secstruct( misc::total_residue );
	dssp.dssp_featurizer( bblum_secstruct );

	for (int j = 1; j <= BBlum::NUM_STATES; j++ ){

		feature_type++;

		for ( int i=1; i<= total_residue; ++i ) {

			//Symmetry info!
			if ( pose.pseudo( i ) || (symm_info && !symm_info->bb_independent( i )) ) continue;
			scorecount++;

			if ( get_bblum_state( bblum_secstruct(i) ) == j) {
				dataout << count << " " << scorecount << " " << 1.0  << std::endl;
				svmout << " " << scorecount << ":" << 1.0;
			}

			if (!initResMap) mapout << scorecount << " " << i << " " << feature_type << " " <<
												 BBlum::StateChar(j) << " BBLUM" << std::endl;
		}

	}

	//////////////////////////////////
	//Extraneous information on env:
	//////////////////////////////////
	//	using namespace structure::seq_struct;
	//	for ( int ii=1; ii<= total_residue; ++ii ) {
	//		float env_contribution = env_log(pose.res(ii), static_cast< int >(fcen10(ii)));
	//		dataout << count << " " << ++scorecount << " " << env_contribution  << std::endl;
	//		svmout << " " << scorecount << ":" << env_contribution;
	//	}

	//Break down by neighbors.
	int const MAX_NEIGHBORS = 30; //Hard-wired maximum for env_log.

	// One indicator feature per (neighbor count j, residue ii).
	for ( int j=1; j <= MAX_NEIGHBORS; ++j){

		feature_type++;

		for ( int ii=1; ii<= total_residue; ++ii ) {

			//Symmetry info!
			if ( pose.pseudo( ii ) || (symm_info && !symm_info->bb_independent( ii )) ) continue;
			scorecount++;

			// fcen10 comes from cenlist_ns; the truncated value is clamped
			// into [1, MAX_NEIGHBORS] so every residue hits exactly one j.
			int num_neighbors = static_cast<int>( fcen10( ii ) );
			num_neighbors = std::max( num_neighbors, 1);
			num_neighbors = std::min( num_neighbors, MAX_NEIGHBORS);

			if ( num_neighbors == j ){
				dataout << count << " " << scorecount << " " << 1.0  << std::endl;
				svmout << " " << scorecount << ":" << 1.0;
			}
			if (!initResMap) mapout << scorecount << " " << ii << " " << feature_type << " " <<
												 j << " ENV" << std::endl;

		}

	}


	// Miscellaneous scores.
	// Written to dataout only (no svmout / mapout entries). Both branches
	// emit exactly eleven lines so the index layout does not depend on
	// whether the pose is full-atom.
	if (!pose.fullatom()){
		float  hb_srbb_score, hb_lrbb_score;
		evaluate_hbenergy(hbonds::hbderiv_NONE,hb_srbb_score,hb_lrbb_score);
	//fast_backbone_hbE(pose, hb_srbb_score, hb_lrbb_score);
		pose.set_0D_score( pose_ns::HB_SRBB, hb_srbb_score );
		pose.set_0D_score( pose_ns::HB_LRBB, hb_lrbb_score );

		//Fun to include in fit.
		dataout << count << " " << ++scorecount << " " << F( 7, 2, pose.get_0D_score( ENV ) ) << std::endl;
		dataout << count << " " << ++scorecount << " " << F( 7, 2, pose.get_0D_score( PAIR ) ) << std::endl;
		dataout << count << " " << ++scorecount << " " << F( 7, 2, pose.get_0D_score( VDW ) ) << std::endl;
		dataout << count << " " << ++scorecount << " " << F( 7, 2, pose.get_0D_score( HS ) ) << std::endl;
		dataout << count << " " << ++scorecount << " " << F( 7, 2, pose.get_0D_score( SSPAIR ) ) << std::endl;
		dataout << count << " " << ++scorecount << " " << F( 7, 2, pose.get_0D_score( SHEET ) ) << std::endl;
		dataout << count << " " << ++scorecount << " " << F( 7, 2, pose.get_0D_score( CB ) ) << std::endl;
		dataout << count << " " << ++scorecount << " " << F( 7, 2, pose.get_0D_score( RSIGMA ) ) << std::endl;
		dataout << count << " " << ++scorecount << " " << F( 7, 2, pose.get_0D_score( HB_SRBB ) ) << std::endl;
		dataout << count << " " << ++scorecount << " " << F( 7, 2, pose.get_0D_score( HB_LRBB ) ) << std::endl;
		dataout << count << " " << ++scorecount << " " << F( 7, 2, pose.get_0D_score( RG ) ) << std::endl;
	} else {
		//Pad with zeros.
		dataout << count << " " << ++scorecount << " " << F( 7, 2, 0.0) << std::endl;
		dataout << count << " " << ++scorecount << " " << F( 7, 2, 0.0) << std::endl;
		dataout << count << " " << ++scorecount << " " << F( 7, 2, 0.0) << std::endl;
		dataout << count << " " << ++scorecount << " " << F( 7, 2, 0.0) << std::endl;
		dataout << count << " " << ++scorecount << " " << F( 7, 2, 0.0) << std::endl;
		dataout << count << " " << ++scorecount << " " << F( 7, 2, 0.0) << std::endl;
		dataout << count << " " << ++scorecount << " " << F( 7, 2, 0.0) << std::endl;
		dataout << count << " " << ++scorecount << " " << F( 7, 2, 0.0) << std::endl;
		dataout << count << " " << ++scorecount << " " << F( 7, 2, 0.0) << std::endl;
		dataout << count << " " << ++scorecount << " " << F( 7, 2, 0.0) << std::endl;
		dataout << count << " " << ++scorecount << " " << F( 7, 2, 0.0) << std::endl;
	}

	// Terminates this pose's sparse line.
	svmout << std::endl;

	// The map is complete after the first pose; close it and stop writing to it.
	if (!initResMap) mapout.close();
	initResMap = true;
	return;

}

////////////////////////////////////////////////////////////////////////////
/// @brief Driver: reads a silent file and writes centroid-level features for each decoy.
///
/// @detailed
/// Options read here: "prefix" (prepended to every output file name, default
/// empty), "fa_input" (read the decoys as full-atom) and "s" (silent file
/// name); "use_actual_centroids" is read via get_use_actual_centroids_flag().
///
/// Output files (all opened with the prefix):
///  - CentroidInformation.txt : long-format feature table for all decoys
///  - ResMap.txt              : feature-index map (filled by the first decoy)
///  - TrainSVM.txt / TestSVM.txt : sparse per-decoy lines; the first 5% of the
///    decoys (by position in the tag list) go to the test file, the remainder
///    to the training file.
///
/// Exits through utility::exit() when the silent file yields no decoys.
/// A fresh Pose is built for every decoy; no Pose is kept at function scope.
void
extract_centroid_information_wrapper()
{
	using namespace silent_io;
	using namespace pose_ns;

	std::string prefix = stringafteroption("prefix","");

	std::ofstream dataout  ( (prefix+"CentroidInformation.txt").c_str() );
	std::ofstream  mapout  ( (prefix+"ResMap.txt").c_str() );
	//Useful for SVM-lite.
	std::ofstream trainout  ( (prefix+"TrainSVM.txt").c_str() );
	std::ofstream  testout  ( (prefix+"TestSVM.txt").c_str() );

	const bool fullatom( truefalseoption("fa_input") );

	std::string silent_file_name= stringafteroption("s");

	// read silent file
	Silent_file_data decoys( silent_file_name, fullatom );
	if ( !decoys.size() ) {
		std::cout << "STOP:: couldnt open silent-file!! " << std::endl;
		utility::exit( EXIT_FAILURE, __FILE__, __LINE__);
	}

	// setup tag list
	std::vector< std::string > tag_list;
	tag_list = decoys.tags();

	int const numdecoys = tag_list.size();

	// 1-based index of the decoy being processed; only successfully located
	// decoys are counted.
	int count = 0;
	// loop through the tag list
	for ( std::vector< std::string >::const_iterator it=tag_list.begin(),
					it_end = tag_list.end(); it != it_end; ++it ) {

		std::string const & tag( *it );

		if ( ! decoys.has_key( tag ) ) {
			std::cout << "couldnt find tag in silent-file: " << tag << std::endl;
			continue;
		}

		// get the data
		Silent_structure const & decoy( decoys.get_structure( tag ) );

		//Fresh pose for each decoy. (Necessary for RNA stuff, which sets up atom trees.)
		Pose pose;

		pose.set_use_actual_centroids(  get_use_actual_centroids_flag() );

		// fill the pose
		decoy.fill_pose( pose, true /*check_coords*/, true /*save_input_score*/ );

		//extract centroid information
		count++;
		if (mod(count,100) == 0) std::cout << "DECOY " << count << std::endl;


		//Divide up data into a test set (first 5% of decoys) and a training set.
		if (count > 0.05*numdecoys){
			get_centroid_information( pose, dataout, trainout, mapout, count );
		} else {
			get_centroid_information( pose, dataout, testout, mapout, count );
		}

	}

	dataout.close();
	trainout.close();
	testout.close();
}

///////////////////////////////////////////////////////////////////////////////

void 
add_user_pairing_feature(const int total_residue, featurizer_ns::Pairing_list &feature_list, int res1, int res2, int orientation) {
	using namespace featurizer_ns;
	// orientation == Beta_feature::ANTIPARALLEL or Beta_feature::PARALLEL
	bool recentered = false;
	for(Pairing_list::iterator it = feature_list.begin();
			it != feature_list.end();
			it++) {
	  if(it->orientation == orientation && it->contains_square(res1, res2)) {
	    it->recenter(res1, res2);
			recentered = true;
			break;
		}
	}
	if(!recentered) {
		const Pairing_feature f(res1, res2,total_residue,orientation,1);
		feature_list.push_back(f);
	}
}

///////////////////////////////////////////////////////////////////////////////
/// @brief Collapses the list of topologies by merging entries into one another.
///
/// @detailed
/// For each candidate entry, the other entries are scanned from the front for
/// the first one whose single-argument merge() test accepts the candidate.
/// If one is found, merge() is called again with second argument true
/// (presumably the call that actually performs the merge -- TODO confirm
/// against StrandPairingSet) and the candidate is erased from the list;
/// otherwise the candidate is kept and the walk moves on.
void merge_topologies(std::list<dssp_ns::StrandPairingSet> &topologies) {
	using namespace dssp_ns;
	typedef std::list<StrandPairingSet>::iterator Topo_iter;

	Topo_iter cand = topologies.begin();
	while ( cand != topologies.end() ) {
		// look for another entry willing to take the candidate
		Topo_iter host = topologies.begin();
		for ( ; host != topologies.end(); ++host ) {
			if ( host == cand ) continue;
			if ( host->merge( *cand ) ) break;
		}

		if ( host == topologies.end() ) {
			++cand;
		} else {
			host->merge( *cand, true );
			cand = topologies.erase( cand ); // erase() hands back the next entry
		}
	}
}

/// @brief Encodes every topology as a list of indices into a shared pairing list.
///
/// @detailed
/// Each strand pairing of each topology is compared, in order, against the
/// entries of @p pairings. The first entry for which both
/// topo_pairing.merge(entry) and entry.merge(topo_pairing, true) succeed
/// supplies the code (its 0-based position). If no entry qualifies, the
/// pairing is appended to @p pairings and its new position is used.
///
/// @param[in]     topologies  topologies whose pairings are to be encoded
/// @param[in,out] pairings    shared list of distinct pairings; may grow, and
///                            existing entries may be modified by merge()
/// @return one list of codes per topology, in the order of @p topologies
std::list<std::list<int> > extract_strand_pairings(std::list<dssp_ns::StrandPairingSet> &topologies, std::list<dssp_ns::StrandPairing> &pairings) {
	using namespace dssp_ns;
	typedef std::list<StrandPairingSet>::iterator Set_iter;
	typedef std::list<StrandPairing>::iterator Pairing_iter;

	std::list<std::list<int> > topocodes;
	for ( Set_iter topo = topologies.begin(); topo != topologies.end(); ++topo ) {
		topocodes.push_back( std::list<int>() );
		std::list<int> & code( topocodes.back() );

		for ( Pairing_iter member = topo->pairings.begin(); member != topo->pairings.end(); ++member ) {
			// walk the shared list until an entry absorbs this pairing
			int index = 0;
			Pairing_iter known = pairings.begin();
			while ( known != pairings.end() &&
					!( member->merge( *known ) && known->merge( *member, true ) ) ) {
				++known;
				++index;
			}

			if ( known != pairings.end() ) {
				code.push_back( index );
			} else {
				// not seen before: it becomes a new shared entry
				code.push_back( pairings.size() );
				pairings.push_back( *member );
			}
		}
	}
	return topocodes;
}


		
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////

