/** Show cluster information for decoys in a silent mode file; **/

/** 2/9/04: added native threshold info to subclustering **/
/** 2/12/04: added beta-sheet pairing geometry stuff **/

#include <stdio.h>
#include <stdlib.h>
#include "rms2.c"
#include <assert.h>
#include <string.h>

/** Minimum smeared pairing frequency for a residue pair to qualify as a feature (tested in LocalMax) **/
#define MIN_FEATURE_FREQ 0.01
/** Capacity of the per-feature tables declared in CountBetaPairings **/
#define MAX_BETA_FEATURES 1000
/** Longest loop (in residues) that CountBetaPairings still records as a hairpin loop **/
#define MAX_HAIRPIN_LOOP_LENGTH 9

#define RMSD_THRESHOLD_10 2.0
#define RMSD_THRESHOLD_100 6.0

#define PICK_BY_SCORE 0
/** ReadPDBFiles stops loading once this many decoys have been read **/
#define MAX_NUM_DECOYS  200000
#define MAX_NUM_CLUSTERS   1000
#define CONTACT_THRESHOLD  81

/** Size of the scorelineHeader buffer **/
#define MAX_SCORELINE_LENGTH 1000
/** Size of per-residue scratch arrays (CountBetaPairings) and of sequence buffers **/
#define MAX_SEQUENCE_LENGTH 1000
#define MAX_ALIGNMENT_LENGTH 10000
#define MAX_NUM_HOMOLOGS    500
/** Capacity of aContacts / bContacts / contactDistanceThreshold **/
#define MAX_FORCED_CONTACTS 100

#define BREAKPOINT_MARGIN 3
#define MAX_NUM_BREAKPOINTS 15
#define MIN_SUBCLUSTERING_SPAN  30
#define BIG_RMSD    10000.0f

#define SKIP_NEARBY1          5  /** show plot for non-automatic contacts **/
#define SKIP_NEARBY2         20  /** Only consider long-range contacts in scoring decoys **/


#define SHOW_TOP_N           20  /** For picking non-redundant decoys by score **/


/** Three-letter residue names indexed by (one-letter code - 'A'); **/
/** letters with no entry map to "UNK". WritePDB uses it to turn a **/
/** sequence letter into a name, ReadPDBFiles searches it to go back. **/
char shortToLong[][4] = {"ALA","UNK","CYS","ASP","GLU",
			 "PHE","GLY","HIS","ILE","UNK",
			 "LYS","LEU","MET","ASN","UNK",
			 "PRO","GLN","ARG","SER","THR",
			 "UNK","VAL","TRP","UNK","TYR",
			 "UNK"};

/** Number of list nodes allocated so far (incremented by AddIntLink) **/
int Mcount = 0;

/*******************************************************/
/** GLOBAL VARIABLES (probably bad form, but oh well) **/

/** Per-decoy tables. commonCoords[d] and fullCoords[d] hold three doubles **/
/** (x,y,z) per position; decoyScores[d][0] is printed as the decoy's score. **/
/** CountBetaPairings reads commonCoords[-1] / commonStructure[-1] for the **/
/** native when NATIVE is set, so those tables are presumably allocated with **/
/** a slot before index 0 -- confirm in main. **/
double **commonCoords, **fullCoords, **decoyScores, *rmsdToNative,
  *goodDecoyCoords, /* for doing rmsds to native */
  *goodNativeCoords; /* stores native coords for (possibly a subset of) common positions */

/** commonToFull[h][i] maps common position i to a position in homolog h **/
/** (used by WritePDB); decoyToHomolog[d] gives the homolog of decoy d. **/
char **commonStructure, **commonBB, **decoyName, **homologName, **homologSequence, *ssConstraint;
int *homologLength, **commonToFull, **fullToCommon, *decoyToHomolog,
  *goodCoords, /* remembers which positions in commonCoords have native coordinates */
  *commonToGood, /* remembers which index in the good coords set: like commonToGood */
  argCount;


/** numResidues is the number of common positions; NATIVE is tested as a **/
/** flag for "native structure available" in CountBetaPairings. **/
int numResidues,numHomologs,NATIVE,numDecoys,numGoodCoords,numScorelineFields,
  minClusterSize,minTopClusterSize,targetClusterSize,maxClusterSize;
/** prefix is the stem of output file names (e.g. "<prefix>.top") **/
char *prefix,**argList;
char scorelineHeader[MAX_SCORELINE_LENGTH],*binFilePrefix;
float rmsdThreshold[1000],redundancyThreshold,
  minThreshold,maxThreshold;

int CHOOSE_BY_SCORE, CHOOSE_BY_CO,CHOOSE_LOW, CHOOSE_RANDOMLY,
  CHOOSE_BY_PB,SS_CONSTRAINT=0,CHOOSE_BY_CO_OLD;

float CHOOSE_FRACTION;

int DO_CLUSTERING,USE_BIN_FILE,DEBUG=0,SUBCLUSTER,ANALYZE_CLUSTERING,PHIL_CLUSTERING,MAXSUBMAP,
  GET_CLUSTERED_DECOYS=0, INCLUDE_FILENAME=1,
  chooseForSubclustering,FORCED_CONTACTS=0,SSRMS=0,FULLATOM=0,SHOW_NBRS=0;

/** for forcing contacts **/
/** Filled by ReadForcedContacts: record k is the residue pair aContacts[k], **/
/** the residue pair bContacts[k] and contactDistanceThreshold[k]. **/
int aContacts[MAX_FORCED_CONTACTS][2],
  bContacts[MAX_FORCED_CONTACTS][2],
  numForcedContacts;

float contactDistanceThreshold[MAX_FORCED_CONTACTS];


/** structures **/
/** Node of a singly linked list of ints. A NULL link is the empty list **/
/** (see IntListLength / DeleteIntList); AddIntLink prepends a node. **/
struct IntList {
  int              data;   /** payload **/
  struct IntList  *next;   /** rest of the list, or NULL **/
};

typedef struct IntList    INT_LIST;
typedef INT_LIST         *INT_LINK;



/** functions **/

void ReadForcedContacts(char *filename) {
  FILE *file;

  file = fopen(filename,"r");
  numForcedContacts = 0;
  while (fscanf(file,"%d %d %d %d %f",
		aContacts[numForcedContacts],
		aContacts[numForcedContacts]+1,
		bContacts[numForcedContacts],
		bContacts[numForcedContacts]+1,
		contactDistanceThreshold+numForcedContacts)==5) {
    fprintf(stderr,"Forced contact between (common) residues %d-%d and %d-%d, distance<=%f\n",
	    aContacts[numForcedContacts][0],aContacts[numForcedContacts][1],
	    bContacts[numForcedContacts][0],bContacts[numForcedContacts][1],
	    contactDistanceThreshold[numForcedContacts]);
    numForcedContacts++;
  }
  fclose(file);
  return;
}

/** Squared Euclidean distance between two 3-vectors a and b. **/
double DistanceSquared(double *a,double *b) {
  double sum = 0.0;
  int axis;

  for (axis = 0; axis < 3; axis++) {
    double delta = a[axis] - b[axis];
    sum += delta * delta;
  }
  return sum;
}


/** Smaller of two ints (returns b when they are equal). **/
int IntMin(int a, int b) {
  if (a < b)
    return a;
  return b;
}

float RMS100(float rmsd,int L) {
  if (L<14) return 1000;
  else return rmsd/ (1 + 0.5 * (log (( (float) L)/ 100.0)));
}

/**
 * Map a secondary-structure letter to an index: 'E' -> 0, 'H' -> 1, 'L' -> 2.
 * Any other character is reported on stderr ("?2L") and treated as 'L'.
 */
int SS2Int(char a) {
  switch (a) {
  case 'E':
    return 0;
  case 'H':
    return 1;
  case 'L':
    return 2;
  default:
    fprintf(stderr,"?2L");
    return 2;
  }
}

/**
 * qsort comparator for arrays of float* rows: orders rows by increasing
 * row[0].  Returns 1 / -1 / 0; equal keys compare equal so that the
 * comparator is consistent in both argument orders, as qsort requires.
 */
int CompareIncreasing(float **a,float **b) {
  float left = a[0][0];
  float right = b[0][0];

  if (left > right) return 1;
  if (left < right) return -1;
  return 0;
}
/**
 * qsort comparator for int arrays, increasing order.  Returns 1 / -1 / 0;
 * equal values compare equal so the comparator is consistent in both
 * argument orders, as qsort requires.
 */
int CompareIntIncreasing(int *a,int *b) {
  if (a[0] > b[0]) return 1;
  if (a[0] < b[0]) return -1;
  return 0;
}

/**
 * qsort comparator for arrays of float* rows: orders rows by decreasing
 * row[0].  Returns -1 / 1 / 0; equal keys compare equal so that the
 * comparator is consistent in both argument orders, as qsort requires.
 */
int CompareDecreasing(float **a,float **b) {
  float left = a[0][0];
  float right = b[0][0];

  if (left > right) return -1;
  if (left < right) return 1;
  return 0;
}

/** Number of nodes in list l (0 for the empty list, NULL). **/
int IntListLength(INT_LINK l) {
  int count = 0;
  INT_LINK node;

  for (node = l; node != NULL; node = node->next)
    count++;
  return count;
}

/** Free every node of list l; a NULL list is a no-op. **/
void DeleteIntList(INT_LINK l) {
  while (l != NULL) {
    INT_LINK rest = l->next;  /* grab the tail before releasing the node */
    free(l);
    l = rest;
  }
}

/**
 * Prepend value i to list l and return the new head.
 * The node is heap-allocated (release the list with DeleteIntList) and the
 * global allocation counter Mcount is incremented.  If the allocation
 * fails the program reports it on stderr and exits, since no caller can
 * continue with a broken list.
 */
INT_LINK AddIntLink(int i,INT_LINK l) {
  INT_LINK new;

  new = malloc(sizeof(INT_LIST));
  if (new == NULL) {
    fprintf(stderr,"AddIntLink: out of memory\n");
    exit(1);
  }
  Mcount ++;
  new->next = l;
  new->data = i;
  return new;
}

/** ans = v - w, component by component. **/
void Dsubvec(double v[3],double w[3], double ans[3]) {
  int axis;

  for (axis = 0; axis < 3; axis++)
    ans[axis] = v[axis] - w[axis];
}
/** ans = v + w, component by component. **/
void Dsumvec(double v[3],double w[3], double ans[3]) {
  int axis;

  for (axis = 0; axis < 3; axis++)
    ans[axis] = v[axis] + w[axis];
}

void Dunitvec(double v[3],double u[3]) {
  double l;
  l = sqrt ( v[0]*v[0] + v[1]*v[1] + v[2]*v[2]);
  u[0] = v[0] / l;
  u[1] = v[1] / l;
  u[2] = v[2] / l;
  return;
}

/** v2 = v0 x v1 (cross product); components are written in order 0,1,2. **/
void Dcros(double v0[3], double v1[3], double v2[3]) { /** from util_vector! **/
  int k;

  for (k = 0; k < 3; k++) {
    const int next = (k + 1) % 3;   /* cyclic successor of k */
    const int prev = (k + 2) % 3;   /* cyclic predecessor of k */
    v2[k] = v0[next]*v1[prev] - v0[prev]*v1[next];
  }
}

/** Dot product of two 3-vectors. **/
double Ddotprod(double v[3],double w[3]) {
  double sum = v[0]*w[0];

  sum += v[1]*w[1];
  sum += v[2]*w[2];
  return sum;
}

/**
 * Build three unit vectors from two triples of consecutive C-alpha
 * coordinates (each triple is 9 doubles: x,y,z of atoms #1,#2,#3).
 *   a: from c-alpha #1 to c-alpha #3 of ca1
 *   b: pleat direction of ca1, (ca#2 - ca#1) + (ca#2 - ca#3)
 *   c: from the central atom of ca1 to the central atom of ca2
 */
void GetCAVectors(double ca1[9], double ca2[9], 
		  double a[3],double b[3], double c[3]) {
  double *first = ca1, *middle = ca1+3, *last = ca1+6;
  double along[3],toFirst[3],toLast[3],pleat[3],across[3];

  /* strand direction */
  Dsubvec(last,first,along);
  Dunitvec(along,a);

  /* pleat direction: the middle atom sticks out from its two neighbours */
  Dsubvec(middle,first,toFirst);
  Dsubvec(middle,last,toLast);
  Dsumvec(toFirst,toLast,pleat);
  Dunitvec(pleat,b);

  /* direction towards the partner triple */
  Dsubvec(ca2+3,middle,across);
  Dunitvec(across,c);
}

/** geometry is [orientation, pleating1, pleating2] **/

/**
 * Describe the relative geometry of two C-alpha triples.
 * On return:
 *   geometry[0] = orientation: dot product of the two strand directions
 *                 (the callers treat < 0 as antiparallel)
 *   geometry[1] = pleating of ca1 relative to ca2
 *   geometry[2] = pleating of ca2 relative to ca1, sign-flipped when the
 *                 orientation is not negative
 */
void GetPairingGeometry(double ca1[9], double ca2[9], 
			float geometry[3]) {
  double dir1[3],pleat1[3],link1[3],normal1[3];
  double dir2[3],pleat2[3],link2[3],normal2[3];
  double orientation,side1,side2;

  GetCAVectors(ca1,ca2,dir1,pleat1,link1);
  GetCAVectors(ca2,ca1,dir2,pleat2,link2);

  orientation = Ddotprod(dir1,dir2);

  Dcros(dir1,pleat1,normal1);
  Dcros(dir2,pleat2,normal2);

  side1 = Ddotprod(normal1,link1);
  side2 = Ddotprod(normal2,link2);

  geometry[0] = orientation;
  geometry[1] = side1;
  if (orientation < 0)
    geometry[2] = side2;   /* antiparallel */
  else
    geometry[2] = -side2;
}


/**
 * Is freq[o][i][j] a local maximum of the frequency map for orientation o?
 * It must be at least MIN_FEATURE_FREQ, and no cell of the surrounding 3x3
 * neighbourhood (restricted to 0 <= row < col < numResidues) may be larger.
 * Ties are broken in favour of the cell that comes later in (row, col)
 * order, so only one cell of a plateau is reported.
 * Returns 1 if (i,j) is a local maximum, 0 otherwise.
 */
int LocalMax(int i,int j,int o,float*** freq) {
  const float center = freq[o][i][j];
  int row,col;

  if (!(center >= MIN_FEATURE_FREQ))
    return 0;

  for (row = i-1; row <= i+1; row++) {
    for (col = j-1; col <= j+1; col++) {
      float nbr;

      /* skip cells outside the upper triangle / outside the map */
      if (!(col > row && row >= 0 && col < numResidues))
	continue;

      nbr = freq[o][row][col];
      if (nbr > center)
	return 0;
      if (nbr == center && (row > i || (row == i && col > j)))
	return 0;
    }
  }
  return 1;
}

/**
 * Hairpin test for a feature: antiparallel (o == 0), at most 8 residues
 * on average between the paired positions (sum of the three ssFreq
 * entries) and at most 0.75 of them in strand.
 * Returns 1 if all three hold, 0 otherwise.
 */
int Is_HP(float ssFreq[3], int o) {
  float total = ssFreq[0] + ssFreq[1] + ssFreq[2];
  float strand = ssFreq[SS2Int('E')];

  if (o != 0)
    return 0;
  if (!(total <= 8))
    return 0;
  return (strand <= 0.75);
}

/**
 * Beta-alpha-beta test for a feature: parallel (o == 1), at most 30
 * residues on average between the paired positions (sum of the three
 * ssFreq entries), at most 0.75 of them in strand and at least 1.0 in helix.
 * Returns 1 if all hold, 0 otherwise.
 */
int Is_BAB(float ssFreq[3], int o) {
  float total = ssFreq[0] + ssFreq[1] + ssFreq[2];
  float strand = ssFreq[SS2Int('E')];
  float helix = ssFreq[SS2Int('H')];

  if (o != 1)
    return 0;
  if (!(total <= 30))
    return 0;
  if (!(strand <= 0.75))
    return 0;
  return (helix >= 1.0);
}

/** Coordinate along the long axis of a pairing: i-j when o == 0, i+j otherwise. **/
int LongAxis(int i, int j, int o) {
  return (o == 0) ? (i - j) : (i + j);
}

/** Coordinate along the short axis of a pairing: i+j when o == 0, i-j otherwise. **/
int ShortAxis(int i, int j, int o) {
  return (o == 0) ? (i + j) : (i - j);
}

/**
 * Find the feature of orientation o that lies closest to residue pair (i,j).
 *
 * featureList[f] = {i, j, orientation}.  A feature is a candidate when its
 * orientation equals o and its offset from (i,j) is at most bigThreshold
 * along the long axis and at most smallThreshold along the short axis.
 * Candidates are ranked by smallThreshold*longDev + bigThreshold*shortDev
 * (only scores below 1000 are accepted); the first best one wins.
 *
 * Returns the index of the chosen feature, or -1 if none qualifies.
 */
int GetClosestFeature(int i, int j, int o, 
		      int featureList[MAX_BETA_FEATURES][3], int numFeatures,
		      int bigThreshold, int smallThreshold){
  const int longPos = LongAxis(i,j,o);
  const int shortPos = ShortAxis(i,j,o);
  int bestScore = 1000;   /* also the upper limit for an acceptable score */
  int bestFeature = -1;
  int f;

  for (f = 0; f < numFeatures; f++) {
    int longDev,shortDev,score;

    if (featureList[f][2] != o)
      continue;

    longDev = longPos - LongAxis(featureList[f][0],featureList[f][1],o);
    if (longDev < 0) longDev = -longDev;
    shortDev = shortPos - ShortAxis(featureList[f][0],featureList[f][1],o);
    if (shortDev < 0) shortDev = -shortDev;

    score = smallThreshold * longDev + bigThreshold * shortDev;
    if (longDev <= bigThreshold && shortDev <= smallThreshold &&
	score < bestScore) {
      bestScore = score;
      bestFeature = f;
    }
  }

  return bestFeature;
}

/**
 * Are residue pairs (i,j) and (ii,jj) within a radius of 3 of each other
 * in the (i,j) plane?  The orientation argument o is not used.
 * Returns 1 if so, 0 otherwise.
 */
int TooClose(int i,int j,int ii,int jj,int o) {
  const int di = i - ii;
  const int dj = j - jj;

  (void) o;
  return (di*di + dj*dj <= 9); /** radius of 3 **/
}
  

/**
 * Tally beta-strand pairings for a list of decoys (and for the native when
 * one is available) into betaPairings.
 *
 * decoyList / decoyListLength: the decoys to analyse.  When the list covers
 * every decoy (decoyListLength == numDecoys) the function additionally
 *   - writes "PAIRS", "TOP" and "FEATURE" lines to "<prefix>.top", and
 *   - derives pairing "features" (local maxima of the smeared pairing
 *     frequency) and stores them in planes 4 and 5.
 * For any shorter list (a cluster) only planes -3..3 and 6..8 are filled
 * and nothing is written.
 *
 * betaPairings is allocated by the caller (in main) and is indexed
 * [plane][residue][residue] with planes -3..8:
 *   -3,-2,-1: native pairings: antiparallel, parallel, both ignoring pleating
 *    0, 1, 2: decoy  pairings: antiparallel, parallel, both ignoring pleating
 *    3: [i][0] native residueIsPaired,  [i][1] decoy residueIsPaired count,
 *       [i][2] native residueIsHairpin, [i][3] decoy residueIsHairpin count
 *    4: native feature membership of a residue pair
 *    5: decoy count of the feature a residue pair belongs to
 *    6: native hairpin, start=i loopLength=s in [6][i][s]
 *    7: decoy  hairpin, start=i loopLength=s in [7][i][s]
 *    8: decoy  turn,    start=i loopLength=s in [8][i][s]
 * In planes 0/1 (and -3/-2) the pair is stored as [i][j] when pleating is
 * positive and as [j][i] when it is negative.  In plane 2 (and -1)
 * antiparallel pairs are stored with ii<jj and parallel pairs with ii>jj.
 *
 * The native is addressed as decoy -1 (commonCoords[-1], commonStructure[-1]).
 * Exits with a message if the ".top" file cannot be created.
 */
void CountBetaPairings(int* decoyList, int decoyListLength, 
		       int*** betaPairings) {
  
  int I,II,II_start,i,j,k,o,ii,jj,iii,a,b,s,f,numFeatures,f1,f2,loopLength,
    numSmear[2], smearList[2][100][2],*ssc[3],
    featureList[MAX_BETA_FEATURES][3],decoyCount[MAX_BETA_FEATURES],
    pairingCount[MAX_BETA_FEATURES], ssCount[MAX_BETA_FEATURES][3],
    nativeFeature[MAX_BETA_FEATURES], featureMatch[MAX_BETA_FEATURES],
    hairpinLoopLength[MAX_SEQUENCE_LENGTH],
    residueIsPaired [MAX_SEQUENCE_LENGTH],
    residueIsHairpin[MAX_SEQUENCE_LENGTH];
  
  float geometry[3],ssFreq[3],d,d1,d3,orientation,pleating1,pleating2,***freq,**fl;
  double *coords;
  char* ss,topFilename[1000];
  FILE* outFile;
  
  /** params: **/
  float d2Threshold=42.25; /** 6.5 * 6.5 **/
  int minSep = 5;          /** minimum sequence separation of paired residues **/
  int maxHairpinLoopLength = MAX_HAIRPIN_LOOP_LENGTH; 
  int bigThreshold=6; /** for getClosestFeature **/
  int smallThreshold=4;
  
  if (decoyListLength == numDecoys) {  
    snprintf(topFilename,sizeof(topFilename),"%s.top",prefix);
    outFile = fopen(topFilename,"w");
    if (outFile == NULL) {
      /* the second pass writes unconditionally, so we cannot go on */
      fprintf(stderr,"cant open file: %s\n",topFilename);
      exit(1);
    }
  }
  else outFile = NULL;/** dont output info for clusters **/
  
  /** clear every plane (-3..8), memory is allocated in main **/
  for (I=-3;I<9;I++)
    for (i=0;i<numResidues;i++) 
      for (j=0;j<numResidues;j++) betaPairings[I][i][j] = 0;
  
  /*************************************************************/
  /** PASS 1: find the pairings of each structure and tally them **/
  
  if (outFile && NATIVE) II_start = -1;
  else II_start = 0;
  for (II=II_start;II<decoyListLength;II++) {
    if (II<0) I = -1; /** native **/
    else I = decoyList[II];
    if (outFile) {
      if (I<0)
	fprintf(outFile,"PAIRS NATIVE %9.2f",0.0);
      else
	fprintf(outFile,"PAIRS %s %9.2f",decoyName[I],decoyScores[I][0]);
    }
    coords = commonCoords[I];
    ss = commonStructure[I];
    for(i=0;i<numResidues;i++) { /** initialize per-residue features for this protein **/
      residueIsPaired [i] = 0;
      residueIsHairpin[i] = 0;
      hairpinLoopLength[i] = 0;
    }
    for (i=1;i<numResidues-1;i++) {
      if (ss[i] != 'E') continue;
      for (j=i+minSep;j<numResidues-1;j++) {
	if (ss[j] != 'E') continue;

	d = DistanceSquared( coords+3*i, coords+3*j);
	if (!(d<d2Threshold)) continue;

	/** d3: closest distance among the (i+-2,j) / (i,j+-2) neighbours; **/
	/** (i,j) only counts as a pairing if it beats all of them **/
	d3 = 1000.0;

	for (ii=i-2;ii<=i+2;ii+=2) {
	  for (jj=j-2;jj<=j+2;jj+=2) {
	    if ((ii==i && jj==j) || 
		(ii!=i && jj!=j) ||
		ii<0 || jj<0 ||
		ii>=numResidues || jj>=numResidues ||
		jj<=i+1 || ii>=j-1) continue;
	    
	    d1 = DistanceSquared(coords+3*ii,coords+3*jj);
	    d3 = ((d3<d1)?d3:d1);
	  }
	}
	
	/** geometry is computed from the CA triples centred on i and j **/
	GetPairingGeometry( coords+3*i-3, coords+3*j-3, geometry);
	
	orientation = geometry[0];
	pleating1 = geometry[1];
	pleating2 = geometry[2];

	if (!(d3>d && pleating1 * pleating2 > 0)) continue;

	if (outFile)
	  /** NOTE:: rosetta numbering in output here but not everywhere!?! **/
	  fprintf(outFile," %3d %3d %4.1f %4.1f",
		  i+1,j+1,orientation,pleating1 + pleating2); 
	residueIsPaired[i] = 1;
	residueIsPaired[j] = 1;

	/** is this pairing part of a hairpin?? **/
	ii=i+1;
	jj=j-1;
	while (ii<j && ss[ii]=='E') ii++;
	while (jj>i && ss[jj]=='E') jj--;
	if (ii==j || jj==i) { /** no non-strand residue between i and j **/
	  if (I>=0)
	    fprintf(stderr,"funny pairing %4d %4d %4d %7.2f %7.2f %7.2f %7.2f %s\n",
		    i,j,j-i,
		    sqrt(d),orientation,
		    pleating1,pleating2,
		    decoyName[I]);
	  else
	    fprintf(stderr,"funny pairing %4d %4d %4d %7.2f %7.2f %7.2f %7.2f NATIVE\n",
		    i,j,j-i,
		    sqrt(d),orientation,
		    pleating1,pleating2);
	}
	loopLength = jj-ii+1; /** ii and jj are first/last residues of loop **/
	for (iii=ii;iii<jj && ss[iii]!='E';iii++); /** check for intervening E's **/
	if (orientation<0 && iii==jj &&
	    0<loopLength &&  
	    loopLength<=maxHairpinLoopLength) {/** hairpin **/
	  for (iii=ii;iii<=jj;iii++)
	    residueIsHairpin[iii] = 1;
	  hairpinLoopLength[ii] = loopLength; /** indexed by first rsd of loop! **/
	}

	if (orientation<0) o=0;
	else o=1;
	if (pleating1>0) { ii=i; jj=j;}
	else {ii=j; jj=i;}
	
	if (I>=0)
	  betaPairings[o][ii][jj]++;
	else /** native **/
	  betaPairings[o-3][ii][jj]++;
	
	if (orientation<0) {ii=i; jj=j;} /** now ignore pleating **/
	else {ii=j; jj=i;} /** antiparallel ii<jj; parallel ii>jj **/
	
	if (I>=0)
	  betaPairings[2][ii][jj]++;
	else /** native **/
	  betaPairings[2-3][ii][jj]++;
      }
    }
    if (numResidues>=4) /** make sure we have the space! **/
      for (i=0;i<numResidues;i++) {/** this is wasteful: storing linear data pairwise **/
	if (residueIsPaired[i]) {
	  if (I<0) /** native **/
	    betaPairings[3][i][0] = 1;
	  else /** decoy **/
	    betaPairings[3][i][1]++;
	}
	if (residueIsHairpin[i]) {
	  if (I<0) /** native **/
	    betaPairings[3][i][2] = 1;
	  else /** decoy **/
	    betaPairings[3][i][3]++;
	}
      }
    
    /** look at loop lengths for hairpins and non-hairpins **/
    for (i=0;i<numResidues;i++) 
      if (hairpinLoopLength[i]) {
	if (I<0) /** native **/
	  betaPairings[6][i][ hairpinLoopLength[i] ] =1;
	else /** decoy **/
	  betaPairings[7][i][ hairpinLoopLength[i] ]++;
      }
    
    if (I>=0) {/** store decoy loop-lengths **/
      for (i=0;i<numResidues-1;i++) { 
	if (ss[i] == 'E' && ss[i+1] != 'E') {
	  for (j=i+1; j<numResidues && ss[j] != 'E'; j++);
	  /** ran off the end: no closing strand (dont read ss[numResidues]) **/
	  if (j>=numResidues || ss[j] != 'E') continue;
	  loopLength = j-i-1; /** j and i are in strands **/
	  if (loopLength<=maxHairpinLoopLength) {
	    /** hairpin-sized loop **/
	    betaPairings[8][i+1][loopLength]++; /** i+1 is start of loop **/
	    if (hairpinLoopLength[i+1]) {
	      assert (hairpinLoopLength[i+1] == loopLength);
	    }
	  }
	  else assert (!hairpinLoopLength[i+1]);
	}
	else assert (!hairpinLoopLength[i+1]);
      }
    }
    
    if (outFile) fprintf(outFile,"\n");
  }
  if (decoyListLength != numDecoys) return; /** clusters: counts only **/
  
  /*************************************************************/
  /** now get smeared frequencies to determine the features **/
  
  /** setup the smear list: offsets (i,j) within +-4 along the long axis **/
  /** and +-2 along the short axis, separately for each orientation **/
  for (o=0;o<2;o++) 
    numSmear[o] = 0;
  
  for (i=-20;i<=20;i++) {
    for (j=-20;j<=20;j++) {
      for (o=0;o<2;o++) {
	a = LongAxis(i,j,o);
	b = ShortAxis(i,j,o);
	
	if (-4<=a && a<=4 && -2<=b && b<=2) {
	  smearList[o][ numSmear[o] ][0] = i;
	  smearList[o][ numSmear[o] ][1] = j;
	  numSmear[o]++;
	}
      }
    }
  }
  
  /** freq[o][i][j]: pleating-independent pairing frequency summed over **/
  /** the smear neighbourhood of (i,j), for orientation o **/
  freq = calloc(2,sizeof(float**));
  for (o=0;o<2;o++) {
    freq[o] = calloc(numResidues,sizeof(float*));
    for (i=0;i<numResidues;i++) {
      freq[o][i] = calloc(numResidues,sizeof(float));
      for (j=0;j<numResidues;j++) {
	freq[o][i][j] = 0.0;
	
	if (j-i>=minSep) {
	  
	  for (s=0;s<numSmear[o];s++) {
	    
	    ii = i+smearList[o][s][0];
	    jj = j+smearList[o][s][1];
	    
	    if (jj-ii>=minSep && ii>0 && jj<numResidues) {
	      if (o==0) 
		freq[o][i][j] += 
		  ( (float) betaPairings[2][ii][jj])/decoyListLength;
	      else /** parallel pairs are stored transposed in plane 2 **/
		freq[o][i][j] += 
		  ( (float) betaPairings[2][jj][ii])/decoyListLength;
	    }
	  }
	}
      }
    }
  }

  /** now go through and identify the features -- local maxima **/
  numFeatures = 0;
  for (o=0;o<2;o++) {
    for (i=0;i<numResidues;i++) {
      for (j=0;j<numResidues;j++) {
	if (j-i>=minSep && LocalMax(i,j,o,freq)) {
	  fprintf(stderr,"new feature: %d %d %d %7.3f\n",
		  i,j,o,freq[o][i][j]);
	  /** check capacity BEFORE writing the new entry **/
	  assert (numFeatures<MAX_BETA_FEATURES);
	  featureList[numFeatures][0] = i;
	  featureList[numFeatures][1] = j;
	  featureList[numFeatures][2] = o;
	  numFeatures++;
	}
      }
    }
  }

  /** go through features and kill ones that are too close together **/
  /** first reorder features by their frequency **/
  fl = calloc(numFeatures,sizeof(float*));
  for (ii=0;ii<numFeatures;ii++) {
    fl[ii] = calloc(2,sizeof(float));
    i = featureList[ii][0];
    j = featureList[ii][1];
    o = featureList[ii][2];
    fl[ii][0] = freq[o][i][j];
    fl[ii][1] = ii; /** remember the feature index through the sort **/
  }
  
  qsort(fl,numFeatures,sizeof(float*),
	(int (*)(const void*,const void*)) CompareDecreasing);
  if (numFeatures>=3)
    assert (fl[0][0] >= fl[1][0] && fl[1][0] >= fl[2][0]);
  
  /** a more frequent feature discards every less frequent one near it **/
  for (a=0;a<numFeatures;a++) {
    f1 = (int) fl[a][1];
    fprintf(stderr,"Feature: %d Freq: %f\n",f1,fl[a][0]);
    i = featureList[f1][0];
    j = featureList[f1][1];
    o = featureList[f1][2];

    for (b=a+1;b<numFeatures;b++) {
      f2 = (int) fl[b][1];
      if (featureList[f2][2] != o) continue;

      ii = featureList[f2][0];
      jj = featureList[f2][1];

      if (TooClose (i,j,ii,jj,o)) {
	fprintf(stderr,"Discard: %d %d %d %d o= %d %f > %f\n",
		i,j,ii,jj,o,fl[a][0],fl[b][0]);
	featureList[f2][2] = -1; /** set orientation to -1: ignore me **/
      }
    }
  }
  
  /* free memory used for sorting*/
  for (i=0;i<numFeatures;i++) free(fl[i]);
  free(fl);

  /*************************************************************/
  /** PASS 2: go back through the decoys and get feature frequencies and ss_counts **/
  for (f=0;f<numFeatures;f++) {
    decoyCount[f] = 0;
    pairingCount[f] = 0;
    for (i=0;i<3;i++) ssCount[f][i] = 0;
    nativeFeature[f] = 0;
  }

  /** for storing cumulative ss sums for a given decoy; the pointer is **/
  /** shifted by one so that ssc[i][-1] is a valid zero sentinel **/
  for (i=0;i<3;i++) {
    ssc[i] = calloc(numResidues+1,sizeof(int));
    ssc[i]++;
    ssc[i][-1] = 0;
  }

  if (NATIVE) II_start = -1;
  else II_start = 0;
  for (II=II_start;II<decoyListLength;II++) {
    if (II<0) I = -1; /** native **/
    else I = decoyList[II];
    
    coords = commonCoords[I];
    ss = commonStructure[I];

    /** how many pairings in this decoy match feature f?? **/
    for (f=0;f<numFeatures;f++) featureMatch[f] = 0; 

    for (i=0;i<3;i++) /** cumulative ss counts: ssc[i][-1] = 0 **/
      for (j=0;j<numResidues;j++) 
	ssc[i][j] = ssc[i][j-1] + (SS2Int( ss[j]) == i);
    
    for (i=1;i<numResidues-1;i++) {
      if (ss[i] != 'E') continue;
      for (j=i+minSep;j<numResidues-1;j++) {
	if (ss[j] != 'E') continue;

	d = DistanceSquared( coords+3*i, coords+3*j);
	if (!(d<d2Threshold)) continue;

	d3 = 1000.0;

	/** NOTE(review): unlike pass 1 this neighbour test has no **/
	/** "jj<=i+1 || ii>=j-1" exclusion -- confirm that is intended **/
	for (ii=i-2;ii<=i+2;ii+=2) {
	  for (jj=j-2;jj<=j+2;jj+=2) {
	    if ((ii==i && jj==j) || 
		(ii!=i && jj!=j) ||
		ii<0 || jj<0 ||
		ii>=numResidues || jj>=numResidues) continue;
	    
	    d1 = DistanceSquared(coords+3*ii,coords+3*jj);
	    d3 = ((d3<d1)?d3:d1);
	  }
	}
	
	GetPairingGeometry( coords+3*i-3, coords+3*j-3, geometry);
	
	orientation = geometry[0];
	pleating1 = geometry[1];
	pleating2 = geometry[2];

	if (!(d3>d && pleating1 * pleating2 > 0)) continue;

	if (orientation<0) o=0;
	else o=1;

	f = GetClosestFeature(i,j,o,featureList,numFeatures,
			      bigThreshold,smallThreshold); /* -1 if no match*/
	if (f<0) continue;

	if (I==-1) { /** NATIVE: dont increment pairingCount or ssCount **/
	  featureMatch[f]++;
	}
	else {
	  /** ii..jj: the stretch between the two paired strands **/
	  ii = i+1;
	  while (ss[ii] == 'E' && ii<j) ii++;
	  jj = j-1;
	  while (ss[jj] == 'E' && jj>i) jj--;
	  
	  featureMatch[f]++;
	  pairingCount[f]++;
	  
	  if (jj>=ii && ii>0) 
	    for (k=0;k<3;k++) 
	      ssCount[f][k] +=  (ssc[k][jj] - ssc[k][ii-1]);
	}
      }
    }
    if (I==-1) {
      fprintf(outFile,"TOP NATIVE %9.2f",0.0);
      for (f=0;f<numFeatures;f++) 
	if (featureMatch[f]>=1) 
	  nativeFeature[f] = 1;
    }
    else
      fprintf(outFile,"TOP %s %9.2f",decoyName[I],decoyScores[I][0]);
    
    ii = 0; /** count all features **/
    jj = 0; /** count native features **/
    
    for (f=0;f<numFeatures;f++) 
      if (featureMatch[f]>=1) {
	ii++;
	jj+= nativeFeature[f];
      }
    
    fprintf(outFile," %3d %3d",ii,jj);
	
    for (f=0;f<numFeatures;f++) 
      if (featureMatch[f]>=1) {
	if (I>=0) /** dont increment decoyCount for the native **/
	  decoyCount[f]++;
	fprintf(outFile," %3d %3d %1d",
		featureList[f][0],
		featureList[f][1],
		featureList[f][2]);
      }
    fprintf(outFile,"\n");
  }

  /** one FEATURE line per surviving feature that occurs in some decoy **/
  for (f=0;f<numFeatures;f++) {
    if (decoyCount[f]<=0) continue; /** was this feature killed b/c too close to another? */
    assert (featureList[f][2]>=0); 
    for (k=0;k<3;k++) ssFreq[k] = ((float)ssCount[f][k])/pairingCount[f];
    i = featureList[f][0];
    j = featureList[f][1];
    o = featureList[f][2];
    fprintf(outFile,"FEATURE %3d %3d %1d %8.5f E: %7.3f H: %7.3f L: %7.3f sep: %7.3f",
	    i,j,o,
	    ((float)decoyCount[f])/decoyListLength,
	    ssFreq[SS2Int('E')],
	    ssFreq[SS2Int('H')],
	    ssFreq[SS2Int('L')],
	    ssFreq[SS2Int('E')]+ssFreq[SS2Int('H')]+ssFreq[SS2Int('L')]);
    if (Is_HP(ssFreq,o)) fprintf(outFile," HP");
    if (Is_BAB(ssFreq,o)) fprintf(outFile," BAB");
    if (nativeFeature[f]) fprintf(outFile," NAT");
    fprintf(outFile,"\n");
  }
  
  /** store info about feature frequencies in betaPairings **/
  for (i=0;i<numResidues;i++)
    for (j=i+minSep;j<numResidues;j++) 
      for (o=0;o<2;o++) {
	if (o==0) { /** anti-parallel **/
	  ii = i;
	  jj = j;
	}
	else { /** parallel: stored transposed **/
	  ii = j;
	  jj = i;
	}
	f = GetClosestFeature(i,j,o,featureList,numFeatures,
			      bigThreshold,smallThreshold); /* -1 if no match*/
	if (f>=0) {
	  if (nativeFeature[f])
	    betaPairings[4][ii][jj] = 1;
	  betaPairings[5][ii][jj] = decoyCount[f];
	}
      }
  
  /** free memory: rows first, then each plane, then the top level **/
  for (o=0;o<2;o++) {
    for (i=0;i<numResidues;i++) 
      free( freq[o][i] );
    free(freq[o]);
  }
  free(freq);
  
  for (i=0;i<3;i++)
    free(ssc[i]-1); /** undo the +1 shift applied after calloc **/

  if (outFile) fclose(outFile);
  return;
}

/**
 * Classify backbone torsion angles (degrees) into a one-letter bin:
 *   'O'  omega strictly between -90 and 90
 *   'G'  phi > 0  and -100 < psi <= 100
 *   'E'  phi > 0  and psi outside that range
 *   'A'  phi <= 0 and -125 < psi <= 50
 *   'B'  phi <= 0 and psi outside that range
 */
char
PPO2BB( float phi, float psi, float omega )
{
	const int omegaNearZero = ( omega > -90 && omega < 90 );

	if ( omegaNearZero )
		return 'O';

	if ( phi > 0 )
		return ( psi > -100 && psi <= 100 ) ? 'G' : 'E';

	return ( psi > -125 && psi <= 50 ) ? 'A' : 'B';
}

/**
 * Map a backbone bin letter to an index:
 * 'A' -> 0, 'B' -> 1, 'G' -> 2, 'E' -> 3, 'O' -> 4.
 * Any other character is reported on stderr and mapped to 0.
 */
int
BB2Int( char bb )
{
	switch ( bb ) {
	case 'A': return 0;
	case 'B': return 1;
	case 'G': return 2;
	case 'E': return 3;
	case 'O': return 4;
	default:
		fprintf(stderr,"Bad bb2int bb= %c\n",bb );
		return 0;
	}
}

/**
 * Choose N distinct integers at random from 0..M-1.
 *
 * Every candidate gets a random key from rand(); the candidates are sorted
 * by key and the first N are taken.  The result is a newly allocated array
 * of N ints in increasing order, which the caller must free.
 *
 * Requires 0 <= N <= M (asserted): asking for more elements than exist
 * would read past the end of the key table.
 */
int *RandomSubset(int N, int M) { /** choose N of M **/
  
  float **l;
  int i,*subset;

  assert (N >= 0 && N <= M);

  l=  calloc(M,sizeof(float*));
  assert (l != NULL || M == 0);
  for (i=0;i<M;i++) {
    l[i] = calloc(2,sizeof(float));
    assert (l[i] != NULL);
    l[i][0] = (float) rand(); /* sort key */
    l[i][1] = i;              /* candidate index, carried through the sort */
  }

  qsort(l,M,sizeof(float*),(int (*)(const void*,const void*)) CompareDecreasing);
  
  subset = calloc(N,sizeof(int));
  assert (subset != NULL || N == 0);
  for (i=0;i<N;i++) {
    subset[i] = (int) l[i][1];
  }
  for (i=0;i<M;i++) free(l[i]);
  free(l);
  qsort(subset,N,sizeof(int),(int (*)(const void*,const void*)) CompareIntIncreasing); 
  return subset;
}

/**
 * Write C-alpha ATOM records for part of a decoy to file, after applying
 * the transform  x' = R*x + T  to each coordinate.
 *
 * start/stop are common-alignment positions.  They are mapped to positions
 * in the decoy's homolog with commonToFull, except that start == 0 and
 * stop == numResidues-1 extend to the first and last residue of the
 * homolog.  The atom serial number and residue number written are both the
 * (0-based) position in the homolog.  Residue letters outside 'A'..'Z' are
 * written as "UNK" rather than indexing outside the name table.
 */
void WritePDB(FILE *file,int decoy, int start, int stop,
	      double R[3][3],double T[3]) {
  int i,j,homolog,startPosition,stopPosition,letter;
  double v[3];
  double *coords;
  char *sequence;
  const char *rsdName;

  homolog = decoyToHomolog[decoy];
  if (start==0) 
    startPosition=0;
  else 
    startPosition = commonToFull[homolog][start];
  if (stop == numResidues-1) 
    stopPosition = homologLength[homolog]-1;
  else 
    stopPosition = commonToFull[homolog][stop];

  coords = fullCoords[decoy];
  sequence = homologSequence[homolog];
  for (i=startPosition; i<=stopPosition; i++) {

    /* rotate */
    for (j=0;j<3;j++)
      v[j] = R[j][0]*coords [3*i] + R[j][1]*coords[3*i+1] + R[j][2]*coords[3*i+2];

    /* shortToLong has 26 entries, one per upper-case letter */
    letter = sequence[i]-'A';
    if (letter>=0 && letter<26)
      rsdName = shortToLong[letter];
    else
      rsdName = "UNK";
    
    /* translate and print */
    fprintf(file,"ATOM  %5d %s %s A%4d    %8.3f%8.3f%8.3f%6.2f%6.2f\n",
	    i," CA ",rsdName,i,
	    v[0]+T[0],v[1]+T[1],v[2]+T[2],
	    1.0,1.0);
  }
  return;
}

/**
 * Parse a string of the form "a,b,c" into three ints.
 *
 * Each field is converted with atoi, so leading blanks are skipped and
 * conversion stops at the first non-digit.  A field that is missing
 * (fewer than two commas) yields 0.  The input is not modified and may be
 * of any length.
 */
void GetThreeIntegers(char *s,int*a, int*b, int*c) {
  int *out[3];
  const char *field = s;
  int k;

  out[0] = a;
  out[1] = b;
  out[2] = c;

  for (k=0;k<3;k++) {
    if (field == NULL) { /* ran out of fields */
      *out[k] = 0;
      continue;
    }
    *out[k] = atoi(field);       /* stops at the comma by itself */
    field = strchr(field,',');
    if (field != NULL) field++;  /* step over the comma */
  }
  return;
}

/**
 * Parse a string of the form "a,b" into two floats.
 *
 * Each field is converted with atof, so conversion of the first field
 * stops at the comma.  If there is no comma the second value is 0.  The
 * input is not modified and may be of any length.
 */
void GetTwoFloats(char *s,float *a, float *b) {
  const char *comma = strchr(s,',');

  *a = atof(s);
  if (comma != NULL)
    *b = atof(comma+1);
  else
    *b = 0.0f;
  return;
}

/**
 * Copy the coordinates of a decoy at the "good" positions (those flagged
 * in goodCoords) into coords, packed contiguously, three doubles each.
 *
 * If startPointer is non-NULL it receives the packed index of the first
 * good position >= start (or 10000 if there is none).  If stopPointer is
 * non-NULL it receives the packed index of the last good position <= stop
 * (it is left untouched if there is none).  Pass NULL for either when the
 * value is not needed.
 *
 * Asserts that the number of positions copied equals numGoodCoords and
 * that the indices agree with commonToGood when start/stop are good.
 */
void GetGoodCoords (double *coords, int decoy, int start, int stop, 
		    int *startPointer, int *stopPointer) {
  const int BIG = 10000;   /* "not found yet" marker for *startPointer */
  int nPacked = 0;
  int res,axis;

  if (startPointer != NULL)
    *startPointer = BIG;

  for (res = 0; res < numResidues; res++) {
    if (!goodCoords[res])
      continue;

    /* first good position at or after start */
    if (startPointer != NULL && *startPointer == BIG && res >= start)
      *startPointer = nPacked;
    /* keeps being overwritten until we pass stop */
    if (stopPointer != NULL && res <= stop)
      *stopPointer = nPacked;

    for (axis = 0; axis < 3; axis++)
      coords[3*nPacked+axis] = commonCoords[decoy][3*res+axis];
    nPacked++;
  }

  if (startPointer != NULL && goodCoords[start])
    assert (*startPointer == commonToGood[start]);
  if (stopPointer != NULL && goodCoords[stop])
    assert (*stopPointer == commonToGood[stop]);
  assert (nPacked == numGoodCoords && nPacked <BIG);
  return;
}



/**
 * Load decoys from a list of PDB files.
 *
 * file lists one PDB file name per whitespace-separated token.  For every
 * file that can be opened, the C-alpha coordinates are stored in
 * coords[n] (3 doubles per residue), a secondary-structure string in
 * structure[n] (all 'L' unless per-residue records following a "TER"
 * line could be read) and the file name in decoyName[n].  These three are
 * allocated here and owned by the caller.
 *
 * The number of residues is taken from the first file that opens (count
 * of ATOM/CA records) and returned through numResiduesPointer.  The
 * sequence of the first decoy is copied to decoySequence and, when NATIVE
 * is set, asserted equal to nativeSequence.  Later files whose sequence or
 * residue count differs are reported as "bad pdb file" and skipped.
 * Loading stops after MAX_NUM_DECOYS decoys.
 *
 * *numDecoysPointer receives the number of decoys loaded and
 * *numScorelineFieldsPointer is set to 1 (no score info).  decoyScores is
 * not used.
 *
 * NOTE: lines are read with "%[^\n]", so an empty line ends the scan of a
 * file; lines longer than the line buffer are split.
 */
void ReadPDBFiles(FILE* file,char *nativeSequence, char* decoySequence, 
		  double** coords, char** structure, double** decoyScores,
		  char** decoyName, int *numDecoysPointer,
		  int NATIVE, int* numScorelineFieldsPointer, int* numResiduesPointer) {

  static char pdbLine[500],fileName[500],sequence[10000];
  const int maxSequence = (int) sizeof(sequence) - 1;

  double x,y,z;
  char rsd;
  int i,j,rsdCount,numStored,numResidues,numDecoys,foundSecondaryStructure;

  FILE *pdbFile;
  
  (void) decoyScores; /* kept in the interface, not used here */

  *numScorelineFieldsPointer = 1; /** no score info **/

  numDecoys = 0;
  numResidues = 0;
  while (fscanf(file,"%499s",fileName)==1) {
    
    if ( (pdbFile = fopen(fileName,"r")) ==NULL) {
      fprintf(stderr,"missing pdbfile: %s\n",fileName);
      continue;
    }
    
    if (!numResidues) {
      /** read through file -- how many CA atoms?? **/
      while (fscanf(pdbFile,"%499[^\n]%*c",pdbLine)==1) {
	pdbLine[4]='\0';
	pdbLine[16]='\0';
	if (strcmp(pdbLine,"ATOM") || strcmp(pdbLine+12," CA ") ) {
	  continue;
	}
	numResidues++;
      }
      *numResiduesPointer = numResidues;
      fseek(pdbFile,0,SEEK_SET);
    }
    
    coords[numDecoys] = calloc(3*numResidues,sizeof(double));
    structure[numDecoys] = calloc(numResidues+5,sizeof(char));
    for (i=0;i<numResidues;i++)
      structure[numDecoys][i] = 'L';

    foundSecondaryStructure = 0;
    rsdCount = 0;

    while (fscanf(pdbFile,"%499[^\n]%*c",pdbLine)==1) {
      pdbLine[3]='\0';
      pdbLine[16]='\0';
      
      if (!strcmp(pdbLine,"TER")) {
	/** one secondary-structure record per residue follows; never **/
	/** write past the numResidues slots of structure[numDecoys] **/
	for (i=0;i<rsdCount && i<numResidues;i++) {
	  if (fscanf(pdbFile,"%d %1s %*[^\n]%*c",&j,structure[numDecoys]+i)!=2) break;
	}
	
	if (i==rsdCount)
	  foundSecondaryStructure = 1;
      }
      

      if (strcmp(pdbLine,"ATO") || strcmp(pdbLine+12," CA ") ) {
	continue;
      }

      /** fixed-column fields, cut from the right so that each atof **/
      /** sees only its own column **/
      pdbLine[54] = '\0';
      z = atof(pdbLine+46);
      pdbLine[46] = '\0';
      y = atof(pdbLine+38);
      pdbLine[38] = '\0';
      x = atof(pdbLine+30);
      pdbLine[20] = '\0';
      for (i=0;i<26;i++) 
	if ( !(strcmp(pdbLine+17,shortToLong[i])) ) {  
	  rsd = 'A'+i;
	  break;
	}
	
      if (i==26) rsd = 'X';

      /** store only what fits; keep counting so that a file with too **/
      /** many residues is still detected by the count check below **/
      if (rsdCount < numResidues && rsdCount < maxSequence) {
	sequence[rsdCount] = rsd;
	coords[numDecoys][3*rsdCount] = x;
	coords[numDecoys][3*rsdCount+1] = y;
	coords[numDecoys][3*rsdCount+2] = z;
      }
      
      rsdCount++;
    }
    fclose(pdbFile); /** done with this file on every path below **/

    /** terminate the sequence so no characters of an earlier file remain **/
    numStored = rsdCount;
    if (numStored > numResidues) numStored = numResidues;
    if (numStored > maxSequence) numStored = maxSequence;
    sequence[numStored] = '\0';

    if (numDecoys==0) {
      strcpy(decoySequence,sequence);
      if (NATIVE) {
	
	assert (!(strcmp(nativeSequence,decoySequence)));
      }
    }
    else if ( strcmp(sequence,decoySequence)  ||
	      rsdCount != numResidues) {
      fprintf(stderr,"bad pdb file: %s\n",fileName);
      free(coords[numDecoys]);
      free(structure[numDecoys]);
      continue;
    }
    
    if (!foundSecondaryStructure) 
      fprintf(stderr,"Couldnt read secstruc info: %s\n",fileName);

    decoyName[numDecoys] = calloc(strlen(fileName)+1,sizeof(char));
    strcpy(decoyName[numDecoys],fileName);
    fprintf(stderr,"%5d %s\n",numDecoys,structure[numDecoys]);
    numDecoys++;
    if (numDecoys>= MAX_NUM_DECOYS) {
      fprintf(stderr,"Too many decoys, skipping some\n");
      break;
    }
  }
  *numDecoysPointer = numDecoys;

  return;
}


/**
 * Copy decoy s over decoy d (destination, source): scores, full and
 * common coordinates, common secondary structure / backbone strings and
 * the name.  Both decoys must belong to the same homolog (asserted), and
 * the destination arrays must already be allocated.  decoyName[d] is
 * freed and replaced by a fresh copy; d == s is safe.
 */
void CopyDecoy (int d, int s) { /* destination, source */
  int i,L;
  char *nameCopy;

  assert (decoyToHomolog[d] == decoyToHomolog[s]);
  L = homologLength [decoyToHomolog[d]];

  for (i=0;i<numScorelineFields-1;i++) /** note only copying numScorelineFields <= nsf **/
    decoyScores[d][i] = decoyScores[s][i];
  for (i=0;i<3*L;i++) 
    fullCoords[d][i] = fullCoords[s][i];
  for (i=0;i<3*numResidues;i++) 
    commonCoords[d][i] = commonCoords[s][i];
  for (i=0;i<numResidues;i++) {
    commonStructure[d][i] = commonStructure[s][i];
    commonBB[d][i] = commonBB[s][i];
  }

  /** duplicate the source name BEFORE freeing the destination name: **/
  /** works for names of any length and when d==s **/
  nameCopy = calloc(strlen(decoyName[s])+5,sizeof(char));
  assert (nameCopy != NULL);
  strcpy(nameCopy,decoyName[s]);
  free(decoyName[d]);
  decoyName[d] = nameCopy;
  return;
}

/**
 * Release every per-decoy buffer of decoy d: scores, full and common
 * coordinates, common secondary structure / backbone strings and the name.
 * The table slots themselves are left pointing at the freed memory.
 */
void FreeDecoy (int d) {
  void *owned[6];
  int k;

  owned[0] = decoyScores[d];
  owned[1] = fullCoords[d];
  owned[2] = commonCoords[d];
  owned[3] = commonStructure[d];
  owned[4] = commonBB[d];
  owned[5] = decoyName[d];

  for (k = 0; k < 6; k++)
    free (owned[k]);
}

/**
 * Load the decoys of one homolog from its silent-mode file
 * (homologName[homolog]) and append them to the global decoy tables
 * starting at index numDecoys.
 *
 * File layout as parsed here: a first line with a tag and the sequence
 * (which must equal homologSequence[homolog]); a "SCORE:" header line
 * naming the score fields; then, per decoy, a "SCORE:" line with the
 * numeric scores, optionally FOLD_TREE and JUMPS lines, and one line per
 * residue: index, ss character (H/L/E), phi, psi, omega, x, y, z
 * (four extra numbers are skipped when FULLATOM is set) and a trailing
 * tag that becomes the decoy name.
 *
 * Decoys that fail to parse, violate the secondary-structure constraint
 * (SS_CONSTRAINT) or miss a forced contact (FORCED_CONTACTS) are dropped.
 * Afterwards a subset may be kept, chosen according to CHOOSE_RANDOMLY /
 * CHOOSE_BY_SCORE / CHOOSE_BY_PB / CHOOSE_BY_CO / CHOOSE_BY_CO_OLD and
 * CHOOSE_FRACTION; the survivors are compacted to the front of this
 * file's range and numDecoys is updated.
 *
 * Fixes relative to the previous version:
 *  - a forced contact now counts as satisfied when ANY residue pair of
 *    the two ranges is within the threshold (previously only the last
 *    residue of the first range was effectively tested);
 *  - CHOOSE_BY_CO looks scores up at startNumDecoys + index, like every
 *    other selection mode, so it is correct for the 2nd, 3rd... file;
 *  - per-decoy buffers are released on the "funny tag"/"iscan failed"
 *    paths; ss, bb and the temporary sort tables are released;
 *  - reading stops cleanly on a truncated header instead of spinning,
 *    and no slot at or beyond MAX_NUM_DECOYS is ever written.
 */
void ReadSilentFile(int homolog) {
  FILE *file,*debug;
  char junk50[10000],*bb,*ss,*filename,sequence[MAX_SEQUENCE_LENGTH],sschar;
  int i,j,k,L,rsdCount,nsf,startNumDecoys,totalDecoys,numToChoose,*subset,
    failedConstraint,ntc1,ntc2,ntc3,llength,contactFound,
    bin,numBinDecoys,numBins,binSize,numChosen,decoysRemaining;
  float **l,**lBase,minCO,maxCO,binSpan,bottom,top,phi,psi,omega;
  double *v,*w;

  filename = homologName[homolog];
  startNumDecoys = numDecoys;
  numToChoose = 0;

  if ( (file = fopen(filename,"r")) == NULL) {
    fprintf(stderr,"cant open file: %s\n",filename);
    return;
  }

  if (fscanf(file,"%s %s%*c",junk50,sequence) != 2) {
    fprintf(stderr,"cant read sequence line: %s\n",filename);
    fclose(file);
    return;
  }
  L = strlen(sequence);

  fprintf(stderr,"LOAD  %s L = %d ",
          homologName[homolog],L);

  assert (!strcmp (homologSequence[homolog], sequence));


  /** read scoreline header **/
  fscanf(file,"%s",junk50);
  assert (strcmp(junk50,"SCORE:")==0);

  /** count header tokens up to the first decoy's SCORE: line **/
  nsf = 0;
  fscanf(file,"%s",junk50);
  while (strcmp(junk50,"SCORE:")) {
    nsf++;
    if (fscanf(file,"%s",junk50) != 1) {
      /** header but no decoys: stop instead of looping forever at EOF **/
      fprintf(stderr,"\nno decoys found in silent file: %s\n",filename);
      fclose(file);
      return;
    }
  }

  failedConstraint = 0;

  if (numScorelineFields && nsf !=numScorelineFields) {
    fprintf(stderr,"\nWARNING: Your silentfiles have different numbers of scoreline fields\n");
    fprintf(stderr,"WARNING: Only the min number of fields will be displayed for each decoy\n");
    fprintf(stderr,"WARNING: Cluster average scores assume agreement of fields\n");
    numScorelineFields = (nsf<numScorelineFields)?nsf:numScorelineFields;
  }
  else numScorelineFields = nsf;


  assert (!strcmp(junk50,"SCORE:"));
  /** rewind, skip the sequence line, read the first token of line 2 **/
  fseek(file,0,0);
  fscanf(file,"%*[^\n]%*c%s%*c",scorelineHeader);

  ss = calloc(L+5,sizeof(char));
  bb = calloc(L+5,sizeof(char));

  /** never start a decoy in a slot at or past MAX_NUM_DECOYS **/
  while (numDecoys < MAX_NUM_DECOYS && fscanf(file,"%s",junk50)==1) {
    if (strlen(junk50) > 5000) fprintf(stderr,"whoah: %s\n",junk50);
    if (strcmp(junk50,"SCORE:")!=0) continue;

    decoyScores[numDecoys] = calloc(nsf-1,sizeof(double));
    fullCoords[numDecoys] = calloc(L*3,sizeof(double));

    for (i=0;i<nsf-1;i++)
      if ( fscanf(file,"%lf",decoyScores[numDecoys]+i) != 1 ) break;
    if (i != nsf-1)
      fprintf(stderr,"*");
    else
      fscanf(file,"%*[^\n]%*c");

    rsdCount = 0;

    if ( fscanf(file,"%d %c",&i,&sschar)  != 2 ) {
      // possible problem. but it might be FOLD_TREE and JUMPS
      fscanf(file,"%s %*[^\n]%*c",junk50);
      if ( strcmp(junk50,"FOLD_TREE" )!=0 ) {
        fprintf(stderr,"funny tag! %s\n", junk50);
        free(fullCoords[numDecoys]);
        free(decoyScores[numDecoys]);
        continue;
      }
      fscanf(file,"%s %*[^\n]%*c",junk50);
      if ( strcmp(junk50,"JUMPS" )!=0 ) {
        fprintf(stderr,"funny tag! %s\n", junk50);
        free(fullCoords[numDecoys]);
        free(decoyScores[numDecoys]);
        continue;
      }
      if ( fscanf(file,"%d %c",&i,&sschar)  != 2 ) {
        fscanf(file,"%*[^\n]%*c");
        fprintf(stderr,"iscan failed\n");
        free(fullCoords[numDecoys]);
        free(decoyScores[numDecoys]);
        continue;
      }
    }


    /** one iteration per residue line; i/sschar were read ahead **/
    while (fscanf(file," %f %f %f",&phi,&psi,&omega)==3) {
      ss[rsdCount] = sschar;
      if ( !(ss[rsdCount] == 'H' || ss[rsdCount] == 'L' || ss[rsdCount] == 'E')) break;
      if (i!=rsdCount+1 || i> L) break;
      bb[rsdCount] = PPO2BB( phi, psi, omega );
      if ( FULLATOM ) {
        if (fscanf(file,"%lf %lf %lf %*f %*f %*f %*f %[^\n]%*c",
                   fullCoords[numDecoys]+3*rsdCount,
                   fullCoords[numDecoys]+3*rsdCount+1,
                   fullCoords[numDecoys]+3*rsdCount+2,junk50)!=4) break;
      } else {
        if (fscanf(file,"%lf %lf %lf %[^\n]%*c",
                   fullCoords[numDecoys]+3*rsdCount,
                   fullCoords[numDecoys]+3*rsdCount+1,
                   fullCoords[numDecoys]+3*rsdCount+2,junk50)!=4) break;
      }
      if (strlen(junk50) > 5000) fprintf(stderr,"whoah: %s\n",junk50);
      rsdCount++;
      if ( rsdCount < L ) fscanf(file,"%d %c",&i,&sschar);
    }


    if (i!=L || rsdCount!=L) {
      /** Some problem **/
      fprintf(stderr,"*(%s)",junk50);
      free(fullCoords[numDecoys]);
      free(decoyScores[numDecoys]);
      continue;
    }

    ss[L] = '\0';
    bb[L] = '\0';

    /** check secondary structure constraint **/
    assert (strlen(ss)==L);
    if (SS_CONSTRAINT) {
      for (i=0;i<numResidues;i++) {
        j = commonToFull[homolog][i];
        if (ssConstraint[i] != '-' && ssConstraint[i] != ss[j]) break;
      }
      if (i!=numResidues) {
        failedConstraint++;
        if (failedConstraint%100 == 0) fprintf(stderr,"X");
        free(fullCoords[numDecoys]);
        free(decoyScores[numDecoys]);
        continue;
      }
    }

    /** check forced contacts: each one needs at least one residue pair
        (j in the a-range, k in the b-range) within its threshold **/
    if (FORCED_CONTACTS) {
      for (i=0;i<numForcedContacts;i++) {
        contactFound = 0;
        for (j=aContacts[i][0];
             j<=aContacts[i][1] && !contactFound;
             j++) {
          v = fullCoords[numDecoys]  + 3*(commonToFull[homolog][j]);
          for (k=bContacts[i][0];
               k<=bContacts[i][1];
               k++) {
            w = fullCoords[numDecoys]  + 3*(commonToFull[homolog][k]);
            if ( sqrt( (v[0]-w[0])*(v[0]-w[0]) +
                       (v[1]-w[1])*(v[1]-w[1]) +
                       (v[2]-w[2])*(v[2]-w[2]) ) <= contactDistanceThreshold[i]) {
              contactFound = 1;
              break;
            }
          }
        }
        if (!contactFound)  /** no contact found **/
          break;
      }
      if (i<numForcedContacts) { /** failed at least one of the contacts **/
        failedConstraint++;
        if (failedConstraint%100 == 0) fprintf(stderr,"X");
        free(fullCoords[numDecoys]);
        free(decoyScores[numDecoys]);
        continue;
      }
    }

    j = strlen(junk50);
    for (i=0;i<j;i++) /** Convert name to alpha-numeric+_ (fill in spaces,etc) **/
      if ( ! ( (junk50[i]>='A' && junk50[i]<='Z') ||
               (junk50[i]>='a' && junk50[i]<='z') ||
               (junk50[i]>='0' && junk50[i]<='9') ||
               (junk50[i] == '.' ||
                junk50[i] == '/')))
        junk50[i] = '_';

    commonStructure[numDecoys] = calloc(numResidues+1,sizeof(char));
    commonBB[numDecoys] = calloc(numResidues+1,sizeof(char));

    /** assign sec structure **/
    for (i=0;i<numResidues;i++) {
      commonStructure[numDecoys][i] = ss[ commonToFull [homolog] [i] ];
      commonBB[numDecoys][i] = bb[ commonToFull [homolog] [i] ];
    }
    commonStructure[numDecoys][numResidues] = '\0';
    commonBB[numDecoys][numResidues] = '\0';

    /** assign decoy name **/
    if ( INCLUDE_FILENAME ) {
      decoyName[numDecoys] = calloc( strlen(junk50) + strlen(homologName[homolog])+5,sizeof(char));
      sprintf(decoyName[numDecoys],"%s:%s",homologName[homolog],junk50);
    } else {
      decoyName[numDecoys] = calloc( strlen(junk50)+5, sizeof(char));
      strcpy( decoyName[numDecoys], junk50 );
    }

    /** warn about possible wacked coordinates **/
    if (fullCoords[numDecoys] [3*L-3] == 0.0 &&
        fullCoords[numDecoys] [3*L-2] == 0.0 &&
        fullCoords[numDecoys] [3*L-1] == 0.0 ) {
      fprintf(stderr,"\nWARNING: %s:%s coordinates end in 0.000's -- possible error?\n",
              homologName[homolog],junk50);
    }

    /** assign commonCoords **/
    commonCoords[numDecoys] = calloc(numResidues*3,sizeof(double));
    for (i=0;i<numResidues;i++)
      for (j=0;j<3;j++)
        commonCoords[numDecoys][3*i+j] = fullCoords[numDecoys][3 * (commonToFull[homolog][i])+j];

    decoyToHomolog[numDecoys] = homolog;

    /** progress report **/
    if ( (numDecoys%100)==0) fprintf(stderr,"-");

    numDecoys++;
    if (numDecoys >= MAX_NUM_DECOYS) {
      fprintf(stderr,"STOP: numDecoys >= MAX_NUM_DECOYS\n");
      break;
    }
  } /** while(fscanf... **/



  /*********************** Choosing subsets, if necessary **********************************/

  totalDecoys = numDecoys - startNumDecoys; /** # decoys successfully read from this file **/
  fprintf(stderr," total: %d ",
          totalDecoys);

  /** subset[] holds indices relative to startNumDecoys; l[] is a table of
      (sort key, relative index) pairs handed to qsort **/
  subset = NULL;
  if (CHOOSE_RANDOMLY) {
    numToChoose = (int) floor(totalDecoys * CHOOSE_FRACTION);
    fprintf(stderr," randomly choosing %d\n",numToChoose);
    subset = RandomSubset(numToChoose, totalDecoys);
  }
  else if (CHOOSE_BY_SCORE != 0) { /** holds the score index to sort on **/
    numToChoose = (int) floor(totalDecoys * CHOOSE_FRACTION);
    fprintf(stderr," choosing %d by score#: %d\n",numToChoose,CHOOSE_BY_SCORE);
    assert (CHOOSE_BY_SCORE < numScorelineFields);
    l=  calloc(totalDecoys,sizeof(float*));
    for (i=0;i<totalDecoys;i++) {
      l[i] = calloc(2,sizeof(float));
      l[i][0] = (float) decoyScores[startNumDecoys + i] [CHOOSE_BY_SCORE - 1]; /** SCORE: offset*/
      l[i][1] = i;
    }
    if (CHOOSE_LOW)
      qsort(l,totalDecoys,sizeof(float*),(int (*)(const void*,const void*)) CompareIncreasing);
    else qsort(l,totalDecoys,sizeof(float*),(int (*)(const void*,const void*)) CompareDecreasing);
    subset = calloc(numToChoose,sizeof(int));
    for (i=0;i<numToChoose;i++) {
      subset[i] = (int) l[i][1];
    }
    for(i=0;i<totalDecoys;i++)free(l[i]);
    free(l);
  }
  else if (CHOOSE_BY_PB) { /** sort by total_score - sspair 1 - 6 ie 0 - 5 **/
    numToChoose = (int) floor(totalDecoys * CHOOSE_FRACTION);
    fprintf(stderr," choosing %d by pb\n",numToChoose);
    assert (CHOOSE_BY_SCORE < numScorelineFields);
    l=  calloc(totalDecoys,sizeof(float*));
    for (i=0;i<totalDecoys;i++) {
      l[i] = calloc(2,sizeof(float));
      l[i][0] = (float) decoyScores[startNumDecoys + i] [0] -
        (float) decoyScores[startNumDecoys + i][5];
      l[i][1] = i;
    }
    qsort(l,totalDecoys,sizeof(float*),(int (*)(const void*,const void*)) CompareIncreasing);
    subset = calloc(numToChoose,sizeof(int));
    for (i=0;i<numToChoose;i++) {
      subset[i] = (int) l[i][1];
    }
    for(i=0;i<totalDecoys;i++)free(l[i]);
    free(l);
  }
  else if (CHOOSE_BY_CO) { /* CHOOSE_BY_CO holds the number of bins to use **/
    numToChoose = (int) floor(totalDecoys * CHOOSE_FRACTION);
    numBins = CHOOSE_BY_CO;
    binSize = numToChoose/numBins;
    numToChoose = binSize*numBins;
    fprintf(stderr," choosing %d by co-score, using %d bins.\n",
            numToChoose,numBins);

    subset = calloc(numToChoose,sizeof(int));
    l=  calloc(totalDecoys,sizeof(float*));
    lBase = l; /** l is advanced bin by bin below; keep the start for free() **/
    for (i=0;i<totalDecoys;i++) {
      l[i] = calloc(2,sizeof(float));
      l[i][0] = (float) decoyScores[startNumDecoys + i] [12 - 1]; /** co **/
      l[i][1] = i;
    }
    qsort(l,totalDecoys,sizeof(float*),
          (int (*)(const void*,const void*)) CompareIncreasing);

    maxCO = l[totalDecoys-1][0];
    minCO = l[0][0];

    binSpan = (maxCO - minCO + 0.2)/numBins;

    numChosen = 0;
    decoysRemaining = totalDecoys;
    for (bin=0, bottom = minCO-0.1, top = minCO-0.1+binSpan;
         bin<numBins;
         bin++, bottom+=binSpan, top+=binSpan) {
      /** count the decoys (sorted by co) that fall below this bin's top **/
      for (numBinDecoys=0;
           numBinDecoys<decoysRemaining && l[numBinDecoys][0]<top;
           numBinDecoys++);

      if (numBinDecoys<binSize)
        fprintf(stderr,"Underfull bin: %d (%5.1f-%5.1f) need: %d have: %d\n",
                bin,bottom,top,binSize,numBinDecoys);


      /** re-key this bin by total score; indices in l are relative to
          startNumDecoys, so the offset is required here **/
      for (i=0;i<numBinDecoys;i++)
        l[i][0] = decoyScores[ startNumDecoys + (int) l[i][1] ][1 - 1]; /** total score **/
      qsort(l,numBinDecoys,sizeof(float*),
            (int (*)(const void*,const void*)) CompareIncreasing);

      for (i=0;i<binSize && i<numBinDecoys;i++) {
        subset[numChosen] = (int) l[i][1];
        fprintf(stderr,"%d %d %7.3f %7.3f\n",bin,i,
                decoyScores[ startNumDecoys + (int) l[i][1] ][1-1],
                decoyScores[ startNumDecoys + (int) l[i][1] ][12-1]);
        numChosen++;
      }

      l+=numBinDecoys;
      decoysRemaining-=numBinDecoys;
    }
    assert (decoysRemaining == 0);
    fprintf(stderr,"Tried to choose %d; actually chose %d\n",
            numToChoose,numChosen);
    numToChoose = numChosen;

    for (i=0;i<totalDecoys;i++) free(lBase[i]);
    free(lBase);
  }



  else if (CHOOSE_BY_CO_OLD) {
    /** choose 1/2 by score **/
    /** choose 1/3 by score from top CHOOSE_FRACTION of decoys by co **/
    /** choose 1/6 by score from bottom CHOOSE_FRACTION of decoys by co **/


    numToChoose = (int) floor(totalDecoys * CHOOSE_FRACTION);
    fprintf(stderr," choosing %d by co-score\n",numToChoose);
    ntc1 = numToChoose/2;
    ntc2 = numToChoose/3;
    ntc3 = numToChoose/6;
    ntc1 += (numToChoose - ntc1 - ntc2 -ntc3);
    assert (numToChoose == ntc1+ntc2+ntc3);
    subset = calloc(numToChoose,sizeof(int));

    l=  calloc(totalDecoys,sizeof(float*));
    lBase = l; /** l is advanced past each chosen group; keep the start for free() **/
    for (i=0;i<totalDecoys;i++) {
      l[i] = calloc(2,sizeof(float));
      l[i][0] = (float) decoyScores[startNumDecoys + i] [0]; /** total score **/
      l[i][1] = i;
    }
    llength = totalDecoys;
    qsort(l,totalDecoys,sizeof(float*),
          (int (*)(const void*,const void*)) CompareIncreasing);
    for (i=0;i<ntc1;i++) subset[i] = (int) l[i][1]; /** half by score **/

    l += ntc1;
    llength -= ntc1;

    for (i=0;i<llength;i++) l[i][0] = (float) decoyScores[startNumDecoys + ((int)l[i][1])][11]; /*co*/
    qsort(l,llength,sizeof(float*),
          (int (*)(const void*,const void*)) CompareDecreasing);

    for (i=0;i<llength;i++) l[i][0] = (float) decoyScores[startNumDecoys + ((int)l[i][1])][0]; /*score*/
    qsort(l,IntMin(llength,numToChoose),sizeof(float*),
          (int (*)(const void*,const void*)) CompareIncreasing);
    for (i=0;i<ntc2;i++) subset[ntc1+i] = (int) l[i][1]; /** 1/3 by score from top co bin **/

    l += ntc2;
    llength -= ntc2;

    for (i=0;i<llength;i++) l[i][0] = (float) decoyScores[startNumDecoys + ((int)l[i][1])][11]; /*co*/
    qsort(l,llength,sizeof(float*),
          (int (*)(const void*,const void*))CompareIncreasing);

    for (i=0;i<llength;i++) l[i][0] = (float) decoyScores[startNumDecoys + ((int)l[i][1])][0]; /*score*/
    qsort(l,IntMin(llength,numToChoose),sizeof(float*),
          (int (*)(const void*,const void*))CompareIncreasing);
    for (i=0;i<ntc3;i++) subset[ntc1+ntc2+i] = (int) l[i][1]; /** 1/6 by score from bottom co bin **/

    for (i=0;i<totalDecoys;i++) free(lBase[i]);
    free(lBase);
  }
  else fprintf(stderr," keeping all decoys\n.");


  if (subset!=NULL) {

    if (DEBUG) {
      debug = fopen("junk1","w");
      if (debug != NULL) {
        for (i=0;i<totalDecoys;i++)
          fprintf(debug,"%f %f\n",
                  decoyScores[startNumDecoys+i][0],
                  decoyScores[startNumDecoys+i][11] );
        fclose(debug);
      }

      debug = fopen("junk2","w");
      if (debug != NULL) {
        for (i=0;i<numToChoose;i++)
          fprintf(debug,"%f %f\n",
                  decoyScores[startNumDecoys+subset[i]][0],
                  decoyScores[startNumDecoys+subset[i]][11] );
        fclose(debug);
      }
    }



    /** compact: with subset sorted increasing, subset[i] >= i, so each
        source slot is still intact when it is copied down **/
    qsort(subset,numToChoose,sizeof(int),
          (int (*)(const void*,const void*))CompareIntIncreasing);
    for (i=0;i<totalDecoys;i++) {
      if (i<numToChoose) {
        assert (subset[i] >= i);
        CopyDecoy (startNumDecoys + i, startNumDecoys + subset[i]);
      }
      else FreeDecoy (startNumDecoys + i);
    }
    numDecoys = startNumDecoys + numToChoose;
    free(subset);
  }

  fclose(file);
  free(ss);
  free(bb);
  return;
}

/**
 * Parse the alignment file and then load every homolog's silent file.
 *
 * Each record is three whitespace-separated tokens: the tag "ALIGN",
 * an alignment string ('-' or '.' mark gaps) and a name.  The name
 * "NATIVE" marks the native sequence (stored at index -1 of the homolog
 * tables), "CONSTRAINT" marks a secondary-structure constraint row, and
 * anything else is the silent-file name of a homolog.
 *
 * Sets numHomologs, SS_CONSTRAINT, numResidues (number of alignment
 * columns with no gap in any homolog/native row), homologSequence,
 * homologLength, homologName, commonToFull / fullToCommon and, if a
 * CONSTRAINT row exists, ssConstraint.  Clears NATIVE when the file has
 * no NATIVE row.  Finally resets numDecoys and calls ReadSilentFile for
 * each homolog.
 *
 * Changes relative to the previous version: the file is checked on open
 * and closed after reading; the first record is read outside assert()
 * so it is not lost under NDEBUG; token reads are width-limited to the
 * local buffers; a short/garbled file ends with an error instead of an
 * endless loop; the temporary alignment tables are released.
 * Unrecoverable input errors terminate the program.
 */
void ReadAlignFile(char *alignfilename) {

  char **alignment;
  int *common;
  char junk50[50], *constraintAlignment,align[MAX_ALIGNMENT_LENGTH],filename[1000];
  int localNative,i,j,k,length,commonIndex,fullIndex,alignmentLength,homologCount;
  FILE *file;

  constraintAlignment = NULL;

  /** how many homologs? do we have native alignment info? **/
  file = fopen(alignfilename,"r");
  if (file == NULL) {
    fprintf(stderr,"cant open file: %s\n",alignfilename);
    exit(EXIT_FAILURE);
  }

  /** widths match junk50[50], align[MAX_ALIGNMENT_LENGTH], filename[1000] **/
  if (fscanf(file,"%49s %9999s %999s%*c",junk50,align,filename) != 3) {
    fprintf(stderr,"cant parse first line of ALIGN file: %s\n",alignfilename);
    exit(EXIT_FAILURE);
  }
  alignmentLength = strlen(align);
  numHomologs =   ( strcmp(filename,"NATIVE") != 0 &&
                    strcmp(filename,"CONSTRAINT") != 0);
  localNative =   ( strcmp(filename,"NATIVE") == 0);
  SS_CONSTRAINT = ( strcmp(filename,"CONSTRAINT") == 0);
  while (fscanf(file,"%49s %*s %999s%*c",junk50,filename)==2) {
    if ( strcmp(junk50,"ALIGN")!= 0) continue;
    if (!strcmp(filename,"NATIVE"))
      localNative = 1;
    else if (!strcmp(filename,"CONSTRAINT"))
      SS_CONSTRAINT = 1;
    else
      numHomologs++;
  }
  fprintf(stderr,"Read ALIGN file. numHomologs: %d alignmentLength: %d SS_CONSTRAINT: %d\n",
          numHomologs,alignmentLength,SS_CONSTRAINT);

  if (NATIVE && !localNative) {
    fprintf(stderr,"No NATIVE line in ALIGN file; ignoring native coordinates\n");
    NATIVE = 0;
  }

  fseek(file,0,0);

  /** one extra slot so that alignment[-1] can hold the native row **/
  alignment = calloc(numHomologs+1,sizeof(char*));
  alignment++;


  /** read the raw alignment info **/


  common = calloc(alignmentLength,sizeof(int)); /** which positions are common to all homs **/
  for (i=0;i<alignmentLength;i++) common[i] = 1;

  for (i=0,k=0;
       i<numHomologs+localNative+SS_CONSTRAINT;
       i++) {
    if (fscanf(file,"%49s %9999s %999s%*c",junk50,align,filename) != 3) {
      fprintf(stderr,"unexpected end of ALIGN file: %s\n",alignfilename);
      exit(EXIT_FAILURE);
    }
    if (strcmp(junk50,"ALIGN")!=0) {
      i--; /** not an ALIGN record: does not count toward the total **/
      continue;
    }
    assert ( (!strcmp(junk50,"ALIGN")) && strlen(align) == alignmentLength);
    if (!strcmp(filename,"CONSTRAINT")) {
      free(constraintAlignment); /** keep only the latest CONSTRAINT row **/
      constraintAlignment = calloc(alignmentLength+1,sizeof(char));
      strcpy(constraintAlignment,align);
      continue;
    }
    else if (!strcmp(filename,"NATIVE") ) {
      homologCount = -1;
    }
    else {
      homologCount = k;
      k++;
    }
    /** strip the gaps to get the sequence; any gap makes the column non-common **/
    length = 0;
    homologSequence[homologCount] = calloc(alignmentLength+1,sizeof(char));
    for (j=0;j<alignmentLength;j++) {
      if (align[j] == '-' || align[j] == '.')
        common[j] = 0;
      else {
        homologSequence[homologCount][length] = align[j];
        length++;
      }
    }
    homologSequence[homologCount][length] = '\0';
    homologLength[homologCount] = length;
    fprintf(stderr,"READ_ALIGN_FILE: homolog: %s length: %d\n",
            filename,length);
    alignment[homologCount] = calloc(alignmentLength+1,sizeof(char));
    strcpy(alignment[homologCount],align);

    homologName[homologCount] = calloc(strlen(filename)+1,sizeof(char));
    strcpy(homologName[homologCount],filename);
  }
  fclose(file);

  numResidues = 0;
  for (i=0;i<alignmentLength;i++) numResidues+=common[i];

  /** build the index maps between common positions and each sequence **/
  for (i= -1 * localNative; i<numHomologs; i++) {
    commonToFull[i] = calloc(alignmentLength,sizeof(int));
    fullToCommon[i] = calloc(alignmentLength,sizeof(int));

    commonIndex = 0;
    fullIndex = 0;
    for (j=0;j<alignmentLength;j++) {
      if (alignment[i][j] == '-' || alignment[i][j] == '.') continue;
      else if (common[j]) {
        commonToFull[i][commonIndex] = fullIndex;
        fullToCommon[i][fullIndex] = commonIndex;
        commonIndex++;
        fullIndex++;
      }
      else {
        fullToCommon[i][fullIndex] = -1;
        fullIndex++;
      }
    }
    assert (fullIndex == homologLength[i] && commonIndex == numResidues);
  }

  if (FORCED_CONTACTS) {
    for (i=0;i<numForcedContacts;i++) {
      fprintf(stderr,"%d-%d -- %d-%d %9.3f\n",
              aContacts[i][0], aContacts[i][1],
              bContacts[i][0], bContacts[i][1],
              contactDistanceThreshold[i]);
      for (j=0;j<numHomologs;j++) {
        for (k=aContacts[i][0]; k<=aContacts[i][1];k++)
          fprintf(stderr,"%c",homologSequence[j][commonToFull[j][k]]);
        fprintf(stderr," - ");
        for (k=bContacts[i][0]; k<=bContacts[i][1];k++)
          fprintf(stderr,"%c",homologSequence[j][commonToFull[j][k]]);
        fprintf(stderr," %s\n",homologName[j]);
      }
    }
  }

  if (SS_CONSTRAINT) { /** setup constraint **/
    commonIndex = 0;
    ssConstraint = calloc(numResidues+1,sizeof(char));
    for (i=0;i<alignmentLength;i++) {
      if (common[i]) {
        ssConstraint[commonIndex] = constraintAlignment[i];
        commonIndex++;
      }
    }
    ssConstraint[numResidues] = '\0';
  }

  for (i=-1*localNative;i<numHomologs;i++) {
    fprintf(stderr,"%3d %s %s\n",i,alignment[i],homologName[i]);
  }
  fprintf(stderr,"cmn:");
  for (i=0;i<alignmentLength;i++) {
    if (common[i]) fprintf(stderr,"*");
    else fprintf(stderr,"-");
  }
  fprintf(stderr,"\n");
  if (SS_CONSTRAINT) {
    fprintf(stderr,"cst:");
    commonIndex = 0;
    for (i=0;i<alignmentLength;i++) {
      if (common[i]) {
        fprintf(stderr,"%c",ssConstraint[commonIndex]);
        commonIndex++;
      }
      else fprintf(stderr,"-");
    }
    fprintf(stderr,"\n");
  }

  /** the raw alignment rows are only needed up to here **/
  for (i=-1*localNative;i<numHomologs;i++) free(alignment[i]);
  free(alignment-1); /** undo the +1 offset applied after calloc **/
  free(common);
  free(constraintAlignment);


  numDecoys = 0;
  /** Read the silent files **/
  for (i=0;i<numHomologs;i++) {

    ReadSilentFile(i);
  }
  return;
}
    
/**
 * Print the usage text to stderr.
 *
 * command -- program name substituted into the usage line and the
 *            three example command lines.
 *
 * The legend now names the threshold pair "t1,t2", matching the usage
 * line (it used to say "f1,f2", which clashes with <f>, the fraction).
 */
void WriteHelpMessage(char *command) {
  fprintf(stderr,"-------------------------------------------------------------------------------\n");
  fprintf(stderr,"-------------------------------------------------------------------------------\n\n");
  fprintf(stderr,"Usage:\n\n%s <silent-mode-file> <native-coords> <prefix> s1,s2,s3 t1,t2 {<cm> <f>}\n\n",
	  command);
  fprintf(stderr,"s1,s2,s3 = min-,target-,max- clusterSize; t1,t2 = min-,max- threshold\n");
  fprintf(stderr," If s1==0: no clustering is done.\n\n");
  fprintf(stderr," NOTE: Memory usage is proportional to maxClusterSize, so set it reasonably small\n\n");
  fprintf(stderr," Use - for <native-coords> if no native coords.\n\n");
  fprintf(stderr," To choose only a subset of the decoys, include a method of choice <cm> and the\n");
  fprintf(stderr,"  desired fraction of decoys <f>.\n\nCurrently supported options for <cm> are:\n");
  fprintf(stderr," r -- choose subset randomly. Useful for assessing clustering threshold before\n");
  fprintf(stderr,"      doing a full run.\n");
  fprintf(stderr," co,<N> -- contact order binning with N bins, choosing the lowest structures in\n");
  fprintf(stderr,"      each bin by total score (ie, the first column in the scoreline)\n");
  fprintf(stderr," +N -- choose the subset with the largest value for the Nth scoreline column.\n");
  fprintf(stderr,"       1 is total score, 2 is env, 12 is contact order,... \n");
  fprintf(stderr," -N -- choose the subset with the SMALLEST value for the Nth scoreline column.\n\n");
  fprintf(stderr,"Examples:\n\n1) %s junk.out junk.coords junk 5,10,25 3,7 r 0.1\n",command);
  fprintf(stderr," Choose 10 percent of the decoys, try for a top cluster of size 10,\n");
  fprintf(stderr,"   as low as 5 or as high as 25 to get a clustering threshold between 3 and 7 A\n");
  fprintf(stderr,"\n\n2) %s junk.out - junk 5,50,75 5,10 -1 0.2\n",command);
  fprintf(stderr," Choose the lowest 20 percent by score, cluster with a target top cluster size of 50,\n");
  fprintf(stderr,"   trying for a threshold between 5 and 10 A.\n");
  fprintf(stderr,"\n\n3) %s junk.out junk.coords junk 5,50,75 3,10 co,5 0.3\n",command);
  fprintf(stderr," Choose 30 percent by low-score contact order binning with 5 bins...\n\n\n");

  return;
}


/**
 * Build, for each of N coordinate sets, a list of its L nearest
 * neighbours by rmsd over residue positions start..stop (inclusive).
 *
 * Returns neighbors[f][k] = {rmsd, index of neighbour}, sorted by
 * increasing rmsd for each f.  Unused slots keep the initial values
 * {1000.0, -1}.  The caller owns the returned N x L x 2 structure.
 * When monitorProgress is non-zero a '.' is written to stderr for every
 * tenth row, followed by a newline at the end.
 */
float *** MakeNeighborList (int N,double **coords, int L, int start, int stop,
			    int monitorProgress) {
  /* N=#coords, L=#neighbors */

  float ***neighbors, **list, r;
  int a, b, side, slot, numPositions;
  int owner[2], other[2];

  numPositions = stop-start+1;

  /* every list starts out filled with far-away placeholder entries */
  neighbors = calloc(N,sizeof(float**));
  for (a=0; a<N; a++) {
    neighbors[a] = calloc(L,sizeof(float*));
    for (slot=0; slot<L; slot++) {
      neighbors[a][slot] = calloc(2,sizeof(float));
      neighbors[a][slot][0] = 1000.0;
      neighbors[a][slot][1] = -1;
    }
  }

  /* each unordered pair (a,b) is fitted once and offered to both lists */
  for (a=0; a<N; a++) {

    if ( monitorProgress && !(a%10) ) fprintf(stderr,".");

    for (b=a+1; b<N; b++) {

      r = (float) rmsfit_(&numPositions, coords[a]+3*start, coords[b]+3*start);

      owner[0] = a;  other[0] = b;
      owner[1] = b;  other[1] = a;

      for (side=0; side<2; side++) {
	list = neighbors[owner[side]];

	if (r < list[L-1][0]) {
	  /* shift worse entries down one place, dropping the last one */
	  slot = L-1;
	  while (slot>0 && list[slot-1][0] > r) {
	    list[slot][0] = list[slot-1][0];
	    list[slot][1] = list[slot-1][1];
	    slot--;
	  }
	  list[slot][0] = r;
	  list[slot][1] = other[side];
	}
      }
    }
  }

  if (monitorProgress) fprintf(stderr,"\n");
  return neighbors;
}


/**
 * Search for a top-cluster size whose tightest neighbourhood radius
 * lies between minThreshold and maxThreshold.
 *
 * neighbors[f][k][0] is read as the rmsd from decoy f to its k-th
 * nearest neighbour (as produced by MakeNeighborList); each list must
 * hold at least maxClusterSize entries.
 *
 * Starting from targetClusterSize, the size is increased while the best
 * radius is below minThreshold and decreased while it is above
 * maxThreshold, stopping when the radius is in range, when the size
 * leaves [minTopClusterSize, maxClusterSize], or when the search starts
 * bouncing between two adjacent sizes.
 *
 * Outputs: *targetPointer receives the final size (clamped into the
 * allowed range), *clusterCenterPointer the decoy with the smallest
 * radius at the last size examined.  Returns that radius.
 *
 * If no size is examined (targetClusterSize outside the allowed range)
 * or no decoy has a radius below 100, the result is now well defined:
 * radius 100.0 and/or cluster centre 0 (previously uninitialised).
 */
float ClusteringThreshold(float ***neighbors, int N,
			  int minTopClusterSize, int targetClusterSize, int maxClusterSize,
			  float minThreshold, float maxThreshold,
			  int *clusterCenterPointer,
			  int *targetPointer,
			  int monitorProgress) {

  int f1,clusterCenter, target, lastTarget;
  float r, bestRMSD;

  target = targetClusterSize;
  lastTarget = 0;
  clusterCenter = 0;   /** defined fallback when nothing beats bestRMSD **/
  bestRMSD = 100.0;    /** defined fallback when the loop never runs **/

  if (monitorProgress) fprintf(stderr,"Find threshold: (target,threshold) ");
  while (target <= maxClusterSize && target >= minTopClusterSize) {
    bestRMSD = 100.0;
    for (f1=0;f1<N;f1++) {
      r = neighbors[f1][target-1][0]; /** rmsd stored at list position target-1 **/
      if (r<bestRMSD) {
	bestRMSD = r;
	clusterCenter = f1;
      }
    }
    if (monitorProgress) fprintf(stderr," %d,%f",target,bestRMSD);

    if (bestRMSD < minThreshold) {
      if (lastTarget == target+1) break; /** bouncing backward **/
      lastTarget = target;
      target++;
    }
    else if (bestRMSD > maxThreshold) {
      if (lastTarget == target-1) break; /** bouncing backward **/
      lastTarget = target;
      target--;
    }
    else break;
  }
  if (monitorProgress) fprintf(stderr,"\n");

  /** the loop may have stepped one past either end of the range **/
  if (target>maxClusterSize) target = maxClusterSize;
  else if (target<minTopClusterSize) target = minTopClusterSize;

  assert (target<= maxClusterSize && target >= minTopClusterSize);

  *targetPointer = target;
  *clusterCenterPointer = clusterCenter;

  return bestRMSD;
}


/**
 * Write a multi-model PDB file for one cluster.
 *
 * filename       -- output path
 * clusterMembers -- decoy indices; clusterMembers[0] is used as the
 *                   cluster centre that every model is fitted with
 * clusterSize    -- number of entries in clusterMembers
 * start, stop    -- first and last common position (inclusive) used for
 *                   fitting and passed on to WritePDB
 *
 * The file begins with one REMARK line per model.  If NATIVE is set,
 * model 0 is the native structure (decoy index -1), fitted using the
 * positions reported by GetGoodCoords.  Models 1..clusterSize are the
 * cluster members in order.
 *
 * If the file cannot be opened a message is printed to stderr and
 * nothing is written (previously this dereferenced a NULL FILE*).
 */
void MakePDBFile (char *filename, int* clusterMembers, int clusterSize, int start, int stop) {
  FILE *file;
  int i,numPositions,startCount, stopCount, clusterCenter;
  double R[3][3],T[3];

  file = fopen(filename,"w");
  if (file == NULL) {
    fprintf(stderr,"cant open file: %s\n",filename);
    return;
  }

  clusterCenter = clusterMembers[0];

  if (NATIVE)
    fprintf(file,"REMARK %4d NATIVE\n",0);
  for (i=0;i<clusterSize;i++)
    fprintf(file,"REMARK %4d %s\n",i+1,decoyName[clusterMembers[i]]);


  if (NATIVE) { /** Model 0 is the native structure, if present **/
    GetGoodCoords (goodDecoyCoords, clusterCenter, start, stop, &startCount, &stopCount);

    numPositions = stopCount - startCount +1;
    fit_rmsfit_(&numPositions, goodDecoyCoords+3*startCount, goodNativeCoords+3*startCount,R,T);

    fprintf(file,"MODEL     %4d\n",0);
    WritePDB(file,-1,start,stop,R,T); /** the native structure is decoy number -1 **/
    fprintf(file,"ENDMDL\n");

  }


  for (i=0;i<clusterSize;i++) {
    /** set inside the loop: fit_rmsfit_ receives a pointer to it **/
    numPositions = stop - start +1;
    fit_rmsfit_(&numPositions,
		commonCoords[clusterCenter]+3*start,
		commonCoords[clusterMembers[i]]+3*start,
		R,T);

    fprintf(file,"MODEL     %4d\n",i+1);
    WritePDB(file,clusterMembers[i],start,stop,R,T);
    fprintf(file,"ENDMDL\n");
  }
  fclose(file);
  return;
}


/**
 * Write the per-cluster feature file: one record per line, each
 * starting with a short tag.
 *
 *   NS  native secondary structure string (commonStructure[-1])
 *   NC  native contact i j
 *   MS  maxsubCount[i][j] / clusterSize
 *   SR  subclusterRMSD[i][j]
 *   SN  subclusterNativeThreshold[i][j]
 *   DS  per-residue fractions of structureCount columns 0,1,2 (E,H,L)
 *   DBB per-residue fractions of BBCount columns 0..4 (A,B,G,E,O)
 *   DC  contactCount[i][j] / clusterSize
 *   NB0/NB1/NBP, B0/B1/BP, NBF/BF, NRB/RB, NHP/HP, L<n>/H<n>/NH<n>
 *       beta-pairing records taken from betaPairingCount[-3..8]
 *   ST  subclusterThreshold[i][j]
 *
 * The N*-prefixed native sections in the first block are only written
 * when NATIVE is set.  maxsubCount, subclusterRMSD,
 * subclusterNativeThreshold and subclusterThreshold may be NULL, in
 * which case their section is skipped.
 *
 * NOTE(review): betaPairingCount is indexed from -3 and its planes 3
 * and 6..8 are addressed as [residue][small column]; the column range
 * goes up to MAX_HAIRPIN_LOOP_LENGTH -- confirm the allocation covers
 * that for short sequences.
 *
 * Changes: a failed fopen is reported and the function returns; the SN
 * section is skipped when subclusterRMSD is NULL, since it is the
 * array consulted to decide which pairs to print.
 */
void ContactFile (char *fileName, int clusterSize, int **contactCount,
		  int ***betaPairingCount,
		  int **structureCount, int **BBCount, int **maxsubCount,
		  float **subclusterThreshold, float **subclusterNativeThreshold,
		  float **subclusterRMSD,
		  int **nativeContacts) {

  FILE *contactFile;
  int i,j,I,loopLength;

  contactFile = fopen(fileName,"w");
  if (contactFile == NULL) {
    fprintf(stderr,"cant open file: %s\n",fileName);
    return;
  }

  if (NATIVE) {
    fprintf(contactFile,"NS %s\n",commonStructure[-1]); /** native ss at aligned positions **/
    for (i=0;i<numResidues;i++)
      for (j=i+SKIP_NEARBY1;j<numResidues;j++)
	if (nativeContacts[i][j])
	  fprintf(contactFile,"NC %d %d\n",
		  i,j);

    if (maxsubCount!=NULL) {
      for (i=0;i<numResidues;i++)
	for (j=i+SKIP_NEARBY1;j<numResidues;j++)
	  if (maxsubCount[i][j])
	    fprintf(contactFile,"MS %d %d %f\n",
		    i,j,( (float) maxsubCount[i][j])/clusterSize);
    }

    if (subclusterRMSD != NULL) {
      for (i=0;i<numResidues;i++)
	for (j=i;j<numResidues;j++)
	  if (subclusterRMSD[i][j] != -1)
	    fprintf(contactFile,"SR %d %d %f\n",
		    i,j,subclusterRMSD[i][j]);
    }

    /** printed for the same (i,j) pairs as SR, hence the RMSD test **/
    if (subclusterNativeThreshold != NULL && subclusterRMSD != NULL) {
      for (i=0;i<numResidues;i++)
	for (j=i;j<numResidues;j++)
	  if (subclusterRMSD[i][j] != -1)
	    fprintf(contactFile,"SN %d %d %f\n",
		    i,j,subclusterNativeThreshold[i][j]);
    }
  }

  for (i=0;i<numResidues;i++)
    fprintf(contactFile,"DS %d E: %f H: %f L: %f\n",i,
	    ( (float) structureCount[i][0])/clusterSize,
	    ( (float) structureCount[i][1])/clusterSize,
	    ( (float) structureCount[i][2])/clusterSize);

  for (i=0;i<numResidues;i++)
    fprintf(contactFile,"DBB %d A: %f B: %f G: %f E: %f O: %f\n",i,
	    ( (float) BBCount[i][0])/clusterSize,
	    ( (float) BBCount[i][1])/clusterSize,
	    ( (float) BBCount[i][2])/clusterSize,
	    ( (float) BBCount[i][3])/clusterSize,
	    ( (float) BBCount[i][4])/clusterSize);

  for (i=0;i<numResidues;i++)
    for (j=i+SKIP_NEARBY1;j<numResidues;j++)
      if (contactCount[i][j])
	fprintf(contactFile,"DC %d %d %f\n",
		i,j,( (float) contactCount[i][j])/clusterSize);

  /** beta pairing stuff *************************/
  for (I=-3;I<6;I++) {
    if (I==3) continue; /** used below **/
    for (i=0;i<numResidues;i++) {
      for (j=0;j<numResidues;j++) {
	if (!betaPairingCount[I][i][j]) continue;

	if (I<0) { /** planes -3,-2,-1: native info **/
	  if (I+3<2)
	    fprintf(contactFile,"NB%d %d %d\n",I+3,i,j);
	  else
	    fprintf(contactFile,"NBP %d %d\n",i,j);
	}
	else if (I<3) { /** planes 0,1,2: decoy frequencies **/
	  if (I<2)
	    fprintf(contactFile,"B%d %d %d %f\n",
		    I,i,j,( (float) betaPairingCount[I][i][j])/clusterSize);
	  else
	    fprintf(contactFile,"BP %d %d %f\n",
		    i,j,( (float) betaPairingCount[I][i][j])/clusterSize);
	}
	else { /** plane 4: native features; plane 5: feature frequencies **/
	  if (I==4)
	    fprintf(contactFile,"NBF %d %d\n",i,j);
	  else
	    fprintf(contactFile,"BF %d %d %f\n",
		    i,j,( (float) betaPairingCount[I][i][j])/clusterSize);
	}
      }
    }
  }
  if (numResidues>=4) { /** make sure we have the space!! **/
    for (i=0;i<numResidues;i++) {
      fprintf(contactFile,"NRB %d %d\n", /** native is paired ? **/
	      i, betaPairingCount[3][i][0]);
      fprintf(contactFile,"RB %d %f\n",
	      i,( (float) betaPairingCount[3][i][1])/clusterSize);
    }
    for (i=0;i<numResidues;i++) {
      fprintf(contactFile,"NHP %d %d\n", /** native is paired ? **/
	      i, betaPairingCount[3][i][2]);
      fprintf(contactFile,"HP %d %f\n",
	      i,( (float) betaPairingCount[3][i][3])/clusterSize);
    }
  }
  for (loopLength=1;loopLength<=MAX_HAIRPIN_LOOP_LENGTH;loopLength++) {
    for (i=0;i<numResidues;i++) {
      fprintf(contactFile,"L%d %d %f\n",
	      loopLength,i,( (float) betaPairingCount[8][i][loopLength])/clusterSize);
      fprintf(contactFile,"H%d %d %f\n",
	      loopLength,i,( (float) betaPairingCount[7][i][loopLength])/clusterSize);
      fprintf(contactFile,"NH%d %d %d\n",
	      loopLength,i,betaPairingCount[6][i][loopLength]);
    }
  }
  /*** end of beta pairing stuff **/


  if (subclusterThreshold != NULL)
    for (i=0;i<numResidues;i++)
      for (j=i;j<numResidues;j++)
	if (subclusterThreshold[i][j] != -1)
	  fprintf(contactFile,"ST %d %d %f\n",
		  i,j,subclusterThreshold[i][j]);


  fclose(contactFile);
  return;
}

  /*      ContactMap (clusterMembers, contactCount, structureCount); */

/**
 * Tally secondary structure, backbone class and residue-residue
 * contacts over the members of one cluster.
 *
 * On return, for every common position r:
 *   structureCount[r][SS2Int(ss)]  counts members with that ss class,
 *   BBCount[r][BB2Int(bb)]         counts members with that bb class,
 *   contactCount[r][q]             (q >= r+SKIP_NEARBY1) counts members
 *                                  whose squared distance between r and
 *                                  q is at most CONTACT_THRESHOLD.
 * All three tables are zeroed first (3, 5 and numResidues columns per
 * row respectively).
 */
void ContactMap (int* clusterMembers, int clusterSize,
		 int **contactCount, int **structureCount, int **BBCount ) {

  int res, partner, member, decoy;
  double dx, dy, dz, distSq, *here, *there;

  /* reset all tallies */
  for (res=0; res<numResidues; res++) {
    memset(structureCount[res], 0, 3*sizeof(int));
    memset(BBCount[res], 0, 5*sizeof(int));
    memset(contactCount[res], 0, numResidues*sizeof(int));
  }

  for (member=0; member<clusterSize; member++) {
    decoy = clusterMembers[member];

    for (res=0; res<numResidues; res++) {
      structureCount[res][SS2Int(commonStructure[decoy][res])] += 1;
      BBCount[res][BB2Int(commonBB[decoy][res])] += 1;

      here = commonCoords[decoy] + 3*res;
      for (partner=res+SKIP_NEARBY1; partner<numResidues; partner++) {
	there = commonCoords[decoy] + 3*partner;

	dx = here[0]-there[0];
	dy = here[1]-there[1];
	dz = here[2]-there[2];
	distSq = dx*dx + dy*dy + dz*dz;   /* squared distance, no sqrt */

	if (distSq<=CONTACT_THRESHOLD)
	  contactCount[res][partner] += 1;
      }
    }
  }
}

/** Pseudo-maxsub map against the native structure.
 **
 ** clusterMembers   : decoy indices to analyse
 ** clusterSize      : number of entries in clusterMembers
 ** maxsubCount      : numResidues x numResidues, overwritten; entry [i][j]
 **                    is incremented once per decoy for which the segment
 **                    i..j is accepted
 ** MONITOR_PROGRESS : when non-zero a '.' is written to stderr every 10 decoys
 **
 ** When NATIVE is 0 the map is only zeroed.
 **
 ** A segment is accepted either because j lies at or before the running
 ** endpoint J, or because both ends are even-numbered positions with native
 ** coordinates and the fitted rmsd is below rmsdThreshold[segment length].
 ** Note that J is reset once per decoy, not once per start position i.
 **/
void MaxsubMap (int *clusterMembers, int clusterSize, int **maxsubCount, int MONITOR_PROGRESS) {

  int i,j,k,f2,I,J,start;
  double rmsd;

  for (i=0;i<numResidues;i++) {
    for (j=0;j<numResidues;j++) {
      maxsubCount[i][j] = 0;
    }
  }

  if (NATIVE) { /** calculate (pseudo-) maxsub-to-native scores **/
    for (f2=0;f2<clusterSize;f2++) {
      if (MONITOR_PROGRESS && ! (f2%10)) fprintf(stderr,".");

      I = clusterMembers[f2];

      GetGoodCoords (goodDecoyCoords,I,0,numResidues-1,NULL,NULL);

      /** pseudo-maxsub **/
      J = 0; /** stores the latest endpoint of a good segment **/
      for (i=0;i<numResidues;i++) {
        /** j indexes maxsubCount[i], goodCoords and commonToGood, so it must
         ** stay strictly below numResidues (the old bound j<=numResidues
         ** touched one element past the end of each of them) **/
        for (j=i+SKIP_NEARBY1; j<numResidues; j++) {
          if (j<=J)
            maxsubCount[i][j]++;
          else if ( ( (i%2) + (j%2) == 0) && goodCoords[i] && goodCoords[j] ) {
            /** calc rmsd over the native-covered positions between i and j **/
            start = commonToGood[i];
            k = commonToGood[j] - start + 1;
            rmsd = rmsfit_(&k, goodNativeCoords+3*start,goodDecoyCoords+3*start);
            if (rmsd < rmsdThreshold[j-i+1] ) {
              /** credit every endpoint from the old J+1 up to j; the loop
               ** leaves J at j+1 **/
              J++;
              for (;J<=j;J++)
                maxsubCount[i][J]++;
            }
          }
        }
      }
    }
    if (MONITOR_PROGRESS) fprintf(stderr,"\n");


  }
  return;
}

/** Return 1 when column 2 of structureCount[pos] is strictly larger than
 ** both column 0 and column 1 (ties do not count), otherwise 0. **/
int IsALoop(int pos, int **structureCount) {
  const int *counts = structureCount[pos];

  if (counts[2] <= counts[1]) return 0;
  return counts[2] > counts[0];
}

/** Mark the neighbourhood of a breakpoint as unavailable for further
 ** breakpoints by clearing entries of ok[].
 **
 ** Starting at pos, positions are cleared in the direction of increasing
 ** index and then of decreasing index.  In each direction the walk continues
 ** through loop positions (see IsALoop); once the first non-loop position is
 ** met, the walk is limited to BREAKPOINT_MARGIN positions counted from it.
 ** The walk also stops at the ends of the sequence.
 **/
void ExtendBreakpoint(int pos, int **structureCount, int *ok) {
  enum { FORWARD_UNSET = 10000, BACKWARD_UNSET = -1 };
  int cur, limit;

  /** increasing index **/
  limit = FORWARD_UNSET;
  cur = pos;
  while (cur < numResidues && cur < limit) {
    if (limit == FORWARD_UNSET && ! IsALoop(cur, structureCount))
      limit = cur + BREAKPOINT_MARGIN;
    ok[cur] = 0;
    cur++;
  }

  /** decreasing index **/
  limit = BACKWARD_UNSET;
  cur = pos;
  while (cur >= 0 && cur > limit) {
    if (limit == BACKWARD_UNSET && ! IsALoop(cur, structureCount))
      limit = cur - BREAKPOINT_MARGIN;
    ok[cur] = 0;
    cur--;
  }
  return;
}



/** Choose breakpoints for subclustering by looking for loopy regions.
 **
 ** structureCount        : numResidues x 3 per-residue class counts
 ** numBreakpointsPointer : receives the number of breakpoints chosen
 **
 ** Returns a calloc'ed array of MAX_NUM_BREAKPOINTS ints whose first
 ** *numBreakpointsPointer entries are the breakpoints, sorted with
 ** CompareIntIncreasing.  The array is not freed here.
 **
 ** Positions 0 and numResidues-1 are always breakpoints.  Further candidates
 ** are taken in the order produced by sorting with CompareDecreasing
 ** (presumably highest loop fraction first -- the comparator is defined
 ** elsewhere); the scan stops at the first candidate whose loop fraction
 ** (column 2 count / numDecoys) is below 0.6 or when the array is full.
 ** Each accepted breakpoint blocks its surroundings via ExtendBreakpoint.
 **/
int *ChooseBreakpoints(int **structureCount, int* numBreakpointsPointer) {
  /** look for loopy regions **/

  int *breakpoints;
  float **loopiness;
  int i,*ok,pos,numBreakpoints;

  loopiness = calloc(numResidues,sizeof *loopiness);
  /** ok holds ints, so size it by its element type (was sizeof(int*)) **/
  ok = calloc(numResidues,sizeof *ok); /* is this residue ok for a breakpoint? */
  for (i=0;i<numResidues;i++) {
    ok[i] = 1;
    /** [0] = loop fraction (0 for non-loop positions), [1] = residue index **/
    loopiness[i] = calloc(2,sizeof(float));
    if ( IsALoop(i,structureCount) )
      loopiness[i][0] =  ( (float) structureCount[i][2])/numDecoys;
    else loopiness[i][0] = 0.0;
    loopiness[i][1] = i;
  }
  qsort(loopiness,numResidues,sizeof(float*),
        (int (*)(const void*,const void*)) CompareDecreasing);

  /** the two ends of the sequence are always breakpoints **/
  numBreakpoints = 2;
  breakpoints = calloc(MAX_NUM_BREAKPOINTS,sizeof(int));
  breakpoints[0] = 0;
  breakpoints[1] = numResidues-1;
  ExtendBreakpoint(0,structureCount,ok);
  ExtendBreakpoint(numResidues-1,structureCount,ok);

  for (i=0;i<numResidues;i++) {
    if (loopiness[i][0] < 0.6) break;
    if (numBreakpoints == MAX_NUM_BREAKPOINTS) break;
    pos = (int) loopiness[i][1];
    assert (fabs( loopiness[i][1]-pos) < 0.1); /** sanity check **/
    if ( ! ok[pos]) continue; /** too close to an earlier breakpoint **/
    breakpoints [numBreakpoints] = pos;
    ExtendBreakpoint(pos,structureCount,ok);
    numBreakpoints++;
  }

  *numBreakpointsPointer = numBreakpoints;
  qsort(breakpoints,numBreakpoints,sizeof(int),
        (int (*)(const void*,const void*))CompareIntIncreasing);
  fprintf(stderr,"breakpoints: ");
  for (i=0;i<numBreakpoints;i++) fprintf(stderr," %d",breakpoints[i]);
  fprintf(stderr,"\n");
  free(ok);
  for(i=0;i<numResidues;i++) free(loopiness[i]);
  free(loopiness);
  return breakpoints;
}
  
/** Cluster a random subset of the decoys over sub-regions of the sequence.
 **
 ** structureCount                   : numResidues x 3 per-residue class counts,
 **                                    used to pick breakpoints and for the
 **                                    text display
 ** subclusterRMSDPointer            : receives a numResidues x numResidues map
 ** subclusterThresholdPointer       : receives a numResidues x numResidues map
 ** subclusterNativeThresholdPointer : receives a numResidues x numResidues map
 ** chooseSubsetSize                 : requested subset size (clamped to numDecoys)
 **
 ** All three maps are allocated here, initialised to -1 and handed to the
 ** caller.  If chooseSubsetSize ends up below 20 nothing is done and the three
 ** output pointers are set to NULL.
 **
 ** For every pair of breakpoints at least two apart the subset is clustered
 ** over that region for each of three target cluster sizes; the values
 ** (passed through RMS100) are stored in the map cells selected by the k==K
 ** test below, and a pdb file "<prefix>.subcluster.<size>.<pos1>-<pos2>.pdb"
 ** is written for each cluster.
 **/
void Subcluster(int **structureCount,
                float *** subclusterRMSDPointer,
                float *** subclusterThresholdPointer,
                float *** subclusterNativeThresholdPointer,
                int chooseSubsetSize) {

  int i,pos1,pos2,I,J,K,startPos,stopPos,numBreakpoints,j,k,numPositions,
    *breakpoints, *subsetDecoys, numTargets, *subclusterTargets,subsetSize,maxSubtarget,
    WIDTH,subtarget,target,clusterCenter,*clusterMembers;
  float threshold,**subclusterRMSD, ***neighbors, **subclusterThreshold,
    **subclusterNativeThreshold,**rmsdList,nativeThreshold;
  double **subsetCoords,rmsd;
  char *ss,fileName[500];

  if (chooseSubsetSize > numDecoys) {
    fprintf(stderr,"WARNING: subclustering: chooseSubsetSize=%d > numdecoys=%d. reset.\n",
            chooseSubsetSize,numDecoys);
    chooseSubsetSize = numDecoys;
  }

  if (chooseSubsetSize < 20) {
    fprintf(stderr,"WARNING: Subclustering wont really work if you choose fewer than 20.\n");
    fprintf(stderr,"WARNING: no subclustering -- sorry.\n");
    *subclusterRMSDPointer = NULL;
    *subclusterThresholdPointer = NULL;
    *subclusterNativeThresholdPointer = NULL;
    return;
  }
  subsetSize = (numDecoys<chooseSubsetSize)?numDecoys:chooseSubsetSize;

  /** setup the subtargets **/
  numTargets = 3;
  subclusterTargets = calloc(numTargets,sizeof(int));
  if (subsetSize < 200) {
    fprintf(stderr,"subsetSize<200: using fixed subtarget sizes: 20,10,5\n");
    subclusterTargets[0] = 20;
    subclusterTargets[1] = 10;
    subclusterTargets[2] = 5;}
  else {
    subclusterTargets[0] = subsetSize/10;
    subclusterTargets[1] = subsetSize/20;
    subclusterTargets[2] = subsetSize/40;
  }
  maxSubtarget = subclusterTargets[0];
  fprintf(stderr,"SUBCLUSTER subsetSize: %d top-cluster sizes: %d %d %d\n",
          subsetSize,
          subclusterTargets[0],
          subclusterTargets[1],
          subclusterTargets[2]);


  /** choose random subset of decoys for subclustering **/
  subsetDecoys = RandomSubset (subsetSize,numDecoys);

  /** Find breakpoints for subclustering **/
  breakpoints = ChooseBreakpoints (structureCount,&numBreakpoints);

  /** subclustering: subsetCoords only borrows the rows of commonCoords **/
  subsetCoords = calloc(subsetSize,sizeof(double*));
  for (i=0;i<subsetSize;i++)
    subsetCoords[i] = commonCoords[ subsetDecoys[i] ];

  /** for subregion text display: one character per WIDTH-th of the sequence **/
  WIDTH = (numResidues<75)?numResidues:75;
  ss = calloc(WIDTH+5,sizeof(char));
  for (i=0;i<numResidues;i++) {
    pos1 = (i*WIDTH)/numResidues;
    if (structureCount[i][0] > structureCount[i][1] &&
        structureCount[i][0] > structureCount[i][2] ) ss[pos1] = 'E';
    else if (structureCount[i][1] > structureCount[i][2]) ss[pos1] = 'H';
    else ss[pos1] = 'L';
  }
  /** let the breakpoint residues decide the character of their column **/
  for (I=0;I<numBreakpoints;I++) {
    i = breakpoints[I];
    pos1 = (i*WIDTH)/numResidues;
    if (structureCount[i][0] > structureCount[i][1] &&
        structureCount[i][0] > structureCount[i][2] ) ss[pos1] = 'E';
    else if (structureCount[i][1] > structureCount[i][2]) ss[pos1] = 'H';
    else ss[pos1] = 'L';
  }
  ss[WIDTH] = '\0';
  fprintf(stderr,"%s\n",ss);


  subclusterRMSD = calloc(numResidues,sizeof(float*));
  subclusterThreshold = calloc(numResidues,sizeof(float*));
  subclusterNativeThreshold = calloc(numResidues,sizeof(float*));
  for (i=0;i<numResidues;i++) {
    subclusterRMSD[i] = calloc(numResidues,sizeof(float));
    subclusterThreshold[i] = calloc(numResidues,sizeof(float));
    subclusterNativeThreshold[i] = calloc(numResidues,sizeof(float));
    for (j=0;j<numResidues;j++) {
      subclusterRMSD[i][j] = -1;
      subclusterThreshold[i][j] = -1;
      subclusterNativeThreshold[i][j] = -1;
    }
  }

  /** for storing rmsds to native: [0] = rmsd, [1] = subset index **/
  rmsdList = calloc(subsetSize,sizeof(float*));
  for (i=0;i<subsetSize;i++) rmsdList[i] = calloc(2,sizeof(float));

  /** now loop over all breakpoint pairs, do subclustering: **/
  for (I = 0;I<numBreakpoints;I++) {
    for (J = I+2;J<numBreakpoints;J++) {
      pos1 = breakpoints[I];
      pos2 = breakpoints[J];

      if (NATIVE) {/** calculate native distance to all the decoys **/
        for (i=0;i<subsetSize;i++) {
          GetGoodCoords (goodDecoyCoords, subsetDecoys[i],
                         pos1,pos2,&startPos,&stopPos);
          numPositions = stopPos -startPos +1;
          rmsdList[i][0] = rmsfit_(&numPositions, goodNativeCoords+startPos*3,
                                   goodDecoyCoords+startPos*3);
          rmsdList[i][1] = i;
        }
        qsort(rmsdList,subsetSize,sizeof(float*),
              (int (*)(const void*,const void*))CompareIncreasing);
      }


      neighbors = MakeNeighborList (subsetSize, subsetCoords, maxSubtarget, pos1, pos2,0);

      for (K=0;K<numTargets;K++) {
        subtarget = subclusterTargets[K];

        threshold = ClusteringThreshold (neighbors, subsetSize,
                                         2, subtarget, 10000,
                                         0.0f,1000.0f,
                                         &clusterCenter,
                                         &target,
                                         0); /** dont show progress reports **/
        assert (target == subtarget);


        if (NATIVE) { /** calculate rmsd to native of clustercenter: **/
          GetGoodCoords (goodDecoyCoords, subsetDecoys[clusterCenter], pos1,pos2,&startPos,&stopPos);
          numPositions = stopPos -startPos +1;
          rmsd = rmsfit_(&numPositions, goodNativeCoords+startPos*3, goodDecoyCoords+startPos*3);
          /** rmsd to native of the subtarget-th closest subset decoy **/
          nativeThreshold = rmsdList[subtarget-1][0];
        }
        else {
          rmsd = 0.0;
          nativeThreshold = 0.0;
        }

        /** store result **/
        for (i = breakpoints[I]+1; i<breakpoints[I+1];i++) {
          for (j = breakpoints[J-1]+1; j<breakpoints[J];j++) {

            /** k in [0,numTargets): grows with the distance of i (and j)
             ** from the nearer breakpoint of its own inter-breakpoint
             ** stretch; the smaller of the two values is used **/
            k = ( ( 2 * IntMin ( i-breakpoints[I], breakpoints[I+1]-i) * numTargets) /
                  (breakpoints[I+1] - breakpoints[I]+1) );

            k = IntMin (k,  ( ( 2 * IntMin ( j-breakpoints[J-1], breakpoints[J]-j) * numTargets) /
                              (breakpoints[J] - breakpoints[J-1]+1) ) );

            assert (k<numTargets);

            if (k==K) {
              subclusterRMSD[i][j] = RMS100(rmsd,pos2-pos1+1);
              subclusterThreshold[i][j] = RMS100(threshold,pos2-pos1+1);
              subclusterNativeThreshold[i][j] = RMS100(nativeThreshold,pos2-pos1+1);
            }
          }
        }


        /** show the subregion: **/

        j = (pos1*WIDTH)/numResidues;
        for(i=0;i<j;i++)fprintf(stderr," ");
        j = (pos2*WIDTH)/numResidues;
        for(;i<j;i++)fprintf(stderr,"-");
        j = (numResidues*WIDTH)/numResidues;
        for(;i<j;i++)fprintf(stderr," ");
        fprintf(stderr,"  %4.1f %4.1f %4.1f %4.1f %4d-%4d (%4d)\n",threshold,RMS100(threshold,pos2-pos1+1),
                rmsd,RMS100(rmsd,pos2-pos1+1), pos1,pos2,pos2-pos1+1);


        /** make pdb file: cluster center first, then its nearest neighbors **/
        clusterMembers = calloc(subtarget,sizeof(int));
        clusterMembers[0] = subsetDecoys[clusterCenter];
        for (i=0;i<subtarget-1;i++) {
          clusterMembers[i+1] = subsetDecoys[ (int) neighbors[clusterCenter] [i] [1] ];
        }

        /** bounded write: a long prefix is truncated instead of overflowing **/
        snprintf(fileName, sizeof fileName, "%s.subcluster.%04d.%04d-%04d.pdb",prefix,subtarget,pos1,pos2);
        MakePDBFile (fileName, clusterMembers, subtarget, pos1,pos2);

        free(clusterMembers);
      }

      for (i=0;i<subsetSize;i++) {
        for (j=0;j<maxSubtarget;j++) {
          free(neighbors[i][j]);
        }
        free(neighbors[i]);
      }
      free(neighbors);
    }
  }

  *subclusterRMSDPointer = subclusterRMSD;
  *subclusterThresholdPointer = subclusterThreshold;
  *subclusterNativeThresholdPointer = subclusterNativeThreshold;

  for (i=0;i<subsetSize;i++) free(rmsdList[i]);
  free(rmsdList);

  /** release the scratch arrays allocated above (previously leaked) **/
  free(ss);
  free(subsetCoords); /** container only; the rows belong to commonCoords **/
  free(breakpoints);
  free(subclusterTargets);
  /** NOTE(review): subsetDecoys comes from RandomSubset, whose allocation is
   ** not visible here, so it is deliberately left alone -- confirm ownership **/

  return;
}



/** Report how the decoys cluster for a range of top-cluster sizes.
 **
 ** neighbors         : per-decoy neighbor lists; neighbors[d][n][0] is a
 **                     distance and neighbors[d][n][1] a decoy index
 ** minClusterSize    : stop adding clusters once the best remaining one
 **                     would cover fewer than this many unclaimed decoys
 ** minTopClusterSize : first top-cluster size ("target") tried
 ** maxClusterSize    : last top-cluster size tried
 ** minThreshold,
 ** maxThreshold,
 ** targetCoverage    : when targetCoverage > 0 the scan stops early once
 **                     coverage exceeds it with threshold > minThreshold, or
 **                     once threshold > maxThreshold; the decoys covered at
 **                     that point are written to "<prefix>.clustered_tags"
 ** outFile           : receives the "AC:" (and GET_CLUSTERED_PARAMS) lines
 **
 ** For each target the threshold is the smallest distance to the target-th
 ** neighbor over all decoys; clusters are then picked greedily by the number
 ** of still-unclaimed decoys within that threshold.
 **/
void AnalyzeClustering (
  float ***neighbors,
  int minClusterSize, int minTopClusterSize, int maxClusterSize,
  float minThreshold, float maxThreshold,
  int targetCoverage,
  FILE *outFile
)
{
  /** clusterCenter, coverage, bestCount and threshold get defined start
   ** values so they are never read uninitialised (e.g. no neighbor distance
   ** below 100, or an empty target range) **/
  int i,j,done,clusterCenter = 0,coverage = 0,*clusterCenters,
    count,bestCount = 0,target,f1,clusterCount,*exists,*clusterCenterExists;
  float **nl,rmsd, threshold = 0.0f,maxsub;
  FILE * tagFile;
  char* tagFilename;


  /** for each top-cluster size, pick clusters with minimum size **/

  exists = calloc(numDecoys,sizeof(int));
  clusterCenterExists = calloc(numDecoys,sizeof(int));
  clusterCenters = calloc(numDecoys,sizeof(int));


  for (target = minTopClusterSize;
       target<=maxClusterSize;
       target+= (1+ (target-minTopClusterSize)/10) ) { /** clustering **/

    /** tightest radius that still holds `target` decoys, and its center **/
    threshold = 100.0;
    for (i=0;i<numDecoys;i++)
      if (neighbors[i][target-1][0] < threshold) {
        threshold = neighbors[i][target-1][0];
        clusterCenter = i;
      }


    /** choose cluster centers **/
    clusterCount = 0;
    coverage = 0; /* number of decoys covered by cluster centers */
    for (i=0;i<numDecoys;i++) {
      exists[i] = 1;
      clusterCenterExists[i] = 1;
    }

    while (1) {
      clusterCenters[clusterCount] = clusterCenter;

      /** claim the center and its not-yet-claimed neighbors **/
      assert (exists[clusterCenter]);
      exists[clusterCenter] = 0;
      count=1;
      nl = neighbors[clusterCenter];
      for (i=0;i<target-1;i++) {
        if (nl[i][0]>=threshold) break;
        f1 = (int) nl[i][1];
        count+=exists[f1];
        exists[f1] = 0;
      }
      coverage += count;

      if (clusterCount) assert (count == bestCount);

      /** find the next center: the unclaimed decoy covering the most
       ** unclaimed decoys; repeat if it is rejected as redundant **/
      done = 0;
      while (! done) {
        done = 1;
        bestCount = 0;
        for (i=0;i<numDecoys;i++) {
          nl = neighbors[i];
          count = 1;
          for (j=0;j<target-1;j++) {
            if (nl[j][0] >= threshold) break;
            count += exists[(int) nl[j][1]];
          }
          if (exists[i] && clusterCenterExists[i] && count>bestCount) {
            bestCount = count;
            clusterCenter = i;
          }
        }
        if (bestCount<minClusterSize) break;
        if (redundancyThreshold <BIG_RMSD && (clusterCount+1)<20) {
          /*********** only apply redundancy filtering to the first 20 ***********/
          for (i=0;i<=clusterCount;i++)
            if (rmsfit_(&numResidues,
                        commonCoords[clusterCenters[i]],
                        commonCoords[clusterCenter]) < redundancyThreshold) break;
          if (i<=clusterCount) {
            /** too close to an existing center: ban it and search again **/
            clusterCenterExists[clusterCenter] = 0;
            done = 0;
          }
        }
      }


      clusterCount++;
      if (bestCount<minClusterSize) break;
    }

    fprintf(stderr,"target %4d threshold %5.2f clusters %d coverage %d",
            target,threshold,clusterCount,coverage);
    fprintf(outFile,"AC: target %4d threshold %5.2f clusters %d coverage %d",
            target,threshold,clusterCount,coverage);
    if (NATIVE) { /** best rmsd and maxsub to native for top 1,5,20,50,all **/
      rmsd = 100;
      maxsub = 100;
      j=0;
      for (i=0;i<clusterCount;i++) {
        f1 = clusterCenters[i];
        rmsd = (rmsd>rmsdToNative[f1])?rmsdToNative[f1]:rmsd;
        if (numScorelineFields>18)
          maxsub = (maxsub>decoyScores[f1][17])?decoyScores[f1][17]:maxsub;
        if (i==0 || i==4 || i== 19 || i==49) {
          j++;
          fprintf(stderr," %5.2f %5.2f",rmsd,maxsub);
          fprintf(outFile," %5.2f %5.2f",rmsd,maxsub);
        }
      }
      /** pad to five column pairs with the running best values **/
      for (;j<5;j++) {
        fprintf(stderr," %5.2f %5.2f",rmsd,maxsub);
        fprintf(outFile," %5.2f %5.2f",rmsd,maxsub);
      }
    }
    fprintf(stderr,"\n");
    fprintf(outFile,"\n");

    // break out if we are looking for a fixed total coverage
    if ( targetCoverage > 0 &&
         ( ( coverage > targetCoverage && threshold > minThreshold ) ||
           ( threshold > maxThreshold ) ) ) {
      break;
    }
  }
  if ( targetCoverage > 0 ) {
    fprintf( outFile, "GET_CLUSTERED_PARAMS: target= %d threshold= %f coverage= %d\n",
             target, threshold, coverage );

    tagFilename = calloc( strlen(prefix) + 50, sizeof(char) );
    sprintf( tagFilename, "%s.clustered_tags", prefix );
    tagFile = fopen( tagFilename, "w" );

    if ( tagFile == NULL ) {
      fprintf( stderr, "Cant open tag file for writing: %s\n", tagFilename );
    }
    else {
      // show the decoys included at this threshold
      for ( i=0; i< numDecoys; ++i ) {
        if ( !exists[i] ) {
          fprintf( tagFile, "%s\n", decoyName[i] );
        }
      }
      fclose( tagFile );
    }
    free(tagFilename);
  }

  /** release the work arrays (previously leaked) **/
  free(clusterCenters);
  free(clusterCenterExists);
  free(exists);
  return;
}




/** Write the neighbor lists to a binary file plus a cross-check file.
 **
 ** binFilename    : name of the binary file; "<binFilename>.crossCheck"
 **                  receives one decoy name per line
 ** neighbors      : neighbors[d][n] points at two floats, written verbatim
 ** maxClusterSize : number of neighbor entries written per decoy
 **
 ** Any failure to open or write a file is reported on stderr and ends the
 ** program with exit(1).  The writes are performed outside of assert() so
 ** that they still happen when the program is compiled with NDEBUG.
 **/
void DumpBinFile(char *binFilename, float ***neighbors, int maxClusterSize ) {

  FILE *file;
  int i,j,written;
  char filename[1000];

  written = snprintf(filename,sizeof filename,"%s.crossCheck",binFilename);
  if (written < 0 || (size_t) written >= sizeof filename) {
    fprintf(stderr,"bin file name too long: %s\n",binFilename);
    exit(1);
  }
  file = fopen(filename,"w");
  if (file == NULL) {
    fprintf(stderr,"Cant open file for writing: %s\n",filename);
    exit(1);
  }
  for (i=0;i<numDecoys;i++) fprintf(file,"%s\n",decoyName[i]);
  fclose(file);

  file = fopen(binFilename,"wb");
  if (file == NULL) {
    fprintf(stderr,"Cant open file for writing: %s\n",binFilename);
    exit(1);
  }
  for (i=0;i<numDecoys;i++)
    for (j=0;j<maxClusterSize;j++)
      if (fwrite(neighbors[i][j], sizeof(float), 2, file) != 2) {
        fprintf(stderr,"Error writing to file: %s\n",binFilename);
        exit(1);
      }
  fclose(file);
}


/** Read neighbor lists previously written by DumpBinFile.
 **
 ** binFilename    : name of the binary file; "<binFilename>.crossCheck" must
 **                  list exactly the current decoy names, in order
 ** maxClusterSize : number of neighbor entries stored per decoy
 **
 ** Returns a freshly allocated numDecoys x maxClusterSize x 2 float array.
 ** A '.' is written to stderr every 10 decoys.  A missing file, a decoy-name
 ** mismatch or a short read is reported on stderr and ends the program with
 ** exit(1).  The reads are performed outside of assert() so that they still
 ** happen when the program is compiled with NDEBUG.
 **/
float ***ReadBinFile(char *binFilename, int maxClusterSize) {
  FILE *file;
  float ***neighbors;
  int i,j,written;
  char filename[1000],junk[1000];

  written = snprintf(filename,sizeof filename,"%s.crossCheck",binFilename);
  if (written < 0 || (size_t) written >= sizeof filename) {
    fprintf(stderr,"bin file name too long: %s\n",binFilename);
    exit(1);
  }
  file = fopen(filename,"r");
  if (file == NULL) {
    fprintf(stderr,"Cant open file for reading: %s\n",filename);
    exit(1);
  }
  for (i=0;i<numDecoys;i++) {
    /** field width keeps the token inside junk[1000] **/
    if (fscanf(file,"%999s",junk) != 1 || strcmp(junk,decoyName[i])) {
      fprintf(stderr,"Decoy list mismatch at decoy %d in %s\n",i,filename);
      exit(1);
    }
  }
  fclose(file);

  neighbors = calloc(numDecoys,sizeof(float**));
  file = fopen(binFilename,"rb");
  if (file == NULL) {
    fprintf(stderr,"Cant open file for reading: %s\n",binFilename);
    exit(1);
  }

  for (i=0;i<numDecoys;i++) {
    if (i%10==0) fprintf(stderr,".");
    neighbors[i] = calloc(maxClusterSize,sizeof(float*));
    for (j=0;j<maxClusterSize;j++) {
      neighbors[i][j] = calloc(2,sizeof(float));
      if (fread (neighbors[i][j], sizeof(float), 2, file) != 2) {
        fprintf(stderr,"\nError reading from file: %s\n",binFilename);
        exit(1);
      }
    }
  }
  fprintf(stderr,"\n");

  fclose(file);
  return neighbors;
}

/** Delete entry pos from argList by shifting every later entry down one
 ** slot, then shrink argCount by one. **/
void RemoveArgument(int pos) {
  int slot = pos;

  while (slot + 1 < argCount) {
    argList[slot] = argList[slot + 1];
    slot++;
  }
  argCount -= 1;
}

/** Return the index of the first entry of argList equal to flag, or
 ** argCount when the flag is not present. **/
static int FindFlagIndex(const char *flag) {
  int i;
  for (i=0;i<argCount;i++)  if (!strcmp(argList[i],flag) ) break;
  return i;
}

/** Parse the command line held in argList/argCount into the global settings.
 **
 ** Optional flags are handled first and removed from argList, so that the
 ** remaining positional arguments can be read by index:
 **   -fc <file>  forced contacts        -bf <prefix>  binary neighbor file
 **   -sc <n>     subclustering size (0 disables subclustering)
 **   -ms         turn MAXSUBMAP off     -ssrms        set SSRMS
 **   -fa         set FULLATOM           -nbrs         set SHOW_NBRS
 **   -rt <x>     redundancy threshold   -show <n>     GET_CLUSTERED_DECOYS
 ** A value-taking flag given as the very last argument is ignored.
 **
 ** Positional arguments (at least 6 entries including the program name):
 **   [2] native file or "-"   [3] output prefix
 **   [4] "min,minTop,target,max" or "min,target,max" cluster sizes
 **   [5] "minThreshold,maxThreshold"
 **   [6],[7] subset selection mode and fraction (only when argCount == 8)
 ** On a parse failure, or with too few arguments, the help message is
 ** printed and the program exits with status 1.
 **/
void ReadArguments() {
  int i;

  /** defaults **/
  USE_BIN_FILE = 0;
  SUBCLUSTER = 1;
  chooseForSubclustering = 400;
  DO_CLUSTERING = 1;
  CHOOSE_RANDOMLY = 0;
  CHOOSE_BY_SCORE = 0;
  CHOOSE_BY_CO = 0;
  CHOOSE_BY_PB = 0;
  CHOOSE_LOW = 0;
  CHOOSE_BY_CO_OLD = 0;
  PHIL_CLUSTERING = 0;
  MAXSUBMAP = 1;
  ANALYZE_CLUSTERING = 1;
  redundancyThreshold = BIG_RMSD;
  FORCED_CONTACTS = 0;
  FULLATOM = 0;

  /** forced contacts **/
  i = FindFlagIndex("-fc");
  if (i<argCount-1) {
    FORCED_CONTACTS = 1;
    ReadForcedContacts(argList[i+1]);
    RemoveArgument(i);
    RemoveArgument(i);
  }

  /** use a binary file? **/
  i = FindFlagIndex("-bf");
  if (i<argCount-1) {
    USE_BIN_FILE = 1;
    binFilePrefix = argList[i+1];
    RemoveArgument(i);
    RemoveArgument(i);
  }

  /** do subclustering **/
  i = FindFlagIndex("-sc");
  if (i<argCount-1) {
    SUBCLUSTER = 1;
    chooseForSubclustering = atoi(argList[i+1]);
    if (chooseForSubclustering==0) SUBCLUSTER=0;
    RemoveArgument(i);
    RemoveArgument(i);
  }

  /** skip the maxsub map **/
  i = FindFlagIndex("-ms");
  if (i<argCount) {
    MAXSUBMAP =0;
    RemoveArgument(i);
  }

  /** rmsd only over ss **/
  i = FindFlagIndex("-ssrms");
  if (i<argCount) {
    SSRMS=1;
    RemoveArgument(i);
  }

  /** fa silent files, have to skip CHI angles to get the right tag **/
  i = FindFlagIndex("-fa");
  if (i<argCount) {
    FULLATOM=1;
    RemoveArgument(i);
  }

  /** just show neighbors for each decoy **/
  i = FindFlagIndex("-nbrs");
  if (i<argCount) {
    SHOW_NBRS=1;
    RemoveArgument(i);
  }

  /** use a particular redundancy threshold **/
  i = FindFlagIndex("-rt");
  if (i<argCount-1) {
    redundancyThreshold = atof(argList[i+1]);
    fprintf(stderr,"Using a redundancy threshold of %f Angstroms for the first 20 clusters in analyze clustering\n",redundancyThreshold);
    RemoveArgument(i);
    RemoveArgument(i);
  }

  /** show clustered decoys  **/
  i = FindFlagIndex("-show");
  if (i<argCount-1) {
    GET_CLUSTERED_DECOYS = atoi(argList[i+1]);
    INCLUDE_FILENAME = 0;
    fprintf( stderr, "Target coverage for clustered decoys: %d\n",
             GET_CLUSTERED_DECOYS );
    RemoveArgument(i);
    RemoveArgument(i);
  }



  /** parse the standard arguments **/
  if (argCount>=6) {

    /** four sizes, or three with minTopClusterSize defaulting to minClusterSize **/
    if (4!= sscanf(argList[4], "%d,%d,%d,%d",
                   &minClusterSize,&minTopClusterSize,
                   &targetClusterSize,&maxClusterSize)) {
      if (3!= sscanf(argList[4], "%d,%d,%d",
                     &minClusterSize,
                     &targetClusterSize,&maxClusterSize)) {
        fprintf(stderr,"cant parse the cluster size info: %s\n",argList[4]);
        WriteHelpMessage(argList[0]);
        exit(1);
      }
      else {
        fprintf(stderr,"setting minTopClusterSize := minClusterSize = %d\n",
                minClusterSize);
        minTopClusterSize = minClusterSize;
      }
    }

    if (2!= sscanf(argList[5],"%f,%f",&minThreshold,&maxThreshold)) {
      /** this message used to repeat the cluster-size wording **/
      fprintf(stderr,"cant parse the threshold info: %s\n",argList[5]);
      WriteHelpMessage(argList[0]);
      exit(1);
    }

    /** a minimum cluster size of 0 switches clustering off **/
    if (minClusterSize == 0) {
      DO_CLUSTERING=0;
    }
    else assert (minClusterSize >= 1 &&
                 minTopClusterSize >= minClusterSize &&
                 targetClusterSize >= minTopClusterSize &&
                 maxClusterSize >= targetClusterSize &&
                 minThreshold <= maxThreshold);


    if (argCount==8) { /** choose a subset for clustering **/

      if (!strcmp(argList[6],"r")) {  /** choose at random **/
        fprintf(stderr,"Will choose a random subset of the decoys\n");
        CHOOSE_RANDOMLY = 1;
      }
      else if (argList[6][0]=='c' && argList[6][1]=='o') {  /** choose by co -- see ReadSilentFile **/
        /** the bin count follows the 3-character tag; a bare "co" has no
         ** character at offset 3, so only read it when the string is long
         ** enough (atoi of the empty remainder would give 0 anyway) **/
        CHOOSE_BY_CO = (strlen(argList[6]) > 3) ? atoi(argList[6]+3) : 0;
        fprintf(stderr,"Choose by co & score. numBins= %d\n",CHOOSE_BY_CO);
      }
      else if (!strcmp(argList[6],"pb")) {  /** choose by pb **/
        fprintf(stderr,"Will choose a subset of decoys by pb\n");
        CHOOSE_BY_PB = 1;
      }
      else {
        /** a negative count selects CHOOSE_LOW and is stored as its magnitude **/
        CHOOSE_BY_SCORE = atoi(argList[6]);
        if (CHOOSE_BY_SCORE<0) {
          CHOOSE_LOW = 1;
          CHOOSE_BY_SCORE *= -1;
        }
        else CHOOSE_LOW = 0;
        fprintf(stderr,"Will choose a subset of decoys by score: %d CHOOSE_LOW: %d\n",
                CHOOSE_BY_SCORE,CHOOSE_LOW);
      }
      CHOOSE_FRACTION = atof(argList[7]);
    }

    /** a lone "-" in place of the native file means there is no native **/
    if (strlen(argList[2])==1 && argList[2][0] == '-') {
      fprintf(stderr,"No native structure.\n");
      NATIVE = 0;
    }
    else NATIVE = 1;


    prefix = argList[3];

    /** a prefix ending in "pc" selects the alternative clustering; the
     ** length guard keeps the suffix pointer inside the string **/
    if ( strlen(prefix) >= 2 && !strcmp(prefix+strlen(prefix)-2,"pc")) {
      fprintf(stderr,"Will use phils clustering procedure\n");
      PHIL_CLUSTERING = 1; /** silly switch **/
      MAXSUBMAP = 0;
      SUBCLUSTER =0;
    }
  }
  else {
    WriteHelpMessage(argList[0]);
    exit(1);
  }


}

/********************************************************************/
/********************************************************************/
/********************************************************************/
/********************************************************************/
/********************************************************************/
/********************************************************************/
/********************************************************************/
/********************************************************************/
/********************************************************************/
/********************************************************************/
/********************************************************************/
/********************************************************************/
/********************************************************************/
/********************************************************************/
/********************************************************************/
/********************************************************************/
/********************************************************************/
/********************************************************************/
/********************************************************************/
/********************************************************************/
/********************************************************************/
/********************************************************************/
/********************************************************************/
/********************************************************************/
/********************************************************************/
/********************************************************************/
/********************************************************************/
/********************************************************************/
/********************************************************************/
/********************************************************************/
/********************************************************************/
/********************************************************************/


int main(int argc, char *argv[]) {
  FILE *file,*outFile;

  double *v1, *v2, d,rmsd,**subsetCoords;

  float ***neighbors,r,avg,threshold,**clusterList,**nl,
    **subclusterThreshold,**subclusterNativeThreshold,**subclusterRMSD,
    bestRMSD,avgRMSD,centerRMSD,t1,**scoreList;

  int *goodContacts,**nativeContacts,*totalContacts,*exists,*clusterMembers,
    **contactCount,**structureCount,**BBCount,
		**maxsubCount,*allDecoys,*subsetDecoys,
    *breakpoints,*clusterCenterList,***betaPairingCount;

  char *ss,nativeSequence[MAX_SEQUENCE_LENGTH],*nativeStructure;
  
  int  K,maxSubtarget,count,f1,f2,i,j,k,L,numBreakpoints,subtarget,pos1,pos2,subsetSize,
    rsdCount,I,J,good,total,clusterSize, numTargets, *subclusterTargets;
  
  char junk50[500],fileName[500],clusterName[500],binFilename[1000];
  
  int clusterCount,clusterCenter,startPos,stopPos,numPositions,target;
  
  int WIDTH; /** width of subclustering display **/

/*    fprintf(stderr,"WARNING!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n\nthis file is being editted, do not recompile!!!!!!!!!!!\n\nUse the executable!!!!!!!!!!!!!!!\n"); */


  /** threshold for pseudo-maxsub **/
  for (i=0;i<1000;i++) rmsdThreshold[i] = ( RMSD_THRESHOLD_10 + (i-10.0) * 
					     ((RMSD_THRESHOLD_100 - RMSD_THRESHOLD_10) / 90) );
  
  

  argCount = argc;
  argList = calloc(argCount,sizeof(char*));
  for (i=0;i<argCount;i++) argList[i] = argv[i];
  
  ReadArguments(); /** sets clusterSize constraints and thresholds **/
  
  
  
  
  /** a bit of setup **/
  homologName = calloc(MAX_NUM_HOMOLOGS,sizeof(char*));
  homologLength = calloc(MAX_NUM_HOMOLOGS,sizeof(int));
  homologSequence = calloc(MAX_NUM_HOMOLOGS,sizeof(char*));
  commonToFull = calloc(MAX_NUM_HOMOLOGS,sizeof(int*));
  fullToCommon = calloc(MAX_NUM_HOMOLOGS,sizeof(int*));
  homologName++; /** native index is -1 **/
  homologLength++;
  homologSequence++;
  commonToFull++;
  fullToCommon++;

  /***************************************************************/
  /****************** Read the silent file ***********************/


  commonCoords = calloc(MAX_NUM_DECOYS,sizeof(double*));
  commonStructure = calloc(MAX_NUM_DECOYS,sizeof(char*));
  commonBB        = calloc(MAX_NUM_DECOYS,sizeof(char*));
  fullCoords = calloc(MAX_NUM_DECOYS,sizeof(double*));
  decoyScores = calloc(MAX_NUM_DECOYS,sizeof(double*));
  decoyName = calloc(MAX_NUM_DECOYS,sizeof(char*));
  decoyToHomolog = calloc(MAX_NUM_DECOYS,sizeof(int));

  commonCoords++;  /** native index is -1 **/
  commonStructure++;
  commonBB++;
  fullCoords++;
  
  decoyToHomolog[-1] = -1;

  /** auto-detect the input-file type: silent-file, ALIGN file, or pdb-list **/
  if ((file = fopen(argList[1],"r")) == NULL) {
    fprintf(stderr,"Missing decoy file: %s\n",argList[1]);
    return 1;
  }
  fscanf(file,"%s",junk50);
  
  if (!strcmp(junk50,"SEQUENCE:")) { /** Its a single silent file **/
    fscanf(file,"%s",nativeSequence);
    fclose(file);

    /** setup for single sequence clustering **/
    numResidues = strlen(nativeSequence);
    numHomologs = 1;
    homologName[0] = calloc(strlen(argList[1])+1,sizeof(char));
    strcpy(homologName[0],argList[1]);
    homologLength[0] = numResidues;
    homologSequence[0] = calloc(numResidues+1,sizeof(char));
    strcpy(homologSequence[0],nativeSequence);
    homologName[-1] = calloc(10,sizeof(char));
    sprintf(homologName[-1],"NATIVE");
    homologLength[-1] = numResidues;
    homologSequence[-1] = calloc(numResidues+1,sizeof(char));
    strcpy(homologSequence[-1],nativeSequence);
    commonToFull[0] = calloc(numResidues,sizeof(int));
    commonToFull[-1] = calloc(numResidues,sizeof(int));
    fullToCommon[0] = calloc(numResidues,sizeof(int));
    fullToCommon[-1] = calloc(numResidues,sizeof(int));
    for (i=0;i<numResidues;i++) {
      commonToFull[-1][i] = i; /*native*/
      fullToCommon[-1][i] = i; /*native*/
      commonToFull[0][i] = i;
      fullToCommon[0][i] = i;
    }

    ReadSilentFile(0);



  }
  
  else if (!strcmp(junk50,"ALIGN") ) { /** clustering of multiple homologs **/
    fclose(file);
    ReadAlignFile(argList[1]);
  }

  else {
    fclose(file);
    fprintf(stderr,"not debugged yet: Reading pdb-list file: %s\n",argList[1]);
    return 1;

/*      ReadPDBFiles(file,nativeSequence,decoySequence,coords,structure,decoyScores,decoyName, */
/*  		 &numDecoys,NATIVE,&numScorelineFields,&numResidues); */
  }
  

  fprintf(stderr,"Read %d decoys; numResidues = %d\n",numDecoys,numResidues);
  


  /**********************/
  /** read native info **/

  if ( NATIVE && ( (file = fopen(argList[2],"r"))==NULL) ) {
    fprintf(stderr,"Cant open native coordinate file\n");
    NATIVE = 0;
  }
  else if (NATIVE) {
    fscanf(file,"%c%s%*c",junk50,nativeSequence);
    assert (junk50[0] == '#');
    assert ( ! strcmp(homologSequence[-1],nativeSequence));

    L = strlen(nativeSequence);
    nativeStructure = calloc(L+1,sizeof(char));
    fullCoords[-1] = calloc(3*L,sizeof(double));
    commonCoords[-1] = calloc(3*numResidues,sizeof(double));
    commonStructure[-1] = calloc(numResidues+1,sizeof(char));
    goodCoords = calloc(L,sizeof(int));
    commonToGood = calloc(numResidues,sizeof(int));

    for (i=0;i<L;i++) {
      fscanf(file,"%d %s %d %lf %lf %lf%*c",&j,nativeStructure+i,goodCoords+i,
	     fullCoords[-1]+3*i,fullCoords[-1]+3*i+1,fullCoords[-1]+3*i+2);
      assert (j==i);
      if ( SSRMS && nativeStructure[i] == 'L' ) {
	goodCoords[i] = 0;
	fprintf(stderr,"discarding native coords at position %d ss=%c\n",
		i,nativeStructure[i]);
      }
    }
    fclose(file);
    assert (strlen(nativeStructure) == L);

    /** transfer to commonCoords **/
    count =0;
    for (i=0;i<numResidues;i++) {
      for (j=0;j<3;j++) 
	commonCoords[-1][3*i+j] = fullCoords[-1][ 3 *commonToFull[-1][i] +j];
      commonStructure[-1][i] = nativeStructure[commonToFull[-1][i]];
      goodCoords[i] = goodCoords[ commonToFull[-1][i] ]; /** adjust to common numbering **/
      if (goodCoords[i]) {
	commonToGood [i] = count;
	count++;
      }
    }
    commonStructure[-1][numResidues] = '\0';
    numGoodCoords = count;
    
  
    /** calculate native contacts **/
    nativeContacts = calloc(numResidues,sizeof(int*));
    for (i=0;i<numResidues;i++) {
      nativeContacts[i] = calloc(numResidues,sizeof(int));
      for (j=0;j<numResidues;j++) nativeContacts[i][j] = 0;
    }
    for (i=0;i<numResidues;i++) {
      if (! goodCoords[i]) continue;
      for (j=i+SKIP_NEARBY1;j<numResidues;j++) {
	if (! goodCoords[j]) continue;
      
	v1 = commonCoords[-1]+3*i;
	v2 = commonCoords[-1]+3*j;
	d = ( (v1[0]-v2[0])*(v1[0]-v2[0]) + 
	      (v1[1]-v2[1])*(v1[1]-v2[1]) + 
	      (v1[2]-v2[2])*(v1[2]-v2[2]) );
	if (d<=CONTACT_THRESHOLD) {
	  nativeContacts[i][j] = 1;
	  nativeContacts[j][i] = 1;
	}
      }
    }

    /** For calculating RMSD's: **/
    goodNativeCoords = calloc(3*numResidues,sizeof(double));
    GetGoodCoords (goodNativeCoords, -1, 0, numResidues-1, NULL, NULL);
  } /** finished reading in native coordinates **/




  /*********************************************************************************/
  /** Calculate RMSD to native, number of correct contacts, and contact fractions **/

  maxsubCount = calloc(numResidues,sizeof(int*));
  contactCount = calloc(numResidues,sizeof(int*));
  for (i=0;i<numResidues;i++) {
    maxsubCount[i] = calloc(numResidues,sizeof(int));
    contactCount[i] = calloc(numResidues,sizeof(int));
    for (j=0;j<numResidues;j++) maxsubCount[i][j] = 0;
    for (j=0;j<numResidues;j++) contactCount[i][j] = 0;
  }
  betaPairingCount = calloc(12,sizeof(int**));
  for (I=0;I<12;I++) { /** I=0: antiparallel, I=1: parallel **/
    betaPairingCount[I] = calloc(numResidues,sizeof(int*));
    for (i=0;i<numResidues;i++) {
      betaPairingCount[I][i] = calloc(numResidues,sizeof(int));
    }
  }
  betaPairingCount += 3;
  
  structureCount = calloc(numResidues,sizeof(int*));
  for (i=0;i<numResidues;i++) {
    structureCount[i] = calloc(3,sizeof(int));
    for (j=0;j<3;j++) structureCount[i][j] = 0;
  }
  BBCount = calloc(numResidues,sizeof(int*));
  for (i=0;i<numResidues;i++) {
    BBCount[i] = calloc(5,sizeof(int));
    for (j=0;j<5;j++) BBCount[i][j] = 0;
  }
  
  allDecoys = calloc(numDecoys,sizeof(int)); /* list all decoys for passing to functions */
  for (i=0;i<numDecoys;i++) allDecoys[i] = i;
  goodDecoyCoords = calloc(3*numResidues,sizeof(double));


  ContactMap(allDecoys,numDecoys,contactCount,structureCount,BBCount);

  CountBetaPairings(allDecoys,numDecoys,betaPairingCount);

  WIDTH = numDecoys/75+1; /** how much to skip per character **/
  for (i=0;i<numResidues;i++) {
    for (j=0;j<structureCount[i][0];j+=WIDTH) fprintf(stderr,"E");
    for (j=0;j<structureCount[i][1];j+=WIDTH) fprintf(stderr,"H");
    for (j=0;j<structureCount[i][2];j+=WIDTH) fprintf(stderr,"L");
    fprintf(stderr," %d\n",i);
  }

  if (SUBCLUSTER) 
    Subcluster (structureCount, 
		&subclusterRMSD, &subclusterThreshold, &subclusterNativeThreshold, 
		chooseForSubclustering);
  else {
    subclusterRMSD = NULL;
    subclusterThreshold = NULL;
    subclusterNativeThreshold = NULL;
  }

  fprintf(stderr,"Pseudo-maxsub to native:\n");

  if (MAXSUBMAP) {
    
    if (0) { /** for a subset **/
      MaxsubMap(subsetDecoys,subsetSize,maxsubCount,1);
      for (i=0;i<numResidues;i++) 
	for (j=0;j<numResidues;j++) 
	  maxsubCount[i][j] = (maxsubCount[i][j] * numDecoys)/subsetSize; 
    }
    
    MaxsubMap(allDecoys,numDecoys,maxsubCount,1); 
  }
  else maxsubCount = NULL;
  

  rmsdToNative = calloc(numDecoys,sizeof(double));
  goodContacts = calloc(numDecoys,sizeof(int));
  totalContacts = calloc(numDecoys,sizeof(int));
  
  for (I=0;I<numDecoys;I++) { /** calculate rmsd to native and good contacts **/
    
    if (NATIVE) {
      GetGoodCoords (goodDecoyCoords, I, 0, numResidues-1,NULL,NULL);
      rmsdToNative[I] = rmsfit_(&numGoodCoords, goodNativeCoords, goodDecoyCoords);
    }
    else 
      rmsdToNative[I] = 0.0;
        
    total = 0; /** compare to native contacts **/
    good = 0;
    for (i=0;i<numResidues;i++) {
      v1 = commonCoords[I]+3*i;
      for (j=i+SKIP_NEARBY1;j<numResidues;j++) {
	v2 = commonCoords[I]+3*j;
	d = ( (v1[0]-v2[0])*(v1[0]-v2[0]) + 
	      (v1[1]-v2[1])*(v1[1]-v2[1]) + 
	      (v1[2]-v2[2])*(v1[2]-v2[2]) );
	if (d<=CONTACT_THRESHOLD) {
	  total++;
	  good+= (NATIVE && (j-i>=SKIP_NEARBY2) && nativeContacts[i][j]) ;
	}
      }
    }
    goodContacts[I] = good;
    totalContacts[I] = total;
  }


  sprintf(fileName,"%s.contacts",prefix); /** write the contact file **/
  ContactFile (fileName,numDecoys,contactCount,betaPairingCount,
	       structureCount,BBCount,maxsubCount,
	       subclusterThreshold,subclusterNativeThreshold,subclusterRMSD,
	       nativeContacts);
  
  if (! DO_CLUSTERING)  { /** terminate here if we're not clustering **/
    if (NATIVE) {
      sprintf(fileName,"%s.info",prefix);
      outFile = fopen(fileName,"w");
      fprintf(outFile,"COMMAND:");
      for (i=0;i<argc;i++) fprintf(outFile," %s",argv[i]);
      fprintf(outFile,"\n");
      for (f1=0;f1<numDecoys;f1++) {
	fprintf(outFile,"DECOY_SCORE:");
	for (i=0;i<numScorelineFields-1;i++) 
	  fprintf(outFile," %7.2f",decoyScores[f1][i]);
	fprintf(outFile," %7.2f",rmsdToNative[f1]);
	fprintf(outFile," %s\n",decoyName[f1]);
      }
      fclose(outFile);
    }

    return 0;
  }
  

  if (PHIL_CLUSTERING) {
    fprintf(stderr,"decoy-decoy RMSDS\n");
    neighbors = MakeNeighborList (numDecoys, commonCoords, maxClusterSize,0,numResidues-1,1);


    clusterList = calloc(numDecoys,sizeof(float*));
    for (f1=0;f1<numDecoys;f1++) {
      clusterList[f1] = calloc(2,sizeof(float));
    }
    
    sprintf(fileName,"%s.info",prefix);
    outFile = fopen(fileName,"w");
    fprintf(outFile,"COMMAND:");
    for (i=0;i<argc;i++) fprintf(outFile," %s",argv[i]);
    fprintf(outFile,"\n");
    
    for (target =minTopClusterSize; target<maxClusterSize; target++) {
      
      count = 0;
      for (f1=0;f1<numDecoys;f1++) {
	t1 = neighbors[f1][target-1][0];
	for (i=0;i<target-1;i++) {
	  f2 = (int) neighbors[f1][i][1];
	  if (neighbors[f2][target-1][0] < t1) break;
	}
	if (i==target-1) {
	  clusterList[count][0] = t1;
	  clusterList[count][1] = f1;
	  count++;
	}
      }
      qsort(clusterList,count,sizeof(float*),
	    (int (*)(const void*,const void*))CompareIncreasing);
      
      fprintf(outFile,"target: %d count: %d",target,count);
      sprintf(fileName,"%s.%04d.pdb",prefix,target);
      clusterMembers = calloc(count,sizeof(int));
      
      for (i=0;i<count;i++) {
	f1 = (int) clusterList[i][1];
	fprintf(outFile," %.2f,%.2f,%.2f",clusterList[i][0], rmsdToNative[f1],decoyScores[f1][18-1]);
	avg=decoyScores[f1][0];
	for (j=0;j<target-1;j++) {
	  avg+= decoyScores[ (int) neighbors[f1][j][1] ][0];
	}
	avg/=target;
	fprintf(outFile,",%.1f",avg);
	
	avg=decoyScores[f1][11]; /* co */
	for (j=0;j<target-1;j++) {
	  avg+= decoyScores[ (int) neighbors[f1][j][1] ] [11];
	}
	avg/=target;
	fprintf(outFile,",%.1f",avg);
	
	clusterMembers[i] = f1;
      }
      fprintf(outFile,"\n");
      MakePDBFile(fileName,clusterMembers,count,0,numResidues-1); 
      free(clusterMembers);
      
    }
    
    fclose(outFile);
    return 0;
  }    /**** end of phil_clustering ****/

  
  /****************************************************************************/
  /*********************************** clustering *****************************/
  
  
  /** calculate decoy-decoy rmsds  ---- THIS IS THE SLOW STEP **/
  fprintf(stderr,"decoy-decoy RMSDS\n");

  if (USE_BIN_FILE) {

    sprintf(binFilename,"%s.%d.%d.bin",binFilePrefix,numDecoys,maxClusterSize);
    
    if ( (file=fopen(binFilename,"rb")) == NULL ) {
      fprintf(stderr,"BIN_FILE: %s doesnt exist; creating it \n",binFilename);
      
      neighbors = MakeNeighborList (numDecoys, commonCoords, maxClusterSize,0,numResidues-1,1);
      fprintf(stderr,"BIN_FILE: writing %s\n",binFilename);
      DumpBinFile(binFilename,neighbors,maxClusterSize);
      fprintf(stderr,"BIN_FILE: done writing %s\n",binFilename);
    }
    else {
      fclose(file);
      fprintf(stderr,"BIN_FILE: reading %s\n",binFilename);
      neighbors = ReadBinFile (binFilename, maxClusterSize);
      fprintf(stderr,"BIN_FILE: done reading %s\n",binFilename);
    }
  }
  else {
    neighbors = MakeNeighborList (numDecoys, commonCoords, maxClusterSize,0,numResidues-1,1);
  }

  /** now we can open the output file **/
  sprintf(fileName,"%s.info",prefix);
  outFile = fopen(fileName,"w");
  fprintf(outFile,"COMMAND:");
  for (i=0;i<argc;i++) fprintf(outFile," %s",argv[i]);
  fprintf(outFile,"\n");

  if ( SHOW_NBRS ) {
    for ( I=0; I< numDecoys; ++I ) {
      fprintf(outFile,"%s %d",decoyName[I], I );
      for ( i=0; i< maxClusterSize; ++i ) {
	fprintf(outFile," %d", ( (int) neighbors[I][i][1]));
      }
      fprintf(outFile,"\n");
    }
    fclose(outFile);
    return 0;
  }

  if (ANALYZE_CLUSTERING) {
    AnalyzeClustering( neighbors, minClusterSize, minTopClusterSize, maxClusterSize,
		       minThreshold, maxThreshold, GET_CLUSTERED_DECOYS,
		       outFile);

    if ( GET_CLUSTERED_DECOYS ) {
      fclose(outFile);
      return 0;
    }
  }

  /** find clustering threshold **/
  threshold = ClusteringThreshold (neighbors, numDecoys, 
				   minTopClusterSize, targetClusterSize, maxClusterSize,
				   minThreshold, maxThreshold,
				   &clusterCenter, 
				   &target,
				   1); /** show progress reports **/
  
  fprintf(stderr,"clustering_threshold: %f top_cluster_size: %d top_cluster_center: %d \n",
	  threshold,target,clusterCenter);
  

  
  /** choose clusters **/
  exists = calloc(numDecoys,sizeof(int));
  for (i=0;i<numDecoys;i++) exists[i] = 1;
  
  clusterList = calloc(numDecoys,sizeof(float*));
  for (f1=0;f1<numDecoys;f1++) {
    clusterList[f1] = calloc(2,sizeof(float));
  }
  
  clusterMembers = calloc(target,sizeof(int));
  scoreList = calloc(target,sizeof(float*));
  for (i=0;i<target;i++)
    scoreList[i] = calloc(2,sizeof(float));
  
  /** testing **/
  for (i=0;i<numDecoys;i++) {
    for (j=0;j<maxClusterSize;j++) {
      I = (int) neighbors[i][j][1];
      assert ( fabs(neighbors[i][j][1]-I) < 0.001 );
    }
  }
  
  clusterCount = 0;

  
  fprintf(outFile,"TARGET: %d THRESHOLD: %f\n",target,threshold);


  clusterCenterList = calloc(MAX_NUM_CLUSTERS,sizeof(int));
  fprintf(stderr,"# size centerName centerRMSD2native centerScore18 bestScoreName bsrmsd2native bsScore18 rmsds-to-other-cluster-centers\n");
  while (1) { /** make clusters in this loop -- break when size is < minClusterSize **/
    
    if (clusterCount) {
      clusterCenter = (int) clusterList[0][1];
    }
    clusterCenterList[clusterCount] = clusterCenter;
    
    clusterMembers[0] = clusterCenter;
    exists[clusterCenter] = 0;
    count = 1;
    nl = neighbors[clusterCenter];
    for (i=0;i<target;i++) {
      if (nl[i][0] >= threshold) break;
      if (! exists[ (int) nl[i][1]]) continue;
      clusterMembers[count] =  (int) nl[i][1];
      exists[ (int) nl[i][1] ] = 0;
      count++;
    }

    /** sanity check **/
    if (clusterCount) {
      assert (count==clusterList[0][0]);
    }
    else if (count!=target) fprintf(stderr,"WARNING: count != target\n");
    clusterSize = count;

    
    /*************************** Show cluster info: scores, cluster-cluster rmsds ********/

    if (clusterSize>=2) {/** reorder cluster members according to total score **/
      for (i=1;i<clusterSize;i++) {
	scoreList[i][0] = decoyScores[clusterMembers[i]][0];
	scoreList[i][1] = clusterMembers[i];
      }
      qsort(scoreList+1,clusterSize-1,sizeof(float*),
	    (int (*)(const void*,const void*))CompareIncreasing);

      if (clusterSize>=3)
	assert (scoreList[1][0]<=scoreList[2][0]);

      for (i=1;i<clusterSize;i++) {
	clusterMembers[i] = (int) scoreList[i][1];
	assert (!exists[ clusterMembers[i] ]);
      }
      

      fprintf(stderr,"CLUSTER %d %d %s %f %f %s %f %f",
	      clusterCount,clusterSize,
	      decoyName[clusterCenter],
	      rmsdToNative[clusterCenter],
	      (numScorelineFields>=18) * decoyScores[clusterCenter][18-1],
	      decoyName[clusterMembers[1]],
	      rmsdToNative[clusterMembers[1]],
	      (numScorelineFields>=18) * decoyScores[clusterMembers[1]][18-1]);	
      fprintf(outFile,"CLUSTER_RMSDS %d %d %s %f %f %s %f %f",
	      clusterCount,clusterSize,
	      decoyName[clusterCenter],
	      rmsdToNative[clusterCenter],
	      (numScorelineFields>=18) * decoyScores[clusterCenter][18-1],
	      decoyName[clusterMembers[1]],
	      rmsdToNative[clusterMembers[1]],
	      (numScorelineFields>=18) * decoyScores[clusterMembers[1]][18-1]);	
    
    }
    else {
      fprintf(stderr,"CLUSTER %d %d %s %f %f - 0.0 0.0",
	      clusterCount,clusterSize,
	      decoyName[clusterCenter],
	      rmsdToNative[clusterCenter],
	      (numScorelineFields>=18) * decoyScores[clusterCenter][18-1]);
      fprintf(outFile,"CLUSTER_RMSDS %d %d %s %f %f - 0.0 0.0",
	      clusterCount,clusterSize,
	      decoyName[clusterCenter],
	      rmsdToNative[clusterCenter],
	      (numScorelineFields>=18) * decoyScores[clusterCenter][18-1],
	      decoyName[clusterMembers[1]],
	      rmsdToNative[clusterMembers[1]],
	      (numScorelineFields>=18) * decoyScores[clusterMembers[1]][18-1]);	
    }      
      

    for (i=0;i<=clusterCount;i++) 
      fprintf(stderr," %4.1f",
	      rmsfit_(&numResidues,
		      commonCoords[clusterCenterList[i]],
		      commonCoords[clusterCenter]));
    fprintf(stderr,"\n");

    for (i=0;i<=clusterCount;i++) 
      fprintf(outFile," %7.3f",
	      rmsfit_(&numResidues,
		      commonCoords[clusterCenterList[i]],
		      commonCoords[clusterCenter]));
    fprintf(outFile,"\n");



    /** give the cluster a name **/
    if ( clusterCount < 100 ) {
      sprintf(clusterName,"cluster%02d.%03d",clusterCount,clusterSize);
    } else { 
      sprintf(clusterName,"cluster%d.%03d",clusterCount,clusterSize);
    }
    
    /** write cluster average scores **/
    fprintf(outFile,"CLUSTER_SCORE:");
    for (i=0; i<numScorelineFields-1; i++) {
      avg = 0.0;
      for (j=0;j<clusterSize;j++) {
	avg+= decoyScores[clusterMembers[j]][i];
      }
      avg/=clusterSize;
      fprintf(outFile," %7.2f",avg);
    }
    fprintf(outFile," %s\n",clusterName);
    
    
    /** write cluster info-line **/
    centerRMSD = rmsdToNative[clusterCenter];
    bestRMSD = 1000;
    avgRMSD = 0.0;
    for (i=0;i<clusterSize;i++) {
      r = rmsdToNative[clusterMembers[i]];
      bestRMSD = r<bestRMSD?r:bestRMSD;
      avgRMSD += r;
    }
    avgRMSD/=clusterSize;
    
    
    fprintf(outFile,"CLUSTER_INFO: %d %s %d,%s %d %f %f %f Members:",
	    clusterCount, clusterName, clusterCenter, decoyName[clusterCenter],
	    clusterSize,centerRMSD,bestRMSD,avgRMSD);

    for (i=0;i<clusterSize;i++) fprintf(outFile," %d,%s",
					clusterMembers[i],decoyName[clusterMembers[i]]);
    fprintf(outFile,"\n");


    /** Create pdb file **/
    sprintf(fileName,"%s.%s.pdb",prefix,clusterName);
    MakePDBFile (fileName, clusterMembers, clusterSize, 0, numResidues-1);



    /** Calculate cluster contacts and maxsub **/

    ContactMap (clusterMembers, clusterSize, contactCount, structureCount, BBCount);
    CountBetaPairings(clusterMembers, clusterSize, betaPairingCount);
    MaxsubMap (clusterMembers, clusterSize, maxsubCount,0);

    sprintf(fileName,"%s.%s.contacts",prefix,clusterName);
    ContactFile (fileName, clusterSize, contactCount, betaPairingCount,
		 structureCount, BBCount, maxsubCount, 
		 NULL, NULL, NULL, /** no subclustering info **/
		 nativeContacts);
    
    /** Update clusterList **/
    for (f1=0;f1<numDecoys;f1++) {
      

      clusterList[f1][1] = f1;
      if (! exists[f1] ) {
	clusterList[f1][0] = 0;
	continue;
      }

      count = 1;
      nl = neighbors[f1];
      for (i=0;i<target;i++) {
	if (nl[i][0] >= threshold) break;
	if (exists[ (int) nl[i][1]])
	  count++;
      }

      clusterList[f1][0] = count;
    }
    
    qsort(clusterList,numDecoys,sizeof(float*),
	  (int (*)(const void*,const void*))CompareDecreasing);
    
    clusterCount++;

    if (clusterList[0][0] < minClusterSize && 
	(clusterList[0][0]<1 || clusterCount>=20 ) ) break;
    
    if (clusterCount>=MAX_NUM_CLUSTERS) {
      fprintf(stderr,"Clustering was stopped at %d clusters\n",MAX_NUM_CLUSTERS);
      break;
    }
  }
  


  if (PICK_BY_SCORE) { /** Pick non-redundant set of decoys according to decoyScores[][0] **/
    for (i=0;i<numDecoys;i++) {
      clusterList[i][0] = decoyScores[i][0];
      clusterList[i][1] = i;
      exists[i] = 1;
    }

    clusterCount=0;
    qsort(clusterList,numDecoys,sizeof(float*),
	  (int (*)(const void*,const void*))CompareIncreasing);
    for (i=0;i<numDecoys && clusterCount<SHOW_TOP_N;i++) {
      assert (clusterCount<=i);
      I = clusterList[i][1];
      /*      fprintf(stderr,"i: %d I: %d clusterList[i][0]: %f clusterCount: %d exists[I]: %d\n", */
      /*  	    i,I,clusterList[i][0],clusterCount,exists[I]); */
      if (exists[I]) {
	clusterList[clusterCount][1] = I;
	clusterList[clusterCount][0] = clusterList[i][0];
	clusterCount++;

	/** kill neighbors of I **/
	nl = neighbors[I];
	for (j=0;j<target;j++) {
	  if (nl[j][0] >= threshold) break;
	  exists[ (int) nl[j][1] ] = 0;
	}
      }
    }
  
    for (i=0;i<SHOW_TOP_N && i<numDecoys;i++) {
      I = (int) clusterList[i][1];
      if (i<SHOW_TOP_N-1) 
	assert (exists[I] && clusterList[i][0]<=clusterList[i+1][0]);
      assert ( fabs(clusterList[i][0]-decoyScores[I][0])<0.001);
      fprintf(outFile,"CHOOSE_BY_SCORE:");
      for (j=0;j<numScorelineFields-1;j++) 
	fprintf(outFile," %7.2f",decoyScores[I][j]);
      fprintf(outFile," %s\n",decoyName[I]);
    }
  }

  /** Show the information **/

  for (f1=0;f1<numDecoys;f1++) {
    
    fprintf(outFile,"DECOY_SCORE:");
    for (i=0;i<numScorelineFields-1;i++) 
      fprintf(outFile," %7.2f",decoyScores[f1][i]);
    fprintf(outFile," %7.2f",rmsdToNative[f1]);
    fprintf(outFile," %7.2f",neighbors[f1][target-1][0]); /** this decoy's "clustering threshold" **/
    fprintf(outFile," %s\n",decoyName[f1]);
    

    fprintf(outFile,"DECOY_INFO: %d %s %f %d %d",
	    f1,decoyName[f1],rmsdToNative[f1],goodContacts[f1],totalContacts[f1]);

    fprintf(outFile,"\n");
  }

  fclose(outFile);
  
  return 0;
}








