#!/usr/bin/python

from phil import *
from amino_acids import longer_names
import math
from barcode_util import *

## decoy info is stored in the form:
## info[decoy_name] = { 'score':-127.3, 'rms':2.6, ....,
##                      1:{'BB':[-60,-40,180], 'S':'H', ...
##                      2:{ ...
##                      etc
##
## per-residue info tags: rsd_tag_list = ['BB','CHI','ROT','SS','NB','AA']

## Frequency floor for side-chain constraint weights: the weight written to
## the output is log10(MIN_FRQ / scaled_density), or 0.0 when the scaled
## density falls below MIN_FRQ.
MIN_FRQ = 0.05
## In HOM_STRUCT mode a position is only written out when at least one of
## its bins has a weighted frequency above this value.
MAX_FRQ = 0.3
## Factor applied to the side-chain bin density before it is compared with
## MIN_FRQ (presumably a nominal rotamer well width in degrees -- TODO confirm).
SC_BIN_WIDTH = 120
## Set to a non-zero value to open a pipe to gnuplot.
GNUPLOT = 0

def Help():
    """Print the command-line usage summary, then terminate via exit()."""
    usage_parts = [
        '\nUsage: %s <decoy_file> MAX_NB MAX_CHI BB_BIN_WIDTH\n\n',
        '           <decoy_file> can be either a list file or a silent file\n',
        '           {-w <weight_file>} {-a <fasta_aln>} {-o <out_file>}\n',
        '           {-n <native file>} [-hom_struct] [-status]\n',
    ]
    usage = ''.join(usage_parts)
    ## only the first piece carries a format slot (the program name)
    print(usage % argv[0])

    exit()

## parameters


def _pop_option(arg_list, flag, default=''):
    """Remove `flag` and the value that follows it from arg_list (in place).

    Returns the value, or `default` when the flag is absent.  If the flag
    is the last argument (so it has no value) the usage message is shown
    and the script exits.
    """
    if flag not in arg_list:
        return default
    pos = arg_list.index(flag)
    if pos + 1 >= len(arg_list):
        log('option %s requires a value\n' % flag)
        Help()
    value = arg_list[pos+1]
    del arg_list[pos:pos+2]
    return value


def _pop_flag(arg_list, flag):
    """Remove every occurrence of the bare switch `flag` from arg_list.

    Returns 1 if the switch was present, else 0.
    """
    found = 0
    while flag in arg_list:
        arg_list.remove(flag)
        found = 1
    return found


if len(argv)<5:
    Help()

## Every option is removed from args as it is parsed, so that only the four
## positional arguments remain regardless of where the options were given.
args = argv[1:]
native_file = _pop_option(args, '-n')
out_file = _pop_option(args, '-o', 'barcode.cst')

HOM_STRUCT = _pop_flag(args, '-hom_struct') ## 1 => no sequence checking
if HOM_STRUCT:
    ## both the sequence weights and the alignment are mandatory in this mode
    if '-w' not in args:
        log('must have sequence weight file in -HOM_STRUCT mode!')
        exit(-1)
    weight_file = _pop_option(args, '-w')

    if '-a' not in args:
        log('must have fasta aln file in -HOM_STRUCT mode!')
        exit(-1)
    aln_file = _pop_option(args, '-a')

STATUS = _pop_flag(args, '-status')

## re-check now that the options are gone: four positionals are required
if len(args) < 4:
    Help()

list_file = args[0]


MIN_DEV = 20.0 ## smallest value for sd(phi) + sd(psi) to include as features
MIN_NB = int(args[1]) ## definition of exposed, used for sc rotamers
MAX_CHI = int(args[2]) ## only use rot numbers up to and including this chi torsion
BB_BIN_WIDTH = int(args[3]) ## in degrees

if HOM_STRUCT:
    ## explicit check rather than assert, so it survives "python -O"
    if MAX_CHI != 2:
        log('MAX_CHI must be 2 in -HOM_STRUCT mode!')
        exit(-1)
    MIN_DEV = 0.0 ## we need all the backbone constraints in homologous cst mode

if native_file and HOM_STRUCT:
    log('Sorry, can not use -n option with -HOM_STRUCT mode!')
    exit(-1)

## MIN_DEV = 20.0 ## smallest value for sd(phi) + sd(psi) to include as features
## MIN_NB = 12 ## definition of exposed, used for sc rotamers
## MAX_CHI = 2 ## only use rot numbers up to and including this chi torsion
## BB_BIN_WIDTH = 20 ## in degrees

## MIN_DEV = 20.0 ## smallest value for sd(phi) + sd(psi) to include as features
## MIN_NB = 12 ## definition of exposed, used for sc rotamers
## MAX_CHI = 4 ## only use rot numbers up to and including this chi torsion
## BB_BIN_WIDTH = 5 ## in degrees


## internal name for the SC bin if nb<MIN_NB: a tuple of MAX_CHI copies of -1
EXPOSED = (-1,) * MAX_CHI


########################################### functions:
def In_range(a):
    """Return angle `a` shifted by whole turns of 360.0 into [-180,180).

    A value already inside the range is returned unchanged.
    """
    angle = a
    ## at most one of the two loops can run for any given input
    while angle < -180.0:
        angle += 360.0
    while angle >= 180.0:
        angle -= 360.0
    return angle

def In_positve_range(a):
    """Return angle `a` shifted by whole turns of 360.0 into [0,360).

    A value already inside the range is returned unchanged.
    """
    angle = a
    ## at most one of the two loops can run for any given input
    while angle >= 360:
        angle -= 360.0
    while angle < 0:
        angle += 360.0
    return angle

def Angle_delta(a,b):
    """Return the smallest separation between angles a and b (0 to 180).

    Both angles are first normalised with In_range.
    """
    a = In_range(a)
    b = In_range(b)
    assert -180 <= a <= 180
    assert -180 <= b <= 180
    hi = max(a,b)
    lo = min(a,b)
    direct = hi - lo            ## going straight from lo up to hi
    wrapped = lo + 360.0 - hi   ## going the other way round the circle
    delta = min( direct, wrapped )
    assert delta <= 180.0
    return delta

def Chi_bin(a):
    """Map a chi angle to a bin number: 1, 2 or 3.

    Modelled on Rosetta's rot_from_chi for chi1.  After normalising with
    In_range: [0,120) -> 1, [-120,0) -> 3, everything else -> 2.
    """
    angle = In_range(a)
    if 0 <= angle < 120:
        return 1
    if -120 <= angle < 0:
        return 3
    return 2

def Rot_from_chi(chi, aa_type):
    """Return the (chi1 bin, chi2 bin) pair for a residue.

    chi must hold exactly two angles.  A bin of 0 marks a torsion that the
    residue type is treated as not having: both for 'A'/'G', chi2 only for
    'C','P','S','T','V'.  Other bins come from Chi_bin.
    """
    assert len(chi) == 2
    if aa_type in ('A','G'):  # no chi1, chi2
        return (0, 0)
    chi1_bin = Chi_bin( chi[0] )
    if aa_type in ('C','P','S','T','V'): # only has chi1
        return (chi1_bin, 0)
    return (chi1_bin, Chi_bin( chi[1] ))

def pp_class(pp): ## E G A B and O
    """Classify a (phi, psi, omega) triple as 'O', 'G', 'E', 'A' or 'B'.

    The three angles are normalised with In_range first, then:
      |omega| < 90            -> 'O'
      phi >= 0                -> 'G' if -100 < psi <= 100 else 'E'
      phi <  0                -> 'A' if -125 < psi <= 50  else 'B'
    """
    phi = In_range( pp[0] )
    psi = In_range( pp[1] )
    omega = In_range( pp[2] )
    assert -180<=phi<=180 and -180<=psi<=180 and -180<=omega<=180

    if abs(omega) < 90:
        return 'O'
    if phi >= 0:
        if -100 < psi <= 100:
            return 'G'
        return 'E'
    if -125 < psi <= 50:
        return 'A'
    return 'B'


def Get_bb( tor ): ## includes SS !!!!!!!!!!!!
    """Return a new list: the residue's 'BB' entries followed by its 'SS' entry."""
    torsions = tor['BB']
    sec_struct = tor['SS']
    return torsions + [sec_struct]

def Get_bb_bin( bb ):
    """Return the backbone bin for bb = [phi, psi, omega, ss].

    With BB_BIN_WIDTH > 0 the bin is the integer pair
    (floor(phi/BB_BIN_WIDTH), floor(psi/BB_BIN_WIDTH)).
    Otherwise it is a one-letter class from pp_class, with 'E' lumped into
    'B', and 'A' promoted to 'H' when ss is 'H'.
    """
    assert len(bb) == 4 ## [phi,psi,omega,ss]
    phi, psi, omega, ss = bb
    if BB_BIN_WIDTH > 0:
        phi_bin = int(floor(phi/ BB_BIN_WIDTH ))
        psi_bin = int(floor(psi/ BB_BIN_WIDTH ))
        return ( phi_bin, psi_bin )

    ## use ABGEO bins
    label = pp_class( [ phi, psi, omega ] )
    if label in ['B','E']: ## Lump E with B
        label = 'B'
    elif label == 'A' and ss == 'H':
        label = 'H'
    return label



def Get_chi (tor):
    """Return ([chi1, chi2], aa_type) for a residue, or EXPOSED.

    EXPOSED is returned when the residue's 'NB' value is below MIN_NB.
    Torsions the residue type is treated as lacking are replaced by 0.0:
    chi2 for 'C','P','S','T','V', both for 'A','G'.
    """
    ## PB: the logic below appears to require this:
    assert MAX_CHI == 2 ## I think this is only called by HOM_STRUCT?
    nb = tor['NB']
    aa_type = tor['AA']
    if not nb >= MIN_NB:
        return EXPOSED

    chi_pair = tor['CHI'][:MAX_CHI]
    chi1, chi2 = chi_pair      ## must be exactly two values
    if aa_type in ('C','P','S','T','V'): #only chi1
        return ([chi1, 0.0], aa_type)
    if aa_type in ('A','G'):  # no chi1, chi2
        return ([0.0, 0.0], aa_type)
    return (chi_pair, aa_type)

def Get_rot( tor ):
    """Return the rotamer tuple for a residue, or EXPOSED.

    EXPOSED is returned when the residue's 'NB' value is below MIN_NB.
    In HOM_STRUCT mode the rotamer is derived from the chi angles with
    Rot_from_chi; otherwise the first MAX_CHI stored 'ROT' numbers are used.
    """
    nb = tor['NB']
    if not nb >= MIN_NB:
        return EXPOSED
    if HOM_STRUCT:
        return Rot_from_chi( tor['CHI'][:MAX_CHI], tor['AA'] )
    return tuple( tor['ROT'][:MAX_CHI] )

def remove_Overlap (max1, min1, max2, min2, count1,freq1,count2,freq2):
    """Trim one of two overlapping angular bins so that they just touch.

    (max1, min1) and (max2, min2) are the edges of two bins; count/freq are
    the population and frequency of each bin.  Only a bin with count < 2 or
    freq < 0.2 is ever trimmed, and bin 1 is tested before bin 2; when
    neither bin qualifies the edges are returned as they came in (after
    normalisation).

    Returns (max1, min1, max2, min2), each passed through In_range.
    Prints a diagnostic and calls exit(1) when bin 1 lies strictly inside
    bin 2, a case the trimming branches do not cover.
    """
    ## Pick the reference angle.  max1-min1 > 180 is treated as bin 1 being
    ## stored with its edges swapped, so max1 is used as the lower edge.
    if max1-min1 > 180:
        lower_bound = max1
    else: lower_bound = min1
    ## express all four edges as offsets in [0,360) from the reference angle
    max1_var = In_positve_range(max1 - lower_bound)
    min1_var = In_positve_range(min1 - lower_bound)
    max2_var = In_positve_range(max2 - lower_bound)
    min2_var = In_positve_range(min2 - lower_bound)

    if (min1_var < max2_var and max1_var > min2_var): ## overlaped range
        if min1_var <= min2_var and max1_var >= min2_var:
            ## bin 1 starts at or below bin 2 and reaches into it:
            ## move the shared edge (top of bin 1 / bottom of bin 2)
            if count1 < 2 or freq1 < 0.2: # pos1 caused the overlap
                max1_var = min2_var
            elif count2 < 2 or freq2 < 0.2: # pos2 caused the overlap
                min2_var = max1_var
        elif min1_var <= max2_var and max1_var >= max2_var:
            ## bin 1 starts inside bin 2 and ends at or above its top:
            ## move the shared edge (bottom of bin 1 / top of bin 2)
            if count1 < 2 or freq1 < 0.2: # pos1 caused the overlap
                min1_var = max2_var
            elif count2 < 2 or freq2 < 0.2: # pos2 caused the overlap
                max2_var = min1_var
        else:
            ## only reached when bin 1 is strictly inside bin 2
            print 'error in bin size overlap check.'
            print 'min1  max1  min2  max2:'
            print min1_var,max1_var,min2_var,max2_var
            print min1,max1,min2,max2
            exit(1)

    ## shift the offsets back to absolute angles in [-180,180)
    max1 = In_range (In_range(max1_var) + lower_bound)
    min1 = In_range (In_range(min1_var) + lower_bound)
    max2 = In_range (In_range(max2_var) + lower_bound)
    min2 = In_range (In_range(min2_var) + lower_bound)
    return (max1, min1, max2, min2)

def Remove_gaps ( aln_seq ):
    """Return {index: residue} for an aligned sequence with gaps removed.

    '-' and newline characters are dropped; the remaining characters are
    numbered from 0 in their original order.
    """
    residues = [ch for ch in aln_seq if ch != '-' and ch != '\n']
    return dict(enumerate(residues))

def Read_Seq_Weights(filename):
    """Read per-sequence weights from a whitespace-separated text file.

    Each non-blank line holds an integer sequence number followed by a
    float weight; any further columns are ignored.

    Returns seq_weights[sequence_number] = weight.
    Logs a message and exits with status -1 if the file cannot be opened.
    """
    seq_weights = {}

    try:
        data = open(filename,'r')
    except IOError:
        log('no such file %s, thus exit!\n' %filename)
        ## the original "exit;" was a bare name and never actually exited
        exit(-1)

    try:
        for line in data:
            line_parts = line.split()
            if not line_parts:
                continue ## tolerate blank lines
            seq_weights[int(line_parts[0])] = float(line_parts[1])
    finally:
        data.close()

    return seq_weights

## end of Read_Seq_Weights


def Aln_Map (filename):
    """Parse a FASTA multiple sequence alignment.

    Returns the tuple (aln_map, seqnames, alnseq, first_seq):
      aln_map[seqname][num_res] = num_alnpos
          1-based residue number -> 1-based alignment column; gap columns
          ('-') have no entry
      seqnames[n] = seqname
          sequence names in file order, numbered from 0
      alnseq[seqname] = aligned sequence string, gaps included
      first_seq[num_alnpos] = character of the first sequence at that
          1-based alignment column

    A sequence name is the first whitespace-delimited word of its header
    line without the leading '>'.

    Logs a message and exits with status -1 if the file cannot be opened,
    contains sequence data before the first header, or holds no sequences.
    """
    try:
        data = open(filename,'r')
    except IOError:
        log('no such file %s, thus exit!\n' %filename)
        ## the original "exit;" was a bare name and never actually exited
        exit(-1)

    alnseq = {}
    seqnames = {}
    first_seq_name = None
    seqname = None
    num_seq = 0
    try:
        for line in data:
            if line[:1] == '>':
                seqname = line.split()[0][1:]
                if first_seq_name is None: first_seq_name = seqname
                alnseq[seqname] = ''
                seqnames[num_seq] = seqname
                num_seq = num_seq+1
            elif seqname is not None:
                ## sequences may be wrapped over several lines
                alnseq[seqname] = alnseq[seqname] + line.rstrip('\r\n')
            elif line.strip():
                log('sequence data before the first header in %s, thus exit!\n'
                    %filename)
                exit(-1)
    finally:
        data.close()

    if first_seq_name is None:
        log('no sequences found in %s, thus exit!\n' %filename)
        exit(-1)

    aln_map = {}
    for seqname in alnseq.keys():
        num_res = 0
        num_alnpos = 0
        thismap = {}
        for res in alnseq[seqname]:
            num_alnpos = num_alnpos + 1
            if res != '-':
                num_res = num_res + 1
                thismap[num_res] = num_alnpos

        aln_map[seqname] = thismap

    first_seq = {}
    num_alnpos = 1
    for res in alnseq[first_seq_name]:
        first_seq[num_alnpos] = res
        num_alnpos = num_alnpos + 1

    return aln_map, seqnames, alnseq, first_seq

## end of Aln_Map

########################################### functions:


## read info from the log-file:

## info[name] = { pos: [ nb, phi, psi, omega, rot1, rot2, rot3, rot4]}


if HOM_STRUCT:

    ## constraint output file; the side-chain section below writes to it
    outfile = open (out_file, 'w')

    ## bq sequence weights, keyed by integer sequence number
    seq_weight = Read_Seq_Weights (weight_file)

    ## bq the alignment; seqnames maps sequence number -> name in the
    ## bq fasta aln file, and those names are also the pdb names
    aln_map, seqnames, aln_seqs, first_aln_seq = Aln_Map( aln_file )

    ## bq re-key the weights by sequence name instead of sequence number
    new_seq_weight = {}
    for seq_num, seq_wt in seq_weight.items():
        new_seq_weight[ seqnames[seq_num] ] = seq_wt
    seq_weight = new_seq_weight

## auto-detect input file type:

## Read the decoy data.  Sequence checking is requested from the reader in
## every mode except HOM_STRUCT.
SEQ_CHECK = not HOM_STRUCT
info,seq,SILENT_INPUT = Read_either_file ( list_file, SEQ_CHECK )
print 'SILENT INPUT = ',SILENT_INPUT
## with MAX_CHI == 0 the run is treated like silent input, which makes the
## code below skip all side-chain processing
if MAX_CHI == 0: SILENT_INPUT=1

if HOM_STRUCT:
    assert not SILENT_INPUT

    files = info.keys()

    ## map file names to alignment fasta names
    ## A file is paired with a sequence when the sequence name occurs in the
    ## file name; if several names occur, the last one visited wins.
    ## NOTE(review): a file matching no sequence name gets no entry here and
    ## the file_seq_map[file] lookups below will raise KeyError -- confirm
    ## that inputs always match.
    file_seq_map = {}
    for file in files:
        for seqnum in seqnames.keys():
            seqname = seqnames[seqnum]
            if string.count( file, seqname):
                file_seq_map[file] = seqname

    ## check sequence lengths
    ## NOTE(review): a 'bad seq' file is only logged; it stays in info and is
    ## still mapped below.  seq is overwritten right after this loop, so the
    ## "seq = s" assignment only influences the "seq and" test of later
    ## iterations.
    for file in files:
        file_info = info[file]
        s = Get_seq(file_info)

        if seq and len(s.keys()) != \
           len(( Remove_gaps( aln_seqs[file_seq_map[file] ] ) ).keys() ):
            log('bad seq: '+file)
            continue
        else:
            seq = s


    ## from here on seq is the first aligned sequence, keyed by alignment
    ## position and including its gap characters
    seq = first_aln_seq
    ## bq map info to aln positions
    newinfo = {}
    newweight = {}
    for file in info.keys():
        ## currently, this will discard non-residue data from info[file]
        ## ie score,rms,etc
        thisnewinfo = {}
        file_L = info[file]['L'] ## length of the structure in this file
        for pos in range(1,file_L+1):
            ## re-key each residue record by its alignment position
            thisnewinfo[ aln_map[file_seq_map[file] ][pos] ] = info[file][pos]
        newinfo [file] = thisnewinfo
        ## weights become keyed by file name instead of sequence name
        newweight[file] = seq_weight[file_seq_map[file]]
    info = newinfo
    seq_weight = newweight
    ## bq end of mapping

## NATIVE doubles as a flag and as the key under which the native
## structure's data is stored in info.
NATIVE = 0
if native_file:
    f = Read_file( native_file )
    s = Get_seq(f)
    if f and s == seq:
        NATIVE = 1
        info[NATIVE] = f
    else:
        ## a native given with -n used to be dropped without any notice
        log('WARNING: native file %s is empty or its sequence differs from the decoys; ignoring it\n'
            %native_file)

######################################################
## put torsions into backbone and sidechain lists:
######################################################


L = len(seq.keys() )

## one (initially empty) backbone list and side-chain list per position
bb_list = {}
chi_list = {}
for pos in seq.keys():
    bb_list[pos] = []
    chi_list[pos] = []

## collect the per-residue records of every decoy; the native is skipped
for name in info.keys():
    if name == NATIVE:
        continue
    for pos in range(1,L+1):
        if pos not in info[name]:
            continue
        tor = info[name][pos]

        if HOM_STRUCT:
            ## homologue records carry their sequence weight along
            bb = {'bb': Get_bb( tor ), 'weight': seq_weight[name]}
            rot = {'chi': Get_chi( tor ), 'weight': seq_weight[name]}
        elif SILENT_INPUT:
            bb = Get_bb( tor )
        else:
            bb = Get_bb( tor )
            rot = Get_rot( tor )

        bb_list[pos].append( bb )
        if not SILENT_INPUT:
            chi_list[pos].append( rot )

#########################################################################
##### process the side chain torsions: ##################################
#########################################################################

chi_map = {}
for pos in chi_list.keys():
    if SILENT_INPUT: continue
    aa = seq[pos]
    if HOM_STRUCT and aa != '-' and STATUS:
	# output torsion distribution of every residue positions
    	thischi = \
        string.join(['chi_',str(find_key(aln_map[seqnames[0]],pos)), '.txt'],'')
        chiout = open (thischi, 'w')

    chi_map[pos] = {}
    counter = 0
    if NATIVE:
        nat_rot = Get_rot( info[NATIVE][pos] )
    else:
        nat_rot = (0) ## shouldnt match anybody

    nat_count = 0
    chi1 = 0
    chi2 = 0
    aa_type = ''
    rot_count = {}
    rot_bin_count = {}
    chi1_max = {}
    chi1_min = {}
    chi2_max = {}
    chi2_min = {}
    if HOM_STRUCT and aa != '-':
        print 'MAP SC:%d counter= %d rep= %s'\
            %(find_key(aln_map[seqnames[0]],pos),-1,'GAP')
        print find_key(aln_map[seqnames[0]],pos),aa
    for rot_and_weight in chi_list[pos]:
        if HOM_STRUCT:
            chis = rot_and_weight['chi']
            if chis == EXPOSED:
                rot = chis
            else:
                ([chi1,chi2],aa_type) = rot_and_weight['chi']
                rot = Rot_from_chi ( (chi1,chi2), aa_type )
            weight = rot_and_weight['weight']
        else:
            rot = rot_and_weight

        Increment (rot_count, rot)
        if rot == nat_rot:
            nat_count = nat_count + 1
        if not chi_map[pos].has_key( rot ):
            chi_map[pos][rot] = counter
            if HOM_STRUCT:
                if aa != '-':
                    print 'MAP SC:%d counter= %d rep= %s'\
                        %(find_key(aln_map[seqnames[0]],pos),counter,string.join(map(str,list(rot)),','))
            else:
                print 'MAP SC:%d counter= %d rep= %s'\
                    %(pos,counter,string.join(map(str,list(rot)),','))
            counter = counter + 1
        if HOM_STRUCT:
            if rot == EXPOSED: continue # don't count the exposed

            if seq[pos] != '-' and STATUS:
                chiout.write ( '%4d %4d %8.2f %8.2f\n'\
                %(find_key(aln_map[seqnames[0]],pos), chi_map[pos][rot], chi1, chi2))

            Increment(rot_bin_count,chi_map[pos][rot], weight)
            if not (aa_type in ['A','G']):  ## homologous structure aatype
                keep_max(chi1_max, chi_map[pos][rot], chi1)
                keep_min(chi1_min, chi_map[pos][rot], chi1)
            if not (aa_type in ['A','G','C','P','S','T','V']): ## homologous struct aatype
                keep_max(chi2_max, chi_map[pos][rot], chi2)
                keep_min(chi2_min, chi_map[pos][rot], chi2)
        else:
            Increment(rot_bin_count,chi_map[pos][rot])
    log('CHI '+`pos,seq[pos],counter,nat_count, nat_rot`)
    if HOM_STRUCT:
        if aa != '-':
            print 'ROT DISTR  ',find_key(aln_map[seqnames[0]],pos),' ',rot_bin_count
    else:
        print 'ROT DISTR  ',pos,' ',rot_bin_count

    if HOM_STRUCT:
        if seq[pos] == '-': continue ## only output wrt the first seq
        if STATUS: chiout.close() # chi, bb angle outputs
        sum = 0;
        for counter in rot_bin_count.keys():
            sum = float(sum + rot_bin_count[counter])
        will_output = 0
        rot_bin_freq = {}
        for counter in rot_bin_count.keys():
            Increment(rot_bin_freq, counter, rot_bin_count[counter]/sum)
            if (rot_bin_freq[counter] > MAX_FRQ): will_output = 1  # ignore spreaded pos
        #if sum < 3: will_output = 0 # too many gaps
        if seq[pos] in ['A','G']: # no chi1, chi2
            will_output = 0
        if not will_output: continue

        ## padding for the outliers
        for counter in chi1_max.keys(): ## note some position can have no chi1_max
            if rot_bin_count[counter] < 2 or rot_bin_freq[counter] < 0.2:
                if chi1_max[counter] - chi1_min[counter] > 180:
                    chi1_ave = In_range((360 + chi1_max[counter] + chi1_min[counter])/2.0)
                    chi1_max[counter] = In_range(chi1_ave - 20)
                    chi1_min[counter] = In_range(chi1_ave + 20)
                else:
                    chi1_ave = (chi1_max[counter] + chi1_min[counter])/2.0
                    chi1_max[counter] = In_range(chi1_ave + 20)
                    chi1_min[counter] = In_range(chi1_ave - 20)
        for counter in chi2_max.keys(): ## note some position can have no chi2_max
            if rot_bin_count[counter] < 2 or rot_bin_freq[counter] < 0.2:
                if chi2_max[counter] - chi2_min[counter] > 180:
                    chi2_ave = In_range((360 + chi2_max[counter] + chi2_min[counter])/2.0)
                    chi2_max[counter] = In_range(chi2_ave - 20)
                    chi2_min[counter] = In_range(chi2_ave + 20)
                else:
                    chi2_ave = (chi2_max[counter] + chi2_min[counter])/2.0
                    chi2_max[counter] = In_range(chi2_ave + 20)
                    chi2_min[counter] = In_range(chi2_ave - 20)

        # check for overlap
        newchi1_max = chi1_max
        newchi1_min = chi1_min
        newchi2_max = chi2_max
        newchi2_min = chi2_min
        used_c1 = {}
        for c1 in chi1_max.keys():
            used_c1[c1] = 1
	    for c2 in chi1_max.keys():
                if used_c1.has_key( c2 ): continue
                (newchi1_max[c1],newchi1_min[c1],\
                newchi1_max[c2],newchi1_min[c2]) = remove_Overlap (chi1_max[c1],\
                                                        chi1_min[c1],\
                                                        chi1_max[c2],\
                                                        chi1_min[c2],\
                                                        rot_bin_count[c1],\
                                                        rot_bin_freq[c1],\
                                                        rot_bin_count[c2],\
                                                        rot_bin_freq[c2])
        used_c1 = {}
        for c1 in chi2_max.keys():
            used_c1[c1] = 1
            for c2 in chi2_max.keys():
                if used_c1.has_key( c2 ): continue
                (newchi2_max[c1],newchi2_min[c1],\
                newchi2_max[c2],newchi2_min[c2]) = remove_Overlap (chi2_max[c1],\
                                                        chi2_min[c1],\
                                                        chi2_max[c2],\
                                                        chi2_min[c2],\
                                                        rot_bin_count[c1],\
                                                        rot_bin_freq[c1],\
                                                        rot_bin_count[c2],\
                                                        rot_bin_freq[c2])
        chi1_max = newchi1_max
        chi2_max = newchi2_max
        chi1_min = newchi1_min
        chi2_min = newchi2_min


        ## check for bins across the -180 or 180 boundary\
        ## if so, reverse the max ,min order so that max is
        ## less than min. Rosetta will then take this as a signal
        ## to treat them as bins across 180 boundary
        chi1_interval = {}
        chi2_interval = {}
        for counter in chi1_max.keys():
            chi1_interval [ counter ] = chi1_max[counter] - chi1_min[counter]
            if chi1_interval [ counter ]  >= 180.0:
                chi1_interval [counter] = 360.0 - chi1_interval[ counter ]
                temp = chi1_max[counter]
                chi1_max[counter] = chi1_min[counter]
                chi1_min[counter] = temp
            elif chi1_interval [ counter ]  <= -180.0:
                chi1_interval [counter] = 360.0 + chi1_interval[ counter ]
            chi1_interval [counter] = abs (chi1_interval [counter])
            assert 0 <= chi1_interval [counter] <= 180
        for counter in chi2_max.keys():
            chi2_interval [ counter ] = chi2_max[counter] - chi2_min[counter]
            if chi2_interval [ counter ] >= 180.0:
                chi2_interval[ counter ] = 360.0 - chi2_interval [counter]
                temp = chi2_max[counter]
                chi2_max[counter] = chi2_min[counter]
                chi2_min[counter] = temp
            elif chi2_interval [ counter ]  <= -180.0:
                chi2_interval [counter] = 360.0 + chi2_interval[ counter ]
            chi2_interval [counter] = abs (chi2_interval [counter])
            assert 0 <= chi2_interval [counter] <= 180
            if seq[pos] in ['C','P','S','T','V']: # only chi1
                chi2_max[counter] =  180.0
                chi2_min[counter] = -180.0
                chi2_interval [ counter ] = chi2_max[counter] - chi2_min[counter]

        ## check if chi1_max/min, chi2_max/min exists
        for counter in rot_bin_count.keys():
            if chi1_max.has_key( counter ) and not chi2_max.has_key( counter ):
                ## has chi1 but no chi2
                chi2_max[ counter ] =  180.0
                chi2_min[ counter ] = -180.0
                chi2_interval [ counter ] = chi2_max[counter] - chi2_min[counter]

        ## output
        for counter in rot_bin_count.keys():
            if not chi1_max.has_key( counter ) and not chi2_max.has_key( counter ):
                # no information
                continue
            weight = 0.0
            if chi1_interval[ counter ]*chi2_interval[ counter ] != 0.0:
                if seq[pos] in ['C','P','S','T','V']: # only chi1
                    density = rot_bin_freq[counter]/chi1_interval[counter]
                    if density*SC_BIN_WIDTH < MIN_FRQ: ## avoid small numbers blow up the log
                        weight = 0.0
                    else:
                        weight =\
                        math.log10(MIN_FRQ/(density*SC_BIN_WIDTH))
                else:
                    density = rot_bin_freq[counter]/(chi1_interval[counter]*chi2_interval[counter])
                    if density*SC_BIN_WIDTH*SC_BIN_WIDTH < MIN_FRQ: ## avoid small numbers blow up the log
                        weight = 0.0
                    else:
                        weight =\
                        math.log10(MIN_FRQ/(density*SC_BIN_WIDTH*SC_BIN_WIDTH))
            else:
                print 'chi1(2)_interval can\'t be zero'
                print counter,chi1_max[counter], '-',chi1_min[counter],'=', chi1_interval[counter]
                print rot_bin_freq[counter],rot_bin_count[counter]
                print counter,chi2_max[counter], '-',chi2_min[counter],'=', chi2_interval[counter]
                exit()
            outfile.write ( 'SC_%s%-4d %3.2f SC_BIN %4d %8.2f %8.2f %8.2f %8.2f %8.2f\n'\
                %(seq[pos],pos,rot_bin_freq[counter]-0.01,find_key(aln_map[seqnames[0]],pos),\
                weight,chi1_min[counter],chi1_max[counter],chi2_min[counter],chi2_max[counter]))


####################################################################
######## now rank the bb positions by deviations ###################
####################################################################

dev_list = []

bb_feature_rsd_list = []

if HOM_STRUCT:
    for pos in bb_list.keys():
        if pos >1 and pos<L:
            bb_feature_rsd_list.append(pos)
else:
    for pos in bb_list.keys():

        if not bb_list[pos]:continue

        total_sd = 0
        ssd = {}
        thisbb = []
        #if HOM_STRUCT:
        #    for bb_and_weight in bb_list[pos]:
        #        thisbb.append ( bb_and_weight['bb'] )
        #else:
        thisbb = bb_list[pos]

        for i in range(2): ## 0= phi, 1=psi
            ll = map(lambda x:x[i], thisbb )

            sd = {}
            for s in range(-6,7):
                shift = s*30

                l = map(lambda x:In_range(x+shift), ll)

                m = float( reduce(add,l) ) / len(l)

                if (len(l) - 1 != 0):
                    sd[s] = sqrt( reduce(add, map(lambda x:( Angle_delta(x,m) )**2,l)) / (len(l) - 1))
                #print shift, sd[s]

            if sd:
                ssd[i] = min(sd.values())
                total_sd = total_sd + ssd[i]


        if pos >1 and pos<L and \
        total_sd >= MIN_DEV: ## add this to the list of interesting positions
            log(`[total_sd, pos, ssd[0], ssd[1]]`)
            bb_feature_rsd_list.append(pos)

        if NATIVE:
            ### calculate deviations from native
            dev=0.
            for i in range(2): ## 0= phi, 1=psi
                ll = map(lambda x:x[i], bb_list[pos] )
                nat_bb=Get_bb(info[NATIVE][pos])
                temp=nat_bb[i]
                dev =dev+sqrt( reduce(add, map(lambda x:( Angle_delta(x,temp ))**2,ll)) /\
                               (len(ll) - 1))

#            print 'NAT_DEV',' ',pos,' ',dev,' ',total_sd
            print 'NAT_DEV  position %s dev_from_nat %.2f dev_win_decoy_pop %.2f'%(pos,dev,total_sd)


####################################################################
######## setup a mapping from bb angles to features ################
####################################################################
##
## bin_map[pos][bin] = small integer id of backbone bin `bin` at position pos
bin_map = {}

if GNUPLOT:
    gpout,gpin = popen2('gnuplot')
    gpin.write('set nokey\n')
    ## One vertical line per bin boundary.  BB_BIN_WIDTH <= 0 selects the
    ## ABGEO bins, which have no regular grid -- and stepping by a
    ## non-positive width would never leave the loop.
    if BB_BIN_WIDTH > 0:
        angle = -180
        while angle<180:
            gpin.write('set arrow from %f, graph 0 to %f, graph 1 nohead\n'\
                       %(angle,angle))
            angle = angle + BB_BIN_WIDTH
    gpin.flush()

for pos in bb_feature_rsd_list:
    if HOM_STRUCT:
        aa = seq[pos]
        if aa != '-' and STATUS:
            thisbb = string.join (['bb_',str(find_key(aln_map[seqnames[0]],pos)), '.txt'],'')
            bbout = open (thisbb, 'w')

    bin_map[pos] = {}
    counter = 0

    if NATIVE: ## we'll make the first bin correspond to the native value
        nat_bb = Get_bb( info[NATIVE][pos] )
        nat_bin = Get_bb_bin ( nat_bb )
    else:
        nat_bb = (0) ## shoudn match anybody
        nat_bin = (0)

    phi_min = {}
    phi_max = {}
    psi_min = {}
    psi_max = {}
    nat_counter = 0
    bb_bin_count={}
    if HOM_STRUCT and aa != '-':
        print 'MAP BB:%d counter= %d rep= GAP'\
            %(find_key(aln_map[seqnames[0]],pos),-1)
    for bb_and_weight in bb_list[pos]:
        if HOM_STRUCT:
            bb = bb_and_weight['bb']
            phi = In_range(bb[0])
            psi = In_range(bb[1])
            weight = bb_and_weight['weight']
        else:
            bb = bb_and_weight

        bin = Get_bb_bin( bb )
        if not bin_map[pos].has_key( bin ):
            bin_map[pos][bin] = counter
            if HOM_STRUCT:
                if aa != '-':
                    print 'MAP BB:%d counter= %d rep= %.1f,%.1f'\
                        %(find_key(aln_map[seqnames[0]],pos),counter,bb[0],bb[1])
            else:
                print 'MAP BB:%d counter= %d rep= %.1f,%.1f'\
                  %(pos,counter,bb[0],bb[1])
            counter = counter + 1

        if bin == nat_bin:
            nat_counter = nat_counter + 1

        if HOM_STRUCT:
            if seq[pos] != '-' and STATUS:
                bbout.write ('%4d %4d %8.2f %8.2f\n'\
                %(find_key(aln_map[seqnames[0]],pos),bin_map[pos][bin], phi, psi))

            Increment(bb_bin_count,bin_map[pos][bin], weight)
            keep_max(phi_max, bin_map[pos][bin], phi)
            keep_min(phi_min, bin_map[pos][bin], phi)
            keep_max(psi_max, bin_map[pos][bin], psi)
            keep_min(psi_min, bin_map[pos][bin], psi)
        else:
            Increment(bb_bin_count,bin_map[pos][bin])

    log('BB %4d num_bins: %4d nat_count: %4d\n'\
        %(pos, counter, nat_counter))
    if HOM_STRUCT:
        if aa != '-':
            print 'RAMA DISTR ', find_key(aln_map[seqnames[0]],pos), '  ', bb_bin_count
    else:
        print 'RAMA DISTR ', pos, '  ', bb_bin_count

    if HOM_STRUCT:
        if seq[pos] == '-': continue ## only print out wrt first seq
        if STATUS: bbout.close()
        sum = 0;
        for counter in bb_bin_count.keys():
            sum = float(sum + bb_bin_count[counter])
        will_output = 0
        bb_bin_freq = {}
        for counter in bb_bin_count.keys():
            Increment(bb_bin_freq, counter, bb_bin_count[counter]/sum)
            if bb_bin_freq[counter] > MAX_FRQ: will_output = 1
        #if sum < 3: will_output = 0
        if not will_output: continue

        ## padding for the outliers
        for counter in bb_bin_count.keys():
            if bb_bin_count[counter] < 2 or bb_bin_freq[counter] < 0.2:
                if phi_max[counter] - phi_min[counter] > 180:
                    phi_ave = In_range((360 + phi_max[counter] + phi_min[counter])/2.0)
                    phi_max[counter] = In_range(phi_ave - 20)
                    phi_min[counter] = In_range(phi_ave + 20)
                else:
                    phi_ave = (phi_max[counter] + phi_min[counter])/2.0
                    phi_max[counter] = In_range(phi_ave + 20)
                    phi_min[counter] = In_range(phi_ave - 20)
                if psi_max[counter] - psi_min[counter] > 180:
                    psi_ave = In_range((360 + psi_max[counter] + psi_min[counter])/2.0)
                    psi_max[counter] = In_range(psi_ave - 20)
                    psi_min[counter] = In_range(psi_ave + 20)
                else:
                    psi_ave = (psi_max[counter] + psi_min[counter])/2.0
                    psi_max[counter] = In_range(psi_ave + 20)
                    psi_min[counter] = In_range(psi_ave - 20)


        # check for overlap
        ## Trim each pair of bins against one another with remove_Overlap,
        ## first along phi and then along psi.  The range dicts are updated in
        ## place, so a bin that was trimmed against one neighbour is compared
        ## against the next neighbour with its already-trimmed range.
        ##
        ## Both angles share one loop so that the psi pass applies the same
        ## used_c1 skip as the phi pass: no bin is compared with itself and
        ## no pair is visited a second time with c1 and c2 swapped.
        for (bb_ang_max, bb_ang_min) in ((phi_max, phi_min), (psi_max, psi_min)):
            used_c1 = {}
            for c1 in bb_bin_count.keys():
                used_c1[ c1 ] = 1
                for c2 in bb_bin_count.keys():
                    ## c2 is c1 itself, or has already been paired as a c1
                    if used_c1.has_key( c2 ): continue
                    (bb_ang_max[c1], bb_ang_min[c1],
                     bb_ang_max[c2], bb_ang_min[c2]) = remove_Overlap(
                        bb_ang_max[c1], bb_ang_min[c1],
                        bb_ang_max[c2], bb_ang_min[c2],
                        bb_bin_count[c1], bb_bin_freq[c1],
                        bb_bin_count[c2], bb_bin_freq[c2])


        ## check for bins across the -180 or 180 boundary.
        ## if so, reverse the max, min order so that max is
        ## less than min. Rosetta will then take this as a signal
        ## to treat them as bins across the 180 boundary.
        ## phi_interval / psi_interval end up holding the width of each bin,
        ## folded into 0..180.
        phi_interval = {}
        psi_interval = {}
        for counter in bb_bin_count.keys():
            phi_span = phi_max[counter] - phi_min[counter]
            if phi_span >= 180.0:
                phi_span = 360.0 - phi_span
                phi_max[counter], phi_min[counter] = phi_min[counter], phi_max[counter]
            elif phi_span <= -180.0:
                phi_span = 360.0 + phi_span
            phi_interval[ counter ] = abs( phi_span )
            assert 0 <= phi_interval[ counter ] <= 180
        for counter in bb_bin_count.keys():
            psi_span = psi_max[counter] - psi_min[counter]
            if psi_span >= 180.0:
                psi_span = 360.0 - psi_span
                psi_max[counter], psi_min[counter] = psi_min[counter], psi_max[counter]
            ## NOTE(review): plain `if` here where the phi loop uses `elif`;
            ## the two only differ if max - min could reach 540 or more
            if psi_span <= -180.0:
                psi_span = 360.0 + psi_span
            psi_interval[ counter ] = abs( psi_span )
            assert 0 <= psi_interval[ counter ] <= 180

        ## write one BB_SMALL_BIN line per bin that has a non-zero area
        for counter in bb_bin_count.keys():
            bb_bin_area = phi_interval[ counter ]*psi_interval[ counter ]
            if bb_bin_area == 0.0:
                ## degenerate bin: no density can be computed, nothing written
                continue
            density = bb_bin_freq[counter]/bb_bin_area

            ## weight is log10 of MIN_FRQ over the frequency the bin would have
            ## at BB_BIN_WIDTH x BB_BIN_WIDTH size; it is therefore never
            ## positive, and is pinned to 0 for sparse bins
            if density*BB_BIN_WIDTH*BB_BIN_WIDTH < MIN_FRQ: ## to avoid a small number blow up the log10
                weight = 0.0
            else:
                weight = math.log10(MIN_FRQ/(density*BB_BIN_WIDTH*BB_BIN_WIDTH))

            bb_bin_fields = (seq[pos],pos,bb_bin_freq[counter]-0.01,
                             find_key(aln_map[seqnames[0]],pos),weight,
                             phi_min[counter],phi_max[counter],
                             psi_min[counter],psi_max[counter])
            outfile.write ( 'BB_%s%-4d %3.2f BB_SMALL_BIN %4d %8.2f %8.2f %8.2f %8.2f %8.2f\n'
                            %bb_bin_fields )

    #### gnuplot the rama distributions
    ## Debugging aid, only active when GNUPLOT is set and not in HOM_STRUCT
    ## mode: dumps the native point (N) and the points of bb_list[pos] (D) to
    ## junk.plot, sends a plot command to gpin and waits for a key press.
    ## NOTE(review): gpin is presumably a pipe to a gnuplot process opened
    ## earlier in the file -- confirm.
    if GNUPLOT and not HOM_STRUCT:
        out = open('junk.plot','w')
        out.write('N %f %f\n'%(nat_bb[0],nat_bb[1]))
        out.writelines(['D %f %f\n'%(bb[0],bb[1]) for bb in bb_list[pos]])
        out.close()

        ## the scatter plots come first, then one constant per bin boundary
        gp_terms = ['plot [-180:180] [-180:180] "< grep D junk.plot" u 2:3, "< grep N junk.plot" u 2:3']
        angle = -180
        while angle<=180:
            gp_terms.append('%f'%angle)
            angle = angle + BB_BIN_WIDTH
        command = ', '.join(gp_terms) + '\n'
        #print command
        gpin.write(command)
        gpin.flush()
        raw_input() ## pause until the user hits return

####################################################################
######## now output the codes ######################################
####################################################################

code_count = {}

############### show the names:
## must use the same order of output here and below!

print 'CODE_NAMES:',
for pos in bb_feature_rsd_list:
    if HOM_STRUCT:
        if seq[pos] != '-':
            print 'BB:%d'%find_key(aln_map[seqnames[0]],pos),
    else:
        print 'BB:%d'%pos,

if not SILENT_INPUT:
    for pos in range(1,L+1):
        if HOM_STRUCT:
            if seq[pos] != '-':
                print 'SC:%d'%find_key(aln_map[seqnames[0]],pos),
        else:
            print 'SC:%d'%pos,
print


#print bin_map

for name in info.keys():
    bar_code = []

    for pos in bb_feature_rsd_list:
        if HOM_STRUCT and seq[pos] == '-':
            continue
        if info[name].has_key(pos):
            tor = info[name][pos]
            bb = Get_bb (tor)
            bin = Get_bb_bin( bb )
            if bin_map[pos].has_key( bin ):
                bar_code.append( bin_map[pos][bin] )
            else:
                assert name == NATIVE
                bar_code.append( -2 ) ## hack! means no decoys in native bin!
        else:
            bar_code.append( -1 ) ## gap

    if not SILENT_INPUT:
        for pos in range(1,L+1):
            if HOM_STRUCT and seq[pos] == '-':
                continue
            if info[name].has_key(pos):
                rot = Get_rot( info[name][pos] )
                if chi_map[pos].has_key( rot ):
                    bar_code.append( chi_map[pos][rot] )
                else:
                    assert name == NATIVE
                    bar_code.append( -2 ) ## hack! means no decoys in native bin!
            else:
                bar_code.append( -1 ) ## gap

    code = string.join( map(str,bar_code))
    if name == NATIVE:
        print 'NAT_CODE:',code
    else:
        print 'CODE:',name, code

        if not code_count.has_key( code ):
            code_count[code] = 0

        code_count[code] = code_count[code] + 1


for code in code_count.keys():
    print 'CODE_COUNT:',code_count[code], code

if HOM_STRUCT:
    outfile.close()

