#!/usr/bin/python

from phil import *
from amino_acids import extra_longer_names

def Help():
    """Print the command-line usage text to stdout and terminate the script.

    The script name shown in the usage line is taken from argv[0].
    """
    usage = '\nUsage: %s out_file MIN_NB MAX_CHI BB_BIN_WIDTH {-n <native file>}\n\n' % argv[0]
    requirement = '\n Need rosetta processed all atom native models (-score -fa_output -nstruct 1)\n\n'
    print(usage)
    print(requirement)
    exit()

## set BB_BIN_WIDTH <= 0 to get ABGO bins
    
## parameters

## Command line: out_file MIN_NB MAX_CHI BB_BIN_WIDTH {-n <native file>}
args = argv[1:]

## The optional "-n <native file>" pair may appear anywhere; strip it out
## first so that only the positional arguments remain in args.
native_file = ''
if '-n' in args:
    pos = args.index('-n')
    if pos + 1 >= len(args):
        Help() ## "-n" given without a file name
    native_file = args[pos+1]
    del args[pos:pos+2]

## Count the positional arguments only after "-n" has been removed;
## otherwise "-n <file>" could mask missing positional arguments and the
## indexing below would fail with an IndexError instead of showing usage.
if len(args) < 4:
    Help()

log(native_file)

out_file = args[0]


MIN_DEV = 50.0 ## smallest value for sd(phi) + sd(psi) to include as features
MIN_NB = int(args[1]) ## definition of exposed, used for sc rotamers
MAX_CHI = int(args[2]) ## only use rot numbers up to and including this chi torsion
BB_BIN_WIDTH = int(args[3]) ## in degrees

## MIN_DEV = 20.0 ## smallest value for sd(phi) + sd(psi) to include as features
## MIN_NB = 12 ## definition of exposed, used for sc rotamers
## MAX_CHI = 2 ## only use rot numbers up to and including this chi torsion
## BB_BIN_WIDTH = 20 ## in degrees

## MIN_DEV = 20.0 ## smallest value for sd(phi) + sd(psi) to include as features
## MIN_NB = 12 ## definition of exposed, used for sc rotamers
## MAX_CHI = 4 ## only use rot numbers up to and including this chi torsion
## BB_BIN_WIDTH = 5 ## in degrees


## internal name for SC bin if nb<MIN_NB: a tuple holding MAX_CHI copies of -1
EXPOSED = (-1,) * MAX_CHI


########################################### functions:
def In_range(a):
    """Wrap an angle in degrees into the half-open interval (-180, 180].

    The value is moved by whole turns of 360 degrees until it lies in range.
    """
    angle = a
    while True:
        if angle > 180.0:
            angle -= 360.0
        elif angle <= -180.0:
            angle += 360.0
        else:
            return angle
        
def Angle_delta(a,b):
    """Return the smallest absolute difference, in degrees, between two angles.

    Both angles are first wrapped with In_range; the result is the shorter
    of the direct difference and the difference going around the circle,
    so it never exceeds 180.
    """
    first = In_range(a)
    second = In_range(b)
    assert -180 <= first <= 180
    assert -180 <= second <= 180

    if second > first:
        hi, lo = second, first
    else:
        hi, lo = first, second

    direct = hi - lo
    wrapped = lo + 360.0 - hi ## the other way around the circle

    delta = direct
    if wrapped < direct:
        delta = wrapped
    assert delta <= 180.0
    return delta


def pp_class(pp): ## E G A B and O
    """Classify a backbone torsion triple into one of the letters E, G, A, B, O.

    pp[0], pp[1] and pp[2] are angles in degrees (presumably phi, psi and
    omega -- confirm against the callers). Each is wrapped with In_range
    before the class boundaries are applied.
    """
    phi = In_range(pp[0])
    psi = In_range(pp[1])
    omega = In_range(pp[2])
    for angle in (phi, psi, omega):
        assert -180 <= angle <= 180

    ## third angle within 90 degrees of zero takes priority over everything
    if abs(omega) < 90:
        return 'O'

    if phi >= 0:
        if -100 < psi <= 100:
            return 'G'
        return 'E'

    if -125 < psi <= 50:
        return 'A'
    return 'B'


def Get_bb( tor ):
    """Return elements 1..4 of a five-element torsion record as a tuple.

    Element 0 is dropped; the returned tuple now includes omega.
    """
    assert len(tor) == 5
    return ( tor[1], tor[2], tor[3], tor[4] )

def Get_bb_bin( bb ):
    """Map a backbone tuple (as returned by Get_bb) to its bin identifier.

    With BB_BIN_WIDTH > 0 the bin is a pair of integer grid indices obtained
    by flooring bb[0] and bb[1] divided by the bin width. Otherwise the bin
    is a letter derived from pp_class: 'E' is lumped with 'B', and 'A' is
    reported as 'H' when bb[3] is 'H'.
    """
    if BB_BIN_WIDTH > 0:
        return ( int(floor(bb[0] / BB_BIN_WIDTH)),
                 int(floor(bb[1] / BB_BIN_WIDTH)) )

    label = pp_class(bb)
    if label == 'E': ## Lump E with B
        label = 'B'
    elif label == 'A' and bb[3] == 'H':
        label = 'H'
    return label

def Get_rot( tor ):
    """Return the side-chain bin for a nine-element torsion record.

    tor[0] is compared with MIN_NB: at or above it, the bin is the tuple of
    the first MAX_CHI entries starting at index 5; below it, the shared
    EXPOSED placeholder is returned.
    NOTE(review): the records assembled elsewhere in this file appear to
    have five elements, so the length check here would fail on them --
    confirm before re-enabling the commented-out callers.
    """
    assert len(tor) == 9
    if tor[0] >= MIN_NB:
        first_chi = 5
        return tuple( tor[first_chi:first_chi+MAX_CHI] )
    return EXPOSED

def Read_file(filename):
    """Parse one all-atom model file and return (info, seq).

    Three table sections are read, each expected to hold one row per
    residue (the residue count is the number of ATOM ... CA lines seen
    before the section):
      * header starting 'res aa nb': column 2 is stored as the neighbor count
      * header starting 'absolute' (followed by one more header line):
        columns 6-9 are stored as integers and column 1 is translated
        through extra_longer_names to build seq
      * header starting 'complete': columns 2-4 are stored as floats,
        followed by the E/H/L letter from column 1

    Returns:
      info: {position: [nb, float, float, float, letter]}; empty if the
            file could not be opened or the sections do not all cover the
            same set of positions
      seq:  {position: residue name from extra_longer_names}
    """
    info = {}
    seq = {}

    ## only a failure to open the file is treated as "missing"
    try:
        data = open(filename,'r')
    except IOError:
        log('missing %s\n'%filename)
        return info,seq

    nb = {}
    bb = {}
    rot = {}
    L = 0
    try: ## make sure the file is closed even if a row fails to parse
        line = data.readline()
        print(line)
        while line:
            if line[:4] == 'ATOM' and line[12:16] == ' CA ':
                L = L+1
            if line[:9] == 'res aa nb':
                n = len(line.split())
                for i in range(L):
                    line = data.readline()
                    l = line.split()
                    if len(l) == n and int(l[0]) == i+1:
                        nb[int(l[0])] = int(l[2])
                    else:
                        break
            elif line[:8] == 'absolute':
                line = data.readline() ## header line
                for i in range(L):
                    line = data.readline()
                    l = line.split()
                    if len(l) == 11 and l[-1] == 'chi_absolute' and int(l[0]) == i+1:
                        rot[int(l[0])] = [int(x) for x in l[6:10]]
                        seq[int(l[0])] = extra_longer_names[ l[1] ]
                    else:
                        break
            elif line[:8] == 'complete':
                for i in range(L):
                    line = data.readline()
                    l = line.split()
                    ## the length check keeps a blank or short row from
                    ## raising IndexError; it just ends the section
                    if len(l) > 1 and l[1] in ['E','H','L'] and int(l[0]) == i+1:
                        bb[int(l[0])] = [float(x) for x in l[2:5]] + [l[1]]
                    else:
                        break

            line = data.readline()
    finally:
        data.close()

    L = len(seq)
    if L and L == len(nb) == len(bb) == len(rot):
        for pos in nb.keys():
            if pos in bb and pos in rot:
                info[pos] = [nb[pos] ] + bb[pos]
                print(info[pos])
            else:
                info = {} ## signal failure
                break

    return info,seq ## info will be {} if failed

            
########################################### main script:


## read info from silent mode file:
 
## 

## info: decoy tag -> {position: [0, <columns 2-4 as floats>, E/H/L letter]}
## seq:  position (1-based) -> one character of the SEQUENCE: string
info = {}
bb={}
seq={}
tag = None ## tag of the decoy whose torsion rows are being collected

silent_handle = open(out_file,'r')
try:
    silent_file = silent_handle.readlines()
finally:
    silent_handle.close()

for line in silent_file :
    l = line.split()
    if not l:
        continue ## blank line: nothing to index

    if l[0]=='SEQUENCE:' :
        if len(l) > 1:
            sequence=l[1]
            for i in range(len(sequence)):
                seq[i+1]=sequence[i]

    elif l[0]=='SCORE:' :
        ## a new SCORE: line closes the previous decoy (if it had any rows)
        if bb:
            info[tag]= bb
            bb={}
        tag = l[-1]

    elif tag is not None and len(l) > 1 and l[1] in ['E','H','L']  :
        ## torsion row; rows seen before any SCORE: line belong to no decoy
        bb[int(l[0])] = [0] + [float(x) for x in l[2:5]] + [l[1]]

## store the last decoy, but only if it actually has torsion rows
if bb:
    info[tag]=bb

L = len(info.keys())
print('decoy_num %d' % L)

## NATIVE stays '' unless a native file was given, could be read, and its
## sequence equals the one taken from the silent file
NATIVE = ''
if native_file:
    native_info, native_seq = Read_file( native_file )
    print('native file: %s' % native_file)
    if native_info and native_seq == seq:
        print('sequences match!')
        NATIVE = 'NATIVE'
        info[NATIVE] = native_info


## collect, for every sequence position, the backbone tuples of all decoys
bb_list = {}
for pos in seq.keys():
    bb_list[pos] = []

for name, torsions in info.items():
    if name == NATIVE: ## dont include the native in bb_list
        continue
    for pos, tor in torsions.items():
        bb_list[pos].append( Get_bb( tor ) )

## now rank the bb positions by deviations
##
## For each position and each of the first two backbone angles, the spread
## over all decoys is estimated as a standard deviation. Because a plain
## arithmetic mean is meaningless across the +/-180 wrap, the angles are
## shifted by every multiple of 30 degrees in [-180,180], re-wrapped, and
## the smallest resulting sd is kept.
dev_list = []
decoy_sd={}
bb_feature_rsd_list = []

for pos in bb_list.keys():

    total_sd = 0
    ssd = {}
    for i in range(2): ## 0= phi, 1=psi
        ll = [x[i] for x in bb_list[pos]]
        n = len(ll)

        if n < 2:
            ## the n-1 normalisation is undefined for fewer than two
            ## samples; treat the position as having no spread
            ssd[i] = 0.0
        else:
            sd = {}
            for s in range(-6,7):
                shift = s*30

                l = [In_range(x+shift) for x in ll]

                m = float( reduce(add,l) ) / n

                sd[s] = sqrt( reduce(add, [Angle_delta(x,m)**2 for x in l]) / (n - 1))

            ssd[i] = min(sd.values())

        total_sd = total_sd + ssd[i]

    decoy_sd[pos]=total_sd

    ## skip both chain termini: the upper bound is the sequence length,
    ## not the number of decoys held in L
    if 1 < pos < len(seq) and \
       total_sd >= MIN_DEV: ## add this to the list of interesting positions
        log(repr([total_sd, pos, ssd[0], ssd[1]]))
        bb_feature_rsd_list.append(pos)

## Report, per position, the rms deviation of the decoys from the native
## angles next to the decoy spread computed above. This needs a native
## structure: without one info has no NATIVE entry, so the report is skipped.
if NATIVE:
    for pos in bb_list.keys():
        dev=0.
        nat_bb=Get_bb(info[NATIVE][pos])
        for i in range(2): ## 0= phi, 1=psi
            ll = [x[i] for x in bb_list[pos]]
            temp=nat_bb[i]
            if len(ll) > 1: ## n-1 normalisation needs at least two decoys
                dev = dev+sqrt( reduce(add, [Angle_delta(x,temp)**2 for x in ll]) / (len(ll) - 1))

        print('NAT_DEV   %s   %s   %s' % (pos, dev, decoy_sd[pos]))

# setup a mapping from bb angles to features:
# bin_map[pos][bin] is a small integer, assigned in order of first appearance
bin_map = {}

for pos in bb_list.keys():
## for pos in bb_feature_rsd_list:
    pos_bins = {}
    bin_map[pos] = pos_bins
    counter = 0

    if not NATIVE:
        nat_bb = 0 ## shouldn't match anybody
        nat_bin = 0
    else:
        nat_bb = Get_bb( info[NATIVE][pos] )
        nat_bin = Get_bb_bin( nat_bb )
        pos_bins[nat_bin] = counter ## 0 is always the native bin
        print('MAP BB%d counter= %d rep= %.1f,%.1f' % (pos, counter, nat_bb[0], nat_bb[1]))
        counter += 1

    nat_counter = 0 ## number of decoys falling into the native bin

    for bb in bb_list[pos]:
        bb_bin = Get_bb_bin( bb )

        if bb_bin not in pos_bins:
            ## first decoy seen in this bin becomes its representative
            pos_bins[bb_bin] = counter
            print('MAP BB%d counter= %d rep= %.1f,%.1f' % (pos, counter, bb[0], bb[1]))
            counter += 1

        if bb_bin == nat_bin:
            nat_counter += 1

    summary = 'BB %4d num_bins: %4d nat_count: %4d' % (pos, counter, nat_counter)
    log(summary + '\n')
    print(summary)


## code_count: bar code string -> number of decoys sharing it
code_count = {}

## header line naming one feature per selected backbone position
print(' '.join(['CODE_NAMES:'] + ['BB%d' % pos for pos in bb_feature_rsd_list]))
#for pos in range(1,L+1):
#    print 'SC%d'%pos,

for name in info.keys():

    bar_code = []
    bar_code_torsions = []

    for pos in bb_feature_rsd_list:
        bb = Get_bb( info[name][pos] )
        bar_code.append( bin_map[pos][Get_bb_bin( bb )] )
        bar_code_torsions.append( bb )

#    for pos in range(1,L+1):
#        rot = Get_rot( info[name][pos] )
#        bar_code.append( chi_map[pos][rot] )

    code = ' '.join([str(feature) for feature in bar_code])
    torsion_text = ' '.join(['%.1f,%.1f' % (t[0], t[1]) for t in bar_code_torsions])

    if name == NATIVE:
        print('NAT_TORSIONS: NATIVE %s' % torsion_text)
        continue ## dont output CODE line

    print('CODE: %s %s' % (name, code))
    print('CODE_TORSIONS: %s %s' % (name, torsion_text))

    code_count[code] = code_count.get(code, 0) + 1


for code in code_count.keys():
    print('CODE_COUNT: %s %s' % (code_count[code], code))
    










