## this has I/O routines for reading decoy files, silent-files

from phil import *
from amino_acids import longer_names
import math


rsd_tag_list = ['BB','CHI','ROT','SS','NB','AA']

## all torsion angles are restricted to [-180:180) by calls to In_range

########################################### functions:
def In_range(a): ## forces a to be in [-180,180)
    """Wrap the angle ``a`` into the half-open interval [-180, 180).

    Whole turns of 360 are removed or added until the value fits; a value
    that already lies inside the interval is returned untouched.
    """
    half_turn = 180.0
    full_turn = 360.0
    while True:
        if a >= half_turn:
            a = a - full_turn
        elif a < -half_turn:
            a = a + full_turn
        else:
            return a


## returns big_info, seq, SILENT_INPUT  (two dictionaries and a 0/1 flag)
def Read_either_file( filename, SEQ_CHECK = 1):
    """Read decoys from either a silent file or a file listing decoy files.

    The input is treated as a silent file when its first line starts with
    'SEQUENCE' and is then handed to Read_silent_file.  Otherwise the first
    whitespace-delimited token of every non-blank line is taken as the path
    of one decoy file, which is read with Read_file.

    filename  -- path of the silent file or of the list file
    SEQ_CHECK -- when true, a decoy whose sequence differs from the
                 previously accepted one is logged and skipped

    Returns (big_info, seq, SILENT_INPUT):
      big_info     -- maps decoy name to its info dictionary
      seq          -- maps position to residue
      SILENT_INPUT -- 1 for a silent file, and also for listed decoys in
                      which none of the inspected residues carries 'CHI'
                      data; 0 otherwise
    A missing file is logged and yields ({}, {}, 1), the same triple an
    empty list file produces.
    """
    if not exists(filename):
        log('missing: '+filename)
        ## keep the arity of the normal return so callers can always unpack
        return {}, {}, 1

    ## only the first line is needed to tell the two formats apart
    data = open(filename,'r')
    try:
        first_line = data.readline()
    finally:
        data.close()

    if first_line[:8] == 'SEQUENCE':
        SILENT_INPUT = 1
        big_info,seq = Read_silent_file( filename)
    else:
        SILENT_INPUT = 0

        listing = open(filename,'r')
        try:
            files = []
            for entry in listing.readlines():
                fields = entry.split()
                if fields: ## blank lines carry no path
                    files.append(fields[0])
        finally:
            listing.close()

        seq = {}
        big_info = {}
        counter = 0
        for decoy in files:
            counter = counter+1
            if not counter%25:
                log('%d %d\n'%(counter, len(files)))

            info = Read_file( decoy )

            ## validate before Get_seq, which needs a populated dictionary
            if not info:
                log('bad file: '+decoy)
                continue

            s = Get_seq(info)

            if seq and SEQ_CHECK and s != seq:
                log('bad seq: '+decoy)
                continue
            seq = s

            ## add to big dictionary:
            big_info[ decoy ] = info

    if not SILENT_INPUT: ## check to see if these are centroid decoys:
        chi_count = 0
        total_count = 0
        for decoy in list(big_info.keys())[:10]:
            info = big_info[decoy]
            for pos in range(1,info['L']+1):
                if pos in info:
                    ## trace output kept as-is for output compatibility
                    print('%s %s %s'%(decoy, pos, info[pos]))
                    if 'CHI' in info[pos]:
                        chi_count = chi_count + 1
                    total_count = total_count + 1
        if chi_count == 0:
            SILENT_INPUT = 1
        elif chi_count < total_count:
            log('are these fullatom decoys or not?? %d != %d\nassuming they are\n'\
                %(chi_count, total_count ))

    return big_info, seq, SILENT_INPUT

####

def _row_number(tokens):
    """Return the leading integer of a split table row, or None if absent."""
    try:
        return int(tokens[0])
    except (IndexError, ValueError):
        return None


def Read_file(filename):
    """Parse one decoy file into an info dictionary.

    Reads the neighbor counts, side-chain torsions / rotamer numbers,
    backbone torsions and the "tag: value" score lines.

    Returns a dictionary with
      info['L']   -- number of ATOM records whose atom-name field is ' CA '
      info[pos]   -- per-residue dictionary (pos counted from 1) holding
                     whichever of 'NB', 'CHI', 'ROT', 'AA', 'BB', 'SS' the
                     file provides
      info[tag]   -- float score for every line of the form "tag: value"
                     (for 'maxsub' the value is taken from the third field)
    If the file cannot be opened this is logged and an empty dictionary is
    returned.
    """
    info = {}

    try:
        data = open(filename,'r')
    except IOError:
        log('missing %s\n'%filename)
        return info

    L = 0
    try:
        line = data.readline()
        while line:
            ## A row that ended a table early.  It is examined again as an
            ## ordinary line instead of being thrown away, so a section
            ## header or score line right after a short table is not lost.
            leftover = None

            if line[:4] == 'ATOM' and line[12:16] == ' CA ':
                L = L+1

            elif line[:9] == 'res aa nb':
                n = len(line.split()) ## rows must have as many columns as the header
                for i in range(L):
                    row = data.readline()
                    l = row.split()
                    if len(l) != n or _row_number(l) != i+1:
                        leftover = row
                        break
                    info.setdefault(i+1, {})['NB'] = int(l[2])

            elif line[:8] == 'absolute':
                data.readline() ## header line
                for i in range(L):
                    row = data.readline()
                    l = row.split()
                    if len(l) != 11 or l[-1] != 'chi_absolute' \
                       or _row_number(l) != i+1:
                        leftover = row
                        break
                    residue = info.setdefault(i+1, {})
                    residue['CHI'] = [In_range(float(x)) for x in l[2:6]]
                    residue['ROT'] = [int(x) for x in l[6:10]]
                    residue['AA'] = longer_names[ l[1] ]

            elif line[:8] == 'complete':
                for i in range(L):
                    row = data.readline()
                    l = row.split()
                    ## column 8 (l[7]) is read below, so shorter rows end the table
                    if len(l) < 8 or l[1] not in ('E','H','L') \
                       or _row_number(l) != i+1:
                        leftover = row
                        break
                    residue = info.setdefault(i+1, {})
                    residue['BB'] = [In_range(float(x)) for x in l[2:5]]
                    residue['SS'] = l[1]
                    if 'AA' not in residue:
                        residue['AA'] = l[7]
                    else:
                        assert residue['AA'] == l[7]

            else:
                l = line.split()
                if l and l[0][-1] == ':':
                    ## not every "word:" line is a score; skip the ones
                    ## without a numeric value
                    try:
                        if l[0] == 'maxsub:':
                            score = float(l[2])
                        else:
                            score = float(l[1])
                    except (IndexError, ValueError):
                        pass
                    else:
                        info[ l[0][:-1] ] = score

            if leftover is None:
                line = data.readline()
            else:
                line = leftover ## '' at end of file, which ends the loop
    finally:
        data.close()

    info['L'] = L

    return info
## end of Read_file


## get sequence info out of the info object above
def Get_seq( info ):
    """Collect the residue identities stored in an info dictionary.

    info -- dictionary with the residue count under 'L' and per-residue
            dictionaries under the integer positions 1..L

    Returns a dictionary mapping each position that has an 'AA' entry to
    that entry.  Positions without a per-residue dictionary, and an info
    dictionary without 'L', contribute nothing instead of raising KeyError.
    """
    seq = {}
    for pos in range(1, info.get('L', 0)+1):
        residue = info.get(pos)
        if residue and 'AA' in residue:
            seq[pos] = residue['AA']
    return seq



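## Read_silent_file returns big_info,seq
## seq maps each position (counted from 1) to its character in the SEQUENCE: line
## big_info is a mapping from decoy tags to info-dictionaries,
##  where each dictionary has the score-column names as keys (e.g. "env"),
##  the positions 1..L as keys of per-residue dictionaries ('BB','SS','AA'), and 'L'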

def Read_silent_file( filename ):
    """Read every decoy stored in a silent file.

    The first line must be 'SEQUENCE: <sequence>' and the second the
    'SCORE:' line naming the score columns.  Each decoy is a 'SCORE:' line
    (score values followed by the decoy name) and then one 9-column line
    per residue that ends in the decoy name.

    Returns (big_info, seq):
      seq      -- maps position (from 1) to the residue character
      big_info -- maps decoy name to a dictionary holding the float scores
                  under the column names, per-residue dictionaries with
                  'BB', 'SS' and 'AA' under the positions 1..L, and 'L'
    Lines that cannot be interpreted as a decoy header are printed as
    'badline:' and skipped; a decoy with a malformed residue line is
    discarded.  A missing file is logged and yields ({}, {}).
    """
    if not exists( filename):
        log('missing: '+filename)
        ## callers unpack two values, so keep the shape of the normal return
        return {}, {}

    score_tag={}
    big_info = {}
    seq={}
    data = open(filename,'r')
    try:
        l = data.readline().split()
        assert l[0] == 'SEQUENCE:'
        sequence = l[1]
        L = len(sequence)
        for pos in range(1,L+1):
            seq[pos] = sequence[pos-1]
        l = data.readline().split()
        assert l[0] == 'SCORE:'
        for i in range(1,len(l)-1): ## ignore SCORE, description
            score_tag[i] = l[i]

        line = data.readline()
        while line:
            l = line.split()

            ## record the score-values
            info = None
            if line[:6] == 'SCORE:' and len(l)-2 == len(score_tag):
                try:
                    info = {}
                    for i in range(1,len(l)-1):
                        info[ score_tag[i] ] = float(l[i])
                except ValueError:
                    ## e.g. the column-header line repeated in a
                    ## concatenated silent file
                    info = None
            if info is None:
                print('badline: '+line[:-1])
                line = data.readline()
                continue

            name = l[-1]
            ok = 1
            for i in range(L):
                row = data.readline()
                l = row.split()
                bb = None
                if len(l) == 9 and l[-1] == name:
                    try:
                        if int(l[0]) == i+1:
                            bb = [In_range(float(x)) for x in l[2:5]]
                    except ValueError:
                        bb = None
                if bb is None:
                    ok = 0
                    ## look at this line again: if the decoy was truncated
                    ## it may be the SCORE: line of the next decoy
                    line = row
                    break
                pos = i+1
                info[pos] = {}
                info[pos]['BB'] = bb
                info[pos]['SS'] = l[1]
                info[pos]['AA'] = seq[pos]

            if not ok:
                continue
            info['L'] = L
            big_info[name] = info
            line = data.readline()
    finally:
        data.close()
    return big_info, seq



















