# (c) Copyright Rosetta Commons Member Institutions.
# (c) This file is part of the Rosetta software suite and is made available under license.
# (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
# (c) For more information, see http://www.rosettacommons.org. Questions about this can be
# (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.

import tkFileDialog
import re, sys
import math
import colorsys
from pymol import cmd
from pymol.cgo import *
"""
AUTHOR:     ( Ron Jacak )

DESCRIPTION:

USAGE:


"""



#mo variables and their defaults

#mo for dashed line between satisfied hydrogen bonds
bondWidth = 0.03      # cylinder radius of each dash (passed to hbond as weight)
gapLength = 0.20      # gap between two dashes
dashLength = 0.10     # length of one dash
label_color = "yellow"   # was misspelt "yello"
label_size  = "10"

#mo for unsatisfied hbonds
uns_hbond_radius = .25   # radius of the sphere drawn on an unsatisfied atom

#mo for color gradient
nbins = 50            # number of energy bins in the saturation gradient

#debug on/off
DEBUG=True # False

# HSV components of the strongest-bond color; saturation is scaled down
# per bin in satisfiedHBonds().
hue=0
sat=1.0
value=1.0

# NOTE: the former "global unique_residues" statement was removed here; a
# global statement at module scope has no effect.

#####################
# h-bond visualizer #
#####################

# Tasks:
# ..1) get h-bond data
# ...a) parse from pdb file
# ....i) get the lines
# ....ii) parse the lines into tuples
# ...b) get from mini
# ..


#######################################################################
def find_hbond_data(pdb):
    """CONTRACT: (line iterable | str) -> (str list, str list)

    PURPOSE: Extract the hydrogen bond data lines and the unsatisfied
    hydrogen bond data lines from the text of a pdb file.

    pdb may be an open file, any iterable of lines, or one string
    holding the whole file.

    The hydrogen bond section starts after the header line beginning
    with "Loc, res, pos, pdb" (the header itself is not returned).  The
    unsatisfied section starts at the first line beginning with "GU ",
    which is itself a record and is returned.  Either section ends at
    the next blank line; blank lines are not returned.

    Returns the tuple (hbData, unsHbData).  Lines keep their line
    endings.  Prints a notice when no hydrogen bond lines were found."""

    # A whole-file string would otherwise be iterated one character at a
    # time; files and lists have no splitlines() and are used as they are.
    if hasattr(pdb, "splitlines"):
        pdb = pdb.splitlines(True)

    hbData = []
    unsHbData = []
    # The list currently being filled, or None while outside both sections.
    section = None

    for line in pdb:
        # Any whitespace-only line (including "\r\n") closes the section.
        if not line.strip():
            section = None
            continue

        if line.startswith("Loc, res, pos, pdb"):
            section = hbData
            continue

        # There is no header for the unsatisfied section: the first GU
        # line opens it and must be stored as well.
        if line.startswith("GU "):
            section = unsHbData

        if section is not None:
            section.append(line)

    if not hbData:
        print("Rosetta Design Wizard: No H-Bond data in pdb file.")

    return (hbData, unsHbData)


#######################################################################
def getHydrogenBondData(hbDataLines):
    """
    CONTRACT: str list -> tuple list

    PURPOSE: Given a list of strings containing the hydrogen bond
    information as given by Rosetta, returns one tuple per parsable line:

        (don_chain, don_resi, don_resn, don_res_atom,
         acc_chain, acc_resi, acc_resn, acc_res_atom,
         energy, distance)

    Residue numbers are the PDB numbers, kept as strings; energy and
    distance are floats.  A blank chain id is replaced by "*".  Lines
    that do not match the format, or whose energy/distance field is not
    a number, are skipped."""

    # the lines of the PDB file we're interested in look like below.
    # this is the output when -output_hbond_info is passed on the
    # rosetta command line.  note: nucleic acids use 1-letter residue
    # codes

    #Loc, res, pos, pdb, atom, res, pos, pdb, atom, hbE, dist, spec
    #(PROT|BASE)(space)(1to3-letter-don-aa-code)(space)
    #     (4character-padded-int-don-res-num)(space)
    #     (4char-leftpadded-int-don-res-num-pdb)(space)(1char-don-chain)(space)
    #     (4char-atom-name)(space)(1to3-letter-acc-aa-code)(space)
    #     (4character-leftpadded-int-acc-res-num)(space)
    #     (4char-leftpadded-int-acc-res-num-pdb)(space)(1char-acc-chain)(space)
    #     (4char-atom-name)(space)
    #     (6char-padded-float-hbEnergy)(space)(6char-padded-float-distance)(space)
    #     (specificity:none|ALL|something else)
    # the specificity column is not matched

    #PROT ASN   31   47 A  H   THR    2   18 A  O    -0.38   2.13 none
    #PROT LEU   32   48 A  H   THR    2   18 A  O    -1.00   2.01 none
    #BASE LYS  184  228 B 2HZ    T  365  569 D  O4   -0.01   2.55   T

    # Same pattern as before, written as raw strings and split per field
    # group.  The rosetta residue numbers are matched but not captured.
    pattObject = re.compile(
        r"(?:PROT|BASE|bkbn) "
        r"\s*([A-Z]+) \s*\d+ \s*(\d+) ([ A-Z]) \s*([A-Z0-9]+) "   # donor
        r"\s*([A-Z]+) \s*\d+ \s*(\d+) ([ A-Z]) \s*([A-Z0-9]+)"    # acceptor
        r"\s* (-?[\d\.]*) \s*([\d\.]*)")                          # hbE, dist

    hbDataTuples = []
    for line in hbDataLines:
        matchObject = pattObject.search(line)
        if not matchObject:
            continue

        (don_resn, don_resi, don_chain, don_res_atom,
         acc_resn, acc_resi, acc_chain, acc_res_atom,
         str_energy, str_distance) = matchObject.groups()

        # The numeric groups can match "" or "-"; skip such a line rather
        # than letting float() abort the whole parse.
        try:
            energy = float(str_energy)
            distance = float(str_distance)
        except ValueError:
            continue

        # make default values for the chain if it wasn't in the
        # rosetta output
        if don_chain == " ":
            don_chain = "*"
        if acc_chain == " ":
            acc_chain = "*"

        hbDataTuples.append((don_chain, don_resi, don_resn, don_res_atom,
                             acc_chain, acc_resi, acc_resn, acc_res_atom,
                             energy, distance))

    return hbDataTuples

#######################################################################
def getUnsatisfiedHydrogenBondData(unsHbDataLines, model=None):
    """
    CONTRACT: str list * pymolModelObj -> tuple list

    PURPOSE: Given a list of strings with the unsatisfied hydrogen
    bond data, return a list of (group_type, res_type, res_num) tuples.
    res_num is the PDB residue number.

    model is only needed for the old single-number format, where the
    rosetta residue number has to be translated with
    rosettaNumToPDBNum().  If that translation fails, parsing stops and
    the tuples collected so far are returned.

    Lines starting with "DS " and blank lines are skipped silently; any
    other line that cannot be parsed is reported and skipped."""

    # the line consists of (GU) (space) (3-letter-AA-code) (space)
    # (1-5letter-space-padded explanation of what group is
    # unsatisfied) (space) (4-char-padded-int-res-num) (space)
    # (4-char-padded-int-res-num-pdb)

    # GU ALA   BBH   16   32
    # GU ASP   BBH   17   33
    # GU TYR    OH   22   38
    # GU ARG   BBH   24   40
    #
    # dna-protein lines look as follows
    # GU   C    CO  163  502
    # GU   C   NH2  163  502
    # GU   A     N  164  503
    # GU   G IGNOR  165  504

    # (This comment made more sense with the old way of printing
    # unsatisfied hydrogen bonds. It's left in for explanatory
    # purposes only.)  the DS lines we can ignore. the GU are the ones
    # we really care about. the difference is that the DS lines print
    # info on all hydrogens not in a hydrogen bond. the GU lines print
    # only the groups that have not a single hydrogen bond. since a
    # group can contain more than one hydrogen, we're interested in
    # groups that have 0 hbonds.

    # old style of printing unsatisfied
    # GU T BBH 2
    # GU A BBH 16

    # current format: residue type, group, rosetta number (not captured),
    # PDB number
    pattObject = re.compile(r"GU \s*([A-Z]+) \s*(BBO|COO|CO|N|OH|BBH|NH3|NH2|NH|NHarg|NHaro|COO-|NH2\+|NH3\+|IGNOR) \s*\d+ \s*(\d+)")
    # old format: 1-letter residue type, group, rosetta number
    pattObject2 = re.compile(r"GU ([A-Z]) (BBO|COO|COO-|CO|N|OH|BBH|NH3|NH2|NH) (\d+)")

    unsHbDataTuples = []
    for line in unsHbDataLines:
        # strip() also recognises "\r\n" and whitespace-only lines as blank
        if line.startswith("DS ") or not line.strip():
            continue

        matchObject = pattObject.search(line)
        if matchObject:
            res_type, group_type, res_num = matchObject.groups()
        else:
            matchObject = pattObject2.search(line)
            if not matchObject:
                print("HB Plugin: Unable to parse unsatisfied hydrogen bond info. Format not understood: " + line.rstrip("\n"))
                continue

            res_type, group_type, rosetta_res_num = matchObject.groups()
            # allow for compatibility with older versions of hbond
            # info printing
            if group_type == "COO-":
                group_type = "COO"
            res_num = rosettaNumToPDBNum(rosetta_res_num, model)
            # Without a translation the remaining old-style lines are
            # not processed either.
            if not res_num:
                break

        unsHbDataTuples.append((group_type, res_type, res_num))

    return unsHbDataTuples


# Kept so that the module-level name still exists; rosettaNumToPDBNum()
# does not read or write it.
unique_residues = []
#######################################################################
def rosettaNumToPDBNum(rosetta_num, modelList=None ):
    """
    CONTRACT: (int | str) * pymolModelObj -> resi | None

    PURPOSE: Translate a rosetta residue number to a PDB residue number.

    Rosetta numbers are 1-based positions in the list of distinct
    atom.resi values of modelList.atom, taken in order of first
    appearance.  Returns the resi found at that position, or None (after
    printing a message) when rosetta_num is not an integer, is out of
    range, or no model was given.

    The residue list is rebuilt from modelList on every call, so it
    always reflects the model that was passed in.
    """

    failure = "HB Plugin: Failed to translate rosetta num %s to a PDB number." % (rosetta_num,)

    try:
        position = int(rosetta_num) - 1
    except (TypeError, ValueError):
        print(failure)
        return None

    # Ordered de-duplication; the set keeps the membership test cheap.
    seen = set()
    residues = []
    for atom in getattr(modelList, "atom", None) or []:
        resi = atom.resi
        if resi not in seen:
            seen.add(resi)
            residues.append(resi)

    # Reject numbers below 1 explicitly: a negative index would silently
    # pick a residue from the end of the list.
    if 0 <= position < len(residues):
        return residues[position]

    print(failure)
    return None




#######################################################################
def hbond(name=None,
          a1=None,a2=None,
          r=1.0,g=1.0,b=0.2,
          weight=0.03,
          dash_gap=0.20,
          dash_length=0.10):

    """
    AUTHOR       ( Gareth Stockwell )

    DESCRIPTION

    "hbond" builds the CGO primitives for a dashed line between two
    atoms.  Nothing is loaded into PyMOL here; the caller loads the
    returned list.

    USAGE

    hbond(name, a1, a2 [, r, g, b [, weight [, dash_gap [, dash_length]]]])

    name = name of hbond object (accepted but not used)
    a1, a2 = atom objects; the first three entries of their .coord are
             used as x, y, z
    r, g, b = colour
    weight = radius of the dash cylinders
    dash_gap, dash_length = length of a gap / of a dash

    RETURNS

    A flat list with one CYLINDER record per dash, or None (after
    printing a message) when a1 or a2 is missing.  The span is divided
    into floor(distance / (dash_length + dash_gap)) equal periods, each
    starting with a dash.  When the atoms are closer than one period a
    single, proportionally shortened dash is drawn.

    URL  http://www.ebi.ac.uk/~gareth/pymol/downloads/scripts/hbond.py

    NOTE: mo the selection-based call forms of the original script are
    not implemented here.
    """

    if not a1 or not a2:
        print("a1 and a2 cannot be None. Please specify values for a1 and a2")
        return

    # Convert arguments into floating point values
    rr = float(r)
    gg = float(g)
    bb = float(b)

    # added by rj - weight is a passed parameter
    dl = float(dash_length)
    gl = float(dash_gap)
    dr = float(weight)

    # Get coords of the atoms that were passed in
    x1,y1,z1 = a1.coord[0:3]
    x2,y2,z2 = a2.coord[0:3]

    # Calculate distances
    dx,dy,dz = x2-x1, y2-y1, z2-z1
    d = math.sqrt((dx*dx) + (dy*dy) + (dz*dz))

    # Work out how many times (dash_len + gap_len) fits into d
    dash_tot = dl + gl
    n_dash = int(math.floor(d / dash_tot))
    # Atoms closer than one period used to give n_dash == 0 and a
    # ZeroDivisionError below; draw a single dash instead.
    if n_dash < 1:
        n_dash = 1

    # Step from one dash start to the next, and the part of it that is drawn
    dx2 = dx / n_dash
    dy2 = dy / n_dash
    dz2 = dz / n_dash
    dash_fraction = dl / dash_tot
    dx1 = dash_fraction * dx2
    dy1 = dash_fraction * dy2
    dz1 = dash_fraction * dz2

    # Empty CGO object
    obj = []

    # Generate dashes
    x,y,z = x1,y1,z1
    for i in range(n_dash):
        # One cylinder per dash: start, end, radius, colour at each end
        obj.extend( [ CYLINDER,
                      x, y, z,
                      x+dx1, y+dy1, z+dz1,
                      dr,
                      rr, gg, bb,
                      rr, gg, bb ] )

        # Move to start of next dash
        x,y,z = (x + dx2, y + dy2, z + dz2)

    return obj

#######################################################################
def makeSphereObject(sele_exp):
    """Return the CGO record for a sphere on the first atom of a selection.

    sele_exp is a PyMOL selection expression.  The result is the list
    [SPHERE, x, y, z, uns_hbond_radius], where x, y, z are the
    coordinates of the first atom cmd.get_model() returns for the
    selection.  Returns None (after printing a message) when the
    selection has no atom or the atom has no usable coordinates."""

    # get_model returns ChemPy atom objects corresponding to the passed in selection
    atoms = cmd.get_model(sele_exp).atom

    # Only the failures the indexing/unpacking itself can produce are
    # handled; anything else (e.g. KeyboardInterrupt) propagates.
    try:
        x,y,z = atoms[0].coord[0:3]
    except (IndexError, AttributeError, TypeError, ValueError):
        print("%s %s" % ("No coordinates available for atoms in ", sele_exp))
        return None

    return [SPHERE,float(x),float(y),float(z),float(uns_hbond_radius) ]



#######################################################################
def satisfiedHBonds(obj, hbond_data):
    """Draw the satisfied hydrogen bonds of obj as dashed lines.

    obj must provide .name and .model.atom.  hbond_data is a sequence
    of tuples

        (don_chain, don_resi, don_resn, don_res_atom,
         acc_chain, acc_resi, acc_resn, acc_res_atom, energy, distance)

    in any order.  Each bond becomes a dashed line whose color
    saturation depends on its energy: the lowest energy in hbond_data
    gets full saturation, higher energies get paler, in nbins steps.
    All dashes are loaded as one CGO object named "<obj.name>_hb".

    Bonds whose donor and acceptor chains differ are also printed and
    get a distance label on the donor atom.  A bond whose donor or
    acceptor atom cannot be found in obj is reported and skipped.

    Returns None."""

    cmd.set("label_color", label_color)

    # NOTE(review): the bare excepts around PyMOL calls are kept on
    # purpose - presumably some PyMOL versions raise exception classes
    # that do not derive from Exception; confirm before narrowing them.
    try:
        cmd.set("label_size", label_size)
    except:
        # (the tab inside this message comes from the original line
        # continuation and is preserved)
        print("HB Plugin: Old version of PyMOL. "
              "\tDefault sized labels being used.")

    # Energy range of the data.  Computed with min/max so that the
    # coloring does not depend on hbond_data being sorted.
    try:
        energies = [float(datum[8]) for datum in hbond_data]
        best_energy = min(energies)
        worst_energy = max(energies)
        bin_width = (worst_energy - best_energy) / nbins
    except (IndexError, TypeError, ValueError, ZeroDivisionError):
        print("Error in hydrogen bond data.")
        print(hbond_data)
        return

    #mo create color bins
    #mo a color pair is an energy level and a color
    colorPairs = []
    for binNum in range(nbins):
        hsv = (hue, sat * float(nbins-binNum)/nbins, value)
        colorPairs.append((best_energy+binNum*bin_width, colorsys.hsv_to_rgb(*hsv)))

    # CGO primitives of all bonds, loaded in one go at the end
    bondsObjects = []

    #Labels go on donor atoms.  Each donor can participate in more than one h-bond.
    #donor selection -> label text
    labelList = {}

    for datum in hbond_data:
        (don_chain, don_resi, don_resn, don_res_atom,
         acc_chain, acc_resi, acc_resn, acc_res_atom,
         energy, distance) = datum
        energy = float(energy)

        # collect the data and make a call to hbond
        name = "hb.%s-%s.%s-%s"%(don_resi,don_res_atom,acc_resi,acc_res_atom)
        sele1_exp='/'+'/'.join([obj.name,'',don_chain,don_resi,don_res_atom])
        sele2_exp='/'+'/'.join([obj.name,'',acc_chain,acc_resi,acc_res_atom])

        # index returns tuples containing object name and atom object index
        x1 = cmd.index( sele1_exp, 1 )
        x2 = cmd.index( sele2_exp, 1 )

        # A missing atom only costs this one bond; it used to abort the
        # whole function without showing the message.
        if len(x1) < 1:
            print("HB Plugin: Selection " + sele1_exp + " has no atoms.")
            continue
        if len(x2) < 1:
            print("HB Plugin: Selection " + sele2_exp + " has no atoms.")
            continue

        # atom indices from cmd.index are 1-based
        a1 = obj.model.atom[ x1[0][1] - 1 ]
        a2 = obj.model.atom[ x2[0][1] - 1 ]

        #mo bin datum, apply color
        # Reset per bond so that a color from the previous bond is never
        # reused and the name is always bound.
        rgbTuple = None
        if energy <= colorPairs[0][0]:
            rgbTuple = colorPairs[0][1]
        else:
            for index in range(len(colorPairs) - 1):
                if colorPairs[index][0] < energy <= colorPairs[index+1][0]:
                    rgbTuple = colorPairs[index][1]
                    break
        # above the last bin boundary: use the palest color
        if rgbTuple is None:
            rgbTuple = colorPairs[-1][1]

        # make the call to Gareth's function to construct the CGO
        try:
            cgoObject = hbond(name=name,
                              a1=a1, a2=a2,
                              r=rgbTuple[0], g=rgbTuple[1], b=rgbTuple[2],
                              weight=bondWidth,
                              dash_gap=gapLength,
                              dash_length=dashLength )

            # save into our arrays for "load"ing later
            bondsObjects.extend( cgoObject )

        except Exception:
            print(sele1_exp)
            print(sele2_exp)
            print("HB Plugin: ERROR 007: "
                  "\t    Error in creating CGO object for bond.")
            # the label and the printout below are still produced

        # Labels and the console line are only produced for bonds
        # between two different chains.
        if don_chain != acc_chain:
            # label text: the bond length, quoted for cmd.label
            distanceAndEnergyText = '"%sA"' % str(round(distance, 2))
            if sele1_exp in labelList:
                labelList[sele1_exp] = "%s, %s" % \
                    (labelList[sele1_exp], distanceAndEnergyText)
            else:
                labelList[sele1_exp] = distanceAndEnergyText

            # print out a line containing all the info in the PDB file so
            # people can check the energy easily
            print(("HB Plugin: %s %s-%s %s - %s-%s %s: "
                   "\t    distance: %2.2f, hbE: %2.2f") %
                  (obj.name, don_resn, don_resi, don_res_atom,
                   acc_resn, acc_resi, acc_res_atom, distance, energy))

    # now that the CGOs are all ready, make them visible in pymol
    cmd.load_cgo( bondsObjects, obj.name + "_hb" )

    # go through the list of labels and display them
    for sele_exp in labelList.keys():
        try:
            cmd.label(sele_exp, labelList[sele_exp])
        except:
            print(sele_exp)
            print("HB Plugin: ERROR 008: Error in creating bond label.")
            # to next label



#######################################################################
def unsatisfiedHBonds(obj, uns_hbond_data):
    """Mark the unsatisfied hydrogen-bonding groups of obj with spheres.

    uns_hbond_data is a sequence of (group_type, res_type, res_num)
    tuples.  Each group is translated to atom names: "BBO" -> O,
    "BBH" -> H, anything else through the group2atoms table keyed by
    (group_type, res_type).  "IGNOR" groups, and groups without a
    translation, are skipped.  One sphere is made per atom with
    makeSphereObject(), and all spheres are loaded as one CGO object
    named "<obj.name>_unsat".

    Returns None."""

    # since e.g. "COO", "ASP" has two O's that should be marked, the
    # group2atoms table is used to figure out which atoms to make
    # sphere objects on

    unsatisfied = []

    for datum in uns_hbond_data:
        group_type, res_type, res_num = datum[0], datum[1], datum[2]

        # I'm assuming that unsatisfied IGNOR groups should be ignored...
        if group_type == "IGNOR":
            continue

        if group_type == "BBO":
            atoms = ["O"]
        elif group_type == "BBH":
            atoms = ["H"]
        else:
            # Broad on purpose: group2atoms lives outside this function,
            # and any failure to look the group up means "no translation".
            try:
                atoms = group2atoms[(group_type, res_type)]
            except Exception:
                print("HB Plugin: No atom translation available for group: '" + group_type + "-" + res_type + "'. Omitting this group.")
                continue

        for atom in atoms:
            sele_exp = '/' + '/'.join([ obj.name, '', '*', res_num, atom])
            sphere = makeSphereObject( sele_exp )
            # makeSphereObject returns None when the atom is not found;
            # extending with None would raise TypeError.
            if sphere:
                unsatisfied.extend( sphere )

    # now that we have all the spheres created, load them all in
    cmd.load_cgo( unsatisfied, obj.name + "_unsat" )



#######################################################################
def do_hbond(obj, pdb_filename=None):
    """ draw lines for satisfied hbonds, balls for unsatisfied hbonds
    and color based on hbond score

    obj is the object to decorate; it is handed to satisfiedHBonds() and
    unsatisfiedHBonds(), and its .model is used to translate old-style
    residue numbers.

    The pdb file is taken from, in this order: the pdb_filename
    argument, obj.score_filename, a file-open dialog.  If none of them
    yields a name, or the file cannot be opened, nothing is drawn.

    On success the file name is remembered in obj.score_filename.
    Returns None."""

    if DEBUG: print("begin H-Bond visualizer:")

    if DEBUG: print("get pdb filename")
    if not pdb_filename:
        # obj may not have had a score file assigned yet
        pdb_filename = getattr(obj, "score_filename", None)

    if not pdb_filename:
        pdb_filename = tkFileDialog.askopenfilename()

    if not pdb_filename:
        return None


    if DEBUG: print("get pdb file")
    try:
        pdb = open(pdb_filename)
    except (IOError, OSError):
        print("file "+pdb_filename+" unreadable ")
        return None


    # find_hbond_data() reads the whole file, so the handle can be
    # released right away - also when reading fails.
    if DEBUG: print("extract hbond data from pdb")
    try:
        hbDataLines, unshbDataLines = find_hbond_data(pdb)
    finally:
        pdb.close()

    if DEBUG: print("parse hbond data")
    hbond_data = getHydrogenBondData(hbDataLines)


    if DEBUG: print("display hbond data")
    satisfiedHBonds(obj, hbond_data)


    if DEBUG: print("parse unsatisfied hbond data")
    uns_hbond_data = getUnsatisfiedHydrogenBondData(unshbDataLines,obj.model)


    if DEBUG: print("display unsatisfied hbonds")
    unsatisfiedHBonds(obj, uns_hbond_data)

    if DEBUG: print("finsh up hbond visualization.")
    obj.score_filename = pdb_filename
