#!/usr/bin/perl

##  Initial Author: Nir London (londonir@cs.huji.ac.il)

#NOTE: the columns in the score files (*.sc and funHuntINT.OUT) read by this script are assumed
# to be in the order produced by rosetta.2.3.0
# e.g. (column numbers are 1-based)
#  $score is column 2
#  $sc1 is column 14
#  $ncont  is column 8
#  $d_env is column 16
#  $sasa is column 47
#  the number of unsatisfied buried HB_BB is taken from the funHuntINT.OUT file
#  and is assumed to be in column 19

#Given a pdb file named aaXXXX.ppk_????.pdb (XXXX is the pdb id),
#outputs the pdb's parameters.
#Assumptions: the following files are in the working directory:
#XXXX.sc
#funHuntINT.OUT
#analyze_interface.log

# Setup: derive all input names from the single command-line argument and
# open the three input files.
#   $pdbfile  - the argument itself (expected form: aaXXXX.ppk_????.pdb)
#   $pdb      - characters 3-6 of the file name (the XXXX id)
#   $model    - the first 15 characters of the file name
#   $fascfile - "<pdb>.sc"
#   $intfile  - always "funHuntINT.OUT"
# Without an argument every derived name would be empty and the later
# pattern matches would run with an empty pattern, so stop early instead.
$pdbfile = $ARGV[0];
if (!defined($pdbfile) || $pdbfile eq "") {
  die "Usage: $0 aaXXXX.ppk_????.pdb\n";
}
$pdb = substr($pdbfile,2,4);
$model = substr($pdbfile,0,15);
$fascfile = $pdb.".sc";
$intfile = "funHuntINT.OUT";

# Three-argument open: the file name is never interpreted as a mode or a
# command. A failed open is reported but is not fatal; the reads further
# down then find nothing, $sanity stays below 2 and only an empty line is
# printed, exactly as before.
open (PDB, "<", $pdbfile) or warn "WARNING: cannot open $pdbfile: $!\n";
open (FASC, "<", $fascfile) or warn "WARNING: cannot open $fascfile: $!\n";
open (INT, "<", $intfile) or warn "WARNING: cannot open $intfile: $!\n";

# Counts how many of the two score sources (INT file, .sc file) yielded an
# entry for this model; the report line is printed only when it reaches 2.
$sanity = 0;

#retrieve params from INT.out file
# Scan the INT handle for the first line containing $model. $found is set to
# 1 on that line and incremented on every following line; the line on which
# it reaches 9 (i.e. the 8th line after the match) is taken as the scores
# line. Field 19 (index 18) of that whitespace-split line is stored in
# $D_BBHB_UNS and $sanity is incremented.
$found = 0;
while (<INT>) {
  $line = $_;
  #we found the entry: count the lines until we get to the scores
  if ($found > 0){
    $found = $found + 1;
  }
  #we're at the scores line
  if ($found == 9){
    $sanity = $sanity +1;
    @components = split (" ",$line);
    #unsatisfied BB HB
    $D_BBHB_UNS = $components[18];
    last;
  }
  # \Q...\E: match the model name literally. Unquoted, the "." in the name
  # would match any character and other metacharacters could change or
  # break the pattern.
  if ($line =~ m/\Q$model\E/) {
    $found = 1;
  }
}

#retrieve params from fasc file
# Take the first line of the FASC handle that contains the pdb file name,
# split it on whitespace and pick the score columns by position (0-based
# indices below; see the column note at the top of the file). $sanity is
# incremented when such a line is found.
while (<FASC>){
  $line = $_;
  # \Q...\E: the file name contains dots, which must match literally and
  # not as "any character".
  if ($line =~ m/\Q$pdbfile\E/){
    $sanity = $sanity +1;
    @fasc_comp = split (" ",$line);
    #score and sc1 (delta score is computed from these two later)
    $score = $fasc_comp[1];
    $sc1 = $fasc_comp[13];
    #ncont
    $ncont = $fasc_comp[7];
    #denv
    $d_env = $fasc_comp[15];
    #sasa
    $sasa = $fasc_comp[46];
    last;
  }
}

#create a hashtable of the aa coords
# First pass over the PDB handle. Two kinds of lines are used:
#  * "ATOM  " records whose atom-name field is " CA ": the coordinates are
#    stored in %coords under the key "<resName><resNum>:<chain>" as "x:y:z".
#  * lines starting with "dockin" whose characters 13-16 are "1_ce" or
#    "2_ce": whitespace-separated fields 4-6 are taken as the x/y/z of
#    centroid 1 or centroid 2 (presumably Rosetta's
#    docking_part1/2_centroid lines - confirm against a sample file).
# Fields are cut out by fixed character position with substr.
%coords = ();
while (<PDB>) {
  $line = $_;
  $record = substr($line,0,6);
  $type = substr($line,12,4);
  $resName = substr($line,17,3);
  $chain = substr($line,21,1);
  $resNum = substr($line,22,4);
  $x = substr($line,30,8);
  $y = substr($line,38,8);
  $z = substr($line,46,8);
  #clean white spaces (all of them, not only the first run)
  foreach my $field ($resNum, $x, $y, $z) {
    $field =~ s/\s+//g if defined $field;
  }
  if (($record eq "ATOM  ") && ($type eq " CA ")){
    $coords{$resName.$resNum.":".$chain} = $x.":".$y.":".$z;
  }
  if ($record eq "dockin") {
    @cenCoords = split (" ",$line);
    #strip the punctuation around the three numbers
    foreach my $coord (@cenCoords[3..5]) {
      $coord =~ s/[,)]//g if defined $coord;
    }
    if ($type eq "1_ce") {
      ($cen1x, $cen1y, $cen1z) = @cenCoords[3..5];
    }
    elsif ($type eq "2_ce"){
      ($cen2x, $cen2y, $cen2z) = @cenCoords[3..5];
    }
  }
}
# Rewind for the second pass by reopening the file. Three-argument open so
# the name cannot be read as a mode; a failure is reported, not fatal.
close (PDB);
open (PDB, "<", $pdbfile) or warn "WARNING: cannot reopen $pdbfile: $!\n";

# The 20 standard amino-acid three-letter codes. The order matters: it
# decides which of "X.Y" / "Y.X" is the key used for a residue pair below.
@aaTypes = qw(ALA CYS ASP GLU PHE GLY HIS ILE LYS LEU MET ASN PRO GLN ARG SER THR VAL TRP TYR);

#hash table which holds amino acid content in the IF
# every residue type starts with a count of zero
%aac = map { $_ => 0 } @aaTypes;

#hash which holds pairs of aa
# One zeroed entry per unordered pair (a type paired with itself included),
# keyed "<first>.<second>" with <first> at or before <second> in @aaTypes.
%aapairc = ();
for my $i (0 .. $#aaTypes){
  for my $j ($i .. $#aaTypes) {
    $aapairc{"$aaTypes[$i].$aaTypes[$j]"} = 0;
  }
}

# Second pass over the PDB handle: read the residue-pair table.
#
# $found is a small state machine:
#   0 - table not reached yet
#   1 - the line containing "Pair" was just seen; the next line is skipped
#   2 - inside the table; every line is one residue pair, and the first
#       empty line ends the table
#
# A table row is whitespace separated and describes two residues, each as
#   [chain] number name
# where the chain column may be absent for either residue. Whether it is
# present is detected by checking where a residue name shows up.
#
# Results:
#   %interface - one entry per interface residue ("<name><number>:<chain>"),
#                holding the field two positions after the second name
#   %aac       - count of each residue name among the interface residues
#   %aapairc   - count of each residue-name pair
$found = 0;
%interface = ();

# lookup table of the names that identify a residue-name column
my %is_residue_name = map { $_ => 1 }
  qw(ALA CYS ASP GLU PHE GLY HIS ILE LYS LEU MET ASN PRO GLN ARG SER THR TYR TRP VAL);

while (defined($line = <PDB>)) {
  # header line: (re)start the table
  if ($line =~ m/Pair/){
    $found = 1;
    next;
  }
  # the line right after the header is not data
  if ($found == 1) {
    $found = 2;
    next;
  }
  next if $found != 2;
  # an empty line terminates the table
  last if $line eq "\n";

  @parts = split(" ",$line);

  # A name in the third field means the first residue has a chain column;
  # that shifts where the second residue's name would be when it has a
  # chain column too.
  $chainA = exists $is_residue_name{$parts[2]} ? 1 : 0;
  $BchainPos = $chainA ? 5 : 4;
  $chainB = exists $is_residue_name{$parts[$BchainPos]} ? 1 : 0;

  # positions of the two residue names and of the stored value
  my $nameA = $chainA + 1;
  my $nameB = $chainA + $chainB + 3;
  my $valuePos = $chainA + $chainB + 5;

  # keys have the form name . number . ":" . chain (chain empty if absent);
  # the number sits right before the name, the chain right before that
  $res1 = $parts[$nameA].$parts[$nameA - 1].":";
  $res1 = $res1.$parts[0] if $chainA;
  $res2 = $parts[$nameB].$parts[$nameB - 1].":";
  $res2 = $res2.$parts[$nameB - 2] if $chainB;

  # count the pair under whichever orientation is already a key,
  # falling back to the reversed orientation
  my $pairKey = "$parts[$nameA].$parts[$nameB]";
  if (!exists $aapairc{$pairKey}) {
    $pairKey = "$parts[$nameB].$parts[$nameA]";
  }
  $aapairc{$pairKey}++;

  # register each residue once; the first row that mentions it wins
  foreach my $entry ([$res1, $nameA], [$res2, $nameB]) {
    my ($resKey, $namePos) = @$entry;
    next if exists $interface{$resKey};
    $interface{$resKey} = $parts[$valuePos];
    $aac{$parts[$namePos]}++;
  }
}

#centroidity: the straight-line distance between the two centroid points
#(cen1x,cen1y,cen1z) and (cen2x,cen2y,cen2z)
$centro = 0;
foreach my $delta ($cen1x-$cen2x, $cen1y-$cen2y, $cen1z-$cen2z) {
  $centro = $centro + ($delta**2);
}
$centro = sqrt($centro);
$centroAvg = 0;

#conservation file should be of the form:
#two columns: AaNum:Chain Score //named pdbID.con
#
# Computes $avgCon, the mean conservation score over the interface residues.
# The file is optional. When it exists, every interface residue is counted
# in the denominator; a residue without a score adds nothing to the sum and
# triggers a warning on STDERR. When the file does not exist, or there are
# no interface residues, $avgCon is 0.
$confile = $pdb.".con";
$conCount = 0;
$conScore = 0;

if (-e $confile) {
  %conSurf = ();
  # three-argument open, checked: an unreadable file is reported once here
  # instead of being silently treated as an empty one
  if (open (CON, "<", $confile)) {
    while (<CON>){
      $conline = $_;
      @conparts = split(" ",$conline);
      # blank or one-column lines carry no score; do not record them, so
      # the residue is reported as missing below
      next if @conparts < 2;
      # the first score listed for a residue wins
      if (!(exists($conSurf{$conparts[0]}))){
        $conSurf{$conparts[0]} = $conparts[1];
      }
    }
    close (CON);
  }
  else {
    warn "WARNING: cannot open $confile: $!\n";
  }

  foreach my $resKey (keys(%interface)) {
    $conCount = $conCount + 1;
    if (exists ($conSurf{$resKey})){
      $conScore = $conScore + $conSurf{$resKey};
    }
    else {
      print STDERR "WARNING: $pdb $resKey has no conservation score\n";
    }
  }
}

#if no conservation scores are present avgCon is indeed zero
$avgCon = ($conCount == 0) ? 0 : $conScore/$conCount;


#delta score the score10d_min way (kept for reference, not used):
#$deltaScore = $sc1 -(0.322*$fa_rep + $score);

#delta score actually reported: sc1 minus the total score
$deltaScore = $sc1 - $score;

# Report: one tab-separated line, each field followed by a tab. The fields
# are written only when both score sources had an entry for this model
# ($sanity == 2); the terminating newline is written in every case.
if ($sanity == 2) {
  my @fields = (
    $pdb,
    $pdbfile,
    $ncont,
    $d_env,
    $sasa,
    sprintf("%.3f", $deltaScore),
    $D_BBHB_UNS,
    sprintf("%.3f", $avgCon),
    sprintf("%.3f", $centro),
  );
  print (join("\t", @fields), "\t");
}
print ("\n");

# release the input files
close(PDB);
close(FASC);
close(INT);

