#!/usr/bin/perl
##
## Copyright 2002, University of Washington, the Baker Lab, and Dylan Chivian.
##   This document contains private and confidential information and its 
##   disclosure does not constitute publication.  All rights are reserved by 
##   University of Washington, the Baker Lab, and Dylan Chivian, except those 
##   specifically granted by license.
##
##  Initial Author: Dylan Chivian (dylan@lazy8.com)
##  $Revision: 11754 $
##  $Date: 2006-12-11 20:08:00 -0500 (Mon, 11 Dec 2006) $
##  $Author: vanita $
##
###############################################################################
# Vanita Sood, April 6, 2004
# Makes a resfile for each of 20 substitutions at the specified design residue
# Pass in the residue to be designed, with or without a chain (e.g. 1 or C_3)
# Optional: pass in a file with comma-separated residues for repacking
# Optional: pass in a file with comma-separated one-letter amino acids
# (these are then the only substitutions that will be made)
# Pass in the pdb file
###############################################################################
# conf
###############################################################################
$| = 1;                                    # flush STDOUT after every write
###############################################################################
# init
###############################################################################

# Parse the command line once, then pull the four settings out with a hash
# slice.  -repack and -subst are optional, so $repack and $subst may be undef.
my %opts = &getCommandLineOptions ();
my ($pdbfile, $des, $repack, $subst) = @opts{qw(pdb desres repack subst)};

# Every line of the PDB file, one array element per line (package global).
@pdb_buf = &fileBufArray ($pdbfile);

###############################################################################
# main
###############################################################################
# Get repack residues
#
# The repack file holds one comma-separated list of residues on its FIRST
# line (any later lines are ignored).  Entries are only trimmed here; they are
# interpreted further down, where matching NATRO lines are switched to NATAA.
my @repack;
if (defined $repack) {
    # Three-arg open with a lexical handle, and a hard failure instead of
    # silently carrying on with an empty list.
    open (my $repack_fh, '<', $repack)
        or &abort ("unable to open repack file $repack for reading: $!");
    my $rep = <$repack_fh>;
    close ($repack_fh);

    $rep = ''  if (! defined $rep);        # nothing readable => no residues
    chomp $rep;
    $rep =~ s/^\s+|\s+$//g;

    # Trim every entry and drop empty ones (e.g. from "1,,2" or " , ").
    @repack = grep { length } map { s/^\s+|\s+$//g; $_ } split (/,/, $rep);
}
# Get aa identities and chain ID for each residue
#
# One residue is recorded for every CA atom (or C1* atom) found on an
# ATOM/HETATM record.  Residues are numbered sequentially from 1 in file
# order; that sequential number is the key of the three lookup hashes built
# at the end of this section.
#
# Fixed columns read from each record (0-based offset, length):
#   atom name       13,2  ("CA")   or  13,4  ("C1* ")
#   residue name    17,3
#   chain ID        21,1
#   residue number  22,4
my @res_num;
my @res_id;
my @chain;
my @sequential_residue_number = ();
my $sqrn = 0;
foreach my $line (@pdb_buf) {
  # Only coordinate records describe atoms.  Without this filter a record
  # such as ANISOU, which repeats the atom-name columns of its ATOM line,
  # would be counted as a second residue.
  next  if ($line !~ /^(?:ATOM  |HETATM)/);
  next  if (length ($line) < 26);          # too short to hold the number
  next  unless (substr($line,13,2) eq 'CA' || substr($line,13,4) eq 'C1* ');

  # The residue number occupies four columns; reading only the last three
  # (as was done before) truncates numbers such as 1000 or -100.
  my $res_num = substr($line,22,4);
  $res_num =~ s/^\s+|\s+$//g;
  my $res_id = substr($line,17,3);
  $res_id =~ s/^\s+|\s+$//g;
  my $ch = substr($line,21,1);
  $ch =~ s/^\s+|\s+$//g;

  $sqrn++;
  push (@res_num, $res_num);
  push (@res_id, $res_id);
  push (@chain, $ch);
  push (@sequential_residue_number, $sqrn);
}

# sequential residue number => three-letter residue name
my %pdbaa = ();
@pdbaa{@sequential_residue_number} = @res_id;

# sequential residue number => chain ID ('' when the chain column is blank)
my %chain_id = ();
@chain_id{@sequential_residue_number} = @chain;

# sequential residue number => residue number as written in the PDB file
my %residue_number = ();
@residue_number{@sequential_residue_number} = @res_num;

my @resfile_buf;
# Template rosetta resfile.
# The explanatory header goes in first, as ONE multi-line element; it is
# followed by one element per residue, all of them initially NATRO.
my $resfile_top =
" This file specifies which residues will be varied
                                                  
 Column   2:  Chain                               
 Column   4-7:  sequential residue number         
 Column   9-12:  pdb residue number                
 Column  14-18: id  (described below)             
 Column  20-40: amino acids to be used            
                                                  
 NATAA  => use native amino acid                  
 ALLAA  => all amino acids                        
 NATRO  => native amino acid and rotamer          
 PIKAA  => select inividual amino acids           
 POLAR  => polar amino acids                      
 APOLA  => apolar amino acids                     
                                                  
 The following demo lines are in the proper format
                                                  
 A    1    3 NATAA                                
 A    2    4 ALLAA                                
 A    3    6 NATRO                                
 A    4    7 NATAA                                
 B    5    1 PIKAA  DFLM                          
 B    6    2 PIKAA  HIL                           
 B    7    3 POLAR                                
 -------------------------------------------------
 start";
chomp $resfile_top;
push (@resfile_buf, $resfile_top);

# One fixed-width line per residue (0-based offsets):
#   0-1   chain ID, right-justified
#   2-6   sequential residue number
#   7-11  pdb residue number
#   12    blank
#   13-17 id (NATRO here)
#   18    blank
for my $seq_no (@sequential_residue_number) {
  my $residue_line = sprintf ("%2s%5s%5s NATRO ",
                              $chain_id{$seq_no},
                              $seq_no,
                              $residue_number{$seq_no});
  push (@resfile_buf, $residue_line);
}
# Make design substitutions
#
# -desres is either "N" (pdb residue number N; the first residue line with
# that number wins) or "C_N" (residue N on chain C).  The matching residue
# line is switched from NATRO to PIKAA; the amino acid itself is filled in
# when the individual resfiles are written.
my @chain_residue = split("_",$des);
my ($design_chain, $design_residue) =
    (@chain_residue == 2) ? @chain_residue
  : (@chain_residue == 1) ? (undef, $chain_residue[0])
  :                         (undef, undef);

my $design_found = 0;
# Insist on an integer: a non-numeric string would numify to 0 and could
# match a residue numbered 0.
if (defined $design_residue && $design_residue =~ /^\s*-?\d+\s*$/) {
  foreach my $line (@resfile_buf) {
    # Only residue lines carry the id in columns 13-17.  This skips the
    # multi-line header element, whose text must never be compared or edited.
    next  unless (length ($line) >= 18 && substr($line,13,5) eq 'NATRO');
    next  unless ($design_residue == substr($line,7,5));
    next  if (defined $design_chain && $design_chain ne substr($line,1,1));

    $line =~ s/NATRO/PIKAA/;
    $design_found = 1;
    last;
  }
}
if (! $design_found) {
  # Previously this case was silent and produced resfiles with no PIKAA line.
  print STDERR "$0: WARNING: design residue '$des' not found in $pdbfile; "
             . "the resfiles will contain no PIKAA line\n";
}

# Repack residues
#
# Every entry of @repack names one residue whose line is switched from NATRO
# to NATAA.  Accepted spellings of an entry:
#   "4"            pdb residue number 4, first residue line with that number
#   "C 4", "C_4"   residue 4 on chain C
#   "C4"           same, chain ID attached (chain must then be a non-digit)
# GLY and ALA residues are reported and left as NATRO.
if (defined $repack) {
  foreach my $repack_part (@repack) {
    $repack_part =~ s/^\s+|\s+$//g;        # /g so that BOTH ends are trimmed

    my ($repack_chain, $repack_residue);
    if ($repack_part =~ /^(-?\d+)$/) {
      $repack_residue = $1;
    }
    elsif ($repack_part =~ /^(\S)[\s_]+(-?\d+)$/) {
      ($repack_chain, $repack_residue) = ($1, $2);
    }
    elsif ($repack_part =~ /^([^\d\s_-])(-?\d+)$/) {
      ($repack_chain, $repack_residue) = ($1, $2);
    }
    else {
      print STDERR "$0: WARNING: cannot parse repack residue '$repack_part'; skipped\n";
      next;
    }

    my $repack_found = 0;
    foreach my $line (@resfile_buf) {
      # Only residue lines carry an id in columns 13-17; this skips the
      # multi-line header element.
      next  unless (length ($line) >= 18);
      my $id = substr($line,13,5);
      next  unless ($id eq 'NATRO' || $id eq 'NATAA' || $id eq 'PIKAA');

      next  unless ($repack_residue == substr($line,7,5));
      next  if (defined $repack_chain && $repack_chain ne substr($line,1,1));
      $repack_found = 1;

      # %pdbaa is keyed by the sequential residue number held in columns 2-6
      # of the matched line.  (The old code looked it up with a variable that
      # was never set, so the GLY/ALA test could not work.)
      (my $seq_no = substr($line,2,5)) =~ s/^\s+|\s+$//g;
      my $aa = $pdbaa{$seq_no};
      if (defined $aa && ($aa eq "GLY" || $aa eq "ALA")) {
        print "no point in repacking $repack_part, it's a $aa\n";
      }
      else {
        # No effect on the design residue, whose line already reads PIKAA.
        $line =~ s/NATRO/NATAA/;
      }
      last;
    }
    if (! $repack_found) {
      print STDERR "$0: WARNING: repack residue '$repack_part' not found; skipped\n";
    }
  }
}
# Write one resfile per substitution, named <desres><aa>.res, in the current
# directory.  By default all 20 amino acids are used; with -subst only the
# comma-separated one-letter codes on the first line of that file are used.
my @aa_choices = qw/A C D E F G H I K L M N P Q R S T V W Y/;
if (defined $subst) {
    open (my $subst_fh, '<', $subst)
        or &abort ("unable to open substitution file $subst for reading: $!");
    my $buf = <$subst_fh>;
    close ($subst_fh);

    $buf = ''  if (! defined $buf);
    chomp $buf;
    $buf =~ s/^\s+|\s+$//g;

    # Trim each code: an untrimmed " C" from "A, C" would otherwise end up
    # inside the output file name.
    @aa_choices = grep { length } map { s/^\s+|\s+$//g; $_ } split (/,/, $buf);
}
foreach my $aa (@aa_choices) {
  my $outfile = $des.$aa.".res";
  open (my $out_fh, '>', $outfile)
      or &abort ("unable to open file $outfile for writing: $!");
  foreach my $resfile_line (@resfile_buf) {
    # Work on a copy so that the shared template is never modified.
    my $out_line = $resfile_line;
    if (length ($out_line) >= 18 && substr($out_line,13,5) eq "PIKAA") {
      # The amino acid goes right-justified into columns 18-20.
      substr ($out_line,18,3) = sprintf("%3s",$aa);
    }
    print {$out_fh} $out_line."\n";
  }
  # Buffered write errors only surface when the handle is closed.
  close ($out_fh)
      or &abort ("error while writing file $outfile: $!");
}
exit 0;
###############################################################################
# subs
###############################################################################

# getCommandLineOptions()
#
#  Parses @ARGV with Getopt::Long and validates the result.
#
#  rets:  %opts  hash (returned as a key/value list, not a reference) holding
#                the options that were given: pdb, desres, repack, subst
#  exits: -1, after printing the usage text to STDERR, when -pdb or -desres
#         is missing; checkExist() exits when a named file is missing or
#         empty
#
sub getCommandLineOptions {
    use Getopt::Long;
    my $usage = qq{usage: $0
\t-pdb <pdbfile>
\t-desres <design_res_number>
\t[-repack <repack_res_file>]
\t[-subst <substitution_list_file>]
Note you can pass in "-desres 1" which means first chain residue 1, 
  or "-desres B_1" which means chain B, residue #1.
For repack list, use comma sep values with or without chain ID, eg
  "1,4,A5,B3, B56, C 4"
};

    # Get args
    my %opts = ();
    &GetOptions (\%opts, "pdb=s", "desres=s", "repack=s", "subst=s");

    # Check for legal invocation
    if (! defined $opts{pdb} || ! defined $opts{desres}) {
        print STDERR "$usage\n";
        exit -1;
    }

    # Every file named on the command line must exist and be non-empty.
    # Looping over the option names keeps the key that is tested and the key
    # that is checked identical (the old code tested a misspelt key, so the
    # -subst file was never validated).
    &checkExist ('f', $opts{pdb});
    foreach my $optional (qw(repack subst)) {
        &checkExist ('f', $opts{$optional})  if (defined $opts{$optional});
    }

    return %opts;
}

###############################################################################
# util
###############################################################################

# readFiles()
#
#  Lists the plain files directly inside a directory (no recursion).
#
#  args: $dir            directory to scan
#        $fullpath_flag  true  => return "$dir/name"
#                        false => return bare names
#  rets: names sorted as strings; entries starting with '.' and anything
#        that is not a plain file are left out.  When the directory cannot
#        be opened a message goes to STDERR and the empty list is returned.
#
sub readFiles {
    my ($dir, $fullpath_flag) = @_;

    my $dh;
    if (! opendir ($dh, $dir)) {
        print STDERR "$0: unable to read directory $dir: $!\n";
        return ();
    }
    my @inodes = sort { $a cmp $b } readdir ($dh);
    closedir ($dh);

    my @files = ();
    foreach my $inode (@inodes) {
        next if ($inode =~ /^\./);
        next if (! -f "$dir/$inode");
        push (@files, ($fullpath_flag) ? "$dir/$inode" : $inode);
    }
    return @files;
}

# createDir()
#
#  Makes sure a directory exists, creating missing parents (mkdir -p).
#
#  args:  $dir  directory path
#  rets:  $dir
#  exits: -2 when the directory does not exist and mkdir fails
#
sub createDir {
    my $dir = shift;
    # List-form system() hands $dir to mkdir as ONE argument without going
    # through a shell, so blanks or shell metacharacters in the path are
    # taken literally.
    if (! -d $dir && (system ('mkdir', '-p', $dir) != 0)) {
        print STDERR "$0: unable to mkdir -p $dir\n";
        exit -2;
    }
    return $dir;
}

# copyFile()
#
#  Copies one file with cp(1).
#
#  args:  $src  source path
#         $dst  destination path
#  rets:  $dst
#  exits: -2 when cp reports failure
#
sub copyFile {
    my ($src, $dst) = @_;
    # List-form system(): both paths reach cp as single arguments, with no
    # shell in between to split them on blanks or expand metacharacters.
    if (system ('cp', $src, $dst) != 0) {
        print STDERR "$0: unable to cp $src $dst\n";
        exit -2;
    }
    return $dst;
}

# zip()
#
#  Compresses a file in place with gzip -9.
#
#  args:  $file  path of the uncompressed file
#  rets:  the new path, "$file.gz"
#  exits: -2 when $file already ends in .Z or .gz, or when gzip fails
#
sub zip {
    my $file = shift;
    # Suffix tests.  The .Z pattern used to be anchored at both ends, so it
    # only matched a file called exactly ".Z".
    if ($file =~ /\.Z$/ || $file =~ /\.gz$/) {
        print STDERR "$0: ABORT: already a zipped file $file\n";
        exit -2;
    }
    # List-form system(): no shell, the path is passed as a single argument.
    if (system ('gzip', '-9', $file) != 0) {
        print STDERR "$0: unable to gzip -9 $file\n";
        exit -2;
    }
    $file .= ".gz";
    return $file;
}

# unzip()
#
#  Uncompresses a file in place with gzip -d.
#
#  args:  $file  path ending in .gz or .Z
#  rets:  the path with that suffix removed
#  exits: -2 when $file has neither suffix, or when gzip fails
#
sub unzip {
    my $file = shift;
    # Suffix tests.  The .Z pattern used to be anchored at both ends, which
    # made every "name.Z" file look unzipped and abort here.
    if ($file !~ /\.Z$/ && $file !~ /\.gz$/) {
        print STDERR "$0: ABORT: not a zipped file $file\n";
        exit -2;
    }
    # List-form system(): no shell, the path is passed as a single argument.
    if (system ('gzip', '-d', $file) != 0) {
        print STDERR "$0: unable to gzip -d $file\n";
        exit -2;
    }
    $file =~ s/\.Z$|\.gz$//;
    return $file;
}

# remove()
#
#  Deletes a file or a whole directory tree (rm -rf).
#
#  args:  $inode  path to delete
#  rets:  $inode
#  exits: -2 when rm reports failure
#
sub remove {
    my $inode = shift;
    # Nothing to delete; do not hand rm an empty operand.
    return $inode  if (! defined $inode || $inode eq '');
    # List-form system(): the path reaches rm as exactly one operand, so a
    # blank inside it can no longer turn one target into several.  "--"
    # keeps a path that starts with '-' from being read as an option.
    if (system ('rm', '-rf', '--', $inode) != 0) {
        print STDERR "$0: unable to rm -rf $inode\n";
        exit -2;
    }
    return $inode;
}
     
# runCmd()
#
#  Runs a command line through system() and reports failure on STDERR.
#
#  args:  $cmd    command line
#         $nodie  true => return to the caller on failure instead of exiting
#  rets:  0 on success; on failure with $nodie set, the raw value returned
#         by system() (a wait status, or -1 if the command could not start)
#  exits: on failure without $nodie, with the command's own exit code
#         (1 if it was killed by a signal, 255 if it could not be started)
#
#  When the package global $debug is true the command is echoed to STDOUT
#  before it runs.
#
sub runCmd {
    my ($cmd, $nodie) = @_;
    my $date = `date +'%Y-%m-%d_%T'`;  chomp $date;
    print "[$date]:$0:RUNNING: $cmd\n" if ($debug);

    my $ret = system ($cmd);
    return 0  if ($ret == 0);

    $date = `date +'%Y-%m-%d_%T'`;  chomp $date;
    print STDERR ("[$date]:$0: FAILURE (exit: $ret): $cmd\n");
    return $ret  if ($nodie);

    # system() returns a wait status with the exit code in the high byte.
    # The process exit status keeps only the low 8 bits, so passing the raw
    # value on (e.g. 256 for "exit 1") would make this script exit with 0.
    my $exit_code = ($ret == -1) ? 255 : ($ret >> 8);
    $exit_code = 1  if ($exit_code == 0);  # terminated by a signal
    exit $exit_code;
}

# logMsg()
#
#  Writes one time-stamped message, either to a log file or to STDERR.
#
#  args: $msg      message text
#        $logfile  optional; when true the message is written to this file,
#                  otherwise (or when the file cannot be opened) to STDERR
#  rets: 'true'
#
#  NOTE(review): the log file is opened with '>', so every call replaces the
#  previous contents.  That is the existing behavior and is kept; confirm
#  whether appending ('>>') was intended.
#
sub logMsg {
    my ($msg, $logfile) = @_;
    my $date = `date +'%Y-%m-%d_%T'`;  chomp $date;

    my $log_fh;
    if ($logfile && ! open ($log_fh, '>', $logfile)) {
        print STDERR "$0: unable to open log file $logfile for writing: $!\n";
        undef $log_fh;                     # fall back to STDERR below
    }

    # Print to an explicit handle rather than select()ing one, so that the
    # caller's currently selected output handle is left alone.
    if (defined $log_fh) {
        print {$log_fh} "[$date]:$0: $msg\n";
        close ($log_fh);
    }
    else {
        print STDERR "[$date]:$0: $msg\n";
    }

    return 'true';
}

# checkExist()
#
#  Verifies that a path exists as the requested kind of inode; prints a
#  message to STDERR and exits (exit -3) when it does not.
#
#  args: $type  'd' => $path must be a directory
#               'f' => $path must be a plain file of non-zero size
#               any other value is accepted without a check
#        $path  path to test
#
sub checkExist {
    my ($type, $path) = @_;
    my $complaint;

    if ($type eq 'd') {
        $complaint = "$0: dirnotfound: $path\n"  unless (-d $path);
    }
    elsif ($type eq 'f') {
        if    (! -f $path) { $complaint = "$0: filenotfound: $path\n"; }
        elsif (! -s $path) { $complaint = "$0: emptyfile: $path\n"; }
    }

    if (defined $complaint) {
        print STDERR $complaint;
        exit -3;
    }
}

# abort()
#
#  Prints a time-stamped ABORT message to STDERR and ends the program.
#
#  args:  $msg  text to report
#  exits: always (exit -2); never returns
#
sub abort {
    my ($msg) = @_;
    chomp (my $date = `date +'%Y-%m-%d_%T'`);
    print STDERR "[$date]:$0:ABORT: $msg\n";
    exit -2;
}

# writeBufToFile()
#
#  Writes a list of lines to a file, replacing any previous contents.
#
#  args:   $file    path to write
#          $bufptr  reference to an array of lines (without newlines)
#  rets:   nothing
#  aborts: via abort() when the file cannot be opened, written or closed
#
#  The lines are joined with "\n" and one final "\n" is appended, so an
#  empty array still produces a file holding a single newline.
#
sub writeBufToFile {
    my ($file, $bufptr) = @_;
    # abort() already prefixes the program name, so it is not repeated here.
    open (my $fh, '>', $file)
        or &abort ("unable to open file $file for writing: $!");
    print {$fh} join ("\n", @{$bufptr}), "\n"
        or &abort ("unable to write to file $file: $!");
    # Buffered write errors (e.g. a full disk) only show up at close time.
    close ($fh)
        or &abort ("unable to close file $file after writing: $!");
    return;
}

# fileBufString()
#
#  Reads a whole file into one string.  A name ending in .gz or .Z is read
#  through "gzip -dc".
#
#  args:   $file  path to read
#  rets:   the complete contents as a single string
#  aborts: via abort() when the file (or the gzip pipe) cannot be opened
#
sub fileBufString {
    my $file = shift;
    my $fh;
    if ($file =~ /\.gz$|\.Z$/) {
        # List-form pipe open: gzip gets the path as one argument, no shell.
        open ($fh, '-|', 'gzip', '-dc', $file)
            or &abort ("unable to open file $file for gzip -dc");
    }
    else {
        open ($fh, '<', $file)
            or &abort ("unable to open file $file for reading");
    }
    # Slurp mode for this read only; local() restores $/ on every way out.
    local $/ = undef;
    my $buf = <$fh>;
    close ($fh);
    return $buf;
}

# fileBufArray()
#
#  Reads a whole file and returns it as a list of lines.  A name ending in
#  .gz or .Z is read through "gzip -dc".
#
#  args:   $file  path to read
#  rets:   list of lines without their line terminator; the contents are
#          split on the input record separator ($/) in effect when the sub
#          is called.  Empty lines at the end of the file are dropped,
#          empty lines elsewhere are kept.
#  aborts: via abort() when the file (or the gzip pipe) cannot be opened
#
sub fileBufArray {
    my $file = shift;
    my $sep = $/;                          # separator in effect at call time
    my $fh;
    if ($file =~ /\.gz$|\.Z$/) {
        # List-form pipe open: gzip gets the path as one argument, no shell.
        open ($fh, '-|', 'gzip', '-dc', $file)
            or &abort ("unable to open file $file for gzip -dc");
    }
    else {
        open ($fh, '<', $file)
            or &abort ("unable to open file $file for reading");
    }
    # Slurp inside a do-block so that $/ is only undefined for this one read.
    my $buf = do { local $/ = undef; <$fh> };
    close ($fh);
    $buf = ''  if (! defined $buf);

    # Lexical array: the old code filled the package global @buf.
    my @buf = split (/$sep/, $buf);
    pop (@buf)  if (@buf && $buf[$#buf] eq '');
    return @buf;
}

# bigFileBufArray()
#
#  Reads a file line by line and returns a REFERENCE to the array of lines.
#  A name ending in .gz or .Z is read through "gzip -dc".
#
#  args:   $file  path to read
#  rets:   array reference; one element per line, chomped.  Unlike
#          fileBufArray(), empty lines at the end of the file are kept.
#  aborts: via abort() when the file (or the gzip pipe) cannot be opened
#
sub bigFileBufArray {
    my $file = shift;
    my $buf = +[];
    my $fh;
    if ($file =~ /\.gz$|\.Z$/) {
        # List-form pipe open: gzip gets the path as one argument, no shell.
        open ($fh, '-|', 'gzip', '-dc', $file)
            or &abort ("unable to open file $file for gzip -dc");
    }
    else {
        open ($fh, '<', $file)
            or &abort ("unable to open file $file for reading");
    }
    while (my $line = <$fh>) {
        chomp $line;
        push (@$buf, $line);
    }
    close ($fh);
    return $buf;
}

###############################################################################
# end
1;                                                     # in case it's a package
###############################################################################
