#!/usr/bin/perl
##
##  Initial Author: Bin Qian
##  $Revision: 5305 $
##  $Date: 2004-10-14 02:15:42 -0400 (Thu, 14 Oct 2004) $
##  $Author: bqian $
##
###############################################################################


###############################################################################
# conf
###############################################################################

# general
$| = 1;                                              # disable stdout buffering
$debug = 1;                                             # chatter while running
                                                        # (runCmd echoes each command when set)

# databases
# SCOP classification listing; scanned line by line for the query's pdb id and
# chain when no -scopfam is given on the command line.
$scopdirfile        = "/data/bqian/SCOP/dir.cla.scop.txt_1.63";

# programs
# External tools run through the shell by the main section and by
# get_blast_evalue().  All paths are site-specific absolute paths.
$getscopfam         = "/users/bqian/scripts/get_scopfam_pdb.pl";
$rosettadir         = "/users/bqian/rosetta/rosetta++/";
$mammothmult        = "/users/bqian/bin/src/mammothmult/mammothmult";
$clustal2fasta      = "/users/bqian/scripts/clustal2fasta.pl";
$getseqweights      = "/users/bqian/scripts/seq_weights.pl";
$barcodecst         = "/users/bqian/src/rosetta_scripts/barcode/barcode.py";
$blast2seq          = "/net/local/blast/bl2seq";
$getFasta           = "/users/bqian/scripts/getAllFastaFromPdb.pl";

###############################################################################
# init
###############################################################################

# argv
# getCommandLineOptions() parses @ARGV, fills in defaults and exits with a
# usage message when -querypdb is missing.
my %opts = &getCommandLineOptions ();
my $scopfam         = $opts{scopfam};
my $querypdb        = $opts{querypdb};
my $out_dir         = $opts{outdir};
my $evalue_cutoff   = $opts{evalue};
my $no_hom          = $opts{no_hom};

###############################################################################
# main
###############################################################################

# The query must be a .pdb file.  Its base name (directories stripped) is used
# below to recognise the query among the family members.
if ($querypdb !~ /\.pdb/) {
    &abort ("querypdb file must end with .pdb: $querypdb");
}
$querypdb =~ /\/*([^\/]*)\.pdb/;
$queryname = "$1.pdb";

# No family given: take the first four characters of the file name as the pdb
# id and the fifth as the chain id, then look the pair up in the scop dir file.
if (! defined $scopfam){
    $queryname =~ /^(....)(.)/;
    my $pdbid = $1;
    my $chainid = $2;
    $chainid =~ tr/a-z/A-Z/;
    if ($chainid !~ /[A-Z]/){$chainid = '-';}
    else {$chainid .= ':';}
    print "Retrieving scop fam name for $pdbid$chainid....\n";
    open (DIR, $scopdirfile)
        or &abort ("unable to open scop dir file $scopdirfile: $!");
    while(<DIR>){
        # \Q..\E: the id and chain come from a file name, not from a pattern
        if (/\Q$pdbid\E\s+\Q$chainid\E\s+(\S+)/){
            $scopfam = $1;
            last;
        }
    }
    close(DIR);
}
if (! defined $scopfam){
    print "No scop fam id found for $queryname. Either this pdb is ";
    print "too new to be included in scop, or it has multiple chains ";
    print "in which case you must specify the chain id such as 1vl7A.pdb.\n";
    exit(1);
}

# Fetch the family members into $out_dir and add the query itself.
&runCmd (qq{$getscopfam -scop $scopfam -out $out_dir});
system (qq{cp $querypdb $out_dir/});

# Build $out_dir/list.  With -no_hom the query is listed first and a family
# member is kept only when blast finds no hit against the query or the hit's
# e-value is above the cutoff; otherwise every pdb in $out_dir is listed.
if ($no_hom){
    system (qq{ls $out_dir/$queryname > $out_dir/list});
    &runCmd (qq{$getFasta $querypdb > $out_dir/this_strange_parent});
    while ($pdbs = <$out_dir/*.pdb>){
        &runCmd (qq{$getFasta $pdbs > $out_dir/this_strange_query});
        my $thise = &get_blast_evalue("$out_dir/this_strange_parent", "$out_dir/this_strange_query");
        # undef means "no hit"; a defined e-value is always compared numerically
        if (! defined $thise || $thise > $evalue_cutoff){
            system(qq{ls $pdbs >> $out_dir/list});
        }
    }
    system (qq{rm -f $out_dir/this_strange_query $out_dir/this_strange_parent});
}else{
    system (qq{ls $out_dir/*.pdb > $out_dir/list});
}

# Run rosetta in scoring mode over the list; it writes <name>_0001.pdb decoys
# into ./output.
system (qq{cp $rosettadir/paths.txt ./});
system (qq{mkdir -p output})    if (! -d "output");
&runCmd (qq{$rosettadir/rosetta.gcc -score -nstruct 1 -fa_input -fa_output -scorefile qq -l $out_dir/list});

# Replace every listed pdb by its rosetta decoy; pdbs without a decoy are
# dropped because all *.pdb files are removed first.
@pdblist = fileBufArray("$out_dir/list");
system (qq{rm -f $out_dir/*.pdb});
foreach $pdb (@pdblist){
    $pdb =~/\/*([^\/]*)\.pdb/;
    $rosettadecoy = "output/".$1.'_0001.pdb';
    system(qq{cp $rosettadecoy $pdb}) if (-f $rosettadecoy);
}

# Re-list what survived and move the query to the front of the list.
system (qq{ls $out_dir/*.pdb > $out_dir/list});
@pdblist = fileBufArray("$out_dir/list");
my $queryindex;
for my $idx (0 .. $#pdblist) {
    if ($pdblist[$idx] =~ /\Q$queryname\E/) {
        $queryindex = $idx;
        last;
    }
}
if (! defined $queryindex) {
    # the old open-ended search looped forever in this situation
    &abort ("query $queryname not found in $out_dir/list");
}
@pdblist[0, $queryindex] = @pdblist[$queryindex, 0];

open (MTLIST, ">$out_dir/mammothlist")
    or &abort ("unable to open file $out_dir/mammothlist for writing");
print MTLIST "MAMMOTH\n";
foreach $pdb (@pdblist){
    print MTLIST "$pdb\n";
}
close (MTLIST);                          # complete on disk before mammothmult reads it

# Structural alignment, conversion to fasta and sequence weights.
&runCmd (qq{$mammothmult $out_dir/mammothlist});
system (qq{mv mammothlist* $out_dir/});
&runCmd (qq{$clustal2fasta $out_dir/mammothlist-FINAL.aln $out_dir/$scopfam.fasta});
&runCmd (qq{$getseqweights $out_dir/$scopfam.fasta > $out_dir/$scopfam.weight});

# List every aligned member except the query, one path per line.
@fastaarray = fileBufArray("$out_dir/$scopfam.fasta");
open (NEWLIST, ">$out_dir/$scopfam.list")
    or &abort ("unable to open file $out_dir/$scopfam.list for writing");
foreach $line(@fastaarray) {
    if ($line =~ /^>(.*)/){
        my $member = $1;
        # pattern kept unquoted: the header format written by the converter
        # is not visible here
        if ($member !~ /$queryname/){
            print NEWLIST "$out_dir/$member\n";
        }
    }
}
close(NEWLIST);

&runCmd(qq{$barcodecst $out_dir/$scopfam.list 0 2 100 -a $out_dir/$scopfam.fasta -w $out_dir/$scopfam.weight -hom_struct});
# done
exit 0;

###############################################################################
# subs
###############################################################################

# getCommandLineOptions()
#
#  Parses @ARGV with Getopt::Long and fills in defaults:
#    outdir  '.'     (created with mkdir -p when missing)
#    evalue  1e-6
#    no_hom  'true'  (undef when the given value starts with F/f)
#  Prints the usage text and exits with -1 when an option cannot be parsed or
#  -querypdb is missing; checkExist() exits when the query file is missing or
#  empty.
#
#  rets: %opts  hash (flat key/value list) of command line options
#
sub getCommandLineOptions {
    use Getopt::Long;
    my $usage = qq{usage: $0 
\t -querypdb          <query_pdb>
\t[-evalue            <evalue_cutoff>]      (def: 1e-6)
\t[-no_hom            <T/F>]                (def: T)
\t[-outdir            <out_dir>]            (def: ./ must be empty!)
\t[-scopfam           <scop_fam_id>] 
};

    # Get args
    my %opts = ();
    my $parsed_ok = &GetOptions (\%opts,"no_hom=s","evalue=f", "scopfam=s", "querypdb=s", "outdir=s");

    # Check for legal invocation.  GetOptions returns false after reporting an
    # unknown or malformed option; do not carry on with defaults in that case.
    if (! $parsed_ok || ! defined $opts{querypdb}) {
        print STDERR "$usage\n";
        exit -1;
    }
    &checkExist ('f', $opts{querypdb});

    $opts{outdir} = '.' if (! defined $opts{outdir});
    &runCmd (qq{mkdir -p }.$opts{outdir})    if (! -d $opts{outdir});

    $opts{evalue} = 1e-6 if (! defined $opts{evalue});
    $opts{no_hom} = 'true' if (! defined $opts{no_hom});
    $opts{no_hom} = undef if ($opts{no_hom} =~ /^F/i);
    $opts{scopfam}= undef if (! defined $opts{scopfam});   # make sure the key exists
    return %opts;
}

# get_blast_evalue()
#
#  Runs $blast2seq (blastp) on two fasta files and returns the Expect value of
#  the first "Score = ... Expect = ..." line of the report, or undef when the
#  report has no such line.  The value is returned as a string; a trailing
#  comma is removed and a mantissa-less exponent such as "e-105" (which Perl
#  would numify as 0) is returned as "1e-105".
#  runCmd() exits the program when blast itself fails.
#
sub get_blast_evalue {
    my ($queryfasta, $parentfasta) = @_;

    # per-process report name so concurrent runs in one directory do not collide
    my $report = "strangname_uuuuu.$$";
    &runCmd ("$blast2seq -i $queryfasta -j $parentfasta -p blastp > $report");

    my $evalue;
    if (open (my $report_fh, '<', $report)) {
        while (my $line = <$report_fh>) {
            if ($line =~ /Score =.*Expect\s*=\s*(\S+)\s*/) {
                $evalue = $1;
                $evalue =~ s/,$//;
                $evalue = "1$evalue"  if ($evalue =~ /^e/i);
                last;                    # only the first (best) hit counts
            }
        }
        close ($report_fh);
    }
    unlink ($report);

    return $evalue;
}

###############################################################################
# util
###############################################################################

# chompEnds ()
#
#  Returns a copy of the argument with leading and trailing whitespace removed.
#
sub chompEnds {
    my ($text) = @_;
    for ($text) {
        s/^\s+//;
        s/\s+$//;
    }
    return $text;
}


# chip (chop for front of strings)
#
#  Removes the first character from each argument IN PLACE and returns the
#  removed characters: a scalar when called with one argument, a list
#  otherwise.  @_ aliases the caller's variables, so the arguments must be
#  modifiable variables rather than literals.  Empty or undefined arguments
#  are left untouched and contribute ''.
#
sub chip {
    my @flo = ();
    for my $i (0 .. $#_) {
        if (defined $_[$i] && length $_[$i]) {
            $flo[$i] = substr ($_[$i], 0, 1);
            $_[$i]   = substr ($_[$i], 1);         # writes through to the caller
        }
        else {
            $flo[$i] = '';
        }
    }
    return $flo[0]  if ($#_ == 0);
    return @flo;
}


# chimp (chomp for front of strings)
#
#  Removes leading whitespace from each argument IN PLACE and returns the
#  removed whitespace: a scalar when called with one argument, a list
#  otherwise.  @_ aliases the caller's variables, so the in-place edit reaches
#  the caller.  Arguments without leading whitespace (or undefined ones) are
#  left untouched and contribute ''.
#
sub chimp {
    my @flo = ();
    for my $i (0 .. $#_) {
        if (defined $_[$i] && $_[$i] =~ s/^(\s+)//) {
            $flo[$i] = $1;
        }
        else {
            $flo[$i] = '';
        }
    }
    return $flo[0]  if ($#_ == 0);
    return @flo;
}


# cleanStr ()
#
#  Returns a copy of the argument with the characters \x00-\x08, \x0B-\x1F and
#  \x80-\xFF removed.  Tab (\x09) and newline (\x0A) are kept.
#
sub cleanStr {
    (my $clean = shift) =~ s/[\x00-\x08\x0B-\x1F\x80-\xFF]//g;
    return $clean;
}


# base36 ()
#
#  Maps a digit value to its base-36 character: a single decimal digit is
#  returned unchanged, any other value $d becomes chr($d - 10 + ord('A')),
#  i.e. 10 -> 'A' ... 35 -> 'Z'.  No range check is made.
#
sub base36 {
    my ($value) = @_;
    if ($value =~ /^\d$/) {
        return $value;
    }
    my $letter_code = ord ('A') + $value - 10;
    return chr ($letter_code);
}


# charToHex ()
#
#  Returns the two lowercase hex digits of the low byte of the ordinal of the
#  first character of the argument (e.g. "A" -> "41").
#
sub charToHex {
    my $low_byte = ord ($_[0]) & 0xff;      # same bits as the (& 0xf0, & 0x0f) pair
    return sprintf ('%02x', $low_byte);
}
# end charToHex ()


#  hexToChar ()
#
#  Returns the character whose ordinal is the hex number given as a string
#  (e.g. "41" -> "A").
#
sub hexToChar {
    my ($hex_digits) = @_;
    return chr (hex ($hex_digits));
}
# end hexToChar ()


# listMember ()
#
#  Returns $item when it is string-equal (eq) to at least one element of the
#  remaining arguments, undef otherwise.
#
sub listMember {
    my ($item, @list) = @_;
    my $hits = grep { $item eq $_ } @list;
    return $hits ? $item : undef;
}


# iterElimSortIndexList ()
#
#  args: $val1_list, $val2_list  array refs of numeric values, indexed alike
#        $fraction               share of the leading entries kept per round
#        $direction              passed on to insertSortIndexList
#  rets: array ref of indices into the value lists
#
#  The indices start out ordered by $val1_list.  Each round takes
#  $cut = int ($last_cut * $fraction) and re-sorts two position ranges of the
#  index list separately: $cut+1 .. $last_cut ("discards") and 0 .. $cut
#  ("keeps").  The sort key alternates between the lists, starting with
#  $val2_list.  Rounds stop when $cut reaches 0, or when $cut can no longer
#  shrink (a $fraction of 1 or more, which previously never terminated).
#
sub iterElimSortIndexList {
    my ($val1_list, $val2_list, $fraction, $direction) = @_;

    my $sorted_val1_list = &insertSortIndexList ($val1_list, $direction);
    my $index_list       = +[ @{$sorted_val1_list} ];

    my $toggle   = 2;
    my $last_cut = $#{$index_list};
    my $cut      = int ($last_cut * $fraction);
    $cut = $last_cut  if ($cut > $last_cut);       # never reach past the list
    while ($cut > 0) {
        my $val_list = ($toggle == 1) ? $val1_list : $val2_list;

        # sort the right half ("discards")
        &_resortIndexRange ($index_list, $cut + 1, $last_cut, $val_list, $direction);

        # sort the left half ("keeps")
        &_resortIndexRange ($index_list, 0, $cut, $val_list, $direction);

        # update cut and toggle
        $toggle   = ($toggle == 1) ? 2 : 1;
        $last_cut = $cut;
        $cut      = int ($last_cut * $fraction);
        last  if ($cut >= $last_cut);              # no progress possible
    }

    return $index_list;
}

# _resortIndexRange ()
#
#  Reorders positions $first .. $last of @$index_list in place so that the
#  indices stored there are sorted by their values in @$val_list.
#
sub _resortIndexRange {
    my ($index_list, $first, $last, $val_list, $direction) = @_;
    return  if ($last < $first);

    my @members = @{$index_list}[$first .. $last];
    my @values  = map { $val_list->[$_] } @members;
    my $order   = &insertSortIndexList (\@values, $direction);
    @{$index_list}[$first .. $last] = @members[@{$order}];
    return;
}

# insertSortIndexList ()
#
#  args: $val_list   array ref of numeric values
#        $direction  'decreasing' for largest first; anything else (or undef)
#                    sorts smallest first
#  rets: array ref holding the indices of @$val_list in sorted order.  Equal
#        values keep their original relative order.  An empty list gives an
#        empty result.
#
sub insertSortIndexList {
    my ($val_list, $direction) = @_;
    my $decreasing = (defined $direction && $direction eq 'decreasing');

    # the index tie-break keeps equal values in input order, as the original
    # insertion sort did
    my @order = sort {
        ($decreasing ? $val_list->[$b] <=> $val_list->[$a]
                     : $val_list->[$a] <=> $val_list->[$b])
            || $a <=> $b
    } 0 .. $#{$val_list};

    return \@order;
}

# readFiles
#
#  Returns the sorted names of the plain files in $dir, skipping names that
#  start with a dot.  Names are prefixed with "$dir/" when $fullpath_flag is
#  true.
#
sub readFiles {
    my ($dir, $fullpath_flag) = @_;

    opendir (DIR, $dir);
    my @entries = sort readdir (DIR);
    closedir (DIR);

    my @files = ();
    for my $name (grep { $_ !~ /^\./ } @entries) {
        next  unless (-f "$dir/$name");
        push (@files, $fullpath_flag ? "$dir/$name" : "$name");
    }
    return @files;
}

# createDir
#
#  Makes sure directory $dir exists, creating it (and missing parents) with
#  "mkdir -p".  Exits with -2 when mkdir fails.  Returns $dir.
#
sub createDir {
    my ($path) = @_;
    return $path  if (-d $path);

    my $status = system (qq{mkdir -p $path});
    if ($status != 0) {
        print STDERR "$0: unable to mkdir -p $path\n";
        exit -2;
    }
    return $path;
}

# copyFile
#
#  Copies plain file $src to $dst with "cp".  A missing source is only
#  reported on STDERR; a failing cp exits with -2.  Returns $dst either way.
#
sub copyFile {
    my ($src, $dst) = @_;

    unless (-f $src) {
        print STDERR "$0: file not found: '$src'\n";
        return $dst;
    }

    my $status = system (qq{cp $src $dst});
    if ($status != 0) {
        print STDERR "$0: unable to cp $src $dst\n";
        exit -2;
    }
    return $dst;
}

# zip
#
#  Compresses $file in place with "gzip -9f" and returns the new name
#  ("$file.gz").  Aborts when the name already ends in .Z or .gz, when the
#  file is empty or missing, or when gzip fails.
#
sub zip {
    my $file = shift;
    # Look at the suffix only: a ".gz" elsewhere in the name (e.g.
    # "notes.gzip.txt") does not make the file a compressed one.
    if ($file =~ /\.(?:Z|gz)$/) {
	&abort ("already a zipped file $file");
    }
    if (-s $file) {
	# list form: the file name is not re-parsed by a shell
	if (system ('gzip', '-9f', $file) != 0) {
	    &abort ("unable to gzip -9f $file");
	}
    } elsif (-f $file) {
	&abort ("file empty: '$file'");
    } else {
	&abort ("file not found: '$file'");
    }
    $file .= ".gz";
    return $file;
}

# unzip
#
#  Decompresses $file in place with "gzip -d" and returns the name without its
#  .Z / .gz suffix.  Aborts when the name does not end in .Z or .gz, when the
#  file is missing, when gzip fails, or when the result is empty.
#
sub unzip {
    my $file = shift;
    # Suffix test; the old /^\.Z/ only matched names STARTING with ".Z", so
    # every real .Z file was rejected.
    if ($file !~ /\.(?:Z|gz)$/) {
	&abort ("not a zipped file $file");
    }
    if (-f $file) {
	# list form: the file name is not re-parsed by a shell
	if (system ('gzip', '-d', $file) != 0) {
	    &abort ("unable to gzip -d $file");
	}
    } else {
	&abort ("file not found: '$file'");
    }
    $file =~ s/\.Z$|\.gz$//;
    if (! -s $file) {
	&abort ("file empty: '$file'");
    }
    return $file;
}

# remove
#
#  Deletes $inode (file or directory tree) with "rm -rf".  A path that does
#  not exist is only reported on STDERR; a failing rm exits with -2.
#  Returns $inode either way.
#
sub remove {
    my ($target) = @_;

    unless (-e $target) {
	print STDERR "$0: inode not found: '$target'\n";
	return $target;
    }

    my $status = system (qq{rm -rf $target});
    if ($status != 0) {
	print STDERR "$0: unable to rm -rf $target\n";
	exit -2;
    }
    return $target;
}
     
# runCmd
#
#  args: $cmd     shell command line, handed to system()
#        $nodie   when true, return the failure code instead of exiting
#        $silent  when true, do not echo the command even if $debug is set
#  rets: 0 on success.  On failure a [FAILURE:<code>] line goes to STDERR and
#        <code> is returned ($nodie) or used as the exit status:
#          exit status 1..127    -> that status
#          exit status 128..255  -> status - 256 (a negative number)
#          killed by signal N    -> 128 + N
#          command not started   -> -1
#
sub runCmd {
    my ($cmd, $nodie, $silent) = @_;
    my $date = `date +'%Y-%m-%d_%T'`;  chomp $date;
    print "[$date][RUN][$0] $cmd\n" if ($debug && ! $silent);

    # system() returns the wait status (same value as $?), or -1 when the
    # command could not be started at all.
    my $status = system ($cmd);
    my ($ret, $why);
    if ($status == -1) {
	$ret = -1;
	$why = "could not execute: $!";
    }
    elsif ($status & 127) {
	# low 7 bits carry the signal number; the exit-code byte is 0 here, so
	# looking only at $status >> 8 would report success
	$ret = 128 + ($status & 127);
	$why = "killed by signal " . ($status & 127);
    }
    else {
	$ret = $status >> 8;
	$ret -= 256  if ($ret >= 128);
    }
    return 0  if ($ret == 0);

    $date = `date +'%Y-%m-%d_%T'`;  chomp $date;
    print STDERR ("[$date][FAILURE:$ret][$0] $cmd\n");
    print STDERR ("[$date][FAILURE:$ret][$0] $why\n")  if (defined $why);
    return $ret  if ($nodie);
    exit $ret;
}

# logMsg()
#
#  Writes "[date][LOG][$0] $msg" to $logfile, or to STDOUT when no log file is
#  given.  The log file is opened with ">" and therefore holds only this one
#  message afterwards.
#  NOTE(review): truncating on every call looks unintended for a log; confirm
#  with callers before switching to append.
#
#  rets: 'true' on success; nothing (false), after a message on STDERR, when
#        the log file cannot be opened.
#
sub logMsg {
    my ($msg, $logfile) = @_;
    my $date = `date +'%Y-%m-%d_%T'`;  chomp $date;
    my $line = "[$date][LOG][$0] $msg\n";

    if ($logfile) {
        my $log_fh;
        # two-argument form kept on purpose so file names are interpreted
        # exactly as before
        if (! open ($log_fh, ">".$logfile)) {
            print STDERR "$0: unable to open log file $logfile for writing: $!\n";
            return;
        }
        print $log_fh $line;
        close ($log_fh);
    }
    else {
        print STDOUT $line;
    }

    return 'true';
}

# checkExist()
#
#  Verifies that $path exists as the requested $type and exits with -3 after
#  an alert otherwise:
#    'd'  must be a directory
#    'f'  must be a plain file of non-zero size
#  Any other $type is accepted without a check.
#
sub checkExist {
    my ($type, $path) = @_;

    my $complaint;
    if ($type eq 'd') {
	$complaint = "dirnotfound: $path"   unless (-d $path);
    }
    elsif ($type eq 'f') {
	if    (! -f $path) { $complaint = "filenotfound: $path"; }
	elsif (! -s $path) { $complaint = "emptyfile: $path"; }
    }

    if (defined $complaint) {
	&alert ($complaint);
	exit -3;
    }
}

# alert()
#
#  Prints "[date][ALERT][$0] $msg" to STDERR and returns nothing.
#
sub alert {
    my ($text) = @_;
    chomp (my $stamp = `date +'%Y-%m-%d_%T'`);
    print STDERR "[$stamp][ALERT][$0] $text\n";
    return;
}

# abort()
#
#  Prints "[date][ABORT][$0] $msg" to STDERR and exits with -2.
#
sub abort {
    my ($text) = @_;
    chomp (my $stamp = `date +'%Y-%m-%d_%T'`);
    print STDERR "[$stamp][ABORT][$0] $text\n";
    exit -2;
}

# writeBufToFile()
#
#  Overwrites $file with the elements of @$bufptr joined by newlines, followed
#  by one final newline.  Aborts when the file cannot be opened for writing.
#
sub writeBufToFile {
    my ($file, $bufptr) = @_;

    open (FILE, '>'.$file)
	or &abort ("unable to open file $file for writing");

    my $text = join ("\n", @{$bufptr}) . "\n";
    print FILE $text;
    close (FILE);
    return;
}

# fileBufString()
#
#  Returns the whole content of $file as one string.  Names ending in .gz or
#  .Z are read through "gzip -dc".  Aborts when the file cannot be opened.
#
sub fileBufString {
    my $file = shift;
    my $fh;
    if ($file =~ /\.gz$|\.Z$/) {
	# list form: the file name is not re-parsed by a shell
	if (! open ($fh, '-|', 'gzip', '-dc', $file)) {
	    &abort ("unable to open file $file for gzip -dc");
	}
    }
    elsif (! open ($fh, '<', $file)) {
	&abort ("unable to open file $file for reading");
    }
    # Slurp mode only for this one read: with $/ undefined around the whole
    # sub, abort()'s chomp could not strip the newline from its timestamp.
    my $buf = do { local $/; <$fh> };
    close ($fh);
    return $buf;
}

# fileBufArray()
#
#  Returns the content of $file as a list of lines, split on the current
#  input record separator ($/) and without the separators.  Empty lines at the
#  end are dropped (split discards trailing empty fields); empty lines
#  elsewhere are kept.  Names ending in .gz or .Z are read through
#  "gzip -dc".  Aborts when the file cannot be opened.
#
sub fileBufArray {
    my $file = shift;
    my $separator = $/;
    my $fh;
    if ($file =~ /\.gz$|\.Z$/) {
	# list form: the file name is not re-parsed by a shell
	if (! open ($fh, '-|', 'gzip', '-dc', $file)) {
	    &abort ("unable to open file $file for gzip -dc");
	}
    }
    elsif (! open ($fh, '<', $file)) {
	&abort ("unable to open file $file for reading");
    }
    # Slurp mode only for this one read: with $/ undefined around the whole
    # sub, abort()'s chomp could not strip the newline from its timestamp.
    my $content = do { local $/; <$fh> };
    close ($fh);

    # lexical array: the old package-global @buf was visible to every caller
    my @lines = split (/\Q$separator\E/, $content);
    return @lines;
}

# bigFileBufArray()
#
#  Reads $file line by line and returns an array ref of the chomped lines.
#  Names ending in .gz or .Z are read through "gzip -dc".  Aborts when the
#  file cannot be opened.  The caller's $_ is left alone.
#
sub bigFileBufArray {
    my $file = shift;
    my $buf = +[];
    my $fh;
    if ($file =~ /\.gz$|\.Z$/) {
        # list form: the file name is not re-parsed by a shell
        if (! open ($fh, '-|', 'gzip', '-dc', $file)) {
            &abort ("unable to open file $file for gzip -dc");
        }
    }
    elsif (! open ($fh, '<', $file)) {
        &abort ("unable to open file $file for reading");
    }
    while (defined (my $line = <$fh>)) {
        chomp $line;
        push (@$buf, $line);
    }
    close ($fh);
    return $buf;
}

###############################################################################
# end
1;                                                     # in case it's a package
###############################################################################
