#!/usr/bin/perl

use POSIX ":sys_wait_h";
# Print the command-line help and abort.
#
# Takes no arguments and never returns: the help text is handed to die, so
# it goes to STDERR and the script stops.  The text ends in a newline,
# which keeps die from appending "at ... line N" to it.
sub usage {
  my $help_text = <<'END';
  USAGE : preprocessing_recovery.pl <path to execuable> <single pdb file | file with list of pdb files>
  IE:     preprocessing_recovery.pl ~/rosetta/bin/rosetta.gcc 1gcu.pdb
  RESULT : Runs preprocessing for domain insertion mode for the pdb or list of pdbs.
  Also requires a file that is tab-separated with fields of chain, protein size, parent domain 1, parent domain 2,
  insertion domain.

END
  die $help_text;
}

# ---- command line handling and driver ---------------------------------
# Exactly two arguments are required: the rosetta executable and either a
# single pdb file name or a file that lists one pdb file name per line.
usage() unless scalar(@ARGV) == 2;

# Package globals on purpose (the file does not use strict): the subs below
# read $rosetta when linking/running the executable, and they set $kid,
# which preprocess passes to waitpid.
$rosetta = $ARGV[0];
$infile = $ARGV[1];
$filetype = substr($infile,-3,3);
$kid = -1;

# An argument whose last three characters are "pdb" is a single structure;
# anything else is treated as a list of pdb file names.
if ($filetype eq 'pdb') {
  preprocess($infile);
} else {
  # Fail loudly when the list cannot be read instead of silently
  # processing nothing.
  open(my $list_fh, '<', $infile)
    or die "Cannot open pdb list file '$infile': $!\n";
  # A lexical loop variable keeps the current entry from being overwritten
  # by subs that use a global $line while reading their own files.
  while (my $entry = <$list_fh>) {
    chomp $entry;
    next if $entry =~ /^\s*$/;   # ignore blank lines in the list
    preprocess($entry);
  }
  close($list_fh);
}

# Run the whole preprocessing pipeline for one pdb entry.
#
# Argument:
#   $pdbfile - pdb file name; only its first four characters (the pdb id)
#              are used.
#
# The id is looked up in $domain_insertion/pdb_info.txt; the dollar sign is
# escaped, so it is the shell started by the backticks that expands
# $domain_insertion.  awk prints fields 2, 4, 5 and 6 of every line that
# contains the id, tr removes tabs and newlines, and the results are stored
# in the globals $chain, $host1, $host2 and $insert, which the step subs
# read.  $pdb and $pdbinfo are globals as well.
#
# The steps then run in order: setup, prepack, create_fasta,
# create_fragments.
sub preprocess {
  my ($pdbfile) = @_;
  print "$pdbfile\n";
  $pdbinfo = "\$domain_insertion/pdb_info.txt";
  $pdb = substr($pdbfile,0,4);
  $chain = `awk '/$pdb/ {print \$2}' $pdbinfo | tr -d '\t\n'`;
  $host1 = `awk '/$pdb/ {print \$4}' $pdbinfo | tr -d '\t\n'`;
  $host2 = `awk '/$pdb/ {print \$5}' $pdbinfo | tr -d '\t\n'`;
  $insert = `awk '/$pdb/ {print \$6}' $pdbinfo | tr -d '\t\n'`;

  # An id that is missing from the info file (or an unreadable info file)
  # leaves these values empty.  Say so up front; the steps still run, as
  # they always did, so existing workflows are not interrupted.
  my %looked_up = (chain => $chain, host1 => $host1, host2 => $host2, insert => $insert);
  my @missing = grep { !defined($looked_up{$_}) || $looked_up{$_} eq '' }
                qw(chain host1 host2 insert);
  if (@missing) {
    warn "preprocess: no value for " . join(', ', @missing)
       . " found for '$pdb' in $pdbinfo; continuing with empty values\n";
  }

  # NOTE(review): every step sets $kid to 0 before returning and runs its
  # commands synchronously through backticks, so these waits appear to have
  # nothing to wait for - probably left over from a forking version.
  # Confirm before removing.
  setup($pdb);
  waitpid($kid, 0);
  prepack($pdb);
  waitpid($kid, 0);
  create_fasta($pdb);
  waitpid($kid, 0);
  create_fragments($pdb);
}

# Build the working tree for one pdb entry and stage its input files.
#
# Argument:
#   $pdb - four-character pdb id; also the name of the top directory.
#
# Reads the globals $chain, $host1, $host2, $insert and $rosetta.  Sets
# $kid to -1 on entry and to 0 just before returning.
#
# Inside ./$pdb this creates shell/, setup/ and prepack/ (directories that
# already exist are reused), renames chain '_' to 'A' through change_chain,
# writes insertion_file.prm through create_insertion, and then fills
#   setup/   - insertion_file.prm, domins.paths (as paths.txt), a link to
#              the rosetta executable, and the split chain files;
#   prepack/ - prepack.paths (as paths.txt), ppk.bash, a link to the
#              executable, the split chain files, and $pdb.pdb /
#              $pdb.unbound.pdb built from the host and insert pieces, each
#              followed by a TER line.
# The *.paths files and ppk.bash come from $domain_insertion, which is
# expanded by the shell.
#
# Dies if a directory cannot be created or entered: carrying on would run
# the cp/cat/rm commands in the wrong directory, and the trailing
# chdir("..") calls would leave the process above where it started.
sub setup {
  my ($pdb) = @_;
  print "pdb: $pdb\n";
  my $out = substr($pdb,1,3);

  $kid = -1;

  print "$chain $host1 $host2 $insert\n";

  unless (-d $pdb) {
    mkdir($pdb) or die "setup: cannot create directory '$pdb': $!\n";
  }
  chdir($pdb) or die "setup: cannot enter directory '$pdb': $!\n";

  for my $subdir (qw(shell setup prepack)) {
    next if -d $subdir;
    mkdir($subdir) or die "setup: cannot create directory '$pdb/$subdir': $!\n";
  }

  # change_chain updates the global $chain, so it has to run before $chain
  # is handed to extract_chain below.
  if ($chain eq '_') {
    change_chain($pdb);
  }
  create_insertion($pdb);

  chdir("setup") or die "setup: cannot enter directory '$pdb/setup': $!\n";
  print "in setup\n";
  `cp ../insertion_file.prm .`;
  `cp \$domain_insertion/domins.paths paths.txt`;
  `ln -s $rosetta`;
  extract_chain($pdb,$chain);
  print "exiting setup\n";
  chdir("..") or die "setup: cannot leave directory '$pdb/setup': $!\n";

  chdir("prepack") or die "setup: cannot enter directory '$pdb/prepack': $!\n";
  `cp \$domain_insertion/prepack.paths paths.txt`;
  `cp \$domain_insertion/ppk.bash .`;
  `ln -s $rosetta`;
  extract_chain($pdb,$chain);
  # Join host piece, TER, insert piece, TER; this overwrites the $pdb.pdb
  # that extract_chain has just written in this directory.
  `echo TER > temp`;
  `cat ${out}1.pdb temp ${out}2.pdb temp > $pdb.pdb`;
  `rm temp`;
  `cp $pdb.pdb $pdb.unbound.pdb`;
  chdir("..") or die "setup: cannot leave directory '$pdb/prepack': $!\n";

  chdir("..") or die "setup: cannot leave directory '$pdb': $!\n";

  $kid = 0;

}

# Rewrite the chain id of $pdb.pdb (in the current directory) to 'A'.
#
# Argument:
#   $pdb - pdb id; the file worked on is "$pdb.pdb".
#
# Uses the global $chain as the chain id to replace and sets it to 'A' once
# the file has been rewritten.  The conversion is done by the external
# changeChain.pl, whose output is collected in temp.pdb.
#
# The shell creates temp.pdb even when changeChain.pl is missing or fails,
# so the result is only moved over the original when the command exited
# with status 0 and produced a non-empty file.  Otherwise a warning is
# issued, temp.pdb is removed, and both the pdb file and $chain are left
# unchanged.
sub change_chain {
  my ($pdb) = @_;
  print ":$chain:\n";
  `changeChain.pl $pdb.pdb $chain A > temp.pdb`;
  my $status = $?;
  unless ($status == 0 and -s "temp.pdb") {
    warn "change_chain: changeChain.pl failed for $pdb.pdb (status "
       . ($status >> 8) . "); leaving the file and chain '$chain' unchanged\n";
    unlink("temp.pdb");
    return;
  }
  unless (rename("temp.pdb", "$pdb.pdb")) {
    warn "change_chain: cannot replace $pdb.pdb with temp.pdb: $!\n";
    return;
  }
  $chain = 'A';
  print ":$chain:\n";
}

# Write insertion_file.prm in the current directory, unless it exists.
#
# Argument:
#   $pdb - pdb id; its characters 2-4 form the stem of the piece file names
#          (<stem>1.pdb for the host, <stem>2.pdb for the insert).
#
# Reads the globals $host1, $host2 and $insert, each of the form
# "<first>-<last>".  The file holds one "key \t value" line per entry:
#   hostp / insertp                       piece file names
#   hostp_start_res / hostp_end_res       first of $host1 / last of $host2
#   hostp_begin / hostp_end               last of $host1 / first of $host2
#   insertp_start_res / insertp_begin     first of $insert
#   insertp_end_res / insertp_end         last of $insert
#
# An existing file is reported and left untouched.  Dies if the file cannot
# be opened or completely written.
sub create_insertion {
  my ($pdb) = @_;
  print "pdb: $pdb\n";
  my $insertion = "insertion_file.prm";
  if (-e $insertion) {
    print "$insertion already exists\n";
    return;
  }
  my ($h11,$h12) = split(/-/,$host1);
  my ($h21,$h22) = split(/-/,$host2);
  my ($i1,$i2) = split(/-/,$insert);
  print "$host1 $host2 $insert";
  open(my $prm_fh, '>', $insertion)
    or die("Output file $insertion could not be opened: $!\n");
  print "$insertion is being written to...";
  my $out = substr($pdb,1,3);
  my @entries = (
    [ 'hostp',             "${out}1.pdb" ],
    [ 'insertp',           "${out}2.pdb" ],
    [ 'hostp_start_res',   $h11 ],
    [ 'hostp_end_res',     $h22 ],
    [ 'hostp_begin',       $h12 ],
    [ 'hostp_end',         $h21 ],
    [ 'insertp_start_res', $i1 ],
    [ 'insertp_end_res',   $i2 ],
    [ 'insertp_begin',     $i1 ],
    [ 'insertp_end',       $i2 ],
  );
  for my $entry (@entries) {
    my ($key, $value) = @$entry;
    print {$prm_fh} "$key \t $value\n";
  }
  # Buffered write errors only show up at close time.
  close($prm_fh)
    or die("Output file $insertion could not be written: $!\n");
  print "done\n";
}

# Extract one chain of ../$pdb.pdb and split it into host and insert files.
#
# Arguments:
#   $pdb   - pdb id; characters 2-4 form the stem of the output names.
#   $chain - chain id; a single blank is passed on as '_'.
#
# Reads the globals $host1, $host2 and $insert ("<first>-<last>" residue
# ranges).  In the current directory:
#   $pdb.pdb     - output of "extractChains.pl $chain ../$pdb.pdb"
#   <stem>1.pdb  - lines whose residue number lies in $host1 or $host2
#   <stem>2.pdb  - lines whose residue number lies in $insert
# The residue number is the leading integer of the five characters at
# offset 22 (the residue number and insertion code columns of a PDB
# coordinate record).  Lines without a number there are skipped, so that
# TER/END/remark lines are not treated as residue 0.
#
# Dies if one of the three files cannot be opened or the outputs cannot be
# completely written.  All handles are closed before returning, so the
# piece files are complete on disk for whatever reads them next.
sub extract_chain {
  my ($pdb, $chain) = @_;
  my ($h11,$h12) = split(/-/,$host1);
  my ($h21,$h22) = split(/-/,$host2);
  my ($i1,$i2) = split(/-/,$insert);
  my $out = substr($pdb,1,3);
  print ":$chain:\n";
  if ($chain eq ' ') {
    $chain = '_';
  }
  print ":$chain:\n";
  `extractChains.pl $chain ../$pdb.pdb > $pdb.pdb`;

  open(my $in_fh, '<', "$pdb.pdb")
    or die "extract_chain: cannot read $pdb.pdb: $!\n";
  open(my $host_fh, '>', "${out}1.pdb")
    or die "extract_chain: cannot write ${out}1.pdb: $!\n";
  open(my $insert_fh, '>', "${out}2.pdb")
    or die "extract_chain: cannot write ${out}2.pdb: $!\n";

  print "$h11, $h12, $i1, $i2, $h21, $h22\n";
  # Lexical $line: callers may be in the middle of reading their own file.
  while (my $line = <$in_fh>) {
    chomp($line);
    next if length($line) <= 22;
    my $field = substr($line,22,5);
    next unless $field =~ /^\s*(-?\d+)/;
    my $resnum = $1;
    if (($resnum >= $h11) and ($resnum <= $h12)) {
      print {$host_fh} "$line\n";
    } elsif (($resnum >= $i1) and ($resnum <= $i2)) {
      print {$insert_fh} "$line\n";
    } elsif (($resnum >= $h21) and ($resnum <= $h22)) {
      print {$host_fh} "$line\n";
    }
  }

  close($in_fh);
  close($host_fh)
    or die "extract_chain: error writing ${out}1.pdb: $!\n";
  close($insert_fh)
    or die "extract_chain: error writing ${out}2.pdb: $!\n";
}

# Run the prepack step for one pdb entry.
#
# Argument:
#   $pdb - pdb id; the step runs in ./$pdb/prepack.
#
# Executes "./ppk.bash $pdb" there with its standard output captured in
# ppk.out, then returns to the starting directory.  Sets the global $kid to
# -1 on entry and to 0 on successful return.
#
# Dies, before anything is run and without having changed directory, if
# ./$pdb/prepack cannot be entered; otherwise the script would run in the
# wrong place and the way back would end two levels above the start.
sub prepack {
  my ($pdb) = @_;
  $kid = -1;
  print "**************PREPACKING******************\n";
  chdir("$pdb/prepack")
    or die "prepack: cannot enter directory '$pdb/prepack': $!\n";
  `./ppk.bash $pdb > ppk.out`;
  chdir("../..")
    or die "prepack: cannot return from directory '$pdb/prepack': $!\n";
  print "**************PREPACKING DONE******************\n";
  $kid = 0;
}

# Run rosetta in fasta-creation mode for one pdb entry.
#
# Argument:
#   $pdb - pdb id; the step runs in ./$pdb/setup.
#
# Reads the globals $rosetta (executable to link to) and $chain.  In the
# setup directory it removes $pdb.pdb, makes sure a "rosetta.gcc" entry
# exists (linking it to $rosetta when absent, and replacing a dangling
# link), and runs it with its standard output captured in fasta.out.
# Sets the global $kid to -1 on entry and to 0 on successful return.
#
# Dies, without having changed directory, if ./$pdb/setup cannot be
# entered; otherwise the commands would run in the wrong place and the way
# back would end two levels above the start.
sub create_fasta {
  my ($pdb) = @_;
  $kid = -1;
  print "**************CREATE_FASTA******************\n";
  chdir("$pdb/setup")
    or die "create_fasta: cannot enter directory '$pdb/setup': $!\n";

  # NOTE(review): presumably this clears the chain extract that
  # extract_chain leaves in this directory - confirm that rosetta expects
  # it to be absent.
  unlink("$pdb.pdb");

  unless (-e "rosetta.gcc") {
    # -e follows links, so a link to a vanished target also ends up here;
    # it has to go first or symlink would fail because the name is taken.
    unlink("rosetta.gcc") if -l "rosetta.gcc";
    symlink($rosetta, "rosetta.gcc")
      or warn "create_fasta: cannot link rosetta.gcc to '$rosetta': $!\n";
  }
  `./rosetta.gcc SS $pdb $chain -domain_insertion -create_fasta -verbose -skip_missing_residues -insertion_file insertion_file.prm -l > fasta.out`;

  chdir("../..")
    or die "create_fasta: cannot return from directory '$pdb/setup': $!\n";
  print "**************CREATE_FASTA DONE******************\n";
  $kid = 0;
}

# Run the fragment setup script for one pdb entry.
#
# Argument:
#   $pdb - pdb id; the step runs in ./$pdb/setup.
#
# Reads the global $chain.  Copies loops.py from $domain_insertion (expanded
# by the shell) into the setup directory and runs
# "./loops.py $pdb.pdb $chain" with its standard output captured in
# fragments.out.  Sets the global $kid to -1 on entry and to 0 on
# successful return.
#
# Dies, without having changed directory, if ./$pdb/setup cannot be
# entered; otherwise the commands would run in the wrong place and the way
# back would end two levels above the start.
sub create_fragments {
  my ($pdb) = @_;
  $kid = -1;
  print "**************FRAGMENTS******************\n";
  chdir("$pdb/setup")
    or die "create_fragments: cannot enter directory '$pdb/setup': $!\n";
  `cp \$domain_insertion/loops.py .`;
  `./loops.py $pdb.pdb $chain > fragments.out`;
  chdir("../..")
    or die "create_fragments: cannot return from directory '$pdb/setup': $!\n";
  print "**************FRAGMENTS DONE******************\n";
  $kid = 0;
}
