#!/usr/bin/perl

# Require the list-file argument. Without it, explain the expected input and
# stop. The usage text is handed to die() so that it goes to STDERR and the
# script exits with a failure status, instead of printing the help on STDOUT
# followed by an uninformative "Died at ... line N." message.
if ( @ARGV < 1 ) {
		die "Not enough arguments. Exiting...
Usage: $0 <listfile>
Script to map the given PDB sequence to the Chothia numbering
The script outputs the .heavy file that contains the mapping
<listfile> is a list of PDB file names and chain ID's in the following format
1G9E H:A
The first column has the PDB ID's (of any length) without the .pdb extension
The second column has the heavy and antigen chain ID's separated by a :\n\n" ;
}



# Three-letter PDB residue name => one-letter amino-acid code.
# Values are quoted explicitly instead of relying on barewords.
%threetoone=(
		"ALA","A","CYS","C","ASP","D","GLU","E","PHE","F",
		"GLY","G","HIS","H","ILE","I","LYS","K","LEU","L",
		"MET","M","ASN","N","PRO","P","GLN","Q","ARG","R",
		"SER","S","THR","T","VAL","V","TRP","W","TYR","Y",
);

# NOTE(review): these three values are not referenced anywhere in the visible
# code; presumably they are the insertion-free H1/H2/H3 lengths - confirm
# before removing.
$h1std=10;
$h2std=16;
$h3std=8;

# The only command-line argument is the list of PDB IDs and chain IDs.
$list=shift @ARGV;

# Three-argument open: the file name can never be mistaken for an open mode,
# and a failure now says which file could not be read and why.
# The bareword handle "list" is kept because initialize() reads from it.
open(list,'<',$list) or die("Cannot open list file '$list': $!\n");

# Load the first list entry (sets $pdbfile, $heavychain, ...).
initialize();

# initialize() leaves $pdbfile empty at end of file or on an empty line,
# which is what terminates this loop.
while($pdbfile ne ''){

		print "Mapping $pdbfile ...\n";

		readpdbfile();      # heavy-chain sequence from <id>.pdb, opens the output file
		findcdrs();         # locate H1/H2/H3 and the framework segments
		assignnumbering();  # (re)build the length => numbering tables
		checknumbering();   # framework-1 sanity check and type classification
		renumbercdrs();     # write the numbering to <id>_chothia.heavy
		initialize();       # advance to the next list entry
}


sub initialize{

# Read the next entry from the list file (handle "list") and reset the
# per-structure state.
#
# Sets: $pdbfile    - first column, lower-cased ('' when no entry is left)
#       $chains     - second column, e.g. "H:A"
#       $heavychain - first character of the part of $chains before the ':'
# Resets: $nresheavy, $ncysheavy, $heavyseq, $h1, $h2, $h3.

		$line=<list>;
		# At end of file the read yields undef; treat that as an empty entry.
		$line='' unless defined($line);
		# chomp removes only a line terminator. The previous chop() removed the
		# last character unconditionally and so truncated the chain IDs when the
		# final line of the list had no trailing newline.
		chomp($line);
		# split ' ' skips leading blanks and splits on any run of whitespace
		# (spaces, tabs, a stray carriage return). With / +/ a leading blank
		# produced an empty first field, which silently ended the whole run.
		@fileandchains=split(' ',$line);
		$pdbfile=lc($fileandchains[0]);
		$chains=$fileandchains[1];
		@array=split(/:/,$chains);
		$heavychain=substr($array[0],0,1);
		$nresheavy=$ncysheavy=0;
		$heavyseq=$h1=$h2=$h3='';
}

sub readpdbfile{

# Read "$pdbfile.pdb" and append the one-letter code of every residue of chain
# $heavychain to $heavyseq, using the CA atom of each residue. Also opens the
# output handle "chothiaheavy" on "${pdbfile}_chothia.heavy" (written later by
# renumbercdrs). Dies with a diagnostic if either file cannot be opened.
#
# Reads:  $pdbfile, $heavychain, %threetoone
# Writes: $filename, $heavyseq, the per-line field globals ($identifier,
#         $atomno, $atom, $alt_loc, $residue, $chain, $residueno,
#         $insert_code, $x, $y, $z), $old_residueno, $old_alt_loc

    $filename=$pdbfile.".pdb";

#    $filename = $pdbfile.".ent" if !(-e $filename);
		print( "Here $filename\n" );
    # Three-argument open with an explicit mode, and a message that names the
    # file and the system error instead of a bare "Died".
    open(pdbfile,'<',$filename) or die("Cannot open PDB file '$filename': $!\n");
		print( "Here too\n" );
    # This open used to be unchecked, so an unwritable directory went unnoticed.
    open(chothiaheavy,'>',"${pdbfile}_chothia.heavy")
        or die("Cannot create '${pdbfile}_chothia.heavy': $!\n");

    # Forget the last residue seen in the previous structure; otherwise the
    # first residue of this chain could be mistaken for an alternate location
    # of it and be dropped.
    $old_residueno=$old_alt_loc=undef;

    # Read to end of file. The earlier chop()/ne '' loop stopped at the first
    # blank line and silently truncated the sequence.
    while(defined($line=<pdbfile>)){
				chomp($line);

				# Fixed-column fields of the record (0-based offsets into the line).
				$identifier = substr($line,0,6);
				$atomno = substr($line,6,5);
				$atom = substr($line,12,4);
				$alt_loc = substr($line,16,1);
				$residue = substr($line,17,3);
				$chain = substr($line,21,1);
				$residueno = substr($line,22,4);
				$insert_code=substr($line,26,1);
				$x = substr($line,30,8);
				$y = substr($line,38,8);
				$z =  substr($line,46,8);

				if($identifier =~ /ATOM/ and $atom =~ /CA/){
						if($chain eq $heavychain){
								# Skip a record that repeats the previous residue number with
								# a different alternate-location flag. Residue names missing
								# from %threetoone contribute nothing to the sequence.
								$heavyseq=$heavyseq.$threetoone{$residue} unless($old_residueno eq $residueno and $old_alt_loc ne $alt_loc);
								$old_residueno=$residueno;
								$old_alt_loc=$alt_loc;
						}
				}
    }

    # The input is fully consumed; release the handle. "chothiaheavy" stays
    # open because renumbercdrs still has to write to it.
    close(pdbfile);
}

sub findcdrs{

# Locate the H1 and H3 loops in $heavyseq by pattern matching, derive H2 from
# fixed offsets relative to them, and cut out the framework segments.
#
# Reads:  $heavyseq, $filename (for the report line), $pdbfile (for warnings)
# Writes: $h1, $h2, $h3, $lenh1, $lenh2, $lenh3,
#         $h1start, $h1end, $h2start, $h2end, $h3start, $h3end,
#         $frh1..$frh4, $lenfrh1..$lenfrh4, $seq1, plus scratch $var and $temp
# Prints one summary line to STDOUT and warns on STDERR when a loop is not found.

		# Start from a clean slate so that a loop that cannot be found in this
		# chain is reported as empty instead of inheriting the length computed
		# for the previously processed structure.
		$h1=$h3='';
		$lenh1=$lenh3=0;
		$h1start=$h3start=0;

#**************H1************
		# The match spans the Cys plus 3 residues before the loop and the
		# W-x-x-x motif after it, hence the 4 + 4 = 8 flanking residues.
		$var = $heavyseq =~/C[A-Z]{1,16}(W)(I|V|F|Y|A|M|L|N|D)(R|K|Q|V|N)(Q|K|H|E|L|R)/;
		if($var and $+[0]-$-[0] > 8){
				$temp=substr($heavyseq,$-[0],$+[0]-$-[0]);
				$lenh1=length ($temp)-8;
				$h1=substr($temp,4,$lenh1);
				# Take the position from the match itself. Searching for the loop
				# string with index() returns its FIRST occurrence in the chain,
				# which need not be the matched one.
				$h1start=$-[0]+4;
		}
		else{
				warn "findcdrs: could not locate H1 in $pdbfile\n";
		}
#******************************


#***********H3****************
		# The match spans the Cys plus 2 residues before the loop and the
		# (W|R)-G-x-(G|R) motif after it, hence the 3 + 4 = 7 flanking residues.
		$var = $heavyseq =~/C[A-Z]{1,27}(W|R)G[A-Z](G|R)/;

		if($var and $+[0]-$-[0] > 7){
				$temp=substr($heavyseq,$-[0],$+[0]-$-[0]);
				$lenh3=length ($temp)-7;
				$h3=substr($temp,3,$lenh3);
				# Same reasoning as for H1; short H3 loops are especially likely
				# to occur a second time earlier in the sequence.
				$h3start=$-[0]+3;
		}
		else{
				warn "findcdrs: could not locate H3 in $pdbfile\n";
		}
#***************************

		$h1end=$h1start+$lenh1-1;
		$h3end=$h3start+$lenh3-1;
		# H2 is not pattern-matched: it is taken to start 14 residues after H1
		# ends and to end 32 residues before H3 starts.
		$h2start=$h1end+15;
		$h2end=$h3start-33;
		$lenh2=$h2end-$h2start+1;
		$h2= substr($heavyseq,$h2start,$lenh2);

		$frh1=substr($heavyseq,0,$h1start);
		$lenfrh1=length($frh1);
		$frh2=substr($heavyseq,$h1end+1,$h2start-$h1end-1);
		$lenfrh2=length($frh2);
		$frh3=substr($heavyseq,$h2end+1,$h3start-$h2end-1);
		$lenfrh3=length($frh3);
		# Only the first 10 residues after H3 are kept as framework 4.
		$frh4=substr($heavyseq,$h3end+1,10);
		$lenfrh4=length($frh4);
		$seq1=$frh1.$h1.$frh2.$h2.$frh3.$h3;


#print "$filename $frh1 $h1 $frh2 $h2 $frh3 $h3 $frh4\n";
#print "$filename $lenfrh1 $lenh1 $lenfrh2 $lenh2 $lenfrh3 $lenh3 $lenfrh4\n" ;

		print "$filename\t$frh1 - \"$h1\" - $frh2 - \"$h2\" - $frh3 - \"$h3\" - $frh4\n";

#print "$filename\t$lenfrl1\t$lenfrl2\t$lenfrl3\t$lenfrl4\n";

}


sub renumbercdrs{

# Write the Chothia residue labels for the current chain to the handle
# "chothiaheavy", one label per line, in sequence order:
# FR1, H1, FR2, H2, FR3, H3, FR4.
#
# Reads:  the @newnumber* tables and $newnumberfrh4 (see assignnumbering),
#         the segment lengths $lenfrh1, $lenh1, $lenfrh2, $lenh2, $lenfrh3,
#         $lenh3, and $pdbfile (for warnings)
# Writes: $string[1..7] (the label list chosen for each segment)
#
# A segment whose length has no table entry produces no output lines, as
# before, but this is now reported on STDERR instead of passing silently.

    my @segments=(
				[ 'FR1', \@newnumberfrh1, $lenfrh1 ],
				[ 'H1',  \@newnumberh1,   $lenh1   ],
				[ 'FR2', \@newnumberfrh2, $lenfrh2 ],
				[ 'H2',  \@newnumberh2,   $lenh2   ],
				[ 'FR3', \@newnumberfrh3, $lenfrh3 ],
				[ 'H3',  \@newnumberh3,   $lenh3   ],
    );

    my $slot=0;
    foreach my $segment (@segments){
				my($label,$table,$length)=@{$segment};
				$slot++;

				# A negative length would index the table from its END and pick
				# the numbering of an unrelated loop length, so reject it here.
				my $labels;
				if(defined($length) and $length >= 0){
						$labels=$table->[$length];
				}
				$string[$slot]=$labels;

				if(!defined($labels)){
						my $shown=defined($length) ? $length : 'undefined';
						warn "renumbercdrs: no Chothia numbering for $label of length $shown in $pdbfile\n";
				}
    }
    # Framework 4 always uses the same fixed list of labels.
    $string[7]=$newnumberfrh4;

    # Lexical loop variables: the old code reused the globals @array, $i, $j
    # and $nelements (@array is also used by initialize).
    foreach my $index (1..7){
				next unless defined($string[$index]);
				foreach my $number (split(/,/,$string[$index])){
						print chothiaheavy "$number\n";
				}
    }
}

sub assignnumbering{

# Fill the global lookup tables that map a segment length to the
# comma-separated list of residue labels for that segment:
#   @newnumberfrh1 (lengths 21-26), @newnumberh1 (7-13), @newnumberfrh2 (14),
#   @newnumberh2 (12-22), @newnumberfrh3 (32), @newnumberh3 (3-25),
# and the scalar $newnumberfrh4 (always labels 103-112).
# Only the listed lengths get an entry; every other index stays undefined.

		# Insertion labels: $base followed by the first $count letters A, B, C...
		my $lettered=sub{
				my($base,$count)=@_;
				return map { $base . $_ } ('A'..'Z')[0..$count-1];
		};

		# Framework 1 always ends at 25; a longer segment starts at a lower number.
		foreach my $len (21..26){
				$newnumberfrh1[$len]=join(',',(26-$len)..25);
		}

		# H1 always ends with 32-35. Short loops omit labels counting down from
		# 31; long loops add 31A, 31B, ... after 31.
		foreach my $len (7..13){
				my @head;
				if($len<=10){
						@head=(26..(21+$len));
				}
				else{
						@head=(26..31,$lettered->('31',$len-10));
				}
				$newnumberh1[$len]=join(',',@head,32..35);
		}

		$newnumberfrh2[14]=join(',',36..49);

		# H2 always starts with 50-52. Short loops omit labels counting up from
		# 53; long loops add 52A, 52B, ... after 52.
		foreach my $len (12..22){
				my @tail;
				if($len<=16){
						@tail=((69-$len)..65);
				}
				else{
						@tail=($lettered->('52',$len-16),53..65);
				}
				$newnumberh2[$len]=join(',',50..52,@tail);
		}

		# Framework 3 carries the three fixed insertions 82A-82C.
		$newnumberfrh3[32]=join(',',66..82,$lettered->('82',3),83..94);

		# H3: up to six residues are numbered straight from 95; seven residues
		# use 95-99 followed by 101,102; longer loops use 95-100, then
		# 100A, 100B, ... and finally 101,102.
		foreach my $len (3..25){
				my @labels;
				if($len<=6){
						@labels=(95..(94+$len));
				}
				elsif($len==7){
						@labels=(95..99,101,102);
				}
				else{
						@labels=(95..100,$lettered->('100',$len-8),101,102);
				}
				$newnumberh3[$len]=join(',',@labels);
		}

		$newnumberfrh4=join(',',103..112);
}


sub checknumbering{

# Inspect selected residues of framework 1 ($frh1), counted backwards from its
# end, and record two verdicts in globals:
#   $error - "OK" when all five positional checks pass, otherwise "NOK"
#   $type  - "type1".."type4" from two residue triplets, else "unknown"
# Also sets $val1..$val8, $h18, $string1, $string2 and $error1..$error5.
# Note that $errorN is 1 when check N PASSES, despite the name.

    # One residue of $frh1 at the given offset (negative = from the end).
    my $residue_at=sub{
				my($offset)=@_;
				my $residue=substr($frh1,$offset,1);
				return $residue;
    };

    $val1=$residue_at->(-22);
    $val2=$residue_at->(-20);
    $val3=$residue_at->(-11);
    $val4=$residue_at->(-6);
    $val5=$residue_at->(-5);

    $val6=$residue_at->(-19);
    $val7=$residue_at->(-18);
    $val8=$residue_at->(-17);

    $h18=$residue_at->(-8);

    $string1=$val2.$val7.$val8;
    $string2=$val2.$val6.$val7;

    # Highest-priority rule first: in the sequential form a later rule
    # overrode an earlier one, so type4 beats type3 beats type2 beats type1.
    if($string2 eq "QPG"){
				$type="type4";
    }
    elsif($string2 =~ /Q[A-O,Q-Z]G/){
				$type="type3";
    }
    elsif($string1 eq "EGG"){
				$type="type2";
    }
    elsif($string1 eq "EGP"){
				$type="type1";
    }
    else{
				$type="unknown";
    }

    # Positional checks; each flag is 1 on a match and 0 otherwise.
    $error1=($val1 =~ /[LVIM]/)  ? 1 : 0;
    $error2=($val2 =~ /[E]/)     ? 1 : 0;
    $error3=($val3 =~ /[GS]/)    ? 1 : 0;
    $error4=($val4 =~ /[LVIMF]/) ? 1 : 0;
    $error5=($val5 =~ /[ST]/)    ? 1 : 0;

    $error=($error1 and $error2 and $error3 and $error4 and $error5) ? "OK" : "NOK";
}
