#!/usr/bin/perl

# Refuse to run when rampaths.txt is absent from the current working
# directory. NOTE(review): the file is presumably consumed through the
# "#include" line that follows (C-preprocessor style) - confirm how it is
# actually loaded.
die("File rampaths.txt not found\n") if ! -e "./rampaths.txt";
#include "rampaths.txt"

# Three-letter -> one-letter residue codes for the 20 residue names handled
# by this script. Values are quoted strings rather than barewords so the
# table keeps working under "use strict" and cannot be hijacked by a sub
# that happens to be named A, C, D, ...
%threetoone = (
    'ALA' => 'A', 'CYS' => 'C', 'ASP' => 'D', 'GLU' => 'E', 'PHE' => 'F',
    'GLY' => 'G', 'HIS' => 'H', 'ILE' => 'I', 'LYS' => 'K', 'LEU' => 'L',
    'MET' => 'M', 'ASN' => 'N', 'PRO' => 'P', 'GLN' => 'Q', 'ARG' => 'R',
    'SER' => 'S', 'THR' => 'T', 'VAL' => 'V', 'TRP' => 'W', 'TYR' => 'Y',
);

# One-letter -> three-letter codes. Derived from the table above (all values
# there are distinct) so the two mappings can never drift apart.
%onetothree = reverse %threetoone;

# ----------------------------------------------------------------------
# Main driver.
#
# Usage: <script> LISTFILE
#
# LISTFILE holds one entry per line: a PDB file name optionally followed
# by a second space-separated field that initialize() stores in %resol.
# For every entry the PDB file is read, each chain is scanned with
# findcdrL() and findcdrH(), and one record is appended to the output
# file "cdrdatabase_new1" by output().
# Processing stops at the first entry whose file name is empty (end of
# list or an empty line), because initialize() then leaves $filename empty.
# ----------------------------------------------------------------------
$list=shift @ARGV;
die("Usage: $0 <list of pdb files>\n") unless defined $list;

# Three-argument open: the list name is never interpreted as a mode or pipe.
open(list,'<',$list)||die("List of pdb files $list not found\n");
# Fail early instead of silently discarding every record written later.
open(cdrdatabase,'>',"cdrdatabase_new1")||die("Cannot write cdrdatabase_new1: $!\n");
openparsed();

# Prime $filename with the first list entry.
initialize();

while ( $filename ne '' ) {

    push @pdbfiles,$filename ;

    readpdbfile();
    @light = @heavy = @antigen = ();

    # Classify every chain; chains for which neither finder assigned CDRs
    # are collected in @antigen.
    foreach $chain (@allchains) {
        findcdrL($chain,$filename,$seq{$chain}) ;
        findcdrH($chain,$filename,$seq{$chain}) ;
        push @antigen,$chain unless($cdrassigned{$chain});
    }

    # When chains literally named L / H were classified as light / heavy,
    # report those IDs instead of whichever chain matched last.
    $lightchain = "L" if ($type{"L"} eq "light");
    $heavychain = "H" if ($type{"H"} eq "heavy");

    # File name without its last four characters (e.g. a ".pdb" suffix).
    $prefix = substr($filename,0,length($filename)-4);

    print "$prefix      $lightchain$heavychain\n" if($lightchain ne '' and $heavychain ne '');
    output($filename);

    # Advance to the next list entry.
    initialize();
}

close(list);
# Buffered write errors only surface at close time, so check it.
close(cdrdatabase)||die("Error writing cdrdatabase_new1: $!\n");

# initialize - advance to the next entry of the PDB list.
#
# Reads one line from the already opened "list" filehandle and splits it on
# runs of spaces into the globals $filename and $resolnew. The second field
# is also remembered in %resol, keyed by file name.
#
# At end of file, or when the line yields no file name (for example an empty
# line), $filename is set to the empty string; the main loop uses that as its
# stop condition. Nothing is stored in %resol in that case.
sub initialize{
    $line=<list>;

    if (!defined $line) {
        # End of list.
        $filename='';
        $resolnew=undef;
        return;
    }

    # chomp, not chop: a final line without a trailing newline must not
    # lose its last real character.
    chomp($line);

    ($filename,$resolnew)=split(/ +/,$line);
    $filename='' unless defined $filename;

    $resol{$filename}=$resolnew if $filename ne '';
}

# readpdbfile - read the PDB file named by the global $filename.
#
# Resets all per-file globals, then scans every line of the file using
# fixed column offsets. For each record whose identifier field matches
# "ATOM" and whose atom-name field matches "CA":
#   * the chain ID is appended to @allchains the first time it is seen;
#   * the one-letter code of the residue (via %threetoone) is appended to
#     $seq{chain}. Residue names missing from %threetoone contribute nothing.
# A record is skipped when it repeats the previous residue (same chain,
# residue number and insertion code) with a different alternate-location
# flag, so a residue with several alternate locations is counted once.
#
# Dies when the file cannot be opened.
sub readpdbfile{
    # Per-file state.
    %seen = ();
    %seq = ();
    %sequence = ();
    @allchains = ();
    $ch='';
    %cdrassigned = ();
    %type = ();

    $cdrHfound = $cdrLfound =0;
    $frl1=$frl2=$frl3=$frl4='';
    $frh1=$frh2=$frh3=$frh4='';
    $l1=$l2=$l3=$h1=$h2=$h3='';
    $lightchain=$heavychain='';

    # Forget the last residue of the previously read file so it cannot
    # suppress the first residue of this one.
    $old_chain=$old_residueno=$old_insert_code=$old_alt_loc='';

    open(pdbfile,'<',$filename)||die("PDB file $filename not found\n");

    # Read to real end-of-file; an empty line no longer ends the scan.
    while (defined($line=<pdbfile>)) {
        chomp($line);
        next if $line eq '';

        # Fixed-column fields (zero-based offset, width).
        $identifier = substr($line,0,6);
        $atomno = substr($line,6,5);
        $atom = substr($line,12,4);
        $alt_loc = substr($line,16,1);
        $residue = substr($line,17,3);
        $chain = substr($line,21,1);
        $residueno = substr($line,22,4);
        $insert_code=substr($line,26,1);
        $x = substr($line,30,8);
        $y = substr($line,38,8);
        $z =  substr($line,46,8);

        next unless ($identifier =~ /ATOM/ and $atom =~ /CA/);

        push @allchains,$chain unless $seen{$chain}++;

        # True only for a second (third, ...) alternate location of the
        # residue that was just recorded.
        $is_alt_copy = ($old_chain eq $chain
                        and $old_residueno eq $residueno
                        and $old_insert_code eq $insert_code
                        and $old_alt_loc ne $alt_loc);

        $seq{$chain}=$seq{$chain}.$threetoone{$residue} unless $is_alt_copy;

        $old_chain=$chain;
        $old_residueno=$residueno;
        $old_insert_code=$insert_code;
        $old_alt_loc=$alt_loc;
    }

    close(pdbfile);
}

# findcdrL - look for light-chain CDR segments in one chain sequence.
#
# Arguments: chain ID, PDB file name, one-letter sequence of the chain.
#
# L1 is the text between a "C" and one of the listed W?? triplets; L3 is the
# text between a "C" and a (F|V)G?G motif. L3 is only searched for when L1
# was found. When both are found the chain is recorded as "light":
#   * L2 is taken as the 7 residues starting 16 positions after the end of L1;
#   * the framework pieces $frl1..$frl4 are cut around the three segments;
#   * lengths and sequences are stored per file name in %lenl1..%lenl3 and
#     %seql1..%seql3, and %vltype is set to "kappa", "lambda" or "other"
#     from patterns tested against $frl1;
#   * a summary line is printed to standard output.
# Nothing is recorded when either segment is missing.
#
# Segment starts are taken from the regex match offsets, not from a text
# search for the segment: a short segment can also occur earlier in the
# chain, and index() would then return that earlier position.
sub findcdrL{
    $chain=$_[0];
    $filename=$_[1];
    $sequence=$_[2];
    $l1found=$l3found=0;
    $l1=$l2=$l3= '' ;

    my $l1pos;    # offset of the first L1 residue within $sequence
    my $l3pos;    # offset of the first L3 residue within $sequence

    #*********L1***************************
    $var = $sequence =~/C[A-Z]{1,17}(WYL|WLQ|WFQ|WYQ|WYH|WVQ|WVR|WWQ|WVK|WLL|WFL|WVF|WIQ|WYR|WNQ|WHL)/;
    if($var){
        $l1found=1;
        $temp=$&;
        # Strip the leading C and the trailing three-letter motif.
        $lenl1=length ($temp)-4;
        $l1=substr($temp,1,$lenl1);
        # $-[0] is where the match (its leading C) starts.
        $l1pos=$-[0]+1;
    }
    #************************************
    if($l1found){
        #***********L3********************
        $var = $sequence =~/C[A-Z]{1,15}(F|V)G[A-Z]G/;
        if($var){
            $l3found=1;
            $temp=$&;
            # Strip the leading C and the trailing four-residue motif.
            $lenl3=length ($temp)-5;
            $l3=substr($temp,1,$lenl3);
            $l3pos=$-[0]+1;
        }
        #****************************
    }
    $cdrLfound = ($l1found && $l3found);

    if($cdrLfound){
        $cdrassigned{$chain}++;
        $l1start=$l1pos;
        $l1end=$l1start+$lenl1-1;
        # L2 is placed by fixed offsets relative to the end of L1.
        $l2start=$l1end+16;
        $l2end=$l2start+7-1;
        $l3start=$l3pos;
        $l3end=$l3start+$lenl3-1;
        $l2=substr($sequence,$l2start,7);
        $lenl2=7;

        # Framework pieces; $frl1 runs up to and including the C before L1.
        $frl1=substr($sequence,0,$l1start);
        $lenfrl1=length($frl1);
        $frl2=substr($sequence,$l1end+1,15);
        $lenfrl2=length($frl2);
        $frl3=substr($sequence,$l2end+1,$l3start-$l2end-1);
        $lenfrl3=length($frl3);
        $frl4=substr($sequence,$l3end+1,12);
        $lenfrl4=length($frl4);

        push @light,$chain;
        $lightchain =$chain;

        $type{$chain} = "light";

        $lenl1{$filename}=$lenl1;
        $lenl2{$filename}=$lenl2;
        $lenl3{$filename}=$lenl3;
        $seql1{$filename}=$l1;
        $seql2{$filename}=$l2;
        $seql3{$filename}=$l3;

        # The lambda test runs last, so it wins when both patterns match.
        $vltype{$filename} = "other";
        $vltype{$filename} = "kappa" if($frl1 =~ /[LMVI][A-Z][QE][A-Z]{9}G[A-Z]{4}[LVIMF][STNARF]C/);
        $vltype{$filename} = "lambda" if ($frl1 =~ /[LMVI][A-Z][QE][A-Z]{8}G[A-Z]{4}[LVIMF][STN]C/);

        # Individual framework-1 residues; not read anywhere in this sub.
        if($lenfrl1 == 23){
            $t1=substr($frl1,3,1);
            $t2=substr($frl1,5,1);
            $t3=substr($frl1,15,1);
            $t4=substr($frl1,20,1);
        }
        print "$filename chain=$chain L1=$l1\t$l2\t$l3\n";
    }
}


# findcdrH - look for heavy-chain CDR segments in one chain sequence.
#
# Arguments: chain ID, PDB file name, one-letter sequence of the chain.
#
# H1 is taken from a match that starts at a "C" and ends with a
# W-(I|V|F|Y|A|M|L|N)-(R|K|Q|V|N)-(Q|K|H|E|L|R) motif, dropping the C, the
# three residues after it and the motif. H3 is taken from a match that
# starts at a "C" and ends with a W-G-?-(G|R) motif, dropping the C, the two
# residues after it and the motif. H3 is only searched for when H1 was found.
# When both are found the chain is recorded as "heavy":
#   * H2 runs from 15 positions after the end of H1 to 33 positions before
#     the start of H3;
#   * the framework pieces $frh1..$frh4 are cut around the three segments;
#   * lengths and sequences are stored per file name in %lenh1..%lenh3 and
#     %seqh1..%seqh3.
# Nothing is recorded when either segment is missing.
#
# Segment starts are taken from the regex match offsets, not from a text
# search for the segment: a short segment can also occur earlier in the
# chain, and index() would then return that earlier position.
sub findcdrH{
    $chain=$_[0];
    $filename=$_[1];
    $sequence=$_[2];
    $h1found=$h3found=0;
    $h1=$h2=$h3='' ;

    my $h1pos;    # offset of the first H1 residue within $sequence
    my $h3pos;    # offset of the first H3 residue within $sequence

    #**************H1************
    $var = $sequence =~/C[A-Z]{1,16}(W)(I|V|F|Y|A|M|L|N)(R|K|Q|V|N)(Q|K|H|E|L|R)/;
    if($var){
        $h1found=1;
        $temp=$&;
        # Drop C plus three residues in front and the four-residue motif.
        $lenh1=length ($temp)-8;
        $h1=substr($temp,4,$lenh1);
        # $-[0] is where the match (its leading C) starts.
        $h1pos=$-[0]+4;
    }
    #******************************

    if($h1found){
        #***********H3****************
        $var = $sequence =~/C[A-Z]{1,27}(W)G[A-Z](G|R)/;
        if($var){
            $h3found=1;
            $temp=$&;
            # Drop C plus two residues in front and the four-residue motif.
            $lenh3=length ($temp)-7;
            $h3=substr($temp,3,$lenh3);
            $h3andstem=substr($temp,0,$lenh3+3);
            $h3pos=$-[0]+3;
        }
        #***************************
    }
    $cdrHfound = $h1found && $h3found;
    if($cdrHfound){
        $cdrassigned{$chain}++;
        $h1start=$h1pos;
        $h1end=$h1start+$lenh1-1;
        $h3start=$h3pos;
        $h3end=$h3start+$lenh3-1;

        # H2 is placed by fixed offsets relative to H1 and H3.
        $h2start=$h1end+15;
        $h2end=$h3start-33;
        $lenh2=$h2end-$h2start+1;
        $h2= substr($sequence,$h2start,$lenh2);

        $frh1=substr($sequence,0,$h1start);
        $lenfrh1=length($frh1);
        $frh2=substr($sequence,$h1end+1,$h2start-$h1end-1);
        $lenfrh2=length($frh2);
        $frh3=substr($sequence,$h2end+1,$h3start-$h2end-1);
        $lenfrh3=length($frh3);
        $frh4=substr($sequence,$h3end+1,10);
        $lenfrh4=length($frh4);
        $seq1=$frh1.$h1.$frh2.$h2.$frh3.$h3;

        $heavychain=$chain;

        $type{$chain} = "heavy";
        push @heavy,$chain;

        $lenh1{$filename}=$lenh1;
        $lenh2{$filename}=$lenh2;
        $lenh3{$filename}=$lenh3;
        $seqh1{$filename}=$h1;
        $seqh2{$filename}=$h2;
        $seqh3{$filename}=$h3;
    }
}

#******************************************

# output - append the record for one PDB file to the "cdrdatabase" handle.
#
# Argument: the PDB file name used as key into the per-file hashes.
#
# The record holds, in order: file name, resolution, the six segment
# lengths (L1 L2 L3 H1 H2 H3), the six segment sequences, species, date,
# light-chain type and fragment type. Missing sequences and a missing
# light-chain type are written as "none", missing species / fragment as
# "Unknown", a missing date as "00-XXX-00"; missing lengths print as 0.
#
# The resolution comes from %RESOL (filled by openparsed). When that has no
# entry for the file, the value read from the list file by initialize()
# (%resol) is used instead of printing 0.0.
sub output{
    $filename=$_[0];

    # Segment lengths.
    $val1=$lenl1{$filename}||0 ;
    $val2=$lenl2{$filename};
    $val3=$lenl3{$filename};
    $val4=$lenh1{$filename};
    $val5=$lenh2{$filename};
    $val6=$lenh3{$filename};

    $Ltotal=$val1+$val2+$val3;
    $Htotal=$val4+$val5+$val6;
    $total=$Ltotal+$Htotal;

    # Segment sequences.
    $VAL1=$seql1{$filename}||"none" ;
    $VAL2=$seql2{$filename}||"none" ;
    $VAL3=$seql3{$filename}||"none" ;
    $VAL4=$seqh1{$filename}||"none" ;
    $VAL5=$seqh2{$filename}||"none" ;
    $VAL6=$seqh3{$filename}||"none" ;

    $R=$RESOL{$filename};
    # Fall back to the list-file value when the metadata has none.
    $R=$resol{$filename} unless (defined $R and $R ne '');

    $species = $SPECIES{$filename};
    $date = $DATE{$filename};

    $vltype = $vltype {$filename}||"none" ;
    $frag = $FRAG{$filename};

    # Placeholders for missing metadata.
    $species = "Unknown" if ( $species eq '' );
    $date = "00-XXX-00" if ( $date eq '' );
    $frag = "Unknown" if ( $frag eq '' );

    printf cdrdatabase "%11s %3.1f %3d %3d %3d %3d %3d %3d %17s  %7s  %13s %13s  %22s  %24s %10s %9s  %6s %8s\n",$filename,$R,$val1,$val2,$val3,$val4,$val5,$val6,$VAL1,$VAL2,$VAL3,$VAL4,$VAL5,$VAL6,$species,$date,$vltype,$frag;
}

#******************************************

# openparsed - load per-structure metadata from "$info/parsed.xml".
#
# Each non-blank line is split on runs of spaces into
#   NAME RESOL DATE SPECIES FRAG LIGHT HEAVY
# and the hashes %SPECIES, %DATE, %RESOL and %FRAG are filled, keyed by
# NAME. LIGHT and HEAVY are parsed but not stored.
#
# The metadata is optional for the rest of the script (output() has
# placeholders for missing values), so an unreadable file only produces a
# warning and leaves the hashes untouched.
# NOTE(review): $info is not assigned anywhere in the visible code; it is
# presumably expected to come from rampaths.txt - confirm.
sub openparsed {
    unless (open(parsed,'<',"$info/parsed.xml")) {
        warn("Cannot read $info/parsed.xml: $!\n");
        return;
    }

    # Read to real end-of-file; blank lines are skipped instead of ending
    # the scan, and chomp keeps a final line without newline intact.
    while (defined($data = <parsed>)) {
        chomp($data);
        next if $data =~ /^\s*$/;

        ($NAME,$RESOL,$DATE,$SPECIES,$FRAG,$LIGHT,$HEAVY) = split (/ +/, $data);
        $SPECIES{$NAME}= $SPECIES;
        $DATE{$NAME} = $DATE;
        $RESOL{$NAME} = $RESOL;
        $FRAG{$NAME} = $FRAG;
    }

    close(parsed);
}
#*************************************************
