#!/usr/bin/perl -w
package FRAGS;
###############################################################################
##
## FRAGS Fragment Server Object
##
## This package allows you to post a query to the Robetta Fragment Server and get results.
##
## Copyright 2002, University of Washington
##   This document contains private and confidential information and
##   its disclosure does not constitute publication.  All rights are
##   reserved by University of Washington and the Baker Lab
##   except those specifically granted by license.
##
##  Initial Author: David E. Kim (dekim@u.washington.edu)
##  $Revision: 6156 $
##  $Date: 2005-04-29 04:59:10 -0400 (Fri, 29 Apr 2005) $
##  $Authors: David Kim $
##
###############################################################################
# EXAMPLE
###############################################################################
##
##	my $frags	= new FRAGS;
##
##      $frags->submit(   fasta     => $fasta,        # required
##			  username  => $username,     # required
##			  code      => '1ubq_',
##                        series    => 'bb',
##                        nohoms    => 0,
##                        out_dir   => "$fasta.frags",
##                        interval  => 120,
##                        timeout   => 1800,
##
##                        ChemicalShifts => $chsft,
##                        NoeConstraints => $cst,
##                        DipolarConstraints => $dpl,
##
##                        verbose   => 1 );
##
##      or for existing results
##
##      $frags->getResults(  id        => '101',  # required
##                           code      => '1ubq_',
##                           out_dir   => "$fasta.frags" );
##
###############################################################################
## REQUIREMENTS
###############################################################################
##
## libwww-perl (http://search.cpan.org/search?dist=libwww-perl)
##
##   or
##
## individual modules: HTTP, LWP
##
###############################################################################

## REQUIRED PACKAGES

use strict;
use HTTP::Request::Common qw(POST);
use LWP::UserAgent;
use LWP::Simple;

###############################################################################
# MAIN
###############################################################################
 
sub new {
        my $class = shift;
        my %params = ( @_);
        my $self = bless{}, ref $class || $class;

	## initialize parameters
	$self->{_out_dir}               = './';
	$self->{_submit_url}		= 'http://robetta.bakerlab.org/fragmentsubmit.jsp';
	$self->{_queue_url}             = 'http://robetta.bakerlab.org/fragmentqueue.jsp';
	$self->{_results_url}		= 'http://robetta.bakerlab.org/downloads/fragments/';
	$self->{_verbose}		= undef;
	$self->{_submit_id}             = undef;
	$self->{_results_interval}	= 30;
	$self->{_results_timeout}	= 3600;
	$self->{_max_seq_len}		= 1000;
	$self->{_min_seq_len}           = 10;
	$self->{_fasta_header}		= undef;
	$self->{_code}			= "";
        $self->{_series}                = "aa";
	$self->{_nohoms}		= 0;
	$self->{_ChemicalShifts}	= undef;
	$self->{_NoeConstraints}	= undef;
	$self->{_DipolarConstraints}	= undef;

	$self->{_results_files}		= [	
                                                # standard fragment files
                                                "aat000_03_05.200_v1_3.gz",
						"aat000_09_05.200_v1_3.gz",

                                                ## w/ constraints
                                                "aat000_03_06.200_v1_3.gz",
                                                "aat000_09_06.200_v1_3.gz",
                                                "t000_.chsft",  # w/ chemical shifts

						"t000.dat",
						"t000_.check",
						"t000_.checkpoint",
						"t000_.fasta",
						"t000_.jufo_ss",
						"t000_.psipred",
						"t000_.psipred_ss2",
						"t000_.rdb",

						## nohoms
                                                "t000_.homolog_vall",
                                                "t000_.homolog_nr" ];


        return $self;
}

## SUBMIT

sub submit {
	my $this = shift;
	my %params = ( @_ );

	my $fasta		= $params{fasta};
	my $code		= $params{code};
        my $series              = $params{series};
	my $out_dir		= $params{out_dir};
	my $verbose		= $params{verbose};
	my $interval		= $params{interval};
	my $timeout		= $params{timeout};
	my $user		= $params{username};
	my $nohoms		= $params{nohoms};
	my $cshft		= $params{ChemicalShifts};
	my $dpl			= $params{DipolarConstraints};
	my $noe			= $params{NoeConstraints};

        $this->{_user}                  = $user if ($user);
        $this->{_out_dir}               = $out_dir if ($out_dir);
        $this->{_verbose}               = 1 if ($verbose);
        $this->{_results_interval}      = $interval if ($interval);
        $this->{_results_timeout}       = $timeout if ($timeout);
	$this->{_code}			= $code if ($code);
        $this->{_series}                = $series if ($series);
	$this->{_nohoms}		= $nohoms if ($nohoms);
	$this->{_ChemicalShifts}	= $cshft if ($cshft);
	$this->{_DipolarConstraints}	= $dpl   if ($dpl);
	$this->{_NoeConstraints}	= $noe   if ($noe);


	$this->{_out_dir} .= "/" if ($this->{_out_dir} !~ /\/$/);

	(-d $this->{_out_dir} || mkdir( $this->{_out_dir} ) ) or die "ERROR - cannot mkdir $this->{_out_dir}\n";

	my $seq				= "";
	my $res				= "";

	## get results from web server

	my $fastastr = "";
	my $header   = "";

	if (!$this->{_user}) {
		print "WARNING - username required to submit sequence\n";
		return 0;
	}

	print "READING FASTA: $fasta\n" if ($this->{_verbose});
        if ($fasta && -s $fasta) {                	        
		open(FASTA, $fasta) or do {
			warn "WARNING - cannot open fasta file $fasta: $!\n";
			return 0;
		};
		F: while (my $line = <FASTA>) {
			if ($line =~ /^>/) {
				$header .= $line;
				next F;
			}
			$seq .= $line;
		}
		close(FASTA);
	} else {
		warn "WARNING - fasta required to submit sequence\n";
		return 0;
	}
	$header ||= "unknown\n";
	$fastastr = $header.$seq;
	$seq =~ s/\s+//gs;
	$this->{_fasta_header} = $header;

	print "HEADER: $header" if ($this->{_verbose});
	print "SEQUENCE: $seq\n" if ($this->{_verbose});

	## check if sequence is too long
	if (length($seq) > $this->{_max_seq_len}) {
		warn "WARNING - sequence is too long (>$this->{_max_seq_len})\n";
		return 0;
	}
	if (length($seq) < $this->{_min_seq_len}) {
		warn "WARNING - sequence is too short (>$this->{_min_seq_len})\n";
		return 0;
	}
	if (!$this->checkcode()) {
		warn "WARNING - code must be 5 alphanumerics\n";
		return 0;
	}
        if (!$this->checkseries()) {
                warn "WARNING - series must be 2 alphanumerics\n";
                return 0;
        }

	## clean header
	$header =~ s/[^\w\- ]+//gs;

	print "POSTING REQUEST $this->{_submit_url}\n" if ($this->{_verbose});
	print "USERNAME: $this->{_user}\n" if ($this->{_verbose}); 

	## HTTP POST REQUEST
	## NOTE: THIS DEPENDS ON THE FORMAT OF THE WEB FORM

	my $form_data_content = [       Sequence    => $fastastr,
                                        Notes       => $header,
                                        UserName    => $this->{_user},
                                        Code        => $this->{_code},
                                        NoHoms      => $this->{_nohoms},
                                        type        => "submit" ];

	if ($this->{_ChemicalShifts}) {
		if (-f $this->{_ChemicalShifts}) {
			push(@{$form_data_content}, "ChemicalShifts", [$this->{_ChemicalShifts}]);
		} else {
			warn "WARNING - chemical shifts file $this->{_ChemicalShifts} does not exist\n";
		}
	}
        if ($this->{_NoeConstraints}) {
                if (-f $this->{_NoeConstraints}) {
                        push(@{$form_data_content}, "NoeConstraints", [$this->{_NoeConstraints}]);
                } else {
                        warn "WARNING - NOE constraints file $this->{_NoeConstraints} does not exist\n";
                }
        }
        if ($this->{_DipolarConstraints}) {
                if (-f $this->{_DipolarConstraints}) {
                        push(@{$form_data_content}, "DipolarConstraints", [$this->{_DipolarConstraints}]);
                } else {
                        warn "WARNING - Dipolar constraints file $this->{_DipolarConstraints} does not exist\n";
                }
        }

	my $req = POST $this->{_submit_url}, Content_Type => 'form-data',
                          Content =>  $form_data_content;

	#print $req->content(); #debug
	
	## get ID
	my $ua	= LWP::UserAgent->new;
	$res = $ua->request($req)->as_string;

	#print $res; #debug

	## get ID
	## NOTE: THIS IS DEPENDENT ON THE HTML FORMAT
	print "GETTING SUBMIT ID" if ($this->{_verbose});
	if ($res =~ /Your Job ID is <a href=\"fragmentqueue.jsp\?id=(\d+)\">/) {
		my $submitid = $1;
		print ": $submitid\n" if ($this->{_verbose});
		$this->{_submit_id} = $submitid;
	} else {
		print "\n" if ($this->{_verbose});
		warn "WARNING - cannot parse Submit ID from response. Correct username?\n";
		return 0;
	}

	## get results
	$res = $this->getResults( id => $this->{_submit_id}, code => $this->{_code}, series => $this->{_series} );
	($res) or return 0;

	return 1;
}

## GET RESULTS

sub getResults {
	my $this = shift;
	my %params = ( @_ );

	my $id = $params{id};
	my $code = $params{code};
	my $out_dir = $params{out_dir};
	my $verbose = $params{verbose};
	my $series  = $params{series};

	$this->{_verbose} = 1 if ($verbose);
	$this->{_out_dir} = $out_dir if ($out_dir);
	$this->{_out_dir} .= "/" if ($this->{_out_dir} !~ /\/$/);
	$this->{_code}    = $code if ($code);
	$this->{_series}  = $series if ($series);

	(-d $this->{_out_dir} || mkdir( $this->{_out_dir} ) ) or die "ERROR - cannot mkdir $this->{_out_dir}\n";

	my $interval		= $this->{_results_interval};
	my $timeout		= $this->{_results_timeout};
	my $done		= undef;
	my $elapsed		= 0;
	my $t0			= time();

	## first check if job exists
	my $joburl = $this->{_queue_url}."?id=$id";	
	my $joburlstr;
	print "CHECKING JOB: $joburl\n" if ($this->{_verbose});
	if ( defined ($joburlstr = get $joburl) && $joburlstr =~ /this job has expired/ ) {
		print "FRAGMENT SERVER JOB $id does not exist!!\n" if ($this->{_verbose});
		return 0;
	}

	## check if job has errored out
	if ($joburlstr =~ /Status: .+?Error<\/font>/is) {
		print "FRAGMENT SERVER JOB ERROR: see $joburlstr for details\n";
		return 0;
	}

	## check code
        if (!$this->checkcode()) {
                warn "WARNING - code must be 5 alphanumerics\n";
                return 0;
        }
        ## check series
        if (!$this->checkseries()) {
                warn "WARNING - series must be 2 alphanumerics\n";
                return 0;
        }

	## change results file names if code is given
	my $protcode = substr($this->{_code}, 0, 4);
	for (my $i=0;$i<=$#{$this->{_results_files}};$i++) {
		if ($this->{_code}) {
			${$this->{_results_files}}[$i] =~ s/^t000_/$this->{_code}/;
			${$this->{_results_files}}[$i] =~ s/^t000\.dat$/$protcode\.dat/;
			${$this->{_results_files}}[$i] =~ s/^aat000_/aa$this->{_code}/;
		}
		#print "EXPECTING RESULT FILE: $this->{_results_url}$id/${$this->{_results_files}}[$i]\n" if ($this->{_verbose});
	}

	while ( $elapsed < $timeout ) {

		print "CHECKING RESULTS ".localtime()."\n" if ($this->{_verbose});		
		my $getcnt = 0;
		my $fragmentfilescnt = 0;

		## this is ugly, we should get at least 10 files, +1 if chemical shifts were used, +2 if nohoms
		F: foreach my $file (@{$this->{_results_files}}) {
			my $results		= undef;
			my $resultfile = $this->{_out_dir}.$file;
			$resultfile =~ s/\/aa($this->{_code})([^\/]+)$/\/$this->{_series}$1$2/;
			## does result file already exist?
			if (-s $resultfile) {
				warn "WARNING - $resultfile already exists!\n";
				$fragmentfilescnt++ if ($resultfile =~ /\/$this->{_series}($this->{_code})([^\/]+)$/);
				$getcnt++;
				next F;
			}
                	## HTTP REQUEST	
			my $url = "$this->{_results_url}$id/$file";
			#print "CHECKING: $url\n" if ($this->{_verbose});
			if ( defined ($results = get $url) ) {
				print "CREATING RESULT: $resultfile\n" if ($this->{_verbose});
				open(FILE, ">$resultfile") or do {
					warn "WARNING - cannot create $resultfile: $!\n";
					return 1;
				};
				binmode(FILE);
				print FILE $results;
				close(FILE);
				## gunzip
				if ($resultfile =~ /\.gz$/) {
					( system( "gunzip", $resultfile ) == 0 ) or
						warn "WARNING - cannot gunzip $resultfile\n";
				}
				$fragmentfilescnt++ if ($resultfile =~ /\/$this->{_series}($this->{_code})([^\/]+)$/);
				$getcnt++;
			}
		}
		## this does not ensure that the chemical shift file and/or homologs files were retrieved
		if ($getcnt >= 10 && $fragmentfilescnt == 2) {
			return 1;
		}

		#print "WAITING FOR RESULTS ($interval s interval)\n" if ($this->{_verbose});
		sleep $interval;
		$elapsed = time() - $t0;	
	}
	warn "WARNING - cannot get results: maximum allowed time ($timeout s) reached\n";
	return 0;
}

sub checkcode {
        my $this = shift;
	## must be 5 alphanumerics
	if ($this->{_code} && $this->{_code} !~ /^\w{5}$/) {
		return 0;
	}
	return 1;
}

sub checkseries {
        my $this = shift;
        ## must be 2 alphanumerics
        if ($this->{_series} && $this->{_series} !~ /^\w{2}$/) {
                return 0;
        }
        return 1;
}

1;





