#!/usr/bin/perl -w
package RosettaAB;
###############################################################################
##
## ROSETTA AB INITIO OBJECT
##
## This is a wrapper for the rosetta executable to generate ab initio 
## decoys in a silent mode file. See required packages below.
##
##
## Copyright 2002, University of Washington
##   This document contains private and confidential information and
##   its disclosure does not constitute publication.  All rights are
##   reserved by University of Washington and the Baker Lab
##   except those specifically granted by license.
##
##  Initial Author: David E. Kim (dekim@u.washington.edu)
##  $Revision: 6156 $
##  $Date: 2005-04-29 04:59:10 -0400 (Fri, 29 Apr 2005) $
##  $Authors: David Kim $
##
###############################################################################
###############################################################################
## 
## USAGE EXAMPLE:  
##
##	# example to make 1000 decoys in silent mode given a fasta file 
##      # 2ptl_.fasta (name is code + chain + .fasta)
##
##	my $RosettaObject = new RosettaAB(
##
##              code                    =>      "2ptl",
##              chain                   =>      "_",
##		executable              =>      "/bin/rosetta.gcc",
##		rosetta_db              =>      "/scratch/shared/rosetta_database/",
##
##              # default is undef (set to full path to make_fragments)
##		# assumes fragment files are in source_dir if not defined
##              # make_fragments_from_server.pl gets fragments from robetta server
##              make_fragments          =>      "/bin/make_fragments.pl",
##
##		# default is current directory
##		run_dir                 =>      "run_dir",
##
##		# default is run_dir
##		source_dir              =>      "source_dir",
##
##              # default is run_dir
##              data_dir                =>      "data_dir",
##
##		# default/minimum is 1
##		nstruct                 =>      1000,
##
##		# default is 'aa'
##		pred_id                 =>      "aa",
##
##		# default is undefined (set to 1 to exclude homologs in make_fragments) 
##		nohoms			=>	1,
##
##		# default is '*******03_05.200_v1_3'
##		frag1file               =>      "*******03_05.200_v1_3",
##
##		# default is '*******09_05.200_v1_3'
##		frag2file               =>      "*******09_05.200_v1_3",
##
##		# default is undef (be careful!!!)
##		additional_args		=>	"-no_filters",
##
##		# print rosetta output default 0
##              printoutput             =>      1
##						
##	);
##                        
##	## run rosetta
##	$decoys_count = $RosettaObject->execute();
##
###############################################################################
###############################################################################

## REQUIRED PACKAGES

use strict;
use locale;
use Cwd;

###############################################################################
# main
###############################################################################

sub new {
	my $class = shift;
	my %param = ( @_);
	my $self = bless{}, ref $class || $class;


################################################################################
## PARAMS DESCRIBED BELOW ######################################################

	$param{executable}		||= "rosetta.gcc";
	$param{rosetta_db}		||= "/scratch/shared/rosetta_database/";

	## directory where rosetta will run (out file will be saved here)
	$param{run_dir}			||= cwd();

	## directory where source files (fasta, fragments, pdb, etc...) reside
	$param{source_dir}		||= $param{run_dir}; 

        ## directory for output data
	$param{data_dir}		||= $param{run_dir};

	$param{nstruct}			||= 1;
	$param{pred_id}			||= "aa";
	$param{chain}			||= "_";
	$param{frag1file}		||= "*******03_05.200_v1_3";
	$param{frag2file}		||= "*******09_05.200_v1_3";
	$param{additional_args}		||= undef;

	## set to 1 if you want to save the raw silent mode files (appended to a single file)
	$param{save_raw_output}		||= 0;

	print "CHECKING ROSETTA PARAMETERS\n";

	# Check nstruct
	($param{nstruct} && $param{nstruct} =~ /^\d+$/ && $param{nstruct} > 0) or die "ROSETTA ERROR - nstruct not set\n";

        # Check run_dir
        (-d $param{run_dir}) or die "ROSETTA ERROR - run_dir ($param{run_dir}) not set: $!\n";
	$param{run_dir} .= '/' if substr($param{run_dir},-1) ne '/';
	$param{run_dir} = "./$param{run_dir}" if ($param{run_dir} !~ /^(\/|\.\/)/);

        # Check source_dir
        (-d $param{source_dir}) or die "ROSETTA ERROR - source_dir ($param{source_dir}) not set: $!\n";
	$param{source_dir} .= '/' if substr($param{source_dir},-1) ne '/'; 
	$param{source_dir} = "./$param{source_dir}" if ($param{source_dir} !~ /^(\/|\.\/)/);

	# Check data_dir
	(-d $param{data_dir}) or die "ROSETTA ERROR - data_dir ($param{data_dir}) not set: $!\n";
	$param{data_dir} .= '/' if substr($param{data_dir},-1) ne '/';
	$param{data_dir} = "./$param{data_dir}" if ($param{data_dir} !~ /^(\/|\.\/)/);

        # Check if pred_id is set
        ($param{pred_id} && $param{pred_id} =~ /^\w\w$/) or die "ROSETTA ERROR - pred_id ($param{pred_id}) not set (must be 2 alphanumeric characters): $!\n";

        # Check if code is set
        ($param{code} && $param{code} =~ /^\w{4}$/) or die "ROSETTA ERROR - code ($param{code}) not set (must be 4 alphanumeric characters)\n";

        # Check if chain is set
        ($param{chain} && $param{chain} =~ /^\w$/) or die "ROSETTA ERROR - chain ($param{chain}) not set\n";

	# Check makefragments executable
	if ( $param{make_fragments} ) {
		( -s $param{make_fragments} ) or die "ROSETTA ERROR - executable ($param{make_fragments}) not set: $!\n";
	} else {
		# Check fragment files
		( $param{frag1file} =~ s/^.{7}/$param{pred_id}$param{code}$param{chain}/ && -s $param{source_dir}.$param{frag1file} ) or
			die "ROSETTA ERROR - fragment file ($param{source_dir}$param{frag1file}) not set: $!\n";
 
		( $param{frag2file} =~ s/^.{7}/$param{pred_id}$param{code}$param{chain}/ && -s $param{source_dir}.$param{frag2file} ) or
			die "ROSETTA ERROR - fragment file ($param{source_dir}$param{frag2file}) not set: $!\n";
	}

	# Check fasta file
	$param{fasta} = $param{source_dir}.$param{code}.$param{chain}.".fasta";
	( -s $param{fasta} ) or die "ROSETTA ERROR - fasta file ($param{fasta}) not set: $!\n";

	# Get sequence length and check sequence
	$param{seq_len}	= 0;
	open(FASTA, "$param{fasta}") or die "ROSETTA ERROR - fasta file ($param{fasta}) not set: $!\n";
	my @lines = <FASTA>;
	close(FASTA);
	my $seq = "";
	foreach my $line (@lines) {
		next if ( $line =~ /^>/ );
		$seq .= $line;
	}
	$seq =~ s/\s+//gs;
	$param{seq_len}	= length($seq);
	($param{seq_len}) or die "ROSETTA ERROR - sequence length is 0 in fasta file ($param{fasta})\n";
	$seq = uc($seq);
	($seq !~ /[^ACDEFGHIKLMNPQRSTVWY]/) or die "ROSETTA ERROR - sequence in fasta file contains invalid characters\n";

        # Check executable here.
        ($param{executable} && -s $param{executable}) or die "ROSETTA ERROR - executable ($param{executable}) not set: $!\n";

        # Check Rosetta database.
        ($param{rosetta_db} && -d $param{rosetta_db}) or warn "ROSETTA WARNING - be sure rosetta_db $param{rosetta_db} has read permissions\n";
        $param{rosetta_db} .= '/' if substr($param{rosetta_db},-1) ne '/';

        $self->_init(%param);

        return $self;
}


sub _init {
        my ($self, %param) = @_;

        print "\n";
	print "PARAMS:\n";
	print "executable          = $param{executable}\n";
	print "additional_args     = $param{additional_args}\n" if ($param{additional_args});
	print "rosetta_db          = $param{rosetta_db}\n";
	print "run_dir             = $param{run_dir}\n";
	print "source_dir          = $param{source_dir}\n";
	print "nstruct             = $param{nstruct}\n";
	print "pred_id             = $param{pred_id}\n";
	print "code                = $param{code}\n";
	print "chain               = $param{chain}\n";
	print "make_fragments      = $param{make_fragments}\n" if ($param{make_fragments});
	print "nohoms              = 1\n" if ($param{nohoms});
	print "frag1file           = $param{frag1file}\n";
	print "frag2file           = $param{frag2file}\n";
	print "\n";


	## initialize object variables
        $self->{_make_fragments}                = $param{make_fragments};
        $self->{_nohoms}                        = 1 if $param{nohoms};
        $self->{_frag1file}                     = $param{frag1file};
        $self->{_frag2file}                     = $param{frag2file};
        $self->{_executable}                    = $param{executable};
        $self->{_run_dir}                       = $param{run_dir};
        $self->{_source_dir}                    = $param{source_dir};
        $self->{_data_dir}                      = $param{data_dir};
        $self->{_pred_id}                       = $param{pred_id};
        $self->{_code}                          = $param{code};
        $self->{_chain}                         = $param{chain};
        $self->{_rosetta_db}                    = $param{rosetta_db};
        $self->{_nstruct}                       = $param{nstruct};
        $self->{_additional_args}               = $param{additional_args};
	$self->{_save_raw_output}		= $param{save_raw_output};
        $self->{_fasta}                         = $param{fasta};
        $self->{_seq_len}                       = $param{seq_len};
	$self->{_printoutput}			= $param{printoutput};

	$self->{_silentmode_format}		= {	seq_header   => undef,
							score_header => undef,
							seq_len      => undef,
							score_len    => undef,
							coord_len    => undef };
	$self->{_target_output_size}		= 0;
	$self->{_previous_output_size}		= 0;
	$self->{_decoys_count}			= 0;
	$self->{_start_time}			= 0;

	return 1;
}

sub _runCmd {
        my %params = ( @_ );
 
        my $cmd                 = $params{cmd};
        my $catch_output        = $params{catch_output};
	my $print_output	= $params{print_output};
 
        my ($exit_status, $output);

	#print "Running command: $cmd\n";

	## print output as default
	#$print_output ||= 1 if ( !defined($print_output) || $print_output != 0 );
	        
	open(CMD, "$cmd |") or die "ROSETTA ERROR - cannot run command $cmd: $!\n";
	$|=1;   # disable output buffering
	while (<CMD>) {
		print $_ if ($print_output);
		$output .= $_ if ($catch_output);
	}
	close(CMD);
	$exit_status = ($? >> 8);
 
        ($catch_output) ? return $exit_status, $output : return $exit_status;
}

sub execute {
        my $this = shift;
	$this->{_start_time}	= time();

        my $last_pdb = "$this->{_data_dir}$this->{_pred_id}$this->{_code}.last_pdb";

	print "EXECUTING ROSETTA\n\n";

        # first check to see if the final output file exists from a previous run
	my $outputfile = $this->{_data_dir}.$this->{_pred_id}.$this->{_code}.".out";
        if ( -s $outputfile ) {
		my $decoy_cnt = $this->getDecoyCount( outfile => $outputfile );
		if ($decoy_cnt >= $this->{_nstruct}) {	
			warn "Final output file ($outputfile) exists with $decoy_cnt decoys!!\n";
			return $decoy_cnt;
		}
		## create last_pdb file to tell rosetta how many decoys have already been made
		open(FILE, ">$last_pdb") or die "ERROR: cannot open $last_pdb: $!\n";
		print FILE " 1\n"; # necessary?
		print FILE " $decoy_cnt\n";
		close(FILE);
        }

	## make fragments if desired
        if ( $this->{_make_fragments} ) {
               	print "Attempting to make fragments\n";
               	$this->makeFragments() or
                       	die "ROSETTA ERROR - Cannot make fragments\n";
        }

	## chdir to run directory
        chdir $this->{_run_dir}
		or die "ROSETTA ERROR - cannot chdir $this->{_run_dir}: $!\n";
	        
	## create paths.txt file
	$this->createRosettaPathsFile;

        ## run rosetta
        my $executable          = $this->{_executable};
        my $nstruct             = $this->{_nstruct};
        my $arguments           = "$this->{_pred_id} $this->{_code} $this->{_chain} -nstruct $nstruct -silent";
        if ($this->{_additional_args}) { $arguments .= " ".$this->{_additional_args}; }
 
        my $shell       = "$executable $arguments";
        print "SHELL: $shell\n";
        my $exit_status = &_runCmd( cmd => $shell, print_output => $this->{_printoutput} );

	## get silent mode format
	$this->getSilentModeFormat($outputfile);
 
        unlink($last_pdb) or
               warn "ROSETTA WARNING - cannot unlink($last_pdb): $!\n";

        ## Clean the silent mode file
        my $clean_outfile       = $this->{_data_dir}.$this->{_pred_id}.$this->{_code}."_clean.out";
        my $dirty_outfile       = $this->{_data_dir}.$this->{_pred_id}.$this->{_code}."_dirty.out";

        my ($clean_cnt, $dirty_cnt) = $this->cleanRosettaOutFile( outfile => $outputfile,
                                                                  clean_outfile => $clean_outfile,
                                                                  dirty_outfile => $dirty_outfile );

        if ($clean_cnt) {
                print "Renaming file $clean_outfile to $outputfile\n";
                rename($clean_outfile, $outputfile) or
                          die "ROSETTA ERROR - cannot rename $clean_outfile to $outputfile: $!\n";
        } else {
                die "ROSETTA ERROR - cannot create clean output file $clean_outfile\n";
        }
 
        ## Sort the silent mode file by score
        my $sorted_outfile      = $this->{_data_dir}.$this->{_pred_id}.$this->{_code}."_sorted.out";
        if ( $this->sortRosettaOutFile( outfile => $outputfile, sorted_outfile => $sorted_outfile ) ) {
                print "Renaming file $sorted_outfile to $outputfile\n";
                rename($sorted_outfile, $outputfile) or
                          die "ROSETTA ERROR - cannot rename $sorted_outfile to $outputfile: $!\n";
        } else {
                 die "ROSETTA ERROR - cannot create sorted output file $sorted_outfile: $!\n";
        }
 
        my $decoy_cnt = $this->getDecoyCount( size => (stat($outputfile))[7] );
        return $decoy_cnt;
}

sub makeFragments {
	my $this = shift;
	my ($shell, $exit_status);
 
	my $rundir		= $this->{_source_dir};
	my $series		= $this->{_pred_id};
	my $code		= $this->{_code};
	my $chain		= $this->{_chain};
	my $frag1		= $this->{_frag1file};
	my $frag2		= $this->{_frag2file};
	my $nohoms		= $this->{_nohoms};
	my $makefragments	= $this->{_make_fragments};

        my $outputdir	= $rundir.$code."_frag/";
        my $fasta       = $rundir.$code.$chain.".fasta";
        $frag1          =~ s/\*{7}/$series$code$chain/;
        $frag2          =~ s/\*{7}/$series$code$chain/;
 
        print "Checking if fragment files already exist\n";
        if (-s $rundir.$frag1 && -s $rundir.$frag2) {
                print "Skipping MakeFragments: fragment files $frag1 $frag2 exist in $rundir\n";
                return 1;
        }
 
        print "Making fragment files: $frag1 $frag2\n";
 
        ## make output directory (must isolate runs in separate directory for MakeFragments to work
        print "Creating MakeFragments output directory $outputdir\n";
        (-d $outputdir || mkdir($outputdir)) or
                die "ROSETTA ERROR - Cannot make MakeFragments output directory $outputdir: $!\n";
 
        ## run MakeFragments
        if ( $nohoms ) {
                print "Using -nohoms to exclude homologs\n";
                $shell = "$makefragments -rundir $outputdir $fasta -nohoms";
        } else {
                $shell = "$makefragments -rundir $outputdir $fasta";
        }
	$shell .= " -verbose" if ($this->{_printoutput});
        print "SHELL: $shell\n";
        $exit_status = &_runCmd( cmd => $shell,  print_output => $this->{_printoutput} );
 
        ## make sure fragment files exist
        if (-s $outputdir.$frag1 && -s $outputdir.$frag2) {
                ## move fragment files and psipred files to $rundir
                $shell = "mv $outputdir$frag1  $outputdir$frag2 $outputdir$code$chain.psipred $outputdir$code$chain.psipred_ss2 $rundir";
                print "SHELL: $shell\n";
                my $status = system("mv", "$outputdir$frag1", "$outputdir$frag2", "$outputdir$code$chain.psipred", "$outputdir$code$chain.psipred_ss2", $rundir);
        }
        if (-s $rundir.$frag1 && -s $rundir.$frag2) {
                print "Fragment files $frag1 $frag2 exist in $rundir\n";
                return 1;
        } else {
                warn "ROSETTA WARNING - Error making fragments (fragment files do not exist in $rundir): $!\n";
                return 0;
        }
}

sub getDecoyCount {
	my $this = shift @_;

	my %params = ( @_ );

	my $size    = $params{size};
	my $outfile = $params{outfile};

	my $decoy_cnt = 0;

	## SILENT MODE FILE FORMAT DEPENDENCY !!!!!!!

	if (defined $outfile && -s $outfile) {
		my @cnt = `grep -c SCORE $outfile`;
		$decoy_cnt = $1 - 1 if ($cnt[0] =~ /^\s*(\d+)\s*$/);
	}
	if ( $size && 
	     $this->{_silentmode_format}->{seq_len} &&
	     $this->{_silentmode_format}->{score_len} &&
	     $this->{_silentmode_format}->{coord_len} ) {
		$decoy_cnt = ( $size - $this->{_silentmode_format}->{seq_len} + $this->{_silentmode_format}->{score_len} )   /   
			     ( $this->{_silentmode_format}->{score_len} + ( $this->{_silentmode_format}->{coord_len}*$this->{_seq_len} ) );
	}

	$decoy_cnt = 0 if ($decoy_cnt <= 0);
	$decoy_cnt = sprintf("%.0f", $decoy_cnt);
	return $decoy_cnt;
}

sub getSilentModeFormat {
	my ($this, $silentfile) = @_;

        my ($seq_header, $score_header, $score, $coord);
        my ($seq_len, $score_len, $coord_len);
        my $cnt       = 0;

	open(FILE, $silentfile) or do {
		warn "ROSETTA WARNING - cannot open $silentfile to get silent mode file format\n";
		return;
	};

	## SILENT MODE FILE FORMAT DEPENDENCY !!!!!!!

	while (my $line = <FILE>) {
		if ($cnt == 0 && $line =~ /^SEQUENCE:/) {
			$seq_len    = length($line);
			$seq_header = $line;
		} elsif ($cnt == 1 && $line =~ /^SCORE:/) {
			$score_len    = length($line);
			$score_header = $line;
		} elsif ($cnt == 2 && $line =~ /^SCORE:/) {
			$score        = $line;
		} elsif ($cnt == 3) {
			$coord_len = length($line);
			$coord     = $line;
		} elsif ($cnt > 3) {
			last;
		}
		$cnt++;
	}	
	close(FILE);

	if ( $seq_len && $score_len && $coord_len ) {
		$this->{_silentmode_format}->{seq_header}   = $seq_header;
		$this->{_silentmode_format}->{score_header} = $score_header;
		$this->{_silentmode_format}->{seq_len}      = $seq_len;
		$this->{_silentmode_format}->{score_len}    = $score_len;
		$this->{_silentmode_format}->{coord_len}    = $coord_len;
		$this->{_target_output_size} = $seq_len  + ( $score_len*($this->{_nstruct}+1) ) +
                                                     ( $coord_len*$this->{_seq_len}*$this->{_nstruct} );

		my $target_size = sprintf("%.2f", $this->{_target_output_size}/1000000);

		## now that we know the format of the silent mode file lets keep track of it's progress
		print "\n";
                print "--------------------------------------------------------------------------------------------------------\n";
                print "--------------------------------------------------------------------------------------------------------\n";
		print " SILENT MODE FILE FORMAT (extracted from $silentfile)\n";
		print " TARGET SIZE: $target_size MB ($this->{_nstruct} decoys)\n";
                print "--------------------------------------------------------------------------------------------------------\n";
		print $seq_header;
		print $score_header;
		print $score;
		print $coord;
		print "\n";
                print "--------------------------------------------------------------------------------------------------------\n";
                print "--------------------------------------------------------------------------------------------------------\n";
		print "\n";
	}
	return;
}

sub createRosettaPathsFile {
        my $this		= shift;

	my $output_dir  = $this->{_data_dir};
	my $frag1	= $this->{_frag1file};
	my $frag2	= $this->{_frag2file};
	my $pred_id	= $this->{_pred_id};
	$frag1 =~ s/^.{7}/$pred_id*****/;
	$frag2 =~ s/^.{7}/$pred_id*****/;

	my $pathsfile = $this->{_run_dir}."paths.txt";
        open PATHS, ">$pathsfile"
		or die "ROSETTA ERROR - cannot create paths.txt file $pathsfile: $!\n";

        print PATHS "Rosetta Input/Output Paths (order essential)\n";
        print PATHS "path is first '/', './',or  '../' to next whitespace, must end with /\n";
        print PATHS "INPUT PATHS:\n";
        print PATHS "pdb1\t$this->{_source_dir}\n";
        print PATHS "pdb2\t$this->{_source_dir}\n";
        print PATHS "pdb3\t$this->{_source_dir}\n";
        print PATHS "fragments\t$this->{_source_dir}\n";
        print PATHS "structure\t$this->{_source_dir}\n";
        print PATHS "sequence\t$this->{_source_dir}\n";
        print PATHS "constraints\t$this->{_source_dir}\n";
        print PATHS "starting structure\t$this->{_source_dir}\n";
        print PATHS "data files\t$this->{_rosetta_db}\n";
        print PATHS "OUTPUT PATHS:\n";
        print PATHS "movie\t$output_dir\n";
        print PATHS "pdb\t$output_dir\n";
        print PATHS "score\t$output_dir\n";
        print PATHS "status\t$output_dir\n";
        print PATHS "user\t$output_dir\n";
        print PATHS "FRAGMENTS:\n";
        print PATHS "2  number of fragment files\n";
        print PATHS "3  file 1 size\n";
        print PATHS "$frag1\n";
        print PATHS "9  file 2 size\n";
        print PATHS "$frag2\n";
        close PATHS;
        return $pathsfile;
}

sub cleanRosettaOutFile {
	my $this = shift;
	my %params = ( @_ );

	## THIS CLEANUP ROUTINE IS VERY STRICT

	my $old_out_file	= $params{outfile};
	my $clean_out_file	= $params{clean_outfile};
	my $dirty_out_file	= $params{dirty_outfile};

	my $seqlength           = 0;
	my $coord_cnt		= 0;
	my $clean_cnt		= 0;
	my $dirty_cnt		= 0;
	my $score               = "";
	my $prevscore		= "";
	my @coords		= ();
	my @dirtycoords		= ();

	print "Cleaning rosetta out file: $old_out_file\n";
	print "Creating cleaned out file: $clean_out_file\n";
	print "Creating corrupted out file: $dirty_out_file\n";
	open(OUTFILE, "$old_out_file") or die "ROSETTA ERROR - Can't open file $old_out_file: $!\n";
	open(CLEANOUTFILE, ">$clean_out_file") or die "ROSETTA ERROR - Can't open file $clean_out_file: $!\n";
	open(DIRTYOUTFILE, ">$dirty_out_file") or die "ROSETTA ERROR - Can't open file $dirty_out_file: $!\n";

	print CLEANOUTFILE $this->{_silentmode_format}->{seq_header};
	print CLEANOUTFILE $this->{_silentmode_format}->{score_header};

	while (<OUTFILE>) {
		my $line = $_;
		next if ($line =~ /^\s*$/);
		next if (substr($line, 0,9) eq "SEQUENCE:"); # skip sequence headers from concatenated output

		## write coords to clean and dirty file
		if (substr($line, 0,6) eq "SCORE:") {
			next if ($line =~ /^SCORE:\s+score/); # skip score headers from concatenated output 
			if ( scalar@coords == $this->{_seq_len} && length($prevscore) == $this->{_silentmode_format}->{score_len} ) {
				## add to clean file
				$clean_cnt++;
				print CLEANOUTFILE $prevscore;
				print CLEANOUTFILE @coords;
			} elsif ($prevscore) {
				## add to dirty file
				$dirty_cnt++;
				print DIRTYOUTFILE $prevscore;
				print DIRTYOUTFILE @dirtycoords;
			}
			$prevscore = $line;
			@coords = ();
			@dirtycoords = ();
			$coord_cnt = 0;
			#last if ($this->{_maintain_output_size} && $clean_cnt >= $this->{_nstruct});
			next; 
		}

		## get clean and dirty coordinates	
		my $coord_num = substr($line, 0,4);
		$coord_num =~ s/^\s+//;
		$coord_cnt++;
		if ($coord_num =~ /^\d+$/ && $coord_num == $coord_cnt && length($line) == $this->{_silentmode_format}->{coord_len}) {
			push(@coords, $line);
			push(@dirtycoords, $line);
		} else {
			push(@dirtycoords, $line);
		}
	}

	## get last set of coordinates
	## check to see if there are coordinates for each sequence position
	## and if the score has the expected number of characters

	if (scalar@coords == $this->{_seq_len} && length($prevscore) == $this->{_silentmode_format}->{score_len}) {
		## add to clean file
		$clean_cnt++;
		print CLEANOUTFILE $prevscore;
		print CLEANOUTFILE @coords;
	} else {
		## add to dirty file
		$dirty_cnt++;
		print DIRTYOUTFILE $prevscore;
		print DIRTYOUTFILE @dirtycoords;
	}

        close(OUTFILE);
        close(CLEANOUTFILE);
        close(DIRTYOUTFILE); 
	( -s $dirty_out_file ) or unlink( $dirty_out_file );

	print "Clean Decoys: $clean_cnt\n";
	print "Corrupted Decoys: $dirty_cnt\n";
	return ($clean_cnt, $dirty_cnt);
}

sub sortRosettaOutFile {
	my $this	= shift;
	my %params	= ( @_ );
	my ($sorted_array_ref, $header, $header_bytes, $bytes_read);

 	my $outfile		= $params{outfile};
	my $sorted_outfile	= $params{sorted_outfile};

	print "Sorting decoys by score\n";

	## get list of decoys sorted by score
	$sorted_array_ref	= $this->getSortedDecoysList( outfile => $outfile );

	print "Creating sorted silent mode file $sorted_outfile\n";

	open(SORTEDFILE, ">$sorted_outfile");
	open(FILE, $outfile);

	## get silent mode file header lines
	$header = "";
	$header_bytes	= $this->{_silentmode_format}->{seq_len} + $this->{_silentmode_format}->{score_len};
	$bytes_read	= read(FILE, $header, $header_bytes);
	die "ROSETTA ERROR - subroutine sortRosettaOutFile failed for silent mode file $outfile: file read error\n" if ($bytes_read != $header_bytes);
	print SORTEDFILE $header;

	## get sorted decoys from silent mode file
	foreach ( @{$sorted_array_ref} ) {
		my $decoy	= $_ - 1; # start decoy numbering at 0
		my $buffer	= "";
		my $skip_bytes	= ($this->{_silentmode_format}->{seq_len} + $this->{_silentmode_format}->{score_len}) + 
                                  ($decoy * ($this->{_silentmode_format}->{score_len} + ( $this->{_seq_len} * $this->{_silentmode_format}->{coord_len} ) ));
		my $read_bytes	= ($this->{_silentmode_format}->{score_len} + ( $this->{_seq_len} * $this->{_silentmode_format}->{coord_len} ));

		seek(FILE, $skip_bytes, 0);
		$bytes_read = read(FILE, $buffer, $read_bytes);

		die "ROSETTA ERROR - subroutine sortRosettaOutFile failed for silent mode file $outfile: file read error\n" if ($bytes_read != $read_bytes);

		print SORTEDFILE $buffer;
	}

	close(SORTEDFILE);
	close(FILE);

	( -s $sorted_outfile && (stat($sorted_outfile))[7] == (stat($outfile))[7]) or 
		die "ROSETTA ERROR - subroutine sortRosettaOutFile failed for silent mode file $outfile: file size error\n";

	return 1;
}

sub getSortedDecoysList {
	my $this	= shift;
	my %params	= ( @_ );
	my ($shell, $i, @score_lines, %scores, @sorted);

	## assumes silent mode file is clean
	my $silent_file = $params{outfile};

	## SILENT MODE FILE FORMAT DEPENDENCY !!!!!!!
 
	## get score lines
	$shell = "grep SCORE\: $silent_file";
	print "SHELL: $shell\n";
	@score_lines = `$shell`;
 
	## remove score header
	shift @score_lines;

	## get scores mapped to decoys in silent mode file
	for ($i = 0; $i <= $#score_lines; $i++ ) {
 
		## SILENT MODE FILE FORMAT DEPENDENCY !!!!!!! 

		my @cols = split(/\s+/, $score_lines[$i]);
 
		## use score column as score (column 2)
		$scores{$i+1} = $cols[1];
	}
 
	## sort by scores
	@sorted = sort { $scores{$a} <=> $scores{$b} } keys %scores;
 
        return \@sorted;
}






1;
