// -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
// vi: set ts=2 noet:
// :noTabs=false:tabSize=4:indentSize=4:
//
// (c) Copyright Rosetta Commons Member Institutions.
// (c) This file is part of the Rosetta software suite and is made available under license.
// (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
// (c) For more information, see http://www.rosettacommons.org. Questions about this can be
// (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.

/// @file   core/sequence/SequenceAlignment.hh
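/// @brief  Unit tests for core::sequence::SequenceAlignment: alignment file I/O, sequence mappings, local alignment, and alignment statistics.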
/// @author James Thompson

// Test headers
#include <cxxtest/TestSuite.h>

#include <test/core/init_util.hh>
#include <core/util/Tracer.hh>
#include <test/UTracer.hh>

#include <core/sequence/util.hh>
#include <core/sequence/Sequence.hh>
#include <core/sequence/SequenceAlignment.hh>
#include <core/sequence/SequenceMapping.hh>

#include <core/sequence/NWAligner.hh>
#include <core/sequence/SWAligner.hh>
#include <core/sequence/ScoringScheme.fwd.hh>
#include <core/sequence/SimpleScoringScheme.hh>

#include <numeric/random/random.hh>
#include <utility/file/file_sys_util.hh>


// File-local tracer registered under the channel name
// "test.core.sequence.SequenceAlignment". None of the tests visible in this
// file write to it.
static core::util::Tracer TR("test.core.sequence.SequenceAlignment");


/// @brief CxxTest suite for core::sequence::SequenceAlignment. Covers reading
/// alignments from the three file formats used below, converting between
/// SequenceAlignment and SequenceMapping, local alignment with SWAligner, and
/// the identity / gap statistics reported by an alignment.
class SequenceAlignmentTests : public CxxTest::TestSuite {
public:
	SequenceAlignmentTests() {}

	/// @brief Shared initialization; CxxTest runs setUp() before each test.
	void setUp() {
		core_init();
	}

	/// @brief Reads core/sequence/test.aln and checks the id and starting
	/// position of both sequences in the alignment.
	void test_alignment_io() {
		using namespace core::sequence;

		SequenceAlignment align;
		align.read_from_file( "core/sequence/test.aln" );

		SequenceOP seq1 = align.sequence(1);
		SequenceOP seq2 = align.sequence(2);
		TS_ASSERT( seq1->id() == "1yr0A.pdb" );
		TS_ASSERT( seq1->start() == 1 );

		TS_ASSERT( seq2->id() == "T0374" );
		TS_ASSERT( seq2->start() == 2 );
	} // test_alignment_io

	/// @brief Reads core/sequence/test.grishin_aln and checks score, ids and
	/// gapped sequences of the first two alignments in the file.
	void test_grishin_align_io() {
		using namespace core::sequence;
		utility::vector1< SequenceAlignment > alignments
			= core::sequence::read_grishin_aln_file( "core/sequence/test.grishin_aln" );

		// Every check below indexes alignments[1] or alignments[2]. A failed
		// TS_ASSERT does not end the test, so bail out explicitly instead of
		// indexing past the end when fewer than two alignments were read.
		TS_ASSERT( alignments.size() >= 2 );
		if ( alignments.size() < 2 ) return;

		TS_ASSERT( alignments[1].score() == 175.0 );
		TS_ASSERT( alignments[2].score() == 175.0 );

		// One id check per (alignment, sequence) pair.
		TS_ASSERT( alignments[1].sequence(1)->id() == "T0288" );
		TS_ASSERT( alignments[1].sequence(2)->id() == "2FNEA_1" );
		TS_ASSERT( alignments[2].sequence(1)->id() == "T0288" );
		TS_ASSERT( alignments[2].sequence(2)->id() == "2FNEA_2" );

		TS_ASSERT( alignments[1].sequence(1)->sequence() == "KVTLQKDAQNLIGISIGGG-----AQPCLYIVQVFDNTPAALDGTVAAGDEITGVNGRSIKGKTKVEVAKMIQEVKGEVTIHYNK" );
		TS_ASSERT( alignments[1].sequence(2)->sequence() == "SITLERGPDG-LGFSIVGGYGSPHGDLPIYVKTVFAKGAASEDGRLKRGDQIIAVNGQSLEGVTHEEAVAILKRTKGTVTLMVLS" );
		TS_ASSERT( alignments[2].sequence(1)->sequence() == "SMVP--GKVTLQKDAQNLIGISIGGG-----AQPCLYIVQVFDNTPAALDGTVAAGDEITGVNGRSIKGKTKVEVAKMIQEVKGEVTIHYNKLQYYKV" );
		TS_ASSERT( alignments[2].sequence(2)->sequence() == "--MPQCKSITLERGPDG-LGFSIVGGYGSPHGDLPIYVKTVFAKGAASEDGRLKRGDQIIAVNGQSLEGVTHEEAVAILKRTKGTVTLMVLSSDETSV" );
	} // test_grishin_aln_io

	/// @brief Reads core/sequence/test.general_aln and checks score, ids,
	/// starting positions and gapped sequences of both alignments.
	void test_general_aln_io() {
		using namespace core::sequence;

		core::Real const TOLERATED_ERROR_SCORE( 1e-4 );
		utility::vector1< SequenceAlignment > alignments
			= core::sequence::read_general_aln_file( "core/sequence/test.general_aln" );

		TS_ASSERT( alignments.size() == 2 );
		// The assertion above records a failure but does not stop the test;
		// return before alignments[1] / alignments[2] would be out of range.
		if ( alignments.size() < 2 ) return;

		// first alignment
		SequenceAlignment first( alignments[1] );
		TS_ASSERT_DELTA( first.score(), -108.9146, TOLERATED_ERROR_SCORE );

		TS_ASSERT( first.sequence(1)->id() == "1ub0A" );
		TS_ASSERT( first.sequence(2)->id() == "1jxhA" );
		TS_ASSERT( first.sequence(1)->start() == 4 );
		TS_ASSERT( first.sequence(2)->start() == 6 );
		TS_ASSERT( first.sequence(1)->sequence() == "ALTIAGSDSGGGAGVQADLKVFFRFGVYGTSALTLVTAQNTLGVQRVHLLPPEVVYAQIESVAQDFPLHAAKTGALGDAAIVEAVAEAVRRFGVRPLVVDPVM---AKEAAAALKERLFPLADLVTPNRLEAEALLGRP-IRTLKEAEEAAKALLALGPKAVLLKGGHLEAVDLLATRGGVLRFSAPRVHTRNTHGTGCTLSAAIAALLAKGRPLAEAVAEAKAYLTRALKTAPSL--GHGHGPLDHW" );
		TS_ASSERT( first.sequence(2)->sequence() == "ALTIAGTDPSGGAGIQADLKTFSALGAYGCSVITALVAENTCGVQSVYRIEPDFVAAQLDSVFSDVRIDTTKIGMLAETDIVEAVAERLQRHHVRNVVLDTVMLLLSPSAIETLRVRLLPQVSLITPNLPEAAALLDAPHARTEQEMLAQGRALLAMGCEAVLMKG------DWLFTREGEQRF---RVNTKNTHGTGCTLSAALAALRPRHRSWGETVNEAKAWLSAALAQADTLEVGKGIGPVHHF" );

		// second alignment
		SequenceAlignment second( alignments[2] );
		TS_ASSERT_DELTA( second.score(), 2.0669, TOLERATED_ERROR_SCORE );

		TS_ASSERT( second.sequence(1)->id() == "1ub0A" );
		TS_ASSERT( second.sequence(2)->id() == "1w78A" );
		TS_ASSERT( second.sequence(1)->start() == 168  );
		TS_ASSERT( second.sequence(2)->start() == 27   );
		TS_ASSERT( second.sequence(1)->sequence() == "LEAVDLLATRGGVLRFSAPRVHT-RNTHGTGCTLSAAIAALLAKG" );
		TS_ASSERT( second.sequence(2)->sequence() == "LERVSLVAARLGVLK-PAPFVFTVAGTNGKGTTCRTLESILMAAG" );
	} // test_general_aln_io

	/// @brief Builds a two-sequence alignment by hand and checks the residue
	/// mapping derived from it in both directions.
	void test_alignment_mapping() {
		using namespace core::sequence;

		SequenceOP seq1( new Sequence( "ABCD-F", "one", 1 ) );
		SequenceOP seq2( new Sequence( "A-CDEF", "two", 1 ) );

		SequenceAlignment align;
		align.add_sequence( seq1 );
		align.add_sequence( seq2 );

		// Indices are ungapped residue numbers; 0 marks a residue that sits
		// opposite a gap in the other sequence (B in "one", E in "two").
		SequenceMapping mapping = align.sequence_mapping( 1, 2 );

		TS_ASSERT( mapping[ 1 ] == 1 );
		TS_ASSERT( mapping[ 2 ] == 0 );
		TS_ASSERT( mapping[ 3 ] == 2 );
		TS_ASSERT( mapping[ 4 ] == 3 );
		TS_ASSERT( mapping[ 5 ] == 5 );

		SequenceMapping reverse = align.sequence_mapping( 2, 1 );
		TS_ASSERT( reverse[ 1 ] == 1 );
		TS_ASSERT( reverse[ 2 ] == 3 );
		TS_ASSERT( reverse[ 3 ] == 4 );
		TS_ASSERT( reverse[ 4 ] == 0 );
		TS_ASSERT( reverse[ 5 ] == 5 );
	} // test_alignment_mapping

	/// @brief Locally aligns a 46-residue fragment against the full sequence
	/// it was cut from and checks that SWAligner recovers the exact match.
	void test_simple_aligner() {
		using namespace core::sequence;
		SequenceOP seq1(
			new Sequence(
				"PKALIVYGSTTGNTEYTAETIARELADAGYEVDSRDAASVEAGGLFEGFDLVLLGCSTWGDDSIELQDDFIPLFDSLEETGAQGRKVACFGCGDSSWEYFCGAVDAIEEKLKNLGAEIVQDGLRIDGDPRAARDDIVGWAHDVRGAI",
				"1f4pA_full", 1
			)
		);
		SequenceOP seq2(
			new Sequence(
				"FEGFDLVLLGCSTWGDDSIELQDDFIPLFDSLEETGAQGRKVACFG", "1f4pA_frag", 46
			)
		);

		SWAligner local_aligner;
		ScoringSchemeOP ss( new SimpleScoringScheme( 6, 1, -4, -1 ) );
		SequenceAlignment local_align = local_aligner.align( seq1, seq2, ss );

		TS_ASSERT( local_align.score() == 276 ); // 46 identities, each worth +6
		TS_ASSERT( local_align.sequence(1)->sequence() ==
			"FEGFDLVLLGCSTWGDDSIELQDDFIPLFDSLEETGAQGRKVACFG"
		);
		TS_ASSERT( local_align.sequence(2)->sequence() ==
			"FEGFDLVLLGCSTWGDDSIELQDDFIPLFDSLEETGAQGRKVACFG"
		);
		// The fragment begins at residue 46 of the full sequence, and seq2 was
		// constructed with start 46, so both aligned starts are expected to be 46.
		TS_ASSERT( local_align.sequence(1)->start() == 46 );
		TS_ASSERT( local_align.sequence(2)->start() == 46 );
		TS_ASSERT( local_align.sequence(1)->id() == "1f4pA_full" );
		TS_ASSERT( local_align.sequence(2)->id() == "1f4pA_frag" );
		TS_ASSERT( local_align.identities() == 46 );
	} // test_simple_aligner

	/// @brief Checks score get/set, identities, gap statistics and copy
	/// construction on a small hand-built alignment.
	void test_alignment_functions() {
		using namespace core::sequence;
		SequenceOP seq1( new Sequence( "ABCDEFGHIJ", "first",  1 ) );
		SequenceOP seq2( new Sequence( "----EFG-IJ", "second", 1 ) );

		core::Real const arbitrary_score( 57.3 );

		// Note the order: "second" is added before "first".
		SequenceAlignment aln;
		aln.add_sequence(seq2);
		aln.add_sequence(seq1);
		aln.score( arbitrary_score );
		TS_ASSERT( aln.score() == arbitrary_score );
		TS_ASSERT( aln.identities() == 5 );           // E, F, G, I, J
		TS_ASSERT( aln.gapped_positions() == 5 );     // four leading gaps plus the one opposite H
		TS_ASSERT( aln.max_gap_percentage() == 0.5 ); // 5 gaps over 10 columns

		// test copying
		{
			SequenceAlignment aln_copy( aln );

			TS_ASSERT( aln.score() == aln_copy.score() );
			TS_ASSERT( aln.identities() == aln_copy.identities() );
			TS_ASSERT( aln.sequence(1)->sequence() == aln_copy.sequence(1)->sequence() );
			TS_ASSERT( aln.sequence(2)->sequence() == aln_copy.sequence(2)->sequence() );
		}
	} // test_alignment_functions

	/// @brief Fills a SequenceMapping residue by residue, verifies its
	/// contents, then converts it to a SequenceAlignment and checks the
	/// resulting gapped sequences.
	void test_mapping_to_alignment() {
		using namespace core::sequence;
		SequenceOP seq1( new Sequence( "ABCDEGH", "first",  1 ) );
		SequenceOP seq2( new Sequence( "CDEFGH",  "second", 3 ) );
		SequenceMapping map;
		map.insert_aligned_residue_safe( 3, 1 ); // C - C
		map.insert_aligned_residue_safe( 4, 2 ); // D - D
		map.insert_aligned_residue_safe( 5, 3 ); // E - E
		map.insert_aligned_residue_safe( 0, 4 ); // 0 - F
		map.insert_aligned_residue_safe( 6, 5 ); // G - G
		map.insert_aligned_residue_safe( 7, 6 ); // H - H
		// A and B (residues 1 and 2 of "first") were never inserted, so they
		// are expected to map to 0.
		TS_ASSERT( map[1] == 0 );
		TS_ASSERT( map[2] == 0 );
		TS_ASSERT( map[3] == 1 );
		TS_ASSERT( map[4] == 2 );
		TS_ASSERT( map[5] == 3 );
		TS_ASSERT( map[6] == 5 );
		TS_ASSERT( map[7] == 6 );
		SequenceAlignment align = mapping_to_alignment( map, seq1, seq2 );
		TS_ASSERT( align.sequence(1)->sequence() == "ABCDE-GH" );
		TS_ASSERT( align.sequence(2)->sequence() == "--CDEFGH" );
	} // test_mapping_to_alignment

}; // SequenceAlignmentTests
