// -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
// vi: set ts=2 noet:
//
// This file is made available under the Rosetta Commons license.
// See http://www.rosettacommons.org/license
// (C) 199x-2007 University of Washington
// (C) 199x-2007 University of California Santa Cruz
// (C) 199x-2007 University of California San Francisco
// (C) 199x-2007 Johns Hopkins University
// (C) 199x-2007 University of North Carolina, Chapel Hill
// (C) 199x-2007 Vanderbilt University

/// @file ShortestPathInFoldTree.cc
/// @brief helper class to FoldTree: allows measuring distances in fold-trees
/// @detailed This class provides a fast mechanism to determine the distance between residues
/// according to a given fold-tree.
/// Instead of storing a full NxN matrix (N = number of residues),
/// we store only an MxM matrix of distances between jump-residues (at most M = 2*J, with J the number of jumps);
/// a table with N entries gives, for each residue, the distance to the jump-residues bounding its peptide edge.
/// Thus the memory requirement is low and still
/// a single distance evaluation requires checking only 4 possible pathways.
///
/// @author Oliver Lange
///



// Unit Headers
#include "ShortestPathInFoldTree.h"
#include "fold_tree.h"
#include "pose.h"

// ObjexxFCL Headers
#include <ObjexxFCL/formatted.o.hh>

//// C++ headers
#include <algorithm>
#include <cstdlib>
#include <string>
#include <vector>

namespace pose_ns {

/// @detail constructor: remembers the number of residues of the fold-tree and
/// pre-computes both lookup structures. The jump-residue distance matrix has to be
/// built first, because the per-residue table refers to the jump-residue numbering.
/// max_dist_ is only initialized to 0 here.
ShortestPathInFoldTree::ShortestPathInFoldTree( Fold_tree const& f ) :
	nres_( f.get_nres() ),
	max_dist_( 0 )
{
	build_jumpres_distmap( f );
	build_peptide_table( f );
}



/// @detail builds the core of the distance cache: node_dist_,
/// a 2D array that holds the distance for every pair of jump-residues.
/// Example: the fold-tree  10 100 1 /  100 120 -1 / 120 150 2  has the
/// jump-residues 10, 100, 120 and 150, with distances
///                    10  100   1
///                    120 150   1
///                    10  150   22
///                    100 150   21
///                    ....
/// Jump-residues are addressed by an internal numbering: every seqpos that takes part in
/// one or more jumps receives its own number, starting at 1 and counting up continuously.
/// The distance of the jump-residue pair i,j is then found as node_dist_(i,j).
///
/// Steps:
///  - walk over all jumps of the fold-tree, collect the jump-residues and record the
///    mapping "seqpos --> internal number" in jump_res_
///  - pass the jump edges (in internal numbering) to init_dist_map(), which assigns
///    distance 1 to jump-connected pairs and 0 to the diagonal
///  - call compute_dist_map(), which adds the peptide edges between jump-residues and
///    completes the matrix with the Warshall algorithm
void
ShortestPathInFoldTree::build_jumpres_distmap( Fold_tree const& f ) {
	// the jump edges are kept in a list (internal numbering) because the distance matrix
	// can only be dimensioned once the number of unique jump-residues is known
	EdgeList jump_edges;
	int next_id = 1; // next free internal number for a jump-residue

	for ( int jump_nr = 1; jump_nr <= f.get_num_jump(); jump_nr++ ) {
		int const upstream_pos( f.upstream_jump_residue( jump_nr ) );
		int const downstream_pos( f.downstream_jump_residue( jump_nr ) );

		// internal numbers of both ends; -1 denotes "not assigned yet"
		int upstream_id = -1;
		int downstream_id = -1;

		// upstream residue: reuse its number if it is known, otherwise register it
		std::map< int, int >::const_iterator known = jump_res_.find( upstream_pos );
		if ( known == jump_res_.end() ) {
			upstream_id = next_id;
			jump_res_[ upstream_pos ] = next_id;
			++next_id;
		} else {
			upstream_id = known->second;
		}

		// downstream residue: same procedure
		known = jump_res_.find( downstream_pos );
		if ( known == jump_res_.end() ) {
			downstream_id = next_id;
			jump_res_[ downstream_pos ] = next_id;
			++next_id;
		} else {
			downstream_id = known->second;
		}

		jump_edges.push_back( Edge( upstream_id, downstream_id, jump_nr ) );
	}

	// debug output (disabled)
	if ( false ) {
		std::cout << " jump_res_nr -  seqpos  \n";
		std::map< int, int >::const_iterator entry = jump_res_.begin();
		std::map< int, int >::const_iterator const entry_end = jump_res_.end();
		for ( ; entry != entry_end; ++entry ) {
			std::cout << entry->second << " - " << entry->first << std::endl;
		}

		std::cout << " jump-edges -- internal enumeration \n" ;
		EdgeList::const_iterator edge = jump_edges.begin();
		EdgeList::const_iterator const edge_end = jump_edges.end();
		for ( ; edge != edge_end; ++edge ) {
			std::cout << edge->start <<  " -- " << edge->stop << std::endl;
		}
	}

	init_dist_map( jump_edges );
	compute_dist_map( f );
}


///@detail set up node_dist_ for the jump-residues collected in jump_res_:
/// the matrix gets one row/column per jump-residue and is pre-filled with a large
/// "infinite" value; every pair of jump-residues connected by a jump
/// (as stored in the EdgeList, internal numbering) gets distance 1,
/// and the diagonal entries of these residues get distance 0
void
ShortestPathInFoldTree::init_dist_map( EdgeList const& edges ) {

	// stand-in for "no connection known yet"
	unsigned int const inf( 12345678 ); //assumption: fewer than 12 million nodes in the graph.
	assert( jump_res_.size() < inf );
	node_dist_.dimension( jump_res_.size(), jump_res_.size(), inf );

	// enter the jump edges symmetrically, then fix the diagonal of both end points
	for ( EdgeList::const_iterator edge = edges.begin(), last = edges.end(); edge != last; ++edge ) {
		node_dist_( edge->start, edge->stop ) = 1;
		node_dist_( edge->stop, edge->start ) = 1;

		node_dist_( edge->start, edge->start ) = 0;
		node_dist_( edge->stop, edge->stop ) = 0;
	}
}


/// @detail complete the jump-residue distance matrix node_dist_ in two steps:
///
/// (1) enter the length of "simple paths", i.e., of peptide edges whose start and stop
///     residue are both jump-residues
/// (2) run the Warshall algorithm to obtain all remaining distances by combining
///     jumps and these peptide edges
void
ShortestPathInFoldTree::compute_dist_map( Fold_tree const& f ) {

	// step 1: peptide edges that connect two jump-residues
	for ( Fold_tree::const_iterator edge = f.begin(), last = f.end(); edge != last; ++edge ) {
		if ( edge->is_jump() ) continue; // jumps have been entered by init_dist_map

		// internal numbers of the edge's end points; -1 denotes "not a jump-residue"
		std::map< int, int >::const_iterator const start_entry = jump_res_.find( edge->start );
		int const start_node( start_entry != jump_res_.end() ? start_entry->second : -1 );

		std::map< int, int >::const_iterator const stop_entry = jump_res_.find( edge->stop );
		int const stop_node( stop_entry != jump_res_.end() ? stop_entry->second : -1 );

		// only an edge between two jump-residues contributes to the node matrix
		if ( start_node > 0 && stop_node > 0 ) {
			int const edge_length = std::abs( edge->start - edge->stop );
			node_dist_( start_node, stop_node ) = edge_length;
			node_dist_( stop_node, start_node ) = edge_length;
		}
	}

	// step 2: Warshall algorithm
	// the outermost index is the intermediate node; whenever the detour via this node is
	// shorter than the currently known distance, both symmetric entries are updated.
	// symmetry makes this marginally inefficient, but easy to read
	for ( unsigned int via = 1; via <= jump_res_.size(); ++via ) {
		for ( unsigned int from = 1; from <= jump_res_.size(); ++from ) {
			for ( unsigned int to = 1; to <= jump_res_.size(); ++to ) {
				int const direct = node_dist_( from, to );
				int const first_leg = node_dist_( from, via );
				int const second_leg = node_dist_( via, to );
				int const detour = first_leg + second_leg;

				if ( detour < direct ) {
					node_dist_( from, to ) = detour;
					node_dist_( to, from ) = detour;
				}
			}
		}
	}

	// debug output (disabled)
	if ( false ) {
		std::cout << "jump_res distance table:\n";
		for ( unsigned int row = 1; row <= jump_res_.size(); ++row ) {
			for ( unsigned int col = 1; col <= jump_res_.size(); ++col ) {
				std::cout << node_dist_( row, col ) << " ";
			}
			std::cout << std::endl;
		}
	}
}// compute_dist_map

///@detail build the table res2jumps_ that gives for each residue the distance to the
/// jump-residues at the start and at the stop of its peptide edge (if available)
///
/// format (one row per residue, -1 == "not set"):
/// <edge_nr> <jump1> <dist1> <jump2> <dist2>
///
/// edge_nr is a unique number that identifies the peptide edge the residue belongs to
/// jump1 and jump2 refer to our internally-numbered jump-residues (entries in node_dist_ )
///   for the start and the stop residue of that edge
/// dist1 and dist2 are the distances in sequence to the respective end of the edge
///
/// A residue shared by two peptide edges ends up with the row of the edge listed later.
/// Jump-residues that are not part of any peptide edge (connected by jumps only) get a
/// row of their own: a private edge_nr, themselves as jump1 with dist1 0, and no jump2.
/// Without such a row dist() could not find any pathway for them and would treat two
/// of them as lying on the same edge.
void
ShortestPathInFoldTree::build_peptide_table( Fold_tree const& f ) {
	using namespace  ObjexxFCL::fmt;
	res2jumps_.dimension( f.get_nres(), 5, -1 );   //5 entries per residue.

	// go thru the peptide edges and fill res2jumps_ accordingly
	int edge_nr = 1;
	for ( Fold_tree::const_iterator it = f.begin(), eit = f.end(); it != eit; ++it, ++edge_nr ) {
		if ( it->is_jump() ) continue; // jumps carry no residues of their own

		int const start_jump = get_jump( it->start ); // returns -1 if node is not a jump residue
		int const stop_jump = get_jump( it->stop );
		int const first_pos = std::min( it->start, it->stop );
		int const last_pos = std::max( it->stop, it->start );
		for ( int seqpos = first_pos; seqpos <= last_pos; seqpos++ ) {
			res2jumps_( seqpos, 1 ) = edge_nr;
			res2jumps_( seqpos, 2 ) = start_jump;
			res2jumps_( seqpos, 3 ) = std::abs( seqpos - static_cast< int >( it->start ) );
			res2jumps_( seqpos, 4 ) = stop_jump;
			res2jumps_( seqpos, 5 ) = std::abs( seqpos - static_cast< int >( it->stop ) );
		} // for seqpos
	} // for edges

	// jump-residues not covered by any peptide edge: give each of them a private edge number
	// (edge_nr is now larger than every number handed out above) and a zero-length
	// connection to its own jump-node
	for ( std::map< int, int >::const_iterator node = jump_res_.begin(), node_end = jump_res_.end();
			node != node_end; ++node ) {
		int const seqpos( node->first );
		if ( res2jumps_( seqpos, 1 ) >= 0 ) continue; // already part of a peptide edge
		res2jumps_( seqpos, 1 ) = edge_nr++;
		res2jumps_( seqpos, 2 ) = node->second;
		res2jumps_( seqpos, 3 ) = 0;
	}

	//produce some debug output (disabled)
	if ( false ) {
		std::cout << " edge_nr   jump1    dist1   jump2    dist2 \n";
		for ( int ii = 1; ii<=f.get_nres(); ii++ ) {
			for ( int k = 1; k<=5; k++ ) {
				std::cout << I(3, res2jumps_( ii, k ) );
			}
			std::cout << std::endl;
		}

		std::cout << "\n total distance list \n";
		for ( int ii = 1; ii<=f.get_nres(); ii++ ) {
			for ( int jj = 1; jj<=f.get_nres(); jj++ ) {
				std::cout << "(" << ii << "," << jj << ") " << dist( ii, jj ) << std::endl;
			}
		}
	}
}

///@detail distance between the residues pos1 and pos2 along the fold-tree,
/// evaluated from the pre-computed tables.
/// Residues with the same edge number are separated by their sequence distance.
/// Otherwise up to 4 pathways are compared: each residue can leave its edge via the
/// jump-residue at the start or at the stop of the edge (columns 2/3 and 4/5 of res2jumps_);
/// the length of a pathway is the sum of both residue-to-jump-residue distances and the
/// node_dist_ entry of the two jump-residues. The shortest pathway is returned.
int
ShortestPathInFoldTree::dist( int pos1, int pos2 ) const {

	// both residues on the same edge: plain sequence separation
	if ( res2jumps_( pos1, 1 ) == res2jumps_( pos2, 1 ) ) {
		return std::abs( pos1 - pos2 );
	}

	int const inf( 12345678 ); //assumption: fewer than 12 million nodes in the graph.
	int shortest = inf;

	// col1 / col2 select the jump-node column (2 or 4); the distance to it sits one column further
	for ( int col1 = 2; col1 <= 4; col1 += 2 ) {
		int const node1 = res2jumps_( pos1, col1 );
		if ( node1 < 0 ) continue; // this end of the edge is not a jump-residue

		for ( int col2 = 2; col2 <= 4; col2 += 2 ) {
			int const node2 = res2jumps_( pos2, col2 );
			if ( node2 < 0 ) continue; // this end of the edge is not a jump-residue

			int const path_length =
				res2jumps_( pos1, col1 + 1 ) + res2jumps_( pos2, col2 + 1 ) + node_dist_( node1, node2 );
			if ( path_length < shortest ) shortest = path_length;
		}
	}

	assert ( shortest <= nres_ );
	return shortest;
}

///@detail shortest distance between the two residues that are furthest apart
///
/// The two residues that are furthest apart are always chain ends, i.e., either
/// jump-residues themselves or the far end of a peptide stretch that dangles off a
/// jump-residue (a peptide edge with a jump-residue at only one of its ends).
/// Hence:
///  - without any jump the fold-tree is a single chain and the answer is nres_-1
///  - otherwise determine for every jump-node the length of its (at most two) dangling
///    stretches and take the maximum over
///      * the two stretches dangling off the same jump-node, and
///      * longest stretch + node_dist_ + longest stretch for every pair of jump-nodes
///
/// Residues on peptide edges between two jump-residues are interior points of such
/// pathways and are deliberately not counted as dangling stretches.
///
/// @return the largest distance found, 0 for an empty or single-residue fold-tree
int
ShortestPathInFoldTree::compute_max_dist() const {

	int const n_nodes( static_cast< int >( jump_res_.size() ) );

	// no jumps: every residue lies on one continuous chain
	if ( n_nodes == 0 ) {
		return ( nres_ > 1 ) ? nres_ - 1 : 0;
	}

	// per jump-node (index 0 unused): lengths of the dangling stretches.
	// A residue has at most two peptide neighbours, so two slots are sufficient;
	// first_edge remembers which edge_nr occupies slot a.
	std::vector< int > first_edge( n_nodes + 1, -1 );
	std::vector< int > tail_a( n_nodes + 1, 0 );
	std::vector< int > tail_b( n_nodes + 1, 0 );

	for ( int k = 1; k <= nres_; k++ ) {
		int const start_node( res2jumps_( k, 2 ) );
		int const stop_node( res2jumps_( k, 4 ) );
		// both set: residue on an edge between two jump-nodes; none set: no jump-node known
		if ( ( start_node < 0 ) == ( stop_node < 0 ) ) continue;

		int const node( start_node < 0 ? stop_node : start_node );
		int const length( start_node < 0 ? res2jumps_( k, 5 ) : res2jumps_( k, 3 ) );
		int const edge_nr( res2jumps_( k, 1 ) );

		if ( first_edge[ node ] < 0 ) first_edge[ node ] = edge_nr;
		int & slot( first_edge[ node ] == edge_nr ? tail_a[ node ] : tail_b[ node ] );
		if ( length > slot ) slot = length;
	}

	int furthest( 0 );
	for ( int ii = 1; ii <= n_nodes; ii++ ) {
		// two chain ends that hang off the same jump-node
		int const around_ii( tail_a[ ii ] + tail_b[ ii ] );
		if ( around_ii > furthest ) furthest = around_ii;

		int const reach_ii( std::max( tail_a[ ii ], tail_b[ ii ] ) );
		for ( int jj = ii + 1; jj <= n_nodes; jj++ ) {
			int const reach_jj( std::max( tail_a[ jj ], tail_b[ jj ] ) );
			int const via_nodes = reach_ii + node_dist_( ii, jj ) + reach_jj;
			if ( via_nodes > furthest ) furthest = via_nodes;
		}
	}

	return furthest;
}


} //pose_ns

