Rosetta 3.5
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
MCAligner.cc
Go to the documentation of this file.
1 // -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
2 // vi: set ts=2 noet:
3 //
4 // (c) Copyright Rosetta Commons Member Institutions.
5 // (c) This file is part of the Rosetta software suite and is made available under license.
6 // (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
7 // (c) For more information, see http://www.rosettacommons.org. Questions about this can be
8 // (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
9 
10 /// @file MCAligner.cc
11 /// @brief class definition for a class that aligns two Sequence objects using
12 /// a stochastic version of the Needleman-Wunsch algorithm.
13 /// @author James Thompson
14 
15 #include <core/types.hh>
16 // AUTO-REMOVED #include <basic/Tracer.hh>
17 
18 #include <core/sequence/Aligner.hh>
23 
27 
28 #include <numeric/random/random.hh>
29 
30 #include <utility/vector1.hh>
31 // AUTO-REMOVED #include <utility/io/izstream.hh>
32 
33 // AUTO-REMOVED #include <ObjexxFCL/format.hh>
34 #include <iostream>
35 #include <string>
36 
37 namespace core {
38 namespace sequence {
39 
40  static numeric::random::RandomGenerator align_RG(31882); // <- Magic number (tex's birthday), do not change it!!!
41 
43  SequenceOP seq_y,
44  SequenceOP seq_x,
46  ) {
47  using std::exp;
48  // replace these with clones, fix up match/mismatch calculation
49  SequenceOP new_seq_x = seq_x->clone();
50  SequenceOP new_seq_y = seq_y->clone();
51 
52  new_seq_x->insert_gap(0);
53  new_seq_y->insert_gap(0);
54 
55  //std::cout << "originals are: " << (*seq_x) << std::endl << (*seq_y) << std::endl;
56  //std::cout << "copies are: " << (*new_seq_x) << std::endl << (*new_seq_y) << std::endl;
57 
58  DP_Matrix scores( new_seq_x->length(), new_seq_y->length() );
59  //scores.xlab( new_seq_x->sequence_vec() );
60  //scores.ylab( new_seq_y->sequence_vec() );
61 
62  // std::cout << scores << std::endl;
63 
64  // matrix initialization
65  NWAligner::init_matrix( new_seq_y->length(), new_seq_x->length(), ss, scores );
66  // std::cout << seq_x << std::endl << seq_y << std::endl;
67  // std::cout << scores << std::endl;
68 
69  CellOP best_cell( new Cell( 0.0 ) );
70  for ( Size y = 2; y <= new_seq_y->length(); ++y ) {
71  for ( Size x = 2; x <= new_seq_x->length(); ++x ) {
72 
73  // score this position
74  Real l_gap_penalty( ss->gap_open() ), u_gap_penalty( ss->gap_open() );
75  if ( scores(x,y-1)->came_from() == above ) u_gap_penalty = ss->gap_extend();
76  if ( scores(x-1,y)->came_from() == left ) l_gap_penalty = ss->gap_extend();
77 
78  Real single_mm = ss->score( new_seq_x, new_seq_y, x, y );
79 
80  Real u_gap = scores( x, y-1 )->score() + u_gap_penalty;
81  Real l_gap = scores( x-1, y )->score() + l_gap_penalty;
82  Real mm = scores( x-1, y-1 )->score() + ss->score( new_seq_x, new_seq_y, x, y );
83 
84  // convert u_gap, l_gap and mm to probs using boltzmann averaging (based on kT)
85  Real u_gap_prob( exp( -1 * u_gap_penalty / kT() ) );
86  Real l_gap_prob( exp( -1 * l_gap_penalty / kT() ) );
87  Real mm_prob ( exp( -1 * single_mm / kT() ) );
88 
89  Real const partition( u_gap_prob + l_gap_prob + mm_prob );
90  Real const rand_prob( align_RG.uniform() );
91  //Real const rand_prob( numeric::random::uniform() );
92  u_gap_prob /= partition;
93  l_gap_prob /= partition;
94  mm_prob /= partition;
95 
96  // think of rand_prob as a dart that hits one of three places:
97  // (0,u_gap_prob) => align with u_gap
98  // (u_gap_prob,l_gap_prob) => align with l_gap
99  // (l_gap_prob,1) => align with mm
100 
101  CellOP current_cell = scores(x,y);
102  if ( rand_prob < u_gap_prob ) {
103  // std::cout << "came from diagonal with a score of " << mm << std::endl;
104  current_cell->score( mm );
105  current_cell->next( scores(x-1,y-1) );
106  current_cell->came_from( diagonal );
107  } else if ( rand_prob < l_gap_prob ) { // && rand_prob > 0, handled earlier
108  // std::cout << "came from left with a score of " << l_gap << std::endl;
109  current_cell->score( l_gap );
110  current_cell->next( scores(x-1,y) );
111  current_cell->came_from( left );
112  } else { // if rand_prob > l_gap_prob && rand_prob < 1
113  // std::cout << "came from above with a score of " << u_gap << std::endl;
114  current_cell->score( u_gap );
115  current_cell->next( scores(x,y-1) );
116  current_cell->came_from( above );
117  }
118  // if ( current_cell->score() > best_cell->score() ) best_cell = current_cell;
119  } // x
120  } // y
121 
122  best_cell = scores( scores.rows(),scores.cols() ); // start in lower right-hand corner for NW
123  SequenceAlignment test_alignment = traceback(
124  new_seq_x,
125  new_seq_y,
126  scores,
127  best_cell
128  );
129 
130  scores.clear();
131 
132  return test_alignment;
133  } // align
134 
135 } // sequence
136 } // core