Rosetta 3.5
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
ProfSimScoringScheme.hh
Go to the documentation of this file.
1 // -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
2 // vi: set ts=2 noet:
3 //
4 // (c) Copyright Rosetta Commons Member Institutions.
5 // (c) This file is part of the Rosetta software suite and is made available under license.
6 // (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
7 // (c) For more information, see http://www.rosettacommons.org. Questions about this can be
8 // (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
9 
10 /// @file ProfSimScoringScheme.hh
11 /// @brief class definition for a given scoring scheme for an alignment.
12 /// @detailed Simply based on comparing single profiles from two protein
13 /// sequences, along with affine gap penalties of the form penalty = A + Bk, where
14 /// A represents the penalty for starting a gap, and B represents the penalty for
15 /// extending a previously opened gap by k characters.
16 /// @author James Thompson
17 
18 #ifndef INCLUDED_core_sequence_ProfSimScoringScheme_hh
19 #define INCLUDED_core_sequence_ProfSimScoringScheme_hh
20 
21 #include <core/types.hh>
24 
25 #include <utility/vector1_bool.hh>
26 
27 
28 namespace core {
29 namespace sequence {
30 
32 public:
33 
34  /// @brief constructor taking the gap-open penalty (default -4) and the gap-extension penalty (default -1).
36  Real open = -4, // gap-open penalty, forwarded to gap_open() below
37  Real extend = -1 // gap-extension penalty, forwarded to gap_extend() below
38  )
39  {
40  gap_open ( open );
41  gap_extend( extend );
43  type("ProfSim"); // passes the name "ProfSim" to type()
44  }
45 
46  /// @brief virtual destructor with an empty body.
47  virtual ~ProfSimScoringScheme() {}
48 
49  /// @brief Initialize log-probabilities of occurrence for each amino acid.
50  void initialize_parameters();
51 
52  /// @brief returns owning pointer to a new ProfSimScoringScheme constructed from
53  /// this object's current gap_open() and gap_extend() values.
54  virtual ScoringSchemeOP clone() const {
55  return new ProfSimScoringScheme(
56  gap_open(), // this object's gap-open value
57  gap_extend() // this object's gap-extension value
58  );
59  }
60 
61  /// @brief ProfSim profile-profile similarity metric based on information theory.
62  /// Published by Yona and Levitt in JMB, 2002 in a paper titled "Within the
63  /// Twilight Zone: A Sensitive Profile-Profile Comparison Tool Based on
64  /// Information Theory."
65  /// @detailed The basic idea for this score is that it incorporates both
66  /// divergence of probability distributions at each position and the significance
67  /// of that divergence in order to construct a position-specific
68  /// profile-profile score. The divergence score is the J-S divergence between
69  /// the two probability distributions at this position, and the
70  /// significance score is the J-S divergence between:
71  /// 1. the average of the two probability distributions at this position
72  /// 2. a prior probability distribution over all allowed characters at this
73  /// position.
74  /// J-S divergence between two distributions is defined here as (NOTE(review): the expression below is the symmetrised K-L form; the standard J-S divergence measures each distribution against the mixture 0.5*(p1+p2) -- confirm against the implementation):
75  /// D( p1, p2 ) = 0.5 * sum( p1[i] * log( p1[i] / p2[i] ) ) +
76  /// 0.5 * sum( p2[i] * log( p2[i] / p1[i] ) )
77  virtual Real score(
78  SequenceOP seq1, // first sequence; presumably must carry a profile -- TODO confirm
79  SequenceOP seq2, // second sequence; presumably must carry a profile -- TODO confirm
80  Size pos1, // presumably the position in seq1 being compared -- verify indexing base in the .cc
81  Size pos2 // presumably the position in seq2 being compared -- verify indexing base in the .cc
82  );
83 private:
85 }; // class ProfSimScoringScheme
86 
87 } // sequence
88 } // core
89 
90 #endif