Rosetta 3.5
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
SequenceProfile.hh
Go to the documentation of this file.
1 // -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
2 // vi: set ts=2 noet:
3 //
4 // (c) Copyright Rosetta Commons Member Institutions.
5 // (c) This file is part of the Rosetta software suite and is made available under license.
6 // (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
7 // (c) For more information, see http://www.rosettacommons.org. Questions about this can be
8 // (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
9 
10 /// @file SequenceProfile.hh
11 /// @brief class definition for a sequence profile that represents each
12 /// position in a sequence as a probability distribution over the allowed amino
13 /// acids at that position.
14 /// @author James Thompson
15 
16 #ifndef INCLUDED_core_sequence_SequenceProfile_hh
17 #define INCLUDED_core_sequence_SequenceProfile_hh
18 
19 // Unit headers
21 
22 // Project headers
23 #include <core/types.hh>
25 
26 // Utility headers
27 #include <utility/file/FileName.fwd.hh>
28 
29 // C++ headers
30 #include <utility/vector1.hh>
31 
32 namespace core {
33 namespace sequence {
34 
/// @brief Sequence subclass that additionally stores a per-position profile
/// (profile_), the alphabet the profile columns refer to (alphabet_), a
/// temperature (temp_) and an orientation flag (negative_better_).
/// NOTE(review): the line numbers embedded in this listing skip values
/// (e.g. 36-37, 41, 53-54, 62, 69, 76, 87, 93), so some declarations such as
/// ctor signatures and data members appear to be missing here -- confirm
/// against the original header before relying on this text.
35 class SequenceProfile : public Sequence {
38 public:
39 
40  /// @brief ctors
 /// The visible initializers set temp_ to 1.0 and negative_better_ to false.
42  temp_( 1.0 ),
43  negative_better_(false)
44  {}
45 
 /// @brief Constructs with the same defaults, then loads the profile via
 /// read_from_file( fn ).
46  SequenceProfile( FileName const & fn ) :
47  temp_( 1.0 ),
48  negative_better_(false)
49  {
50  read_from_file( fn );
51  }
52 
 /// Ctor taking sequence, id, start and negative_better; the body also uses
 /// a `prof` argument whose declaration is not visible in this listing.
55  std::string const & sequence,
56  std::string const & id,
57  Size start = 1,
58  bool negative_better = false
59  ) :
60  Sequence( sequence, id, start ),
61  temp_( 1.0 ),
63  {
64  profile( prof );
 // Debug-time check that the profile has one row per sequence position.
65  assert( profile().size() == length() );
66  }
67 
68  /// @brief copy ctor
 /// Default-constructs the Sequence base, then delegates to the assignment
 /// operator.
70  Sequence()
71  {
72  *this = src;
73  }
74 
75  /// @brief assignment operator.
 /// Returns early on self-assignment; otherwise copies the state below from
 /// rhs through the setter-style accessors.
 /// NOTE(review): no copy of negative_better_ is visible here (embedded
 /// line 87 is missing from this listing) -- confirm it is copied.
77  if ( this == &rhs ) return *this;
78 
79  id ( rhs.id() );
80  start ( rhs.start() );
81  gap_char( rhs.gap_char() );
82  sequence( rhs.sequence() );
83 
84  profile ( rhs.profile() );
85  alphabet( rhs.alphabet() );
86  temp_ = rhs.temp_;
88 
89  return *this;
90  }
91 
92  /// @brief dtor
94 
95  /// @brief Returns an owning pointer to a new SequenceProfile object,
96  /// with data that is a deep copy of the information in this object.
97  virtual SequenceOP clone() const {
98  SequenceOP new_seq_op( new SequenceProfile( *this ) );
99  return new_seq_op;
100  }
101 
102  /// @brief Read a profile matrix from the given filename using the NCBI
103  /// PSSM format for a position-specific scoring matrix.
104  virtual void read_from_file( FileName const & fn );
105 
106  /// @brief Generate the profile matrix from a sequence and a given substitution matrix
107  virtual void generate_from_sequence( Sequence const & seq, std::string matrix="BLOSUM62" );
108 
109  /// @brief Multiply all profile weights by factor
110  void rescale(core::Real factor=1);
111 
112  /// @brief Use Boltzmann scaling on a per-residue basis to convert the current profile values to probabilities ( range 0.0-1.0 )
114 
115  /// @brief Use linear rescaling (with a fixed zero) to fit the values within the range -1.0 to 1.0
116  void global_auto_rescale();
117 
118  /// @brief Read profile matrix from the given filename using the NNMAKE
119  /// .checkpoint format.
120  /// For compatibility, negative_better defaults to true. Set manually if necessary.
121  void read_from_checkpoint( FileName const & fn, bool negative_better = true );
122 
123  /// @brief Returns the 2D vector1 of Real values representing this profile.
125 
126  /// @brief Sets the 2D vector1 of Real values representing this profile.
127  void profile(
128  utility::vector1< utility::vector1< Real > > const & new_profile
129  );
130 
131  /// @brief Inserts a character at the given position.
132  virtual void insert_char( core::Size pos, char new_char );
133 
134  /// @brief Deletes the given position from the Sequence and shifts
135  /// everything else back by one.
136  virtual void delete_position( core::Size pos );
137 
 /// @brief Returns the type tag string "sequence_profile".
138  virtual std::string type() const {
139  return "sequence_profile";
140  }
141 
142  /// @brief Returns the number of distinct values at each position in this
143  /// profile.
144  Size width() const;
145 
146  /// @brief Returns the vector1 of values at this position.
147  //utility::vector1< Real >
149  prof_row( Size pos ) const;
150 
151  /// @brief Sets the 1D vector1 of Real values representing this profile at pos X.
152  void prof_row(
153  utility::vector1< Real > const & new_prof_row, core::Size pos
154  );
155 
 /// @brief Returns profile_.size(), i.e. the number of rows stored in the
 /// profile.
156  Size size() const {
157  return profile_.size();
158  }
159 
160  /// @brief Returns true if negative values are better identities.
161  /// @details The "default" use case is for storing log likelihood values
162  /// where positive is better. If you're using this class to store energy-like
163  /// values, set negative_better to true.
164  bool negative_better() const { return negative_better_; }
165 
166  /// @brief Set whether negative identities are better.
167  void negative_better( bool negbet ) { negative_better_ = negbet; }
168 
169  /// @brief returns the temperature used in computing profile probabilities
170  core::Real temp() const {
171  return temp_;
172  }
173 
174  /// @brief Return the alphabet used by this sequence profile. This is an
175  /// N-dimensional vector1 where N is the width of the profile, and the ith
176  /// entry of any row in the profile represents the probability of the ith
177  /// character at that row in the sequence.
179  return alphabet_;
180  }
181 
 /// Setter counterpart: replaces alphabet_ with new_alphabet.
183  alphabet_ = new_alphabet;
184  }
185 
186  /// @brief Print this SequenceProfile object to the given std::ostream.
187  friend std::ostream & operator<<(
188  std::ostream & out, const SequenceProfile & p
189  );
190 
191 private:
192 
193  /// @brief converts a vector1 of arbitrary scores to values using Boltzmann
194  /// averaging at a given kT. Scores should follow the convention that more positive -> better score.
195  /// If not, set negative_better to true.
196  void scores_to_probs_(
198  core::Real kT,
199  bool negative_better = false
200  ) const;
201 
202  /// @brief Internal consistency check. Returns true if passed, causes a runtime_assertion failure if not.
203  bool check_internals_() const;
206 
207  /// @brief temp used to convert arbitrary scores to/from probabilities
209  /// @brief The orientation of the values. Are negative values better than zero/positive ones?
211 
212 }; // class SequenceProfile
213 
214 } // sequence
215 } // core
216 
217 #endif