Rosetta 3.5
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
MatrixScoringScheme.cc
Go to the documentation of this file.
1 // -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
2 // vi: set ts=2 noet:
3 //
4 // (c) Copyright Rosetta Commons Member Institutions.
5 // (c) This file is part of the Rosetta software suite and is made available under license.
6 // (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
7 // (c) For more information, see http://www.rosettacommons.org. Questions about this can be
8 // (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
9 
10 /// @file MatrixScoringScheme.cc
11 /// @brief class definition for a given scoring scheme for an alignment.
12 /// @details Simply based on comparing single characters from two protein
13 /// sequences, along with affine gap penalties of the form penalty = A + Bk, where
14 /// A represents the penalty for starting a gap, B represents the penalty for
15 /// extending a previously opened gap by one character, and k is the number of characters by which the gap is extended.
16 /// @author James Thompson
17 
18 #include <core/types.hh>
19 #include <basic/Tracer.hh>
24 
25 #include <utility/exit.hh>
26 #include <utility/io/izstream.hh>
27 #include <utility/file/FileName.hh>
28 
29 #include <core/chemical/AA.hh>
30 #include <basic/database/open.hh>
31 
32 #include <iostream>
33 #include <string>
34 
35 #include <utility/vector1.hh>
36 
37 
38 namespace core {
39 namespace sequence {
40 
41 static basic::Tracer tr( "core.sequence.MatrixScoringScheme" );
42 
43 void MatrixScoringScheme::read_data( utility::io::izstream & input ) {
44  std::string line;
45 
46  using utility::vector1;
47  using namespace core::chemical;
49  while( getline( input, line ) ) {
50  if ( line.substr(0,1) == "#" ) continue; // skip comments
51 
52  std::istringstream line_stream( line );
53  if ( line.substr(0,1) == " " ) { // header line
54  char aa;
55  while ( line_stream >> aa, !line_stream.fail() ) {
56  if ( oneletter_code_specifies_aa( aa ) ) {
57  order.push_back( aa_from_oneletter_code(aa) );
58  } else {
59  order.push_back( (core::chemical::AA) 0 ); //"Invalid" sentinel, to keep spacing correct
60  }
61  }
62  vector1< Real > dummy( (core::Size) num_aa_types, 0.0 );
63  scoring_matrix_.resize( (core::Size) num_aa_types, dummy );
64  } else {
65  char aa_name;
66  line_stream >> aa_name;
67  if ( !oneletter_code_specifies_aa( aa_name ) ) continue; // skip non-AA lines
68 
69  AA aa = aa_from_oneletter_code(aa_name);
70  for ( vector1< AA >::const_iterator it = order.begin(),
71  end = order.end(); it != end; ++it
72  ) {
73  Real score;
74  line_stream >> score;
75 
76  if ( line_stream.fail() ) {
77  std::string message = "Error reading line " + line + '\n';
78  utility_exit_with_message( message );
79  }
80 
81  if ( (Size) aa <= order.size() && *it != 0 && (Size) *it <= order.size() )
82  scoring_matrix_[ aa ][ *it ] = score;
83 
84  }
85  }
86  } // while( getline( input, line ) )
87 } // read_data
88 
89 /// @brief Read an alignment matrix from the given filename using the NCBI BLOSUM format
90 /// for matrices.
92  utility::io::izstream input( fn );
93  if ( !input ) {
94  utility_exit_with_message(
95  "ERROR: Unable to open MatrixScoringScheme file!" + std::string(fn)
96  );
97  }
98  read_data( input );
99 } // read_from_file
100 
101 /// @brief Read an alignment matrix from the given database filename using the
102 /// NCBI BLOSUM format for matrices.
104  read_from_file( basic::database::full_name( "sequence/substitution_matrix/" + name ) );
105 }
106 
107 /// @brief Get the values for amino acid aa, in Rosetta aa order.
109  return scoring_matrix_[ aa ];
110 }
111 
112 /// @brief Get the values for amino acid aa, in Rosetta aa order.
116  } else {
118  return retval;
119  }
120 }
121 
123  return scoring_matrix_;
124 }
125 
127  SequenceOP seq1,
128  SequenceOP seq2,
129  Size pos1,
130  Size pos2
131 ) {
132  runtime_assert( pos1 <= seq1->length() );
133  runtime_assert( pos2 <= seq2->length() );
134 
136  aa1( core::chemical::aa_from_oneletter_code( (*seq1)[pos1] ) ),
137  aa2( core::chemical::aa_from_oneletter_code( (*seq2)[pos2] ) );
138 
139  if ( aa1 == core::chemical::aa_unk || aa2 == core::chemical::aa_unk ) {
140  // likely a non-canonical aa in sequence
141  tr.Error << "returning score of zero for comparing amino acids "
142  << (*seq1)[pos1] << " and " << (*seq2)[pos2] << std::endl;
143  return 0;
144  }
145 
146  return 0.1 * scoring_matrix_[ aa1 ][ aa2 ];
147 }
148 
149 } // sequence
150 } // core