Rosetta 3.5
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
SeqprofConsensusOperation.cc
Go to the documentation of this file.
1 // -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
2 // vi: set ts=2 noet:
3 //
4 // (c) Copyright Rosetta Commons Member Institutions.
5 // (c) This file is part of the Rosetta software suite and is made available under license.
6 // (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
7 // (c) For more information, see http://www.rosettacommons.org. Questions about this can be
8 // (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
9 
10 /// @file protocols/toolbox/task_operations/SeqprofConsensusOperation.cc
11 /// @brief set designable residues to those observed in sequence profile
12 /// @author Florian Richter, floric@u.washington.edu, april 2011
13 
14 
15 // Unit Headers
18 
19 // Project Headers
20 #include <basic/options/option.hh>
21 #include <basic/options/keys/in.OptionKeys.gen.hh>
22 #include <basic/Tracer.hh>
23 
24 #include <core/chemical/AA.hh>
29 #include <core/pose/Pose.hh>
32 
34 
35 // Utility Headers
36 #include <utility/string_util.hh>
37 #include <utility/file/file_sys_util.hh>
38 #include <utility/tag/Tag.hh>
39 #include <utility/vector1.hh>
40 #include <string>
41 
42 #include <utility/vector0.hh>
43 
44 
// File-local tracer; the channel name follows the enclosing namespace path
// protocols::toolbox::task_operations.
45 static basic::Tracer tr("protocols.toolbox.task_operations.SeqprofConsensusOperation");
46 
47 namespace protocols{
48 namespace toolbox{
49 namespace task_operations{
50 
53 {
54  return new SeqprofConsensusOperation;
55 }
56 
57 
58 /// @brief default constructor
60  TaskOperation(),
61  seqprof_filename_( basic::options::option[ basic::options::OptionKeys::in::file::pssm ][1] ),
62  seqprof_(NULL),
63  min_aa_probability_(0.0),
64  prob_larger_current_(true),
65  ignore_pose_profile_length_mismatch_(false)
66 {
69  seqprof->convert_profile_to_probs(); // was previously implicit in from-filename constructor
70  seqprof_ = seqprof;
71  }
72 }
73 
74 
75 /// @brief destructor
77 
78 /// @brief clone
81  return new SeqprofConsensusOperation( *this );
82 }
83 
84 /// @brief all AA that have a higher probability in the seqprofile
85 /// than the native residue are allowed. probability also
86 /// needs to be higher than min_aa_probability_
87 /// @details NOTE ON SYMMETRIC POSE BEHAVIOR:
88 /// pssm files are usually for one chain only, therefore
89 /// this task operation will only set the residue behavior for
90 /// the first chain/asymetric unit.
91 /// it could be possible to handle the symmetry setup here, i.e.
92 /// set up the residue level task for every symmetric copy, but
93 /// it's prolly better to let the symmetry machinery deal with that
94 /// mode of packer task symmetrization should be intersection
95 void
96 SeqprofConsensusOperation::apply( Pose const & pose, PackerTask & task ) const
97 {
98  if( !seqprof_) utility_exit_with_message("No sequence profile set. option -in:file:pssm not specified? no filename in tag specified?");
99 
100  core::Size asymmetric_unit_res( pose.total_residue() );
103  dynamic_cast<core::conformation::symmetry::SymmetricConformation const &> ( pose.conformation()) );
104  asymmetric_unit_res = SymmConf.Symmetry_Info()->num_independent_residues();
106  }
107  core::Size last_res (asymmetric_unit_res <= seqprof_->profile().size() ? pose.total_residue() : seqprof_->profile().size() );
108  for( core::Size i = 1; i <= last_res; ++i){
109 
110  if( !pose.residue_type( i ).is_protein() ) continue;
111  //std::cout << "SCO at pos " << i << " allows the following residues: ";
112  utility::vector1< Real > const & pos_profile( (seqprof_->profile())[ i ] );
114  core::Real current_prob( pos_profile[ pose.residue_type(i).aa() ] );
115 
117  core::Real prob( pos_profile[ aa ] );
118  if( prob >= min_aa_probability_ ){
119  if( prob_larger_current_) {
120  if( prob >= current_prob ) keep_aas[ aa ] = true;
121  }
122  else keep_aas[ aa ] = true;
123  //std::cout << " " << static_cast<core::chemical::AA>(aa) << " prob=" << prob << ", ";
124  }
125  }
126  keep_aas[ pose.residue_type(i).aa() ] = true; //current always allowed
127  //std::cout << " native " << pose.residue_type(i).aa() << " prob=" << native_prob << "." << std::endl;
128 
130 
131  } //loop over all residues for which profile information exists
132 
133  bool prot_res_without_profile_information_exist(false);
134  for( core::Size i = last_res + 1; i <= asymmetric_unit_res; ++i){
136  if( pose.residue_type( i ).is_protein() ) prot_res_without_profile_information_exist = true;
137  }
138 
139  if( prot_res_without_profile_information_exist ){
140  if( ignore_pose_profile_length_mismatch_ ) tr << "WARNING WARNING: the passed in pose is longer than the sequence profile specified. Double check whether the used sequence profile is correct. Setting every excess pose residue to repacking.";
141 
142  else utility_exit_with_message("The passed in pose is longer than the sequence profile specified. Double check whether the used sequence profile is correct.");
143  }
144 } // apply
145 
146 void
148 {
149  if( tag->hasOption("filename") ){
150  seqprof_filename_ = tag->getOption< String >( "filename" );
152  seqprof->convert_profile_to_probs(); // was previously implicit in from-filename constructor
153  seqprof_ = seqprof;
154  }
155  if( tag->hasOption("min_aa_probability") ) min_aa_probability_ = tag->getOption< Real >("min_aa_probability" );
156  if( tag->hasOption("probability_larger_than_current") ) prob_larger_current_ = tag->getOption< bool >("probability_larger_than_current");
157 
158  if( tag->hasOption("ignore_pose_profile_length_mismatch") ) ignore_pose_profile_length_mismatch_ = tag->getOption< bool >("ignore_pose_profile_length_mismatch");
159 }
160 
163 {
164  return seqprof_;
165 }
166 
167 void
169 {
170  if ( reweight ) {
172  reweightedprof->convert_profile_to_probs(); // was previously implicit in from-filename constructor
173  seqprof_ = reweightedprof;
174  } else {
175  seqprof_ = seqprof;
176  }
177 }
178 
181 {
183 }
184 
186 : Parent(),
187  ddG_predictions_filename_(basic::options::option[ basic::options::OptionKeys::in::file::ddg_predictions_file ].value()),
188  conservation_cutoff_(0.6),
189  ddG_cutoff_(1.5),
190  verbose_(false)
191 {
192  position_ddGs_.clear();
195  }
196 }
197 
199 {}
200 
203 {
204  return new RestrictConservedLowDdgOperation( *this );
205 }
206 
207 void
209 {
210  Parent::parse_tag( tag );
211  if( tag->hasOption("ddG_filename") ){
212  ddG_predictions_filename_ = tag->getOption< std::string >("ddG_filename" );
214  }
215 
216  if( tag->hasOption("conservation_cutoff") ) conservation_cutoff_ = tag->getOption< Real >("conservation_cutoff" );
217  if( tag->hasOption("ddG_cutoff") ) ddG_cutoff_ = tag->getOption< Real >("ddG_cutoff" );
218  if( tag->hasOption("verbose") ) verbose_ = tag->getOption< bool >("verbose" );
219 }
220 
221 void
223  Pose const & pose,
224  PackerTask & task
225 ) const
226 {
227  if( !this->seqprof()) utility_exit_with_message("No sequence profile set. option -in:file:pssm not specified? no filename in tag specified?");
228 
229  if( position_ddGs_.size() == 0 ) utility_exit_with_message("No ddG infos were read in. option -in:file:ddg_predictions_file not specified? no filename in tag specified?");
230 
231  for( core::Size i = 1; i <= pose.total_residue(); ++i){
232 
233  if( !pose.residue_type( i ).is_protein() ) continue;
235 
236  if( position_untouchable( i, seqprof_wt_aa ) ){
237  if( seqprof_wt_aa == pose.residue_type(i).aa() ) task.nonconst_residue_task( i ).restrict_to_repacking();
238  else{
240  keep_aas[ seqprof_wt_aa ] = true;
241  keep_aas[ pose.residue_type(i).aa() ] = true;
243  }
244  } // if untouchable
245  } //loop over all residues
246 }
247 
/// @brief decides whether sequence position seqpos counts as untouchable.
/// @param seqpos sequence position; indexes the profile and position_ddGs_
/// @param seqprof_wt wildtype amino acid of that position according to the
/// sequence profile (pssm file)
/// @return true if the profile frequency of seqprof_wt at seqpos is above
/// conservation_cutoff_ and, where a ddG for mutating to Ala is available
/// (wildtype is not Ala and the position has an entry in position_ddGs_),
/// that ddG is above ddG_cutoff_; false otherwise.
/// @details exits with a message if the wildtype recorded with the ddG data
/// differs from seqprof_wt, or if the ddG entry has no value for Ala.
248 bool
250  core::Size seqpos,
251  core::chemical::AA seqprof_wt
252 ) const
253 {
254 
255  //note: first we deal with the alanine special case
256  //obviously there is no ddG associated with mutating Ala to Ala,
257  //so for alanine residues we return true through the conservation
258  //criterion alone, the rationale being that conserved alanines
259  //are probably important structurally
260  if( seqprof_wt == core::chemical::aa_ala ){
261  if( (seqprof()->profile())[ seqpos ][ seqprof_wt ] > conservation_cutoff_) return true;
262  else return false;
263  }
264 
265  std::map< core::Size, core::io::PositionDdGInfo::PositionDdGInfoOP >::const_iterator posddg_it( position_ddGs_.find( seqpos ) );
266 
267  //note: if the position wasn't found, this could mean that the predictions file
268  //was incomplete or that the ddG protocol couldn't calculate a proper ddG,
269  //which is the case for disulfides and some modified residue types such as
270  //phospho-ser etc. so let's spit out a warning and just apply the conservation_cutoff_
271  if( posddg_it == position_ddGs_.end() ){
272 
273  tr << "Warning: no ddG information read for sequence position " << seqpos << ". This could either mean that the ddG predictions input file is incomplete or that the original PDB had a disulfide cys or other modified residue at this position. Decision whether residue is untouchable will be made based on sequence conservation alone." << std::endl;
274  if( (seqprof()->profile())[ seqpos ][ seqprof_wt ] > conservation_cutoff_) return true;
275  else return false;
276  }
277 
278  core::io::PositionDdGInfo::PositionDdGInfo const & pos_ddg( *(posddg_it->second) );
279  if( seqprof_wt != pos_ddg.wt_aa() ) utility_exit_with_message ("The wildtype aa for position "+utility::to_string( seqpos ) + " is different in the ddG file and the pssm file. Something's unclean somewhere." );
280  std::map< core::chemical::AA, core::Real >::const_iterator ala_it( pos_ddg.mutation_ddGs().find( core::chemical::aa_ala ) );
281  if( ala_it == pos_ddg.mutation_ddGs().end() ) utility_exit_with_message("The ddG of mutating to Ala was not found for position "+utility::to_string( seqpos )+" in file "+ddG_predictions_filename_ + ".");
282 
283  if( (ala_it->second > ddG_cutoff_) && ( (seqprof()->profile())[ seqpos ][ seqprof_wt ] > conservation_cutoff_) ){
 // the value printed here is the ddG of mutating to Ala, not the cutoff itself
284  if( verbose_ ) tr << "Pos " << seqprof_wt << seqpos << " has Ala ddG of " << ala_it->second << " and profile frequency of " << (seqprof()->profile())[ seqpos ][ seqprof_wt ] << ", considered untouchable." << std::endl;
285  return true;
286  }
287  return false;
288 }
289 
292 {
293  return core::chemical::aa_from_oneletter_code( (*(this->seqprof()))[seqpos] );
294 }
295 
298 {
299  if( seqprof_wt_aa( seqpos ) == core::chemical::aa_ala ) return 0.0;
300 
301  std::map< core::Size, core::io::PositionDdGInfo::PositionDdGInfoOP >::const_iterator posddg_it( position_ddGs_.find( seqpos ) );
302  if( posddg_it == position_ddGs_.end() ) utility_exit_with_message("no ddg information read for sequence position "+ utility::to_string( seqpos ) );
303  core::io::PositionDdGInfo::PositionDdGInfo const & pos_ddg( *(posddg_it->second) );
304  std::map< core::chemical::AA, core::Real >::const_iterator ala_it( pos_ddg.mutation_ddGs().find( core::chemical::aa_ala ) );
305  if( ala_it == pos_ddg.mutation_ddGs().end() ) utility_exit_with_message("The ddG of mutating to Ala was not found for position "+utility::to_string( seqpos )+" in file "+ddG_predictions_filename_ + ".");
306  return ala_it->second;
307 }
308 
309 } // task_operations
310 } // toolbox
311 } // protocols
312