Rosetta 3.5
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
MatchPositionModifiers.cc
Go to the documentation of this file.
1 // -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
2 // vi: set ts=2 noet:
3 // :noTabs=false:tabSize=4:indentSize=4:
4 //
5 // (c) Copyright Rosetta Commons Member Institutions.
6 // (c) This file is part of the Rosetta software suite and is made available under license.
7 // (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
8 // (c) For more information, see http://www.rosettacommons.org. Questions about this can be
9 // (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
10 
11 /// @file protocols/match/MatchPositionModifiers.cc
12 /// @brief implementations for MatchPositionModifiers
13 /// @author Florian Richter (floric@u.washington.edu ), may 2010
14 
15 // unit headers
17 
18 //package headers
20 
21 //project headers
22 // AUTO-REMOVED #include <core/id/AtomID.hh> // REQUIRED FOR WINDOWS
23 #include <core/chemical/AA.hh>
30 #include <core/pose/Pose.hh>
31 #include <core/pose/PDBInfo.hh>
32 #include <core/scoring/Energies.hh>
34 #include <basic/Tracer.hh>
35 
39 
40 //Numeric headers
41 #include <numeric/constants.hh>
42 #include <numeric/xyzVector.hh>
43 
44 //utility headers
45 #include <utility/string_util.hh>
46 #include <utility/tag/Tag.hh>
47 
48 #include <utility/vector0.hh>
49 #include <utility/vector1.hh>
50 
51 
52 namespace protocols {
53 namespace match {
54 
55 static basic::Tracer tr( "protocols.match.MatchPositionModifiers" );
56 
57 /// @brief "factory" function to create the match position modifiers
60  std::string const mpm_name,
61  core::Size geom_cst,
62  utility::vector1< std::string > const & input_tokens )
63 {
64  if( mpm_name == "ss" ) return new SecondaryStructureMPM( input_tokens );
65  else if( mpm_name == "num_neighbors" ) return new NumNeighborsMPM( input_tokens );
66  else if( mpm_name == "bfactor" ) return new BfactorMPM( input_tokens );
67  else if( mpm_name == "all" ) return new AddAllPositionsMPM();
68  else if( mpm_name == "no_c_n_term" ) return new RemoveNorCTermMPM( input_tokens );
69  else if( mpm_name == "task_operation" ) return new TaskOperationMPM( geom_cst, input_tokens );
70  return NULL;
71 }
72 
74 
76 
79 {
80  if( input_tokens.size() < 3 ) utility_exit_with_message("Not enough information given to initialize SecondaryStructureMPM");
81  for( core::Size i = 2; i < input_tokens.size(); ++i){
82  if( input_tokens[ i ] == "ss_char" ){
83  desired_ss_chars_.insert( input_tokens[i+1][0] );
84  tr << "SecondaryStructureMPM requires positions to have ss char " << input_tokens[i+1][0] << "." << std::endl;
85  i += 2;
86  }
87  else if( input_tokens[ i ] == "ss_motif" ){
88  ss_motifs_.push_back( input_tokens[ i+1 ] );
89  tr << "SecondaryStructureMPM requires positions to have motif " << input_tokens[i+1] << "." << std::endl;
90  i += 2;
91  }
92  else tr << "Token " << input_tokens[ i ] << " could not be understood by SecondaryStructureMPM and will be ignored." << std::endl;
93  }
94 }
95 
97 
100  utility::vector1< core::Size > const & original_positions,
101  core::pose::Pose const & match_pose,
103 ) const
104 {
// Filters original_positions by the DSSP secondary structure computed on match_pose.
// A position is kept (appended to to_return) if its DSSP character is contained in
// desired_ss_chars_, or if it satisfies one of the motifs in ss_motifs_; only the
// "helix_nterm" motif is handled here. Rejected positions are collected in
// remove_string and reported through the tracer.
// NOTE(review): this listing has gaps - the function header and the declaration of
// to_return are not shown - so the exact signature cannot be confirmed from here.
106  std::string remove_string("");
107 
108  core::scoring::dssp::Dssp pose_ss( match_pose );
109 
110  for( core::Size i =1; i <= original_positions.size(); ++i ){
111 
112  bool position_passes(false);
113  core::Size seqpos( original_positions[i] );
114  //first we'll check if any exact ss chars have been specified
115  if( desired_ss_chars_.size() != 0 ){
116  if( desired_ss_chars_.find( pose_ss.get_dssp_secstruct( seqpos ) ) != desired_ss_chars_.end() ){
117  position_passes = true;
118  }
119  } //if( desired_ss_chars_.size() != 0 )
120 
121  //then we'll check if any more complex motifs have been specified
122  for( core::Size motif = 1; motif <= ss_motifs_.size(); ++motif){
123 
// a position that already passed needs no further motif checks
124  if( position_passes ) break;
125 
126  /// @details helix_nterm logic: if seqpos has ss char H, it needs to be in
127  /// the beginning of the helix, i.e. there needs to be another ss char within
128  /// three positions upstream of seqpos
129  /// if seqpos doesn't have ss char H, a helix needs to commence within 3 positions
130  /// downstream of seqpos
131  if( ss_motifs_[ motif ] == "helix_nterm" ){
132  if( pose_ss.get_dssp_secstruct( seqpos ) == 'H' ){
// NOTE(review): if core::Size is unsigned, `seqpos - 3` wraps around for seqpos < 3;
// the bound check then fails on the first test and the loop body never runs, so such
// helix positions cannot pass through this branch - confirm whether that is intended.
133  for( core::Size j = seqpos - 1; (j >= seqpos - 3) && (j > 0 ); --j){
134  if( pose_ss.get_dssp_secstruct( j ) != 'H'){
135  position_passes = true;
136  break;
137  }
138  }
139  } //if seqpos is in helix
140  else{
// scan starts at seqpos itself (known not to be 'H' here) and is clamped to the pose length
141  for( core::Size j = seqpos; (j <= seqpos + 3) && ( j <= match_pose.total_residue() ); ++j){
142  if( pose_ss.get_dssp_secstruct( j ) == 'H'){
143  position_passes = true;
144  break;
145  }
146  }
147  }
148  } //helix_nterm motif
149  else{
// unknown motifs are only reported; they never make a position pass
150  tr << "WARNING: SecondaryStructureMPM doesn't know how to interpret motif '" << ss_motifs_[ motif ] << "'." << std::endl;
151  }
152  }//for( core::Size motif = 1; motif <= ss_motifs_.size(); ++motif)
153  if( position_passes ){
154  to_return.push_back( seqpos );
155  }
156  else remove_string += utility::to_string( seqpos ) + "+";
157  } //loop over all original positions
158  tr << "SecondaryStructureMPM removed the following match positions " << remove_string << "." << std::endl;
159  return to_return;
160 }
161 
162 
164  : MatchPositionModifier(), min_neighbors_(0), max_neighbors_(0),
165  com_vector_criterion_(false), both_criteria_needed_to_pass_(false),
166  min_com_vector_ang_cos_( 1.0 ), max_com_vector_ang_cos_( -1.0 )
167 {
// Parses the tokens from index 2 onwards. Recognised keywords:
//   min_neighbors <n> / max_neighbors <n>                  -> neighbor count bounds (parsed with atoi)
//   min_com_vector_ang <deg> / max_com_vector_ang <deg>    -> enable the center-of-mass vector
//       criterion; the angle is converted from degrees and stored as its cosine
//   both_criteria_needed_to_pass                           -> flag keyword without a value
// Unknown tokens are reported through the tracer and ignored.
168  if( input_tokens.size() < 3 ) utility_exit_with_message("Not enough information given to initialize NumNeighborsMPM");
// NOTE(review): the loop runs up to i == size(), so a value-taking keyword in the last
// slot makes input_tokens[i+1] index past the end. The other parsers in this file stop
// at i < size(); the inclusive bound here may exist so that a trailing flag keyword is
// still seen - confirm before changing.
169  for( core::Size i = 2; i <= input_tokens.size(); ++i){
170  if( input_tokens[ i ] == "min_neighbors" ){
171  min_neighbors_ = (core::Size) atoi( input_tokens[i+1].c_str() );
172  tr << "NumNeighborsMPM will only allow positions that have at least " << min_neighbors_ << " 10A neighbors." << std::endl;
// skip the value token that was just consumed
173  i++;
174  }
175  else if ( input_tokens[ i ] == "max_neighbors" ){
176  max_neighbors_ = (core::Size) atoi( input_tokens[i+1].c_str() );
177  tr << "NumNeighborsMPM will only allow positions that have no more than " << max_neighbors_ << " 10A neighbors." << std::endl;
178  i++;
179  }
180  else if ( input_tokens[ i ] == "min_com_vector_ang" ){
181  com_vector_criterion_ = true;
182  min_com_vector_ang_cos_ = cos( (( core::Real) atof(input_tokens[i+1].c_str() ) )* numeric::constants::f::degrees_to_radians);
183  i++;
184  }
185  else if ( input_tokens[ i ] == "max_com_vector_ang" ){
186  com_vector_criterion_ = true;
187  max_com_vector_ang_cos_ = cos( (( core::Real) atof(input_tokens[i+1].c_str() ) )* numeric::constants::f::degrees_to_radians );;
188  i++;
189  }
// NOTE(review): the body of this branch is not shown in this listing (a source line is
// missing between the two lines below); presumably it sets the corresponding flag - verify.
190  else if ( input_tokens[ i ] == "both_criteria_needed_to_pass" ){
192  }
193  else tr << "Token " << input_tokens[ i ] << " could not be understood by NumNeighborsMPM and will be ignored." << std::endl;
194  }
195 }
196 
198 
201  utility::vector1< core::Size > const & original_positions,
202  core::pose::Pose const & match_pose,
204 ) const
205 {
// Filters original_positions by the number of neighbors each residue has in the pose's
// 10A neighbor graph and, if enabled, by the center-of-mass vector criterion.
// Kept positions are appended to to_return; rejected ones are reported via the tracer.
// NOTE(review): this listing has gaps - the function header, the declarations of
// to_return and center_of_mass, and the line opening the inner `if` inside the
// com_vector_criterion_ block are not shown, which is why the braces below look unbalanced.
206  core::scoring::TenANeighborGraph const & cur_graph = match_pose.energies().tenA_neighbor_graph();
208  std::string remove_string("");
209 
210  //if we need to calculate com
// the "center of mass" is the unweighted mean of the neighbor-atom coordinates of all
// residues (assumes center_of_mass starts out as the zero vector - declaration not shown)
212  if( com_vector_criterion_ ){
213  for ( Size i = 1; i <= match_pose.total_residue(); ++i ) center_of_mass += match_pose.residue(i).nbr_atom_xyz();
214  center_of_mass /= match_pose.total_residue();
215  //tr << "Center of mass is " << center_of_mass.x() << " " << center_of_mass.y() << " " << center_of_mass.z() << std::endl;
216  }
217 
218  for( core::Size i =1; i <= original_positions.size(); ++i ){
219 
// neighbor count excluding the residue itself
220  core::Size neighbors( cur_graph.get_node( original_positions[i] )->num_neighbors_counting_self() - 1 );
221 
222  bool neighborpass( true ), com_vect_pass(true);
223  if( com_vector_criterion_ ) com_vect_pass = passes_com_vector_criterion( original_positions[i], match_pose, center_of_mass );
224 
// a bound of 0 means that bound is not applied
225  if( (min_neighbors_ != 0 ) && (neighbors < min_neighbors_) ) neighborpass = false;
226  if( (max_neighbors_ != 0 ) && (neighbors > max_neighbors_) ) neighborpass = false;
227  bool pass(neighborpass);
228 
// when the com criterion is active the two results are combined with AND or OR
// (the condition choosing between them is on a line missing from this listing)
229  if( com_vector_criterion_ ){
231  pass = (neighborpass && com_vect_pass);
232  //tr << "mpf resi " << original_positions[i] << " neighborpass is " << neighborpass << " compass is " << com_vect_pass << " pass is " << pass << std::endl;
233  }
234  else pass = (neighborpass || com_vect_pass);
235  }
236  if( pass ) to_return.push_back( original_positions[i] );
237  else remove_string += utility::to_string( original_positions[ i ] ) + "+";
238  }
239  tr << "NumNeighborsMPM removed the following match positions " << remove_string << "." << std::endl;
240  return to_return;
241 }
242 
243 bool
245  core::Size seqpos,
246  core::pose::Pose const & pose,
247  core::Vector const & com
248 ) const
249 {
250  core::Vector seqpos_to_com( com - pose.residue( seqpos ).atom("CA").xyz() );
251  core::Vector ca_cb( (pose.residue( seqpos ).aa() == core::chemical::aa_gly ? pose.residue( seqpos ).atom("2HA") : pose.residue( seqpos ).atom("CB")).xyz() - pose.residue( seqpos ).atom("CA").xyz() );
252 
253  //core::Real com_cos( numeric::cos_of( seqpos_to_com, pose.residue( seqpos ).nbr_atom_xyz() ) );
254  core::Real com_cos( seqpos_to_com.dot( ca_cb ) / (seqpos_to_com.length() * ca_cb.length() ) );
255  //std::cerr << seqpos << " com_cos is " << com_cos ;
256  if( ( com_cos < min_com_vector_ang_cos_ ) && (com_cos > max_com_vector_ang_cos_ ) ){
257  //std::cerr << " passing " << std::endl;
258  return true;
259  }
260  //std::cerr << " not passing " << std::endl;
261  return false;
262 }
263 
264 
266  : MatchPositionModifier(), use_relative_bfactors_(false), all_bfactors_zero_(false), max_bfactor_(0.0)
267 {
268  if( input_tokens.size() < 3 ) utility_exit_with_message("Not enough information given to initialize BfactorMPM");
269  for( core::Size i =2; i < input_tokens.size(); ++i){
270  if( input_tokens[ i ] == "relative" ){
271  use_relative_bfactors_ = true;
272  max_bfactor_ = (core::Real) atof( input_tokens[i+1].c_str() );
273  tr << "BfactorMPM will only allow positions that have a relative B-factor of not more than" << max_bfactor_ <<"." << std::endl;
274  }
275  if( input_tokens[ i ] == "absolute" ){
276  use_relative_bfactors_ = false;
277  max_bfactor_ = (core::Real) atof( input_tokens[i+1].c_str() );
278  tr << "BfactorMPM will only allow positions that have an absolute B-factor of not more than" << max_bfactor_ <<"." << std::endl;
279  }
280  else tr << "Token " << input_tokens[ i ] << " could not be understood by BfactorMPM and will be ignored." << std::endl;
281  }
282 }
283 
285 
288  utility::vector1< core::Size > const & original_positions,
289  core::pose::Pose const & match_pose,
291 ) const
292 {
// Keeps every position whose B-factor (as returned by get_ca_bfactors) does not exceed
// max_bfactor_; rejected positions are reported through the tracer.
// If all B-factors in the pose are zero the input positions are returned unchanged.
// NOTE(review): the function header and the declaration of to_return are not part of
// this listing.
294  std::string remove_string("");
295  utility::vector1< core::Real > bfactors( this->get_ca_bfactors( match_pose ) );
296 
// all_bfactors_zero_ is (re)set by the get_ca_bfactors call above
297  if( all_bfactors_zero_){
298  tr << "Warning: all bfactors in the pose were 0, meaning they were probably wiped. BfactorMPM will not modify match positions." << std::endl;
299  to_return = original_positions;
300  return to_return;
301  }
302 
303  for( core::Size i =1; i <= original_positions.size(); ++i ){
// bfactors is indexed by sequence position, not by index into original_positions
304  if( bfactors[ original_positions[ i ] ] <= max_bfactor_ ) to_return.push_back( original_positions[i] );
305  else remove_string += utility::to_string( original_positions[ i ] ) + "+";
306  }
307  tr << "BfactorMPM removed the following match positions " << remove_string << "." << std::endl;
308  return to_return;
309 }
310 
313 {
// Collects one B-factor per residue of the pose from its PDBInfo: the CA atom's value for
// protein residues, the value of atom 1 otherwise. In the first branch the values are
// divided by the largest one found; in the `else` branch they are returned as read.
// Also updates all_bfactors_zero_ as a side effect.
// NOTE(review): this listing has gaps - the function header, the declaration of bfactors
// and the `if` that opens the first branch (presumably testing use_relative_bfactors_)
// are not shown.
// NOTE(review): pose.pdb_info() is dereferenced without a null check - confirm callers
// guarantee the pose carries PDBInfo.
315  core::pose::PDBInfo const & pdb_info( *(pose.pdb_info()) );
316  all_bfactors_zero_ = true;
317 
319  core::Real max_bfactor(0.0);
320  for( core::Size seqpos = 1; seqpos <= pose.total_residue(); ++seqpos ){
321  if( ! pose.residue_type( seqpos ).is_protein() ) bfactors.push_back( pdb_info.temperature( seqpos, 1 ) );
322  else bfactors.push_back( pdb_info.temperature( seqpos, pose.residue( seqpos ).atom_index( "CA" ) ) );
// bfactors[ seqpos ] and bfactors[ bfactors.size() ] refer to the element just pushed,
// provided bfactors was empty before the loop (declaration not shown - TODO confirm)
323  if( bfactors[ seqpos ] > max_bfactor ) max_bfactor = bfactors[ seqpos ];
324  if( bfactors[ bfactors.size() ] > 0.0 ) all_bfactors_zero_ = false;
325  }
// no positive B-factor found: return early, which also avoids dividing by zero below
326  if( max_bfactor == 0.0 ){ //in this case we return
327  all_bfactors_zero_ = true;
328  return bfactors;
329  }
330 
// normalise by the largest value
331  for( core::Size seqpos = 1; seqpos <= pose.total_residue(); ++seqpos ) bfactors[ seqpos ] /= max_bfactor;
332  }
333 
334  else{
335  for( core::Size seqpos = 1; seqpos <= pose.total_residue(); ++seqpos ){
336  if( ! pose.residue( seqpos ).is_protein() ) bfactors.push_back( pdb_info.temperature( seqpos, 1 ) );
337  else bfactors.push_back( pdb_info.temperature( seqpos, pose.residue( seqpos ).atom_index( "CA" ) ) );
338 
339  if( bfactors[ bfactors.size() ] > 0.0 ) all_bfactors_zero_ = false;
340  }
341  }
342  return bfactors;
343 }
344 
// Empty body; the header it belongs to is not part of this listing.
347 {}
348 
350 
353  utility::vector1< core::Size > const &, //original_positions,
354  core::pose::Pose const & match_pose,
356 ) const
357 {
// Ignores the incoming position list and returns every protein residue of the pose.
// NOTE(review): the function header and the declaration of to_return are not part of
// this listing.
359  for( core::Size i = 1; i <= match_pose.total_residue(); ++i ){
360  if( match_pose.residue_type( i ).is_protein() ) to_return.push_back( i );
361  }
362  return to_return;
363 }
364 
366  : MatchPositionModifier(), cterm_length_(0), nterm_length_(0)
367 {
368  for( core::Size i =2; i < input_tokens.size(); ++i){
369  if( input_tokens[ i ] == "cterm" ){
370  runtime_assert( input_tokens.size() > i );
371  cterm_length_ = (core::Size) atoi( input_tokens[i+1].c_str() );
372  }
373  else if( input_tokens[ i ] == "nterm" ){
374  runtime_assert( input_tokens.size() > i );
375  nterm_length_ = (core::Size) atoi( input_tokens[i+1].c_str() );
376  }
377  }
378 }
379 
381 
384  utility::vector1< core::Size > const & original_positions,
385  core::pose::Pose const & match_pose,
387 ) const
388 {
// Keeps the positions p with nterm_length_ <= p <= cterm, where cterm is the last
// residue of the pose or, if cterm_length_ is set, the last protein residue minus
// cterm_length_.
// NOTE(review): the function header and the declaration of to_return are not part of
// this listing.
390  core::Size cterm = match_pose.total_residue();
391 
392  if( cterm_length_ != 0 ){ //we have to determine the cterminus of the protein
// scan backwards for the last protein residue (trailing non-protein residues are skipped)
393  for( core::Size i = match_pose.total_residue(); i > 0; --i ){
394  if( match_pose.residue_type( i ).is_protein() ) {
395  cterm = i;
396  break;
397  }
398  }
// NOTE(review): if core::Size is unsigned and cterm_length_ exceeds cterm, this
// subtraction wraps around and the upper bound no longer filters anything - confirm.
399  cterm = cterm - cterm_length_;
400  }
401 
402  for( core::Size i = 1; i <= original_positions.size(); ++i){
// both bounds are inclusive: a position equal to nterm_length_ is kept
403  if( (original_positions[i] >= nterm_length_) && (original_positions[i] <= cterm) ) to_return.push_back( original_positions[i] );
404 
405  } // loop over all original positions
406 
407  return to_return;
408 }
409 
410 /// @details
411 /// this is a little tricky, we have to reassemble a tag out of the tokens,
412 /// parse it into a Tag object and pass that to newTaskOperation to build the task operation.
414  core::Size which_geom_cst,
415  utility::vector1< std::string > const & input_tokens )
416  : MatchPositionModifier(), which_geom_cst_(which_geom_cst), task_op_(NULL)
417 {
// NOTE(review): input_tokens[2] is read without a prior size check, and the tag string is
// assembled before the sanity asserts below run - confirm callers always pass >= 2 tokens.
418  //1. reassemble tag components into string
419  std::string tagstring(input_tokens[2]);
420  for(core::Size i = 3; i <= input_tokens.size(); ++i ) tagstring = tagstring + " " + input_tokens[i];
421 
422  //make sure the supplied tokens form a tag: the first must start with '<', the last must end with '>'
423  runtime_assert(input_tokens[2].substr(0,1) == "<");
424  runtime_assert(input_tokens[input_tokens.size()].substr( input_tokens[input_tokens.size()].length() - 1,1) == ">" );
// the task operation name is the first tag token without its '<'
425  std::string task_op_name = utility::trim( input_tokens[2], "<");
426  tr << "TaskOperationMPM getting task_op of type " << task_op_name << " with tag '" << tagstring << "'." << std::endl;
427 
428  //2. instantiate a tag object from the string
429  utility::tag::TagPtr tag = new utility::tag::Tag();
430  std::istringstream tagstream(tagstring);
431  tag->read(tagstream);
432 
433  //3. make task op
// NOTE(review): the declaration of topfac is not part of this listing.
435  task_op_ = topfac->newTaskOperation( task_op_name, tag );
436 
437  //4. done
438 }
439 
441 
442 /// @details
443 /// generates a task based on the parsed task operation,
444 /// and then allows matching at all positions where the
445 /// upstream residues specified in the cstfile are allowed
446 /// in the position's residue_task.
449  utility::vector1< core::Size > const & original_positions,
450  core::pose::Pose const & match_pose,
452 ) const
453 {
// NOTE(review): this listing has gaps - the function header, the parameter that provides
// mtask and the declaration of to_return are not shown.
454  using namespace core::pack::task;
// build a packer task for the pose with only the stored task operation applied
455  TaskFactory tfactory;
456  tfactory.push_back( task_op_);
457  PackerTaskOP ptask(tfactory.create_task_and_apply_taskoperations( match_pose ) );
459 
460  //the following code is not the most efficient, searching through vectors/lists, etc
461  //but it'll only be done once per matcher run, and the vectors are usually not very large
462  //so it shouldn't matter that much
463  utility::vector1< core::chemical::ResidueTypeCOP> const & upstream_restypes( mtask->enz_input_data()->mcfi_list( which_geom_cst_ )->upstream_restypes() );
464 
465  for( core::Size i = 1; i <= original_positions.size(); ++i){
466  ResidueLevelTask const & restask( ptask->residue_task( original_positions[i] ));
467  if( restask.being_packed() ){ //let's only match repackable positions
468 
469  for( ResidueLevelTask::ResidueTypeCOPListConstIter restype_it( restask.allowed_residue_types_begin()), restype_it_end( restask.allowed_residue_types_end() ); restype_it != restype_it_end; ++restype_it ){
470 
471  //let's be somewhat generous: if any of the upstream residues specified in the cstfile
472  //is allowed at this residue, we consider it good for matching
473  //note: we're doing name3 comparison instead of pointer comparison here because
474  //of variant type uncertainties
475  bool name3_found( false );
476  for( utility::vector1< core::chemical::ResidueTypeCOP>::const_iterator upres_it( upstream_restypes.begin() ), upres_end(upstream_restypes.end()); upres_it != upres_end; ++ upres_it ){
477  if( (*restype_it)->name3() == (*upres_it)->name3() ){
478  //tr << "ARRG residue " << (*restype_it)->name3() << " allowed at pos " << i << ", set to matching " << std::endl;
479  to_return.push_back( original_positions[i] );
480  name3_found = true;
481  break;
482  }
483  }
// one match is enough; leaving the outer loop also ensures the position is added only once
484  if( name3_found ) break;
485  } //loop over packer residue types at this position
486  } //if being repacked
487  } // loop over original positions
488  return to_return;
489 }
490 
491 
492 }
493 }