Rosetta 3.5
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
StepWiseClusterer.cc
Go to the documentation of this file.
1 // -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
2 // vi: set ts=2 noet:
3 //
4 // (c) Copyright Rosetta Commons Member Institutions.
5 // (c) This file is part of the Rosetta software suite and is made available under license.
6 // (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
7 // (c) For more information, see http://www.rosettacommons.org. Questions about this can be
8 // (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
9 
10 /// @file StepWiseClusterer
11 /// @brief Not particularly fancy, just filters a list of poses.
12 /// @detailed
13 /// @author Rhiju Das
14 
15 
16 //////////////////////////////////
19 
20 //////////////////////////////////
21 #include <core/types.hh>
22 #include <core/pose/Pose.hh>
23 #include <core/pose/util.hh>
31 #include <core/scoring/rms_util.hh>
32 #include <basic/Tracer.hh>
33 #include <utility/vector1.hh>
34 #include <utility/tools/make_vector1.hh>
35 
36 #include <ObjexxFCL/string.functions.hh>
37 #include <ObjexxFCL/format.hh>
38 #include <basic/options/option.hh>
39 #include <basic/options/keys/in.OptionKeys.gen.hh>
40 
41 #include <list>
42 
43 #ifdef PYROSETTA
44  #include <time.h>
45 #endif
46 
47 //Auto Headers
48 #include <core/id/AtomID.hh>
49 using namespace core;
50 using core::Real;
51 
52 static basic::Tracer TR( "protocols.swa.stepwise_clusterer" ) ;
53 using namespace basic::options;
54 using namespace basic::options::OptionKeys;
55 
56 namespace protocols {
57 namespace swa {
58 
59 
60  //////////////////////////////////////////////////////////////////////////
61  //constructor!
62  StepWiseClusterer::StepWiseClusterer( utility::vector1< std::string > const & silent_files_in )
63  {
64  initialize_parameters_and_input();
65  input_->set_record_source( true );
66  input_->filenames( silent_files_in ); //triggers read in of files, too.
67  }
68 
69  StepWiseClusterer::StepWiseClusterer( std::string const & silent_file_in )
70  {
71  initialize_parameters_and_input();
72  input_->set_record_source( true );
73 
74  utility::vector1< std::string > silent_files_;
75  silent_files_.push_back( silent_file_in );
76  input_->filenames( silent_files_ ); //triggers read in of files, too.
77  }
78 
79  StepWiseClusterer::StepWiseClusterer( core::io::silent::SilentFileDataOP & sfd )
80  {
81  initialize_parameters_and_input();
82  input_->set_silent_file_data( sfd ); // triggers reordering by energy and all that.
83  }
84 
85 
86  //////////////////////////////////////////////////////////////////////////
87  //destructor
88  StepWiseClusterer::~StepWiseClusterer()
89  {}
90 
91  //////////////////////////////////////////////////////////////////////////
92  void
93  StepWiseClusterer::initialize_parameters_and_input(){
94  max_decoys_ = 400;
95  cluster_radius_ = 1.5;
96  cluster_by_all_atom_rmsd_ = true;
97  score_diff_cut_ = 1000000.0;
98  auto_tune_ = false;
99  rename_tags_ = false;
100  force_align_ = false;
101 
102  score_min_ = 0.0 ;
103  score_min_defined_ = false;
104 
106  input_->set_order_by_energy( true );
107 
108  initialize_auto_tune_cluster_rmsds();
109  hit_score_cutoff_ = false;
110  initialized_atom_id_map_for_rmsd_ = false;
111 
112  rsd_type_set_ = option[ in::file::residue_type_set ]();
113  }
114 
115 
116  //////////////////////////////////////////////////////////////////////////
117  void
118  StepWiseClusterer::cluster()
119  {
120  using namespace core::scoring;
121  using namespace core::import_pose::pose_stream;
122  using namespace core::chemical;
123  using namespace core::pose;
124 
125  clock_t const time_start( clock() );
126 
128 
129  // basic initialization
130  initialize_cluster_list();
131 
132  if ( auto_tune_ ) {
133  cluster_with_auto_tune();
134  } else {
135  do_some_clustering();
136  }
137 
138  std::cout << "Total time in StepWiseClusterer: " <<
139  static_cast<Real>(clock() - time_start) / CLOCKS_PER_SEC << std::endl;
140 
141  }
142 
143 
144  /////////////////////////////////////////////////////////////////////
145  void
146  StepWiseClusterer::initialize_corresponding_atom_id_map( core::pose::Pose const & pose ){
147  using namespace core::scoring;
148 
149  // Only need to do this once!!!
150  if( cluster_by_all_atom_rmsd_ ) {
151  setup_matching_heavy_atoms( pose, pose, corresponding_atom_id_map_ );
152  } else {
153  setup_matching_protein_backbone_heavy_atoms( pose, pose, corresponding_atom_id_map_ );
154  }
155  initialized_atom_id_map_for_rmsd_ = true;
156  }
157 
158  /////////////////////////////////////////////////////////////////////
159  void
160  StepWiseClusterer::do_some_clustering() {
161 
162  using namespace core::pose;
163 
164  hit_score_cutoff_ = false;
165 
166  //for loop modeling, little chunk of pose used to calculate rms -- and saved.
167  PoseOP pose_op( new Pose );
168  Pose & pose = *pose_op;
169 
170  while ( input_->has_another_pose() ) {
171 
172  core::io::silent::SilentStructOP silent_struct( input_->next_struct() );
173  silent_struct->fill_pose( pose );
174 
175  Real score( 0.0 );
176  getPoseExtraScores( pose, "score", score );
177 
178  if ( !score_min_defined_ ){
179  score_min_ = score;
180  score_min_defined_ = true;
181  }
182 
183  if ( score > score_min_ + score_diff_cut_ ) {
184  hit_score_cutoff_ = true;
185  break;
186  }
187 
188  std::string tag( silent_struct->decoy_tag() );
189  TR << "Checking: " << tag << " with score " << score << " against list of size " << pose_output_list_.size();
190 
191  // carve out subset of residues for rms calculation.
192  if ( calc_rms_res_.size() > 0 ) pdbslice( pose, calc_rms_res_ );
193 
194  Size const found_close_cluster = check_for_closeness( pose_op );
195 
196  if ( found_close_cluster == 0 ) {
197  PoseOP pose_save( new Pose );
198  *pose_save = pose;
199  tag_output_list_.push_back( tag );
200  pose_output_list_.push_back( pose_save );
201  silent_struct_output_list_.push_back( silent_struct );
202  num_pose_in_cluster_.push_back( 1 );
203  TR << " ... added. " << std::endl;
204  if ( pose_output_list_.size() >= max_decoys_ ) break;
205  } else{
206  num_pose_in_cluster_[ found_close_cluster ]++;
207  TR << " ... not added. " << std::endl;
208  }
209  }
210 
211  TR << "After clustering, number of decoys: " << pose_output_list_.size() << std::endl;
212  return;
213 
214  }
215 
216 
217  /////////////////////////////////////////////////////////////////////
218  void
219  StepWiseClusterer::initialize_cluster_list() {
220 
221  pose_output_list_.clear();
222  tag_output_list_.clear();
223  silent_struct_output_list_.clear();
224  num_pose_in_cluster_.clear();
225 
226  score_min_ = 0.0 ;
227  score_min_defined_ = false;
228 
229  hit_score_cutoff_ = false;
230  }
231 
232  /////////////////////////////////////////////////////////////////////
233  void
234  StepWiseClusterer::cluster_with_auto_tune() {
235 
236  for ( Size n = 1; n <= cluster_rmsds_to_try_with_auto_tune_.size(); n++ ) {
237 
238  cluster_radius_ = cluster_rmsds_to_try_with_auto_tune_[ n ];
239 
240  //can current cluster center list be shrunk, given the new cluster_radius cutoff?
241  recluster_current_pose_list();
242 
243  do_some_clustering();
244 
245  if ( hit_score_cutoff_ ) {
246  std::cout << "Hit score cutoff: " << score_diff_cut_ << std::endl;
247  break;
248  }
249  if ( !input_->has_another_pose() ) {
250  std::cout << "Done with pose list. " << std::endl;
251  break;
252  }
253  }
254 
255  std::cout << "Clustering radius after auto_tune: " << cluster_radius_ << std::endl;
256 
257  }
258 
259 
260  /////////////////////////////////////////////////////////////////////
261  void
262  StepWiseClusterer::recluster_current_pose_list() {
263 
264  utility::vector1< core::pose::PoseOP > old_pose_output_list = pose_output_list_;
265  utility::vector1< std::string > old_tag_output_list = tag_output_list_;
266  utility::vector1< core::io::silent::SilentStructOP > old_silent_struct_output_list = silent_struct_output_list_;
267  utility::vector1< core::Size > old_num_pose_in_cluster = num_pose_in_cluster_;
268 
269  pose_output_list_.clear();
270  tag_output_list_.clear();
271  silent_struct_output_list_.clear();
272 
273  for ( Size i = 1; i <= old_pose_output_list.size(); i++ ) {
274 
275  core::pose::PoseOP pose_op = old_pose_output_list[ i ];
276 
277  Size const found_close_cluster = check_for_closeness( pose_op );
278  if ( found_close_cluster == 0 ) {
279  tag_output_list_.push_back( old_tag_output_list[ i ] );
280  pose_output_list_.push_back( old_pose_output_list[ i ] );
281  silent_struct_output_list_.push_back( old_silent_struct_output_list[ i ] );
282  num_pose_in_cluster_.push_back( old_num_pose_in_cluster[ i ] );
283  } else {
284  num_pose_in_cluster_[ found_close_cluster ] += old_num_pose_in_cluster[ i ];
285  }
286 
287  }
288 
289  TR << "After reclustering with rmsd " << cluster_radius_ << ", number of clusters reduced from " <<
290  old_pose_output_list.size() << " to " << pose_output_list_.size() << std::endl;
291 
292  }
293 
294 
295  ///////////////////////////////////////////////////////////////
296  Size
297  StepWiseClusterer::check_for_closeness( core::pose::PoseOP const & pose_op )
298  {
299  using namespace core::scoring;
300 
301  if ( !initialized_atom_id_map_for_rmsd_ ) initialize_corresponding_atom_id_map( *pose_op );
302 
303  // go through the list backwards, because poses may be grouped by similarity --
304  // the newest pose is probably closer to poses at the end of the list.
305  for ( Size n = pose_output_list_.size(); n >= 1; n-- ) {
306 
307  Real rmsd( 0.0 );
308 
309 
310  if ( calc_rms_res_.size() == 0 || force_align_ ) {
311  rmsd = rms_at_corresponding_atoms( *(pose_output_list_[ n ]), *pose_op, corresponding_atom_id_map_ );
312  } else {
313  // assumes prealignment of poses!!!
314  rmsd = rms_at_corresponding_atoms_no_super( *(pose_output_list_[ n ]), *pose_op,
315  corresponding_atom_id_map_ );
316  }
317 
318  if ( rmsd < cluster_radius_ ) {
319  return n;
320  }
321  }
322  return 0;
323  }
324 
325 
326  /////////////////////////////////////////////////////////////////////////////////////////
327  void
328  StepWiseClusterer::output_silent_file( std::string const & silent_file ){
329 
330  using namespace core::io::silent;
331 
332  SilentFileData silent_file_data;
333 
334  for ( Size n = 1 ; n <= silent_struct_output_list_.size(); n++ ) {
335 
336  SilentStructOP & s( silent_struct_output_list_[ n ] );
337  s->add_string_value( "nclust", ObjexxFCL::fmt::I(8,num_pose_in_cluster_[ n ]) );
338 
339  if ( rename_tags_ ){
340  s->add_comment( "PARENT_TAG", s->decoy_tag() );
341  std::string const tag = "S_"+ ObjexxFCL::string_of( n-1 /* start with zero */);
342  s->set_decoy_tag( tag );
343  }
344 
345  silent_file_data.write_silent_struct( *s, silent_file, false /*write score only*/ );
346 
347  }
348 
349  }
350 
351  /////////////////////////////////////////////////////////////////////////////////////////
353  StepWiseClusterer::silent_file_data(){
354 
355  using namespace core::io::silent;
356 
357  SilentFileDataOP silent_file_data = new SilentFileData;
358  for ( Size n = 1 ; n <= silent_struct_output_list_.size(); n++ ) {
359  silent_file_data->add_structure( silent_struct_output_list_[ n ] );
360  }
361  return silent_file_data;
362  }
363 
364  //////////////////////////////////////////////
365  PoseList
366  StepWiseClusterer::clustered_pose_list(){
367 
368  PoseList pose_list;
369 
370  for ( Size n = 1 ; n <= pose_output_list_.size(); n++ ) {
371  pose_list[ tag_output_list_[n] ] = pose_output_list_[ n ];
372  }
373 
374  return pose_list;
375  }
376 
377 
378  //////////////////////////////////////////////////////////////////////////
379  void
380  StepWiseClusterer::set_calc_rms_res( utility::vector1< core::Size > const & calc_rms_res ){
381  calc_rms_res_ = calc_rms_res;
382  }
383 
384  //////////////////////////////////////////////////////////////////////////
385  void
386  StepWiseClusterer::set_silent_file_data( core::io::silent::SilentFileDataOP & sfd ){
387  input_->set_silent_file_data( sfd );
388  }
389 
390  //////////////////////////////////////////////////////////////////////////
391  void
392  StepWiseClusterer::initialize_auto_tune_cluster_rmsds(){
393  cluster_rmsds_to_try_with_auto_tune_.clear();
394  cluster_rmsds_to_try_with_auto_tune_.push_back( 0.1 );
395  cluster_rmsds_to_try_with_auto_tune_.push_back( 0.2 );
396  cluster_rmsds_to_try_with_auto_tune_.push_back( 0.25 );
397  cluster_rmsds_to_try_with_auto_tune_.push_back( 0.3 );
398  cluster_rmsds_to_try_with_auto_tune_.push_back( 0.4 );
399  cluster_rmsds_to_try_with_auto_tune_.push_back( 0.5 );
400  cluster_rmsds_to_try_with_auto_tune_.push_back( 0.6 );
401  cluster_rmsds_to_try_with_auto_tune_.push_back( 0.8 );
402  cluster_rmsds_to_try_with_auto_tune_.push_back( 1.0 );
403  cluster_rmsds_to_try_with_auto_tune_.push_back( 1.2 );
404  cluster_rmsds_to_try_with_auto_tune_.push_back( 1.5 );
405  cluster_rmsds_to_try_with_auto_tune_.push_back( 1.75 );
406  cluster_rmsds_to_try_with_auto_tune_.push_back( 2.0 );
407  cluster_rmsds_to_try_with_auto_tune_.push_back( 2.25 );
408  cluster_rmsds_to_try_with_auto_tune_.push_back( 2.5 );
409  cluster_rmsds_to_try_with_auto_tune_.push_back( 2.75 );
410  cluster_rmsds_to_try_with_auto_tune_.push_back( 3.0 );
411  cluster_rmsds_to_try_with_auto_tune_.push_back( 3.5 );
412  cluster_rmsds_to_try_with_auto_tune_.push_back( 4.0 );
413  cluster_rmsds_to_try_with_auto_tune_.push_back( 4.5 );
414  cluster_rmsds_to_try_with_auto_tune_.push_back( 5.0 );
415  cluster_rmsds_to_try_with_auto_tune_.push_back( 6.0 );
416  cluster_rmsds_to_try_with_auto_tune_.push_back( 7.0 );
417  cluster_rmsds_to_try_with_auto_tune_.push_back( 8.0 );
418  cluster_rmsds_to_try_with_auto_tune_.push_back( 9.0 );
419  cluster_rmsds_to_try_with_auto_tune_.push_back( 10 );
420  cluster_rmsds_to_try_with_auto_tune_.push_back( 12.5 );
421  cluster_rmsds_to_try_with_auto_tune_.push_back( 15.0 );
422  cluster_rmsds_to_try_with_auto_tune_.push_back( 17.5 );
423  cluster_rmsds_to_try_with_auto_tune_.push_back( 20.0 );
424  cluster_rmsds_to_try_with_auto_tune_.push_back( 25.0 );
425  cluster_rmsds_to_try_with_auto_tune_.push_back( 30.0 );
426  cluster_rmsds_to_try_with_auto_tune_.push_back( 35.0 );
427  cluster_rmsds_to_try_with_auto_tune_.push_back( 40.0 );
428  cluster_rmsds_to_try_with_auto_tune_.push_back( 45.0 );
429  cluster_rmsds_to_try_with_auto_tune_.push_back( 50.0 );
430  }
431 
432 
433 }
434 }