Rosetta 3.5
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
StepWiseRNA_Clusterer.cc
Go to the documentation of this file.
1 // -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
2 // vi: set ts=2 noet:
3 //
4 // (c) Copyright Rosetta Commons Member Institutions.
5 // (c) This file is part of the Rosetta software suite and is made available under license.
6 // (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
7 // (c) For more information, see http://www.rosettacommons.org. Questions about this can be
8 // (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
9 
10 /// @file StepWiseRNA_Clusterer
11 /// @detailed
12 /// @author Parin Sripakdeevong (sripakpa@stanford.edu), Rhiju Das (rhiju@stanford.edu)
13 
14 
15 //////////////////////////////////
17 #include <protocols/swa/rna/StepWiseRNA_OutputData.hh> //Sept 26, 2011
22 //////////////////////////////////
23 #include <core/types.hh>
24 #include <core/pose/Pose.hh>
25 #include <core/pose/util.hh>
26 #include <core/scoring/Energies.hh>
35 
37 #include <core/scoring/rms_util.hh>
39 //#include <basic/datacache/CacheableDataType.hh>
40 #include <basic/datacache/BasicDataCache.hh>
41 #include <basic/datacache/CacheableString.hh>
42 #include <basic/Tracer.hh>
43 #include <utility/vector1.hh>
44 #include <utility/tools/make_vector1.hh>
47 
51 #include <core/io/pdb/pose_io.hh>
52 
53 #include <ObjexxFCL/format.hh>
54 #include <ObjexxFCL/string.functions.hh>
55 
56 #include <list>
57 #include <time.h>
58 
59 using namespace core;
60 using core::Real;
61 using basic::T;
62 
// Tracer object with internal linkage (declared static before the namespace
// opens), registered under the channel name given in the string literal.
// NOTE(review): the functions visible in this part of the file write to
// std::cout rather than to TR - confirm whether TR is used further down.
63 static basic::Tracer TR( "protocols.swa.rna_stepwise_rna_clusterer" );
64 
65 
66 namespace protocols {
67 namespace swa {
68 namespace rna {
69 
70 // @brief Auto-generated virtual destructor
71 SlicedPoseJobParameters::~SlicedPoseJobParameters() {}
72 
73 
74  //////////////////////////////////////////////////////////////////////////
75  //constructor!
76  StepWiseRNA_Clusterer::StepWiseRNA_Clusterer( utility::vector1< std::string > const & silent_files_in )
77  {
78  initialize_parameters_and_input();
79  input_->set_record_source( true );
80  input_->filenames( silent_files_in ); //triggers read in of files, too.
81  }
82 
83  StepWiseRNA_Clusterer::StepWiseRNA_Clusterer( std::string const & silent_file_in )
84  {
85  initialize_parameters_and_input();
86  input_->set_record_source( true );
87 
88  utility::vector1< std::string > silent_files_;
89  silent_files_.push_back( silent_file_in );
90  input_->filenames( silent_files_ ); //triggers read in of files, too.
91  }
92 
93  StepWiseRNA_Clusterer::StepWiseRNA_Clusterer( core::io::silent::SilentFileDataOP & sfd )
94  {
95  initialize_parameters_and_input();
96  input_->set_silent_file_data( sfd ); // triggers reordering by energy and all that.
97  }
98 
99  //////////////////////////////////////////////////////////////////////////
100  //destructor
101  StepWiseRNA_Clusterer::~StepWiseRNA_Clusterer()
102  {}
103 
104  //////////////////////////////////////////////////////////////////////////
105  void
106  StepWiseRNA_Clusterer::initialize_parameters_and_input(){
108  input_->set_order_by_energy( true );
109 
110  //max_decoys_ = 9999999999; Feb 02, 2012; This lead to server-test error at R47198
111  //score_diff_cut_ = 1000000000.0; Feb 02, 2012; This might lead to server-test error at R47198
112  max_decoys_ = 999999; //Feb 02, 2012;
113  score_diff_cut_ = 100000.0; //Feb 02, 2012;
114  perform_score_diff_cut_ = false; //Jan 23, 2012: I rarely use score_diff_cut_ in SWA RNA, so PLEASE leave this false as DEFAULT to be safe!
115 
116  whole_struct_cluster_radius_ = 0.5;
117  suite_cluster_radius_= 999.99;
118  loop_cluster_radius_= 999.99;
119 
120  rename_tags_ = false;
121  job_parameters_exist_=false;
122  distinguish_pucker_=true;
123  add_lead_zero_to_tag_=false; //For easier pdb selection in pymol
124  quick_alignment_=false; //new option May 29, 2010...speed up clustering code...however only work if alignment residues are fixed res
125  align_only_over_base_atoms_=true; //Set to true for backward compatibility. Add option in Aug 20, 2011
126  optimize_memory_usage_=false;
127  verbose_ = true;
128  keep_pose_in_memory_= true; //Can save memory at the expense of speed by not keeping the pose
129  keep_pose_in_memory_hydrid_= true; //basically keep pose in memory until memory runs out!
130  max_memory_pose_num_=0;
131  two_stage_clustering_ = false; //Cluster is two stage using triangle inequaility to speed up clustering. Need keep_pose_in_memory mode==false or else code will be too slow.
132  use_triangle_inequality_ = false; //This is turned on during the second stage of the two_stage_clustering mode;
133  PBP_clustering_at_chain_closure_ = false;
134  quick_alignment_pose_is_intialized_=false;
135  skip_clustering_=false; //I know this is weird...basically, this is for using clusterer to recalculate rmsd.
136  perform_VDW_rep_screen_=false; //March 20, 2011
137  perform_filters_=false; //June 20, 2011.
138  VDW_rep_screen_info_.clear(); //make sure that this is empty, Marc 21, 2011
139  full_length_loop_rmsd_clustering_=false;
140  ignore_FARFAR_no_auto_bulge_tag_=false; //Sept 06, 2011..for post-processing.
141  ignore_FARFAR_no_auto_bulge_parent_tag_=false; //Sept 06, 2011..for post-processing.
142  ignore_unmatched_virtual_res_=false; //Sept 07, 2011...for post-processing
143  output_pdb_=false; //Sept 24, 2011
144  min_num_south_ribose_filter_=0; //Oct 02, 2011
146  }
147 
148  //////////////////////////////////////////////////////////////////////////
149  void
150  StepWiseRNA_Clusterer::cluster()
151  {
152  using namespace core::scoring;
153  using namespace core::import_pose::pose_stream;
154  using namespace core::chemical;
155  using namespace core::pose;
156 
157  clock_t const time_start( clock() );
158  Output_title_text("StepWiseRNA_Clusterer::cluster()");
159 
160  Output_boolean("verbose_= ", verbose_); std::cout << std::endl;
161  Output_boolean("skip_clustering_= ", skip_clustering_); std::cout << std::endl;
162 
163  if(skip_clustering_){
164 
165  //Commented out Dec 11, 2011. CHANGE TO SET THIS FROM COMMAND_LINE (See for example SWA_cluster.py!
166  //std::cout << "skip_clustering==true --> set keep_pose_in_memory to false" << std::endl;
167  //keep_pose_in_memory_=false;
168 
169  std::cout << "skip_clustering==true --> set keep_pose_in_memory_hydrid_ to false" << std::endl; //HACKY!
170  keep_pose_in_memory_hydrid_=false; //HACKY!
171  }
172 
173 
174  ///March 20, 2011/////////////////////
175  if(( perform_VDW_rep_screen_==true) && (VDW_rep_screen_info_.size()==0) ){
176  std::cout << "User pass in perform_VDW_rep_screen_==true but VDW_rep_screen_info_.size()==0" << std::endl;
177  std::cout << "Override and set perform_VDW_rep_screen_ to false" << std::endl;
178  perform_VDW_rep_screen_=false;
179  }
180 
181  Output_boolean("perform_VDW_rep_screen_= ", perform_VDW_rep_screen_); std::cout << std::endl;
182  Output_boolean("perform_filters_= ", perform_filters_); std::cout << std::endl;///June 14, 2011 Perform other filters aside from VDW_rep_screen.
183 
184  if(perform_filters_ && (skip_clustering_==false)) utility_exit_with_message("perform_filters_ but skip_clustering_==false");
185  if(perform_VDW_rep_screen_ && (skip_clustering_==false)) utility_exit_with_message("perform_VDW_rep_screen_ but skip_clustering_==false");
186  /////////////////////////////////////
187 
188  std::cout << "suite_cluster_radius_= " << suite_cluster_radius_ << std::endl;
189  std::cout << "loop_cluster_radius_= " << loop_cluster_radius_ << std::endl;
190  Output_boolean("job_parameters_exist_= ", job_parameters_exist_); std::cout << std::endl;
191  Output_boolean("quick_alignment_= ", quick_alignment_); std::cout << std::endl;
192  Output_boolean("align_only_over_base_atoms_=", align_only_over_base_atoms_); std::cout << std::endl;
193  Output_boolean("two_stage_clustering_= ", two_stage_clustering_); std::cout << std::endl;
194  Output_boolean("keep_pose_in_memory_= ", keep_pose_in_memory_); std::cout << std::endl;
195  Output_boolean("keep_pose_in_memory_hydrid_= ", keep_pose_in_memory_hydrid_); std::cout << std::endl;
196  Output_boolean("optimize_memory_usage_(by slicing out fixed region of the pose)= ", optimize_memory_usage_); std::cout << std::endl;
197  Output_boolean("distinguish_pucker_= ", distinguish_pucker_); std::cout << std::endl;
198  Output_boolean("add_lead_zero_to_tag_= ", add_lead_zero_to_tag_); std::cout << std::endl;
199  Output_boolean("PBP_clustering_at_chain_closure_= ", PBP_clustering_at_chain_closure_); std::cout << std::endl;
200  Output_boolean("full_length_loop_rmsd_clustering_= ", full_length_loop_rmsd_clustering_); std::cout << std::endl;
201  Output_boolean("ignore_FARFAR_no_auto_bulge_tag_= ", ignore_FARFAR_no_auto_bulge_tag_); std::cout << std::endl;
202  Output_boolean("ignore_FARFAR_no_auto_bulge_parent_tag_= ", ignore_FARFAR_no_auto_bulge_parent_tag_); std::cout << std::endl;
203  Output_boolean("ignore_unmatched_virtual_res_= ", ignore_unmatched_virtual_res_); std::cout << std::endl;
204 
205  std::cout << "max_decoys_= " << max_decoys_ << std::endl;
206  std::cout << "score_diff_cut_= " << score_diff_cut_ << std::endl;
207  Output_boolean("perform_score_diff_cut_= ", perform_score_diff_cut_); std::cout << std::endl;
208 
209  //////////basic initialization///////////////
210  pose_output_list_.clear();
211  tag_output_list_.clear();
212  silent_struct_output_list_.clear();
213  /////////////////////////////////////////////
214 
215  if(optimize_memory_usage_){
216  if(!job_parameters_exist_) utility_exit_with_message("optimize_memory_usage=True but job_parameters_exist_=False!");
217  sliced_pose_job_params_.setup(job_parameters_);
218  }
219 
220  if(ignore_FARFAR_no_auto_bulge_tag_ || ignore_FARFAR_no_auto_bulge_parent_tag_){
221  create_tags_map();
222  }
223 
224  if(quick_alignment_) initialize_quick_alignment_pose();
225 
226  if(perform_VDW_rep_screen_) initialize_VDW_rep_screener();
227 
228  initialize_max_memory_pose_num();
229 
230  if(skip_clustering_){
231  create_silent_file_and_tag_list();
232  } else if(two_stage_clustering_){
233  two_stage_clustering();
234  }else {
235  do_some_clustering();
236  }
237 
238 
239  if(tag_output_list_.size()!=silent_struct_output_list_.size()) utility_exit_with_message( "tag_output_list_.size()!=silent_struct_output_list_.size()");
240 
241  if((keep_pose_in_memory_==true) && (keep_pose_in_memory_hydrid_==false) ){
242  if(pose_output_list_.size()!=tag_output_list_.size()) utility_exit_with_message( "pose_output_list_.size()!=tag_output_list_.size()");
243  }
244 
245 
246  std::cout << "Final cluster_pose_list size= " << silent_struct_output_list_.size() << std::endl;
247  std::cout << "Total clustering time : " << static_cast<Real>( clock() - time_start ) / CLOCKS_PER_SEC << std::endl;
248 
249  }
250 
251  /////////////////////////////////////////////////////////////////////
252  void
253  StepWiseRNA_Clusterer::initialize_max_memory_pose_num(){
254 
255  using namespace core::pose;
256  using namespace ObjexxFCL;
257 
258  clock_t const time_start( clock() );
259 
260 
261  pose::Pose first_pose;
262  pose::Pose first_pose_before_slicing;
263 
264  Size num_silent_struct=0;
265  bool found_valid_struct=false;
266 
267  input_->reset(); //reset the silentfile stream to the beginning..
268 
269 
270  //get the first pose in the silent_file_stream.
271  while ( input_->has_another_pose() ) {
272  num_silent_struct++;
273 
274  core::io::silent::SilentStructOP const silent_struct( input_->next_struct() );
275 
276  if(found_valid_struct==false){
277  PoseOP pose_op( new Pose );
278  silent_struct->fill_pose( *pose_op, *rsd_set_ );
279 
280  std::string const & tag( silent_struct->decoy_tag() );
281 
282  if(protocols::swa::rna::check_for_messed_up_structure((*pose_op), tag)==true) continue;
283 
284  first_pose_before_slicing=(*pose_op);
285 
286  if(optimize_memory_usage_) (*pose_op)=sliced_pose_job_params_.create_sliced_pose(*pose_op);
287 
288  //OK found a valid (non-messed) pose. Will use this pose as the "global" quick alignment pose_
289  first_pose=(*pose_op);
290  found_valid_struct=true;
291  }
292  }
293 
294  input_->reset(); //reset the silentfile stream to the beginning..
295 
296  Size const total_res_before_slicing=first_pose_before_slicing.total_residue();
297 
298  Size const total_res=first_pose.total_residue();
299 
300  //OK, one example of crash due to insufficient memory:
301  //Building region 11_4 of J5/J5a hinge 2r8s
302  //12 nucleotides pose, 4G, 2C, 3U and 3A
303  //209,155 silent struct 280 large clusters pose, and 9889 normal pose.
304  //the size of the silent_file is: 1.4G REGION_11_4/start_from_region_11_2_sample_filtered.out
305  //So 6.69 KB. 0.55 KB per nucleotide
306  //Memory limit on Biox is
307  //MEMLIMIT
308  //4000000 KB, 4G.
309  //So mememory used to store pose is 2.4G for (9889+280=10169 pose)
310  //236 KB per pose. 19 KB per nucleotide.
311 
312  //Consistency check:
313  //Finished reading 164975 structures from REGION_4_8/start_from_region_5_8_sample_filtered.out
314  //553M REGION_4_8/start_from_region_5_8_sample_filtered.out
315  //This is 1ZIH 4-8 is res 5-9, (5,6,7,8,9)-> 5 res. -> 0.6 KB per nucleotide!!
316  //Finished reading 247508 structures from REGION_4_8/start_from_region_4_7_sample_filtered.out
317  //277M REGION_4_8/start_from_region_4_7_sample_filtered.out
318  //->0.2 KB per nucleotide... WHY DOESN THE VALUE FLUCTUATE SO MUCH??
319 
320  //4,000,000=(max_memory_pose_num_)*19*(total_res) + (num_silent_struct)*(0.078)*(total_res)
321  Real const total_memory=4000000;
322 
323  Real const memory_taken_by_silent_struct=(num_silent_struct*0.55*total_res_before_slicing);
324 
325  if(memory_taken_by_silent_struct > total_memory){
326  max_memory_pose_num_=0;
327  std::cout << "memory_taken_by_silent_struct (" << memory_taken_by_silent_struct << ") > specified_total_memory(" << total_memory << ")" << std::endl;
328  }else{
329  max_memory_pose_num_= int( 0.7*( (total_memory-memory_taken_by_silent_struct)/(19*total_res) ) ); //0.7 is to be on the safe side
330  }
331 
332  std::cout << "--------------StepWiseRNA_Clusterer::initialize_max_memory_pose_num----------" << std::endl;
333  Output_boolean("optimize_memory_usage_ (by slicing)= ", optimize_memory_usage_); std::cout << std::endl;
334  std::cout << "first_pose total_res (before_slicing)= " << total_res_before_slicing << std::endl;
335  std::cout << "first_pose total_res (already account for slicing)= " << total_res << std::endl;
336  std::cout << "num_silent_struct= " << num_silent_struct << std::endl;
337  std::cout << "memory_taken_by_silent_struct= " << memory_taken_by_silent_struct << std::endl;
338  std::cout << "max_memory_pose_num_= " << max_memory_pose_num_ << std::endl;
339  std::cout << "time in function= " << static_cast<Real>( clock() - time_start ) / CLOCKS_PER_SEC << std::endl;
340  std::cout << "--------------StepWiseRNA_Clusterer::initialize_max_memory_pose_num----------" << std::endl;
341 
342  }
343 
344  /////////////////////////////////////////////////////////////////////
345  void
346  StepWiseRNA_Clusterer::initialize_VDW_rep_screener(){
347 
348  using namespace core::pose;
349  using namespace ObjexxFCL;
350 
351  if(!job_parameters_exist_) utility_exit_with_message("perform_VDW_rep_screen_=True but job_parameters_exist_=False!");
352 
353  if(optimize_memory_usage_) utility_exit_with_message("perform_VDW_rep_screen_=True and optimize_memory_usage_=True!");
354 
355  if(job_parameters_->Is_simple_full_length_job_params()==true) utility_exit_with_message("job_parameters_->Is_simple_full_length_job_params()==true!");
356 
357  input_->reset(); //reset the silentfile stream to the beginning..
358 
359  //get the first pose in the silent_file_stream.
360  while ( input_->has_another_pose() ) {
361 
362  PoseOP pose_op( new Pose );
363  core::io::silent::SilentStructOP const silent_struct( input_->next_struct() );
364  silent_struct->fill_pose( *pose_op, *rsd_set_ );
365 
366  std::string const & tag( silent_struct->decoy_tag() );
367 
368  if(protocols::swa::rna::check_for_messed_up_structure((*pose_op), tag)==true) continue;
369 
370  if(optimize_memory_usage_) (*pose_op)=sliced_pose_job_params_.create_sliced_pose(*pose_op);
371 
372  //OK found a valid (non-messed) pose. Will use this pose as the "global" quick alignment pose_
373 
374  user_input_VDW_bin_screener_->setup_using_user_input_VDW_pose( VDW_rep_screen_info_, (*pose_op), StepWiseRNA_JobParametersCOP(job_parameters_) );
375 
376  break;
377 
378  }
379 
380  input_->reset(); //reset the silentfile stream to the beginning..
381 
382  }
383 
384  /////////////////////////////////////////////////////////////////////
385 
386  void
387  StepWiseRNA_Clusterer::initialize_quick_alignment_pose(){
388 
389  using namespace core::pose;
390  using namespace ObjexxFCL;
391 
392  if(!job_parameters_exist_) utility_exit_with_message("quick_alignment_=True but job_parameters_exist_=False!");
393 
394 
395  //OK first check that it valid to use the quick_alignment_pose mode... in this mode, all alignment must be fixed res..However, this check itself is not enough to gaurantee that quicj_alignemnt_mode will work. Another requirement is that all the residue in working_best_alignment must be fixed in space with respect to each other. I try to ensure that this is always the case by making sure that every residues in the working_best_alignment is in the root_partition. (See StepWiseRNA_JobParameters_Setup.cc)
396 
397  utility::vector1< core::Size > const working_best_alignment= job_parameters_->working_best_alignment();
398  utility::vector1< core::Size > const working_fixed_res =job_parameters_->working_fixed_res();
399 
400  for(Size n=1; n<=working_best_alignment.size(); n++){
401  Size const seq_num=working_best_alignment[n];
402 
403  if(Contain_seq_num(seq_num, working_fixed_res)==false) {
404 
405  Output_seq_num_list("working_best_alignment= ", working_best_alignment, 30);
406  Output_seq_num_list("working_fixed_res= ", working_fixed_res, 30);
407 
408  utility_exit_with_message( "quick_alignment_mode is true. However: seq_num " + string_of(seq_num) + " is a element of working_best_alignment BUT not a element of working_fixed_res ");
409 
410  }
411  }
412 
413 
414  input_->reset(); //reset the silentfile stream to the beginning..
415 
416  quick_alignment_pose_is_intialized_=true;
417 
418  //get the first pose in the silent_file_stream.
419  while ( input_->has_another_pose() ) {
420 
421 
422  PoseOP pose_op( new Pose );
423  core::io::silent::SilentStructOP const silent_struct( input_->next_struct() );
424  silent_struct->fill_pose( *pose_op, *rsd_set_ );
425 
426  std::string const & tag( silent_struct->decoy_tag() );
427 
428  if(protocols::swa::rna::check_for_messed_up_structure((*pose_op), tag)==true) continue;
429 
430  if(optimize_memory_usage_) (*pose_op)=sliced_pose_job_params_.create_sliced_pose(*pose_op);
431 
432  //OK found a valid (non-messed) pose. Will use this pose as the "global" quick alignment pose_
433 
434  quick_alignment_pose_=(*pose_op);
435  quick_alignment_tag_=tag;
436  std::cout << "found quick alignment_pose, tag= " << tag << std::endl;
437 
438  break;
439 
440  }
441 
442  if(output_pdb_) quick_alignment_pose_.dump_pdb( "quick_alignment_pose_" + quick_alignment_tag_ + ".pdb");
443 
444  input_->reset(); //reset the silentfile stream to the beginning..
445 
446 
447  }
448 
449  /////////////////////////////////////////////////////////////////////
450  void
451  StepWiseRNA_Clusterer::align_to_quick_alignment_pose(core::pose::Pose & pose, std::string const & tag) const {
452 
453  using namespace core::pose;
454 
455  if(quick_alignment_pose_is_intialized_==false) utility_exit_with_message( "quick_alignment_pose_is_intialized_==false");
456 
457  utility::vector1< core::Size > const & alignment_res= get_act_alignment_res();
458 
459  align_poses(pose, tag, quick_alignment_pose_, "quick_alignment_tag_" + quick_alignment_tag_, alignment_res, align_only_over_base_atoms_);
460 
461  }
462 
463  //////////////////////////////////////////////////////////////////////
464 
465  void
466  StepWiseRNA_Clusterer::two_stage_clustering(){
467 
468  Output_title_text("Enter two_stage_clustering function");
469 
470  Real const whole_struct_cluster_radius_actual=whole_struct_cluster_radius_;
471  Real const loop_cluster_radius_actual = loop_cluster_radius_;
472  Real const suite_cluster_radius_actual = suite_cluster_radius_;
473  bool const keep_pose_in_memory_actual=keep_pose_in_memory_;
474  bool const keep_pose_in_memory_hydrid_actual=keep_pose_in_memory_hydrid_;
475 
476  whole_struct_cluster_radius_=2.0; //hard code
477  loop_cluster_radius_= 2.0; //hard_code
478  suite_cluster_radius_= 999; //hard_code ...no suite_cluster...
479  keep_pose_in_memory_=true;
480  keep_pose_in_memory_hydrid_=false;
481  use_triangle_inequality_=false;
482 
483  Output_title_text("First stage: large RMSD clustering");
484 
485  input_->reset(); //reset the silentfile stream to the beginning..
486  do_some_clustering();
487 
488  large_cluster_pose_list_ = pose_output_list_;
489  pose_output_list_.clear();
490  tag_output_list_.clear();
491  silent_struct_output_list_.clear();
492 
493  //////////////////////////////////////////////////////
494 
495 
496  input_->reset(); //reset the silentfile stream to the beginning..
497  create_large_cluster_centers_member_list();
498 
499  //////////////////////////////////////////////////////
500  Output_title_text("Second stage: Actual clustering");
501 
502 
503  //Reset to actual (user specified) value
504  whole_struct_cluster_radius_=whole_struct_cluster_radius_actual;
505  loop_cluster_radius_ = loop_cluster_radius_actual;
506  suite_cluster_radius_ = suite_cluster_radius_actual;
507  keep_pose_in_memory_=keep_pose_in_memory_actual;
508  keep_pose_in_memory_hydrid_=keep_pose_in_memory_hydrid_actual;
509 
510  use_triangle_inequality_=true;
511 
512  input_->reset(); //reset the silentfile stream to the beginning..
513  do_some_clustering();
514 
515  }
516 
517  /////////////////////////////////////////////////////////////////////
518  //The members in both dimensions of the vector are sorted so that the lowest-energy entries appear first.
519 
520  void
521  StepWiseRNA_Clusterer::create_large_cluster_centers_member_list(){
522 
523  using namespace core::pose;
524 
525 
526  Output_title_text("create_large_cluster_centers_member_list");
527  clock_t const time_start( clock() );
528 
529  cluster_centers_neighbor_list_.clear();
530 
532  cluster_centers_neighbor_list_.assign(large_cluster_pose_list_.size(), empty_vector);
533 
534 
535  utility::vector1< core::Size > const & alignment_res= get_act_alignment_res();
536  utility::vector1 < core::Size > const & rmsd_res_list = get_act_rmsd_res_list();
537  std::map< core::Size, core::Size > const & full_to_sub = get_act_full_to_sub();
538  std::map< core::Size, bool > const & Is_prepend_map = get_act_Is_prepend_map();
539 
540  Size input_ID=0;
541 
542  Real last_cluster_center_score( 0.0 ); //does slicing the pose change the score?
543  getPoseExtraScores( *(large_cluster_pose_list_[large_cluster_pose_list_.size()]), "score", last_cluster_center_score );
544 
545  while ( input_->has_another_pose() ) {
546 
547  input_ID++; //count messed up poses as well
548 
549  if( (input_ID % 1000) ==0){
550  std::cout << "input_ID= " << input_ID << " time taken so far= " << static_cast<Real>( clock() - time_start ) / CLOCKS_PER_SEC << std::endl;
551  }
552 
553 
554  PoseOP pose_op( new Pose );
555  core::io::silent::SilentStructOP const silent_struct( input_->next_struct() );
556 
557  if(pass_FARFAR_no_auto_bulge_filter(silent_struct)==false) continue;
558 
559  silent_struct->fill_pose( *pose_op, *rsd_set_ ); //umm is the pose still connected to the silent_struct? Meaning that if the pose change, does the silent struct get changed as well? Apr 23, 2010 Parin.
560 
561  std::string const & tag( silent_struct->decoy_tag() );
562 
563  //Hacky thing. Ignore messed structure until we find a fix
564  if(protocols::swa::rna::check_for_messed_up_structure((*pose_op), tag)==true) continue;
565 
566  if(optimize_memory_usage_) (*pose_op)=sliced_pose_job_params_.create_sliced_pose(*pose_op);
567 
568  //need to align pose_op to the global alignment pose.
569  if(quick_alignment_) align_to_quick_alignment_pose(*pose_op, tag);
570 
571  Real score( 0.0 ); //does slicing the pose change the score?
572  getPoseExtraScores( *pose_op, "score", score );
573 
574  if(score>(last_cluster_center_score+0.001) ) break; //Exclude bad score poses that will never to be part of the final output_pose_list.
575 
576  for(Size n=1; n<=large_cluster_pose_list_.size(); n++){
577 
578  pose::Pose const & cluster_center_pose=*(large_cluster_pose_list_[n]);
579 
580  if(quick_alignment_==false) align_poses(*pose_op, "current_pose", cluster_center_pose, "large_cluster_center", alignment_res, align_only_over_base_atoms_);
581 
582  Real const RMSD=rmsd_over_residue_list(*pose_op , cluster_center_pose, rmsd_res_list, full_to_sub, Is_prepend_map, false);
583 
584  if(RMSD<(loop_cluster_radius_*1.5)){ //A neigbor/member of this cluster_center
585  Cluster_Member member;
586  member.ID=input_ID;
587  member.RMSD=RMSD;
588  member.score=score;
589  cluster_centers_neighbor_list_[n].push_back(member);
590  }
591  }
592  }
593 
594  std::cout << "check large_cluster_pose_list_ member size " << std::endl;
595  for(Size n=1; n<=large_cluster_pose_list_.size(); n++){
596  std::cout << "cluster center " << n << " has " << cluster_centers_neighbor_list_[n].size() << " members " << std::endl;
597  }
598 
599 
600  }
601 
602 
603  /////////////////////////////////////////////////////////////////////
604 
605 
606  void
607  StepWiseRNA_Clusterer::create_silent_file_and_tag_list(){
608 
609  using namespace core::pose;
610  using namespace ObjexxFCL;
611 
612 
613  Output_title_text("StepWiseRNA_Clusterer::create_silent_file_and_tag_list()");
614 
615  input_->reset(); //reset the silentfile stream to the beginning..
616 
617  tag_output_list_.clear();
618  silent_struct_output_list_.clear();
619  pose_output_list_.clear();
620 
621  utility::vector1 < core::Size > working_global_sample_res_list;
622  utility::vector1 < core::Size > working_filter_virtual_res_list;
623 
624 
625  if(perform_VDW_rep_screen_ || perform_filters_){
626 
627  if(job_parameters_exist_==false) utility_exit_with_message("(perform_VDW_rep_screen_ || perform_filters_) but job_parameters_exist_==false!");
628 
629  working_global_sample_res_list=job_parameters_->working_global_sample_res_list();
630  working_filter_virtual_res_list=apply_full_to_sub_mapping(filter_virtual_res_list_, job_parameters_);
631 
632  Output_seq_num_list("filter_virtual_res_list_=", filter_virtual_res_list_, 50);
633  Output_seq_num_list("working_filter_virtual_res_list=", working_filter_virtual_res_list, 50);
634  Output_seq_num_list("working_global_sample_res_list=", working_global_sample_res_list, 50);
635  std::cout << "min_num_south_ribose_filter_=" << min_num_south_ribose_filter_ << std::endl;
636  }
637 
638  Size input_ID=0;
639 
640  bool filter_verbose=true;
641 
642  while ( input_->has_another_pose() ) {
643  input_ID++;
644 
645  core::io::silent::SilentStructOP const silent_struct( input_->next_struct() );
646  std::string const & tag( silent_struct->decoy_tag() );
647 
648  if(pass_FARFAR_no_auto_bulge_filter(silent_struct)==false) continue;
649 
650  if(perform_VDW_rep_screen_ || perform_filters_ ){
651 
652  PoseOP pose_op( new Pose );
653  silent_struct->fill_pose( *pose_op, *rsd_set_ ); //umm is the pose still connected to the silent_struct?
654 
655  if(protocols::swa::rna::check_for_messed_up_structure((*pose_op), tag)==true) continue;
656 
657  ///Jan 12, 2012:Consistency check://///
658  if(job_parameters_exist_){
659  if( (*pose_op).total_residue()!=job_parameters_->working_sequence().size()){
660  utility_exit_with_message("(*pose_op).total_residue()=("+string_of((*pose_op).total_residue())+")!=("+string_of(job_parameters_->working_sequence().size())+")=job_parameters_working_sequence().size()");
661  }
662  }
663  ////////////////////////////////////////
664 
665  if(perform_VDW_rep_screen_){
666 
667  if(user_input_VDW_bin_screener_->user_inputted_VDW_screen_pose()!=true){
668  utility_exit_with_message("user_input_VDW_bin_screener_->user_inputted_VDW_screen_pose()!=true");
669  }
670 
671  bool const pass_VDW_rep_screen=user_input_VDW_bin_screener_->VDW_rep_screen_with_act_pose( (*pose_op), working_global_sample_res_list, false /*local verbose*/);
672  if(pass_VDW_rep_screen==false){
673  if(filter_verbose) std::cout << "tag= " << tag << " fail VDW_rep_screen! " << std::endl;
674  continue;
675  }
676  }
677 
678  if(perform_filters_){
679  bool pass_filter=true;
680 
681  utility::vector1< core::Size > const & force_north_ribose_list=job_parameters_->working_force_north_ribose_list();
682  utility::vector1< core::Size > const & force_south_ribose_list=job_parameters_->working_force_south_ribose_list();
683  utility::vector1< core::Size > const & force_syn_chi_res_list=job_parameters_->working_force_syn_chi_res_list();
684 
685  for(Size n=1; n<=force_north_ribose_list.size(); n++){
686  Size const seq_num=force_north_ribose_list[n];
687  if((*pose_op).residue(seq_num).has_variant_type("BULGE")) continue;
688  if((*pose_op).residue(seq_num).has_variant_type("VIRTUAL_RIBOSE")) continue;
689  if((*pose_op).residue(seq_num).has_variant_type("VIRTUAL_RNA_RESIDUE")) continue;
690  if(Get_residue_pucker_state((*pose_op), seq_num)!=NORTH){
691  pass_filter=false;
692  if(filter_verbose) std::cout << "pose= " << tag << " doesn't have north_ribose at seq_num= " << seq_num << std::endl;
693  }
694  }
695 
696  for(Size n=1; n<=force_south_ribose_list.size(); n++){
697  Size const seq_num=force_south_ribose_list[n];
698  if((*pose_op).residue(seq_num).has_variant_type("BULGE")) continue;
699  if((*pose_op).residue(seq_num).has_variant_type("VIRTUAL_RIBOSE")) continue;
700  if((*pose_op).residue(seq_num).has_variant_type("VIRTUAL_RNA_RESIDUE")) continue;
701  if(Get_residue_pucker_state((*pose_op), seq_num)!=SOUTH){
702  pass_filter=false;
703  if(filter_verbose) std::cout << "pose= " << tag << " doesn't have south_ribose at seq_num= " << seq_num << std::endl;
704  }
705  }
706 
707  for(Size n=1; n<=force_syn_chi_res_list.size(); n++){
708  Size const seq_num=force_syn_chi_res_list[n];
709  if((*pose_op).residue(seq_num).has_variant_type("BULGE")) continue;
710  if((*pose_op).residue(seq_num).has_variant_type("VIRTUAL_RIBOSE")) continue;
711  if((*pose_op).residue(seq_num).has_variant_type("VIRTUAL_RNA_RESIDUE")) continue;
712  if(Get_residue_base_state((*pose_op), seq_num)!=SYN){
713  pass_filter=false;
714  if(filter_verbose) std::cout << "pose= " << tag << " doesn't have syn_chi at seq_num= " << seq_num << std::endl;
715  }
716  }
717 
718  for(Size n=1; n<=working_filter_virtual_res_list.size(); n++){
719  Size const seq_num=working_filter_virtual_res_list[n];
720  if((*pose_op).residue(seq_num).has_variant_type("VIRTUAL_RNA_RESIDUE")==false){
721  pass_filter=false;
722  if(filter_verbose) std::cout << "pose= " << tag << " doesn't have virtual_rna_residue variant_type at seq_num= " << seq_num << std::endl;
723  }
724  }
725 
726  if(min_num_south_ribose_filter_!=0){
727  Size num_south_ribose=0;
728  for(Size n=1; n<=working_global_sample_res_list.size(); n++){
729  Size const seq_num=working_global_sample_res_list[n];
730  if(Get_residue_pucker_state((*pose_op), seq_num)==SOUTH){
731  num_south_ribose+=1;
732  }
733  }
734  //if(filter_verbose) std::cout << "pose= " << tag << " have " << num_south_ribose << " south_pucker_ribose." << std::endl;
735  if(num_south_ribose<min_num_south_ribose_filter_) pass_filter=false;
736  }
737 
738  if(pass_filter==false) continue;
739 
740  }
741 
742  }
743 
744  if(verbose_) std::cout << "Adding " << tag << " ID= " << input_ID << std::endl;
745 
746  tag_output_list_.push_back( tag );
747  silent_struct_output_list_.push_back( silent_struct );
748 
749 
750  if(keep_pose_in_memory_){
751  if( (keep_pose_in_memory_hydrid_==false) || (pose_output_list_.size() < max_memory_pose_num_) ){
752  PoseOP localized_pose_op( new Pose );
753  silent_struct->fill_pose( *localized_pose_op, *rsd_set_ );
754  pose_output_list_.push_back( localized_pose_op );
755  }
756  }
757 
758  }
759 
760  if(perform_VDW_rep_screen_ || perform_filters_){
761  std::cout << tag_output_list_.size() << " out of " << input_ID << " poses pass the filters." << std::endl;
762  }
763 
764  input_->reset(); //reset the silentfile stream to the beginning..
765 
766  Output_title_text("");
767 
768 
769  }
770 
771  /////////////////////////////////////////////////////////////////////
772  void
773  StepWiseRNA_Clusterer::do_some_clustering() {
774 
775  using namespace core::pose;
776  using namespace ObjexxFCL;
777 
778  clock_t const time_start( clock() );
779 
780  input_->reset(); //Dec 11, 2011.
781  tag_output_list_.clear(); //Dec 11, 2011.
782  silent_struct_output_list_.clear(); //Dec 11, 2011.
783  pose_output_list_.clear(); //Dec 11, 2011.
784 
785  if(use_triangle_inequality_) all_pose_to_output_pose_ID_map_.clear();
786 
787  bool Is_first_pose = true;
788  bool score_min_defined = false;
789  Real score_min = 0.0;
790 
791  Size num_pose_clustered = 0;
792  Size input_ID=0; //this count messed up pose where as num_pose_clustered doesn't
793  while( input_->has_another_pose() ){
794 
795  input_ID++; //count messed up poses as well
796  if(use_triangle_inequality_) all_pose_to_output_pose_ID_map_.push_back(0); //If is a output_pose, ID value will be updated below
797 
798  PoseOP pose_op( new Pose );
799  core::io::silent::SilentStructOP const silent_struct( input_->next_struct() );
800 
801  if(pass_FARFAR_no_auto_bulge_filter(silent_struct)==false) continue;
802 
803  silent_struct->fill_pose( *pose_op, *rsd_set_ );
804 
805  Real score( 0.0 );
806  getPoseExtraScores( *pose_op, "score", score );
807 
808  if ( score_min_defined==false ){
809  score_min = score;
810  score_min_defined = true;
811  }
812 
813  if( perform_score_diff_cut_ && (score > (score_min + score_diff_cut_) ) ) break;
814 
815  std::string const & tag( silent_struct->decoy_tag() );
816  TR << "CHECKING " << tag << " with score " << score << " ( score_min= " << score_min << " ) against list of size " << silent_struct_output_list_.size();
817  TR << " Num_pose_clustered so far " << num_pose_clustered << std::endl;
818 
819  //Hacky thing. Ignore messed structure until we find a fix...ideally should just remove messed up pose from input_ at beginning of the Class.
820  if(protocols::swa::rna::check_for_messed_up_structure((*pose_op), tag)==true) continue;
821 
822  ///Jan 12, 2012:Consistency check://///
823  if(job_parameters_exist_){
824  if (job_parameters_ -> add_virt_res_as_root() ) {
825  if( (*pose_op).total_residue() - 1 != job_parameters_->working_sequence().size()){
826  utility_exit_with_message("(*pose_op).total_residue()=("+string_of((*pose_op).total_residue())+")!=("+string_of(job_parameters_->working_sequence().size())+")=job_parameters_working_sequence().size()");
827  }
828  } else {
829  if( (*pose_op).total_residue()!=job_parameters_->working_sequence().size()){
830  utility_exit_with_message("(*pose_op).total_residue()=("+string_of((*pose_op).total_residue())+")!=("+string_of(job_parameters_->working_sequence().size())+")=job_parameters_working_sequence().size()");
831  }
832  }
833  }
834  ////////////////////////////////////////
835 
836  if(optimize_memory_usage_) (*pose_op)=sliced_pose_job_params_.create_sliced_pose(*pose_op);
837 
838  if(Is_first_pose){
839  first_pose_=(*pose_op);
840  Is_first_pose=false;
841  }
842 
843 
844  /////////////////////////////////////////////////////////
845 
846 
847  bool const OK = check_for_closeness( pose_op , tag );
848 
849  if ( OK ) {
850  TR << "ADDING " << tag << std::endl;
851 
852  tag_output_list_.push_back( tag );
853 
854  if(keep_pose_in_memory_==true){
855  if( (keep_pose_in_memory_hydrid_==false) || (pose_output_list_.size() < max_memory_pose_num_) ){
856 
857  pose_output_list_.push_back( pose_op );
858 
859  }
860  }
861 
862  silent_struct_output_list_.push_back( silent_struct );
863 
864  if(use_triangle_inequality_) {
865 
866  if(input_ID>all_pose_to_output_pose_ID_map_.size()){
867  utility_exit_with_message( "input_ID>all_pose_to_output_pose_ID_map_.size(), input_ID= " + string_of(input_ID) + ", all_pose_to_output_pose_ID_map_.size()=" + string_of(all_pose_to_output_pose_ID_map_.size()));
868  }
869 
870  all_pose_to_output_pose_ID_map_[input_ID]=silent_struct_output_list_.size();
871  }
872 
873  if( silent_struct_output_list_.size() >= max_decoys_ ) break;
874 
875  }
876 
877  num_pose_clustered++;
878 
879  if( (num_pose_clustered % 100) ==0){
880  std::cout << "num_pose_clustered= " << num_pose_clustered << " num_cluster_centers= " << silent_struct_output_list_.size() << " time_taken_so_far= " << static_cast<Real>( clock() - time_start ) / CLOCKS_PER_SEC << std::endl;
881  }
882  }
883 
884  TR << "After clustering, number of decoys: " << silent_struct_output_list_.size() << " from " << num_pose_clustered << " input poses " << std::endl;
885  return;
886 
887  }
888 
889  /////////////////////////////////////////////////////////////////////
890 
891  bool
892  StepWiseRNA_Clusterer::Is_old_individual_suite_cluster(pose::Pose const & current_pose,
893  pose::Pose const & cluster_center_pose,
894  utility::vector1 < core::Size > const & rmsd_res_list,
895  std::map< core::Size, core::Size > const & full_to_sub,
896  std::map< core::Size, bool > const & Is_prepend_map,
897  core::Real const & cluster_radius) const{
898 
899 
900  utility::vector1< Real > rmsd_list(rmsd_res_list.size(), 9999.99);
901  utility::vector1< bool > same_ribose_pucker_list(rmsd_res_list.size(), false);
902 
903 
904  for(Size i=1; i<=rmsd_res_list.size(); i++){
905 
906  Size const full_seq_num= rmsd_res_list[i];
907 
908  if(full_to_sub.find(full_seq_num)==full_to_sub.end() ) utility_exit_with_message( "full_to_sub.find(full_seq_num)==full_to_sub.end()!");
909  if(Is_prepend_map.find(full_seq_num)==Is_prepend_map.end() ) utility_exit_with_message( "Is_prepend_map.find(full_seq_num)==Is_prepend_map.end()!");
910 
911  Size const seq_num=full_to_sub.find(full_seq_num)->second;
912  bool Is_prepend=Is_prepend_map.find(full_seq_num)->second;
913 
914  //Important only if both pose are real
915  same_ribose_pucker_list[i]=Is_same_ribose_pucker(current_pose, cluster_center_pose, seq_num);
916 
917  bool const current_is_virtual_res=current_pose.residue(seq_num).has_variant_type("VIRTUAL_RNA_RESIDUE");
918  bool const center_is_virtual_res=cluster_center_pose.residue(seq_num).has_variant_type("VIRTUAL_RNA_RESIDUE");
919 
920  bool const current_is_virtual_ribose=current_pose.residue(seq_num).has_variant_type("VIRTUAL_RIBOSE");
921  bool const center_is_virtual_ribose=cluster_center_pose.residue(seq_num).has_variant_type("VIRTUAL_RIBOSE");
922 
923 
924  if(ignore_unmatched_virtual_res_==false){ //Sep 07. 2011
925 
926  if(current_is_virtual_res != center_is_virtual_res){
927  return false; //current_pose is not part of this cluster center
928  }
929 
930  }
931 
932  if(current_is_virtual_res && center_is_virtual_res){
933  rmsd_list[i]=8888.88;
934  continue;
935  }
936 
937 
938  if(PBP_clustering_at_chain_closure_ && job_parameters_->gap_size()==0 ){ //new option Aug 15, 2010..include both phosphates in rmsd calculation at chain_break
939  rmsd_list[i] = phosphate_base_phosphate_rmsd( current_pose, cluster_center_pose, seq_num, false /*ignore_virtual_atom*/);
940  }else{
941  rmsd_list[i] = suite_rmsd(current_pose, cluster_center_pose, seq_num, Is_prepend, false /*ignore_virtaul_atom*/);
942  }
943 
944  if(rmsd_list[i]> cluster_radius ) return false; //current_pose is not part of this cluster center
945 
946 
947  if(distinguish_pucker_){
948 
949  if(current_is_virtual_ribose != center_is_virtual_ribose){
950  //New on Oct 09, 2011. This should NOT lead to any new changes, since virtual_ribose is usually accompanied by virtual_res at the neighoring nucleotide.
951  return false;
952  }
953 
954  bool check_pucker=true;
955 
956  if(current_is_virtual_ribose && center_is_virtual_ribose){
957  //New on Oct 09, 2011. This should remove "false" new cluster where current pose and cluster center pose differ only by the pucker of a virtual_ribose.
958  check_pucker=false;
959  }
960 
961  if(check_pucker && (same_ribose_pucker_list[i]==false)){
962  return false;
963  }
964 
965  }
966 
967 
968  /*
969  if(distinguish_pucker_){
970  if(rmsd_list[i]> cluster_radius || (same_ribose_pucker_list[i]==false)) return false; //current_pose is not part of this cluster center
971  }else{
972  if(rmsd_list[i]> cluster_radius ) return false; //current_pose is not part of this cluster center
973  }
974  */
975 
976  }
977 
978  if(verbose_){
979  for(Size i=1; i<=rmsd_res_list.size(); i++){
980 
981  Size const full_seq_num= rmsd_res_list[i];
982  Size const seq_num=full_to_sub.find(full_seq_num)->second;
983  bool Is_prepend=Is_prepend_map.find(full_seq_num)->second;
984  bool both_pose_res_is_virtual=false;
985  if(current_pose.residue(seq_num).has_variant_type("VIRTUAL_RNA_RESIDUE") && cluster_center_pose.residue(seq_num).has_variant_type("VIRTUAL_RNA_RESIDUE")){
986  both_pose_res_is_virtual=true;
987  }
988  std::cout << "full_seq_num= " << full_seq_num << " seq_num= " << seq_num; Output_boolean(" Is_prepend= ",Is_prepend); Output_boolean(" both_pose_res_is_virtual= ",both_pose_res_is_virtual);
989  std::cout << " same_pucker[" << i << "]= "; Output_boolean(same_ribose_pucker_list[i]);
990  print_ribose_pucker_state(" curr_pucker= ", Get_residue_pucker_state(current_pose, seq_num));
991  print_ribose_pucker_state(" center_pucker= ", Get_residue_pucker_state(cluster_center_pose, seq_num));
992  std::cout << " rmsd_list[" << i << "]= " << rmsd_list[i];
993 
994  if(ignore_unmatched_virtual_res_){
995  if(current_pose.residue(seq_num).has_variant_type("VIRTUAL_RNA_RESIDUE")!=cluster_center_pose.residue(seq_num).has_variant_type("VIRTUAL_RNA_RESIDUE")){
996  std::cout << " Ignoring unmatched_virtual_res= ";
997  Output_boolean(" curr_virt= ", current_pose.residue(seq_num).has_variant_type("VIRTUAL_RNA_RESIDUE") );
998  Output_boolean(" center_virt= ", cluster_center_pose.residue(seq_num).has_variant_type("VIRTUAL_RNA_RESIDUE") );
999  }
1000  }
1001 
1002  std::cout << std::endl;
1003  }
1004 
1005  }
1006 
1007  return true; //current_pose is not part of this cluster center
1008  }
1009 
1010  //////////////////////////////////////////////////////////////////
1012  StepWiseRNA_Clusterer::get_poseOP(Size const n){
1013 
1014  using namespace core::pose;
1015  using namespace ObjexxFCL;
1016 
1017 // std::cout << "enter get_poseOP(" << n << ")" << std::endl;
1018 
1019  if(keep_pose_in_memory_){
1020  if(keep_pose_in_memory_hydrid_==false || n<=max_memory_pose_num_){
1021 
1022  if(pose_output_list_.size()<n) utility_exit_with_message( "pose_output_list_.size() (" +string_of(pose_output_list_.size()) +") <n ("+ string_of(n) +")" );
1023 
1024  return pose_output_list_[ n ]; //if quick_alignment is true then this pose is already aligned to the quick_alignment_pose?
1025  }
1026  }
1027 
1028  //OK if reach this point means that pose is not stored in the pose_output_list_, need to extract it from the silent_file.
1029  core::pose::PoseOP pose_op( new Pose );
1030  silent_struct_output_list_[n]->fill_pose( *pose_op, *rsd_set_ );
1031 
1032  if(optimize_memory_usage_) (*pose_op)=sliced_pose_job_params_.create_sliced_pose(*pose_op);
1033 
1034  if(quick_alignment_) align_to_quick_alignment_pose((*pose_op), tag_output_list_[n] );
1035 
1036  return pose_op;
1037 
1038  }
1039 
1040 
1041  //////////////////////////////////////////////////////////////////
1042  void
1043  StepWiseRNA_Clusterer::setup_fail_triangle_inequailty_list(pose::Pose & current_pose, std::string const & tag, utility::vector1< bool > & fail_triangle_inequality_list){
1044 
1045  using namespace core::scoring;
1046  using namespace ObjexxFCL;
1047 
1048  utility::vector1< core::Size > const & alignment_res= get_act_alignment_res();
1049  utility::vector1 < core::Size > const & rmsd_res_list = get_act_rmsd_res_list();
1050  std::map< core::Size, core::Size > const & full_to_sub = get_act_full_to_sub();
1051  std::map< core::Size, bool > const & Is_prepend_map = get_act_Is_prepend_map();
1052 
1053  Size num_fail_triangle_inequality=0;
1054  Size num_cluster_center_used=0;
1055 
1056  Real current_score( 0.0 );
1057 
1058  getPoseExtraScores( current_pose, "score", current_score ); //Is this slow?
1059 
1060  fail_triangle_inequality_list.assign(silent_struct_output_list_.size(), false);
1061 
1062  for( Size n =1; n<=large_cluster_pose_list_.size(); n++){ //lowest score cluster center at the beginning of the list
1063 
1064  Real cluster_center_score( 0.0 );
1065 
1066  pose::Pose & cluster_center_pose=*(large_cluster_pose_list_[n]);
1067 
1068  if(quick_alignment_==false) align_poses(current_pose, tag, cluster_center_pose, "large_cluster_center", alignment_res, align_only_over_base_atoms_);
1069 
1070  getPoseExtraScores( cluster_center_pose, "score", cluster_center_score ); //Is this slow?
1071 
1072  if((cluster_center_score+0.001)>current_score) break; //0.001 to make account for round off error. Umm maybe faster without this break statement?
1073 
1074  // std::cout << "cluster_center_score= " << cluster_center_score << " current_score= " << current_score << std::endl;
1075 
1076  num_cluster_center_used++;
1077 
1078  Real const RMSD=rmsd_over_residue_list(current_pose, cluster_center_pose, rmsd_res_list, full_to_sub, Is_prepend_map, false);
1079  //problem is that bulge residues are excluded?? The weight of the RMSD and member.RMSD might not be the same... Aug 9, 2010
1080 
1081  utility::vector1< Cluster_Member > const & member_list=cluster_centers_neighbor_list_[n];
1082 
1083  for(Size ii=1; ii<=member_list.size(); ii++){ //lowest socre cluster center member at the beginning of the list.
1084 
1085  Cluster_Member const & member=member_list[ii];
1086 
1087  if((member.score+0.001)>current_score) break; //0.001 to account for round off errors.
1088 
1089  if( (RMSD-member.RMSD)>(loop_cluster_radius_+0.02) ){ //satisfies triangle inequality
1090 
1091  if( (member.ID>all_pose_to_output_pose_ID_map_.size()) || (member.ID < 1) ){
1092  utility_exit_with_message( "member.ID (" + string_of(member.ID) + ") > all_pose_to_output_pose_ID_map_.size() ( " + string_of(all_pose_to_output_pose_ID_map_.size()) + ") ");
1093  }
1094 
1095  Size const output_pose_ID=all_pose_to_output_pose_ID_map_[member.ID];
1096 
1097  if(output_pose_ID==0) continue; //member is not a output_pose..
1098 
1099  if( (output_pose_ID>silent_struct_output_list_.size()) || (output_pose_ID < 1) ){
1100  utility_exit_with_message( "output_pose_ID (" + string_of(output_pose_ID) + ") > silent_struct_output_list_.size() ( " + string_of(silent_struct_output_list_.size() )+") ");
1101  }
1102 
1103  if(fail_triangle_inequality_list[output_pose_ID]==false) num_fail_triangle_inequality++;
1104 
1105  fail_triangle_inequality_list[output_pose_ID]=true;
1106 
1107  }
1108  }
1109  }
1110  //std::cout << "num_cluster_center_used= " << num_cluster_center_used;
1111  std::cout << "num_fail_triangle_inequality= " << num_fail_triangle_inequality << " out_of= " << silent_struct_output_list_.size() << std::endl;
1112 
1113  }
1114 
1115  //////////////////////////////////////////////////////////////////
1116  bool
1117  StepWiseRNA_Clusterer::Is_new_cluster_center_with_job_parameters(core::pose::PoseOP const & pose_op, std::string const & tag){
1118 
1119  using namespace core::scoring;
1120  using namespace ObjexxFCL;
1121 
1122  //////////////////////////////////////////////////////////////////
1123 
1124  utility::vector1< core::Size > const & alignment_res= get_act_alignment_res();
1125  utility::vector1 < core::Size > const & rmsd_res_list = get_act_rmsd_res_list();
1126  std::map< core::Size, core::Size > const & full_to_sub = get_act_full_to_sub();
1127  std::map< core::Size, bool > const & Is_prepend_map = get_act_Is_prepend_map();
1128 
1129  pose::Pose & current_pose=*(pose_op);
1130 
1131  if(quick_alignment_) align_to_quick_alignment_pose(current_pose, tag);
1132 
1133  utility::vector1< bool > fail_triangle_inequality_list;
1134 
1135  if(use_triangle_inequality_) setup_fail_triangle_inequailty_list(current_pose, tag, fail_triangle_inequality_list);
1136 
1137  for ( Size n = silent_struct_output_list_.size(); n >= 1; n-- ) {
1138 
1139  if(use_triangle_inequality_ && (fail_triangle_inequality_list[n]==true)) continue;
1140 
1141  pose::PoseOP const cluster_center_poseOP=get_poseOP(n);
1142  pose::Pose const & cluster_center_pose=(*cluster_center_poseOP);
1143  std::string const & cluster_center_tag=tag_output_list_[n];
1144 
1145 
1146  if(quick_alignment_==false) align_poses(current_pose, tag, cluster_center_pose, cluster_center_tag, alignment_res, align_only_over_base_atoms_);
1147 
1148  //////////////////////////////////////////////////////////
1149 
1150  bool old_suite_cluster=Is_old_individual_suite_cluster(current_pose, cluster_center_pose, rmsd_res_list, full_to_sub, Is_prepend_map, suite_cluster_radius_ );
1151 
1152 
1153  Real loop_rmsd=99.99;
1154 
1155  if(full_length_loop_rmsd_clustering_){
1156  if(optimize_memory_usage_) utility_exit_with_message("Both full_length_loop_rmsd_clustering_ and optimize_memory_usage_ equal true");
1157  std::string const & full_sequence=job_parameters_->full_sequence();
1158  loop_rmsd=full_length_rmsd_over_residue_list(current_pose, cluster_center_pose, rmsd_res_list, full_sequence, false /*verbose*/, false /*ignore_virtual_atom*/);
1159  }else{
1160  loop_rmsd=rmsd_over_residue_list(current_pose, cluster_center_pose, rmsd_res_list, full_to_sub, Is_prepend_map, false /*verbose*/, false /*ignore_virtual_atom*/);
1161  }
1162 
1163  bool old_loop_cluster=(loop_rmsd < loop_cluster_radius_ );
1164 
1165  if(verbose_){
1166  std::cout << "Between " << tag << " AND " << cluster_center_tag << ": loop_rmsd=" << loop_rmsd << " ";
1167  Output_boolean("Is_old_suite_cluster= ", old_suite_cluster); std::cout << std::endl;
1168  }
1169 
1170  if(old_suite_cluster==true && old_loop_cluster==true){
1171  std::cout << tag << " is a neighbor of " << cluster_center_tag << std::endl;
1172  return false;
1173  }
1174 
1175  }
1176 
1177  return true; //new cluster center!
1178  }
1179 
1180  //////////////////////////////////////////////////////////////////
1181 
1182  bool
1183  StepWiseRNA_Clusterer::check_for_closeness_without_job_parameters( core::pose::PoseOP const & pose_op )
1184  {
1185  using namespace core::scoring;
1186 
1187  // go through the list backwards, because poses may be grouped by similarity --
1188  // the newest pose is probably closer to poses at the end of the list.
1189  for ( Size n = silent_struct_output_list_.size(); n >= 1; n-- ) {
1190 
1191  Real rmsd = all_atom_rmsd( *(get_poseOP(n)) , *pose_op );
1192 
1193  if ( rmsd < whole_struct_cluster_radius_ ) return false;
1194  }
1195  return true;
1196  }
1197 
1198  //////////////////////////////////////////////////////////////////
1199 
1200  bool
1201  StepWiseRNA_Clusterer::check_for_closeness( core::pose::PoseOP const & pose_op, std::string const & tag ){
1202 
1203  if(skip_clustering_==true){
1204  utility_exit_with_message( "skip_clustering==true but StepWiseRNA_Clusterer::check_for_closeness() is called! " );
1205  }
1206 
1207  if(job_parameters_exist_){
1208  return Is_new_cluster_center_with_job_parameters(pose_op, tag);
1209  }else{
1210  return check_for_closeness_without_job_parameters(pose_op);
1211  }
1212 
1213  }
1214 
1215 
1216 
1217  /////////////////////////////////////////////////////////////////////////////////////////
1218  void
1219  StepWiseRNA_Clusterer::output_silent_file( std::string const & silent_file ){
1220 
1221  using namespace core::io::silent;
1222 
1223  SilentFileData silent_file_data;
1224 
1225  for ( Size n = 1 ; n <= silent_struct_output_list_.size(); n++ ) {
1226 
1227  SilentStructOP & s( silent_struct_output_list_[ n ] );
1228 
1229  if ( rename_tags_ ){
1230  s->add_comment( "PARENT_TAG", s->decoy_tag() );
1231 
1232  std::string tag;
1233  if(add_lead_zero_to_tag_){
1234  tag = "S_"+ ObjexxFCL::lead_zero_string_of( n-1 /* start with zero */, 6);
1235  }else{
1236  tag = "S_"+ ObjexxFCL::string_of( n-1 /* start with zero */);
1237  }
1238 
1239  s->set_decoy_tag( tag );
1240  }
1241 
1242  silent_file_data.write_silent_struct( *s, silent_file, false /*write score only*/ );
1243 
1244  }
1245 
1246  }
1247 
1248  ////////////////////////////////////////////////////////////////////////////////////////////////////////////
1249  void
1250  StepWiseRNA_Clusterer::recalculate_rmsd_and_output_silent_file(std::string const & silent_file,
1251  protocols::swa::rna::StepWiseRNA_PoseSetupOP & stepwise_rna_pose_setup,
1252  bool const write_score_only){
1253 
1254  using namespace core::io::silent;
1255  using namespace core::scoring;
1256  using namespace core::pose;
1257 
1258  clock_t const time_start( clock() );
1259 
1260  Output_title_text("ENTER StepWiseRNA_Clusterer::recalculate_rmsd_and_output_silent_file()");
1261 
1262  if(job_parameters_exist_==false) utility_exit_with_message("job_parameters_exist_==false!");
1263 
1264  ///This could actually work...but it is just yet tested!
1265  if(job_parameters_->Is_simple_full_length_job_params()==true) utility_exit_with_message("job_parameters_->Is_simple_full_length_job_params()==true!");
1266 
1267  Output_boolean("write_score_only= ", write_score_only); std::cout << std::endl;
1268 
1269  utility::vector1< core::Size > const & working_best_alignment = job_parameters_->working_best_alignment();
1270  utility::vector1< core::Size > const & working_native_alignment = job_parameters_->working_native_alignment();
1271  std::string const & full_sequence=job_parameters_->full_sequence();
1272 
1273 
1274  stepwise_rna_pose_setup->set_verbose(true); //New OPTION, Mar 22
1275 
1276  if(tag_output_list_.size()!=silent_struct_output_list_.size()) utility_exit_with_message( "pose_output_list_.size()!=silent_struct_output_list_!" );
1277 
1278  SilentFileData silent_file_data;
1279 
1280  utility::vector1 < core::Size > const & rmsd_res_list = job_parameters_->rmsd_res_list();
1281  std::map< core::Size, core::Size > const & full_to_sub = job_parameters_->const_full_to_sub();
1282 
1283  //bool const ignore_min_decoys=true; //Over the keep min_decoy mode...Comment out on Dec 11, 2011.
1284 
1285  //float best_score=9999999999999; //lead to server-test build error at R47198; Feb 02, 2012
1286  //Real best_score=999999999; //Should Fix server-test build error BUT yet not tested; Feb 02, 2012
1287 
1288  std::map< core::Size, bool > Is_prepend_map;
1289  Is_prepend_map.clear();
1290 
1291  Is_prepend_map = job_parameters_->Is_prepend_map();
1292 
1293  bool Is_full_length_pose=true; //Will be init first time the loop is tranversed.
1294 
1295  bool Is_valid_first_struct=true;
1296 
1297  for ( Size n = 1 ; n <= silent_struct_output_list_.size(); n++ ) {
1298 
1299  std::string tag=tag_output_list_[n];
1300  SilentStructOP s( silent_struct_output_list_[ n ] );
1301 
1302  if((n % 100) ==0){
1303  std::cout << "recalculate rmsd for " << tag << " n= " << n << " taken time " << static_cast<Real>( clock() - time_start ) / CLOCKS_PER_SEC << std::endl;
1304  }
1305  core::pose::PoseOP const pose_op=get_poseOP(n);
1306  core::pose::Pose pose=(*pose_op);
1307 
1308  if(protocols::swa::rna::check_for_messed_up_structure(pose, tag)==true) continue;
1309 
1310  Real score( 0.0 );
1311  getPoseExtraScores( pose, "score", score );
1312 
1313  //This kinda weird in that setup_native_pose actually set the working_native_pose in the job_parameters...this interdependency is not good!
1314  if(Is_valid_first_struct){
1315  //best_score = score;
1316  stepwise_rna_pose_setup->setup_native_pose( pose ); //Setup native_pose;
1317  Is_full_length_pose=( pose.total_residue()==full_sequence.size()) ? true : false;
1318  Output_boolean("Is_full_length_pose= ", Is_full_length_pose); std::cout << std::endl;
1319 
1320  Is_valid_first_struct=false;
1321  }
1322 
1323  //if(score > best_score + score_diff_cut_) break; //Comment out on Dec 11, 2011.
1324 
1325  PoseOP native_pose_OP=new Pose;
1326  (*native_pose_OP)=(*job_parameters_->working_native_pose()); //Hard copy...
1327 
1328  align_poses((*native_pose_OP), "native", pose, tag, working_best_alignment, align_only_over_base_atoms_);
1329 
1330 
1331  s->add_energy( "NEW_all_rms", rms_at_corresponding_heavy_atoms( pose, *native_pose_OP ) );
1332  s->add_energy( "NEW_loop_rmsd", rmsd_over_residue_list( pose, *native_pose_OP, rmsd_res_list, full_to_sub, Is_prepend_map, false, false) );
1333 
1334  ///////////////////////////////////////////////////////////////////////////////////////////////
1335 
1336  if(working_native_alignment.size()!=0){ //user specify which residue to align with native.
1337  align_poses((*native_pose_OP), "native", pose, tag, working_native_alignment, align_only_over_base_atoms_);
1338  }else{ //default
1339  align_poses((*native_pose_OP), "native", pose, tag, working_best_alignment, align_only_over_base_atoms_); //REDUNDANT
1340  }
1341  s->add_energy( "NEW_O_loop_rmsd", rmsd_over_residue_list( pose, *native_pose_OP, rmsd_res_list, full_to_sub, Is_prepend_map, false, false) );
1342 
1343  if(Is_full_length_pose){
1344  s->add_energy( "NEW_Full_L_rmsd", full_length_rmsd_over_residue_list(pose, *native_pose_OP, rmsd_res_list, full_sequence, false, false) );
1345  }
1346 
1347  ////////Simple loop RMSD exclude only virtual atoms in native_pdb (mostly just the native virtual_res)//////////////
1348  core::pose::Pose curr_pose_no_variants=pose;
1349  remove_all_variant_types(curr_pose_no_variants); //This remove all virtual_atoms!
1350 
1351  if(working_native_alignment.size()!=0){ //user specify which residue to align with native.
1352  align_poses((*native_pose_OP), "native", curr_pose_no_variants, tag +"_no_variants", working_native_alignment, align_only_over_base_atoms_);
1353  }else{ //default
1354  align_poses((*native_pose_OP), "native", curr_pose_no_variants, tag +"_no_variants", working_best_alignment, align_only_over_base_atoms_);
1355  }
1356 
1357  s->add_energy( "NEW_NAT_rmsd", rmsd_over_residue_list( curr_pose_no_variants, *native_pose_OP, rmsd_res_list, full_to_sub, Is_prepend_map, false /*verbose*/, true /*ignore_virtual_atom*/) );
1358 
1359  ////March 7, 2011....Output BASE-PAIRS STATISTIC///////////////////////////////
1360  //utility::vector1< core::Size > const working_rmsd_res_list=apply_full_to_sub_mapping(rmsd_res_list, job_parameters_);
1361 
1362  //Nov 01, 2011 WARNING THIS currently does not work if there is protonated Adenosine!
1363  //add_base_pair_stats( s, pose, *native_pose_OP, working_rmsd_res_list);
1364 
1365 
1366  if ( rename_tags_ ){
1367  s->add_comment( "PARENT_TAG", s->decoy_tag() );
1368  if(add_lead_zero_to_tag_) tag = "S_"+ ObjexxFCL::lead_zero_string_of( n-1 /* start with zero */, 6);
1369  s->set_decoy_tag( tag );
1370  }
1371 
1372  ///////////////////////////////////////////////////////////////////////////////////////////////
1373 
1374  silent_file_data.write_silent_struct( *s, silent_file, write_score_only );
1375 
1376  }
1377 
1378  std::cout << "Total # pose alignment and rmsd recalculation= " << silent_struct_output_list_.size() << std::endl;
1379  std::cout << "Total recalculate rmsd time : " << static_cast<Real>( clock() - time_start ) / CLOCKS_PER_SEC << std::endl;
1380 
1381  Output_title_text("EXIT StepWiseRNA_Clusterer::recalculate_rmsd_and_output_silent_file()");
1382 
1383  }
1384 
1385  ////////////////////////////////////////////////////////////////////////////////////////////////////////////
1386  void
1387  StepWiseRNA_Clusterer::get_best_neighboring_shift_RMSD_and_output_silent_file(std::string const & silent_file){
1388 
1389  using namespace core::io::silent;
1390  using namespace core::scoring;
1391  using namespace core::pose;
1392 
1393  clock_t const time_start( clock() );
1394 
1395  Output_title_text("ENTER StepWiseRNA_Clusterer::get_best_neighboring_shift_RMSD_and_output_silent_file()");
1396 
1397  if(job_parameters_exist_==false) utility_exit_with_message("job_parameters_exist_==false!");
1398 
1399  ///This could actually work...but it is just yet tested!
1400  if(job_parameters_->Is_simple_full_length_job_params()==true) utility_exit_with_message("job_parameters_->Is_simple_full_length_job_params()==true!");
1401 
1402  utility::vector1< core::Size > const & working_best_alignment = job_parameters_->working_best_alignment();
1403 
1404  utility::vector1 < core::Size > const & rmsd_res_list = job_parameters_->rmsd_res_list();
1405  std::map< core::Size, core::Size > const & full_to_sub = job_parameters_->const_full_to_sub();
1406 
1407  std::map< core::Size, bool > Is_prepend_map;
1408  Is_prepend_map.clear();
1409 
1410  Is_prepend_map = job_parameters_->Is_prepend_map();
1411 
1412  std::string const & full_sequence=job_parameters_->full_sequence();
1413 
1414  SilentFileData silent_file_data;
1415 
1416  std::cout << "loop_cluster_radius_ = " << loop_cluster_radius_ << std::endl;
1417  std::cout << "suite_cluster_radius_= " << suite_cluster_radius_ << std::endl;
1418 
1419  bool Is_full_length_pose=true; //Will be init first time the loop is tranversed.
1420 
1421  bool Is_valid_first_struct=true;
1422 
1423  for( Size n = 1 ; n <= silent_struct_output_list_.size(); n++ ) {
1424 
1425  std::string tag=tag_output_list_[n];
1426  SilentStructOP s( silent_struct_output_list_[ n ] );
1427 
1428  if((n % 100) ==0){
1429  std::cout << "find_best_neighboring_shift_rmsd for " << tag << " n= " << n << " taken time " << static_cast<Real>( clock() - time_start ) / CLOCKS_PER_SEC << std::endl;
1430  }
1431 
1432  core::pose::PoseOP const current_pose_op=get_poseOP(n);
1433  core::pose::Pose current_pose=(*current_pose_op);
1434 
1435  if(protocols::swa::rna::check_for_messed_up_structure(current_pose, tag)==true) continue;
1436 
1437  if(Is_valid_first_struct){
1438  Is_full_length_pose=( current_pose.total_residue()==full_sequence.size()) ? true : false;
1439  Output_boolean("Is_full_length_pose= ", Is_full_length_pose); std::cout << std::endl;
1440 
1441  Is_valid_first_struct=false;
1442 
1443  }
1444 
1445  Real start_score( 0.0 );
1446  bool has_total_score=getPoseExtraScores( current_pose, "score", start_score );
1447  if(has_total_score==false) utility_exit_with_message("current_pose (" + tag + ") missing total score!");
1448 
1449 
1450  Real start_shift_score( 0.0 );
1451  bool has_shift_score=getPoseExtraScores( current_pose, "shift_score", start_shift_score );
1452  if(has_shift_score==false) utility_exit_with_message("current_pose (" + tag + ") missing shift_score!");
1453 
1454  if(quick_alignment_) align_to_quick_alignment_pose(current_pose, tag);
1455 
1456  Real best_shift_score=start_shift_score;
1457  std::string best_shift_tag=tag;
1458 
1459  for( Size other_pose_ID = 1 ; other_pose_ID <= silent_struct_output_list_.size(); other_pose_ID ++ ) {
1460 
1461  std::string other_tag=tag_output_list_[ other_pose_ID ];
1462 
1463  core::pose::PoseOP const other_pose_op=get_poseOP( other_pose_ID );
1464  core::pose::Pose other_pose=(*other_pose_op);
1465 
1466  if(protocols::swa::rna::check_for_messed_up_structure(other_pose, other_tag)==true) continue;
1467 
1468  if(quick_alignment_==false) align_poses(other_pose, other_tag, current_pose, tag, working_best_alignment, align_only_over_base_atoms_);
1469 
1470 
1471  bool old_suite_cluster=Is_old_individual_suite_cluster(current_pose, other_pose, rmsd_res_list, full_to_sub, Is_prepend_map, suite_cluster_radius_ );
1472 
1473  Real loop_rmsd=99.99;
1474 
1475  if(Is_full_length_pose){
1476  if(optimize_memory_usage_) utility_exit_with_message("Both full_length_loop_rmsd_clustering_ and optimize_memory_usage_ equal true");
1477  loop_rmsd=full_length_rmsd_over_residue_list(current_pose, other_pose, rmsd_res_list, full_sequence, false /*verbose*/, false /*ignore_virtual_atom*/);
1478  }else{
1479  loop_rmsd=rmsd_over_residue_list(current_pose, other_pose, rmsd_res_list, full_to_sub, Is_prepend_map, false /*verbose*/, false /*ignore_virtual_atom*/);
1480  }
1481 
1482  bool old_loop_cluster=(loop_rmsd < loop_cluster_radius_ );
1483 
1484  if(old_suite_cluster==true && old_loop_cluster==true){
1485  if(verbose_) std::cout << tag << " is a neighbor of " << other_tag << std::endl;
1486  }else{
1487  continue; //Not a neighor!
1488  }
1489 
1490  Real other_shift_score( 0.0 );
1491  bool has_shift_score=getPoseExtraScores( other_pose, "shift_score", other_shift_score );
1492  if(has_shift_score==false) utility_exit_with_message("other_pose (" + other_tag + ") missing shift_score");
1493 
1494  if(other_shift_score<best_shift_score){
1495  best_shift_score=other_shift_score;
1496  best_shift_tag=other_tag;
1497  }
1498  }
1499 
1500  float const new_score=start_score-start_shift_score+best_shift_score;
1501 
1502  //setPoseExtraScores(pose, "score", new_score);
1503  //setPoseExtraScores(pose, "shift_score", best_shift_score);
1504  //setPoseExtraScores(pose, "self_shift_score", start_shift_score);
1505  //add_score_line_string(pose, "src_shift_tag", best_shift_tag);
1506 
1507  SilentStructOP new_silent_struct=s->clone(); //Important to create new one since shift_score is being changed!!!
1508 
1509  new_silent_struct->add_energy( "score", new_score );
1510  new_silent_struct->add_energy( "shift_score", best_shift_score );
1511  new_silent_struct->add_energy( "self_shift_score", start_shift_score );
1512  new_silent_struct->add_string_value("src_shift_tag", best_shift_tag);
1513 
1514  silent_file_data.write_silent_struct( *new_silent_struct, silent_file, false /*write_score_only*/ );
1515 
1516  }
1517 
1518  std::cout << "silent_struct_output_list_.size()= " << silent_struct_output_list_.size() << std::endl;
1519  std::cout << "Total get_best_neighboring_shift_RMSD_and_output_silent_file time : " << static_cast<Real>( clock() - time_start ) / CLOCKS_PER_SEC << std::endl;
1520 
1521  Output_title_text("EXIT StepWiseRNA_Clusterer::get_best_neighboring_shift_RMSD_and_output_silent_file()");
1522 
1523 
1524  }
1525 
1526 
1527  ////////////////////////////////////Sept 06, 2011 (For post_processing)////////////////////////////////////////////////////
1528  void
1529  StepWiseRNA_Clusterer::create_tags_map(){
1530 
1531 
1532  Output_title_text("ENTER StepWiseRNA_Clusterer::create_tag_map()");
1533 
1534  input_->reset(); //reset the silentfile stream to the beginning..
1535 
1536  current_tags_map_.clear();
1537  parent_tags_map_.clear();
1538 
1539  //mymap.count(c)
1540 
1541  while( input_->has_another_pose() ) {
1542 
1543  core::io::silent::SilentStructOP const silent_struct( input_->next_struct() );
1544 
1545  std::string const tag = silent_struct->decoy_tag();
1546 
1547  if(ignore_FARFAR_no_auto_bulge_tag_){
1548 
1549  if(current_tags_map_.count(tag)!=0) utility_exit_with_message(tag + " already exist in current_tags_map_!");
1550 
1551  current_tags_map_[tag]=true;
1552 
1553  }
1554 
1555 
1556  if(ignore_FARFAR_no_auto_bulge_parent_tag_){
1557 
1558  if(silent_struct->has_parent_remark("PARENT_TAG")==false){
1559  std::cout << "silent_struct (" << tag << ") missing PARENT_TAG!" << std::endl;
1560  silent_struct->print_parent_remarks(std::cout);
1561  utility_exit_with_message("silent_struct (" + tag + ") missing PARENT_TAG!");
1562  }
1563 
1564  std::string const parent_tag=silent_struct->get_parent_remark("PARENT_TAG");
1565 
1566  if(parent_tags_map_.count(parent_tag)!=0) utility_exit_with_message(parent_tag + " already exist in parent_tags_map_!");
1567 
1568  parent_tags_map_[parent_tag]=true;
1569 
1570  }
1571 
1572  }
1573 
1574  input_->reset(); //reset the silentfile stream to the beginning..
1575 
1576  Output_title_text("EXIT StepWiseRNA_Clusterer::create_tag_map()");
1577 
1578  }
1579 
1580  ////////////////////////////////////Sept 06, 2011 (For post_processing)////////////////////////////////////////////////////
1581  bool
1582  StepWiseRNA_Clusterer::pass_FARFAR_no_auto_bulge_filter(core::io::silent::SilentStructOP const & silent_struct) const{
1583 
1584  //This only effects to FARFAR models!
1585  //For the purpose of clustering, assume that the NO_AUTO_BULGE belong to the same same cluster as the WITH_AUTO_BULGE pose.
1586  //So if a instance of the pose with WITH_AUTO_BULGE exist in the silent_file then ignore the NO_AUTO_BULGE version of the pose!
1587  //The WITH_AUTO_BULGE pose always have better energy!
1588 
1589  std::string const NO_AUTO_BULGE_STR ="_NO_AUTO_BULGE";
1590  std::string const WITH_AUTO_BULGE_STR="_WITH_AUTO_BULGE";
1591 
1592  std::string const tag = silent_struct->decoy_tag();
1593 
1594  if(ignore_FARFAR_no_auto_bulge_tag_){
1595 
1596  if(current_tags_map_.size()==0) utility_exit_with_message("current_tags_map_ is empty!");
1597 
1598  size_t found_curr_tag;
1599  found_curr_tag=tag.find(NO_AUTO_BULGE_STR);
1600 
1601  if(found_curr_tag!=std::string::npos){
1602 
1603  std::string WITH_AUTO_BULGE_curr_tag=tag;
1604 
1605  WITH_AUTO_BULGE_curr_tag.replace( found_curr_tag, 14, WITH_AUTO_BULGE_STR );
1606 
1607  if(current_tags_map_.count(WITH_AUTO_BULGE_curr_tag)>0){
1608 
1609  std::cout << "Ignoring NO_AUTO_BULGE pose: " << tag << " since WITH_BULGE_curr_tag: " << WITH_AUTO_BULGE_curr_tag << " exist!" << std::endl;
1610  return false;
1611 
1612  }
1613 
1614  }
1615 
1616  //if(tag.substr(tag.size()-14, 14)==NO_AUTO_BULGE_STR){ //Problem with this is that it doesn't account for the renamed tag S_0 to S_0_1 possibility!
1617  //if(found==std::string::npos) utility_exit_with_message("CANNOT FIND NO_AUTO_BULGE_STR in current_tag: " + tag);
1618 
1619  }
1620 
1621 
1622  if(ignore_FARFAR_no_auto_bulge_parent_tag_){
1623 
1624  if(parent_tags_map_.size()==0) utility_exit_with_message("parent_tags_map_ is empty!");
1625 
1626  if(silent_struct->has_parent_remark("PARENT_TAG")==false) utility_exit_with_message("silent_struct (" + tag + " missing PARENT_TAG!");
1627 
1628  std::string const parent_tag=silent_struct->get_parent_remark("PARENT_TAG");
1629 
1630  size_t found_parent_tag;
1631  found_parent_tag=parent_tag.find(NO_AUTO_BULGE_STR);
1632 
1633  if(found_parent_tag!=std::string::npos){
1634 
1635  std::string WITH_AUTO_BULGE_parent_tag=parent_tag;
1636 
1637  WITH_AUTO_BULGE_parent_tag.replace( found_parent_tag, 14, WITH_AUTO_BULGE_STR );
1638 
1639  if(parent_tags_map_.count(WITH_AUTO_BULGE_parent_tag)>0){
1640 
1641  std::cout << "Ignoring NO_AUTO_BULGE pose: " << tag << " with parent_tag: " << parent_tag;
1642  std::cout << ", since WITH_AUTO_BULGE_parent_tag " << WITH_AUTO_BULGE_parent_tag << " exist!" << std::endl;
1643  return false;
1644 
1645  }
1646  }
1647  }
1648 
1649  return true;
1650 
1651  }
1652 
1653 
1654  //////////////////////////////////////////////
1655  void
1656  StepWiseRNA_Clusterer::set_job_parameters( protocols::swa::rna::StepWiseRNA_JobParametersCOP & job_parameters){
1657 
1658  job_parameters_=job_parameters;
1659 
1660  }
1661  //////////////////////////////////////////////
1662  void
1663  StepWiseRNA_Clusterer::set_job_parameters_exist( bool const job_parameters_exist){
1664 
1665  job_parameters_exist_=job_parameters_exist;
1666 
1667  }
1668 
1669  //////////////////////////////////////////////
1671  StepWiseRNA_Clusterer::get_act_alignment_res() const {
1672  utility::vector1< core::Size > const & alignment_res= (optimize_memory_usage_) ? sliced_pose_job_params_.sliced_pose_best_alignment: job_parameters_->working_best_alignment() ;
1673  return alignment_res;
1674  }
1675 
1677  StepWiseRNA_Clusterer::get_act_rmsd_res_list() const {
1678  utility::vector1 < core::Size > const & rmsd_res_list = (optimize_memory_usage_) ? sliced_pose_job_params_.sliced_pose_rmsd_res_list : job_parameters_->rmsd_res_list();
1679  return rmsd_res_list;
1680  }
1681 
1682  std::map< core::Size, core::Size > const &
1683  StepWiseRNA_Clusterer::get_act_full_to_sub() const {
1684  std::map< core::Size, core::Size > const & full_to_sub =(optimize_memory_usage_) ? sliced_pose_job_params_.sliced_pose_full_to_sub : job_parameters_->const_full_to_sub();
1685  return full_to_sub;
1686  }
1687 
1688  std::map< core::Size, bool > const &
1689  StepWiseRNA_Clusterer::get_act_Is_prepend_map() const {
1690  std::map< core::Size, bool > const & Is_prepend_map = (optimize_memory_usage_) ? sliced_pose_job_params_.sliced_pose_Is_prepend_map: job_parameters_->Is_prepend_map();
1691  return Is_prepend_map ;
1692  }
1693 
1694 
1695 
1696  //////////////////////////////////////////////
1697  void
1698  SlicedPoseJobParameters::setup(protocols::swa::rna::StepWiseRNA_JobParametersCOP & job_parameters){
1699 
1700  Output_title_text("Enter SlicedPoseJobParameters::setup()");
1701 
1702  Is_setup_=true;
1703 
1704  Size const nres=(job_parameters->working_sequence()).size();
1705  utility::vector1< core::Size > const & working_best_alignment( job_parameters->working_best_alignment() );
1706  utility::vector1 < core::Size > const & rmsd_res_list = job_parameters->rmsd_res_list();
1707  std::map< core::Size, bool > const & Is_prepend_map = job_parameters->Is_prepend_map();
1708  std::map< core::Size, core::Size > const & sub_to_full( job_parameters->const_sub_to_full() );
1709 
1710 
1711  utility::vector1< core::Size > working_rmsd_res_list=apply_full_to_sub_mapping(rmsd_res_list, job_parameters);
1712 
1713  Size sliced_seq_num=1;
1714  for(Size seq_num=1; seq_num<=nres; seq_num++){
1715  bool keep_res=false;
1716 
1717  if(Contain_seq_num(seq_num, working_best_alignment) ) {
1718  std::cout << "seq_num " << seq_num << " is in working_best_alignment res "<< std::endl;
1719  keep_res=true;
1720  }
1721 
1722 
1723  if(Contain_seq_num(seq_num, working_rmsd_res_list) ){
1724  std::cout << "seq_num " << seq_num << " is in working_rmsd_res_list "<< std::endl;
1725  keep_res=true;
1726  }
1727 
1728  if(keep_res==false && (seq_num+1)<=nres && Contain_seq_num(seq_num+1, working_rmsd_res_list) ){
1729  std::cout << "seq_num " << seq_num << " is in working_rmsd_res_list-1 "<< std::endl;
1730  keep_res=true;
1731  }
1732 
1733  if(keep_res==false && (seq_num-1)>=1 && Contain_seq_num(seq_num-1, working_rmsd_res_list) ){
1734  std::cout << "seq_num " << seq_num << " is in working_rmsd_res_list+1 "<< std::endl;
1735  keep_res=true;
1736  }
1737 
1738 
1739  Is_sliced_res_.push_back(keep_res);
1740 
1741  if(keep_res==true){
1742  working_to_sliced_res_map_.push_back(sliced_seq_num);
1743  sliced_to_working_res_map_.push_back(seq_num);
1744  sliced_seq_num++;
1745  }else{
1746  working_to_sliced_res_map_.push_back(0);
1747  }
1748 
1749  }
1750 
1751  std::cout << "------------Before slice to After slice seq_num------------" << std::endl;
1752  for(Size seq_num=1; seq_num<=working_to_sliced_res_map_.size(); seq_num++){
1753  std::cout << seq_num << "----> " << working_to_sliced_res_map_[seq_num] << std::endl;
1754 
1755  if(Contain_seq_num(seq_num, working_best_alignment) ) sliced_pose_best_alignment.push_back(working_to_sliced_res_map_[seq_num]) ;
1756  if(Contain_seq_num(seq_num, working_rmsd_res_list) ) sliced_pose_rmsd_res_list.push_back(working_to_sliced_res_map_[seq_num]) ;
1757  }
1758  std::cout << "-----------------------------------------------------------" << std::endl;
1759 
1760  std::cout << "------------After slice to Before slice seq_num------------" << std::endl;
1761  for(Size seq_num=1; seq_num<=sliced_to_working_res_map_.size(); seq_num++){
1762  std::cout << seq_num << "----> " << sliced_to_working_res_map_[seq_num] << std::endl;
1763  sliced_pose_full_to_sub[seq_num]=seq_num; //identity
1764 
1765  Size const working_seq_num=sliced_to_working_res_map_[seq_num];
1766  Size const full_seq_num= sub_to_full.find( working_seq_num )->second;
1767  bool const Is_prepend= Is_prepend_map.find( full_seq_num )->second;
1768  sliced_pose_Is_prepend_map[seq_num] =(Is_prepend);
1769 
1770  }
1771  std::cout << "-----------------------------------------------------------" << std::endl;
1772 
1773  //////////////////////////////////////////////
1774  bool in_delete_range=false;
1775 
1776  Size range_end=0;
1777  Size range_begin=0;
1778 
1779  for(Size seq_num=1; seq_num<=Is_sliced_res_.size()+1; seq_num++){ //optimization for using delete_residue_range_slow instead of delete_residue_slow
1780 
1781 
1782  if(in_delete_range==false ){
1783 
1784  if(seq_num== (Is_sliced_res_.size()+1) ) continue;
1785 
1786  if( Is_sliced_res_[seq_num]==false){
1787  range_begin=seq_num;
1788  in_delete_range=true;
1789  }
1790 
1791  }else{
1792  if( seq_num==(Is_sliced_res_.size()+1) || Is_sliced_res_[seq_num]==true ){
1793  range_end=seq_num-1; //This obviously fail if seq_num=0...but this cannot occur since in_delete_range is false at first cycle.
1794  in_delete_range=false;
1795 
1796  delete_res_range_list_.push_back( std::make_pair(range_begin, range_end) );
1797  range_end=0;
1798  range_begin=0;
1799  }
1800  }
1801  }
1802  //////////////////////////////////////////////
1803 
1804 
1805  //output debug
1806  Output_seq_num_list("sliced_pose_best_alignment= ", sliced_pose_best_alignment, 50);
1807  Output_seq_num_list("sliced_pose_rmsd_res_list= ", sliced_pose_rmsd_res_list, 50);
1808  Output_is_prepend_map("sliced_pose_Is_prepend_map= " , sliced_pose_Is_prepend_map, working_to_sliced_res_map_.size(), 50);
1809  output_pair_size_vector(delete_res_range_list_, "delete_res_range_list= " , 50);
1810 
1811  Output_title_text("Exit SlicedPoseJobParameters::setup()");
1812 
1813  }
1814 
1815  //////////////////////////////////////////////
1817  SlicedPoseJobParameters::create_sliced_pose(core::pose::Pose const & working_pose){
1818 
1819  using namespace core::conformation;
1820  using namespace core::pose;
1821  using namespace ObjexxFCL;
1822 
1823  if(Is_setup_==false){
1824  utility_exit_with_message("Is_setup_==false" );
1825  }
1826 
1827  core::pose::Pose sliced_pose=working_pose;
1828 
1829  if(Is_sliced_res_.size()!=working_pose.total_residue() ){
1830  utility_exit_with_message("Is_sliced_res.size() ( " + string_of(Is_sliced_res_.size() ) + ") != working_pose.total_residue() ( " + string_of( working_pose.total_residue() )+ ")" );
1831  }
1832 
1833 // for(Size seq_num=Is_sliced_res_.size(); seq_num>=1; seq_num--){
1834 // if(Is_sliced_res_[seq_num]==false){
1835 // sliced_pose.conformation().delete_residue_slow(seq_num);
1836 // sliced_pose.conformation().delete_polymer_residue(seq_num); //doesn't work at jump_point...
1837 // }
1838 // }
1839 
1840  for(Size n=delete_res_range_list_.size(); n>=1; n--){
1841  sliced_pose.conformation().delete_residue_range_slow( delete_res_range_list_[n].first, delete_res_range_list_[n].second );
1842  }
1843 
1844  if(sliced_pose.total_residue()!=sliced_to_working_res_map_.size()){
1845  utility_exit_with_message("working_pose.total_res() ( " + string_of(working_pose.total_residue()) + ") != sliced_to_working.size() ( " + string_of( sliced_to_working_res_map_.size() )+ ")" );
1846  }
1847 
1848 
1849 // working_pose.dump_pdb( "clusterer_working_pose.pdb");
1850 // sliced_pose.dump_pdb( "clusterer_sliced_pose.pdb");
1851 
1852 // exit(1);
1853 
1854  return sliced_pose;
1855 
1856  }
1857 
1858  //////////////////////////////////////////////
1859 
1860 } //rna
1861 } //swa
1862 } // protocols