Rosetta 3.5
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Cluster.impl.hh
Go to the documentation of this file.
1 // -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
2 // vi: set ts=2 noet:
3 //
4 // (c) Copyright Rosetta Commons Member Institutions.
5 // (c) This file is part of the Rosetta software suite and is made available under license.
6 // (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
7 // (c) For more information, see http://www.rosettacommons.org. Questions about this can be
8 // (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
9 
10 /// @file toolbox/Cluster.impl.hh
11 /// @brief template implementation for clustering of silentstructs that are provided by iterators
12 /// @author Oliver Lange
13 
14 #ifndef INCLUDED_protocols_toolbox_Cluster_impl_hh
15 #define INCLUDED_protocols_toolbox_Cluster_impl_hh
16 
17 
18 // AUTO-REMOVED #include <utility/vector1.hh>
19 #include <core/types.hh>
20 #include <ObjexxFCL/FArray2D.hh>
21 #include <deque>
22 
23 #include <utility/vector1.hh>
24 #include <basic/Tracer.hh>
25 #include <utility/exit.hh>
26 #include <ObjexxFCL/string.functions.hh>
28 // AUTO-REMOVED #include <protocols/toolbox/DecoySetEvaluation.impl.hh>
30 
32 
33 namespace protocols {
34 namespace toolbox {
35 
// Tracer channel "protocols.toolbox.cluster"; the templates in this header write their progress messages to its Info stream.
36 static basic::Tracer _impl_tr("protocols.toolbox.cluster");
37 
/// @brief Convenience overload: loads the decoys into a DecoySetEvaluation and hands them on for clustering.
/// @details Builds a local DecoySetEvaluation, fills it from the iterator range via
///  push_back_CA_xyz_from_silent_file() (last argument true, i.e. energies are stored), and then calls
///  cluster_silent_structs() with that set plus the unchanged iterator range, output container and options.
/// @param n_decoys  passed straight through to push_back_CA_xyz_from_silent_file()
/// @param input_decoys_begin  start of the input decoy range
/// @param input_decoys_end  end of the input decoy range
/// @param new_structs  output container; only forwarded here
/// @param opts  clustering options; only forwarded here
/// NOTE(review): the line carrying the return type and the function name (original source line 39) is
///  missing from this listing. The five-argument call at the end of the body suggests this is an overload
///  of cluster_silent_structs -- confirm against the original header.
38 template< typename SilentStructIterator, typename StructureContainer >
40  core::Size n_decoys,
41  SilentStructIterator input_decoys_begin,
42  SilentStructIterator input_decoys_end, //it->SilentStructOP
43  StructureContainer& new_structs, //provides a "push_back" method
44  ClusterOptions opts
45 ) {
46  //read CA coords into DecoySetEvaluation
47  DecoySetEvaluation CA_set;
48  CA_set.push_back_CA_xyz_from_silent_file( n_decoys, input_decoys_begin, input_decoys_end, true /*store energies*/ );
49 
50  //cluster the decoys according to CA_rmsd (delegates to the overload that takes the filled DecoySetEvaluation)
51  cluster_silent_structs( CA_set, input_decoys_begin, input_decoys_end, new_structs, opts );
52 }
53 
/// @brief Clusters the decoys held in CA_set and copies the surviving decoys into new_structs.
/// @details Steps, in order:
///  1. build a ClusterPhilStyle object for CA_set.n_decoys() decoys with radius opts.cluster_radius,
///  2. let CA_set fill the cluster object's distance matrix,
///  3. run the clustering with the maximum cluster count set to 1000000,
///  4. sort each cluster by energy (opts.keep_center is passed along) and, if opts.limit_cluster_size > 0,
///     truncate the clusters to that size,
///  5. assign every decoy that is still in a cluster a tag "c.<cluster>.<member>",
///  6. walk the input range; decoys without a tag are only logged, all others are cloned, optionally
///     renamed (opts.assign_new_cluster_tag) and appended to new_structs,
///  7. print a summary through the cluster object.
/// @param input_decoys_begin  start of the input decoy range; elements are used via it->decoy_tag(),
///  it->get_energy( "score" ) and it->clone()
/// @param input_decoys_end  end of the input decoy range
/// @param new_structs  output container; receives the cloned decoys through push_back()
/// @param opts  options read here: cluster_radius, keep_center, limit_cluster_size, assign_new_cluster_tag
/// NOTE(review): the line carrying the return type, the function name and the first parameter (CA_set,
///  original source line 55) is missing from this listing. The call in the overload above suggests the name
///  cluster_silent_structs; the exact type and constness of CA_set cannot be seen here -- confirm against
///  the original header.
/// NOTE(review): the copy loop advances the tag iterator in lockstep with the input range, so it assumes
///  the range holds exactly CA_set.n_decoys() decoys in the same order as they were stored in CA_set -- confirm
///  with callers.
54 template< typename SilentStructIterator, typename StructureContainer >
56  SilentStructIterator input_decoys_begin,
57  SilentStructIterator input_decoys_end,
58  StructureContainer& new_structs,
59  ClusterOptions opts
60 ) {
61 
62  core::Size const n_decoys( CA_set.n_decoys() );
63 
64  // initialize cluster object
65  toolbox::ClusterPhilStyle cluster( n_decoys , opts.cluster_radius );
66 
67  _impl_tr.Info << "compute distance matrix" << std::endl;
68  // compute distance matrix (written directly into the matrix owned by the cluster object)
69  CA_set.compute_distance_matrix( cluster.distance_matrix() );
70 
71  // don't limit the maximum cluster size
72  cluster.set_n_max_cluster( 1000000 );
73 
74  _impl_tr.Info << "compute clusters" << std::endl;
75  cluster.do_clustering();
76 
77  // for the next steps we need energies --- could be made optional
78  _impl_tr.Info << "sort clusters by energy" << std::endl;
79  runtime_assert( CA_set.all_energies().size() == n_decoys );
80  cluster.sort_each_group_by_energy( CA_set.all_energies(), opts.keep_center );
81  if ( opts.limit_cluster_size > 0 ) {
82  cluster.limit_groupsize( opts.limit_cluster_size );
83  }
84 
85  // now go through the clusters and write out c.XXX.NNN tags
86  toolbox::ClusterBase::ClusterList const & clusterlist=cluster.clusterlist();
87 
88  _impl_tr.Info << " clustering: " << clusterlist.size() << " clusters found. ";
89  if ( opts.limit_cluster_size ) {
90  _impl_tr.Info << " cluster size limited to "
91  << opts.limit_cluster_size << "\n";
92  }
93  _impl_tr.Info << std::endl;
94  // one entry per decoy, corresponding to the running number in the input data which is also used as index in the clusters
95  // the "kept_tags" entry will remain "" for removed structures (limit_cluster_size).
96  // this is slightly memory intensive... the alternative is that for each decoy in input_decoys.begin...end we have
97  // to find the corresponding entry in the clusterlist.
98  utility::vector1< std::string > kept_tags( n_decoys, "" );
99  utility::vector1< std::string > kept_orig_tags( n_decoys, "" );
100 
101  _impl_tr.Info << " generate new tags... " << std::endl;
102  for ( core::Size i=1; i<=clusterlist.size(); i++ ) {
103  for ( core::Size j=1; j<=clusterlist[i].size(); j++ ) {
104  using namespace ObjexxFCL;
105  //form tags of type: c.<cluster_number>.<decoy_in_group_nr>; both numbers are zero-based (i-1, j-1), requested with widths 4 and 3
106  kept_tags[ clusterlist[i][j-1] ] = "c."+lead_zero_string_of( i-1,4 )+"."+lead_zero_string_of( j-1,3 );
107  }
108  }
109 
110  _impl_tr.Info << "copy remaining structures to output... " << std::endl;
111  utility::vector1< std::string >::const_iterator tags_it = kept_tags.begin();
112  core::Size ct( 1 );
113  for ( SilentStructIterator it=input_decoys_begin; it!=input_decoys_end; ++it, ++tags_it, ++ct ) {
114 
115  //if tag is "" this decoy has been filtered out by limit_cluster_size
116  if ( tags_it->size() == 0 ) {
117  _impl_tr.Info << "removed decoy " << it->decoy_tag() << " with score " << it->get_energy( "score" ) << "\n";
118  continue;
119  }
120 
121  //this decoy will be transferred into output
122  _impl_tr.Info << "keep decoy "<< it->decoy_tag() << " with score " << it->get_energy( "score" ) << " as " << *tags_it << "\n";
123  core::io::silent::SilentStructOP new_decoy = it->clone();
124 
125  //rename decoy to "c.XXX.NNN" ? the original tag is kept on the copy as string value "decoy_tag"
126  if ( opts.assign_new_cluster_tag ) {
127  new_decoy->add_string_value( "decoy_tag", it->decoy_tag() );
128  new_decoy->set_decoy_tag( *tags_it );
129  }
130 
131  new_structs.push_back( new_decoy );
132 
133  //store these for the "print_summary" output --- definitely memory that could be saved; entries of removed decoys stay ""
134  kept_orig_tags[ ct ] = it->decoy_tag();
135  }
136 
137  _impl_tr.Info << std::endl;
138  cluster.print_summary( kept_orig_tags, CA_set.all_energies() );
139 }
140 
141 
142 }
143 }
144 
145 
146 #endif