Rosetta 3.5
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
KCluster.hh
Go to the documentation of this file.
1 // -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
2 // vi: set ts=2 noet:
3 //
4 // (c) Copyright Rosetta Commons Member Institutions.
5 // (c) This file is part of the Rosetta software suite and is made available under license.
6 // (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
7 // (c) For more information, see http://www.rosettacommons.org. Questions about this can be
8 // (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
9 
10 /// @file protocols/toolbox/KCluster.hh
11 /// @brief Fast clustering algorithm for large silent file
12 /// @author Yuan Liu (wendao@u.washington.edu)
13 
14 #ifndef INCLUDED_protocols_toolbox_KCluster_hh
15 #define INCLUDED_protocols_toolbox_KCluster_hh
16 
19 
20 #include <core/types.hh>
21 #include <utility/vector1.hh>
22 
23 #include <ObjexxFCL/FArray3D.hh>
24 #include <ObjexxFCL/FArray2D.hh>
25 #include <ObjexxFCL/FArray1D.hh>
26 #include <ObjexxFCL/FArray3.hh>
27 #include <ObjexxFCL/FArray2.hh>
28 #include <ObjexxFCL/FArray1.hh>
29 #include <ObjexxFCL/FArray2P.hh>
30 #include <ObjexxFCL/FArray3P.hh>
31 
32 #include <string>
33 
34 using namespace std;
35 using namespace core;
36 
37 namespace protocols {
38 namespace toolbox {
39 
40 ////////////////////////////////////////////////////////////////////////
41 // Hierarchical Cluster Data Structure
42 ////////////////////////////////////////////////////////////////////////
44 {
45 public:
46  ///@brief Automatically generated virtual destructor for class deriving directly from ReferenceCount
47  virtual ~KClusterElement(); // declaration only; the definition is not visible in this listing
51 
53  max_distance_(-1.0),
54  max_dist_ndx_(0),
55  edit_mode(false){}
56 
58  data_ndx_(nd,0),
59  type_list_(nd,0),
60  dist_list_(nd,999999),
61  max_distance_(-1.0),
62  max_dist_ndx_(0),
63  edit_mode(false)
64  {
65  for(Size i=1; i<=nd; i++)data_ndx_[i]=i;
66  }
67 
68  /// @brief assign a data into a cluster
69  void assign_type_data(Size, Size, Real); // declaration only; parameters are unnamed - presumably (data position, cluster type, distance), TODO confirm against the definition
70 
71  /// @brief add a new structure ndx to the data_ndx_
72  void add_new_data(Size ndx_data) // appends one entry to each of the three per-data lists so they stay the same length
73  {
74  data_ndx_.push_back(ndx_data); // the caller-supplied data ndx
75  type_list_.push_back(0); // type starts at 0 - presumably "not yet assigned", TODO confirm
76  dist_list_.push_back(999999); // distance starts at 999999 - presumably a "very far" placeholder, TODO confirm
77 
78  }
79 
80  /// @brief add a new cluster center's data_ndx
81  void add_new_cluster(Size ndx_data) // grows center_ndx_ and subclusters_ by one entry each
82  {
83  center_ndx_.push_back(ndx_data); // ndx_data becomes the new cluster's center ndx
84  subclusters_.push_back(new KClusterElement()); // matching default-constructed sub-element; NOTE(review): the element type of subclusters_ is not visible in this listing - presumably an owning pointer that takes over this raw new, confirm
85  }
86 
87  /// @brief set a cluster center's data_ndx
88  void set_cluster(Size ndx_cluster, Size ndx_data) // replaces the center ndx stored for cluster ndx_cluster
89  {
90  center_ndx_[ndx_cluster] = ndx_data; // plain indexed assignment: no range check in this body, and subclusters_ is not touched
91  }
92 
93  /// @brief return data's type(local cluster index)
94  Size get_type(Size ndx_data) const // ndx_data is a position in this element's lists, not a global data ndx
95  {
96  return type_list_[ndx_data]; // entries are pushed as 0 by add_new_data(); no range check in this body
97  }
98 
99  /// @brief return distance between data and center
100  Real get_distance(Size ndx_data) const // ndx_data is a position in this element's lists
101  {
102  return dist_list_[ndx_data]; // entries are pushed as 999999 by add_new_data(); no range check in this body
103  }
104 
105  /// @brief return cluster center's data_ndx
106  Size get_center_ndx(Size ndx_cluster) const // ndx_cluster selects an entry of center_ndx_
107  {
108  return center_ndx_[ndx_cluster]; // value appended by add_new_cluster() or overwritten by set_cluster(); no range check in this body
109  }
110 
111  /// @brief return the data ndx list of this cluster
113  {
114  return data_ndx_;
115  }
116 
117  /// @brief return the ndx list of sub-cluster
119  {
120  return subclusters_[c]->get_ndx_list();
121  }
122 
123  /// @brief return the subcluster
125  {
126  return subclusters_[nc];
127  }
128 
130  return center_ndx_.size();
131  }
132 
133  /// @brief return current cluster number
135  {
136  return subclusters_.size();
137  }
138 
139  /// @brief return current data number
140  Size get_ndata() const // number of entries currently held in data_ndx_
141  {
142  return data_ndx_.size(); // taken from the list itself, not from the n_data_ member
143  }
144 
145  /// @brief return the data ndx stored at position ndx_data of data_ndx_
146  Size get_data_ndx(Size ndx_data) const // here ndx_data is a position in this element's list; the return value is the data ndx stored there
147  {
148  return data_ndx_[ndx_data]; // plain indexed read; no range check in this body
149  }
150 
152  {
153  return max_distance_;
154  }
155 
157  {
158  return max_dist_ndx_;
159  }
160 
161  /// @brief clean the data list
162  void clear_data() // empties the three per-data lists together; center_ndx_ and subclusters_ are left as they are
163  {
164  data_ndx_.clear();
165  type_list_.clear();
166  dist_list_.clear();
167  }
168  /// @brief clean the subcluster's list, open edit mode
169  void clear() // empties every sub-element's data lists, resets the max-distance bookkeeping, and enters edit mode
170  {
171  assert(!edit_mode); // must not already be in edit mode
172  for (Size i=1, e=subclusters_.size(); i<=e; i++) // 1-based walk over all sub-elements
173  {
174  subclusters_[i]->clear_data(); // only the child's data lists are emptied; this element's own lists and center_ndx_ are untouched
175  }
176  edit_mode=true; // stays set until check() is called
177  max_distance_=-1.0; // same values the initializer lists earlier in this class use
178  max_dist_ndx_=0;
179  }
180 
181  /// @brief check the list, close edit mode
182  void check() // leaves edit mode; despite the name, this body performs no validation of the lists
183  {
184  assert(edit_mode); // edit mode must be open; clear() is the only visible place that sets it
185  edit_mode=false;
186  }
187 
188 private:
189  ClusterNdxList data_ndx_; //the ndx of each data; root empty
190  ClusterTypList type_list_; //the type of each data
191  ClusterDisList dist_list_; //the distance of each data to center
192 
193  ClusterNdxList center_ndx_; //the data ndx of each cluster center
195 
196  Size n_data_; //number of data; NOTE(review): not set by the visible initializer lists, and get_ndata() uses data_ndx_.size() instead - confirm it is still needed
197  Real max_distance_; //save the farthest data's d
198  Size max_dist_ndx_; //save the farthest data's ndx
199 
200  bool edit_mode; //set to true by clear() and back to false by check()
201 };
202 
203 
204 /// @brief database of a K-style Clustering algorithm
206 {
207 public:
208  ///@brief Automatically generated virtual destructor for class deriving directly from ReferenceCount
209  virtual ~KClusterData(); // declaration only; the definition is not visible in this listing
210  typedef ObjexxFCL::FArray1D_double FA1Dd; // local shorthand names for ObjexxFCL array types
212  typedef ObjexxFCL::FArray2_double FA2d; // used by dist_square(FA2d &, FA2d &) below
213  typedef ObjexxFCL::FArray2P_double FA2Pd;
214  typedef ObjexxFCL::FArray3P_double FA3Pd;
216 
217  KClusterData();
218 
219  void load_silent_files(); // declaration only
220  void load_silent_file(string,Size); // declaration only; parameters are unnamed - presumably (silent file name, file index), TODO confirm
221  //void save_cluster();
222 
223  Size get_ndata() const // accessor for the stored data count
224  {
225  return ndata_; // the ndata_ member ("number of data")
226  }
227  Size get_natom() const
228  {
229  return n_ca_atom_; // NOTE(review): returns n_ca_atom_ (number of CA in each data), not the separate natom_ member - confirm this is intended
230  }
231 
233  return original_tags_[i][1];
234  }
235 
237  return original_filenames_[i];
238  }
239 
241  {
242  return dataset_.coords();
243  }
244 
245  //for saving the cluster center
246  void mark_tags(KClusterElementOP, string); // declaration only; parameters are unnamed - presumably (cluster element, tag text), TODO confirm
247  void save_all_in_one();
248  void save_cluster_tree();
249 
250  Real dist_square(FA2d &, FA2d &); // overload taking two coordinate arrays; presumably returns a squared distance, TODO confirm
251  Real dist_square(Size, Size); // overload taking two Size values - presumably data indices, TODO confirm
252 
253  void show_cluster_assignments(); //ek added 2-4-2011
254 
255 private:
257  Size ndata_; //number of data; returned by get_ndata()
258  Size natom_; //number of atoms used for calculating rmsd
259  Size n_ca_atom_; //number of CA in each data; this is what get_natom() returns
260  Size nfile_; //number of silent files
261  utility::vector1< TagList > tags_; //save the tag for each data
262  utility::vector1< TagList > original_tags_; // map the cluster tag back to the original decoy tag
263  utility::vector1< std::string > original_filenames_; //map the cluster tag back to the original filename (needed because many silent files often contain decoys with the same name)
265 };
266 
267 
268 /// @brief basic class for performing a K-style Clustering algorithm
270 {
271 public:
272  typedef ObjexxFCL::FArray2_double FA2d; // local shorthand names for ObjexxFCL array types
273  typedef ObjexxFCL::FArray2P_double FA2Pd;
275 
276  KCluster();
277  virtual ~KCluster();
278  virtual void init(KClusterElementOP, Size first=0)=0; // pure virtual; NOTE(review): a default argument on a virtual is chosen by the static type of the call - KMedoid and GreedyKCenter repeat first=0, keep them in sync
279  virtual void update(KClusterElementOP, KClusterData&)=0; // pure virtual; redeclared by KMedoid and GreedyKCenter
280  virtual bool whoami()=0; // pure virtual; the meaning of the bool is not documented here - TODO confirm
281  virtual Real get_threshold()=0;
282  virtual void set_threshold(Real)=0;
283 
284  //virtual Real assign(KClusterData&)=0;//typical assign (K-means)
285  virtual Real assign( KClusterElementOP, KClusterData&)=0; // pure virtual; the meaning of the returned Real is not documented here - TODO confirm
286 
287  void cluster( KClusterElementOP, KClusterData&, Size first=0); // non-virtual; presumably drives init/assign/update, TODO confirm against the definition
288  void set_ncluster(Size nc){n_cluster_=nc;} // stores nc in n_cluster_; that member's declaration is not visible in this listing
289 
290 protected:
292 };
293 
294 
295 /// @brief Typical K-Medoids Clustering Algorithm
296 class KMedoid : public KCluster // redeclares the pure virtual functions of KCluster shown below
297 {
298 public:
299  KMedoid();
300 
301  bool whoami(); // same signature as KCluster::whoami
302  Real get_threshold(); // same signature as KCluster::get_threshold
303  void init(KClusterElementOP, Size first=0); // repeats the first=0 default from KCluster::init
305  void update(KClusterElementOP, KClusterData&); // NOTE(review): the listing skips a line here (303 to 305), so assign() is not visible in this class
306  void set_threshold(Real); // same signature as KCluster::set_threshold
307 protected:
310  void copy_coord(Size, FA2d &, FA2d &); // declaration only; parameters are unnamed - presumably (count, source, destination) in some order, TODO confirm; the listing also skips two lines before this one (307 to 310)
311 };
312 
313 
314 /// @brief Greedy K-Center Clustering Algorithm
315 /// @note "A Fast Geometric Clustering Method on Conformation Space of Biomolecules"
316 /// Jian Sun, Yuan Yao, Xuhui Huang, Vijay Pande, Gunnar Carlsson, Leonidas J. Guibas
317 class GreedyKCenter : public KCluster // redeclares the pure virtual functions of KCluster shown below
318 {
319 public:
320  GreedyKCenter();
321 
322  bool whoami(); // same signature as KCluster::whoami
323  Real get_threshold(); // same signature as KCluster::get_threshold
324  void init(KClusterElementOP, Size first=0); // repeats the first=0 default from KCluster::init
326  void update(KClusterElementOP, KClusterData&); // NOTE(review): the listing skips a line here (324 to 326), so assign() is not visible in this class
327  void set_threshold(Real); // same signature as KCluster::set_threshold
328 protected:
330 }; // the protected section's content (listing line 329) is not visible
331 
332 //get the type of cluster from options
333 KClusterOP get_K_cluster_engine(const string&); // declaration only; presumably the string names the engine type (e.g. KMedoid or GreedyKCenter), TODO confirm accepted values
334 //parse the tag, get the full path of the silent file
335 string file_full_path(string); // declaration only; takes and returns a string by value
336 //fix the missing tag suffix
337 string fix_tag_suffix(string); // declaration only; takes and returns a string by value
338 
339 }//toolbox
340 }//protocols
341 
342 #endif