Rosetta 3.5
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
KCluster.cc
Go to the documentation of this file.
1 // -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
2 // vi: set ts=2 noet:
3 //
4 // (c) Copyright Rosetta Commons Member Institutions.
5 // (c) This file is part of the Rosetta software suite and is made available under license.
6 // (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
7 // (c) For more information, see http://www.rosettacommons.org. Questions about this can be
8 // (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
9 
10 /// @file protocols/toolbox/KCluster.cc
11 /// @brief Fast clustering algorithm for large silent file
12 /// @author Yuan Liu (wendao@u.washington.edu)
13 
16 #include <protocols/loops/Loop.hh>
17 #include <protocols/loops/Loops.hh>
18 
19 // AUTO-REMOVED #include <core/init.hh>
21 //#include <core/io/silent/SilentStructFactory.hh>
22 
23 #include <basic/Tracer.hh>
24 #include <basic/options/option.hh>
25 // AUTO-REMOVED #include <basic/options/option_macros.hh>
26 #include <basic/options/keys/in.OptionKeys.gen.hh>
27 #include <basic/options/keys/out.OptionKeys.gen.hh>
28 #include <basic/options/keys/mc.OptionKeys.gen.hh>
29 #include <basic/options/keys/cluster.OptionKeys.gen.hh>
30 
31 #include <utility/exit.hh>
32 #include <utility/file/FileName.hh>
33 #include <utility/file/PathName.hh>
34 #include <utility/file/file_sys_util.hh>
35 #include <numeric/xyzMatrix.hh>
36 #include <numeric/random/random.hh>
37 
38 #include <iostream>
39 #include <string>
40 // AUTO-REMOVED #include <cstring>
41 #include <fstream>
42 #include <sstream>
43 #include <cstdio>
44 #include <algorithm>
45 
46 #include <utility/vector1.hh>
47 
48 
49 using namespace std;
50 using namespace core;
51 
52 static basic::Tracer TR("protocols.kcluster");
53 static numeric::random::RandomGenerator RG(2831); // <- Magic number, do not change it!!!
54 
55 namespace protocols {
56 namespace toolbox {
57 
58 /// @details Auto-generated virtual destructor
59 KClusterData::~KClusterData() {}
60 
61 /// @details Auto-generated virtual destructor
62 KClusterElement::~KClusterElement() {}
63 
64 KClusterOP get_K_cluster_engine(const string &style)
65 {
66  if (style == "GKC") return new GreedyKCenter();
67  else if (style == "KMedoid") return new KMedoid();
68  else
69  {
70  utility_exit_with_message("Undefined KCluster type!");
71  }
72 
73  return new GreedyKCenter();
74 }
75 
76 //get desired output file name from tag
77 string file_full_path(string tag)
78 {
79  using namespace basic::options;
80  using namespace basic::options::OptionKeys;
81 
82  static string subdir_prefix("sub_");
83  static string libdir_prefix("c_");
84  static string libdir_suffix("_lib");
85  static string s("/");
86  static int nofsubdir = option[cluster::K_n_sub];
87 
88  //if the mc:hierarchical_pool is specified, then use that dir
89  static string rootpath;
90  static string defaultpath = utility::file::FileName(option[out::file::silent]()).base() + libdir_suffix + s;
91  static string defaultfile;
92  static bool flag=true;
93 
94  if (flag)
95  {
96  if (option[mc::hierarchical_pool].user())
97  {
98  rootpath = option[mc::hierarchical_pool]()+"_dir/sub_000/";
99  defaultfile = "c_00001.out";
100  defaultpath = "c_00001_lib/";
101  }
102  else
103  {
104  rootpath = utility::file::FileName(option[out::file::silent]()).path();
105  defaultfile = utility::file::FileName(option[out::file::silent]()).local_name();
106  defaultpath = utility::file::FileName(defaultfile).base() + libdir_suffix + s;
107  }
108  flag=false;
109  }
110 
111  int pos=tag.find('.');
112  int len=tag.length();
113  utility::vector1<int> id_stack;
114  while(pos<len && pos>0)
115  {
116  int newpos = tag.find('.', pos+1);
117  id_stack.push_back(atoi(tag.substr(pos+1,newpos-pos-1).c_str()));
118  pos = newpos;
119  }
120 
121  Size n = id_stack.size();
122  if (n == 1) return rootpath+defaultfile;
123  ostringstream fnstream;
124  fnstream << "c_" << setfill ('0') << setw (5) << id_stack[n-1] << ".out";
125  string fn(fnstream.str());
126 
127  for (Size i=n-1; i>=1; i--)
128  {
129  ostringstream pathstream1;
130  ostringstream pathstream2;
131 
132  //main dir
133  if (i>1) pathstream1 << libdir_prefix << setfill ('0') << setw (5) << id_stack[i-1] << libdir_suffix << s ;
134  //sub dir
135  pathstream2 << subdir_prefix << setfill ('0') << setw (3) << int((id_stack[i]-1)/nofsubdir) << s;
136 
137  fn = pathstream1.str()+pathstream2.str()+fn;
138  }
139  return rootpath+defaultpath+fn;
140 }
141 
142 string fix_tag_suffix(string str)
143 {
144  using namespace basic::options;
145  using namespace basic::options::OptionKeys;
146  static Size nlevel = option[cluster::K_level];
147  Size n=count(str.begin(), str.end(), '.');
148  for (Size i=n; i<nlevel; i++) str+=".1";
149 
150  int pos; //a silly hack
151  while ((pos = str.find('_'))>0) str.replace(pos,1,1,'.');
152 
153  return str;
154 }
155 
156 /// @brief assign a data into a cluster
157 void KClusterElement::assign_type_data(Size ndx_data, Size ndx_cluster, Real d)
158 {
159  using namespace basic::options;
160  using namespace basic::options::OptionKeys;
161  assert(edit_mode);
162  //assign type
163  type_list_[ndx_data] = ndx_cluster;
164  //save distance
165  dist_list_[ndx_data] = d;
166  //save the farest one
167  if (d>max_distance_)
168  {
169  max_distance_=d;
170  max_dist_ndx_=ndx_data;
171  }
172  //add into list, real data id
173  //???
174  //don't save the center into its cluster list
175  //???
176  if (option[cluster::K_redundant]() || (data_ndx_[ndx_data] != center_ndx_[ndx_cluster]))
177  {
178  subclusters_[ndx_cluster]->add_new_data(data_ndx_[ndx_data]);
179  }
180 }
181 
182 //////////////
183 //KClusterData
184 //////////////
185 KClusterData::KClusterData()
186 :ndata_(0),
187 natom_(0),
188 n_ca_atom_(0),
189 nfile_(0)
190 {
191  using namespace basic::options;
192  using namespace basic::options::OptionKeys;
193  using namespace protocols::loops;
194 
195  Loops loops( true );
196  natom_ = loops.loop_size();
197  if (natom_>0)
198  {
199  //specified loop region
200  for( Loops::const_iterator it=loops.begin(), it_end=loops.end(); it != it_end; ++it )
201  {
202  for (core::Size i=it->start(), end=it->stop(); i<=end; i++)
203  {
204  rmsd_ca_list_.push_back(i);
205  }
206  }
207  runtime_assert(natom_==rmsd_ca_list_.size());
208  }
209 
211  runtime_assert(n_ca_atom_>0 && ndata_>0);
212 
213  //debug
214  TR << "Finished loading database:" << endl;
215  TR << "Number of data: " << ndata_ << endl;
216  TR << "Number of file: " << nfile_ << endl;
217 }
218 
220 {
221  using namespace basic::options;
222  using namespace basic::options::OptionKeys;
223 
224  static FA1Dd weights( natom_, 1.0 );
225  static numeric::xyzMatrix< Real > R; //do not care
226  //fit
227  if (!option[cluster::K_not_fit_xyz]) protocols::toolbox::fit_centered_coords(natom_, weights, conf1, conf2, R);
228 
229  //cal dist
230  Real sum=0.0;
231  for (Size i=1; i<=natom_; i++)
232  {
233  for (Size d=1; d<=3; d++)
234  {
235  Real dx = conf1(d,i) - conf2(d,i);
236  sum += dx*dx;
237  }
238  }
239  return sum/natom_;
240 }
241 
243 {
244  FA2Pd conf1(dataset_.coords()(1,1,ndx1), 3, natom_);
245  FA2Pd conf2(dataset_.coords()(1,1,ndx2), 3, natom_);
246  return dist_square(conf1, conf2);
247 }
248 
250 {
251  using namespace basic::options;
252  using namespace basic::options::OptionKeys;
253 
254  TR << "Reading Silent Files ..." << endl;
255 
256  if (option[in::file::silent].user())
257  {
258  for (Size i=1, e=option[in::file::silent]().size(); i<=e; i++)
259  {
260  load_silent_file(option[in::file::silent]()[i], i);
261  }
262  }
263  else if (option[in::file::silent_list].user())
264  {
265  for (Size i=1, e=option[in::file::silent_list]().size(); i<=e; i++)
266  {
267  load_silent_file(option[in::file::silent_list]()[i], i);
268  }
269  }
270  else
271  {
272  //no silent file input
273  utility_exit_with_message("Please specify the input silent file or list!");
274  }
275 
276  if (!option[cluster::K_not_fit_xyz])
277  {
278  //dataset_.superimpose(); //!!! maybe a new method that just reset_x is better
279  FA1Dd transvec( 3 );
280  FA1Dd weights(natom_, 1.0);
281  for (Size i=1; i<=ndata_; i++)
282  {
283  FA2Pd conf( dataset_.coords()(1,1,i), 3, natom_ );
284  reset_x( natom_, conf, weights, transvec );
285  }
286  }
287 }
288 
289 void KClusterData::load_silent_file(string silent_file, Size nfile)
290 {
292  sfd.read_file( silent_file );
293 
294  Size extra=sfd.size();
295 
296  if ( dataset_.n_decoys_max() < dataset_.n_decoys() + extra + 1 )
297  {
298  dataset_.reserve( dataset_.n_decoys() + extra );
299  }
300 
301  Size ncoord=0;
302  for ( io::silent::SilentFileData::iterator it=sfd.begin(), eit=sfd.end(); it!=eit; ++it )
303  {
304  FA2Dd original_xyz(it->get_CA_xyz());
305  //check n_ca_atom_
306  if ( n_ca_atom_==0 ) n_ca_atom_=it->nres();
307  else runtime_assert( n_ca_atom_ == it->nres() );
308 
309  //setup final_xyz
310  if (natom_==0)
311  {
312  //no loop specified, use all residues
313  natom_ = n_ca_atom_;
314  for (core::Size i=1; i<=n_ca_atom_; i++) rmsd_ca_list_.push_back(i);
315  }
316  //debug
317  //TR << "n_ca_atom_ = " << n_ca_atom_ << std::endl;
318  //TR << "natom_ = " << natom_ << std::endl;
319  FA2Dd final_xyz(3, natom_, 0.0);
320 
321  for( core::Size i=1; i<=natom_; i++)
322  {
323  final_xyz(1,i) = original_xyz(1,rmsd_ca_list_[i]);
324  final_xyz(2,i) = original_xyz(2,rmsd_ca_list_[i]);
325  final_xyz(3,i) = original_xyz(3,rmsd_ca_list_[i]);
326  }
327 
328  dataset_.push_back_CA_xyz( final_xyz, natom_ );
329 
330  //build tag
331  ncoord++;
332  ostringstream tag;
333  tag << "d." << setfill ('0') << setw (4) << nfile << "."
334  << setfill ('0') << setw (8) << ncoord;
335  TagList list;
336  list.push_back(tag.str());
337  tags_.push_back(list);
338  TagList tagvec;
339  tagvec.push_back(it->decoy_tag());
340  original_tags_.push_back(tagvec);
341  // TagList fn;
342  // fn.push_back( silent_file );
343  original_filenames_.push_back( silent_file );
344  }
345 
346  //check natom_
347  runtime_assert(dataset_.n_atoms()==natom_);
348  //if (natom!=natom_) {
349  // utility_exit_with_message("Input silent file contains different protein size!");
350  //}
351 
352  ndata_ += extra;
353  assert(ndata_ == dataset_.n_decoys());
354  nfile_++;
355 }
356 
357 //for saving the cluster center
359 {
360  //mark of the centers in this element
361  //if the subcluster's center_ndx_ is not empty, recursive
362  Size nc = c->get_cur_ncluster();
363  if (nc==0)return;//makesure this element has been clustered
364  for (Size i=1; i<=nc; i++)
365  {
366  //for all center
367  ostringstream tag;
368  //tag << t << "." << setfill ('0') << setw (5) << i;
369  tag << t << "." << i;
370  //mark that have not been marked
371  //if (tags_[c->get_center_ndx(i)].c_str()[0]=='d') tags_[c->get_center_ndx(i)]=tag.str();
372  tags_[c->get_center_ndx(i)].push_back(tag.str());
373 
374  //if has subcluster
375  mark_tags(c->get_subcluster(i), tag.str());
376  }
377 }
378 
380 {
381  using namespace basic::options;
382  using namespace basic::options::OptionKeys;
383 
385 
386  if (option[in::file::silent].user())
387  {
388  fnlist=option[in::file::silent]();
389  }
390  else if (option[in::file::silent_list].user())
391  {
392  fnlist=option[in::file::silent_list]();
393  }
394 
395  //save the center
397  string const silent_outfile(option[out::file::silent]());
398 
399  //read from file list
400  Size count=0;
401  for (Size i=1; i<=nfile_; i++)
402  {
403  //sort the struct order
405  sfd.read_file( fnlist[i] );
406 
407  for ( io::silent::SilentFileData::iterator it=sfd.begin(), eit=sfd.end(); it!=eit; ++it )
408  {
409  count++;
410  //if (tags_[count].c_str()[0]=='d') continue; //skip data
411  //get center that has been marked
412  //it->set_decoy_tag(tags_[count]);
413  //clusters.write_silent_struct(**it,silent_outfile);
414  for(Size i=2, nt=tags_[count].size(); i<=nt; i++)
415  {
416  //it->set_decoy_tag(tags_[count][i]);
417  it->add_string_value("cluster_id",tags_[count][i]);
418  clusters.write_silent_struct(**it,silent_outfile);
419  }
420  }
421  }
422  assert(count==ndata_);
423 }
424 
426 {
427  using namespace basic::options;
428  using namespace basic::options::OptionKeys;
429 
431 
432  if (option[in::file::silent].user())
433  {
434  fnlist=option[in::file::silent]();
435  }
436  else if (option[in::file::silent_list].user())
437  {
438  fnlist=option[in::file::silent_list]();
439  }
440 
441  //save the center
443 
444  //read from file list
445  Size count=0;
446  for (Size i=1; i<=nfile_; i++)
447  {
448  //sort the struct order
450  sfd.read_file( fnlist[i] );
451 
452  for ( io::silent::SilentFileData::iterator it=sfd.begin(), eit=sfd.end(); it!=eit; ++it )
453  {
454  count++;
455  //if (tags_[count].c_str()[0]=='d') continue; //skip data
456  //get center that has been marked
457  for(Size i=2, nt=tags_[count].size(); i<=nt; i++)
458  {
459  string filename(file_full_path(tags_[count][i]));
460  utility::file::create_directory_recursive(utility::file::FileName(filename).path());
461 
462  if (option[cluster::K_redundant])
463  {
464 
465  //it->set_decoy_tag(fix_tag_suffix(tags_[count][i]));
466  it->add_string_value("cluster_id",fix_tag_suffix(tags_[count][i]));
467  }
468  else
469  {
470  //it->set_decoy_tag(tags_[count][i]);
471  it->add_string_value("cluster_id",tags_[count][i]);
472  }
473 
474  if (option[ cluster::K_save_headers ])
475  {
476  std::ofstream os;
477  os.open( filename.c_str(), std::ios::app);
478  (*it)->print_header( os ); //this outputs a header to the silent file.
479  os.close();
480  }
481  clusters.write_silent_struct(**it,filename);
482  }
483  }
484  }
485  assert(count==ndata_);
486 }
487 
488 void
490  std:: cout << "outputting cluster assignments..." << std::endl;
491  for( core::Size ii = 1; ii <= tags_.size(); ii++ ){
492  std::cout << "size of tagslist for index: " << ii << " is " << tags_[ii].size() << std::endl;
493  for( core::Size jj = 1; jj <= tags_[ii].size(); jj++ ){
494  std::cout << "ndx belonging to cluster " << ii << " " << tags_[ii][jj] << std::endl;
495  }
496  }
497 
498 }
499 
500 //////////
501 //KCluster
502 //////////
504 {
505  //using namespace basic::options;
506  //using namespace basic::options::OptionKeys;
507  //n_cluster_ = option[ cluster::K_n_cluster ];
508  n_cluster_ = 0;
509 }
510 
512 
514 {
515  TR << endl;
516  TR << "*********** Job ***********" << endl;
517  init(c, first); //init cluster(s), randomly or keep the old cluster
518  TR << "Database: " << c->get_ndata() << " structures" << endl;
519  TR << "Clustering ..." << endl;
520 
521  bool flag(whoami());
522  Real old_score = 999999;
523  do
524  {
525  Real score = assign(c,d);
526  TR << "Score: " << score << " N_cluster: " << c->get_cur_ncluster() << endl;
527 
528  if (flag)
529  {
530  TR << "in KCenter mode" << std::endl;
531  //KCenter
532  if (c->get_cur_ncluster()==n_cluster_)
533  {
534  break;
535  }
536 
537  if (score<=get_threshold())break;
538  }
539  else
540  {
541  //KMedoid
542  if (old_score-score<=get_threshold()) break;
543  old_score = score;
544  }
545 
546  update(c,d);
547  }
548  while (true);
549  TR << "Finish!" << endl;
550 }
551 
552 //////////
553 //KMedoid
554 //////////
556 {
557  using namespace basic::options;
558  using namespace basic::options::OptionKeys;
559  //threshold_ = option[cluster::K_threshold];
560  threshold_ = 0.02;
561 }
562 
564 {
565  return threshold_;
566 }
567 
569 {
570  threshold_ = t;
571 }
572 
574 {
575  TR << "I am K-Medoid Algorithm!" << endl;
576  return false;
577 }
578 
580 {
581  cur_ncluster_ = c->get_cur_ncluster();
582  Size nd = c->get_ndata();
583 
584  if (cur_ncluster_ == 0)
585  {
586  cur_ncluster_ = min(n_cluster_, nd);
587  assert(cur_ncluster_ > 0);
588  //randomly choose n center
589  TR << "Empty cluster, randomly choose center" << endl;
590  for (Size i=1; i<=cur_ncluster_; i++)
591  {
592  Size newcenter;
593  if (i==1 && first>0)
594  {
595  //specify the first one
596  newcenter = first;
597  }
598  else
599  {
600  newcenter = c->get_data_ndx(static_cast<int>( RG.uniform() * nd + 1 ));
601  }
602 
603  Size flag = i;
604  for (Size j=1; j<i; j++)
605  {
606  //make sure there's no replica
607  if (c->get_center_ndx(j) == newcenter)
608  {
609  i--;
610  break;
611  }
612  }
613 
614  if (i==flag) c->add_new_cluster(newcenter);
615  }
616  }
617  else
618  {
619  TR << "Load clusters with " << cur_ncluster_ << " centers" << endl;
620  }
621 }
622 
623 void KMedoid::copy_coord(Size len, FA2d &src, FA2d &dst)
624 {
625  for (Size i=1; i<=len; i++)
626  {
627  for (Size j=1; j<=3; j++)
628  {
629  dst(j,i) = src(j,i);
630  }
631  }
632 }
633 
635 {
636  //assign all data to the nearest center
637  //keep the coord that fit the nearest center
638  Size nd = c->get_ndata();
639  Size na = d.get_natom();
640  FA2Dd coord(3, na, 0.0);
641 //TR << "DEBUG: ncluster" << cur_ncluster_ << endl;
642  //DEBUG
643  //TR<<"center list:"<< endl;
644  //for (Size i=1; i<=cur_ncluster_; i++)
645  //{
646  // TR <<c->get_center_ndx(i) << endl;
647  //}
648 
649  for (Size nc=1; nc<=cur_ncluster_; nc++)
650  {
651  //for each cluster center
652 
653  //build the center dist list
654  ObjexxFCL::FArray1D_double center_dis_list(nc,0.0);
655  for (Size i=1; i<nc; i++)
656  {
657  //TR << "Center list: c" << i <<":"<<c->get_center_ndx(i) << ", c" << nc <<":"<<c->get_center_ndx(nc)<< " d: ";
658  center_dis_list(i) = sqrt(d.dist_square(
659  c->get_center_ndx(i),
660  c->get_center_ndx(nc) ));
661  //TR << center_dis_list(i) << endl;
662  }
663 
664  c->clear();
665  for (Size i=1; i<=nd; i++)
666  {
667  //assign the center to its cluster
668  if (c->get_data_ndx(i)==c->get_center_ndx(nc))
669  {
670  c->assign_type_data(i, nc, 0.0);
671  continue;
672  }
673 
674  //for each data
675  if (nc==1)
676  {
677  //assign all to the the first
678  c->assign_type_data(i, nc, sqrt(d.dist_square(
679  c->get_center_ndx(nc), c->get_ndx_list()[i]
680  )));
681  continue;
682  }
683 
684  Real d_old = c->get_distance(i);
685 
686  //apply triangle inequality
687  if (d_old <= center_dis_list(c->get_type(i))/2.0 )
688  {
689  //dist to the new center is farer than the old one
690  c->assign_type_data(i,c->get_type(i),d_old);
691  continue;
692  }
693 
694 
695  FA2Pd src(d.coords()(1,1,c->get_ndx_list()[i]),3,na);
696  copy_coord(na, src, coord);//save old structure
697  Real d_new = sqrt(d.dist_square(c->get_center_ndx(nc),c->get_ndx_list()[i]));//it would be align to the new center
698  if (d_new<d_old)
699  {
700  c->assign_type_data(i, nc, d_new);
701  }
702  else
703  {
704  copy_coord(na, coord, src);//re, make sure every structure align to their center
705  c->assign_type_data(i,c->get_type(i),d_old);
706  }
707  }
708  c->check();
709  }
710 
711  //calculate score
712  Real sum=0.0;
713  for (Size i=1; i<=nd; i++)
714  {
715  sum += c->get_distance(i);
716  }
717  return sum/nd;
718 }
719 
721 {
722  //TR << "Update..." << endl;
723  //find the coord most closed to the average of the cluster
724  Size na = d.get_natom();
725  for (Size i=1; i<=cur_ncluster_; i++)
726  {
727  //for each cluster
728  const utility::vector1<Size> &list(c->get_ndx_list(i));
729  Size nd = list.size();
730  FA2Dd sum(3, na, 0.0);
731  for (Size j=1; j<=nd; j++)
732  {
733  FA2Pd coord(d.coords()(1,1,list[j]),3,na);
734  for (Size m=1; m<=na; m++)
735  {
736  for (Size n=1; n<=3; n++)
737  {
738  sum(n,m)+=coord(n,m);
739  }
740  }
741  }
742 
743  /*/debug
744  TR << "Cluster: " << i << " -> ";
745  for (Size j=1; j<=nd; j++)
746  {
747  TR << list[j] << " ";
748  }
749  TR << endl;*/
750 
751  //average
752  for (Size m=1; m<=na; m++)
753  {
754  for (Size n=1; n<=3; n++)
755  {
756  sum(n,m)/=nd;
757  }
758  }
759 
760  //find the nearest
761  Real mindist2 = 999999;
762  Size nearest = 0;
763  for (Size j=1; j<=nd; j++)
764  {
765  //DEBUG
766  //TR << "U list: " << j << ":" << list[j];
767  FA2Pd coord(d.coords()(1,1,list[j]),3,na);
768  Real d2_new = d.dist_square(coord,sum);
769  //TR << " d_new: " << d2_new << " -- " << mindist2 << endl;
770  if (d2_new < mindist2)
771  {
772  mindist2 = d2_new;
773  nearest = list[j];
774  //TR << nearest << endl;
775  }
776  }
777 
778  //TR << "final: "<< nearest << endl;
779  //re set the center
780  assert(nearest>0);
781  c->set_cluster(i,nearest);
782  }
783 }
784 
785 ///////////////
786 //GreedyKCenter
787 ///////////////
789 {
790  using namespace basic::options;
791  using namespace basic::options::OptionKeys;
792  //radius_ = option[ cluster::K_radius ];
793  radius_ = 2.0;
794 }
795 
796 //GreedyKCenter::~GreedyKCenter(){}
798 {
799  TR << "I am Approximate K-Center Algorithm!" << endl;
800  return true;
801 }
802 
804 {
805  return radius_;
806 }
807 
809 {
810  radius_ = r;
811 }
812 
814 {
815  //random select a center
816  TR << "Initializing ..." << endl;
817  assert(c->get_cur_ncluster()==0);//only begin from an empty clusters
818  Size center;
819  if (first>0)
820  {
821  center = first;
822  }
823  else
824  {
825  center = c->get_data_ndx(static_cast< int >( RG.uniform() * c->get_ndata() + 1 ));
826  }
827  //debug
828  //TR << "Rand: " << center << endl;
829  c->add_new_cluster(center);
830 }
831 
833 {
834  //assign all data to the nearest center
835  Size nc = c->get_cur_ncluster();
836 
837  //save center dist
838  ObjexxFCL::FArray1D_double center_dis_list(nc,0.0);
839  for (Size i=1; i<nc; i++)
840  {
841  center_dis_list(i) = sqrt(d.dist_square(
842  c->get_center_ndx(i),
843  c->get_center_ndx(nc) ));
844  //TR << "Center list: c" << i << ", c" << nc << " d: " << center_dis_list(i) << endl;
845  }
846 
847  c->clear();
848  Size nd = c->get_ndata();
849  for (Size i=1; i<=nd; i++)
850  {
851  //for each data
852  Real d_old = c->get_distance(i);
853  if (nc>1)
854  {
855  if (d_old <= center_dis_list(c->get_type(i))/2.0 )
856  {
857  //dist to the new center is farer than the old one
858  c->assign_type_data(i,c->get_type(i),d_old);
859  continue;
860  }
861  }
862  Real d_new = sqrt(d.dist_square(c->get_center_ndx(nc),c->get_ndx_list()[i]));
863  if (d_new<d_old)
864  {
865  c->assign_type_data(i, nc, d_new);
866  }
867  else
868  {
869  c->assign_type_data(i,c->get_type(i),d_old);
870  }
871  }
872  c->check();
873 
874  return c->get_max_distance();
875 }
876 
878 {
879  if (c->get_cur_ncluster()==n_cluster_)
880  {
881  assert(false);
882  //add a psesudo cluster center, which will be remove later
883  c->add_new_cluster(0);
884  return;
885  }
886 
887  //debug
888  //TR << "Farest one:" << c->get_max_dist_ndx() << endl;
889  c->add_new_cluster(c->get_ndx_list()[c->get_max_dist_ndx()]);
890  return;
891 }
892 
893 }//toolbox
894 }//protocols