23 #include <basic/Tracer.hh>
24 #include <basic/options/option.hh>
26 #include <basic/options/keys/in.OptionKeys.gen.hh>
27 #include <basic/options/keys/out.OptionKeys.gen.hh>
28 #include <basic/options/keys/mc.OptionKeys.gen.hh>
29 #include <basic/options/keys/cluster.OptionKeys.gen.hh>
31 #include <utility/exit.hh>
32 #include <utility/file/FileName.hh>
33 #include <utility/file/PathName.hh>
34 #include <utility/file/file_sys_util.hh>
35 #include <numeric/xyzMatrix.hh>
36 #include <numeric/random/random.hh>
46 #include <utility/vector1.hh>
// File-local tracer constructed with the channel name protocols.kcluster.
// All TR << ... status messages in this file go through this object.
52 static basic::Tracer
TR(
"protocols.kcluster");
// File-local random generator constructed with the fixed argument 2831
// (presumably a seed or stream id -- confirm against RandomGenerator).
// RG.uniform() is what the center-initialisation code below draws from.
53 static numeric::random::RandomGenerator
RG(2831);
// Out-of-line destructor with an empty body: no explicit cleanup is done here.
59 KClusterData::~KClusterData() {}
// Out-of-line destructor with an empty body: no explicit cleanup is done here.
62 KClusterElement::~KClusterElement() {}
// Tail of a dispatch on the style string (the function header and the earlier
// branches are not visible in this excerpt). The style KMedoid yields a newly
// allocated KMedoid object.
// NOTE(review): the raw new hands ownership to the caller -- confirm that the
// return type (not visible here) releases it.
67 else if (style ==
"KMedoid")
return new KMedoid();
// Reached when no branch returned: abort with a fatal message.
70 utility_exit_with_message(
"Undefined KCluster type!");
// Builds a file path from a dotted tag (the function header is not visible in
// this excerpt). The numeric fields that follow each dot in tag are collected
// in id_stack and then turned into nested directory names plus a file name,
// all appended to rootpath.
79 using namespace basic::options;
80 using namespace basic::options::OptionKeys;
// Name fragments used when assembling directory names.
82 static string subdir_prefix(
"sub_");
83 static string libdir_prefix(
"c_");
84 static string libdir_suffix(
"_lib");
// Initialised a single time from the cluster::K_n_sub option; used below as
// the divisor that maps an id to its sub_ directory index.
// NOTE(review): a value of 0 would make that division undefined -- confirm
// the option is validated somewhere.
86 static int nofsubdir = option[cluster::K_n_sub];
89 static string rootpath;
91 static string defaultfile;
// flag starts out true; the lines that test or clear it are not visible here.
// Presumably it limits the option-based setup below to the first call --
// TODO confirm.
92 static bool flag=
true;
// When the user supplied mc::hierarchical_pool, the root path and the default
// file and directory names are derived from that option value.
96 if (option[mc::hierarchical_pool].user())
98 rootpath = option[mc::hierarchical_pool]()+
"_dir/sub_000/";
99 defaultfile =
"c_00001.out";
100 defaultpath =
"c_00001_lib/";
// Position of the first dot. The find result is stored in an int, so a
// not-found result is expected to become negative (relies on the usual
// npos-to-int conversion), which makes the loop below a no-op. A dot at
// index 0 also fails the pos>0 test.
111 int pos=tag.find(
'.');
112 int len=tag.length();
114 while(pos<len && pos>0)
// The text between this dot and the next one is parsed with atoi and pushed.
// When there is no further dot, newpos is presumably negative, so the length
// argument wraps to a very large value and substr runs to the end of tag.
// The update of pos happens in a line not visible here.
116 int newpos = tag.find(
'.', pos+1);
117 id_stack.push_back(atoi(tag.substr(pos+1,newpos-pos-1).c_str()));
// NOTE(review): if id_stack is empty (n == 0) and Size is unsigned, the n-1
// expressions below wrap around -- confirm callers always pass a tag with at
// least one numeric field.
121 Size n = id_stack.size();
// A tag with exactly one numeric field maps to the default file under rootpath.
122 if (n == 1)
return rootpath+defaultfile;
// File name: c_ followed by id_stack[n-1] zero-padded to width 5, then .out
123 ostringstream fnstream;
124 fnstream <<
"c_" << setfill (
'0') << setw (5) << id_stack[n-1] <<
".out";
125 string fn(fnstream.str());
// Walk i from n-1 down to 1, prepending one directory level per step.
127 for (
Size i=n-1; i>=1; i--)
129 ostringstream pathstream1;
130 ostringstream pathstream2;
// For i > 1 a library directory is added: libdir_prefix, id_stack[i-1]
// zero-padded to width 5, libdir_suffix, then s (s is declared outside the
// visible lines; presumably the path separator).
133 if (i>1) pathstream1 << libdir_prefix << setfill (
'0') << setw (5) << id_stack[i-1] << libdir_suffix << s ;
// Sub-directory index is (id_stack[i]-1)/nofsubdir converted to int and
// zero-padded to width 3.
135 pathstream2 << subdir_prefix << setfill (
'0') << setw (3) <<
int((id_stack[i]-1)/nofsubdir) << s;
137 fn = pathstream1.str()+pathstream2.str()+fn;
139 return rootpath+defaultpath+fn;
// Normalises the tag held in str (the function header is not visible in this
// excerpt): appends .1 fields until the number of dots reaches the
// cluster::K_level option value, and turns underscores into dots.
144 using namespace basic::options;
145 using namespace basic::options::OptionKeys;
// Read from the option a single time (static initialisation).
146 static Size nlevel = option[cluster::K_level];
147 Size n=count(str.begin(), str.end(),
'.');
148 for (
Size i=n; i<nlevel; i++) str+=
".1";
// NOTE(review): the loop tests the find result with > 0. If pos has an
// unsigned type, the not-found value also satisfies > 0 and replace would be
// called with it once no underscore is left; an underscore at index 0 is
// never replaced either. The declaration of pos is not visible here --
// confirm its type.
151 while ((pos = str.find(
'_'))>0) str.replace(pos,1,1,
'.');
// Records that data item ndx_data belongs to cluster ndx_cluster at distance d.
//   ndx_data    -- index into type_list_, dist_list_ and data_ndx_
//   ndx_cluster -- index into center_ndx_ and subclusters_
//   d           -- distance stored for the item in dist_list_
157 void KClusterElement::assign_type_data(
Size ndx_data,
Size ndx_cluster,
Real d)
159 using namespace basic::options;
160 using namespace basic::options::OptionKeys;
163 type_list_[ndx_data] = ndx_cluster;
165 dist_list_[ndx_data] = d;
// The lines guarding this update are not visible in this excerpt; presumably
// ndx_data becomes the farthest item only when d exceeds the current maximum
// distance -- TODO confirm.
170 max_dist_ndx_=ndx_data;
// The item is also handed to the sub-cluster of ndx_cluster, except when it
// is the center of that cluster and the cluster::K_redundant option is off.
176 if (option[cluster::K_redundant]() || (data_ndx_[ndx_data] != center_ndx_[ndx_cluster]))
178 subclusters_[ndx_cluster]->add_new_data(data_ndx_[ndx_data]);
// Default constructor. The member initialisation and the loading steps sit in
// lines not visible in this excerpt; the visible part reports the resulting
// ndata_ and nfile_ counts on the tracer.
185 KClusterData::KClusterData()
191 using namespace basic::options;
192 using namespace basic::options::OptionKeys;
193 using namespace protocols::loops;
214 TR <<
"Finished loading database:" << endl;
215 TR <<
"Number of data: " <<
ndata_ << endl;
216 TR <<
"Number of file: " <<
nfile_ << endl;
// Fragment of a coordinate comparison (the enclosing function header and the
// loop over i are not visible in this excerpt). For each of the three
// components d, dx is the difference between conf1 and conf2 at column i;
// how dx is accumulated is not shown here.
221 using namespace basic::options;
222 using namespace basic::options::OptionKeys;
233 for (
Size d=1; d<=3; d++)
235 Real dx = conf1(d,i) - conf2(d,i);
// Reads the input silent files and registers one tag list per structure (the
// function header and many interior lines are not visible in this excerpt).
251 using namespace basic::options;
252 using namespace basic::options::OptionKeys;
254 TR <<
"Reading Silent Files ..." << endl;
// Input selection: in::file::silent is checked first, then
// in::file::silent_list; with neither given the program exits with a message.
// Both loops index the option vector from 1 up to and including its size.
256 if (option[in::file::silent].user())
258 for (
Size i=1, e=option[in::file::silent]().
size(); i<=e; i++)
263 else if (option[in::file::silent_list].user())
265 for (
Size i=1, e=option[in::file::silent_list]().
size(); i<=e; i++)
273 utility_exit_with_message(
"Please specify the input silent file or list!");
// Branch taken unless the cluster::K_not_fit_xyz option is set; its body is
// not visible here.
276 if (!option[cluster::K_not_fit_xyz])
// Copy of the coordinates returned by get_CA_xyz for the current structure.
304 FA2Dd original_xyz(it->get_CA_xyz());
// The matching if branch is not visible; presumably it records n_ca_atom_
// from the first structure, and later structures must have the same nres --
// TODO confirm.
307 else runtime_assert(
n_ca_atom_ == it->nres() );
// Tag format: d. then nfile zero-padded to width 4, a dot, then ncoord
// zero-padded to width 8.
333 tag <<
"d." << setfill (
'0') << setw (4) << nfile <<
"."
334 << setfill (
'0') << setw (8) << ncoord;
// The new tag is pushed onto list, and list is appended to tags_.
336 list.push_back(tag.str());
337 tags_.push_back(list);
// The decoy tag of the structure is kept in tagvec.
339 tagvec.push_back(it->decoy_tag());
// Labels the centers of the clusters held by c (the function header is not
// visible in this excerpt). For cluster i the tag is t, a dot, then i; it is
// appended to the tag list of the center structure of that cluster.
362 Size nc = c->get_cur_ncluster();
364 for (
Size i=1; i<=nc; i++)
369 tag << t <<
"." << i;
372 tags_[c->get_center_ndx(i)].push_back(tag.str());
// Descend into the sub-cluster using the extended tag as the new prefix
// (presumably a recursive call of this same function -- the header is not
// visible to confirm).
375 mark_tags(c->get_subcluster(i), tag.str());
// Writes structures with their cluster ids to the single output silent file
// (the function header and the iteration over structures are not visible in
// this excerpt).
381 using namespace basic::options;
382 using namespace basic::options::OptionKeys;
// Same input selection order as the loader: in::file::silent first, then
// in::file::silent_list.
386 if (option[in::file::silent].user())
388 fnlist=option[in::file::silent]();
390 else if (option[in::file::silent_list].user())
392 fnlist=option[in::file::silent_list]();
// Output file name comes from the out::file::silent option.
397 string const silent_outfile(option[out::file::silent]());
// Attach entry i of the tag list of structure count as the cluster_id string
// value, then write the structure out.
417 it->add_string_value(
"cluster_id",
tags_[count][i]);
418 clusters.write_silent_struct(**it,silent_outfile);
// Writes structures with their cluster ids to the file named by filename (the
// function header, the derivation of filename and the iteration over
// structures are not visible in this excerpt).
427 using namespace basic::options;
428 using namespace basic::options::OptionKeys;
// Same input selection order as the loader: in::file::silent first, then
// in::file::silent_list.
432 if (option[in::file::silent].user())
434 fnlist=option[in::file::silent]();
436 else if (option[in::file::silent_list].user())
438 fnlist=option[in::file::silent_list]();
// Behaviour differs when cluster::K_redundant is set; the branch body is not
// visible here.
462 if (option[cluster::K_redundant])
471 it->add_string_value(
"cluster_id",
tags_[count][i]);
// With cluster::K_save_headers set, the header of the structure is printed
// to filename, opened in append mode, before the structure itself is written.
// NOTE(review): the matching close of os is not visible here -- confirm the
// stream is closed or flushed before write_silent_struct touches the file.
474 if (option[ cluster::K_save_headers ])
477 os.open( filename.c_str(), std::ios::app);
478 (*it)->print_header( os );
481 clusters.write_silent_struct(**it,filename);
// Diagnostic dump of tags_ (the function header and the loops over ii and jj
// are not visible in this excerpt). Output goes straight to std::cout rather
// than to the TR tracer used elsewhere in this file.
490 std:: cout <<
"outputting cluster assignments..." << std::endl;
// Number of tags stored for entry ii.
492 std::cout <<
"size of tagslist for index: " << ii <<
" is " <<
tags_[ii].size() << std::endl;
// Tag jj of entry ii.
494 std::cout <<
"ndx belonging to cluster " << ii <<
" " <<
tags_[ii][jj] << std::endl;
// Driver loop of one clustering job (the function header, the loop statement
// and the branch structure are not visible in this excerpt).
516 TR <<
"*********** Job ***********" << endl;
518 TR <<
"Database: " << c->get_ndata() <<
" structures" << endl;
519 TR <<
"Clustering ..." << endl;
// Large starting value for the previous score, used by the improvement test
// further down.
522 Real old_score = 999999;
// One assignment pass; its return value is the score of the current state.
525 Real score = assign(c,d);
526 TR <<
"Score: " << score <<
" N_cluster: " << c->get_cur_ncluster() << endl;
530 TR <<
"in KCenter mode" << std::endl;
// Visible stop tests: cluster count equal to n_cluster_, score at or below
// the threshold, and improvement (old_score - score) at or below the
// threshold. How these tests nest, and where old_score is updated, is not
// visible here.
532 if (c->get_cur_ncluster()==n_cluster_)
537 if (score<=get_threshold())
break;
542 if (old_score-score<=get_threshold())
break;
549 TR <<
"Finish!" << endl;
// Fragment that announces the K-Medoid algorithm on the tracer (the enclosing
// function header is not visible in this excerpt; presumably the KMedoid
// constructor -- TODO confirm).
557 using namespace basic::options;
558 using namespace basic::options::OptionKeys;
575 TR <<
"I am K-Medoid Algorithm!" << endl;
// Picks initial cluster centers at random (the function header and the outer
// loop over i are not visible in this excerpt).
582 Size nd = c->get_ndata();
589 TR <<
"Empty cluster, randomly choose center" << endl;
// Maps a uniform draw onto a data position and looks up its data index.
// NOTE(review): this assumes RG.uniform() never returns exactly 1; such a
// draw would give position nd+1 -- confirm the range of uniform().
600 newcenter = c->get_data_ndx(static_cast<int>(
RG.uniform() * nd + 1 ));
// Compare the candidate with the centers chosen so far (j < i).
604 for (
Size j=1; j<i; j++)
607 if (c->get_center_ndx(j) == newcenter)
// flag is declared and updated in lines not visible here; the candidate is
// added as a new cluster only when i equals flag. Presumably flag tracks
// whether the duplicate scan above found a match -- TODO confirm.
614 if (i==flag) c->add_new_cluster(newcenter);
// Nested loop over i = 1..len and j = 1..3 (the enclosing function header and
// the loop body are not visible in this excerpt; presumably a per-atom xyz
// copy -- TODO confirm).
625 for (
Size i=1; i<=len; i++)
627 for (
Size j=1; j<=3; j++)
// Assignment pass after a cluster has been added (the function header and the
// branch structure are not visible in this excerpt). Every data item is
// assigned either to the newest cluster nc or back to its current cluster,
// and the stored distances are summed at the end.
638 Size nd = c->get_ndata();
// 3 x na coordinate buffer, zero initialised.
640 FA2Dd coord(3, na, 0.0);
// One entry per cluster, zero initialised. The loop fills entries for i < nc
// with a value computed from center i and the newest center nc (the call that
// wraps these two arguments is not visible here; presumably a distance).
654 ObjexxFCL::FArray1D_double center_dis_list(nc,0.0);
655 for (
Size i=1; i<nc; i++)
659 c->get_center_ndx(i),
660 c->get_center_ndx(nc) ));
665 for (
Size i=1; i<=nd; i++)
// The item that is itself the newest center is assigned to nc at distance 0.
668 if (c->get_data_ndx(i)==c->get_center_ndx(nc))
670 c->assign_type_data(i, nc, 0.0);
679 c->get_center_ndx(nc), c->get_ndx_list()[i]
684 Real d_old = c->get_distance(i);
// Shortcut: when the stored distance is at most half of the center_dis_list
// entry of the current cluster, the item keeps its assignment and no new
// distance is computed (presumably a triangle-inequality argument -- TODO
// confirm).
687 if (d_old <= center_dis_list(c->get_type(i))/2.0 )
690 c->assign_type_data(i,c->get_type(i),d_old);
695 FA2Pd src(d.
coords()(1,1,c->get_ndx_list()[i]),3,na);
// Otherwise the distance to the newest center is computed as the square root
// of dist_square.
697 Real d_new = sqrt(d.
dist_square(c->get_center_ndx(nc),c->get_ndx_list()[i]));
// The test choosing between the next two calls is not visible here;
// presumably the item moves to nc when d_new is smaller than d_old and is
// re-assigned to its current cluster otherwise -- TODO confirm.
700 c->assign_type_data(i, nc, d_new);
705 c->assign_type_data(i,c->get_type(i),d_old);
// Accumulate the stored distances of all items.
713 for (
Size i=1; i<=nd; i++)
715 sum += c->get_distance(i);
// Re-centering step for cluster i (the function header, the outer loop over
// clusters and several interior lines are not visible in this excerpt).
729 Size nd = list.size();
// 3 x na accumulator, zero initialised.
730 FA2Dd sum(3, na, 0.0);
// Add the coordinates of every member j of list into sum, element by element.
731 for (
Size j=1; j<=nd; j++)
734 for (
Size m=1; m<=na; m++)
736 for (
Size n=1; n<=3; n++)
738 sum(n,m)+=coord(n,m);
// Second pass over all elements; the body is not visible here (presumably it
// turns the sums into averages -- TODO confirm).
752 for (
Size m=1; m<=na; m++)
754 for (
Size n=1; n<=3; n++)
// Large starting value for the smallest squared distance seen so far.
761 Real mindist2 = 999999;
// Scan the members again and track the one with the smallest d2_new; how
// d2_new is computed is not visible here.
763 for (
Size j=1; j<=nd; j++)
770 if (d2_new < mindist2)
// The member found by the scan is set as cluster i through set_cluster.
781 c->set_cluster(i,nearest);
// Fragment that announces the approximate K-Center algorithm on the tracer
// (the enclosing function header is not visible in this excerpt; presumably a
// constructor -- TODO confirm).
790 using namespace basic::options;
791 using namespace basic::options::OptionKeys;
799 TR <<
"I am Approximate K-Center Algorithm!" << endl;
// Initialisation for the K-Center algorithm (the function header is not
// visible in this excerpt): starts from an element with no clusters and adds
// one randomly chosen center.
816 TR <<
"Initializing ..." << endl;
// Precondition checked in debug builds only: no cluster exists yet.
817 assert(c->get_cur_ncluster()==0);
// Maps a uniform draw onto a data position and looks up its data index.
// NOTE(review): this assumes RG.uniform() never returns exactly 1; such a
// draw would give position ndata+1 -- confirm the range of uniform().
825 center = c->get_data_ndx(static_cast< int >(
RG.uniform() * c->get_ndata() + 1 ));
829 c->add_new_cluster(center);
// Assignment pass for the K-Center algorithm after a cluster has been added
// (the function header and the branch structure are not visible in this
// excerpt). Returns the maximum distance reported by c.
835 Size nc = c->get_cur_ncluster();
// One entry per cluster, zero initialised. The loop fills entries for i < nc
// with a value computed from center i and the newest center nc (the call that
// wraps these two arguments is not visible here; presumably a distance).
838 ObjexxFCL::FArray1D_double center_dis_list(nc,0.0);
839 for (
Size i=1; i<nc; i++)
842 c->get_center_ndx(i),
843 c->get_center_ndx(nc) ));
848 Size nd = c->get_ndata();
849 for (
Size i=1; i<=nd; i++)
852 Real d_old = c->get_distance(i);
// Shortcut: when the stored distance is at most half of the center_dis_list
// entry of the current cluster, the item keeps its assignment and no new
// distance is computed (presumably a triangle-inequality argument -- TODO
// confirm).
855 if (d_old <= center_dis_list(c->get_type(i))/2.0 )
858 c->assign_type_data(i,c->get_type(i),d_old);
// Otherwise the distance to the newest center is computed as the square root
// of dist_square.
862 Real d_new = sqrt(d.
dist_square(c->get_center_ndx(nc),c->get_ndx_list()[i]));
// The test choosing between the next two calls is not visible here;
// presumably the item moves to nc when d_new is smaller than d_old and is
// re-assigned to its current cluster otherwise -- TODO confirm.
865 c->assign_type_data(i, nc, d_new);
869 c->assign_type_data(i,c->get_type(i),d_old);
874 return c->get_max_distance();
// Two alternative ways of adding the next cluster (the function header and
// the condition that selects between them are not visible in this excerpt):
// either with the argument 0, or with the data index of the item that
// get_max_dist_ndx reports as farthest.
883 c->add_new_cluster(0);
889 c->add_new_cluster(c->get_ndx_list()[c->get_max_dist_ndx()]);