Rosetta 3.5
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
HPool.cc
Go to the documentation of this file.
1 // -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
2 // vi: set ts=2 noet:
3 //
4 // (c) Copyright Rosetta Commons Member Institutions.
5 // (c) This file is part of the Rosetta software suite and is made available under license.
6 // (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
7 // (c) For more information, see http://www.rosettacommons.org. Questions about this can be
8 // (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
9 
10 /// @file protocols/canonical_sampling/mc_convergence_checks/HPool.cc
11 /// @brief hierarchical pool
12 /// @author Yuan Liu (wendao@u.washington.edu)
13 
16 
17 #include <basic/Tracer.hh>
20 
21 #include <ObjexxFCL/FArray3D.hh>
22 #include <ObjexxFCL/FArray2D.hh>
23 #include <ObjexxFCL/FArray1D.hh>
24 #include <ObjexxFCL/FArray3.hh>
25 #include <ObjexxFCL/FArray2.hh>
26 #include <ObjexxFCL/FArray1.hh>
27 #include <ObjexxFCL/FArray2P.hh>
28 #include <ObjexxFCL/FArray3P.hh>
29 
30 #include <utility/exit.hh>
31 #include <utility/file/FileName.hh>
32 #include <utility/file/PathName.hh>
33 #include <utility/file/file_sys_util.hh>
35 
36 #include <numeric/xyzMatrix.hh>
37 
38 #include <basic/options/option.hh>
39 // AUTO-REMOVED #include <basic/options/option_macros.hh>
40 // AUTO-REMOVED #include <basic/options/keys/in.OptionKeys.gen.hh>
41 #include <basic/options/keys/out.OptionKeys.gen.hh>
42 #include <basic/options/keys/mc.OptionKeys.gen.hh>
43 #include <basic/options/keys/cluster.OptionKeys.gen.hh>
44 
45 #include <utility/vector1.hh>
46 
47 
48 static basic::Tracer TR("protocols.hcluster");
49 
50 namespace protocols {
51 namespace canonical_sampling{
52 namespace mc_convergence_checks {
53 
54 //get desired lib file name from tag
56 {
57  using namespace std;
58  using namespace core;
59  using namespace basic::options;
60  using namespace basic::options::OptionKeys;
61 
62  static string subdir_prefix("sub_");
63  static string libdir_prefix("c_");
64  static string libdir_suffix("_lib");
65  static string s("/");
66  static int nofsubdir = option[cluster::K_n_sub];
67  static string rootpath = utility::file::FileName(option[mc::known_structures]()).path();
68  static string defaultpath = utility::file::FileName(option[mc::known_structures]()).base() + libdir_suffix + s;
69 
70  string tag = tag_orign + ".0000";
71 
72  Size pos=1;
73  Size len=tag.length();
74  utility::vector1<Size> id_stack;
75  while (pos<len)
76  {
77  Size newpos = tag.find(".", pos+1);
78  id_stack.push_back(atoi(tag.substr(pos+1,newpos-pos-1).c_str()));
79  pos = newpos;
80  }
81 
82  Size n = id_stack.size();
83  if (n == 1) return option[out::file::silent]();
84  ostringstream fnstream;
85  fnstream << "c_" << setfill ('0') << setw (5) << id_stack[n-1] << ".out";
86  string fn(fnstream.str());
87 
88  for (Size i=n-1; i>=1; i--)
89  {
90  ostringstream pathstream1;
91  ostringstream pathstream2;
92 
93  //main dir
94  if (i>1) pathstream1 << libdir_prefix << setfill ('0') << setw (5) << id_stack[i-1] << libdir_suffix << s ;
95  //sub dir
96  pathstream2 << subdir_prefix << setfill ('0') << setw (3) << int((id_stack[i]-1)/nofsubdir) << s;
97 
98  fn = pathstream1.str()+pathstream2.str()+fn;
99  }
100  return defaultpath+fn;
101 }
102 
103 //////////////////////////////////////////////////////////////////////////////////////////////////////////
105 :Pool_RMSD(silent_file),
106  silent_file_(silent_file),
107  level_(lv),
108  n_extra_(0)
109 {
110  //set radius
111  using namespace basic::options;
112  using namespace basic::options::OptionKeys;
113  radius_ = option[cluster::K_radius]()[level_];
114  TR << "New cluster loaded: level=" << level_ << " radius=" << radius_ << std::endl;
115 
116  //subclusters lib path comes from lib_path_full
117  //not this
118  //lib_path_ = utility::file::FileName(silent_file).path();
119  //lib_path_ += utility::file::FileName(silent_file).base() + "_lib";
120  //TR << "Desired lib path: " << lib_path_ << std::endl;
121 
122  old_size_ = size();
123  //get the tag prefix from the first tag
124  //xxx.xxx.xxx.yyy -> xxx.xxx.xxx
125  core::Size pos=1,oldpos=1;
126  std::string t(tag(1));
127  core::Size len=t.length();
128  while (pos<len) {oldpos=pos; pos = t.find(".", pos+1);}
129  tag_prefix_ = t.substr(0,oldpos+1).c_str();
130  TR << "tag_prefix=" << tag_prefix_ << std::endl;
131 
132  has_child_ = (level_ == core::Size(option[cluster::K_level]())) ? false : true;
133  //if (utility::file::file_exists(lib_path)) has_child_ = true;
134 
135  if (has_child_)
136  {
137  //TR << "with subcluster" << std::endl;
138  subpools_.resize(size());
139  }
140 
142 }
143 
145 {
146  //save pair dist in a 1D vecotr, easy to expand
147  pair_dis_.clear();
148  pair_dis_.resize(size()*(size()-1)/2);//space
149 
150  for (core::Size i=2; i<=size(); i++)
151  {
152  for (core::Size j=1; j<=i-1; j++)
153  {
154  //cal id in 1D vector
155  core::Size ndx = (i-1)*(i-2)/2 + j;
156  pair_dis_[ndx] = std::sqrt(dist_square(i,j));
157  }
158  }
159 }
160 
161 core::Real HPool_RMSD::dist_square(ObjexxFCL::FArray2_double &conf1, ObjexxFCL::FArray2_double &conf2)
162 {
163  using namespace basic::options;
164  using namespace basic::options::OptionKeys;
165 
166  static ObjexxFCL::FArray1D_double weights( natom(), 1.0 );
167  static numeric::xyzMatrix< core::Real > R; //do not care
168 
169  //fit
170  if (!option[cluster::K_not_fit_xyz]) {
171  protocols::toolbox::fit_centered_coords(natom(), weights, conf1, conf2, R);
172  }
173 
174  //cal dist
175  core::Real sum=0.0;
176  for (Size i=1; i<=natom(); i++)
177  {
178  for (core::Size d=1; d<=3; d++)
179  {
180  core::Real dx = conf1(d,i) - conf2(d,i);
181  sum += dx*dx;
182  }
183  }
184  return sum/natom();
185 }
186 
188 {
189  ObjexxFCL::FArray2P_double conf1(coords()(1,1,ndx1), 3, natom());
190  ObjexxFCL::FArray2P_double conf2(coords()(1,1,ndx2), 3, natom());
191  return dist_square(conf1, conf2);
192 }
193 /*
194 void HPool_RMSD::add(core::pose::Pose& pose, std::string &tag)
195 {
196  //fill pose to coords
197  ObjexxFCL::FArray2D_double coord( 3, natom(), 0.0 );
198  runtime_assert( natom() == pose.total_residue() );
199  protocols::toolbox::fill_CA_coords( pose, coord );
200  add(coord, tag);
201 }
202 
203 void HPool_RMSD::add(core::io::silent::SilentStruct& pss, std::string &tag)
204 {
205  //fill pose to coords
206  ObjexxFCL::FArray2D_double coord( pss.get_CA_xyz() );
207  runtime_assert( pss.nres() == natom() );
208  add(coord, tag);
209 }
210 */
212 {
213  //expand the pair dist matrix
214  Pool_RMSD::add(coord, natom(), tag);//add new coords
215  core::Size nd = size();
216  pair_dis_.reserve(pair_dis_.size()+nd-1); //expand space
217  for (core::Size i=1; i<nd; i++)
218  {
219  //add new pair dist
220  pair_dis_.push_back(std::sqrt(dist_square(nd,i)));
221  }
222 
223  //add new subcluster
224  if (has_child_)
225  {
226  subpools_.push_back(*(new HPool_RMSD_OP));
227  }
228 }
229 
231 {
232  if (i==j) return 0.0;
233  core::Size col = std::min(i,j);
234  core::Size row = std::max(i,j);
235  core::Size ndx = (row-1)*(row-2)/2 + col;
236  return pair_dis_[ndx];
237 }
238 
240 {
241  for (core::Size i=1; i<=size(); i++)
242  {
243  for (core::Size j=1; j<=size(); j++)
244  {
245  std::cout << get_pair_dist(i,j) << " ";
246  }
247  std::cout << std::endl;
248  }
249 }
250 
252  core::pose::Pose& pose,
253  core::Real resolution,
254  std::string& best_decoy,
255  core::Real& best_rmsd )
256 {
257  if ( size() == 0 )
258  {
259  best_decoy = "n/a";
260  best_rmsd = 10000;
261  return 0;
262  }
263  ObjexxFCL::FArray2D_double coord( 3, natom(), 0.0 );
264  runtime_assert( natom() == pose.total_residue() );
265  protocols::toolbox::fill_CA_coords( pose, coord );
266 
267  /*
268  //TR << "eval: resolustion=" << resolution << std::endl;
269  core::Size best_ndx=evaluate_core(coord, best_decoy, best_rmsd, 1);
270  //TR << "best " << best_decoy << ":" << best_rmsd << std::endl;
271  if (best_rmsd <= resolution)
272  {
273  //get the right one
274  return 0;
275  }
276  else if ( (best_rmsd<radius_) && load_lib(best_ndx) )
277  {
278  //search from sub cluster
279  subpools_[best_ndx]->evaluate(pose,resolution,best_decoy,best_rmsd);
280  return 0;
281  }
282  else
283  {
284  //add a new center
285  TR << "min_rmsd=" << best_rmsd << " out of R=" << radius_;
286  TR << " Adding a new center ..." << std::endl;
287  std::ostringstream tag;
288  tag << "new." << (++n_extra_);
289  best_decoy = tag.str();
290  add(pose, best_decoy);
291  best_rmsd = 0.0;
292  return 0;
293  }
294  */
295 
296  if (evaluate(coord, resolution, best_decoy, best_rmsd) == 1 )
297  {
298  //save this strcuture into subcluster's silent file
299  }
300  return 0;
301 }
302 
305  core::Real resolution,
306  std::string& best_decoy,
307  core::Real& best_rmsd )
308 {
309  if ( size() == 0 )
310  {
311  best_decoy = "n/a";
312  best_rmsd = 10000;
313  return 0;
314  }
315 
316  ObjexxFCL::FArray2D_double coord( pss.get_CA_xyz() );
317  runtime_assert( pss.nres() == natom() );
318 
319  /*
320  //TR << "eval: resolustion=" << resolution << std::endl;
321  core::Size best_ndx=evaluate_core(coord, best_decoy, best_rmsd, 1);
322  //TR << "best " << best_decoy << ":" << best_rmsd << std::endl;
323  if (best_rmsd <= resolution)
324  {
325  //get the right one
326  return 0;
327  }
328  else if ( (best_rmsd<radius_) && load_lib(best_ndx) )
329  {
330  //search from sub cluster
331  subpools_[best_ndx]->evaluate(pss,resolution,best_decoy,best_rmsd);
332  return 0;
333  }
334  else
335  {
336  //add a new center
337  TR << "min_rmsd=" << best_rmsd << " out of R=" << radius_;
338  TR << " Adding a new center ..." << std::endl;
339  std::ostringstream tag;
340  tag << "new." << (++n_extra_);
341  best_decoy = tag.str();
342  add(pss, best_decoy);
343  best_rmsd = 0.0;
344  return 0;
345  }
346  */
347 
348  //hierarchy
349  if (evaluate(coord, resolution, best_decoy, best_rmsd) == 1 )
350  {
351  //save this strcuture into subcluster's silent file
353  utility::file::create_directory_recursive(utility::file::FileName(filename).path());
355  TR << "tag:" << best_decoy << " -> " << filename << std::endl;
356  pss.set_decoy_tag(best_decoy);
357  clusters.write_silent_struct(pss,filename);
358  }
359  return 0;
360 }
361 
362 ///////////////////////////
363 //hierarchy evaluate FArray2D
364 ///////////////////////////
367  core::Real resolution,
368  std::string& best_decoy,
369  core::Real& best_rmsd )
370 {
371  //TR << "eval: resolustion=" << resolution << std::endl;
372  core::Size best_ndx=evaluate_core(coord, best_decoy, best_rmsd, 1);
373  //TR << "best " << best_decoy << ":" << best_rmsd << std::endl;
374  if (best_rmsd <= resolution)
375  {
376  //get the right one
377  return 0;
378  }
379  /*
380  else if ( (best_rmsd<radius_) && load_lib(best_ndx) )
381  {
382  //search from sub cluster
383  subpools_[best_ndx]->evaluate(pose,resolution,best_decoy,best_rmsd);
384  }
385  */
386  //06/20/2010
387  //if this pool is not the lowest level but there is no
388  //sub cluster for this center, then create a new one
389  else if ((best_rmsd<radius_) && has_child_)
390  {
391  //
392  if (load_lib(best_ndx))
393  {
394  //successfully loaded the subcluster
395  return subpools_[best_ndx]->evaluate(coord,resolution,best_decoy,best_rmsd);
396  }
397  else
398  {
399  //there is no element in the subcluster, create the file for it
400  best_decoy = tag(best_ndx)+".00001";
401  best_rmsd = 0.0;
402  return 1;//need to save this strcture
403  }
404  }
405  else
406  {
407  //add a new center
408  TR << "min_rmsd=" << best_rmsd << " out of R=" << radius_;
409  TR << " Adding a new center ..." << std::endl;
410 
411  std::ostringstream tag;
412  //tag << "new." << (++n_extra_);
413  //new tag, same format as old structure
414  tag << tag_prefix_ << setfill ('0') << setw (5) << (++n_extra_+old_size_);
415  best_decoy = tag.str();
416  add(coord, best_decoy);
417  best_rmsd = 0.0;
418  return 1;//need to save this strcture
419  }
420  return 0;
421 }
422 
423 //ref Pool_ConvergenceCheck.cc
424 //index indicates what index you start evaluation from
427  std::string& best_decoy,
428  core::Real& best_rmsd ,
429  core::Size index ) const
430 {
431  if ( size() == 0 )
432  {
433  best_decoy = "n/a";
434  best_rmsd = 10000;
435  return 0;
436  }
437 
438  ObjexxFCL::FArray1D_double const weights( natom(), 1.0 );
439  ObjexxFCL::FArray1D_double transvec( 3 );
440  protocols::toolbox::reset_x( natom(), coord, weights, transvec );//center coordinates
441  //n_atoms is simply # CA atoms in each "pose"
442  core::Real invn( 1.0 / natom() );
443  best_rmsd = 1000000;
444  core::Size best_index( 0 );
445  for ( core::Size i = index; i <= size(); i++ )
446  {
447  //get xyz
449  ObjexxFCL::FArray2P_double xx2( coords()(1,1,i), 3, natom() );
450  protocols::toolbox::fit_centered_coords( natom(), weights, xx2, coord, R );
451 
452  //triangle inequality
453  if (i>index)
454  {
455  if (get_pair_dist(best_index,i)/2.0>=best_rmsd)
456  {
457  //TR << "jump" << std::endl;
458  continue;
459  }
460  }
461 
462  //calculate rmsd
463  core::Real rmsd( 0 );
464  for ( core::Size n = index; n <= natom(); n++)
465  {
466  for ( core::Size d = 1; d<=3; ++d )
467  {
468  rmsd += ( coord( d, n ) - xx2( d, n ) ) * ( coord( d, n ) - xx2( d, n ) ) * invn;
469  }
470  }
471  rmsd = std::sqrt( rmsd );
472  if ( rmsd <= best_rmsd )
473  {
474  best_index = i;
475  best_rmsd = rmsd;
476  }
477  }
478 
479  best_decoy = tag(best_index);
480  return best_index;
481 }
482 
484 {
485  using namespace basic::options;
486  using namespace basic::options::OptionKeys;
487 
488  if (!has_child_) return false;
489 
490  if (level_<=core::Size(option[cluster::K_deque_level]()))
491  {
492  //search in the list
493  core::Size nlist = sub_ndx_deque_.size();
494  core::Size i;
495  //remember, deque is from 0 to n-1
496  for (i=0; i<nlist; i++)
497  {
498  if (sub_ndx_deque_[i] == nc) break;
499  }
500 
501  if (i>=nlist)
502  {
503  //not in the list
504  if (nlist>=core::Size(option[cluster::K_deque_size]()))
505  {
506  //reach the limit of the list
507  //removing the oldest subcluster in memory
509  sub_ndx_deque_.pop_front();
510  }
511  //the last one is the newest one
512  sub_ndx_deque_.push_back(nc);
513  }
514  else
515  {
516  //already in the list
517  if (i<nlist-1) //not the last one
518  {
519  sub_ndx_deque_.erase(sub_ndx_deque_.begin()+i);
520  sub_ndx_deque_.push_back(nc);//put nc to the tail
521  }
522  }
523  }//only for top K_deque_level level
524 
525  if (subpools_[nc]) return true;
527  TR << "try to load " << filename <<std::endl;
529  {
530  TR << "loading new cluster..." << std::endl;
531  subpools_[nc] = new HPool_RMSD(filename, level_+1);
532  return true;
533  }
534  return false;
535 }
536 
538 {
539  subpools_[nc].reset_to_null();
540 }
541 
542 }//mc_conv
543 }//moves
544 }//prot
545 
546 
547 
548