Rosetta 3.5
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
cluster.cc
Go to the documentation of this file.
1 // -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
2 // vi: set ts=2 noet:
3 //
4 // (c) Copyright Rosetta Commons Member Institutions.
5 // (c) This file is part of the Rosetta software suite and is made available under license.
6 // (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
7 // (c) For more information, see http://www.rosettacommons.org. Questions about this can be
8 // (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
9 ///
10 /// @author Mike Tyka
11 
12 
15 // AUTO-REMOVED #include <core/conformation/symmetry/util.hh>
16 
21 #include <core/pose/util.hh>
22 #include <core/scoring/Energies.hh>
23 #include <core/scoring/rms_util.hh>
28 // AUTO-REMOVED #include <protocols/evaluation/RmsdEvaluator.hh>
30 #include <protocols/moves/Mover.hh>
31 #include <utility/file/FileName.hh>
32 #include <utility/string_util.hh>
35 #include <utility/io/izstream.hh>
36 #include <numeric/random/random.hh>
37 
38 // C++ headers
39 #include <cstdlib>
40 #include <fstream>
41 #include <iostream>
42 #include <string>
43 #include <deque>
44 #include <map>
45 #include <cmath>
46 
47 // option key includes
48 
49 #include <basic/options/keys/cluster.OptionKeys.gen.hh>
50 #include <basic/options/keys/out.OptionKeys.gen.hh>
51 #include <basic/options/keys/symmetry.OptionKeys.gen.hh>
52 
54 #include <utility/vector1.hh>
55 #include <ObjexxFCL/format.hh>
56 #include <basic/Tracer.hh>
57 
58 
59 //Auto using namespaces
60 namespace ObjexxFCL { namespace fmt { } } using namespace ObjexxFCL::fmt; // AUTO USING NS
61 //Auto using namespaces end
62 
63 namespace protocols {
64 namespace cluster {
65 
66 using namespace core;
67 using namespace ObjexxFCL;
68 using namespace pose;
69 using namespace basic::options;
70 using namespace evaluation;
71 
72 static basic::Tracer tr("protocols.cluster");
73 static numeric::random::RandomGenerator RG(42032);
74 
75 
76 
77 std::map< std::string, core::Real > read_template_scores( const std::string filename ){
78 
79  utility::io::izstream data( filename.c_str() );
80  if ( !data.good() ) {
81  utility_exit_with_message(
82  "ERROR: Unable to open extra_scores file: '" + filename + "'"
83  );
84  }
85 
86  std::map< std::string, core::Real > score_data;
87  std::string line,tag;
88  while( getline(data,line) ) {
89  std::istringstream l( line );
90  std::string template_name;
91  core::Real template_score;
92  l >> template_name;
93  l >> template_score;
94 
95  score_data[template_name] = template_score;
96  }
97 
98  return score_data;
99 }
100 
101 
103  const std::pair< int, Real > & p1, const std::pair< int, Real > & p2 )
104 {
105  return p1.second < p2.second;
106 }
107 
108 /*
109 class PoseComparator {
110  public:
111  PoseComparator() { }
112  virtual Real measure( Pose &pose1, Pose &pose2 ) = 0;
113  private:
114 };
115 
116 
117 class PoseComparator_RMSD : public PoseComparator {
118  public:
119  PoseComparator() { }
120  ual Real measure( Pose &pose1, Pose &pose2 ) = 0;
121  private:
122 };
123 
124 */
125 
126 GatherPosesMover::GatherPosesMover() : Mover()
127 {
128  filter_ = 1000000.0;
129  processed_ = 0;
131  cluster_by_all_atom_ = false;
132 
133  using namespace basic::options;
134  using namespace basic::options::OptionKeys;
135 
136  if ( option[ OptionKeys::cluster::template_scores].user() ) {
137  template_scores = read_template_scores( option[ OptionKeys::cluster::template_scores]() );
138 
139  tr.Info << "Read template scores: " << std::endl;
140  for( std::map<std::string, core::Real >::iterator ii=template_scores.begin(); ii!=template_scores.end(); ++ii)
141  {
142  tr.Info << (*ii).first << ": " << (*ii).second << std::endl;
143  }
144  }
145 
146 }
147 
148 // This should probably be replaced with an object -- "PosePoseRMSD" or something.
149 Real
151  const Pose & pose1,
152  const Pose & pose2
153 ) const {
154  using namespace basic::options;
155  using namespace basic::options::OptionKeys;
156 
157  if ( option[ basic::options::OptionKeys::symmetry::symmetric_rmsd ]() &&
159  tr.Info << "Warning!!! For symmetric rmsd selected but pose is not symmetric. Ignoring symmetry" << std::endl;
160 
161  if ( option[ OptionKeys::cluster::hotspot_hash ]() ) {
162  Size const resnum1 = pose1.total_residue();
163  Size const resnum2 = pose2.total_residue();
164  conformation::ResidueCOP res1( pose1.residue(resnum1) );
165  conformation::ResidueCOP res2( pose2.residue(resnum2) );
166  //return protocols::hotspot_hashing::residue_sc_rmsd_no_super( res1, res2 );
167  return core::scoring::residue_sc_rmsd_no_super( res1, res2 );
168  }
169 
170  if ( option[ OptionKeys::cluster::gdtmm ]() ) {
171  if ( option[ basic::options::OptionKeys::symmetry::symmetric_rmsd ]() ) utility_exit_with_message( "No symmetric gdtmm available!!!!\n" ) ;
172 
174  core::scoring::invert_exclude_residues( pose1.total_residue(), option[ OptionKeys::cluster::exclude_res ](), residues );
175  // Make a new measure that is like an inverse gdtmm running from 10 (=gdtm of 0) to
176  // 0 (= gdtmm of 1)
177  return 10 -10.0 * scoring::CA_gdtmm( pose1, pose2, residues );
178  }
179 
180  if ( option[ OptionKeys::cluster::exclude_res ].user() ) {
182  core::scoring::invert_exclude_residues( pose1.total_residue(), option[ OptionKeys::cluster::exclude_res ](), residues );
183  if ( pose1.residue(1).is_RNA() ) utility_exit_with_message( "Hey put in all atom rmsd code for residue subset!\n" ) ;
184  if ( option[ basic::options::OptionKeys::symmetry::symmetric_rmsd ]() &&
186  tr.Info << "Warning!!! For symmetric clustering currently only all CA clustering is available. Calculating all CA rmsd instead..." << std::endl;
187  return scoring::CA_rmsd_symmetric( pose1, pose2 );
188  }
189  return scoring::CA_rmsd( pose1, pose2, residues );
190  } else {
191  // no residues excluded from the native.
192  if ( option[ basic::options::OptionKeys::symmetry::symmetric_rmsd ]() &&
193  core::pose::symmetry::is_symmetric( pose1 ) ) return scoring::CA_rmsd_symmetric( pose1, pose2 );
194  if ( pose1.residue(1).is_RNA() ) return scoring::all_atom_rmsd( pose1, pose2 );
195  if ( cluster_by_all_atom_ ) return scoring::all_atom_rmsd( pose1, pose2 );
197  return scoring::CA_rmsd( pose1, pose2 );
198  }
199 } // native_CA_rmsd
200 
201 
203  sfxn_ = sfxn;
204 }
205 
207  filter_ = filter;
208 }
209 
211 
212  core::Real score;
213  // does pose already have score set ?
214  if ( !getPoseExtraScores( pose, "silent_score", score ) ) {
215  // if not do we have scoring function ?
216 
217  if ( !sfxn_ ) {
218  //oh well - can't do enything ... just set it to 0.
219  } else {
220  // if so - try and score the structure and set that energy column
221  ////////////////// add constraints if specified by user.
223  Real score = (*sfxn_)(pose) ;
224  tr.Info << "RESCORING: " << core::pose::extract_tag_from_pose( pose )<< std::endl;
225  setPoseExtraScores( pose, "silent_score", score );
226 
227  if ( get_native_pose() ) {
229  pose, "rms", get_distance_measure( pose, *get_native_pose() )
230  );
231  }
232  pose.energies().clear();
233  }
234 
235  // remember tag!
236  tag_list.push_back( core::pose::extract_tag_from_pose( pose ) );
237  }
238 
239  if( template_scores.size() > 0 ){
240 
241  std::map< std::string, std::string > score_line_strings( core::pose::get_all_score_line_strings( pose ) );
242 
243  std::string template_name = score_line_strings["aln_id"];
244  template_name = template_name.substr(0,5);
245 
246  std::cout << "Found template name: " << template_name << std::endl;
247  core::Real template_score = template_scores[ template_name ];
248 
249  score += template_score;
250  setPoseExtraScores( pose, "silent_score", score );
251  }
252 
253  tr.Info << "Adding struc: " << score << std::endl;
254 
255  // filter structures if the filter is active
256  if ( score > filter_ ) return; // ignore pose if filter value is too high
257 
258  // now save the structure.
259  poselist.push_back( pose );
260  processed_ ++;
261  //tr.Info << "Read " << poselist.size() << " structures" << std::endl;
262 }
263 
266  return "GatherPosesMover";
267 }
268 
269 bool GatherPosesMover::check_tag( const std::string &query_tag ) {
270  using std::find;
271  if ( find( tag_list.begin(), tag_list.end(), query_tag ) == tag_list.end() ) {
272  return false;
273  }
274  return true;
275 }
276 
278  : GatherPosesMover(),
279  export_only_low_(false),
280  median_rms_( 0.0 ),
281  population_weight_( 0.09 ),
282  cluster_radius_( 2.0 )
283 {}
284 
287  return "ClusterBase";
288 }
289 
290 void ClusterBase::set_cluster_radius( Real cluster_radius ) {
291  cluster_radius_ = cluster_radius;
292 }
293 
294 void ClusterBase::set_population_weight( Real population_weight ) {
295  population_weight_ = population_weight ;
296 }
297 
299  return cluster_radius_;
300 }
301 
303  FArray2D< Real > p1a, p2a;
304  distance_matrix = FArray2D< Real > ( poselist.size(), poselist.size(), 0.0 );
305  int count = 0;
306  tr.Info << "Calculating RMS matrix: " << std::endl;
307 
308  Real hist_resolution = 0.25;
309  int hist_size = int(20.0 / hist_resolution);
310 
311  std::vector<int> histcount(hist_size,0);
312 
313  for ( Size i = 0; i < poselist.size(); i++ ) {
314  for ( Size j = i; j < poselist.size(); j++ ) {
315  // only do comparisons once
316  if ( i < j ) {
317  // get the similarity between structure with index i and with index j
319  distance_matrix( i+1, j+1 ) = dist;
320  distance_matrix( j+1, i+1 ) = dist;
321  // tr.Info << "( " << i+1 << ";" << j+1 << " ) " << dist << std::endl;
322  int histbin = int(dist/hist_resolution);
323  if ( histbin < hist_size ) histcount[histbin]+=1;
324 
325  // print some stats of progress
326  count ++;
327  if ( count % 5000 == 0 ) {
328  Real const percent_done ( 200.0 * static_cast< Real > ( count ) / ( (poselist.size() - 1) * poselist.size() ) );
329  tr.Info << count
330  << "/" << ( poselist.size() - 1 )*( poselist.size() )/2
331  << " ( " << F(8,1,percent_done) << "% )"
332  << std::endl;
333  }
334  }
335  } // for it2
336  } // for it1
337 
338 
339  tr.Info << "Histogram of pairwise similarity values for the initial clustering set" << std::endl;
340  int maxcount_count = 0;
341  int maxcount_i = 0;
342  for ( Size i = 0; i <(Size)hist_size; i++ ) {
343  tr.Info << "hist " << Real(i)*hist_resolution << " " << histcount[i] << std::endl;
344  if( histcount[i] > maxcount_count){
345  maxcount_count = histcount[i];
346  maxcount_i = i;
347  }
348  }
349 
350  median_rms_ = maxcount_i * hist_resolution;
351  tr.Info << "Median RMS " << get_median_rms() << std::endl;
352 
353 } // calculate_distance_matrix
354 
355 // PostProcessing ---------------------------------------------------------
357 
358  poselist.push_back( pose );
359  int nexindex = poselist.size() - 1;
360  int lowrmsi=-1;
361  Real lowrms=1000000.0;
362  for (int m=0;m<(int)clusterlist.size();m++ ) {
363  Real rms;
364  rms = get_distance_measure(
365  poselist[ nexindex ],
366  poselist[ clusterlist[m].get_cluster_center() ] ); // clustercentre m
367  if (rms < lowrms) {
368  lowrms = rms;
369  lowrmsi = m;
370  }
371  }
372 
373  if (lowrms <= 0.001 ){
374  tr.Info << "Structure identical to existing structure - ignoring" << std::endl;
375  return;
376  }
377 
378  if (lowrmsi >= 0) {
379  if ( lowrms < get_cluster_radius() ) { // if within our radius - then add to cluster
380  clusterlist[lowrmsi].add_member(nexindex);
381  tr.Info << "Adding to cluster " << lowrmsi << " Cluster_rad: " << get_cluster_radius() << std::endl;
382  }else{ // else make a new cluster !!
383  Cluster new_cluster( nexindex );
384  clusterlist.push_back( new_cluster );
385  tr.Info << "Adding as new cluster " << " Cluster_rad: " << get_cluster_radius() << std::endl;
386  }
387  }
388 }
389 
391  // fisher-yates
392  for(int i=member.size()-1; i>-1; i--) {
393  int j = RG.random_range(0, 100000000) % (i + 1);
394  if(i != j) {
395  std::swap(member[j], member[i]);
396  }
397  }
398 }
399 // PostProcessing ---------------------------------------------------------
400 
402  tr.Info << "Sorting each cluster's structures by energy: " << std::endl;
403  int i,j;
404  for (i=0;i<(int)clusterlist.size();i++ ) {
405 
406  std::vector< std::pair< int, Real > > cluster_energies;
407  for (j=0;j<(int)clusterlist[i].size();j++ ) {
408  Real score=0.0;
409  if ( !getPoseExtraScores( poselist[ clusterlist[i][j] ], "silent_score", score ) ) {
410  tr.Error << "Warning: no score available for " << std::endl;
411  }
412  cluster_energies.push_back( std::pair< int, Real > ( clusterlist[i][j], score ) );
413  }
414 
415  std::sort( cluster_energies.begin(), cluster_energies.end(), compareIndexEnergyPair );
416  clusterlist[i].clear(); // This retains the cluster center!
417  for (j=0;j<(int)cluster_energies.size();j++ ) clusterlist[i].push_back( cluster_energies[j].first );
418 
419  }
420 
421 }
422 
423 // PostProcessing ---------------------------------------------------------
424 
426  int i,j;
427  for (i=0;i<(int)clusterlist.size();i++ ) {
428  if ( clusterlist[i].size() <= 1 ) continue;
429 
430  // sort group by energy
431  std::vector< std::pair< int, Real > > cluster_energies;
432  for (j=0;j<(int)clusterlist[i].size();j++ ) {
433  Real score=0.0;
434  if ( !getPoseExtraScores( poselist[ clusterlist[i][j] ], "silent_score", score ) ) {
435  tr.Error << "Warning: no score available for " << std::endl;
436  }
437 
438  cluster_energies.push_back( std::pair< int, Real > ( clusterlist[i][j], score ) );
439  }
440  std::sort( cluster_energies.begin(), cluster_energies.end(), compareIndexEnergyPair );
441  clusterlist[i].clear();
442  for (j=0;j<(int)(cluster_energies.size()-1);j++ ) clusterlist[i].push_back( cluster_energies[j].first );
443  }
444 
445 }
446 
448  tr.Info << "Sorting clsuters by energy: " << std::endl;
449 
450  int i;
451  std::vector < Cluster > temp = clusterlist;
452 
453  std::vector< std::pair< int, Real > > cluster_energies;
454  for (i=0;i<(int)clusterlist.size();i++ ) {
455  Real score;
456 
457  // This assumes that the first member is already the lowest energy one!
458  int energy_member = 0;
459  if ( !getPoseExtraScores( poselist[ clusterlist[i][energy_member] ], "silent_score", score ) ) {
460  tr.Error << "Warning: no score available for " << std::endl;
461  }
462 
463  Real combo_score = (1.0 - population_weight_ ) * score - population_weight_ * clusterlist[i].group_size() ; // group size keeps trakc of the size of the clsuter, even with pruning
464  cluster_energies.push_back( std::pair< int, Real > ( i, combo_score ) );
465  }
466 
467  std::sort( cluster_energies.begin(), cluster_energies.end(), compareIndexEnergyPair );
468 
469  // clear the cluster list
470  clusterlist.clear();
471 
472  // now put back every cluster in the order of energies
473  for (i=0;i<(int)cluster_energies.size();i++ ) clusterlist.push_back( temp[cluster_energies[i].first ] );
474 
475 }
476 
478  int i;
479  std::vector < Cluster > clusterlist_copy = clusterlist;
480  clusterlist.clear();
481 
482  std::vector< std::pair< int, Real > > cluster_energies;
483  for (i=0;i<(int)clusterlist_copy.size();i++ ) {
484  if ( clusterlist_copy[i].group_size() > 1 ) {
485  clusterlist.push_back( clusterlist_copy[i] );
486  }
487  }
488 
489  //In case nothing is left, better at least return one cluster?
490  if ( clusterlist.size() < 1 ) {
491  clusterlist.push_back( clusterlist_copy[ 0 ] );
492  }
493 
494 
495 }
496 
497 // take first 'limit' from each cluster
498 void ClusterBase::limit_groupsize( int limit ) {
499  tr.Info << "Limiting each cluster to a total size of : " << limit << std::endl;
500  int i,j;
501  if ( limit < 1 ) return;
502  for (i=0;i<(int)clusterlist.size();i++ ) {
503  Cluster temp = clusterlist[i];
504  clusterlist[i].clear();
505  for (j=0;(j<(int)temp.size()) && (j<limit);j++ ) {
506  clusterlist[i].push_back( temp[j] );
507  }
508  }
509 }
510 
511 // take first 'limit'% from each cluster
513  int i,j, limit;
514  if ( percent_limit > 1.0 ) return;
515  for (i=0;i<(int)clusterlist.size();i++ ) {
516  Cluster temp = clusterlist[i];
517  clusterlist[i].clear();
518  limit = static_cast<core::Size>( std::floor(percent_limit*temp.size()) );
519  tr << "truncating from " << temp.size() << " to " << limit << std::endl;
520  for (j=0;j<limit;j++ ) {
521  clusterlist[i].push_back( temp[j] );
522  }
523  }
524 }
525 
526 // take random 'limit'% from each cluster
528  int i,j,limit;
529  if ( percent_limit >= 1.0 ) return;
530  for (i=0;i<(int)clusterlist.size();i++ ) {
531  Cluster temp = clusterlist[i];
532  clusterlist[i].clear();
533  limit = static_cast<core::Size> (std::floor(percent_limit*temp.size()) );
534  tr << "truncating from " << temp.size() << " to " << limit << std::endl;
535  temp.shuffle();
536  for (j=0;j<limit;j++ ) {
537  clusterlist[i].push_back( temp[j] );
538  }
539  }
540 }
541 
542 // take first 'limit' clusters
543 void ClusterBase::limit_groups( int limit ) {
544  tr.Info << "Limiting total number of clusters to : " << limit << std::endl;
545  int i;
546  if ( limit < 0 ) return;
547  std::vector < Cluster > temp = clusterlist;
548  clusterlist.clear();
549  for (i=0;i<(int)limit;i++ ) {
550  if( (int)i >= (int)temp.size() ) break;
551  clusterlist.push_back( temp[i] );
552  }
553 
554  // Remove the Poses from the removed clusters!
555  for (i=limit; i<(int)temp.size() ;i++ ) {
556  for (int j=0;j<(int)temp[i].size();j++ ) {
557  poselist[ temp[i].member[j] ] = Pose();
558  }
559  }
560 }
561 
562 // take first 'limit' total_structures
564  tr.Info << "Limiting total structure count to : " << limit << std::endl;
565  int i,j;
566  if ( limit < 0 ) return;
567  std::vector < Cluster > temp = clusterlist;
568  clusterlist.clear();
569  int count=0;
570  for (i=0;i<(int)temp.size();i++ ) {
571  Cluster newcluster( temp[i].get_cluster_center() );
572  newcluster.clear();
573  for (j=0;j<(int)temp[i].size();j++ ) {
574  if ( count < limit ) {
575  newcluster.push_back( temp[i][j] );
576  }
577  count ++;
578  }
579  if ( newcluster.size() > 0 ){
580  clusterlist.push_back( newcluster );
581  }
582  }
583 }
584 
585 
587  tr.Info << "Cleaning Pose store to save memory ... " << std::endl;
588  // for each structure in the pose list
589  for ( Size index=0; index < poselist.size(); index ++ ) {
590  bool ispresent = false;
591  // try and find it in the cluster assigments
592  Size i,j;
593  for (i=0;i<clusterlist.size();i++ ) {
594  for (j=0;j<clusterlist[i].size();j++ ) {
595  if ( (int)index == clusterlist[i][j] ) { ispresent = true; break; }
596  }
597  if ( (int)index == clusterlist[i].get_cluster_center() ) ispresent = true;
598  if ( ispresent) break;
599  }
600  if ( !ispresent) {
601  // zap the pose (but retain a DUMMY POSE so that the indexing stays the smae !!! )
602  // i know this is retarded, PoseOP would be better at least.
603  poselist[index] = Pose() ;
604  }
605  }
606 }
607 
608 
609 // ------ OUTPUT -----------------------------------------------------------------------
610 
612  tr.Info << "---------- Summary ---------------------------------" << std::endl;
613  int i,j;
614 
615  int count=0;
616 
617  for (i=0;i<(int)clusterlist.size();i++ ) {
618  tr.Info << "Cluster: " << i << " N: " << (int)clusterlist[i].size() << "GN: " << (int)clusterlist[i].group_size() << " c." << i <<".*.pdb " ;
619  tr.Info << std::endl;
620  count += clusterlist[i].size();
621  for (j=0;j<(int)clusterlist[i].size();j++ ) {
622  tr.Info << " ";
623  tr.Info << core::pose::extract_tag_from_pose( poselist[ clusterlist[i][j] ] ) << " " ;
624  Real score = 0.0;
625  if ( !getPoseExtraScores( poselist[ clusterlist[i][j] ], "silent_score", score ) ) {
626  tr.Info << "----" ;
627  }else{
628  tr.Info << score ;
629  }
630 
631  tr.Info << std::endl;
632  }
633  }
634  tr.Info << "----------------------------------------------------" << std::endl;
635  tr.Info << " Clusters: " << clusterlist.size() << std::endl;
636  tr.Info << " Structures: " << count << std::endl;
637  tr.Info << "----------------------------------------------------" << std::endl;
638 }
639 
641  int i,j;
642  for (i=0;i<(int)clusterlist.size();i++ ) {
643  tr.Info << i << " : ";
644  for (j=0;j<(int)clusterlist[i].size();j++ ) {
645  tr.Info << clusterlist[i][j] << " ";
646  }
647  tr.Info << std::endl;
648  }
649 }
650 
652  int i,j;
653  for (i=0;i<(int)clusterlist.size();i++ ) {
654  for (j=0;j<(int)clusterlist[i].size();j++ ) {
655  tr.Info << clusterlist[i][j]
657  << " " << i << " " << j << std::endl;
658  }
659  }
660 }
661 
663  using namespace basic::options;
664  using namespace basic::options::OptionKeys;
665  bool idealize_final = option[ OptionKeys::cluster::idealize_final_structures ]();
666  int i,j;
667  for ( i=0;i<(int)clusterlist.size();i++ ) {
668  std::vector< int > new_cluster;
669  for ( j=0;j<(int)clusterlist[i].size();j++ ) {
670  if (export_only_low_){
671  if( clusterlist[i].size() >= 1 && j!=0 ) continue; // print only clustercenter
672  }
673  //std::string output_name = "c." + string_of( i ) + "." + string_of( j ) + "." +
674  // core::pose::extract_tag_from_pose( poselist[ clusterlist[i][j] ] ) + ".pdb";
675  std::string output_name = "c." + string_of( i ) + "." + string_of( j ) + "." + "pdb";
676  utility::replace_in( output_name, '/', "_" );
677 
678  // Idealize ?
679  Pose pose;
680  pose = poselist[ clusterlist[i][j] ];
681 
682  // make sure the parent/template name gets included in the REMARK line of the PDB
683  using std::map;
684  using std::string;
685  map< string, string > score_line_strings( core::pose::get_all_score_line_strings( pose ) );
686  for ( map< string, string >::const_iterator it = score_line_strings.begin(),
687  end = score_line_strings.end();
688  it != end; ++it )
689  {
690  if ( it->first != "aln_id" ) continue;
691  core::pose::add_comment( pose, "parents", it->second );
692  }
693 
694  map< string, string > comments = core::pose::get_all_comments( pose );
695  for ( map< string, string >::const_iterator it = comments.begin(),
696  end = comments.end(); it != end; ++it
697  ) {
698  if ( it->first != "aln_id" ) continue;
699  core::pose::add_comment( pose, "parents", it->second );
700  }
701 
702 
703  if ( idealize_final ) {
705  idealizer.fast( false );
706  idealizer.apply( pose );
707  }
708 
709  pose.dump_pdb( prefix + output_name );
710  }
711  }
712 }
713 
715  int i;
716  std::vector< PoseOP > templist;
717  for ( i=0;i<(int)clusterlist.size();i++ ) {
718  PoseOP tempPointer;
719  if (clusterlist[i].size() > 0) templist.push_back( new Pose(poselist[ clusterlist[i][0] ]) );
720  }
721  return templist;
722 }
723 
724 std::vector< core::pose::PoseOP > ClusterBase::return_top_poses_in_clusters( core::Size count) {
725  int i;
726  std::vector< core::pose::PoseOP > templist;
727  for ( i=0;i<(int)clusterlist.size();i++ ) {
728 
729  core::pose::PoseOP tempPointer;
730  if (clusterlist[i].size() < 2){
731  //if singleton return the cluster center
732  tempPointer = new core::pose::Pose(poselist[ clusterlist[i][0]]);
733  templist.push_back(tempPointer);
734  } else if (clusterlist[i].size() >= 2 &&
735  clusterlist[i].size() <= count+1){ //+1 because of cluster center
736  for (Size j = 1; j < clusterlist[i].size(); j++){
737  tempPointer = new core::pose::Pose(poselist[ clusterlist[i][j]]);
738  templist.push_back(tempPointer);
739  }
740  } else if (clusterlist[i].size() > count+1){
741  for (Size j = 1; j <= count; j++){
742  tempPointer = new core::pose::Pose(poselist[ clusterlist[i][j]]);
743  templist.push_back(tempPointer);
744  }
745  } else {}
746  }
747  return templist;
748 }
749 
751  using namespace basic::options;
752  using namespace basic::options::OptionKeys;
753  bool idealize_final = option[ OptionKeys::cluster::idealize_final_structures ]();
754 
757 
758  int i,j;
761 
762  ss = io::silent::SilentStructFactory::get_instance()->get_silent_struct_out();
763  std::string silent_file_ = option[ OptionKeys::out::file::silent ]();
764 
765  for ( i=0;i<(int)clusterlist.size();i++ ) {
766  std::vector< int > new_cluster;
767  for ( j=0;j<(int)clusterlist[i].size();j++ ) {
768 
769  if (export_only_low_){
770  if( clusterlist[i].size() >= 1 && j!=0 ) continue; // print only clustercenter
771  }
772 
773  std::string tag = prefix + "c." + string_of( i ) + "." + string_of( j ) + "." + "pdb";
774  utility::replace_in( tag, '/', "_" );
775 
776  // Idealize ?
777  Pose pose;
778  pose = poselist[ clusterlist[i][j] ];
779  if ( idealize_final ) {
781  idealizer.fast( false );
782  idealizer.apply( pose );
783  }
784 
785  ss->fill_struct( pose, tag );
786 
787  sfd.write_silent_struct( *ss, silent_file_ );
788 
789  }
790  }
791 }
792 
794  std::string prefix,
795  EnsembleConstraints &constraint_maker)
796 {
797  int i,j;
798  tr.Info << "Making constraints .. " << std::endl;
799  for (i=0;i<(int)clusterlist.size();i++ ) {
800  constraint_maker.clear();
801  for (j=0;j<(int)clusterlist[i].size();j++ ) {
802  constraint_maker.push_back( poselist[ clusterlist[i][j] ] );
803  }
804 
805  std::string output_name = "c." + string_of( i ) + "." + "cst";
806  utility::replace_in( output_name, '/', "_" );
807  std::ofstream outf( std::string(prefix + output_name).c_str() );
808  constraint_maker.createConstraints( outf );
809  outf.close();
810  }
811 
812 }
813 
814 
815 
816 
817 
818 
819 
820 
821 
822 
823 
824 
825 
826 
827 
828 
829 
830 
831 
832 
834 {
835 
836 }
837 
840  return "ClusterPhilStyle";
841 }
842 
843 void ClusterPhilStyle::do_clustering( Size max_total_cluster ) {
844 
845  using namespace basic::options;
846  using namespace basic::options::OptionKeys;
847 
848 
849  int listsize = poselist.size();
850  if( listsize <= 0 ) return;
851 
852  tr.Info << "Clustering an initial set of " << listsize << " structures " << std::endl;
854 
855  int i,j;
856  std::vector < int > neighbors ( poselist.size(), 0 );
857  std::vector < int > clusternr ( poselist.size(), -1 );
858  std::vector < int > clustercenter;
859  int mostneighbors;
860  Size nclusters=0;
861 
862  if ( listsize == 0 ) {
863  utility_exit_with_message( "Error: no Poses to cluster! Try -in:file:s or -in:file:silent!" );
864  }
865 
866  if ( get_cluster_radius() < 0 ){
867 
869 
870  // slightly different rule for gdtmm clsutering
871  if ( option[ OptionKeys::cluster::gdtmm ]() ) {
873  if( cluster_radius_ > 5.0 ) cluster_radius_ = 5.0; // Cluster radius of GDTMM < 0.5 is just too coarse.
874  if( cluster_radius_ < 1.0 ) cluster_radius_ = 1.0; // Cluster radius of GDTMM > 0.9 is too fine for clustering
875  }
876 
877  tr.Info << "Clustering of " << listsize << "structures with radius " << get_cluster_radius() << " (auto) " << std::endl;
878  } else {
879  tr.Info << "Clustering of " << listsize << "structures with radius " << get_cluster_radius() << std::endl;
880  }
881 
882  std::vector <int> clustercentre;
883 
884  tr.Info << "Assigning initial cluster centres " << std::endl;
885  // now assign groupings
886  while(true) {
887  // count each's neighbors
888  for (i=0;i<listsize;i++ ) {
889  neighbors[i] = 0;
890  if (clusternr[i]>=0) continue; // ignore ones already taken
891  for (j=0;j<listsize;j++ ) {
892  if (clusternr[j]>=0) continue; // ignore ones already taken
893  if ( distance_matrix( i+1, j+1 ) < get_cluster_radius()) neighbors[i]++;
894  }
895  }
896 
897  mostneighbors = 0;
898  for (i=0;i<listsize;i++ ) {
899  if (neighbors[i]>neighbors[mostneighbors]) mostneighbors=i;
900  }
901  if (neighbors[mostneighbors] <= 0) break; // finished!
902 
903 
904  for (i=0;i<listsize;i++ ) {
905  if (clusternr[i]>=0) continue; // ignore ones already taken
906  if ( distance_matrix( i+1, mostneighbors+1 ) < get_cluster_radius()) {
907  clusternr[i] = mostneighbors;
908  }
909  }
910 
911  clustercentre.push_back(mostneighbors);
912  nclusters++;
913 
914  if (nclusters > max_total_cluster ) break; // currently fixed but ought to be a paraemter
915  if ((nclusters%10)==0) tr.Info << ".";
916  tr.Info.flush();
917  }
918  tr.Info << std::endl;
919 
920  for (i=0;i<(int)clustercentre.size();i++ ) {
921  Cluster new_cluster( clustercentre[i] );
922  new_cluster.clear();
923  for (j=0;j<listsize;j++ ) {
924  // if that struture belongs to a given cluster
925  if (clusternr[j] == clustercentre[i]) {
926  new_cluster.add_member(j); // add structure
927  }
928  }
929 
930  // add the new list to the clusterlist
931  clusterlist.push_back(new_cluster);
932  }
933 
934 
935 }
936 
937 // this ensures every structure is in the cluster to who's cluster center it is most similar too
939  using namespace basic::options;
940  using namespace basic::options::OptionKeys;
941 
942  int i,j;
943 
944  tr.Info << "Redistributing groups ..." << clusterlist.size() << " cluster centers";
945 
946  // redistribute groups - i.e. take each structure, calculate the rms to each cluster centre.
947  for (i=0;i<(int)clusterlist.size();i++ ) {
948  for (j=1;j<(int)clusterlist[i].size();j++ ) {
949  int lowrmsi=i;
950  Real lowrms=10000.0;
951  for (int m=0;m<(int)clusterlist.size();m++ ) {
952  Real rms;
953  rms = distance_matrix( clusterlist[i][j]+1, // current structure vs
954  clusterlist[m].get_cluster_center()+1); // clustercentre of cluster m
955 
956  if (rms < lowrms) {
957  lowrms = rms;
958  lowrmsi = m;
959  }
960  }
961  if (lowrmsi != i) { // is a different cluster centre closer to us than our current centre ?
962  tr.Info << "Switched " << lowrmsi << "<--" << i << std::endl;
963  // switch over;
964  clusterlist[lowrmsi].add_member(clusterlist[i][j]);
965  clusterlist[i].remove_member( j );
966  }
967  }
968 
969  }
970 
971 
972 
973 }
974 
975 
978  return "ClusterPhilStyle_Loop";
979 }
980 
981 Real
984  const Pose & pose1,
985  const Pose & pose2
986  ) const
987 {
988  return protocols::loops::loop_rmsd_with_superimpose(pose2, pose1, loop_def_, false );
989 }
990 
991 
992 
993 
994 
995 
996 
997 
998 
999 
1000 
1001 
1002 
1003 
1004 
1005 
1006 
1007 
1008 
1009 
1010 
1011 
1012 
1013 
1014 
1015 
1016 
1017 
1018 
1019 
1020 
1021 
1022 
1023 
1024 
1025 
1026 
1027 
1028 
1029 
1031 {
1032  cluster_base_ = cluster_base;
1033  processed_ = 0;
1034 }
1035 
1037  using namespace basic::options;
1038  using namespace basic::options::OptionKeys;
1039  poselist.clear();
1040  GatherPosesMover::apply( pose );
1041 
1042  // check we hvnt added this structure before !
1043  if ( cluster_base_->check_tag( core::pose::extract_tag_from_pose( pose ) ) ) {
1044  tr.Info << "Already added: " << core::pose::extract_tag_from_pose( pose ) << std::endl;
1045  return;
1046  }
1047 
1048  // find the best group by looping round all the existing groups and comparing
1049  // protected:
1050  cluster_base_->add_structure( pose );
1051  processed_ ++;
1052 
1053 
1054  // to prevent memory crazyness, periodically limit back to the number of
1055  // desired structures! giving preference to low energy clusters
1056  static int count=0;
1057 
1058  Real score;
1059  getPoseExtraScores( pose , "silent_score", score );
1060  tr.Info << "Adding a "
1062  << " " << score
1063  << std::endl;
1064 
1065  if (( (count+1) % 150 ) == 0 ){
1066  cluster_base_->print_summary();
1067  cluster_base_->sort_each_group_by_energy();
1068  cluster_base_->print_summary();
1069  cluster_base_->sort_groups_by_energy();
1070  cluster_base_->print_summary();
1071  cluster_base_->limit_groupsize( option[ OptionKeys::cluster::limit_cluster_size] );
1072  cluster_base_->limit_groups( option[OptionKeys::cluster::limit_clusters] );
1073  cluster_base_->limit_total_structures( option[ OptionKeys::cluster::limit_total_structures] );
1074  cluster_base_->clean_pose_store();
1075  cluster_base_->print_summary();
1076  }
1077 
1078  count++;
1079 }
1080 
1083  return "AssignToClustersMover";
1084 }
1085 
1088  return "EnsembleConstraints";
1089 }
1090 
1093  return "EnsembleConstraints_Simple";
1094 }
1095 
1097  int nres = poselist[0].total_residue();
1098  int residuesep = 5;
1099  Real strength = 1.0;
1100 
1101  out << "[ atompairs ]" << std::endl;
1102 
1103  for (int ir = 1; ir <= nres; ir ++ ) {
1104  for (int jr = 1; jr <= nres; jr ++ ) {
1105  if ( ir >= (jr - residuesep) ) continue;
1106 
1107  Real lowdist=1000000.0;
1108  Real highdist=-100000.0;
1109  for ( Size i=0; i<poselist.size(); i++ ) {
1110  Vector ir_CA = poselist[i].residue(ir).xyz( "CA" );
1111  Vector jr_CA = poselist[i].residue(jr).xyz( "CA" );
1112  Real dist = ir_CA.distance( jr_CA );
1113  if ( dist < lowdist ) lowdist = dist;
1114  if ( dist > highdist)highdist = dist;
1115  }
1116 
1117  if ( lowdist > 11.0 ) continue;
1118  if ( highdist > 11.0 ) continue;
1119 
1120  if ( ( highdist - lowdist ) < minimum_width_ ) {
1121  highdist += 0.5 * minimum_width_;
1122  lowdist -= 0.5 * minimum_width_;
1123  }
1124 
1125  out << " CA" << right_string_of(ir,7,' ')
1126  << " CA" << right_string_of(jr,7,' ')
1127  << " BOUNDED"
1128  << F( 12, 3, lowdist)
1129  << F( 12, 3, highdist)
1130  << F(4,1,strength )
1131  << " na"
1132  << "; " << F( 12, 3, highdist - lowdist)
1133  << std::endl;
1134  }
1135  }
1136 }
1137 
1138 
1139 } // cluster
1140 } // protocols