31 #include <utility/file/FileName.hh>
32 #include <utility/string_util.hh>
35 #include <utility/io/izstream.hh>
36 #include <numeric/random/random.hh>
49 #include <basic/options/keys/cluster.OptionKeys.gen.hh>
50 #include <basic/options/keys/out.OptionKeys.gen.hh>
51 #include <basic/options/keys/symmetry.OptionKeys.gen.hh>
54 #include <utility/vector1.hh>
55 #include <ObjexxFCL/format.hh>
56 #include <basic/Tracer.hh>
60 namespace ObjexxFCL {
namespace fmt { } }
using namespace ObjexxFCL::fmt;
67 using namespace ObjexxFCL;
69 using namespace basic::options;
70 using namespace evaluation;
// Tracer object used for this file's log output; it is constructed with the
// channel name "protocols.cluster".
72 static basic::Tracer
tr(
"protocols.cluster");
// Random generator object constructed with the fixed value 42032.
// NOTE(review): presumably a seed/identifier for the generator -- confirm
// against numeric::random::RandomGenerator.
73 static numeric::random::RandomGenerator
RG(42032);
// Open the file named by `filename` as an input stream.
79 utility::io::izstream data( filename.c_str() );
// Abort with a message that quotes the file name.
// NOTE(review): presumably guarded by an open-failure check -- confirm.
81 utility_exit_with_message(
82 "ERROR: Unable to open extra_scores file: '" + filename +
"'"
// Name -> score table filled from the file contents.
86 std::map< std::string, core::Real > score_data;
// Consume the stream one line at a time; each line is wrapped in a string
// stream for parsing.
88 while( getline(data,line) ) {
89 std::istringstream l( line );
// Store the score under its template name (an existing entry with the same
// name is overwritten by std::map::operator[] assignment).
95 score_data[template_name] = template_score;
// Ordering predicate over (int, Real) pairs: yields true when p1's Real
// component is strictly smaller than p2's. The int component takes no part
// in the comparison.
103 const std::pair< int, Real > & p1,
const std::pair< int, Real > & p2 )
105 return p1.second < p2.second;
// Default constructor; explicitly invokes the Mover base-class constructor.
126 GatherPosesMover::GatherPosesMover() : Mover()
133 using namespace basic::options;
134 using namespace basic::options::OptionKeys;
// Taken only when the cluster::template_scores option was explicitly set by
// the user.
136 if ( option[ OptionKeys::cluster::template_scores].user() ) {
139 tr.Info <<
"Read template scores: " << std::endl;
// Log one "<key>: <value>" line for the entry referenced by iterator ii.
142 tr.Info << (*ii).first <<
": " << (*ii).second << std::endl;
154 using namespace basic::options;
155 using namespace basic::options::OptionKeys;
// symmetry::symmetric_rmsd is enabled, AND-ed with a further condition.
157 if ( option[ basic::options::OptionKeys::symmetry::symmetric_rmsd ]() &&
// NOTE(review): the text is a warning but it is emitted on the Info channel.
159 tr.Info <<
"Warning!!! For symmetric rmsd selected but pose is not symmetric. Ignoring symmetry" << std::endl;
// Branch for the cluster::hotspot_hash option.
161 if ( option[ OptionKeys::cluster::hotspot_hash ]() ) {
// Branch for the cluster::gdtmm option.
170 if ( option[ OptionKeys::cluster::gdtmm ]() ) {
// gdtmm together with symmetric_rmsd is rejected: the program exits with a
// message.
171 if ( option[ basic::options::OptionKeys::symmetry::symmetric_rmsd ]() ) utility_exit_with_message(
"No symmetric gdtmm available!!!!\n" ) ;
// Branch taken when cluster::exclude_res was explicitly given by the user.
180 if ( option[ OptionKeys::cluster::exclude_res ].user() ) {
// Exit with a message when residue 1 of pose1 reports is_RNA().
183 if ( pose1.
residue(1).
is_RNA() ) utility_exit_with_message(
"Hey put in all atom rmsd code for residue subset!\n" ) ;
// symmetric_rmsd is enabled, AND-ed with a further condition; the message
// below states that all-CA rmsd is calculated instead.
184 if ( option[ basic::options::OptionKeys::symmetry::symmetric_rmsd ]() &&
186 tr.Info <<
"Warning!!! For symmetric clustering currently only all CA clustering is available. Calculating all CA rmsd instead..." << std::endl;
192 if ( option[ basic::options::OptionKeys::symmetry::symmetric_rmsd ]() &&
// Evaluate the score function held through sfxn_ on the pose.
223 Real score = (*sfxn_)(pose) ;
// Look up the "aln_id" entry and keep only its first five characters as the
// template name (substr(0,5) yields fewer characters for shorter strings).
243 std::string template_name = score_line_strings[
"aln_id"];
244 template_name = template_name.substr(0,5);
// NOTE(review): this message is written to std::cout, whereas the other
// messages in this file go through the tracer `tr` -- confirm intended.
246 std::cout <<
"Found template name: " << template_name << std::endl;
// Add the template's score onto the pose score.
249 score += template_score;
// NOTE(review): the label reads "struc" but the value printed is the score.
253 tr.Info <<
"Adding struc: " << score << std::endl;
266 return "GatherPosesMover";
279 export_only_low_(false),
281 population_weight_( 0.09 ),
282 cluster_radius_( 2.0 )
287 return "ClusterBase";
303 FArray2D< Real > p1a, p2a;
306 tr.Info <<
"Calculating RMS matrix: " << std::endl;
// Histogram of the computed distances: bin width 0.25, and
// int(20.0 / 0.25) = 80 bins, i.e. the range [0, 20).
308 Real hist_resolution = 0.25;
309 int hist_size =
int(20.0 / hist_resolution);
// One zero-initialised counter per bin.
311 std::vector<int> histcount(hist_size,0);
// Bin index for this distance; distances at or beyond the last bin are not
// counted.
// NOTE(review): only the upper bound is checked -- assumes dist is never
// negative; confirm.
322 int histbin =
int(dist/hist_resolution);
323 if ( histbin < hist_size ) histcount[histbin]+=1;
// Progress output every 5000th value of count.
327 if ( count % 5000 == 0 ) {
// Percentage computed as 200 * count / ((N - 1) * N) with N = poselist.size(),
// i.e. count relative to N*(N-1)/2.
// NOTE(review): the denominator is zero when N == 1 -- confirm that case
// cannot reach this statement.
328 Real const percent_done ( 200.0 * static_cast< Real > ( count ) / ( (
poselist.size() - 1) *
poselist.size() ) );
331 <<
" ( " << F(8,1,percent_done) <<
"% )"
339 tr.Info <<
"Histogram of pairwise similarity values for the initial clustering set" << std::endl;
// Largest bin count encountered while walking the histogram.
340 int maxcount_count = 0;
342 for (
Size i = 0; i <(
Size)hist_size; i++ ) {
// One output line per bin: the bin's lower edge followed by its count.
343 tr.Info <<
"hist " <<
Real(i)*hist_resolution <<
" " << histcount[i] << std::endl;
344 if( histcount[i] > maxcount_count){
345 maxcount_count = histcount[i];
// Initialised to a large sentinel value.
// NOTE(review): presumably lowered while comparing against existing
// structures -- confirm.
361 Real lowrms=1000000.0;
// A lowest distance of at most 0.001 is reported as a duplicate of an
// existing structure.
373 if (lowrms <= 0.001 ){
374 tr.Info <<
"Structure identical to existing structure - ignoring" << std::endl;
// Log the cluster index (lowrmsi) and the current cluster radius.
381 tr.Info <<
"Adding to cluster " << lowrmsi <<
" Cluster_rad: " <<
get_cluster_radius() << std::endl;
// Construct a Cluster object from nexindex.
383 Cluster new_cluster( nexindex );
// Walk the indices of `member` from the last element down to 0.
392 for(
int i=
member.size()-1; i>-1; i--) {
// Draw a value from RG and reduce it modulo (i + 1); for a non-negative draw
// this gives an index j in [0, i].
// NOTE(review): reducing a bounded draw with % is in general not exactly
// uniform -- confirm that this is acceptable here.
393 int j =
RG.random_range(0, 100000000) % (i + 1);
402 tr.Info <<
"Sorting each cluster's structures by energy: " << std::endl;
// Collects (structure index, score) pairs.
406 std::vector< std::pair< int, Real > > cluster_energies;
// NOTE(review): the message ends after "for" without naming the structure
// that lacks a score.
410 tr.Error <<
"Warning: no score available for " << std::endl;
// Pair the j-th member of cluster i with its score.
412 cluster_energies.push_back( std::pair< int, Real > (
clusterlist[i][j], score ) );
// Same collect-and-pair pattern as above.
431 std::vector< std::pair< int, Real > > cluster_energies;
435 tr.Error <<
"Warning: no score available for " << std::endl;
438 cluster_energies.push_back( std::pair< int, Real > (
clusterlist[i][j], score ) );
// NOTE(review): "clsuters" in the log message below is a misspelling of
// "clusters".
448 tr.Info <<
"Sorting clsuters by energy: " << std::endl;
// Collects (cluster index, score) pairs.
453 std::vector< std::pair< int, Real > > cluster_energies;
458 int energy_member = 0;
460 tr.Error <<
"Warning: no score available for " << std::endl;
// Here the pair is keyed by the cluster index i and carries combo_score.
464 cluster_energies.push_back( std::pair< int, Real > ( i, combo_score ) );
// Append clusters from `temp` to clusterlist in the order in which they
// appear in cluster_energies.
473 for (i=0;i<(
int)cluster_energies.size();i++ )
clusterlist.push_back( temp[cluster_energies[i].first ] );
// Take a copy of the current cluster list to iterate over.
479 std::vector < Cluster > clusterlist_copy =
clusterlist;
482 std::vector< std::pair< int, Real > > cluster_energies;
483 for (i=0;i<(
int)clusterlist_copy.size();i++ ) {
// Only clusters whose group_size() exceeds 1 enter this branch.
484 if ( clusterlist_copy[i].group_size() > 1 ) {
499 tr.Info <<
"Limiting each cluster to a total size of : " << limit << std::endl;
// A limit below 1 returns early.
501 if ( limit < 1 )
return;
// Visit at most `limit` entries of temp (fewer if temp is shorter).
505 for (j=0;(j<(
int)temp.
size()) && (j<limit);j++ ) {
// Fractions above 1.0 return early.
// NOTE(review): the otherwise parallel check further down uses >= 1.0, so a
// value of exactly 1.0 is handled differently here -- confirm intended.
514 if ( percent_limit > 1.0 )
return;
// Number of entries to keep: floor(percent_limit * temp.size()).
// NOTE(review): assumes percent_limit is non-negative before the cast to
// core::Size -- confirm.
518 limit =
static_cast<core::Size>( std::floor(percent_limit*temp.
size()) );
519 tr <<
"truncating from " << temp.
size() <<
" to " << limit << std::endl;
520 for (j=0;j<limit;j++ ) {
// Fractions of 1.0 or more return early.
529 if ( percent_limit >= 1.0 )
return;
// Same floor-based count as above.
533 limit =
static_cast<core::Size> (std::floor(percent_limit*temp.
size()) );
534 tr <<
"truncating from " << temp.
size() <<
" to " << limit << std::endl;
536 for (j=0;j<limit;j++ ) {
544 tr.Info <<
"Limiting total number of clusters to : " << limit << std::endl;
// Negative limits return early.
546 if ( limit < 0 )
return;
// Visit the first `limit` clusters of temp, stopping early when temp holds
// fewer than `limit` clusters.
549 for (i=0;i<(
int)limit;i++ ) {
550 if( (
int)i >= (
int)temp.size() )
break;
// Visit the clusters from index `limit` onward and every member j of each.
555 for (i=limit; i<(
int)temp.size() ;i++ ) {
556 for (
int j=0;j<(
int)temp[i].
size();j++ ) {
564 tr.Info <<
"Limiting total structure count to : " << limit << std::endl;
// Negative limits return early.
566 if ( limit < 0 )
return;
570 for (i=0;i<(
int)temp.size();i++ ) {
// Start a replacement cluster that has the same centre as temp[i].
571 Cluster newcluster( temp[i].get_cluster_center() );
573 for (j=0;j<(
int)temp[i].
size();j++ ) {
// Branch taken while the running `count` is still below the limit.
574 if ( count < limit ) {
// Branch taken only for non-empty replacement clusters.
579 if ( newcluster.
size() > 0 ){
587 tr.Info <<
"Cleaning Pose store to save memory ... " << std::endl;
// For every index into poselist, determine whether a cluster refers to it.
589 for (
Size index=0; index <
poselist.size(); index ++ ) {
590 bool ispresent =
false;
// The index matches member j of cluster i.
595 if ( (
int)index ==
clusterlist[i][j] ) { ispresent =
true;
break; }
// The index matches the centre of cluster i.
597 if ( (
int)index ==
clusterlist[i].get_cluster_center() ) ispresent =
true;
// Leave the enclosing loop as soon as a reference has been found.
598 if ( ispresent)
break;
612 tr.Info <<
"---------- Summary ---------------------------------" << std::endl;
619 tr.Info << std::endl;
631 tr.Info << std::endl;
634 tr.Info <<
"----------------------------------------------------" << std::endl;
636 tr.Info <<
" Structures: " << count << std::endl;
637 tr.Info <<
"----------------------------------------------------" << std::endl;
643 tr.Info << i <<
" : ";
647 tr.Info << std::endl;
657 <<
" " << i <<
" " << j << std::endl;
663 using namespace basic::options;
664 using namespace basic::options::OptionKeys;
// Read the cluster::idealize_final_structures option once into a local flag.
665 bool idealize_final = option[ OptionKeys::cluster::idealize_final_structures ]();
668 std::vector< int > new_cluster;
// File name of the form c.{i}.{j}.pdb built from the two indices.
675 std::string output_name =
"c." + string_of( i ) +
"." + string_of( j ) +
"." +
"pdb";
// NOTE(review): presumably replaces every '/' in the name with "_" --
// confirm against utility::replace_in.
676 utility::replace_in( output_name,
'/',
"_" );
686 for ( map< string, string >::const_iterator it = score_line_strings.begin(),
687 end = score_line_strings.end();
// Entries whose key is not "aln_id" are skipped.
690 if ( it->first !=
"aln_id" )
continue;
695 for ( map< string, string >::const_iterator it = comments.begin(),
696 end = comments.end(); it !=
end; ++it
// Same filter for the comments map: only the "aln_id" key is processed.
698 if ( it->first !=
"aln_id" )
continue;
// When the flag is set, call idealizer.fast(false) and apply the idealizer to
// the pose.
703 if ( idealize_final ) {
705 idealizer.
fast(
false );
706 idealizer.
apply( pose );
// Dump the pose as a PDB at the path prefix + output_name.
709 pose.
dump_pdb( prefix + output_name );
716 std::vector< PoseOP > templist;
726 std::vector< core::pose::PoseOP > templist;
733 templist.push_back(tempPointer);
738 templist.push_back(tempPointer);
741 for (
Size j = 1; j <= count; j++){
743 templist.push_back(tempPointer);
751 using namespace basic::options;
752 using namespace basic::options::OptionKeys;
// Read the cluster::idealize_final_structures option once into a local flag.
753 bool idealize_final = option[ OptionKeys::cluster::idealize_final_structures ]();
// Obtain an output silent-struct object from the SilentStructFactory instance.
762 ss = io::silent::SilentStructFactory::get_instance()->get_silent_struct_out();
// Destination file name comes from the out::file::silent option.
// NOTE(review): declared as a local here despite the member-style trailing
// underscore.
763 std::string silent_file_ = option[ OptionKeys::out::file::silent ]();
766 std::vector< int > new_cluster;
// Tag of the form {prefix}c.{i}.{j}.pdb built from the two indices.
773 std::string tag = prefix +
"c." + string_of( i ) +
"." + string_of( j ) +
"." +
"pdb";
// NOTE(review): presumably replaces every '/' in the tag with "_" -- confirm
// against utility::replace_in.
774 utility::replace_in( tag,
'/',
"_" );
// When the flag is set, call idealizer.fast(false) and apply the idealizer to
// the pose.
779 if ( idealize_final ) {
781 idealizer.
fast(
false );
782 idealizer.
apply( pose );
// Fill the silent struct from the pose under this tag, then write it to the
// silent file.
785 ss->fill_struct( pose, tag );
787 sfd.write_silent_struct( *ss, silent_file_ );
798 tr.Info <<
"Making constraints .. " << std::endl;
// Clear the constraint maker.
800 constraint_maker.
clear();
// Output name of the form c.{i}.cst built from the index.
805 std::string output_name =
"c." + string_of( i ) +
"." +
"cst";
// NOTE(review): presumably replaces every '/' in the name with "_" --
// confirm against utility::replace_in.
806 utility::replace_in( output_name,
'/',
"_" );
// Open an output file stream at the path prefix + output_name.
807 std::ofstream outf(
std::string(prefix + output_name).c_str() );
840 return "ClusterPhilStyle";
845 using namespace basic::options;
846 using namespace basic::options::OptionKeys;
// Nothing to do for a zero or negative list size.
850 if( listsize <= 0 )
return;
852 tr.Info <<
"Clustering an initial set of " << listsize <<
" structures " << std::endl;
// Per-pose neighbour counts (initially 0) and per-pose cluster assignment
// (initially -1; the loops below treat values >= 0 as already assigned).
856 std::vector < int > neighbors (
poselist.size(), 0 );
857 std::vector < int > clusternr (
poselist.size(), -1 );
858 std::vector < int > clustercenter;
// NOTE(review): listsize <= 0 already returned above, so this branch looks
// unreachable unless listsize is changed in between -- confirm.
862 if ( listsize == 0 ) {
863 utility_exit_with_message(
"Error: no Poses to cluster! Try -in:file:s or -in:file:silent!" );
871 if ( option[ OptionKeys::cluster::gdtmm ]() ) {
// NOTE(review): in the two messages below there is no space between the
// printed count and the word "structures".
877 tr.Info <<
"Clustering of " << listsize <<
"structures with radius " <<
get_cluster_radius() <<
" (auto) " << std::endl;
879 tr.Info <<
"Clustering of " << listsize <<
"structures with radius " <<
get_cluster_radius() << std::endl;
// NOTE(review): both `clustercenter` (declared earlier) and `clustercentre`
// exist; only `clustercentre` is used in the statements that follow.
882 std::vector <int> clustercentre;
884 tr.Info <<
"Assigning initial cluster centres " << std::endl;
// Pairwise pass restricted to poses that are not yet assigned to a cluster.
888 for (i=0;i<listsize;i++ ) {
890 if (clusternr[i]>=0)
continue;
891 for (j=0;j<listsize;j++ ) {
892 if (clusternr[j]>=0)
continue;
// Select the index with the highest neighbour count.
898 for (i=0;i<listsize;i++ ) {
899 if (neighbors[i]>neighbors[mostneighbors]) mostneighbors=i;
// Stop when even the best candidate has no neighbours.
901 if (neighbors[mostneighbors] <= 0)
break;
// Among poses that are still unassigned, record mostneighbors as their
// cluster.
904 for (i=0;i<listsize;i++ ) {
905 if (clusternr[i]>=0)
continue;
907 clusternr[i] = mostneighbors;
// Remember the new centre.
911 clustercentre.push_back(mostneighbors);
// Stop once nclusters exceeds max_total_cluster; print a dot whenever
// nclusters is a multiple of 10.
914 if (nclusters > max_total_cluster )
break;
915 if ((nclusters%10)==0)
tr.Info <<
".";
918 tr.Info << std::endl;
// Build one Cluster per recorded centre and scan all poses for those whose
// assignment equals that centre.
920 for (i=0;i<(
int)clustercentre.size();i++ ) {
921 Cluster new_cluster( clustercentre[i] );
923 for (j=0;j<listsize;j++ ) {
925 if (clusternr[j] == clustercentre[i]) {
939 using namespace basic::options;
940 using namespace basic::options::OptionKeys;
// Log the number of cluster centres; this statement itself does not end the
// line with std::endl.
944 tr.Info <<
"Redistributing groups ..." <<
clusterlist.size() <<
" cluster centers";
// Log the two indices involved in a switch as "{lowrmsi}<--{i}".
962 tr.Info <<
"Switched " << lowrmsi <<
"<--" << i << std::endl;
978 return "ClusterPhilStyle_Loop";
1037 using namespace basic::options;
1038 using namespace basic::options::OptionKeys;
1060 tr.Info <<
"Adding a "
// Branch taken whenever (count + 1) is a multiple of 150.
1065 if (( (count+1) % 150 ) == 0 ){
// Apply the three limits configured through the cluster:: options to the
// object held by cluster_base_.
1071 cluster_base_->limit_groupsize( option[ OptionKeys::cluster::limit_cluster_size] );
1072 cluster_base_->limit_groups( option[OptionKeys::cluster::limit_clusters] );
1073 cluster_base_->limit_total_structures( option[ OptionKeys::cluster::limit_total_structures] );
1083 return "AssignToClustersMover";
1088 return "EnsembleConstraints";
1093 return "EnsembleConstraints_Simple";
// Residue count is taken from the first element of poselist.
// NOTE(review): poselist[0] assumes the list is non-empty -- confirm callers
// guarantee this.
1097 int nres =
poselist[0].total_residue();
1099 Real strength = 1.0;
// Section header written to the output stream.
1101 out <<
"[ atompairs ]" << std::endl;
// Iterate over all (ir, jr) residue pairs, numbering from 1 to nres.
1103 for (
int ir = 1; ir <= nres; ir ++ ) {
1104 for (
int jr = 1; jr <= nres; jr ++ ) {
// Keep only pairs where jr lies more than residuesep positions after ir.
1105 if ( ir >= (jr - residuesep) )
continue;
// Running minimum / maximum of the pair distance, started from sentinels.
1107 Real lowdist=1000000.0;
1108 Real highdist=-100000.0;
1112 Real dist = ir_CA.distance( jr_CA );
1113 if ( dist < lowdist ) lowdist = dist;
1114 if ( dist > highdist)highdist = dist;
// Skip pairs whose smallest or largest recorded distance exceeds 11.0.
// NOTE(review): whenever lowdist <= highdist holds, the first test is implied
// by the second -- confirm whether both are needed.
1117 if ( lowdist > 11.0 )
continue;
1118 if ( highdist > 11.0 )
continue;
// An interval narrower than minimum_width_ is widened by half of
// minimum_width_ on each side.
1120 if ( ( highdist - lowdist ) < minimum_width_ ) {
1121 highdist += 0.5 * minimum_width_;
1122 lowdist -= 0.5 * minimum_width_;
// Output line for the pair: " CA" plus residue number (via
// right_string_of(..., 7, ' ')) for each residue, the lower and upper bound
// formatted with F(12, 3, ...), and the interval width after "; ".
1125 out <<
" CA" << right_string_of(ir,7,
' ')
1126 <<
" CA" << right_string_of(jr,7,
' ')
1128 << F( 12, 3, lowdist)
1129 << F( 12, 3, highdist)
1132 <<
"; " << F( 12, 3, highdist - lowdist)