12 #include <utility/exit.hh>
13 #include <utility/string_util.hh>
14 #include <utility/file/FileName.hh>
15 #include <utility/file/file_sys_util.hh>
16 #include <utility/io/ozstream.hh>
17 #include <utility/io/izstream.hh>
28 #include <basic/Tracer.hh>
46 #include <basic/options/option.hh>
47 #include <basic/options/keys/in.OptionKeys.gen.hh>
48 #include <basic/options/keys/cm.OptionKeys.gen.hh>
49 #include <basic/options/keys/run.OptionKeys.gen.hh>
61 #include <utility/vector1.hh>
// Declare ObjexxFCL::fmt (empty in this file) so that the using-directive
// below names a namespace that is known to exist at this point.
64 namespace ObjexxFCL {
namespace fmt { } }
using namespace ObjexxFCL::fmt;
// File-scope tracer object used for diagnostic output throughout this file;
// it is constructed with the label "AlignmentClustering".
69 static basic::Tracer
tr(
"AlignmentClustering");
72 namespace comparative_modeling {
83 using namespace core::sequence;
// NOTE(review): the signature enclosing this statement is not shown in this
// listing (presumably the AlignmentCluster constructor - confirm).
// Appends aln_in to the alns container.
88 alns.push_back(aln_in);
// Destructor of AlignmentCluster. Its body is not shown in this listing.
93 AlignmentCluster::~AlignmentCluster(){
// NOTE(review): the signature enclosing this statement is not shown in this
// listing (presumably add_aln, which the clustering code calls - confirm).
// Appends aln_in to the alns container.
100 alns.push_back(aln_in);
// NOTE(review): the signature enclosing this statement is not shown in this
// listing (presumably get_aln, which overlap-style code calls - confirm).
// Returns the element of alns stored at position index.
106 return (alns[index]);
// Reports the members of this cluster.
// The visible part walks alns with indices 1..alns.size() and, for each
// entry, writes the id of sequence(2) followed by a comma to the tracer tr.
// NOTE(review): several lines of the body are not shown in this listing, so
// what is written to alignment_out cannot be determined from here.
124 void AlignmentCluster::output(std::ostream & alignment_out){
125 for (
Size ii = 1; ii <= alns.size(); ++ii ) {
// Identifier of the alignment: the id of its sequence(2).
127 string const aln_id( alns[ii].sequence(2)->
id() );
128 tr << aln_id <<
"," ;
// NOTE(review): the signature of this function is not shown in this listing.
// It reads a pointer-like argument cluster_in and is presumably
// AlignmentCluster::merge (the clustering constructor calls merge on
// cluster pointers) - confirm.
//
// Step 1: collect the sequence(2) id of every alignment already in alns.
136 set<string> alignmentsInCluster;
137 set<string>::iterator location;
138 for(
Size ii=1; ii<= alns.size(); ++ii){
139 alignmentsInCluster.insert(alns[ii].sequence(2)->
id());
// Step 2: visit the entries of cluster_in with indices 1..cluster_in->size().
141 for(
Size jj= 1; jj <= cluster_in->size(); ++jj){
// tempAln is assigned on a line not shown here (presumably entry jj of
// cluster_in - confirm). Look its id up among the ids collected above.
143 location = alignmentsInCluster.find(tempAln.
sequence(2)->id());
// Reached when the id is not present in this cluster yet; the action taken
// in that case is on lines not shown in this listing.
144 if (location == alignmentsInCluster.end()){
// NOTE(review): the signature of this function is not shown in this listing.
// It is presumably AlignmentCluster::overlap, whose result the clustering
// constructor compares against MAX_CLUSTER_OVERLAP - confirm.
//
// Step 1: collect the sequence(2) id of every alignment held in alns.
153 set<string> alignmentsInCluster;
154 set<string>::iterator location;
155 for(
Size ii=1; ii<= alns.size(); ++ii)
156 alignmentsInCluster.insert(alns[ii].sequence(2)->id());
// Step 2: examine each entry of cluster_in and test whether its id is among
// the ids collected above. The statement executed on a hit is not shown here
// (presumably it increments count_inClust - confirm).
158 Size count_inClust = 0;
159 for(
Size ii=1; ii<=cluster_in->size(); ++ii){
160 location = alignmentsInCluster.find(cluster_in->get_aln(ii).sequence(2)->id());
161 if (location != alignmentsInCluster.end())
// Result: count_inClust divided by the size of cluster_in, both converted to
// Real first. The divisor is the size of cluster_in, not of this cluster.
// NOTE(review): no guard is visible for cluster_in->size() == 0, in which
// case this is a floating-point division by zero.
164 return((
Real)count_inClust/(
Real)cluster_in->size());
// Constructor that performs the whole clustering run. From the visible lines:
//   1. read alignments and store them in a map keyed by a unique id,
//   2. build a model pose for each ranked alignment that has a template pose,
//   3. fill a symmetric pairwise similarity matrix (gdtmms),
//   4. cluster repeatedly while lowering the similarity threshold until the
//      number of clusters is at most GOAL_NUMB_CLUSTERS or the threshold
//      drops below MIN_GDT,
//   5. write each cluster that was not merged away to an output stream.
// NOTE(review): many lines of this body are not shown in this listing. The
// declarations of align_fns, tmp_alns, template_poses, rankedAlignments,
// poses, aln_ids, rankedAlignments_valid, gdtmms, mergeSet, cluster_v,
// numbOutput, filename, MAX_GDT and MIN_GDT are among the missing lines.
170 AlignmentClustering::AlignmentClustering(){
172 using basic::options::option;
176 using ObjexxFCL::string_of;
178 using namespace protocols;
179 using namespace core::chemical;
180 using namespace basic::options;
181 using namespace basic::options::OptionKeys;
182 using namespace core::sequence;
183 using namespace ObjexxFCL::fmt;
// Tuning constants.
// THRESHOLD_FOR_E_VAL has no visible use in this constructor (presumably it
// is passed to the ranking routine on a line not shown - confirm).
184 Real THRESHOLD_FOR_E_VAL = 1e-30;
// Amount by which threshold_gdt is lowered after each clustering round.
187 Real INCREMENT_GDT = .05;
// The threshold loop stops once the cluster count is no larger than this.
188 Size GOAL_NUMB_CLUSTERS = 5;
// Two clusters whose overlap() exceeds this value are scheduled for merging.
189 Real MAX_CLUSTER_OVERLAP = .70;
// Size threshold quoted in the "below size threshold" message further down.
190 Size MIN_POSE_SIZE = 5;
// Phase 1: gather alignments from every file in align_fns into alns, keyed by
// the id of sequence(2).
194 map<string,SequenceAlignment> alns;
195 map<string,SequenceAlignment>::iterator location_alns;
196 for (
Size ii = 1; ii <= align_fns.size(); ++ii ) {
198 option[ cm::aln_format ](), align_fns[ii]
200 for (
Size jj = 1; jj <= tmp_alns.size(); ++jj ) {
201 string aln_id = tmp_alns[jj].sequence(2)->id();
202 location_alns = alns.find(aln_id);
// Make the id unique: while it is already a key of alns, append "X" to the
// id stored on the sequence and look the new id up again.
203 while(location_alns != alns.end()){
204 string fixed_aln_id = aln_id +
"X";
205 tmp_alns[jj].sequence(2)->id(fixed_aln_id);
206 aln_id = tmp_alns[jj].sequence(2)->id();
207 location_alns = alns.find(aln_id);
209 aln_id = (tmp_alns[jj].sequence(2)->id() );
210 alns.insert(std::pair<string,SequenceAlignment>(aln_id,tmp_alns[jj]));
// Phase 2 setup. The statement that consumes the template_pdb option starts
// on a line not shown here (presumably it loads template_poses - confirm).
218 option[ in::file::template_pdb ]());
220 string query_sequence (
// Largest total_residue() seen among the built models; used later as the
// denominator of the coverage value.
222 Size max_pose_len(0);
// Phase 2: one model per ranked alignment.
225 for (
Size ii = 1; ii <= rankedAlignments.size(); ++ii ) {
226 string const aln_id( rankedAlignments[ii].sequence(2)->
id() );
// Template poses are looked up by the first five characters of the
// alignment id.
227 string const template_id( aln_id.substr(0,5) );
228 map< string, Pose >::iterator pose_it = template_poses.find( template_id );
// No pose for this template: an error message is built and printed.
229 if ( pose_it == template_poses.end() ) {
230 string msg(
"Error: can't find pose (id = "
234 std::cout << msg << std::endl;
// NOTE(review): the branch structure around the following statements is not
// fully visible. They announce the model, apply mover to query_pose and
// record the resulting length.
242 using namespace protocols::comparative_modeling;
243 std::cout <<
"building incomplete model with " << std::endl
244 << aln_id << std::endl;
246 mover.
apply( query_pose );
247 max_pose_len = std::max( max_pose_len, query_pose.
total_residue() );
// Keep pose, id and alignment in three containers filled in the same order,
// so the same index refers to the same model in each.
249 poses.push_back( query_pose );
250 aln_ids.push_back( aln_id );
251 rankedAlignments_valid.push_back(rankedAlignments[ii]);
// Diagnostic for a model reported as too small. The condition guarding it is
// on a line not shown here.
// NOTE(review): the message has no space between aln_id and "has only", nor
// between the residue count and "which is below".
254 tr << aln_id <<
"has only "<<query_pose.
total_residue() <<
"which is below size threshold of " << MIN_POSE_SIZE << std::endl;
// Writes the model to a file named after the alignment id plus ".pdb".
256 query_pose.
dump_pdb( aln_id +
".pdb" );
// Phase 3: pairwise similarity matrix. The two lines below are the arguments
// of the gdtmms declaration (its first line is not shown): a square table
// with one row and column per valid alignment, initialised to 0.0.
260 Size total_comparisons_done(0);
262 rankedAlignments_valid.size(),
vector1< Real >(rankedAlignments_valid.size(), 0.0 )
264 for (
Size ii = 1; ii <= rankedAlignments_valid.size(); ++ii ) {
// Each model is assigned similarity 1.0 with itself.
265 gdtmms[ii][ii] = 1.0;
// Only pairs with jj > ii are computed; the value is mirrored below.
266 for (
Size jj = ii + 1; jj <= rankedAlignments_valid.size(); ++jj ) {
267 ++total_comparisons_done;
270 ObjexxFCL::FArray2D< Real > p1a, p2a;
// coverage = n_atoms / max_pose_len, both converted to Real. n_atoms and
// gdtmm are computed on lines not shown in this listing.
275 Real const coverage( (
Real) n_atoms / (
Real) max_pose_len );
// Store the value in both triangles so that the matrix is symmetric.
282 gdtmms[ii][jj] = gdtmm;
283 gdtmms[jj][ii] = gdtmm;
// Diagnostic dump of the comparison to std::cerr. The condition guarding it
// is on a line not shown here.
285 std::cerr <<
"coverage = " << coverage <<
", n_atoms = " << n_atoms
286 <<
", max_pose_len = " << max_pose_len <<
", gdtmm = " << gdtmm << std::endl;
287 std::cerr <<
"pose1 seq = " << poses[ii].sequence() << std::endl;
288 std::cerr <<
"pose2 seq = " << poses[jj].sequence() << std::endl;
289 std::cerr << aln << std::endl;
290 std::cerr <<
"sim(" << aln_ids[ii] <<
"," << aln_ids[jj] <<
") = " << gdtmm << std::endl;
291 std::cerr <<
"--------------------------------------------------------------------------------"
// Progress output: a dot every 50 comparisons and, nested inside that check,
// the running total every 1000 comparisons.
294 if ( total_comparisons_done % 50 == 0 ) {
295 std::cout <<
"." << std::flush;
296 if ( total_comparisons_done % 1000 == 0 ) {
297 std::cout <<
" finished with "
298 << total_comparisons_done <<
"." << std::endl;
// Phase 4: threshold search. number_clusters starts at a large value so that
// the loop condition holds on first entry.
303 Size number_clusters = 999999;
305 Real threshold_gdt = MAX_GDT;
307 set<Size>::iterator location_mergeSet;
308 multimap<Size,Size> mergeMap;
309 multimap<Size,Size>::iterator start_mergeMap,stop_mergeMap;
// Continue while there are too many clusters and the threshold has not fallen
// below MIN_GDT.
310 while((number_clusters > GOAL_NUMB_CLUSTERS) && (threshold_gdt >= MIN_GDT)){
312 cluster_v = cluster(gdtmms,rankedAlignments_valid,threshold_gdt);
// Compare every pair (ii, jj > ii) of clusters whose indices are not in
// mergeSet. A pair whose overlap exceeds MAX_CLUSTER_OVERLAP is recorded in
// mergeMap as ii -> jj.
317 for (
Size ii = 1; ii <= cluster_v.size(); ++ii){
318 location_mergeSet = mergeSet.find(ii);
319 if(location_mergeSet == mergeSet.end()){
320 for(
Size jj=ii+1; jj<=cluster_v.size(); ++jj){
321 location_mergeSet = mergeSet.find(jj);
322 if(location_mergeSet == mergeSet.end()){
323 if (cluster_v[ii]->
overlap(cluster_v[jj])>MAX_CLUSTER_OVERLAP){
// NOTE(review): the pair is built as pair<int,int> although mergeMap holds
// Size keys and values, so the indices pass through int on the way in.
// Where mergeSet is updated is on a line not shown here.
325 mergeMap.insert(std::pair<int,int>(ii,jj));
// Apply the recorded merges: the cluster at the first index absorbs the one
// at the second index. The iterator advance is on a line not shown here.
331 start_mergeMap = mergeMap.begin();
332 stop_mergeMap = mergeMap.end();
333 while(start_mergeMap != stop_mergeMap){
334 cluster_v[start_mergeMap->first]->merge(cluster_v[start_mergeMap->second]);
338 std::cout <<
"cluster_v.size" << cluster_v.size() <<
"merged clusters" << mergeSet.size() << std::endl;
// Effective cluster count: clusters produced minus those listed in mergeSet.
340 number_clusters = cluster_v.size() - mergeSet.size();
341 tr <<
"threshold_gdt" << threshold_gdt <<
"number_clusters" << number_clusters << std::endl;
// Still too many clusters: lower the threshold for the next round.
343 if(number_clusters > GOAL_NUMB_CLUSTERS)
344 threshold_gdt = threshold_gdt - INCREMENT_GDT;
// Phase 5: output. Clusters whose index is in mergeSet are skipped.
349 for (
Size ii = 1; ii <= cluster_v.size(); ++ii){
350 location_mergeSet = mergeSet.find(ii);
351 if(location_mergeSet == mergeSet.end()){
352 std::cout <<
"--------" << std::endl;
// numbOutput is converted to text through a stringstream. How filename is
// assembled from it is on a line not shown here.
353 std::stringstream convert_to_string;
354 convert_to_string << numbOutput;
356 utility::io::ozstream alignment_out(filename );
357 cluster_v[ii]->output(alignment_out);
359 alignment_out.close();
// Destructor of AlignmentClustering. Its body is not shown in this listing.
368 AlignmentClustering::~AlignmentClustering(){
// NOTE(review): the signature of this function is not shown in this listing.
// It is presumably AlignmentClustering::cluster, which the constructor calls
// with (gdtmms, rankedAlignments_valid, threshold_gdt) - confirm.
//
// Visible behaviour: pick a centre, put every other alignment whose gdtmms
// value against the centre is at least threshold_gdt into the same cluster,
// then continue with the lowest-indexed alignment that is not yet flagged.
//
// One flag per ranked alignment, all initially false.
377 for (
Size ii = 1; ii <= rankedAlignments.size(); ++ii ) {
378 alignmentInCluster.push_back(
false);
380 bool allClustered =
false;
// The first centre is the alignment at index 1.
381 Size clusterCenter = 1;
383 while(!allClustered){
// tempCluster is created on a line not shown here (presumably seeded with the
// centre alignment - confirm).
385 alignmentInCluster[clusterCenter] =
true;
386 for(
Size ii = 1; ii <= rankedAlignments.size(); ++ii) {
387 if(ii != clusterCenter){
// NOTE(review): lines between this check and the threshold test are not
// shown, so it cannot be told from here whether alignments that are already
// flagged are skipped.
391 if (gdtmms[clusterCenter][ii] >= threshold_gdt){
392 tempCluster->add_aln(rankedAlignments[ii]);
393 alignmentInCluster[ii] =
true;
397 cluster_v.push_back(tempCluster);
// Search for the next centre: the scan stops at the first index whose flag is
// still false. The value -1 means that no such index was found.
// NOTE(review): nxtClusterCenter is an int, receives the Size index jj and is
// later assigned to the Size variable clusterCenter.
398 int nxtClusterCenter = -1;
399 for(
Size jj = 1; ((jj <= alignmentInCluster.size()) && (nxtClusterCenter==-1)) ; ++jj){
400 if(alignmentInCluster[jj] ==
false){
401 nxtClusterCenter = jj;
// Nothing left to cluster. The statement controlled by this check is not
// shown here (presumably it sets allClustered - confirm).
404 if(nxtClusterCenter == -1)
407 clusterCenter = nxtClusterCenter;
// NOTE(review): the signature of this function is not shown in this listing.
// The visible part declares a map from string to Pose and iterates over
// fn_list; how each Pose is loaded and stored is on lines not shown here.
418 using namespace core::chemical;
421 map< string, Pose > poses;
423 for ( iter it = fn_list.begin(),
end = fn_list.end(); it !=
end; ++it ) {
// The name is the base name of the file cut down to its first five
// characters. The clustering constructor looks template poses up by the
// first five characters of an alignment id, which is the same key length.
427 string name = utility::file_basename( *it );
428 name = name.substr( 0, 5 );
// NOTE(review): the signature of this function is not shown in this listing.
// The body reads alns and THRESHOLD_FOR_E_VAL and returns rankedAlignments.
// The declarations of rankedAlignments, filename, line, pdbid, e_val and
// hh_val are on lines not shown here.
//
// Visible behaviour: order the alignments by template quality using either an
// e-value table (cm::ev_map) or an hh-value table (cm::hh_map), and fall back
// to all alignments when the ranking produced too few entries.
438 using namespace basic::options;
439 using namespace basic::options::OptionKeys;
// rankedTemplate_map is keyed by score, so iterating it visits template ids in
// ascending score order. template_map maps a template id to its e-value.
442 multimap<Real,string> rankedTemplate_map;
443 map<string,Real> template_map;
445 map<string,SequenceAlignment>::iterator start_alns, stop_alns;
// Starting value for the e-value minimum tracked below.
450 Real low_e_val = 9999;
// Minimum ranking size used by the fallback check near the end.
451 Size minNumbRankedModels = 10;
// Path 1: the ev_map option was given by the user.
453 if(option[ cm::ev_map ].user()){
454 multimap<Real,string>::iterator start_rankedTemplate,stop_rankedTemplate;
455 map<string,Real>::iterator start_template, stop_template,location_template;
458 tr <<
"ev_map being read in" << std::endl;
460 utility::io::izstream data(filename);
// The check guarding this call is not shown here (presumably a failed open).
// NOTE(review): the text says "Warning" although utility_exit_with_message is
// called, and there is no space between "file" and the file name.
462 utility_exit_with_message(
" Warning: can't open file" + filename +
"!");
// Each line is read as two whitespace-separated fields: pdbid and e_val.
466 while (getline(data,line)){
467 std::istringstream line_stream(line);
468 line_stream >> pdbid >> e_val;
// Lines whose first field is the word "template" are not processed.
469 if(pdbid !=
"template"){
// Two id rewrites follow; the conditions selecting between them are on lines
// not shown here. The first keeps characters 0-3 and appends the character at
// index 5, which drops the character at index 4.
471 pdbid = pdbid.substr(0,4) + pdbid.substr(5,1);
// The second keeps characters 1-4 and appends the upper-cased character at
// index 5.
474 string temp_string =
"X";
475 temp_string[0] = toupper(pdbid[5]);
476 pdbid = pdbid.substr(1,4) + temp_string;
// Track the smallest e-value seen. The assignment is on a line not shown here.
478 if(e_val < low_e_val)
// Keep one entry per template id, holding its smallest e-value.
481 location_template = template_map.find(pdbid);
482 if (location_template == template_map.end())
483 template_map.insert(std::pair<string,Real>(pdbid,e_val));
// NOTE(review): the line before this check is not shown (presumably an else).
// Without it, location_template could equal end() when it is dereferenced.
485 if (location_template->second > e_val)
486 location_template->second = e_val;
// Invert template_map into rankedTemplate_map (e-value -> template id). The
// iterator advance is on a line not shown here.
490 start_template =template_map.begin();
491 stop_template = template_map.end();
492 while(start_template!=stop_template){
493 rankedTemplate_map.insert(std::pair<Real,string>(start_template->second,start_template->first));
// Use the e-value ranking only if the best e-value is at or below the
// threshold.
496 if(low_e_val <= THRESHOLD_FOR_E_VAL){
497 start_rankedTemplate = rankedTemplate_map.begin();
498 stop_rankedTemplate = rankedTemplate_map.end();
// For each template in ascending e-value order, scan all alignments and
// append those whose id starts with the template id (first five characters).
499 while(start_rankedTemplate != stop_rankedTemplate){
500 start_alns = alns.begin();
501 stop_alns = alns.end();
502 while (start_alns != stop_alns){
503 string aln_id = start_alns->second.sequence(2)->id();
504 string template_id = aln_id.substr(0,5);
505 if(template_id == start_rankedTemplate->second){
506 rankedAlignments.push_back(start_alns->second);
510 start_rankedTemplate++;
// Path 2: hh_map was given and either ev_map was not given or the best
// e-value is at or above the threshold.
// NOTE(review): when low_e_val equals THRESHOLD_FOR_E_VAL both this condition
// and the <= test above hold, so the e-value ranking is built and then
// cleared here.
516 if((option[ cm::hh_map ].user()&& !option[ cm::ev_map ].user()) || (option[ cm::hh_map ].user() && (low_e_val >= THRESHOLD_FOR_E_VAL))){
517 rankedTemplate_map.clear();
518 rankedAlignments.clear();
// A reverse iterator is used, so templates are visited from the largest
// hh_val to the smallest.
521 multimap<Real,string>::reverse_iterator start_rankedTemplate,stop_rankedTemplate;
522 tr <<
"hh_map being read in" << std::endl;
524 utility::io::izstream data(filename);
// Same abort message as in the ev_map path; the guarding check is not shown.
526 utility_exit_with_message(
" Warning: can't open file" + filename +
"!");
// Each line is read as two fields: pdbid and hh_val. The id is used as read,
// without the rewrites applied in the ev_map path.
530 while (getline(data,line)){
531 std::istringstream line_stream(line);
532 line_stream >> pdbid >> hh_val;
533 rankedTemplate_map.insert(std::pair<Real,string>(hh_val,pdbid));
535 start_rankedTemplate =rankedTemplate_map.rbegin();
536 stop_rankedTemplate =rankedTemplate_map.rend();
// Same matching as in the ev_map path: first five characters of the alignment
// id against the template id.
537 while(start_rankedTemplate != stop_rankedTemplate){
538 start_alns = alns.begin();
539 stop_alns = alns.end();
540 while (start_alns != stop_alns){
541 string const aln_id( start_alns->second.sequence(2)->id() );
542 string const template_id( aln_id.substr(0,5) );
543 if(template_id == start_rankedTemplate->second){
544 rankedAlignments.push_back(start_alns->second);
548 start_rankedTemplate++;
// Fallback: if the ranking holds fewer alignments than alns and also fewer
// than minNumbRankedModels, discard it and take every alignment in the
// iteration order of alns.
553 if((rankedAlignments.size() < alns.size())&&(rankedAlignments.size()<minNumbRankedModels)){
554 rankedAlignments.clear();
555 start_alns = alns.begin();
556 stop_alns = alns.end();
557 while (start_alns != stop_alns){
558 rankedAlignments.push_back(start_alns->second);
562 return rankedAlignments;