91 #include <basic/datacache/BasicDataCache.hh>
115 #include <utility/excn/Exceptions.hh>
116 #include <utility/io/izstream.hh>
117 #include <utility/tag/Tag.hh>
118 #include <utility/string_util.hh>
119 #include <basic/Tracer.hh>
120 #include <numeric/random/WeightedSampler.hh>
121 #include <ObjexxFCL/format.hh>
122 #include <boost/foreach.hpp>
129 #include <basic/options/option.hh>
130 #include <basic/options/keys/symmetry.OptionKeys.gen.hh>
131 #include <basic/options/keys/edensity.OptionKeys.gen.hh>
132 #include <basic/options/keys/cm.OptionKeys.gen.hh>
133 #include <basic/options/keys/in.OptionKeys.gen.hh>
134 #include <basic/options/keys/score.OptionKeys.gen.hh>
135 #include <basic/options/keys/relax.OptionKeys.gen.hh>
136 #include <basic/options/keys/jumps.OptionKeys.gen.hh>
137 #include <basic/options/keys/evaluation.OptionKeys.gen.hh>
141 #define foreach BOOST_FOREACH
// Tracer for this translation unit, declared static and registered under the
// channel name "protocols.hybridization.HybridizeProtocol".
// Code in this file writes through it as TR, TR.Info and TR.Debug.
143 static basic::Tracer
TR(
"protocols.hybridization.HybridizeProtocol" );
// Static random generator for this translation unit, constructed with the
// fixed value 541938 (presumably the seed/stream identifier — confirm against
// numeric::random::RandomGenerator).
// Code in this file passes it to WeightedSampler::random_sample(RG) and calls
// RG.random_range(...) on it.
144 static numeric::random::RandomGenerator
RG(541938);
146 namespace protocols {
147 namespace hybridization {
150 using namespace core;
151 using namespace kinematics;
152 using namespace sequence;
153 using namespace pack;
154 using namespace task;
155 using namespace operation;
156 using namespace scoring;
157 using namespace constraints;
182 template_weights_sum_(0)
184 using namespace basic::options;
185 using namespace basic::options::OptionKeys;
190 if (option[cm::hybridize::template_list].user()) {
206 using namespace basic::options;
207 using namespace basic::options::OptionKeys;
229 if (option[cm::hybridize::starting_template].user()) {
239 option[cm::hybridize::stage1_weights](), option[cm::hybridize::stage1_patch]() );
241 option[cm::hybridize::stage2_weights](), option[cm::hybridize::stage2_patch]() );
246 if ( option[ OptionKeys::constraints::cst_fa_file ].user() ) {
250 TR.Info <<
"Fullatom Constraint choice: " <<
fa_cst_fn_ << std::endl;
254 relax_repeats_ = option[ basic::options::OptionKeys::relax::default_repeats ]();
257 if (option[ in::fix_disulf ].user()) {
262 if ( option[ in::file::frag9 ].user() ) {
263 using namespace core::fragment;
267 if ( option[ in::file::frag3 ].user() ) {
268 using namespace core::fragment;
274 if ( option[ in::file::native ].user() ) {
277 }
else if ( option[ evaluation::align_rmsd_target ].user() ) {
285 if ( option[jumps::sheets].user() ) {
286 sheets_ = option[jumps::sheets]();
305 dynamic_cast<core::conformation::symmetry::SymmetricConformation &> ( pose.
conformation()) );
307 nres_tgt = symm_info->num_independent_residues();
316 using namespace basic::options;
317 using namespace basic::options::OptionKeys;
318 if (option[ OptionKeys::in::file::psipred_ss2 ].user()) {
321 tgt_ss[j-1] = psipred[j];
328 if (!
templates_[i]->residue(j).is_protein())
continue;
331 runtime_assert( tgt_pos<=nres_tgt );
334 if (tgt_ss[tgt_pos-1] ==
'0') {
335 tgt_ss[tgt_pos-1] = tgt_ss_j;
336 }
else if (tgt_ss[tgt_pos-1] != tgt_ss_j) {
337 tgt_ss[tgt_pos-1] =
'D';
342 if (tgt_ss[j-1] ==
'0') tgt_ss[j-1] =
'D';
372 using namespace basic::options;
373 using namespace basic::options::OptionKeys;
376 for (
Size i=1; i<=chosen_templ->total_residue(); ++i)
377 for (
Size j=1; j<=chosen_templ->residue(i).natoms(); ++j) {
379 pose.
set_xyz( tgt, chosen_templ->xyz( src ) );
383 TR <<
"CONTIGS" << std::endl << template_contigs_icluster << std::endl;
391 dynamic_cast<core::conformation::symmetry::SymmetricConformation &> ( pose.
conformation()) );
393 nres_tgt = symm_info->num_independent_residues();
401 for (
Size i=1; i<=chosen_templ->total_residue(); ++i) {
402 core::Size cres = chosen_templ->pdb_info()->number(i);
403 templ_coverage[cres] =
true;
407 for (
Size i=1; i<=nres_tgt-2; ++i) {
408 if (!templ_coverage[i] && templ_coverage[i+1] && !templ_coverage[i+2]) {
409 templ_coverage[i+1]=
false;
410 }
else if(i<=nres_tgt-3 && !templ_coverage[i] && templ_coverage[i+1] && templ_coverage[i+2] && !templ_coverage[i+3]) {
411 templ_coverage[i+1]=
false;
412 templ_coverage[i+2]=
false;
413 }
else if(i<=nres_tgt-4 &&
414 !templ_coverage[i] && templ_coverage[i+1] && templ_coverage[i+2] && templ_coverage[i+3] && !templ_coverage[i+4]) {
415 templ_coverage[i+1]=
false;
416 templ_coverage[i+2]=
false;
417 templ_coverage[i+3]=
false;
421 bool inloop=!templ_coverage[1];
423 for (
Size i=2; i<=nres_tgt; ++i) {
424 if (templ_coverage[i] && inloop) {
428 if (loopstop < loopstart + 2) {
429 if (loopstart>1) loopstart--;
430 if (loopstop<nres_tgt) loopstop++;
432 loops->add_loop( loopstart,loopstop );
433 }
else if (!templ_coverage[i] && !inloop) {
442 while (loopstop < loopstart + 2) { loopstart--; }
443 loops->add_loop( loopstart,loopstop );
445 TR <<
"LOOPS" << std::endl << *loops << std::endl;
448 if (loops->size() != 0) {
450 loops->auto_choose_cutpoints(pose);
459 for(
Size i=it->start(); i<=it->stop(); ++i ) {
460 mm_loop->set_bb(i,
true);
461 mm_loop->set_chi(i,
true);
469 TR.Info <<
"FRAGMENTS small max length: " << frags_small->max_frag_length() << std::endl;
470 TR.Info <<
"FRAGMENTS big max length: " << frags_big->max_frag_length() << std::endl;
472 frag3mover->set_check_ss(
false ); frag3mover->enable_end_bias_check(
false );
474 frag9mover->set_check_ss(
false ); frag9mover->enable_end_bias_check(
false );
488 for (
Size n=1; n<=neffcycles; ++n) {
489 frag9mover->apply( pose ); (*scorefxn)(pose); mc1->boltzmann( pose ,
"frag9" );
490 frag3mover->apply( pose ); (*scorefxn)(pose); mc1->boltzmann( pose ,
"frag3" );
494 mc1->show_counters();
497 mc1->recover_low( pose );
502 for (
Size n=1; n<=neffcycles; ++n) {
503 frag9mover->apply( pose ); (*scorefxn)(pose); mc2->boltzmann( pose ,
"frag9" );
504 frag3mover->apply( pose ); (*scorefxn)(pose); mc2->boltzmann( pose ,
"frag3" );
508 mc2->show_counters();
513 mc2->recover_low( pose );
546 TR.Debug <<
"Chunks from template\n" << chunks << std::endl;
547 TR.Debug <<
"Contigs from template\n" << contigs << std::endl;
565 utility::io::izstream f_stream( template_list );
567 while (!f_stream.eof()) {
568 getline(f_stream, line);
570 if (line.size() == 0)
continue;
572 std::istringstream str_stream(line);
578 if (!str_stream.eof()) {
579 str_stream >> template_fn;
580 if (template_fn.empty())
continue;
581 if (template_fn[0] ==
'#')
continue;
583 if (!str_stream.eof()) str_stream >> cst_fn;
584 if (!str_stream.eof()) str_stream >> cluster_id;
585 if (!str_stream.eof()) str_stream >> weight;
587 TR << template_fn <<
" " << cst_fn <<
" " << cluster_id <<
" " << weight << std::endl;
591 if ( str_stream >> cst_reses_str ) {
593 for (
Size i=1; i<= cst_reses_parsed.size(); ++i ) {
594 cst_reses.push_back( (
core::Size) std::atoi( cst_reses_parsed[i].c_str() ) );
597 add_template(template_fn, cst_fn,
"", weight,domain_assembly_weight, cluster_id, cst_reses);
613 for (
core::Size i_ref=1; i_ref<= template_filenames.size(); ++i_ref) {
629 using namespace basic::options;
630 using namespace basic::options::OptionKeys;
632 template_index_icluster.clear();
633 templates_icluster.clear();
634 weights_icluster.clear();
635 template_chunks_icluster.clear();
636 template_contigs_icluster.clear();
639 numeric::random::WeightedSampler weighted_sampler;
643 Size k = weighted_sampler.random_sample(
RG);
647 numeric::random::WeightedSampler weighted_sampler;
649 initial_template_index = weighted_sampler.random_sample(
RG);
656 template_index_icluster.push_back(i_template);
657 templates_icluster.push_back(
templates_[i_template]);
662 if (i_template == initial_template_index) {
663 initial_template_index_icluster = template_index_icluster.size();
671 using namespace protocols::moves;
672 using namespace basic::options;
673 using namespace basic::options::OptionKeys;
674 using namespace core::pose::datacache;
675 using namespace core::io::silent;
676 using namespace ObjexxFCL::fmt;
683 TR.Info <<
"FRAGMENTS small max length: " << frags_small->max_frag_length() << std::endl;
684 TR.Info <<
"FRAGMENTS big max length: " << frags_big->max_frag_length() << std::endl;
687 std::vector < SilentStructOP > post_centroid_structs;
688 bool need_more_samples =
true;
696 dynamic_cast<core::conformation::symmetry::SymmetricConformation &> ( pose.
conformation()) );
698 nres_tgt = symm_info->num_independent_residues();
705 while(need_more_samples) {
706 need_more_samples =
false;
717 initial_template_index, initial_template_index_icluster,
718 template_index_icluster, templates_icluster, weights_icluster,
719 template_chunks_icluster, template_contigs_icluster );
720 TR <<
"Using initial template: " << I(4,initial_template_index) <<
" " <<
template_fn_[initial_template_index] << std::endl;
724 for (
Size ires=1; ires <=
templates_[initial_template_index]->total_residue(); ++ires ) {
725 if (
templates_[initial_template_index]->pdb_info()->number(ires) > (
int)nres_tgt) {
726 if (
templates_[initial_template_index]->residue(ires).is_polymer() && !
templates_[initial_template_index]->residue(ires).is_lower_terminus() ) {
737 domain_assembly.
run();
751 domains_all_templ.resize(
templates_.size() );
752 for (
Size i_template=1; i_template<=
templates_.size(); ++i_template) {
754 domains_all_templ[i_template] = ddom.
split( *
templates_[i_template], nres_protein_tgt );
758 for (
Size iloops=1; iloops<=domains_all_templ[i_template].size(); ++iloops) {
759 for (
Size iloop=1; iloop<=domains_all_templ[i_template][iloops].num_loop(); ++iloop) {
760 Size seqpos_start_pose =
templates_[i_template]->pdb_info()->number(domains_all_templ[i_template][iloops][iloop].
start());
761 domains_all_templ[i_template][iloops][iloop].set_start( seqpos_start_pose );
763 Size seqpos_stop_pose =
templates_[i_template]->pdb_info()->number(domains_all_templ[i_template][iloops][iloop].
stop());
764 domains_all_templ[i_template][iloops][iloop].set_stop( seqpos_stop_pose );
768 TR <<
"Found " << domains_all_templ[i_template].size() <<
" domains for template " <<
template_fn_[i_template] << std::endl;
769 for (
Size i=1; i<=domains_all_templ[i_template].size(); ++i) {
770 TR <<
"domain " << i <<
": " << domains_all_templ[i_template][i] << std::endl;
776 TR <<
"Final decision: " <<
domains_.size() <<
" domains" << std::endl;
778 TR <<
"domain " << i <<
": " <<
domains_[i] << std::endl;
785 for (
Size i_template=1; i_template<=
templates_.size(); ++i_template) {
796 if (!symmdef_file.empty() && symmdef_file !=
"NULL") {
798 makeSymm.
apply(pose);
800 basic::options::option[basic::options::OptionKeys::symmetry::symmetry_definition].value(
"dummy" );
805 if ( option[ OptionKeys::edensity::mapfile ].user() ) {
814 history->setall( initial_template_index_icluster );
815 pose.
data().set( CacheableDataType::TEMPLATE_HYBRIDIZATION_HISTORY, history );
836 initial_template_index_icluster, templates_icluster, weights_icluster,
837 template_chunks_icluster, template_contigs_icluster, frags_small, frags_big) ) ;
838 ft_hybridize->set_constraint_file( cst_fn );
857 ft_hybridize->set_sheets(
sheets_ );
861 ft_hybridize->apply(pose);
868 core::pose::PoseOP chosen_templ = templates_icluster[initial_template_index_icluster];
877 TR <<
"GDTMM_after_stage1" << F(8,3,gdtmm) << std::endl;
882 TR <<
"Realigning template domains to stage1 pose." << std::endl;
898 if (!option[cm::hybridize::skip_stage2]()) {
901 templates_icluster, weights_icluster,
902 template_chunks_icluster,template_contigs_icluster, frags_big ) );
908 bool linbonded_old = option[ score::linear_bonded_potential ]();
909 option[ score::linear_bonded_potential ].value(
true );
910 cart_hybridize->apply(pose);
911 option[ score::linear_bonded_potential ].value( linbonded_old );
918 TR <<
"GDTMM_after_stage2" << ObjexxFCL::fmt::F(8,3,gdtmm) << std::endl;
921 runtime_assert( pose.
data().has( CacheableDataType::TEMPLATE_HYBRIDIZATION_HISTORY ) );
922 history = *(
static_cast< TemplateHistory*
>( pose.
data().get_ptr( CacheableDataType::TEMPLATE_HYBRIDIZATION_HISTORY )() ));
925 for (
Size i=1; i<= history->size(); ++i ) {
TR << I(4,i); }
928 for (
Size i=1; i<= history->size(); ++i ) {
TR << I(4, history->get(i)); }
932 if (!option[cm::hybridize::skip_stage2]()) {
937 if (fa_cart_bonded_wt == 0) fa_cart_bonded_wt = 0.5;
944 (*stage2_scorefxn_copy)(pose); minimizer.
run( pose, mm, *stage2_scorefxn_copy, options_lbfgs );
958 basic::options::option[ basic::options::OptionKeys::in::fix_disulf ].value(
disulf_file_);
961 basic::options::option[ basic::options::OptionKeys::in::fix_disulf ].deactivate();
962 basic::options::option[ basic::options::OptionKeys::in::fix_disulf ].to_default();
980 Real const coord_sdev( 1 );
984 history->get(i) > (
int)template_index_icluster.size()
987 if ( std::find( source_list.begin(), source_list.end(), i ) != source_list.end() ) {
988 TR <<
"Constrain residue " << i << std::endl;
1000 relax_prot.
apply(pose);
1005 new_struct->fill_struct( pose );
1006 new_struct->energies_from_pose( pose );
1007 post_centroid_structs.push_back( new_struct );
1027 post_centroid_structs[0]->fill_pose( pose );
1031 need_more_samples =
true;
1036 (*stage2_scorefxn_)(pose);
1042 TR <<
"GDTMM_final" << ObjexxFCL::fmt::F(8,3,gdtmm) << std::endl;
1050 if (all_domains[ref_domains_index].
size() == 0)
return domains;
1053 residue_mask.resize(n_residues);
1056 for (
Size i_pose=1; i_pose <= all_domains.size(); ++i_pose) {
1057 for (
Size idomain=1; idomain <= all_domains[i_pose].size(); ++idomain) {
1058 for (
core::Size iloop=1; iloop<=all_domains[i_pose][idomain].num_loop(); ++iloop) {
1059 for (
core::Size ires=all_domains[i_pose][idomain][iloop].
start()+1; ires<=all_domains[i_pose][idomain][iloop].stop(); ++ires) {
1060 residue_mask[ires] =
true;
1067 for (
Size idomain=1; idomain <= all_domains[ref_domains_index].size(); ++idomain) {
1068 for (
core::Size iloop=1; iloop<=all_domains[ref_domains_index][idomain].num_loop(); ++iloop) {
1069 if (idomain == 1 && iloop == 1) {
1070 domains[idomain][iloop].set_start(1);
1072 if (idomain == all_domains[ref_domains_index].
size() && iloop == all_domains[ref_domains_index][idomain].num_loop()) {
1073 domains[idomain][iloop].set_stop(n_residues);
1077 Size jdomain = idomain;
1078 Size jloop = iloop+1;
1079 if (jloop > all_domains[ref_domains_index][idomain].num_loop()) {
1082 if (jdomain > all_domains[ref_domains_index].
size())
continue;
1084 Size gap_start = all_domains[ref_domains_index][idomain][iloop].stop()+1;
1085 Size gap_stop = all_domains[ref_domains_index][jdomain][jloop].start();
1087 for (
Size ires=gap_start; ires<=gap_stop; ++ires) {
1088 if (residue_mask[ires])
continue;
1089 cut_options.push_back(ires);
1091 if (cut_options.size() == 0) {
1092 for (
Size ires=gap_start; ires<=gap_stop; ++ires) {
1093 cut_options.push_back(ires);
1096 Size cut = cut_options[
RG.random_range(1,cut_options.size())];
1098 domains[idomain][iloop].set_stop(cut-1);
1099 domains[jdomain][jloop].set_start(cut);
1109 for (
Size i_pose=1; i_pose <= poses.size(); ++i_pose) {
1110 if (poses[i_pose] == ref_pose)
continue;
1122 for (
Size i_domain = 1; i_domain <= domains.size() ; ++i_domain) {
1125 std::list <Size> residue_list;
1130 for (
core::Size iloop=1; iloop<=domains[i_domain].num_loop(); ++iloop) {
1131 if ( pose_res < (
int)domains[i_domain][iloop].start() || pose_res > (
int)domains[i_domain][iloop].
stop() )
continue;
1132 residue_list.push_back(ires);
1135 std::list <Size> ref_residue_list;
1138 int ref_pose_res = (ref_pose.
pdb_info()) ? ref_pose.
pdb_info()->number(jres) : jres;
1139 for (
core::Size iloop=1; iloop<=domains[i_domain].num_loop(); ++iloop) {
1140 if ( ref_pose_res < (
int)domains[i_domain][iloop].start() || ref_pose_res > (
int)domains[i_domain][iloop].
stop() )
continue;
1141 ref_residue_list.push_back(jres);
1146 string seq_pose, seq_ref, aligned;
1147 int reval = tm_align.
apply(pose, ref_pose, residue_list, ref_residue_list);
1149 tm_align.
alignment2AtomMap(pose, ref_pose, residue_list, ref_residue_list, n_mapped_residues, atom_map);
1152 using namespace ObjexxFCL::fmt;
1153 Size norm_length = residue_list.size() < ref_residue_list.size() ? residue_list.size():ref_residue_list.size();
1154 TR <<
"Align domain with TMscore of " << F(8,3,tm_align.
TMscore(norm_length)) << std::endl;
1155 TR << seq_pose << std::endl;
1156 TR << aligned << std::endl;
1157 TR << seq_ref << std::endl;
1159 if (n_mapped_residues >= 6) {
1161 aln_cutoffs.push_back(6);
1162 aln_cutoffs.push_back(4);
1163 aln_cutoffs.push_back(3);
1164 aln_cutoffs.push_back(2);
1165 aln_cutoffs.push_back(1.5);
1166 aln_cutoffs.push_back(1);
1168 partial_align(pose, ref_pose, atom_map, residue_list,
true, aln_cutoffs, min_coverage);
1183 return "HybridizeProtocol";
1191 if( tag->hasOption(
"config_file" ) )
1200 if( tag->hasOption(
"starting_template" ) ) {
1203 Size const value = std::atoi( field.c_str() );
1208 if( tag->hasOption(
"stage1_1_cycles" ) )
1210 if( tag->hasOption(
"stage1_2_cycles" ) )
1212 if( tag->hasOption(
"stage1_3_cycles" ) )
1214 if( tag->hasOption(
"stage1_4_cycles" ) )
1216 if( tag->hasOption(
"stage1_probability" ) )
1218 if( tag->hasOption(
"add_hetatm" ) )
1219 add_hetatm_ = tag->getOption<
bool >(
"add_hetatm" );
1220 if( tag->hasOption(
"hetatm_cst_weight" ) )
1222 if( tag->hasOption(
"domain_assembly" ) )
1224 if( tag->hasOption(
"realign_domains" ) )
1226 if( tag->hasOption(
"realign_domains_stage2" ) )
1228 if( tag->hasOption(
"add_non_init_chunks" ) )
1230 if( tag->hasOption(
"frag_1mer_insertion_weight" ) )
1232 if( tag->hasOption(
"small_frag_insertion_weight" ) )
1234 if( tag->hasOption(
"big_frag_insertion_weight" ) )
1236 if( tag->hasOption(
"frag_weight_aligned" ) )
1238 if( tag->hasOption(
"auto_frag_insertion_weight" ) )
1240 if( tag->hasOption(
"max_registry_shift" ) )
1242 if( tag->hasOption(
"no_global_frame" ) )
1244 if( tag->hasOption(
"linmin_only" ) )
1245 linmin_only_ = tag->getOption<
bool >(
"linmin_only" );
1246 if( tag->hasOption(
"repeats" ) )
1248 if( tag->hasOption(
"cartfrag_overlap" ) )
1250 if( tag->hasOption(
"disulf_file" ) )
1254 if( tag->hasOption(
"stage1_scorefxn" ) ) {
1258 if( tag->hasOption(
"stage2_scorefxn" ) ) {
1262 if( tag->hasOption(
"fa_scorefxn" ) ) {
1275 for (tag_it = branch_tags.begin(); tag_it != branch_tags.end(); ++tag_it) {
1276 if ( (*tag_it)->getName() ==
"Fragments" ) {
1277 using namespace core::fragment;
1278 if ( (*tag_it)->hasOption(
"3mers" ) ) {
1281 }
else if ( (*tag_it)->hasOption(
"small" ) ) {
1283 for (
core::Size i=1; i<= frag_files.size(); ++i )
1286 if ( (*tag_it)->hasOption(
"9mers" ) ) {
1289 }
else if ( (*tag_it)->hasOption(
"big" ) ) {
1291 for (
core::Size i=1; i<= frag_files.size(); ++i )
1296 if ( (*tag_it)->getName() ==
"Template" ) {
1300 core::Real domain_assembly_weight = (*tag_it)->getOption<
core::Real>(
"domain_assembly_weight", 0. );
1303 if ((*tag_it)->hasOption(
"constrain_res" ))
1308 add_template(template_fn, cst_fn, symm_file, weight, domain_assembly_weight, cluster_id, cst_reses);
1312 if ( (*tag_it)->getName() ==
"Pairings" ) {
1314 if ( (*tag_it)->hasOption(
"sheets") ) {
1319 }
else if ( (*tag_it)->hasOption(
"random_sheets") ) {