22 #include <numeric/xyzVector.hh>
44 #include <utility/vector1.hh>
45 #include <utility/tag/Tag.fwd.hh>
46 #include <utility/tag/Tag.hh>
47 #include <numeric/random/random.hh>
51 #include <basic/Tracer.hh>
52 #include <boost/foreach.hpp>
55 #define foreach BOOST_FOREACH
57 using namespace core::scoring;
58 using namespace protocols::seeded_abinitio;
60 static basic::Tracer
TR(
"protocols.seeded_abinitio.SeedFoldTree" );
61 static numeric::random::RandomGenerator
RG(124523);
64 namespace seeded_abinitio{
66 using namespace protocols::moves;
// Creator key name: simply forwards to SeedFoldTreeCreator::mover_name(), so the
// key and the mover name are always the same string.
70 SeedFoldTreeCreator::keyname()
const
72 return SeedFoldTreeCreator::mover_name();
// Factory method of the creator; its body is not visible in this view.
76 SeedFoldTreeCreator::create_mover()
const {
// Canonical name of the mover handled by this creator.
81 SeedFoldTreeCreator::mover_name()
83 return "SeedFoldTree";
86 SeedFoldTree::~SeedFoldTree() {
89 SeedFoldTree::SeedFoldTree() :
158 for(
Size i = 1; i <= anchor.size(); ++i)
// Picks a cut point among the residues between start_resi and stop_resi.
// Only part of the routine is visible here; the declaration, the test applied to
// `ss`, and the definition of `ran` are on lines outside this view.
178 TR <<
"defining cut points stochasticly between the given two residues: "<<start_resi<<
" and "<<stop_resi <<std::endl;
182 TR<<
"start and stop: " << start_resi <<
" " << stop_resi <<
"\nsecondary structure string between seeds: \n";
// Scans residues start_resi+1 .. stop_resi-2 (the upper bound is exclusive:
// `resi < stop_resi - 1`). The secondary-structure string is indexed with
// resi - 1, i.e. residue numbers are 1-based while the string is 0-based.
// NOTE(review): `stop_resi - 1` would wrap if Size is unsigned and stop_resi is 0 — confirm callers never pass 0.
184 for (
Size resi = start_resi + 1 ; resi < stop_resi - 1; ++resi) {
185 ss = secondarystruct_seq[ resi - 1 ];
// Candidate positions are collected in loopy_regions (the guarding condition on
// `ss` is not visible here).
188 loopy_regions.push_back( resi );
// Hard stop when no candidate residue was collected.
192 if( loopy_regions.size() < 1 )
193 utility_exit_with_message(
"there are no loopy residues between the motifs, this is currently not supported.");
// NOTE(review): container size is narrowed into an int here.
196 int high = loopy_regions.size();
// The chosen entry is shifted by (start_fold_pose - 1) to obtain the cut point.
// `ran` is defined on a line not shown; presumably a random index bounded by `high` — confirm.
198 core::Size cutpoint = loopy_regions[ ran ] + start_fold_pose -1 ;
200 TR.Debug <<
"random number: "<< ran <<
", number from loop container: " << loopy_regions[ran]<<
", adjusting by " << start_fold_pose - 1 <<
", cutpoint: " << cutpoint << std::endl;
// NOTE(review): the value printed in parentheses is cutpoint - start_fold_pose,
// which equals loopy_regions[ran] - 1, not the un-shifted residue number.
201 TR<<
"picked a cutpoint between "<<start_resi <<
" and " << stop_resi <<
" ( "<<cutpoint - start_fold_pose <<
" ). Renumbering will be adjusted by "<< start_fold_pose -1 << std::endl;
// Finds the seed residue / target residue pair with the smallest CA-CA distance.
// Returns a pair with .first = target residue and .second = seed residue.
// The function name, its parameters and the initial values of nearest_dist,
// nearest_dist2, nearest_resi and nearest_resi_target are on lines not shown here.
208 std::pair< Size, Size >
215 using namespace core::conformation;
216 using namespace core::chemical;
222 std::pair<Size,Size> closest_pair;
// Only a two-chain pose is accepted; anything else aborts.
223 if( target_seed_pose->conformation().num_chains() != 2 )
224 utility_exit_with_message(
"only two chains as input supported" );
// Length of chain 1, used below as the upper residue index of the target.
225 Size target_length = target_seed_pose->split_by_chain( 1 ).total_residue();
227 TR<<
"iterating through each seed residue to find the closest target residue" <<std::endl;
// Outer loop: every seed residue in [seed_start, seed_stop].
229 for (
Size seed_resi = seed_start; seed_resi <= seed_stop ; ++seed_resi) {
// Inner loop: every target residue in [1, target_length].
230 for (
Size target_resi = 1; target_resi <= target_length ; ++target_resi) {
233 core::Real const distance( target_seed_pose->residue(target_resi).xyz(
"CA" ).distance(target_seed_pose->residue(seed_resi).xyz(
"CA" ) ) );
// Remember the target residue whenever a shorter distance is seen.
235 if( distance < nearest_dist ){
236 nearest_resi_target = target_resi;
// Second-level comparison tracks which seed residue owns the best distance.
// NOTE(review): whether nearest_dist is reset per seed residue is not visible here;
// if it is not, nearest_resi_target and nearest_resi may not describe the same pair — confirm.
240 if( nearest_dist < nearest_dist2 ){
241 nearest_resi = seed_resi ;
242 nearest_dist2 = nearest_dist;
// Both indices must be non-zero, i.e. a pair must have been found.
246 runtime_assert( nearest_resi );
247 runtime_assert( nearest_resi_target );
// NOTE(review): the distance reported is nearest_dist, not nearest_dist2.
248 TR<<
"closest pair between seed and target: "<<nearest_resi <<
" and: " <<nearest_resi_target <<
", distance: "<<nearest_dist<<std::endl;
249 closest_pair.second = nearest_resi;
250 closest_pair.first = nearest_resi_target;
252 return ( closest_pair );
// Finds the target residue closest to a given anchor residue.
// Returns a pair with .first = closest target residue and .second = anchor.
// The function name, its parameters and the definitions of connect_atom and
// anchor_atom are on lines not shown here.
255 std::pair< Size, Size >
257 using namespace core::conformation;
258 using namespace core::chemical;
// 0 doubles as the "nothing found" marker checked by runtime_assert below;
// 100000.0 is the starting value every measured distance is compared against.
260 Size nearest_resi( 0 );
261 Real nearest_dist( 100000.0 );
262 std::pair<Size,Size> closest_pair;
// Only a two-chain pose is accepted; anything else aborts.
263 if( target_seed_pose->conformation().num_chains() != 2 )
264 utility_exit_with_message(
"only two chains as input are currently supported" );
// Length of chain 1, used below as the upper residue index of the target.
265 Size target_length = target_seed_pose->split_by_chain( 1 ).total_residue();
// A copy of the anchor residue is taken; glycine is special-cased (the branch
// body is not visible — presumably it selects a different atom name; confirm).
268 Residue res_anchor( target_seed_pose->residue( anchor ));
269 if( res_anchor.name3() ==
"GLY" )
// Scan every target residue in [1, target_length].
272 for (
Size target_resi = 1; target_resi <= target_length ; ++target_resi) {
// NOTE(review): a Residue object is copy-constructed on every iteration.
275 Residue res( target_seed_pose->residue( target_resi ));
// Same glycine special case for the target residue (branch body not visible).
276 if( res.name3() ==
"GLY" )
// Distance between the target residue's connect_atom and the anchor's anchor_atom.
279 core::Real const distance( res.xyz( connect_atom ).distance( res_anchor.xyz( anchor_atom ) ) );
281 if( distance < nearest_dist ){
282 nearest_resi = target_resi;
// A target residue must have been found.
286 runtime_assert( nearest_resi );
287 TR<<
"closest residue to anchor residue "<< anchor <<
" is "<<nearest_resi <<
", distance: "<<nearest_dist<<std::endl;
289 closest_pair.second = anchor;
290 closest_pair.first = nearest_resi;
292 return ( closest_pair );
// Alanine-scanning fragment: logs the starting dG of the complex, then a dG per
// residue, and keeps track of the lowest one. This is presumably the body of
// best_by_ala_scan (called elsewhere in this file) — the signature is not visible; confirm.
299 TR <<
"tspose: " << ts_pose->total_residue() <<std::endl;
300 TR <<
"----------alanine scanning to identify the best jump atom in seed ------------"<<std::endl;
// Reference value computed by the ddg filter on the unmodified pose.
312 orig_dG = ddg_filter.
compute( pose );
313 TR <<
"\noriginal seed complex dG "<<orig_dG<<std::endl;
// NOTE(review): no separator between the start value and the "stop : " label,
// so the two run together in the debug output.
315 TR.Debug <<
"start: " << start <<
"stop : " << end << std::endl;
// Per-residue report: three-letter name immediately followed by the residue index.
320 TR<<
"dG for resi "<<pose.
residue( resi ).
name3()<<resi<<
" is "<<dG<<std::endl;
// Track the minimum dG seen so far (the update statements are not visible here).
321 if( dG < lowest_dG ){
326 TR<<
"seed residue with lowest dG based on ala scanning is : "<< lowest_res <<
" with dG of: " << lowest_dG << std::endl;
// Fold-tree construction. Presumably the body of set_foldtree (called elsewhere
// in this file with a pose, a secondary-structure string, the seed loops and a
// "not folded yet" flag) — the start of the signature is not visible; confirm.
// Two cases are handled: a two-chain pose (target + design) and a single chain.
335 bool protein_not_folded_yet ){
337 using namespace core;
338 using namespace kinematics;
339 using namespace protocols::seeded_abinitio;
// ---- Case 1: two chains (chain 1 = target, chain 2 = seeds/design) ----
345 if( target_seed_pose->conformation().num_chains() == 2 ){
347 TR<<
"two chains were were submitted for the seed pdb, reading target info"<< std::endl;
// Chain 1 and chain 2 are copied into freshly allocated poses stored in members.
// presumably target_chain_/seeds_only_ are owning pointers — confirm in the header.
349 target_chain_ =
new pose::Pose( target_seed_pose->split_by_chain( 1 ) );
350 TR<<
"input pdb: "<< secstr.length() <<
" target chain: " <<target_chain_->total_residue() << std::endl;
351 seeds_only_ =
new pose::Pose( target_seed_pose->split_by_chain( 2 ) );
// Size of the final complex = secondary-structure string length + target length;
// the designed protein starts right after the target.
356 Size total_size_complex = secstr.length() + target_length;
357 Size start_new_protein = target_length + 1 ;
362 Size position_adjustment = 0;
363 Size seed_res_counter = 1;
// Jump endpoints collected per chain; std::set keeps them sorted and unique,
// which the peptide-edge loops further down rely on (begin()/rbegin()).
364 std::set< core::Size > res_on_target;
365 std::set< core::Size > res_on_design;
369 folding_verteces_.insert( total_size_complex );
370 folding_verteces_.insert( start_new_protein );
373 utility_exit_with_message(
"NO SEEDS SPECIFIED!!!" );
379 TR<<
"finding cutpoints..."<<std::endl;
// One cut point between every pair of consecutive seeds (seed_it - 1, seed_it).
381 for(
Size seed_it = 2 ; seed_it <= seed_num ; ++seed_it ){
// NOTE(review): both labels and both values print loops[seed_it - 1].stop();
// the second was presumably meant to be loops[seed_it].start() — confirm.
382 TR.Debug <<
"loops[seed_it - 1].stop()"<< loops[seed_it - 1].stop()<<
" and loops[seed_it - 1].stop()" <<loops[seed_it - 1].stop() << std::endl;
383 Size end_seed = loops[seed_it - 1].stop();
384 Size start_new_seed = loops[seed_it].start();
385 TR<<
"... between: "<< end_seed <<
" and " << start_new_seed << std::endl;
// `cut` is computed on a line not shown; it and its successor become vertices.
387 cut_points_.push_back( cut );
388 folding_verteces_.insert( cut );
389 folding_verteces_.insert( cut + 1 );
// Reads back the entry just pushed; assumes cut_points_ is 1-indexed and was
// empty when the loop started — TODO confirm.
390 TR<<
"vector: "<<cut_points_[seed_it - 1] <<
"method cut: " << cut << std::endl;
// Second pass: determine one jump (target residue, seed residue) per seed.
395 for(
Size seed_it = 1 ; seed_it <= seed_num ; ++seed_it ){
397 std::pair<Size,Size> jump_pair;
// -- Pose contains only the truncated seeds, not the full-length design --
399 if( protein_not_folded_yet){
401 TR <<
"assuming that the pose does not have full length yet" << std::endl;
402 TR.Debug<<
"seed_res_counter "<< seed_res_counter << std::endl;
403 TR.Debug<<
"seed start " << loops[seed_it].start() + target_length << std::endl;
404 TR.Debug<<
"seed stop " << loops[seed_it].stop() + target_length << std::endl;
// seed_start/seed_stop: numbering in the full-length complex (offset by target).
407 seed_start = loops[seed_it].start() + target_length;
408 seed_stop = loops[seed_it].stop() + target_length;
// pdb_start/pdb_stop: numbering in the truncated pose, where seeds are packed
// back to back; seed_res_counter is the running position within chain 2.
411 pdb_start = seed_res_counter + target_length;
412 pdb_stop = pdb_start + loops[seed_it].stop() - loops[seed_it].start();
414 TR.Debug<<
"pdb_start for seed " << pdb_start << std::endl;
415 TR.Debug<<
"pdb_stop for seed: " << pdb_stop << std::endl;
417 TR<<
"seed_residue_counter: "<<seed_res_counter<<std::endl;
419 folding_verteces_.insert(seed_start);
420 folding_verteces_.insert(seed_stop);
422 TR<<
"numbering for seed(only)-target pose with target\n ----- SEED: "<< seed_it <<
" start: "<<seed_start<<
", stop: "<<seed_stop<<
" ---------" <<std::endl;
423 TR<<
"position adjustment of TRUNCATED seed motif by "<<position_adjustment<<std::endl;
// User-supplied anchor: translate it from full-length to truncated numbering.
// NOTE(review): this branch and the next use the accessor anchor_specified(),
// while the full-length branch below reads the member anchor_specified_ directly.
425 if( anchor_specified() ){
426 if( anchors_.size() < 1 )
427 utility_exit_with_message(
"no anchor specified?!");
// assumes the anchor lies inside the seed, otherwise the unsigned subtraction wraps — TODO confirm.
428 Size adjust_anchor = anchors_[seed_it] - loops[seed_it].start() + seed_res_counter;
429 TR.Debug <<
"anchor defined: "<< anchors_[seed_it]<<
", adjusted to " << adjust_anchor << std::endl;
// Shift the seed-side jump residue back to full-length numbering.
431 jump_pair.second += loops[seed_it].start() - seed_res_counter;
432 TR<<
"jump pairs: " << jump_pair.first <<
" " << jump_pair.second << std::endl;
// No anchor: choose the jump residue by alanine scanning over the truncated range.
435 if( !anchor_specified() ){
437 TR<<
"computing dG for seed " << seed_it <<
" to identify jump atom "<< std::endl;
438 Size seed_jump_residue = best_by_ala_scan( pdb_start, pdb_stop, target_seed_pose );
446 TR.Debug<<
"loops[seed_it].start(): "<<loops[seed_it].start() <<
" seed_res_counter " << seed_res_counter << std::endl;
// Offset between full-length and truncated numbering for this seed.
448 position_adjustment = loops[seed_it].start() - seed_res_counter;
449 jump_pair.second += position_adjustment;
// Advance the running counter by the length of this seed.
452 seed_res_counter += loops[seed_it].stop() - loops[seed_it].start() + 1;
453 TR<<
"updating seed res counter: "<<seed_res_counter<<std::endl;
// -- Pose already has its full length: loop numbering only needs the target offset --
459 TR <<
"assuming pose has its full length" <<std::endl;
460 seed_start = loops[seed_it].start()+ target_length;
461 seed_stop = loops[seed_it].stop() + target_length;
462 TR<<
"--------- SEED: " << seed_start <<
" " << seed_stop << std::endl;
465 TR<<
"total size complex: " << total_size_complex << std::endl;
// An anchor entry of 0 is tested here (the branch body is not visible).
469 if( anchor_specified_ ){
470 if( anchors_[ seed_it ] == 0 )
477 TR<<
"computing dG for seed " << seed_it <<
" to identify jump atom "<< std::endl;
478 Size seed_jump_residue = best_by_ala_scan( seed_start, seed_stop, target_seed_pose );
488 TR<<
"finding closest opposing residues pair for seed starting IDs: " << seed_start <<
" " <<seed_stop<<
" as jump: "<<rb_jump<<std::endl;
489 TR.Debug<<
"after adjustment "<< jump_pair.second << std::endl;
// Record both endpoints and add the jump edge, labelled with rb_jump.
491 res_on_target.insert( jump_pair.first );
492 res_on_design.insert( jump_pair.second );
493 fold_tree_->add_edge( jump_pair.first, jump_pair.second, rb_jump );
495 TR<<
"SEED: "<<seed_it <<
", seed and target jump pairs: " <<jump_pair.first<<
" and " <<jump_pair.second <<
"\n";
// NOTE(review): despite the label, the value printed is position_adjustment.
496 TR<<
"registered jump pairs after full-length adjustments: " << position_adjustment <<std::endl;
500 TR<<
"new SeedFoldTree jumps: " <<*fold_tree_ << std::endl;
// Peptide edges on the target chain: connect target_head to each jump residue in
// ascending order (any update of target_head inside the loop is not visible),
// then the last jump residue to the end of the target.
505 Size target_head = 1;
506 foreach(
core::Size const res, res_on_target ){
508 fold_tree_->add_edge( target_head, res, Edge::PEPTIDE );
513 core::Size const target_lastjump( *res_on_target.rbegin() );
514 fold_tree_->add_edge( target_lastjump, target_length , Edge::PEPTIDE );
// Peptide edges on the design chain: each jump residue is reached from the
// residue after the previous cut, and (except for the last jump residue) is
// connected onward to the next cut point.
521 foreach (
core::Size const jpos, res_on_design){
522 TR<<
"foldpose iterator: "<< jpos <<
"and +1 "<< jpos+1 << std::endl;
524 fold_tree_->add_edge( last_cut+1 , jpos , Edge::PEPTIDE );
525 if( jpos != *res_on_design.rbegin() ) {
526 fold_tree_->add_edge( jpos, cut_points_[cut_iter], Edge::PEPTIDE );
527 last_cut = cut_points_[cut_iter];
// Smallest and largest jump residue on the design chain.
534 core::Size const first( *res_on_design.begin() );
535 core::Size const last( *res_on_design.rbegin() );
536 TR<<
"first: " << *res_on_design.begin() <<
" and " << last << std::endl;
// Connect the first jump residue back to the start of the design chain when it
// is not itself the first design residue.
539 if( first - 1 >= start_new_protein ) {
540 fold_tree_->add_edge( first, start_new_protein , Edge::PEPTIDE );
// Connect the last jump residue forward to the end of the complex.
543 if( last < total_size_complex ){
544 TR <<
"--- total_size_complex: " << total_size_complex << std::endl;
545 fold_tree_->add_edge( last, total_size_complex, Edge::PEPTIDE );
// Clean-up: drop self edges, then reorder the tree with residue 1 as the start.
548 TR <<
"before deleting self edges: " << *fold_tree_ << std::endl;
549 fold_tree_->delete_self_edges();
550 TR<<
"before reordering: " << *fold_tree_ <<std::endl;
551 fold_tree_->reorder( 1 );
553 TR<<
"Fold tree:\n"<<*fold_tree_<<std::endl;
// ---- Case 2: single chain (no target) ----
562 else if( target_seed_pose->conformation().num_chains() == 1 ){
564 TR<<
"there is no target chain, either because you turned off the option, or it was not loaded" <<std::endl;
// Several seeds: place cuts between them and connect them with jumps.
566 if( loops.
size() > 1 ){
567 using namespace core;
568 using namespace kinematics;
570 Size start_protein = 1;
572 TR<<
"more than one seed is defined " << std::endl;
573 for (
Size seed_it = 2 ; seed_it <= seed_num; ++seed_it ){
574 Size starting = loops[seed_it - 1].stop();
575 Size ending = loops[seed_it].start();
577 TR<<
"adding cut: " << cut <<std::endl;
578 cut_points_.push_back( cut );
// NOTE(review): if this call sits inside the loop above (closing brace not
// visible), the same full-length peptide edge is added once per extra seed — confirm.
579 fold_tree_->add_edge( 1, target_seed_pose->total_residue(), Edge::PEPTIDE );
// One jump per additional seed, from the midpoint of seed 1 to the midpoint of
// seed i+1, using cut_points_[i] as the cut.
582 for (
Size i=1; i < loops.
size() ; ++i){
583 TR<<
"seed "<< i <<std::endl;
584 TR<<
"cut point"<< cut_points_[i]<<std::endl;
585 Size cutpoint = cut_points_[i];
// loop1_midpoint depends only on loops[1], so it is the same on every iteration.
588 Size loop1_midpoint = ((loops[1].stop()-loops[1].start())/2) + loops[1].start();
589 TR<<
"loop 1 mid point"<< loop1_midpoint<<std::endl;
590 Size variable_midpoint = ((loops[i+1].stop()-loops[i+1].start())/2) + loops[i+1].start();
591 TR<<
"Variable mid_point"<< variable_midpoint <<std::endl;
592 fold_tree_->new_jump( loop1_midpoint, variable_midpoint, cutpoint );
594 TR <<
"Fold Tree for the scaffold " << *fold_tree_ << std::endl;
// Fallback: a single continuous peptide edge over the whole pose.
603 TR<<
"no special foldtree needed. There is no target chain addition and less then 2 or no seed defined"<<std::endl;
604 fold_tree_->add_edge( 1, target_seed_pose->total_residue(), Edge::PEPTIDE );
// NOTE(review): every other message streams *fold_tree_; here fold_tree_ itself
// is streamed, which likely prints the pointer rather than the tree — confirm.
605 TR <<
"Pose fold tree " << fold_tree_ << std::endl;
// Fragment that decides whether the pose is already full length, rebuilds
// fold_tree_ via set_foldtree and reports the trees. The enclosing function's
// signature and the statement that installs the new tree are not visible here.
// Default assumption: the pose is still truncated.
681 bool protein_not_folded =
true;
// The pose counts as full length when chain `chain_num` has exactly as many
// residues as template_pdb_.
685 if( pose.
split_by_chain( chain_num ).total_residue() == template_pdb_->total_residue() ){
686 protein_not_folded =
false;
687 TR<<
"assuming pose has full size" << std::endl;
// One or two chains are supported; the tree is rebuilt from the template's
// secondary structure and the parsed seeds.
690 if( chain_num <= 2 ){
691 TR<<
"Previous fold tree: "<< pose.
fold_tree()<<
'\n';
692 TR<<
"reseting foldtree"<<std::endl;
694 fold_tree_ = set_foldtree( poseOP, template_pdb_->secstruct(), all_seeds_ , protein_not_folded );
// More than two chains aborts.
699 utility_exit_with_message(
"more than 2 chains as input are currently not supported" );
// A fold tree must exist past this point.
702 runtime_assert( fold_tree_ );
// NOTE(review): "Previous fold tree" is logged a second time here; on the
// chain_num <= 2 path the same message was already emitted above.
704 TR<<
"Previous fold tree: "<< pose.
fold_tree()<<
'\n';
// Both messages read pose.fold_tree(); the update of the pose in between is
// on a line not shown in this view.
706 TR<<
"New fold tree: "<< pose.
fold_tree()<<std::endl;
714 std::set< core::Size >
// Tag-parsing fragment: reads the "ddG_based" option and walks the child tags
// ("Seeds" and "Jumps"). The enclosing function's signature is not visible here.
730 TR<<
"SeedFoldTree has been invoked"<<std::endl;
// "ddG_based" defaults to 0 (false) when absent.
732 ddg_based_ = tag->getOption<
bool >(
"ddG_based", 0 );
739 foreach(
TagPtr const btag, branch_tags ){
// NOTE(review): if this reset sits inside the foreach (brace structure not
// visible), anchor_specified_ ends up reflecting only the last tag processed — confirm.
751 anchor_specified_ =
false;
// <Seeds>: register the segment as a loop and record its anchor.
753 if( btag->getName() ==
"Seeds" ) {
757 all_seeds_.add_loop( begin , end , 0, 0,
false );
758 if( btag->hasOption(
"anchor" )){
760 TR<<
"anchor residue: " << anchor_res << std::endl;
761 anchors_.push_back( anchor_res );
762 anchor_specified_ =
true;
// A 0 is stored as the anchor entry (presumably the no-anchor branch, so that
// anchors_ stays aligned with the seeds — confirm); fold-tree construction
// elsewhere in this file tests anchors_[ seed_it ] == 0.
765 anchors_.push_back( 0 );
// <Jumps>: manually specified jump, read from "from"/"to" (both default to 0).
770 if( btag->getName() ==
"Jumps" ) {
771 set_jumps_manually =
true;
772 std::pair< Size, Size > jump_pair;
773 jump_pair.first = btag->getOption<
core::Size>(
"from", 0 ) ;
774 jump_pair.second = ( btag->getOption<
core::Size>(
"to", 0 ) );
// "from" must not exceed "to"; otherwise parsing aborts.
775 if( jump_pair.first > jump_pair.second)
776 utility_exit_with_message(
"specifiied jumps need to be defined in sequence order" );
777 manual_jump_pairs_.push_back( jump_pair );