Rosetta 3.5
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Splice.cc
Go to the documentation of this file.
1 // -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
2 // vi: set ts=2 noet:
3 //
4 // (c) Copyright Rosetta Commons Member Institutions.
5 // (c) This file is part of the Rosetta software suite and is made available under license.
6 // (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
7 // (c) For more information, see http://www.rosettacommons.org. Questions about this can be
8 // (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
9 
10 /// @file protocols/protein_interface_design/movers/Splice.cc
11 /// @brief
12 /// @author Sarel Fleishman (sarelf@u.washington.edu)
13 
14 // Unit headers
19 #include <utility/string_util.hh>
20 #include <utility/exit.hh>
22 #include <protocols/loops/Loop.hh>
23 #include <protocols/loops/Loops.hh>
25 #include <core/chemical/AA.hh>
28 #include <boost/foreach.hpp>
31 #define foreach BOOST_FOREACH
32 // Package headers
33 #include <core/pose/Pose.hh>
34 #include <core/pose/util.hh>
39 #include <basic/Tracer.hh>
41 #include <utility/tag/Tag.hh>
42 #include <utility/vector1.hh>
45 #include <protocols/moves/Mover.hh>
47 #include <core/pose/selection.hh>
49 #include <utility/io/izstream.hh>
50 #include <utility/io/ozstream.hh>
51 #include <iostream>
52 #include <sstream>
53 #include <algorithm>
54 //Auto Headers
62 #include <numeric/xyzVector.hh>
67 #include <numeric/random/random.hh>
68 #include <numeric/random/random_permutation.hh>
73 
74 #include <fstream>
75 #include <ctime>
76 namespace protocols {
77 namespace protein_interface_design {
78 namespace movers {
79 
/// Tracer channels for this file. TR carries the general log output written throughout the file;
/// TR_ccd is used after CCD loop closure to report the "start, stop, cut" line.
80 static basic::Tracer TR( "protocols.protein_interface_design.movers.Splice" );
81 static basic::Tracer TR_ccd( "protocols.protein_interface_design.movers.Splice_ccd" );
/// File-local random generator, constructed with the constant 78289. It is used below to permute the
/// dbase subset, to pick a random dbase entry and to pick a random cut site.
82 static numeric::random::RandomGenerator RG( 78289 );
// NOTE(review): only the braces of this definition are present in this listing; the header and the
// body line (the listing's numbering jumps 82 -> 85 -> 87) are not shown, so nothing can be said
// here about what it does.
85 {
87 }
88 
// Tail of a definition whose header line is not shown in this listing: it hands back a newly
// allocated Splice object. NOTE(review): presumably the creator's factory method -- confirm.
91  return new Splice;
92 }
93 
// Body of a definition whose header line is not shown in this listing; it returns the literal
// name "Splice". NOTE(review): presumably SpliceCreator::mover_name(), which other code in this
// file calls to obtain the mover's name -- confirm.
96 {
97  return "Splice";
98 }
99 
/// Constructor initializer list and body (the header line is not shown in this listing).
/// The base Mover is given SpliceCreator::mover_name(). Residue numbers and the database entry
/// start at 0, all pointer-like members start at NULL and all file/name strings start empty.
/// Defaults that are switched on: ccd_, poly_ala_ and first_pass_. rms_cutoff_ starts at 999999,
/// res_move_ at 4 and profile_weight_away_from_interface_ at 1.0.
101  Mover( SpliceCreator::mover_name() ),
102  from_res_( 0 ),
103  to_res_( 0 ),
104  saved_from_res_( 0 ),
105  saved_to_res_( 0 ),
106  source_pdb_( "" ),
107  ccd_( true ),
108  rms_cutoff_( 999999 ),
109  res_move_( 4 ),
110  randomize_cut_( false ),
111  cut_secondarystruc_( false ),
112  task_factory_( NULL ),
113  design_task_factory_( NULL ),
114  torsion_database_fname_( "" ),
115  database_entry_( 0 ),
116  database_pdb_entry_( "" ),
117  template_file_( "" ),
118  poly_ala_( true ),
119  equal_length_( false ),
120  template_pose_( NULL ),
121  start_pose_( NULL ),
122  saved_fold_tree_( NULL ),
123  design_( false ),
124  dbase_iterate_( false ),
125  first_pass_( true ),
126  locked_res_( NULL ),
127  locked_res_id_( ' ' ),
128  checkpointing_file_ ( "" ),
129  loop_dbase_file_name_( "" ),
130  loop_pdb_source_( "" ),
131  mover_tag_( NULL ),
132  splice_filter_( NULL ),
133  use_sequence_profiles_( false ),
134  segment_type_( "" ),
135  profile_weight_away_from_interface_( 1.0 )
136 {
// Start with empty containers.
137  torsion_database_.clear();
138  delta_lengths_.clear();
139  dbase_subset_.clear();
140  splice_segments_.clear();
141  pdb_segments_.clear();
// NOTE(review): the listing's numbering jumps 141 -> 143, so a line is missing here; presumably it
// allocates end_dbase_subset_ before the dereference below -- confirm against the real file.
// The flag is set to true later, once the dbase iteration reaches its last entry.
143  end_dbase_subset_->obj = false;
144 }
145 
147 
148 /// @brief copy a stretch of aligned phi-psi dofs from source to target. No repacking no nothing.
149 /// The core function, copy_segment, copies residues from the source to the target without aligning the residues, thereby delivering all of their dofs
/// @param target   pose that is modified in place (loop length change followed by copy_segment)
/// @param source   pose the residues are copied from
/// @param from_res first residue of the stretch, numbered on target
/// @param to_res   last residue of the stretch, numbered on target
/// The matching residues on source are looked up with find_nearest_res on chain 1; both lookups must
/// return a non-zero residue or the runtime_assert fires.
150 void
151 copy_stretch( core::pose::Pose & target, core::pose::Pose const & source, core::Size const from_res, core::Size const to_res ){
152  using namespace core::pose;
153  using namespace protocols::rosetta_scripts;
154  using namespace core::chemical;
155 
156  core::Size const host_chain( 1 ); /// in certain cases, when the partner protein sterically overlaps with the designed protein, there are ambiguities about which chain to search. The use of host_chain removes these ambiguities. Here, ugly hardwired
157  core::Size const from_nearest_on_source( find_nearest_res( source, target, from_res, host_chain ) );
158  core::Size const to_nearest_on_source( find_nearest_res( source, target, to_res, host_chain ) );
159  TR<<"target: "<<from_res<<" "<<to_res<<" source: "<<from_nearest_on_source<<" "<<to_nearest_on_source<<std::endl;
160  runtime_assert( from_nearest_on_source && to_nearest_on_source );
161 // change loop length:
// residue_diff = (length of the stretch on source) - (length of the stretch on target).
// NOTE(review): core::Size is presumably unsigned; when the source stretch is the shorter one this
// subtraction wraps around, and the result only becomes negative again if llc.delta() takes a
// signed argument -- confirm.
162  core::Size const residue_diff( to_nearest_on_source - from_nearest_on_source - (to_res - from_res ));
163 // if( residue_diff == 0 ){
164 // TR<<"skipping copy_stretch since loop lengths are identical"<<std::endl;
165 // return;
166 // }
// saved_ft is only printed in the visible code; it is not restored afterwards.
167  core::kinematics::FoldTree const saved_ft( target.fold_tree() );
168  TR<<"DEBUG: copy_stretch foldtree: "<<saved_ft<<std::endl;
// NOTE(review): the declaration of llc (listing line 169) is not shown in this listing.
170  llc.loop_start( from_res );
171  llc.loop_end( to_res );
172  llc.delta( residue_diff );
173 // target.dump_pdb( "before_copy_stretch_llc_test.pdb" );
174  llc.apply( target );
175 // target.dump_pdb( "after_copy_stretch_llc_test.pdb" );
176 
// Copy the whole source stretch (its own length, not the target's original length) onto target,
// starting at from_res on target and from_nearest_on_source on source.
177  target.copy_segment( to_nearest_on_source - from_nearest_on_source + 1, source, from_res, from_nearest_on_source );
178 // target.dump_pdb( "after_copy_stretch_test.pdb" );
179 }
180 
181 /// The checkpointing file has the following structure: the first line contains an ordered list of the dbase_subset_ for splice to iterate over the loop database. The second line contains the last element tested (the loop-entry number in the database; not the iterator to it!) and the third line contains the best element tested (again, the loop number from the database, not the iterator!).
182 /// To recover from a checkpoint the following reads the dbase_subset_ then, if this is a first_pass_ the best entry becomes current, and if it is not a first_pass then the current entry is current.
/// NOTE(review): the code below reads only two lines (subset order, then current entry) and never
/// consults first_pass_; the "third line / best element" described above has no counterpart in the
/// visible code, and the checkpoint writer in this file emits two lines as well.
/// The header line of this definition is not shown in this listing.
/// Does nothing when checkpointing_file_ is empty, when the file cannot be opened, or when its first
/// line is empty.
183 void
185 {
186  using namespace std;
187 
188  if( checkpointing_file_ == "" ) return;
189  utility::io::izstream data( checkpointing_file_ );
190  if ( !data ) return;
191  TR<<"Loading from checkpoint"<<std::endl;
192 /// first read the dbase_subset from the checkpointing file
193  {
194  string line;
195  getline( data, line );
196  if( line.length() == 0 ){
197  TR<<"Checkpointing file empty or corrupted. Not loading."<<std::endl;
198  return;
199  }
200  istringstream line_stream( line );
201  dbase_subset_.clear();
// NOTE(review): the loop is driven by eof() rather than by the success of the extraction, so a
// line ending in whitespace (or containing a non-numeric token) would push one extra entry whose
// value did not come from the file. Lines written by the checkpoint writer in this file have a
// leading space and no trailing one, which is why this works for them.
202  while( !line_stream.eof() ){
203  core::Size entry;
204  line_stream >> entry;
205  dbase_subset_.push_back( entry );
206  }
207  }
// The subset is echoed to the tracer without a terminating newline.
208  TR<<"dbase subset order loaded from checkpoint is: ";
209  foreach( core::Size const i, dbase_subset_ ){
210  TR<<i<<' ';
211  }
212 
// Second line: the database entry number to resume from, mapped back to an iterator into dbase_subset_.
213  {
214  std::string line;
215  getline( data, line );
216  istringstream line_stream( line );
217  core::Size entry;
218  line_stream >> entry;
219  current_dbase_entry_ = std::find( dbase_subset_.begin(), dbase_subset_.end(), entry );
220  }
// NOTE(review): std::find returns dbase_subset_.end() when the entry is not in the subset. That
// includes the 99999 placeholder the checkpoint writer emits when the iteration is exhausted, and
// the iterator is dereferenced here unconditionally.
221  TR << "current dbase entry loaded from checkpoint is: " << *current_dbase_entry_ << std::endl;
222 }
223 
/// Writes the checkpoint file (the header line of this definition is not shown in this listing).
/// Does nothing when checkpointing_file_ is empty. The file is opened for output and receives two
/// lines: the entries of dbase_subset_, each preceded by a single space, then the entry
/// current_dbase_entry_ points at, or the placeholder 99999 when the iterator is at the end of the
/// subset. Exits with a message if the file cannot be opened for writing.
224 void
226  if( checkpointing_file_ == "" )
227  return;
228  TR<<"Splice checkpointing to file: "<<checkpointing_file_<<std::endl;
229  std::ofstream data;
230  data.open( checkpointing_file_.c_str(), std::ios::out );
231  if( !data.good() )
232  utility_exit_with_message( "Unable to open splice checkpointing file for writing: " + checkpointing_file_ + "\n" );
// Each entry goes both to the tracer (no newline is written there) and to the file.
233  foreach( core::Size const dbase_entry, dbase_subset_ ){
234  TR<<' '<<dbase_entry;
235  data << ' ' << dbase_entry;
236  }
237  if( current_dbase_entry_ == dbase_subset_.end() )
238  data<<'\n'<<99999<<std::endl;
239  else
240  data<<'\n'<<*current_dbase_entry_<<std::endl;
241  data.close();
242 }
243 
244 ///@brief controls which dbase entry will be used. Three options: 1. specific one according to user instruction; 2. randomized out of a subset of the dbase with fitting sequence lengths (if user specified 0); 3. iterating over dbase subset
/// The header lines of this definition are not shown in this listing; the body reads a `pose`
/// and returns an entry number, with 0 meaning "no entry" (the caller in this file bails out on 0).
/// On the first pass it also builds and shuffles dbase_subset_ and loads any checkpoint.
247 {
248  core::Size dbase_entry( database_entry() );
249  if( first_pass_ ){/// setup the dbase subset where loop lengths fit the selection criteria
250  for( core::Size i = 1; i <= torsion_database_.size(); ++i ){// find entries that fit the length criteria
251  using namespace protocols::rosetta_scripts;
252 
253  ResidueBBDofs const & dofs( torsion_database_[ i ] );
// Map the entry's start/stop (numbered on the template pose) onto the incoming pose, chain 1.
254  core::Size const nearest_to_entry_start_on_pose( find_nearest_res( pose, *template_pose_, dofs.start_loop(), 1/*chain*/ ) );
255  core::Size const nearest_to_entry_stop_on_pose( find_nearest_res( pose, *template_pose_, dofs.stop_loop(), 1/*chain*/ ) );
256  core::Size const pose_residues = nearest_to_entry_stop_on_pose - nearest_to_entry_start_on_pose + 1;
// delta: how many residues longer (positive) or shorter (negative) the dbase loop is than the pose segment.
257  int const delta( dofs.size() - pose_residues );
258  if( locked_res() >= nearest_to_entry_start_on_pose && locked_res() <= nearest_to_entry_stop_on_pose ){
259  /// if locked_res is within the loop, don't select different loop lengths
260  if( delta != 0 )
261  continue;
262  }
// An entry is kept when its delta is listed in delta_lengths_. When the user named a pdb entry or
// a specific entry number, every entry is kept regardless of length (apart from the locked_res
// restriction above).
263  bool const fit = std::find( delta_lengths_.begin(), delta_lengths_.end(), delta ) != delta_lengths_.end();
264  if( fit || database_pdb_entry_ != "" || dbase_entry != 0 )
265  dbase_subset_.push_back( i );
266  }
267  if( dbase_subset_.empty() ){
268  TR<<"Loop of appropriate length not found in database. Returning"<<std::endl;
269  retrieve_values();
270  return 0;
271  }
272  TR<<"Found "<<dbase_subset_.size()<<" entries in the torsion dbase that match the length criteria"<<std::endl;
273  numeric::random::random_permutation( dbase_subset_.begin(), dbase_subset_.end(), RG );
274  current_dbase_entry_ = dbase_subset_.begin();
// A checkpoint, if present, overrides both the shuffled order and the current position.
275  load_from_checkpoint();
276  } // fi first_pass
277  if( dbase_iterate() ){
// NOTE(review): on a first pass with dbase_iterate set, the checkpoint is loaded twice in a row
// (once just above, once here).
278  load_from_checkpoint();
279  if( current_dbase_entry_ == dbase_end() ){
280  TR<<"Request to read past end of dbase. Splice returns without doing anything."<<std::endl;
281  return 0;
282  }
283  dbase_entry = *current_dbase_entry_;
// The iterator is advanced only after the first pass, so the first pass uses the entry it starts on.
284  if( !first_pass_ )
285  current_dbase_entry_++;
286  if( current_dbase_entry_ == dbase_end() ){
287  TR<<"Reached last dbase entry"<<std::endl;
288  end_dbase_subset_->obj = true;
289  }
290  } // fi dbase_iterate
291  else if( dbase_entry == 0 ){
292  if( database_pdb_entry_ == "" )//randomize dbase entry
// NOTE(review): this draws a position in 1..dbase_subset_.size() but stores it directly as the
// entry number; the caller indexes torsion_database_ with the returned value, so this looks like
// it should be dbase_subset_[ position ] -- confirm.
293  dbase_entry = ( core::Size )( RG.uniform() * dbase_subset_.size() + 1 );
294  else{ // look for the pdb_entry name
295  for( core::Size count = 1; count <= dbase_subset_.size(); ++count ){
296  if( torsion_database_[ dbase_subset_[ count ] ].source_pdb() == database_pdb_entry_ ){
297  TR<<"Found entry for "<<database_pdb_entry_<<" at number "<<dbase_subset_[ count ]<<std::endl;
298  dbase_entry = dbase_subset_[ count ];
299  break;
300  }
301  }
// NOTE(review): dbase_entry is a torsion_database_ index at this point, yet it is compared against
// the size of the subset; the assert also passes when the name was not found (dbase_entry stays 0).
302  runtime_assert( dbase_entry <= dbase_subset_.size() );
303  }
304  }//fi dbase_entry==0
305 
306  return dbase_entry;
307 }
308 
/// Main entry point of the mover. The header line of this definition is not shown in this listing;
/// the body modifies `pose` in place. Outline of what the visible code does:
///  1. save from_res/to_res and record the locked residue's one-letter identity;
///  2. resolve from_res/to_res (template file, task factory, or dbase entry);
///  3. collect backbone dofs (phi/psi/omega + residue name) from source_pdb_ or from torsion_database_;
///  4. copy the template stretch onto the pose, set the fold tree, change the loop length, impose the dofs;
///  5. build the threaded sequence string and the packer task factory;
///  6. add a chainbreak, optionally add sequence constraints, then either run CCD refinement (followed by
///     a distance check, a filter and optional dbase output) or just repack;
///  7. store the resulting fold tree and call retrieve_values().
/// Most failure paths set the move status to FAIL_DO_NOT_RETRY or FAIL_RETRY, call retrieve_values() and return.
/// NOTE(review): several local declarations (llc, tso, dao, racaas, acb, mm, dao_dofs, torsion) sit on
/// lines that are missing from this listing, as the gaps in the listing's numbering show.
309 void
311 {
312  using namespace protocols::rosetta_scripts;
314 
315  set_last_move_status( protocols::moves::MS_SUCCESS );
316  TR<<"Starting splice apply"<<std::endl;
317  save_values();
318  if( locked_res() ){
319  locked_res_id( pose.residue( locked_res() ).name1() );
320  TR<<"locked residue/locked_residue_id set to: "<<locked_res()<<','<<locked_res_id()<<std::endl;
321  }
322 
323 /// from_res() and to_res() can be determined directly on the tag, through a taskfactory, or through a template file. If through a template file,
324 /// we start by translating from_res/to_res from the template file to the in coming pose as in the following paragraph
325  if( template_file_ != "" ){ /// using a template file to determine from_res() to_res()
326  core::Size template_from_res( 0 ), template_to_res( 0 );
327 
328  if( from_res() && to_res() ){
329  template_from_res = find_nearest_res( pose, *template_pose_, from_res(), 1/*chain*/);
330  template_to_res = find_nearest_res( pose, *template_pose_, to_res(), 1/*chain*/ );
331  runtime_assert( template_from_res );
332  runtime_assert( template_to_res );
333  }
334 
// When from_res/to_res were not both given, this resets them to 0.
335  from_res( template_from_res );
336  to_res( template_to_res );
337  }// fi template_file != ""
338 
// Snapshot of the incoming pose; used further down to look up the original residue identities.
339  core::pose::Pose const in_pose_copy( pose );
340  pose.conformation().detect_disulfides(); // just in case; but I think it's unnecessary
341 
342 /// from_res/to_res can also be determined through task factory, by identifying the first and last residues that are allowed to design in this tf
343  if( torsion_database_fname_ == "" && from_res() == 0 && to_res() == 0 ){/// set the splice site dynamically according to the task factory
344  utility::vector1< core::Size > designable( protocols::rosetta_scripts::residue_packer_states( pose, task_factory(), true/*designable*/, false/*packable*/ ) );
345  std::sort( designable.begin(), designable.end() );
// NOTE(review): designable[ 1 ] is read without checking that the task factory produced any designable residue.
346  from_res( designable[ 1 ] );
347  to_res( designable[ designable.size() ] );
348  }
349  core::pose::Pose source_pose;
350  core::Size nearest_to_from( 0 ), nearest_to_to( 0 ), residue_diff( 0 ); // residues on source_pose that are nearest to from_res and to_res; what is the difference in residue numbers between incoming pose and source pose
351  ResidueBBDofs dofs; /// used to store the torsion/resid dofs from any of the input files
352  dofs.clear();
353  core::Size cut_site( 0 );
354  if( torsion_database_fname_ == "" ){ // read dofs from source pose rather than database
355  core::import_pose::pose_from_pdb( source_pose, source_pdb_ );
356  nearest_to_from = find_nearest_res( source_pose, pose, from_res(), 1/*chain*/ );
357  nearest_to_to = find_nearest_res( source_pose, pose, to_res(), 1/*chain*/ );
// residue_diff is computed before the zero check below, but is not used until after it.
358  residue_diff = nearest_to_to - nearest_to_from - ( to_res() - from_res() );
359  if( nearest_to_from == 0 || nearest_to_to == 0 ){
360  TR<<"nearest_to_from: "<<nearest_to_from<<" nearest_to_to: "<<nearest_to_to<<". Failing"<<std::endl;
361  set_last_move_status( protocols::moves::FAIL_DO_NOT_RETRY );
362  retrieve_values();
363  return;
364  }
365  for( core::Size i = nearest_to_from; i <= nearest_to_to; ++i ){
366  if( source_pose.residue( i ).has_variant_type( DISULFIDE ) ){/// in future, using disulfides would be a great boon as it rigidifies loops.
367  TR<<"Residue "<<i<<" is a disulfide. Failing"<<std::endl;
368  set_last_move_status( protocols::moves::FAIL_DO_NOT_RETRY );
369  retrieve_values();
370  return;
371  }
372 /// Feed the source_pose dofs into the BBDofs array
373  BBDofs residue_dofs;
374  residue_dofs.resid( i ); /// resid is probably never used
375  residue_dofs.phi( source_pose.phi( i ) );
376  residue_dofs.psi( source_pose.psi( i ) );
377  residue_dofs.omega( source_pose.omega( i ) );
378 
379  //core::Size const nearest_on_target( find_nearest_res( pose, source_pose, i ) );
380 
381 /// convert 3let residue code to 1let code
382  std::stringstream ss; std::string s;
383  ss << source_pose.residue( i ).name1();
384  ss >> s;
385  residue_dofs.resn( s );
386 
387  dofs.push_back( residue_dofs );
388  }// for i nearest_to_from..nearest_to_to
389  cut_site = dofs.cut_site() ? dofs.cut_site() + from_res() - 1: to_res(); // isn't this always going to be to_res()? I think so...
390  }// fi torsion_database_fname==NULL
391  else{/// read from dbase
392  core::Size const dbase_entry( find_dbase_entry( pose ) );
// NOTE(review): this early return neither sets a failure status nor calls retrieve_values() itself;
// find_dbase_entry() calls retrieve_values() on only one of its two zero-returning paths.
393  if( dbase_entry == 0 )// failed to read entry
394  return;
395  dofs = torsion_database_[ dbase_entry ];
396  std::string const source_pdb_name( dofs.source_pdb() );
397  if( use_sequence_profiles_ ){
398  load_pdb_segments_from_pose_comments( pose );
399  modify_pdb_segments_with_current_segment( source_pdb_name );
400  }
401  TR<<"Taking loop from source pdb "<<source_pdb_name<<std::endl;
402  if( mover_tag_() != NULL )
403  mover_tag_->obj = "segment_" + source_pdb_name;
404  foreach( BBDofs & resdofs, dofs ){/// transform 3-letter code to 1-letter code
405  using namespace core::chemical;
406  if( resdofs.resn() == "CYD" ){// at one point it would be a good idea to use disulfides rather than bail out on them...; I think disulfided cysteines wouldn't be written as CYD. This requires something more clever...
407  TR<<"Residue "<<resdofs.resid()<<" is a disulfide. Failing"<<std::endl;
408  set_last_move_status( protocols::moves::FAIL_DO_NOT_RETRY );
409  retrieve_values();
410  return;
411  }
412  std::stringstream ss; std::string s;
413  ss << oneletter_code_from_aa( aa_from_name( resdofs.resn() ) );
414  ss >> s;
415  resdofs.resn( s );
416  }/// foreach resdof
417  nearest_to_to = dofs.size(); /// nearest_to_to and nearest_to_from are used below to compute the difference in residue numbers...
418  nearest_to_from = 1;
419  /// set from_res/to_res/cut_site on the incoming pose
420  if( template_file_ != "" ){/// according to the template pose
421  from_res( find_nearest_res( pose, *template_pose_, dofs.start_loop(), 1/*chain*/ ) );
422  to_res( find_nearest_res( pose, *template_pose_, dofs.stop_loop(), 1/*chain*/ ) );
423 // to_res( from_res() + dofs.size() -1);
424  runtime_assert( from_res() );
425  runtime_assert( to_res() );
426  cut_site = dofs.cut_site() - dofs.start_loop() + from_res();
427  } // fi template_file != ""
428  else{/// according to the dofs array (taken from the dbase)
429  from_res( dofs.start_loop() );
430  to_res( dofs.stop_loop() );
431  cut_site = dofs.cut_site();
432  runtime_assert( from_res() && to_res() && cut_site );
433  }
// Number of residues the dbase loop has beyond the start_loop..stop_loop span it replaces.
434  residue_diff = dofs.size() - ( dofs.stop_loop() - dofs.start_loop() + 1 );
435  }// read from dbase
436  TR<<"From res: "<<from_res()<<" to_res: "<<to_res()<<std::endl;
437  runtime_assert( to_res() > from_res() );
438 // if( saved_fold_tree_ )/// is saved_fold_tree_ being used?
439 // pose.fold_tree( *saved_fold_tree_ );
440 
441 /// The database is computed with respect to the template pose, so before applying dofs from the dbase it's important to make that stretch identical to
442 /// the template. from_res() and to_res() were previously computed to be with respect to the incoming pose, so within this subroutine they refer to pose rather
443 /// than template_pose (this is a bit confusing, but it works!)
// NOTE(review): template_pose_ is dereferenced unconditionally here; the constructor initialises it to
// NULL and no assignment is visible in this part of the file -- confirm it is always set, including
// when dofs come from source_pdb_.
444  copy_stretch( pose, *template_pose_, from_res(), to_res() );
445 // ( *scorefxn() ) ( pose );
446 
447  using namespace utility;
448 /// randomize_cut() should not be invoked with a database entry, b/c the dbase already specified the cut sites.
449 /// this is important b/c nearest_to_from/nearest_to_to are degenerate if the dbase is used.
450  if( randomize_cut() ){
451 /// choose cutsite randomly within loop residues on the loop (no 2ary structure)
452  core::scoring::dssp::Dssp dssp( source_pose );
453  dssp.dssp_reduced(); // switch to simplified H E L notation
454  std::vector< core::Size > loop_positions_in_source;
455  loop_positions_in_source.clear();
456  TR<<"DSSP of source segment: ";
457  for( core::Size i = nearest_to_from; i <= std::min( nearest_to_to, to_res() - from_res() + nearest_to_from ); ++i ){
458  if( dssp.get_dssp_secstruct( i ) == 'L' || cut_secondarystruc() ) // allow site for cutting if it's either in a loop or if cutting secondary structure is allowed
459  loop_positions_in_source.push_back( i );
460  TR<<dssp.get_dssp_secstruct( i );
461  }
462  TR<<std::endl;
// NOTE(review): there is no check that loop_positions_in_source is non-empty before it is indexed
// (it is empty when no position is 'L' and cut_secondarystruc() is false).
463  cut_site = loop_positions_in_source[ (core::Size) ( RG.uniform() * loop_positions_in_source.size()) ] - nearest_to_from + from_res();
464  TR<<"Cut placed at: "<<cut_site<<std::endl;
465  }// fi randomize_cut
466 // pose.dump_pdb( "before_ft_test.pdb" );
467  fold_tree( pose, from_res(), pose.total_residue()/*to_res() SJF DEBUGGING 7Dec12*/, cut_site );/// the fold_tree routine will actually set the fold tree to surround the loop
468 // pose.dump_pdb( "after_ft_test.pdb" );
469 /// change the loop length
470  TR<<"Foldtree before loop length change: "<<pose.fold_tree()<<std::endl;
472  llc.loop_start( from_res() );
// If shrinking would move the cut before from_res, the loop end is set to to_res instead of the cut site.
473  llc.loop_end( cut_site + residue_diff < from_res() ? to_res() : cut_site );
474  llc.delta( residue_diff );
475  llc.apply( pose );
476  TR<<"Foldtree after loop length change: "<<pose.fold_tree()<<std::endl;
477 
478 // pose.dump_pdb( "after_2ndllc_test.pdb" );
479 /// set torsions
480  core::Size const total_residue_new( dofs.size() );
481  TR<<"Changing dofs\n";
// dofs is indexed from 1, so dofs[ i + 1 ] goes to pose residue from_res() + i.
482  for( core::Size i = 0; i < total_residue_new; ++i ){
483  core::Size const pose_resi( from_res() + i );
484 // TR<<"Previous phi/psi/omega at resi: "<<pose_resi<<" "<<pose.phi( pose_resi )<<'/'<<pose.psi( pose_resi )<<'/'<<pose.omega( pose_resi )<<'\n';
485  pose.set_phi( pose_resi, dofs[ i + 1 ].phi() );
486  pose.set_psi( pose_resi, dofs[ i + 1 ].psi() );
487  pose.set_omega( pose_resi, dofs[ i + 1 ].omega() );
488 // pose.dump_pdb( "dump"+ utility::to_string( i ) + ".pdb" );
489  TR<<"resi, phi/psi/omega: "<< pose_resi<<' '<<pose.phi( pose_resi )<<'/'<<pose.psi( pose_resi )<<'/'<<pose.omega( pose_resi )<<std::endl;
490 // TR<<"requested phi/psi/omega: "<<dofs[ i + 1 ].phi()<<'/'<<dofs[i+1].psi()<<'/'<<dofs[i+1].omega()<<std::endl;
491  }
492 // pose.dump_pdb( "after_changedofs_test.pdb" );
493  TR<<std::endl;
494  std::string threaded_seq( "" );/// will be all ALA except for Pro/Gly on source pose and matching identities on source pose
495 /// Now decide on residue identities: Alanine throughout except when the template pose has Gly, Pro or a residue that is the same as that in the original pose
// One character is appended per loop residue: the locked residue's identity, the dofs' residue
// letter, "A", 'x' or ' ' (the code's own comments mark 'x' as designable and ' ' as repack-only).
496  utility::vector1< core::Size > pro_gly_res; //keeping track of where pro/gly residues are placed
497  pro_gly_res.clear();
498  for( core::Size i = 0; i < total_residue_new; ++i ){
499  core::Size const pose_resi( from_res() + i );
500  std::string const dofs_resn( dofs[ i + 1 ].resn() );
501  runtime_assert( dofs_resn.length() == 1 );
502  if( pose_resi == locked_res() ){
503  threaded_seq += locked_res_id();
504  continue;
505  }
506  if( design() ){ // all non pro/gly residues in template are allowed to design
507  if( dofs_resn == "G" || dofs_resn == "P" ){
508  threaded_seq += dofs_resn;
509  pro_gly_res.push_back( pose_resi );
510  TR<<"Pro/Gly will be allowed at: "<<pose_resi<<std::endl;
511  }
512  else
513  threaded_seq += 'x';
514  continue;
515  }
516  core::Size const host_chain( 1 );
517  core::Size const nearest_in_copy( find_nearest_res( in_pose_copy, pose, pose_resi, host_chain ) );
518  if( ( nearest_in_copy > 0 && dofs_resn[ 0 ] == in_pose_copy.residue( nearest_in_copy ).name1() ) || dofs_resn == "G" || dofs_resn == "P" )
519  threaded_seq += dofs_resn;
520  else{
521  if( poly_ala() )
522  threaded_seq += "A";
523  else{
524  char orig_residue( 0 );
525  if( nearest_in_copy )
526  orig_residue = in_pose_copy.residue( nearest_in_copy ).name1();
527  if( orig_residue == 0 || orig_residue == 'G' || orig_residue == 'P' )
528  threaded_seq += 'x'; // residues that were originally Gly/Pro can be designed now
529  else
530  threaded_seq += ' '; // only repack
531  }
532  }
533  }
534 
535  using namespace protocols::toolbox::task_operations;
536  using namespace core::pack::task;
538  tso->target_sequence( threaded_seq );
539  tso->start_res( from_res() );
540  tso->allow_design_around( true ); // 21Sep12: from now on the design shell is determined downstream //false );
541  TR<<"Threading sequence: "<<threaded_seq<<" starting from "<<from_res()<<std::endl;
// Start from a copy of the user's design task factory when one was given, otherwise from an empty one.
542  TaskFactoryOP tf;
543  if( design_task_factory()() == NULL )
544  tf = new TaskFactory;
545  else
546  tf = new TaskFactory( *design_task_factory() );
547 
// NOTE(review): the statement that actually restricts each chain (listing line 550) is not shown in this listing.
548  for( core::Size i = 2; i <= pose.conformation().num_chains(); ++i ){
549  TR<<"Restricting chain "<<i<<" to repacking only"<<std::endl;
551  }
552 
553  tf->push_back( new operation::InitializeFromCommandline );
554  tf->push_back( new operation::NoRepackDisulfides );
555  tf->push_back( tso );
557  dao->design_shell( (design_task_factory()() == NULL ? 0.0 : 4.0 ) ); // threaded sequence operation needs to design, and will restrict design to the loop, unless design_task_factory is defined, in which case a larger shell can be defined
558  dao->repack_shell( 6.0 );
// NOTE(review): core::Size is presumably unsigned, so from_res() - 1 here (and from_res() - 3 and
// from_res() - 6 further down) would wrap for a loop starting at the very N-terminus; the upper bound
// can likewise run past the last residue -- confirm the inputs exclude these cases.
559  for( core::Size i = from_res() - 1; i <= from_res() + total_residue_new + 1; ++i ){
560  if( !pose.residue( i ).has_variant_type( DISULFIDE ) )
561  dao->include_residue( i );
562  }
563  tf->push_back( dao );
564  TR<<"allowing pro/gly only at positions: ";
// Every residue that is not in pro_gly_res gets its own operation pushed onto tf, limiting it to the listed amino acids.
565  for(core::Size res_num=1; res_num <= pose.total_residue(); res_num++ ){
566  if( std::find( pro_gly_res.begin(), pro_gly_res.end(), res_num ) == pro_gly_res.end() ){
568  racaas->keep_aas( "ADEFIKLMNQRSTVWY" ); /// disallow pro/gly/cys/his
569  racaas->include_residue( res_num );
570  tf->push_back( racaas);
571  }
572  else
573  TR<<res_num<<", ";
574  }
575  TR<<std::endl;
576 // if( locked_res() ){
577 // operation::PreventRepackingOP pr = new operation::PreventRepacking;
578 // pr->include_residue( locked_res() );
579 // tf->push_back( pr );
580 // TR<<"preventing locked residue "<<locked_res()<<" from repacking"<<std::endl;
581 // }
582 
583 
// The chainbreak is placed at the cut site shifted by the loop length change; the fold tree is left as is.
585  acb.resnum( utility::to_string( cut_site + residue_diff ) );
586  acb.find_automatically( false );
587  acb.change_foldtree( false );
588  acb.apply( pose );
589  TR<<"Adding chainbreak at: "<<cut_site + residue_diff <<std::endl;
590 //SJF debug pose.conformation().detect_disulfides();
591 // ( *scorefxn() ) ( pose );
592 // pose.update_residue_neighbors();
593  if( use_sequence_profiles_ )
594  add_sequence_constraints( pose );
595  if( ccd() ){
596  using namespace protocols::loops;
// The loop handed to CCD extends 6 residues beyond from_res/to_res, clamped to [2, total_residue - 1].
597  Loop loop( std::max( (core::Size) 2, from_res() - 6 )/*start*/, std::min( pose.total_residue()-1, to_res() + 6 )/*stop*/, cut_site/*cut*/ );
598  LoopsOP loops = new Loops();
599  loops->push_back( loop );
600 
601 /// Set ccd to minimize 4 residues at each loop terminus including the first residue of the loop. This way,
602 /// the torsion in the loop are maintained. Allow repacking around the loop.
603 /// If disulfide occurs in the range that is allowed to minimize, adjust that region to not include disulf
604  core::scoring::ScoreFunctionOP scorefxn_local( scorefxn()->clone() );/// in case you want to modify the scorefxn. Currently not used
605  protocols::loops::loop_mover::refine::LoopMover_Refine_CCD ccd_mover( loops, scorefxn_local );
606  ccd_mover.temp_initial( 1.5 );
607  ccd_mover.temp_final( 0.5 );
// Everything is frozen first; only the two terminal windows selected below are released.
609  mm = new core::kinematics::MoveMap;
610  mm->set_chi( false ); mm->set_bb( false ); mm->set_jump( false );
611  /// First look for disulfides. Those should never be moved.
// disulfn ends up as the last disulfide found in the N-terminal window (no break in that loop);
// disulfc is the first disulfide found in the C-terminal window.
612  core::Size disulfn( 0 ), disulfc( 0 );
613  for( core::Size i = from_res() - 3; i <= from_res(); ++i ){
614  if( pose.residue( i ).has_variant_type( DISULFIDE ) ){
615  disulfn = i;
616  }
617  }
618  for( core::Size i = from_res() + total_residue_new - 1; i <= from_res() + total_residue_new + 2; ++i ){
619  if( pose.residue( i ).has_variant_type( DISULFIDE ) ){
620  disulfc = i;
621  break;
622  }
623  }
624  core::Size const startn( disulfn > 0 ? disulfn + 1 : from_res() - 3 );
625  core::Size const startc( disulfc > 0 ? disulfc - 6 : from_res() + total_residue_new - ( res_move() - 3 ) );
// Two windows of res_move() residues, starting at startn and at startc, get movable backbone and chi.
626  for( core::Size i = startn; i <= startn + res_move() - 1; ++i ){
627  mm->set_chi( i, true );
628  mm->set_bb( i, true );
629  }
630  for( core::Size i = startc; i <= startc + res_move() - 1; ++i ){
631  mm->set_chi( i, true );
632  mm->set_bb( i, true );
633  }
634  ccd_mover.set_task_factory( tf );
635  ccd_mover.move_map( mm );
636  ccd_mover.apply( pose );
637 
638 /// following ccd, compute rmsd to source loop to ensure that you haven't moved too much. This is pretty decent filter
639  if( torsion_database_fname_ == "" ){ // no use computing rms if coming from a database (no coordinates)
// Despite the names, the value compared with rms_cutoff() is the mean CA-CA distance over the
// spliced residues, not a root-mean-square deviation.
640  core::Real rms( 0 );
641  for( core::Size i = 0; i <= total_residue_new - 1; ++i ){
642  core::Real const dist( pose.residue( from_res() + i ).xyz( "CA" ).distance( source_pose.residue( nearest_to_from+ i ).xyz("CA" ) ) );
643  rms += dist;
644  }
645  core::Real const average_rms( rms / total_residue_new );
646  TR<<"Average distance of spliced segment to original: "<< average_rms<<std::endl;
647  if( average_rms >= rms_cutoff() ){
648  TR<<"Failing because rmsd = "<<average_rms<<std::endl;
649  set_last_move_status( protocols::moves::FAIL_RETRY );
650  retrieve_values();
651  return;
652  }
// NOTE(review): the filter is consulted only inside this source_pdb branch, and splice_filter() is
// dereferenced without a null check although the constructor default is NULL -- confirm it is always set.
653  if( !splice_filter()->apply( pose ) ){
654  TR<<"Failing because filter fails"<<std::endl;
655  set_last_move_status( protocols::moves::FAIL_RETRY );
656  retrieve_values();
657  return;
658  }
659  }
660 /// tell us what the torsions of the new (closed) loop are. This is used for dbase construction. At one point, might be a good idea to make the mover
661 /// output the dofs directly to a dbase file rather than to a log file.
662  TaskFactoryOP tf_dofs = new TaskFactory;
664  for( core::Size i = startn; i <= startc + res_move() - 1; ++i )
665  dao_dofs->include_residue( i );
666  dao_dofs->design_shell( 0 );/// only include the loop residues
667  tf_dofs->push_back( dao_dofs );
669  torsion.task_factory( tf_dofs );
670  torsion.task_factory_set( true );
671  torsion.apply( pose );
// The stop residue is shifted back by residue_diff before it is reported and written out.
672  core::Size const stop_on_template( startc + res_move() - 1 - residue_diff );
673  TR_ccd << "start, stop, cut: "<<startn<<" "<<stop_on_template<<" "<<cut_site<<std::endl; /// used for the dbase
674 
675 /// Now write to dbase disk file
// Appends one line: "phi psi omega name3" for every residue in startn..startc+res_move()-1, then
// start, stop, cut, then loop_pdb_source_ (or the word "cut" when no source name was given).
676  if( loop_dbase_file_name_ != "" ){
677  std::ofstream dbase_file;
678  dbase_file.open( loop_dbase_file_name_.c_str(), std::ios::app );
679  for( core::Size i = startn; i <= startc + res_move() - 1; ++i )
680  dbase_file << pose.phi( i )<<' '<<pose.psi( i )<<' '<<pose.omega( i )<<' '<<pose.residue( i ).name3()<<' ';
681  dbase_file << startn<<' '<<stop_on_template<<' '<<cut_site<<' ';
682  if( loop_pdb_source_ != "" )
683  dbase_file << loop_pdb_source_<<std::endl;
684  else
685  dbase_file << "cut" << std::endl; // the word cut is used as a placeholder. It is advised to use instead the source pdb file in this field so as to keep track of the origin of dbase loops
686  dbase_file.close();
687  }
688  }// fi ccd
689  else{ // if no ccd, still need to thread sequence
690  PackerTaskOP ptask = tf()->create_task_and_apply_taskoperations( pose );
691  protocols::simple_moves::PackRotamersMover prm( scorefxn(), ptask );
692 // pose.conformation().detect_disulfides();
693 // pose.update_residue_neighbors();
694 // (*scorefxn())(pose);
695  prm.apply( pose );
696  }
// Keep a copy of the final fold tree, then restore the user's from_res/to_res.
697  saved_fold_tree_ = new core::kinematics::FoldTree( pose.fold_tree() );
698  retrieve_values();
699 }
700 
701 /// splice apply might change the from_res/to_res internals since they sometimes refer to the template file. If that happens, we want the values to
702 /// revert to their original values before the end of the apply function (so retrieve_values) below must be called before return.
/// This definition (its header line is not shown in this listing) stores the current from_res()/to_res()
/// in saved_from_res_/saved_to_res_.
703 void
705  saved_from_res_ = from_res();
706  saved_to_res_ = to_res();
707 }
708 
/// Restores from_res/to_res from saved_from_res_/saved_to_res_ (the header line of this definition
/// is not shown in this listing). As side effects, every call also clears first_pass_ and writes the
/// checkpoint file, so this happens on the failure paths that call it as well as on success.
709 void
711  from_res( saved_from_res_ );
712  to_res( saved_to_res_ );
713  first_pass_ = false;
714  save_to_checkpoint();
715 }
716 
// Tail of a definition whose header lines are not shown in this listing: it returns whatever
// SpliceCreator::mover_name() returns. NOTE(review): presumably the mover's name getter -- confirm.
719  return SpliceCreator::mover_name();
720 }
721 
722 void
724 {
725  start_pose_ = new core::pose::Pose( pose );
726  runtime_assert( tag->hasOption( "task_operations" ) != (tag->hasOption( "from_res" ) || tag->hasOption( "to_res" ) ) || tag->hasOption( "torsion_database" ) ); // it makes no sense to activate both taskoperations and from_res/to_res.
727  runtime_assert( tag->hasOption( "torsion_database" ) != tag->hasOption( "source_pdb" ) );
728  task_factory( protocols::rosetta_scripts::parse_task_operations( tag, data ) );
729  if( !tag->hasOption( "task_operations" ) ){
730  from_res( core::pose::parse_resnum( tag->getOption< std::string >( "from_res", "0" ), pose ) );
731  to_res( core::pose::parse_resnum( tag->getOption< std::string >( "to_res", "0" ), pose ) );
732  }
733  if( tag->hasOption( "design_task_operations" ) ){
734  TR<<"Defined design_task_factory, which will be used during splice design"<<std::endl;
735  design_task_factory( protocols::rosetta_scripts::parse_task_operations( tag->getOption< std::string >( "design_task_operations" ), data ) );
736  }
737  if( tag->hasOption( "residue_numbers_setter" ) ){
738  runtime_assert( !tag->hasOption( "locked_res" ) );
739  locked_res_ = protocols::moves::get_set_from_datamap< protocols::moves::DataMapObj< utility::vector1< core::Size > > >( "residue_numbers", tag->getOption< std::string >( "residue_numbers_setter" ), data );
740  }
741  if( tag->hasOption( "torsion_database" ) ){
742  torsion_database_fname( tag->getOption< std::string >( "torsion_database" ) );
743  database_entry( tag->getOption< core::Size >( "database_entry", 0 ) );
744  database_pdb_entry( tag->getOption< std::string >( "database_pdb_entry", "" ) );
745  runtime_assert( !( tag->hasOption( "database_entry" ) && tag->hasOption( "database_pdb_entry" ) ) );
746  runtime_assert( !( tag->hasOption( "delta_lengths" ) && (tag->hasOption( "database_pdb_entry" ) || tag->hasOption( "database_entry" ) ) ) );
747  read_torsion_database();
748  TR<<"torsion_database: "<<torsion_database_fname()<<" ";
749  if( database_entry() == 0 ){
750  if( database_pdb_entry_ == "" )
751  TR<<" database entry will be randomly picked at run time. ";
752  else
753  TR<<" picking database entry "<<database_pdb_entry()<<std::endl;
754  }
755  else{
756  TR<<" database_entry: "<<database_entry()<<" ";
757  runtime_assert( database_entry() <= torsion_database_.size() );
758  }
759  }
760  else
761  source_pdb( tag->getOption< std::string >( "source_pdb" ) );
762  scorefxn( protocols::rosetta_scripts::parse_score_function( tag, data ) );
763  ccd( tag->getOption< bool >( "ccd", 1 ) );
764  rms_cutoff( tag->getOption< core::Real >( "rms_cutoff", 999999 ) );
765  runtime_assert( !(tag->hasOption( "torsion_database" ) && tag->hasOption( "rms_cutoff" )) ); // torsion database doesn't specify coordinates so no point in computing rms
766  res_move( tag->getOption< core::Size >( "res_move", 4 ) );
767  randomize_cut( tag->getOption< bool >( "randomize_cut", false ) );
768  runtime_assert( ( tag->hasOption( "randomize_cut" ) && tag->hasOption( "source_pose" ) ) || !tag->hasOption( "source_pose" ) );
769  cut_secondarystruc( tag->getOption< bool >( "cut_secondarystruc", false ) );
770 // runtime_assert( (tag->hasOption( "cut_secondarystruc ") && tag->hasOption( "randomize_cut" )) || !tag->hasOption( "cut_secondarystruc" ) );
771  template_file( tag->getOption< std::string >( "template_file", "" ) );
772  equal_length( tag->getOption< bool >( "equal_length", false ) );
773  poly_ala( tag->getOption< bool >( "thread_ala", true ) );
774 
775  typedef utility::vector1< std::string > StringVec;
776  std::string delta;
777  if( tag->hasOption( "delta_lengths" ) ){
778  delta = tag->getOption< std::string >( "delta_lengths" );
779  StringVec const lengths_keys( utility::string_split( delta, ',' ) );
780  foreach( std::string const delta, lengths_keys ){
781  if( delta == "" ) continue;
782  int const delta_i( 1 * atoi( delta.c_str() ) );
783  delta_lengths_.push_back( delta_i );
784  }
785  }
786  else
787  delta_lengths_.push_back( 0 );
788  std::sort( delta_lengths_.begin(), delta_lengths_.end() );
789  std::unique( delta_lengths_.begin(), delta_lengths_.end() );
790 
791  if( template_file_ != "" ){ /// using a template file to determine from_res() to_res()
792  if( data.has( "poses", template_file_ ) ){
793  template_pose_ = data.get< core::pose::Pose * >( "poses", template_file_ );
794  TR<<"using template pdb from datamap"<<std::endl;
795  }
796  else if( tag->hasOption( "template_file" ) ){
797  template_pose_ = new core::pose::Pose;
798  core::import_pose::pose_from_pdb( *template_pose_, template_file_ );
799  data.add( "poses", template_file_, template_pose_ );
800  TR<<"loading template_pose from "<<template_file_<<std::endl;
801  }
802  }
803  else
804  template_pose_ = new core::pose::Pose( pose );
805 
806  design( tag->getOption< bool >( "design", false ) );
807  dbase_iterate( tag->getOption< bool >( "dbase_iterate", false ) );
808  if( dbase_iterate() ){ /// put the end_dbase_subset_ variable on the datamap for LoopOver & MC to be sensitive to it
809  std::string const curr_mover_name( tag->getOption< std::string >( "name" ) );
810  data.add( "stopping_condition", curr_mover_name, end_dbase_subset_ );
811  TR<<"Placed stopping_condition "<<curr_mover_name<<" on the DataMap"<<std::endl;
812  }
813  if( tag->hasOption( "locked_residue" ) ){
814  locked_res( core::pose::parse_resnum( tag->getOption< std::string >( "locked_residue" ), pose ) );
815  locked_res_id( pose.residue( locked_res() ).name1() );
816  TR<<"locking residue "<<locked_res()<<" of identity "<<locked_res_id()<<std::endl;
817  }
818  checkpointing_file( tag->getOption< std::string > ( "checkpointing_file", "" ) );
819  loop_dbase_file_name( tag->getOption< std::string > ( "loop_dbase_file_name", "" ) );
820  if( tag->hasOption( "splice_filter" ))
821  splice_filter( protocols::rosetta_scripts::parse_filter( tag->getOption< std::string >( "splice_filter" ), filters ) );
822  if( tag->hasOption( "mover_tag" ) )
823  mover_tag_ = protocols::moves::get_set_from_datamap< protocols::moves::DataMapObj< std::string > >( "tags", tag->getOption< std::string >( "mover_tag" ), data );
824  loop_pdb_source( tag->getOption< std::string >( "loop_pdb_source", "" ) );
825 
826  utility::vector1< TagPtr > const sub_tags( tag->getTags() );
827  foreach( TagPtr const sub_tag, sub_tags ){
828  if( sub_tag->getName() == "Segments" ){
829  use_sequence_profiles_ = true;
830  profile_weight_away_from_interface( tag->getOption< core::Real >( "profile_weight_away_from_interface", 1.0 ) );
831  segment_type_ = sub_tag->getOption< std::string >( "current_segment" );
832  TR<<"reading segments in splice "<<tag->getName()<<std::endl;
833 /* e.g.,
834 <Splice name=splice_L2...
835  <Segments current_segment=L2>
836  <L1 pdb_profile_match="pdb_profile_match.L1" profiles="L1.1:config/L1.1.pssm,L1.2:config/L1.2.pssm"/>
837  <L2 pdb_profile_match="pdb_profile_match.L2" profiles="L2.1:config/L2.2.pssm,L2.2:config/L2.2.pssm"/>
838  </Segments>
839 </Splice>
840 */
841  utility::vector1< TagPtr > const segment_tags( sub_tag->getTags() );
842  foreach( TagPtr const segment_tag, segment_tags ){
843  std::string const segment_name( segment_tag->getName() );
844  std::string const pdb_profile_match( segment_tag->getOption< std::string >( "pdb_profile_match" ) );
845  std::string const profiles_str( segment_tag->getOption< std::string >( "profiles" ) );
846  StringVec const profile_name_pairs( utility::string_split( profiles_str, ',' ) );
847  SpliceSegmentOP splice_segment( new SpliceSegment );
848  foreach( std::string const s, profile_name_pairs ){
849  StringVec const profile_name_file_name( utility::string_split( s, ':' ) );
850  splice_segment->read_profile( profile_name_file_name[ 2 ], profile_name_file_name[ 1 ] );
851  }
852  splice_segment->read_pdb_profile( pdb_profile_match );
853  splice_segments_.insert( std::pair< std::string, SpliceSegmentOP >( segment_name, splice_segment ) );
854  }//foreach segment_tag
855  }// fi Segments
856  }//foreach sub_tag
857 
858  TR<<"from_res: "<<from_res()<<" to_res: "<<to_res()<<" dbase_iterate: "<<dbase_iterate()<<" randomize_cut: "<<randomize_cut()<<" cut_secondarystruc: "<<cut_secondarystruc()<<" source_pdb: "<<source_pdb()<<" ccd: "<<ccd()<<" rms_cutoff: "<<rms_cutoff()<<" res_move: "<<res_move()<<" template_file: "<<template_file()<<" checkpointing_file: "<<checkpointing_file_<<" loop_dbase_file_name: "<<loop_dbase_file_name_<<" loop_pdb_source: "<<loop_pdb_source()<<" mover_tag: "<<mover_tag_<<" torsion_database: "<<torsion_database_fname_<<std::endl;
859 }
860 
/// Returns a new Splice copy-constructed from this object, wrapped in a protocols::moves::MoverOP.
/// NOTE(review): the return-type line of this definition is missing from this listing.
862 Splice::clone() const {
863  return( protocols::moves::MoverOP( new Splice( *this ) ));
864 }
865 
/// Setter: stores sf in scorefxn_.
/// NOTE(review): the signature line (which declares sf) is missing from this listing.
866 void
868  scorefxn_ = sf;
869 }
870 
/// Getter: returns scorefxn_.
/// NOTE(review): the return-type and signature lines are missing from this listing.
873  return scorefxn_;
874 }
875 
/// Getter: returns task_factory_.
/// NOTE(review): the return-type line is missing from this listing.
877 Splice::task_factory() const{ return task_factory_; }
878 
879 void
881 
/// Getter: returns design_task_factory_.
/// NOTE(review): the return-type line is missing from this listing.
883 Splice::design_task_factory() const{ return design_task_factory_; }
884 
885 void
887 
888 
889 /// the torsion dbase should have the following structure:
890 /// each line represents a single loop. Each four values represent <phi> <psi> <omega> <3-let resid>; the last entry in a line represents <loop start> <loop stop> <cut site> cut; where cut signifies that this is the loop designator
/// Reads the file named by torsion_database_fname_ and appends one ResidueBBDofs entry per line to torsion_database_.
/// Exits if the file cannot be opened or if a line's space-separated element count is not a multiple of 4.
/// NOTE(review): the code stores the fourth field of the last group via source_pdb(), not as a literal "cut" marker -- confirm the format description above.
/// NOTE(review): the signature line and the declaration of phi/psi/omega are missing from this listing.
891 void
893  using namespace std;
894 
895  TR<<"Reading torsion database"<<std::endl;
896  utility::io::izstream data( torsion_database_fname_ );
897  if ( !data ) {
898  TR << "cannot open torsion database " << torsion_database_fname_ << std::endl;
899  utility_exit();
900  }
901  std::string line;
902  while( getline( data, line ) ) {
// Sanity check on the raw line before parsing it: entries come in groups of four fields.
903  utility::vector1< std::string > const elements_in_line( utility::string_split( line, ' ' ) );
904  if( elements_in_line.size() % 4 != 0 )
905  utility_exit_with_message( "While reading torsion database "+torsion_database_fname_+" found a line where the number of elements is not divisible by 4. This likely stems from an error in the database:\n" + line );
906  std::istringstream line_stream( line );
907  ResidueBBDofs bbdof_entry;
908  bbdof_entry.clear();
// Read groups of four until the stream reports eof; the group whose extraction reaches eof is
// interpreted as the loop descriptor rather than as a residue.
// NOTE(review): if the line ends in trailing whitespace, the last extraction would presumably fail
// instead of reading the descriptor -- verify against the database files in use.
909  while( !line_stream.eof() ){
911  std::string resn;
912  line_stream >> phi >> psi >> omega >> resn;
913  if( line_stream.eof() ){// the end of the line signifies that we're reading the start, stop, cut, source_pdb fields
914  bbdof_entry.start_loop( (core::Size ) phi );
915  bbdof_entry.stop_loop( (core::Size ) psi );
916  bbdof_entry.cut_site( (core::Size ) omega );
917  bbdof_entry.source_pdb( resn );
918  }
919  else
920  bbdof_entry.push_back( BBDofs( 0/*resid*/, phi, psi, omega, resn ) ); /// resid may one day be used. Currently it isn't
921  }
922  torsion_database_.push_back( bbdof_entry );
923  }
924  TR<<"Finished reading torsion database with "<<torsion_database_.size()<<" entries"<<std::endl;
925 }
926 
927 ///@brief set the fold tree around start/stop/cut sites.
928 /// presently makes a simple fold tree, but at one point may be a more complicated function to include two poses
929 void
931  using namespace protocols::loops;
932  core::conformation::Conformation const & conf( pose.conformation() );
933  core::Size const s1 = std::max( (core::Size) 2, start - 6 );
934  core::Size const s2 = std::min( conf.chain_end( 1 )/* - 1*/, stop + 6 );
936  ft.clear();
937  if( conf.num_chains() == 1 ){/// build simple ft for the cut
938  ft.add_edge( 1, s1, -1 );
939  ft.add_edge( s1, s2, 1 );
940  ft.add_edge( s1, cut, -1 );
941  ft.add_edge( s2, cut + 1, -1 );
942  ft.add_edge( s2, pose.total_residue(), -1 );
943  ft.delete_self_edges();
944  TR<<"single chain ft: "<<ft<<std::endl;
945  pose.fold_tree( ft );
946  return;
947  }
948  //core::Size from_res( 0 );
949  for( core::Size resi = conf.chain_begin( 1 ); resi <= conf.chain_end( 1 ); ++resi ){
950  if( pose.residue( resi ).has_variant_type( core::chemical::DISULFIDE ) ){
951  //from_res = resi; // set but never used ~Labonte
952  break;
953  }
954  }
955  ft.add_edge( 1, s1, -1 );
956  ft.add_edge( s1, s2, 1 );
957  ft.add_edge( s2, conf.chain_end( 1 ), -1 );
958  if( locked_res() > 0 && (locked_res() <= s2 && locked_res() >= s1 )){
959  TR<<"s1,s2,locked_res: "<<s1<<','<<s2<<','<<locked_res()<<std::endl;
960  if( locked_res() < cut ){
961  ft.add_edge( s1, locked_res(), -1 );
962  ft.add_edge( locked_res(), cut, -1 );
963  ft.add_edge( s2, cut+1, -1 );
964  }
965  if( locked_res() > cut ){
966  ft.add_edge( s1, cut, -1 );
967  ft.add_edge( s2, locked_res(), -1 );
968  ft.add_edge( locked_res(), cut + 1, -1 );
969  }
970  if( locked_res() == cut ){
971  ft.add_edge( s1, cut, -1 );
972  ft.add_edge( s2, cut+1, -1 );
973  }
974  using namespace protocols::protein_interface_design;
975  std::string const from_atom( optimal_connection_point( pose.residue( locked_res() ).name3() ) );
976  core::Real min_dist( 100000 );
977  core::Size nearest_res( 0 );
978  core::Size nearest_atom( 0 );
979  for( core::Size resi = conf.chain_begin( 2 ); resi <= conf.chain_end( 2 ); ++resi ){
980  core::conformation::Residue const residue( conf.residue( resi ) );
981  if( residue.is_ligand() ) continue;
982  for( core::Size atomi = 1; atomi <= residue.natoms(); ++atomi ){
983  core::Real const dist( conf.residue( locked_res() ).xyz( from_atom ).distance( residue.xyz( atomi ) ) );
984  if( dist <= min_dist ){
985  nearest_res = resi;
986  nearest_atom = atomi;
987  min_dist = dist;
988  }
989  }
990  }
991  runtime_assert( nearest_res );
992  ft.add_edge( locked_res(), nearest_res, 2 );
993  ft.add_edge( nearest_res, conf.chain_begin( 2 ), -1 );
994  ft.add_edge( nearest_res, conf.chain_end( 2 ), -1 );
995  ft.set_jump_atoms( 2, from_atom, conf.residue( nearest_res ).atom_name( nearest_atom ) );
996  }
997  else{
998  if(locked_res() > 0 && ! ( locked_res() > s1 && locked_res() < s2 ) ){
999  TR<<"locked_res "<<locked_res()<<" is outside loop scope so ignoring"<<std::endl;
1000  }
1001  ft.add_edge( s1, cut, -1 );
1002  ft.add_edge( s2, cut + 1, -1 );
1003  ft.add_edge( 1, conf.chain_begin( 2 ), 2 );
1004  }
1005  if( (!locked_res() || ( locked_res() <= s1 || locked_res() >= s2 ) ) && !pose.residue( conf.chain_begin( 2 ) ).is_ligand() )
1006  ft.add_edge( conf.chain_begin( 2 ), conf.chain_end( 2 ), -1 );
1007  ft.reorder(1);
1008  TR<<"Previous ft: "<<pose.fold_tree()<<std::endl;
1009  // pose.dump_pdb( "before_ft.pdb" );
1010  pose.fold_tree( ft );
1011  // pose.dump_pdb( "after_ft.pdb" );
1012  TR<<"Current ft: "<<pose.fold_tree()<<std::endl;
1013 }
1014 
1016 
1018 
/// Returns dbase_subset_.begin().
/// NOTE(review): the return-type line is missing from this listing.
1020 Splice::dbase_begin() const{ return dbase_subset_.begin(); }
1021 
/// Returns dbase_subset_.end().
/// NOTE(review): the return-type line is missing from this listing.
1023 Splice::dbase_end() const{ return dbase_subset_.end(); }
1024 
/// Getter: returns element [1] of locked_res_->obj, or 0 when locked_res_ is not set.
/// NOTE(review): the signature line is missing from this listing; no check is made that obj is non-empty.
1025 core::Size
1027  if( locked_res_ )
1028  return locked_res_->obj[ 1 ];
1029  else
1030  return 0;
1031 }
1032 
1033 void
1034 Splice::locked_res( core::Size const r ) { locked_res_->obj[ 1 ] = r; }
1035 
1036 void
1037 Splice::locked_res_id( char const c ){ locked_res_id_ = c ; }
1038 
1039 char
1040 Splice::locked_res_id() const{ return locked_res_id_; }
1041 
/// Getter: returns checkpointing_file_.
/// NOTE(review): the return-type line is missing from this listing.
1043 Splice::checkpointing_file() const { return checkpointing_file_; }
1044 
1045 void
1046 Splice::checkpointing_file( std::string const cf ){ checkpointing_file_ = cf; }
1047 
/// Setter: stores s in loop_dbase_file_name_.
/// NOTE(review): the signature line (which declares s) is missing from this listing.
1048 void
1050  loop_dbase_file_name_ = s;
1051 }
1052 
/// Getter: returns loop_dbase_file_name_.
/// NOTE(review): the return-type and signature lines are missing from this listing.
1055  return loop_dbase_file_name_;
1056 }
1057 
/// Setter: stores s in loop_pdb_source_.
/// NOTE(review): the signature line (which declares s) is missing from this listing.
1058 void
1060  loop_pdb_source_ = s;
1061 }
1062 
/// Getter: returns loop_pdb_source_.
/// NOTE(review): the return-type and signature lines are missing from this listing.
1065  return loop_pdb_source_;
1066 }
1067 
/// Getter: returns splice_filter_.
/// NOTE(review): the return-type and signature lines are missing from this listing.
1070  return splice_filter_;
1071 }
1072 
/// Setter: stores f in splice_filter_.
/// NOTE(review): the signature line (which declares f) is missing from this listing.
1073 void
1075  splice_filter_ = f;
1076 }
1077 
1078 void
1079 Splice::read_splice_segments( std::string const segment_type, std::string const segment_name, std::string const file_name ){
1080  splice_segments_[ segment_type ]->read_profile( file_name, segment_name );
1081  TR<<"In segment_type "<<segment_type_<<": reading profile for segment "<<segment_name<<" from file "<<file_name<<std::endl;
1082 }
1083 
/// Collects, for every (segment type, pdb name) pair in pdb_segments_, the profile returned by
/// splice_segments_[ segment type ]->pdb_profile( pdb name ), and returns concatenate_profiles() of
/// that list. Asserts that pdb_segments_ is not empty.
/// NOTE(review): the return-type/signature lines and the declaration of profile_vector are missing from this listing.
1086  using namespace core::sequence;
1087  using namespace std;
1088 
1090  profile_vector.clear();
1091  runtime_assert( pdb_segments_.size() );
// Iteration follows the order of the pdb_segments_ container.
1092  for( map< string, string >::const_iterator i = pdb_segments_.begin(); i != pdb_segments_.end(); ++i ){
1093  std::string const segment_type( i->first );
1094  std::string const pdb_name( i->second );
// NOTE(review): operator[] is used for the lookup; a segment type absent from splice_segments_ is not checked for.
1095  profile_vector.push_back( splice_segments_[ segment_type ]->pdb_profile( pdb_name ) );
1096  }
1097  return concatenate_profiles( profile_vector );
1098 }
1099 
/// Scans all comments attached to pose and, for every comment whose key starts with "segment",
/// records pdb_segments_[ <key from its 9th character on> ] = <comment value>.
/// NOTE(review): the signature line (which declares pose) is missing from this listing.
1100 void
1102  using namespace std;
1103  map< string, string > const comments = core::pose::get_all_comments( pose );
1104  for( std::map< string, string >::const_iterator i = comments.begin(); i != comments.end(); ++i ){
1105  std::string const key( i->first );
1106  std::string const val( i->second );
// Keys shorter than 8 characters are skipped up front: substr( 8, ... ) below throws std::out_of_range
// when the start position lies beyond the end of the string, which happened for a key of exactly "segment".
1107  if( key.size() < 8 || key.substr( 0, 7 ) != "segment" )/// the expected format is segment_??, where we're interested in ??
1108  continue;
1109  std::string const short_key( key.substr(8, 1000 ) );
1110  pdb_segments_[ short_key ] = val;
1111  TR<<"recording segment/pdb pair: "<<short_key<<'/'<<val<<std::endl;
1112  }
1113 }
1114 
/// Records pdb_name as the pdb for the current segment type: pdb_segments_[ segment_type_ ] = pdb_name.
/// NOTE(review): the signature line (which declares pdb_name) is missing from this listing.
1115 void
1117  pdb_segments_[ segment_type_ ] = pdb_name;
1118 }
1119 
1120 // @brief utility function for computing which residues on chain1 are away from the interface
// Configures the task operation pido (repack and design chain 1, neither for chain 2, interface
// distance cutoff 8.0), adds it to tf_outside_interface, and returns the residues reported by
// residue_packer_states() for that task factory.
// NOTE(review): the return-type/signature lines and the declarations of pido and tf_outside_interface are missing from this listing.
1123  using namespace protocols::toolbox::task_operations;
1125  pido->repack_chain1( true );
1126  pido->design_chain1( true );
1127  pido->repack_chain2( false );
1128  pido->design_chain2( false );
1129  pido->interface_distance_cutoff( 8.0 );
1131  tf_outside_interface->push_back( pido );
1132 ///// FIND COMPLEMENT ////////
1133  utility::vector1< core::Size > const chain1_outside_interface( protocols::rosetta_scripts::residue_packer_states( pose, tf_outside_interface, false, true ) ); /// find packable but not designable residues; according to pido specifications above these will be on chain1 outside an 8A shell around chain2
1134 
1135  return chain1_outside_interface;
1136 }
1137 
/// Replaces the pose's sequence-profile constraints: removes every constraint whose type() is
/// "SequenceProfile", then adds one SequenceProfileConstraint per residue of chain 1 using the
/// profile from generate_sequence_profile(). Residues found in upweighted_residues get the weight
/// profile_weight_away_from_interface(). Finally logs the res_type_constraint weight of scorefxn()
/// and warns when it is <= 0.001.
/// NOTE(review): the signature line (which declares pose) and the declaration of upweighted_residues are missing from this listing.
1138 void
1140  using namespace core::scoring::constraints;
1141 
1142 /// first remove existing sequence constraints
1143  TR<<"Removing existing sequence profile constraints from pose"<<std::endl;
1144  ConstraintCOPs constraints( pose.constraint_set()->get_all_constraints() );
1145  TR<<"Total number of constraints at start: "<<constraints.size()<<std::endl;
1146  core::Size cst_num( 0 );
1147  foreach( ConstraintCOP const c, constraints ){
1148  if( c->type() == "SequenceProfile" ){
1149  pose.remove_constraint( c );
1150  cst_num++;
1151  }
1152  }
1153  TR<<"Removed a total of "<<cst_num<<" sequence constraints."<<std::endl;
1154  TR<<"After removal the total number of constraints is: "<<pose.constraint_set()->get_all_constraints().size()<<std::endl;
1155 /// then impose new sequence constraints
1156  core::sequence::SequenceProfileCOP seqprof( generate_sequence_profile() );
1157  TR<<"Chain length/seqprof size: "<<pose.conformation().chain_end( 1 ) - pose.conformation().chain_begin( 1 ) + 1<<", "<<seqprof->size()<<std::endl;
// The concatenated profile must have exactly one entry per residue of chain 1.
1158  runtime_assert( seqprof->size() == pose.conformation().chain_end( 1 ) - pose.conformation().chain_begin( 1 ) + 1 );
1159  cst_num = 0;
1160 
1161  TR<<"Upweighting sequence constraint for residues: ";
1163  for( core::Size seqpos = pose.conformation().chain_begin( 1 ); seqpos <= pose.conformation().chain_end( 1 ); ++seqpos ){
1164  using namespace core::scoring::constraints;
1165  SequenceProfileConstraintOP spc( new SequenceProfileConstraint( pose, seqpos, seqprof ) );
1166  if( std::find( upweighted_residues.begin(), upweighted_residues.end(), seqpos ) != upweighted_residues.end() ){
1167  spc->weight( profile_weight_away_from_interface() );
1168  TR<<seqpos<<",";
1169  }
// NOTE(review): this ends the log line on every iteration, so the comma-separated residue list is
// broken up by newlines; it was presumably meant to run once after the loop -- confirm.
1170  TR<<std::endl;
1171  pose.add_constraint( spc );
1172  cst_num++;
1173  }
1174  TR<<"Added a total of "<<cst_num<<" sequence constraints."<<std::endl;
1175  TR<<"Now the pose has a total of "<<pose.constraint_set()->get_all_constraints().size()<<" constraints"<<std::endl;
1176 
1177 /// just checking that the scorefxn has upweighted res_type_constraint
1178  core::Real const score_weight( scorefxn()->get_weight( core::scoring::res_type_constraint ) );
1179  TR<<"res_type_constraint weight is set to "<<score_weight<<std::endl;
1180  if( score_weight <= 0.001 )
1181  TR<<"Warning! res_type_constraint weight is low, even though I've just added sequence constraints to the pose! These sequence constraints will have no effect. This could be an ERROR"<<std::endl;
1182 }
1183 
/// Getter: returns profile_weight_away_from_interface_.
/// NOTE(review): the signature line is missing from this listing.
1184 core::Real
1186  return profile_weight_away_from_interface_;
1187 }
1188 
/// Setter: stores p in profile_weight_away_from_interface_.
/// NOTE(review): the signature line (which declares p) is missing from this listing.
1189 void
1191  profile_weight_away_from_interface_ = p;
1192 }
1193 
1194 } //movers
1195 } //protein_interface_design
1196 } //protocols