Rosetta 3.5
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
util.cc
Go to the documentation of this file.
1 // -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
2 // vi: set ts=2 noet:
3 //
4 // (c) Copyright Rosetta Commons Member Institutions.
5 // (c) This file is part of the Rosetta software suite and is made available under license.
6 // (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
7 // (c) For more information, see http://www.rosettacommons.org. Questions about this can be
8 // (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
9 
10 /// @file util.cc
11 /// @author ashworth
12 
13 #include <protocols/dna/util.hh>
17 
21 #include <core/graph/Graph.hh>
22 // AUTO-REMOVED #include <core/io/pdb/pose_io.hh>
23 #include <basic/options/option.hh>
29 #include <core/pose/Pose.hh>
30 #include <core/pose/PDBInfo.hh>
39 #include <basic/Tracer.hh>
40 
41 #include <utility/file/file_sys_util.hh> // file_exists, create_directory
42 #include <utility/vector1.hh>
43 using utility::vector1;
44 #include <utility/vector0.hh>
45 #include <utility/io/izstream.hh>
46 #include <utility/io/ozstream.hh>
47 #include <utility/string_util.hh>
48 using utility::string_split;
49 
50 #include <numeric/xyzVector.hh>
51 #include <numeric/conversions.hh>
53 
54 
55 #include <algorithm> // std::min
56 #include <iostream>
57 #include <sstream>
58 
59 // option key includes
60 
61 #include <basic/options/keys/constraints.OptionKeys.gen.hh>
62 #include <basic/options/keys/dna.OptionKeys.gen.hh>
63 #include <basic/options/keys/out.OptionKeys.gen.hh>
64 #include <basic/options/keys/score.OptionKeys.gen.hh>
65 
68 #include <ObjexxFCL/format.hh>
69 
70 
71 //#include <fstream> no, use zstreams
72 
73 namespace protocols {
74 namespace dna {
75 
76 using namespace core;
77 using namespace conformation;
78 using namespace chemical;
79 using namespace basic::options;
80 using namespace pack;
81 using namespace rotamer_set;
82 using namespace scoring;
83 using namespace ObjexxFCL::fmt;
84 
85 static basic::Tracer TR( "protocols.dna.util", basic::t_info );
86 
88 
89 /// @begin close_to_dna
90 /// @details checks c-beta (except glycine) to base atom-atom distances, not including ribose or phosphate backbone.
91 /// @authors ashworth
// Returns true as soon as one scanned atom of the DNA residue is closer to the
// protein residue's neighbor atom than the cutoff, false otherwise.
//   pres       protein residue; only pres.nbr_atom_xyz() is read
//   dres       DNA residue; atoms are scanned up to dres.heavyAtoms_end()
//   threshold  compared directly against a squared distance, so it is
//              presumably a squared cutoff -- TODO confirm with callers
//   base_only  true: start the scan at sidechainAtoms_begin();
//              false: start at atom_begin()
// NOTE(review): the line holding the function name is absent from this listing.
92 bool
94  Residue const & pres,
95  Residue const & dres,
96  Real threshold,
97  bool base_only /* = false */
98 )
99 {
100 // TR << "pres " << pres << " dres " << dres << std::endl;
101  // iterate over dna base ('sidechain') atoms, check for distance to protein sidechain takeoff point
102  Atoms::const_iterator baseatom = ( base_only ? dres.sidechainAtoms_begin() : dres.atom_begin() );
103  for ( Atoms::const_iterator end( dres.heavyAtoms_end() ); baseatom != end; ++baseatom ) {
// distance_squared() is compared against threshold without taking a square root
104  if ( baseatom->xyz().distance_squared( pres.nbr_atom_xyz() ) < threshold ) return true;
105  }
106  return false;
107 }
108 
109 /// @begin argrot_dna_dis2
110 /// @details arginine rotamer sweep at a protein residue to see if it should be considered a (potentially) 'dna-contacting' residue
111 /// @authors ashworth
// Builds rotamers at protein position presid (task restricted to arginine,
// ex1 enabled, bump check off) and returns the smallest squared distance
// found between a rotamer's atoms and the atoms of DNA residue dres.
//   pose       pose the rotamers are built in
//   presid     index of the protein position being screened
//   pres       residue handed to the rotamer-set factory
//   dres       DNA residue to measure against
//   threshold  early-exit cutoff, compared against squared distances
//   base_only  true: DNA atoms start at sidechainAtoms_begin(); false: atom_begin()
// Returns 10000 when no rotamer produced a shorter squared distance.
// NOTE(review): the lines that name the function and that create `ptask` and
// `scrfxn` are absent from this listing; their construction cannot be
// described from here.
112 Real
114  pose::Pose const & pose,
115  Size presid,
116  Residue const & pres,
117  Residue const & dres,
118  Real threshold,
119  bool base_only /* = false */
120 )
121 {
122  using namespace pack;
123  using namespace scoring;
124  using namespace task;
125 
126 // TR << "Arg rot screen for " << pres << " " << presid << " vs " << dres << std::endl;
127 
129  ptask->set_bump_check( false );
130  ptask->temporarily_set_pack_residue( presid, true );
131 
132  // use ex1 rotamers
133  ResidueLevelTask & restask( ptask->nonconst_residue_task( presid ) );
134  restask.or_ex1( true );
135 
136  // restrict to arginine (not currently necessary if calling build...concrete directly
137  vector1< bool > keep_aas( num_canonical_aas, false );
138  keep_aas[ aa_arg ] = true;
139  restask.restrict_absent_canonical_aas( keep_aas );
140 
141  // mostly if not completely irrelevant here, but required as an argument for building rotamers
142  std::string weights_tag("dna");
143  if ( option[ OptionKeys::score::weights ].user() )
144  weights_tag = option[ OptionKeys::score::weights ]();
146  // unnecessary here, yet also required
147  graph::GraphOP dummygraph = new graph::Graph( pose.total_residue() );
148 
149  RotamerSetFactory rsf;
150  RotamerSetOP rotset( rsf.create_rotamer_set( pres ) );
151  rotset->set_resid( presid );
152  rotset->build_rotamers( pose, *scrfxn, *ptask, dummygraph, false );
153 
154 // TR(basic::t_debug) << "arg screen w/ " << rotset->num_rotamers() << " rots" << std::endl;
155 
156  // add a bump check here first
157 
// 10000 acts as the "nothing found yet" sentinel for the running minimum
158  Real shortest_dis2(10000), dis2;
159 
160  for ( Rotamers::const_iterator rotamer( rotset->begin() ); rotamer != rotset->end(); ++rotamer ) {
161  if ( (*rotamer)->aa() != aa_arg ) {
162  // for packer safety, RotamerSet will add in a native rotamer if it didn't actually build any rotamers
163  if ( rotset->num_rotamers() == 1 ) continue;
// any other non-arginine rotamer is treated as fatal
164  TR << "warning non-arg rotamer " << (*rotamer)->aa() << std::endl;
165  runtime_assert( false );
166  }
167 
168  Atoms::const_iterator prot_begin( (*rotamer)->sidechainAtoms_begin() ),
169  prot_end( (*rotamer)->heavyAtoms_end() ),
170  dna_end( dres.heavyAtoms_end() );
171  Atoms::const_iterator dna_begin =
172  ( base_only ? dres.sidechainAtoms_begin() : dres.atom_begin() );
173 
174  dis2 = contact_distance2( prot_begin, prot_end, dna_begin, dna_end, threshold );
175  if ( dis2 < shortest_dis2 ) shortest_dis2 = dis2;
// stop screening at the first rotamer that gets under the cutoff
176  if ( shortest_dis2 < threshold ) return shortest_dis2;
177  }
178  return shortest_dis2;
179 }
180 
181 /// @begin contact_distance2
182 /// @details distance check for contact between two sets of atoms
183 /// @authors ashworth
// Returns the smallest squared distance between any atom in [a_begin, a_end)
// and any atom in [b_begin, b_end).
//   threshold  once the running minimum drops below this value the search
//              stops and that minimum is returned (early exit); with 0.0
//              the early exit can never fire because squared distances are
//              not negative
// Returns 10000 when either range is empty or no pair is closer than that.
// NOTE(review): the line holding the function name is absent from this listing.
184 Real
186  Atoms::const_iterator a_begin,
187  Atoms::const_iterator a_end,
188  Atoms::const_iterator b_begin,
189  Atoms::const_iterator b_end,
190  Real threshold // default is 0.0
191 )
192 {
193  Real shortest_dis2(10000), dis2;
194 
195  for ( Atoms::const_iterator atm_a( a_begin ); atm_a != a_end; ++atm_a ) {
196  for ( Atoms::const_iterator atm_b( b_begin ); atm_b != b_end; ++atm_b ) {
197 
198  dis2 = atm_a->xyz().distance_squared( atm_b->xyz() );
199  if ( dis2 < shortest_dis2 ) shortest_dis2 = dis2;
200  if ( shortest_dis2 < threshold ) return shortest_dis2; // early exit mode
201  }
202  }
203  return shortest_dis2;
204 }
205 
206 /// @begin z_axis_dist
207 /// @details A sanity check for the arginine rotamer screen. Can prevent the design of positions that are best left alone because they are too far away along the helical axis ('laterally').
208 /// @authors ashworth
// Returns the absolute value of the projection of the vector
// (protein neighbor atom - DNA first sidechain atom) onto the axis returned
// by get_z_axis() for the DNA residue.
//   pres  protein residue; its nbr_atom_xyz() is used
//   dres  DNA residue; supplies both the axis and the reference atom
// NOTE(review): the return-type and function-name lines are absent from this
// listing.
210  Residue const & pres,
211  Residue const & dres
212 )
213 {
214  using namespace scoring::dna;
215 
// the y axis is requested with strand argument 1 and then fed to get_z_axis()
216  xyzVec const Z( get_z_axis( dres, get_y_axis(dres,1) ) ),
217  prot( pres.nbr_atom_xyz() ),
218  dna( dres.xyz( dres.first_sidechain_atom() ) );
219 
220  // vector from first protein sidechain atom to DNA N1/N9
221  xyzVec vec( prot - dna );
222  // return scalar projection onto DNA helical axis
223  return std::abs( dot(vec,Z) );
224 }
225 
226 /// @begin dna_comp_name_str
227 /// @brief also consider using the dna_base_partner function below
228 /// @authors ashworth
// Maps a DNA residue name string to the name of its Watson-Crick complement
// (ADE<->THY, CYT<->GUA, and the same for the short single-letter forms).
// Any other input ends the program via utility_exit_with_message(); the
// trailing return "NONE" only exists to give the function a return path.
// NOTE(review): the signature line is absent from this listing, and the
// padding of the short names may have been altered by the listing's
// whitespace collapsing -- confirm against the real file.
230  if ( dna == "ADE" ) return "THY";
231  if ( dna == "CYT" ) return "GUA";
232  if ( dna == "GUA" ) return "CYT";
233  if ( dna == "THY" ) return "ADE";
234  if ( dna == " A" ) return " T";
235  if ( dna == " C" ) return " G";
236  if ( dna == " G" ) return " C";
237  if ( dna == " T" ) return " A";
238  utility_exit_with_message( "Bad DNA name " + dna );
239  return "NONE";
240 }
241 
242 /// @begin dna_full_name3
243 /// @brief intended to convert any DNA "threeletter code" into the full three-letter code. Note that this does not (necessarily) return the same thing as residue_type::name3 (which returns " N" format as of Dec 2008)
// Normalises a nucleic-acid name to a full three-letter code:
// the A/DA/ADE family -> "ADE", likewise for CYT, GUA and THY, and the
// r-prefixed RNA forms -> "RAD", "RCY", "RGU", "URA".
// Names that match none of the listed spellings are returned unchanged.
// NOTE(review): the signature line is absent from this listing, and the
// padding inside the short names may have been altered by whitespace
// collapsing -- confirm against the real file.
245 {
246  if ( name3 == " A" || name3 == " DA" || name3 == "ADE" ) return "ADE";
247  if ( name3 == " C" || name3 == " DC" || name3 == "CYT" ) return "CYT";
248  if ( name3 == " G" || name3 == " DG" || name3 == "GUA" ) return "GUA";
249  if ( name3 == " T" || name3 == " DT" || name3 == "THY" ) return "THY";
250  if ( name3 == " rA" ) return "RAD";
251  if ( name3 == " rC" ) return "RCY";
252  if ( name3 == " rG" ) return "RGU";
253  if ( name3 == " rU" ) return "URA";
// unrecognised names pass straight through
254  return name3;
255 }
256 
257 /// helper function
// Returns the complementary DNA base type for `na`:
// na_ade<->na_thy and na_gua<->na_cyt.
// Any other value ends the program via utility_exit_with_message(); the
// final return aa_unk only exists to give the function a return path.
// RNA types are not handled by this switch.
// NOTE(review): the signature line is absent from this listing.
260 {
261  using namespace chemical;
262 
263  switch( na ) {
264  case na_ade:
265  return na_thy;
266  case na_thy:
267  return na_ade;
268  case na_gua:
269  return na_cyt;
270  case na_cyt:
271  return na_gua;
272  default:
273  utility_exit_with_message( "Bad DNA aa "+chemical::name_from_aa(na) );
274  }
275 // return na_ade;
276  return aa_unk;
277 }
278 
279 /// @begin find_basepairs
280 /// @details DnaChains version, adapted from pbradley's code. More paranoid geometry checks, in order to allow highly distorted basepairs without making mistakes
281 /// @authors ashworth
// Rebuilds dna_chains from scratch by scanning the pose for DNA residues and
// pairing each residue i with the best-scoring later residue j.
//   pose              structure to analyse
//   dna_chains        cleared first, then filled with one DnaPosition per
//                     accepted top-strand index
//   include_unpaired  when true, DNA residues with no accepted partner are
//                     still stored as single-index DnaPosition entries
// A candidate j must be DNA, have its hbond atom closer than max_d (4.0) to
// that of i, pass the dot-product geometry cutoffs below, and be the
// tabulated complement of i. The final table is printed to the tracer.
// NOTE(review): the line holding the function name is absent from this listing.
282 void
284  pose::Pose const & pose,
285  DnaChains & dna_chains,
286  bool include_unpaired // defaults to true
287 )
288 {
289  using namespace scoring::dna;
290 
291  TR << "\nFinding basepairs:\n";
292 
293  Real const max_d( 4.0 );
294  Size const nres( pose.total_residue() );
295 
296  dna_chains.clear();
297  runtime_assert( dna_chains.empty() );
298 
// lookup tables: complement type and the hbond atom name for each base type
299  std::map< AA, AA > base_partner;
300  base_partner[ na_ade ] = na_thy;
301  base_partner[ na_thy ] = na_ade;
302  base_partner[ na_gua ] = na_cyt;
303  base_partner[ na_cyt ] = na_gua;
304 
305  std::map< AA, std::string > hbond_atom;
306  hbond_atom[ na_ade ] = "N1";
307  hbond_atom[ na_thy ] = "N3";
308  hbond_atom[ na_gua ] = "N1";
309  hbond_atom[ na_cyt ] = "N3";
310 
311  //ja RNA support
312  base_partner[ na_rad ] = na_ura;
313  base_partner[ na_ura ] = na_rad;
314  base_partner[ na_rgu ] = na_rcy;
315  base_partner[ na_rcy ] = na_rgu;
316 
317  hbond_atom[ na_rad ] = "N1";
318  hbond_atom[ na_ura ] = "N3";
319  hbond_atom[ na_rgu ] = "N1";
320  hbond_atom[ na_rcy ] = "N3";
321 
// NOTE(review): `partner` is declared but not used in the visible body
322  std::map< Size, Size > partner; // temporary
323 
324  for ( Size i(1); i <= nres; ++i ) {
325  Residue const & i_rsd( pose.residue(i) );
326  AA const & i_aa( i_rsd.aa() );
327  if ( !i_rsd.is_DNA() ) continue;
328  // the following is false for already-added bottom strand residue indices
329  if ( dna_chains.contains(i) ) continue;
330 
331  // hbond atom, base y-axis, base z-axis
// NOTE(review): operator[] on the map default-inserts an empty atom name for
// a base type that is not in the table; confirm is_DNA() residues always
// have one of the tabulated types
332  xyzVec const hbatm_xyz_i( i_rsd.xyz( hbond_atom[ i_aa ] ) ),
333  base_yaxis_i( get_y_axis( i_rsd, 1 /*strand*/ ) );
334  xyzVec const base_zaxis_i( get_z_axis( i_rsd, base_yaxis_i ) );
335 
336  bool paired( false );
337  // check for a basepairing partner
// bestdotsum == 0 doubles as the "no partner accepted yet" marker (see below)
338  Real bestdotsum(0.);
339  Size best_j(0);
// only residues after i are candidates
340  for ( Size j(i+1); j <= nres; ++j ) {
341 
342  Residue const & j_rsd( pose.residue(j) );
343  AA const & j_aa( j_rsd.aa() );
344  if ( !j_rsd.is_DNA() ) continue;
345  // no Watson-Crick check here on purpose
346 
347  // -distance check-
348  xyzVec const hbatm_xyz_j( j_rsd.xyz( hbond_atom[ j_aa ] ) );
349 
350  Real d( hbatm_xyz_i.distance( hbatm_xyz_j ) );
351  if ( d >= max_d ) continue;
352 
353  // -geometry check-
// the candidate's y axis is requested with strand argument 2
354  xyzVec const base_yaxis_j( get_y_axis( j_rsd, 2 ) );
355  xyzVec const base_zaxis_j( get_z_axis( j_rsd, base_yaxis_j ) ),
356  hb_vec( ( hbatm_xyz_i - hbatm_xyz_j ).normalized() );
357 
358  Real const
359  // base y-axes parallel?
360  ydot( std::abs( dot( base_yaxis_i, base_yaxis_j ))),
361  // hbond vector parallel to base y axis?
362  dothbyi( std::abs( dot( base_yaxis_i, hb_vec ))),
363  dothbyj( std::abs( dot( base_yaxis_j, hb_vec ))),
364  // hbond vector perpendicular to base z axis?
365  dothbzi( std::abs( dot( base_zaxis_i, hb_vec ))),
366  dothbzj( std::abs( dot( base_zaxis_j, hb_vec )));
367 
// combined score: the y-axis terms add to it, the z-axis terms subtract
368  Real const dotsum( 2*ydot + dothbyi + dothbyj - dothbzi - dothbzj );
// residue numbers used in messages: PDB numbering when available, else pose indices
369  int pdbi(i), pdbj(j);
370  if ( pose.pdb_info() ) {
371  pdbi = pose.pdb_info()->number(i);
372  pdbj = pose.pdb_info()->number(j);
373  }
// verbosity is fixed at 0, so the `verbosity >= 2` branches never run as written
374  int verbosity(0); // to do: learn to use Tracer properly
375  if ( verbosity >= 2 ) {
376  TR << "basepair geom "
377  << pdbi << " vs. " << pdbj << " dis " << d
378  << " ydot " << ydot
379  << " hbydots " << dothbyi << " " << dothbyj
380  << " hbzdots " << dothbzi << " " << dothbzj;
381  }
382 
// reject when the score does not beat the current best or any cutoff fails
383  if ( dotsum < bestdotsum || ydot < 0.8 ||
384  dothbyi < 0.6 || dothbyj < 0.6 ||
385  dothbzi > 0.5 || dothbzj > 0.5 )
386  {
387  if ( verbosity >= 2 ) TR << '\n';
388  continue;
389  }
390  if ( verbosity >= 2 ) TR << " acceptable" << '\n';
391 
392  // -complementarity check-
393  if ( j_aa != base_partner[ i_aa ] ) {
394  std::cerr << "Warning: nucleic acids " << i_rsd.name3() << " " <<
395  pdbi << " and " << j_rsd.name3() << " " <<
396  pdbj << " have basepaired geometry, but are not " <<
397  "complementary types" << '\n';
398  continue; // skip non-canonical basepairs for now
399  }
400  // passed: save j as optimal basepair partner
401  bestdotsum = dotsum;
402  best_j = j;
403  } // end j loop
// a nonzero best score means some j was accepted
404  if ( bestdotsum != 0. ) {
405  dna_chains[ i ] = DnaPosition( i, best_j );
406  paired = true;
407  }
408  if ( paired || !include_unpaired ) continue;
409  // include unpaired dna position
410  dna_chains[ i ] = DnaPosition( i );
411  } // end i loop
412  dna_chains.print( pose, TR );
413 }
414 
415 /// @begin make_sequence_combinations
416 /// @details populates a set of all possible sequence combinations over a given range of positions. recursive.
417 /// @authors ashworth
// Recursively enumerates sequence combinations over the positions listed in
// seq_indices. At the position *seqset_iter each allowed residue type is
// written into `sequence`; at the last index the completed `sequence` is
// appended to `sequences`, otherwise the function recurses with seqset_iter+1.
//   seq_indices  residue indices to vary
//   ptask        supplies the allowed residue types per position
//   sequence     working sequence, modified in place during recursion
//   sequences    output list, appended to
// Residue types carrying the ADDUCT variant are skipped.
// NOTE(review): the lines holding the function name and the `seqset_iter`
// parameter are absent from this listing. `*seqset_iter` is dereferenced
// without a check, so an empty seq_indices presumably must not be passed --
// confirm with callers.
418 void
421  utility::vector1< Size > const & seq_indices,
422  task::PackerTaskCOP ptask,
423  ResTypeSequence & sequence,
424  ResTypeSequences & sequences
425 )
426 {
427  using namespace task;
428 
429  Size resid( *seqset_iter );
430  ResidueLevelTask const & restask( ptask->residue_task( resid ) );
431 
432  for ( ResidueLevelTask::ResidueTypeCOPListConstIter type( restask.allowed_residue_types_begin() );
433  type != restask.allowed_residue_types_end(); ++type ) {
434  // ignore adduct variant types for now (probably hydrated)
435  if ( (*type)->has_variant_type( chemical::ADDUCT ) ) continue;
436  sequence[ resid ] = *type;
437 
// last position: emit the finished sequence; otherwise descend one position
438  if ( seqset_iter == seq_indices.end() - 1 ) sequences.push_back( sequence );
439  else make_sequence_combinations( seqset_iter+1, seq_indices, ptask, sequence, sequences );
440  }
441 }
442 
443 /// @begin make_single_mutants
444 /// @brief make a list of all single mutants from a base sequence
445 /// @authors ashworth
// Appends to `sequences` every single-position variant of `sequence`: for
// each (index, type) entry, each residue type allowed by ptask at that index
// yields one copy of `sequence` with only that index replaced.
//   sequence   base sequence (index -> residue type)
//   ptask      supplies the allowed residue types per position
//   sequences  output list, appended to
// Types with the ADDUCT variant and types whose aa() equals the base entry's
// aa() are skipped.
// NOTE(review): the line holding the function name is absent from this listing.
446 void
448  ResTypeSequence const & sequence,
449  task::PackerTaskCOP ptask,
450  ResTypeSequences & sequences
451 )
452 {
453  using namespace task;
454  for ( ResTypeSequence::const_iterator it( sequence.begin() ); it != sequence.end(); ++it ) {
455  Size index( it->first );
456  ResidueLevelTask const & rtask( ptask->residue_task(index) );
457  for ( ResidueLevelTask::ResidueTypeCOPListConstIter type( rtask.allowed_residue_types_begin() );
458  type != rtask.allowed_residue_types_end(); ++type ) {
459  // ignore adduct variant types for now (probably hydrated)
460  if ( (*type)->has_variant_type( chemical::ADDUCT ) ) continue;
461  if ( (*type)->aa() == it->second->aa() ) continue; // avoid duplicating input sequence
// copy the whole base sequence, then overwrite the one mutated position
462  ResTypeSequence mutant( sequence );
463  mutant[ index ] = *type;
464  sequences.push_back( mutant );
465  }
466  }
467 }
468 
// Appends one PositionType per selected residue of the pose to design_residues.
// Selection rule:
//   - DNA residues are kept when the task marks them with the "TARGET" or
//     "SCAN" behavior, or when they are being designed;
//   - all other residues are kept when ptask.pack_residue(index) is true.
// Each entry stores the index, a pointer to the pose's residue type at that
// index, and ptask.design_residue(index).
// The output list is not cleared first.
// NOTE(review): the line holding the function name is absent from this listing.
469 void
471  std::list< PositionType > & design_residues,
472  pose::Pose const & pose,
473  task::PackerTask const & ptask
474 )
475 {
476  Size nres( pose.total_residue() );
477  for ( Size index(1); index <= nres; ++index ) {
478  if ( pose.residue_type(index).is_DNA() ) {
479  if ( !ptask.residue_task(index).has_behavior("TARGET") &&
480  !ptask.residue_task(index).has_behavior("SCAN") &&
481  !ptask.residue_task(index).being_designed() ) continue;
482  }
483  else if ( !ptask.pack_residue( index ) ) continue;
484  design_residues.push_back(
485  PositionType( index, &pose.residue_type( index ), ptask.design_residue( index ) ) );
486  }
487 }
488 
489 // (relevant typdefs are in fwd.hh)
490 std::ostream & operator << ( std::ostream & os, ResTypeSequence const & seq )
491 {
492  for ( ResTypeSequence::const_iterator pos( seq.begin() ); pos != seq.end(); ++pos ) {
493  if ( pos != seq.begin() ) os << ", ";
494  os << pos->first << "-" << pos->second->name1();
495  }
496  return os;
497 }
498 
// Appends the name1() of every entry's residue type, in the sequence's
// iteration order, to `str` and returns it.
// NOTE(review): the signature line and the declaration of `str` are absent
// from this listing; `str` is presumably a local std::string that starts
// empty -- confirm against the real file.
501  for ( ResTypeSequence::const_iterator pos( seq.begin() ); pos != seq.end(); ++pos ) {
502  str += pos->second->name1();
503  }
504  return str;
505 }
506 
507 std::ostream & operator << ( std::ostream & os, ResTypeSequences const & seqs )
508 {
509  for ( ResTypeSequences::const_iterator seq( seqs.begin() ); seq != seqs.end(); ++seq ) {
510  os << *seq << '\n';
511  }
512  return os;
513 }
514 
// Builds a comma-separated string describing a sequence, one
// "<chain>.<number>.<full name3>" item per entry.
//   seq   sequence (pose index -> residue type)
//   pose  used for chain and numbering; PDB chain/number when pose.pdb_info()
//         is set, otherwise pose.chain(index) and the pose index itself
// Entries whose index is outside 1..pose.total_residue() trip assert(false)
// in debug builds and are skipped otherwise.
// NOTE(review): the head of the signature is absent from this listing.
516  ResTypeSequence const & seq,
517  pose::Pose const & pose
518 )
519 {
520  std::ostringstream os;
521  for ( ResTypeSequence::const_iterator pos( seq.begin() ); pos != seq.end(); ++pos ) {
522  Size const index( pos->first );
523  if ( index < 1 || index > pose.total_residue() ) {
524  assert(false);
525  continue;
526  }
// NOTE(review): the separator test compares against seq.begin(), so if the
// first entry is skipped above the output starts with ","
527  if ( pos != seq.begin() ) os << ",";
528  if ( pose.pdb_info() ) {
529  os << pose.pdb_info()->chain( index ) << "." << pose.pdb_info()->number( index );
530  } else {
531  os << pose.chain( index ) << "." << index;
532  }
533  os << "." << dna_full_name3( pos->second->name3() );
534  }
535  return os.str();
536 }
537 
// Writes the seq_pdb_str() representation of one sequence to `os`, followed
// by '\n' (no flush).
// NOTE(review): the head of the signature is absent from this listing.
539  ResTypeSequence const & seq,
540  pose::Pose const & pose,
541  std::ostream & os
542 )
543 {
544  os << seq_pdb_str( seq, pose ) << '\n';
545 }
546 
// Writes every sequence in `seqs` to `os`, one per line, by forwarding each
// element to the single-sequence print_sequence_pdb_nums().
// NOTE(review): the head of the signature is absent from this listing.
548  ResTypeSequences const & seqs,
549  pose::Pose const & pose,
550  std::ostream & os
551 )
552 {
553  for ( ResTypeSequences::const_iterator seq( seqs.begin() ); seq != seqs.end(); ++seq ) {
554  print_sequence_pdb_nums( *seq, pose, os );
555  }
556 }
557 
558 /// @begin restrict_dna_rotamers
559 /// @details for packing a single DNA sequence out of a multi-DNA-sequence RotamerSet
560 /// @authors ashworth
// Fills rot_to_pack with the indices of the rotamers to keep when packing
// one specific sequence.
//   rotamer_sets  source of rotamers (indices 1..nrotamers())
//   seq           positions whose residue type is fixed (index -> type)
//   rot_to_pack   output; cleared first
// A rotamer at a position listed in `seq` is kept only when its type's
// name3() equals that of the sequence entry; rotamers at positions not in
// `seq` are always kept. The number of dropped rotamers is reported on TR.
// NOTE(review): the line holding the function name is absent from this listing.
561 void
563  RotamerSetsCOP rotamer_sets,
564  ResTypeSequence const & seq,
565  utility::vector0<int> & rot_to_pack
566 )
567 {
568  rot_to_pack.clear();
569  Size const nrot( rotamer_sets->nrotamers() );
570  for ( Size roti(1); roti <= nrot; ++roti ) {
571 
572  Size const rotpos( rotamer_sets->res_for_rotamer(roti) );
573  ResidueTypeCOP rot_type( rotamer_sets->rotamer(roti)->type() );
574 
575  ResTypeSequence::const_iterator seqindex( seq.find( rotpos ) );
576  if ( seqindex != seq.end() ) {
577  // compare only the name3's on order to allow variants
578  std::string seq_typename( (seqindex->second)->name3() ),
579  rot_typename( rot_type->name3() );
580  if ( seq_typename != rot_typename ) continue;
581  }
582  rot_to_pack.push_back( roti );
583  }
584  Size const rots_off( nrot - rot_to_pack.size() );
585  TR << "Fixing DNA rotamers: " << rots_off
586  << " out of " << nrot << " rotamers disabled." << std::endl;
587 }
588 
589 /// @begin restrict_to_single_sequence
590 /// @details for packing a single sequence out of a RotamerSets that (potentially) represents sequence variability
591 /// @authors ashworth
// Fills rot_to_pack with the indices of the rotamers whose residue type
// matches the given full-length sequence.
//   rotamer_sets     source of rotamers (indices 1..nrotamers())
//   single_sequence  residue type per position, indexed by the value
//                    res_for_rotamer() returns
//   rot_to_pack      output; cleared first
// A rotamer is kept when its type's name3() equals that of
// single_sequence[rotpos]. The number of dropped rotamers is reported on TR.
// NOTE(review): single_sequence is indexed with rotpos without a range check
// here; confirm that res_for_rotamer() yields an index valid for it.
// NOTE(review): the line holding the function name is absent from this listing.
592 void
594  rotamer_set::RotamerSetsCOP rotamer_sets,
595  vector1< ResidueTypeCOP > const & single_sequence,
596  utility::vector0< int > & rot_to_pack
597 )
598 {
599  rot_to_pack.clear();
600  Size const nrot( rotamer_sets->nrotamers() );
601  for ( Size roti(1); roti <= nrot; ++roti ) {
602  Size const rotpos( rotamer_sets->res_for_rotamer(roti) );
603  ResidueTypeCOP rot_type( rotamer_sets->rotamer(roti)->type() );
604  // a comparison operator is not defined for the ResidueType class
605  // compare names here
606  // name3 comparison should allow variants
607  std::string seq_typename( ( single_sequence[ rotpos ] )->name3() ),
608  rot_typename( rot_type->name3() );
609  if ( seq_typename != rot_typename ) continue;
610  rot_to_pack.push_back( roti );
611  }
612  Size const rots_off( nrot - rot_to_pack.size() );
613  TR << "Fixing rotamers for a single sequence: " << rots_off
614  << " out of " << nrot << " rotamers disabled." << std::endl;
615 }
616 
617 /// @begin substitute_residue
618 /// @details
619 /// @authors ashworth
// Replaces the residue at `index` in the pose with a new residue of type
// new_type.
//   pose      pose to modify
//   index     position to replace
//   new_type  residue type of the replacement
// The replacement is created by ResidueFactory::create_residue() from the
// existing residue and the pose's conformation, receives the existing
// residue's chi 1 value, and is installed with replace_residue(..., false).
// NOTE(review): chi(1) is read and set unconditionally, so both residue
// types presumably have at least one chi angle -- confirm.
// NOTE(review): the line holding the function name is absent from this listing.
620 void
622  pose::Pose & pose,
623  Size index,
624  ResidueType const & new_type
625 )
626 {
627  Residue const & existing( pose.residue( index ) );
628  ResidueOP new_res( ResidueFactory::create_residue( new_type, existing, pose.conformation() ) );
629  new_res->set_chi( 1, existing.chi(1) );
630  pose.replace_residue( index, *new_res, false );
631 }
632 
633 // @begin write_checkpoint
634 // @brief
635 // @author ashworth
// Writes a two-file checkpoint when the dna::design::checkpoint option was
// set by the user; does nothing otherwise.
//   <fileroot>.pdb.checkpoint  the pose, written with dump_pdb()
//   <fileroot>.checkpoint      two lines: "Iteration <iter>" and the PDB file name
// If the index file cannot be opened, a message goes to std::cerr,
// runtime_assert(false) fires, and the function returns (by which point the
// PDB file has already been written).
// NOTE(review): the signature line is absent from this listing; the body
// uses `pose` and `iter`.
636 void
638 {
639  if ( ! option[ OptionKeys::dna::design::checkpoint ].user() ) return;
640  std::string fileroot( option[ OptionKeys::dna::design::checkpoint ]() );
641 
642  TR << "writing dna mode checkpoint files..." << '\n';
643 
644  // write out current Pose
645  std::string pdbname( fileroot + ".pdb.checkpoint" );
646  utility::io::ozstream pdbout( pdbname.c_str() );
647  pose.dump_pdb( pdbout );
648  pdbout.close();
649 
650  // write checkpoint file
651  std::string checkpointname( fileroot + ".checkpoint" );
652  utility::io::ozstream out( checkpointname.c_str() );
653 // std::ofstream out( checkpointname.c_str() );
654 
655  if ( !out ) {
656  std::cerr << "trouble opening file " << checkpointname
657  << " for writing... skipping checkpoint" << std::endl;
658  runtime_assert( false ); // die here in debug mode
659  return;
660  }
661 
662  // here iter should refer to the last complete iteration
663  out << "Iteration " << iter << '\n' << pdbname << '\n';
664  out.close();
665 
666  TR << "wrote " << pdbname << ", " << checkpointname << std::endl;
667 }
668 
669 // @begin load_checkpoint
670 // @brief
671 // @author ashworth
672 void
673 load_checkpoint( pose::Pose & pose, Size & iter )
674 {
675  if ( ! option[ OptionKeys::dna::design::checkpoint ].user() ) return;
676  std::string fileroot( option[ OptionKeys::dna::design::checkpoint ]() );
677 
678  utility::io::izstream file;
679  std::string filename( fileroot + ".checkpoint" );
680  file.open( filename.c_str() );
681  if ( !file ) return;
682 
683  TR << "Reading DNA design checkpoint info from " << filename << '\n';
684 
685  std::string line, word, pdbfile;
686  // get iteration
687  Size last_iter;
688  file >> word >> last_iter >> skip; // first line
689  if ( ( word != "Iteration" ) ) return;
690  file >> pdbfile >> skip;
691  file.close();
692 
693  if ( option[ OptionKeys::out::pdb_gz ]() ) pdbfile += ".gz";
694  pose::Pose temp_pose;
695  core::import_pose::pose_from_pdb( temp_pose, filename );
696 
697  pose = temp_pose;
698  // here iter should refer to the last complete iteration
699  iter = last_iter + 1;
700 
701  TR << "loaded " << pdbfile << " for iteration " << iter << std::endl;
702 }
703 
704 // @begin checkpoint_cleanup
705 // @brief make sure that old checkpoint files will not be accidentally reused
706 // @author ashworth
// When the dna::design::checkpoint option was set by the user, renames
// "<fileroot>.checkpoint" and "<fileroot>.pdb.checkpoint" to the same names
// with ".old" appended, using std::rename(); does nothing otherwise.
// The return value of std::rename() is not checked.
// NOTE(review): the signature line and the line that opens the inner block
// around the rename (listing line 719) are absent from this listing, which is
// why the closing braces below look unbalanced. That missing line is
// presumably a file-existence test -- confirm against the real file.
707 void
709 {
710  if ( ! option[ OptionKeys::dna::design::checkpoint ].user() ) return;
711  std::string fileroot( option[ OptionKeys::dna::design::checkpoint ]() );
712 
713  std::list< std::string > filenames;
714  filenames.push_back( fileroot + ".checkpoint" );
715  filenames.push_back( fileroot + ".pdb.checkpoint" );
716 
717  for ( std::list< std::string >::const_iterator filename( filenames.begin() );
718  filename != filenames.end(); ++filename ) {
720  std::string nameold( *filename + ".old" );
721  std::rename( (*filename).c_str(), nameold.c_str() );
722  }
723  }
724 }
725 
726 /// @begin load_dna_design_defs
727 /// @brief loads command-line dna design definitions (shorthand alternative to using resfile)
728 /// option value is string vector
729 /// i.e. -dna_defs C.-6 C.-5
730 /// or -dna_defs C.-6.GUA C.-5.CYT
731 /// @author ashworth
// Appends one newly allocated DnaDesignDef to `defs` for every string in
// str_defs, in order. `defs` is not cleared first.
//   defs      output list of design definitions
//   str_defs  textual definitions, each passed to the DnaDesignDef constructor
// NOTE(review): the line holding the function name is absent from this listing.
732 void
734  DnaDesignDefOPs & defs,
735  Strings const & str_defs
736 )
737 {
738  for ( Strings::const_iterator str_def( str_defs.begin() ), end( str_defs.end() );
739  str_def != end; ++str_def ) {
740  defs.push_back( new DnaDesignDef( *str_def ) );
741  }
742 }
743 
// Reads design definitions from a text file with one space-separated record
// per line: the first word is a PDB tag, the remaining words are definitions.
//   defs        output list, appended to
//   filename    file to read
//   pdb_prefix  when non-empty, only its last '/'-separated component is
//               used, and only lines whose first word equals it are loaded;
//               when empty, every line is loaded
// NOTE(review): words.front() and words.begin()+1 are used without checking
// that `words` is non-empty; whether string_split can return an empty vector
// for a blank line is not visible here -- confirm.
// NOTE(review): the line holding the function name is absent from this listing.
744 void
746  DnaDesignDefOPs & defs,
747  std::string const & filename,
748  std::string const & pdb_prefix
749 )
750 {
751  std::string stripped_prefix( pdb_prefix );
752 
753  TR << "Getting dna_defs from file " << filename;
754  if ( ! stripped_prefix.empty() ) {
// keep only the part after the last '/'
755  stripped_prefix = string_split( stripped_prefix, '/' ).back();
756  TR << " for " << stripped_prefix;
757  }
758  TR << '\n';
759 
760  utility::io::izstream defs_file( filename.c_str() );
761  std::string line;
762  while ( getline( defs_file, line ) ) {
763  utility::vector1< std::string > words( string_split( line, ' ' ) );
764  // multiple pdbs may be specified in this file:
765  // only match lines beginning with stripped_prefix, if specified
766  if ( ! stripped_prefix.empty() && words.front() != stripped_prefix ) continue;
767  Strings str_defs;
// everything after the first word is a definition string
768  str_defs.insert( str_defs.begin(), words.begin()+1, words.end() );
769  load_dna_design_defs_from_strings( defs, str_defs );
770  }
771 }
772 
// Loads design definitions according to the command-line options.
//   - dna::design::dna_defs set by the user: its values are loaded directly;
//   - otherwise, dna::design::dna_defs_file set by the user: definitions are
//     read from that file, filtered by pdb_prefix;
//   - neither set: `defs` is left unchanged.
// The dna_defs option takes precedence when both are given. TR is flushed at
// the end.
// NOTE(review): the line holding the function name and the line naming the
// function called in the second branch (listing line 785) are absent from
// this listing.
773 void
775  DnaDesignDefOPs & defs,
776  std::string pdb_prefix /* = std::string() */
777 )
778 {
779  if ( option[ OptionKeys::dna::design::dna_defs ].user() ) {
780  // list of defs for a single pdb
781  Strings str_defs( option[ OptionKeys::dna::design::dna_defs ]().vector() );
782  load_dna_design_defs_from_strings( defs, str_defs );
783  } else if ( option[ OptionKeys::dna::design::dna_defs_file ].user() ) {
784  // file containing lists of defs, with format 'pdbcode def def def'
786  defs,
787  option[ OptionKeys::dna::design::dna_defs_file ](),
788  pdb_prefix
789  );
790  }
791  TR.flush();
792 }
793 
// When the constraints::cst_file option was set by the user, takes the first
// file name from that option, builds a constraint set from it, and installs
// the set on the pose with pose.constraint_set(); returns without touching
// the pose otherwise.
// NOTE(review): the line holding the function name and the right-hand side
// of the `cst_set` initialisation (listing line 808) are absent from this
// listing, so how the constraint file is parsed cannot be described from here.
794 void
796  pose::Pose & pose
797 )
798 {
799  using namespace scoring::constraints;
800 
801  std::string cst_file;
802  if ( option[ OptionKeys::constraints::cst_file ].user() ) {
// only the first entry of the option's value list is used
803  cst_file = option[ OptionKeys::constraints::cst_file ]().front();
804  }
805  else return;
806 
807  ConstraintSetOP cst_set =
809 
810  pose.constraint_set( cst_set );
811 }
812 
815 {
816 
817  Size const nres( pose.total_residue() );
818 
819  pose::PDBInfoCOP pdb_data( pose.pdb_info() );
820 
821  // Identify DNA duplexed regions
822  protocols::dna::DNAParameters dna_info( pose );
823 // dna_info.calculate( pose );
824 
825  Size num_chains( 1 );
826  utility::vector1< Size > chain_start;
827  utility::vector1< Size > chain_end;
828  utility::vector1< Size > chain_type;
829 
830  chain_start.push_back( 1 );
831  for( Size resid = 1 ; resid < nres ; ++resid ) {
832  if( pdb_data->chain( resid ) != pdb_data->chain( resid + 1 ) ){
833  chain_end.push_back( resid );
834  chain_start.push_back( resid + 1 );
835  num_chains++;
836  }
837  }
838  chain_end.push_back( nres );
839 
840  // Allocate the FArrays to call FoldTree::tree_from_cuts_and_jumps
841  Size num_cuts( num_chains - 1 );
842  ObjexxFCL::FArray1D_int cut_positions( num_cuts, 0 );
843  ObjexxFCL::FArray2D_int jump_pairs( 2, num_cuts, 0 );
844  Size jump_pair_count( 1 );
845 
846  // We can fill the cut info now
847  for( Size cut_num = 1 ; cut_num < chain_end.size() ; ++cut_num ){
848  cut_positions( cut_num ) = chain_end[ cut_num ];
849  }
850 
851  Size const amino( 1 );
852  Size const bped_dna( 2 );
853  Size const non_bped_dna( 3 );
854 
855  utility::vector1< Size > protein_root( num_chains, 0 );
856  utility::vector1< Size > closest_base( num_chains, 0 );
857 
858  // Analyze each chain
859  for( Size this_chain = 1 ; this_chain <= num_chains ; ++this_chain ) {
860  TR << "Working on chain " << this_chain << std::endl;
861 
862  // Check for amino acid chain
863  if( pose.residue( chain_start[ this_chain ] ).is_protein() ) {
864  chain_type.push_back( amino );
865  TR << "Found 1 initial segments for chain " << this_chain << std::endl;
866  TR << "Chain " << this_chain << " segment 1 start res " << chain_start[ this_chain ] <<
867  " end res " << chain_end[ this_chain ] << " of type 1" << std::endl;
868 
869 
870  // Find the DNA base with the closest C1' atom to some Calpha in this protein
871  Real best_dist( 9999.0 );
872  for( Size prot_res = chain_start[ this_chain ] ; prot_res <= chain_end[ this_chain ] ; ++prot_res ) {
873  for( Size dna_res = 1 ; dna_res <= nres ; ++dna_res ) {
874  if( !pose.residue( dna_res ).is_DNA() ) continue;
875  Real check_dist = pose.residue( prot_res ).xyz( "CA" ).distance_squared( pose.residue( dna_res ).xyz( "C1*" ) );
876  if( check_dist < best_dist ) {
877  best_dist = check_dist;
878  protein_root[ this_chain ] = prot_res;
879  closest_base[ this_chain ] = dna_res;
880  }
881  }
882  }
883 
884  TR << "Protein closest approach is res " << protein_root[ this_chain ]<< " with base " << closest_base[ this_chain ] << " with distance " << std::sqrt( best_dist ) << std::endl;
885 
886  // Note this pair as a jump
887  if( protein_root[this_chain] < closest_base[this_chain] ) {
888  jump_pairs( 1, jump_pair_count ) = protein_root[this_chain];
889  jump_pairs( 2, jump_pair_count ) = closest_base[this_chain];
890  } else {
891  jump_pairs( 2, jump_pair_count ) = protein_root[this_chain];
892  jump_pairs( 1, jump_pair_count ) = closest_base[this_chain];
893  }
894  jump_pair_count++;
895 
896  continue;
897  }
898 
899  // Bail if it's something other than protein or DNA
900  if( !pose.residue( chain_start[ this_chain ] ).is_DNA() ) {
901  std::cerr << "Bad call to make_basepair_aware_fold_tree() with non-protein, non-DNA type" << std::endl;
902  utility_exit_with_message( "make_base_aware_fold_tree() takes only protein, DNA!" );
903  }
904 
905  chain_type.push_back( bped_dna );
906 
907  // Break up this DNA into segments
908 
909  Size num_segments( 1 );
910  utility::vector1< Size > segment_start;
911  utility::vector1< Size > segment_end;
912  utility::vector1< Size > segment_type;
913 
914  segment_start.push_back( chain_start[ this_chain ] );
915  if( dna_info.find_partner( chain_start[ this_chain ] ) != 0 ) {
916  segment_type.push_back( bped_dna );
917  } else {
918  segment_type.push_back( non_bped_dna );
919  }
920  for( Size resid = chain_start[this_chain] ; resid < chain_end[ this_chain ] ; ++resid ) {
921  // Check for difference
922  bool this_bped( dna_info.find_partner( resid ) != 0 );
923  bool next_bped( dna_info.find_partner( resid + 1 ) != 0 );
924  // Switch from base paired to not base paired
925  if( this_bped && !next_bped ) { // Switch from base paired to not base paired
926  segment_end.push_back( resid );
927  segment_start.push_back( resid + 1 );
928  segment_type.push_back( non_bped_dna );
929  } else if ( !this_bped && next_bped ) { // Switch from not base paired to base paired
930  segment_end.push_back( resid );
931  segment_start.push_back( resid + 1 );
932  segment_type.push_back( bped_dna );
933  } else if (!this_bped && !next_bped ) { // No switch - do nothing
934  continue;
935  } else if ( pdb_data->chain( dna_info.find_partner( resid ) ) !=
936  pdb_data->chain( dna_info.find_partner( resid + 1 ) ) ) { // Both base-paired, but to different strands
937  segment_end.push_back( resid );
938  segment_start.push_back( resid + 1 );
939  segment_type.push_back( bped_dna );
940  }
941  }
942  segment_end.push_back( chain_end[ this_chain ] );
943 
944  num_segments = segment_start.size();
945 
946  // Let's see what we have
947  TR << "Found " << num_segments << " initial segments for chain " << this_chain << std::endl;
948  for( Size this_segment = 1 ; this_segment <= num_segments ; ++this_segment ) {
949  TR << "Chain " << this_chain << " segment " << this_segment << " start res " << segment_start[ this_segment ] <<
950  " end res " << segment_end[ this_segment ] << " of type " << segment_type[ this_segment ] << std::endl;
951  }
952 
953 
954  // Record mid-points of base paired segments
955  utility::vector1< Size > bp_middle( num_segments, 0 );
956 
957  for( Size this_segment = 1 ; this_segment <= num_segments ; ++this_segment ) {
958  if( segment_type[ this_segment ] == bped_dna ) {
959  bp_middle[ this_segment ] = ( segment_start[ this_segment ] + segment_end[ this_segment ] ) / 2;
960  }
961  }
962 
963  // Merge non-base paired segments into base paired segments if possible
964 
965  Size num_processed( num_segments );
966 
967  if( num_segments > 1 ) {
968  for( Size this_segment = 1 ; this_segment <= num_segments ; ++this_segment ) {
969  if( segment_type[ this_segment ] == non_bped_dna && num_segments > 1 ) {
970  num_processed--;
971  if( this_segment == 1 ) { // Just merge with the next segment
972  segment_start[ this_segment + 1 ] = segment_start[ this_segment ];
973  } else if( this_segment == num_segments ) { // Just merge with the previous segment
974  segment_end[ this_segment - 1 ] = segment_end[ this_segment ];
975  } else { // Must be between two base paired segments - split evenly
976  // Handle single residue segment
977  if( segment_start[ this_segment ] == segment_end[ this_segment ] ) {
978  // Just give it to the previous
979  segment_end[ this_segment - 1 ] = segment_end[ this_segment ];
980  } else {
981  // Divide in half
982  Size split_pos( ( segment_start[ this_segment ] + segment_end[ this_segment ] ) / 2 );
983  segment_end[ this_segment - 1 ] = split_pos;
984  segment_start[ this_segment + 1 ] = split_pos + 1;
985  }
986  }
987  }
988  }
989  }
990 
991  // Need to handle the case of a single segment chain of non-base paired DNA - it needs a jump somewhere
992  if( num_segments == 1 && chain_type[ this_chain ] == non_bped_dna ) {
993  // Find the amino acid with the closest Calpha atom to some C1' atom this chain
994  Real best_dist( 9999.0 );
995  for( Size dna_res = chain_start[ this_chain ] ; dna_res <= chain_end[ this_chain ] ; ++dna_res ) {
996  for( Size prot_res = 1 ; prot_res <= nres ; ++prot_res ) {
997  if( !pose.residue( prot_res ).is_protein() ) continue;
998  Real check_dist = pose.residue( prot_res ).xyz( "CA" ).distance_squared( pose.residue( dna_res ).xyz( "C1*" ) );
999  if( check_dist < best_dist ) {
1000  best_dist = check_dist;
1001  protein_root[ this_chain ] = prot_res;
1002  closest_base[ this_chain ] = dna_res;
1003  }
1004  }
1005  }
1006 
1007  TR << "Unpaired DNA closest approach is res " << closest_base[ this_chain ]<< " with amino acid " << protein_root[ this_chain ] << " with distance " << std::sqrt( best_dist ) << std::endl;
1008 
1009  // Note this pair as a jump
1010  if( protein_root[this_chain] < closest_base[this_chain] ) {
1011  jump_pairs( 1, jump_pair_count ) = protein_root[this_chain];
1012  jump_pairs( 2, jump_pair_count ) = closest_base[this_chain];
1013  } else {
1014  jump_pairs( 2, jump_pair_count ) = protein_root[this_chain];
1015  jump_pairs( 1, jump_pair_count ) = closest_base[this_chain];
1016  }
1017  jump_pair_count++;
1018  }
1019 
1020 
1021  // Let's see what we have
1022  TR << "Found " << num_processed << " final segments for chain " << this_chain << std::endl;
1023  Size accum_count( 0 );
1024  for( Size this_segment = 1 ; this_segment <= num_segments ; ++this_segment ) {
1025  if( segment_type[ this_segment ] == bped_dna ) {
1026  accum_count++;
1027  TR << "Chain " << this_chain << " segment " << accum_count << " start res " << segment_start[ this_segment ] <<
1028  " end res " << segment_end[ this_segment ] << " of type " << segment_type[ this_segment ] << std::endl;
1029 
1030  // retrieve the mid-point base pair and its partner
1031  Size mid_partner = dna_info.find_partner( bp_middle[ this_segment ] );
1032 
1033  // Store the jump info if this chain is the lower number (to avoid adding twice)
1034  // Also check to make sure these chains haven't already been connected. This
1035  // can happen if two strands are base-paired at the ends but have a non-bp-ed
1036  // bubble in between.
1037  if( ( bp_middle[ this_segment ] < mid_partner ) &&
1038  not_already_connected( pose, jump_pair_count - 1, pose.pdb_info()->chain( bp_middle[ this_segment] ), pose.pdb_info()->chain( mid_partner ), jump_pairs ) ) {
1039  TR << "Making jump between " << bp_middle[ this_segment ] << " and " << mid_partner << std::endl;
1040  jump_pairs( 1, jump_pair_count ) = bp_middle[ this_segment ];
1041  jump_pairs( 2, jump_pair_count ) = mid_partner;
1042  jump_pair_count++;
1043  }
1044  }
1045  }
1046  }
1047 
1048  kinematics::FoldTree ft( nres );
1049 
1050  ft.tree_from_jumps_and_cuts( nres, num_cuts, jump_pairs, cut_positions, 1 );
1051 
1052  return ft;
1053 }
1054 
1055 bool
1057  pose::Pose const & pose,
1058  Size const num_jumps,
1059  char const this_chain,
1060  char const other_chain,
1061  ObjexxFCL::FArray2D_int & jump_pairs
1062 )
1063 {
1064 
1065  for( Size i = 1 ; i <= num_jumps ; ++i ) {
1066 
1067  // Get chain ids for residues involved in jumps
1068  char const jump_chain1( pose.pdb_info()->chain( jump_pairs( 1, i ) ) );
1069  char const jump_chain2( pose.pdb_info()->chain( jump_pairs( 2, i ) ) );
1070 
1071  // Check versus pre-existing jump ( both ways )
1072 
1073  if( ( jump_chain1 == this_chain && jump_chain2 == other_chain ) ||
1074  ( jump_chain2 == this_chain && jump_chain1 == other_chain ) ) {
1075  return false;
1076  }
1077  }
1078 
1079  return true;
1080 }
1081 
1082 
1083 void
1085  pose::Pose & pose,
1086  core::Size const start_base,
1087  core::Size const end_base
1088 )
1089 {
1090  using namespace scoring::constraints;
1091  using namespace id;
1092  using numeric::conversions::radians;
1093 
1094  pose::PDBInfoCOP pdb_data( pose.pdb_info() );
1095 
1096 // Size const nres( pose.total_residue() );
1097 
1098  // From Phil
1099  Real const O3_P_distance( 1.608 );
1100  Real const O3_angle( 119.8 );
1101  Real const P_angle( 103.4 );
1102  Real const O1P_angle( 108.23 );
1103 
1104  Real const distance_stddev( 0.3 ); // amber is 0.0659
1105  Real const angle_stddev_degrees( 35 ); // amber is 8.54 (P angle), 5.73 (O3 angle)
1106 
1107  FuncOP const distance_func( new HarmonicFunc( O3_P_distance, distance_stddev ) );
1108  FuncOP const O3_angle_func( new HarmonicFunc( radians( O3_angle ), radians( angle_stddev_degrees ) ) );
1109  FuncOP const P_angle_func( new HarmonicFunc( radians( P_angle ), radians( angle_stddev_degrees ) ) );
1110  FuncOP const O1P_angle_func( new HarmonicFunc( radians( O1P_angle ), radians( angle_stddev_degrees ) ) );
1111 
1112  assert( start_base <= end_base );
1113 
1114  // First the start base
1115  if( !pose.residue_type( start_base ).is_lower_terminus() ) {
1116  conformation::Residue const & rsd1( pose.residue( start_base-1 ) );
1117  conformation::Residue const & rsd2( pose.residue( start_base ) );
1118 
1119  // Setup constraints to close bb
1120 
1121  AtomID const C3_id( rsd1.atom_index( "C3*" ), start_base - 1 );
1122  AtomID const O3_id( rsd1.atom_index( "O3*" ), start_base - 1 );
1123  AtomID const P_id( rsd2.atom_index( "P" ), start_base );
1124  AtomID const O5_id( rsd2.atom_index( "O5*" ), start_base );
1125  AtomID const O1P_id( rsd2.atom_index( "O1P" ), start_base );
1126 
1127  // distance from O3* to P
1128  pose.add_constraint( new AtomPairConstraint( O3_id, P_id, distance_func ) );
1129  // angle at O3*
1130  pose.add_constraint( new AngleConstraint( C3_id, O3_id, P_id, O3_angle_func ) );
1131  // angle at P
1132  pose.add_constraint( new AngleConstraint( O3_id, P_id, O5_id, P_angle_func ) );
1133  // another angle at P - try not to get goofy geometries
1134  pose.add_constraint( new AngleConstraint( O3_id, P_id, O5_id, P_angle_func ) );
1135  pose.add_constraint( new AngleConstraint( O3_id, P_id, O1P_id, O1P_angle_func ) );
1136  }
1137 
1138  // Next the end base
1139  if( !pose.residue_type( end_base ).is_upper_terminus() ) {
1140  conformation::Residue const & rsd1( pose.residue( end_base ) );
1141  conformation::Residue const & rsd2( pose.residue( end_base+1 ) );
1142 
1143  // Setup constraints to close bb
1144 
1145  AtomID const C3_id( rsd1.atom_index( "C3*" ), end_base );
1146  AtomID const O3_id( rsd1.atom_index( "O3*" ), end_base );
1147  AtomID const P_id( rsd2.atom_index( "P" ), end_base + 1 );
1148  AtomID const O5_id( rsd2.atom_index( "O5*" ), end_base + 1 );
1149  AtomID const O1P_id( rsd2.atom_index( "O1P" ), end_base + 1 );
1150 
1151  // distance from O3* to P
1152  pose.add_constraint( new AtomPairConstraint( O3_id, P_id, distance_func ) );
1153  pose.add_constraint( new AngleConstraint( O3_id, P_id, O1P_id, O1P_angle_func ) );
1154  // angle at O3*
1155  pose.add_constraint( new AngleConstraint( C3_id, O3_id, P_id, O3_angle_func ) );
1156  // angle at P
1157  pose.add_constraint( new AngleConstraint( O3_id, P_id, O5_id, P_angle_func ) );
1158  }
1159 
1160 }
1161 
1162 
1163 
1164 } // namespace dna
1165 } // namespace protocols