Rosetta 3.5
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
util.cc
Go to the documentation of this file.
1 // -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
2 // vi: set ts=2 noet:
3 //
4 // (c) Copyright Rosetta Commons Member Institutions.
5 // (c) This file is part of the Rosetta software suite and is made available under license.
6 // (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
7 // (c) For more information, see http://www.rosettacommons.org. Questions about this can be
8 // (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
9 
10 /// @file src/protocols/comparative_modeling/util.cc
11 /// @brief set of utilities used in comparative modeling of protein structures
12 /// @author James Thompson
13 
14 #include <core/types.hh>
15 #include <basic/Tracer.hh>
16 #include <core/pose/util.hh>
17 #include <core/pose/Pose.hh>
18 // AUTO-REMOVED #include <core/io/pdb/pose_io.hh>
19 
20 #include <core/chemical/util.hh>
26 
27 // Symmetry
29 // AUTO-REMOVED #include <core/conformation/symmetry/util.hh>
30 
33 
34 #include <core/sequence/util.hh>
42 
43 #include <basic/options/option.hh>
44 #include <basic/options/keys/in.OptionKeys.gen.hh>
45 #include <basic/options/keys/cm.OptionKeys.gen.hh>
46 
49 
51 #include <protocols/loops/Loop.hh>
52 #include <protocols/loops/Loops.hh>
55 
56 #include <utility/vector1.hh>
57 #include <utility/string_util.hh>
58 #include <utility/file/FileName.hh>
59 #include <utility/file/file_sys_util.hh>
60 
62 #include <core/id/NamedAtomID.hh>
63 #include <core/sequence/Aligner.hh>
65 
66 #include <numeric/random/random.hh>
67 
70 
71 namespace protocols {
72 namespace comparative_modeling {
73 
// Minimum-loop-size value used in this file when loops should be assembled
// from the given residues without padding them out to a minimum length.
74 #define NO_LOOP_SIZE_CST 0
75 
76 using utility::vector1;
77 using std::string;
78 
// File-local random number generator, constructed with the constant 298211
// (presumably its seed -- confirm against the RandomGenerator constructor).
79 static numeric::random::RandomGenerator RG(298211);
// File-local tracer used for the Debug / Warning / Error output below; the
// string is presumably the channel name attached to that output.
80 static basic::Tracer tr("protocols.comparative_modeling.util");
81 
// NOTE(review): the signature line of this function and the declaration of
// `alns` are not present in this listing; the comments here describe only the
// statements that are visible.
//
// Reads alignments from the first file named by the in::file::alignment
// option, using the reader selected by the cm::aln_format option, and returns
// alns[1].
83  using namespace basic::options;
84  using namespace basic::options::OptionKeys;
85  using namespace core::sequence;
86 
// Only element [1] of the in::file::alignment option is consulted.
88  utility::file::FileName const fn( option[ in::file::alignment ]()[1] );
89 
// NOTE(review): the format string "mini" selects read_grishin_aln_file and
// the format string "grishin" selects read_general_aln_file -- confirm that
// this pairing of option values and readers is intended.
90  if ( option[ cm::aln_format ]() == "mini" ) {
91  alns = read_grishin_aln_file( static_cast< std::string > (fn) );
92  } else if ( option[ cm::aln_format ]() == "grishin" ) {
93  alns = read_general_aln_file( static_cast< std::string > (fn) );
94  } else {
// Any other format value: build a message naming it and pass it to
// utility_exit_with_message.
95  std::string const msg(
96  "Error: don't recognize alignment format " +
97  option[ cm::aln_format ]() + "!"
98  );
99  utility_exit_with_message( msg );
100  }
101 
// The file must have yielded at least one alignment; only the first is used.
102  runtime_assert( alns.size() > 0 );
103  return alns[1];
104 }
105 
106 /// @detail The premise underlying this tortuous method is simple--
107 /// identify aligned/unaligned regions in a sequence alignment with
108 /// the constraint that each region has a certain minimum length.
109 ///
110 /// The current implementation achieves this goal in a roundabout
111 /// manner by making use of existing, less specialized utility
112 /// functions.
///
/// @param num_residues query residues 1..num_residues are examined
/// @param min_size minimum length applied first to the unaligned regions
///   and then to the aligned stretches between them
/// @param alignment alignment from which the mapping of sequence 1 onto
///   sequence 2 is taken
/// @param unaligned_regions output; asserted non-null on entry and then
///   overwritten with the resulting loops
///
/// NOTE(review): the signature line and a few lines of the body (between
/// the `using core::Size;` line and the assert) are missing from this listing.
114  const core::Size num_residues,
115  const core::Size min_size,
116  const core::sequence::SequenceAlignment& alignment,
117  protocols::loops::LoopsOP & unaligned_regions) {
118 
119  using core::Size;
123  assert(unaligned_regions);
124 
125  const Size query_idx = 1;
126  const Size templ_idx = 2;
127  SequenceMapping mapping(alignment.sequence_mapping(query_idx, templ_idx));
128 
// Pass 1: collect every query residue that is unaligned or that borders a
// discontinuity in the mapping.
129  vector1<Size> unaligned_residues;
130  for (Size resi = 1; resi <= num_residues; resi++) {
131  Size t_resi = mapping[resi];
132 
133  bool const gap_exists(
134  t_resi == 0 || // query residue maps to a gap
135  (resi > 1 && mapping[ resi - 1 ] != t_resi - 1) || // last residue was gapped
136  (resi < num_residues && mapping[ resi + 1 ] != t_resi + 1)); // next residue is gapped
137 
138  if (gap_exists) unaligned_residues.push_back( resi );
139  }
140 
141  // Ensure the unaligned regions meet size constraints.
142  // The aligned stretches between them may still be shorter than min_size here.
143  protocols::loops::LoopsOP unaligned_ok = pick_loops_unaligned(num_residues, unaligned_residues, min_size);
144 
145  // Ensure the aligned regions meet size constraints.
// First flatten the size-corrected loops back into a list of residues.
// Assumes the loops are visited in ascending, non-overlapping order so that
// the list is sorted -- TODO confirm; the delta computed below relies on it.
146  unaligned_residues.clear();
147  for (Loops::const_iterator i = unaligned_ok->begin(); i != unaligned_ok->end(); ++i) {
148  const Loop& loop = *i;
149  for (Size j = loop.start(); j <= loop.stop(); ++j) {
150  unaligned_residues.push_back(j);
151  }
152  }
153 
// Pass 2: any aligned stretch shorter than min_size that sits between two
// unaligned residues is absorbed into the unaligned set.
154  vector1<Size> bounded_unaligned_residues(unaligned_residues);
155  for (Size i = 2; i <= unaligned_residues.size(); ++i) {
156  Size prev_residue = unaligned_residues[i - 1];
157  Size curr_residue = unaligned_residues[i];
158 
159  // Number of residues strictly between two consecutive unaligned residues,
// i.e. the length of the aligned stretch separating them:
// (curr - 1) - (prev + 1) + 1 = curr - prev - 1.
160  Size delta = curr_residue - prev_residue - 1;
// delta == 0: the residues are adjacent; delta >= min_size: the aligned
// stretch is long enough to keep.
161  if (delta == 0 || delta >= min_size)
162  continue;
163 
164  for (Size j = (prev_residue + 1); j <= (curr_residue - 1); ++j)
165  bounded_unaligned_residues.push_back(j);
166  }
// The absorbed residues were appended at the end, so restore ascending order.
167  std::sort(bounded_unaligned_residues.begin(), bounded_unaligned_residues.end());
168 
169  // Retrieve loops without affecting unaligned region length
170  unaligned_regions = pick_loops_unaligned(num_residues, bounded_unaligned_residues, NO_LOOP_SIZE_CST);
171 }
172 
// NOTE(review): the signature line and the declaration of the `aln`
// parameter are missing from this listing; the trace message below names
// this function loops_from_alignment.
//
// Collects every query residue in 1..nres that is unaligned, or that borders
// a discontinuity in the mapping of sequence 1 onto sequence 2, and returns
// the result of pick_loops_unaligned for those residues and min_loop_size.
174  core::Size nres,
176  core::Size const min_loop_size
177 ) {
178  using core::Size;
179 
180  Size const query_idx( 1 );
181  Size const templ_idx( 2 );
182  core::id::SequenceMapping mapping_(
183  aln.sequence_mapping( query_idx, templ_idx )
184  );
// Trace the arguments and the derived mapping on the Debug channel.
185  tr.Debug << "called loops_from_alignment with arguments:" << std::endl;
186  tr.Debug << nres << std::endl;
187  tr.Debug << aln << std::endl;
188  tr.Debug << min_loop_size << std::endl;
189  mapping_.show( tr.Debug );
190  vector1< core::Size > unaligned_residues;
191  for ( Size resi = 1; resi <= nres; resi++ ) {
192  Size t_resi = mapping_[ resi ];
193 
194  // gap checks
// The t_resi == 0 test comes first, so the neighbour tests (which compute
// t_resi - 1) are only evaluated when t_resi is non-zero.
195  bool const gap_exists(
196  t_resi == 0 || // query residue maps to a gap
197  ( resi > 1 && mapping_[ resi - 1 ] != t_resi - 1 ) || // last residue was gapped
198  ( resi < nres && mapping_[ resi + 1 ] != t_resi + 1 ) // next residue is gapped
199  );
200  if ( gap_exists ) unaligned_residues.push_back( resi );
201  }
202 
203  tr.flush_all_channels();
204 
205  return pick_loops_unaligned(
206  nres,
207  unaligned_residues,
208  min_loop_size
209  );
210 }
211 
212 
213 //fpd build a loopfile from the intersection of loops from multiple aln files
//
// NOTE(review): the signature line and the declarations of the `aln1` and
// `aln2` parameters are missing from this listing.
//
// A residue of the first sequence (1..nres1) is reported as unaligned when
// either (a) it is unaligned or borders a discontinuity in the aln1 mapping,
// or (b) the position it maps to is itself unaligned or borders a
// discontinuity in the aln2 mapping. The collected residues are passed to
// pick_loops_unaligned together with nres1 and min_loop_size.
215  core::Size nres1,
217  core::Size nres2,
219  core::Size const min_loop_size
220 ) {
221  using core::Size;
222 
223  Size const query_idx( 1 );
224  Size const templ_idx( 2 );
225  core::id::SequenceMapping mapping1_( aln1.sequence_mapping( query_idx, templ_idx ) );
226  core::id::SequenceMapping mapping2_( aln2.sequence_mapping( query_idx, templ_idx ) );
// Trace each argument in order. The fourth line previously printed nres1 a
// second time, so nres2 never appeared in the trace.
227  tr.Debug << "called loops_from_multiple_alignments with arguments:" << std::endl;
228  tr.Debug << nres1 << std::endl;
229  tr.Debug << aln1 << std::endl;
230  tr.Debug << nres2 << std::endl;
231  tr.Debug << aln2 << std::endl;
232  tr.Debug << min_loop_size << std::endl;
233  mapping1_.show( tr.Debug );
234  mapping2_.show( tr.Debug );
235  vector1< core::Size > unaligned_residues;
236  for ( Size resi = 1; resi <= nres1; resi++ ) {
237  Size t_resi1 = mapping1_[ resi ];
238 
239  // gap checks
240  //fpd First check to see if there is a gap in the alignment
241  bool gap_exists =
242  t_resi1 == 0 || // query residue maps to a gap (aln1)
243  ( resi > 1 && mapping1_[ resi - 1 ] != t_resi1 - 1 ) || // last residue was gapped
244  ( resi < nres1 && mapping1_[ resi + 1 ] != t_resi1 + 1 ); // next residue is gapped
245 
246  //fpd Now check whether this residue maps to a part of the template sequence
247  //fpd that is missing in the input template PDB
// Only reached when t_resi1 is non-zero, so it is a usable index into the
// second mapping.
248  if (!gap_exists) {
249  Size t_resi2 = mapping2_[ t_resi1 ];
250  gap_exists = t_resi2 == 0 || // mapped position maps to a gap (aln2)
251  ( t_resi1 > 1 && mapping2_[ t_resi1 - 1 ] != t_resi2 - 1 ) || // last residue was gapped
252  ( t_resi1 < nres2 && mapping2_[ t_resi1 + 1 ] != t_resi2 + 1 ); // next residue is gapped
253  }
254 
255  if ( gap_exists )
256  unaligned_residues.push_back( resi );
257  }
258 
259  tr.flush_all_channels();
260 
261  return pick_loops_unaligned(
262  nres1,
263  unaligned_residues,
264  min_loop_size
265  );
266 }
267 
// NOTE(review): the signature line and the declaration of `query_loops`
// are missing from this listing.
//
// Groups the given unaligned residues into loops: each run of consecutive
// residue numbers becomes one loop, and every loop shorter than
// min_loop_size is widened (stop towards nres, start towards 1) until it
// reaches that length. Returns query_loops; it is returned without any
// loops added when unaligned_residues is empty.
//
// unaligned_residues is expected in ascending order: a new loop is started
// whenever two neighbouring entries differ by more than 1.
//
// If a loop already spans 1..nres and is still shorter than min_loop_size,
// widening stops and the loop is added at that length. Previously both
// widening loops never terminated in that situation.
269  core::Size nres,
270  utility::vector1< core::Size > const & unaligned_residues,
271  core::Size min_loop_size
272 ) {
273  typedef core::Size Size;
274 
276  if ( unaligned_residues.size() == 0 ) {
277  tr.Warning << "No unaligned residues, no loops found." << std::endl;
278  return query_loops;
279  }
280 
281  Size loop_stop ( *unaligned_residues.begin() );
282  Size loop_start( *unaligned_residues.begin() );
283 
// Walk neighbouring pairs (it, next); a jump of more than one residue
// closes the current loop at *it and opens the next one at *next.
284  for ( vector1< Size >::const_iterator it = unaligned_residues.begin(),
285  next = it + 1,
286  end = unaligned_residues.end();
287  next != end; ++it, ++next
288  ) {
289  tr.Debug << "residue " << *it << " is unaligned." << std::endl;
290  if ( *next - *it > 1 ) {
291  // add loop
292  loop_stop = *it;
// Widen only while at least one end can still move; otherwise the loop
// already covers everything available and must be accepted as is.
293  while ( (loop_stop - loop_start + 1) < min_loop_size && ( loop_stop < nres || loop_start > 1 ) ) {
294  if ( loop_stop < nres )
295  ++loop_stop;
296  if ( loop_start > 1 && (loop_stop - loop_start + 1) < min_loop_size )
297  --loop_start;
298  }
299  tr.Debug << "adding loop from " << loop_start << " to " << loop_stop
300  << std::endl;
301  protocols::loops::Loop loop( loop_start, loop_stop, 0, 0, false );
302  query_loops->add_loop( loop, 1 );
303 
304  loop_start = *next;
305  }
306  }
307 
// The final run is still open when the scan ends; close it at the last
// unaligned residue.
308  loop_stop = ( *(unaligned_residues.end() - 1) );
309 
// NOTE(review): unlike the widening loop above, this one does not re-check
// the length before moving loop_start, so the final loop can end up one
// residue longer than min_loop_size. Left unchanged.
310  while ( (loop_stop - loop_start + 1) < min_loop_size && ( loop_stop < nres || loop_start > 1 ) ) {
311  if ( loop_stop < nres ) ++loop_stop;
312  if ( loop_start > 1 ) --loop_start;
313  }
314  tr.Debug << "adding loop from " << loop_start << " to " << loop_stop
315  << std::endl;
316  protocols::loops::Loop loop( loop_start, loop_stop, 0, 0, false );
317  query_loops->add_loop( loop , 1 );
318 
319  tr.flush_all_channels();
320 
321  return query_loops;
322 } // pick_loops
323 
// NOTE(review): the signature line and two lines inside the symmetry
// branch (the declarations of the symmetric conformation reference and of
// `symm_info`) are missing from this listing.
//
// Finds chainbreaks in query_pose and turns them into loops. A chainbreak
// is recorded at residue i when residues i and i+1 are both protein and
// their "CA" atoms are more than 4.0 apart. The recorded residues are
// passed to pick_loops_unaligned together with min_loop_size.
325  core::pose::Pose & query_pose,
326  core::Size min_loop_size
327 ) {
328  typedef core::Size Size;
329 
330  core::Real const chainbreak_cutoff( 4.0 );
331  core::Size nres = query_pose.total_residue();
332 
333  //fpd symm
// For a symmetric pose only the independent residues are scanned.
334  if ( core::pose::symmetry::is_symmetric(query_pose) ) {
336  dynamic_cast<core::conformation::symmetry::SymmetricConformation &> ( query_pose.conformation()) );
338  nres = symm_info->num_independent_residues();
339  }
340 
341  vector1< Size > residues_near_chainbreak;
// Written as i < nres rather than i <= nres - 1: identical for nres >= 1,
// and for nres == 0 it avoids evaluating nres - 1, which would wrap around
// if core::Size is unsigned (presumably it is) and send the loop on to
// residues that do not exist.
342  for ( Size i = 1; i < nres; ++i ) {
343  if ( query_pose.residue_type(i).is_protein() && query_pose.residue_type(i+1).is_protein()) {
344  core::Real dist = query_pose.residue(i).xyz("CA").distance(
345  query_pose.residue(i+1).xyz("CA")
346  );
347  //std::cout << "dist(" << i << "," << i+1 << ") = " << dist << std::endl;
348  if ( dist > chainbreak_cutoff ) {
349  residues_near_chainbreak.push_back( i );
350  }
351  }
352  } // for ( Size i )
353 
354  if ( residues_near_chainbreak.size() == 0 ) {
355  tr.Warning << "No chainbreaks found, so not picking any loops!"
356  << std::endl;
357  }
358 
359  tr.flush();
360 
// NOTE(review): the upper bound passed here is the full
// query_pose.total_residue(), not the symmetry-reduced nres used for the
// scan above -- confirm that this is intended.
361  return pick_loops_unaligned(
362  query_pose.total_residue(),
363  residues_near_chainbreak,
364  min_loop_size
365  );
366 } // pick_loops
367 
// NOTE(review): the signature line and three interior lines are missing
// from this listing: the start of the statement that initialises `my_loops`,
// the statement after orig_rsd_set_name is captured, and the start of the
// statement that creates `loop_mover`.
//
// Repeatedly applies a loop mover to query_pose until no loops remain or
// max_rebuild iterations have run. After each application the loops are
// re-picked with pick_loops_chainbreak( query_pose, min_loop_size ); an empty
// result ends the iteration. Before returning from the main path, query_pose
// is switched back to the residue type set whose name was recorded up front.
// Nothing is returned, so the caller is not told whether loops remain.
369  core::pose::Pose & query_pose,
370  core::Size const min_loop_size,
371  core::Size const max_rebuild,
372  std::string const & loop_mover_name
373 ) {
374  using namespace basic::options;
375  using namespace basic::options::OptionKeys;
376 
377  // switch to centroid ResidueTypeSet for loop remodeling
// NOTE(review): no switch is visible at this point in the listing; the lines
// below are the tail of the statement that initialises my_loops.
379  query_pose,
380  min_loop_size
381  );
382 
// Nothing to rebuild: return before the pose is touched.
383  if ( my_loops->size() == 0 ) {
384  tr.Debug << "no loops found." << std::endl;
385  return;
386  }
387 
// Remember the current residue type set (taken from residue 1) so it can
// be restored at the end.
388  std::string const orig_rsd_set_name(
389  query_pose.residue_type(1).residue_type_set().name()
390  );
392 
393  bool closed( false );
394  for ( core::Size iter = 1; !closed && iter <= max_rebuild; iter++ ) {
396  loop_mover_name, my_loops
397  );
398  loop_mover->apply( query_pose );
399 
// Re-pick loops on the modified pose; none left means every loop is closed.
400  my_loops = pick_loops_chainbreak(
401  query_pose,
402  min_loop_size
403  );
404 
405  if ( my_loops->size() == 0 ) {
406  tr.Debug << "closed loops on iteration " << iter << " ." << std::endl;
407  closed = true;
408  }
409  }
410 
411  tr.flush();
412 
413  core::util::switch_to_residue_type_set( query_pose, orig_rsd_set_name );
414 } // rebuild_loops_until_closed
415 
417  core::pose::Pose & dest_pose,
418  core::pose::Pose const & source_pose_in,
419  core::id::NamedAtomID const anchor_atom_dest,
420  core::id::NamedAtomID const anchor_atom_source,
421  utility::vector1< core::id::NamedAtomID > const ligand_indices
422 ) {
423  using core::Size;
424  using utility::vector1;
425 
426  // add some runtime asserts here!
427  if ( !anchor_atom_dest.valid() ) {
428  tr.Error << "Error: can't place ligands. "
429  << "Destination anchor atom is not valid!" << anchor_atom_dest
430  << std::endl;
431  return;
432  }
433  if ( !anchor_atom_source.valid() ) {
434  tr.Error << "Error: can't place ligands. "
435  << "Source anchor atom is not valid! (" << anchor_atom_source << ")"
436  << std::endl;
437 
438  return;
439  }
440 
441  // create a copy to avoid modifying original
442  core::pose::Pose source_pose = source_pose_in;
443 
444  // set up FoldTree for source_pose that has the jump orientation that we want
445  core::kinematics::FoldTree new_fold_tree;
446  core::Size old_fold_tree_end(
447  source_pose.total_residue() - ligand_indices.size()
448  ); // stupid assumption!
449  new_fold_tree.add_edge(
450  1,
451  anchor_atom_source.rsd(),
453  );
454  new_fold_tree.add_edge(
455  anchor_atom_source.rsd(),
456  old_fold_tree_end,
458  );
459  tr.Debug << "adding ligand residues to fold-tree" << std::endl;
460 
461  // add edges from anchor to ligand residues
462  for ( Size jj = 1; jj <= ligand_indices.size(); ++jj ) {
463  tr.Error << "adding " << jj << std::endl;
464  core::kinematics::Edge out_edge(
465  anchor_atom_source.rsd(), // start
466  ligand_indices[jj].rsd(), // stop
467  static_cast< int > (jj), // label
468  anchor_atom_source.atom(), // start_atom
469  ligand_indices[jj].atom(), // stop_atom
470  false // bKeepStubInResidue
471  );
472  tr.Error << out_edge << std::endl;
473  new_fold_tree.add_edge( out_edge );
474  }
475 
476  tr.Error << source_pose.fold_tree();
477  source_pose.fold_tree( new_fold_tree );
478 
479  // copy the residues from the source_pose into the dest_pose
480  // using the jump geometry defined above.
481  for ( Size jj = 1; jj <= ligand_indices.size(); ++jj ) {
482  dest_pose.append_residue_by_jump(
483  source_pose.residue( ligand_indices[jj].rsd() ),
484  anchor_atom_dest.rsd(),
485  anchor_atom_dest.atom(),
486  ligand_indices[jj].atom()
487  );
488  dest_pose.set_jump(
489  static_cast< int > (jj),
490  source_pose.jump( static_cast< int > (jj) )
491  );
492  }
493 
494  tr.flush();
495 } // steal_ligands
496 
// NOTE(review): the signature line of this function is missing from this
// listing; `pose` is its parameter.
//
// Sets the secondary structure of pose from the first source that succeeds,
// trying in order: loops::set_secstruct_from_psipred_ss2, then
// loops::set_secstruct_from_dssp with the file named by the in::file::dssp
// option, then set_ss_from_phipsi.
498  using namespace core::pose;
499  using namespace basic::options;
500  using namespace basic::options::OptionKeys;
501  bool psipred_ss2_ok = loops::set_secstruct_from_psipred_ss2( pose );
502  if ( !psipred_ss2_ok ) {
// The DSSP file name is only read from the options when it is needed.
503  std::string dssp_name( option[ in::file::dssp ]().name() );
504  bool dssp_ok = loops::set_secstruct_from_dssp(pose, dssp_name);
505  if ( !dssp_ok ) {
// Last resort, used when both file-based sources reported failure.
506  set_ss_from_phipsi( pose );
507  }
508  }
509 }
510 
// NOTE(review): the signature lines of this function are missing from this
// listing.
//
// Loads one pose per file name listed in the in::file::template_pdb option,
// via core::import_pose::poses_from_pdbs, and returns the resulting vector.
513  using std::string;
514  using utility::vector1;
515  using namespace basic::options;
516  using namespace basic::options::OptionKeys;
517 
518  vector1< string > template_pdb_fns(
519  option[ in::file::template_pdb ]()
520  );
521  vector1< core::pose::Pose > template_poses
522  = core::import_pose::poses_from_pdbs( template_pdb_fns );
523 
524  return template_poses;
525 }
526 
// NOTE(review): the signature line of this function is missing from this
// listing; `pose` is its parameter.
//
// True when pick_loops_chainbreak, called with a minimum loop size of 3,
// returns no loops for pose.
528  return ( pick_loops_chainbreak(pose, 3)->size() == 0 ) ;
529 }
530 
// NOTE(review): the line carrying the function name and the declarations
// of `rsd_set` and the `iter` type are missing from this listing.
//
// Reads each existing file in fn_list as a PDB and returns the poses in a
// map. The key is the first five characters of the file's base name. File
// names for which file_exists is false are skipped without any message.
531 std::map< std::string, core::pose::Pose >
533  utility::vector1< std::string > const & fn_list
534 ) {
535  using std::map;
536  using std::string;
537  using core::pose::Pose;
540  using namespace core::chemical;
541 
543  map< string, Pose > poses;
544 
546  for ( iter it = fn_list.begin(), end = fn_list.end(); it != end; ++it ) {
547  if ( file_exists(*it) ) {
548  Pose pose;
549  core::import_pose::pose_from_pdb( pose, *rsd_set, *it );
550  string name = utility::file_basename( *it );
// Keep only the first five characters as the key; two files whose base
// names share those five characters map to the same key, and the later
// one replaces the earlier.
551  name = name.substr( 0, 5 );
552  poses[name] = pose;
553  }
554  }
555 
556  return poses;
557 }
558 
// NOTE(review): the line carrying the function name, a using-declaration
// (presumably for FileName) and the declaration of `ssf` are missing from
// this listing.
//
// Builds a set of alignments between two sequence profiles by sweeping the
// gap penalties. The profiles are read from entries [1] and [2] of the
// in::file::pssm option. Every combination of gap-open penalty in
// [min_gap_open, max_gap_open] and gap-extend penalty in
// [min_gap_extend, max_gap_extend], stepped by 0.5, is aligned once and the
// result inserted into the returned AlignmentSet.
559 AlignmentSet
561  using core::Real;
562  using std::string;
563  using utility::vector1;
565 
566  using namespace basic::options;
567  using namespace basic::options::OptionKeys;
568  using namespace core::sequence;
569 
570  // options set up
// NOTE(review): entries [1] and [2] of in::file::pssm and entry [1] of
// cm::seq_score are read with no size check visible in this function.
571  FileName fn1( option[ in::file::pssm ]()[1] );
572  FileName fn2( option[ in::file::pssm ]()[2] );
573  string const aligner_type( option[ cm::aligner ]() );
574  string const seq_score( option[ cm::seq_score ]()[1] );
575  Real const min_gap_open( option[ cm::min_gap_open ]() );
576  Real const max_gap_open( option[ cm::max_gap_open ]() );
577  Real const min_gap_extend( option[ cm::min_gap_extend ]() );
578  Real const max_gap_extend( option[ cm::max_gap_extend ]() );
579  Real const step_size( 0.5 ); // maybe make this an option?
580 
// Both sweep ranges must be well-formed (min <= max).
581  runtime_assert( min_gap_open <= max_gap_open );
582  runtime_assert( min_gap_extend <= max_gap_extend );
583 
584  // setup objects
586  AlignerOP aligner( AlignerFactory::get_aligner( aligner_type ) );
587  ScoringSchemeOP ss( ssf.get_scoring_scheme( seq_score ) );
588 
589  SequenceProfileOP prof1( new SequenceProfile );
590  prof1->read_from_file( fn1 );
591  prof1->convert_profile_to_probs( 1.0 ); // was previously implicit in read_from_file()
592 
593  SequenceProfileOP prof2( new SequenceProfile );
594  prof2->read_from_file( fn2 );
595  prof2->convert_profile_to_probs( 1.0 ); // was previously implicit in read_from_file()
596 
597  // eliminate leading paths from prof1 and prof2
598  prof1->id( FileName( prof1->id() ).base() );
599  prof2->id( FileName( prof2->id() ).base() );
600 
601  AlignmentSet set;
// The same scoring scheme object is reused; only its gap penalties are
// reset before each alignment.
602  for ( Real o = min_gap_open; o <= max_gap_open; o += step_size ) {
603  for ( Real e = min_gap_extend; e <= max_gap_extend;
604  e += step_size
605  ) {
606  ss->gap_open ( o );
607  ss->gap_extend( e );
608 
609  SequenceAlignment align = aligner->align( prof1, prof2, ss );
610  set.insert( align );
611  } // g_extend
612  } // g_open
613 
614  // add i/o of alignments from files here
615 
616  return set;
617 } // alignments_from_cmd_line
618 
619 
// NOTE(review): the signature line of this function is missing from this
// listing.
//
// Moves every selected heavy atom of query_pose, together with the
// hydrogens attached to it, to random coordinates. Each coordinate is
// computed as 900 + RG.uniform() * 100, drawn separately for every atom.
//
//  query_pose  pose whose atom coordinates are overwritten in place
//  selected    per-residue, per-atom flags; a true entry marks the atom for
//              randomisation (the original comment describes true as
//              "entry is missing")
621  core::pose::Pose & query_pose,
622  core::id::AtomID_Mask const & selected
623 ) {
624  using core::Size;
625  for ( Size pos = 1; pos <= query_pose.total_residue(); ++pos ) {
// atomj is the atom index matching the iterator; both advance together in
// the loop increment, including when an iteration ends with `continue`.
626  Size atomj( 1 );
627  for ( core::id::AtomID_Mask::AtomMap::const_iterator
628  it = selected[ pos ].begin(), eit = selected[ pos ].end(); it != eit;
629  ++it, ++atomj
630  ) {
631 
// Hydrogens are never randomised on the strength of their own flag; they
// are moved only together with the heavy atom they are attached to.
632  if ( query_pose.residue( pos ).atom_is_hydrogen( atomj ) ) continue;
633  if ( *it ) { //entry is missing == true
634  core::Vector ai(
635  900.000 + RG.uniform()*100.000,
636  900.000 + RG.uniform()*100.000,
637  900.000 + RG.uniform()*100.000
638  );
639  query_pose.set_xyz( core::id::AtomID( atomj, pos ), ai );
640  //now randomize also attached hydrogens
641  for ( Size atom_nr = query_pose.residue( pos ).attached_H_begin( atomj );
642  atom_nr <= query_pose.residue( pos ).attached_H_end( atomj ); ++atom_nr ) {
// This `ai` shadows the outer one: each hydrogen gets its own random point.
643  core::Vector ai(
644  900.000 + RG.uniform()*100.000,
645  900.000 + RG.uniform()*100.000,
646  900.000 + RG.uniform()*100.000
647  );
648  query_pose.set_xyz( core::id::AtomID( atom_nr, pos ), ai );
649  }
650  }
651  }
652  } // for selected atoms
653 } // randomize_selected_atoms
654 
655 } // comparative_modeling
656 } // protocols