Rosetta 3.5
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
ShortestPathInFoldTree.cc
Go to the documentation of this file.
1 // -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
2 // vi: set ts=2 noet:
3 //
4 // (c) Copyright Rosetta Commons Member Institutions.
5 // (c) This file is part of the Rosetta software suite and is made available under license.
6 // (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
7 // (c) For more information, see http://www.rosettacommons.org. Questions about this can be
8 // (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
9 
10 /// @file ShortestPathInFoldTree.cc
11 /// @brief helper class to FoldTree: allows to measure distance in fold-trees
12 /// @detailed This class provides a fast mechanism to determine the distance between residues
13 /// according to a given fold-tree
14 /// instead of storing a full NxN matrix with N number of residues
15 /// we store only MxM matrix for distances between jump_residue (at most M=2*J J nr of jumps)
16 /// a table with N entries gives for each peptide the distance to the next jump-node.
17 /// thus memory requirement is low and still,
18 /// a single dist-evaluation requires to check only 4 possible pathways
19 ///
20 /// @author Oliver Lange
21 ///
22 
23 
24 
25 // Unit Headers
27 
28 // Package Headers
30 
31 // Project Headers
32 // AUTO-REMOVED #include <core/pose/Pose.hh>
33 #include <core/types.hh>
34 
35 // ObjexxFCL Headers
36 // AUTO-REMOVED #include <ObjexxFCL/format.hh>
37 
38 // Utility headers
39 // AUTO-REMOVED #include <utility/vector1.hh>
40 
41 //// C++ headers
42 // AUTO-REMOVED #include <cstdlib>
43 #include <string>
44 
45 #include <basic/Tracer.hh>
46 
47 //Auto using namespaces
48 namespace ObjexxFCL {
49 namespace fmt {
50 }
51 }
52 using namespace ObjexxFCL::fmt; // AUTO USING NS
53 //Auto using namespaces end
54 
55 
56 
57 namespace core {
58 namespace kinematics {
59 
60 /// @details Auto-generated virtual destructor
61 ShortestPathInFoldTree::~ShortestPathInFoldTree() {}
62 
63 static basic::Tracer tr("core.kinematics.ShortestPathInFoldTree",basic::t_info);
64 
65 
66 /// @detail cs-tor
67 ShortestPathInFoldTree::ShortestPathInFoldTree(
69 ) : nres_( f.nres() ), max_dist_( 0 )
70 {
71  simple_fold_tree_ = ( f.num_jump() == 0 );
73  if ( !simple_fold_tree_ ) {
76  }
77 }
78 
79 
80 
81 /// @detail the core of the distance cache is build here: node_dist_
82 /// a 2D array that knows distances for each pair of jump-residues
83 /// i.e., a 10 100 1 / 100 120 -1 / 120 150 2 -- fold-tree would have
84 /// jump_residues 10, 100, 120 and 150.
85 /// distances would be 10, 100 1
86 /// 120 150 1
87 // 10 150 22
88 /// 100 150 21
89 /// ....
90 /// for the jump_residues we use internal numbering, each seqpos that is a jump-res in one or more jumps will get a
91 /// individual number, starting at 1 and counting continously.
92 /// thus the distance of jump_res-pair i,j is found as node_dist(i,j)
93 ///
94 /// first we go through the fold-tree to find all jump_residues, and build the map: jump_res_
95 /// to keep track between mapping "seqpos<-->internal_numbering"
96 ///
97 /// then assign distance 1 to each pair of jump_residues connected by a jump
98 /// and using Warshall algorithm to build up full distance matrix
99 /// diagonal is distance 0
100 void
102  using namespace core::kinematics;
103  // go through jumps and find the jump-residues start distance table
104  // maintain an EdgeList to memorize the pairs until full number of unique jump-residues is known
105 
106  // typedef utility::vector1< Edge > EdgeList;
107  EdgeList edges;
108 
109 
110  /// cycle through list of jumps --> store jump_residues and assign running number (ct) to them
111  int ct = 1; /// for giving individual numbers to jump_residues
112  for ( Size jump = 1; jump <= f.num_jump(); jump++ ) {
113  Size const start ( f.jump_edge( jump ).start() );
114  Size const stop ( f.jump_edge( jump ).stop() );
115  tr.Trace << "add jump " << start << "-" << stop << std::endl;
116 
117  //
118  // search in our list of jump-residues, assign to my_start/my_stop if we got it already
119  //
120  // setup
121  int my_start = -1; //-1 denotes: "not found"
122  int my_stop = -1;
123  std::map< Size, Size>::const_iterator fit;
124 
125  // look for start residue
126  fit = jump_res_.find( start );
127  if ( fit != jump_res_.end() ) {
128  my_start = fit->second;
129  } else {
130  my_start = ct;
131  jump_res_[ start ] = ct++;
132  }
133 
134  // look for stop residue
135  fit = jump_res_.find( stop );
136  if ( fit != jump_res_.end() ) {
137  my_stop = fit->second;
138  } else {
139  my_stop = ct;
140  jump_res_[ stop ] = ct++;
141  }
142  edges.push_back( Edge(my_start, my_stop, jump ) );
143  };
144 
145  //if root is not residue 1
146  //add the root of the tree to the list of jumps as a jump-onto-itself
147  if ( f.root() > 1 ) {
148  int nr = get_jump( f.root() );
149  if ( nr < 0 ) { //haven't got root already in my list of jumps
150  jump_res_[ f.root() ] = ct++;
151  }
152  }
153 
154  // create some debug output
155  if ( tr.Trace.visible() ) {
156  tr.Trace << " jump_res_nr - seqpos \n";
157  for ( std::map< Size, Size>::const_iterator it=jump_res_.begin(), eit=jump_res_.end();
158  it!=eit; ++it ) {
159  tr.Trace << it->second << " - " << it->first << std::endl;
160  }
161 
162  tr.Trace << " jump-edges -- internal enumeration \n" ;
163  for ( EdgeList::const_iterator it=edges.begin(), eit=edges.end(); it!=eit; ++it ) {
164  tr.Trace << it->start() << " -- " << it->stop() << std::endl;
165  }
166  }
167 
168  init_dist_map( edges );
169  compute_dist_map( f );
170 }
171 
172 
173 ///@detail initialize dist map with dist 1 for each pair of residues connected by jumps
174 /// as stored in the EdgeList
175 void
177 
178  // Warshall algorithm
179  Size const inf( 12345678 ); //assumption: fewer than 12 million nodes in the graph.
180  assert( jump_res_.size() < inf );
181  node_dist_.dimension( jump_res_.size(), jump_res_.size(), inf );
182 
183  // initialize distance array with jump-edges
184  for ( EdgeList::const_iterator it=edges.begin(), eit=edges.end(); it!=eit; ++it ) {
185  node_dist_( it->start(), it->stop() ) = 1;
186  node_dist_( it->stop(), it->start() ) = 1;
187  node_dist_( it->start(), it->start() ) = 0;
188  node_dist_( it->stop(), it->stop() ) = 0;
189  }
190 }
191 
192 
193 /// @detail
194 // to compute the full dist map we go through 2 steps:
195 //
196 // (1) get distanes on "simple paths" i.e., length of peptide edges that connect two jumps
197 // (2) use warshall algo to get all distances by combining distances via jumps and simple peptide edges
198 void
200 
201 
202  // look for peptid edges that connect two jumps
203  for ( FoldTree::const_iterator it=f.begin(), eit=f.end();
204  it!=eit; ++it )
205  {
206  if ( it->is_jump() ) continue; // only look at peptide edges
207  std::map< Size, Size>::const_iterator fit;
208 
209  // do we have start and stop reside listed as jump_residues?
210  int my_start = -1; //-1 denotes "not found"
211  int my_stop = -1;
212 
213  // look for start residue
214  fit = jump_res_.find( it->start() );
215  if ( fit != jump_res_.end() ) {
216  my_start = fit->second;
217  }
218 
219  // look for stop residue
220  fit = jump_res_.find( it->stop() );
221  if ( fit != jump_res_.end() ) {
222  my_stop = fit->second;
223  }
224 
225  // if start and stop are jump-residues this is an internal peptide edge!
226  if ( my_start > 0 && my_stop > 0 ) {
227  Size dd = node_dist_( my_start, my_stop ) = std::abs( it->start() - it->stop() );
228  node_dist_( my_stop, my_start ) = dd;
229  };
230  } // for fold-tree edges
231 
232 
233  // Warshall algorithm
234  // symmetry makes this marginally inefficient, but easy to read
235  // if this shows up in a hotspot, it can be made more efficient
236  for ( Size ii = 1; ii <= jump_res_.size(); ++ii ) {
237  for ( Size jj = 1; jj <= jump_res_.size(); ++jj ) {
238  for ( Size kk = 1; kk <= jump_res_.size(); ++kk ) {
239  Size const jj_2_kk = node_dist_( jj, kk );
240  Size const jj_2_ii = node_dist_( jj, ii );
241  Size const ii_2_kk = node_dist_( ii, kk );
242 
243  Size const jj_2_ii_2_kk = jj_2_ii + ii_2_kk;
244 
245  if ( jj_2_kk > jj_2_ii_2_kk ) {
246  node_dist_( jj, kk ) = jj_2_ii_2_kk;
247  node_dist_( kk, jj ) = jj_2_ii_2_kk;
248  }
249  }
250  }
251  }
252 
253  // produce some debug output
254 // if ( tr.Trace.visible() ) {
255 // tr.Trace << "jump_res distance table:\n";
256 // for ( Size ii = 1; ii <= jump_res_.size(); ++ ii ) {
257 // for ( Size jj = 1; jj <= jump_res_.size(); ++ jj ) {
258 // //tr.Trace << node_dist_( ii, jj ) << " ";
259 // }
260 // tr.Trace << std::endl;
261 // }
262 // }
263 }// compute_dist_map
264 
265 ///@detail build table that gives for each residue the distance to
266 /// upstream and downstream jump-residues (if available)
267 ///
268 /// format:
269 /// <edge_nr> <jump1> <dist1> <jump2> <dist2>
270 //
271 /// edge_nr is a unique number that identifes peptide-edges
272 /// jump1 and jump2 refers to our internally-numbered jump-residues (entries in node_dist_ )
273 /// dist -- distance in sequence to the respective jump-residues
274 void
276  using namespace core::kinematics;
277  using namespace ObjexxFCL::fmt;
278  res2jumps_.dimension( f.nres(), 5, -1 ); //5 entries per residue.
280  // go thru edges and fill res2jump array accordingly
281  Size edge_nr = 1;
282  for ( FoldTree::const_iterator it=f.begin(), eit=f.end();
283  it!=eit;
284  ++it, ++edge_nr )
285  {
286  int start_jump = get_jump( it->start() ); // returns -1 if node is not a jump residue
287  int stop_jump = get_jump( it->stop() );
288  if ( start_jump < 0 ) leaves.push_back( it->start() );
289  if ( stop_jump < 0 ) leaves.push_back( it->stop() );
290  if ( !it->is_jump() ) { // a peptide edge
291  for ( int seqpos = std::min(it->start(),it->stop()); seqpos<=std::max( it->stop(),it->start() ); seqpos++ ) {
292  res2jumps_( seqpos, 1 ) = edge_nr;
293  res2jumps_( seqpos, 2 ) = start_jump;
294  res2jumps_( seqpos, 3 ) = std::abs( (int) seqpos - (int) it->start() );
295  res2jumps_( seqpos, 4 ) = stop_jump;
296  res2jumps_( seqpos, 5 ) = std::abs( (int) seqpos - (int) it->stop() );
297  } // for seqpos
298  // int edge_length = std::abs( (int) it->stop() - (int) it->start() );
299  // if (( start_jump > 0 && stop_jump < 0 ) && ( maxdist2leave( start_jump ) < edge_length ) ) maxdist2leave( start_jump ) = edge_length;
300  // if (( start_jump < 0 && stop_jump > 0 ) && ( maxdist2leave( stop_jump ) < edge_length ) ) maxdist2leave( stop_jump ) = edge_length;
301  } else { // a jump edge
302  res2jumps_( it->start(), 1 ) = edge_nr;
303  res2jumps_( it->start(), 2 ) = start_jump;
304  res2jumps_( it->start(), 3 ) = 0;
305  res2jumps_( it->start(), 4 ) = stop_jump;
306  res2jumps_( it->start(), 5 ) = 1;
307 
308  res2jumps_( it->stop(), 1 ) = edge_nr;
309  res2jumps_( it->stop(), 2 ) = start_jump;
310  res2jumps_( it->stop(), 3 ) = 1;
311  res2jumps_( it->stop(), 4 ) = stop_jump;
312  res2jumps_( it->stop(), 5 ) = 0;
313  } //if jump
314  } //for edges
315  // find maximum distance:
316  // it will be from leave to leave ... go throu all combinations and get distance...
317  max_dist_ = 0;
318  for ( Size i1 = 1; i1 <= leaves.size() ; i1++ ) {
319  for ( Size i2 = i1; i2 <= leaves.size() ; i2++ ) {
320  max_dist_ = std::max( max_dist_, dist( leaves[ i1 ],leaves[ i2 ] ) );
321  }
322  }
323 
324  // already no maximum distance to leaves in fold-tree
325  // now go throu all combination of
326 // for ( int seqpos = 1; seqpos <= f.nres(); seqpos++ ) {
327 // if ( res2jumps_( seqpos, 1) == -1 ) { //haven't found this residue in any peptide edge
328 // res2jump2_( seqpos, 1 ) =
329 // }
330 // }
331  //produce some debug output
332 // if ( tr.Trace.visible() ) {
333 // tr.Trace << " edge_nr jump1 dist1 jump2 dist2 \n";
334 // for ( Size ii = 1; ii<=f.nres(); ii++ ) {
335 // for ( Size k = 1; k<=5; k++ ) {
336 // tr.Trace << I(3, res2jumps_( ii, k ) );
337 // }
338 // tr.Trace << std::endl;
339 // }
340 
341 // tr.Trace << "\n total distance list \n";
342 // for ( Size ii = 1; ii<=f.nres(); ii++ ) {
343 // for ( Size jj = 1; jj<=f.nres(); jj++ ) {
344 // tr.Trace << "(" << ii << "," << jj << ") " << dist( ii, jj ) << std::endl;
345 // }
346 // }
347 // }
348 }
349 
350 ///@detail distance between two residues
351 /// with the help of our pre-computed data
352 /// this only requires comparison of 4 possible pathways:
353 /// go via upstream/downstream jump-residue for pos1/pos2
354 Size
356  if ( simple_fold_tree_ ) {
357  return std::abs( (int) pos1 - (int) pos2 );
358  }
359 
360  // on same edge ?
361  if ( res2jumps_( pos1, 1 /*asks for edge-nr*/ ) == res2jumps_( pos2, 1 /*asks for edge-nr*/ ) ) {
362  return std::abs( (int) pos1 - (int) pos2 );
363  };
364 
365  // compute for possibilities and take smallest
366  int const inf( 12345678 ); //assumption: fewer than 12 million nodes in the graph.
367 
368  Size min_dist = inf;
369 
370  // check 2x2 possibilities of up-/downstream jump-residues for pos1/pos2
371  for ( Size ii=1; ii<=2 ; ++ii ) { //choose jump-node for pos1
372  if ( res2jumps_( pos1, 2*ii ) < 0 ) continue; // is not a jump-residue
373  for ( Size jj=1; jj<=2; ++jj ) { //choose jump-node for pos2
374  if ( res2jumps_( pos2, 2*jj ) < 0 ) continue; // is not a jump-residue
375  // tr.Trace << "dist1: " << res2jumps_( pos1, 2*ii+1) << " " << pos1 << " --> " << res2jumps_( pos1, 2*ii ) << "\n";
376  // tr.Trace << "dist2: " << res2jumps_( pos2, 2*jj+1) << " " << pos2 << " --> " << res2jumps_( pos2, 2*jj ) << "\n";
377  Size dist = res2jumps_( pos1, 2*ii+1) + res2jumps_( pos2, 2*jj+1 )
378  + node_dist_( res2jumps_( pos1, 2*ii ), res2jumps_( pos2, 2*jj ) );
379  if ( dist < min_dist ) min_dist = dist;
380  }
381  }
382  assert ( min_dist <= nres_ );
383  return min_dist;
384 }
385 
386 
387 } //kinematics
388 } //core