Rosetta 3.5
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
SheetBuilder.cc
Go to the documentation of this file.
1 // -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
2 // vi: set ts=2 noet:
3 //
4 // (c) Copyright Rosetta Commons Member Institutions.
5 // (c) This file is part of the Rosetta software suite and is made available under license.
6 // (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
7 // (c) For more information, see http://www.rosettacommons.org. Questions about this can be
8 // (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
9 
10 /// @file
11 /// @brief secondary structure will hold statistics about secondary structure predictions
12 /// sources can be from
13 /// - fragments
14 /// - psipred files ? other stuff
15 ///
16 /// @detailed
17 /// from converting jumping_pairings.cc of rosetta++ into mini
18 ///
19 ///
20 ///
21 /// @author Oliver Lange
22 /// @author Christopher Miles (cmiles@uw.edu)
23 
24 // Unit Headers
26 
27 // Package Headers
31 
32 // Project Headers
33 #include <core/types.hh>
35 
36 // Utility headers
37 #include <basic/Tracer.hh>
38 
39 // ObjexxFCL Headers
40 #include <ObjexxFCL/FArray1D.hh>
41 #include <ObjexxFCL/FArray1A.hh>
42 #include <ObjexxFCL/FArray2A.hh>
43 #include <ObjexxFCL/FArray3D.hh>
44 #include <ObjexxFCL/StaticIndexRange.hh>
45 #include <ObjexxFCL/format.hh>
46 
47 // numeric headers
48 #include <numeric/random/random.hh>
49 #include <numeric/numeric.functions.hh>
50 
51 //// C++ headers
52 #include <cstdlib>
53 
54 #include <utility/vector1.hh>
55 
56 
57 static basic::Tracer tr("protocols.jumping");
58 static numeric::random::RandomGenerator RG(14934); // <- Magic number, do not change
59 
60 namespace protocols {
61 namespace jumping {
62 
63 using namespace ObjexxFCL;
64 using namespace ObjexxFCL::fmt;
65 using namespace core;
66 
67 //////////////////////////////////////////////////////////////////////////////
68 //////////////////////////////////////////////////////////////////////////////
69 // NOTES ( old comment copied from jumping_pairings )
70 //
71 ////////////////////////
72 // PAIRINGS FILE FORMAT:
73 //
74 // one line per pairing, 4 numbers per line, white-space delimited
75 //
76 // format: "pos1 pos2 orientation pleating"
77 //
78 // pos1 and pos2 are the sequence numbers of the beta-paired positions
79 //
80 // orientation is the beta-strand orientation:
81 // "1" for antiparallel, "2" for parallel
82 // or just "A" or "P"
83 //
84 // pleating determines the pleat of the beta-carbons
85 // "1" if the N and O of pos1 are pointed away from pos2
86 // "2" if the N and O of pos1 are pointed toward pos2
87 //
88 // eg, in the antiparallel case, a pleat of 2 would mean that
89 // there are two backbone-backbone hydrogen bonds between pos1 and pos2
90 // In the parallel case a pleat of 2 means that pos1 is Hbonding with
91 // pos2-1 and pos2+1
92 //
93 // if you check out rosetta_benchmarks, in the directory 1d3z/
94 // is a pairing file "pairings.dat" with two pairings in it.
95 // These are native pairings, ie they match the pdb structure
96 // 1d3z.pdb in the same directory. In Fortran you had to tell
97 // Rosetta how many pairings to expect; that's why the first line
98 // has a "2" on it. This is no longer necessary.
99 
100 //////////////////////
101 // COMMAND LINE SYNTAX
102 //
103 // for ab initio folding with pairings: the usual "xx 1xyz _ -silent"
104 // plus:
105 //
106 // -pairing_file <pairings-file>
107 //
108 // with no other arguments and it will try to build decoys with
109 // *all* the pairings in the file. You can also specify what
110 // kind of sheet topology Rosetta should try to construct:
111 //
112 // -sheet1 <N1> -sheet2 <N2> ... -sheetk <Nk>
113 //
114 // Here Nj is the number of strand pairs in sheet number j. So the
115 // number of forced pairings will be (Nj-1). The sheet can
116 // get other strands during the folding simulation -- this is
117 // just specifying how many Rosetta should actually build from
118 // the start using the broken chain stuff.
119 //
120 // So the total number of forced pairings will be:
121 // N1 + N2 + N3 + ... + Nk
122 //
123 // For example, to specify two strand pairings in two different
124 // sheets, use the args "-sheet1 2 -sheet2 2"
125 //
126 //
127 //////////////////////////////////////////////////////////////////////////////
128 //////////////////////////////////////////////////////////////////////////////
129 //////////////////////////////////////////////////////////////////////////////
// Member initialisers of the main constructor:
//  - total_residue_ is cached from ss->total_residue()
//  - same_strand_ is a freshly allocated SameStrand built from ss
//  - secondary_structure_ keeps ss itself
//  - pairings_ and sheet_sizes_ are copied from the pairings / sheet_topol arguments
//  - bForceSingleSheet_ is hard-wired to true
131  total_residue_( ss->total_residue() ),
132  pairings_( pairings ),
133  same_strand_( new SameStrand( ss ) ),
134  secondary_structure_( ss ),
135  sheet_sizes_( sheet_topol ),
136  bForceSingleSheet_( true ) // this should be set by an option or so
137 {}
138 
139 //copy c'stor
// Member-wise copy of every field after copy-constructing the BaseJumpSetup base.
// NOTE(review): same_strand_ is copied from other as-is, whereas the main constructor
// allocates it with new. If it is a pointer-like handle, the copy shares one SameStrand
// with `other`, so same_strand_->redo() on either object affects both -- confirm intended.
141  : BaseJumpSetup(other),
142  total_residue_( other.total_residue_ ),
143  pairings_( other.pairings_ ),
144  same_strand_( other.same_strand_ ),
145  secondary_structure_( other.secondary_structure_ ),
146  sheet_sizes_( other.sheet_sizes_ ),
147  bForceSingleSheet_( other.bForceSingleSheet_ ) // this should be set by an option or so
148 {}
149 
150 //d'stor
152 
153 ///@brief simply random choice of pairing from pool
154 void
155 SheetBuilder::choose_next_pairing( FArray3D_int& sheet_pairing, Size pairing, Size sheet ) const {
156  int const p = static_cast< int >( RG.uniform() * pairings_.size() ) + 1;
157  // tr.Trace << "Picked pairing " << p << " out of " << pairings_.size() << std::endl;
158  runtime_assert( p>=1 && p<= (int) pairings_.size() );
159  // you should replace sheet_pairing with array of Pairings!!!!!
160  // then just use operator=
161  sheet_pairing( 1, pairing, sheet ) = pairings_[ p ].Pos1();
162  sheet_pairing( 2, pairing, sheet ) = pairings_[ p ].Pos2();
163  sheet_pairing( 3, pairing, sheet ) = pairings_[ p ].Orientation();
164  sheet_pairing( 4, pairing, sheet ) = pairings_[ p ].Pleating();
165 }
166 
167 ///////////////////////////////////////////////////////////////////////////////
168 ///@detail return true if pairings have a strand in common
/// Only entries 1 and 2 of each argument are looked at. All four cross combinations
/// ( p1(1),p2(1) ), ( p1(1),p2(2) ), ( p1(2),p2(1) ), ( p1(2),p2(2) ) are handed to
/// same_strand_->eval(); the first combination that evaluates to true ends the search.
///@param p1  first pairing
///@param p2  second pairing
///@return true as soon as one combination is reported as same-strand, false otherwise
169 bool
171  FArray1A_int p1,
172  FArray1A_int p2
173 ) const
174 {
175  for ( int i = 1; i <= 2; ++i ) {
176  for ( int j = 1; j <= 2; ++j ) {
177  if ( same_strand_->eval( p1(i), p2(j) ) ) return true;
178  }
179  }
180  return false;
181 }
182 
183 ///////////////////////////////////////////////////////////////////////////////
184 ///////////////////////////////////////////////////////////////////////////////
///@brief check pairing number last_pairing against all earlier pairings of the same list
///@detail Every earlier pairing i ( 1 .. last_pairing-1 ) is compared with the last one
/// through check_two_pairings(). The last pairing is rejected if any comparison
/// reports incompatible pleating or two common strands. Otherwise the common-strand
/// counts are summed: the sum must be below 2, and must be at least 1 unless
/// force_single_sheet is false.
///@param pairing_list  pairing table; re-dimensioned here to 4 x 40
///@param last_pairing  index of the pairing under test; values below 2 are accepted unchecked
///@param force_single_sheet  when true the last pairing must share exactly one strand
///  with the earlier pairings; when false sharing none is accepted as well
///@return true if the last pairing is acceptable
185 bool
187  FArray2A_int pairing_list,
188  const int last_pairing,
189  const bool force_single_sheet
190 ) const
191 {
192  if ( last_pairing < 2 ) return true; // first strand is OK by default
193  // tr.Trace << "check_sheet_pairings ... last_pairing: " << last_pairing << std::endl;
// NOTE(review): nothing here verifies last_pairing <= max_sheet_size -- confirm callers guarantee it
194  const int max_sheet_size( 40 ); // can we get rid of this ? XXXX
195  pairing_list.dimension( 4, max_sheet_size );
196 
197  int total_common_strands( 0 );
198  for ( int i = 1; i <= last_pairing-1; ++i ) {
199  int common_strands(0);
200 
201  //count common strands and check if pleating is compatible
202  const bool ok( check_two_pairings( pairing_list(1,last_pairing),
203  pairing_list(1,i), common_strands ) );
204 
205  // tr.Trace << "( " << i << "-"<<last_pairing <<" common_strands: " << common_strands << ") " << ( ok ? "good" : "bad" ) << " pleating" << std::endl;
206  //wrong pleating or two common_strands doesn't help to build beta-sheets
207  if ( !ok || common_strands > 1 ) return false;
208 
209  // running total over all earlier pairings; the final test below depends on it
210  total_common_strands += common_strands;
211  }
212 
// accept exactly one shared strand, or also none when a single sheet is not enforced
213  return ( ( total_common_strands >= 1 || !force_single_sheet ) &&
214  ( total_common_strands < 2 ) );
215 }
216 
217 ///////////////////////////////////////////////////////////////////////////////
218 ///@detail count how many strands are in common : 0, 1, or 2
219 /// if 1 or 2 strands are in common check if pleating of the two pairings is compatible
///@param pairing1  first pairing; re-dimensioned to 4 entries and turned into a Pairing
///@param pairing2  second pairing; treated the same way
///@param common_strands  output: number of strands the two pairings share ( 0, 1 or 2 )
///@return true when no strand is shared, or when the pleating rule for the shared
///  strand(s) holds:
///   - 1 common strand : even sequence separation needs different pleating, odd needs equal
///   - 2 common strands: even sequence separation needs equal pleating, odd needs different
///  The sequence separation is |p1.Pos1() - p2.Pos1()| after the local copies have been
///  oriented so that both Pos1 lie on the shared strand.
220 bool
222  FArray1A_int pairing1,
223  FArray1A_int pairing2,
224  int & common_strands
225 ) const
226 {
227  pairing1.dimension(4);
228  pairing2.dimension(4);
229 
230  common_strands = 0;
231 
// local Pairing copies; the reverse() calls below act on these copies only
232  core::scoring::dssp::Pairing p1 ( pairing1 );
233  core::scoring::dssp::Pairing p2 ( pairing2 );
234 
// The two-strand cases are tested first so that a pair sharing both strands is never
// classified as sharing just one.
// NOTE(review): reverse() is defined elsewhere; presumably it swaps Pos1/Pos2 of the
// pairing -- confirm whether it also adjusts the pleating.
235  if ( same_strand_->eval( p1.Pos1(), p2.Pos1() ) && same_strand_->eval( p1.Pos2(), p2.Pos2() ) ) {
236  common_strands = 2;
237  } else if ( same_strand_->eval(p1.Pos1(), p2.Pos2()) && same_strand_->eval(p1.Pos2(), p2.Pos1()) ) {
238  common_strands = 2;
239  // could reverse either one:
240  p2.reverse();
241  } else if ( same_strand_->eval(p1.Pos1(), p2.Pos1()) ) {
242  common_strands = 1;
243  } else if ( same_strand_->eval(p1.Pos1(), p2.Pos2()) ) {
244  p2.reverse();
245  common_strands = 1;
246  } else if ( same_strand_->eval(p1.Pos2(), p2.Pos1()) ) {
247  p1.reverse();
248  common_strands = 1;
249  } else if ( same_strand_->eval(p1.Pos2(), p2.Pos2()) ) {
250  p1.reverse();
251  p2.reverse();
252  common_strands = 1;
253  }
254 
255  // now we have set things up so that p1.pos1 and p2.pos1
256  // are in the same strand.
257  //
258  // and maybe also p1.pos2 and p2.pos2, if common_strands == 2
259  runtime_assert ( common_strands == 0 || same_strand_->eval(p1.Pos1(), p2.Pos1()) );
260 
// parity ( 0 = even, 1 = odd ) of the sequence separation between the two Pos1 residues;
// computed even when common_strands == 0, where the result does not depend on it
261  const int seqsep_mod2( numeric::mod( std::abs( (int) p1.Pos1() - (int) p2.Pos1() ), 2) );
262 
263  return ( ( common_strands == 0 ) ||
264  ( common_strands == 1 && seqsep_mod2 == 0 && p1.Pleating() != p2.Pleating() ) ||
265  ( common_strands == 1 && seqsep_mod2 == 1 && p1.Pleating() == p2.Pleating() ) ||
266  ( common_strands == 2 && seqsep_mod2 == 0 && p1.Pleating() == p2.Pleating() ) ||
267  ( common_strands == 2 && seqsep_mod2 == 1 && p1.Pleating() != p2.Pleating() ) );
268 }
269 
270 bool
271 SheetBuilder::check_next_pairing( FArray3D_int& sheet_pairings, Size pairing, Size sheet ) const {
272  for ( Size ii = 1; ii<=2; ii++ ) { //check if both residues of the pairing make sense at all
273  Size pos = sheet_pairings( ii, pairing, sheet );
274  if ( pos == 1 || pos >= total_residue_ ) return false;
275  }
276 
277  // full compatibility chec
278  //check1: dont want to intersect previous sheets
279  for ( int prev_sheet = 1; prev_sheet <= (int) sheet - 1; ++prev_sheet ) {
280  int const num_pairings = sheet_sizes_[prev_sheet];
281  for ( int prev_pairing = 1; prev_pairing <= num_pairings; ++prev_pairing ) {
282  if ( check_pairing_intersect( sheet_pairings( 1, prev_pairing, prev_sheet ),
283  sheet_pairings( 1, pairing, sheet ) ) ) {
284  // tr.Debug << "paring intersect : check failed for pairing" << pairing << " sheet: " << sheet << std::endl;
285  return false;
286  }
287  }
288  }
289  //check2:
290  return check_sheet_pairings(
291  sheet_pairings( 1, 1, sheet ),
292  pairing, bForceSingleSheet_ );
293 }
294 
// Repeatedly asks builder_loop() for a list of pairings and wraps it in a JumpSample
// together with total_residue_ and *secondary_structure_. The first sample whose
// is_valid() holds is returned; if no attempt succeeds a default-constructed
// JumpSample is returned instead.
// NOTE(review): `cuts` is declared but never used in this body.
297  FArray1A_int cuts;
// trial runs from 1 to 29, i.e. at most 29 attempts
298  for ( int trial = 1; trial < 30; trial ++ ) {
299  core::scoring::dssp::PairingsList jump_pairings;
300  bool success = builder_loop( jump_pairings );
301  if ( !success ) {
// NOTE(review): the message counts down from 10 although the loop allows 29 attempts,
// so the printed number becomes zero and then negative from the 10th failure on;
// 29-trial would match the loop bound.
302  tr.Warning << "redo builder_loop failed, will try " << 10-trial << " more times" << std::endl;
304  continue;
305  }
306  JumpSample jumps( total_residue_, jump_pairings, *secondary_structure_ );
307  // tr.Debug << " created jump sample " << jumps << std::endl;
308  /* if ( tr.Debug.visible() && jumps.is_valid() ) { //debugging output
309  // tr.Debug << "same_strand: ";
310  typedef utility::vector1< int > Int_List;
311  Int_List jres;
312  for ( core::scoring::dssp::PairingsList::const_iterator it = jump_pairings.begin(),
313  eit = jump_pairings.end(); it != eit; ++it ) {
314  jres.push_back( it->Pos1() );
315  jres.push_back( it->Pos2() );
316  }
317  for ( Int_List::iterator it=jres.begin(), eit=jres.end(); it!=eit; ++it ) {
318  for ( Int_List::iterator sit=jres.begin(), seit=jres.end(); sit!=seit; ++sit ) {
319  if ( *it > *sit ) {
320  // tr.Debug << *it << ":"<< *sit << " " << (same_strand_->eval(*it,*sit ) ? "same" : "diff") << " ";
321  }
322  }
323  }
324  // tr.Debug << std::endl;
325  } // is_valid()
326  */
327  if ( jumps.is_valid() ) return jumps;
328  // tr.Debug << " ...which had corrupted fold-tree. Try again! " << std::endl;
329  }
// all attempts used up: the hard exit below is disabled, so callers receive a
// default-constructed JumpSample
330  //utility_exit_with_message( "impossible to find a valid fold-tree with given sheet-topology and pairings" );
331  return JumpSample(); //to make compiler happy
332 }
333 
///@brief fill a ( 4, 40, 40 ) table with randomly drawn pairings for every sheet and,
/// on success, copy them into jump_pairings
///@detail Up to 30 outer rounds ( tries1 ), each starting with same_strand_->redo(), and
/// up to 30 inner rounds ( tries2 ) per outer round. In each inner round every slot
/// of every sheet is drawn with choose_next_pairing() and redrawn while
/// check_next_pairing() rejects it; all slots of one inner round share a budget of
/// 1000 redraws ( tries3 ).
///@return false if no round succeeded ( jump_pairings is then left untouched ), true
/// after jump_pairings has been cleared and refilled, sheet by sheet
/// NOTE(review): sheet_sizes_.size() and its entries are not checked against the fixed
/// extents of 40 used for the table -- confirm they cannot exceed them.
334 bool
336  Size const max_sheet_size( 40 );
337  Size const max_sheets( 40 ); // can we determine these from something ?
338  int num_sheets( sheet_sizes_.size() );
339  //// fill the sheet_pairing array:
340  FArray3D_int sheet_pairing( 4, max_sheet_size, max_sheets );
341  tr.Info << "Start Sheet Building for " << num_sheets << " sheets of size ";
342  for ( int ii = 1; ii <= num_sheets; ii++ ) tr.Info << sheet_sizes_[ ii ] << " ";
343  tr.Info << std::endl;
// success is only ever assigned inside the while-condition further down; if no slot
// exists ( no sheets, or only empty ones ) it stays false and all rounds run to the end
344  bool success = false;
345  int failed_once = 0;
346  for ( int tries1 = 0; tries1 < 30 && !success; ++tries1 ) { // redo_same_strand: try different sheet boundaries
347  same_strand_->redo();
348  for ( int tries2 = 0; tries2 < 30 && !success; ++tries2 ) { // sheet_fail
349  tr.Debug << "SheetBuilder-loop round: " << tries1 << "/" << tries2 << std::endl;
350  // here we fill the array "sheet_pairing" that has all the pairings
351  // organized by sheets
352 
// redraw budget shared by all slots of this inner round
353  int tries3 ( 0 );
354  int const max_tries3 ( 1000 );
355 
356  // for each sheet
357  for ( int sheet = 1; sheet <= num_sheets; ++sheet ) {
358  // select sheet_sizes_[ sheet ] pairings
359  for ( int pairing = 1; pairing <= (int) sheet_sizes_[ sheet ]; ++pairing ) {
360  choose_next_pairing( sheet_pairing, pairing, sheet ); //draw randomly from pool
// the two nested loops below only wrap a disabled debug statement and do nothing
361  for ( int s = 1; s<=sheet; s++ ) {
362  for ( int p =1; p<=pairing; p ++ ) {
363  //tr.Debug << s << ":" << p << " pairs: " << sheet_pairing(1, p, s) << "-" <<sheet_pairing(2, p, s) << std::endl;
364  }
365  }
366 
367 
// NOTE(review): success is overwritten for every slot, so after the sheet loops it
// reflects the last slot only. Once tries3 reaches max_tries3 a rejected slot is kept
// and the loops carry on; if the final slot then passes, the round counts as a
// success although an earlier slot was rejected -- looks unintended, confirm.
368  while ( !( success=check_next_pairing( sheet_pairing, pairing, sheet ) ) && tries3 < max_tries3 ) {
369  choose_next_pairing( sheet_pairing, pairing, sheet );
370  for ( int s = 1; s<=sheet; s++ ) {
371  for ( int p =1; p<=pairing; p ++ ) {
372  // tr.Debug << s << ":" << p << " pairs: " << sheet_pairing(1, p, s) << "-" <<sheet_pairing(2, p, s) << std::endl;
373  }
374  }
375 
376  ++tries3;
377  } // loop to choose pairings
378  // tr.Trace << "Pairing " << pairing << " chosen for sheet " << sheet << std::endl;
379  } // for pairings per sheet
380  } // for sheets
381  } // for tries2 ( sheet_fail )
382  // if not successful try different sheet boundaries
383  if ( !success ) {
// NOTE(review): stores the zero-based round index, so a failure in the first outer
// round leaves failed_once at 0 and the warning after the loops is not printed
384  failed_once=tries1;
385  tr.Info << "redo same_strand: too many tries with this one!" <<
386  fmt::SS( tries1 ) << std::endl;
387  // makes stochastic decisions about strand boundaries:
388  // some decisions may not be compatible with the desired pairings
389  // and our logic for choosing sheets
390  }
391  } // for tries1 ( redo_same_strand)
392 
393  if ( !success ) {
394  return false;
395  // utility_exit_with_message( "problem in SheetBuilder::builder_loop(): tries1 > 100." );
396  }
397  if ( failed_once ) tr.Warning << "figured out a valid strand after " << failed_once << " outer loop iterations " << std::endl;
398  // now load these pairings into the jump_pairings list:
399  jump_pairings.clear();
400  for ( int sheet = 1; sheet <= num_sheets; ++sheet ) {
401  for ( int pairing = 1, pe = sheet_sizes_[sheet]; pairing <= pe; ++pairing ) {
402 
// build a Pairing from the four consecutive fields of this slot
403  core::scoring::dssp::Pairing p( sheet_pairing( 1,pairing,sheet) );
404  jump_pairings.push_back( p );
405 
406  // tr.Debug << "sheet_pairing:" << SS(sheet) << SS(pairing) << ' ' <<
407  // p.Pos1() << ' ' << p.Pos2() << ' ' <<
408  // p.Orientation() << ' ' << p.Pleating() << std::endl;
409  }
410  }
411  return true;
412 }
413 
414 } //jumping
415 } //protocols