Rosetta 3.5
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
LoopHashSampler.cc
Go to the documentation of this file.
1 // -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
2 // vi: set ts=2 noet:
3 //
4 // (c) Copyright Rosetta Commons Member Institutions.
5 // (c) This file is part of the Rosetta software suite and is made available under license.
6 // (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
7 // (c) For more information, see http://www.rosettacommons.org. Questions about this can be
8 // (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
9 
10 /// @file protocols/loops/LoopHashSampler.cc
11 /// @brief
12 /// @author Mike Tyka
13 
19 
20 #include <core/pose/util.hh>
21 #include <core/pose/Pose.hh>
22 #include <basic/Tracer.hh>
26 #include <utility/string_util.hh>
27 
28 #include <basic/options/option.hh>
29 #include <basic/options/keys/lh.OptionKeys.gen.hh>
30 
31 #include <utility/vector1.hh>
32 
33 #include <numeric/random/random.hh>
34 #include <numeric/random/random_permutation.hh>
35 
36 
37 #if defined(WIN32) || defined(__CYGWIN__)
38  #include <ctime>
39 #endif
40 
41 
42 
43 
44 
45 
46 namespace protocols {
47 namespace loophash {
48 
49  static basic::Tracer TR("LocalHashSampler"); // log channel used by the TR / TR.Debug / TR.Info output statements below
50 
// NOTE(review): the line naming this definition is absent from this listing; the member-initializer
// list below indicates a constructor that takes a fragment library and an inserter.
52  LoopHashLibraryOP library, // stored in library_
53  LocalInserterOP inserter // stored in inserter_
54 ):
55  library_(library),
56  inserter_(inserter),
57  start_res_ ( 2 ), // first residue of the sampling window
58  stop_res_ ( 0 ), // 0 is treated as "up to the last residue" by the sampling routine below
59  min_bbrms_ ( 0.0 ), // lower bound on the segment-vs-fragment RMS checked before insertion
60  max_bbrms_ ( 100000.0 ), // upper bound on the segment-vs-fragment RMS checked before insertion
61  min_rms_ ( 0.0 ), // lower bound on the RMS returned by the inserter
62  max_rms_ ( 100.0 ), // upper bound on the RMS returned by the inserter
63  max_struct_ (10), // budget that is divided across the library's loop sizes
64  max_struct_per_radius_ (10),
65  nonideal_ ( false ),
66  nprefilter_ ( 0 ) // OBSOLETE?
67 {
68  set_defaults(); // NOTE(review): the option-driven setters presumably overwrite several of the literal defaults above — confirm
69 }
70 
72 
// Loads the sampler settings from the lh:: command-line options through the corresponding setters.
// NOTE(review): the line carrying this function's name is absent from this listing; presumably this
// is the set_defaults() invoked by the constructor — confirm.
73 void
75  using namespace basic::options;
76  using namespace basic::options::OptionKeys;
77 
78  set_max_radius( option[ lh::max_radius ]() );
79 
80  set_min_bbrms( option[ lh::min_bbrms ]() );
81  set_max_bbrms( option[ lh::max_bbrms ] () );
82  set_min_rms( option[ lh::min_rms ]() );
83  set_max_rms( option[ lh::max_rms ]() );
84  set_max_struct( option[ lh::max_struct ]() );
85  set_max_struct_per_radius( option[ lh::max_struct_per_radius ]() );
86  set_max_nstruct( 10000000 ); // OBSOLETE?
87 
88  filter_by_phipsi_ = option[ lh::filter_by_phipsi ](); // assigned directly, unlike the settings above which go through setters
89 }
90 
    // Ordering predicate on two poses a and b: returns true when the "censcore" extra score of a
    // is lower than that of b.
    // NOTE(review): the signature line of this function is absent from this listing.
92  std::string scoreterm = "censcore";
93  core::Real as, bs; // NOTE(review): not initialized here; if getPoseExtraScores leaves them unwritten (e.g. score absent) the comparison reads indeterminate values — confirm
94  core::pose::getPoseExtraScores( a, scoreterm, as ); core::pose::getPoseExtraScores( b, scoreterm, bs);
95  return as < bs;
96 }
97 
98 // returns a vector of real numbers, one per residue of pose, giving the
99 // sampling weight. weight of 1.0 corresponds to "normal", i.e. unmodified sampling weight.
// The weights are parsed from the pose comment "sample_weight", split into tokens; residues
// without a usable token keep the default weight of 1.0.
// NOTE(review): the signature line of this function is absent from this listing.
101  std::string sample_weight_str;
102  core::pose::get_comment(pose, "sample_weight", sample_weight_str);
103 
104  utility::vector1 < std::string > sample_weight_input_parameters;
105  sample_weight_input_parameters = utility::split(sample_weight_str);
106 
107  utility::vector1 < core::Real > sample_weight;
108  for ( core::Size res_count = 1; res_count <= pose.total_residue(); ++res_count ){
109  core::Real new_sample_weight = 1.0;
    // NOTE(review): with 1-based vector1 indexing, the strict '<' never reads the last token.
    // That is either an off-by-one or a deliberate skip of a trailing empty token produced by
    // split() — confirm against how the "sample_weight" comment is written.
110  if( res_count < sample_weight_input_parameters.size() ){
111  new_sample_weight = utility::string2float( sample_weight_input_parameters[res_count] );
112  }
113  sample_weight.push_back( new_sample_weight );
114  }
115 
116  return sample_weight;
117 }
118 
119 
// Checks the proposed phi/psi angles of a fragment against the residue types in `sequence`.
// Returns true only when none of the three filters fired; the individual filter results are
// also reported through the three bool output parameters (all reset to false on entry).
// NOTE(review): the line naming this function is absent from this listing; the caller below
// invokes it as is_valid_backbone(...) and stores the result in a bool.
121  const std::string &sequence, // one-letter residue codes, indexed from 0
122  const core::Size &ir, // sequence offset
123  const std::vector< core::Real > &phi, // proposed phi per fragment position
124  const std::vector< core::Real > &psi, // proposed psi per fragment position; must have the same length as phi
125  bool &filter_pro, // out: set when a 'P' position has phi outside [-103, -33]
126  bool &filter_beta, // out: set when an 'I'/'V'/'T' position has phi > -40
127  bool &filter_gly // out: set when a non-'G' position has phi > 70 or psi strictly between -170 and -75
128 ){
129  runtime_assert( phi.size() == psi.size() )
130 
131  // Check phi/psi angles against the sequence
132  // Pose counts residues starting from one, so offset that
133  filter_pro = false;
134  filter_beta = false;
135  filter_gly = false;
136 
137  // now check every residue
138  for( core::Size bs_position = 0; bs_position < phi.size() ; ++bs_position ){
    // NOTE(review): sequence_position is not checked against sequence.size(); the visible caller
    // only passes windows with ir + loop_size <= nres — confirm the fragment length never exceeds that.
139  int sequence_position = ir - 1 + bs_position;
140 
141  // Proline
142  if( sequence[sequence_position] == 'P' ) {
143  if( phi[bs_position] < -103 || phi[bs_position] > -33 ) filter_pro = true;
144  }
145  // Beta branched residues
146  if( sequence[sequence_position] == 'I' || sequence[sequence_position] == 'V' || sequence[sequence_position] == 'T' ) {
147  if( phi[bs_position] > -40 ) filter_beta = true;
148  }
149  // Non glycine residues are confined to only part of the positive phi region
150  // populated by glycine residues
151  if( sequence[sequence_position] != 'G' ) {
152  if( phi[bs_position] > 70 ) filter_gly = true;
153  }
154  if( sequence[sequence_position] != 'G' ) {
155  if( psi[bs_position] < -75 && psi[bs_position] > -170 ) filter_gly = true;
156  }
157  }
158 
159  // were any of the filters triggered ? only return true if all filters are false!
160  return !( filter_pro || filter_beta || filter_gly );
161 }
162 
163 // Just a handy data structure to carry over some statistics together with the actual retrieve index
// NOTE(review): the constructor name line and the member declarations are absent from this listing;
// the members are inferred from the initializer list below.
164 struct FilterBucket {
166  retrieve_index(0), // index later passed to hashmap.get_peptide() to recover the fragment
167  BBrms(0), // RMS between the current pose segment and the candidate fragment
168  filter_pro(false), // proline filter result from is_valid_backbone
169  filter_beta(false), // beta-branched filter result from is_valid_backbone
170  filter_gly(false) // non-glycine filter result from is_valid_backbone
171  {}
177 };
178 
179  // @brief create a set of structures for the given range of residues and other parameters
     // For every start residue in [start_res, stop_res] and every loop size in the library, looks up
     // candidate fragments by increasing radius around the segment's rigid-body transform, filters
     // them by backbone RMS and (optionally) phi/psi rules, inserts the survivors and appends each
     // accepted result to lib_structs.
     // @param start_pose   input pose; only read and copied
     // @param lib_structs  output; one silent struct is appended per accepted model
     // NOTE(review): the line naming this function is absent from this listing.
180  void
182  const core::pose::Pose& start_pose,
183  std::vector< core::io::silent::SilentStructOP > &lib_structs
184  )
185  {
186  using namespace core;
187  using namespace core::pose;
188  using namespace conformation;
189  using namespace kinematics;
190  using namespace numeric::geometry::hashing;
191  using namespace optimization;
192  using namespace id;
193  using namespace basic::options;
194  using namespace basic::options::OptionKeys;
195 
196  runtime_assert( library_ );
197 
198  long starttime = time(NULL); // wall-clock start, reported in the final statistics line
199 
200  // Statistics counters
201  Size count_filter_rejects = 0;
202  Size count_total_loops = 0;
203  Size count_loop_builds = 0;
204  Size count_filter_pro = 0;
205  Size count_filter_beta = 0;
206  Size count_filter_gly = 0;
207  Size count_rejected_carms = 0;
208  Size count_rejected_bbrms = 0;
209  Size count_max_rad = 0;
210 
211  // Parameters
     // Per-loop-size caps derived from max_struct_: models per loop size, models per radius, and
     // fragments examined (200x the model cap).
     // NOTE(review): the first two divide by hash_sizes().size() and by max_radius_; if either can be 0
     // (the radius loop below does accept max_radius_ == 0) this is a division by zero — confirm.
212  core::Size models_build_this_loopsize_max = std::max( Size(1), Size( max_struct_ / library_->hash_sizes().size()) );
213  core::Size models_build_this_loopsize_per_rad_max = std::max( Size(1), Size( models_build_this_loopsize_max * 2 / max_radius_ ));
214  core::Size fragments_tried_this_loopsize_max = models_build_this_loopsize_max * 200;
215  TR << "LoopHashSampler limits: " << max_struct_ << " " << models_build_this_loopsize_max << " " << models_build_this_loopsize_per_rad_max << " " << fragments_tried_this_loopsize_max << std::endl;
216 
217  std::string sequence = start_pose.sequence();
218 
219  core::pose::Pose original_pose = start_pose;
220  core::pose::Pose edit_pose = start_pose; // NOTE(review): not referenced again in this listing
221  core::optimization::MinimizerOptions options( "lbfgs_armijo", 0.2, true , false ); // NOTE(review): not referenced again in this listing
222  core::optimization::MinimizerOptions options2( "lbfgs_armijo", 0.02,true , false ); // NOTE(review): not referenced again in this listing
223 
224  kinematics::MoveMap final_mm; // NOTE(review): configured here but not used further in this listing
225  final_mm.set_bb(true);
226 
227  Size nres = start_pose.total_residue();
228  Size ir, jr;
229 
230  core::Size start_res = start_res_;
231  core::Size stop_res = stop_res_;
232 
233  // figure out start and stop residues
234  if ( stop_res == 0 ) stop_res = nres; // to do the whole protein just set stop_res to 0
235  start_res = std::max( start_res, (core::Size)2 ); // dont start before 2 - WHY ? << cos you need a stub of at least 2 residues to calculate a proper takeoff point. Why ? I dont know. Rosettavoodoo. This knowledge has been lost in history. Historians have struggled for centuries to recover it.
236 
237  if( start_res > stop_res ) stop_res = start_res; // an inverted window collapses to the single residue start_res
238 
239  TR << "Running: Start:" << start_res << " End: " << stop_res << std::endl;
240  for( ir = start_res; ir <= stop_res; ir ++ ){
241 
242  // Loop over loopsizes in library
243  for( core::Size k = 0; k < library_->hash_sizes().size(); k ++ ){
244  core::Size loop_size = library_->hash_sizes()[ k ];
245 
246  jr = ir + loop_size; // end residue of the window starting at ir
247  if ( ir > nres ) continue; // window must lie inside the pose
248  if ( jr > nres ) continue;
249 
250  // get the rigid body transform for the current segment
251  BackboneSegment pose_bs;
252  pose_bs.read_from_pose( start_pose, ir, loop_size );
253  Real6 loop_transform;
254  if(!get_rt_over_leap( original_pose, ir, jr, loop_transform )) continue; // skip this window when no transform could be obtained
255 
256  LoopHashMap &hashmap = library_->gethash( loop_size );
257 
258 
259  // Now we compute the per residue sample weight averaged over the segment
     // NOTE(review): no sample-weight computation actually follows in this listing.
260 
261  // we want models_build_this_loopsize_max models no matter what
262  // but if there is a bbrms constraint, we might never reach x models
263  // some breakpoint, like x bins checked or x frags checked
264  // or radius check
265  core::Size fragments_tried_this_loopsize = 0;
266  core::Size models_build_this_loopsize = 0;
267 
268  for( Size radius = 0; radius <= max_radius_; radius++ ) {
269  count_max_rad = std::max( count_max_rad, radius ); // largest radius reached, for the statistics line
270  core::Size models_build_this_loopsize_this_rad = 0;
271  std::vector < core::Size > leap_index_bucket;
272  std::vector < FilterBucket > filter_leap_index_bucket;
273 
274  hashmap.radial_lookup( radius, loop_transform, leap_index_bucket ); // grab list of fragments using radial lookup out from our loop transform
275  TR.Debug << "Rad: " << radius << " " << leap_index_bucket.size() << std::endl;
276  if( leap_index_bucket.size() == 0) continue; // no fragments found
277 
278  // Now for every hit, get the internal coordinates and make a short list of replacement loops
279  // according to the RMS criteria
280  for( std::vector < core::Size >::const_iterator it = leap_index_bucket.begin();
281  it != leap_index_bucket.end();
282  ++it ){
283 
284  // Get the actual structure index (not just the bin index)
285  core::Size retrieve_index = (core::Size) (*it);
286  LeapIndex cp = hashmap.get_peptide( retrieve_index );
287 
288  // Retrieve the actual backbone structure
289  BackboneSegment new_bs;
290  library_->backbone_database().get_backbone_segment( cp.index, cp.offset, hashmap.get_loop_size() , new_bs );
291 
292  // Check the values against any RMS limitations
293  // if violated then skip rest of loop
     // (fragments rejected here are not counted in count_total_loops or against the fragment cap)
294  core::Real BBrms = get_rmsd( pose_bs, new_bs );
295  if( ( BBrms < min_bbrms_) || ( BBrms > max_bbrms_ ) ){
296  count_rejected_bbrms ++;
297  continue;
298  }
299 
300 
301  FilterBucket bucket;
302  bucket.retrieve_index = *it; // save the bucket index for the next step later
303  bucket.BBrms = BBrms; // also save the back bone RMS for later analysis & stats
304 
305  bool is_valid =
306  is_valid_backbone( sequence, ir, new_bs.phi(), new_bs.psi(), // input is sequence, current position in sequence, and the phi/psi's of the proposed angles.
307  bucket.filter_pro, bucket.filter_beta, bucket.filter_gly ); // output is a bunch of booleans giving information about any clashes.
308 
309  // count rejection stats
     // (counted whether or not phi/psi filtering is enabled)
310  if( bucket.filter_pro ) count_filter_pro ++;
311  if( bucket.filter_beta ) count_filter_beta ++;
312  if( bucket.filter_gly ) count_filter_gly ++;
313 
314  if( (!get_filter_by_phipsi()) || is_valid ){ // should we filter at all and if so is it valid.
315  filter_leap_index_bucket.push_back( bucket ); // add to our short list of good fragments
316  }else{
317  count_filter_rejects++; // or increment reject counter
318  }
319 
320  count_total_loops++;
321  fragments_tried_this_loopsize++;
322  if( fragments_tried_this_loopsize > fragments_tried_this_loopsize_max ) break; // continue with however many are in the bucket now, and break at end
323  }
324 
325  // treat the fragments in a random order so shuffle them up
326  //std::random__shuffle( filter_leap_index_bucket.begin(), filter_leap_index_bucket.end());
327  numeric::random::random_permutation(filter_leap_index_bucket.begin(), filter_leap_index_bucket.end(), numeric::random::RG);
328 
329  // Now create models and check rms after insertion
330  for( std::vector < FilterBucket >::const_iterator it = filter_leap_index_bucket.begin();
331  it != filter_leap_index_bucket.end();
332  ++it ){
333 
334  clock_t starttime = clock(); // shadows the function-level starttime; used only for the per-fragment debug timing below
335 
336  core::Size retrieve_index = it->retrieve_index;
337  LeapIndex cp = hashmap.get_peptide( retrieve_index );
338 
339  BackboneSegment new_bs;
340  library_->backbone_database().get_backbone_segment( cp.index, cp.offset, hashmap.get_loop_size() , new_bs );
341 
342  core::pose::Pose newpose( start_pose );
343  //transfer_phi_psi( start_pose, newpose ); //fpd necessary??
344 
345  core::Real final_rms = inserter_->make_local_bb_change( newpose, original_pose, new_bs, ir );
346  count_loop_builds++;
347 
348  bool isok = false;
349  if ( ( final_rms < max_rms_ ) && ( final_rms > min_rms_) ){ // both bounds are exclusive
350 
351  core::pose::Pose mynewpose( start_pose );
352 
353  transfer_phi_psi( newpose, mynewpose );
354  transfer_jumps( newpose, mynewpose );
355 
     // NOTE(review): the declaration of new_struct is not present in this listing (source lines 356-358 are absent).
359  new_struct->fill_struct( mynewpose ); // make the silent struct from the copy pose
360  new_struct->energies_from_pose( newpose ); // take energies from the modified pose, not the copy pose
361  new_struct->add_energy( "lh_carms", final_rms );
362  new_struct->add_energy( "lh_bbrms", it->BBrms );
363  new_struct->add_energy( "lh_radius", radius );
364  new_struct->add_energy( "lh_loopsize", loop_size );
365  new_struct->add_energy( "lh_filter_pro", it->filter_pro );
366  new_struct->add_energy( "lh_filter_beta", it->filter_beta );
367  new_struct->add_energy( "lh_filter_gly", it->filter_gly );
368 
369  //TR << "SAMPLER: " << new_struct->get_energy("censcore") << std::endl;
370  // Add donor history for this round of loophash only
371 
372  // Assume extra data is loaded, because we need it!
373  BBData bb;
374  BBExtraData bbextra;
375  library_->backbone_database().get_protein( cp.index, bb );
376 
377  std::string donorhistory = new_struct->get_comment("donorhistory");
378  if( library_->backbone_database().extra_size() <= bb.extra_key ){
     // No extra data for this protein: report it and append a history entry without the pdb id field.
379  std::cerr << "ERROR: No extra data ?: " << library_->backbone_database().extra_size() << " < " << bb.extra_key << std::endl;
380 
381  donorhistory = donorhistory
382  + utility::to_string( loop_size )
383  + "/" + utility::to_string( library_->loopdb_range().first + cp.index )
384  + "/" + utility::to_string( cp.offset/3 )
385  + "/" + utility::to_string( ir-1 ) + ";";
386  }else{
387 
388  library_->backbone_database().get_extra_data( bb.extra_key, bbextra );
389 
     // Entry format: loop_size/pdb_id/database index/offset/zero-based start residue;
     // NOTE(review): cp.offset/3 presumably converts a torsion offset to a residue offset — confirm.
390  donorhistory = donorhistory
391  + utility::to_string( loop_size )
392  + "/" + utility::to_string( bbextra.pdb_id)
393  //+ "/" + utility::to_string( bbextra.sequence.substr(cp.offset/3, 5) )
394  + "/" + utility::to_string( library_->loopdb_range().first + cp.index )
395  + "/" + utility::to_string( cp.offset/3 )
396  + "/" + utility::to_string( ir-1 ) + ";";
397  }
398  new_struct->erase_comment( "donorhistory" ); // replace the previous comment with the extended history
399  new_struct->add_comment( "donorhistory", donorhistory );
400  lib_structs.push_back( new_struct );
401 
402  models_build_this_loopsize++;
403  models_build_this_loopsize_this_rad++;
404 
405 
406  isok = true;
407  }else{
408  count_rejected_carms ++;
409  }
410 
411  //if ( lib_structs.size() > 2 ) return;
412 
413  clock_t endtime = clock();
414 
415  TR.Debug << "Clocks: " << endtime - starttime << " " << final_rms << (isok ? " OK" : " Reject") << std::endl;
416 
417  if( models_build_this_loopsize >= models_build_this_loopsize_max ) break;
418  if( models_build_this_loopsize_this_rad >= models_build_this_loopsize_per_rad_max ) break;
419 
420  }
421  // To break out of the outer for loop when these conditions are met
     // NOTE(review): the fragment cap is tested with '>=' here but with '>' inside the lookup loop above — confirm which is intended.
422  if( models_build_this_loopsize >= models_build_this_loopsize_max ) break;
423  if( fragments_tried_this_loopsize >= fragments_tried_this_loopsize_max) break;
424  }
425 
426  TR.Debug << " IR: " << ir << " LS: " << loop_size
427  << " Frag: " << fragments_tried_this_loopsize << " ( " << fragments_tried_this_loopsize_max << " ) "
428  << " Modls: " << models_build_this_loopsize << " ( " << models_build_this_loopsize_max << " ) "
429  << std::endl;
430 
431 
432  } // Loop over fragment sizes
433  } // Loop over residue window
434 
435 
436  // Now just print some final statistics
437  long endtime = time(NULL);
438  TR.Info << "LHS: " << start_res << "-" << stop_res << ": "
439  << " struc " << lib_structs.size()
440  << " (max) " << max_struct_
441  << " secs " << endtime - starttime << " secs "
442  << " Total: " << count_total_loops
443  << " RjTor: " << count_filter_rejects
444  << " RjPro: " << count_filter_pro
445  << " RjBeta: " << count_filter_beta
446  << " RjGly: " << count_filter_gly
447  << " RjCA(" << min_rms_ << "-" << max_rms_ << "): " << count_rejected_carms
448  << " RjBB: " << count_rejected_bbrms
449  << " Built: " << count_loop_builds
450  << " MaxRad: " << count_max_rad
451 
452  << std::endl;
453 
     // Debug dump of the "censcore" energy of every struct now in lib_structs (including any that
     // were already present when this function was called).
454  for( std::vector< core::io::silent::SilentStructOP >::iterator it=lib_structs.begin();
455  it != lib_structs.end(); ++it ){
456  TR.Debug << "Samples: " << (*it)->get_energy("censcore") << std::endl;
457  }
458 
459 
460 
461 }
462 
463 // closes gaps. focuses on decreasing variability, not for use with loophash mpi
464 // very very similar to build_structures(), but too lazy to make separate functions to reduce redundancy
// As shown here the body is empty and all three parameter names are commented out, so a call
// does nothing and leaves lib_structs untouched.
// NOTE(review): the line naming this function is absent from this listing.
465  void
467  const core::pose::Pose& /*start_pose*/,
468  std::vector< core::pose::Pose> &/*lib_structs*/,
469  core::Size /*loop_size*/
470  )
471  {
472 }
473 
474 
475 } // namespace loophash
476 } // namespace protocols
477 
478 
479 
480