Rosetta 3.5
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
StructureDependentPeakCalibrator.cc
Go to the documentation of this file.
1 // -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
2 // vi: set ts=2 noet:
3 //
4 // (c) Copyright Rosetta Commons Member Institutions.
5 // (c) This file is part of the Rosetta software suite and is made available under license.
6 // (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
7 // (c) For more information, see http://www.rosettacommons.org. Questions about this can be
8 // (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
9 
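10 /// @file StructureDependentPeakCalibrator.cc
11 /// @brief calibration of NOESY cross-peak distance bounds against an ensemble of structures
12 /// @detailed
13 /// Contains currently: per-peak constraint generation, upper-bound violation statistics, and elimination (with optional nudging) of peaks violated by the ensemble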
14 ///
15 ///
16 /// @author Oliver Lange
17 
18 // Define a suitable replacement for lrint() on Windows
19 #if (defined WIN32)
20  #include <boost/math/special_functions/round.hpp>
21  int lrint(double x) {
22  return boost::math::iround(x); }
23 #endif
24 
25 // Unit Headers
27 
28 // Package Headers
29 // AUTO-REMOVED #include <protocols/noesy_assign/CrossPeakList.hh>
31 
32 // Project Headers
34 
35 #include <core/pose/Pose.hh>
36 
37 // Utility headers
38 #include <basic/Tracer.hh>
39 #include <utility/vector1.hh>
40 
41 //// C++ headers
42 #include <cmath>
43 #include <iomanip>
44 
45 static basic::Tracer tr("protocols.noesy_assign.calibration");
46 
47 using core::Real;
48 using namespace core;
49 using namespace basic;
50 //using namespace basic::options;
51 //using namespace basic::options::OptionKeys;
52 
53 namespace protocols {
54 namespace noesy_assign {
55 
56 
57 
58 
59 void StructureDependentPeakCalibrator::init_calibrator() {
60  generate_constraints();
61 }
62 
63 void StructureDependentPeakCalibrator::generate_constraints() {
64  core::Size npeaks( peaks().size() );
65  constraints_.resize( npeaks, NULL );
67  core::pose::Pose dummy_pose;
68  core::Size ct( 1 );
69  runtime_assert( structures_.size() )
70  core::pose::Pose const& pose( **(structures_.begin()) );
71  for ( utility::vector1< CrossPeakOP >::const_iterator it = peaks().begin(); it != peaks().end(); ++it, ++ct ) {
72  (*it)->create_fa_and_cen_constraint( constraints_[ ct ], dummy, pose, dummy_pose, 1, 0.0 /*padding*/, true /*only fa cst*/ );
73  }
74 }
75 
76 void StructureDependentPeakCalibrator::collect_upperbound_statistics( core::Size peak, TypeCumulator const& types ) {
77  Size violated( 0 );
78  Real inv_n_struct( 1.0 / structures_.size() );
79  runtime_assert( peak <= peaks().size() );
80  runtime_assert( constraints_.size() == peaks().size() );
81  Size pose_ct( 1 );
82  Real stddev( 0.0);
83  Real mean( 0.0 );
84  PeakAssignmentParameters const& params( *PeakAssignmentParameters::get_instance() );
85  if ( constraints_[ peak ] &&
86  !( params.calibration_ignore_eliminated_peaks_ && peaks()[ peak ]->eliminated() ) ) {
87  for ( PoseVector::const_iterator pose_it = structures_.begin(); pose_it != structures_.end(); ++pose_it, ++pose_ct ) {
88  Real dist( constraints_[ peak ]->dist( **pose_it ) );
89  stddev += dist*dist;
90  mean += dist;
91  violated += ( dist - peaks()[ peak ]->distance_bound() ) > dcalibrate_;
92  // if ( pose_ct == 1 && tr.Trace.visible() ) tr.Trace << peaks()[ peak ]->peak_id() << " " << peaks()[ peak ]->filename() << " sum_dist " << constraints_[ peak ]->dist( **pose_it ) << std::endl;
93  }
94  mean *= inv_n_struct;
95  stddev = stddev*inv_n_struct - mean*mean;
96  // tr.Debug << "peak: " << peaks()[ peak ]->peak_id() << " " << peaks()[ peak ]->filename() << " violated: " << violated << " " << 1.0/inv_n_struct << " " << std::endl;
97  if ( stddev < params.calibration_convergence_ || params.calibration_convergence_ < 0.01 ) {
98  collect_target_statistics( violated*inv_n_struct, types );
99  }
100 // for ( core::Size type = BACKBONE; type < MAX_TYPE; ++type ) {
101 // if ( types.test( type ) ) {
102 // accumulated_count_[ type ] += 1;
103 // accumulated_target_[ type ] += violated * inv_n_struct;
104 // }
105 // }
106  }
107 }
108 
109 void StructureDependentPeakCalibrator::eliminate_violated_constraints() {
110  PeakAssignmentParameters const& params( *PeakAssignmentParameters::get_instance() );
111 
112  Size ct( 1 );
113 
114  typedef utility::vector1< Real > RealVector;
115  RealVector distance_deltas( structures_.size(), 0.0 );
116 
117 
118  for ( utility::vector1< CrossPeakOP >::const_iterator it = peaks().begin(); it != peaks().end(); ++it, ++ct ) {
119  if ( !constraints_[ ct ] ) continue;
120  Size violated( 0 );
121  Size pose_ct( 1 );
122 
123  for ( PoseVector::const_iterator pose_it = structures_.begin(); pose_it != structures_.end(); ++pose_it, ++pose_ct ) {
124  Real delta( constraints_[ ct ]->dist( **pose_it ) - peaks()[ ct ]->distance_bound() );
125  distance_deltas[ pose_ct ] = delta;
126  violated += delta > params.dcut_;
127  }
128 
129  if ( params.calibration_max_nudging_ > 1.0 && violated > params.calibration_start_nudging_*structures_.size() ) {
130  tr.Trace << "Check peak " << (*it)->peak_id() << " for nudging... "<< std::endl;
131  Real const CORRECTION_STEP( 0.1 );
132  Real const max_correction( peaks()[ ct ]->distance_bound()*( params.calibration_max_nudging_ - 1) );
133  Real const old_violated( violated );
134  for ( Real correction = 0.1; correction <= max_correction; correction += CORRECTION_STEP ) {
135  violated = 0;
136  Size pose_ct( 1 );
137  for ( PoseVector::const_iterator pose_it = structures_.begin(); pose_it != structures_.end(); ++pose_it, ++pose_ct ) {
138  Real delta( distance_deltas[ pose_ct ] - correction );
139  violated += delta > params.dcut_;
140  }
141  if ( violated <= params.calibration_stop_nudging_*structures_.size() ) {
142  peaks()[ ct ]->nudge_distance_bound( correction );
143  for ( PoseVector::const_iterator pose_it = structures_.begin(); pose_it != structures_.end(); ++pose_it, ++pose_ct ) {
144  distance_deltas[ pose_ct ] -= correction;
145  }
146  tr.Debug << "peak: " << (*it)->peak_id() <<" " << (*it)->filename()
147  << " original violations: " << old_violated
148  << std::setprecision(2) << " new distance: " << peaks()[ ct ]->distance_bound()
149  << " nudged by: " << correction
150  << std::setprecision(2) << " of max " << max_correction
151  << " new violations: " << violated << std::endl;
152  break;
153  }
154  violated=old_violated;
155  }
156  }
157 
158  if ( !params.use_local_distviol_ ) {
159  tr.Debug << "peak: " << (*it)->peak_id() <<" " << (*it)->filename() << " violations: " << violated << std::endl;
160  (*it)->set_eliminated_due_to_dist_violations( violated > ( params.nr_conformers_violatable_*structures_.size() ) );
161  std::ostringstream elim_msg;
162  elim_msg << violated << " ("<<distance_deltas.size()<<") violated by >" << distance_deltas[1] << "A (" << params.dcut_ << "A) ";
163  (*it)->set_elimination_comment( elim_msg.str() );
164  } else { //local dist viol
165 
166  //first sort to get a 90% distribution length --i.e., ignore 5% on each side and take distance between those
167  std::sort( distance_deltas.begin(), distance_deltas.end() );
168 
169  //find smallest interval that fits 99% of the deltas
170  //with default setting of 99% this is basically the length difference between shortest and longest distance
171  Size const num_element_cluster( lrint( 1.0*distance_deltas.size() * params.local_distviol_range_ ) );
172  Size const low_quartil_pos( lrint( 1.0*distance_deltas.size()*0.25 ) );
173  Real const low_quartil_dist( distance_deltas[ low_quartil_pos ]+(*it)->distance_bound() );
174  tr.Debug << "peak: " << (*it)->peak_id() << " " << (*it)->filename() << " check " << num_element_cluster << " of a total " << distance_deltas.size() << " distances for max-extension " << std::endl;
175  Real max_extension( 1000 );
176  if ( low_quartil_dist > params.local_distviol_cutoff_ || low_quartil_dist > (*it)->distance_bound() + params.local_distviol_cutoff_buffer_ ) {
177  tr.Debug << "peak: " << (*it)->peak_id() << " " << (*it)->filename() << " dist " << (*it)->distance_bound() << " REMOVED due to large Q1 dist of " << low_quartil_dist << std::endl;
178  (*it)->set_eliminated_due_to_dist_violations( true );
179  std::ostringstream elim_msg;
180  elim_msg << "Q1 dist to high: " << low_quartil_dist;
181  (*it)->set_elimination_comment( elim_msg.str() );
182  } else {
183  for ( Size start_cluster = 1; start_cluster+num_element_cluster-1 <= distance_deltas.size(); start_cluster++ ) {
184  Real ext = distance_deltas[ start_cluster+num_element_cluster-1 ] - distance_deltas[ start_cluster ];
185  if ( max_extension > ext ) max_extension = ext;
186  }
187 
188  tr.Debug << num_element_cluster << " distances are in an interval of only " << max_extension << " with a Q1 dist of " << low_quartil_dist << std::endl;
189  //get extension between high and low.
190  // Size const ind_low_5( 1+lrint( params.local_distviol_range_*distance_deltas.size() ) ); //lower 5% -
191  // Size const ind_high_5( lrint( 1.0*distance_deltas.size()*(1-params.local_distviol_range_) ) ); //upper 5%
192  // Real max_extension( distance_deltas[ ind_high_5 ] - distance_deltas[ ind_low_5 ] );
193 
194  // tr.Debug << "ind_low_5 " << ind_low_5 << " ind_high_5 " << ind_high_5 << " min_delta: "
195  // << distance_deltas[ 1 ] << " max_delta "
196  //<< distance_deltas.back() << std::endl;
197 
198  Size viol_count( 0 );
199  tr.Trace << " dist: " << (*it)->distance_bound() << "| " ;
200  core::Real violation_cutoff( max_extension * params.local_distviol_global_factor_ + params.local_distviol_global_buffer_ );
201  for ( RealVector::const_iterator delta_it = distance_deltas.begin(); delta_it != distance_deltas.end(); ++delta_it ) {
202  tr.Trace << " " << *delta_it;
203  viol_count += ( *delta_it > violation_cutoff ) ? 1 : 0;
204  }
205  tr.Trace << std::endl;
206 
207  tr.Debug << "peak: " << (*it)->peak_id() <<" " << (*it)->filename() << " dist: " << (*it)->distance_bound()
208  << " max_extension " << max_extension << " viol_count " << viol_count
209  << ( viol_count > (params.nr_conformers_violatable_*distance_deltas.size() ) ? " REMOVED " : "" ) << std::endl;
210 
211  (*it)->set_eliminated_due_to_dist_violations( viol_count > ( params.nr_conformers_violatable_*distance_deltas.size() ) );
212  std::ostringstream elim_msg;
213  elim_msg << viol_count << " ("<<distance_deltas.size()<<") violated by >" << distance_deltas[1] << "A (" << violation_cutoff << "A) ";
214  (*it)->set_elimination_comment( elim_msg.str() );
215  }
216  //what is an elimination candidate ?
217  (*it)->set_elimination_candidate( violated > ( params.nr_conformers_violatable_*structures_.size() ) );
218  } // use_local_distviol
219  } // for peaks
220 }
221 
222 }
223 }