Rosetta 3.5
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
FragmentPicker.hh
Go to the documentation of this file.
1 // -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
2 // vi: set ts=2 noet:
3 //
4 // (c) Copyright Rosetta Commons Member Institutions.
5 // (c) This file is part of the Rosetta software suite and is made available under license.
6 // (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
7 // (c) For more information, see http://www.rosettacommons.org. Questions about this can be
8 // (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
9 
10 /// @file protocols/frag_picker/FragmentPicker.hh
11 /// @brief Fragment picker - the core part of picking machinery
12 /// @author Dominik Gront (dgront@chem.uw.edu.pl)
13 
14 #ifndef INCLUDED_protocols_frag_picker_FragmentPicker_hh
15 #define INCLUDED_protocols_frag_picker_FragmentPicker_hh
16 
17 // unit headers
19 
20 // package headers
35 
36 // type headers
37 #include <core/types.hh>
38 
39 // core headers
43 
44 // utility headers
45 #include <utility/pointer/ReferenceCount.hh>
46 #include <utility/vector1.hh>
47 
48 // C++
49 #include <string>
50 #include <map>
51 #include <sstream>
52 #include <set>
53 
54 namespace protocols {
55 namespace frag_picker {
56 
57 typedef std::pair<Size,Size> PosPair; // a pair of two Size values
58 typedef std::pair<FragmentCandidateOP, scores::FragmentScoreMapOP> Candidate; // a FragmentCandidateOP paired with a FragmentScoreMapOP
60 typedef std::map<Size, CandidatesCollectorOP> CandidatesSink; // collectors keyed by a Size; set_candidates_collector() inserts them under a fragment size
61 
62 class QuotaDebug : public std::ostringstream { // string-stream subclass; the name suggests debug output for quota picking - TODO confirm
63 
64 public:
66  QuotaDebug(Size nFrags) { nFrags_ = nFrags; } // stores nFrags in nFrags_; not marked explicit, so a Size converts implicitly
68  std::map<std::string,Size> tag_map_; // public map from a string tag to a Size
69  void setup_summary(quota::QuotaCollector const & collector_); // declared only; no body in this file
70  void write_summary(); // declared only; no body in this file
72  Size max_pools(); // declared only; no body in this file
73 
74 private:
75  QuotaDebug(QuotaDebug const &); // private copy constructor without a body here - presumably forbids copying; confirm it is never defined
76 };
77 
78 
79 /// @brief The core of the fragment picking machinery
80 /// @details The FragmentPicker class:\n
81 /// - knows about query data: sequence, sequence profile, secondary structure.
82 /// All other query data must be loaded directly into the relevant scoring methods
83 /// - provides slots for 'pluggable' parts of the machinery, like chunk filters, scoring methods and so on.
84 /// - picks fragments
86 public:
87 
88 // constructors
89 
91  scores_.push_back(new scores::FragmentScoreManager());
92  CandidatesSink storage;
93  candidates_sinks_.push_back(storage);
94  max_frag_size_ = 0;
95  max_threads_ = 1;
96  prefix_ = "frags"; // umm... let's not make hidden files
100  }
101 
102  FragmentPicker(std::string fragment_score_manager_type) { // chooses the score-manager class from the given type name
103  if (fragment_score_manager_type.compare("PValuedFragmentScoreManager") == 0) // compare() == 0 means an exact match
104  scores_.push_back( static_cast< scores::FragmentScoreManager * > ( new scores::PValuedFragmentScoreManager()));
105  else // any other name falls back to the plain FragmentScoreManager
106  scores_.push_back(new scores::FragmentScoreManager());
107  CandidatesSink storage; // starts out empty
108  candidates_sinks_.push_back(storage);
109  max_frag_size_ = 0;
110  max_threads_ = 1;
111  prefix_ = "frags"; // umm... let's not make hidden files
115  }
116 
117 // destructor
118 
119  virtual ~FragmentPicker();
120 
121 public:
122 
123 // public methods
124 
125  // Command line processing and high-level stuff -----------------
126  void parse_command_line();
127 
128  /// @brief Picks fragments and saves them into proper files - independently for each query position.
129  /// @details This protocol scores all vall data against a given position
130  /// and keeps all the candidates unless they fail an energy filter.
131  /// When all candidates for a position are scored, it selects fragments
132  /// for the position and proceeds to the next position. A bounded queue is not used.
133  void keep_all_protocol();
134 
135  /// @brief Picks fragments and saves them into proper files - uses bounded queue.
136  /// @details This protocol scores all vall data against all query positions
137  /// and keeps a limited number of candidates per position using a bounded queue.
138  /// When all candidates for all positions are scored, it selects final fragments.
139  void bounded_protocol();
140 
141  void quota_protocol();
142 
143  void fragment_contacts( Size const fragment_size, utility::vector1<Candidates> const & fragment_set );
144 
145  // multi-threaded task
146  void nonlocal_pairs_at_positions( utility::vector1<Size> const & positions, Size const & fragment_size, utility::vector1<bool> const & skip,
148 
149  void nonlocal_pairs( Size const fragment_size, utility::vector1<Candidates> const & fragment_set );
150 
151  // these should be private methods but some classes directly access these
152 
153 
154  /// @brief returns a pointer to the scoring manager stored at position index in scores_
156  assert(index <= scores_.size()); // checks the upper bound only, and only in builds where assert is enabled
157  return scores_[index];
158  }
159 
160  // vall stuff -----------------
161  /// @brief reads a vall file
162  void read_vall( std::string const & fn );
163  void read_vall( utility::vector1< std::string > const & fns );
164 
165  /// @brief sets vall data: stores the given provider in chunks_, replacing whatever was set before
166  void set_vall(VallProviderOP vall_chunks) {
167  chunks_ = vall_chunks;
168  }
169 
170  /// @returns a pointer to Vall provider
172  return chunks_;
173  }
174 
175  // query sequence -----------------
176 
177  /// @brief sets the query sequence
178  /// @details Well, it is a sequence profile, but the sequence can be extracted from it
180  query_profile_ = query_sequence;
181  query_seq_as_string_ = query_profile_->sequence(); // keep the plain sequence string alongside the profile
182  set_picked_positions(1,query_sequence->length()); // select the range from 1 to the profile length
183  }
184 
185  /// @brief sets the query sequence
186  void set_query_seq(std::string & query_sequence) {
187  if (query_profile_ == 0)
189  query_profile_->sequence(query_sequence);
190  query_seq_as_string_ = query_sequence;
192  }
193 
194  /// @brief Returns the sequence we are picking fragments for (as a string)
196  return query_seq_as_string_;
197  }
198 
200  return query_profile_;
201  }
202 
203  /// Returns the solvent accessibility prediction
205  return query_sa_prediction_;
206  }
207 
208  /// Returns the phi prediction
210  return query_phi_prediction_;
211  }
212 
213  /// Returns the psi prediction
215  return query_psi_prediction_;
216  }
217 
218  /// Returns the phi prediction confidence
221  }
222 
223  /// Returns the psi prediction confidence
226  }
227 
228  /// Returns residue depth values
230  return query_residue_depth_;
231  }
232 
233  /// @brief Sets the sequence object we are picking fragments for
235  query_profile_ = profile;
236  query_seq_as_string_ = query_profile_->sequence();
237  set_picked_positions(1,profile->length());
238  }
239 
240  // query secondary structure -----------------
241  /// @brief Returns the query secondary structure as a SecondaryStructure object, or 0 when nothing is stored under prediction_name
243  if (query_ss_profile_.find(prediction_name) != query_ss_profile_.end())
244  return query_ss_profile_.find(prediction_name)->second; // second lookup of the same key
245  else
246  return 0;
247  }
248 
249  /// @brief Reads a bunch of ss predictions from files and plugs them into the picker
250  void read_ss_files(utility::vector1<std::string> sec_str_input);
251 
252  /// @brief Returns the query secondary structure string stored under the given prediction name
253  inline std::string & get_query_ss_string(std::string prediction_name) {
254  return query_ss_as_string_.find(prediction_name)->second; // NOTE(review): find() result is dereferenced without an end() check, so an unknown name is undefined behaviour
255  }
256 
257  /// @brief Sets the query secondary structure
259 
260  /// @brief Returns the number of entries in query_ss_profile_, i.e. how many secondary structure predictions are stored
261  inline Size count_query_ss() {
262  return query_ss_profile_.size();
263  }
264 
265  /// @brief Identifies if the ss file is psipred or talos, calls appropriate reader
266  void read_ss_file(std::string const &, std::string);
267 
268  /// @brief Reads spine-x phi, psi, and solvent accessibility predictions
269  void read_spine_x(std::string const & file_name);
270 
271  /// @brief Reads DEPTH residue depth values
272  void read_depth(std::string const & file_name);
273 
274  // other stuff -----------------
275  inline Size size_of_query() { // returns the length of the query sequence string
276  assert(query_seq_as_string_.length() == query_profile_->length()); // string and profile must agree; this also dereferences query_profile_
277  return query_seq_as_string_.length();
278  }
279 
280  /// @brief Asks the picker to pick fragments from a given range in a query sequence
282 
283  /// @brief Asks the picker to pick fragments for given positions in a query sequence
285 
286  /// @brief picks fragment candidates.
287  /// @details These basically become fragments if they pass the final selection.
288  /// Fragment candidates are stored in a container that a user must plug into the picker
289 
290  /// multi-threaded task
291  void pick_chunk_candidates(utility::vector1<VallChunkOP> const & chunks, Size const & index);
292 
293  void pick_candidates();
294 
295  void pick_candidates(Size i_pos,Size frag_len);
296 
297 
298  /// @brief Calculates total score for a given vector of small scores
299  /// (FragmentScoreMap object pointer)
300  /// @details FragmentScoreManager that is stored inside the picker is used
301  /// for this calculation. In particular it provides weights
302  double total_score(scores::FragmentScoreMapOP f, Size index=1);
303 
304  // save results
305  void save_candidates();
306  void save_fragments();
307 
308  /// @brief How long is the longest fragment? Returns the largest element of frag_sizes_.
309  Size max_frag_len() { return *std::max_element( frag_sizes_.begin(), frag_sizes_.end() ); } // NOTE(review): dereferences end() when frag_sizes_ is empty; <algorithm> is not among the includes visible in this file
310 
311  // Delegators from FragmentScoreManager ---------------
312  void show_scoring_methods(std::ostream & out, Size index=1) { // forwards to the score manager stored at position index
313  if (index > scores_.size()) return; // an out-of-range index is silently ignored
314  scores_[index]->show_scoring_methods(out);
315  }
316 
317  /// @brief adds a new scoring method to the scoring scheme
319  scores::FragmentScoringMethodOP scoring_term,
320  Real weight, Size index=1) {
321  if (index > scores_.size()) return;
322  scores_[index]->add_scoring_method(scoring_term, weight);
323  }
324 
325  // Convert to a FragSet
327 
328  /// @brief Reads query secondary structure prediction from a PsiPred file
330 
331  /// @brief Reads query secondary structure prediction from a Talos+ file
332  void read_talos_ss(std::string const &, std::string);
333 
334 // should be private but some classes directly access these
336 
339 
341 
343 
344  void set_up_ss_abego_quota();
345 
346  bool is_valid_chunk( VallChunkOP chunk );
347 
348  bool is_valid_chunk( Size const frag_len, VallChunkOP chunk );
349 
350  // Output fragments
351  void output_fragments( Size const fragment_size, utility::vector1<Candidates> const & final_fragments );
352 
353  // Picking machinery -----------------
354  /// @brief sets a collector where fragment candidates will be kept until final selection
356  CandidatesCollectorOP sink, Size index = 1) {
357  if (index == 0) // index 0 targets candidates_sink_; any other index targets candidates_sinks_[index]
358  candidates_sink_.insert(std::pair<Size, CandidatesCollectorOP>(frag_size, sink));
359  else
360  candidates_sinks_[index].insert(std::pair<Size, CandidatesCollectorOP>(frag_size, sink)); // NOTE(review): std::map::insert keeps an already-present collector for frag_size - confirm that is intended
361  }
362 
363  /// @brief returns a pointer to the candidates collector currently used
364  /// @details one may need this instance to access the candidates that have been found by the picker
366  if (index == 0) // index 0 reads candidates_sink_ rather than an entry of candidates_sinks_
367  return candidates_sink_[frag_size];
368  else
369  return candidates_sinks_[index][frag_size]; // NOTE(review): std::map::operator[] inserts an empty entry when frag_size has no collector - confirm callers expect that
370  }
371 
372  /// @brief adds a chunk filter that will be used to screen chunks before they are cut into fragments
374  filters_.push_back(filter);
375  }
376 
381 
382 private:
384 
386 
389  std::map<std::string, core::fragment::SecondaryStructureOP> query_ss_profile_;
390  std::map<std::string, std::string> query_ss_as_string_;
392 
395 
397 
398  // for multi-threaded picking there should only be one candidate sink per thread
400 
401  // for multi-threaded scoring there should only be one FragmentScoreManager per thread due to cached chunk scores
403 
404 
405  // for frag contacts
407  std::set<ContactType> contact_types_;
411 
412  // for nonlocal contacts
414 
415  // phi,psi,sa predictions
421 
422  // residue depth
424 
425  // atom pair constraint contact map
427 
428 };
429 
430 
431 
432 } // frag_picker
433 } // protocols
434 
435 #endif /* INCLUDED_protocols_frag_picker_FragmentPicker_hh */
436