Rosetta 3.5
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
FragmentPicker.cc
Go to the documentation of this file.
1 // -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
2 // vi: set ts=2 noet:
3 //
4 // (c) Copyright Rosetta Commons Member Institutions.
5 // (c) This file is part of the Rosetta software suite and is made available under license.
6 // (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
7 // (c) For more information, see http://www.rosettacommons.org. Questions about this can be
8 // (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
9 
10 /// @file protocols/frag_picker/FragmentPicker.cc
11 /// @brief Fragment picker - the core part of picking machinery
12 /// @author Dominik Gront (dgront@chem.uw.edu.pl)
13 
14 // unit headers
16 
45 
49 
50 #include <basic/options/keys/cm.OptionKeys.gen.hh>
51 #include <core/sequence/util.hh>
54 
55 #include <core/fragment/FragID.hh> // required for windows build
58 
62 #include <core/pose/util.hh>
63 #include <core/scoring/rms_util.hh>
64 #include <numeric/model_quality/rms.hh>
65 #include <core/fragment/util.hh>
66 
67 // option key includes
68 #include <basic/options/option.hh>
69 #include <basic/options/keys/OptionKeys.hh>
70 #include <basic/options/keys/out.OptionKeys.gen.hh>
71 #include <basic/options/keys/frags.OptionKeys.gen.hh>
72 #include <basic/options/keys/in.OptionKeys.gen.hh>
73 #include <basic/options/keys/constraints.OptionKeys.gen.hh>
74 
75 #include <basic/prof.hh>
76 #include <basic/Tracer.hh>
77 
78 #include <utility/exit.hh>
79 #include <utility/io/izstream.hh>
80 #include <utility/io/ozstream.hh>
81 
82 #include <utility>
83 #include <sstream>
84 #include <iostream>
85 #include <fstream>
86 
87 #include <ObjexxFCL/format.hh>
88 
89 #ifdef USE_BOOST_THREAD
90 // Boost headers
91 #include <boost/thread.hpp>
92 #include <boost/bind.hpp>
93 #endif
94 
95 namespace protocols {
96 namespace frag_picker {
97 
98 using namespace core;
99 using namespace core::fragment;
100 using namespace protocols::frag_picker;
101 using namespace protocols::frag_picker::scores;
102 using namespace basic::options;
103 using namespace basic::options::OptionKeys;
104 
105 //#ifdef USE_BOOST_THREAD
106 //boost::mutex picker_mutex;
107 //#endif
108 
109 static basic::Tracer tr("protocols.frag_picker.FragmentPicker");
110 
112 
114  tr.Info << "pick fragments using bounded protocol..." << std::endl;
115  pick_candidates();
116  save_fragments();
117 }
118 
120  using namespace ObjexxFCL;
121  tr.Info << "pick fragments using quota protocol..." << std::endl;
122  pick_candidates();
123 
124  const bool skip_merge = (candidates_sinks_.size() == 1) ? true : false;
125  for (Size iFragSize = 1; iFragSize <= frag_sizes_.size(); ++iFragSize) { // Loop over various sizes of fragments
126  Size fragment_size = frag_sizes_[iFragSize];
127  quota::QuotaCollectorOP c = (skip_merge) ? dynamic_cast<quota::QuotaCollector*> (candidates_sinks_[1][fragment_size]()) :
128  dynamic_cast<quota::QuotaCollector*> (candidates_sink_[fragment_size]()); // merged storage
129  if (c == 0)
130  utility_exit_with_message("Cant' cast candidates' collector to QuotaCollector. Is quota set up correctly?");
131  log_25_.setup_summary(*c);
132  log_200_.setup_summary(*c);
133  Size maxqpos = size_of_query() - fragment_size + 1;
134 
135  utility::vector1<Candidates> final_fragments(maxqpos); // final fragments
136 
137  for (Size iqpos = 1; iqpos <= query_positions_.size(); ++iqpos) {
138  Size qPos = query_positions_[iqpos];
139  if ( qPos > maxqpos) continue;
140  Candidates dummy_input, final_out;
141  if (!skip_merge) { // merge candidates
142  for (Size i=1;i<=candidates_sinks_.size();++i)
143  candidates_sink_[fragment_size]->insert(qPos, candidates_sinks_[i][fragment_size]);
144  }
145  // select the final fragments
146  quota::QuotaSelector selector(n_frags_, qPos, c );
147  selector.select_fragments(dummy_input,final_out);
148  final_fragments[qPos] = final_out;
149  }
150  log_25_.write_summary();
151  log_200_.write_summary();
152 
153  output_fragments( fragment_size, final_fragments );
154  }
155 }
156 
158  using namespace ObjexxFCL;
159  tr.Info << "pick fragments using keep-all protocol..." << std::endl;
160  // memory usage makes the following options impractical
161  if (max_threads_ > 1)
162  tr.Warning << "Ignoring -j option for keep_all_protocol" << std::endl;
163  if (option[frags::nonlocal_pairs].user())
164  tr.Warning << "Ignoring -nonlocal_pairs option for keep_all_protocol" << std::endl;
165 
166  CompareTotalScore comparator(get_score_manager());
167  for (Size iFragSize = 1; iFragSize <= frag_sizes_.size(); ++iFragSize) { // Loop over various sizes of fragments
168  Size fragment_size = frag_sizes_[iFragSize];
169 
170  utility::io::ozstream out_file;
171  if (option[frags::describe_fragments].user()) {
172  std::string describe_name = option[frags::describe_fragments]()+"." + string_of(n_frags_) +"."+string_of(fragment_size)+"mers";
173  out_file.open(describe_name.c_str());
174  }
175 
176  std::string out_file_name = prefix_ + "." + string_of(n_frags_) + "." + string_of(fragment_size) + "mers";
177  utility::io::ozstream output(out_file_name);
178  CandidatesCollectorOP storage = get_candidates_collector(fragment_size);
179 
180  Size maxqpos = size_of_query() - fragment_size + 1;
181  for (Size iqpos = 1; iqpos <= query_positions_.size(); ++iqpos) {
182  Size qPos = query_positions_[iqpos];
183  if ( qPos > maxqpos ) continue;
184 
185  Candidates out;
186  pick_candidates(qPos,fragment_size);
187  Candidates candidates = storage->get_candidates(qPos);
188  std::sort(candidates.begin(),candidates.end(),comparator);
189  selector_->select_fragments(candidates, out);
190  if(out.size() == 0) continue;
191  output << "position: " << I(12, qPos) << " neighbors: " << I(10,out.size()) << std::endl << std::endl;
192  FragmentScoreManagerOP ms = get_score_manager();
193  if( ms->if_late_scoring_for_zeros() ) {
194  for (Size fi = 1; fi <= out.size(); ++fi)
195  ms->score_zero_scores(out[fi].first,out[fi].second);
196  }
197  for (Size fi = 1; fi <= out.size(); ++fi) {
198  out[fi].first->print_fragment(output);
199  output << std::endl;
200  }
201  if (option[frags::describe_fragments].user()) {
202  get_score_manager()->describe_fragments(out, out_file);
203  }
204  tr.Info << "Collected candidates of size "<<fragment_size<<" at pos"<<qPos<<std::endl;
205  storage->clear();
206  tr.Debug<< storage->count_candidates()<<" candidates left in a sink after flushing"<<std::endl;
207  }
208  output.close();
209  out_file.close();
210  }
211  tr.Info<<std::endl;
212 }
213 
214 
215 
216 void FragmentPicker::fragment_contacts( Size const fragment_size, utility::vector1<Candidates> const & fragment_set ) {
217  using namespace ObjexxFCL;
218 
219  // how many neighboring residues shall we also find contacts for?
220  Size neighbors = option[frags::contacts::neighbors]();
221 
222  // output all contacts?
223  std::set<ContactType>::iterator it;
224  utility::io::ozstream output_all_contacts;
225  bool output_all = option[frags::contacts::output_all]();
226  if (output_all) {
227  std::string scale_factor = "0";
228  for (it=contact_types_.begin(); it!=contact_types_.end(); it++)
229  if (*it == CEN) scale_factor = string_of(sidechain_contact_dist_cutoff_->scale_factor());
230  replace( scale_factor.begin(), scale_factor.end(), '.', '_' );
231  const std::string out_file_name_all_contacts = prefix_ + "." + string_of(contacts_min_seq_sep_) + "." + string_of(sqrt(contacts_dist_cutoff_squared_)) + "." + scale_factor +
232  "." + string_of(n_frags_) + "." + string_of(fragment_size) + "mers.contacts";
233  output_all_contacts.open(out_file_name_all_contacts.c_str());
234  }
235 
236  // initialize contact counts
237  std::map<std::pair<Real,ContactType>, ContactCountsOP> contact_counts;
238  for (it=contact_types_.begin(); it!=contact_types_.end(); it++) {
239  if (*it == CEN) {
240  std::pair<Real,ContactType> p(0,*it);
241  contact_counts[p] = new ContactCounts();
242  } else {
243  for (Size i=1; i<=contacts_dist_cutoffs_squared_.size();++i) {
244  std::pair<Real,ContactType> p(contacts_dist_cutoffs_squared_[i],*it);
245  contact_counts[p] = new ContactCounts();
246  }
247  }
248  }
249 
250  // output all header
251  if (output_all)
252  output_all_contacts << "# i j type dist cutoff frag_pos frag_rank" << std::endl;
253 
254  for (Size iqpos = 1; iqpos <= query_positions_.size()-fragment_size+1; ++iqpos) {
255  Size qPosi = query_positions_[iqpos];
256  Candidates const & outi = fragment_set[qPosi];
257  for (Size fi = 1; fi <= outi.size(); ++fi) { // loop through selected fragments at qPosi
258 
259  // for neighboring contacts
260  // chunks can be different chains
261  VallChunkOP chunk = outi[fi].first->get_chunk();
262  int cPos_offset = outi[fi].first->get_first_index_in_vall() - qPosi;
263 
264  for (Size i=1; i<=fragment_size;++i) {
265  VallResidueOP ri = outi[fi].first->get_residue(i);
266  Size q_pos_i = qPosi + i - 1;
267  for (Size j=i+1; j<=fragment_size;++j) {
268  Size q_pos_j = qPosi + j - 1;
269 
270  // skip local contacts relative to query
271  if (std::abs(int(q_pos_i-q_pos_j)) < (int)contacts_min_seq_sep_) continue;
272  // skip local contacts relative to fragments
273  if (std::abs(int(ri->resi() - outi[fi].first->get_residue(j)->resi() )) < (int)contacts_min_seq_sep_) continue;
274 
275  for (it=contact_types_.begin(); it!=contact_types_.end(); it++) {
276 
277  // pair distance
278  Real distance_squared = ri->distance_squared(outi[fi].first->get_residue(j), *it);
279 
280  // distance cutoff
281  Real cutoff_dist_squared = (*it == CEN) ?
282  sidechain_contact_dist_cutoff_->get_cutoff_squared( ri->aa(), outi[fi].first->get_residue(j)->aa() ) :
283  contacts_dist_cutoff_squared_;
284 
285  if (distance_squared <= cutoff_dist_squared) {
286 
287  // output all row
288  if (output_all)
289  output_all_contacts << q_pos_i << " " << q_pos_j << " " << contact_name(*it) << " " <<
290  fmt::F(5, 2, sqrt(distance_squared)) << " " << fmt::F(5, 2, sqrt(cutoff_dist_squared)) << " " << qPosi << " "<< fi << std::endl;
291 
292  // sorry for the copy and paste code below
293  if (*it == CEN) {
294  // iterate contact counts
295  std::pair<Real,ContactType> p(0,*it);
296  std::pair<Size,Size> querypair(q_pos_i, q_pos_j);
297  contact_counts[p]->iterate(querypair);
298 
299  // iterate neighboring contact counts
300  if (neighbors > 0) {
301  int m_min_tmp = q_pos_i-neighbors;
302  Size m_min = (m_min_tmp >= 1) ? m_min_tmp : 1;
303  Size m_max = q_pos_i+neighbors;
304  int n_min_tmp = q_pos_j-neighbors;
305  Size n_min = (n_min_tmp >= 1) ? n_min_tmp : 1;
306  Size n_max = q_pos_j+neighbors;
307  // m == query position i
308  for (Size m = m_min; m <= m_max; ++m) {
309  if (m > size_of_query()) continue;
310  // chunk_i = chunk position i
311  Size chunk_i = cPos_offset + m;
312  if (chunk_i < 1 || chunk_i > chunk->size()) continue;
313  // n == query position j
314  for (Size n = n_min; n <= n_max; ++n) {
315  if (n > size_of_query()) continue;
316  if (m == q_pos_i && n == q_pos_j) continue;
317  // chunk_j = chunk position j
318  int chunk_j = cPos_offset + n;
319  if (chunk_j < 1 || chunk_j > (int)chunk->size()) continue;
320 
321  // skip local contacts relative to query
322  if (std::abs(int(m-n)) < (int)contacts_min_seq_sep_) continue;
323  // skip local contacts relative to fragments
324  if (std::abs(int( chunk->at(chunk_i)->resi() - chunk->at(chunk_j)->resi() )) < (int)contacts_min_seq_sep_) continue;
325 
326  Real dist_squared = chunk->at(chunk_i)->distance_squared(chunk->at(chunk_j), *it);
327  if (dist_squared <= sidechain_contact_dist_cutoff_->get_cutoff_squared( chunk->at(chunk_i)->aa(), chunk->at(chunk_j)->aa() )) {
328  std::pair<Size,Size> neighbor_pair(m, n);
329  contact_counts[p]->iterate_neighbor(querypair, neighbor_pair);
330  }
331  }
332  }
333  }
334 
335  } else {
336  for (Size cdi=1; cdi<=contacts_dist_cutoffs_squared_.size();++cdi) {
337  if (distance_squared < contacts_dist_cutoffs_squared_[cdi]) {
338  // iterate contact counts
339  std::pair<Real,ContactType> p(contacts_dist_cutoffs_squared_[cdi],*it);
340  std::pair<Size,Size> querypair(q_pos_i, q_pos_j);
341  contact_counts[p]->iterate(querypair);
342 
343  // iterate neighboring contact counts
344  if (neighbors > 0) {
345  int m_min_tmp = q_pos_i-neighbors;
346  Size m_min = (m_min_tmp >= 1) ? m_min_tmp : 1;
347  Size m_max = q_pos_i+neighbors;
348  int n_min_tmp = q_pos_j-neighbors;
349  Size n_min = (n_min_tmp >= 1) ? n_min_tmp : 1;
350  Size n_max = q_pos_j+neighbors;
351  // m == query position i
352  for (Size m = m_min; m <= m_max; ++m) {
353  if (m > size_of_query()) continue;
354  // chunk_i = chunk position i
355  int chunk_i = cPos_offset + m;
356  if (chunk_i < 1 || chunk_i > (int)chunk->size()) continue;
357  // n == query position j
358  for (Size n = n_min; n <= n_max; ++n) {
359  if (n > size_of_query()) continue;
360  if (m == q_pos_i && n == q_pos_j) continue;
361  // chunk_j = chunk position j
362  int chunk_j = cPos_offset + n;
363  if (chunk_j < 1 || chunk_j > (int)chunk->size()) continue;
364 
365  // skip local contacts relative to query
366  Size m_n_sep = std::abs(int(m - n)); // sequence separation
367  if (m_n_sep < contacts_min_seq_sep_) continue;
368  // skip local contacts relative to fragments
369  if (std::abs(int( chunk->at(chunk_i)->resi() - chunk->at(chunk_j)->resi() )) < (int)contacts_min_seq_sep_) continue;
370 
371  Real dist_squared = chunk->at(chunk_i)->distance_squared(chunk->at(chunk_j), *it);
372  if (dist_squared <= contacts_dist_cutoffs_squared_[cdi]) {
373  std::pair<Size,Size> neighbor_pair(m, n);
374  contact_counts[p]->iterate_neighbor(querypair, neighbor_pair);
375  }
376  }
377  }
378  }
379  }
380  }
381  }
382  }
383  }
384  }
385  }
386  }
387  }
388  if (output_all) output_all_contacts.close();
389 
390  // now output pair counts - sorry for the copy and paste code here
391  for (it=contact_types_.begin(); it!=contact_types_.end(); it++) {
392  if (*it == CEN) {
393  std::string scale_factor = string_of(sidechain_contact_dist_cutoff_->scale_factor());
394  replace( scale_factor.begin(), scale_factor.end(), '.', '_' );
395  const std::string out_file_name_contacts = prefix_ + "." + contact_name(*it) + "." + string_of(contacts_min_seq_sep_) + "." + scale_factor + "." +
396  string_of(n_frags_) + "." + string_of(fragment_size) + "mers.contacts";
397  utility::io::ozstream output_contacts(out_file_name_contacts);
398  output_contacts << "# i j count";
399  if (neighbors > 0) output_contacts << " neighbors_" << neighbors << "_i_j_count";
400  output_contacts << std::endl;
401  std::pair<Real,ContactType> p(0,*it);
402  std::map<std::pair<Size,Size>, Size> query_counts = contact_counts[p]->counts();
403  std::map<std::pair<Size,Size>, Size>::iterator iter;
404  for ( iter = query_counts.begin(); iter != query_counts.end(); iter++ ) {
405  std::pair<Size,Size> query_pair = iter->first;
406  output_contacts << query_pair.first << " " << query_pair.second << " " << iter->second;
407  if (neighbors > 0 && contact_counts[p]->neighbor_counts_exist(query_pair)) {
408  std::map<std::pair<Size,Size>, Size> neighbor_counts = contact_counts[p]->neighbor_counts(query_pair);
409  std::map<std::pair<Size,Size>, Size>::iterator neigh_iter;
410  for ( neigh_iter = neighbor_counts.begin(); neigh_iter != neighbor_counts.end(); neigh_iter++ ) {
411  std::pair<Size,Size> neighbor_pair = neigh_iter->first;
412  output_contacts << " " << neighbor_pair.first << " " << neighbor_pair.second << " " << neigh_iter->second;
413  }
414  }
415  output_contacts << std::endl;
416  }
417  output_contacts.close();
418  } else {
419  for (Size i=1; i<=contacts_dist_cutoffs_squared_.size();++i) {
420  const std::string out_file_name_contacts = prefix_ + "." + contact_name(*it) + "." + string_of(contacts_min_seq_sep_) + "." + string_of(sqrt(contacts_dist_cutoffs_squared_[i])) +
421  "." + string_of(n_frags_) + "." + string_of(fragment_size) + "mers.contacts";
422  utility::io::ozstream output_contacts(out_file_name_contacts);
423  output_contacts << "# i j count";
424  if (neighbors > 0) output_contacts << " neighbors_" << neighbors << "_i_j_count";
425  output_contacts << std::endl;
426  std::pair<Real,ContactType> p(contacts_dist_cutoffs_squared_[i],*it);
427  std::map<std::pair<Size,Size>, Size> query_counts = contact_counts[p]->counts();
428  std::map<std::pair<Size,Size>, Size>::iterator iter;
429  for ( iter = query_counts.begin(); iter != query_counts.end(); iter++ ) {
430  std::pair<Size,Size> query_pair = iter->first;
431  output_contacts << query_pair.first << " " << query_pair.second << " " << iter->second;
432  if (neighbors > 0 && contact_counts[p]->neighbor_counts_exist(query_pair)) {
433  std::map<std::pair<Size,Size>, Size> neighbor_counts = contact_counts[p]->neighbor_counts(query_pair);
434  std::map<std::pair<Size,Size>, Size>::iterator neigh_iter;
435  for ( neigh_iter = neighbor_counts.begin(); neigh_iter != neighbor_counts.end(); neigh_iter++ ) {
436  std::pair<Size,Size> neighbor_pair = neigh_iter->first;
437  output_contacts << " " << neighbor_pair.first << " " << neighbor_pair.second << " " << neigh_iter->second;
438  }
439  }
440  output_contacts << std::endl;
441  }
442  output_contacts.close();
443  }
444  }
445  }
446 }
447 
448 
449 // should be thread safe
450 void FragmentPicker::nonlocal_pairs_at_positions( utility::vector1<Size> const & positions, Size const & fragment_size, utility::vector1<bool> const & skip,
452 
453  Size const maxjqpos = size_of_query()-fragment_size+1;
454 
455  // loop through query positions, qPosi
456  for (Size p = 1; p <= positions.size(); ++p) {
457  Size const qPosi = positions[p];
458  Candidates const & outi = fragment_set[qPosi]; // candidates at i
459  Size const minjqpos = qPosi+fragment_size+contacts_min_seq_sep_-1;
460  // loop through nonlocal query positions, qPosj
461  for (Size jqpos = 1; jqpos <= query_positions_.size(); ++jqpos) {
462  Size qPosj = query_positions_[jqpos];
463  if (qPosj > maxjqpos || qPosj < minjqpos) continue;
464  bool skip_it = true;
465  for (Size i=0; i<fragment_size;++i) {
466  if (!skip[qPosi+i] || !skip[qPosj+i]) {
467  skip_it = false;
468  break;
469  }
470  }
471  if (skip_it) continue;
472  Candidates const & outj = fragment_set[qPosj]; // candidates at j
473  for (Size fi = 1; fi <= outi.size(); ++fi) { // loop through selected fragments at qPosi
474  for (Size fj = 1; fj <= outj.size(); ++fj) { // loop through selected fragments at qPosj
475  if (!outi[fi].first->same_chain( outj[fj].first )) continue; // skip if not from same pdb chain
476  //if (outi[fi].first->get_residue(1)->resi() >= outj[fj].first->get_residue(1)->resi()) continue; // skip inverse pairs
477  //if (std::abs(int(outi[fi].first->get_residue(1)->resi()-outj[fj].first->get_residue(1)->resi())) < min_pdb_seq_sep) continue; // skip if too local in PDB
478  if (std::abs(int(outi[fi].first->get_residue(1)->resi()-outj[fj].first->get_residue(1)->resi())) < (int)fragment_size) continue; // skip overlapping fragments in PDB
479  Size qpi = qPosi; // query position i in fragment
481  bool skip = false;
482  bool has_good_constraint = false;
483  bool has_constraints = (atom_pair_constraint_contact_map_.size() > 0) ? true : false;
484  for (Size i=1; i<=fragment_size;++i) {
485  VallResidueOP ri = outi[fi].first->get_residue(i);
486  Size qpj = qPosj; // query position j in fragment
487  for (Size j=1; j<=fragment_size;++j) {
488  if (skip) continue;
489  if (std::abs(int(qpi-qpj)) < (int)contacts_min_seq_sep_) continue;
490  // skip local contacts relative to fragments
491  if (std::abs(int( ri->resi()-outj[fj].first->get_residue(j)->resi() )) < (int)contacts_min_seq_sep_) continue;
492  std::set<ContactType>::iterator it;
493  for (it=contact_types_.begin(); it!=contact_types_.end(); it++) {
494  // contact distance cutoff
495  Real cutoff_dist_squared = (*it == CEN) ?
496  sidechain_contact_dist_cutoff_->get_cutoff_squared( ri->aa(), outj[fj].first->get_residue(j)->aa() ) :
497  contacts_dist_cutoff_squared_;
498  // contact distance
499  Real dist_squared = ri->distance_squared(outj[fj].first->get_residue(j), *it);
500  if (has_constraints && atom_pair_constraint_contact_map_[qpi][qpj] > 0) {
501  if (dist_squared > atom_pair_constraint_contact_map_[qpi][qpj]) {
502  skip = true;
503  continue;
504  } else {
505  has_good_constraint = true;
506  }
507  }
508  if (dist_squared <= cutoff_dist_squared) contacts.push_back(new Contact( qpi, qpj, dist_squared, *it ));
509  }
510  qpj++;
511  }
512  qpi++;
513  }
514  if (!skip && contacts.size() > 0 && (!has_constraints || has_good_constraint)) {
515  // save all fragment pairs with contacts
516  nonlocal::NonlocalPairOP pair = new nonlocal::NonlocalPair( qPosi, qPosj, outi[fi], outj[fj], fi, fj, contacts );
517  pairs.push_back(pair);
518  } // contact
519  } // fi
520  } // fj
521  } // jqpos
522  }
523 
524 }
525 
526 void FragmentPicker::nonlocal_pairs( Size const fragment_size, utility::vector1<Candidates> const & fragment_set ) {
527  using namespace ObjexxFCL;
528 
529  // always print ca coords
530  bool orig_opt = option[frags::write_ca_coordinates]();
531  option[frags::write_ca_coordinates].value(true);
532 
533  // how many neighboring residues shall we also find contacts for?
534  Size neighbors = option[frags::contacts::neighbors]();
535 
536  // native
537  bool has_native = false;
538  core::pose::PoseOP nativePose;
539  if (option[in::file::native].user()) {
540  nativePose = new core::pose::Pose;
541  core::import_pose::pose_from_pdb(*nativePose, option[in::file::native]());
542  has_native = true;
543  } else if (option[in::file::s].user()) {
544  nativePose = new core::pose::Pose;
545  core::import_pose::pose_from_pdb(*nativePose, option[in::file::s]()[1]);
546  has_native = true;
547  }
548 
549  // atom pair constraints?
550  if (option[constraints::cst_file].user()) {
551  tr.Info << "Reading constraints from: "
552  << option[constraints::cst_file]()[1] << std::endl;
553  // initialize
554  atom_pair_constraint_contact_map_.resize(size_of_query());
555  for ( Size qi = 1; qi <= size_of_query(); qi++) {
556  for ( Size qj = 1; qj <= size_of_query(); qj++) {
557  atom_pair_constraint_contact_map_[qi].push_back( 0 );
558  }
559  }
560 
561  utility::io::izstream data(option[constraints::cst_file]()[1].c_str());
562  if (!data) {
563  utility_exit_with_message("[ERROR] Unable to open constraints file: "
564  + option[constraints::cst_file]()[1]);
565  }
566  std::string line;
567  getline(data, line); // header line
568  std::string tag;
569  Size n_constr = 0;
570  while (!data.fail()) {
571  char c = data.peek();
572  if (c == '#' || c == '\n') {
573  getline(data, line); //comment
574  continue;
575  }
576  data >> tag;
577  if (data.fail()) {
578  tr.Debug << option[constraints::cst_file]()[1]
579  << " end of file reached" << std::endl;
580  break;
581  }
582  if (tag == "AtomPair") {
583  std::string name1, name2, func_type;
584  Size id1, id2;
585  data >> name1 >> id1 >> name2 >> id2 >> func_type;
586  tr.Debug << "read: " << name1 << " " << id1
587  << " " << name2 << " " << id2 << " func: " << func_type
588  << std::endl;
589  if (id1 <= size_of_query() && id2 <= size_of_query()) {
590  atom_pair_constraint_contact_map_[id1][id2] = 81.0; // hard code this for a casp10 hack
591  atom_pair_constraint_contact_map_[id2][id1] = 81.0;
592  n_constr++;
593  }
594  }
595  }
596  tr.Info << n_constr << " constraints loaded from a file" << std::endl;
597  }
598 
599 
600  // skip positions from an input alignment if one exists
601  utility::vector1<bool> skip_position( size_of_query(), false );
602  //bool has_positions_to_skip = false;
603  if (option[ in::file::alignment ].user()) {
605  core::sequence::read_aln( option[ cm::aln_format ](), option[ in::file::alignment ]()[1] );
606  tr.Info << "Input alignment used to skip aligned positions: " << std::endl;
607  tr.Info << alns[1] << std::endl;
608  Size const query_idx( 1 );
609  Size const templ_idx( 2 );
610  Size nres = size_of_query();
611  core::id::SequenceMapping mapping_(
612  alns[1].sequence_mapping( query_idx, templ_idx )
613  );
614  for ( Size resi = 1; resi <= nres; resi++ ) {
615  Size t_resi = mapping_[ resi ];
616  bool const gap_exists( t_resi == 0 ); // query residue maps to a gap
617  if ( !gap_exists ) {
618  skip_position[resi] = true;
619  //has_positions_to_skip = true; set but never used ~Labonte
620  }
621  }
622  }
623 
624  // initialize contact counts
625  std::map<std::pair<Real,ContactType>, ContactCountsOP> contact_counts;
626  std::set<ContactType>::iterator it;
627  for (it=contact_types_.begin(); it!=contact_types_.end(); it++) {
628  if (*it == CEN) {
629  std::pair<Real,ContactType> p(0,*it);
630  contact_counts[p] = new ContactCounts();
631  } else {
632  for (Size i=1; i<=contacts_dist_cutoffs_squared_.size();++i) {
633  std::pair<Real,ContactType> p(contacts_dist_cutoffs_squared_[i],*it);
634  contact_counts[p] = new ContactCounts();
635  }
636  }
637  }
638 
639 
640  Real const min_contacts = (nonlocal_min_contacts_per_res_ < 1.0) ? 1.0 : nonlocal_min_contacts_per_res_*(Real)fragment_size;
641  Size const maxiqpos = size_of_query()-(contacts_min_seq_sep_-1)-fragment_size-fragment_size+1;
642 
643  time_t time_start = time(NULL);
644 
645  utility::vector1<utility::vector1<Size> > qPosi_to_run( max_threads_ );
646  Size positions_cnt = 0;
647  for (Size iqpos = 1; iqpos <= query_positions_.size(); ++iqpos) {
648  Size qPosi = query_positions_[iqpos];
649  if (qPosi > maxiqpos) continue;
650  positions_cnt++;
651  }
652  const Size qPosi_per_thread = positions_cnt/max_threads_;
653  Size thread = 1;
654  for (Size iqpos = 1; iqpos <= query_positions_.size(); ++iqpos) {
655  Size qPosi = query_positions_[iqpos];
656  if (qPosi > maxiqpos) continue;
657  qPosi_to_run[thread].push_back( qPosi );
658  if (qPosi_to_run[thread].size() >= qPosi_per_thread && thread < max_threads_) ++thread;
659  }
661 #ifdef USE_BOOST_THREAD
662  boost::thread_group threads;
663  tr.super_mute(true); // lets suppress tracer output when running multi threads
664  for (Size j = 1; j <= max_threads_; ++j) {
665  if (qPosi_to_run[j].size() > 0) {
666  std::cout << "thread: " << j << " - " << qPosi_to_run[j].size() << " positions -";
667  for (Size pos = 1; pos <= qPosi_to_run[j].size(); ++pos) std::cout << " " << qPosi_to_run[j][pos];
668  std::cout << std::endl;
669  threads.create_thread(boost::bind(&FragmentPicker::nonlocal_pairs_at_positions, this, boost::ref(qPosi_to_run[j]), fragment_size, boost::ref(skip_position),
670  boost::ref(fragment_set), boost::ref(thread_pairs[j])));
671  }
672  }
673  threads.join_all();
674  tr.super_mute(false);
675 #else
676  // single thread
677  nonlocal_pairs_at_positions( qPosi_to_run[1], fragment_size, skip_position, fragment_set, thread_pairs[1] );
678 #endif
679 
680  // silent output
681  std::string scale_factor = "0";
682  for (it=contact_types_.begin(); it!=contact_types_.end(); it++)
683  if (*it == CEN) scale_factor = string_of(sidechain_contact_dist_cutoff_->scale_factor());
684  replace( scale_factor.begin(), scale_factor.end(), '.', '_' );
685  const std::string silent_out_file_name = prefix_ + "." + string_of(contacts_min_seq_sep_) + "." + string_of(sqrt(contacts_dist_cutoff_squared_)) + "." + scale_factor +
686  "." + string_of(n_frags_) + "." + string_of(fragment_size) + "mers.nonlocal_pairs.out";
688 
689  // contacts output
690  utility::io::ozstream contacts_output_all;
691  bool output_all = option[frags::contacts::output_all]();
692  if (output_all) {
693  const std::string contacts_out_file_name = prefix_ + "." + string_of(contacts_min_seq_sep_) + "." + string_of(sqrt(contacts_dist_cutoff_squared_)) + "." + scale_factor +
694  "." + string_of(n_frags_) + "." + string_of(fragment_size) + "mers.nonlocal_pairs.contacts";
695  contacts_output_all.open(contacts_out_file_name.c_str());
696  // contacts output header
697  contacts_output_all << "# i j type dist frag_i frag_j rank_i rank_j" << std::endl;
698  }
699 
700  // get score manager
701  FragmentScoreManagerOP ms = get_score_manager();
702 
703  // now output the thread pairs and merge the contacts maps
704  for (Size j = 1; j <= thread_pairs.size(); ++j) {
705  for (Size k = 1; k <= thread_pairs[j].size(); ++k) {
706 
707  Size qPosi = thread_pairs[j][k]->get_query_pos_i(); // fragment i pos
708  Size qPosj = thread_pairs[j][k]->get_query_pos_j(); // fragment j pos
709 
710  // for neighboring contacts
711  // chunks can be different chains
712  VallChunkOP chunki = thread_pairs[j][k]->get_candidate_i().first->get_chunk();
713  VallChunkOP chunkj = thread_pairs[j][k]->get_candidate_j().first->get_chunk();
714  int cPosi_offset = thread_pairs[j][k]->get_candidate_i().first->get_first_index_in_vall() - qPosi;
715  int cPosj_offset = thread_pairs[j][k]->get_candidate_j().first->get_first_index_in_vall() - qPosj;
716 
717  // get contacts
718  utility::vector1<ContactOP> contacts = thread_pairs[j][k]->get_contacts();
719 
720  if (option[frags::nonlocal::output_silent]()) {
721  // check if enough contacts exist to output fragment pair
722  std::map<ContactType, Size> contact_type_cnt;
723  std::map<ContactType, Size>::iterator iter;
724  bool output_pair = false;
725  for (it=contact_types_.begin(); it!=contact_types_.end(); it++)
726  contact_type_cnt[*it] = 0;
727  for (Size i=1; i<=contacts.size(); ++i) contact_type_cnt[contacts[i]->type()]++;
728  for ( iter = contact_type_cnt.begin(); iter != contact_type_cnt.end(); iter++ ) {
729  if ((Real)iter->second >= min_contacts) {
730  output_pair = true;
731  break;
732  }
733  }
734 
735  if (output_pair) {
736 
737  // make pose from frag
739  fragdatapair.push_back(thread_pairs[j][k]->get_candidate_i().first->get_frag_data());
740  fragdatapair.push_back(thread_pairs[j][k]->get_candidate_j().first->get_frag_data());
741  std::string const & sequence = get_query_seq_string().substr(qPosi-1,fragment_size) +
742  get_query_seq_string().substr(qPosj-1,fragment_size);
743  pose::Pose pose;
744  fragment::make_pose_from_frags( pose, sequence, fragdatapair, false );
745 
746  // output silent file
747  std::stringstream tag; // put candidate id and fragment start positions of fragment pair in tag
748  // tag format example: 12asA_1_17_28_132 DO NOT CHANGE THIS FORMAT SINCE THE SCORING APP USES THIS
749  tag << thread_pairs[j][k]->get_candidate_i().first->get_pdb_id() << thread_pairs[j][k]->get_candidate_i().first->get_chain_id() << // candidate id
750  "_" << qPosi << "_" << qPosj << "_" << // fragment query positions
751  thread_pairs[j][k]->get_candidate_i().first->get_residue(1)->resi() << "_" << thread_pairs[j][k]->get_candidate_j().first->get_residue(1)->resi(); // fragment template positions
752  if (!pose::is_ideal_pose(pose)) {
753  tr.Warning << "skipping " << tag.str() << ": non-ideal pose from VALL" << std::endl;
754  continue;
755  }
756 
757  // calculate rms to native
758  if (has_native) {
759  // get pose CA coords
760  std::vector< core::Vector > pose_coords;
761  std::vector< core::Vector > native_pose_coords;
762  for (Size i=1; i<=pose.total_residue(); i++)
763  pose_coords.push_back( pose.residue(i).xyz("CA") );
764  for (Size i=0; i<fragment_size; i++) {
765  // get native CA coords for frag i
766  Size respos = qPosi+i;
767  native_pose_coords.push_back( nativePose->residue(respos).xyz("CA") );
768  }
769  for (Size i=0; i<fragment_size; i++) {
770  // get native CA coords for frag j
771  Size respos = qPosj+i;
772  native_pose_coords.push_back( nativePose->residue(respos).xyz("CA") );
773  }
774  int const natoms = pose_coords.size();
775  FArray2D< core::Real > p1a( 3, natoms ); // orig pose
776  FArray2D< core::Real > p2a( 3, natoms ); // native pose
777  for ( int i = 0; i < natoms; ++i ) {
778  for ( int l = 0; l < 3; ++l ) { // l = X, Y and Z
779  p1a(l+1,i+1) = pose_coords[i][l];
780  p2a(l+1,i+1) = native_pose_coords[i][l];
781  }
782  }
783  // calculate rms of native to original pose
784  core::Real rms_orig_native = numeric::model_quality::rms_wrapper( natoms, p1a, p2a );
785  core::pose::setPoseExtraScores( pose, "frms", rms_orig_native );
786  }
787 
788  // save fragment i and fragment j data
789  core::pose::add_score_line_string( pose, "qpos_i", string_of(qPosi) ); // frag query i start position
790  core::pose::add_score_line_string( pose, "qpos_j", string_of(qPosj) ); // frag query j start position
791  core::pose::add_score_line_string( pose, "tpos_i", string_of(thread_pairs[j][k]->get_candidate_i().first->get_residue(1)->resi()) ); // frag template i start position
792  core::pose::add_score_line_string( pose, "tpos_j", string_of(thread_pairs[j][k]->get_candidate_j().first->get_residue(1)->resi()) ); // frag template j start position
793  core::pose::add_score_line_string( pose, "rank_i", string_of(thread_pairs[j][k]->get_candidate_i_rank()) ); // frag i rank
794  core::pose::add_score_line_string( pose, "rank_j", string_of(thread_pairs[j][k]->get_candidate_j_rank()) ); // frag j rank
795  core::pose::setPoseExtraScores( pose, "fscore_i", ms->total_score(thread_pairs[j][k]->get_candidate_i().second)); // frag i score
796  core::pose::setPoseExtraScores( pose, "fscore_j", ms->total_score(thread_pairs[j][k]->get_candidate_j().second)); // frag j score
797  core::pose::setPoseExtraScores( pose, "fscore", ms->total_score(thread_pairs[j][k]->get_candidate_i().second) + ms->total_score(thread_pairs[j][k]->get_candidate_j().second)); // frag i+j score
798 
799  // save contact counts
800  for ( iter = contact_type_cnt.begin(); iter != contact_type_cnt.end(); iter++ )
801  core::pose::add_score_line_string( pose, contact_name(iter->first), string_of(iter->second) );
803  ss->fill_struct( pose, tag.str() );
804  sfd.write_silent_struct( *ss, silent_out_file_name );
805 
806  }
807  }
808 
809 
810  for (Size i = 1; i <= contacts.size(); ++i) {
811 
812  // output all contacts
813  if (output_all)
814  // i j type dist frag_i_pos frag_j_pos frag_i_rank frag_j_rank
815  contacts_output_all << contacts[i]->i() << " " << contacts[i]->j() << " " << contacts[i]->type_name() << " " <<
816  fmt::F(5, 2, contacts[i]->dist()) << " " << qPosi << " " << qPosj << " " <<
817  thread_pairs[j][k]->get_candidate_i_rank() << " " << thread_pairs[j][k]->get_candidate_j_rank() << std::endl;
818 
819  // iterate contact counts
820  // sorry for the copy and paste code below
821  if (contacts[i]->type() == CEN) {
822  std::pair<Real,ContactType> p(0,CEN);
823  std::pair<Size,Size> querypair(contacts[i]->i(), contacts[i]->j());
824  contact_counts[p]->iterate(querypair);
825 
826  // iterate neighboring contact counts
827  if (neighbors > 0) {
828  int m_min_tmp = contacts[i]->i()-neighbors;
829  int n_min_tmp = contacts[i]->j()-neighbors;
830  Size m_min = (m_min_tmp >= 1) ? m_min_tmp : 1;
831  Size n_min = (n_min_tmp >= 1) ? n_min_tmp : 1;
832  Size m_max = contacts[i]->i()+neighbors;
833  Size n_max = contacts[i]->j()+neighbors;
834  // m == query position i
835  for (Size m = m_min; m <= m_max; ++m) {
836  if (m > size_of_query()) continue;
837  // chunk_i = chunk position i
838  int chunk_i = cPosi_offset + m;
839  if (chunk_i < 1 || chunk_i > (int)chunki->size()) continue;
840  // n == query position j
841  for (Size n = n_min; n <= n_max; ++n) {
842  if (n > size_of_query()) continue;
843  if (m == contacts[i]->i() && n == contacts[i]->j()) continue;
844  // chunk_j = chunk position j
845  int chunk_j = cPosj_offset + n;
846  if (chunk_j < 1 || chunk_j > (int)chunkj->size()) continue;
847 
848  // skip local contacts relative to query
849  if (std::abs(int(m-n)) < (int)contacts_min_seq_sep_) continue;
850  // skip local contacts relative to fragments
851  if (std::abs(int( chunki->at(chunk_i)->resi() - chunkj->at(chunk_j)->resi() )) < (int)contacts_min_seq_sep_) continue;
852 
853  // contact distance
854  Real dist_squared = chunki->at(chunk_i)->distance_squared(chunkj->at(chunk_j), contacts[i]->type());
855  if (dist_squared <= sidechain_contact_dist_cutoff_->get_cutoff_squared( chunki->at(chunk_i)->aa(), chunkj->at(chunk_j)->aa() )) {
856  std::pair<Size,Size> neighbor_pair(m, n);
857  contact_counts[p]->iterate_neighbor(querypair, neighbor_pair);
858  }
859  }
860  }
861  }
862  } else {
863 
864  for (Size cdi=1; cdi<=contacts_dist_cutoffs_squared_.size();++cdi) {
865  if (contacts[i]->dist_squared() <= contacts_dist_cutoffs_squared_[cdi]) {
866  std::pair<Real,ContactType> p(contacts_dist_cutoffs_squared_[cdi],contacts[i]->type());
867  std::pair<Size,Size> querypair(contacts[i]->i(), contacts[i]->j());
868  contact_counts[p]->iterate(querypair);
869 
870  // iterate neighboring contact counts
871  if (neighbors > 0) {
872  int m_min_tmp = contacts[i]->i()-neighbors;
873  int n_min_tmp = contacts[i]->j()-neighbors;
874  Size m_min = (m_min_tmp >= 1) ? m_min_tmp : 1;
875  Size n_min = (n_min_tmp >= 1) ? n_min_tmp : 1;
876  Size m_max = contacts[i]->i()+neighbors;
877  Size n_max = contacts[i]->j()+neighbors;
878  // m == query position i
879  for (Size m = m_min; m <= m_max; ++m) {
880  if (m > size_of_query()) continue;
881  // chunk_i = chunk position i
882  int chunk_i = cPosi_offset + m;
883  if (chunk_i < 1 || chunk_i > (int)chunki->size()) continue;
884  // n == query position j
885  for (Size n = n_min; n <= n_max; ++n) {
886  if (n > size_of_query()) continue;
887  if (m == contacts[i]->i() && n == contacts[i]->j()) continue;
888  // chunk_j = chunk position j
889  int chunk_j = cPosj_offset + n;
890  if (chunk_j < 1 || chunk_j > (int)chunkj->size()) continue;
891 
892  // skip local contacts relative to query
893  if (std::abs(int(m-n)) < (int)contacts_min_seq_sep_) continue;
894  // skip local contacts relative to fragments
895  if (std::abs(int( chunki->at(chunk_i)->resi() - chunkj->at(chunk_j)->resi() )) < (int)contacts_min_seq_sep_) continue;
896 
897  // contact distance
898  Real dist_squared = chunki->at(chunk_i)->distance_squared(chunkj->at(chunk_j), contacts[i]->type());
899  if (dist_squared <= contacts_dist_cutoffs_squared_[cdi]) {
900  std::pair<Size,Size> neighbor_pair(m, n);
901  contact_counts[p]->iterate_neighbor(querypair, neighbor_pair);
902  }
903  }
904  }
905  }
906  }
907  }
908  }
909  }
910  }
911  }
912  if (output_all) contacts_output_all.close();
913 
914  // now output pair counts
915  // sorry for the copy and paste code below
916  for (it=contact_types_.begin(); it!=contact_types_.end(); it++) {
917  if (*it == CEN) {
918  std::string scale_factor = string_of(sidechain_contact_dist_cutoff_->scale_factor());
919  replace( scale_factor.begin(), scale_factor.end(), '.', '_' );
920  const std::string out_file_name_contacts = prefix_ + "." + contact_name(*it) + "." + string_of(contacts_min_seq_sep_) + "." + scale_factor + "." +
921  string_of(n_frags_) + "." + string_of(fragment_size) + "mers.nonlocal_pairs.contacts";
922  utility::io::ozstream output_contacts(out_file_name_contacts);
923  output_contacts << "# i j count";
924  if (neighbors > 0) output_contacts << " neighbors_" << neighbors << "_i_j_count";
925  output_contacts << std::endl;
926  std::pair<Real,ContactType> p(0,*it);
927  std::map<std::pair<Size,Size>, Size> query_counts = contact_counts[p]->counts();
928  std::map<std::pair<Size,Size>, Size>::iterator iter;
929  for ( iter = query_counts.begin(); iter != query_counts.end(); iter++ ) {
930  std::pair<Size,Size> query_pair = iter->first;
931  output_contacts << query_pair.first << " " << query_pair.second << " " << iter->second;
932  if (neighbors > 0 && contact_counts[p]->neighbor_counts_exist(query_pair)) {
933  std::map<std::pair<Size,Size>, Size> neighbor_counts = contact_counts[p]->neighbor_counts(query_pair);
934  std::map<std::pair<Size,Size>, Size>::iterator neigh_iter;
935  for ( neigh_iter = neighbor_counts.begin(); neigh_iter != neighbor_counts.end(); neigh_iter++ ) {
936  std::pair<Size,Size> neighbor_pair = neigh_iter->first;
937  output_contacts << " " << neighbor_pair.first << " " << neighbor_pair.second << " " << neigh_iter->second;
938  }
939  }
940  output_contacts << std::endl;
941  }
942  output_contacts.close();
943  } else {
944  for (Size i=1; i<=contacts_dist_cutoffs_squared_.size();++i) {
945  const std::string out_file_name_contacts = prefix_ + "." + contact_name(*it) + "." + string_of(contacts_min_seq_sep_) + "." + string_of(sqrt(contacts_dist_cutoffs_squared_[i])) +
946  "." + string_of(n_frags_) + "." + string_of(fragment_size) + "mers.nonlocal_pairs.contacts";
947  utility::io::ozstream output_contacts(out_file_name_contacts);
948  output_contacts << "# i j count";
949  if (neighbors > 0) output_contacts << " neighbors_" << neighbors << "_i_j_count";
950  output_contacts << std::endl;
951  std::pair<Real,ContactType> p(contacts_dist_cutoffs_squared_[i],*it);
952  std::map<std::pair<Size,Size>, Size> query_counts = contact_counts[p]->counts();
953  std::map<std::pair<Size,Size>, Size>::iterator iter;
954  for ( iter = query_counts.begin(); iter != query_counts.end(); iter++ ) {
955  std::pair<Size,Size> query_pair = iter->first;
956  output_contacts << query_pair.first << " " << query_pair.second << " " << iter->second;
957  if (neighbors > 0 && contact_counts[p]->neighbor_counts_exist(query_pair)) {
958  std::map<std::pair<Size,Size>, Size> neighbor_counts = contact_counts[p]->neighbor_counts(query_pair);
959  std::map<std::pair<Size,Size>, Size>::iterator neigh_iter;
960  for ( neigh_iter = neighbor_counts.begin(); neigh_iter != neighbor_counts.end(); neigh_iter++ ) {
961  std::pair<Size,Size> neighbor_pair = neigh_iter->first;
962  output_contacts << " " << neighbor_pair.first << " " << neighbor_pair.second << " " << neigh_iter->second;
963  }
964  }
965  output_contacts << std::endl;
966  }
967  output_contacts.close();
968  }
969  }
970  }
971 
972  time_t time_end = time(NULL);
973 
974  tr.Info << "... done. Processed " << query_positions_.size() << " positions. Time elapsed: "
975  << (time_end - time_start) << " seconds." << std::endl;
976  tr.flush();
977 
978  option[frags::write_ca_coordinates].value(orig_opt);
979 }
980 
981 
982 // should be thread safe
984  for (Size i=1; i<=chunks.size(); ++i) {
985  VallChunkOP chunk = chunks[i];
986  scores_[index]->do_caching(chunk);
987  scores::FragmentScoreMapOP empty_map = scores_[index]->create_empty_map();
988  for (Size iFragSize = 1; iFragSize <= frag_sizes_.size(); ++iFragSize) { // Loop over various sizes of fragments
989  Size fragment_size = frag_sizes_[iFragSize];
990  if (chunk->size() < fragment_size) continue; // This fragment is too short
991  CandidatesCollectorOP sink = candidates_sinks_[index][fragment_size];
992  for (Size iqpos = 1; iqpos <= query_positions_.size(); ++iqpos) { // loop over positions in a query
993  Size iPos = query_positions_[iqpos];
994  if ( iPos > size_of_query() - fragment_size + 1 ) continue;
995  // split chunk into fragment candidates and score them
996  for (Size j = 1; j <= chunk->size() - fragment_size + 1; j++) {
997  FragmentCandidateOP f = new FragmentCandidate(iPos, j, chunk, fragment_size);
998  if (scores_[index]->score_fragment_from_cache(f, empty_map)) {
999  std::pair<FragmentCandidateOP,scores::FragmentScoreMapOP> p(f,empty_map);
1000  if(sink->add(p)) empty_map = scores_[index]->create_empty_map();
1001  }
1002  }
1003  } // all query positions done
1004  } // all fragment sizes done
1005  scores_[index]->clean_up();
1006  } // all chunks
1007 }
1008 
1010 
1011  PROF_START( basic::FRAGMENTPICKING );
1012 
1013  tr.Info << "Picking candidates..." << std::endl;
1014  tr.flush();
1015 
1016  time_t time_start = time(NULL);
1017 
1018 #ifdef USE_BOOST_THREAD
1019  if (max_threads_ > 1) {
1020  utility::vector1<utility::vector1<VallChunkOP> > chunks_to_run( max_threads_ );
1021  Size valid_chunks_cnt = 0;
1022  for (Size i = 1; i <= chunks_->size(); ++i) { // loop over provided chunks
1023  VallChunkOP chunk = chunks_->at(i);
1024  if (!is_valid_chunk( chunk )) continue;
1025  valid_chunks_cnt++;
1026  }
1027  const Size chunks_per_thread = valid_chunks_cnt/max_threads_;
1028  Size thread = 1;
1029  for (Size i = 1; i <= chunks_->size(); ++i) { // loop over provided chunks
1030  VallChunkOP chunk = chunks_->at(i);
1031  if (!is_valid_chunk( chunk )) continue;
1032  chunks_to_run[thread].push_back( chunk );
1033  if (chunks_to_run[thread].size() >= chunks_per_thread && thread < max_threads_) ++thread;
1034  }
1035  boost::thread_group threads;
1036  tr.super_mute(true); // lets suppress tracer output when running multi threads
1037  for (Size j = 1; j <= max_threads_; ++j) {
1038  if (chunks_to_run[j].size() > 0) {
1039  std::cout << "thread: " << j << " - " << chunks_to_run[j].size() << " chunks" << std::endl;
1040  threads.create_thread(boost::bind(&FragmentPicker::pick_chunk_candidates, this, boost::ref(chunks_to_run[j]), j));
1041  }
1042  }
1043  threads.join_all();
1044  tr.super_mute(false);
1045 
1046  time_t time_end = time(NULL);
1047  tr.Info << "... done. Processed " << chunks_->size() << " chunks. Time elapsed: "
1048  << (time_end - time_start) << " seconds." << std::endl;
1049  tr.flush();
1050 
1051  PROF_STOP( basic::FRAGMENTPICKING );
1052 
1053  return;
1054  }
1055 #endif // USE_BOOST_THREAD
1056 
1057  scores::FragmentScoreMapOP empty_map = scores_[1]->create_empty_map();
1058 
1059  for (Size i = 1; i <= chunks_->size(); i++) { // loop over provided chunks
1060  VallChunkOP chunk = chunks_->at(i); // For each chunk from a provider...
1061  if (!is_valid_chunk( chunk )) continue;
1062  tr.Trace << "Processing sequence from vall: " << chunk->get_sequence() << std::endl;
1063 
1064  // cache the new chunk
1065  scores_[1]->do_caching(chunk);
1066 
1067  for (Size iFragSize = 1; iFragSize <= frag_sizes_.size(); ++iFragSize) { // Loop over various sizes of fragments
1068  Size fragment_size = frag_sizes_[iFragSize];
1069  if (chunk->size() < fragment_size) continue; // This fragment is too short
1070 
1071  Size maxqpos = size_of_query() - fragment_size + 1;
1072  CandidatesCollectorOP sink = candidates_sinks_[1][fragment_size];
1073  tr.Trace << "Picking fragments of size "<<fragment_size<<
1074  " at "<<query_positions_.size()<<" query positions"<<std::endl;
1075  for (Size iqpos = 1; iqpos <= query_positions_.size(); ++iqpos) { // loop over positions in a query
1076  Size iPos = query_positions_[iqpos];
1077  if ( iPos > maxqpos ) continue;
1078 
1079  // split chunk into fragment candidates and score them
1080  for (Size j = 1; j <= chunk->size() - fragment_size + 1; ++j) {
1081  FragmentCandidateOP f = new FragmentCandidate(iPos, j, chunk, fragment_size);
1082  if (scores_[1]->score_fragment_from_cache(f, empty_map)) {
1083  std::pair<FragmentCandidateOP,scores::FragmentScoreMapOP> p(f,empty_map);
1084  if(sink->add(p)) empty_map = scores_[1]->create_empty_map();
1085  }
1086  }
1087  } // all query positions done
1088  } // all fragment sizes done
1089  scores_[1]->clean_up();
1090  tr.Trace << chunk->get_pdb_id() << " done" << std::endl;
1091  if ( (i*100) % (chunks_->size()/100*100) == 0 ) tr.Info << (i*100) / chunks_->size()
1092  << "% done at "<< chunk->get_pdb_id() << std::endl;
1093  } // all chunks done
1094 
1095  time_t time_end = time(NULL);
1096  tr.Info << "... done. Processed " << chunks_->size() << " chunks. Time elapsed: "
1097  << (time_end - time_start) << " seconds." << std::endl;
1098  tr.flush();
1099 
1100  PROF_STOP( basic::FRAGMENTPICKING );
1101 }
1102 
1104 
1105  utility::vector1<Real> components = f->get_score_components();
1106  utility::vector1<Real> weights = scores_[index]->get_weights();
1107  Real total = 0.0;
1108  for (Size i = 1; i <= components.size(); i++)
1109  total += components.at(i) * weights.at(i);
1110 
1111  return total;
1112 }
1113 
1114 
1116  tr.Debug << sec_str_input.size() / 2 << " secondary structure assignment(s):\n";
1117  for (Size i = 1; i <= sec_str_input.size(); i += 2) {
1118  tr.Debug << i / 2 << " " << sec_str_input[i]
1119  << " file will be loaded under \"" << sec_str_input[i + 1] << "\" name\n";
1120  read_ss_file(sec_str_input[i], sec_str_input[i + 1]);
1121  }
1122  tr.Debug << std::endl;
1123 }
1124 
1126  std::string prediction_name) {
1127 
1128  utility::io::izstream data( file_name.c_str() );
1129  if ( !data ) {
1130  data.close();
1131  utility_exit_with_message( "Can't read secondary structure file: "+file_name );
1132  }
1133 
1134  std::string line, l1, l2, l3, l4, l5;
1135  getline( data, line );
1136  data.close();
1137 
1138  std::istringstream line_stream( line );
1139  line_stream >> l1 >> l2 >> l3 >> l4 >> l5;
1140 
1141  if ( (l1 == "#") && (l2 == "PSIPRED") && (l3 == "VFORMAT")
1142  && (l4 == "(PSIPRED") ) {
1143  read_psipred_ss2( file_name, prediction_name);
1144  } else {
1145  if ( (l1 == "REMARK") && (l2 == "Neural") && (l3 == "network")
1146  && (l4 == "secondary") && (l5 == "structure") ) {
1147  read_talos_ss( file_name, prediction_name);
1148  } else {
1149  utility_exit_with_message( "Can't identify secondary structure file type (needs vertical psipred_ss2 or talos+ pred.ss): "+file_name );
1150  }
1151  }
1152 }
1153 
1155  std::string prediction_name) {
1156 
1159  ss_profile->read_psipred_ss2(file_name);
1160 
1161  std::string query_ss_as_string;
1162  for (Size i = 1; i <= ss_profile->total_residue(); i++)
1163  query_ss_as_string += ss_profile->secstruct(i);
1164 
1165  query_ss_as_string_[prediction_name] = query_ss_as_string;
1166  query_ss_profile_[prediction_name] = ss_profile;
1167 }
1168 
1170  std::string prediction_name) {
1171 
1174  ss_profile->read_talos_ss(file_name);
1175  //TALOS files can be shortened if there is no data at end of sequence. Fill up with 1/3 1/3 1/3 propensities until end is reached
1176  ss_profile->extend(query_profile_->length());
1177  for ( Size pos = ss_profile->total_residue()+1; pos <= query_profile_->length(); pos++ ) {
1178  ss_profile->set_fractions( pos, 1.0/3.0, 1.0/3.0, 1.0/3.0, 0.0 );
1179  }
1180 
1181  std::string query_ss_as_string;
1182  for (Size i = 1; i <= ss_profile->total_residue(); i++)
1183  query_ss_as_string += ss_profile->secstruct(i);
1184 
1185  query_ss_as_string_[prediction_name] = query_ss_as_string;
1186  query_ss_profile_[prediction_name] = ss_profile;
1187 }
1188 
1189 void FragmentPicker::read_depth(std::string const & file_name) {
1190 
1191  utility::io::izstream data( file_name.c_str() );
1192  if ( !data ) {
1193  data.close();
1194  utility_exit_with_message( "Can't read DEPTH file: "+file_name );
1195  }
1196  std::string line, jnk, aathree;
1197  core::Real depth;
1198  query_residue_depth_.clear();
1199  getline( data, line ); // skip header
1200  while ( getline( data, line ) ) {
1201  std::istringstream line_stream( line );
1202 //# chain:residue all-atom all-atom(stdev) MC-atom MC-atom(stdev) SC-atom SC-atom(stdev) SC-polar-atom SC-polar-atom(stdev) SC-nonpolar-atom SC-nonpolar-atom(stdev)
1203  line_stream >> jnk >> aathree >> depth;
1204  if (aathree != "UNK")
1205  query_residue_depth_.push_back( depth );
1206  if ( line_stream.fail() )
1207  utility_exit_with_message( "Error reading in FragmentPicker::read_depth()!" );
1208  }
1209  data.close();
1210  if (query_residue_depth_.size() != size_of_query())
1211  utility_exit_with_message( "Error reading in FragmentPicker::read_depth(): does not match size of query!" );
1212 }
1213 
1215 
1216  utility::io::izstream data( file_name.c_str() );
1217  if ( !data ) {
1218  data.close();
1219  utility_exit_with_message( "Can't read spine-x file: "+file_name );
1220  }
1221  std::string line, jnk;
1222  core::Real phi, psi, asa, pkc_phi, pkc_psi;
1223  query_sa_prediction_.clear();
1224  query_phi_prediction_.clear();
1225  query_psi_prediction_.clear();
1226  getline( data, line ); // skip header
1227  while ( getline( data, line ) ) {
1228  std::istringstream line_stream( line );
1229 //# index AA SS phi1 psi1 P_E P_C P_H phi0 psi0 ASA S_pk S_SS pk_phi pk_psi pkc_phi pkc_ps
1230  line_stream >> jnk >> jnk >> jnk >> phi >> psi >> jnk >> jnk >> jnk >> jnk >> jnk >> asa >> jnk >> jnk >> jnk >> jnk >> pkc_phi >> pkc_psi;
1231  query_sa_prediction_.push_back( asa );
1232  query_phi_prediction_.push_back( phi );
1233  query_psi_prediction_.push_back( psi );
1234  query_phi_prediction_conf_.push_back( pkc_phi );
1235  query_psi_prediction_conf_.push_back( pkc_psi );
1236  if ( line_stream.fail() )
1237  utility_exit_with_message( "Error reading in FragmentPicker::read_spine_x()!" );
1238  }
1239  data.close();
1240  if (query_sa_prediction_.size() != size_of_query())
1241  utility_exit_with_message( "Error reading in FragmentPicker::read_spine_x(): does not match size of query!" );
1242 }
1243 
1245  std::string prediction_name) {
1246 
1249  ss_profile->extend(query_secondary.length());
1250 
1251  for (Size i = 1; i <= query_secondary.length(); ++i) {
1252  char ss = query_secondary[i - 1];
1253  if (ss == 'E')
1254  ss_profile->set_fractions(i, 0.0, 1.0, 0.0);
1255  else if (ss == 'L')
1256  ss_profile->set_fractions(i, 0.0, 0.0, 1.0);
1257  else
1258  ss_profile->set_fractions(i, 1.0, 0.0, 0.0);
1259  }
1260  query_ss_as_string_[prediction_name] = query_secondary;
1261  query_ss_profile_[prediction_name] = ss_profile;
1262 }
1263 
1265  using namespace ObjexxFCL;
1266  tr.Info << "Saving Fragments..." << std::endl;
1267  const bool skip_merge = (candidates_sinks_.size() == 1) ? true : false;
1268  tr.Debug << "skip_merge: " << ( skip_merge ? "true" : "false" ) << std::endl;
1269  for (Size iFragSize = 1; iFragSize <= frag_sizes_.size(); ++iFragSize) { // Loop over various sizes of fragments
1270  Size fragment_size = frag_sizes_[iFragSize];
1271  Size maxqpos = size_of_query() - fragment_size + 1;
1272 
1273  utility::vector1<Candidates> final_fragments(maxqpos); // final fragments
1274 
1275  for (Size iqpos = 1; iqpos <= query_positions_.size(); ++iqpos) {
1276  Size qPos = query_positions_[iqpos];
1277  if ( qPos > maxqpos) continue;
1278  tr.Debug << "saving " << fragment_size << "mers for position..." << qPos << std::endl;
1279  if (!skip_merge) { // merge candidates
1280  for (Size i=1;i<=candidates_sinks_.size();++i)
1281  candidates_sink_[fragment_size]->insert(qPos, candidates_sinks_[i][fragment_size]);
1282  }
1283  Candidates in, out;
1284  if (skip_merge) {
1285  in = candidates_sinks_[1][fragment_size]->get_candidates(qPos);
1286  } else {
1287  in = candidates_sink_[fragment_size]->get_candidates(qPos);
1288  }
1289  if ( in.size() == 0 ) continue;
1290  selector_->select_fragments(in, out);
1291  final_fragments[qPos] = out;
1292  }
1293  tr.Debug << "call output_fragments now: " << std::endl;
1294  output_fragments( fragment_size, final_fragments );
1295  if (skip_merge) {
1296  // tr.Debug << "write report..." << std::endl;
1297  // candidates_sinks_[1][fragment_size]->print_report(tr.Info, get_score_manager());
1298  } else {
1299  // candidates_sink_[fragment_size]->print_report(tr.Info, get_score_manager());
1300  }
1301  }
1302 }
1303 
1305  using namespace ObjexxFCL;
1306  const bool skip_merge = (candidates_sinks_.size() == 1) ? true : false;
1307  for (Size iFragSize = 1; iFragSize <= frag_sizes_.size(); ++iFragSize) { // Loop over various sizes of fragments
1308  Size fragment_size = frag_sizes_[iFragSize];
1309  Size maxqpos = size_of_query() - fragment_size + 1;
1310  std::string out_file_name = prefix_ + "." + string_of(fragment_size)
1311  + "mers";
1312  utility::io::ozstream output(out_file_name);
1313 
1314  for (Size iqpos = 1; iqpos <= query_positions_.size(); ++iqpos) {
1315  Size qPos = query_positions_[iqpos];
1316  if ( qPos > maxqpos) continue;
1317 
1318  if (!skip_merge) { // merge candidates
1319  for (Size i=1;i<=candidates_sinks_.size();++i)
1320  candidates_sink_[fragment_size]->insert(qPos, candidates_sinks_[i][fragment_size]);
1321  }
1322  Candidates out;
1323  if (skip_merge) {
1324  out = candidates_sinks_[1][fragment_size]->get_candidates(qPos);
1325  } else {
1326  out = candidates_sink_[fragment_size]->get_candidates(qPos);
1327  }
1328  if (out.size() == 0) continue;
1329  output << "position: " << I(12, qPos) << " neighbors: " << I(10,
1330  out.size()) << std::endl << std::endl;
1331  for (Size fi = 1; fi <= out.size(); ++fi) {
1332  out[fi].first->print_fragment(output);
1333  output << std::endl;
1334  }
1335  }
1336  if (skip_merge) {
1337  candidates_sinks_[1][fragment_size]->print_report(tr.Debug, get_score_manager());
1338  } else {
1339  candidates_sink_[fragment_size]->print_report(tr.Debug, get_score_manager());
1340  }
1341  output.close();
1342  }
1343 }
1344 
1346  if (candidates_sinks_.size() > 1)
1347  utility_exit_with_message( "pick_candidates(Size i_pos,Size frag_len) does not support multiple CandidateCollectors" );
1348  scores::FragmentScoreMapOP empty_map = scores_[1]->create_empty_map();
1349  for (Size i = 1; i <= chunks_->size(); i++) { // loop over provided chunks
1350  VallChunkOP chunk = chunks_->at(i); // For each chunk from a provider...
1351  if (!is_valid_chunk( frag_len, chunk )) continue;
1352 
1353  tr.Trace << "Processing sequence from vall: " << chunk->get_sequence() << std::endl;
1354 
1355  CandidatesCollectorOP sink = candidates_sinks_[1][frag_len];
1356 
1357  // split chunk into fragment candidates and score them
1358  for (Size j = 1; j <= chunk->size() - frag_len + 1; j++) {
1359  FragmentCandidateOP f = new FragmentCandidate(i_pos, j,chunk, frag_len);
1360  if (scores_[1]->score_fragment(f, empty_map)) {
1361  scores::FragmentScoreMapOP new_map = empty_map->clone();
1362  std::pair<FragmentCandidateOP, scores::FragmentScoreMapOP> p(f, new_map);
1363  sink->add(p);
1364  }
1365  } // All chunk locations done
1366  tr.Trace << chunk->get_pdb_id() << " done" << std::endl;
1367  tr.Trace << sink->count_candidates()<<" candidates stored at pos. "
1368  <<i_pos<<", "<<sink->count_candidates()<<" in total"<< std::endl;
1369  tr.flush();
1370  } // all chunks done
1371 }
1372 
1373 // called in main
1375 
1376 #ifdef USE_BOOST_THREAD
1377  //## multi-threaded?
1378  if (option[ frags::j ].user()) max_threads_ = option[ frags::j ]();
1379 #endif
1380  // score with multiple threads
1381  while (max_threads_ > scores_.size())
1382  scores_.push_back(new scores::FragmentScoreManager());
1383  while (max_threads_ > candidates_sinks_.size()) {
1384  CandidatesSink storage;
1385  candidates_sinks_.push_back(storage);
1386  }
1387 
1388  //## -------- setup query profile
1389  if (option[in::file::checkpoint].user()) {
1391  tr.Info << "reading a query profile from: "
1392  << option[in::file::checkpoint]() << std::endl;
1393  q_prof->read_from_checkpoint(option[in::file::checkpoint]());
1394  set_query_seq(q_prof);
1395  tr.Info << "picking fragments for query profile: "
1396  << get_query_seq_string() << std::endl;
1397  }
1398  if (option[in::file::pssm].user()) {
1400  tr.Info << "reading a query profile from: "
1401  << option[in::file::pssm]()[1] << std::endl;
1402  q_prof->read_from_file(option[in::file::pssm]()[1] );
1403  q_prof->convert_profile_to_probs(1.0); // was previously implicit in read_from_file()
1404  set_query_seq(q_prof);
1405  tr.Info << "picking fragments for query profile: "
1406  << get_query_seq_string() << std::endl;
1407  }
1408 
1409  //Fasta file trumps sequence profile as far as query_seq_string_ is concerned
1410  if (option[in::file::fasta].user()) {
1411  std::string q_seq = core::sequence::read_fasta_file(option[in::file::fasta]()[1])[1]->sequence();
1412  tr.Info << "reading a query sequence from: "
1413  << option[in::file::fasta]()[1] << std::endl;
1414 
1415  set_query_seq(q_seq);
1416  tr.Info << "picking fragments for query sequence: "
1417  << get_query_seq_string() << std::endl;
1418  }
1419 
1420  // --------- setup query secondary structure
1421  if (option[frags::ss_pred].user()) {
1422  utility::vector1<std::string> sec_str_input(option[frags::ss_pred]());
1423  read_ss_files(sec_str_input);
1424  }
1425 
1426  // --------- setup query phi,psi,sa predictions from spine-x file
1427  if (option[frags::spine_x].user()) {
1428  read_spine_x(option[frags::spine_x]());
1429  }
1430 
1431  // --------- setup query residue depth values from DEPTH file
1432  if (option[frags::depth].user()) {
1433  read_depth(option[frags::depth]());
1434  }
1435 
1436  //---------- setup chunk filters
1437  if (option[frags::allowed_pdb].user()) {
1438  AllowPdbIdFilterOP allow = new AllowPdbIdFilter();
1439  allow->load_pdb_id_from_file(option[frags::allowed_pdb]());
1440  add_chunk_filter(allow);
1441  tr.Info << "Allowed PDB chains:\n";
1442  allow->show_pdb_ids(tr.Info);
1443  }
1444 
1445  if (option[frags::denied_pdb].user()) {
1446  DenyPdbIdFilterOP deny = new DenyPdbIdFilter();
1447  deny->load_pdb_id_from_file(option[frags::denied_pdb]());
1448  add_chunk_filter(deny);
1449  tr.Info << "Excluded PDBs:\n";
1450  deny->show_pdb_ids(tr.Info);
1451  }
1452 
1453  // ##--------- setup VALL
1454  PROF_START( basic::FRAGMENTPICKING_READ_VALL );
1455  if (option[in::file::vall].user()) {
1456  read_vall(option[in::file::vall]());
1457  }
1458  PROF_STOP( basic::FRAGMENTPICKING_READ_VALL );
1459 
1460  // -------- fragment sizes
1461  if (option[frags::frag_sizes].user()) {
1462  utility::vector1<Size> frag_sizes_tmp = option[frags::frag_sizes]();
1463  for (Size i = 1; i <= frag_sizes_tmp.size(); ++i) {
1464  if(frag_sizes_tmp[i] > max_frag_size_)
1465  max_frag_size_ = frag_sizes_tmp[i];
1466  frag_sizes_.push_back(frag_sizes_tmp[i]);
1467  }
1468  } else {
1469  max_frag_size_ = 9;
1470  frag_sizes_.push_back(3);
1471  frag_sizes_.push_back(9);
1472  }
1473  tr.Info << "Will pick fragments of size:";
1474  for (Size i = 1; i <= frag_sizes_.size(); ++i)
1475  tr.Info << frag_sizes_[i] << " ";
1476  tr.Info << std::endl;
1477 
1478  //---------- setup scoring scheme
1479  tr.Info << "Creating fragment scoring scheme" << std::endl;
1480  if (option[frags::scoring::config].user()) {
1481  // todo: the create scores method should be improved so the score files aren't read more than once -dk
1482  for (Size i = 1; i <= scores_.size(); ++i)
1483  scores_[i]->create_scores(option[frags::scoring::config](), this);
1484  }
1485 
1486  // -------- how many fragments and candidates
1487  n_frags_ = option[frags::n_frags]();
1488  n_candidates_ = option[frags::n_candidates]();
1489 
1490  if (n_frags_ > n_candidates_) n_candidates_ = n_frags_;
1491 
1492  tr.Info << "Picking " << n_frags_ << " fragments based on "
1493  << n_candidates_ << " candidates" << std::endl;
1494 
1495  //-------- this comparator is used both for collecting and selecting fragments
1496  // note: The comparator is based on the first score manager so the score managers have to have the same scoring scheme! -dk
1497  CompareTotalScore comparator(get_score_manager());
1498 
1499  //---------- setup scoring scheme for the selection step
1500  tr.Info << "Creating fragment scoring scheme for the selection step" << std::endl;
1501  FragmentScoreManagerOP selection_scoring;
1502  if (option[frags::picking::selecting_scorefxn].user()) {
1503  selection_scoring = new FragmentScoreManager();
1504  selection_scoring->create_scores(option[frags::picking::selecting_scorefxn](), this);
1505  selector_ = new CustomScoreSelector(n_frags_, selection_scoring);
1506  } else {
1507  // note: The selector is based on the first score manager so the score managers have to have the same scoring scheme! -dk
1508  selector_ = new BestTotalScoreSelector(n_frags_, get_score_manager());
1509  }
1510 
1511  //-------- collector & selector set up
1512  if (option[frags::quota_protocol].user() || option[frags::picking::quota_config_file].user()) {
1513  // This setup is a bit more complicated when user needs quota.
1514  // The quota version of this code was moved into a separate method
1515  parse_quota_command_line();
1516  // This setup is a bit more complicated, when user needs quota. The quota version of this code was moved into a separate method
1517  } else {
1518  if (option[frags::keep_all_protocol].user()) {
1519  for (Size i = 1; i <= frag_sizes_.size(); ++i) {
1520  CandidatesCollectorOP collector = new GrabAllCollector(size_of_query());
1521  set_candidates_collector(frag_sizes_[i], collector);
1522  tr.Info << "Collector for fragment size: " << frag_sizes_[i] << " set to: GrabAllCollector" << std::endl;
1523  }
1524  } else {
1525  for (Size i = 1; i <= frag_sizes_.size(); ++i) {
1526  for (Size j = 0; j <= max_threads_; ++j) { // 0 for merged collector
1527  CandidatesCollectorOP collector = new BoundedCollector<CompareTotalScore> (size_of_query(), n_candidates_,
1528  comparator,get_score_manager()->count_components());
1529  set_candidates_collector(frag_sizes_[i], collector, j);
1530  }
1531  tr.Info << "Collector for fragment size: " << frag_sizes_[i] << " set to: BoundedCollector" << std::endl;
1532  }
1533  }
1534  //-------- Selecting fragments from candidates
1535 /* if (option[frags::picking::selecting_rule].user()) {
1536  std::string type = option[frags::picking::selecting_rule]();
1537  if (type.compare("BestTotalScoreSelector")==0) {
1538  selector_ = new BestTotalScoreSelector(n_frags_, selection_scoring);
1539  tr.Info << "Fragment selector: BestTotalScoreSelector"
1540  << std::endl;
1541  } else {
1542  utility_exit_with_message("[ERROR]: unknown fragment selecting rule: " + type + "!");
1543  }
1544  } else {
1545  selector_ = new BestTotalScoreSelector(n_frags_, selection_scoring);
1546  tr.Info << "Fragment selector: BestTotalScoreSelector" << std::endl;
1547  }*/
1548  }
1549  // # ---------- output file prefix:
1550  if (option[out::file::frag_prefix].user()) {
1551  prefix_ = option[out::file::frag_prefix]();
1552  }
1553 
1554  if (option[frags::picking::query_pos].user()) {
1555  set_picked_positions( option[frags::picking::query_pos]() );
1556  }
1557 
1558  // non-local contacts options
1559  nonlocal_min_contacts_per_res_ = option[ frags::nonlocal::min_contacts_per_res ]();
1560 
1561  // frag contacts options
1562  contact_types_.clear();
1563  utility::vector1<std::string> contact_types = option[ frags::contacts::type ]();
1564  for (Size i = 1; i <= contact_types.size(); ++i) {
1565  contact_types_.insert(contact_type(contact_types[i]));
1566  if (contact_type(contact_types[i]) == CEN)
1567  sidechain_contact_dist_cutoff_ = new SidechainContactDistCutoff( option[ frags::contacts::centroid_distance_scale_factor ]() );
1568  }
1569  // sequence separation
1570  contacts_min_seq_sep_ = option[ frags::contacts::min_seq_sep ](); // j>=i+contacts_min_seq_sep_
1571  // distance cutoffs
1572  utility::vector1<Real> dist_cutoffs = option[ frags::contacts::dist_cutoffs ]();
1573  Real max_dist = 0.0;
1574  for (Size i = 1; i <= dist_cutoffs.size(); ++i) {
1575  if (dist_cutoffs[i] > max_dist) max_dist = dist_cutoffs[i];
1576  contacts_dist_cutoffs_squared_.push_back( dist_cutoffs[i]*dist_cutoffs[i] );
1577  }
1578  contacts_dist_cutoff_squared_ = max_dist*max_dist;
1579 
1580  show_scoring_methods(tr);
1581  tr << std::endl;
1582 }
1583 
1585 
1586  std::string quota_config_file("UNKNOWN-QUOTA-CONFIG_FILE");
1587  if (option[frags::picking::quota_config_file].user())
1588  quota_config_file = option[frags::picking::quota_config_file]();
1589  quota::ABEGO_SS_Config q_config(quota_config_file);
1590 
1591  utility::vector1<Size> components;
1592  utility::vector1<Real> weights;
1593  utility::vector1<Real> scoring_weights = scores_[1]->get_weights();
1594  for (Size i = 1; i <= scores_[1]->count_components(); ++i) {
1595  ABEGO_SS_ScoreOP s0 =
1596  dynamic_cast<ABEGO_SS_Score*> (scores_[1]->get_component(i).get());
1597  if (s0 != 0) {
1598  components.push_back( i );
1599  weights.push_back( scoring_weights[s0->get_id()] );
1600  }
1601  ProfileScoreL1OP s1 =
1602  dynamic_cast<ProfileScoreL1*> (scores_[1]->get_component(i).get());
1603  if (s1 != 0) {
1604  components.push_back( i );
1605  weights.push_back( scoring_weights[s1->get_id()] );
1606  }
1607 
1608  RamaScoreOP s2 =
1609  dynamic_cast<RamaScore*> (scores_[1]->get_component(i).get());
1610  if (s2 != 0) {
1611  components.push_back( i );
1612  weights.push_back( scoring_weights[s2->get_id()] );
1613  }
1614 
1615  CSScoreOP s3 =
1616  dynamic_cast<CSScore*> (scores_[1]->get_component(i).get());
1617  if (s3 != 0) {
1618  components.push_back( i );
1619  weights.push_back( scoring_weights[s3->get_id()] );
1620  }
1621 
1623  dynamic_cast<SecondarySimilarity*> (scores_[1]->get_component(i).get());
1624  if (s4 != 0) {
1625  components.push_back( i );
1626  weights.push_back( scoring_weights[s4->get_id()] );
1627  }
1628  }
1629 
1630  tr.Debug<<"Scoring scheme for ABEGO_SS quota pool sorting is:";
1631  for(Size l=1;l<=weights.size();l++) {
1632  tr.Debug<<"\n\t"<<components[l]<<"\t"<<weights[l];
1633  }
1634  tr.Debug<<std::endl;
1635  Size buffer_factor = 5;
1636  for(Size f=1;f<=frag_sizes_.size();f++) {
1637  for (Size j = 0; j <= max_threads_; ++j) { // 0 for the merged collector
1638  quota::QuotaCollectorOP collector = new quota::QuotaCollector( size_of_query(), frag_sizes_[f] );
1639  set_candidates_collector(frag_sizes_[f],collector, j);
1640  }
1641  Size middle = frag_sizes_[f] / 2 + 1;
1642  assert( size_of_query() == q_config.size() ); // Test if the abego-ss table has the same size as the query sequence
1643  for(Size j=1;j<=size_of_query()-frag_sizes_[f]+1;j++) {
1644  tr.Debug<<"Creating "<<q_config.n_columns()<<" quota pools at pos "<<j<<std::endl;
1645  for(Size i=1;i<=q_config.n_columns();i++) {
1646  Real prob = q_config.probability(j+middle-1,i);
1647  for (Size k = 0; k <= max_threads_; ++k) { // 0 for the merged collector
1648  CandidatesCollectorOP storage = get_candidates_collector(frag_sizes_[f], k);
1649  quota::QuotaCollectorOP collector = dynamic_cast<quota::QuotaCollector*> (storage());
1650  quota::QuotaPoolOP p = new quota::ABEGO_SS_Pool(n_candidates_,q_config.get_pool_name(i),
1651  q_config.get_pool_bins((i)),components,weights,prob,scores_[1]->count_components(),buffer_factor);
1652  collector->add_pool(j,p);
1653  }
1654  }
1655  }
1656  }
1657 }
1658 
1660  std::string quota_config_file("UNKNOWN-QUOTA-CONFIG_FILE");
1661  if (option[frags::picking::quota_config_file].user())
1662  quota_config_file = option[frags::picking::quota_config_file]();
1663  quota::QuotaConfig q_config(quota_config_file);
1664 
1665  utility::vector1<Size> components;
1666  utility::vector1<Real> weights;
1667  components.push_back( 0 ); // the free entry in the vector is for secondary structure score (only one for each pool)
1668  weights.push_back( 0.0 ); // score weight for SecondarySimilarity; will be changed later
1669  components.push_back( 0 ); // this free entry in the vector is for RamaScore
1670  weights.push_back( 0.0 ); // score weight for RamaScore; will be changed later
1671  utility::vector1<Real> scoring_weights = scores_[1]->get_weights();
1672  for (Size i = 1; i <= scores_[1]->count_components(); ++i) {
1673  ProfileScoreL1OP s1 =
1674  dynamic_cast<ProfileScoreL1*> (scores_[1]->get_component(i).get());
1675  if (s1 != 0) {
1676  components.push_back( i );
1677  weights.push_back( scoring_weights[s1->get_id()] );
1678  }
1679 
1680 /****** RamaScore is a special case, dispatched below
1681  RamaScore *s2 =
1682  dynamic_cast<RamaScore*> (scores_->get_component(i).get());
1683  if (s2 != 0) {
1684  components.push_back( i );
1685  weights.push_back( scoring_weights[s2->get_id()] );
1686  }
1687 *********/
1688  CSScoreOP s3 =
1689  dynamic_cast<CSScore*> (scores_[1]->get_component(i).get());
1690  if (s3 != 0) {
1691  components.push_back( i );
1692  weights.push_back( scoring_weights[s3->get_id()] );
1693  }
1694  ABEGO_SS_ScoreOP s4 =
1695  dynamic_cast<ABEGO_SS_Score*> (scores_[1]->get_component(i).get());
1696  if (s4 != 0) {
1697  components.push_back( i );
1698  weights.push_back( scoring_weights[s4->get_id()] );
1699  }
1701  dynamic_cast<TorsionBinSimilarity*> (scores_[1]->get_component(i).get());
1702  if (s5 != 0) {
1703  components.push_back( i );
1704  weights.push_back( scoring_weights[s5->get_id()] );
1705  }
1706  }
1707 
1709  utility::vector1<Real> ss_weights;
1710  std::map<std::string, core::fragment::SecondaryStructureOP>::iterator it;
1711  Real weight = 1.0 / ((Real) query_ss_profile_.size());
1712  for ( it=query_ss_profile_.begin() ; it != query_ss_profile_.end(); it++ ) {
1713  predictions.push_back((*it).second);
1714  ss_weights.push_back(weight);
1715  }
1716 // core::fragment::SecondaryStructureOP avg_ss = new core::fragment::SecondaryStructure(predictions,ss_weights);
1717 
1718  for(Size f=1;f<=frag_sizes_.size();f++) {
1719  for (Size j = 0; j <= max_threads_; ++j) { // 0 for the merged collector
1720  quota::QuotaCollectorOP collector = new quota::QuotaCollector( size_of_query(), frag_sizes_[f] );
1721  set_candidates_collector(frag_sizes_[f], collector, j);
1722  }
1723 // --------- This part puts RamaScore into quota scoring; each Rama is based on a certain SS prediction and this part of the code
1724 // --------- dispatches each Rama into a proper pool
1725  for (Size i = 1; i <= scores_[1]->count_components(); ++i) {
1726  RamaScoreOP sr = dynamic_cast<RamaScore*> (scores_[1]->get_component(i).get());
1727  if (sr != 0) {
1728  std::string & name = sr->get_prediction_name();
1729  if( ! q_config.is_valid_quota_pool_name( name ) ) continue;
1730  components[2] = i;
1731  weights[2] = scoring_weights[sr->get_id()];
1732  tr.Warning<<"RamaScore with ID "<<sr->get_id()<<" named "<<name<<
1733  " has been attached to its quota pool with weight "<<weights[2]<<std::endl;
1734  }
1735  }
1736 
1737 // ---------- end of RamaScore dispatch
1738 
1739 // Create secondary structure pools (if any)
1740  for (Size i = 1; i <= scores_[1]->count_components(); ++i) {
1741  SecondarySimilarityOP ss = dynamic_cast<SecondarySimilarity*> (scores_[1]->get_component(i).get());
1742 
1743  //PartialSecondarySimilarity is a variant of SecondarySimilarity, this means they're not compatible
1744  //So what is it compatible with???
1745 // if (s == 0) {
1746 // PartialSecondarySimilarity *s =
1747 // dynamic_cast<PartialSecondarySimilarity*> (scores_->get_component(i).get());
1748 // }
1749 
1750  if (ss != 0) {
1751  std::string & name = ss->get_prediction_name();
1752  if( ! q_config.is_valid_quota_pool_name( name ) ) continue;
1753  components[1] = i;
1754  weights[1] = scoring_weights[ss->get_id()];
1755  Size size = (Size)(q_config.get_fraction( name ) * n_candidates_);
1756  if( size == 0 ) {
1757  tr.Warning<<"Config file couldn't provide quota fraction for the pool named "
1758  <<name<<". Skipping the pool"<<std::endl;
1759  continue;
1760  }
1761 
1762  for (Size j = 0; j <= max_threads_; ++j) { // 0 for the merged collector
1763  CandidatesCollectorOP storage = get_candidates_collector(frag_sizes_[f], j);
1764  quota::QuotaCollectorOP collector = dynamic_cast<quota::QuotaCollector*> (storage());
1765 
1766  collector->attach_secondary_structure_pools(q_config.get_fraction( name ) ,
1767  get_query_ss( name ),name,n_candidates_,components,weights,scores_[1]->count_components());
1768 // avg_ss,name,n_candidates_,components,weights,scores_->count_components());
1769  }
1770  }
1771  }
1772  }
1773 }
1774 
1776 
1777  set_up_quota_nnmake_style();
1778 
1779  if (option[frags::picking::query_pos].user()) {
1780  set_picked_positions( option[frags::picking::query_pos]() );
1781  }
1782 }
1783 
1785  chunks_ = new VallProvider();
1786  chunks_->vallChunksFromLibraries(fns);
1787 }
1788 
1790  chunks_ = new VallProvider();
1791  chunks_->vallChunksFromLibrary(fn);
1792 }
1793 
1795  query_positions_.clear();
1796  for(Size i=from;i<=to;i++)
1797  query_positions_.push_back( i );
1798 }
1799 
1801  query_positions_.clear();
1802  for(Size i=1;i<=q_positions.size();i++)
1803  query_positions_.push_back( q_positions[i] );
1804 }
1805 
1806 
1808  return tags_.size();
1809 }
1810 
1812  tr<< "Quota report: difference between the total expected and total picked foreach pool"<<std::endl;
1813  tr<< "This table is for first "<<nFrags_<<" fragments"<<std::endl;
1814  tr<< "Negative value says that was picked more that expected."<<std::endl;
1815  tr<< this->str()<<std::endl;
1816  this->str("");
1817 }
1818 
1820  *this << std::setw(4)<<q_pos<< std::setw(4)<<frag_len;
1821  for(Size i=1;i<=data.size();i++)
1822  if(data[i]<1000000)
1823  *this << std::setw(10)<<std::setprecision(3)<<data[i];
1824  else
1825  *this << std::setw(10)<<" --- ";
1826  *this << std::endl;
1827 }
1828 
1830  quota::QuotaCollector const & collector
1831 )
1832 {
1833  Size last_tag = 0;
1834  for(Size i=1;i<=collector.query_length();++i) {
1835  for(Size j=1;j<=collector.count_pools(i);++j) {
1836  if( tag_map_.find(collector.get_pool(i,j)->get_pool_name())==tag_map_.end() ) {
1837  tags_.push_back(collector.get_pool(i,j)->get_pool_name());
1838  last_tag++;
1839  tag_map_[collector.get_pool(i,j)->get_pool_name()] = last_tag;
1840  }
1841  }
1842  }
1843 
1844  *this <<"\n#len pos ";
1845  for(Size i=1;i<=tags_.size();i++) {
1846  *this << std::setw(10)<<tags_[i];
1847  }
1848  *this<<std::endl;
1849 }
1850 
1852 
1853  // Storage for the resulting FragSets
1855 
1856  // Loop over various sizes of fragments
1857  for (Size iFragSize = 1; iFragSize <= frag_sizes_.size(); ++iFragSize) {
1858 
1860 
1861  Size fragment_size = frag_sizes_[iFragSize];
1862  CandidatesCollectorOP storage = get_candidates_collector(fragment_size);
1863  for (Size qPos = 1; qPos <= size_of_query(); ++qPos) {
1864  if(storage->get_candidates(qPos).size() == 0) continue;
1865 
1866  Candidates out;
1867  selector_->select_fragments(storage->get_candidates(qPos), out);
1868 
1869  //FrameOP frame = new Frame(out[1].first->get_residue(1)->resi()); // start pos = residue id from the fragment residue (sequence from original pdb file?) or is this some internal index?
1870 
1871  /*
1872  start pos = residue id from the fragment residue (sequence from original pdb file?) or is this some internal index?
1873 
1874  In ConstantLengthFragSet, this is set from insertion_pos of fragment file.
1875 
1876  Therefore, I think this is the connection between Pose and Fragment File. In design mode,
1877  we do not have this, therefore we just add an incremented index for each qPos
1878  */
1879  FrameOP frame = new Frame(residueInPose_++);
1880 
1881  for (Size fi = 1; fi <= out.size(); ++fi) {
1882 
1883  FragDataOP current_fragment( NULL );
1884 
1885  for (Size i = 1; i <= out[1].first->get_length(); ++i) {
1886  VallResidueOP r = out[fi].first->get_residue(i);
1887  string pdbid = out[fi].first->get_pdb_id();
1888  //char chainid = out[fi].first->get_chain_id();
1889  Size index = r->resi();
1890  char aa = toupper(r->aa());
1891  char ss = r->ss();
1892  Real phi = r->phi();
1893  Real psi = r->psi();
1894  Real omega = r->omega();
1895 
1896  if (i == 1){
1897  current_fragment = new AnnotatedFragData( pdbid, index );
1898  }
1899  utility::pointer::owning_ptr< BBTorsionSRFD > res_torsions( new BBTorsionSRFD(3,ss,aa) ); // 3 protein torsions
1900  res_torsions->set_torsion ( 1, phi ); // ugly numbers 1-3, but pose.set_phi also uses explicit numbers
1901  res_torsions->set_torsion ( 2, psi );
1902  res_torsions->set_torsion ( 3, omega );
1903  res_torsions->set_secstruct ( ss );
1904 
1905  // Add residue to fragment
1906  current_fragment->add_residue( res_torsions );
1907 
1908  } // End VallResidue loop
1909 
1910  if (current_fragment) { // != NULL) {
1911  current_fragment->set_valid(); //it actually containts data
1912 
1913  // Add fragment to frame
1914  if (!frame->add_fragment(current_fragment)){
1915  cerr << "ERROR Bad fragment : "<<endl;
1916  current_fragment->show(cout);
1917  exit(1111);
1918  }
1919  }
1920  } // End FragmentCandidate loop
1921 
1922  // Add frame to myFragSet.
1923  myFragSet->add(frame);
1924 
1925  } // End size of query
1926 
1927  // For each size fragment add to vector of ConstantLengthFragSet
1928  result.push_back(myFragSet);
1929 
1930  } // End size of frags
1931 
1932 
1933  return (result);
1934 
1935 }
1936 
1938  bool flag = true;
1939  for (Size iFilter = 1; iFilter <= filters_.size(); iFilter++) {
1940  if ((flag = filters_[iFilter]->test_chunk(chunk)) == false) {
1941  tr.Debug << "Chunk: " << chunk->get_pdb_id()
1942  << " didn't pass a filter" << std::endl;
1943  break;
1944  }
1945  }
1946  return flag;
1947 }
1948 
1949 bool FragmentPicker::is_valid_chunk( Size const frag_len, VallChunkOP chunk ) {
1950  if (chunk->size() < frag_len) return false; // This fragment is too short
1951  return is_valid_chunk( chunk );
1952 }
1953 
1954 // Output fragments
1955 void FragmentPicker::output_fragments( Size const fragment_size, utility::vector1<Candidates> const & final_fragments ) {
1956  using namespace ObjexxFCL;
1957 
1958  // find and output nonlocal pairs
1959  if (option[frags::nonlocal_pairs].user())
1960  nonlocal_pairs( fragment_size, final_fragments );
1961 
1962  // find and output fragment contacts
1963  if (option[frags::fragment_contacts].user())
1964  fragment_contacts( fragment_size, final_fragments );
1965 
1966 
1967  std::string out_file_name = prefix_ + "." + string_of(n_frags_) + "." + string_of(fragment_size) + "mers";
1968  std::string silent_out_file_name = out_file_name + ".out";
1969  utility::io::ozstream output_file(out_file_name);
1970  utility::io::ozstream output_info_file;
1971  if (option[frags::describe_fragments].user()) {
1972  std::string describe_name = option[frags::describe_fragments]()+"." + string_of(n_frags_) +"."+string_of(fragment_size)+"mers";
1973  output_info_file.open(describe_name.c_str());
1974  }
1975 
1976  FragmentScoreManagerOP ms = get_score_manager();
1977  Size maxqpos = size_of_query() - fragment_size + 1;
1978  for ( Size iqpos = 1; iqpos <= query_positions_.size(); ++iqpos ) {
1979  Size qPos = query_positions_[iqpos];
1980  if ( qPos > maxqpos) continue;
1981  // std::cerr << "h1 " << qPos << std::endl;
1982  output_file << "position: " << I(12, qPos) << " neighbors: " << I(10, final_fragments[qPos].size()) << std::endl << std::endl;
1983  for (Size fi = 1; fi <= final_fragments[qPos].size(); ++fi) {
1984  if (option[frags::write_sequence_only]()) {
1985  final_fragments[qPos][fi].first->print_fragment_seq(output_file);
1986  } else {
1987  if ( !final_fragments[qPos][fi].first || !final_fragments[qPos][fi].second ) {
1988  tr.Warning << "final_frag candidate " << fi << " at position " << qPos << " is corrupted. skipping... " << std::endl;
1989  continue;
1990  }
1991  final_fragments[qPos][fi].first->print_fragment(output_file, final_fragments[qPos][fi].second, ms);
1992  }
1993  output_file << std::endl;
1994  }
1995  if ( ms->if_late_scoring_for_zeros() ) {
1996  // std::cerr << "h3" << std::endl;
1997  for ( Size fi = 1; fi <= final_fragments[qPos].size(); ++fi ) {
1998  if ( !final_fragments[qPos][fi].first || !final_fragments[qPos][fi].second ) {
1999  tr.Warning << "final_frag candidate " << fi << " at position " << qPos << " is corrupted. skipping... " << std::endl;
2000  continue;
2001  }
2002  ms->score_zero_scores(final_fragments[qPos][fi].first,final_fragments[qPos][fi].second);
2003  }
2004  }
2005  if ( option[frags::describe_fragments].user() ) {
2006  // std::cerr << "h4" << std::endl;
2007  ms->describe_fragments(final_fragments[qPos], output_info_file);
2008  }
2009  }
2010  output_file.close();
2011  output_info_file.close();
2012 
2013  // silent file output
2014  if (option[frags::output_silent]() || option[frags::score_output_silent]()) {
2016  for (Size iqpos = 1; iqpos <= query_positions_.size(); ++iqpos) {
2017  Size qPos = query_positions_[iqpos];
2018  if ( qPos > maxqpos) continue;
2019  std::string const & sequence = get_query_seq_string().substr(qPos-1,fragment_size);
2020  for (Size fi = 1; fi <= final_fragments[qPos].size(); ++fi) {
2021  std::string tag = "frag_" + ObjexxFCL::lead_zero_string_of(qPos,6) + "_" + ObjexxFCL::lead_zero_string_of(fi,6);
2022  final_fragments[qPos][fi].first->output_silent( sfd, sequence, silent_out_file_name, tag, final_fragments[qPos][fi].second, ms );
2023  }
2024  }
2025  }
2026 
2027 }
2028 
2031 
2032 } // frag_picker
2033 } // protocols