50 #include <basic/options/keys/cm.OptionKeys.gen.hh>
64 #include <numeric/model_quality/rms.hh>
68 #include <basic/options/option.hh>
69 #include <basic/options/keys/OptionKeys.hh>
70 #include <basic/options/keys/out.OptionKeys.gen.hh>
71 #include <basic/options/keys/frags.OptionKeys.gen.hh>
72 #include <basic/options/keys/in.OptionKeys.gen.hh>
73 #include <basic/options/keys/constraints.OptionKeys.gen.hh>
75 #include <basic/prof.hh>
76 #include <basic/Tracer.hh>
78 #include <utility/exit.hh>
79 #include <utility/io/izstream.hh>
80 #include <utility/io/ozstream.hh>
87 #include <ObjexxFCL/format.hh>
89 #ifdef USE_BOOST_THREAD
91 #include <boost/thread.hpp>
92 #include <boost/bind.hpp>
96 namespace frag_picker {
99 using namespace core::fragment;
100 using namespace protocols::frag_picker;
101 using namespace protocols::frag_picker::scores;
102 using namespace basic::options;
103 using namespace basic::options::OptionKeys;
// File-scope tracer: all log output in this file is emitted through the
// "protocols.frag_picker.FragmentPicker" channel.
109 static basic::Tracer
tr(
"protocols.frag_picker.FragmentPicker");
// Announces on the Info channel that the bounded picking protocol is starting.
// NOTE(review): the rest of this routine is not visible in this excerpt.
114 tr.Info <<
"pick fragments using bounded protocol..." << std::endl;
// Quota picking protocol (body fragment; the signature and several statements
// are not visible in this excerpt).
// For every configured fragment size it obtains a QuotaCollector, sets up the
// two summary logs, walks the query positions and finally writes the fragments.
120 using namespace ObjexxFCL;
121 tr.Info <<
"pick fragments using quota protocol..." << std::endl;
// With a single set of sinks there is nothing to merge.
124 const bool skip_merge = (candidates_sinks_.size() == 1) ?
true :
false;
125 for (
Size iFragSize = 1; iFragSize <= frag_sizes_.size(); ++iFragSize) {
126 Size fragment_size = frag_sizes_[iFragSize];
// Take the collector from the first sink set when not merging, otherwise from
// the merged sink; dynamic_cast is used to downcast to QuotaCollector.
127 quota::QuotaCollectorOP c = (skip_merge) ? dynamic_cast<quota::QuotaCollector*> (candidates_sinks_[1][fragment_size]()) :
128 dynamic_cast<quota::QuotaCollector*> (candidates_sink_[fragment_size]());
// NOTE(review): the condition guarding this abort is not visible here;
// presumably it fires when the cast above failed -- confirm.
130 utility_exit_with_message(
"Cant' cast candidates' collector to QuotaCollector. Is quota set up correctly?");
131 log_25_.setup_summary(*c);
132 log_200_.setup_summary(*c);
// Last query position at which a fragment of this size still fits.
133 Size maxqpos = size_of_query() - fragment_size + 1;
137 for (
Size iqpos = 1; iqpos <= query_positions_.size(); ++iqpos) {
138 Size qPos = query_positions_[iqpos];
139 if ( qPos > maxqpos)
continue;
// Copy the candidates for this position from every sink set into the merged
// sink. NOTE(review): presumably only done when !skip_merge; the guard is not
// visible in this excerpt.
142 for (
Size i=1;i<=candidates_sinks_.size();++i)
143 candidates_sink_[fragment_size]->
insert(qPos, candidates_sinks_[i][fragment_size]);
148 final_fragments[qPos] = final_out;
150 log_25_.write_summary();
151 log_200_.write_summary();
153 output_fragments( fragment_size, final_fragments );
// Keep-all picking protocol (body fragment; signature and some statements are
// not visible in this excerpt).
// For each fragment size and each query position: picks candidates, sorts
// them, runs the selector and streams the selected fragments to
// "<prefix_>.<n_frags_>.<size>mers".
158 using namespace ObjexxFCL;
159 tr.Info <<
"pick fragments using keep-all protocol..." << std::endl;
// Multi-threading and non-local pair options are not honoured by this protocol;
// the user is only warned.
161 if (max_threads_ > 1)
162 tr.Warning <<
"Ignoring -j option for keep_all_protocol" << std::endl;
163 if (option[frags::nonlocal_pairs].user())
164 tr.Warning <<
"Ignoring -nonlocal_pairs option for keep_all_protocol" << std::endl;
167 for (
Size iFragSize = 1; iFragSize <= frag_sizes_.size(); ++iFragSize) {
168 Size fragment_size = frag_sizes_[iFragSize];
// Optional description file, opened only when -describe_fragments was given.
170 utility::io::ozstream out_file;
171 if (option[frags::describe_fragments].user()) {
172 std::string describe_name = option[frags::describe_fragments]()+
"." + string_of(n_frags_) +
"."+string_of(fragment_size)+
"mers";
173 out_file.open(describe_name.c_str());
// Main fragment output file for this fragment size.
176 std::string out_file_name = prefix_ +
"." + string_of(n_frags_) +
"." + string_of(fragment_size) +
"mers";
177 utility::io::ozstream output(out_file_name);
// Last query position at which a fragment of this size still fits.
180 Size maxqpos = size_of_query() - fragment_size + 1;
181 for (
Size iqpos = 1; iqpos <= query_positions_.size(); ++iqpos) {
182 Size qPos = query_positions_[iqpos];
183 if ( qPos > maxqpos )
continue;
186 pick_candidates(qPos,fragment_size);
187 Candidates candidates = storage->get_candidates(qPos);
188 std::sort(candidates.begin(),candidates.end(),comparator);
189 selector_->select_fragments(candidates, out);
// Nothing selected at this position: write no header and no fragments.
190 if(out.size() == 0)
continue;
191 output <<
"position: " << I(12, qPos) <<
" neighbors: " << I(10,out.size()) << std::endl << std::endl;
// If the score manager requests late scoring, fill in the zero scores of the
// selected fragments before printing them.
193 if( ms->if_late_scoring_for_zeros() ) {
194 for (
Size fi = 1; fi <= out.size(); ++fi)
195 ms->score_zero_scores(out[fi].first,out[fi].second);
197 for (
Size fi = 1; fi <= out.size(); ++fi) {
198 out[fi].first->print_fragment(output);
201 if (option[frags::describe_fragments].user()) {
202 get_score_manager()->describe_fragments(out, out_file);
204 tr.Info <<
"Collected candidates of size "<<fragment_size<<
" at pos"<<qPos<<std::endl;
206 tr.Debug<< storage->count_candidates()<<
" candidates left in a sink after flushing"<<std::endl;
// Contact statistics for the selected fragments (body fragment; the signature
// and many statements are not visible in this excerpt).
// Visible behaviour: for every residue pair (i, j) inside each selected
// fragment, and for every configured contact type, contacts within a cutoff are
// counted per query pair (optionally together with neighbouring pairs) and the
// counts are written to "*.contacts" files.
217 using namespace ObjexxFCL;
// Half-width of the neighbourhood window around (i, j) used for neighbour counts.
220 Size neighbors = option[frags::contacts::neighbors]();
223 std::set<ContactType>::iterator it;
224 utility::io::ozstream output_all_contacts;
225 bool output_all = option[frags::contacts::output_all]();
// The scale factor is only taken from the side-chain cutoff object when the CEN
// contact type is configured; '.' is replaced by '_' for use in a file name.
228 for (it=contact_types_.begin(); it!=contact_types_.end(); it++)
229 if (*it ==
CEN) scale_factor = string_of(sidechain_contact_dist_cutoff_->scale_factor());
230 replace( scale_factor.begin(), scale_factor.end(),
'.',
'_' );
231 const std::string out_file_name_all_contacts = prefix_ +
"." + string_of(contacts_min_seq_sep_) +
"." + string_of(sqrt(contacts_dist_cutoff_squared_)) +
"." + scale_factor +
232 "." + string_of(n_frags_) +
"." + string_of(fragment_size) +
"mers.contacts";
233 output_all_contacts.open(out_file_name_all_contacts.c_str());
// Counter keys are (squared cutoff, contact type); key (0, type) is the default
// counter, additional keys exist for every entry of
// contacts_dist_cutoffs_squared_.
238 for (it=contact_types_.begin(); it!=contact_types_.end(); it++) {
240 std::pair<Real,ContactType> p(0,*it);
243 for (
Size i=1; i<=contacts_dist_cutoffs_squared_.size();++i) {
244 std::pair<Real,ContactType> p(contacts_dist_cutoffs_squared_[i],*it);
252 output_all_contacts <<
"# i j type dist cutoff frag_pos frag_rank" << std::endl;
// NOTE(review): if Size is unsigned, this bound wraps around when
// query_positions_.size() + 1 < fragment_size; the other loops in this file
// iterate all positions and skip those past a maximum instead -- confirm intent.
254 for (
Size iqpos = 1; iqpos <= query_positions_.size()-fragment_size+1; ++iqpos) {
255 Size qPosi = query_positions_[iqpos];
256 Candidates const & outi = fragment_set[qPosi];
257 for (
Size fi = 1; fi <= outi.size(); ++fi) {
// Offset that maps a query position onto the corresponding index in the
// fragment's source chunk.
262 int cPos_offset = outi[fi].first->get_first_index_in_vall() - qPosi;
264 for (
Size i=1; i<=fragment_size;++i) {
266 Size q_pos_i = qPosi + i - 1;
267 for (
Size j=i+1; j<=fragment_size;++j) {
268 Size q_pos_j = qPosi + j - 1;
// Require the minimum sequence separation both in the query numbering and in
// the source structure's residue numbering.
271 if (std::abs(
int(q_pos_i-q_pos_j)) < (
int)contacts_min_seq_sep_)
continue;
273 if (std::abs(
int(ri->resi() - outi[fi].first->get_residue(j)->resi() )) < (
int)contacts_min_seq_sep_)
continue;
275 for (it=contact_types_.begin(); it!=contact_types_.end(); it++) {
// CEN contacts use a residue-pair-specific cutoff; every other type uses the
// single global squared cutoff.
281 Real cutoff_dist_squared = (*it ==
CEN) ?
282 sidechain_contact_dist_cutoff_->get_cutoff_squared( ri->aa(), outi[fi].first->get_residue(j)->aa() ) :
283 contacts_dist_cutoff_squared_;
285 if (distance_squared <= cutoff_dist_squared) {
289 output_all_contacts << q_pos_i <<
" " << q_pos_j <<
" " <<
contact_name(*it) <<
" " <<
290 fmt::F(5, 2, sqrt(distance_squared)) <<
" " << fmt::F(5, 2, sqrt(cutoff_dist_squared)) <<
" " << qPosi <<
" "<< fi << std::endl;
295 std::pair<Real,ContactType> p(0,*it);
296 std::pair<Size,Size> querypair(q_pos_i, q_pos_j);
297 contact_counts[p]->iterate(querypair);
// Neighbour window around (q_pos_i, q_pos_j); the lower bounds are computed in
// int first so that they can be clamped to 1.
301 int m_min_tmp = q_pos_i-neighbors;
302 Size m_min = (m_min_tmp >= 1) ? m_min_tmp : 1;
303 Size m_max = q_pos_i+neighbors;
304 int n_min_tmp = q_pos_j-neighbors;
305 Size n_min = (n_min_tmp >= 1) ? n_min_tmp : 1;
306 Size n_max = q_pos_j+neighbors;
308 for (
Size m = m_min; m <= m_max; ++m) {
309 if (m > size_of_query())
continue;
// NOTE(review): chunk_i is declared as Size here while the otherwise identical
// loops below declare it as int and cast chunk->size(). If Size is unsigned,
// `chunk_i < 1` only catches 0 and a negative cPos_offset + m wraps to a large
// value (still rejected by the upper-bound test) -- confirm and align.
311 Size chunk_i = cPos_offset + m;
312 if (chunk_i < 1 || chunk_i > chunk->size())
continue;
314 for (
Size n = n_min; n <= n_max; ++n) {
315 if (n > size_of_query())
continue;
// Do not count the query pair itself as its own neighbour.
316 if (m == q_pos_i && n == q_pos_j)
continue;
318 int chunk_j = cPos_offset + n;
319 if (chunk_j < 1 || chunk_j > (
int)chunk->size())
continue;
322 if (std::abs(
int(m-n)) < (
int)contacts_min_seq_sep_)
continue;
324 if (std::abs(
int( chunk->at(chunk_i)->resi() - chunk->at(chunk_j)->resi() )) < (
int)contacts_min_seq_sep_)
continue;
326 Real dist_squared = chunk->at(chunk_i)->distance_squared(chunk->at(chunk_j), *it);
// NOTE(review): the side-chain cutoff is used here for every contact type that
// reaches this branch; an enclosing CEN-only guard may exist but is not visible.
327 if (dist_squared <= sidechain_contact_dist_cutoff_->get_cutoff_squared( chunk->at(chunk_i)->aa(), chunk->at(chunk_j)->aa() )) {
328 std::pair<Size,Size> neighbor_pair(m, n);
329 contact_counts[p]->iterate_neighbor(querypair, neighbor_pair);
// Same counting repeated for each additional distance cutoff, using the counter
// keyed by that cutoff.
336 for (
Size cdi=1; cdi<=contacts_dist_cutoffs_squared_.size();++cdi) {
337 if (distance_squared < contacts_dist_cutoffs_squared_[cdi]) {
339 std::pair<Real,ContactType> p(contacts_dist_cutoffs_squared_[cdi],*it);
340 std::pair<Size,Size> querypair(q_pos_i, q_pos_j);
341 contact_counts[p]->iterate(querypair);
345 int m_min_tmp = q_pos_i-neighbors;
346 Size m_min = (m_min_tmp >= 1) ? m_min_tmp : 1;
347 Size m_max = q_pos_i+neighbors;
348 int n_min_tmp = q_pos_j-neighbors;
349 Size n_min = (n_min_tmp >= 1) ? n_min_tmp : 1;
350 Size n_max = q_pos_j+neighbors;
352 for (
Size m = m_min; m <= m_max; ++m) {
353 if (m > size_of_query())
continue;
355 int chunk_i = cPos_offset + m;
356 if (chunk_i < 1 || chunk_i > (
int)chunk->size())
continue;
358 for (
Size n = n_min; n <= n_max; ++n) {
359 if (n > size_of_query())
continue;
360 if (m == q_pos_i && n == q_pos_j)
continue;
362 int chunk_j = cPos_offset + n;
363 if (chunk_j < 1 || chunk_j > (
int)chunk->size())
continue;
366 Size m_n_sep = std::abs(
int(m - n));
367 if (m_n_sep < contacts_min_seq_sep_)
continue;
369 if (std::abs(
int( chunk->at(chunk_i)->resi() - chunk->at(chunk_j)->resi() )) < (
int)contacts_min_seq_sep_)
continue;
371 Real dist_squared = chunk->at(chunk_i)->distance_squared(chunk->at(chunk_j), *it);
372 if (dist_squared <= contacts_dist_cutoffs_squared_[cdi]) {
373 std::pair<Size,Size> neighbor_pair(m, n);
374 contact_counts[p]->iterate_neighbor(querypair, neighbor_pair);
388 if (output_all) output_all_contacts.close();
// Write one summary file per contact type: each line is "i j count", followed by
// "m n count" triples for the neighbouring pairs when neighbors > 0.
391 for (it=contact_types_.begin(); it!=contact_types_.end(); it++) {
393 std::string scale_factor = string_of(sidechain_contact_dist_cutoff_->scale_factor());
394 replace( scale_factor.begin(), scale_factor.end(),
'.',
'_' );
395 const std::string out_file_name_contacts = prefix_ +
"." +
contact_name(*it) +
"." + string_of(contacts_min_seq_sep_) +
"." + scale_factor +
"." +
396 string_of(n_frags_) +
"." + string_of(fragment_size) +
"mers.contacts";
397 utility::io::ozstream output_contacts(out_file_name_contacts);
398 output_contacts <<
"# i j count";
399 if (neighbors > 0) output_contacts <<
" neighbors_" << neighbors <<
"_i_j_count";
400 output_contacts << std::endl;
401 std::pair<Real,ContactType> p(0,*it);
402 std::map<std::pair<Size,Size>,
Size> query_counts = contact_counts[p]->counts();
403 std::map<std::pair<Size,Size>,
Size>::iterator iter;
404 for ( iter = query_counts.begin(); iter != query_counts.end(); iter++ ) {
405 std::pair<Size,Size> query_pair = iter->first;
406 output_contacts << query_pair.first <<
" " << query_pair.second <<
" " << iter->second;
407 if (neighbors > 0 && contact_counts[p]->neighbor_counts_exist(query_pair)) {
408 std::map<std::pair<Size,Size>,
Size> neighbor_counts = contact_counts[p]->neighbor_counts(query_pair);
409 std::map<std::pair<Size,Size>,
Size>::iterator neigh_iter;
410 for ( neigh_iter = neighbor_counts.begin(); neigh_iter != neighbor_counts.end(); neigh_iter++ ) {
411 std::pair<Size,Size> neighbor_pair = neigh_iter->first;
412 output_contacts <<
" " << neighbor_pair.first <<
" " << neighbor_pair.second <<
" " << neigh_iter->second;
415 output_contacts << std::endl;
417 output_contacts.close();
// Same summary, one file per additional distance cutoff; the file name carries
// the (non-squared) cutoff instead of the scale factor.
419 for (
Size i=1; i<=contacts_dist_cutoffs_squared_.size();++i) {
420 const std::string out_file_name_contacts = prefix_ +
"." +
contact_name(*it) +
"." + string_of(contacts_min_seq_sep_) +
"." + string_of(sqrt(contacts_dist_cutoffs_squared_[i])) +
421 "." + string_of(n_frags_) +
"." + string_of(fragment_size) +
"mers.contacts";
422 utility::io::ozstream output_contacts(out_file_name_contacts);
423 output_contacts <<
"# i j count";
424 if (neighbors > 0) output_contacts <<
" neighbors_" << neighbors <<
"_i_j_count";
425 output_contacts << std::endl;
426 std::pair<Real,ContactType> p(contacts_dist_cutoffs_squared_[i],*it);
427 std::map<std::pair<Size,Size>,
Size> query_counts = contact_counts[p]->counts();
428 std::map<std::pair<Size,Size>,
Size>::iterator iter;
429 for ( iter = query_counts.begin(); iter != query_counts.end(); iter++ ) {
430 std::pair<Size,Size> query_pair = iter->first;
431 output_contacts << query_pair.first <<
" " << query_pair.second <<
" " << iter->second;
432 if (neighbors > 0 && contact_counts[p]->neighbor_counts_exist(query_pair)) {
433 std::map<std::pair<Size,Size>,
Size> neighbor_counts = contact_counts[p]->neighbor_counts(query_pair);
434 std::map<std::pair<Size,Size>,
Size>::iterator neigh_iter;
435 for ( neigh_iter = neighbor_counts.begin(); neigh_iter != neighbor_counts.end(); neigh_iter++ ) {
436 std::pair<Size,Size> neighbor_pair = neigh_iter->first;
437 output_contacts <<
" " << neighbor_pair.first <<
" " << neighbor_pair.second <<
" " << neigh_iter->second;
440 output_contacts << std::endl;
442 output_contacts.close();
// Non-local fragment pair search for a list of query positions (body fragment;
// the signature and several statements are not visible in this excerpt).
// For each position i in `positions` and each later query position j, every
// fragment at i is compared with every fragment at j; the contacts found between
// the two fragments are collected and qualifying pairs are appended to `pairs`.
// Last query position at which the second fragment still fits.
453 Size const maxjqpos = size_of_query()-fragment_size+1;
456 for (
Size p = 1; p <= positions.size(); ++p) {
457 Size const qPosi = positions[p];
458 Candidates const & outi = fragment_set[qPosi];
// First admissible position for the second fragment: past the end of the first
// fragment plus the minimum sequence separation.
459 Size const minjqpos = qPosi+fragment_size+contacts_min_seq_sep_-1;
461 for (
Size jqpos = 1; jqpos <= query_positions_.size(); ++jqpos) {
462 Size qPosj = query_positions_[jqpos];
463 if (qPosj > maxjqpos || qPosj < minjqpos)
continue;
// NOTE(review): the body of this test is not visible; presumably it clears
// skip_it as soon as a residue of either fragment is not flagged in `skip` --
// confirm.
465 for (
Size i=0; i<fragment_size;++i) {
466 if (!skip[qPosi+i] || !skip[qPosj+i]) {
471 if (skip_it)
continue;
472 Candidates const & outj = fragment_set[qPosj];
473 for (
Size fi = 1; fi <= outi.size(); ++fi) {
474 for (
Size fj = 1; fj <= outj.size(); ++fj) {
// Both fragments must come from the same chain.
475 if (!outi[fi].first->same_chain( outj[fj].first ))
continue;
// Skip pairs whose first residues are closer than one fragment length in the
// source numbering.
478 if (std::abs(
int(outi[fi].first->get_residue(1)->resi()-outj[fj].first->get_residue(1)->resi())) < (
int)fragment_size)
continue;
482 bool has_good_constraint =
false;
// Constraints are considered only when the constraint contact map is non-empty.
483 bool has_constraints = (atom_pair_constraint_contact_map_.size() > 0) ?
true :
false;
484 for (
Size i=1; i<=fragment_size;++i) {
487 for (
Size j=1; j<=fragment_size;++j) {
// Minimum sequence separation in query numbering and in source numbering.
489 if (std::abs(
int(qpi-qpj)) < (
int)contacts_min_seq_sep_)
continue;
491 if (std::abs(
int( ri->resi()-outj[fj].first->get_residue(j)->resi() )) < (
int)contacts_min_seq_sep_)
continue;
492 std::set<ContactType>::iterator it;
493 for (it=contact_types_.begin(); it!=contact_types_.end(); it++) {
// CEN contacts use a residue-pair-specific cutoff; every other type uses the
// single global squared cutoff.
495 Real cutoff_dist_squared = (*it ==
CEN) ?
496 sidechain_contact_dist_cutoff_->get_cutoff_squared( ri->aa(), outj[fj].first->get_residue(j)->aa() ) :
497 contacts_dist_cutoff_squared_;
499 Real dist_squared = ri->distance_squared(outj[fj].first->get_residue(j), *it);
// A positive entry in the constraint map is compared against the squared
// distance. NOTE(review): the statements between the two branches are not
// visible; presumably a violated constraint rejects the pair and a satisfied one
// sets has_good_constraint -- confirm.
500 if (has_constraints && atom_pair_constraint_contact_map_[qpi][qpj] > 0) {
501 if (dist_squared > atom_pair_constraint_contact_map_[qpi][qpj]) {
505 has_good_constraint =
true;
508 if (dist_squared <= cutoff_dist_squared) contacts.push_back(
new Contact( qpi, qpj, dist_squared, *it ));
// Keep the pair only if it was not rejected, has at least one contact and, when
// constraints are in use, satisfies at least one of them.
514 if (!skip && contacts.size() > 0 && (!has_constraints || has_good_constraint)) {
517 pairs.push_back(pair);
// Driver for the non-local fragment pair search (body fragment; the signature
// and many statements are not visible in this excerpt).
// Visible stages: (1) option and native-structure handling, (2) optional
// AtomPair constraint loading, (3) optional alignment-based position skipping,
// (4) distribution of query positions over threads, (5) output of silent
// structures and contact lists, (6) per-type contact count summaries.
527 using namespace ObjexxFCL;
// Force CA coordinates to be written for the duration of this routine; the
// user's original setting is restored at the very end.
530 bool orig_opt = option[frags::write_ca_coordinates]();
531 option[frags::write_ca_coordinates].value(
true);
// Half-width of the neighbourhood window used for neighbour contact counts.
534 Size neighbors = option[frags::contacts::neighbors]();
537 bool has_native =
false;
539 if (option[in::file::native].user()) {
543 }
else if (option[in::file::s].user()) {
550 if (option[constraints::cst_file].user()) {
551 tr.Info <<
"Reading constraints from: "
552 << option[constraints::cst_file]()[1] << std::endl;
// Build a size_of_query() x size_of_query() map initialised to 0 (= no
// constraint between the two positions).
554 atom_pair_constraint_contact_map_.resize(size_of_query());
555 for (
Size qi = 1; qi <= size_of_query(); qi++) {
556 for (
Size qj = 1; qj <= size_of_query(); qj++) {
557 atom_pair_constraint_contact_map_[qi].push_back( 0 );
561 utility::io::izstream data(option[constraints::cst_file]()[1].c_str());
563 utility_exit_with_message(
"[ERROR] Unable to open constraints file: "
564 + option[constraints::cst_file]()[1]);
570 while (!data.fail()) {
571 char c = data.peek();
// Lines starting with '#' and empty lines get special handling (body not
// visible; presumably they are skipped -- confirm).
572 if (c ==
'#' || c ==
'\n') {
578 tr.Debug << option[constraints::cst_file]()[1]
579 <<
" end of file reached" << std::endl;
// Only AtomPair records are parsed.
582 if (tag ==
"AtomPair") {
585 data >> name1 >> id1 >> name2 >> id2 >> func_type;
586 tr.Debug <<
"read: " << name1 <<
" " << id1
587 <<
" " << name2 <<
" " << id2 <<
" func: " << func_type
// Ignore constraints that refer to residues beyond the query; the map is filled
// symmetrically.
// NOTE(review): the literal 81.0 is presumably a squared distance, i.e.
// (9 A)^2 -- confirm, and consider naming the constant.
589 if (id1 <= size_of_query() && id2 <= size_of_query()) {
590 atom_pair_constraint_contact_map_[id1][id2] = 81.0;
591 atom_pair_constraint_contact_map_[id2][id1] = 81.0;
596 tr.Info << n_constr <<
" constraints loaded from a file" << std::endl;
// With an input alignment, query positions are flagged in skip_position based on
// the query-to-template mapping.
603 if (option[ in::file::alignment ].user()) {
606 tr.Info <<
"Input alignment used to skip aligned positions: " << std::endl;
607 tr.Info << alns[1] << std::endl;
608 Size const query_idx( 1 );
609 Size const templ_idx( 2 );
610 Size nres = size_of_query();
612 alns[1].sequence_mapping( query_idx, templ_idx )
614 for (
Size resi = 1; resi <= nres; resi++ ) {
615 Size t_resi = mapping_[ resi ];
// A mapped index of 0 means the query residue has no template counterpart.
616 bool const gap_exists( t_resi == 0 );
618 skip_position[resi] =
true;
// Counter keys are (squared cutoff, contact type); key (0, type) is the default
// counter, additional keys exist for every extra distance cutoff.
626 std::set<ContactType>::iterator it;
627 for (it=contact_types_.begin(); it!=contact_types_.end(); it++) {
629 std::pair<Real,ContactType> p(0,*it);
632 for (
Size i=1; i<=contacts_dist_cutoffs_squared_.size();++i) {
633 std::pair<Real,ContactType> p(contacts_dist_cutoffs_squared_[i],*it);
// Minimum number of contacts of a single type for a pair to be written: 1.0 when
// the per-residue setting is below 1.0, otherwise per-residue value times the
// fragment size.
640 Real const min_contacts = (nonlocal_min_contacts_per_res_ < 1.0) ? 1.0 : nonlocal_min_contacts_per_res_*(
Real)fragment_size;
// Last position of the first fragment that still leaves room for the minimum
// separation and a second fragment.
// NOTE(review): if Size is unsigned this expression wraps around for queries
// shorter than 2*fragment_size + contacts_min_seq_sep_ - 2 -- confirm.
641 Size const maxiqpos = size_of_query()-(contacts_min_seq_sep_-1)-fragment_size-fragment_size+1;
643 time_t time_start = time(NULL);
// First pass over the admissible positions (loop body not visible; presumably it
// counts them into positions_cnt -- confirm).
646 Size positions_cnt = 0;
647 for (
Size iqpos = 1; iqpos <= query_positions_.size(); ++iqpos) {
648 Size qPosi = query_positions_[iqpos];
649 if (qPosi > maxiqpos)
continue;
// Integer division: the per-thread quota is 0 when there are fewer positions
// than threads, in which case each thread bucket below is advanced after a
// single position until the last bucket is reached.
652 const Size qPosi_per_thread = positions_cnt/max_threads_;
// Second pass: assign positions to thread buckets; the last bucket takes all the
// remaining positions.
654 for (
Size iqpos = 1; iqpos <= query_positions_.size(); ++iqpos) {
655 Size qPosi = query_positions_[iqpos];
656 if (qPosi > maxiqpos)
continue;
657 qPosi_to_run[thread].push_back( qPosi );
658 if (qPosi_to_run[thread].
size() >= qPosi_per_thread && thread < max_threads_) ++thread;
661 #ifdef USE_BOOST_THREAD
662 boost::thread_group threads;
664 for (
Size j = 1; j <= max_threads_; ++j) {
665 if (qPosi_to_run[j].
size() > 0) {
666 std::cout <<
"thread: " << j <<
" - " << qPosi_to_run[j].size() <<
" positions -";
667 for (
Size pos = 1; pos <= qPosi_to_run[j].size(); ++pos) std::cout <<
" " << qPosi_to_run[j][pos];
668 std::cout << std::endl;
674 tr.super_mute(
false);
// Direct call that processes the first bucket. NOTE(review): presumably the
// non-threaded path; the surrounding preprocessor branch is not visible.
677 nonlocal_pairs_at_positions( qPosi_to_run[1], fragment_size, skip_position, fragment_set, thread_pairs[1] );
// The scale factor is only taken from the side-chain cutoff object when the CEN
// contact type is configured; '.' is replaced by '_' for use in file names.
682 for (it=contact_types_.begin(); it!=contact_types_.end(); it++)
683 if (*it ==
CEN) scale_factor = string_of(sidechain_contact_dist_cutoff_->scale_factor());
684 replace( scale_factor.begin(), scale_factor.end(),
'.',
'_' );
685 const std::string silent_out_file_name = prefix_ +
"." + string_of(contacts_min_seq_sep_) +
"." + string_of(sqrt(contacts_dist_cutoff_squared_)) +
"." + scale_factor +
686 "." + string_of(n_frags_) +
"." + string_of(fragment_size) +
"mers.nonlocal_pairs.out";
690 utility::io::ozstream contacts_output_all;
691 bool output_all = option[frags::contacts::output_all]();
693 const std::string contacts_out_file_name = prefix_ +
"." + string_of(contacts_min_seq_sep_) +
"." + string_of(sqrt(contacts_dist_cutoff_squared_)) +
"." + scale_factor +
694 "." + string_of(n_frags_) +
"." + string_of(fragment_size) +
"mers.nonlocal_pairs.contacts";
695 contacts_output_all.open(contacts_out_file_name.c_str());
697 contacts_output_all <<
"# i j type dist frag_i frag_j rank_i rank_j" << std::endl;
// Walk all pairs found by all threads.
704 for (
Size j = 1; j <= thread_pairs.size(); ++j) {
705 for (
Size k = 1; k <= thread_pairs[j].size(); ++k) {
707 Size qPosi = thread_pairs[j][k]->get_query_pos_i();
708 Size qPosj = thread_pairs[j][k]->get_query_pos_j();
// Source chunks of both fragments and the offsets that map a query position
// onto an index in each chunk.
712 VallChunkOP chunki = thread_pairs[j][k]->get_candidate_i().first->get_chunk();
713 VallChunkOP chunkj = thread_pairs[j][k]->get_candidate_j().first->get_chunk();
714 int cPosi_offset = thread_pairs[j][k]->get_candidate_i().first->get_first_index_in_vall() - qPosi;
715 int cPosj_offset = thread_pairs[j][k]->get_candidate_j().first->get_first_index_in_vall() - qPosj;
720 if (option[frags::nonlocal::output_silent]()) {
// Count the contacts of this pair per contact type; a type reaching
// min_contacts triggers the branch below (body not visible; presumably it sets
// output_pair -- confirm).
722 std::map<ContactType, Size> contact_type_cnt;
723 std::map<ContactType, Size>::iterator iter;
724 bool output_pair =
false;
725 for (it=contact_types_.begin(); it!=contact_types_.end(); it++)
726 contact_type_cnt[*it] = 0;
727 for (
Size i=1; i<=contacts.size(); ++i) contact_type_cnt[contacts[i]->type()]++;
728 for ( iter = contact_type_cnt.begin(); iter != contact_type_cnt.end(); iter++ ) {
729 if ((
Real)iter->second >= min_contacts) {
739 fragdatapair.push_back(thread_pairs[j][k]->get_candidate_i().first->get_frag_data());
740 fragdatapair.push_back(thread_pairs[j][k]->get_candidate_j().first->get_frag_data());
// Query sequence of both fragments concatenated; substr takes 0-based offsets,
// hence qPos - 1.
741 std::string const & sequence = get_query_seq_string().substr(qPosi-1,fragment_size) +
742 get_query_seq_string().substr(qPosj-1,fragment_size);
// Structure tag: <pdb id><chain>_<qPosi>_<qPosj>_<first resi of i>_<first resi of j>.
747 std::stringstream tag;
749 tag << thread_pairs[j][k]->get_candidate_i().first->get_pdb_id() << thread_pairs[j][k]->get_candidate_i().first->get_chain_id() <<
750 "_" << qPosi <<
"_" << qPosj <<
"_" <<
751 thread_pairs[j][k]->get_candidate_i().first->get_residue(1)->resi() <<
"_" << thread_pairs[j][k]->get_candidate_j().first->get_residue(1)->resi();
753 tr.Warning <<
"skipping " << tag.str() <<
": non-ideal pose from VALL" << std::endl;
// CA coordinates of the fragment-pair pose and of the matching native residues
// (first fragment followed by second fragment), used for the RMS below.
760 std::vector< core::Vector > pose_coords;
761 std::vector< core::Vector > native_pose_coords;
763 pose_coords.push_back( pose.
residue(i).
xyz(
"CA") );
764 for (
Size i=0; i<fragment_size; i++) {
766 Size respos = qPosi+i;
767 native_pose_coords.push_back( nativePose->residue(respos).xyz(
"CA") );
769 for (
Size i=0; i<fragment_size; i++) {
771 Size respos = qPosj+i;
772 native_pose_coords.push_back( nativePose->residue(respos).xyz(
"CA") );
// Copy both coordinate sets into 3 x natoms arrays addressed with 1-based
// indices, as passed to rms_wrapper.
774 int const natoms = pose_coords.size();
775 FArray2D< core::Real > p1a( 3, natoms );
776 FArray2D< core::Real > p2a( 3, natoms );
777 for (
int i = 0; i < natoms; ++i ) {
778 for (
int l = 0; l < 3; ++l ) {
779 p1a(l+1,i+1) = pose_coords[i][l];
780 p2a(l+1,i+1) = native_pose_coords[i][l];
784 core::Real rms_orig_native = numeric::model_quality::rms_wrapper( natoms, p1a, p2a );
// "fscore" is the sum of the total scores of both fragments.
797 core::pose::setPoseExtraScores( pose,
"fscore", ms->total_score(thread_pairs[j][k]->get_candidate_i().second) + ms->total_score(thread_pairs[j][k]->get_candidate_j().second));
800 for ( iter = contact_type_cnt.begin(); iter != contact_type_cnt.end(); iter++ )
803 ss->fill_struct( pose, tag.str() );
804 sfd.write_silent_struct( *ss, silent_out_file_name );
// Per-contact output and counting.
810 for (
Size i = 1; i <= contacts.size(); ++i) {
815 contacts_output_all << contacts[i]->i() <<
" " << contacts[i]->j() <<
" " << contacts[i]->type_name() <<
" " <<
816 fmt::F(5, 2, contacts[i]->dist()) <<
" " << qPosi <<
" " << qPosj <<
" " <<
817 thread_pairs[j][k]->get_candidate_i_rank() <<
" " << thread_pairs[j][k]->get_candidate_j_rank() << std::endl;
// CEN contacts are counted under the default key (0, CEN) and their neighbours
// are tested against the residue-pair-specific side-chain cutoff.
821 if (contacts[i]->type() ==
CEN) {
822 std::pair<Real,ContactType> p(0,
CEN);
823 std::pair<Size,Size> querypair(contacts[i]->i(), contacts[i]->j());
824 contact_counts[p]->iterate(querypair);
// Neighbour window; the lower bounds are computed in int first so that they can
// be clamped to 1.
828 int m_min_tmp = contacts[i]->i()-neighbors;
829 int n_min_tmp = contacts[i]->j()-neighbors;
830 Size m_min = (m_min_tmp >= 1) ? m_min_tmp : 1;
831 Size n_min = (n_min_tmp >= 1) ? n_min_tmp : 1;
832 Size m_max = contacts[i]->i()+neighbors;
833 Size n_max = contacts[i]->j()+neighbors;
835 for (
Size m = m_min; m <= m_max; ++m) {
836 if (m > size_of_query())
continue;
838 int chunk_i = cPosi_offset + m;
839 if (chunk_i < 1 || chunk_i > (
int)chunki->size())
continue;
841 for (
Size n = n_min; n <= n_max; ++n) {
842 if (n > size_of_query())
continue;
// Do not count the contact itself as its own neighbour.
843 if (m == contacts[i]->i() && n == contacts[i]->j())
continue;
845 int chunk_j = cPosj_offset + n;
846 if (chunk_j < 1 || chunk_j > (
int)chunkj->size())
continue;
849 if (std::abs(
int(m-n)) < (
int)contacts_min_seq_sep_)
continue;
851 if (std::abs(
int( chunki->at(chunk_i)->resi() - chunkj->at(chunk_j)->resi() )) < (
int)contacts_min_seq_sep_)
continue;
854 Real dist_squared = chunki->at(chunk_i)->distance_squared(chunkj->at(chunk_j), contacts[i]->type());
855 if (dist_squared <= sidechain_contact_dist_cutoff_->get_cutoff_squared( chunki->at(chunk_i)->aa(), chunkj->at(chunk_j)->aa() )) {
856 std::pair<Size,Size> neighbor_pair(m, n);
857 contact_counts[p]->iterate_neighbor(querypair, neighbor_pair);
// Same counting for every additional distance cutoff, using the counter keyed by
// (cutoff, contact type).
864 for (
Size cdi=1; cdi<=contacts_dist_cutoffs_squared_.size();++cdi) {
865 if (contacts[i]->dist_squared() <= contacts_dist_cutoffs_squared_[cdi]) {
866 std::pair<Real,ContactType> p(contacts_dist_cutoffs_squared_[cdi],contacts[i]->type());
867 std::pair<Size,Size> querypair(contacts[i]->i(), contacts[i]->j());
868 contact_counts[p]->iterate(querypair);
872 int m_min_tmp = contacts[i]->i()-neighbors;
873 int n_min_tmp = contacts[i]->j()-neighbors;
874 Size m_min = (m_min_tmp >= 1) ? m_min_tmp : 1;
875 Size n_min = (n_min_tmp >= 1) ? n_min_tmp : 1;
876 Size m_max = contacts[i]->i()+neighbors;
877 Size n_max = contacts[i]->j()+neighbors;
879 for (
Size m = m_min; m <= m_max; ++m) {
880 if (m > size_of_query())
continue;
882 int chunk_i = cPosi_offset + m;
883 if (chunk_i < 1 || chunk_i > (
int)chunki->size())
continue;
885 for (
Size n = n_min; n <= n_max; ++n) {
886 if (n > size_of_query())
continue;
887 if (m == contacts[i]->i() && n == contacts[i]->j())
continue;
889 int chunk_j = cPosj_offset + n;
890 if (chunk_j < 1 || chunk_j > (
int)chunkj->size())
continue;
893 if (std::abs(
int(m-n)) < (
int)contacts_min_seq_sep_)
continue;
895 if (std::abs(
int( chunki->at(chunk_i)->resi() - chunkj->at(chunk_j)->resi() )) < (
int)contacts_min_seq_sep_)
continue;
898 Real dist_squared = chunki->at(chunk_i)->distance_squared(chunkj->at(chunk_j), contacts[i]->type());
899 if (dist_squared <= contacts_dist_cutoffs_squared_[cdi]) {
900 std::pair<Size,Size> neighbor_pair(m, n);
901 contact_counts[p]->iterate_neighbor(querypair, neighbor_pair);
912 if (output_all) contacts_output_all.close();
// Write one summary file per contact type: each line is "i j count", followed by
// "m n count" triples for the neighbouring pairs when neighbors > 0.
916 for (it=contact_types_.begin(); it!=contact_types_.end(); it++) {
918 std::string scale_factor = string_of(sidechain_contact_dist_cutoff_->scale_factor());
919 replace( scale_factor.begin(), scale_factor.end(),
'.',
'_' );
920 const std::string out_file_name_contacts = prefix_ +
"." +
contact_name(*it) +
"." + string_of(contacts_min_seq_sep_) +
"." + scale_factor +
"." +
921 string_of(n_frags_) +
"." + string_of(fragment_size) +
"mers.nonlocal_pairs.contacts";
922 utility::io::ozstream output_contacts(out_file_name_contacts);
923 output_contacts <<
"# i j count";
924 if (neighbors > 0) output_contacts <<
" neighbors_" << neighbors <<
"_i_j_count";
925 output_contacts << std::endl;
926 std::pair<Real,ContactType> p(0,*it);
927 std::map<std::pair<Size,Size>,
Size> query_counts = contact_counts[p]->counts();
928 std::map<std::pair<Size,Size>,
Size>::iterator iter;
929 for ( iter = query_counts.begin(); iter != query_counts.end(); iter++ ) {
930 std::pair<Size,Size> query_pair = iter->first;
931 output_contacts << query_pair.first <<
" " << query_pair.second <<
" " << iter->second;
932 if (neighbors > 0 && contact_counts[p]->neighbor_counts_exist(query_pair)) {
933 std::map<std::pair<Size,Size>,
Size> neighbor_counts = contact_counts[p]->neighbor_counts(query_pair);
934 std::map<std::pair<Size,Size>,
Size>::iterator neigh_iter;
935 for ( neigh_iter = neighbor_counts.begin(); neigh_iter != neighbor_counts.end(); neigh_iter++ ) {
936 std::pair<Size,Size> neighbor_pair = neigh_iter->first;
937 output_contacts <<
" " << neighbor_pair.first <<
" " << neighbor_pair.second <<
" " << neigh_iter->second;
940 output_contacts << std::endl;
942 output_contacts.close();
// Same summary, one file per additional distance cutoff; the file name carries
// the (non-squared) cutoff instead of the scale factor.
944 for (
Size i=1; i<=contacts_dist_cutoffs_squared_.size();++i) {
945 const std::string out_file_name_contacts = prefix_ +
"." +
contact_name(*it) +
"." + string_of(contacts_min_seq_sep_) +
"." + string_of(sqrt(contacts_dist_cutoffs_squared_[i])) +
946 "." + string_of(n_frags_) +
"." + string_of(fragment_size) +
"mers.nonlocal_pairs.contacts";
947 utility::io::ozstream output_contacts(out_file_name_contacts);
948 output_contacts <<
"# i j count";
949 if (neighbors > 0) output_contacts <<
" neighbors_" << neighbors <<
"_i_j_count";
950 output_contacts << std::endl;
951 std::pair<Real,ContactType> p(contacts_dist_cutoffs_squared_[i],*it);
952 std::map<std::pair<Size,Size>,
Size> query_counts = contact_counts[p]->counts();
953 std::map<std::pair<Size,Size>,
Size>::iterator iter;
954 for ( iter = query_counts.begin(); iter != query_counts.end(); iter++ ) {
955 std::pair<Size,Size> query_pair = iter->first;
956 output_contacts << query_pair.first <<
" " << query_pair.second <<
" " << iter->second;
957 if (neighbors > 0 && contact_counts[p]->neighbor_counts_exist(query_pair)) {
958 std::map<std::pair<Size,Size>,
Size> neighbor_counts = contact_counts[p]->neighbor_counts(query_pair);
959 std::map<std::pair<Size,Size>,
Size>::iterator neigh_iter;
960 for ( neigh_iter = neighbor_counts.begin(); neigh_iter != neighbor_counts.end(); neigh_iter++ ) {
961 std::pair<Size,Size> neighbor_pair = neigh_iter->first;
962 output_contacts <<
" " << neighbor_pair.first <<
" " << neighbor_pair.second <<
" " << neigh_iter->second;
965 output_contacts << std::endl;
967 output_contacts.close();
972 time_t time_end = time(NULL);
974 tr.Info <<
"... done. Processed " << query_positions_.size() <<
" positions. Time elapsed: "
975 << (time_end - time_start) <<
" seconds." << std::endl;
// Restore the user's original write_ca_coordinates setting.
978 option[frags::write_ca_coordinates].value(orig_opt);
// Scores a list of chunks with the score manager selected by `index` (body
// fragment; the signature and several statements are not visible in this
// excerpt). NOTE(review): `index` presumably identifies the calling thread --
// confirm.
984 for (
Size i=1; i<=chunks.size(); ++i) {
// Let the score manager cache per-chunk data before fragments are scored.
986 scores_[index]->do_caching(chunk);
988 for (
Size iFragSize = 1; iFragSize <= frag_sizes_.size(); ++iFragSize) {
989 Size fragment_size = frag_sizes_[iFragSize];
// A chunk shorter than the fragment cannot provide any candidate; this check
// also keeps the bound of the innermost loop from going below 1.
990 if (chunk->size() < fragment_size)
continue;
992 for (
Size iqpos = 1; iqpos <= query_positions_.size(); ++iqpos) {
993 Size iPos = query_positions_[iqpos];
// Skip query positions at which a fragment of this size no longer fits.
994 if ( iPos > size_of_query() - fragment_size + 1 )
continue;
// Try every start position j in the chunk.
996 for (
Size j = 1; j <= chunk->size() - fragment_size + 1; j++) {
// A candidate accepted by the cached scoring is offered to the sink; a fresh
// score map is created only when the sink kept the pair, otherwise the same map
// is reused for the next candidate.
998 if (scores_[index]->score_fragment_from_cache(f, empty_map)) {
999 std::pair<FragmentCandidateOP,scores::FragmentScoreMapOP> p(f,empty_map);
1000 if(sink->add(p)) empty_map = scores_[index]->create_empty_map();
1005 scores_[index]->clean_up();
// Picks fragment candidates from all chunks (body fragment; the signature and
// several statements are not visible in this excerpt).
// With USE_BOOST_THREAD and more than one thread the valid chunks are split
// into per-thread buckets; otherwise all chunks are processed serially with the
// first score manager.
1011 PROF_START( basic::FRAGMENTPICKING );
1013 tr.Info <<
"Picking candidates..." << std::endl;
1016 time_t time_start = time(NULL);
1018 #ifdef USE_BOOST_THREAD
1019 if (max_threads_ > 1) {
// First pass over the valid chunks (loop body not visible; presumably it counts
// them into valid_chunks_cnt -- confirm).
1021 Size valid_chunks_cnt = 0;
1022 for (
Size i = 1; i <= chunks_->size(); ++i) {
1024 if (!is_valid_chunk( chunk ))
continue;
// Integer division: the quota is 0 when there are fewer valid chunks than
// threads, in which case each bucket below is advanced after a single chunk
// until the last bucket is reached.
1027 const Size chunks_per_thread = valid_chunks_cnt/max_threads_;
// Second pass: fill the thread buckets; the last bucket takes all the remaining
// chunks.
1029 for (
Size i = 1; i <= chunks_->size(); ++i) {
1031 if (!is_valid_chunk( chunk ))
continue;
1032 chunks_to_run[thread].push_back( chunk );
1033 if (chunks_to_run[thread].
size() >= chunks_per_thread && thread < max_threads_) ++thread;
1035 boost::thread_group threads;
// Tracer output is muted while the threads are set up and unmuted afterwards.
1036 tr.super_mute(
true);
1037 for (
Size j = 1; j <= max_threads_; ++j) {
1038 if (chunks_to_run[j].
size() > 0) {
1039 std::cout <<
"thread: " << j <<
" - " << chunks_to_run[j].size() <<
" chunks" << std::endl;
1044 tr.super_mute(
false);
1046 time_t time_end = time(NULL);
1047 tr.Info <<
"... done. Processed " << chunks_->size() <<
" chunks. Time elapsed: "
1048 << (time_end - time_start) <<
" seconds." << std::endl;
1051 PROF_STOP( basic::FRAGMENTPICKING );
1055 #endif // USE_BOOST_THREAD
// Serial path: process every valid chunk with the first score manager.
1059 for (
Size i = 1; i <= chunks_->size(); i++) {
1061 if (!is_valid_chunk( chunk ))
continue;
1062 tr.Trace <<
"Processing sequence from vall: " << chunk->get_sequence() << std::endl;
1065 scores_[1]->do_caching(chunk);
1067 for (
Size iFragSize = 1; iFragSize <= frag_sizes_.size(); ++iFragSize) {
1068 Size fragment_size = frag_sizes_[iFragSize];
// A chunk shorter than the fragment cannot provide any candidate.
1069 if (chunk->size() < fragment_size)
continue;
// Last query position at which a fragment of this size still fits.
1071 Size maxqpos = size_of_query() - fragment_size + 1;
1073 tr.Trace <<
"Picking fragments of size "<<fragment_size<<
1074 " at "<<query_positions_.size()<<
" query positions"<<std::endl;
1075 for (
Size iqpos = 1; iqpos <= query_positions_.size(); ++iqpos) {
1076 Size iPos = query_positions_[iqpos];
1077 if ( iPos > maxqpos )
continue;
1080 for (
Size j = 1; j <= chunk->size() - fragment_size + 1; ++j) {
// A fresh score map is created only when the sink kept the pair; otherwise the
// same map is reused for the next candidate.
1082 if (scores_[1]->score_fragment_from_cache(f, empty_map)) {
1083 std::pair<FragmentCandidateOP,scores::FragmentScoreMapOP> p(f,empty_map);
1084 if(sink->add(p)) empty_map = scores_[1]->create_empty_map();
1089 scores_[1]->clean_up();
1090 tr.Trace << chunk->get_pdb_id() <<
" done" << std::endl;
// Progress report.
// NOTE(review): assuming size() returns an integer type,
// chunks_->size()/100*100 evaluates to 0 when there are fewer than 100 chunks,
// which makes the % below a modulo by zero (undefined behaviour) -- guard the
// divisor.
1091 if ( (i*100) % (chunks_->size()/100*100) == 0 )
tr.Info << (i*100) / chunks_->size()
1092 <<
"% done at "<< chunk->get_pdb_id() << std::endl;
1095 time_t time_end = time(NULL);
1096 tr.Info <<
"... done. Processed " << chunks_->size() <<
" chunks. Time elapsed: "
1097 << (time_end - time_start) <<
" seconds." << std::endl;
1100 PROF_STOP( basic::FRAGMENTPICKING );
// Accumulates the weighted sum of score components into total:
// total += components.at(i) * weights.at(i) for i = 1 .. components.size().
// NOTE(review): the index starts at 1, so both containers are presumably 1-based - confirm.
1108 for (
Size i = 1; i <= components.size(); i++)
1109 total += components.at(i) * weights.at(i);
1116 tr.Debug << sec_str_input.size() / 2 <<
" secondary structure assignment(s):\n";
1117 for (
Size i = 1; i <= sec_str_input.size(); i += 2) {
1118 tr.Debug << i / 2 <<
" " << sec_str_input[i]
1119 <<
" file will be loaded under \"" << sec_str_input[i + 1] <<
"\" name\n";
1120 read_ss_file(sec_str_input[i], sec_str_input[i + 1]);
1122 tr.Debug << std::endl;
// Identifies the format of a secondary structure file from its first line and dispatches to
// the matching reader. Exits with a message when the file cannot be read or recognised.
1128 utility::io::izstream data( file_name.c_str() );
1131 utility_exit_with_message(
"Can't read secondary structure file: "+file_name );
// Only the first line is inspected; it is split into up to five whitespace-separated tokens.
1135 getline( data, line );
1138 std::istringstream line_stream( line );
1139 line_stream >> l1 >> l2 >> l3 >> l4 >> l5;
// Header signature handled by read_psipred_ss2 (first four tokens are compared).
1141 if ( (l1 ==
"#") && (l2 ==
"PSIPRED") && (l3 ==
"VFORMAT")
1142 && (l4 ==
"(PSIPRED") ) {
1143 read_psipred_ss2( file_name, prediction_name);
// Header signature handled by read_talos_ss (all five tokens are compared).
1145 if ( (l1 ==
"REMARK") && (l2 ==
"Neural") && (l3 ==
"network")
1146 && (l4 ==
"secondary") && (l5 ==
"structure") ) {
1147 read_talos_ss( file_name, prediction_name);
// Neither signature matched.
1149 utility_exit_with_message(
"Can't identify secondary structure file type (needs vertical psipred_ss2 or talos+ pred.ss): "+file_name );
// Reads a prediction with ss_profile->read_psipred_ss2 and registers it under prediction_name.
1159 ss_profile->read_psipred_ss2(file_name);
// Build the string form by appending secstruct(i) for every residue (1-based).
1162 for (
Size i = 1; i <= ss_profile->total_residue(); i++)
1163 query_ss_as_string += ss_profile->secstruct(i);
// The string and the profile object are stored under the same key.
1165 query_ss_as_string_[prediction_name] = query_ss_as_string;
1166 query_ss_profile_[prediction_name] = ss_profile;
// Reads a prediction with ss_profile->read_talos_ss, pads it to the length of query_profile_
// and registers it under prediction_name.
1174 ss_profile->read_talos_ss(file_name);
// Positions beyond what the file provided get equal thirds for the first three fractions
// and 0.0 for the fourth argument of set_fractions.
// NOTE(review): if extend() already raises total_residue() to the query length, the loop
// below never executes - confirm the intended order of these two steps.
1176 ss_profile->extend(query_profile_->length());
1177 for (
Size pos = ss_profile->total_residue()+1; pos <= query_profile_->length(); pos++ ) {
1178 ss_profile->set_fractions( pos, 1.0/3.0, 1.0/3.0, 1.0/3.0, 0.0 );
// Build the string form by appending secstruct(i) for every residue (1-based).
1182 for (
Size i = 1; i <= ss_profile->total_residue(); i++)
1183 query_ss_as_string += ss_profile->secstruct(i);
// The string and the profile object are stored under the same key.
1185 query_ss_as_string_[prediction_name] = query_ss_as_string;
1186 query_ss_profile_[prediction_name] = ss_profile;
// Reads per-residue depth values from a DEPTH file into query_residue_depth_
// (the function name is taken from the error messages below).
1191 utility::io::izstream data( file_name.c_str() );
1194 utility_exit_with_message(
"Can't read DEPTH file: "+file_name );
// Start from an empty vector; the first line of the file is read and discarded.
1198 query_residue_depth_.clear();
1199 getline( data, line );
1200 while ( getline( data, line ) ) {
1201 std::istringstream line_stream( line );
// Three fields per line: an ignored token, a residue name and the depth value.
1203 line_stream >> jnk >> aathree >> depth;
// Lines whose residue name is UNK are not stored.
1204 if (aathree !=
"UNK")
1205 query_residue_depth_.push_back( depth );
// NOTE(review): the value is pushed before the stream state is checked; this is harmless
// only if utility_exit_with_message never returns - confirm.
1206 if ( line_stream.fail() )
1207 utility_exit_with_message(
"Error reading in FragmentPicker::read_depth()!" );
// The number of stored values must equal the query length.
1210 if (query_residue_depth_.size() != size_of_query())
1211 utility_exit_with_message(
"Error reading in FragmentPicker::read_depth(): does not match size of query!" );
1216 utility::io::izstream data( file_name.c_str() );
1219 utility_exit_with_message(
"Can't read spine-x file: "+file_name );
1223 query_sa_prediction_.clear();
1224 query_phi_prediction_.clear();
1225 query_psi_prediction_.clear();
1226 getline( data, line );
1227 while ( getline( data, line ) ) {
1228 std::istringstream line_stream( line );
1230 line_stream >> jnk >> jnk >> jnk >> phi >> psi >> jnk >> jnk >> jnk >> jnk >> jnk >> asa >> jnk >> jnk >> jnk >> jnk >> pkc_phi >> pkc_psi;
1231 query_sa_prediction_.push_back( asa );
1232 query_phi_prediction_.push_back( phi );
1233 query_psi_prediction_.push_back( psi );
1234 query_phi_prediction_conf_.push_back( pkc_phi );
1235 query_psi_prediction_conf_.push_back( pkc_psi );
1236 if ( line_stream.fail() )
1237 utility_exit_with_message(
"Error reading in FragmentPicker::read_spine_x()!" );
1240 if (query_sa_prediction_.size() != size_of_query())
1241 utility_exit_with_message(
"Error reading in FragmentPicker::read_spine_x(): does not match size of query!" );
// Builds a secondary structure profile from the string query_secondary and registers both
// the string and the profile under prediction_name.
1249 ss_profile->extend(query_secondary.length());
// The profile is addressed 1-based while the string is 0-based, hence query_secondary[i - 1].
1251 for (
Size i = 1; i <= query_secondary.length(); ++i) {
1252 char ss = query_secondary[i - 1];
// Each residue gets fraction 1.0 in exactly one of the three slots; the conditions that
// select between these calls are not visible in this view.
1254 ss_profile->set_fractions(i, 0.0, 1.0, 0.0);
1256 ss_profile->set_fractions(i, 0.0, 0.0, 1.0);
1258 ss_profile->set_fractions(i, 1.0, 0.0, 0.0);
1260 query_ss_as_string_[prediction_name] = query_secondary;
1261 query_ss_profile_[prediction_name] = ss_profile;
// For every fragment size: gathers the candidates stored for each picked query position,
// runs selector_ on them and hands the selected fragments to output_fragments.
1265 using namespace ObjexxFCL;
1266 tr.Info <<
"Saving Fragments..." << std::endl;
// With a single entry in candidates_sinks_ there is nothing to merge.
1267 const bool skip_merge = (candidates_sinks_.size() == 1) ?
true :
false;
1268 tr.Debug <<
"skip_merge: " << ( skip_merge ?
"true" :
"false" ) << std::endl;
1269 for (
Size iFragSize = 1; iFragSize <= frag_sizes_.size(); ++iFragSize) {
1270 Size fragment_size = frag_sizes_[iFragSize];
// Last query position at which a fragment of this size still fits.
// NOTE(review): if Size is unsigned this wraps when fragment_size > size_of_query() + 1 - confirm.
1271 Size maxqpos = size_of_query() - fragment_size + 1;
1275 for (
Size iqpos = 1; iqpos <= query_positions_.size(); ++iqpos) {
1276 Size qPos = query_positions_[iqpos];
1277 if ( qPos > maxqpos)
continue;
1278 tr.Debug <<
"saving " << fragment_size <<
"mers for position..." << qPos << std::endl;
// Merge step: the candidates of every sink in candidates_sinks_ are inserted into
// candidates_sink_ for this position.
1280 for (
Size i=1;i<=candidates_sinks_.size();++i)
1281 candidates_sink_[fragment_size]->
insert(qPos, candidates_sinks_[i][fragment_size]);
// Two alternative sources for the input list: the single sink, or the merged one.
// The branch keywords choosing between them are not visible in this view.
1285 in = candidates_sinks_[1][fragment_size]->get_candidates(qPos);
1287 in = candidates_sink_[fragment_size]->get_candidates(qPos);
// Positions without candidates produce no entry in final_fragments.
1289 if ( in.size() == 0 )
continue;
1290 selector_->select_fragments(in, out);
1291 final_fragments[qPos] = out;
1293 tr.Debug <<
"call output_fragments now: " << std::endl;
1294 output_fragments( fragment_size, final_fragments );
// For every fragment size: writes all stored candidates, position by position, to an output
// file whose name starts with prefix_, then prints a report of the sink to tr.Debug.
1305 using namespace ObjexxFCL;
// With a single entry in candidates_sinks_ there is nothing to merge.
1306 const bool skip_merge = (candidates_sinks_.size() == 1) ?
true :
false;
1307 for (
Size iFragSize = 1; iFragSize <= frag_sizes_.size(); ++iFragSize) {
1308 Size fragment_size = frag_sizes_[iFragSize];
// Last query position at which a fragment of this size still fits.
// NOTE(review): if Size is unsigned this wraps when fragment_size > size_of_query() + 1 - confirm.
1309 Size maxqpos = size_of_query() - fragment_size + 1;
// File name begins with prefix_, a dot and the fragment size; the rest of the expression
// is not visible in this view.
1310 std::string out_file_name = prefix_ +
"." + string_of(fragment_size)
1312 utility::io::ozstream output(out_file_name);
1314 for (
Size iqpos = 1; iqpos <= query_positions_.size(); ++iqpos) {
1315 Size qPos = query_positions_[iqpos];
1316 if ( qPos > maxqpos)
continue;
// Merge step: the candidates of every sink in candidates_sinks_ are inserted into
// candidates_sink_ for this position.
1319 for (
Size i=1;i<=candidates_sinks_.size();++i)
1320 candidates_sink_[fragment_size]->
insert(qPos, candidates_sinks_[i][fragment_size]);
// Two alternative sources for the output list: the single sink, or the merged one.
// The branch keywords choosing between them are not visible in this view.
1324 out = candidates_sinks_[1][fragment_size]->get_candidates(qPos);
1326 out = candidates_sink_[fragment_size]->get_candidates(qPos);
1328 if (out.size() == 0)
continue;
// Header line per position: position in a 12-wide field, candidate count in a 10-wide field.
1329 output <<
"position: " << I(12, qPos) <<
" neighbors: " << I(10,
1330 out.size()) << std::endl << std::endl;
1331 for (
Size fi = 1; fi <= out.size(); ++fi) {
1332 out[fi].first->print_fragment(output);
1333 output << std::endl;
// The report is printed from whichever sink supplied the candidates.
1337 candidates_sinks_[1][fragment_size]->print_report(
tr.Debug, get_score_manager());
1339 candidates_sink_[fragment_size]->print_report(
tr.Debug, get_score_manager());
// Picks candidates of length frag_len for the single query position i_pos
// (the signature is quoted in the error message below). Only one collector is supported.
1346 if (candidates_sinks_.size() > 1)
1347 utility_exit_with_message(
"pick_candidates(Size i_pos,Size frag_len) does not support multiple CandidateCollectors" );
1349 for (
Size i = 1; i <= chunks_->size(); i++) {
// This overload of is_valid_chunk also rejects chunks shorter than frag_len, which keeps
// the window bound computed below from going negative.
1351 if (!is_valid_chunk( frag_len, chunk ))
continue;
1353 tr.Trace <<
"Processing sequence from vall: " << chunk->get_sequence() << std::endl;
// Slide a window of frag_len residues over the chunk (1-based start position j).
1358 for (
Size j = 1; j <= chunk->size() - frag_len + 1; j++) {
1360 if (scores_[1]->score_fragment(f, empty_map)) {
1362 std::pair<FragmentCandidateOP, scores::FragmentScoreMapOP> p(f, new_map);
1366 tr.Trace << chunk->get_pdb_id() <<
" done" << std::endl;
// NOTE(review): both numbers printed here come from the same sink->count_candidates() call,
// although the text labels the first as a per-position count - confirm whether a
// per-position query was intended.
1367 tr.Trace << sink->count_candidates()<<
" candidates stored at pos. "
1368 <<i_pos<<
", "<<sink->count_candidates()<<
" in total"<< std::endl;
// Configures the picker from command line options: threading, query input, secondary
// structure and other predictions, chunk filters, the vall database, fragment sizes,
// scoring schemes, collectors, output prefix, picked positions and contact settings.
// (Presumably the body of parse_command_line; the header is outside this view.)
1376 #ifdef USE_BOOST_THREAD
// The thread count comes from the frags::j option when the user set it.
1378 if (option[ frags::j ].user()) max_threads_ = option[ frags::j ]();
// scores_ and candidates_sinks_ are grown until each has at least max_threads_ entries.
1381 while (max_threads_ > scores_.size())
1383 while (max_threads_ > candidates_sinks_.size()) {
1385 candidates_sinks_.push_back(storage);
// Query input, variant 1: a profile read from a checkpoint file.
1389 if (option[in::file::checkpoint].user()) {
1391 tr.Info <<
"reading a query profile from: "
1392 << option[in::file::checkpoint]() << std::endl;
1393 q_prof->read_from_checkpoint(option[in::file::checkpoint]());
1394 set_query_seq(q_prof);
1395 tr.Info <<
"picking fragments for query profile: "
1396 << get_query_seq_string() << std::endl;
// Query input, variant 2: a profile read from the first pssm file and converted with
// convert_profile_to_probs(1.0).
1398 if (option[in::file::pssm].user()) {
1400 tr.Info <<
"reading a query profile from: "
1401 << option[in::file::pssm]()[1] << std::endl;
1402 q_prof->read_from_file(option[in::file::pssm]()[1] );
1403 q_prof->convert_profile_to_probs(1.0);
1404 set_query_seq(q_prof);
1405 tr.Info <<
"picking fragments for query profile: "
1406 << get_query_seq_string() << std::endl;
// Query input, variant 3: a plain sequence from the first fasta file.
1410 if (option[in::file::fasta].user()) {
1412 tr.Info <<
"reading a query sequence from: "
1413 << option[in::file::fasta]()[1] << std::endl;
1415 set_query_seq(q_seq);
1416 tr.Info <<
"picking fragments for query sequence: "
1417 << get_query_seq_string() << std::endl;
// Optional per-residue predictions, each loaded only when its option was given.
1421 if (option[frags::ss_pred].user()) {
1423 read_ss_files(sec_str_input);
1427 if (option[frags::spine_x].user()) {
1428 read_spine_x(option[frags::spine_x]());
1432 if (option[frags::depth].user()) {
1433 read_depth(option[frags::depth]());
// Chunk filters: ids loaded from the allowed_pdb file are registered as one filter ...
1437 if (option[frags::allowed_pdb].user()) {
1439 allow->load_pdb_id_from_file(option[frags::allowed_pdb]());
1440 add_chunk_filter(allow);
1441 tr.Info <<
"Allowed PDB chains:\n";
1442 allow->show_pdb_ids(
tr.Info);
// ... and ids loaded from the denied_pdb file as another.
1445 if (option[frags::denied_pdb].user()) {
1447 deny->load_pdb_id_from_file(option[frags::denied_pdb]());
1448 add_chunk_filter(deny);
1449 tr.Info <<
"Excluded PDBs:\n";
1450 deny->show_pdb_ids(
tr.Info);
// Reading the vall database is profiled separately.
1454 PROF_START( basic::FRAGMENTPICKING_READ_VALL );
1455 if (option[in::file::vall].user()) {
1456 read_vall(option[in::file::vall]());
1458 PROF_STOP( basic::FRAGMENTPICKING_READ_VALL );
// Fragment sizes requested by the user; max_frag_size_ tracks the largest one seen.
1461 if (option[frags::frag_sizes].user()) {
1463 for (
Size i = 1; i <= frag_sizes_tmp.size(); ++i) {
1464 if(frag_sizes_tmp[i] > max_frag_size_)
1465 max_frag_size_ = frag_sizes_tmp[i];
1466 frag_sizes_.push_back(frag_sizes_tmp[i]);
// Sizes 3 and 9 are added here; presumably this is the default branch used when
// frags::frag_sizes was not given - the else keyword is not visible in this view.
1470 frag_sizes_.push_back(3);
1471 frag_sizes_.push_back(9);
1473 tr.Info <<
"Will pick fragments of size:";
1474 for (
Size i = 1; i <= frag_sizes_.size(); ++i)
1475 tr.Info << frag_sizes_[i] <<
" ";
1476 tr.Info << std::endl;
// Every entry of scores_ is set up from the same scoring config file.
1479 tr.Info <<
"Creating fragment scoring scheme" << std::endl;
1480 if (option[frags::scoring::config].user()) {
1482 for (
Size i = 1; i <= scores_.size(); ++i)
1483 scores_[i]->create_scores(option[frags::scoring::config](),
this);
1487 n_frags_ = option[frags::n_frags]();
1488 n_candidates_ = option[frags::n_candidates]();
// The candidate count is raised so that it is never below the number of fragments requested.
1490 if (n_frags_ > n_candidates_) n_candidates_ = n_frags_;
1492 tr.Info <<
"Picking " << n_frags_ <<
" fragments based on "
1493 << n_candidates_ <<
" candidates" << std::endl;
// A separate scoring scheme may be configured for the selection step.
1500 tr.Info <<
"Creating fragment scoring scheme for the selection step" << std::endl;
1502 if (option[frags::picking::selecting_scorefxn].user()) {
1504 selection_scoring->create_scores(option[frags::picking::selecting_scorefxn](),
this);
// Collector choice, case 1: quota protocol, configured by parse_quota_command_line.
1512 if (option[frags::quota_protocol].user() || option[frags::picking::quota_config_file].user()) {
1515 parse_quota_command_line();
// Collector choice, case 2: keep-all protocol, one collector per fragment size.
1518 if (option[frags::keep_all_protocol].user()) {
1519 for (
Size i = 1; i <= frag_sizes_.size(); ++i) {
1521 set_candidates_collector(frag_sizes_[i], collector);
1522 tr.Info <<
"Collector for fragment size: " << frag_sizes_[i] <<
" set to: GrabAllCollector" << std::endl;
// Collector choice, case 3: bounded collectors. j runs from 0 to max_threads_ inclusive,
// so max_threads_ + 1 collectors are registered per fragment size.
1525 for (
Size i = 1; i <= frag_sizes_.size(); ++i) {
1526 for (
Size j = 0; j <= max_threads_; ++j) {
1528 comparator,get_score_manager()->count_components());
1529 set_candidates_collector(frag_sizes_[i], collector, j);
1531 tr.Info <<
"Collector for fragment size: " << frag_sizes_[i] <<
" set to: BoundedCollector" << std::endl;
// Output file prefix and the explicit list of query positions, when given.
1550 if (option[out::file::frag_prefix].user()) {
1551 prefix_ = option[out::file::frag_prefix]();
1554 if (option[frags::picking::query_pos].user()) {
1555 set_picked_positions( option[frags::picking::query_pos]() );
// Settings for nonlocal pairs and fragment contacts.
1559 nonlocal_min_contacts_per_res_ = option[ frags::nonlocal::min_contacts_per_res ]();
1562 contact_types_.clear();
1564 for (
Size i = 1; i <= contact_types.size(); ++i) {
1567 sidechain_contact_dist_cutoff_ =
new SidechainContactDistCutoff( option[ frags::contacts::centroid_distance_scale_factor ]() );
1570 contacts_min_seq_sep_ = option[ frags::contacts::min_seq_sep ]();
// Distance cutoffs are stored squared; the square of the largest cutoff is kept separately.
1573 Real max_dist = 0.0;
1574 for (
Size i = 1; i <= dist_cutoffs.size(); ++i) {
1575 if (dist_cutoffs[i] > max_dist) max_dist = dist_cutoffs[i];
1576 contacts_dist_cutoffs_squared_.push_back( dist_cutoffs[i]*dist_cutoffs[i] );
1578 contacts_dist_cutoff_squared_ = max_dist*max_dist;
// Finally the configured scoring methods are printed to the tracer.
1580 show_scoring_methods(
tr);
// Sets up quota collectors whose pools are sorted by a weighted subset of score components
// (the debug message below calls this the ABEGO_SS quota pool sorting scheme).
// The quota config file name falls back to a placeholder when the option is not given.
1586 std::string quota_config_file(
"UNKNOWN-QUOTA-CONFIG_FILE");
1587 if (option[frags::picking::quota_config_file].user())
1588 quota_config_file = option[frags::picking::quota_config_file]();
// Scan all score components of scores_[1]; for each component whose dynamic_cast to one of
// the listed score types is used, its index goes to components and its weight, looked up by
// the score id, goes to weights.
1594 for (
Size i = 1; i <= scores_[1]->count_components(); ++i) {
1596 dynamic_cast<ABEGO_SS_Score*
> (scores_[1]->get_component(i).get());
1598 components.push_back( i );
1599 weights.push_back( scoring_weights[s0->get_id()] );
1602 dynamic_cast<ProfileScoreL1*
> (scores_[1]->get_component(i).get());
1604 components.push_back( i );
1605 weights.push_back( scoring_weights[s1->get_id()] );
1609 dynamic_cast<RamaScore*
> (scores_[1]->get_component(i).get());
1611 components.push_back( i );
1612 weights.push_back( scoring_weights[s2->get_id()] );
1616 dynamic_cast<CSScore*
> (scores_[1]->get_component(i).get());
1618 components.push_back( i );
1619 weights.push_back( scoring_weights[s3->get_id()] );
1625 components.push_back( i );
1626 weights.push_back( scoring_weights[s4->get_id()] );
// Print the selected component indices with their weights, one pair per line.
1630 tr.Debug<<
"Scoring scheme for ABEGO_SS quota pool sorting is:";
1631 for(
Size l=1;l<=weights.size();l++) {
1632 tr.Debug<<
"\n\t"<<components[l]<<
"\t"<<weights[l];
1634 tr.Debug<<std::endl;
// buffer_factor is passed unchanged as the last argument of the pool construction below.
1635 Size buffer_factor = 5;
// For each fragment size, collectors are registered for slots 0 .. max_threads_ inclusive.
1636 for(
Size f=1;f<=frag_sizes_.size();f++) {
1637 for (
Size j = 0; j <= max_threads_; ++j) {
1639 set_candidates_collector(frag_sizes_[f],collector, j);
// middle is the 1-based offset of the central residue of a fragment of this size.
1641 Size middle = frag_sizes_[f] / 2 + 1;
1642 assert( size_of_query() == q_config.size() );
// One set of pools per fragment start position j; the probability for column i is read at
// the query position of the fragment's central residue, j + middle - 1.
1643 for(
Size j=1;j<=size_of_query()-frag_sizes_[f]+1;j++) {
1644 tr.Debug<<
"Creating "<<q_config.n_columns()<<
" quota pools at pos "<<j<<std::endl;
1645 for(
Size i=1;i<=q_config.n_columns();i++) {
1646 Real prob = q_config.probability(j+middle-1,i);
1647 for (
Size k = 0; k <= max_threads_; ++k) {
1651 q_config.get_pool_bins((i)),components,weights,prob,scores_[1]->count_components(),buffer_factor);
1652 collector->add_pool(j,p);
// Sets up quota collectors with one secondary structure pool per named prediction
// (presumably set_up_quota_nnmake_style, which is called elsewhere in this file - confirm).
// The quota config file name falls back to a placeholder when the option is not given.
1660 std::string quota_config_file(
"UNKNOWN-QUOTA-CONFIG_FILE");
1661 if (option[frags::picking::quota_config_file].user())
1662 quota_config_file = option[frags::picking::quota_config_file]();
// The first two slots start as placeholders (component 0, weight 0.0); weights[1] and
// weights[2] are assigned further down.
1667 components.push_back( 0 );
1668 weights.push_back( 0.0 );
1669 components.push_back( 0 );
1670 weights.push_back( 0.0 );
// Scan all score components of scores_[1]; for each component whose dynamic_cast to one of
// the listed score types is used, its index and its weight (looked up by score id) are appended.
1672 for (
Size i = 1; i <= scores_[1]->count_components(); ++i) {
1674 dynamic_cast<ProfileScoreL1*
> (scores_[1]->get_component(i).get());
1676 components.push_back( i );
1677 weights.push_back( scoring_weights[s1->get_id()] );
1689 dynamic_cast<CSScore*
> (scores_[1]->get_component(i).get());
1691 components.push_back( i );
1692 weights.push_back( scoring_weights[s3->get_id()] );
1695 dynamic_cast<ABEGO_SS_Score*
> (scores_[1]->get_component(i).get());
1697 components.push_back( i );
1698 weights.push_back( scoring_weights[s4->get_id()] );
1703 components.push_back( i );
1704 weights.push_back( scoring_weights[s5->get_id()] );
// Every loaded secondary structure profile gets the same weight, 1 / number of profiles.
// NOTE(review): with an empty query_ss_profile_ this divides 1.0 by zero - confirm that at
// least one prediction is always loaded before this point.
1710 std::map<std::string, core::fragment::SecondaryStructureOP>::iterator it;
1711 Real weight = 1.0 / ((
Real) query_ss_profile_.size());
1712 for ( it=query_ss_profile_.begin() ; it != query_ss_profile_.end(); it++ ) {
1713 predictions.push_back((*it).second);
1714 ss_weights.push_back(weight);
// For each fragment size, collectors are registered for slots 0 .. max_threads_ inclusive.
1718 for(
Size f=1;f<=frag_sizes_.size();f++) {
1719 for (
Size j = 0; j <= max_threads_; ++j) {
1721 set_candidates_collector(frag_sizes_[f], collector, j);
// First scan: a score whose name is a valid quota pool name supplies weights[2].
1725 for (
Size i = 1; i <= scores_[1]->count_components(); ++i) {
1729 if( ! q_config.is_valid_quota_pool_name( name ) )
continue;
1731 weights[2] = scoring_weights[sr->get_id()];
1732 tr.Warning<<
"RamaScore with ID "<<sr->get_id()<<
" named "<<name<<
1733 " has been attached to its quota pool with weight "<<weights[2]<<std::endl;
// Second scan: a score whose name is a valid quota pool name supplies weights[1], and the
// pool size is the configured fraction of n_candidates_, truncated to an integer.
1740 for (
Size i = 1; i <= scores_[1]->count_components(); ++i) {
1752 if( ! q_config.is_valid_quota_pool_name( name ) )
continue;
1754 weights[1] = scoring_weights[ss->get_id()];
1755 Size size = (
Size)(q_config.get_fraction( name ) * n_candidates_);
1757 tr.Warning<<
"Config file couldn't provide quota fraction for the pool named "
1758 <<name<<
". Skipping the pool"<<std::endl;
// The pool is attached to the collector of every slot 0 .. max_threads_ inclusive.
1762 for (
Size j = 0; j <= max_threads_; ++j) {
1766 collector->attach_secondary_structure_pools(q_config.get_fraction( name ) ,
1767 get_query_ss( name ),name,n_candidates_,components,weights,scores_[1]->count_components());
// Quota setup entry point: delegates to set_up_quota_nnmake_style, then applies the
// frags::picking::query_pos option when the user supplied it.
1777 set_up_quota_nnmake_style();
1779 if (option[frags::picking::query_pos].user()) {
1780 set_picked_positions( option[frags::picking::query_pos]() );
// Forwards fns to chunks_->vallChunksFromLibraries (fns is presumably a list of file names - confirm).
1786 chunks_->vallChunksFromLibraries(fns);
// Forwards fn to chunks_->vallChunksFromLibrary (fn is presumably a single file name - confirm).
1791 chunks_->vallChunksFromLibrary(fn);
// Replaces query_positions_ with the contiguous range from .. to, both ends inclusive.
1795 query_positions_.clear();
1796 for(
Size i=from;i<=to;i++)
1797 query_positions_.push_back( i );
// Replaces query_positions_ with a copy of q_positions, keeping the original order (1-based indexing).
1801 query_positions_.clear();
1802 for(
Size i=1;i<=q_positions.size();i++)
1803 query_positions_.push_back( q_positions[i] );
// Returns the number of entries currently held in tags_.
1808 return tags_.size();
// Prints a three-line explanation of the quota report to the tracer, followed by the text
// accumulated in this object (this->str()).
1812 tr<<
"Quota report: difference between the total expected and total picked foreach pool"<<std::endl;
1813 tr<<
"This table is for first "<<nFrags_<<
" fragments"<<std::endl;
1814 tr<<
"Negative value says that was picked more that expected."<<std::endl;
1815 tr<< this->
str()<<std::endl;
// Appends one table row to this stream: q_pos and frag_len in 4-wide fields, then one
// 10-wide column per entry of data.
1820 *
this << std::setw(4)<<q_pos<< std::setw(4)<<frag_len;
1821 for(
Size i=1;i<=data.size();i++)
// A column is written either as the value with precision 3 or as a dashed placeholder;
// the condition choosing between the two is not visible in this view.
1823 *
this << std::setw(10)<<std::setprecision(3)<<data[i];
1825 *
this << std::setw(10)<<
" --- ";
// Registers the pool names found in collector and writes the table header.
// A pool name not yet present in tag_map_ is appended to tags_ and mapped to last_tag.
1836 if( tag_map_.find(collector.
get_pool(i,j)->get_pool_name())==tag_map_.end() ) {
1837 tags_.push_back(collector.
get_pool(i,j)->get_pool_name());
1839 tag_map_[collector.
get_pool(i,j)->get_pool_name()] = last_tag;
// Header row: the fixed leading columns, then one 10-wide column per registered tag.
1844 *
this <<
"\n#len pos ";
1845 for(
Size i=1;i<=tags_.size();i++) {
1846 *
this << std::setw(10)<<tags_[i];
// Converts the selected fragments into fragment-set objects: for every fragment size and
// every query position with candidates, the selected fragments are rebuilt residue by
// residue, added to a frame, and the frame is added to myFragSet, which is pushed to result.
1857 for (
Size iFragSize = 1; iFragSize <= frag_sizes_.size(); ++iFragSize) {
1861 Size fragment_size = frag_sizes_[iFragSize];
1863 for (
Size qPos = 1; qPos <= size_of_query(); ++qPos) {
// Positions without stored candidates are skipped.
1864 if(storage->get_candidates(qPos).size() == 0)
continue;
1867 selector_->select_fragments(storage->get_candidates(qPos), out);
1881 for (
Size fi = 1; fi <= out.size(); ++fi) {
// NOTE(review): the residue loop is bounded by the length of out[1], not out[fi];
// this is equivalent only if all selected fragments have the same length - confirm.
1885 for (
Size i = 1; i <= out[1].first->get_length(); ++i) {
1887 string pdbid = out[fi].first->get_pdb_id();
1889 Size index = r->resi();
1890 char aa = toupper(r->aa());
// Torsion slots 1, 2 and 3 receive phi, psi and omega; the secondary structure is set as well.
1900 res_torsions->set_torsion ( 1, phi );
1901 res_torsions->set_torsion ( 2, psi );
1902 res_torsions->set_torsion ( 3, omega );
1903 res_torsions->set_secstruct ( ss );
1906 current_fragment->add_residue( res_torsions );
// A completed fragment is marked valid; a fragment the frame refuses is reported on cerr
// and shown on cout.
1910 if (current_fragment) {
1911 current_fragment->set_valid();
1914 if (!frame->add_fragment(current_fragment)){
1915 cerr <<
"ERROR Bad fragment : "<<endl;
1916 current_fragment->show(cout);
1923 myFragSet->add(frame);
1928 result.push_back(myFragSet);
// Runs chunk through every filter in filters_; a filter whose test_chunk returns false is
// logged to tr.Debug together with the chunk's pdb id.
1939 for (
Size iFilter = 1; iFilter <= filters_.size(); iFilter++) {
// The test result is stored in flag and compared with false in the same expression.
1940 if ((flag = filters_[iFilter]->test_chunk(chunk)) ==
false) {
1941 tr.Debug <<
"Chunk: " << chunk->get_pdb_id()
1942 <<
" didn't pass a filter" << std::endl;
// Length-aware variant: a chunk shorter than frag_len is rejected immediately;
// otherwise the decision is delegated to is_valid_chunk( chunk ).
1950 if (chunk->size() < frag_len)
return false;
1951 return is_valid_chunk( chunk );
1956 using namespace ObjexxFCL;
1959 if (option[frags::nonlocal_pairs].user())
1960 nonlocal_pairs( fragment_size, final_fragments );
1963 if (option[frags::fragment_contacts].user())
1964 fragment_contacts( fragment_size, final_fragments );
1967 std::string out_file_name = prefix_ +
"." + string_of(n_frags_) +
"." + string_of(fragment_size) +
"mers";
1968 std::string silent_out_file_name = out_file_name +
".out";
1969 utility::io::ozstream output_file(out_file_name);
1970 utility::io::ozstream output_info_file;
1971 if (option[frags::describe_fragments].user()) {
1972 std::string describe_name = option[frags::describe_fragments]()+
"." + string_of(n_frags_) +
"."+string_of(fragment_size)+
"mers";
1973 output_info_file.open(describe_name.c_str());
1977 Size maxqpos = size_of_query() - fragment_size + 1;
1978 for (
Size iqpos = 1; iqpos <= query_positions_.size(); ++iqpos ) {
1979 Size qPos = query_positions_[iqpos];
1980 if ( qPos > maxqpos)
continue;
1982 output_file <<
"position: " << I(12, qPos) <<
" neighbors: " << I(10, final_fragments[qPos].
size()) << std::endl << std::endl;
1983 for (
Size fi = 1; fi <= final_fragments[qPos].size(); ++fi) {
1984 if (option[frags::write_sequence_only]()) {
1985 final_fragments[qPos][fi].first->print_fragment_seq(output_file);
1987 if ( !final_fragments[qPos][fi].first || !final_fragments[qPos][fi].second ) {
1988 tr.Warning <<
"final_frag candidate " << fi <<
" at position " << qPos <<
" is corrupted. skipping... " << std::endl;
1991 final_fragments[qPos][fi].first->print_fragment(output_file, final_fragments[qPos][fi].second, ms);
1993 output_file << std::endl;
1995 if ( ms->if_late_scoring_for_zeros() ) {
1997 for (
Size fi = 1; fi <= final_fragments[qPos].size(); ++fi ) {
1998 if ( !final_fragments[qPos][fi].first || !final_fragments[qPos][fi].second ) {
1999 tr.Warning <<
"final_frag candidate " << fi <<
" at position " << qPos <<
" is corrupted. skipping... " << std::endl;
2002 ms->score_zero_scores(final_fragments[qPos][fi].first,final_fragments[qPos][fi].second);
2005 if ( option[frags::describe_fragments].user() ) {
2007 ms->describe_fragments(final_fragments[qPos], output_info_file);
2010 output_file.close();
2011 output_info_file.close();
2014 if (option[frags::output_silent]() || option[frags::score_output_silent]()) {
2016 for (
Size iqpos = 1; iqpos <= query_positions_.size(); ++iqpos) {
2017 Size qPos = query_positions_[iqpos];
2018 if ( qPos > maxqpos)
continue;
2019 std::string const & sequence = get_query_seq_string().substr(qPos-1,fragment_size);
2020 for (
Size fi = 1; fi <= final_fragments[qPos].size(); ++fi) {
2021 std::string tag =
"frag_" + ObjexxFCL::lead_zero_string_of(qPos,6) +
"_" + ObjexxFCL::lead_zero_string_of(fi,6);
2022 final_fragments[qPos][fi].first->output_silent( sfd, sequence, silent_out_file_name, tag, final_fragments[qPos][fi].second, ms );