22 #include <basic/Tracer.hh>
23 #include <basic/options/option.hh>
24 #include <basic/options/keys/in.OptionKeys.gen.hh>
25 #include <ObjexxFCL/string.functions.hh>
26 #include <utility/io/izstream.hh>
27 #include <utility/vector1.hh>
28 #include <utility/exit.hh>
// File-scope tracer constructed with the name "core.sequence"; the tr.Trace,
// tr.Info, tr.Debug, tr.Warning and tr.Error streams used below belong to it.
57 static basic::Tracer
tr(
"core.sequence");
// Append every element of `current` to the container that `alignments` points to.
68 std::copy(current.begin(), current.end(), std::back_inserter(*alignments));
// Fragment: reads two aligned strings (align1, align2) from `filename`, each
// preceded by a line that must start with '>', then walks them column by column.
// NOTE(review): no check that the stream opened successfully is visible in
// this excerpt — confirm one exists in the full function.
87 std::ifstream data( filename.c_str() );
// The line read before each aligned string must be a '>' header.
91 runtime_assert( line[0] ==
'>' );
92 getline( data, align1 );
94 getline( data, line );
95 runtime_assert( line[0] ==
'>' );
96 getline( data, align2 );
// Both aligned strings must have the same number of columns.
100 runtime_assert( align1.size() == align2.size() );
105 int pos1(0), pos2(0);
106 for (
Size i=0; i< align1.size(); ++i ) {
107 char const al1( align1[i] ), al2( align2[i] );
// A column counts as a gap in a sequence when its character is '.' or '-'.
108 bool const gap1( al1 ==
'.' || al1 ==
'-' );
109 bool const gap2( al2 ==
'.' || al2 ==
'-' );
// Report both aligned strings and both sequences (seq1, seq2) on the
// "core.sequence.DerivedSequenceMapping" tracer.
125 basic::T(
"core.sequence.DerivedSequenceMapping") <<
"align1: " << align1 <<
"\nalign2: " << align2 <<
126 "\nseq1: " << seq1 <<
"\nseq2: " << seq2 <<
'\n';
// mapping.size1() must equal the length of seq1; size2 is then set to the
// length of seq2.
128 runtime_assert( mapping.
size1() == seq1.size() );
129 mapping.
size2( seq2.size() );
// Store the value returned by sequence() for the element `it` refers to.
136 seq_strings.push_back( (*it)->sequence() );
// Fragment: reads `filename` line by line, accumulating text between lines
// that start with '>' and storing one Sequence (text + id) per record.
144 utility::io::izstream input( filename.c_str() );
145 std::string line, current_sequence =
"", current_id =
"empty";
// NOTE(review): the condition guarding this call is not visible here;
// presumably it fires when the file could not be opened. The text says
// "Warning" although the call name suggests the program exits — confirm.
148 utility_exit_with_message(
"Warning: can't open file " + filename +
"!" );
152 while( getline( input, line ) ) {
// A line beginning with '>' starts a new record.
153 if ( line.substr(0,1) ==
">" ) {
// Store the record accumulated so far, if any, before resetting the buffer.
154 if ( current_sequence !=
"" ) {
157 sequences.push_back(
new Sequence( current_sequence, current_id ) );
158 current_sequence =
"";
// The id is everything after the leading '>' (substr clamps the count to the
// end of the string).
160 current_id = line.substr(1,line.size());
// Append this line to the current record; rstrip presumably removes trailing
// whitespace — TODO confirm against ObjexxFCL.
163 current_sequence = current_sequence + ObjexxFCL::rstrip(line);
// Store whatever is still buffered (presumably after the loop, for the last
// record, which has no following '>' line).
165 if ( current_sequence !=
"" ) {
168 sequences.push_back(
new Sequence( current_sequence, current_id ) );
// Exit call whose message states that this function duplicates the functions
// above it. The rest of the call is outside this excerpt.
176 utility_exit_with_message(
177 "This function is redundant with the functions above it. Ask for help with C++ if you need it, but this is really embarassing duplication."
// Fragment: opens `filename` and reads whitespace-separated tokens from it.
181 utility::io::izstream input( filename.c_str() );
// NOTE(review): guard condition not visible; presumably reached when the file
// cannot be opened.
184 utility_exit_with_message(
"Warning: can't open file " + filename +
"!" );
// The first line is read here and then overwritten by the loop's getline, so
// in the visible code its content is never used.
193 getline( input, line );
194 while( getline( input,line) ) {
// Tokenise the line on whitespace, one token at a time into `aa`.
195 std::istringstream line_stream( line );
196 while( line_stream >> aa ) {
// Fragment: reads `filename` and collects (resi, resj) pairs into `aligned`,
// along with seq1/seq2 taken from lines whose tag starts with "ungapped".
207 utility::io::izstream input( filename.c_str() );
// NOTE(review): guard condition not visible; presumably reached when the file
// cannot be opened.
211 utility_exit_with_message(
"Warning: can't open file " + filename +
"!" );
// Running maxima of the residue numbers seen in each column.
215 Size max_resi = 0, max_resj = 0;
// First resj value encountered; 0 means "not set yet".
218 Size start_seq2( 0 );
219 while( getline( input, line ) ) {
221 tr.Trace <<
"read line: " << line << std::endl;
223 std::istringstream line_stream( line );
// Prefix test: does `tag` begin with "ungapped"?
226 if ( line_stream && tag.substr(0,
std::string(
"ungapped").size() ) ==
"ungapped" ) {
// `type` selects which sequence the line carries.
// NOTE(review): the error text below names "ungapped_template" and
// "ungapped_query", while the comparisons here use "template:" and "query:";
// confirm the message matches the expected file syntax.
228 if ( type ==
"template:" ) {
230 tr.Info <<
"read template sequence " << seq2 << std::endl;
231 }
else if ( type ==
"query:" ) {
233 tr.Info <<
"read query sequence " << seq1 << std::endl;
235 utility_exit_with_message(
"expected either ungapped_template or ungapped_query in file " + filename );
// Otherwise the line is parsed as a pair of residue numbers.
240 std::istringstream line_stream( line );
242 line_stream >> resi >> resj;
244 aligned.push_back( std::make_pair( resi, resj ) );
245 max_resi = std::max( resi, max_resi );
246 max_resj = std::max( resj, max_resj );
// Remember resj while start_seq2 is still unset (0).
247 if ( start_seq2 == 0 ) start_seq2 = resj;
// Iterate over every collected pair (loop body not visible in this excerpt).
252 for (
vector1< std::pair< Size,Size > >::const_iterator it = aligned.begin(),
end = aligned.end();
// Hand both sequence strings to the mapping.
257 mapping.
seq1( seq1 );
258 mapping.
seq2( seq2 );
// Fragment: fills `seqs` from files named by command-line options.
269 using namespace basic::options;
270 using namespace basic::options::OptionKeys;
// Only runs when the in::file::fasta option was set by the user.
274 if ( option[ in::file::fasta ].user() ) {
277 for ( iter it = fns.begin(),
end = fns.end(); it !=
end; ++it ) {
280 s_end = temp_seqs.end(); s_it != s_end; ++s_it
// Copy each element of temp_seqs into seqs.
282 seqs.push_back( *s_it );
// Only runs when the in::file::pssm option was set by the user.
287 if ( option[ in::file::pssm ].user() ) {
// Load the profile from the file `it` refers to, convert it via
// convert_profile_to_probs(), and store it alongside the other sequences.
293 prof->read_from_file( *it );
294 prof->convert_profile_to_probs();
295 seqs.push_back( prof );
// Fragment: chooses a reader based on `format`. The values "general" and
// "grishin" are recognised; the branch bodies are not visible in this excerpt.
308 if ( format ==
"general" ) {
310 }
else if ( format ==
"grishin" ) {
// Any other format value ends in this exit call (its guard is presumably the
// final else — not visible here).
313 utility_exit_with_message(
314 std::string(
"No match for format " + format +
"!" )
// Report how many alignments were read, from which file, and in which format.
317 tr.Debug <<
"read " << retval.size() <<
" alignments from file " << filename
318 <<
" with format " << format <<
"." << std::endl;
// Fragment: line-oriented alignment parser. Dispatches on the line prefix:
//   "score" -> second whitespace token is read as the current alignment's score
//   "--"    -> the current alignment, if non-empty, is appended to `alignments`
//   "#"     -> handling not visible in this excerpt
// Lines parsed via read_data() are added to `current` as sequences (the branch
// that contains that code is not visible here).
329 while( getline( input, line ) ) {
330 if ( line.substr(0,5) ==
"score" ) {
331 std::istringstream line_input( line );
// First token is discarded into `dummy`; the second is the score value.
335 line_input >> dummy >> score;
336 current->score( score );
340 }
else if ( line.substr(0,2) ==
"--" ) {
341 if ( current->size() > 0 ) alignments.push_back( *current );
343 }
else if ( line.substr(0,1) ==
"#" ) {
346 std::istringstream line_input( line );
348 new_seq->read_data( line_input );
349 current->add_sequence( new_seq );
// Store an alignment that is still pending (presumably after the loop, for
// input that does not end with a "--" line).
352 if ( current->size() > 0 ) {
// Only build the trace message when the Trace channel is visible.
353 if (
tr.Trace.visible() ) {
354 tr.Trace <<
"have read alignment\n" << *current << std::endl;
356 alignments.push_back( *current );
// Fragment: opens `filename` for reading.
364 utility::io::izstream input( filename.c_str() );
// NOTE(review): guard condition not visible; presumably reached when the file
// cannot be opened.
366 utility_exit_with_message(
"Warning: can't open file " + filename +
"!" );
// Fragment: line-oriented alignment parser for a second file format.
377 utility::io::izstream input( filename.c_str() );
// NOTE(review): guard condition not visible; presumably reached when the file
// cannot be opened.
380 utility_exit_with_message(
"Warning: can't open file " + filename +
"!" );
385 while( getline( input, line ) ) {
// Prefix dispatch. "##" is tested before "#" because a "##" line would also
// satisfy the single-'#' test.
386 if ( line.substr(0,2) ==
"--" ) {
// "--" line: append the current alignment if it holds any sequence.
387 if ( current->size() > 0 ) {
388 alignments.push_back( *current );
391 }
else if ( line.substr(0,2) ==
"##" ) {
// "##" line: skip the first token, then read two ids.
392 std::istringstream line_input( line );
393 line_input >> dummy >> id1 >> id2;
394 }
else if ( line.substr(0,1) ==
"#" ) {
396 }
else if ( line.substr(0,5) ==
"score" ) {
// "score" line: the second token becomes the alignment's score.
397 std::istringstream line_input( line );
399 line_input >> dummy >> score;
400 current->score( score );
402 using ObjexxFCL::string_of;
// Further values on the line are stored under keys of the form
// "score" + string_of(count) until extraction fails.
405 while ( !line_input.fail() ) {
407 current->score( (
std::string) (
"score" + string_of(count)), score ) ;
// Remaining lines are parsed as "<start> <sequence>".
413 std::istringstream line_input( line );
414 line_input >> start >> myseq;
415 if ( ! line_input.fail() ) {
418 if ( current->size() >= 1 ) {
425 current->add_sequence( new_seq );
// Store an alignment that is still pending.
430 if ( current->size() > 0 ) alignments.push_back( *current );
// Fragment: compares a candidate alignment against a reference ("true") one.
// Both alignments must report the same size().
440 runtime_assert( candidate_aln.
size() == true_aln.
size() );
// Mapping between sequences 1 and 2 of the reference alignment.
443 SequenceMapping true_map = true_aln. sequence_mapping( 1, 2 );
447 for (
Size i = 1; i <= true_aln.
length(); ++i ) {
// A mapping value of 0 is tested separately for each map (the branch bodies
// are not visible in this excerpt).
449 if ( true_map[ resi_idx ] == 0 ) {
451 if ( candidate_map[ resi_idx ] == 0 ) {
// Candidate and reference map this residue to the same partner.
457 if ( candidate_map[ resi_idx ] == true_map[ resi_idx ] ) {
// Fragment: transfers the gap pattern of `aln_to_steal` onto `seqs`.
// There must be exactly one entry in seqs per sequence in aln_to_steal.
476 runtime_assert( aln_to_steal.
size() == seqs.size() );
// One insertion cursor per sequence, initialised from `start` (its definition
// is not visible in this excerpt).
480 for (
Size ii = 1; ii <= aln_to_steal.
size(); ++ii ) {
482 insertion_positions.push_back(
start );
// For every alignment column ii and every sequence jj: where the aligned
// sequence has a gap at ii, insert a gap into seqs[jj] at that sequence's
// cursor. The cursor is incremented (whether inside or outside the gap branch
// is not visible here).
486 for (
Size ii = 1; ii <= aln_to_steal.
length(); ++ii ) {
487 for (
Size jj = 1; jj <= aln_to_steal.
size(); ++jj ) {
488 if ( aln_to_steal.
sequence(jj)->is_gap(ii) ) {
489 seqs[jj]->insert_gap( insertion_positions[jj] );
492 ++insertion_positions[jj];
// Per sequence: remove position 1 and set the start value (loop details are
// not visible in this excerpt).
497 for (
Size ii = 1; ii <= aln_to_steal.
size(); ++ii ) {
500 seqs[ii]->delete_position( 1 );
502 seqs[ii]->start( start );
// Trim each sequence from the end so its length matches the alignment length.
506 for (
Size jj = 1; jj <= seqs.size(); ++jj ) {
507 Size const seq_length( seqs[jj]->length() );
508 Size const desired_length( aln_to_steal.
length() );
// NOTE(review): this subtraction is evaluated before the seq_length <
// desired_length check below; if Size is unsigned (presumably) it wraps in
// that case. The wrapped value is only printed before exiting — confirm.
509 Size const n_to_delete( seq_length - desired_length + 1 );
// A sequence shorter than the alignment cannot be trimmed: dump diagnostics to
// stdout and exit.
511 if ( seq_length < desired_length ) {
512 std::cout <<
"--------------------------------------------------" << std::endl;
513 std::cout << aln_to_steal << std::endl;
514 std::cout <<
"seq: " << seqs[jj]->to_string() << std::endl;
515 std::cout <<
"seqs[jj]->sequence(): " << seqs[jj]->sequence() << std::endl;
516 std::cout <<
"length = " << seqs[jj]->length()
517 <<
" desired_length = " << desired_length << std::endl;
518 std::cout <<
"n_to_delete = " << n_to_delete << std::endl;
519 utility_exit_with_message(
"error!" );
// Strict '<' with ii starting at 1: the last position is deleted
// n_to_delete - 1 = seq_length - desired_length times.
521 for (
Size ii = 1; ii < n_to_delete; ++ii ) {
522 seqs[jj]->delete_position( seqs[jj]->length() );
// Fragment: sanity check that each sequence in `seqs` now matches the
// corresponding sequence of `aln_to_steal`.
528 runtime_assert( aln_to_steal.
size() == seqs.size() );
529 for (
Size ii = 1; ii <= aln_to_steal.
size(); ++ii ) {
531 string const aln_seq( aln_to_steal.
sequence(ii)->sequence() );
532 string const stolen_seq( seqs[ii]->sequence() );
// `error` starts out true only when the two lengths differ.
533 bool error( aln_seq.length() != stolen_seq.length() );
537 if ( aln_seq != stolen_seq ) {
// NOTE(review): if `string` is std::string (0-based), this loop skips index 0
// and reads index length(); when aln_seq is the shorter string, aln_seq[idx]
// goes past its end. Also, `error` is only ever AND-ed here, so it can never
// become true when the lengths were equal — a same-length mismatch is not
// flagged by the visible code. Confirm intent.
538 for (
Size idx = 1; idx <= stolen_seq.length(); ++idx ) {
539 error = ( error && aln_seq[idx] !=
'X' && stolen_seq[idx] !=
'X' &&
540 aln_seq[idx] != stolen_seq[idx]
// Build a message showing both sequences and exit.
546 std::string msg(
"sequences are not the same!\n" );
547 msg +=
"to_steal:\n";
548 msg += aln_to_steal.
sequence(ii)->to_string() +
"\n";
550 msg += seqs[ii]->to_string() +
"\n";
551 utility_exit_with_message( msg );
556 for (
Size j = 1; j <= seqs.size(); ++j ) {
// Fragment: builds two gapped sequences (seq1, seq2) from a residue mapping
// and the original sequences, then adds them to `align`.
// Both inputs must be gap-free at this point (length equals ungapped length).
571 runtime_assert( seq1->length() == seq1->ungapped_length() );
572 runtime_assert( seq2->length() == seq2->ungapped_length() );
575 rev_mapping.reverse();
// Offsets used to convert idx1/idx2 into indices of the original sequences:
// each is the sequence's start() minus one.
581 Size ngaps1 = seq1_orig->start()-1, ngaps2 = seq2_orig->start()-1;
582 Size idx1 = seq1_orig->start(), idx2 = seq2_orig->start();
// Walk both mappings in step. Three cases:
//   mapping[idx1] == 0     -> emit only the seq1 residue
//   rev_mapping[idx2] == 0 -> emit only the seq2 residue
//   otherwise              -> emit both residues
// (What is appended to the other sequence in the first two cases, and how the
// indices advance, is not visible in this excerpt.)
583 while ( idx1 <= mapping.
size1() && idx2 <= rev_mapping.size1() ) {
584 if ( mapping[ idx1 ] == 0 ) {
587 seq1->append_char( (*seq1_orig)[idx1-ngaps1] );
589 }
else if ( rev_mapping[ idx2 ] == 0 ) {
592 seq2->append_char( (*seq2_orig)[idx2-ngaps2] );
595 seq1->append_char( (*seq1_orig)[idx1-ngaps1] );
596 seq2->append_char( (*seq2_orig)[idx2-ngaps2] );
// Flush whatever remains of each original sequence.
603 while ( idx1-ngaps1 <= seq1_orig->length() ) {
605 seq1->append_char( (*seq1_orig)[idx1-ngaps1] );
608 while ( idx2-ngaps2 <= seq2_orig->length() ) {
610 seq2->append_char( (*seq2_orig)[idx2-ngaps2] );
// Both gapped sequences must end up with the same length.
614 runtime_assert( seq1->length() == seq2->length() );
618 align.add_sequence( seq2 );
// Compose two mappings: for every index ii that map1 maps to a non-zero
// value, new_mapping[ii] is what map2 gives for that value.
631 for (
Size ii = 1; ii <= map1.
size1(); ++ii ) {
632 if ( map1[ ii ] != 0 ) new_mapping[ii] = map2[ map1[ ii ] ];
// Fragment: works on ungapped copies of seq1/seq2 and reports a possible
// sequence mismatch.
641 using namespace core::sequence;
645 bool success(
false );
// Replace the copies' sequence strings with the ungapped versions.
663 copy1->sequence( seq1->ungapped_sequence() );
664 copy2->sequence( seq2->ungapped_sequence() );
// NOTE(review): the message text begins with "Error:" but is written to the
// Warning stream — confirm the intended severity.
668 tr.Warning <<
"Error: potential mismatch between sequence from alignment ";
669 tr.Warning <<
"and sequence from PDB!" << std::endl;
670 tr.Warning <<
"alignment: " << std::endl << intermediate
// Isolated using-directives (their surrounding code is not visible in this
// excerpt).
683 using namespace core::sequence;
698 using namespace core::sequence;
// Score every position of `seq` against itself: the same sequence and the same
// index are passed for both sides, and the result is stored at that index.
714 for (
Size ii = 1; ii <= seq->length(); ++ii ) {
715 scores[ ii ] = ss->score( seq, seq, ii, ii );
// Fragment: turns the strings q_seq and t_seq into sequence objects.
// Failure path: report both strings on the Error stream and flush all tracer
// channels (the guarding condition is not visible in this excerpt).
739 tr.Error <<
"Can't extract alignment from pose!" << std::endl;
740 tr.Error <<
"query_aln: " << q_seq << std::endl;
741 tr.Error <<
"template_aln: " << t_seq << std::endl;
742 tr.flush_all_channels();
// Parse each string through a stream into the matching sequence object.
747 std::istringstream q_in( q_seq ), t_in( t_seq );
748 query->read_data( q_in );
749 templ->read_data( t_in );
753 tr.Debug <<
"extracted sequence alignment from Pose: " << std::endl
// Fragment: for every residue of mod_pose that `mapping` pairs with a residue
// of ref_pose, record an (id1, id2) entry in atom_map, provided both residues
// have an atom named `atom_name`.
777 for (
Size mod_resi = 1; mod_resi <= mod_resn; ++mod_resi ) {
778 Size const ref_resi( mapping[mod_resi] );
// Skip unmapped residues (ref_resi == 0) and partners beyond ref_resn.
// NOTE(review): `mod_resi <= mod_resn` repeats the loop condition and is
// always true here.
779 if ( ref_resi && mod_resi <= mod_resn && ref_resi <= ref_resn ) {
// Both residues must have the requested atom.
780 if ( ! mod_pose.
residue(mod_resi).
has(atom_name) )
continue;
781 if ( ! ref_pose.
residue(ref_resi).
has(atom_name) )
continue;
785 atom_map.
set( id1, id2 );