Rosetta 3.5
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
BackboneDB.cc
Go to the documentation of this file.
1 // -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
2 // vi: set ts=2 noet:
3 //
4 // (c) Copyright Rosetta Commons Member Institutions.
5 // (c) This file is part of the Rosetta software suite and is made available under license.
6 // (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
7 // (c) For more information, see http://www.rosettacommons.org. Questions about this can be
8 // (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
9 
10 /// @file protocols/loops/BackboneDB.cc
11 /// @brief
12 /// @author Mike Tyka
13 /// @author Ken Jung
16 #include <core/kinematics/Jump.hh>
17 // AUTO-REMOVED #include <core/kinematics/MoveMap.hh>
18 #include <core/kinematics/RT.hh>
19 #include <core/pose/Pose.hh>
20 #include <core/pose/util.hh>
21 #include <basic/Tracer.hh>
23 #include <boost/lexical_cast.hpp>
24 #include <basic/options/option.hh>
25 #include <basic/options/keys/lh.OptionKeys.gen.hh>
26 #include <iostream>
27 #include <fstream>
28 #include <sstream>
29 
31 #include <utility/vector1.hh>
32 
33 
34 using namespace core;
35 using namespace core::pose;
36 using namespace kinematics;
37 using namespace basic::options;
38 using namespace basic::options::OptionKeys;
39 
40 namespace protocols {
41 namespace loophash {
42 
43  static basic::Tracer TR("BackboneDB");
44 
45  short RealAngleToShort( core::Real angle ){
46  while( angle > 180.0) angle -= 360.0;
47  while( angle <-180.0) angle += 360.0;
48  // range for short: -32768 to 32767
49  short result = short( angle * 182.0 );
50  return result;
51  }
52 
53  core::Real ShortToRealAngle( short angle ){
54  core::Real result = core::Real( angle ) / 182.0;
55  return result;
56  }
57 
58  void
59  BackboneSegment::apply_to_pose( core::pose::Pose &pose, core::Size ir, bool cut ) const
60  {
61  core::Size length = phi_.size();
62  core::Size jr = ir + length;
63  if(cut){
64  //fpd vrt/ligand trim
65  core::Size newroot=0;
66  if( pose.residue_type( pose.fold_tree().root() ).aa() == core::chemical::aa_vrt ) newroot = pose.fold_tree().root();
67 
68  core::Size nres = pose.total_residue();
69  while (!pose.residue_type(nres).is_polymer()) nres--;
70 
71  // get current cutpoints; don't try to connect these
72  utility::vector1< Size > cuts_in = pose.fold_tree().cutpoints();
73  std::sort( cuts_in.begin(), cuts_in.end() );
74 
75  // bail if (ir,jr) crosses a cut
76  for (Size i=1; i<=cuts_in.size(); ++i) {
77  if (cuts_in[i]<=jr && cuts_in[i]>=ir) {
78  TR.Error << "ERROR -- residue range crosses cut IR: " << ir << " JR: " << jr << " CUT: " << cuts_in[i] << std::endl;
79  return;
80  }
81  //fpd insertions one position after the cut seem not to work ...
82  //fpd perhaps if the foldtree for the local segment were reversed this might be ok
83  if (cuts_in[i]==ir-1) {
84  TR.Error << "ERROR -- startres immediately follows cut IR: " << ir << " CUT: " << cuts_in[i] << std::endl;
85  return;
86  }
87  }
88 
89  //fpd handle multiple chains/chainbreaks
90  FoldTree f;
91  core::Size last_cut=0, jump_num=2;
92  Size cutpoint= jr-1;
93  for (Size i=1; i<=cuts_in.size(); ++i) {
94  if (cuts_in[i] >= nres) break;
95  if (cutpoint > last_cut && cutpoint < cuts_in[i]) {
96  f.add_edge( last_cut+1, ir, Edge::PEPTIDE );
97  f.add_edge( ir, cutpoint, Edge::PEPTIDE );
98  f.add_edge( cutpoint + 1, jr, Edge::PEPTIDE );
99  f.add_edge( jr, cuts_in[i] , Edge::PEPTIDE );
100  f.add_edge( ir, jr, 1 ); // this is the jump !!
101  if (last_cut!=0) f.add_edge( 1, last_cut+1, jump_num++);
102  } else {
103  f.add_edge( last_cut+1, cuts_in[i], Edge::PEPTIDE );
104  if (last_cut!=0) f.add_edge( 1, last_cut+1, jump_num++);
105  }
106  last_cut = cuts_in[i];
107  }
108  if (last_cut+1 <= nres) {
109  if (cutpoint > last_cut && cutpoint < nres) {
110  f.add_edge( last_cut+1, ir, Edge::PEPTIDE );
111  f.add_edge( ir, cutpoint, Edge::PEPTIDE );
112  f.add_edge( cutpoint + 1, jr, Edge::PEPTIDE );
113  f.add_edge( jr, nres , Edge::PEPTIDE );
114  f.add_edge( ir, jr, 1 ); // this is the jump !!
115  if (last_cut!=0) f.add_edge( 1, last_cut+1, jump_num++);
116  } else {
117  f.add_edge( last_cut+1, nres, Edge::PEPTIDE );
118  if (last_cut!=0) f.add_edge( 1, last_cut+1, jump_num++);
119  }
120  }
121  for (core::Size i=nres+1; i<=pose.total_residue(); ++i)
122  f.add_edge( 1, i, jump_num++ ); // additional jumps
123 
124  core::Size theroot = 1;
125  if( ir == 1 ) theroot = pose.total_residue();
126  if( newroot>0 ) theroot = newroot; //fpd
127  if( f.reorder(theroot) == false ){
128  TR.Error << "ERROR During reordering of fold tree - am ignoring this LOOP ! bailing: The root: " << theroot << " NRES " << pose.total_residue() << " IR: " << ir << " JR: " << jr << std::endl;
129  return; // continuing leads to a segfault - instead ignore this loop !
130  }
131  pose.fold_tree(f);
132  }
133 
134  for( core::Size i = 0; i < length; i++){
135  core::Size ires = ir + i;
136  if( ires > pose.total_residue() ) return;
137  pose.set_phi( ires, phi_[i] );
138  pose.set_psi( ires, psi_[i] );
139  pose.set_omega( ires, omega_[i] );
140  }
141  }
142 
143  void
144  BackboneSegment::read_from_pose( core::pose::Pose const &pose, core::Size ir, core::Size length )
145  {
146  phi_.clear();
147  psi_.clear();
148  omega_.clear();
149 
150  for( core::Size i = 0; i < length; i++){
151  core::Size ires = ir + i;
152  if( ires > pose.total_residue() ) return;
153 
154  phi_. push_back( pose.phi( ires ));
155  psi_. push_back( pose.psi( ires ));
156  omega_.push_back( pose.omega( ires ));
157  }
158  }
159 
160  void BackboneSegment::print() const {
161  for( std::vector<core::Real>::const_iterator it = phi_.begin(); it != phi_.end(); ++it ) TR << *it << " " ;
162  for( std::vector<core::Real>::const_iterator it = psi_.begin(); it != psi_.end(); ++it ) TR << *it << " " ;
163  for( std::vector<core::Real>::const_iterator it = omega_.begin(); it != omega_.end(); ++it ) TR << *it << " " ;
164  TR << std::endl;
165  }
166 
167  bool BackboneSegment::compare(const BackboneSegment &bs1, core::Real tolerance) const {
168  const BackboneSegment &bs2 = (*this);
169 
170  if( bs1.phi().size() != bs2.phi().size() ) return false;
171  if( bs1.psi().size() != bs2.psi().size() ) return false;
172  if( bs1.omega().size() != bs2.omega().size() ) return false;
173 
174  for( core::Size i = 0; i < bs1.phi().size(); i++ ) if( (bs1.phi()[i] - bs2.phi()[i]) > tolerance ) return false;
175  for( core::Size i = 0; i < bs1.psi().size(); i++ ) if( (bs1.psi()[i] - bs2.psi()[i]) > tolerance ) return false;
176  for( core::Size i = 0; i < bs1.omega().size(); i++ ) if( (bs1.omega()[i] - bs2.omega()[i]) > tolerance ) return false;
177 
178  return true;
179  }
180  bool BackboneSegment::operator==( const BackboneSegment &bs1 ) const {
181  const BackboneSegment &bs2 = (*this);
182 
183  if( bs1.phi().size() != bs2.phi().size() ) return false;
184  if( bs1.psi().size() != bs2.psi().size() ) return false;
185  if( bs1.omega().size() != bs2.omega().size() ) return false;
186 
187  for( core::Size i = 0; i < bs1.phi().size(); i++ ) if( bs1.phi()[i] == bs2.phi()[i] ) return false;
188  for( core::Size i = 0; i < bs1.psi().size(); i++ ) if( bs1.psi()[i] == bs2.psi()[i] ) return false;
189  for( core::Size i = 0; i < bs1.omega().size(); i++ ) if( bs1.omega()[i] == bs2.omega()[i] ) return false;
190 
191  return true;
192  }
193 
195  core::Real sumsqr = 0;
196  core::Size count = 0;
197  if( bs1.phi().size() != bs2.phi().size() ) return -1;
198  for( core::Size i = 0; i < bs1.phi().size(); i++ ){
199  if( bs1.phi()[i] == 0 || bs2.phi()[i] == 0 ) continue;
200  core::Real diff = bs1.phi()[i] - bs2.phi()[i];
201  while( diff > 180 ) diff -= 360;
202  while( diff < -180 ) diff += 360;
203  sumsqr += diff*diff;
204  count++;
205  }
206  if( bs1.psi().size() != bs2.psi().size() ) return -1;
207  for( core::Size i = 0; i < bs1.psi().size(); i++ ){
208  if( bs1.psi()[i] == 0 || bs2.psi()[i] == 0 ) continue;
209  core::Real diff = bs1.psi()[i] - bs2.psi()[i];
210  while( diff > 180 ) diff -= 360;
211  while( diff < -180 ) diff += 360;
212  sumsqr += diff*diff;
213  count++;
214  }
215  if( bs1.omega().size() != bs2.omega().size() ) return -1;
216  for( core::Size i = 0; i < bs1.omega().size(); i++ ){
217  if( bs1.omega()[i] == 0 || bs2.omega()[i] == 0 ) continue;
218  core::Real diff = bs1.omega()[i] - bs2.omega()[i];
219  while( diff > 180 ) diff -= 360;
220  while( diff < -180 ) diff += 360;
221  sumsqr += diff*diff;
222  count++;
223  }
224 
225  return sqrt( sumsqr/core::Real(count) );
226  }
227 
228  core::Real
229  BackboneDB::angle( core::Size index, core::Size offset )
230  {
231  if( index >= data_.size() ) utility_exit_with_message( "Out of bounds error" );
232  if( offset >= data_[index].angles.size() ) utility_exit_with_message( "Out of bounds error" );
233 
234  return ShortToRealAngle( data_[ index ].angles[ offset ] );
235  }
236 
237  void
238  BackboneDB::add_pose( const core::pose::Pose &pose, core::Size nres, core::Size &index, protocols::frag_picker::VallChunkOP chunk )
239  {
240  if( ! extra_ ) extra_ = true;
241  index = data_.size(); // Index of protein
242  BBData new_protein;
243  for( core::Size i = 0; i < nres; i++){
244  new_protein.angles.push_back( RealAngleToShort(pose.phi( 1 + i )));
245  new_protein.angles.push_back( RealAngleToShort(pose.psi( 1 + i )));
246  new_protein.angles.push_back( RealAngleToShort(pose.omega( 1 + i )));
247  }
248  BBExtraData extra_data;
249  if ( chunk ) {
250  //could modify this to move the chunk processing to LoopHashLibrary
251  extra_data.sequence = chunk->get_sequence();
252  extra_data.pdb_id = chunk->get_pdb_id() + chunk->get_chain_id();
253  }else{
254  extra_data.sequence = pose.sequence();
255  std::string pose_id="";
256  get_score_line_string( pose, "usid", pose_id );
257  extra_data.pdb_id = pose_id;
258  }
259  new_protein.extra_key = extra_data_.size();
260  extra_data_.push_back( extra_data );
261  data_.push_back( new_protein );
262  }
263 
264  // Maybe I should just overload the copy operator in the struct..
265  void BackboneDB::get_protein( core::Size index, BBData & protein ) const {
266  protein.extra_key = data_[index].extra_key;
267  protein.angles = data_[index].angles;
268  }
269 
270  void BackboneDB::get_extra_data( core::Size index, BBExtraData & extra ) const {
271  extra = extra_data_[index];
272  }
273 
274  void BackboneDB::add_protein( BBData new_protein ) {
275  data_.push_back( new_protein );
276  }
277 
278  void BackboneDB::add_extra_data( BBExtraData extra ) {
279  if( ! extra_ ) extra_ = true;
280  extra_data_.push_back( extra );
281  }
282 
283  void
284  BackboneDB::get_backbone_segment(
285  core::Size index,
286  core::Size offset,
287  core::Size len,
288  BackboneSegment &bs
289  ) const
290  {
291  std::vector<core::Real> phi;
292  std::vector<core::Real> psi;
293  std::vector<core::Real> omega;
294  core::Size pos = offset;
295  for( core::Size i = 0; i < len; i++){
296  phi.push_back( ShortToRealAngle(data_[index].angles[pos]) ); pos ++ ;
297  psi.push_back( ShortToRealAngle(data_[index].angles[pos]) ); pos ++ ;
298  omega.push_back( ShortToRealAngle(data_[index].angles[pos]) ); pos ++ ;
299  }
300  bs = BackboneSegment( phi, psi, omega );
301  }
302 
303  void BackboneDB::write_db( std::string filename )
304  {
305  std::ofstream file( filename.c_str() );
306  if( !file ) throw EXCN_DB_IO_Failed( filename, "write" );
307  if( data_.size() == 0 ){
308  file.close();
309  return;
310  }
311  if( ! extra_ ) throw EXCN_No_Extra_Data_To_Write();
312  for ( core::Size i = 0; i < data_.size(); i++ ) {
313  file << "pdb " << extra_data_[ data_[i].extra_key ].pdb_id << std::endl;
314  file << "seq " << extra_data_[ data_[i].extra_key ].sequence << std::endl;
315  file << "rot ";
316  for ( core::Size j = 0; j < extra_data_[ data_[i].extra_key ].rotamer_id.size(); j++ ) {
317  file << extra_data_[ data_[i].extra_key ].rotamer_id[j] << " ";
318  }
319  file << std::endl;
320  file << "ang ";
321  for ( core::Size j = 0; j < data_[i].angles.size(); j++ ) {
322  file << data_[i].angles[j] << " ";
323  }
324  file << std::endl;
325  }
326  file.close();
327  }
328 
329  void
330  BackboneDB::read_legacydb( std::string filename )
331  {
332  // use basic C input - C++ are too memory hungry to deal with these potentially v large files
333  FILE *file = fopen( filename.c_str(), "r" );
334  if( file == NULL ) throw EXCN_DB_IO_Failed( filename, "read" );
335 
336  data_.clear();
337  BBData new_protein;
338  data_.push_back( new_protein );
339  unsigned count = 0;
340  while( !feof( file ) ){
341  count++;
342  TR.Debug << "C: " << count << std::endl;
343  const unsigned int bufsize = 16384;
344  short bufferdata[16384];
345  size_t readshorts = fread(&bufferdata[0],sizeof(short),bufsize,file);
346  for( unsigned i = 0; i< readshorts; i ++ ){
347  data_[0].angles.push_back( bufferdata[i] );
348  }
349  }
350  fclose( file );
351  TR.Debug << "End of read_db_from_binary" << std::endl;
352  }
353 
354  void
355  BackboneDB::read_db( std::string filename, bool load_extra,
356  core::Size num_partitions, core::Size assigned_num,
357  std::pair< core::Size, core::Size > & loopdb_range,
358  std::map< core::Size, bool > & homolog_index )
359  {
360  std::ifstream file( filename.c_str() );
361  if( !file ) throw EXCN_DB_IO_Failed( filename, "read" );
362 
363  if( option[ lh::exclude_homo ]() ) {
364  TR << "Reading in homolog file" << std::endl;
365  read_homologs();
366  }
367 
368  extra_ = true;
369  std::string line;
370 
371  core::Size num_lines = 0;
372  // get number of lines in db
373  while( getline(file, line) ) {
374  num_lines++;
375  }
376 
377  // truncating to integer is good
378  core::Size begin = assigned_num * num_lines / 4 / num_partitions;
379  core::Size end = ( assigned_num + 1 ) * num_lines / 4 / num_partitions;
380  //if( assigned_num == num_partitions - 1 ) end = 0;
381  loopdb_range.first = begin;
382  loopdb_range.second = end;
383 
384  TR.Info << "Reading in proteins " << begin << " to " << end << " out of " << num_lines / 4 << " , partition: " << assigned_num+1 << "/"<< num_partitions << std::endl;
385  // clear eof bit
386  file.clear();
387  file.seekg( 0, std::ios_base::beg );
388 
389  BBData new_protein;
390  BBExtraData extra_data;
391  std::string command;
392  int line_counter = -1;
393  bool is_homolog = false;
394 
395  unsigned int stat_count_protein = 0;
396  while( getline( file, line ) ) {
397  line_counter++;
398  if( line_counter / 4 < int(begin) ) continue;
399  if( line_counter / 4 >= int(end) && int(end) != 0 ) continue;
400 
401  command = line.substr(0,3);
402  if( command == "" ) throw EXCN_Wrong_DB_Format( filename );
403  // Even if we're not loading extra, still process pdb line
404  // So we can use the pdb to filter homologs
405  if( command == "pdb" ) {
406  new_protein.extra_key = extra_data_.size();
407  extra_data.pdb_id = line.substr( 4 );
408  stat_count_protein++; // count the number of proteins read in
409  if( homologs_.find( extra_data.pdb_id ) != homologs_.end() ) is_homolog = true;
410  }
411  if( load_extra ) {
412  if( command == "seq" ) {
413  extra_data.sequence = line.substr( 4 );
414  }
415  if( command == "rot" ) {
416  std::string buf;
417  std::stringstream ss( line.substr( 4 ) );
418  while ( ss >> buf )
419  extra_data.rotamer_id.push_back( boost::lexical_cast< int >( buf ) );
420  }
421  }
422  if( command == "ang" ) {
423  std::string buf;
424  std::stringstream ss( line.substr( 4 ) );
425  while ( ss >> buf ) new_protein.angles.push_back( boost::lexical_cast< short >( buf ) );
426  if( is_homolog ) {
427  // Still leave a holder protein in data_ so indices in leapindex aren't messed up
428  // but with no data so it doesn't take up space
429  new_protein.angles.clear();
430  // then add the index to the homolog map
431  homolog_index[ data_.size() ] = true;
432  TR << "Homolog " << extra_data.pdb_id << " rejected." << std::endl;
433  }
434  data_.push_back(new_protein);
435  is_homolog = false;
436  new_protein.angles.clear();
437  if( load_extra ){
438  // add extra data for holder proteins, since its not that much data
439  // if this becomes too large, we can change later
440  extra_data_.push_back( extra_data );
441  extra_data.rotamer_id.clear();
442  }
443  }
444  }
445  TR.Info << "Read in " << stat_count_protein << " proteins" << std::endl;
446  TR.Info << "Data_ size " << data_.size() << std::endl;
447  file.close();
448  }
449 
/// @brief Load the homolog exclusion list from the file named by the lh::homo_file option.
/// @details Every token found on every line is added as a key of homologs_.
/// For 5-character tokens, variants with the fifth character swapped between
/// 'A'/'a' and '_' are added too (see the notes below). The resulting key set
/// is logged at the end.
/// @throws EXCN_DB_IO_Failed if the homolog file cannot be opened
450  void BackboneDB::read_homologs()
451  {
452  std::ifstream file( option[ lh::homo_file ]().c_str() );
453  if( !file ) throw EXCN_DB_IO_Failed( option[ lh::homo_file ](), "read" );
454  std::string line;
455  while( getline( file, line) ) {
// NOTE(review): the statement that declares `tokens` (listing line 456) is not
// present in this listing; presumably it splits `line` into tokens -- confirm
// against the original source file.
457  for( utility::vector1< std::string >::const_iterator token = tokens.begin(); token != tokens.end(); ++token ){
458  std::string homolog_pdb_code_and_chain = (*token);
459  TR << "Adding homolog: " << homolog_pdb_code_and_chain << std::endl;
460  homologs_[homolog_pdb_code_and_chain] = true;
// Only 5-character entries get the extra variants below.
461  if( homolog_pdb_code_and_chain.size() == 5 ) {
// std::string::replace modifies the string in place, so after this line the
// local copy ends in '_' and that modified string is the key being inserted.
462  if( homolog_pdb_code_and_chain[4] == 'A' || homolog_pdb_code_and_chain[4] == 'a' ) homologs_[ homolog_pdb_code_and_chain.replace( 4, 1, 1, '_' ) ] = true;
// NOTE(review): this test sees the string as modified above, so an entry that
// ended in 'A' or 'a' also passes here and the variant ending in 'A' is
// (re-)inserted; for a lowercase 'a' entry that adds an uppercase 'A' key as well.
463  if( homolog_pdb_code_and_chain[4] == '_' ) homologs_[ homolog_pdb_code_and_chain.replace( 4, 1, 1, 'A' ) ] = true;
464  }
465  }
466  }
467 
// Log every key currently in homologs_ on one line.
468  TR << "Homolog exclusion: ";
469  for( std::map< std::string, bool >::const_iterator hom = homologs_.begin(); hom != homologs_.end(); ++hom){
470  TR << hom->first << " ";
471  }
472  TR << std::endl;
473 
474  }
475 
476 } // namespace loophash
477 } // namespace protocols
478 
479 
480 
481