Rosetta 3.5
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
CarbohydrateInfo.cc
Go to the documentation of this file.
1 // -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
2 // vi: set ts=2 noet:
3 //
4 // (c) Copyright Rosetta Commons Member Institutions.
5 // (c) This file is part of the Rosetta software suite and is made available under license.
6 // (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
7 // (c) For more information, see http://www.rosettacommons.org. Questions about this can be
8 // (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
9 
10 /// @file CarbohydrateInfo.cc
11 /// @brief Method definitions for CarbohydrateInfo.
12 /// @author labonte
13 
14 // Unit header
16 
17 // Package headers
19 
20 // Utility headers
21 #include <utility/PyAssert.hh>
22 #include <utility/exit.hh>
23 
24 // Basic headers
25 #include <basic/Tracer.hh>
26 
27 // C++ headers
28 #include <iostream>
29 #include <sstream>
30 
31 // boost headers
32 #include <boost/assign/list_of.hpp>
33 
34 
35 // Construct tracer.
36 static basic::Tracer TR("core.chemical.carbohydrates.CarbohydrateInfo");
37 
38 
39 namespace core {
40 namespace chemical {
41 namespace carbohydrates {
42 
43 // Define static data.
44 // If we ever add rare sugars larger than 7 carbons, increase the upper carbon-count limit (MAX_C_SIZE_LIMIT).
47 
48 std::map<std::string, std::string> const CarbohydrateInfo::CODE_TO_ROOT_MAP =
49  boost::assign::map_list_of
50  // Aldotriose
51  ("Gly", "glycer") // TODO: Deal with this special case later.
52 
53  // Aldotetroses
54  ("Ery", "erythr")
55  ("Thr", "thre")
56 
57  // Aldopentoses
58  ("Rib", "rib")
59  ("Ara", "arabin")
60  ("Xyl", "xyl")
61  ("Lyx", "lyx")
62 
63  // Aldohexoses
64  ("All", "all")
65  ("Alt", "altr")
66  ("Glc", "gluc")
67  ("Man", "mann")
68  ("Gul", "gul")
69  ("Ido", "id")
70  ("Gal", "galact")
71  ("Tal", "tal")
72 
73  // Ketotriose
74  ("DHA", "dihydroxyacet") // TODO: Deal with this special case later.
75 
76  // Ketotetrose
77  ("Eul", "erythrul") // "Eul" is my own invention; compare Rul and Xul. ~ Labonte
78 
79  // Ketopentoses
80  ("Rul", "ribul")
81  ("Xul", "xylul")
82 
83  // Ketohexoses
84  ("Psi", "psic")
85  ("Fru", "fruct")
86  ("Sor", "sorb")
87  ("Tag", "tagat");
88 
89 using namespace core;
90 
91 
92 // Public methods //////////////////////////////////////////////////////////////
93 // Standard methods ////////////////////////////////////////////////////////////
94 // Empty constructor
95 CarbohydrateInfo::CarbohydrateInfo() : utility::pointer::ReferenceCount()
96 {
97  chemical::ResidueTypeCAP residue_type;
98 
99  init(residue_type);
100 }
101 
102 // Standard constructor
103 /// @param <residue_type>: the ResidueType object containing this CarbohydrateInfo
104 CarbohydrateInfo::CarbohydrateInfo(core::chemical::ResidueTypeCAP residue_type) : utility::pointer::ReferenceCount()
105 {
106  init(residue_type);
107 }
108 
109 // Copy constructor
110 CarbohydrateInfo::CarbohydrateInfo(CarbohydrateInfo const & object_to_copy) : utility::pointer::ReferenceCount()
111 {
112  copy_data(*this, object_to_copy);
113 }
114 
115 // Assignment operator
/// @return a reference to this object
// NOTE(review): the lines carrying this definition's return type, name, and parameter list are absent from this
// listing.
// NOTE(review): copy_data(), as defined in this file, takes both of its arguments by value, so the member
// assignments it performs appear to act on temporaries rather than on *this -- confirm, and fix together with
// its declaration.
118 {
119  // Abort self-assignment.
120  if (this == &object_to_copy) {
121  return *this;
122  }
123 
124  copy_data(*this, object_to_copy);
125  return *this;
126 }
127 
128 // Destructor
130 
131 
132 // Standard Rosetta methods ////////////////////////////////////////////////////
133 // General methods
// Write a human-readable report of this object's carbohydrate properties to a stream.
/// @param <output>: the stream that receives the report
/// @details The "Classification" line joins an "aldo" or "<n>-keto" prefix to a carbon-count suffix. The ring
/// form and anomeric form are printed only when ring_size_ is nonzero. Branch connections are reported with a
/// fixed placeholder string.
134 void
135 CarbohydrateInfo::show(std::ostream & output) const
136 {
137  using namespace std;
138 
139  // Parse properties.
140  string prefix, suffix, ring_form, modifications;
141  if (is_aldose()) {
142  prefix = "aldo";
143  } else /*is ketose*/ {
   // Convert the anomeric carbon number to its digit character; this only yields a digit for values 0-9.
144  char num = '0' + anomeric_carbon_;
145  prefix = string(1, num) + string("-keto");
146  }
  // Carbon counts other than 3 through 7 leave suffix empty.
147  switch (n_carbons_) {
148  case 3:
149  suffix = "triose";
150  break;
151  case 4:
152  suffix = "tetrose";
153  break;
154  case 5:
155  suffix = "pentose";
156  break;
157  case 6:
158  suffix = "hexose";
159  break;
160  case 7:
161  suffix = "heptose";
162  break;
163  }
  // Ring sizes other than 5, 6, and 7 (including 0) leave ring_form empty.
164  switch (ring_size_) {
165  case 5:
166  ring_form = "furanose";
167  break;
168  case 6:
169  ring_form = "pyranose";
170  break;
171  case 7:
172  ring_form = "septanose";
173  break;
174  }
175  if (is_uronic_acid_) {
176  modifications += string(" uronic acid\n");
177  }
178  // TODO: Add more modifications.
179  if (modifications == "") {
180  modifications = " none\n";
181  }
182 
183  // Produce output.
184  output << "Carbohydrate Properties for this Residue:" << endl;
185  output << " Basic Name: " << base_name() << endl;
186  output << " IUPAC Name: " << full_name_ << endl;
187  output << " Classification: " << prefix << suffix << endl;
188  output << " Stereochemistry: " << stereochem_ << endl;
189  if (ring_size_ != 0) {
190  output << " Ring Form: " << ring_form << endl;
191  output << " Anomeric Form: " << anomer_ << endl;
192  }
  // Each modification string already ends in a newline, so a blank line follows the list.
193  output << " Modifications: " << endl << modifications << endl;
194  output << " Polymeric Information:" << endl;
  // NOTE(review): the condition that opens the branch closed by "} else {" below is absent from this listing.
196  output << " Main chain connection: (_->" << mainchain_glycosidic_bond_acceptor_ << ')' << endl;
197  } else {
198  output << " Main chain connection: N/A" << endl;
199  }
200  output << " Branch connections: " << "branches not yet implemented" << endl;
201 }
202 
203 
204 // Accessors/Mutators
205 // Return the standard/common, non-residue, short name of the monosaccharide.
/// @return the root that root_from_code() gives for the residue type's name3(), with "ose" appended
// NOTE(review): the lines carrying this definition's return type and name are absent from this listing.
208 {
209  return root_from_code(residue_type_->name3()) + "ose";
210 }
211 
212 // Return the attachment point of the downstream saccharide residue attached to the ith branch off of this residue.
213 /// @param <i>: the branch point index; must satisfy 1 <= i <= n_branches()
214 /// @return an integer n of (1->n) of polysaccharide nomenclature, where n specifies the attachment point on the
215 /// upstream monosaccharide residue; e.g., 4 specifies O4
216 /// @details A monosaccharide with a group linked to it at one position is a distinct residue type from the same
217 /// monosaccharide with the same group linked to it at another position. For example, Rosetta treats (1->4)-beta-
218 /// D-glucopyranose as an entirely distinct residue type from (1->3)-beta-D-glucopyranose, with separate .params
219 /// files for each.\n
220 /// \n
221 /// See also:\n
222 /// CarbohydrateInfo.mainchain_glycosidic_bond_acceptor()\n
223 /// CarbohydrateInfo.n_branches()
224 /// @remarks Branches are not yet implemented.
// NOTE(review): the lines carrying this definition's return type and signature are absent from this listing; the
// PyAssert message below names it as CarbohydrateInfo::branch_point(core::uint i).
227 {
  // The index range is checked twice: once with assert and once with PyAssert.
228  assert((i > 0) && (i <= n_branches()));
229  PyAssert((i > 0) && (i <= n_branches()),
230  "CarbohydrateInfo::branch_point(core::uint i): "
231  "There is no ith branch point on this carbohydrate residue.");
232 
233  return branch_points_[i];
234 }
235 
236 // Return the CHI identifier for the requested nu (internal ring torsion) angle.
237 /// @param <subscript>: the subscript for nu, which must lie between 1 and (ring size - 2), inclusive
238 /// @return a pair of values corresponding to the atom tree torsion definitions, in which the first element is
239 /// either the TorsionID BB or CHI and the second element is an integer
/// (as filled in by the nu-defining code at the end of this file, the first element is always CHI)
240 /// @remarks The atom tree in Rosetta 3 does not allow for rings, so cyclic carbohydrates are implemented as
241 /// linear residues. Because of this, the atom tree assigns backbone (BB) torsions to what it considers the main-
242 /// chain. Thus, only one side of the ring is considered backbone. Side-chain (CHI) angles must be defined in
243 /// the .params file for the residue; they are not automatically assigned. Those nu angles (the torsion angles
244 /// defining the ring) that the atom tree does not consider BB must therefore be defined as CHI angles in the
245 /// .params file, even though they are not in reality side-chain torsions. Since a ring also has a multiplicity
246 /// of actual side-chains, the indices for those CHI angles that are actually nu angles will vary.
247 std::pair<core::id::TorsionType, core::uint>
// NOTE(review): the line carrying this definition's name and parameter list is absent from this listing; the
// PyAssert message below names it as CarbohydrateInfo::nu_id(core::uint subscript).
249 {
  // NOTE(review): if ring_size_ is an unsigned type and is 0 (its value for linear sugars), ring_size_ - 2
  // presumably wraps around and this check stops rejecting large subscripts -- confirm.
250  assert((subscript > 0) && (subscript <= ring_size_ - 2));
251  PyAssert((subscript > 0) && (subscript <= ring_size_ - 2),
252  "CarbohydrateInfo::nu_id(core::uint subscript): "
253  "nu(subscript) does not have a CHI identifier.");
254 
255  return nu_id_[subscript];
256 }
257 
258 // Return the BB or CHI identifier for the requested glycosidic linkage torsion angle.
259 /// @param <torsion_index>: an integer corresponding to phi (1), psi (2), or omega (3)
260 /// @return a pair of values corresponding to the atom tree torsion definitions, in which the first element is
261 /// either the TorsionID BB or CHI and the second element is an integer
262 /// @details It is crucial to note that this data structure stores information to identify:\n
263 /// phi(n)\n
264 /// psi(n+1), NOT psi(n)\n
265 /// omega(n+1), NOT omega(n)\n
266 /// \n
267 /// See also:\n
268 /// Pose.phi()\n
269 /// Pose.set_phi()\n
270 /// Pose.psi()\n
271 /// Pose.set_psi()\n
272 /// Pose.omega()\n
273 /// Pose.set_omega()
274 /// @remarks An enum would be better than an integer for input to this function; however, static constants
275 /// phi_torsion, psi_torsion, and omega_torsion were already defined in core/id/types.hh.\n
276 /// The atom tree in Rosetta 3 does not allow for rings, so cyclic carbohydrates are implemented as
277 /// linear residues. Because of this, the atom tree assigns backbone (BB) torsions to what it considers the main-
278 /// chain. Thus, only one side of the ring is considered backbone. Side-chain (CHI) angles must be defined in
279 /// the .params file for the residue; they are not automatically assigned. Glycosidic linkage torsions are not
280 /// necessarily defined as main chain torsions by the atom tree, so they must be designated here, in some cases with
281 /// the use of CHI angles.
282 std::pair<core::id::TorsionType, core::uint>
// NOTE(review): the line carrying this definition's name and parameter list is absent from this listing; the
// PyAssert message below names it as CarbohydrateInfo::glycosidic_linkage_id(core::uint torsion_index).
284 {
  // The largest accepted index is 2 unless the branch below raises it to 3.
285  Size upper_bound = 2; // for phi and psi
  // NOTE(review): the condition that opens the block closed by "}" below is absent from this listing.
287  upper_bound = 3; // for omega
288  }
289  assert((torsion_index >= 1) && (torsion_index <= upper_bound));
290  PyAssert((torsion_index >= 1) && (torsion_index <= upper_bound),
291  "CarbohydrateInfo::glycosidic_linkage_id(core::uint torsion_index): "
292  "no defined torsion angle for this index.");
293 
294  return glycosidic_linkage_id_[torsion_index];
295 }
296 
297 
298 // Private methods /////////////////////////////////////////////////////////////
299 // Initialize data members from properties.
// Stores the residue type, assigns default values, and then defines the nu ids.
// NOTE(review): the line carrying this definition's name and parameter list, and several statements in the body,
// are absent from this listing.
300 void
302 {
303  // Set default values.
304  residue_type_ = residue_type;
305  anomeric_carbon_ = 1; // assumes that most sugars will be aldoses if not specified by .params file
307  stereochem_ = 'D'; // assumes that most sugars will have D stereochemistry
308  ring_size_ = 0; // assumes linear
309  anomer_ = ""; // assumes linear
  // NOTE(review): residue_type_ is dereferenced here; the empty constructor in this file passes a
  // default-constructed ResidueTypeCAP -- confirm that such a pointer can be dereferenced.
  // A residue counts as a glycoside unless it is a lower terminus.
310  if (residue_type_->is_lower_terminus()){
311  is_glycoside_ = false;
312  } else {
313  is_glycoside_ = true;
314  }
315  is_uronic_acid_ = false;
316 
318 
320 
322 
323  define_nu_ids();
324 }
325 
326 // Copy all data members from <object_to_copy_from> to <object_to_copy_to>.
/// @param <object_to_copy_to>: the object whose members are overwritten
/// @param <object_to_copy_from>: the object whose members are read
// NOTE(review): the line carrying this definition's name is absent from this listing.
// NOTE(review): both parameters are declared by value below, so every assignment in the body writes to a local
// copy that is discarded on return, and passing the arguments itself invokes the copy constructor (which, in this
// file, calls this function). The destination presumably needs to be a non-const reference and the source a const
// reference; that change must be made together with the declaration in the header.
327 void
329  CarbohydrateInfo object_to_copy_to,
330  CarbohydrateInfo object_to_copy_from)
331 {
332  object_to_copy_to.residue_type_ = object_to_copy_from.residue_type_;
333  object_to_copy_to.full_name_ = object_to_copy_from.full_name_;
334  object_to_copy_to.short_name_ = object_to_copy_from.short_name_;
335  object_to_copy_to.anomeric_carbon_ = object_to_copy_from.anomeric_carbon_;
336  object_to_copy_to.n_carbons_ = object_to_copy_from.n_carbons_;
337  object_to_copy_to.stereochem_ = object_to_copy_from.stereochem_;
338  object_to_copy_to.ring_size_ = object_to_copy_from.ring_size_;
339  object_to_copy_to.anomer_ = object_to_copy_from.anomer_;
340  object_to_copy_to.is_glycoside_ = object_to_copy_from.is_glycoside_;
341  object_to_copy_to.is_uronic_acid_ = object_to_copy_from.is_uronic_acid_;
342  object_to_copy_to.nu_id_ = object_to_copy_from.nu_id_;
343  object_to_copy_to.mainchain_glycosidic_bond_acceptor_ = object_to_copy_from.mainchain_glycosidic_bond_acceptor_;
344  object_to_copy_to.branch_points_ = object_to_copy_from.branch_points_;
345  object_to_copy_to.has_exocyclic_linkage_ = object_to_copy_from.has_exocyclic_linkage_;
346  object_to_copy_to.glycosidic_linkage_id_ = object_to_copy_from.glycosidic_linkage_id_;
347 }
348 
349 // Return the number of carbon atoms (not counting R groups) in the ResidueType.
/// @return the largest n, from MAX_C_SIZE_LIMIT down to MIN_C_SIZE_LIMIT, for which the residue type has an atom
/// named "C<n>"
/// @details If no such atom is found, the program exits through utility_exit_with_message().
// NOTE(review): the lines carrying this definition's return type and name are absent from this listing.
352 {
353  using namespace std;
354 
  // Search from the highest-numbered carbon downward so that the first hit is the carbon count.
  // NOTE(review): the counter is unsigned; if MIN_C_SIZE_LIMIT were 0 the loop condition could never become
  // false -- confirm that the limit is positive.
355  for (uint carbon_num = MAX_C_SIZE_LIMIT; carbon_num >= MIN_C_SIZE_LIMIT; --carbon_num) {
   // Adding to '0' yields a digit character only for values 0-9.
356  char carbon_num_char = '0' + carbon_num; // quick way to convert int to char
357  if (residue_type_->has(string(1, 'C') + string(1, carbon_num_char) /*convert chars to strings to concatenate*/)) {
358  return carbon_num;
359  }
360  }
361  utility_exit_with_message(
362  "This residue is not a sugar or else there is an error in C atom labeling in the .params file.");
363  return 0; // will never be reached
364 }
365 
366 // Read through all the properties. Check for impossible cases. If any property type is not set, the default
367 // value will be maintained.
// Recognized property strings: ALDOSE, KETOSE, L_SUGAR, D_SUGAR, FURANOSE, PYRANOSE, SEPTANOSE, ALPHA_SUGAR,
// BETA_SUGAR, and URONIC_ACID; any other property is ignored. Contradictory properties, a cyclic sugar without
// an anomeric property, and an acyclic sugar with one all end the program through utility_exit_with_message().
// NOTE(review): the line carrying this definition's name is absent from this listing.
368 void
370 {
371  using namespace std;
372  using namespace utility;
373 
374  vector1<string> properties = residue_type_->properties();
375 
  // Each flag records that a property of that category has already been read.
376  bool aldose_or_ketose_set = false;
377  bool stereochem_set = false;
378  bool ring_size_set = false;
379  bool anomer_set = false;
380 
  // Indices start at 1 (vector1).
381  for (uint i = 1, n_properties = properties.size(); i <= n_properties; ++i) {
382  if (properties[i] == "ALDOSE") {
   // Within this function, only the KETOSE branch changes anomeric_carbon_ to a value other than 1.
383  if (anomeric_carbon_ != 1) {
384  utility_exit_with_message("A sugar cannot be both an aldose and a ketose; check the .param file.");
385  } else {
386  anomeric_carbon_ = 1;
387  aldose_or_ketose_set = true;
388  }
389  } else if (properties[i] == "KETOSE") {
390  if (aldose_or_ketose_set && (anomeric_carbon_ == 1)) {
391  utility_exit_with_message("A sugar cannot be both an aldose and a ketose; check the .param file.");
392  } else {
393  anomeric_carbon_ = 2; // TODO: Provide method for dealing with non-ulose ketoses.
394  aldose_or_ketose_set = true;
395  }
396  } else if (properties[i] == "L_SUGAR") {
397  if (stereochem_set && (stereochem_ == 'D')) {
398  utility_exit_with_message("A sugar cannot have both L and D stereochem.; check the .param file.");
399  } else {
400  stereochem_ = 'L';
401  stereochem_set = true;
402  }
403  } else if (properties[i] == "D_SUGAR") {
   // Within this function, only the L_SUGAR branch sets stereochem_ to 'L'.
404  if (stereochem_ == 'L') {
405  utility_exit_with_message("A sugar cannot have both L and D stereochem.; check the .param file.");
406  } else {
407  stereochem_ = 'D';
408  stereochem_set = true;
409  }
410  } else if (properties[i] == "FURANOSE") {
411  if (ring_size_set && (ring_size_ != 5)) {
412  utility_exit_with_message("A sugar cannot have multiple ring sizes; check the .param file.");
413  } else {
414  ring_size_ = 5;
415  ring_size_set = true;
416  }
417  } else if (properties[i] == "PYRANOSE") {
418  if (ring_size_set && (ring_size_ != 6)) {
419  utility_exit_with_message("A sugar cannot have multiple ring sizes; check the .param file.");
420  } else {
421  ring_size_ = 6;
422  ring_size_set = true;
423  }
424  } else if (properties[i] == "SEPTANOSE") {
425  if (ring_size_set && (ring_size_ != 7)) {
426  utility_exit_with_message("A sugar cannot have multiple ring sizes; check the .param file.");
427  } else {
428  ring_size_ = 7;
429  ring_size_set = true;
430  }
431  } else if (properties[i] == "ALPHA_SUGAR") {
432  if (anomer_set && (anomer_ == "beta")) {
433  utility_exit_with_message("A sugar cannot be both alpha and beta; check the .param file.");
434  } else {
435  anomer_ = "alpha";
436  anomer_set = true;
437  }
438  } else if (properties[i] == "BETA_SUGAR") {
439  if (anomer_set && (anomer_ == "alpha")) {
440  utility_exit_with_message("A sugar cannot be both alpha and beta; check the .param file.");
441  } else {
442  anomer_ = "beta";
443  anomer_set = true;
444  }
445  } else if (properties[i] == "URONIC_ACID") {
446  is_uronic_acid_ = true;
447  }
448  }
449 
  // Cross-check ring size against anomeric form once every property has been read.
450  if ((ring_size_ != 0) && (anomer_ == "")) {
451  utility_exit_with_message("A cyclic sugar must have its anomeric property declared; check the .param file.");
452  }
453  if ((ring_size_ == 0) && (anomer_ != "")) {
454  utility_exit_with_message("An acyclic sugar cannot be alpha or beta; check the .param file.");
455  }
456 }
457 
458 // Get connection data from the residue type.
// Sets mainchain_glycosidic_bond_acceptor_ and has_exocyclic_linkage_, and appends the phi entry to
// glycosidic_linkage_id_.
// NOTE(review): the line carrying this definition's name, the body of the first "else" branch, and the statements
// that define psi and omega are absent from this listing.
459 void
461 {
462  using namespace std;
463  using namespace id;
464 
465  if (!residue_type_->is_upper_terminus()) {
466  uint upper_atom_index = residue_type_->upper_connect_atom();
467  string atom_name = residue_type_->atom_name(upper_atom_index);
468  //char atom_number = atom_name[2];
   // The number is parsed from the third character of the atom name onward.
   // NOTE(review): this presumes the position digits start at index 2 of the name -- confirm the naming scheme.
469  mainchain_glycosidic_bond_acceptor_ = atoi(&atom_name[2]);
470  //uint position = atom_number - '0';
471  } else {
473  }
474 
475  // TODO: Implement branching.
476 
477  // Exocyclic linkage?
  // NOTE(review): ring_size_ is 0 for linear sugars; if Size is unsigned, ring_size_ - 1 presumably wraps around
  // in that case -- confirm.
478  Size carbons_in_ring = ring_size_ - 1 /*oxygen*/;
479  uint last_carbon_in_ring = carbons_in_ring + anomeric_carbon_ - 1;
480  if (mainchain_glycosidic_bond_acceptor_ > last_carbon_in_ring) {
481  has_exocyclic_linkage_ = true;
482  } else {
483  has_exocyclic_linkage_ = false;
484  }
485 
486  // Define phi (phi_torsion = 1 in core/id/types.hh).
487  // For aldopyranoses, phi(n) is defined as: O5(n)-C1(n)-OX(n-1)-CX(n-1)
488  // BB X+1 is: CX-OX-UPPER1-UPPER2
489  // However, CHI 1 is O5-C1-O1-HO1, which, for an internal residue with virtual atoms for O1 and HO1, is
490  // the same as phi(n), provided the virtual atoms are made to move with any rotation of BB X+1.
491  // The same concept holds for aldofuranoses; however, ketoses are more complicated. The cyclic oxygen must
492  // be the reference for phi, yet CHI 2 at the anomeric position is defined with C1 as the reference atom,
493  // not the cyclic oxygen (O5 for furanoses, O6 for pyranoses).
494  // To complicate matters further, two virtual atoms in a row in a CHI give NaN, so CHI angles cannot be used after
495  // all. We will need to use vector calculus for getting and setting phi. These calculations can be found in
496  // core/pose/carbohydrates/util.cc.
497  // For now, the below setting of glycosidic_linkage_id_[phi_torsion] is kept as (CHI, 1), but it is essentially a
498  // dummy setting for consistency, i.e., since the data is stored in a vector1 at the moment and not a map with an
499  // enum value for a key as it probably should be. ~ Labonte
500  if (is_aldose()) {
501  glycosidic_linkage_id_.push_back(make_pair(CHI, 1));
502  } else {
503  // TODO: Correct this. This is the correct bond but the wrong angle definition. I need to decide where to
504  // put this CHI in the .params file.
505  glycosidic_linkage_id_.push_back(make_pair(CHI, anomeric_carbon_));
506  }
507 
508  // Define psi (psi_torsion = 2 in core/id/types.hh).
509  // psi(n) is defined as: C(anomeric)(n)-OX(n-1)-CX(n-1)-CX-1(n-1)
510  // BB X is: CX-1-CX-OX-UPPER
511  // Thus, this is actually the psi angle of the NEXT residue!
513 
514  // Define omega (omega_torsion = 3 in core/id/types.hh).
517  }
518 }
519 
520 // Determine and set the full and abbreviated IUPAC names.
521 // The NAME property in the .params file is actually the standard IUPAC abbreviation (of an internal/unpatched
522 // residue), not the full name. It, combined with any patches, is the Rosetta name for the ResidueType. The IUPAC
523 // names will change depending on the residue's place in the sequence and/or any patches.
// Both names share the same prefixes; full_name_ then uses the root and the long suffix, while short_name_ uses
// the residue type's name3() code and the short suffix.
// NOTE(review): the line carrying this definition's name is absent from this listing.
524 void
526 {
527  using namespace std;
528 
529  // Determine prefixes.
530  stringstream prefixes(stringstream::out);
  // The linkage position is included unless the residue is an upper terminus.
531  if (!residue_type_->is_upper_terminus()) {
532  prefixes << "->" << mainchain_glycosidic_bond_acceptor_ << ")-";
533  }
  // The anomeric form is included unless the residue is a lower terminus.
534  if (!residue_type_->is_lower_terminus()) {
535  prefixes << anomer_ << '-';
536  }
537  prefixes << stereochem_ << '-';
538 
539  // Determine root.
540  string code = residue_type_->name3();
541  string root = root_from_code(code);
542 
543  // Determine suffix.
544  stringstream long_suffix(stringstream::out);
545  stringstream short_suffix(stringstream::out);
  // Ring sizes other than 5, 6, and 7 contribute nothing to either suffix.
546  switch (ring_size_) {
547  case 5:
548  long_suffix << "ofuran";
549  short_suffix << 'f';
550  break;
551  case 6:
552  long_suffix << "opyran";
553  short_suffix << 'p';
554  break;
555  case 7:
556  long_suffix << "oseptan";
557  short_suffix << 's';
558  break;
559  }
  // The ending depends on whether the residue is a lower terminus, a glycoside, and/or a uronic acid.
560  if (residue_type_->is_lower_terminus()) {
561  if (is_glycoside_) {
562  if (is_uronic_acid_) {
563  long_suffix << "uronoside";
564  short_suffix << "A";
565  } else {
566  long_suffix << "oside";
567  }
568  } else {
569  if (is_uronic_acid_) {
570  long_suffix << "uronate";
571  short_suffix << "A";
572  } else {
573  long_suffix << "ose";
574  }
575  }
576  } else {
577  if (is_uronic_acid_) {
578  long_suffix << "uronoyl";
579  short_suffix << "A";
580  } else {
581  long_suffix << "osyl";
582  }
   // Only residues that are not a lower terminus get a trailing '-' on the short name.
583  short_suffix << '-';
584  }
585 
586  full_name_ = prefixes.str() + root + long_suffix.str();
587  short_name_ = prefixes.str() + code + short_suffix.str();
588 }
589 
590 // If cyclic, define nu angles in terms of CHI ids.
// Appends one (CHI, index) pair to nu_id_ for each of the last (ring_size_ - 2) CHI angles of the residue type.
// Nothing is appended when ring_size_ is 0.
// NOTE(review): the line carrying this definition's name is absent from this listing.
591 void
593 {
594  using namespace std;
595  using namespace id;
596 
597  if (ring_size_ != 0) {
598  // Get the number of torsions needed to define a ring conformation.
599  // The two remaining ring torsions (e.g., for a six-membered ring, nu(0) and nu(5)) will have to be determined
600  // using vector calculus, because of the cut bond required by the atom tree.
601  Size n_torsions_needed = ring_size_ - 2;
602  Size n_CHIs = residue_type_->nchi();
603 
604  // The final CHIs in the .params file define the (needed) ring torsions.
   // NOTE(review): this presumes nchi() is at least ring_size_ - 2; with unsigned arithmetic a smaller value
   // would presumably wrap around -- confirm.
605  uint first_CHI = n_CHIs - n_torsions_needed + 1;
606 
607  for (uint i = first_CHI; i <= n_CHIs; ++i) {
608  nu_id_.push_back(make_pair(CHI, i));
609  }
610  }
611 }
612 
613 
614 // Friend methods //////////////////////////////////////////////////////////////
615 // Insertion operator (overloaded so that CarbohydrateInfo can be "printed" in PyRosetta).
616 std::ostream &
617 operator<<(std::ostream & output, CarbohydrateInfo const & object_to_output)
618 {
619  object_to_output.show(output);
620  return output;
621 }
622 
623 } // namespace carbohydrates
624 } // namespace chemical
625 } // namespace core