Rosetta 3.5
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
ctab_parser.cc
Go to the documentation of this file.
1 // -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
2 // vi: set ts=2 noet:
3 // :noTabs=false:tabSize=4:indentSize=4:
4 //
5 // This file is part of the Rosetta software suite and is made available under license.
6 // The Rosetta software is developed by the contributing members of the Rosetta Commons consortium.
7 // (C) 199x-2009 Rosetta Commons participating institutions and developers.
8 // For more information, see http://www.rosettacommons.org/.
9 
10 /// @file core/io/sdf/ctab_parser.cc
11 ///
12 /// @brief Parsers for V2000 and V3000 connection tables (ctabV2000Parser, ctabV3000Parser)
13 /// @author Sam DeLuca
14 
15 #include <basic/Tracer.hh>
17 #include <utility/string_util.hh>
18 //#include <core/chemical/ResidueType.hh>
19 //#include <protocols/ligand_docking/ColoredGraph.hh>
22 #include <utility/exit.hh>
23 #include <numeric/xyzVector.hh>
24 #include <vector>
25 #include <algorithm>
26 #include <map>
27 #include <sstream>
28 #include <cstring>
29 #include <string>
30 #include <stdlib.h>
31 
32 // Boost Headers
33 #include <boost/foreach.hpp>
34 #define foreach BOOST_FOREACH
35 
36 namespace core {
37 namespace chemical {
38 namespace sdf {
39 
40 
// File-local tracer; the parsers in this file write their diagnostics to its Debug stream.
41 static basic::Tracer ctabParserTracer("core.io.sdf.ctab_parser");
42 /*
43 elementToType::elementToType()
44 {
45  e_to_t["C"]="CH3"; //Mult
46  e_to_t["N"]="Nbb"; //Mult
47  e_to_t["O"]="OH"; //Mult
48  e_to_t["S"]="S";
49  e_to_t["P"]="P";
50  e_to_t["H"]="Hpol"; //Mult
51  e_to_t["F"]="F";
52  e_to_t["Cl"]="Cl";
53  e_to_t["Br"]="Br";
54  e_to_t["I"]="I";
55  e_to_t["Zn"]="Zn2p";
56  e_to_t["Fe"]="Fe2p"; //Mult
57  e_to_t["Mg"]="Mg2p";
58  e_to_t["Ca"]="Ca2p";
59  e_to_t["Na"]="Na1p";
60  e_to_t["K"]="K1p";
61 
62 }
63 
64 std::string elementToType::get(std::string key)
65 {
66  std::map<std::string, std::string>::iterator val = e_to_t.find(key);
67  std::string type;
68  if (val==e_to_t.end()) {type="VIRT";}
69  else { type=val->second;}
70  return type;
71 }
72 
73 */
74 ctabV2000Parser::ctabV2000Parser(const utility::vector1<std::string> connection_table_lines, core::chemical::ResidueTypeOP molecule_container, MolData mol_data) :
75  connection_table_lines_(connection_table_lines), molecule_container_(molecule_container), mol_data_(mol_data)
76 {
77 
78 }
79 
81 {
82  std::string counts_line = connection_table_lines_[1];
83  core::Size atom_count = atoi(counts_line.substr(0,3).c_str());
84  core::Size bond_count = atoi(counts_line.substr(3,3).c_str());
85 
86  ctabParserTracer.Debug << atom_count << " atoms and " << bond_count << " bonds" <<std::endl;
87 
88  std::string version_tag = counts_line.substr(34,5);
89  if(version_tag != "V2000")
90  {
91  utility_exit_with_message("This doesnt look like a V2000 CTAB, bailing out");
92  }
93 
94  core::Size last_atom = atom_count;
95  core::Size last_bond = last_atom+bond_count;
96 
97  for(core::Size line_number = 1; line_number < connection_table_lines_.size(); ++line_number)
98  {
99  if(line_number <= last_atom)
100  {
101  ParseAtom(connection_table_lines_[line_number+1],line_number);
102  }else if(line_number <= last_bond)
103  {
104  ParseBond(connection_table_lines_[line_number+1]);
105  }
106  }
107 
108  //Iterate across the atoms, adding Hs and finding their types
109  std::map<core::Size, std::string>::iterator atom_iterator;
110  for(atom_iterator = index_to_names_map_.begin();
111  atom_iterator != index_to_names_map_.end();
112  ++atom_iterator)
113  {
114  std::string atomname=atom_iterator->second;
115  core::Size atomno = molecule_container_->atom_index(atomname);
116  set_atom_type(atomno, atomname);
117  }
118 
119  foreach(addedH added, added_H_){
120  index_to_names_map_.insert(
121  std::pair<core::Size,std::string>(added.atom_number,"H"+added.atom_number)
122  );
123  }
124 
125 
126 }
127 
128 //Sets the atom type and adds H if necessary.
130 {
// Types the atom `atomno` / `atomname` with an atomTyper.  For C, N and O
// atoms hydrogens are added first until the expected bond count is reached.
// Afterwards the atom's charge is overwritten with the "CHARGE" extra
// parameter of its type in the "fa_standard" atom type set.
// NOTE(review): this listing is missing the function signature and several
// statement lines (see the notes below), so the code is left untouched.
131  atomTyper typer = atomTyper(atomno, molecule_container_);
132  if(typer.get_element()=="C"||typer.get_element()=="N"||typer.get_element()=="O")
133  {
134  core::SSize total_bonds=0;
135  char ele=typer.get_element().at(0);
// The cases fall through on purpose: C accumulates 1+1+2 = 4, N 1+2 = 3, O 2.
136  switch(ele)
137  {
138  case 'C':
139  total_bonds++;
140  case 'N':
141  total_bonds++;
142  case 'O':
143  total_bonds+=2;
144  }
// Adjust by the atom's stored charge, then subtract the bonds it already has;
// what remains is the number of hydrogens to add.
145  total_bonds+=molecule_container_->atom(atomno).charge();
146  total_bonds-=typer.getNumBonds();
147  while(total_bonds>0)
148  {
149  total_bonds--;
150  addedH newH;
151  newH.atom_number=++current_atom_;
// Hydrogens on carbon get type "Hapo", all others "Hpol".
152  newH.atom_type=(ele=='C')?"Hapo":"Hpol";
153  newH.bonded_atom_name=atomname;
154  added_H_.push_back(newH);
// NOTE(review): "H"+newH.atom_number adds a number to the pointer of the
// string literal; it does not append the number to "H".
// NOTE(review): the continuation lines of the add_atom(...) and add_bond(...)
// calls are missing from this listing.
155  molecule_container_->add_atom("H"+newH.atom_number,
157  molecule_container_->add_bond(newH.bonded_atom_name,
// Re-create the typer so it sees the hydrogen that was just added.
159  typer = atomTyper(atomno, molecule_container_);
160  }
161  }
162  molecule_container_->set_atom_type(atomname, typer.getType());
163 
164  //set default charge
// NOTE(review): the declaration of chemical_manager is missing from this listing.
166  core::chemical::AtomTypeSetCAP atom_type_set = chemical_manager->atom_type_set("fa_standard");
167  core::Size atom_type_index = atom_type_set->atom_type_index(typer.getType());
168  core::Size parameter_index = atom_type_set->extra_parameter_index("CHARGE");
169  core::Real charge = atom_type_set->operator[](atom_type_index).extra_parameter(parameter_index);
// NOTE(review): the statement below has one closing parenthesis too many.
170  molecule_container_->atom( atomname ).charge(charge));
171 }
172 
174 {
175  core::Real x_coord = atof(atom_line.substr(0,10).c_str());
176  core::Real y_coord = atof(atom_line.substr(10,10).c_str());
177  core::Real z_coord = atof(atom_line.substr(20,10).c_str());
178  ctabParserTracer.Debug << "atom " <<atom_number << " has coordinates " << x_coord << ',' << y_coord << ',' << z_coord << std::endl;
179  std::string element_name = atom_line.substr(31,3).c_str();
180  if(element_name.at(1)==' ') {
181  element_name=element_name.substr(0,1);
182  } else if (element_name.at(2)==' ') {
183  element_name=element_name.substr(0,2);
184  }
185 
186  std::string atom_number_string;
187  std::stringstream convert_stream;
188  convert_stream << atom_number;
189  atom_number_string = convert_stream.str();
190 
191  //Set the atom type to a default based on the element.
192  std::string atom_type = element_to_default_type.get(element_name);
193 
194  current_atom_=atom_number;
195 
196  std::string element_id = element_name+ atom_number_string;
197  utility::add_spaces_left_align(element_id,4);
198 
199  core::Size charge = atoi(atom_line.substr(36,3).c_str());
200 
201  numeric::xyzVector<core::Real> coordinates(x_coord,y_coord,z_coord);
202 
203  index_to_names_map_.insert(std::pair<core::Size,std::string>(atom_number,element_id));
204  molecule_container_->add_atom(element_id,atom_type,DEFAULT_MM_ATOM_TYPE_,charge);
205  molecule_container_->set_xyz(element_id,coordinates);
206 }
207 
209 {
210  core::Size atom1_index = atoi(bond_line.substr(0,3).c_str());
211  core::Size atom2_index = atoi(bond_line.substr(3,3).c_str());
212  core::chemical::BondName bond_type = static_cast<core::chemical::BondName>(atoi(bond_line.substr(6,3).c_str()));
213 
214  std::string atom1_id(index_to_names_map_.find(atom1_index)->second);
215  std::string atom2_id(index_to_names_map_.find(atom2_index)->second);
216  ctabParserTracer.Debug << "bond " << atom1_id << " to " << atom2_id << " of type " << bond_type << std::endl;
217  molecule_container_->add_bond(atom1_id,atom2_id,bond_type);
218 }
219 
220  std::map<core::Size, std::string> ctabV2000Parser::ParseAtomTypeData()
221  {
222  std::string atom_type_data = "";
223  std::map<core::Size, std::string> data_map;
224  for(core::Size index = 1; index <= connection_table_lines_.size(); ++index )
225  {
226  std::string line = connection_table_lines_[index];
227  if(line.find("> <Rosetta AtomTypes>")!= std::string::npos)
228  {
229  atom_type_data = connection_table_lines_[index+1];
230  break;
231  }
232  }
233  if(atom_type_data == "")
234  {
235  return data_map;
236  }
237  else
238  {
239  utility::vector1<std::string> tokens=utility::split(atom_type_data);
240  for(core::Size index = 1; index <= tokens.size();++index)
241  {
242  std::string current_token = tokens[index];
243  utility::vector1<std::string> token_split=utility::split(current_token);
244  utility::trim(token_split[2],"(");
245  utility::trim(token_split[3],")");
246 
247  core::Size atomno = atoi(token_split[2].c_str());
248  std::pair<core::Size, std::string> atom_type_point(atomno,token_split[3]);
249  data_map.insert(atom_type_point);
250  }
251 
252  }
253  return data_map;
254  }
255 
256 
258 {
// Returns the stored molecule_container_ pointer (the one set by the constructor).
259  return molecule_container_;
260 }
261 
262 
263 ctabV3000Parser::ctabV3000Parser(const utility::vector1<std::string> connection_table_lines, core::chemical::ResidueTypeOP molecule_container, MolData mol_data ) :
264  connection_table_lines_(connection_table_lines), molecule_container_(molecule_container), mol_data_(mol_data)
265 { }
266 
268 {
// Returns the stored molecule_container_ pointer (the one set by the constructor).
269  return molecule_container_;
270 
271 }
272 
274 {
// Types the atom `atomno` / `atomname` with an atomTyper.  For C, N and O
// atoms hydrogens are added first until the expected bond count is reached.
// NOTE(review): this listing is missing the function signature and the
// continuation lines of two calls (see below), so the code is left untouched.
275  atomTyper typer = atomTyper(atomno, molecule_container_);
276  if(typer.get_element()=="C"||typer.get_element()=="N"||typer.get_element()=="O")
277  {
// NOTE(review): the otherwise identical V2000 code earlier in this file
// declares total_bonds as core::SSize.  If core::Size is unsigned, a negative
// result of the arithmetic below wraps around and the while loop keeps adding
// hydrogens -- confirm and switch to a signed type.
278  core::Size total_bonds=0;
279  char ele=typer.get_element().at(0);
// The cases fall through on purpose: C accumulates 1+1+2 = 4, N 1+2 = 3, O 2.
280  switch(ele)
281  {
282  case 'C':
283  total_bonds++;
284  case 'N':
285  total_bonds++;
286  case 'O':
287  total_bonds+=2;
288  }
// Adjust by the atom's stored charge, then subtract the bonds it already has;
// what remains is the number of hydrogens to add.
289  total_bonds+=molecule_container_->atom(atomno).charge();
290  total_bonds-=typer.getNumBonds();
291  while(total_bonds>0)
292  {
293  total_bonds--;
294  addedH newH;
295  newH.atom_number=++current_atom_;
// Hydrogens on carbon get type "Hapo", all others "Hpol".
296  newH.atom_type=(ele=='C')?"Hapo":"Hpol";
297  newH.bonded_atom_name=atomname;
298  added_H_.push_back(newH);
// NOTE(review): "H"+newH.atom_number adds a number to the pointer of the
// string literal; it does not append the number to "H".
// NOTE(review): the continuation lines of the add_atom(...) and add_bond(...)
// calls are missing from this listing.
299  molecule_container_->add_atom("H"+newH.atom_number,
301  molecule_container_->add_bond(newH.bonded_atom_name,
// Re-create the typer so it sees the hydrogen that was just added.
303  typer = atomTyper(atomno, molecule_container_);
304  }
305  }
306  molecule_container_->set_atom_type(atomname, typer.getType());
307 }
308 
309 core::Real ctabV3000Parser::FindExtraParameter(std::vector<std::string> extra_parameters,const std::string query )
310 {
311  foreach(std::string extra_parameter, extra_parameters){
312  if(extra_parameter.find(query) == std::string::npos)
313  {
314  continue;
315  }
316  else
317  {
318  core::Size value_length = extra_parameter.size() - (query.size()+1);
319  core::Size value_start = query.size()+1;
320 
321  std::string value_string = extra_parameter.substr(value_start,value_length);
322  core::Real value = atof(value_string.c_str());
323  return value;
324  }
325  }
326  return 0.0;
327 }
329 {
330 
331  bool atom_block(false);
332  bool bond_block(false);
333 
334  std::map<core::Size,std::string> atom_type_data = this->ParseAtomTypeData();
335 
336  foreach(std::string current_line, connector_table_lines_){
337 
338  utility::vector1<std::string> line_vector(utility::split(current_line));
339  //if(line_vector[1] != "M" || line_vector[2] != "V30")
340  //{
341  // utility_exit_with_message("This doesn't look like a V3000 CTAB, bailing out");
342  //}
343 
344  core::Size atom_count(0);
345  core::Size bond_count(0);
346  if(line_vector[2] == "END")
347  {
348  break;
349  }
350  if(line_vector[2] != "V30" || line_vector.size() <2)
351  {
352  continue;
353  }
354  if(line_vector[3] == "COUNTS")
355  {
356  atom_count = atoi(line_vector[4].c_str());
357  bond_count = atoi(line_vector[5].c_str());
358  }else if(line_vector[3] == "BEGIN")
359  {
360  if (line_vector[4] == "ATOM")
361  atom_block = true;
362  else if (line_vector[4] == "BOND")
363  bond_block = true;
364  }else if(line_vector[3] == "END")
365  {
366  if (line_vector[4] == "ATOM")
367  atom_block = false;
368  else if (line_vector[4] == "BOND")
369  bond_block = false;
370  }else
371  {
372  if (atom_block)
373  ParseAtom(current_line);
374  else if (bond_block)
375  ParseBond(current_line);
376  }
377  }
378 
379  std::map<core::Size, std::string>::iterator atom_iterator;
380  for(atom_iterator = index_to_names_map_.begin();
381  atom_iterator != index_to_names_map_.end();
382  ++atom_iterator)
383  {
384  std::string atomname= atom_iterator->second;
385  core::Size atomno = molecule_container_->atom_index(atomname);
386  std::map<core::Size,std::string>::iterator atom_type_it = atom_type_data.find(atomno);
387  if(atom_type_it != atom_type_data.end())
388  {
389  std::string atom_type = atom_type_it->second;
390  molecule_container_->set_atom_type(atomname,atom_type);
391  } else
392  {
393  set_atom_type(atomno,atomname);
394  }
395  }
396 
397  foreach(addedH added, added_H_){
398  index_to_names_map_.insert(std::pair<core::Size,std::string>(
399  added.atom_number,"H"+added.atom_number));
400  }
401 
402 }
403 
405 {
406  utility::vector1<std::string> atom_vector(utility::split(atom_line));
407 
408  core::Size index = atoi(atom_vector[3].c_str());
409  std::string element_name(atom_vector[4]);
410  std::string element_id(element_name+atom_vector[3]);
411  utility::add_spaces_left_align(element_id,4);
412  core::Real x_coord = atof(atom_vector[5].c_str());
413  core::Real y_coord = atof(atom_vector[6].c_str());
414  core::Real z_coord = atof(atom_vector[7].c_str());
415 
416  numeric::xyzVector<core::Real> coordinates(x_coord,y_coord,z_coord);
417 
418 
419 
420  //std::vector<std::string> extra_parameters;
421  //std::copy(atom_vector.begin()+8, atom_vector.end(),extra_parameters.begin());
422  core::Real charge(FindExtraParameter(atom_vector,"CHG"));
423 
424  index_to_names_map_.insert(std::pair<core::Size,std::string>(index,element_id));
425  //Set the atom type to a default based on the element.
426  std::string atom_type = element_to_default_type.get(element_name);
427  molecule_container_->add_atom(element_id,atom_type,"X",charge);
428  molecule_container_->set_xyz(element_id,coordinates);
429 
430 }
431 
433 {
434  utility::vector1<std::string> bond_vector(utility::split(bond_line));
435 
436  core::Size index = atoi(bond_vector[3].c_str());
437  core::chemical::BondName type = static_cast<core::chemical::BondName>(atoi(bond_vector[4].c_str()));
438 
439  core::Size atom1_index = atoi(bond_vector[5].c_str());
440  core::Size atom2_index = atoi(bond_vector[6].c_str());
441 
442  std::string atom1_id(index_to_names_map_.find(atom1_index)->second);
443  std::string atom2_id(index_to_names_map_.find(atom2_index)->second);
444 
445  std::vector<std::string> extra_parameters;
446  std::copy(bond_vector.begin()+8, bond_vector.end(),extra_parameters.begin());
447 
448  molecule_container_->add_bond(atom1_id,atom2_id,type);
449 }
450 
451 std::map<core::Size, std::string> ctabV3000Parser::ParseAtomTypeData()
452 {
453  utility::vector1<std::string> tokens = mol_data_.get_mol_data_string_vector("Rosetta AtomTypes",' ');
454  std::map<core::Size, std::string> data_map;
455  /*
456  for(core::Size index = 1; index <= connection_table_lines_.size(); ++index )
457  {
458  std::string line = connection_table_lines_[index];
459  if(line.find("> <Rosetta AtomTypes>")!= std::string::npos)
460  {
461  atom_type_data = connection_table_lines_[index+1];
462  break;
463  }
464  }
465  */
466 
467  if(tokens.size() == 0)
468  {
469  return data_map;
470  }
471  else
472  {
473  //utility::vector1<std::string> tokens=utility::split(atom_type_data);
474  for(core::Size index = 0; index < tokens.size();++index)
475  {
476  std::string current_token = tokens[index];
477  utility::vector1<std::string> token_split=utility::string_split(current_token,',');
478  utility::trim(token_split[1],"(");
479  utility::trim(token_split[2],")");
480 
481  core::Size atomno = atoi(token_split[1].c_str());
482  std::pair<core::Size, std::string> atom_type_point(atomno,token_split[2]);
483  data_map.insert(atom_type_point);
484  }
485 
486  }
487  return data_map;
488 }
489 
490 } // sdf
491 } // chemical
492 } // core