Rosetta 3.5
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
ProteinResidueConformationFeatures.cc
Go to the documentation of this file.
1 // -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
2 // vi: set ts=2 noet:
3 //
4 // (c) Copyright Rosetta Commons Member Institutions.
5 // (c) This file is part of the Rosetta software suite and is made available under license.
6 // (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
7 // (c) For more information, see http://www.rosettacommons.org. Questions about this can be
8 // (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
9 
10 /// @file protocols/features/ProteinResidueConformationFeatures.cc
11 /// @brief report idealized torsional DOFs Statistics Scientific Benchmark
12 /// @author Matthew O'Meara
13 
14 // Unit Headers
16 
17 //External
18 #include <boost/uuid/uuid.hpp>
19 #include <boost/uuid/uuid_io.hpp>
20 
21 // Project Headers
22 #include <core/chemical/AA.hh>
25 #include <core/pose/Pose.hh>
26 #include <core/pose/util.hh>
27 #include <core/types.hh>
28 #include <core/id/AtomID.hh>
29 
30 // Utility Headers
31 #include <numeric/xyzVector.hh>
32 #include <utility/vector1.hh>
33 #include <utility/sql_database/DatabaseSessionManager.hh>
34 #include <utility/string_util.hh>
35 #include <utility/tools/make_vector.hh>
36 
37 //Basic Headers
38 #include <basic/database/sql_utils.hh>
39 #include <basic/Tracer.hh>
40 #include <basic/database/schema_generator/PrimaryKey.hh>
41 #include <basic/database/schema_generator/ForeignKey.hh>
42 #include <basic/database/schema_generator/Column.hh>
43 #include <basic/database/schema_generator/Schema.hh>
44 #include <basic/database/schema_generator/Constraint.hh>
45 #include <basic/options/option.hh>
46 #include <basic/options/keys/out.OptionKeys.gen.hh>
47 #include <basic/options/keys/inout.OptionKeys.gen.hh>
48 #include <basic/database/insert_statement_generator/InsertGenerator.hh>
49 #include <basic/database/insert_statement_generator/RowData.hh>
50 
51 
52 // External Headers
53 #include <cppdb/frontend.h>
54 
55 // C++ Headers
56 #include <cmath>
57 #include <sstream>
58 
59 namespace protocols{
60 namespace features{
61 
62 static basic::Tracer TR("protocols.features.ProteinResidueConformationFeatures");
63 
64 using std::string;
65 using core::Size;
66 using core::Real;
67 using core::Vector;
70 using core::pose::Pose;
71 using core::id::AtomID;
72 using utility::sql_database::sessionOP;
73 using utility::vector1;
74 using cppdb::statement;
75 using cppdb::result;
76 using basic::database::insert_statement_generator::InsertGenerator;
77 using basic::database::insert_statement_generator::RowDataBaseOP;
78 using basic::database::insert_statement_generator::RowData;
79 
/// @brief Return the literal string "ProteinResidueConformationFeatures".
// NOTE(review): the line that carried the qualified function name and the
// opening brace (embedded listing line 81) is absent from this copy of the
// file; presumably this is the reporter's type-name accessor -- confirm
// against the original source before relying on the signature.
80 string
82  return "ProteinResidueConformationFeatures";
83 }
84 
85 void
86 ProteinResidueConformationFeatures::write_schema_to_db(utility::sql_database::sessionOP db_session) const{
87 
88  using namespace basic::database::schema_generator;
89 
90  //******protein_residue_conformation******//
91  Column struct_id("struct_id", new DbUUID(), false);
92  Column seqpos("seqpos", new DbInteger(), false);
93  Column secstruct("secstruct", new DbText(), false);
94  Column phi("phi", new DbDouble(), false);
95  Column psi("psi", new DbDouble(), false);
96  Column omega("omega", new DbDouble(), false);
97  Column chi1("chi1", new DbDouble(), false);
98  Column chi2("chi2", new DbDouble(), false);
99  Column chi3("chi3", new DbDouble(), false);
100  Column chi4("chi4", new DbDouble(), false);
101 
102 
103  utility::vector1<Column> prot_res_pkeys;
104  prot_res_pkeys.push_back(struct_id);
105  prot_res_pkeys.push_back(seqpos);
106 
107  utility::vector1<Column> fkey_cols;
108  fkey_cols.push_back(struct_id);
109  fkey_cols.push_back(seqpos);
110 
111  utility::vector1<std::string> fkey_reference_cols;
112  fkey_reference_cols.push_back("struct_id");
113  fkey_reference_cols.push_back("resNum");
114 
115  Schema protein_residue_conformation("protein_residue_conformation", PrimaryKey(prot_res_pkeys));
116  protein_residue_conformation.add_column(struct_id);
117  protein_residue_conformation.add_column(seqpos);
118  protein_residue_conformation.add_column(secstruct);
119  protein_residue_conformation.add_column(phi);
120  protein_residue_conformation.add_column(psi);
121  protein_residue_conformation.add_column(omega);
122  protein_residue_conformation.add_column(chi1);
123  protein_residue_conformation.add_column(chi2);
124  protein_residue_conformation.add_column(chi3);
125  protein_residue_conformation.add_column(chi4);
126  protein_residue_conformation.add_foreign_key(ForeignKey(fkey_cols, "residues", fkey_reference_cols, true));
127 
128  protein_residue_conformation.write(db_session);
129 
130  //******residue_atom_coords******//
131  Column atomno("atomno", new DbInteger(), false);
132  Column x("x", new DbDouble(), false);
133  Column y("y", new DbDouble(), false);
134  Column z("z", new DbDouble(), false);
135 
136  utility::vector1<Column> res_atm_coords_pkeys;
137  res_atm_coords_pkeys.push_back(struct_id);
138  res_atm_coords_pkeys.push_back(seqpos);
139  res_atm_coords_pkeys.push_back(atomno);
140 
141  Schema residue_atom_coords("residue_atom_coords", PrimaryKey(res_atm_coords_pkeys));
142  residue_atom_coords.add_column(struct_id);
143  residue_atom_coords.add_column(seqpos);
144  residue_atom_coords.add_column(atomno);
145  residue_atom_coords.add_column(x);
146  residue_atom_coords.add_column(y);
147  residue_atom_coords.add_column(z);
148  residue_atom_coords.add_foreign_key(ForeignKey(fkey_cols, "residues", fkey_reference_cols, true));
149 
150  residue_atom_coords.write(db_session);
151 
152 }
153 
/// @brief Return a one-element list containing the string "ResidueFeatures".
// NOTE(review): the return-type and function-name lines (embedded listing
// lines 154-155) are absent from this copy of the file; presumably this lists
// the reporters this one depends on -- confirm against the original source.
156  utility::vector1<std::string> dependencies;
157  dependencies.push_back("ResidueFeatures");
158  return dependencies;
159 }
160 
161 
/// @brief Write per-residue torsions/secondary structure, and for non-ideal
/// structures per-atom coordinates, of the relevant residues of pose to the
/// protein_residue_conformation and residue_atom_coords tables.
/// @param pose structure whose residues are reported
/// @param relevant_residues per-residue flags, indexed by residue number; residues whose flag is false are skipped
/// @param struct_id value stored in the struct_id column of every row written
/// @param db_session database session the rows are written to
/// @return always 0
// NOTE(review): the line carrying the qualified function name (embedded
// listing line 163) is absent from this copy of the file.
162 Size
164  Pose const & pose,
165  vector1< bool > const & relevant_residues,
166  boost::uuids::uuid struct_id,
167  sessionOP db_session
168 ){
169  bool fullatom(pose.is_fullatom());
170 
171  //check to see if this structure is ideal
// ideal stays true only if the force_nonideal_structure option is off and
// every relevant residue passes is_ideal_position; the scan stops at the
// first residue that fails.
172  bool ideal = true;
173  if(!basic::options::option[basic::options::OptionKeys::out::file::force_nonideal_structure]())
174  {
175  core::conformation::Conformation const & conformation(pose.conformation());
176  for(core::Size resn=1; resn <= pose.n_residue();++resn){
177  if(!relevant_residues[resn]) continue;
178  bool residue_status(core::conformation::is_ideal_position(resn,conformation));
179  if(!residue_status){
180  ideal = false;
181  break;
182  }
183  }
184  }else
185  {
186  ideal = false;
187  }
188 
// Rows for both tables are accumulated in these generators and written to
// the database once, after the residue loop.
189  InsertGenerator conformation_insert("protein_residue_conformation");
190  conformation_insert.add_column("struct_id");
191  conformation_insert.add_column("seqpos");
192  conformation_insert.add_column("secstruct");
193  conformation_insert.add_column("phi");
194  conformation_insert.add_column("psi");
195  conformation_insert.add_column("omega");
196  conformation_insert.add_column("chi1");
197  conformation_insert.add_column("chi2");
198  conformation_insert.add_column("chi3");
199  conformation_insert.add_column("chi4");
200 
201  InsertGenerator atom_insert("residue_atom_coords");
202  atom_insert.add_column("struct_id");
203  atom_insert.add_column("seqpos");
204  atom_insert.add_column("atomno");
205  atom_insert.add_column("x");
206  atom_insert.add_column("y");
207  atom_insert.add_column("z");
208 
// The same struct_id cell is reused in every row of both tables.
209  RowDataBaseOP struct_id_data = new RowData<boost::uuids::uuid>("struct_id",struct_id);
210 
211  for (Size i = 1; i <= pose.total_residue(); ++i) {
212  if(!relevant_residues[i]) continue;
213  Residue const & resi = pose.residue(i);
// Residues whose aa() is greater than num_canonical_aas get no row in
// either table.
214  if(resi.aa() > num_canonical_aas)
215  {
216  continue;
217  }
218  std::string secstruct = utility::to_string<char>(pose.secstruct(i));
219 
// Backbone torsions default to 0.0 and are only filled in for ideal
// structures (see the comment below).
220  Real phi = 0.0;
221  Real psi = 0.0;
222  Real omega = 0.0;
223 
224  //If you have a non ideal structure, and you store both cartesian coordinates and backbone
225  //chi angles and read them into a pose, most of the backbone oxygens will be placed incorrectly.
226  //I currently have absolutely no idea why this is, but this fixes it.
227  //It is worth noting that the current implementation of Binary protein silent files does the same thing
228 
229  if(ideal)
230  {
231  phi = resi.mainchain_torsion(1);
232  psi = resi.mainchain_torsion(2);
233  omega = resi.mainchain_torsion(3);
234  }
// A chi angle is stored as 0.0 when the pose is not full-atom or the residue
// has fewer chi angles than the column index.
235  Real chi1 = fullatom && resi.nchi() >= 1 ? resi.chi(1) : 0.0;
236  Real chi2 = fullatom && resi.nchi() >= 2 ? resi.chi(2) : 0.0;
237  Real chi3 = fullatom && resi.nchi() >= 3 ? resi.chi(3) : 0.0;
238  Real chi4 = fullatom && resi.nchi() >= 4 ? resi.chi(4) : 0.0;
239 
240  RowDataBaseOP seqpos_data = new RowData<Size>("seqpos",i);
241  RowDataBaseOP secstruct_data = new RowData<std::string>("secstruct",secstruct);
242  RowDataBaseOP phi_data = new RowData<Real>("phi",phi);
243  RowDataBaseOP psi_data = new RowData<Real>("psi",psi);
244  RowDataBaseOP omega_data = new RowData<Real>("omega",omega);
245  RowDataBaseOP chi1_data = new RowData<Real>("chi1",chi1);
246  RowDataBaseOP chi2_data = new RowData<Real>("chi2",chi2);
247  RowDataBaseOP chi3_data = new RowData<Real>("chi3",chi3);
248  RowDataBaseOP chi4_data = new RowData<Real>("chi4",chi4);
249 
250 
251  conformation_insert.add_row(utility::tools::make_vector(
252  struct_id_data,seqpos_data,secstruct_data,phi_data,
253  psi_data,omega_data,chi1_data,chi2_data,chi3_data,chi4_data));
254 
// Cartesian coordinates are only stored for non-ideal structures: one row
// per atom of the residue.
255  if(!ideal)
256  {
257  for(Size atom = 1; atom <= resi.natoms(); ++atom)
258  {
259  core::Vector coords = resi.xyz(atom);
260 
261  RowDataBaseOP atomno_data = new RowData<Size>("atomno",atom);
262  RowDataBaseOP x_data = new RowData<Real>("x",coords.x());
263  RowDataBaseOP y_data = new RowData<Real>("y",coords.y());
264  RowDataBaseOP z_data = new RowData<Real>("z",coords.z());
265 
266  atom_insert.add_row(utility::tools::make_vector(
267  struct_id_data,seqpos_data,atomno_data,x_data,y_data,z_data));
268 
269  }
270  }
271  }
272 
273  conformation_insert.write_to_database(db_session);
274  atom_insert.write_to_database(db_session);
275 
276  return 0;
277 }
278 
/// @brief Delete every row whose struct_id column equals struct_id from the
/// protein_residue_conformation and residue_atom_coords tables.
/// @param struct_id value bound to the struct_id placeholder of both DELETE statements
/// @param db_session database session the statements are prepared on
// NOTE(review): the line carrying the qualified function name (embedded
// listing line 280) is absent from this copy of the file.
279 void
281  boost::uuids::uuid struct_id,
282  utility::sql_database::sessionOP db_session
283 ){
284 
285  //std::string struct_id_string(to_string(struct_id));
// First remove the per-residue conformation rows ...
286  statement conf_stmt(basic::database::safely_prepare_statement("DELETE FROM protein_residue_conformation WHERE struct_id = ?;\n",db_session));
287  conf_stmt.bind(1,struct_id);
288  basic::database::safely_write_to_database(conf_stmt);
// ... then the per-atom coordinate rows.
289  statement atom_stmt(basic::database::safely_prepare_statement("DELETE FROM residue_atom_coords WHERE struct_id = ?;\n",db_session));
290  atom_stmt.bind(1,struct_id);
291  basic::database::safely_write_to_database(atom_stmt);
292 
293 }
294 
295 
/// @brief Forward to load_conformation with the same arguments.
/// @param db_session database session passed through to load_conformation
/// @param struct_id structure identifier passed through to load_conformation
/// @param pose pose passed through to load_conformation (modified there)
// NOTE(review): the line carrying the qualified function name (embedded
// listing line 297) is absent from this copy of the file.
295 void
298  sessionOP db_session,
299  boost::uuids::uuid struct_id,
300  Pose & pose
301 ){
302  load_conformation(db_session, struct_id, pose);
303 }
304 
305 void
307  sessionOP db_session,
308  boost::uuids::uuid struct_id,
309  Pose & pose
310 ){
311 
312  if(!basic::database::table_exists(db_session, "protein_residue_conformation")){
313  TR << "WARNING: protein_residue_conformation table does not exist and thus respective data will not be added to the pose!" << std::endl;
314  return;
315  }
316 
317  set_coords_for_residues(db_session,struct_id,pose);
318 
319 
320  if(pose.is_fullatom()){
321  std::string statement_string =
322  "SELECT\n"
323  " seqpos,\n"
324  " secstruct,\n"
325  " phi,\n"
326  " psi,\n"
327  " omega,\n"
328  " chi1,\n"
329  " chi2,\n"
330  " chi3,\n"
331  " chi4\n"
332  "FROM\n"
333  " protein_residue_conformation\n"
334  "WHERE\n"
335  " protein_residue_conformation.struct_id=?;";
336  statement stmt(basic::database::safely_prepare_statement(statement_string,db_session));
337  //std::string struct_id_string(to_string(struct_id));
338  stmt.bind(1,struct_id);
339 
340  result res(basic::database::safely_read_from_database(stmt));
341 
342  while(res.next()){
343  Size seqpos;
344  Real phi,psi,omega,chi1,chi2,chi3,chi4;
345  std::string secstruct;
346  res >> seqpos >> secstruct >> phi >> psi >> omega >> chi1 >> chi2 >> chi3 >> chi4 ;
347  if (!pose.residue_type(seqpos).is_protein()){
348  // WARNING why are you storing non-protein in the ProteinSilentReport?
349  continue;
350  }
351  if(!(phi < 0.00001 && psi < 0.00001 && omega < 0.00001) )
352  {
353  pose.set_phi(seqpos,phi);
354  pose.set_psi(seqpos,psi);
355  pose.set_omega(seqpos,omega);
356  }
357  pose.set_secstruct(seqpos,static_cast<char>(secstruct[0]));
358  Size nchi(pose.residue_type(seqpos).nchi());
359  if(1 <= nchi) pose.set_chi(1, seqpos, chi1);
360  if(2 <= nchi) pose.set_chi(2, seqpos, chi2);
361  if(3 <= nchi) pose.set_chi(3, seqpos, chi3);
362  if(4 <= nchi) pose.set_chi(4, seqpos, chi4);
363  }
364 
365  }else{
366  // statement stmt = (*db_session) <<
367  // "SELECT\n"
368  // " seqpos,\n"
369  // " phi,\n"
370  // " psi,\n"
371  // " omega\n"
372  // "FROM\n"
373  // " protein_residue_conformation\n"
374  // "WHERE\n"
375  // " protein_residue_conformation.struct_id=?;" << struct_id;
376  //
377  // result res(basic::database::safely_read_from_database(stmt));
378  // while(res.next()){
379  // Size seqpos;
380  // Real phi,psi,omega;
381  // res >> seqpos >> phi >> psi >> omega;
382  // if (!pose.residue_type(seqpos).is_protein()){
383  // // WARNING why are you storing non-protein in the ProteinSilentReport?
384  // continue;
385  // }
386  //
387  // //pose.set_phi(seqpos,phi);
388  // //pose.set_psi(seqpos,psi);
389  // //pose.set_omega(seqpos,omega);
390  // }
391  }
392 }
393 
394 
395 //This should be factored out into a non-member function.
397  sessionOP db_session,
398  boost::uuids::uuid struct_id,
399  Pose & pose
400 ){
401  // lookup and set all the atoms at once because each query is
402  // roughly O(n*log(n) + k) where n is the size of the tables and k
403  // is the number of rows returned. Doing it all at once means you
404  // only have to pay the n*log(n) cost once.
405 
406  std::string statement_string =
407  "SELECT\n"
408  " seqpos,\n"
409  " atomno,\n"
410  " x,\n"
411  " y,\n"
412  " z\n"
413  "FROM\n"
414  " residue_atom_coords\n"
415  "WHERE\n"
416  " residue_atom_coords.struct_id=?;";
417  statement stmt(basic::database::safely_prepare_statement(statement_string,db_session));
418  //std::string struct_id_string(to_string(struct_id));
419  stmt.bind(1,struct_id);
420 
421  result res(basic::database::safely_read_from_database(stmt));
422 
423  vector1< AtomID > atom_ids;
424  vector1< Vector > coords;
425  while(res.next())
426  {
427  Size seqpos, atomno;
428  Real x,y,z;
429  res >> seqpos >> atomno >> x >> y >> z;
430 
431  atom_ids.push_back(AtomID(atomno, seqpos));
432  coords.push_back(Vector(x,y,z));
433  }
434  // use the batch_set_xyz because it doesn't trigger a coordinate
435  // update after setting each atom.
436  pose.batch_set_xyz(atom_ids,coords);
437 
438 }
439 
440 } // namespace
441 } // namespace