Rosetta 3.5
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
AtomInResidueAtomInResiduePairFeatures.cc
Go to the documentation of this file.
1 // -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
2 // vi: set ts=2 noet:
3 //
4 // (c) Copyright Rosetta Commons Member Institutions.
5 // (c) This file is part of the Rosetta software suite and is made available under license.
6 // (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
7 // (c) For more information, see http://www.rosettacommons.org. Questions about this can be
8 // (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
9 
10 /// @file protocols/features/AtomInResidueAtomInResiduePairFeatures.cc
11 /// @brief report atom-atom pair geometry and scores to features statistics scientific benchmark
12 /// @author Matthew O'Meara
13 
14 // Unit Headers
16 
17 // Project Headers
18 #include <core/chemical/AA.hh>
22 #include <core/graph/Graph.hh>
23 #include <core/pose/Pose.hh>
25 #include <core/scoring/Energies.hh>
26 #include <core/types.hh>
27 #include <utility/sql_database/DatabaseSessionManager.hh>
28 #include <utility/vector1.hh>
29 #include <basic/database/sql_utils.hh>
30 #include <basic/database/schema_generator/PrimaryKey.hh>
31 #include <basic/database/schema_generator/ForeignKey.hh>
32 #include <basic/database/schema_generator/Column.hh>
33 #include <basic/database/schema_generator/Schema.hh>
34 
35 // ObjexxFCL Headers
36 #include <ObjexxFCL/FArray5D.hh>
37 
38 // Numeric Headers
39 #include <numeric/xyzVector.hh>
40 
41 // External Headers
42 #include <cppdb/frontend.h>
43 #include <boost/uuid/uuid_io.hpp>
44 
45 //Auto Headers
46 namespace protocols{
47 namespace features{
48 
49 using std::string;
52 using core::pose::Pose;
53 using core::Size;
54 using core::Distance;
55 using core::Vector;
56 using core::graph::Graph;
59 using ObjexxFCL::FArray5D;
60 using utility::sql_database::sessionOP;
61 using utility::vector1;
62 using cppdb::statement;
63 
65 
68 {}
69 
71 
/// @brief Return this reporter's type name, the literal
/// "AtomInResidueAtomInResiduePairFeatures".
72 string
73 AtomInResidueAtomInResiduePairFeatures::type_name() const { return "AtomInResidueAtomInResiduePairFeatures"; }
74 
75 void
77  sessionOP db_session
78 ) const {
80 }
81 
/// @brief Define the atom_in_residue_pairs table and write its schema.
///
/// The composite primary key is (struct_id, residue_type1, atom_type1,
/// residue_type2, atom_type2, distance_bin); struct_id is additionally a
/// foreign key referencing structures(struct_id). count is the only
/// non-key column and carries a greater-than constraint.
///
/// @param db_session database session passed to Schema::write().
82 void
84  sessionOP db_session
85 ) const {
86  using namespace basic::database::schema_generator;
87 
// NOTE(review): every key column other than struct_id is declared DbText,
// while report_atom_pairs binds integer indices to them -- confirm the
// intended column types.
88  Column struct_id("struct_id", new DbUUID());
89  Column residue_type1("residue_type1", new DbText());
90  Column atom_type1("atom_type1", new DbText());
91  Column residue_type2("residue_type2", new DbText());
92  Column atom_type2("atom_type2", new DbText());
93  Column distance_bin("distance_bin", new DbText());
94  Column count("count", new DbInteger());
95 
// The push_back order below is the column order of the composite key.
96  Columns primary_key_columns;
97  primary_key_columns.push_back(struct_id);
98  primary_key_columns.push_back(residue_type1);
99  primary_key_columns.push_back(atom_type1);
100  primary_key_columns.push_back(residue_type2);
101  primary_key_columns.push_back(atom_type2);
102  primary_key_columns.push_back(distance_bin);
103  PrimaryKey primary_key(primary_key_columns);
104 
// struct_id -> structures(struct_id).
// NOTE(review): the meaning of the trailing `true` is not visible here;
// check the ForeignKey constructor.
105  Columns foreign_key_columns;
106  foreign_key_columns.push_back(struct_id);
107  vector1< std::string > reference_columns;
108  reference_columns.push_back("struct_id");
109  ForeignKey foreign_key(foreign_key_columns, "structures", reference_columns, true);
110 
// NOTE(review): named "non_negative" but built as GreaterThanConstraint(count, 0),
// which presumably enforces count > 0 -- confirm which one is intended.
111  GreaterThanConstraintOP count_is_non_negative( new GreaterThanConstraint(count, 0));
112 
113  Schema table("atom_in_residue_pairs", primary_key);
114  table.add_foreign_key(foreign_key);
115  table.add_constraint(count_is_non_negative);
116  table.add_column(count);
117 
118  table.write(db_session);
119 }
120 
121 
// Builds and returns a one-element list naming "ResidueFeatures" as the
// only dependency.
124  utility::vector1<std::string> dependencies;
125  dependencies.push_back("ResidueFeatures");
126  return dependencies;
127 }
128 
/// @brief Report atom-pair counts for one structure by forwarding every
/// argument unchanged to report_atom_pairs().
/// @param pose structure to report on.
/// @param relevant_residues per-residue mask forwarded to report_atom_pairs().
/// @param struct_id structure identifier forwarded to report_atom_pairs().
/// @param db_session database session forwarded to report_atom_pairs().
/// @return Always 0.
129 Size
131  Pose const & pose,
132  vector1< bool > const & relevant_residues,
133  boost::uuids::uuid const struct_id,
134  sessionOP db_session
135 ){
136  report_atom_pairs(pose, relevant_residues, struct_id, db_session);
137  return 0;
138 }
139 
140 /// @details This is very similar in spirit to the potential described in
141 ///
142 ///Lu H, Skolnick J. A distance-dependent atomic knowledge-based potential for improved protein structure selection. Proteins. 2001;44(3):223-32. Available at: http://www.ncbi.nlm.nih.gov/pubmed/11455595.
143 ///
144 /// However, they use different distance bins. Here, [0,1), ...,
145 /// [9,10) are used because they are easy and, as they report in the
146 /// paper, most of the signal comes in the 3.5-6.5 range. As for the
147 /// molar fraction of atom types: since the types are unique within
148 /// each residue type, there is exactly one per residue of that type.
149 /// Therefore this information can be extracted from the Residues
150 /// table when needed. It may make sense to include it here if it
151 /// turns out to be too cumbersome to get those quantities.
152 ///
153 /// TODO: Expand for not just canonical residue types
154 
155 
156 void
158  Pose const & pose,
159  vector1< bool > const & relevant_residues,
160  boost::uuids::uuid const struct_id,
161  sessionOP db_session
162 ){
163 
164  // assert pose.update_residue_neighbors() has been called:
165  runtime_assert(
166  !pose.conformation().structure_moved() &&
168 
169  Size const max_res(num_canonical_aas);
170  Size const max_atm(30); // check this
171  Size const dist_bins(15);
172  FArray5D< Size > counts;
173  counts.dimension(max_res, max_atm, max_res, max_atm, dist_bins, 1);
174 
175  TenANeighborGraph const & tenA( pose.energies().tenA_neighbor_graph() );
176 
177 
178  for(Size resNum1=1; resNum1 <= pose.total_residue(); ++resNum1){
179  Residue res1( pose.residue(resNum1) );
180  if(!relevant_residues[resNum1]) continue;
181 
183  ir = tenA.get_node( resNum1 )->const_edge_list_begin(),
184  ire = tenA.get_node( resNum1 )->const_edge_list_end();
185  ir != ire; ++ir ) {
186  Size resNum2( (*ir)->get_other_ind(resNum1) );
187  if(!relevant_residues[resNum2]) continue;
188 
189  Residue res2( pose.residue(resNum2) );
190 
191  for(Size atmNum1=1; atmNum1 <= res1.natoms(); ++atmNum1){
192  Vector const & atm1_xyz( res1.xyz(atmNum1) );
193 
194  for(Size atmNum2=1; atmNum2 <= res2.natoms(); ++atmNum2){
195  Vector const & atm2_xyz( res2.xyz(atmNum2) );
196 
197  Size const dist_bin(static_cast<Size>(ceil(atm1_xyz.distance(atm2_xyz))));
198  if(dist_bin < 15){
199  counts(res1.aa(), atmNum1, res2.aa(), atmNum2, dist_bin) += 1;
200  }
201  }
202  }
203  }
204  }
205 
206  std::string stmt_string = "INSERT INTO atom_in_residue_pairs (struct_id, residue_type1, atom_type1, residue_type2, atom_type2, distance_bin, count) VALUES (?,?,?,?,?,?,?);";
207  cppdb::statement stmt(basic::database::safely_prepare_statement(stmt_string,db_session));
208 
209  for(Size aa1=1; aa1 <= max_res; ++aa1){
210  for(Size aa2=1; aa2 <= max_res; ++aa2){
211  for(Size atmNum1=1; atmNum1 <= max_atm; ++atmNum1){
212  for(Size atmNum2=1; atmNum2 <= max_atm; ++atmNum2){
213  for(Size dist_bin=1; dist_bin <= 15; ++dist_bin){
214  Size const count(counts(aa1, atmNum1, aa2, atmNum2, dist_bin));
215  stmt.bind(1,struct_id);
216  stmt.bind(2,aa1);
217  stmt.bind(3,atmNum1);
218  stmt.bind(4,aa2);
219  stmt.bind(5,atmNum2);
220  stmt.bind(6,dist_bin);
221  stmt.bind(7,count);
222  basic::database::safely_write_to_database(stmt);
223  }
224  }
225  }
226  }
227  }
228 }
229 
230 } // namespace
231 } // namespace