Rosetta 3.5
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
DatabaseJobInputter.cc
Go to the documentation of this file.
1 // -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
2 // vi: set ts=2 noet:
3 //
4 // (c) Copyright Rosetta Commons Member Institutions.
5 // (c) This file is part of the Rosetta software suite and is made available under license.
6 // (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
7 // (c) For more information, see http://www.rosettacommons.org. Questions about this can be
8 // (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
9 
10 /// @file protocols/features/DatabaseJobInputter.cc
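11 /// @brief Implementation of DatabaseJobInputter, which builds its job list from struct_ids stored in a database and loads each pose from that database.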
12 /// @author Matthew O'Meara (mattjomeara@gmail.com)
13 
14 // Unit Headers
18 #include <protocols/jd2/Job.hh>
20 
21 // Project Headers
22 #include <basic/Tracer.hh>
23 #include <basic/options/option.hh>
24 #include <basic/options/keys/in.OptionKeys.gen.hh>
25 #include <basic/options/keys/inout.OptionKeys.gen.hh>
26 #include <basic/database/sql_utils.hh>
28 #include <core/kinematics/Jump.hh>
29 #include <core/pose/Pose.hh>
30 #include <core/pose/util.hh>
33 
34 
35 // Utility Headers
36 #include <utility/vector1.hh>
37 #include <utility/file/FileName.hh>
38 #include <utility/string_util.hh>
39 #include <utility/sql_database/DatabaseSessionManager.hh>
40 
41 // Boost Headers
42 #include <boost/foreach.hpp>
43 #define foreach BOOST_FOREACH
44 
45 // External Headers
46 #include <cppdb/frontend.h>
47 #include <boost/uuid/uuid_io.hpp>
48 #include <boost/uuid/string_generator.hpp>
49 
50 
51 // C++ headers
52 #include <string>
53 #include <sstream>
54 
// File-scope tracer; the log statements in this file write through it
// (tr, tr.Debug, tr.Trace).
55 static basic::Tracer tr("protocols.features.DatabaseJobInputter");
56 
57 namespace protocols {
58 namespace features {
59 
60 using std::string;
61 using std::stringstream;
62 using std::map;
63 using std::endl;
64 using core::Size;
66 using core::pose::Pose;
73 using utility::vector1;
74 using utility::sql_database::DatabaseSessionManager;
75 using utility::sql_database::sessionOP;
76 using boost::uuids::uuid;
77 using cppdb::result;
78 
79 
80 
// Member-initializer list and body of what appears to be the
// DatabaseJobInputter constructor (the declarator line is not part of this
// listing). scfxn_ is initialized with a newly allocated ScoreFunction and
// protein_silent_report_ with a newly allocated ProteinSilentReport.
82  scfxn_(new ScoreFunction()),
83  protein_silent_report_(new ProteinSilentReport())
84 
85 {
86  tr.Debug << "Instantiate DatabaseJobInputter" << endl;
// NOTE(review): listing line 87 is absent from this view; confirm against the
// original file whether the body does more than emit the debug message.
88 }
89 
91 
92 
/// @brief Configure this inputter from the option system.
/// @details Reads `option` and forwards user-specified values to the setters:
/// the database name, the pq_schema, a list of struct_id strings, and/or an
/// SQL statement that selects struct_ids. Exits if both the struct_id list and
/// the SQL selection were given by the user.
/// NOTE(review): the function's declarator line is not visible in this listing.
93 void
95  using namespace basic::options;
96  using namespace basic::options::OptionKeys;
97 
  // Only values explicitly set by the user (.user()) are applied.
98  if (option.has(inout::dbms::database_name) &&
99  option[inout::dbms::database_name].user()){
100  set_database_name(option[inout::dbms::database_name]);
101  }
102 
103  if (option.has(inout::dbms::pq_schema) &&
104  option[inout::dbms::pq_schema].user()){
105  set_database_pq_schema(option[inout::dbms::pq_schema]);
106  }
107 
108  // The in::file::tags option was created for the silent file
109  // system--but using it makes sense here because, it serves the same
110  // purpose: specify which structures to use from the data source.
  // NOTE(review): the code below reads in::dbms::struct_ids, not
  // in::file::tags, so the comment above looks stale -- confirm and update.
111 
112  if (option.has(in::dbms::struct_ids) && option[in::dbms::struct_ids].user()){
113  set_struct_ids_from_strings(option[in::dbms::struct_ids]);
114  }
  // The two selection mechanisms are mutually exclusive. Note that this check
  // runs after the struct_id strings have already been applied above, and that
  // it indexes both options without an option.has() guard.
  // NOTE(review): "simultaniously" in the message below is misspelled.
115  if(option[in::dbms::struct_ids].user() && option[in::select_structures_from_database].user()) {
116  utility_exit_with_message("you cannot use -in:dbms:struct_ids and -in:select_structures_from_database simultaniously");
117  }
118 
119  if (option[in::select_structures_from_database].user()) {
120  set_struct_ids_from_sql(option[in::select_structures_from_database]);
121  }
122 
123  //TODO do we want this still?
124 // input_protocol_id_ = option[in::database_protocol];
125 
126 }
127 
/// @brief Mark the options this inputter cares about as relevant.
/// @details Calls option.add_relevant() for the inout::dbms connection
/// options and for in::file::tags.
/// NOTE(review): the declarator line is not visible in this listing.
/// NOTE(review): in::dbms::struct_ids and in::select_structures_from_database
/// are read elsewhere in this file but are not registered here, while
/// in::file::tags is registered but not read in the visible code -- confirm
/// which set is intended.
128 void
130  using namespace basic::options;
131  using namespace basic::options::OptionKeys;
132  option.add_relevant( inout::dbms::database_name );
133  option.add_relevant( inout::dbms::pq_schema );
134  option.add_relevant( inout::dbms::host );
135  option.add_relevant( inout::dbms::user );
136  option.add_relevant( inout::dbms::password );
137  option.add_relevant( inout::dbms::port );
138  option.add_relevant( inout::dbms::readonly );
139  option.add_relevant( inout::dbms::separate_db_per_mpi_process );
140 
141  option.add_relevant( in::file::tags );
142 }
143 
/// @brief Store the name of the database to read structures from.
/// @param database_name value copied into database_name_; no validation is
/// done here.
/// NOTE(review): the declarator line is not visible in this listing.
144 void
146  string const & database_name
147 ) {
148  database_name_ = database_name;
149 }
150 
/// @brief Return the stored database name, exiting if none has been set.
/// @return a copy of database_name_.
/// @details Calls utility_exit_with_message() when database_name_ is the
/// empty string.
/// NOTE(review): the declarator line is not visible in this listing.
/// NOTE(review): the message text reads "where thinput is data is stored",
/// which looks like a typo for "where the input data is stored".
151 string
153  if(database_name_ == ""){
154  utility_exit_with_message(
155  "To use the DatabaseJobInputter, please specify the database "
156  "where thinput is data is stored, eg. via the -inout:dbms:database_name "
157  "<database_name> option system flag.");
158  }
159  return database_name_;
160 }
161 
/// @brief Store the pq_schema passed to get_db_session() elsewhere in this file.
/// @param database_pq_schema value copied into database_pq_schema_.
/// NOTE(review): the declarator line is not visible in this listing.
162 void
164  string const & database_pq_schema
165 ) {
166  database_pq_schema_ = database_pq_schema;
167 }
168 
/// @brief Return the stored pq_schema.
/// @return a copy of database_pq_schema_; unlike the database-name getter,
/// an empty value is returned as-is.
/// NOTE(review): the declarator line is not visible in this listing.
169 string
171  return database_pq_schema_;
172 }
173 
174 
175 /// @brief Get score function
/// @return the scfxn_ member.
/// NOTE(review): the return type and declarator lines are not visible in this
/// listing.
178  return scfxn_;
179 }
180 
181 /// @brief Set score function
/// @details Replaces scfxn_ with the `scorefunction` argument.
/// NOTE(review): the declarator line (and thus the parameter type) is not
/// visible in this listing.
182 void
184  scfxn_ = scorefunction;
185 }
186 
/// @brief Register structures to load from textual struct_ids.
/// @param struct_id_strings one UUID string per structure.
/// @details Each string is parsed with boost::uuids::string_generator and
/// stored in tag_structures_, keyed by the original string (so the string
/// doubles as the tag). If parsing throws, the process exits with a message
/// describing the expected UUID format.
/// NOTE(review): the declarator line is not visible in this listing.
187 void
189 utility::vector1<string> const & struct_id_strings){
190 
  // The loop is 1-based and inclusive of size().
191  for(core::Size i=1; i<=struct_id_strings.size(); ++i){
192  boost::uuids::string_generator gen;
193  try{
194  boost::uuids::uuid struct_id = gen(struct_id_strings[i]);
195  tag_structures_[struct_id_strings[i]] = struct_id;
196  }
  // catch(...) turns any exception from the try block into a fatal exit.
197  catch(...){
198  stringstream err_msg;
199  err_msg
200  << "Unable to convert the struct_id '" << struct_id_strings[i]
201  << "' to a valid uuid, it should be of the form XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX or XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX where each 'X' is in [0..9a..f]" << endl;
202  utility_exit_with_message(err_msg.str());
203  }
204  }
205 }
206 
207 /// @details The specified struct_ids indicate which structures should be
208 /// used. If no ids are specified, then all will be used. Unless a tag column
209 /// is specified in the SQL statement, the job name (and
210 /// consequently, the file output name) will be an ASCII hexadecimal representation
211 /// of the struct_id (a boost UUID). If a tag column is given, then the file name will
212 /// be the tag associated with the given row.
/// @param sql pieces of an SQL statement; they are joined with single spaces
/// before being executed.
/// @details The result must contain a "struct_id" column, otherwise the
/// process exits. Optional "tag" and "resnum" columns are used when present:
/// "tag" becomes the key in tag_structures_, and each "resnum" is inserted
/// into tag_residues_[tag].
/// NOTE(review): the declarator line is not visible in this listing.
213 void
215  utility::vector1<string> const & sql)
216 {
217  string sql_command(utility::join(sql, " "));
218  basic::database::check_statement_sanity(sql_command);
219 
  // NOTE(review): database_name_ is used directly here, so the empty-name
  // check in the database-name getter of this file is not exercised.
220  sessionOP db_session(
221  basic::database::get_db_session(database_name_, database_pq_schema_));
222 
  // Retry the statement until it succeeds. On cppdb_error the loop sleeps via
  // usleep(10) (microseconds per POSIX; skipped on WIN32) and tries again.
  // NOTE(review): there is no retry limit and the error is discarded, so a
  // statement that fails every time (e.g. malformed SQL) would loop forever
  // without any diagnostic -- confirm whether a bound/log is wanted.
223  result res;
224  while(true)
225  {
226  try
227  {
228  res = (*db_session) << sql_command;
229  break;
230  }catch(cppdb::cppdb_error &)
231  {
232  #ifndef WIN32
233  usleep(10);
234  #endif
235  continue;
236  }
237  }
238 
  // find_column() is compared against -1 to detect an absent column.
239  bool res_nums_specified = false;
240  if(res.find_column("resnum") != -1){res_nums_specified=true;}
241 
242  bool tags_specified = false;
243  if(res.find_column("tag") != -1){tags_specified=true;}
244 
245  if(res.find_column("struct_id") != -1){
246  while(res.next()){
247  boost::uuids::uuid struct_id;
248  res.fetch("struct_id", struct_id);
249 
250  std::string tag;
251  if(tags_specified){
252  res.fetch("tag", tag);
  // A tag may repeat across rows only if it always maps to the same
  // struct_id; a tag already bound to a different struct_id is fatal.
  // NOTE(review): "non-unque" and "ambigous" in the message are misspelled.
253  if(tag_structures_.count(tag) > 0 && tag_structures_[tag] != struct_id){
254  utility_exit_with_message("You have specified non-unque input tags which can cause ambigous output. Please make input tags unique");
255  }
256  }
257  else{
  // No tag column: the textual form of the struct_id is used as the tag.
258  tag = to_string(struct_id);
259  }
260  tag_structures_[tag] = struct_id;
261 
262  if(res_nums_specified){
263  core::Size resnum;
264  res.fetch("resnum", resnum);
265  tag_residues_[tag].insert(resnum);
266  }
267  }
  // NOTE(review): this tests the accumulated tag_structures_ member, which
  // other setters in this file also fill, not just the rows of this query.
268  if(!tag_structures_.size()){
269  utility_exit_with_message("The provided SQL query did not produce any struct_ids");
270  }
271  }
272  else{
273  utility_exit_with_message("Must provide an SQL SELECT command that selects the struct_id column from the structures table");
274  }
275 }
276 
277 /// @details This function will first see if the pose already exists in the Job.
278 /// If not, it will read it into the pose reference, and hand a COP cloned from
279 /// that pose to the Job. If the pose pre-exists it just copies the COP's pose
280 /// into it.
/// @param pose output; cleared first, then filled either from the database or
/// from the pose saved on the job's inner job.
/// NOTE(review): the declarator line, the `job` parameter line and listing
/// line 316 are not visible in this listing; the "hand a COP ... to the Job"
/// step described above is not among the visible statements -- presumably it
/// is on the missing line 316, confirm against the original file.
281 void
283  Pose & pose,
285 ) {
286  using namespace basic::options;
287  using namespace basic::options::OptionKeys;
288  tr.Debug << "DatabaseJobInputter::pose_from_job" << std::endl;
289  string tag(job->input_tag());
290  pose.clear();
291 
292  if ( !job->inner_job()->get_pose() ) {
293  tr.Debug << "filling pose from Database (input tag = " << tag << ")" << endl;
294  sessionOP db_session(
295  basic::database::get_db_session(database_name_, database_pq_schema_));
296 
  // NOTE(review): tag_structures_ is iterated as a std::map elsewhere in this
  // file, so operator[] here inserts a default-constructed uuid when the tag
  // is unknown instead of reporting an error -- confirm this cannot happen.
297  boost::uuids::uuid struct_id = tag_structures_[tag];
298 
  // Load the whole structure when no residue subsets were recorded at all;
  // otherwise load only the residues recorded for this tag.
  // NOTE(review): the test looks at the entire tag_residues_ container, not
  // at the entry for this tag.
299  if(!tag_residues_.size()){
300  protein_silent_report_->load_pose(db_session, struct_id, pose);
301  }
302  else{
303  tr << "Residues list size " << tag_residues_[tag].size() << std::endl;
304  protein_silent_report_->load_pose(db_session, struct_id, tag_residues_[tag], pose);
305  }
306 
307  } else {
308  tr.Debug << "filling pose from saved copy (input tag = " << tag << ")" << endl;
309  pose = *(job->inner_job()->get_pose());
310  }
311 
312  // TODO: Move to pose.clear()
  // Applies to both branches: a symmetric pose is converted via
  // make_asymmetric_pose() before returning.
313  if (is_symmetric(pose)) make_asymmetric_pose( pose );
314 
315 
317 
318 }
319 
320 /// @details this function determines what jobs exist
/// @details `jobs` is cleared and then filled with one Job per
/// (inner job, nstruct index) pair. If no struct_ids were registered
/// beforehand, every struct_id in the `structures` table is read and used,
/// with the textual form of the struct_id as its tag.
/// NOTE(review): the declarator line (listing line 321) and the declaration of
/// `inner_jobs` (listing line 357) are not visible in this listing.
322  tr.Debug << "DatabaseJobInputter::fill_jobs" << std::endl;
323  jobs.clear(); //should already be empty anyway
324 
325  Size const nstruct(get_nstruct());
326 
327  if(!tag_structures_.size()){
328 
329  tr << "Reading all struct_ids from database ... ";
330 
  // NOTE(review): database_name_ is used directly here, so the empty-name
  // check in the database-name getter of this file is not exercised.
331  sessionOP db_session(
332  basic::database::get_db_session(database_name_, database_pq_schema_));
333 
  // Retry until the query succeeds; on cppdb_error sleep via usleep(10)
  // (microseconds per POSIX; skipped on WIN32) and try again.
  // NOTE(review): no retry limit and the error is discarded, so a persistent
  // failure would loop forever without a diagnostic.
334  result res;
335  while(true)
336  {
337  try
338  {
339  res = (*db_session) << "SELECT struct_id FROM structures;";
340  break;
341  }catch(cppdb::cppdb_error &)
342  {
343  #ifndef WIN32
344  usleep(10);
345  #endif
346  continue;
347  }
348  }
349  while(res.next()){
350  boost::uuids::uuid struct_id;
351  res >> struct_id;
352  tag_structures_[to_string(struct_id)]=struct_id;
353  }
354  tr << tag_structures_.size() << " struct_ids found." << endl;
355  }
356 
358  //save list of all inner_jobs first... this allows better sampling
359  //of jobs in case of unfinished runs:
360  // input1_0001
361  // input2_0001
362  // ...
363  // inputn_0001
364  // input1_0002
365  // input2_0002
366  // ....
367  tr.Debug << "reserve memory for InnerJob List " << tag_structures_.size() << endl;
368  inner_jobs.reserve( tag_structures_.size() );
369  tr.Debug
370  << "fill list with " << tag_structures_.size()
371  << " InnerJob Objects" << endl;
372 
  // One InnerJob per tag, in the map's iteration order.
373  for(std::map<std::string, boost::uuids::uuid>::const_iterator iter=tag_structures_.begin(); iter!=tag_structures_.end(); ++iter){
374  inner_jobs.push_back(new protocols::jd2::InnerJob(iter->first, nstruct));
375  }
376 
377  tr.Debug
378  << "reserve list for " << inner_jobs.size() * nstruct
379  << " Job Objects" << endl;
380 
381  jobs.reserve(inner_jobs.size() * nstruct);
382 
  // The nstruct index is the outer loop and the inner jobs the inner loop,
  // which yields the interleaved order sketched in the comment above.
383  tr.Debug << "fill job list with... " << endl;
384  for ( Size index = 1; index <= nstruct; ++index ) {
385  foreach(protocols::jd2::InnerJobOP ijob, inner_jobs){
386  jobs.push_back(new protocols::jd2::Job(ijob, index));
387  tr.Trace
388  << "pushing " << ijob->input_tag() << " nstruct index " << index << std::endl;
389  }
390  }
391 }
392 
393 /// @brief Return the type of input source that the
394 /// DatabaseJobInputter is currently using.
395 /// @return Always <em>DATABASE</em>.
398 }
399 
400 //CREATOR SECTION
// Returns the literal string "DatabaseJobInputter".
// NOTE(review): the return type and declarator lines are not visible in this
// listing; presumably this is the creator's key/name accessor -- confirm.
403 {
404  return "DatabaseJobInputter";
405 }
406 
// Returns a newly allocated, default-constructed DatabaseJobInputter.
// NOTE(review): the return type and declarator lines are not visible in this
// listing, so how the returned pointer is owned cannot be determined here.
409  return new DatabaseJobInputter;
410 }
411 
412 } // namespace features
413 } // namespace protocols