Rosetta 3.5
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
ReportToDB.cc
Go to the documentation of this file.
1 // -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
2 // vi: set ts=2 noet:
3 //
4 // This file is part of the Rosetta software suite and is made available under license.
5 // The Rosetta software is developed by the contributing members of the Rosetta Commons consortium.
6 // (C) 199x-2009 Rosetta Commons participating institutions and developers.
7 // For more information, see http://www.rosettacommons.org/.
8 
9 /// @file protocols/features/ReportToDB.cc
10 ///
11 /// @brief report all data to a database
12 /// @author Matthew O'Meara (mattjomeara@gmail.com)
13 
14 
15 #ifdef USEMPI
16 #include <mpi.h>
17 #endif
18 
20 #include <string>
21 
22 // Setup Mover
24 #include <basic/database/sql_utils.hh>
25 
26 
27 // Platform Headers
28 #include <basic/Tracer.hh>
29 #include <basic/datacache/CacheableString.fwd.hh>
30 #include <basic/options/keys/parser.OptionKeys.gen.hh>
31 #include <basic/options/option.hh>
34 #include <core/kinematics/Jump.hh>
37 #include <core/pose/Pose.hh>
38 #include <core/pose/PDBInfo.hh>
51 
52 // Utility Headers
53 #include <utility/vector0.hh>
54 #include <utility/vector1.hh>
55 #include <utility/tag/Tag.hh>
56 #include <utility/string_util.hh>
57 #include <basic/database/schema_generator/PrimaryKey.hh>
58 #include <basic/database/schema_generator/ForeignKey.hh>
59 #include <basic/database/schema_generator/Column.hh>
60 #include <basic/database/schema_generator/Schema.hh>
61 #include <basic/resource_manager/ResourceManager.hh>
62 #include <basic/resource_manager/util.hh>
63 
64 // Numeric Headers
65 #include <numeric>
66 
67 // Boost Headers
68 #include <boost/foreach.hpp>
69 #include <boost/uuid/uuid.hpp>
70 #include <boost/uuid/uuid_io.hpp>
71 
72 
73 // C++ Headers
74 #include <utility/excn/Exceptions.hh>
75 #include <sstream>
76 
77 
78 namespace protocols{
79 namespace features{
80 
83 {
85 }
86 
89  return new ReportToDB;
90 }
91 
94 {
95  return "ReportToDB";
96 }
97 
98 /// Macros are not properly caught and passed along by my #inclusion
99 /// cleanup script
100 #define foreach BOOST_FOREACH
101 
102 using basic::T;
103 using basic::Tracer;
104 using basic::Error;
105 using basic::Warning;
106 using basic::datacache::CacheableString;
107 using basic::database::safely_prepare_statement;
108 using basic::database::safely_write_to_database;
109 using basic::database::safely_read_from_database;
110 using basic::database::get_db_session;
111 using basic::database::set_cache_size;
112 using core::Size;
115 using core::pose::Pose;
116 using core::pose::PoseOP;
124 using cppdb::cppdb_error;
125 using cppdb::statement;
126 using cppdb::result;
138 using basic::database::parse_database_connection;
139 using std::string;
140 using std::endl;
141 using std::accumulate;
142 using std::stringstream;
143 using boost::uuids::uuid;
145 using utility::vector0;
146 using utility::vector1;
148 using utility::sql_database::DatabaseSessionManager;
149 using utility::sql_database::session;
150 using utility::sql_database::sessionOP;
151 
152 static Tracer TR("protocols.features.ReportToDB");
153 
155  Mover("ReportToDB"),
156  db_session_(),
157  sample_source_("Rosetta: Unknown Protocol"),
158  use_transactions_(true),
159  cache_size_(2000),
160  remove_xray_virt_(false),
161  protocol_id_(0),
162  batch_id_(0),
163  task_factory_(new TaskFactory()),
164  features_reporter_factory_(FeaturesReporterFactory::get_instance()),
165  features_reporters_(),
166  initialized( false )
167 {
169 }
170 
171 ReportToDB::ReportToDB(string const & name):
172  Mover(name),
173  db_session_(),
174  sample_source_("Rosetta: Unknown Protocol"),
175  use_transactions_(true),
176  cache_size_(2000),
177  remove_xray_virt_(false),
178  protocol_id_(0),
179  batch_id_(0),
180  task_factory_(new TaskFactory()),
181  features_reporter_factory_(FeaturesReporterFactory::get_instance()),
182  features_reporters_(),
183  initialized( false )
184 {
186 }
187 
189  string const & name,
190  sessionOP db_session,
191  string const & sample_source,
192  bool use_transactions,
193  Size cache_size) :
194  Mover(name),
195  db_session_(db_session),
196  sample_source_(sample_source),
197  use_transactions_(use_transactions),
198  cache_size_(cache_size),
199  remove_xray_virt_(false),
200  protocol_id_(0),
201  batch_id_(0),
202  task_factory_(new TaskFactory()),
203  features_reporter_factory_(FeaturesReporterFactory::get_instance()),
204  features_reporters_(),
205  initialized( false )
206 {
208 }
209 
211  Mover(src),
212  db_session_(src.db_session_),
213  sample_source_(src.sample_source_),
214  name_(src.name_),
215  use_transactions_(src.use_transactions_),
216  cache_size_(src.cache_size_),
217  remove_xray_virt_(src.remove_xray_virt_),
218  protocol_id_(src.protocol_id_),
219  batch_id_(src.batch_id_),
220  task_factory_(src.task_factory_),
221  features_reporter_factory_(FeaturesReporterFactory::get_instance()),
222  protocol_features_(src.protocol_features_),
223  batch_features_(src.batch_features_),
224  structure_features_(src.structure_features_),
225  features_reporters_(src.features_reporters_),
226  initialized(src.initialized)
227 {
228  TR << "ReportToDB copy ctor called" << std::endl;
229 }
230 
232 
233 void
235  using basic::options::option;
236  using namespace basic::options::OptionKeys;
237 
238  // This mover is equiped to work with the Rosetta Scripts interface
239  option.add_relevant( parser::protocol );
240 
241  //TODO call relevant_options on FeaturesMover objects
242 }
243 
245 
247 {
248  return new ReportToDB( *this );
249 }
250 
251 void
253  TagPtr const tag){
254  if( tag->hasOption("sample_source") ){
255  sample_source_ = tag->getOption<string>("sample_source");
256  } else {
257  TR << "Field 'sample_source' required for use of ReportToDB in Rosetta Scripts." << endl;
258  TR << "The sample_source should describe where the samples came from. To access the description run \"sqlite3 'select description from sample_source' fname.db3\"" << endl;
259  TR << "For example: Top4400 natives from Richardson Lab. Reduce placed hydrogens with -correct flag." << endl;
260  }
261 }
262 
263 void
265  if( tag->hasOption("name") ){
266  name_=tag->getOption<string>("name");
267  } else {
268  TR << "Field 'name' required for use of ReportToDB in Rosetta Scripts." << endl;
269  }
270 }
271 
272 void
274  TagPtr const tag){
275 
276  if(tag->hasOption("protocol_id")){
277 #ifdef USEMPI
278  int mpi_rank(0);
279  MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank);
280  protocol_id_ = tag->getOption<Size>("protocol_id") + mpi_rank;
281 #else
282  protocol_id_ = tag->getOption<Size>("protocol_id");
283 #endif
284  }
285 #ifdef USEMPI
286  else {
287  protocol_id_ = 0;
288  }
289 #endif
290 
291 }
292 
293 void
295  TagPtr const tag) {
296  if(tag->hasOption("use_transactions")){
297  use_transactions_ = tag->getOption<bool>("use_transactions");
298  }
299 }
300 
301 void
303  TagPtr const tag) {
304  if(tag->hasOption("cache_size")){
305  cache_size_ = tag->getOption<bool>("cache_size");
306  }
307 }
308 
309 void
311  TagPtr const tag) {
312  if(tag->hasOption("remove_xray_virt")){
313  remove_xray_virt_ = tag->getOption<bool>("remove_xray_virt");
314  }
315 }
316 
317 /// Allow ReportToDB to be called from RosettaScripts
318 /// See
319 void
321  TagPtr const tag,
322  DataMap & data,
323  Filters_map const & filters,
324  Movers_map const & movers,
325  Pose const & pose )
326 {
327  if(tag->hasOption("db")){
328  throw utility::excn::EXCN_RosettaScriptsOption("The 'db' tag has been deprecated. Please use 'database_name' instead.");
329  }
330 
331  if(tag->hasOption("db_mode")){
332  throw utility::excn::EXCN_RosettaScriptsOption("The 'database_mode' tag has been deprecated. Please use 'database_mode' instead.");
333  }
334 
335  if(tag->hasOption("separate_db_per_mpi_process")){
336  throw utility::excn::EXCN_RosettaScriptsOption("The 'parse_separate_db_per_mpi_process' tag has been deprecated. Please use 'database_parse_separate_db_per_mpi_process' instead.");
337  }
338 
339  // Name of output features database:
340  // EXAMPLE: db=features_<sample_source>.db3
341  // REQUIRED
342  if(tag->hasOption("resource_description")){
343  std::string resource_description = tag->getOption<string>("resource_description");
344  if ( ! basic::resource_manager::ResourceManager::get_instance()->has_resource_with_description( resource_description ) )
345  {
346  throw utility::excn::EXCN_Msg_Exception
347  ( "You specified a resource_description of " + resource_description +
348  " for ReportToDB, but the ResourceManager doesn't have a resource with that description" );
349  }
350  db_session_ = basic::resource_manager::get_resource< utility::sql_database::session >( resource_description );
351  }
352  else{
353  db_session_ = parse_database_connection(tag);
354  }
355 
356  // Description of features database
357  // EXAMPLE: sample_source="This is a description of the sample source."
358  // RECOMMENDED
360 
361  // Name of report to db mover. A new batch will be created for each uniquely named
362  // ReportToDb mover
363  // EXAMPLE: name="initial_feature_extraction"
364  // RECOMMENDED
365  parse_name_tag_item(tag);
366 
367  // Manually control the id of associated with this protocol
368  // EXAMPLE: protocol_id=6
369  // OPTIONAL default is to autoincrement the protocol_id in the protocols table
371 
372  // Use transactions to group database i/o to be more efficient. Turning them off can help debugging.
373  // EXAMPLE: use_transactions=true
374  // DEFAULT: TRUE
376 
377  // Specify the maximum number 1k pages to keep in memory before writing to disk
378  // EXAMPLE: cache_size=1000000 // this uses ~ 1GB of memory
379  // DEFAULT: 2000
381 
382  // Remove virtual residue attached during xray refine process
383  // EXAMPLE: remove_xray_virt=true
384  // DEFAULT: FALSE
386 
388 
389  vector0< TagPtr >::const_iterator begin=tag->getTags().begin();
390  vector0< TagPtr >::const_iterator end=tag->getTags().end();
391 
392  for(; begin != end; ++begin){
393  TagPtr feature_tag= *begin;
394  // foreach(TagPtr const & feature_tag, tag->getTags()){
395 
396  if(feature_tag->getName() != "feature"){
397  TR.Error << "Please include only tags with name 'feature' as subtags of ReportToDB" << endl;
398  TR.Error << "Tag with name '" << feature_tag->getName() << "' is invalid" << endl;
399  throw utility::excn::EXCN_RosettaScriptsOption("");
400  }
401 
402  FeaturesReporterOP features_reporter(
404  feature_tag, data, filters, movers, pose));
405 
406  check_features_reporter_dependencies(features_reporter);
407 
408  // TODO IMPLMENT THIS:
409  //check_multiple_features_reporter_definitions(features_reporter);
410 
411  features_reporters_.push_back(features_reporter);
412 
413  }
414 
415 }
416 
417 
418 void
420  FeaturesReporterOP test_features_reporter
421 ) const {
422 
423  foreach(string const dependency,
424  test_features_reporter->features_reporter_dependencies()){
425 
426  // These are defined by default
427  if(dependency == "ProtocolFeatures" || dependency == "BatchFeatures" || dependency == "StructureFeatures"){
428  continue;
429  }
430 
431  bool exists(false);
432  foreach(FeaturesReporterOP features_reporter, features_reporters_){
433  if(features_reporter->type_name() == dependency){
434  exists = true;
435  break;
436  }
437  }
438  if(!exists){
439  stringstream error_msg;
440  error_msg
441  << "For batch '" << name_ << "'," << endl
442  << "the dependencies for the '" << test_features_reporter->type_name() << "'"
443  << " reporter are not satisfied because the '" << dependency << "' has not been defined yet." << endl
444  << "These are the FeaturesReporters that have been defined:" << endl
445  << "\tProtocolFeatures (included by default)" << endl
446  << "\tStructureFeatures (included by default)" << endl;
447  foreach(FeaturesReporterOP features_reporter, features_reporters_){
448  error_msg
449  << "\t" << features_reporter->type_name() << endl;
450  }
451  utility_exit_with_message(error_msg.str());
452  }
453  }
454 }
455 
456 void
458 {
459  // the protocols, batches, and structure features are special
463 }
464 
465 void
467 
468  if (!initialized){
469  if(use_transactions_) db_session_->begin();
470 
471  protocol_features_->write_schema_to_db(db_session_);
472  batch_features_->write_schema_to_db(db_session_);
473  structure_features_->write_schema_to_db(db_session_);
474 
476 
477  foreach( FeaturesReporterOP const & reporter, features_reporters_ ){
478  reporter->write_schema_to_db(db_session_);
479  }
480 
481  if(use_transactions_) db_session_->commit();
482 
483  initialized = true;
484  }
485 }
486 
489  Pose & pose
490 ) const {
491 
492  if (remove_xray_virt_) {
493  TR << "Removing virtual residue left behind by xray refinement" << endl;
494  while (pose.residue( pose.total_residue() ).aa() == core::chemical::aa_vrt )
496  }
497 
498  PackerTaskCOP task(task_factory_->create_task_and_apply_taskoperations(pose));
499  vector1< bool > relevant_residues(task->repacking_residues());
500 
501  TR
502  << "Reporting features for "
503  << accumulate(relevant_residues.begin(), relevant_residues.end(), 0)
504  << " of the " << pose.total_residue()
505  << " total residues in the pose "
507  << " for batch '" << name_ << "'." << endl;
508 
509  return relevant_residues;
510 }
511 
512 /// @detail The 'features_reporters' table lists the type_names of the
513 /// all defined features reporters. The 'batch_reports' table link the
514 /// features reporters with each batch defined in the 'batches' table.
515 void
517  using namespace basic::database::schema_generator;
518 
519  Schema features_reporters(
520  "features_reporters",
521  PrimaryKey( Column("report_name", new DbTextKey())));
522 
523  features_reporters.write(db_session_);
524 
525  //Only report features that aren't already in the database
526  string select_string =
527  "SELECT *\n"
528  "FROM\n"
529  " features_reporters\n"
530  "WHERE\n"
531  " report_name = ?;";
532  statement select_stmt(safely_prepare_statement(select_string, db_session_));
533 
534  string insert_string = "INSERT INTO features_reporters (report_name) VALUES (?);";
535  statement insert_stmt(safely_prepare_statement(insert_string, db_session_));
536 
537  foreach(FeaturesReporterOP const & reporter, features_reporters_){
538  string const report_name(reporter->type_name());
539  select_stmt.bind(1,report_name);
540 
541  result res(safely_read_from_database(select_stmt));
542  if(!res.next()) {
543  insert_stmt.bind(1, report_name);
544  safely_write_to_database(insert_stmt);
545  }
546  }
547 }
548 
549 void
551  using namespace basic::database::schema_generator;
552 
553  Schema batch_reports("batch_reports");
554  Column report_name("report_name", new DbTextKey());
555  Column batch_id("batch_id", new DbInteger());
556 
557  batch_reports.add_foreign_key(
558  ForeignKey(batch_id, "batches", "batch_id", true /*defer*/));
559  batch_reports.add_foreign_key(
560  ForeignKey(report_name, "features_reporters", "report_name", true /*defer*/));
561 
562  vector1<Column> batch_reports_unique;
563  batch_reports_unique.push_back(batch_id);
564  batch_reports_unique.push_back(report_name);
565  batch_reports.add_constraint( new UniqueConstraint(batch_reports_unique) );
566 
567  batch_reports.write(db_session_);
568 }
569 
570 void
572 
573  try{
575  } catch(cppdb_error error){
576  stringstream err_msg;
577  err_msg
578  << "The ReportToDB Mover failed to write the 'features_reporters' table "
579  << "to the database for batch '" << name_ << "'." << endl
580  << "Error Message:" << endl << error.what() << endl;
581  utility_exit_with_message(err_msg.str());
582  }
583 
584  try{
586  } catch(cppdb_error error){
587  stringstream err_msg;
588  err_msg
589  << "The ReportToDB Mover failed to write the 'batch_reports' table "
590  << "to the database." << endl
591  << "Error Message:" << endl << error.what() << endl;
592  utility_exit_with_message(err_msg.str());
593  }
594 }
595 
596 void
598 
599  vector1<bool> relevant_residues(initialize_pose(pose));
600 
602 
603  if(use_transactions_) db_session_->begin();
604 
605  set_cache_size(db_session_, cache_size_);
606 
607  std::pair<Size, Size> ids = get_protocol_and_batch_id(name_, db_session_);
608  protocol_id_ = ids.first;
609  batch_id_ = ids.second;
610 
611  uuid struct_id = report_structure_features(relevant_residues);
612 
613  report_features(pose, struct_id, relevant_residues);
614 
615  if(use_transactions_) db_session_->commit();
616 }
617 
618 uuid
620  vector1<bool> const & relevant_residues
621 ) const {
622  uuid struct_id;
623  try {
624  struct_id = structure_features_->report_features(
625  relevant_residues, batch_id_, db_session_);
626  } catch (cppdb_error error){
627  stringstream err_msg;
628  err_msg
629  << "Failed to report structure features for:" << endl
630  << "\tprotocol_id: '" << protocol_id_ << "'" << endl
631  << "\tbatch name: '" << name_ << "'" << endl
632  << "\tbatch_id: '" << batch_id_ << "'" << endl
633  << "Error Message:" << endl << error.what() << endl;
634  utility_exit_with_message(err_msg.str());
635  }
636  return struct_id;
637 }
638 
639 void
641  Pose const & pose,
642  uuid const struct_id,
643  utility::vector1<bool> const & relevant_residues
644 ) const {
645 
646 // string batch_reports_string =
647 // "INSERT INTO batch_reports (batch_id, report_name) VALUES (?,?);";
648 // statement batch_reports_stmt(
649 // safely_prepare_statement(batch_reports_string, db_session));
650 
651  for(Size i=1; i <= features_reporters_.size(); ++i){
652  string report_name = features_reporters_[i]->type_name();
653 
654  TR << "Reporting " << report_name << std::endl;
655 
656  try {
657  features_reporters_[i]->report_features(
658  pose, relevant_residues, struct_id, db_session_);
659  } catch (cppdb_error error){
660  stringstream err_msg;
661  err_msg
662  << "Failed to report features for the "
663  << "'" << report_name << "' reporter with:" << endl
664  << "with:" << endl
665  << "\tprotocol_id: '" << protocol_id_ << "' " << endl
666  << "\tbatch name: '" << name_ << "' " << endl
667  << "\tbatch_id: '" << batch_id_ << "'" << endl
668  << "\tstruct_id: '" << struct_id << "'" << endl
669  << "Error Message:" << endl << error.what() << endl;
670  utility_exit_with_message(err_msg.str());
671  }
672 
673  //Need to check for preexisting entry to avoid constraint failure caused by having multiple structures in a batch. Alternatively, we could add struct_id to batch_reports table
674  // batch_reports_stmt.bind(1, batch_id_);
675  // batch_reports_stmt.bind(2, report_name);
676  // safely_write_to_database(batch_reports_stmt);
677  }
678 
679 }
680 
681 } // namespace
682 } // namespace