Rosetta 3.5
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
ThreadingJobInputter.cc
Go to the documentation of this file.
1 // -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
2 // vi: set ts=2 noet:
3 //
4 // (c) Copyright Rosetta Commons Member Institutions.
5 // (c) This file is part of the Rosetta software suite and is made available under license.
6 // (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
7 // (c) For more information, see http://www.rosettacommons.org. Questions about this can be
8 // (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
9 //
10 /// @file protocols/comparative_modeling/ThreadingJobInputter.cc
11 
12 ///Unit headers
16 #include <protocols/jd2/Job.hh>
18 
19 ///C++ headers
20 #include <string>
21 
22 ///Utility headers
23 #include <basic/Tracer.hh>
24 #include <basic/options/option.hh>
25 #include <basic/options/keys/cm.OptionKeys.gen.hh>
26 #include <basic/options/keys/in.OptionKeys.gen.hh>
27 #include <basic/options/keys/nonlocal.OptionKeys.gen.hh>
28 #include <utility/vector1.hh>
29 #include <utility/file/FileName.hh>
30 #include <utility/io/izstream.hh>
31 
32 ///Project headers
34 // AUTO-REMOVED #include <core/id/NamedAtomID.hh>
36 // AUTO-REMOVED #include <core/io/pdb/pose_io.hh>
39 #include <core/pose/Pose.hh>
40 #include <core/pose/PDBInfo.hh>
42 #include <core/sequence/util.hh>
43 
44 // AUTO-REMOVED #include <protocols/comparative_modeling/util.hh>
47 
48 namespace protocols {
49 namespace comparative_modeling {
50 
// File-scope tracer; the functions in this file write to its Debug, Trace and
// Error channels as well as to the default channel.
51 static basic::Tracer tr("protocols.comparative_modeling.ThreadingJobInputter");
52 
/// @brief Reads "extra residue" files and returns a map from alignment id to
/// the residue numbers listed for that id.
/// @details Every line that does not start with '#' is split on whitespace: the
/// first token is the alignment id (passed through ObjexxFCL::uppercased), the
/// remaining tokens are read as core::Size values. Values equal to 0 are not
/// stored. If a file's stream is not good() after opening, the function calls
/// utility_exit_with_message with "Error reading file <name>".
/// NOTE(review): the parameter declaration (original line 54) is missing from
/// this listing; the body uses it as `fns`, indexed from 1 to fns.size() --
/// confirm the exact type against the header.
/// @return The id -> residue-list map accumulated over all files.
53 std::map< std::string, utility::vector1< core::Size > > read_extra_residues(
55 ) {
56  std::map< std::string, utility::vector1< core::Size > > extra_residues;
57  using core::Size;
58  for ( Size ii = 1; ii <= fns.size(); ++ii ) {
59  utility::io::izstream input( fns[ii] );
60  if ( !input.good() ) {
61  std::string const & msg( "Error reading file " + (std::string)fns[ii] );
62  utility_exit_with_message(msg);
63  }
64  std::string line;
65  while( getline(input,line) ) {
    // Lines whose first character is '#' are treated as comments.
    // NOTE(review): blank lines are not skipped here; they appear to fall
    // through and create an entry under an empty id -- confirm intent.
66  if ( line.substr(0,1) == "#" ) continue;
67  std::istringstream ss(line);
68  std::string aln_id;
69  ss >> aln_id;
70  aln_id = ObjexxFCL::uppercased(aln_id);
    // Assigning a fresh vector means a later line with the same id replaces
    // (rather than extends) whatever an earlier line or file stored.
71  extra_residues[aln_id] = utility::vector1< core::Size >();
72 
73  core::Size res(0);
74  ss >> res;
75  if ( res != 0 ) {
76  extra_residues[aln_id].push_back(res);
77  }
    // Keep extracting until the stream leaves the good() state. res is reset
    // to 0 at the end of each pass so a value is only stored when non-zero.
    // NOTE(review): res is NOT reset before the first pass, so if a failed
    // extraction leaves res untouched (pre-C++11 stream semantics) the
    // previous value would be pushed a second time -- confirm against the
    // language standard this code is built with.
78  while ( ss.good() ) {
79  ss >> res;
80  if ( res != 0 ) {
81  extra_residues[aln_id].push_back(res);
82  }
83  res = 0;
84  }
85  }
86  input.close();
87  }
88 
89  return extra_residues;
90 }
91 
/// @brief Constructor: reads the alignments and the template structures named
/// on the command line and stores them in alignments_ and template_poses_.
/// @details Templates come from -in:file:template_pdb if set by the user,
/// otherwise from -in:file:template_silent; if neither is set the program exits
/// via utility_exit_with_message. input_source_ starts out as NONE.
/// NOTE(review): original lines 92, 105, 127, 129, 138 and 147 are missing from
/// this listing (the constructor signature, the declarations of `alns` and
/// `sfd`, and whatever statements followed each template-loading branch and the
/// `fns` lookup). The comments below describe only the visible lines.
93  input_source_( protocols::jd2::JobInputterInputSource::NONE )
94 {
95  using namespace core;
96  using namespace core::pose;
97  using namespace basic::options;
98  using namespace basic::options::OptionKeys;
99 
100  tr.Debug << "Instantiate ThreadingJobInputter" << std::endl;
101 
102  /// read alignments from command-line
103  utility::vector1< std::string > const & aln_fns( option[ in::file::alignment ]() );
104  for ( Size ii = 1; ii <= aln_fns.size(); ++ii ) {
    // Each file is parsed by sequence::read_aln using the format given by
    // -cm:aln_format; every alignment it yields is appended to alignments_.
106  sequence::read_aln( option[ cm::aln_format ](), aln_fns[ii] )
107  );
108  for ( Size jj = 1; jj <= alns.size(); ++jj ) {
109  alignments_.push_back( alns[jj] );
110  }
111  }
112 
113  /// get template-pdbs from files
114  if ( option[ in::file::template_pdb ].user() ) {
115  FileList template_pdb_filenames = option[ in::file::template_pdb ]();
116  typedef utility::vector1< pose::PoseOP > PoseOPvec;
117  PoseOPvec poses = core::import_pose::poseOPs_from_pdbs( template_pdb_filenames );
118 
119  /// put template-pdbs into map --- use filename as key --- this is used to match pdb and alignment
120  for ( PoseOPvec::const_iterator it = poses.begin(); it != poses.end(); ++it ) {
    // Key = first five characters of the file's base name, upper-cased.
    // Several poses may share a key; they are collected in one vector.
121  utility::file::FileName fn( (*it)->pdb_info()->name() );
122  std::string const base_fn( static_cast< std::string > (fn.base()) );
123  std::string const match( ObjexxFCL::uppercased( base_fn.substr(0,5) ) );
124  tr.Trace << "add template " << match << std::endl;
125  template_poses_[ match ].push_back( *it );
126  }
128  } else if ( option[ in::file::template_silent ].user() ) { // get template-pdbs from silent-file
130  sfd.read_file( option[ in::file::template_silent ]() );
131  for ( io::silent::SilentFileData::iterator it = sfd.begin(); it != sfd.end(); ++it ) {
132  PoseOP pose = new Pose;
133  it->fill_pose( *pose );
    // Key = five characters of the decoy tag starting at offset 2,
    // upper-cased (presumably skipping a two-character prefix such as
    // "S_" -- confirm).
    // NOTE(review): if decoy_tag() returns a std::string shorter than two
    // characters, substr(2,5) throws std::out_of_range.
134  std::string const match( ObjexxFCL::uppercased( it->decoy_tag().substr(2,5) ) );
135  tr.Trace << "add template " << match << std::endl;
136  template_poses_[ match ].push_back( pose );
137  }
139  } else {
140  //no -in:file:template_xxx option
141  utility_exit_with_message("ThreadingJobInputter needs parent-pdbs either as in:file:template_pdb or as in:file:template_silent");
142  }
143 
144  // read in extra residues to steal
145  if ( option[ cm::steal_extra_residues ].user() ) {
    // NOTE(review): the statement that consumes `fns` (original line 147) is
    // not visible here.
146  utility::vector1< utility::file::FileName > const & fns( option[ cm::steal_extra_residues ]() );
148  } // steal_extra_residues
149 } // ThreadingJobInputter()
150 
151 /// @details Rebuilds `pose` from scratch for the given job: the pose is reset,
152 /// a target sequence is chosen (the single entry of -in:file:fasta if that
153 /// option is set, otherwise the ungapped first sequence of the job's alignment),
154 /// and the alignment is threaded onto the job's template pose.
/// Alignment statistics ("aln_len", "aln_perc", "aln_ident", "nres") are
/// attached to the job, and if the job lists extra residues to steal a second
/// mover is applied to the pose.
/// Exits via utility_exit_with_message when the fasta file holds zero or more
/// than one sequence.
/// NOTE(review): original lines 155, 157, 175, 194 and 225 are missing from this
/// listing (the function signature, the `job` parameter, the declaration of
/// `input_fasta`, the call that builds the pose from `sequence`, and the
/// declaration of `thief`).
156  core::pose::Pose & pose,
158 ) {
159  tr.Debug << "ThreadingJobInputter::pose_from_job" << std::endl;
160 
161  using namespace basic::options;
162  using namespace basic::options::OptionKeys;
163  using namespace core::chemical;
164  using core::Real;
165  using core::Size;
166 
167  ///cast to ThreadingJob ... to access alignment and template pdb
    // NOTE(review): the result of the dynamic_cast is dereferenced below
    // without a null check; an inner job of another type would yield a null
    // pointer here.
168  ThreadingJobCOP tjob = dynamic_cast< ThreadingJob const* const> (
169  job->inner_job().get()
170  );
171 
172  pose = core::pose::Pose(); //fpd symmetry-safe
173  std::string sequence;
174  if ( option[ in::file::fasta ].user() ) {
    // Only the first file listed under -in:file:fasta is read.
176  = core::sequence::read_fasta_file( option[ in::file::fasta ]()[1] );
177  if ( input_fasta.size() == 0 ) {
178  utility_exit_with_message(
179  "ERROR: Fasta file specified by -in::file::fasta contains no valid sequence"
180  );
181  }
182 
183  if ( input_fasta.size() > 1 ) {
184  utility_exit_with_message(
185  "ERROR: Fasta file specified by -in::file::fasta should contain a single sequence"
186  );
187  }
188 
189  sequence = input_fasta[1]->sequence();
190  } else {
191  sequence = tjob->alignment().sequence(1)->ungapped_sequence();
192  }
193 
    // Arguments of the (missing) pose-building call: the output pose, the
    // chosen sequence and the FA_STANDARD residue type set.
195  pose,
196  sequence,
197  *( ChemicalManager::get_instance()->residue_type_set( FA_STANDARD ))
198  );
199 
    // coverage = alignment length minus gapped positions; the two ratios
    // below are both taken relative to the freshly built pose's residue count.
200  Real const alignment_coverage( tjob->alignment().length() - tjob->alignment().gapped_positions() );
201  Real const alignment_identities( (core::Real)tjob->alignment().identities() / pose.total_residue() );
202  Real const alignment_perc ( alignment_coverage / pose.total_residue() );
203 
204  // Add the alignment length, perc coverage and total length
205  job->add_string_real_pair( "aln_len", alignment_coverage );
206  job->add_string_real_pair( "aln_perc", alignment_perc );
207  job->add_string_real_pair( "aln_ident", alignment_identities );
208  job->add_string_real_pair( "nres", pose.total_residue() );
209 
210  // thread to get starting model
211  comparative_modeling::ThreadingMover mover( tjob->alignment(), *(tjob->get_pose()) );
212  mover.build_loops(false);
213 
214  // broken-chain folding from a sequence alignment and template pdb requires
215  // that missing loop coordinates be randomized to avoid clashes. the option's
216  // default value is false to match existing behavior.
217  mover.randomize_loop_coords( option[OptionKeys::nonlocal::randomize_missing]() );
218  mover.repack_query(false);
219  mover.apply( pose );
220  core::sequence::alignment_into_pose( tjob->alignment(), pose );
221 
222  // add extra residues from template here
223  utility::vector1< core::Size > extra_res( tjob->extra_residues_to_steal() );
224  if ( extra_res.size() > 0 ) {
    // `thief` is constructed from the alignment, the template pose and the
    // residue list; its type is declared on the missing line 225.
226  tjob->alignment(), *tjob->get_pose(), extra_res);
227  thief.apply(pose);
228  }
229 } // pose_from_job
230 
231 /// @details this function determines what jobs exist: for every stored
/// alignment that passes the length filter, one inner ThreadingJob is created
/// per template pose registered under the alignment's template id, and each
/// inner job is expanded into nstruct Jobs appended to `jobs`.
/// Alignments without a matching template are reported on tr.Error and skipped.
/// NOTE(review): the function signature (original line 232) is missing from this
/// listing; the body uses an output container named `jobs`.
233  tr.Debug << "ThreadingJobInputter::fill_jobs" << std::endl;
234  using namespace basic::options;
235  using namespace basic::options::OptionKeys;
236  using core::Size;
237  using core::Real;
238 
239  jobs.clear(); //should already be empty anyway
240 
241  //read command line
242  Size const nstruct( get_nstruct() );
    // A threshold <= 0 disables the length filter (see the check in the loop).
243  Real filter_threshold = -1;
244 
245  if ( option[ cm::aln_length_filter_quantile ].user() ) {
246  Real quantile = option[ cm::aln_length_filter_quantile ]();
247 
248  // make list of lengths
249  //create jobs for each alignment
250  std::vector < int > length_list;
251  for ( Alignments::const_iterator align_it = alignments_.begin(); align_it != alignments_.end(); ++align_it ) {
252  length_list.push_back( align_it->length() - align_it->gapped_positions() );
253  tr << "Len " << align_it->length() - align_it->gapped_positions() << std::endl;
254  }
255 
    // Pick the element that would sit at index m = floor(size * quantile) in
    // sorted order and use it as the threshold.
    // NOTE(review): when quantile is 1.0 (or larger) or alignments_ is empty,
    // m is not a valid index and length_list.at(m) throws std::out_of_range;
    // a quantile above 1.0 also makes `i + m` run past end().
256  std::vector< int >::iterator i = length_list.begin();
257  std::vector< int >::size_type m = (size_t)( length_list.size() * quantile );
258 
259  std::nth_element(i, i + m, length_list.end());
260 
261  filter_threshold = length_list.at(m);
262 
263  tr << "Quantile filter threshold = " << filter_threshold << std::endl;
264  }
265 
266  // create jobs for each alignment
267  for ( Alignments::const_iterator align_it = alignments_.begin(); align_it != alignments_.end(); ++align_it ) {
268  // alignment id
    // The template id is the first five characters of the alignment id.
    // NOTE(review): the constructor stores template keys upper-cased, while
    // this lookup key is used as-is -- confirm alignment ids are already
    // upper case, otherwise the find() below cannot match.
269  std::string const alignment_id( align_it->alignment_id() );
270  std::string const template_id( alignment_id.substr(0,5));
271  tr.Debug << "creating job for alignment " << alignment_id << " on template " << template_id << std::endl;
272 
273  Real const alignment_coverage( align_it->length() - align_it->gapped_positions() );
274 
    // An explicit -cm:aln_length_filter value replaces any quantile-derived
    // threshold. The lookup does not depend on the loop variable.
275  if ( option[ cm::aln_length_filter ].user() ) {
276  filter_threshold = option[ cm::aln_length_filter ]();
277  }
278 
279  if ( ( filter_threshold > 0 ) && ( alignment_coverage < filter_threshold ) ) {
280  tr << "Skipping alignment " << alignment_id << ": length = "
281  << int( alignment_coverage )
282  << " threshold = " << int( filter_threshold ) << std::endl;
283  continue;
284  }
285 
286  // find matching template pdb
287  PoseMap::const_iterator iter = template_poses_.find( template_id );
288 
289  if ( iter != template_poses_.end() ) {
290  // found template
291  PoseOPs template_poses( iter->second );
292  for ( PoseOPs::const_iterator it = template_poses.begin(); it != template_poses.end(); ++it ) {
293  // create inner job
294  ThreadingJobOP ijob( new ThreadingJob(
295  *it, align_it->clone(), "S_" + alignment_id, nstruct
296  ) );
297  // find extra residues
    // NOTE(review): read_extra_residues upper-cases its keys, but this
    // lookup uses alignment_id unchanged -- confirm the ids match.
298  ExtraResidues::const_iterator extra_res(extra_residues_.find(alignment_id));
299  if ( extra_res != extra_residues_.end() ) {
300  ijob->extra_residues_to_steal( extra_res->second );
301  }
302 
303  // make nstruct outer jobs
304  for ( Size index = 1; index <= nstruct; ++index) {
305  jobs.push_back( protocols::jd2::JobOP( new protocols::jd2::Job( ijob, index ) ) );
306  jobs.back()->add_string_string_pair( "aln_id", alignment_id );
307  } // loop over nstruct
308  }
309  } else { // report error if template pdb not found
    // Deliberately non-fatal: the hard exit below was commented out in
    // favour of a message on the Error channel.
310  //utility_exit_with_message( "ERROR: no template_pdb provided for alignment " + alignment_id );
311  tr.Error << "Warning: no template pdb provided for alignment " << alignment_id << std::endl;
312  }
313  } // for alignments
314 } // fill_jobs
315 
316 /// @brief Return the type of input source that the ThreadingJobInputter is
317 /// currently using for template structures.
/// @return The current value of the input_source_ member, which the
/// constructor's initializer list sets to JobInputterInputSource::NONE.
/// NOTE(review): the function signature (original line 318) is missing from this
/// listing.
319  return input_source_;
320 }
321 
/// @brief Returns the number of entries in template_poses_.
/// @details The loop increments the counter once per map entry, so the result
/// is the number of distinct template keys, not the total number of poses
/// stored under those keys.
/// NOTE(review): the function signature (original line 322) is missing from this
/// listing.
323  size_t num_templates = 0;
324  for (PoseMap::const_iterator i = template_poses_.begin();
325  i != template_poses_.end(); ++i) {
326  ++num_templates;
327  }
328  return num_templates;
329 }
330 
331 //CREATOR SECTION
/// @brief Returns the fixed string "ThreadingJobInputter".
/// NOTE(review): the signature (original lines 332-333) is missing from this
/// listing; presumably this is the creator's key-name accessor -- confirm.
334 {
335  return "ThreadingJobInputter";
336 }
337 
/// @brief Returns a newly heap-allocated, default-constructed ThreadingJobInputter.
/// NOTE(review): the signature (original lines 338-339) is missing from this
/// listing; the raw pointer is presumably converted to an owning pointer type by
/// the declared return type -- confirm.
340  return new ThreadingJobInputter;
341 }
342 
343 } // comparative_modeling
344 } // protocols