Rosetta 3.5
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
AntibodyInfo.cc
Go to the documentation of this file.
1 // -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
2 // vi: set ts=2 noet:
3 //
4 // (c) Copyright Rosetta Commons Member Institutions.
5 // (c) This file is part of the Rosetta software suite and is made available
6 // (c) under license. The Rosetta software is developed by the contributing
7 // (c) members of the Rosetta Commons. For more information, see
8 // (c) http://www.rosettacommons.org. Questions about this can be addressed to
9 // (c) University of Washington UW TechTransfer,email:license@u.washington.edu.
10 
11 /// @file protocols/antibody2/AntibodyInfo.cc
12 /// @brief
13 /// @author Jianqing Xu (xubest@gmail.com)
14 
22 #include <core/scoring/rms_util.hh>
23 #include <core/pose/PDBInfo.hh>
24 #include <core/pose/util.hh>
25 #include <core/pose/util.tmpl.hh>
26 #include <basic/Tracer.hh>
27 
28 // ObjexxFCL Headers
29 #include <ObjexxFCL/FArray1D.hh>
30 #include <ObjexxFCL/format.hh>
31 
32 // Utility headers
33 #include <utility/excn/Exceptions.hh>
34 
35 
36 
37 ///////////////////////////////////////////////////////////////////////////////
38 
39 static basic::Tracer TR("antibody2.AntibodyInfo");
40 
41 namespace protocols{
42 namespace antibody2{
43 
44 
45 
46 
47 
49  AntibodyNumberingEnum const & numbering_scheme,
50  bool const & cdr_pdb_numbered) {
51  set_default(); // start from the default member values
52  // Remember the numbering scheme and the CDR-numbering flag supplied by the caller.
53  numbering_scheme_ = numbering_scheme;
54  cdr_pdb_numbered_ = cdr_pdb_numbered;
55  // Classify the pose from its chains first; init() runs only after that.
56  identify_antibody(pose);
57 
58  init(pose);
59 }
60 
61 
62 
64 { // Defaults: not a camelid antibody, and no antigen in the input pose.
65  is_camelid_ = false;
66  InputPose_has_antigen_ = false;
70 }
71 
72 
74  // Classifies the pose from its chain count and the PDB chain IDs of its first chains, setting is_camelid_ (and InputPose_has_antigen_ in the branches that assign it), then records the antibody sequence. Throws excn::EXCN_Msg_Exception when the chains do not match an expected layout.
75  switch (pose.conformation().num_chains() ) {
76  case 0:
77  throw excn::EXCN_Msg_Exception("the number of chains in the input pose is '0' !!");
78  break; // not reached: the throw above already leaves the function
79  case 1: //if pose has only "1" chain, it is a nanobody
80  if (pose.pdb_info()->chain(pose.conformation().chain_end(1)) == 'H'){
81  is_camelid_ = true;
83  }
84  else{
85  throw excn::EXCN_Msg_Exception(" A): the input pose has only 1 chain, if it is a nanobody, the chain ID is supposed to be 'H' !!");
86  }
87  break;
88  case 2: // if pose has "2" chains, it can be 2 possibilities
89  // possibility 1): L and H, regular antibody
90  if ( (pose.pdb_info()->chain(pose.conformation().chain_end(1)) == 'L') && (pose.pdb_info()->chain(pose.conformation().chain_end(2)) == 'H') ) {
91  is_camelid_ = false;
92  InputPose_has_antigen_ = false;
93  }
94  // possibility 2): H nanobody and antigen
95  else if ( pose.pdb_info()->chain(pose.conformation().chain_end(1)) == 'H' ) {
96  is_camelid_ = true;
98  }
99  else{
100  throw excn::EXCN_Msg_Exception(" B): the input pose has two chains, 1). if it is nanobody, the 1st chain should be 'H'. 2). If it is a regular antibody, the 1st and 2nd chains should be 'L' and 'H' !!");
101  }
102  break;
103  default: // if pose has >=3 chains, it can be 2 possibilities
104  // possibility 1): L and H, and antigen
105  if( pose.pdb_info()->chain(pose.conformation().chain_end(1)) == 'L' && pose.pdb_info()->chain(pose.conformation().chain_end(2)) == 'H' ){
106  is_camelid_ = false;
107  InputPose_has_antigen_ = true;
108  }
109  // possibility 2): H nanobody and antigen
110  else if (pose.pdb_info()->chain(pose.conformation().chain_end(1)) == 'H'){
111  is_camelid_ = true;
112  InputPose_has_antigen_ = true;
113  }
114  else{
115  throw excn::EXCN_Msg_Exception(" C). the input pose has more than two chains, 1). if it is nanobody, the 1st chain should be 'H'. 2). If it is a regular antibody, the 1st and 2nd chains should be 'L' and 'H' !!");
116  }
117  break;
118  }
119 
120  /// record the antibody sequence
121  Size chain_count = (is_camelid_)? 1:2 ; // camelid: first chain only; otherwise the first two chains
122  for (Size i=1; i<=pose.conformation().chain_end(chain_count) ; ++i){ // residues 1 .. end of the last antibody chain
123  ab_sequence_.push_back(pose.residue(i).name1());
124  }
125 
126 }
127 
128 
129 void AntibodyInfo::init(pose::Pose const & pose){
130  // Runs the per-pose setup steps in a fixed order: CDR loops, framework regions, then H3 base-type prediction.
133 
134  setup_CDRsInfo(pose) ;
135 
136  setup_FrameWorkInfo(pose) ;
137 
139 
140  predict_H3_base_type( pose );
141 }
142 
143 
144 
145 /// TODO:
146 // JQX:
147 // The code assumes that the input PDB has been renumbered using the Aroop
148 // numbering scheme [see the "get_AntibodyNumberingScheme()" function below]: as a matter
149 // of fact, since this code is designed for Rosetta Antibody Homology Modeling,
150 // the input is always a structure assembled from different templates, and such structures are
151 // always renumbered by the perl script from the Rosetta Antibody Server.
152 // A smarter way would be to use "identify_CDR_from_a_sequence()" to check this
153 // automatically. On my list!
154 
156  // For each CDR: converts its PDB-numbered boundaries to pose numbering, builds a Loop (start, stop, cut), and stores it both in loopsop_having_allcdrs_ and, wrapped in its own Loops object, in vector1_loopsop_having_cdr_.
157  vector1<char> Chain_IDs_for_CDRs;
158  for (Size i=1;i<=3;++i) { Chain_IDs_for_CDRs.push_back('H'); } // HEAVY chain first
159  for (Size i=1;i<=3;++i) { Chain_IDs_for_CDRs.push_back('L'); } // light
160 
162 
163  int loop_start_in_pose, loop_stop_in_pose, cut_position ;
165 
167  loop_start_in_pose = pose.pdb_info()->pdb2pose( Chain_IDs_for_CDRs[i], cdr_numbering_info[Begin][i]);
168  if(i != h3 ){
169  loop_stop_in_pose= pose.pdb_info()->pdb2pose( Chain_IDs_for_CDRs[i], cdr_numbering_info[End][i]);
170  cut_position = (loop_stop_in_pose - loop_start_in_pose +1) /2 + loop_start_in_pose; // cut near the middle of the loop (integer division)
171  }
172  else{
173  loop_stop_in_pose = pose.pdb_info()->pdb2pose( Chain_IDs_for_CDRs[i], cdr_numbering_info[End][i]+1 );
174  loop_stop_in_pose -=1;
175  // JQX:
176  // One should always see 95-102 as the positions for your H3 in your FR02.pdb, but as a matter of fact,
177  // the antibody script just copied h3.pdb (heavy atoms) into the FR02.pdb, so sometimes one sees the stop
178  // position pdb number 98, not 102, if the h3.pdb is short. Therefore, using the pdb number 102 to
179  // define h3 fails!
180  // But in FR02.pdb, you always see 103, because 103 is on the framework. The idea is to find the pose number
181  // of PDB number 103, then minus 1 will give you the last residue of h3.
182  cut_position = (loop_start_in_pose +1 ) ;
183  // JQX:
184  // why is this different from the cuts of the other loops?
185  // Aroop seems to have done this in his old R3 code, CHECK LATER !!!
186  }
187 
188  loops::Loop one_loop(loop_start_in_pose, loop_stop_in_pose, cut_position);
189  loops::LoopsOP one_loops = new loops::Loops(); one_loops->add_loop(one_loop);
190 
191  // make a "LoopsOP" object, in which each "Loop" was saved
192  loopsop_having_allcdrs_->add_loop(one_loop);
193 
194  // make a "vector1" of "LoopsOP" object, each "LoopsOP" has one "Loop" object
195  vector1_loopsop_having_cdr_.push_back(one_loops);
196  }
197 
198  /// FIXME: ***********************
199  loopsop_having_allcdrs_->sequential_order(); /// TODO: kind of dangerous here
200 
201  TR<<"Successfully finished the CDR defintion"<<std::endl;
202 
203 }
204 
205 
206 
207 
209  // Fills framework_info_ with the framework segments (chain name, pose start, pose stop) of the light and heavy chains. Throws excn::EXCN_Msg_Exception when a required anchor residue is missing, when the numbering scheme is unknown, or when no heavy-chain framework was recorded.
210  FrameWork frmwk;
211  vector1<FrameWork> Lfr, Hfr;
212  // Anchor-residue checks: a zero result from pdb2pose is treated here as "residue not present".
213  if(is_Camelid() == false ){
214  if (! pose.pdb_info()->pdb2pose('L', 5)) {
215  throw excn::EXCN_Msg_Exception( "L chain 5th residues missing, framework definition failed!!! " );
216  }
217  if (! pose.pdb_info()->pdb2pose('L', 105)) {
218  throw excn::EXCN_Msg_Exception( "L chain 105th residues missing, framework definition failed!!! " );
219  }
220  }
221  if (! pose.pdb_info()->pdb2pose('H', 5)) {
222  throw excn::EXCN_Msg_Exception( "H chain 5th residues missing, framework definition failed!!! " );
223  }
224  if (! pose.pdb_info()->pdb2pose('H', 110)) {
225  throw excn::EXCN_Msg_Exception( "H chain 110th residues missing, framework definition failed!!! " );
226  }
227 
228 
229  switch (numbering_scheme_) {
230  case Aroop:
231  if(! is_camelid_){ // light-chain segments are recorded only for non-camelid antibodies
232  frmwk.chain_name='L';
233  frmwk.start=pose.pdb_info()->pdb2pose('L',5); frmwk.stop=pose.pdb_info()->pdb2pose('L',6); Lfr.push_back(frmwk);
234  frmwk.start=pose.pdb_info()->pdb2pose('L',10);frmwk.stop=pose.pdb_info()->pdb2pose('L',23); Lfr.push_back(frmwk);
235  frmwk.start=pose.pdb_info()->pdb2pose('L',35);frmwk.stop=pose.pdb_info()->pdb2pose('L',38); Lfr.push_back(frmwk);
236  frmwk.start=pose.pdb_info()->pdb2pose('L',45);frmwk.stop=pose.pdb_info()->pdb2pose('L',49); Lfr.push_back(frmwk);
237  frmwk.start=pose.pdb_info()->pdb2pose('L',57);frmwk.stop=pose.pdb_info()->pdb2pose('L',66); Lfr.push_back(frmwk);
238  frmwk.start=pose.pdb_info()->pdb2pose('L',71);frmwk.stop=pose.pdb_info()->pdb2pose('L',88); Lfr.push_back(frmwk);
239  frmwk.start=pose.pdb_info()->pdb2pose('L',98);frmwk.stop=pose.pdb_info()->pdb2pose('L',105);Lfr.push_back(frmwk);
240  }
241 
242  frmwk.chain_name='H';
243  frmwk.start=pose.pdb_info()->pdb2pose('H',5); frmwk.stop=pose.pdb_info()->pdb2pose('H',6); Hfr.push_back(frmwk);
244  frmwk.start=pose.pdb_info()->pdb2pose('H',10); frmwk.stop=pose.pdb_info()->pdb2pose('H',25); Hfr.push_back(frmwk);
245  frmwk.start=pose.pdb_info()->pdb2pose('H',36); frmwk.stop=pose.pdb_info()->pdb2pose('H',39); Hfr.push_back(frmwk);
246  frmwk.start=pose.pdb_info()->pdb2pose('H',46); frmwk.stop=pose.pdb_info()->pdb2pose('H',49); Hfr.push_back(frmwk);
247  frmwk.start=pose.pdb_info()->pdb2pose('H',66); frmwk.stop=pose.pdb_info()->pdb2pose('H',94); Hfr.push_back(frmwk);
248  frmwk.start=pose.pdb_info()->pdb2pose('H',103);frmwk.stop=pose.pdb_info()->pdb2pose('H',110);Hfr.push_back(frmwk);
249  break;
250  case Chothia: // NOTE(review): this and the following schemes add no segments, so Hfr stays empty and the "no framework" exception below is thrown
251  break;
252  case Kabat:
253  break;
254  case Enhanced_Chothia:
255  break;
256  case AHO:
257  break;
258  case IMGT:
259  break;
260  default:
261  throw excn::EXCN_Msg_Exception("the numbering schemes can only be 'Aroop','Chothia','Kabat', 'Enhanced_Chothia', 'AHO', 'IMGT' !!!!!! ");
262  break; // not reached: the throw above already leaves the function
263  }
264  // The light-chain table is stored only when non-empty; an empty heavy-chain table is an error.
265  if (Lfr.size()>0) { framework_info_.push_back(Lfr);}
266  if (Hfr.size()>0) { framework_info_.push_back(Hfr);}
267  else { throw excn::EXCN_Msg_Exception("The heavy chain has no framework? This cannot be correct");}
268 
269 
270 }
271 
272 
273 
275  // Collects into packing_angle_residues_ the pose numbers of four residue ranges: two on chain L followed by two on chain H.
276  vector1<char> Chain_IDs_for_packing_angle;
277  for (Size i=1;i<=2;++i) { Chain_IDs_for_packing_angle.push_back('L'); } // VL
278  for (Size i=1;i<=2;++i) { Chain_IDs_for_packing_angle.push_back('H'); } // VH
279  // NOTE(review): the table comes from get_CDR_NumberingInfo() but is indexed with Pack_Angle_Begin / Pack_Angle_End -- confirm this getter really provides the packing-angle rows.
280  vector1< vector1< Size > > packing_angle_numbering_info = get_CDR_NumberingInfo(numbering_scheme_);
281 
282  Size packing_angle_start_in_pose, packing_angle_stop_in_pose;
283 
284  for (Size i=1; i<=4; ++i){
285  packing_angle_start_in_pose = pose.pdb_info()->pdb2pose( Chain_IDs_for_packing_angle[i], packing_angle_numbering_info[Pack_Angle_Begin][i]);
286  packing_angle_stop_in_pose = pose.pdb_info()->pdb2pose( Chain_IDs_for_packing_angle[i], packing_angle_numbering_info[Pack_Angle_End][i]);
287  for (Size j=packing_angle_start_in_pose; j<=packing_angle_stop_in_pose; j++){ // every pose residue in the range, both ends included
288  packing_angle_residues_.push_back( j );
289  }
290  }
291 }
292 
293 
294 
295 
296 ////////////////////////////////////////////////////////////////////////////////
297 /// ///
298 /// predict H3 cterminus base type (Kinked or Extended) based on sequence ///
299 /// ///
300 ////////////////////////////////////////////////////////////////////////////////
302  if( is_camelid_ ){ // camelid antibodies take this branch
304  }
305  else{ // every other antibody takes this branch
306  //detect_and_set_regular_CDR_H3_stem_type( pose );
308  }
309 } // detect_CDR_H3_stem_type
310 
311 
313  TR << "AC Detecting Camelid CDR H3 Stem Type" << std::endl;
314  // Sequence-based classification of the camelid H3 base; the outcome (Kinked, Extended or Neutral) is stored in predicted_H3_base_type_.
315  bool kinked_H3 (false);
316  bool extended_H3 (false);
317 
318  // extract single letter aa codes for the chopped loop residues
319  vector1< char > cdr_h3_sequence;
320  for( Size ii = get_CDR_loop(h3).start() - 2; ii <= get_CDR_loop(h3).stop(); ++ii ) // window starts two residues before the H3 loop start
321  cdr_h3_sequence.push_back( pose.sequence()[ii-1] ); // ii is a pose residue number; the sequence is read at index ii-1
322 
323  // Rule for extended
324  if( ( ( get_CDR_loop(h3).stop() - get_CDR_loop(h3).start() ) ) >= 12 ) {
325  if( ( ( cdr_h3_sequence[ cdr_h3_sequence.size() - 3 ] == 'Y' ) ||
326  ( cdr_h3_sequence[ cdr_h3_sequence.size() - 3 ] == 'W' ) ||
327  ( cdr_h3_sequence[ cdr_h3_sequence.size() - 3 ] == 'F' ) ) &&
328  ( cdr_h3_sequence[ cdr_h3_sequence.size() - 2 ] != 'H' ) &&
329  ( cdr_h3_sequence[ cdr_h3_sequence.size() - 1 ] != 'G' ) )
330  extended_H3 = true;
331  }
332  // NOTE(review): each of the last three clauses below has the form (x != 'Y') || (x != 'W'), which holds for every character. As written the whole condition is always true, so kinked_H3 is reset to false whenever this block runs. '&&' may have been intended -- confirm against the published rule before changing.
333  if( !extended_H3 ) {
334  kinked_H3 = true;
335  if( ( cdr_h3_sequence[ cdr_h3_sequence.size() - 3 ] == 'R' ) ||
336  ( cdr_h3_sequence[ cdr_h3_sequence.size() - 2 ] == 'Y' ) ||
337  (( ( cdr_h3_sequence[ cdr_h3_sequence.size() - 1 ] != 'Y' ) || ( cdr_h3_sequence[ cdr_h3_sequence.size() - 1 ] != 'W' ) ) &&
338  ( ( cdr_h3_sequence[ cdr_h3_sequence.size() - 2 ] != 'Y' ) || ( cdr_h3_sequence[ cdr_h3_sequence.size() - 2 ] != 'W' ) ) &&
339  ( ( cdr_h3_sequence[ cdr_h3_sequence.size() - 3 ] != 'Y' ) || ( cdr_h3_sequence[ cdr_h3_sequence.size() - 3 ] != 'W' ) ))
340  )
341  kinked_H3 = false;
342  }
343  // Translate the two flags into the stored base type; Neutral means neither flag is set.
344  if (kinked_H3) predicted_H3_base_type_ = Kinked;
345  if (extended_H3) predicted_H3_base_type_ = Extended;
346  if (!kinked_H3 && !extended_H3) predicted_H3_base_type_ = Neutral;
347  TR << "AC Finished Detecting Camelid CDR H3 Stem Type: " << get_string_h3_base_type()[predicted_H3_base_type_] << std::endl;
348 }
349 
350 
352  TR << "AC Detecting Regular CDR H3 Stem Type" << std::endl;
353  bool extended_H3 (false) ;
354  bool kinked_H3 (false);
355  bool is_H3( false ); // becomes true once one of the rules below has classified the loop; later rules are skipped after that
356 
357  // extract single letter aa codes for the chopped loop residues
358  vector1< char > cdr_h3_sequence;
359  for( Size ii = get_CDR_loop(h3).start() - 2; ii <= get_CDR_loop(h3).stop()+1 ; ++ii ) // window: two residues before the loop start through one residue past the loop stop
360  cdr_h3_sequence.push_back( pose.sequence()[ii-1] );
361 
362  // Rule 1a for standard kink
363  if( cdr_h3_sequence[ cdr_h3_sequence.size() - 2 ] != 'D') {
364  kinked_H3 = true;
365  is_H3 = true;
366  }
367 
368  // Rule 1b for standard extended form
369  if( ( cdr_h3_sequence[ cdr_h3_sequence.size() - 2 ] == 'D')
370  && ( (cdr_h3_sequence[2] != 'K') &&
371  (cdr_h3_sequence[2] != 'R') ) && (is_H3 != true)) {
372  extended_H3 = true;
373  is_H3 = true;
374  }
375 
376  if( !is_H3 ) {
377  // Rule 1b extension for special kinked form
378  bool is_basic( false ); // Special basic residue exception flag
379  for(Size ii = 3; ii <= Size(cdr_h3_sequence.size() - 4); ++ii) { // scan for R/K; NOTE(review): size()-4 presumably wraps around for windows shorter than four entries -- confirm
380  if( cdr_h3_sequence[ii] == 'R' || cdr_h3_sequence[ii] == 'K') {
381  is_basic = true;
382  break;
383  }
384  }
385 
386  if( !is_basic ) { // no basic residue in the scanned window: fall back to light-chain residue 49
387  Size L49_pose_number = pose.pdb_info()->pdb2pose( 'L', 49 );
388  char aa_code_L49 = pose.residue( L49_pose_number ).name1();
389  if( aa_code_L49 == 'R' || aa_code_L49 == 'K')
390  is_basic = true;
391  }
392  if( is_basic ) {
393  kinked_H3 = true;
394  is_H3 = true;
395  }
396  }
397 
398  // Rule 1c for kinked form with salt bridge
399  if( ( cdr_h3_sequence[ cdr_h3_sequence.size() - 2 ] == 'D') &&
400  ( (cdr_h3_sequence[2] == 'K') ||
401  (cdr_h3_sequence[2] == 'R') ) &&
402  ( (cdr_h3_sequence[1] != 'K') &&
403  (cdr_h3_sequence[1] != 'R') ) && (is_H3 != true) ) {
404  kinked_H3 = true;
405  is_H3 = true;
406  if( !is_H3 ) { // NOTE(review): is_H3 was set to true on the previous line, so this L46 check can never run
407  bool is_basic( false ); // Special basic residue exception flag
408  Size L46_pose_number = pose.pdb_info()->pdb2pose( 'L', 46 );
409  char aa_code_L46 = pose.residue( L46_pose_number ).name1();
410  if( aa_code_L46 == 'R' || aa_code_L46 == 'K')
411  is_basic = true;
412  if( is_basic ) {
413  extended_H3 = true;
414  is_H3 = true;
415  }
416  }
417  }
418 
419  // Rule 1d for extended form with salt bridge
420  if( ( cdr_h3_sequence[ cdr_h3_sequence.size() - 2 ] == 'D') &&
421  ( ( cdr_h3_sequence[ 2 ] == 'K') ||
422  (cdr_h3_sequence[2] == 'R')) &&
423  ( (cdr_h3_sequence[1] == 'K') ||
424  (cdr_h3_sequence[1] == 'R') ) && (is_H3 != true) ) {
425  extended_H3 = true;
426  is_H3 = true;
427  }
428  // Translate the two flags into the stored base type; Neutral means neither flag is set.
429  if (kinked_H3) predicted_H3_base_type_ = Kinked;
430  if (extended_H3) predicted_H3_base_type_ = Extended;
431  if (!kinked_H3 && !extended_H3) predicted_H3_base_type_ = Neutral;
432  TR << "AC Finished Detecting Regular CDR H3 Stem Type: " << get_string_h3_base_type()[predicted_H3_base_type_] << std::endl;
433 } // detect_regular_CDR_H3_stem_type()
434 
435 
437  TR << "AC Detecting Regular CDR H3 Stem Type" << std::endl;
438  // Classifies the H3 base with a single if / else-if chain, so the first matching rule wins; the result is stored in predicted_H3_base_type_.
439  bool extended_H3 (false) ;
440  bool kinked_H3 (false);
441 
442  // extract single letter aa codes for the chopped loop residues
443  vector1< char > cdr_h3_sequence;
444  for( Size ii = get_CDR_loop(h3).start() - 2; ii <= get_CDR_loop(h3).stop() + 1; ++ii ) // window: two residues before the loop start through one residue past the loop stop
445  cdr_h3_sequence.push_back( pose.sequence()[ii-1] );
446  //for (Size i=1; i<=cdr_h3_sequence.size();++i){ TR<<cdr_h3_sequence[i];} TR<<std::endl;
447 
448  /// @author: Daisuke Kuroda (dkuroda1981@gmail.com) 06/18/2012
449  ///
450  /// @last_modified 06/18/2012
451  ///
452  /// @reference Kuroda et al. Proteins. 2008 Nov 15;73(3):608-20.
453  /// Koliansnikov et al. J Bioinform Comput Biol. 2006 Apr;4(2):415-24.
454 
455  // This is only for rule 1b
456  bool is_basic( false ); // Special basic residue exception flag
457  if( !is_basic ) { // always entered: is_basic was initialised to false on the previous line
458  Size L49_pose_number = pose.pdb_info()->pdb2pose( 'L', 49 );
459  char aa_code_L49 = pose.residue( L49_pose_number ).name1();
460  if( aa_code_L49 == 'R' || aa_code_L49 == 'K')
461  is_basic = true;
462  }
463 
464  /// START H3-RULE 2007
465  if( ( cdr_h3_sequence[ cdr_h3_sequence.size() - 2 ] == 'D') &&
466  ( ( cdr_h3_sequence[ 2 ] == 'K') || (cdr_h3_sequence[2] == 'R') ) &&
467  ( ( cdr_h3_sequence[ 1 ] == 'K') || (cdr_h3_sequence[1] == 'R') ) ) {
468  // Rule 1d for extended form with salt bridge
469  extended_H3 = true;
470  }else if( ( cdr_h3_sequence[ cdr_h3_sequence.size() - 2 ] == 'D') &&
471  ( ( cdr_h3_sequence[ 2 ] == 'K') || ( cdr_h3_sequence[ 2 ] == 'R') ) &&
472  ( ( cdr_h3_sequence[ 1 ] != 'K') && ( cdr_h3_sequence[ 1 ] != 'R') ) ) {
473  // Rule 1c for kinked form with salt bridge with/without Notable signal (L46)
474  // Special basic residue exception flag
475  Size L46_pose_number = pose.pdb_info()->pdb2pose( 'L', 46 );
476  char aa_code_L46 = pose.residue( L46_pose_number ).name1();
477 
478  // Special Tyr residue exception flag
479  Size L36_pose_number = pose.pdb_info()->pdb2pose( 'L', 36 );
480  char aa_code_L36 = pose.residue( L36_pose_number ).name1();
481 
482  if( ( aa_code_L46 == 'R' || aa_code_L46 == 'K') && aa_code_L36 != 'Y' ){
483  extended_H3 = true;
484  }else{
485  kinked_H3 = true;
486  }
487  }else if( ( cdr_h3_sequence[ cdr_h3_sequence.size() - 2 ] == 'D' ) &&
488  ( cdr_h3_sequence[ 2 ] != 'K' ) && ( cdr_h3_sequence[ 2 ] != 'R' ) &&
489  ( is_basic == true ) ) {
490  // Rule 1b for standard extended form with Notable signal (L49)
491  kinked_H3 = true;
492  }else if( ( ( cdr_h3_sequence[ cdr_h3_sequence.size() - 3 ] == 'F' ) &&
493  ( cdr_h3_sequence[ cdr_h3_sequence.size() - 4 ] == 'A' ) ) ||
494  ( ( cdr_h3_sequence[ cdr_h3_sequence.size() - 3 ] == 'F' ) &&
495  ( cdr_h3_sequence[ cdr_h3_sequence.size() - 4 ] == 'G' ) ) ||
496  ( ( cdr_h3_sequence[ cdr_h3_sequence.size() - 3 ] == 'M' ) &&
497  ( cdr_h3_sequence[ cdr_h3_sequence.size() - 4 ] == 'A' ) ) ||
498  ( ( cdr_h3_sequence[ cdr_h3_sequence.size() - 3 ] == 'M' ) &&
499  ( cdr_h3_sequence[ cdr_h3_sequence.size() - 4 ] == 'G' ) ) ) {
500  // This is new feature
501  kinked_H3 = true;
502  }else if( ( cdr_h3_sequence[ cdr_h3_sequence.size() - 3 ] == 'R' ) ||
503  ( cdr_h3_sequence[ cdr_h3_sequence.size() - 3 ] == 'K' ) ||
504  ( cdr_h3_sequence[ cdr_h3_sequence.size() - 3 ] == 'D' ) ||
505  ( cdr_h3_sequence[ cdr_h3_sequence.size() - 3 ] == 'N' ) ){
506  // This is new feature
507  extended_H3 = true;
508  }else if( ( ( cdr_h3_sequence[ 3 ] == 'Y' ) &&
509  ( cdr_h3_sequence[ cdr_h3_sequence.size() - 3 ] == 'F' ) ) ||
510  ( (cdr_h3_sequence[ 3 ] == 'Y' ) &&
511  (cdr_h3_sequence[ cdr_h3_sequence.size() - 3 ] == 'M') ) ){
512  // This is new feature
513  extended_H3 = true;
514  }else if( cdr_h3_sequence.size() - 3 == 7 ) {
515  // This is new feature
516  extended_H3 = true;
517  }else if( cdr_h3_sequence[ cdr_h3_sequence.size() - 2 ] == 'D' ) {
518  // Rule 1b for standard extended form without Notable signal (L49)
519  extended_H3 = true;
520  }else if( cdr_h3_sequence[ cdr_h3_sequence.size() - 2 ] != 'D' ) {
521  // Rule 1a for standard kink. i.e. No sequence feature...
522  kinked_H3 = true;
523  }
524  // END H3-RULE 2007
525  // The last two branches of the chain are complementary, so exactly one flag is set by this point and the Neutral assignment below is never taken.
526  if (kinked_H3) predicted_H3_base_type_ = Kinked;
527  if (extended_H3) predicted_H3_base_type_ = Extended;
528  if (!kinked_H3 && !extended_H3) predicted_H3_base_type_ = Neutral;
529  TR << "AC Finished Detecting Regular CDR H3 Stem Type: " << get_string_h3_base_type()[predicted_H3_base_type_] << std::endl;
530 
531  TR << "AC Finished Detecting Regular CDR H3 Stem Type: "
532  << "Kink: " << kinked_H3 << " Extended: " << extended_H3 << std::endl;
533 } // detect_regular_CDR_H3_stem_type()
534 
535 
536 
537 ////////////////////////////////////////////////////////////////////////////////
538 /// ///
539 /// provide fold tree utilities for various purpose ///
540 /// ///
541 ////////////////////////////////////////////////////////////////////////////////
542 
544  Size const & jumppoint1,
545  Size const & cutpoint,
546  Size const & jumppoint2,
547  pose::Pose const & pose ) const {
548  // Builds a fold tree with peptide edges covering 1..cutpoint and cutpoint+1..total_residue, connected by jump 1 between jumppoint1 and jumppoint2, and returns it rooted at residue 1.
549  using namespace kinematics;
550 
551 
552  FoldTreeOP f = new FoldTree();
553  f->clear();
554 
555  f->add_edge( 1, jumppoint1, Edge::PEPTIDE );
556  f->add_edge( jumppoint1, cutpoint, Edge::PEPTIDE );
557  f->add_edge( cutpoint + 1, jumppoint2, Edge::PEPTIDE );
558  f->add_edge( jumppoint2, pose.total_residue(), Edge::PEPTIDE );
559  f->add_edge( jumppoint1, jumppoint2, 1 ); // the single jump, labelled 1
560  f->reorder( 1 );
561 
562  return f;
563 
564 }
565 
566 
568  using namespace kinematics;
569  // Builds a fold tree with one jump per loop in loopsop_having_allcdrs_: each jump spans start-1 .. stop+1, and peptide edges run from both jump ends to the loop's cut point.
570  FoldTreeOP f = new FoldTree();
571  f->clear();
572 
573  Size jump_num = 0;
574  for( loops::Loops::const_iterator it=loopsop_having_allcdrs_->begin(), it_end=loopsop_having_allcdrs_->end(), it_next; it < it_end; ++it ) {
575 
576  it_next = it;
577  it_next++; // it_next is dereferenced below only when "it" is not the last loop
578  // Before the first loop: peptide edge from residue 1 to the residue preceding the loop.
579  if( it == loopsop_having_allcdrs_->begin() ) f->add_edge( 1, it->start()-1, Edge::PEPTIDE );
580 
581  jump_num++;
582  f->add_edge( it->start()-1, it->stop()+1, jump_num );
583  f->add_edge( it->start()-1, it->cut(), Edge::PEPTIDE );
584  f->add_edge( it->cut()+1, it->stop()+1, Edge::PEPTIDE );
585  if( it == (it_end-1) ) // last loop: continue to the end of the pose; otherwise continue to the next loop's anchor
586  f->add_edge( it->stop()+1, pose.total_residue(), Edge::PEPTIDE);
587  else
588  f->add_edge( it->stop()+1, it_next->start()-1, Edge::PEPTIDE );
589  }
590 
591  f->reorder(1);
592 
593  return f;
594 
595 } // all_cdr_fold_tree()
596 
597 ///////////////////////////////////////////////////////////////////////////
598 /// @begin all_cdr_VL_VH_fold_tree
599 ///
600 /// @brief change to all CDR and VL-VH dock fold tree
601 ///
602 /// @authors Aroop 07/13/2010
603 ///
604 /// @last_modified 07/13/2010
605 ///////////////////////////////////////////////////////////////////////////
607  // Starts from a simple two-body tree split just before the first residue of chain 'H', then inserts one jump with a cut for every loop in loopsop_having_allcdrs_.
608  using namespace kinematics;
609 
610  Size nres = pose.total_residue();
611  pose::PDBInfoCOP pdb_info = pose.pdb_info();
612  char second_chain = 'H';
613  Size rb_cutpoint(0);
614  // The rigid-body cut is the residue just before the first residue of chain 'H'; rb_cutpoint stays 0 if no such residue exists.
615  for ( Size i = 1; i <= nres; ++i ) {
616  if( pdb_info->chain( i ) == second_chain) {
617  rb_cutpoint = i-1;
618  break;
619  }
620  }
621 
622  Size jump_pos1 ( geometry::residue_center_of_mass( pose, 1, rb_cutpoint ) );
623  Size jump_pos2 ( geometry::residue_center_of_mass( pose,rb_cutpoint+1, nres ) );
624  //TR<<rb_cutpoint<<std::endl;
625  //TR<<jump_pos1<<std::endl;
626  //TR<<jump_pos2<<std::endl;
627 
628  // make sure rb jumps do not reside in the loop region
629  for( loops::Loops::const_iterator it= loopsop_having_allcdrs_->begin(), it_end = loopsop_having_allcdrs_->end(); it != it_end; ++it ) {
630  if ( jump_pos1 >= ( it->start() - 1 ) && jump_pos1 <= ( it->stop() + 1) )
631  jump_pos1 = it->stop() + 2;
632  if ( jump_pos2 >= ( it->start() - 1 ) && jump_pos2 <= ( it->stop() + 1) )
633  jump_pos2 = it->start() - 2;
634  }
635 
636  // make a simple rigid-body jump first
637  FoldTreeOP f = new FoldTree(* setup_simple_fold_tree(jump_pos1,rb_cutpoint,jump_pos2, pose ));
638 
639  for( loops::Loops::const_iterator it=loopsop_having_allcdrs_->begin(), it_end=loopsop_having_allcdrs_->end(); it != it_end; ++it ) {
640  Size const loop_start ( it->start() );
641  Size const loop_stop ( it->stop() );
642  Size const loop_cutpoint ( it->cut() );
643  Size edge_start(0), edge_stop(0);
644  bool edge_found = false;
645  const FoldTree & f_const = *f;
646  Size const num_jump = f_const.num_jump();
647  for( FoldTree::const_iterator it2=f_const.begin(), it2_end=f_const.end(); it2 !=it2_end; ++it2 ) { // find the peptide edge that strictly contains this loop
648  //TR<<it2->start()<<std::endl;
649  //TR<<it2->stop()<<std::endl;
650  edge_start = std::min( it2->start(), it2->stop() );
651  edge_stop = std::max( it2->start(), it2->stop() );
652  if ( ! it2->is_jump() && loop_start > edge_start && loop_stop < edge_stop ) {
653  edge_found = true;
654  break;
655  }
656  }
657  // NOTE(review): edge_found is never checked; when no enclosing edge exists, edge_start / edge_stop still hold the last edge inspected and are used below.
658  f->delete_unordered_edge( edge_start, edge_stop, Edge::PEPTIDE);
659  f->add_edge( loop_start-1, loop_stop+1, num_jump+1 );
660  f->add_edge( edge_start, loop_start-1, Edge::PEPTIDE );
661  f->add_edge( loop_start-1, loop_cutpoint, Edge::PEPTIDE );
662  f->add_edge( loop_cutpoint+1, loop_stop+1, Edge::PEPTIDE );
663  f->add_edge( loop_stop+1, edge_stop, Edge::PEPTIDE );
664  }
665 
666  f->reorder(1);
667  return f;
668 }
669 
670 
671 ///////////////////////////////////////////////////////////////////////////
672 /// @begin LH_A_foldtree
673 ///
674 /// @brief Fold tree for snugdock, docks LH with the antigen chains. The function
675 /// assumes that the coordinates for antigen chains in the input PDB file
676 /// are right after the antibody heavy chain (which must be named H). The
677 /// expected order of chains is thus L, H followed by the antigen chains.
678 ///
679 /// @authors Krishna Praneeth Kilambi 08/14/2012
680 ///
681 /// @last_modified 08/14/2012
682 ///////////////////////////////////////////////////////////////////////////
684  // Builds a fold tree whose main jump separates residues 1..cutpoint (through the end of chain 'H') from the remaining residues, and adds a jump at every chain boundary on either side. Throws excn::EXCN_Msg_Exception when the chain order is not L followed by H.
685  using namespace core;
686  using namespace kinematics;
687 
688  Size nres = pose.total_residue();
689  pose::PDBInfoCOP pdb_info = pose.pdb_info();
690  char second_chain = 'H';
691  Size cutpoint = 0;
692 
693  kinematics::FoldTree LH_A_foldtree;
694  // Locate the last residue of chain 'H'. NOTE(review): chain(i+1) is read even when i == nres, and the chain(1) test does not depend on i.
695  for ( Size i = 1; i <= nres; ++i ) {
696  if(pdb_info->chain(1) != 'L'){
697  throw excn::EXCN_Msg_Exception("Chains are not named correctly or are not in the expected order");
698  break; // not reached: the throw above already leaves the function
699  }
700  if( (pdb_info->chain(i) == 'L') && (pdb_info->chain(i) != pdb_info->chain(i+1))) {
701  if(pdb_info->chain(i+1) != second_chain){
702  throw excn::EXCN_Msg_Exception("Chains are not named correctly or are not in the expected order");
703  break;
704  }
705  }
706  if( (pdb_info->chain(i) == second_chain) && (pdb_info->chain(i) != pdb_info->chain(i+1))) {
707  cutpoint = i;
708  break;
709  }
710  }
711 
712  Size jump_pos1 ( geometry::residue_center_of_mass( pose, 1, cutpoint ) );
713  Size jump_pos2 ( geometry::residue_center_of_mass( pose, cutpoint+1, pose.total_residue() ) );
714 
715  //setup fold tree based on cutpoints and jump points
716  LH_A_foldtree.clear();
717  LH_A_foldtree.simple_tree( pose.total_residue() );
718  LH_A_foldtree.new_jump( jump_pos1, jump_pos2, cutpoint);
719 
720  Size chain_begin(0), chain_end(0);
721 
722  //rebuild jumps between antibody light and heavy chains
723  chain_end = cutpoint;
724  chain_begin = pose.conformation().chain_begin( pose.chain(chain_end) );
725  while (chain_begin != 1){ // walk backwards chain by chain until the first chain is reached
726  chain_end = chain_begin-1;
727  LH_A_foldtree.new_jump( chain_end, chain_begin, chain_end);
728  chain_begin = pose.conformation().chain_begin( pose.chain(chain_end) );
729  }
730 
731  //rebuild jumps between all the antigen chains
732  chain_begin = cutpoint+1;
733  chain_end = pose.conformation().chain_end( pose.chain(chain_begin) );
734  while (chain_end != pose.total_residue()){ // walk forwards chain by chain until the last residue is reached
735  chain_begin = chain_end+1;
736  LH_A_foldtree.new_jump( chain_end, chain_begin, chain_end);
737  chain_end = pose.conformation().chain_end( pose.chain(chain_begin) );
738  }
739 
740  LH_A_foldtree.reorder( 1 );
741  LH_A_foldtree.check_fold_tree();
742 
743  return LH_A_foldtree;
744 }
745 
746 
747 ///////////////////////////////////////////////////////////////////////////
748 /// @begin L_HA_foldtree
749 ///
750 /// @brief Fold tree for LH refinement in snugdock, docks L with H + antigen
751 /// chains. The function assumes that the coordinates for antigen chains
752 /// in the input PDB file are right after the antibody heavy chain
753 /// (which must be named H). The expected order of chains is thus
754 /// L, H followed by the antigen chains.
755 ///
756 /// @authors Krishna Praneeth Kilambi 08/14/2012
757 ///
758 /// @last_modified 08/14/2012
759 ///////////////////////////////////////////////////////////////////////////
761  // Builds a fold tree whose main jump separates chain 'L' (residues 1..cutpoint) from the chain that follows it, and adds a jump at every later chain boundary. Throws excn::EXCN_Msg_Exception when the chain order is not L followed by H.
762  using namespace core;
763  using namespace kinematics;
764 
765  Size nres = pose.total_residue();
766  pose::PDBInfoCOP pdb_info = pose.pdb_info();
767  char second_chain = 'H';
768  Size cutpoint = 0;
769 
770  kinematics::FoldTree L_HA_foldtree;
771  // Locate the last residue of chain 'L'. NOTE(review): chain(i+1) is read even when i == nres, and the chain(1) test does not depend on i.
772  for ( Size i = 1; i <= nres; ++i ) {
773  if(pdb_info->chain(1) != 'L'){
774  throw excn::EXCN_Msg_Exception("Chains are not named correctly or are not in the expected order");
775  break; // not reached: the throw above already leaves the function
776  }
777  if( (pdb_info->chain(i) == 'L') && (pdb_info->chain(i) != pdb_info->chain(i+1))) {
778  if(pdb_info->chain(i+1) != second_chain){
779  throw excn::EXCN_Msg_Exception("Chains are not named correctly or are not in the expected order");
780  break;
781  }
782  }
783  if( (pdb_info->chain(i) == 'L') && (pdb_info->chain(i+1) == second_chain)) {
784  cutpoint = i;
785  break;
786  }
787  }
788 
789  Size jump_pos1 ( geometry::residue_center_of_mass( pose, 1, cutpoint ) );
790  Size jump_pos2 ( geometry::residue_center_of_mass( pose, cutpoint+1, pose.conformation().chain_end( pose.chain(cutpoint+1) ) ) ); // centre of the chain that starts right after the cut
791 
792  //setup fold tree based on cutpoints and jump points
793  L_HA_foldtree.clear();
794  L_HA_foldtree.simple_tree( pose.total_residue() );
795  L_HA_foldtree.new_jump( jump_pos1, jump_pos2, cutpoint);
796 
797  Size chain_begin(0), chain_end(0);
798 
799  //rebuild jumps between heavy chain and antigen chains
800  chain_begin = cutpoint+1;
801  chain_end = pose.conformation().chain_end( pose.chain(chain_begin) );
802  while (chain_end != pose.total_residue()){ // walk forwards chain by chain until the last residue is reached
803  chain_begin = chain_end+1;
804  L_HA_foldtree.new_jump( chain_end, chain_begin, chain_end);
805  chain_end = pose.conformation().chain_end( pose.chain(chain_begin) );
806  }
807 
808  L_HA_foldtree.reorder( 1 );
809  L_HA_foldtree.check_fold_tree();
810 
811  return L_HA_foldtree;
812 
813 }
814 
815 ///////////////////////////////////////////////////////////////////////////
816 /// @begin LA_H_foldtree
817 ///
818 /// @brief Fold tree for LH refinement in snugdock, docks L + antigen chains
819 /// with H. The function assumes that the coordinates for antigen chains
820 /// in the input PDB file are right after the antibody heavy chain
821 /// (which must be named H). The expected order of chains is thus
822 /// L, H followed by the antigen chains.
823 ///
824 /// @authors Krishna Praneeth Kilambi 08/14/2012
825 ///
826 /// @last_modified 08/14/2012
827 ///////////////////////////////////////////////////////////////////////////
829  // Builds a fold tree whose main jump separates chain 'L' (residues 1..cutpoint) from the chain that follows it; the first chain after that one is attached by a jump from the end of chain 1, and any later chains are attached chain-to-chain. Throws excn::EXCN_Msg_Exception when the chain order is not L followed by H.
830  using namespace core;
831  using namespace kinematics;
832 
833  Size nres = pose.total_residue();
834  pose::PDBInfoCOP pdb_info = pose.pdb_info();
835  char second_chain = 'H';
836  Size cutpoint = 0;
837  bool lchain_jump = false; // set once the jump from the end of chain 1 has been added
838 
839  kinematics::FoldTree LA_H_foldtree ;
840  // Locate the last residue of chain 'L'. NOTE(review): chain(i+1) is read even when i == nres, and the chain(1) test does not depend on i.
841  for ( Size i = 1; i <= nres; ++i ) {
842  if(pdb_info->chain(1) != 'L'){
843  throw excn::EXCN_Msg_Exception("Chains are not named correctly or are not in the expected order");
844  break; // not reached: the throw above already leaves the function
845  }
846  if( (pdb_info->chain(i) == 'L') && (pdb_info->chain(i) != pdb_info->chain(i+1))) {
847  if(pdb_info->chain(i+1) != second_chain){
848  throw excn::EXCN_Msg_Exception("Chains are not named correctly or are not in the expected order");
849  break;
850  }
851  }
852  if( (pdb_info->chain(i) == 'L') && (pdb_info->chain(i+1) == second_chain)) {
853  cutpoint = i;
854  break;
855  }
856  }
857 
858  Size jump_pos1 ( geometry::residue_center_of_mass( pose, 1, cutpoint ) );
859  Size jump_pos2 ( geometry::residue_center_of_mass( pose, cutpoint+1, pose.conformation().chain_end( pose.chain(cutpoint+1) ) ) ); // centre of the chain that starts right after the cut
860 
861  //setup fold tree based on cutpoints and jump points
862  LA_H_foldtree.clear();
863  LA_H_foldtree.simple_tree( pose.total_residue() );
864  LA_H_foldtree.new_jump( jump_pos1, jump_pos2, cutpoint);
865 
866  Size chain_begin(0), chain_end(0);
867 
868  //rebuild jumps between the light chain and antigen chains
869  chain_begin = cutpoint+1;
870  chain_end = pose.conformation().chain_end( pose.chain(chain_begin) );
871  while (chain_end != pose.total_residue()){ // walk forwards chain by chain until the last residue is reached
872  chain_begin = chain_end+1;
873  if (!lchain_jump){ // first chain after the one following the cut: anchor its jump on the end of chain 1
874  LA_H_foldtree.new_jump( pose.conformation().chain_end( pose.chain(1) ), chain_begin, chain_end);
875  lchain_jump = true;
876  }
877  else{
878  LA_H_foldtree.new_jump( chain_end, chain_begin, chain_end);
879  }
880  chain_end = pose.conformation().chain_end( pose.chain(chain_begin) );
881  }
882 
883  LA_H_foldtree.reorder( 1 );
884  LA_H_foldtree.check_fold_tree();
885 
886  return LA_H_foldtree;
887 
888 
889 }
890 
     /// Build a MoveMap restricted to the given loops.
     /// @param the_loops     loops handed to select_loop_residues
     /// @param bb_only       when true, all chi angles stay fixed
     /// @param include_nb_sc forwarded as the include_neighbors flag of the
     ///                      side-chain selection
     /// @param nb_dist       forwarded to select_loop_residues (presumably the
     ///                      neighbour cut-off distance -- confirm)
     /// @return MoveMap with backbone freedom taken from the loop selection,
     ///         optional chi freedom, and jumps 1..the_loops.num_loop() fixed.
892  loops::Loops const & the_loops,
893  bool const & bb_only,
894  bool const & include_nb_sc,
895  Real const & nb_dist) const {
896  kinematics::MoveMap move_map ;
897 
     // Start from a fully rigid map.
898  move_map.clear();
899  move_map.set_chi( false );
900  move_map.set_bb( false );
901  utility::vector1< bool> bb_is_flexible( pose.total_residue(), false );
902  utility::vector1< bool> sc_is_flexible( pose.total_residue(), false );
903 
     // Backbone selection never includes neighbours.
904  select_loop_residues( pose, the_loops, false/*include_neighbors*/, bb_is_flexible, nb_dist);
905  move_map.set_bb( bb_is_flexible );
906  if (bb_only==false) {
     // Side-chain selection; neighbours are included only if include_nb_sc is set.
907  select_loop_residues( pose, the_loops, include_nb_sc/*include_neighbors*/, sc_is_flexible, nb_dist);
908  move_map.set_chi( sc_is_flexible );
909  }
     // One jump per loop is switched off (assumes jump ii belongs to loop ii -- TODO confirm).
910  for( Size ii = 1; ii <= the_loops.num_loop(); ++ii ){
911  move_map.set_jump( ii, false );
912  }
913 
914  return move_map;
915 }
916 
917 
     /// Same MoveMap as get_MoveMap_for_Loops() for the same arguments, except
     /// that jump 1 is movable and jumps 2..the_loops.num_loop()+1 are fixed.
     /// Parameters are forwarded unchanged to get_MoveMap_for_Loops().
     /// NOTE(review): presumably jump 1 is the docking jump and the loop jumps
     /// are shifted up by one in the fold tree used here -- confirm with caller.
919  loops::Loops const & the_loops,
920  bool const & bb_only,
921  bool const & include_nb_sc,
922  Real const & nb_dist) const {
923 
924  kinematics::MoveMap move_map = get_MoveMap_for_Loops(pose, the_loops, bb_only, include_nb_sc, nb_dist);
925 
     // Overrides the jump-1 setting made by get_MoveMap_for_Loops().
926  move_map.set_jump( 1, true );
927  for( Size ii = 2; ii <= the_loops.num_loop() +1 ; ++ii ){
928  move_map.set_jump( ii, false );
929  }
930 
931  return move_map;
932 }
933 
934 
935 
936 
937 //JQX: whether the pose is an antibody alone or an antibody-antigen complex does not matter; just include the CDRs and their neighbors
     // Returns the task factory produced by setup_packer_task(pose) with a
     // RestrictToInterface operation appended, built from the residues that
     // select_loop_residues flags for *loopsop_having_allcdrs_ with neighbours
     // included.
939 
940  vector1< bool> sc_is_packable( pose.total_residue(), false );
941  select_loop_residues( pose, *loopsop_having_allcdrs_, true/*include_neighbors*/, sc_is_packable);
942 
943  using namespace pack::task;
944  using namespace pack::task::operation;
     // NOTE(review): loop_residues is a copy of sc_is_packable that is only
     // referenced by the commented-out push_back below, so it is unused now.
945  // selecting movable c-terminal residues
946  ObjexxFCL::FArray1D_bool loop_residues( pose.total_residue(), false );
947  for( Size i = 1; i <= pose.total_residue(); ++i ) {
948  loop_residues(i) = sc_is_packable[i];
949  } // check mapping
950 
951  using namespace protocols::toolbox::task_operations;
     // tf is declared on a line that is not part of this listing.
953  tf= setup_packer_task(pose);
954 // tf->push_back( new RestrictToInterface(loop_residues) ); //JQX: not sure why we use loop_residues, instead of sc_is_packable
955  tf->push_back( new RestrictToInterface(sc_is_packable) );
956 
957 
958 
959  //pack::task::PackerTaskOP my_task2(tf->create_task_and_apply_taskoperations(pose));
960  //TR<<*my_task2<<std::endl; //exit(-1);
961 
962  return tf;
963 }
964 
     // Single-CDR variant: the residue mask comes from select_loop_residues
     // applied to *get_CDR_in_loopsop(cdr_name), neighbours included, and is
     // appended as a RestrictToInterface operation to the factory returned by
     // setup_packer_task(pose).
966  vector1< bool> sc_is_packable( pose.total_residue(), false );
967 
968  select_loop_residues( pose, *get_CDR_in_loopsop(cdr_name), true/*include_neighbors*/, sc_is_packable);
969  using namespace protocols::toolbox::task_operations;
970 
     // tf is declared on a line that is not part of this listing.
972  tf= setup_packer_task(pose);
973  tf->push_back( new RestrictToInterface(sc_is_packable) );
974 
975  return tf;
976 }
977 
978 // JQX:: originally written assuming Aroop numbering; the body below also fills in Chothia
     // Returns four vectors, in this order: CDR start positions, CDR stop
     // positions, packing-angle start positions, packing-angle stop positions.
     // CDR entries are ordered h1, h2, h3, l1, l2, l3; packing-angle entries are
     // ordered VL, VL, VH, VH.
     // NOTE(review): the Kabat, Enhanced_Chothia, AHO and IMGT branches are
     // empty, so all four returned vectors are empty for those schemes.
     // Throws excn::EXCN_Msg_Exception for any other value of numbering_scheme.
980 
981 
982  // define local variables
983  vector1<int> start, stop, pack_angle_start, pack_angle_stop;
984  vector1< vector1<int> > local_numbering_info;
985 
986  // doesn't hurt to clear all the contents, no matter they are empty or not
     // (all five containers were default-constructed just above, so these
     // calls have nothing to remove)
987  start.clear(); stop.clear(); pack_angle_start.clear(); pack_angle_stop.clear();
988  for (Size i=1;i<=local_numbering_info.size(); ++i){
989  local_numbering_info[i].clear();
990  }
991  local_numbering_info.clear();
992 
993 
994  // JQX: always make the heavy chain first, so that one can always use enum names
995 
996  //**********************************************************************************
997  // Aroop Numbering *
998  // citation:
999  //**********************************************************************************
1000  if(numbering_scheme == Aroop ){
1001  // Heavy Chain
1002  start.push_back(26); stop.push_back(35); //h1
1003  start.push_back(50); stop.push_back(65); //h2
1004  start.push_back(95); stop.push_back(102); //h3
1005  // Light Chain
1006  start.push_back(24); stop.push_back(34); //l1
1007  start.push_back(50); stop.push_back(56); //l2
1008  start.push_back(89); stop.push_back(97); //l3
1009  // VL-VH packing angle residues
1010  pack_angle_start.push_back(35); pack_angle_stop.push_back(38); //VL
1011  pack_angle_start.push_back(85); pack_angle_stop.push_back(88); //VL
1012  pack_angle_start.push_back(36); pack_angle_stop.push_back(39); //VH
1013  pack_angle_start.push_back(89); pack_angle_stop.push_back(92); //VH
1014  }
1015  //**********************************************************************************
1016  // Chothia Numbering *
1017  // citation:
1018  //**********************************************************************************
      // Differs from the Aroop table only in the h1 stop and the h2 start/stop.
1019  else if(numbering_scheme == Chothia ){
1020  // Heavy Chain
1021  start.push_back(26); stop.push_back(32); //h1
1022  start.push_back(52); stop.push_back(56); //h2
1023  start.push_back(95); stop.push_back(102); //h3
1024  // Light Chain
1025  start.push_back(24); stop.push_back(34); //l1
1026  start.push_back(50); stop.push_back(56); //l2
1027  start.push_back(89); stop.push_back(97); //l3
1028  // VL-VH packing angle residues
1029  pack_angle_start.push_back(35); pack_angle_stop.push_back(38); //VL
1030  pack_angle_start.push_back(85); pack_angle_stop.push_back(88); //VL
1031  pack_angle_start.push_back(36); pack_angle_stop.push_back(39); //VH
1032  pack_angle_start.push_back(89); pack_angle_stop.push_back(92); //VH
1033  }
1034 
1035  //**********************************************************************************
1036  // Kabat Numbering *
1037  // citation:
1038  //**********************************************************************************
      // Accepted but not filled in.
1039  else if(numbering_scheme == Kabat ){
1040  }
1041  //**********************************************************************************
1042  // Enhanced_Chothia Numbering *
1043  // Abhinandan, K.R. and Martin, A.C.R. (2008) Immunology, 45, 3832-3839. *
1044  //**********************************************************************************
      // Accepted but not filled in.
1045  else if(numbering_scheme == Enhanced_Chothia){
1046  }
1047  //**********************************************************************************
1048  // AHO Numbering *
1049  // citation:
1050  //**********************************************************************************
      // Accepted but not filled in.
1051  else if(numbering_scheme == AHO){
1052  }
1053  //**********************************************************************************
1054  // IMGT Numbering *
1055  // citation:
1056  //**********************************************************************************
      // Accepted but not filled in.
1057  else if(numbering_scheme == IMGT){
1058  }
1059  else{
1060  throw excn::EXCN_Msg_Exception("the numbering schemes can only be 'Aroop','Chothia','Kabat', 'Enhanced_Chothia', 'AHO', 'IMGT' !!!!!! ");
1061  }
1062 
      // Fixed layout of the result: [1]=start, [2]=stop, [3]=pack_angle_start, [4]=pack_angle_stop.
1063  local_numbering_info.push_back(start);
1064  local_numbering_info.push_back(stop);
1065  local_numbering_info.push_back(pack_angle_start);
1066  local_numbering_info.push_back(pack_angle_stop);
1067  return local_numbering_info;
1068 }
1069 
1070 
1071 
1072 
1073 
1074 
1075 
1076 
1077 
1078 /// TODO:
1079 //JQX: make Daisuke's code compatible with my code
1080 //
1081 
1082 ///////////////////////////////////////////////////////////////////////////
1083 /// @author: Daisuke Kuroda (dkuroda1981@gmail.com) 06/18/2012
1084 ///
1085 /// @brief: Identify 3 CDRs from a sequence. Automatically judge heavy or light chains (I hope!).
1086 /// The input can be either a light chain, a heavy chain or another sequence.
1087 ///
     /// @details Works purely on the one-letter sequence in querychain by
     /// searching for short framework motifs next to each CDR. The light-chain
     /// search runs first; the heavy-chain search runs only when at least one
     /// light-chain CDR was not found. All positions are 0-based string offsets.
     /// In the visible code the results (CDR lengths and sequences) are only
     /// written to the tracer TR; nothing is returned or stored.
     ///
1088 /// @last_modified 08/28/2012 by DK
1089 ///////////////////////////////////////////////////////////////////////////
1091 
      // NOTE(review): h2found starts at 0 ("found") and is never set to 1 below,
      // so the HEAVY report branch does not actually verify H2.
1092  int l1found = 0, l2found = 0, l3found = 1, h1found = 1, h2found = 0, h3found = 1; // 0 if exist; otherwise 1.
1093  int lenl1 = 0, lenl2 = 0, lenl3 = 0, lenh1 = 0, lenh2 = 0, lenh3 = 0;
1094  int posl1_s = 0, posl1_e = 0, posl2_s = 0, posl2_e = 0, posl3_s = 0, posl3_e = 0;
1095  int posh1_s = 0, posh1_e = 0, posh2_s = 0, posh2_e = 0, posh3_s = 0, posh3_e = 0;
1096  // int i = 0, // Unused variable causes warning.
1097  int k = 0, l = 0, m = 0, n = 0;
1098 
      // Framework-region bookkeeping; these values are computed below but are
      // not printed or returned anywhere in this function.
1099  int pos_fr1_s = 0, pos_fr1_e = 0, pos_fr2_s = 0, pos_fr2_e = 0;
1100  int pos_fr3_s = 0, pos_fr3_e = 0, pos_fr4_s = 0, pos_fr4_e = 0;
1101  int len_fr1 = 0, len_fr2 = 0, len_fr3 = 0, len_fr4 = 0;
1102 
1103  int len;
1104  std::string check;
1105 
1106  std::string seql1, seql2, seql3, seqh1, seqh2, seqh3;
1107  std::string frl3, frh1, frh3;
1108 
1109  std::string seq_fr1, seq_fr2, seq_fr3, seq_fr4;
1110 
1111  // For L3: [FVI]-[GAEV]-X-[GY]
1112  std::string p1_l3[] = {"F","V","I"};
1113  std::string p2_l3[] = {"G","A","E","V"};
1114  std::string p3_l3[] = {"G","A","P","C","D","E","Q","N","R","K","H","W","Y","F","M","T","V","I","S","L"};
1115  std::string p4_l3[] = {"G","Y"};
1116 
1117  // For H1
1118  std::string p1_h1[] = {"W","L"};
      // NOTE(review): 11 entries here, but the H1 loop below iterates k < 10,
      // so the last entry ("W") is never tried.
1119  std::string p2_h1[] = {"I","V","F","Y","A","M","L","N","G","E","W"};
1120  std::string p3_h1[] = {"R","K","Q","V","N","C"};
1121  std::string p4_h1[] = {"Q","K","H","E","L","R"};
1122 
1123  // For H3 W-[GA]-X-[DRG]
1124  // std::string p1_h3[] = {"W","V"}; // Unused variable causes warning.
1125  std::string p2_h3[] = {"G","A","C"};
1126  std::string p3_h3[] = {"A","P","C","D","E","Q","N","R","K","H","W","Y","F","M","T","V","I","S","L","G"};
1127  std::string p4_h3[] = {"G","R","D","Q","V"};
1128 
1129  // Input sequence is here
      // Prefix windows used by the searches: 140 chars (H3), 110 chars (L3),
      // 50 chars (L1 / H1).
1130  std::string querychain2(querychain, 0,140);
1131  std::string querychain3(querychain, 0,110);
1132 
1133  std::string querychain_first(querychain, 0, 50);
      // NOTE(review): querychain_last is not used anywhere below, and the
      // std::string(str, pos) constructor throws std::out_of_range when
      // pos > str.size(), i.e. for inputs shorter than 70 characters.
1134  std::string querychain_last(querychain, 70);
1135 
1136  //TR << querychain2 << endl;
1137  //TR << querychain_last << endl;
1138 
      // Coarse label derived from the sequence length only; used in the report.
1139  len = querychain.length();
1140  if(len < 130){
1141  check = "Fv";
1142  }else if(len < 250){
1143  check = "Fab";
1144  }else{
1145  check = "Weird";
1146  }
1147 
1148  //TR << "*** Query sequence ***" << endl;
1149  //TR << querychain << endl;
1150  //TR << endl;
1151 
1152  /*****************************************************/
1153  /***************** Is it light chain? ****************/
1154  /*****************************************************/
1155  /* L1 search Start */
      // L1 ends just before the first of these three-letter motifs found in the
      // first 50 residues; motifs are tried in the order listed and the first
      // one present wins.
1156  if(querychain_first.find("WYL") != std::string::npos){
1157  posl1_e = querychain_first.find("WYL") - 1;
1158  }else if(querychain_first.find("WLQ") != std::string::npos){
1159  posl1_e = querychain_first.find("WLQ") - 1;
1160  }else if(querychain_first.find("WFQ") != std::string::npos){
1161  posl1_e = querychain_first.find("WFQ") - 1;
1162  }else if(querychain_first.find("WYQ") != std::string::npos){
1163  posl1_e = querychain_first.find("WYQ") - 1;
1164  }else if(querychain_first.find("WYH") != std::string::npos){
1165  posl1_e = querychain_first.find("WYH") - 1;
1166  }else if(querychain_first.find("WVQ") != std::string::npos){
1167  posl1_e = querychain_first.find("WVQ") - 1;
1168  }else if(querychain_first.find("WVR") != std::string::npos){
1169  posl1_e = querychain_first.find("WVR") - 1;
1170  }else if(querychain_first.find("WWQ") != std::string::npos){
1171  posl1_e = querychain_first.find("WWQ") - 1;
1172  }else if(querychain_first.find("WVK") != std::string::npos){
1173  posl1_e = querychain_first.find("WVK") - 1;
1174  }else if(querychain_first.find("WLL") != std::string::npos){
1175  posl1_e = querychain_first.find("WLL") - 1;
1176  }else if(querychain_first.find("WFL") != std::string::npos){
1177  posl1_e = querychain_first.find("WFL") - 1;
1178  }else if(querychain_first.find("WVF") != std::string::npos){
1179  posl1_e = querychain_first.find("WVF") - 1;
1180  }else if(querychain_first.find("WIQ") != std::string::npos){
1181  posl1_e = querychain_first.find("WIQ") - 1;
1182  }else if(querychain_first.find("WYR") != std::string::npos){
1183  posl1_e = querychain_first.find("WYR") - 1;
1184  }else if(querychain_first.find("WNQ") != std::string::npos){
1185  posl1_e = querychain_first.find("WNQ") - 1;
1186  }else if(querychain_first.find("WHL") != std::string::npos){
1187  posl1_e = querychain_first.find("WHL") - 1;
1188  }else if(querychain_first.find("WYM") != std::string::npos){ // Add 06/10/2012
1189  posl1_e = querychain_first.find("WYM") - 1;
1190  }else{
1191  l1found = 1;
1192  }
1193 
1194  if(l1found != 1){
      // L1 starts right after the first "C" of the 50-residue window.
      // NOTE(review): the result of find("C") is not checked against npos.
1195  posl1_s = querychain_first.find("C") + 1;
1196  lenl1 = posl1_e - posl1_s + 1;
1197  seql1 = querychain_first.substr(posl1_s,lenl1);
1198 
1199  pos_fr1_s = 0;
1200  pos_fr1_e = posl1_s - 1;
1201  len_fr1 = pos_fr1_e - pos_fr1_s + 1;
1202  seq_fr1 = querychain_first.substr(pos_fr1_s,len_fr1);
1203  }
1204  /* L1 search Finish */
1205 
1206  /* L2 search start */
      // L2 is not searched for: it is taken as the 7 residues that start 16
      // positions after the end of L1.
1207  if(l1found != 1){
1208  posl2_s = posl1_e + 16;
1209  posl2_e = posl2_s + 6;
1210  lenl2 = posl2_e - posl2_s + 1;
1211  seql2 = querychain.substr(posl2_s,lenl2);
1212 
1213  pos_fr2_s = posl1_e + 1;
1214  pos_fr2_e = posl2_s - 1;
1215  len_fr2 = pos_fr2_e - pos_fr2_s + 1;
1216  seq_fr2 = querychain.substr(pos_fr2_s,len_fr2);
1217  }else{
1218  l2found = 1;
1219  }
1220  /* L2 search end */
1221 
1222  /* L3 search Start */
1223  //string p1_l3[] = {"F","V","I"};
1224  //string p2_l3[] = {"G","A","E","V"};
1225  //string p3_l3[] = {"G","A","P","C","D","E","Q","N","R","K","H","W","Y","F","M","T","V","I","S","L"};
1226  //string p4_l3[] = {"G","Y"};
      // NOTE(review): l, m, n and k are forced to their loop bounds right after
      // the first sweep over k, so only l == 0, m == 0, n == 0 is ever
      // evaluated; the motif actually tested is F-G-X-G and the other
      // p1/p2/p4 alternatives are never tried.
1227  for(l = 0;l < 3; ++l){
1228  for(m = 0;m < 4; ++m){
1229  for(n = 0;n < 2; ++n){
1230  for(k = 0;k < 20; ++k){
1231  //frl3 = "FG" + p3_l3[k] + "G";
1232  //frl3 = p1_l3[l] + "G" + p3_l3[k] + "G"; //[VF]GXG
1233  frl3 = p1_l3[l] + p2_l3[m] + p3_l3[k] + p4_l3[n]; //[VF][AG]XG
1234 
      // The motif is searched from offset 80 of the 110-residue window; L3 runs
      // from just after the first "C" at or beyond offset 80 up to the motif.
1235  if(querychain3.find(frl3, 80) != std::string::npos){
1236  posl3_e = querychain3.find(frl3,80) - 1;
1237  posl3_s = querychain3.find("C",80) + 1;
1238  lenl3 = posl3_e - posl3_s + 1;
1239 
1240  //TR << frl3 << "\t" << posl3_s << "\t" << lenl3 << endl;
1241 
1242  seql3 = querychain3.substr(posl3_s,lenl3);
1243 
      // A hit counts only if L3 is longer than 4 residues.
      // NOTE(review): this break leaves the k loop before the framework
      // positions below are computed, so they are filled only for rejected hits.
1244  if(seql3.length() > 4){
1245  l3found = 0;
1246  break;
1247  }else{
1248  l3found = 1;
1249  }
1250 
1251  pos_fr3_s = posl2_e + 1;
1252  pos_fr3_e = posl3_s - 1;
1253  pos_fr4_s = posl3_e + 1;
1254  pos_fr4_e = pos_fr4_s + 5;
1255  len_fr3 = pos_fr3_e - pos_fr3_s + 1;
1256  len_fr4 = pos_fr4_e - pos_fr4_s + 1;
1257  seq_fr3 = querychain3.substr(pos_fr3_s,len_fr3);
1258  seq_fr4 = querychain3.substr(pos_fr4_s,len_fr4);
1259  }
1260  }
1261 
      // Executed unconditionally after the k loop: terminates all outer loops.
1262  l = 3;
1263  m = 4;
1264  n = 2;
1265  k = 20;
1266  }
1267  }
1268  }
1269  /* L3 search Finish */
1270 
1271  /*****************************************************/
1272  /***************** Is it heavy chain? ****************/
1273  /*****************************************************/
      // The heavy-chain search runs only if some light-chain CDR is missing.
1274  if(l1found == 1 || l2found == 1 || l3found == 1){
1275  /* H1 search Start */
1276  //string p1_h1[] = {"W","L"};
1277  //string p2_h1[] = {"I","V","F","Y","A","M","L","N","G","E"};
1278  //string p3_h1[] = {"R","K","Q","V","N","C"};
1279  //string p4_h1[] = {"Q","K","H","E","L","R"};
      // Try each four-letter H1 framework motif in the first 50 residues; on
      // the first hit every counter is pushed to its bound to leave all loops.
1280  for(n = 0; n < 2; ++n){
1281  for(l = 0; l < 6; ++l){
1282  for(m = 0; m < 6; ++m){
1283  for(k = 0;k < 10; ++k){
1284  //frh1 = "W" + p2_h1[k] + p3_h1[l] + p4_h1[m];
1285  frh1 = p1_h1[n] + p2_h1[k] + p3_h1[l] + p4_h1[m];
1286 
1287  if(querychain_first.find(frh1, 0) != std::string::npos){
1288  posh1_e = querychain_first.find(frh1, 0) - 1;
1289  h1found = 0;
1290  n = 2;
1291  l = 6;
1292  m = 6;
1293  k = 10;
1294  }
1295  }
1296  }
1297  }
1298  }
1299 
1300  if(h1found != 1){
      // H1 starts 4 positions after the first "C" of the 50-residue window.
      // NOTE(review): the result of find("C") is not checked against npos.
1301  posh1_s = querychain_first.find("C") + 4;
1302  lenh1 = posh1_e - posh1_s + 1;
1303  seqh1 = querychain_first.substr(posh1_s, lenh1);
1304 
1305  pos_fr1_s = 0;
1306  pos_fr1_e = posh1_s - 1;
1307  len_fr1 = pos_fr1_e - pos_fr1_s + 1;
1308  seq_fr1 = querychain_first.substr(pos_fr1_s,len_fr1);
1309  }
1310  /* H1 search Finish */
1311 
1312  /* H3 search Start */
1313  //string p1_h3[] = {"W","V"};
1314  //string p2_h3[] = {"G","A","C"};
1315  //string p3_h3[] = {"A","P","C","D","E","Q","N","R","K","H","W","Y","F","M","T","V","I","S","L","G"};
1316  //string p4_h3[] = {"G","R","D","Q","V"};
      // H3 ends just before a W-[GAC]-X-[GRDQV] motif located at or after
      // offset 80 of the 140-residue window.
1317  for(m = 0;m < 3; ++m){
1318  for(l = 0;l < 5; ++l){
1319  for(k = 0;k < 20; ++k){
1320  //frh3 = "WG" + p3_h3[k] + p4_h3[l];
1321  frh3 = "W" + p2_h3[m] + p3_h3[k] + p4_h3[l];
1322 
1323  if(querychain2.find(frh3, 80) != std::string::npos){
1324  posh3_e = querychain2.find(frh3,80) - 1;
1325  h3found = 0;
1326  m = 3;
1327  l = 5;
1328  k = 20;
1329  }
1330  }
1331  }
1332  }
1333 
      // H3 starts 3 positions after the first "C" at or beyond offset 80;
      // without such a "C" the H3 result is discarded.
1334  if(querychain2.find("C", 80) != std::string::npos){
1335  posh3_s = querychain2.find("C", 80) + 3;
1336  }else{
1337  h3found = 1;
1338  }
1339 
1340  if(h3found != 1){
1341  lenh3 = posh3_e - posh3_s + 1;
1342  seqh3 = querychain2.substr(posh3_s,lenh3);
1343  }
1344  /* H3 search Finish */
1345 
1346  /* H2 search start */
      // H2 is derived, not searched: it starts 15 positions after the end of
      // H1 and ends 33 positions before the start of H3.
1347  if(h1found != 1 && h3found != 1){
1348  posh2_s = posh1_e + 15;
1349  posh2_e = posh3_s - 33;
1350  lenh2 = posh2_e - posh2_s + 1;
1351  seqh2 = querychain.substr(posh2_s,lenh2);
1352 
1353  pos_fr2_s = posh1_e + 1;
1354  pos_fr2_e = posh2_s - 1;
1355  pos_fr3_s = posh2_e + 1;
1356  pos_fr3_e = posh3_s - 1;
1357  pos_fr4_s = posh3_e + 1;
1358  pos_fr4_e = pos_fr4_s + 5;
1359  len_fr2 = pos_fr2_e - pos_fr2_s + 1;
1360  len_fr3 = pos_fr3_e - pos_fr3_s + 1;
1361  len_fr4 = pos_fr4_e - pos_fr4_s + 1;
1362  seq_fr2 = querychain.substr(pos_fr2_s,len_fr2);
1363  seq_fr3 = querychain.substr(pos_fr3_s,len_fr3);
1364  seq_fr4 = querychain.substr(pos_fr4_s,len_fr4);
1365  }
1366  /* H2 search end */
1367  }
1368 
      // Report: tab-separated CDR lengths and sequences, the length label, and
      // the chain type.
      // NOTE(review): the third branch (all six CDRs found) can never run --
      // whenever its condition holds, the first branch has already matched.
1369  if(l1found == 0 && l2found == 0 && l3found == 0){
1370  TR << lenl1 << "\t" << lenl2 << "\t" << lenl3 << "\t";
1371  TR << seql1 << "\t" << seql2 << "\t" << seql3 << "\t" << check << "\tLIGHT" << std::endl;
1372  }else if(h1found == 0 && h2found == 0 && h3found == 0){
1373  TR << lenh1 << "\t" << lenh2 << "\t" << lenh3 << "\t";
1374  TR << seqh1 << "\t" << seqh2 << "\t" << seqh3 << "\t" << check << "\tHEAVY" << std::endl;
1375  }else if(l1found == 0 && l2found == 0 && l3found == 0 && h1found == 0 && h2found == 0 && h3found == 0){
1376  TR << lenl1 << "\t" << lenl2 << "\t" << lenl3 << "\t";
1377  TR << lenh1 << "\t" << lenh2 << "\t" << lenh3 << "\t";
1378  TR << seql1 << "\t" << seql2 << "\t" << seql3 << "\t" << check << "\tLIGHT" << "\t";
1379  TR << seqh1 << "\t" << seqh2 << "\t" << seqh3 << "\t" << check << "\tHEAVY" << std::endl;
1380  }else{
1381  TR << "Some CDRs seem to be missing!\t" << querychain << "\t";
1382  TR << lenh1 << "\t" << lenh2 << "\t" << lenh3 << "\t";
1383  TR << "H1:" << seqh1 << "\tH2: " << seqh2 << "\tH3 " << seqh3 << "\t" << posh3_s << std::endl;
1384  }
1385 
1386  // return 0;
1387 }
1388 
1389 
1390 
1391 
      /// Collect the characters of ab_sequence_ that cover one CDR loop plus
      /// optional flanking residues.
      /// @param left_stem  number of positions to include before the_loop.start()
      /// @param right_stem number of positions to include after the_loop.stop()
      /// @return the characters ab_sequence_[start-left_stem .. stop+right_stem],
      ///         in order.
      /// NOTE(review): no bounds check is made here; if Size is unsigned (TODO
      /// confirm) then left_stem >= the_loop.start() wraps the start index.
1393  Size left_stem ,
1394  Size right_stem ) const {
1395  vector1<char> sequence;
1396  loops::Loop the_loop = get_CDR_loop(cdr_name);
1397  /// JQX: the pose number in the loop should be consistent with the number in the ab_sequence_
1398  for (Size i=the_loop.start()-left_stem; i<=the_loop.stop()+right_stem; ++i){
1399  sequence.push_back( ab_sequence_[i] );
1400  }
1401  return sequence;
1402 }
1403 
1404 
      // Returns the table of CDR names in the order "H1","H2","H3","L1","L2","L3".
      // The table is allocated on the first call and kept for later calls; it is
      // never deleted in the visible code.
      // NOTE(review): nothing in the visible code synchronises the first-call
      // initialisation.
1406  static vector1<std::string> *string_cdr_name = 0; /// JQX: the initialisation to null happens only once
1407  if(string_cdr_name==0){
1408  /// JQX: reached on the first call only
1409  string_cdr_name=new vector1<std::string>;
1410  string_cdr_name->push_back("H1"); string_cdr_name->push_back("H2");string_cdr_name->push_back("H3");
1411  string_cdr_name->push_back("L1"); string_cdr_name->push_back("L2");string_cdr_name->push_back("L3");
1412  }
1413  return *string_cdr_name;
1414 }
1415 
      // Returns the table of H3 base type names in the order "KINKED",
      // "EXTENDED", "NEUTRAL". The table is allocated on the first call and kept
      // for later calls; it is never deleted in the visible code.
      // NOTE(review): nothing in the visible code synchronises the first-call
      // initialisation.
1417  static vector1<std::string> *string_h3_base_type = 0; /// JQX: the initialisation to null happens only once
1418  if(string_h3_base_type==0){
1419  /// JQX: reached on the first call only
1420  string_h3_base_type=new vector1<std::string>;
1421  string_h3_base_type->push_back("KINKED");
1422  string_h3_base_type->push_back("EXTENDED");
1423  string_h3_base_type->push_back("NEUTRAL");
1424  }
1425  return *string_h3_base_type;
1426 }
1427 
      // Returns the table of numbering scheme names in the order "Aroop",
      // "Chothia", "Kabat", "Enhanced_Chothia", "AHO", "IMGT". The table is
      // allocated on the first call and kept for later calls; it is never
      // deleted in the visible code.
      // NOTE(review): nothing in the visible code synchronises the first-call
      // initialisation.
1429  static vector1<std::string> *string_numbering_scheme = 0; /// JQX: the initialisation to null happens only once
1430  if(string_numbering_scheme==0){
1431  /// JQX: reached on the first call only
1432  string_numbering_scheme=new vector1<std::string>;
1433  string_numbering_scheme->push_back("Aroop");
1434  string_numbering_scheme->push_back("Chothia");
1435  string_numbering_scheme->push_back("Kabat");
1436  string_numbering_scheme->push_back("Enhanced_Chothia");
1437  string_numbering_scheme->push_back("AHO");
1438  string_numbering_scheme->push_back("IMGT");
1439  }
1440  return *string_numbering_scheme;
1441 }
1442 
1443 
      // Returns a function-local static score function pointer that is filled
      // in while it is still null, so later calls return the same pointer.
      // The statement that creates the score function (original line 1447) is
      // not part of this listing.
1445  static scoring::ScoreFunctionOP pack_scorefxn = 0;
1446  if(pack_scorefxn == 0){
1448  }
1449  return pack_scorefxn;
1450 }
      // Returns a function-local static score function pointer. While it is
      // still null it is created from "docking" / "docking_min" and the
      // chainbreak weight (1.0) and overlap_chainbreak weight (10/3) are set;
      // later calls return the same pointer.
1452  static scoring::ScoreFunctionOP dock_scorefxn = 0;
1453  if(dock_scorefxn == 0){
1454  dock_scorefxn = core::scoring::ScoreFunctionFactory::create_score_function( "docking", "docking_min" );
1455  dock_scorefxn->set_weight( core::scoring::chainbreak, 1.0 );
1456  dock_scorefxn->set_weight( core::scoring::overlap_chainbreak, 10./3. );
1457  }
1458  return dock_scorefxn;
1459 }
      // Returns a function-local static score function pointer. While it is
      // still null it is created from "cen_std" / "score4L" and the chainbreak
      // weight is set to 10/3; later calls return the same pointer.
1461  static scoring::ScoreFunctionOP loopcentral_scorefxn = 0;
1462  if(loopcentral_scorefxn == 0){
1463  loopcentral_scorefxn = core::scoring::ScoreFunctionFactory::create_score_function( "cen_std", "score4L" );
1464  loopcentral_scorefxn->set_weight( scoring::chainbreak, 10./3. );
1465  }
1466  return loopcentral_scorefxn;
1467 }
      // Returns a function-local static score function pointer. While it is
      // still null it is created from "standard" / "score12" and the chainbreak
      // (1.0), overlap_chainbreak (10/3) and dihedral_constraint (1.0) weights
      // are set; later calls return the same pointer.
1469  static scoring::ScoreFunctionOP loophighres_scorefxn = 0;
1470  if(loophighres_scorefxn == 0){
1471  loophighres_scorefxn = scoring::ScoreFunctionFactory::create_score_function("standard", "score12" );
1472  loophighres_scorefxn->set_weight( scoring::chainbreak, 1.0 );
1473  loophighres_scorefxn->set_weight( scoring::overlap_chainbreak, 10./3. );
1474  loophighres_scorefxn->set_weight(scoring::dihedral_constraint, 1.0);
1475  }
1476  return loophighres_scorefxn;
1477 }
1478 
1479 
1480 
1481 
1482 
1483 
1484 
1485 
1486 /// @details Write this AntibodyInfo to the given stream using operator<<
1487 void AntibodyInfo::show( std::ostream & out ) {
1488  out << *this;
1489 }
1490 
      /// Print a framed, human-readable summary of an AntibodyInfo: antibody type
      /// (camelid or regular), predicted H3 base type, and for each CDR its name,
      /// length, sequence and loop definition.
      /// @return the stream that was passed in.
1491 std::ostream & operator<<(std::ostream& out, const AntibodyInfo & ab_info ) {
1492 
1493  using namespace ObjexxFCL::fmt;
1494  std::string line_marker = "///";
1495  out << "////////////////////////////////////////////////////////////////////////////////" << std::endl;
1496  out << line_marker << A( 47, "Rosetta Antibody Info" ) << space( 27 ) << line_marker << std::endl;
1497  out << line_marker << space( 74 ) << line_marker << std::endl;
1498 
1499  out << line_marker << " Antibody Type:";
1500  if(ab_info.is_Camelid()){ out << " Camelid Antibody"<< std::endl;}
1501  else { out << " Regular Antibody"<< std::endl;}
1502 
      // The predicted base type is used as an index into the name table.
1503  out << line_marker << " Predict H3 Cterminus Base:";
1504  out <<" "<<ab_info.get_string_h3_base_type()[ab_info.get_Predicted_H3BaseType()]<<std::endl;
1505 
      // NOTE(review): unlike the spacer line printed earlier, this one has no
      // closing line_marker.
1506  out << line_marker << space( 74 ) << std::endl;
      // Per-CDR section; the loop header that introduces i (original line 1507)
      // is not part of this listing.
1508  out << line_marker << " "+ab_info.get_CDR_Name(i)+" info: "<<std::endl;
1509  out << line_marker << " length: "<< ab_info.get_CDR_loop(i).length() <<std::endl;
1510  out << line_marker << " sequence: ";
      // get_CDR_Sequence_with_Stem(i,0,0) is called again for every loop test
      // and every character printed.
1511  for (Size j=1;j<=ab_info.get_CDR_Sequence_with_Stem(i,0,0).size();++j) {
1512  out << ab_info.get_CDR_Sequence_with_Stem(i,0,0)[j] ;
1513  }
1514  out <<std::endl;
1515  out << line_marker << " loop_info: "<< ab_info.get_CDR_loop(i)<<std::endl;
1516  }
1517  out << "////////////////////////////////////////////////////////////////////////////////" << std::endl;
1518  return out;
1519 }
1520 
1521 
1522 
1523 } // namespace antibody2
1524 } // namespace protocols