Rosetta 3.5
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
ANN.cc
Go to the documentation of this file.
1 // -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
2 /* */
/* ---- TALOS C++ version ---- */
4 /* TALOS: Torsion Angle Likeness Optimized By Shifts. */
5 /* Yang Shen, Gabriel Cornilescu, Frank Delaglio, and Ad Bax */
6 /* NIH Laboratory of Chemical Physics */
7 /* version, 1.00 (build 2010.0607.00) */
8 /* */
9 /* for any problem, please contact */
10 /* shenyang@niddk.nih.gov */
11 /* */
12 /******************************************************************************/
13 
14 
15 /* ANN.cpp: class for a simple Artificial Neural Network */
16 
17 #include <boost/unordered_map.hpp>
18 #include <protocols/sparta/ANN.hh>
19 // Package Headers
20 #include <core/types.hh>
21 // Project Headers
22 
23 // Utility headers
24 #include <basic/Tracer.hh>
25 
26 // AUTO-REMOVED #include <cmath>
27 #include <stdio.h>
28 
29 #include <utility/vector0.hh>
30 
31 #include <protocols/sparta/GDB.hh>
32 #include <utility/vector1.hh>
33 
34 
35 
36 static basic::Tracer tr("protocols.sparta");
37 
38 namespace protocols {
39 namespace sparta {
40 
41 using namespace std;
42 using namespace core;
43 
44 #define MAX(x,y) ((x)>(y)?(x):(y))
45 #define MIN(x,y) ((x)<(y)?(x):(y))
46 
{
 // Default network geometry: level-1 ANN uses 96 input, 20 hidden and 3
 // output nodes; level-2 ANN uses 9/6/3 (only level 1 is exercised by
 // runSpartaANN() in this file).
 N1_NODE_I = 96; N1_NODE_H = 20; N1_NODE_O = 3;
 N2_NODE_I = 9; N2_NODE_H = 6; N2_NODE_O = 3;
 input_code = 0;

 // determine the platform-specific path separator used when building
 // database file names in loadWeights()
 getSlashChar();
}
55 
56 
57 // constructor with initial setup of weighting factors' database
58 ANN::ANN(const string& dPATH, const string& dNAME_PREFIX)
59 {
60  ANN();
61  getSlashChar();
62 
63  N1_NODE_I = 96; N1_NODE_H = 20; N1_NODE_O = 3;
64  N2_NODE_I = 9; N2_NODE_H = 6; N2_NODE_O = 3;
65  input_code = 0;
66 
67  DB_PATH = dPATH;
68  DB_NAME_PREFIX = dNAME_PREFIX;
69  loadWeights();
70 }
71 
72 
73 // constructor with initial setup of weighting factors' database and number of node in each layer
74 ANN::ANN(int N1_nodeI, int N1_nodeH, int N1_nodeO, const string& dPATH, const string& dNAME_PREFIX)
75 {
76  ANN();
77  getSlashChar();
78 
79  N1_NODE_I = N1_nodeI;
80  N1_NODE_H = N1_nodeH;
81  N1_NODE_O = N1_nodeO;
82  input_code = 0;
83 
84  N2_NODE_I = 9; N2_NODE_H = 6; N2_NODE_O = 3;
85 
86  DB_PATH = dPATH;
87  DB_NAME_PREFIX = dNAME_PREFIX;
88  loadWeights();
89 }
90 
91 
92 // constructor with initial setup of weighting factors' database and number of node in each layer
93 ANN::ANN(int N1_nodeI, int N1_nodeH, int N1_nodeO, int N2_nodeI, int N2_nodeH, int N2_nodeO, const string& dPATH, const string& dNAME_PREFIX)
94 {
95  ANN();
96  getSlashChar();
97 
98  N1_NODE_I = N1_nodeI;
99  N1_NODE_H = N1_nodeH;
100  N1_NODE_O = N1_nodeO;
101  input_code = 0;
102 
103  N2_NODE_I = N2_nodeI; N2_NODE_H = N2_nodeH; N2_NODE_O = N2_nodeO;
104 
105  DB_PATH = dPATH;
106  DB_NAME_PREFIX = dNAME_PREFIX;
107  loadWeights();
108 }
109 
110 
111 // constructor with initial setup of weighting factors' database and number of node in each layer
112 void ANN::init(int N1_nodeI, int N1_nodeH, int N1_nodeO, int N2_nodeI, int N2_nodeH, int N2_nodeO, const string& dPATH, const string& dNAME_PREFIX)
113 {
114  //ANN();
115  getSlashChar();
116 
117  N1_NODE_I = N1_nodeI;
118  N1_NODE_H = N1_nodeH;
119  N1_NODE_O = N1_nodeO;
120  input_code = 0;
121 
122  N2_NODE_I = N2_nodeI; N2_NODE_H = N2_nodeH; N2_NODE_O = N2_nodeO;
123 
124  DB_PATH = dPATH;
125  DB_NAME_PREFIX = dNAME_PREFIX;
126  loadWeights();
127 }
128 
129 
131 {
132  input_code = c;
133 }
134 
135 
136 void ANN::loadWeights() // load weighting and bias
137 {
138  string wName;
139 
140  //load weights and bias for 1st level ANN
141  wName = DB_PATH+slash_char+DB_NAME_PREFIX+".level1.WI.tab"; // file name of first weight/bias for input level
142  loadWeightBias3(wName, WI_1, BI_1, WI_2, BI_2, WI_3, BI_3, N1_NODE_I, N1_NODE_I, N1_NODE_I);
143 
144  wName = DB_PATH+slash_char+DB_NAME_PREFIX+".level1.WL1.tab"; // file name of first weight/bias for connecting input and hidden level
145  loadWeightBias3(wName, WL1_1, BL1_1, WL1_2, BL1_2, WL1_3, BL1_3, N1_NODE_H, N1_NODE_I, N1_NODE_H);
146 
147  wName = DB_PATH+slash_char+DB_NAME_PREFIX+".level1.WL2.tab"; // file name of first weight/bias for connecting input and hidden level
148  loadWeightBias3(wName, WL2_1, BL2_1, WL2_2, BL2_2, WL2_3, BL2_3, N1_NODE_O, N1_NODE_H, N1_NODE_O);
149 
150  /* for a 2-level NN
151  //load weights and bias for 2nd level ANN
152  wName = DB_PATH+slash_char+DB_NAME_PREFIX+".level2.WI.tab"; // file name of first weight/Bias for input level
153  loadWeightBias3(wName, W2I_1, B2I_1, W2I_2, B2I_2, W2I_3, B2I_3, N2_NODE_I, N2_NODE_I, N2_NODE_I);
154 
155  wName = DB_PATH+slash_char+DB_NAME_PREFIX+".level2.WL1.tab"; // file name of first weight/Bias for connecting input and hidden level
156  loadWeightBias3(wName, W2L1_1, B2L1_1, W2L1_2, B2L1_2, W2L1_3, B2L1_3, N2_NODE_H, N2_NODE_I, N2_NODE_H);
157 
158  wName = DB_PATH+slash_char+DB_NAME_PREFIX+".level2.WL2.tab"; // file name of first weight/Bias for connecting input and hidden level
159  loadWeightBias3(wName, W2L2_1, B2L2_1, W2L2_2, B2L2_2, W2L2_3, B2L2_3, N2_NODE_O, N2_NODE_H, N2_NODE_O);
160  */
161 }
162 
163 
164 
165 // load weighting (N_W_row*N_W_col) and bias (N_B) from a given file contains all three sets data
void ANN::loadWeightBias3(const string& fName, boost::unordered_map<int, utility::vector0<float> > &W1, utility::vector0<float> &B1,
 boost::unordered_map<int, utility::vector0<float> > &W2, utility::vector0<float> &B2, boost::unordered_map<int, utility::vector0<float> > &W3, utility::vector0<float> &B3,
 int N_W_row, int N_W_col, int /* N_B */)
{
 // Parse one ".tab" file holding three N_W_row x N_W_col weight matrices
 // stacked vertically (rows 0..row-1 -> W1, row..2*row-1 -> W2,
 // 2*row..3*row-1 -> W3); each table row also carries a "b" column whose
 // value is appended to the matching bias vector B1/B2/B3.
 string str;
 //cout << "Reading ANN Weights and Bias Set 1 " << fName << endl;
 GDB W_Tab( fName );
 //cout << W1_Tab.Entries.size() << "\n";

 int index = 0;
 int row = N_W_row, col = N_W_col;
 // NOTE(review): 'it' and 'buf' are not declared in this function --
 // presumably class members; using a member as a loop iterator makes this
 // method non-reentrant.  TODO confirm against ANN.hh.
 for ( it = W_Tab.Entries.begin(); it != W_Tab.Entries.end(); it++ )
 {
 int check = index/row; //cout << check << endl; // which of the three stacked matrices this row belongs to
 for( int i = 0; i < col; i++ )
 {
 str = itoa(i+1,buf); // weight columns are named "1".."col"
 float w = atof((it->second[str]).c_str());
 if( check == 0) W1[ index ].push_back( w ); // assign to weight matrix 1
 else if( check == 1) W2[ index-row ].push_back( w ); // assign to weight matrix 2
 else if( check == 2) W3[ index-row*2 ].push_back( w ); // assign to weight matrix 3
 else tr.Error << "Wrong size for matrix " << fName << " ... \n";
 }

 if( check == 0) B1.push_back( atof((it->second["b"]).c_str()) );
 else if( check == 1) B2.push_back( atof((it->second["b"]).c_str()) );
 else if( check == 2) B3.push_back( atof((it->second["b"]).c_str()) );
 index++;
 }
 // if( index > row ) tr.Error << "Wrong size for matrix " << fName << " ... \n";

}
198 
199 
200 
201 
202 // perform 1st level ANN calculation
203 // input ANN_IN_MTX_LEVEL1
{
 // Level-1 prediction: push every input vector in ANN_IN_MTX_LEVEL1 through
 // the three weight sets (suffixes _1/_2/_3), average the three outputs and
 // store the result in ANN_OUT_MTX_LEVEL1 under the same key.
 //boost::unordered_map<int, utility::vector0<float> > ANN_OUT_MTX;
 boost::unordered_map<int, utility::vector0<float> >::iterator itV;
 for ( itV = ANN_IN_MTX_LEVEL1.begin(); itV != ANN_IN_MTX_LEVEL1.end(); itV++ ) //for each triplet input
 {
 //cout << itV->first << endl;

 //apply input layer transformation ('tansig' transfer, code 1)
 utility::vector0<float> IL1, IL2, IL3;
 applyANNTransformation(itV->second, WI_1, BI_1, IL1, 1);
 applyANNTransformation(itV->second, WI_2, BI_2, IL2, 1);
 applyANNTransformation(itV->second, WI_3, BI_3, IL3, 1);

 //apply 1st hidden layer transformation ('tansig' transfer, code 1)
 utility::vector0<float> HL1, HL2, HL3;
 applyANNTransformation(IL1, WL1_1, BL1_1, HL1, 1);
 applyANNTransformation(IL2, WL1_2, BL1_2, HL2, 1);
 applyANNTransformation(IL3, WL1_3, BL1_3, HL3, 1);

 //apply output layer transformation (linear transfer, code 0)
 utility::vector0<float> OL1, OL2, OL3;
 applyANNTransformation(HL1, WL2_1, BL2_1, OL1, 0);
 applyANNTransformation(HL2, WL2_2, BL2_2, OL2, 0);
 applyANNTransformation(HL3, WL2_3, BL2_3, OL3, 0);

 // NOTE(review): OUT1 is declared on a line elided from this excerpt --
 // confirm it is reset each iteration, since applyVecAverage only appends.
 applyVecAverage(OL1,OL2,OL3,OUT1);
 //cout << OUT1[0] << "\t" << OUT1[1] << "\t" << OUT1[2] << "\t" << endl;
 ANN_OUT_MTX_LEVEL1[itV->first] = OUT1;
 }
}
236 
237 
238 
239 // perform 2nd level ANN calculation
240 // input ANN_IN_MTX_LEVEL2
{
 // Level-2 prediction: identical structure to calcLevel1(), but driven by
 // ANN_IN_MTX_LEVEL2 and the level-2 weight sets (W2*/B2*); averaged
 // results are stored in ANN_OUT_MTX_LEVEL2 under the same key.
 //boost::unordered_map<int, utility::vector0<float> > ANN_OUT_MTX;
 boost::unordered_map<int, utility::vector0<float> >::iterator itV;
 for ( itV = ANN_IN_MTX_LEVEL2.begin(); itV != ANN_IN_MTX_LEVEL2.end(); itV++ ) //for each triplet input
 {
 //cout << itV->first << endl;

 //apply input layer transformation ('tansig' transfer, code 1)
 utility::vector0<float> IL1, IL2, IL3;
 applyANNTransformation(itV->second, W2I_1, B2I_1, IL1, 1);
 applyANNTransformation(itV->second, W2I_2, B2I_2, IL2, 1);
 applyANNTransformation(itV->second, W2I_3, B2I_3, IL3, 1);

 //apply 1st hidden layer transformation ('tansig' transfer, code 1)
 utility::vector0<float> HL1, HL2, HL3;
 applyANNTransformation(IL1, W2L1_1, B2L1_1, HL1, 1);
 applyANNTransformation(IL2, W2L1_2, B2L1_2, HL2, 1);
 applyANNTransformation(IL3, W2L1_3, B2L1_3, HL3, 1);

 //apply output layer transformation (linear transfer, code 0)
 utility::vector0<float> OL1, OL2, OL3;
 applyANNTransformation(HL1, W2L2_1, B2L2_1, OL1, 0);
 applyANNTransformation(HL2, W2L2_2, B2L2_2, OL2, 0);
 applyANNTransformation(HL3, W2L2_3, B2L2_3, OL3, 0);

 // NOTE(review): OUT2 is declared on a line elided from this excerpt --
 // confirm it is reset each iteration, since applyVecAverage only appends.
 applyVecAverage(OL1,OL2,OL3,OUT2);
 //cout << OUT2[0] << "\t" << OUT2[1] << "\t" << OUT2[2] << "\t" << endl;
 ANN_OUT_MTX_LEVEL2[itV->first] = OUT2;
 }
}
273 
274 
275 
276 
277 void ANN::runSpartaANN(boost::unordered_map<int, utility::vector0<float> > &inMatrix)
278 {
279  ANN_IN_MTX_LEVEL1 = inMatrix;
280  //loadWeights();
281 
282  // run 1st level ANN prediction, get ANN_OUT_MTX_LEVEL1
283  calcLevel1();
284 
285 }
286 
287 
288 
289 //apply an ANN transformation for input inp and with transformation weights w, bias b
{
 // Apply one fully connected layer: out[i] = transfer( sum_j inp[j]*w[i][j] + b[i] ),
 // where code == 1 selects the 'tansig' sigmoid and code == 0 the identity.
 // Results are appended to 'out'.
 // needs to check for size mismatch among inp, w and b ??????
 // NOTE(review): w[0] on an empty map default-inserts an empty row before
 // the comparison -- confirm callers never pass an empty weight map.
 if( inp.size() != w[0].size() || w.size() != b.size() ) {
 tr.Error << " ANN prediction failed with inconsistent data!" << endl;
 return;
 }

 for( Size i = 0; i < w.size(); i++ ) {
 float sum = 0; // weighted input accumulated for node i
 for( Size j = 0; j < inp.size(); j++ ) sum += inp[j]*w[i][j];

 sum += b[i];
 if( code == 1 ) out.push_back( 2.0/(1.0+exp(-2.0*sum))-1.0 ); // apply 'tansig' transfer function
 else if( code == 0 ) out.push_back( sum ); // apply 'linear' transfer function
 }
}
307 
308 
309 
//calculate the element-wise average of three utility::vector0s v1, v2, v3 (the 'confidence-averaged' variant is present but commented out)
{
 // Append the element-wise arithmetic mean of v1, v2, v3 to vout.
 // If the three sizes disagree, vout is left untouched (silent no-op).
 if( v1.size() == v2.size() && v1.size() == v3.size() ) {
 //float conf1 = getConfidence(v1);
 //float conf2 = getConfidence(v2);
 //float conf3 = getConfidence(v3);
 //cout << "XXX" << conf1 << "\t" << conf2 << "\t" << conf3<< endl;

 for( Size i=0; i<v1.size(); i++) {
 //cout << "XXX" << i << endl;
 //out.push_back( (v1[i]*conf1+v2[i]*conf2+v3[i]*conf3)/(conf1+conf2+conf3) );
 vout.push_back( (v1[i]+v2[i]+v3[i])/3.0 ); // plain average; confidence weighting disabled above
 //vout.push_back(v3[i]);
 }

 }
}
328 
329 
330 
331 //apply normalization
{
 // Clamp the first three components of v to [0,1], then rescale them in
 // place so they sum to 1 (a probability-like triple).
 // NOTE(review): assumes v holds at least 3 elements; if all three clamp
 // to zero the division below divides by zero -- confirm the network
 // outputs preclude that case.
 float a=v[0], b=v[1], c=v[2];
 if(a>1) a=1.0; else if(a<0) a=0.0;
 if(b>1) b=1.0; else if(b<0) b=0.0;
 if(c>1) c=1.0; else if(c<0) c=0.0;

 float sum=a+b+c;
 a/=sum; b/=sum; c/=sum;
 v.clear();
 v.push_back(a); v.push_back(b); v.push_back(c);
}
344 
345 
346 
{
 // Confidence score of a 3-component output: 2*max - (v0+v1+v2) + min,
 // which algebraically equals max - median.  Returns -1.0 when v does not
 // hold exactly three values.
 //cout << v.size() << "\t" << v[0] << "\t" << v[1] << "\t" << v[2] << endl;

 if( v.size() != 3 ) return -1.0;

 return 2.0*MAX(v[0], MAX(v[1],v[2])) - (v[0]+v[1]+v[2]) + MIN(v[0], MIN(v[1],v[2]));
}
355 
356 
357 
358 //check the number of atom without CS for a given residue
{
 // Count how many of the six flag positions (odd indices 1,3,...,11) are
 // exactly 1; presumably each flag marks an atom whose chemical shift is
 // missing -- the layout of v is defined by the caller, not visible here.
 int cnt = 0;
 cnt+=(v[1]==1); cnt+=(v[3]==1); cnt+=(v[5]==1); cnt+=(v[7]==1); cnt+=(v[9]==1); cnt+=(v[11]==1);
 return cnt;
}
365 
366 
367 
368 // return a character string for an int type number
char * ANN::itoa( int n, char *buff, int /*base*/ )
{
 // Decimal conversion only; the base argument is accepted for interface
 // compatibility but intentionally ignored.
 // NOTE(review): assumes buff can hold the digits plus NUL (12 bytes
 // suffice for a 32-bit int) -- caller's responsibility.
 sprintf(buff, "%d", n);
 return buff;
}
374 
375 
376 
377 // retrun a character string for a float type number
378 char * ANN::ftoa( float n, char *buff, char f, int prec )
379 {
380  if ( !(f=='f' || f=='F' || f=='e' || f=='E' || f=='g' || f=='G') ) {
381  f = 'f';
382  }
383  char format[20];
384  char *fs = format; // generate format string
385  *fs++ = '%'; // "%.<prec>l<f>"
386  if ( prec >= 0 ) {
387  if ( prec > 99 ) // buf big enough for precision?
388  prec = 99;
389  *fs++ = '.';
390  if ( prec >= 10 ) {
391  *fs++ = prec / 10 + '0';
392  *fs++ = prec % 10 + '0';
393  } else {
394  *fs++ = prec + '0';
395  }
396  }
397  *fs++ = 'l';
398  *fs++ = f;
399  *fs = '\0';
400  sprintf( buff, format, n );
401 
402  return buff;
403 }
404 
{
 // Infer the platform's path separator by inspecting the PATH environment
 // variable: a '/' means unix-style paths, a '\' means Windows-style.
 if( getenv( "PATH" ) != NULL) {
 string temp = getenv( "PATH" );
 if(temp.find("/") != string::npos ) slash_char = "/"; // unix
 else if(temp.find("\\") != string::npos ) slash_char = "\\"; // Windows
 }
 else slash_char = "/"; // fall back to the unix separator when PATH is unset
 // NOTE(review): if PATH exists but contains neither separator, slash_char
 // keeps whatever value it had -- confirm that is acceptable.
}
414 
415 }
416 }