Rosetta 3.5
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
PCA.cc
Go to the documentation of this file.
1 // -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
2 // vi: set ts=2 noet:
3 //
4 // (c) Copyright Rosetta Commons Member Institutions.
5 // (c) This file is part of the Rosetta software suite and is made available under license.
6 // (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
7 // (c) For more information, see http://www.rosettacommons.org. Questions about this can be
8 // (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
9 
10 /// @brief class to compute projection of a protein structure to principal component (PCA) eigenvectors ( as defined in file )
11 /// @author Oliver Lange
12 
13 
14 #if (defined _WIN32) && (!defined WIN_PYROSETTA)
15 #define ZLIB_WINAPI // REQUIRED FOR WINDOWS
16 #endif
17 
18 // Unit Headers
20 
21 // Package Headers
22 
23 // Project Headers
25 #include <core/pose/Pose.hh>
26 
27 // ObjexxFCL Headers
28 
29 // Numeric headers
30 #include <numeric/xyzVector.hh>
31 
32 // Utility headers
33 #include <basic/Tracer.hh>
34 #include <utility/io/izstream.hh>
35 #include <utility/exit.hh>
36 
37 //// C++ headers
38 #include <string>
39 #include <iostream>
40 
41 #include <core/id/AtomID.hh>
42 #include <utility/vector1.hh>
43 
44 
45 namespace protocols {
46 namespace evaluation {
47 
48 /// @details Auto-generated virtual destructor
50 
/// File-local tracer, registered under the channel name "protocols.evaluation.PCA"
/// with level basic::t_info; the routines below write to its Debug and Trace streams.
51 static basic::Tracer tr("protocols.evaluation.PCA",basic::t_info);
52 
53 using namespace core;
54 
55 
56 /// @brief A function (not a macro) that will not print a square matrix to tr.Debug
57 template< class T > void dump_matrix( Size, T const &, basic::Tracer & ) {}
58 
59 /// @brief A function (not a macro) that will print a square matrix to tr.Debug
60 template< class T > void dump_matrix_no( Size nr, T const & a, basic::Tracer & tr)
61 {
62  Size i,k;
63  for ( i =0 ; i<nr; ++i ) {
64  for ( k =0 ; k<nr; ++k )
65  tr.Debug << a[i][k] << " ";
66  tr.Debug << "\n";
67  }
68 }
69 
70 /// some low-level helper routines
71 
/// Number of components per coordinate; used as the loop bound over rvec
/// entries and over the rows/columns of a matrix in the helpers of this file.
72 #define DIM 3
73 
74 void PCA::rotate_vec(int natoms,rvec *x,matrix R)
75 {
76  int j,r,c,m;
77  rvec x_old;
78 
79  /*rotate X*/
80  for(j=0; j<natoms; j++) {
81  for(m=0; m<DIM; m++)
82  x_old[m]=x[j][m];
83  for(r=0; r<DIM; r++) {
84  x[j][r]=0;
85  for(c=0; c<DIM; c++)
86  x[j][r]+=R[r][c]*x_old[c];
87  }
88  }
89 }
90 
91 void PCA::add_vec( int natoms, rvec *x, rvec transvec ) {
92  for ( int i=0; i<natoms; i++ ) {
93  for ( int j = 0; j< DIM; j++ ) {
94  x[i][j]+=transvec[j];
95  }
96  }
97 }
98 
99 
100 
101 /// @brief read definition of PCA from file
102 void PCA::read_eigvec_file( std::string fn, pose::Pose const& pose,int nvec) {
103  utility::io::izstream data( fn.c_str() );
104  if ( !data ) {
105  std::cerr << "ERROR:: Unable to open PCA file: "
106  << fn << std::endl;
107  std::exit( 1 );
108  }
109 
110  std::string line;
111  getline(data,line); // header line
112  getline(data,line); //nfit....
113  std::istringstream line_stream ( line );
114  std::string tag1, tag2, tag3;
115 
116  line_stream >> tag1 >> nfit_ >> tag2 >> npca_ >> tag3 >> nvec_;
117  if ( nvec > 0 ) nvec_ = nvec;
118  xref_.dimension( 3, nfit_ );
119  xav_.dimension( 3, npca_ );
120  ifit_.resize( nfit_ );
121  ipca_.resize( npca_ );
122  eigvec_.dimension( 3, npca_, nvec_ );
123 
124  getline(data,line); //AVERAGE
125  if ( line != "AVERAGE" ) utility_exit_with_message(" tag AVERAGE missing ");
126  read_structure( data, pose, ipca_, xav_, "REFERENCE" );
127  read_structure( data, pose, ifit_, xref_, "VECTORS" );
128  for ( Size i=1; i<=nvec_; i++ ) {
129  for ( Size k=1; k<=npca_; k++ ) {
130  getline(data, line);
131  std::istringstream line_stream( line );
132  line_stream >> eigvec_( 1, k, i) >> eigvec_( 2, k, i) >> eigvec_(3, k, i);
133  }
134  }
135 }
136 
137 
138 /// @brief helper to read_eigvec_file: reads a protein structure from input file
140  std::istream& data,
141  pose::Pose const& pose,
142  IndexVector& ind,
143  CoordVector& x,
144  std::string endtag )
145 {
146  std::string line;
147  getline( data, line);
148  int ct=1;
149  while ( line != endtag ) {
150  std::istringstream line_stream( line );
151  std::string atomname;
152  Size resnr;
153  Real x1, x2, x3;
154  line_stream >> atomname >> resnr >> x1 >> x2 >> x3;
155  //---
156  tr.Debug << "read PCA: " << atomname << " " << resnr << " " << x1 << " " << x2 << " " << x3 << "\n";
157  ind[ ct ]=id::AtomID( pose.residue_type( resnr ).atom_index( atomname ), resnr );
158  x( 1, ct ) = x1*10; x(2, ct) = x2*10; x(3, ct) = x3*10;
159  //-
160  getline( data, line);
161  ct ++;
162  }
163 }
164 
165 void PCA::reset_x( Size n, CoordVector& x, rvec transvec ) {
166  Size dim( 3 );
167  // align center of mass to origin
168  for ( Size k = 1; k <= dim; ++k ) {
169  Real temp1 = 0.0;
170  for ( Size j = 1; j <= n; ++j ) {
171  temp1 += x(k,j);
172  }
173  temp1 /= 1.0*n;
174  transvec[k-1]=-temp1;
175  for ( Size j = 1; j <= n; ++j ) {
176  x(k,j) -= temp1;
177  }
178  }
179 }
180 
181 /// @brief compute projections for given pose
182 void PCA::eval( pose::Pose const& pose, ProjectionVector& proj ) {
183  //fill Farray for fit
184  rvec* xgmx;
185  rvec* xrefgmx;
186  rvec transvec;
187  runtime_assert ( npca_ == nfit_ );// different fit- and analysis group doesn't work yet. some stupid bug.
188  xgmx = new rvec[ npca_ ];
189  xrefgmx = new rvec[ npca_ ];
190  matrix Rot;
191  CoordVector x;
192  fill_coordinates( pose, ifit_, x );
193 
194  reset_x( nfit_, x, transvec );
195 
196  //transfer into C-style arrays
197  for ( Size i = 1; i<=nfit_ ; i++) {
198  for ( Size d = 1; d<=3; d++ ) {
199  xgmx[i-1][d-1]= x(d, i)/10.0;
200  xrefgmx[i-1][d-1]=xref_(d,i)/10.0;
201  }
202  }
203 
204  //compute rotation matrix
205  calc_fit_R( nfit_, xrefgmx, xgmx, Rot );
206  dump_matrix( 3, Rot, tr );
207 
208  fill_coordinates( pose, ipca_, x );
209  //transfer into C-style arrays
210  for ( Size i = 1; i<=npca_ ; i++) {
211  for ( int d = 1; d<=3; d++ ) {
212  xgmx[i-1][d-1]= x(d, i);
213  }
214  }
215  add_vec( npca_, xgmx, transvec );
216  rotate_vec( npca_, xgmx, Rot );
217 
218  tr.Trace << "rotated and translated\n";
219  for ( Size i = 1; i<=npca_ ; i++) {
220  for ( Size d = 1; d<=3; d++ ) {
221  x(d, i) = xgmx[i-1][d-1];
222  tr.Trace << x(d, i)/10.0 << " ";
223  }
224  tr.Trace << "\n";
225  }
226  //Compute projection
227  proj.resize( nvec_ );
228  for ( Size v = 1; v <= nvec_; v++ ) {
229  proj[ v ]=0;
230  for ( Size k = 1; k <= npca_; k++ ) {
231  for ( Size d = 1; d <= 3; d++ ) {
232  proj[ v ]+= (x( d, k)-xav_( d, k)) * eigvec_( d, k, v)/10.0;
233  }
234  }
235  }
236 
237  delete[] xgmx;
238  delete[] xrefgmx;
239 }
240 
241 // @brief dump stuff on screen
242 void PCA::show( std::ostream& os ) {
243  os << nfit_ << " " << npca_ << " " << nvec_ << std::endl;
244  os << "AVERAGE" << std::endl;
245  for ( Size i = 1; i <= npca_ ; i++ ) {
246  os << ipca_[i] << " " << xav_(1, i ) << " " << xav_(2,i ) << " " << xav_(3,i ) << std::endl;
247  }
248 
249  for ( Size i = 1; i <= nfit_ ; i++ ) {
250  os << xref_( 1, i ) << " " << xref_( 2, i ) << " " << xref_( 3, i ) << std::endl;
251  }
252 
253  for ( Size k = 1; k <= nvec_ ; k++ ) {
254  for ( Size i = 1; i <= npca_ ; i++ ) {
255  os << eigvec_( 1, i, k ) << " " << eigvec_( 2, i, k ) << " " << eigvec_( 3, i, k ) << std::endl;
256  }
257  }
258 
259 }
260 
261 
262 /// @brief helper of eval: get the coordinates of interest from pose
264  pose::Pose const& pose,
265  IndexVector const& ind,
266  CoordVector & x
267 )
268 {
269  int natoms = 1;
270  x.redimension( 3, ind.size() );
271  for ( IndexVector::const_iterator it=ind.begin(), eit=ind.end(); it!=eit; ++it ) {
272  PointPosition vec( pose.xyz( *it ) );
273  for ( int k = 0; k < 3; ++k ) { // k = X, Y and Z
274  x(k+1,natoms) = vec[k];
275  }
276  ++natoms;
277  }
278 }
279 
280 
281 
/// ROTATE(a,i,j,k,l): updates the element pair a[i][j] and a[k][l] from their
/// old values, using the variables g, h, s and tau of the enclosing scope
/// (g and h are overwritten, s and tau are read). It expands to plain
/// statements that carry their own semicolons, so it is invoked without a
/// trailing ';' in PCA::jacobi.
282 #define ROTATE(a,i,j,k,l) g=a[i][j];h=a[k][l];a[i][j]=g-s*(h+g*tau); \
283  a[k][l]=h+s*(g-h*tau);
/// Edge length of the square double matrices handled by calc_fit_R and jacobi.
284 #define DIM6 6
/// Component indices used to address the entries of an rvec (see PCA::oprod).
285 #define XX 0
286 #define YY 1
287 #define ZZ 2
288 
289 void PCA::oprod(const rvec a,const rvec b,rvec c)
290 {
291  c[XX]=a[YY]*b[ZZ]-a[ZZ]*b[YY];
292  c[YY]=a[ZZ]*b[XX]-a[XX]*b[ZZ];
293  c[ZZ]=a[XX]*b[YY]-a[YY]*b[XX];
294 }
295 
296 
297 
298 void PCA::calc_fit_R(int natoms,rvec *xp,rvec const* x,matrix R)
299 {
300 
301  int c,r,n,j,i,irot;
302  double omega[ DIM6 ][ DIM6 ];
303  double om[ DIM6 ] [ DIM6 ];
304  double d[ DIM6 ],xnr,xpc;
305  matrix vh,vk,u;
306  Real mn;
307  int index;
308  Real max_d;
309 
310  for(i=0; i<DIM6; i++) {
311  d[i]=0;
312  for(j=0; j<DIM6; j++) {
313  omega[i][j]=0;
314  om[i][j]=0;
315  }
316  }
317 
318  /* clear matrix U */
319  for ( int i=0; i<DIM;i++)
320  for ( int j=0; j<DIM; j++) u[i][j]=0;
321  /*calculate the matrix U*/
322  for(n=0;(n<natoms);n++) {
323  if ((mn = 1.0) != 0.0) {
324  for(c=0; (c<DIM); c++) {
325  xpc=xp[n][c];
326  for(r=0; (r<DIM); r++) {
327  xnr=x[n][r];
328  u[c][r]+=mn*xnr*xpc;
329  }
330  }
331  }
332  }
333  dump_matrix(DIM, u, tr);
334  /*construct omega*/
335  /*omega is symmetric -> omega==omega' */
336  for(r=0; r<DIM6; r++)
337  for(c=0; c<=r; c++)
338  if (r>=DIM && c<DIM) {
339  omega[r][c]=u[r-DIM][c];
340  omega[c][r]=u[r-DIM][c];
341  } else {
342  omega[r][c]=0;
343  omega[c][r]=0;
344  }
345  dump_matrix(DIM6, omega, tr);
346  /*determine h and k*/
347  jacobi( omega,d,om,&irot);
348  /*real **omega = input matrix a[0..n-1][0..n-1] must be symmetric
349  *int natoms = number of rows and columns
350  *real NULL = d[0]..d[n-1] are the eigenvalues of a[][]
351  *real **v = v[0..n-1][0..n-1] contains the vectors in columns
352  *int *irot = number of jacobi rotations
353  */
354  dump_matrix( 2*DIM, omega, tr );
355  dump_matrix ( 2*DIM, om, tr );
356  index=0; /* For the compiler only */
357 
358  /* Copy only the first two eigenvectors */
359  for(j=0; j<2; j++) {
360  max_d=-1000;
361  for(i=0; i<DIM6; i++)
362  if (d[i]>max_d) {
363  max_d=d[i];
364  index=i;
365  }
366  d[index]=-10000;
367  for(i=0; i<DIM; i++) {
368  vh[j][i]=sqrt(2.0)*om[i][index];
369  vk[j][i]=sqrt(2.0)*om[i+DIM][index];
370  }
371  }
372  /* Calculate the last eigenvector as the outer-product of the first two.
373  * This insures that the conformation is not mirrored and
374  * prevents problems with completely flat reference structures.
375  */
376 
377  dump_matrix( DIM, vh, tr );
378  dump_matrix( DIM, vk, tr );
379  oprod(vh[0],vh[1],vh[2]);
380  oprod(vk[0],vk[1],vk[2]);
381  dump_matrix( DIM, vh, tr );
382  dump_matrix( DIM, vk, tr );
383 
384  /*determine R*/
385  for(r=0; r<DIM; r++)
386  for(c=0; c<DIM; c++)
387  R[r][c] = vk[0][r]*vh[0][c] +
388  vk[1][r]*vh[1][c] +
389  vk[2][r]*vh[2][c];
390  dump_matrix( DIM, R, tr );
391 }
392 
393 
394 
/// @brief Diagonalise the symmetric 6x6 matrix a with Jacobi rotations.
/// @param a    input matrix; only its diagonal and the elements above the
///             diagonal are read, and the elements above the diagonal are
///             overwritten while iterating
/// @param d    receives the eigenvalues (per the description at the call site in
///             calc_fit_R)
/// @param v    reset to the identity and then rotated along with a; its columns
///             are the eigenvectors (per the description at the call site)
/// @param nrot receives the number of rotations that were applied
/// @details Returns as soon as the sum of the absolute off-diagonal elements is
/// exactly zero. If that has not happened after 50 sweeps, runtime_assert(0) is hit.
395 void PCA::jacobi(double a[6][6],double d[],double v[6][6],int *nrot)
396 {
397  int j,i;
398  int iq,ip;
    // g, h, s and tau are also read/written by the ROTATE macro
399  double tresh,theta,tau,t,sm,s,h,g,c;
400  double b[DIM6];
401  double z[DIM6];
402  int const n( DIM6 );
    // v starts as the identity matrix
403  for (ip=0; ip<n; ip++) {
404  for (iq=0; iq<n; iq++) v[ip][iq]=0.0;
405  v[ip][ip]=1.0;
406  }
    // b and d start as the diagonal of a; z collects the changes made to d
    // during one sweep
407  for (ip=0; ip<n;ip++) {
408  b[ip]=d[ip]=a[ip][ip];
409  z[ip]=0.0;
410  }
411  *nrot=0;
    // at most 50 sweeps over all element pairs above the diagonal
412  for (i=1; i<=50; i++) {
    // sm = sum of the absolute values of all elements above the diagonal
413  sm=0.0;
414  for (ip=0; ip<n-1; ip++) {
415  for (iq=ip+1; iq<n; iq++)
416  sm += fabs(a[ip][iq]);
417  }
    // nothing left above the diagonal: done
418  if (sm == 0.0) {
419  return;
420  }
    // during the first three sweeps only elements larger than a threshold
    // derived from sm are rotated; afterwards every non-zero element is
421  if (i < 4)
422  tresh=0.2*sm/(n*n);
423  else
424  tresh=0.0;
425  for (ip=0; ip<n-1; ip++) {
426  for (iq=ip+1; iq<n; iq++) {
427  g=100.0*fabs(a[ip][iq]);
    // after four sweeps: if adding g changes neither |d[ip]| nor |d[iq]| in
    // floating point, the element is simply set to zero without a rotation
428  if (i > 4 && fabs(d[ip])+g == fabs(d[ip])
429  && fabs(d[iq])+g == fabs(d[iq]))
430  a[ip][iq]=0.0;
431  else if (fabs(a[ip][iq]) > tresh) {
432  h=d[iq]-d[ip];
    // if g is negligible next to |h|, use t = a/h directly; otherwise derive t
    // from theta, taking the sign of theta
433  if (fabs(h)+g == fabs(h))
434  t=(a[ip][iq])/h;
435  else {
436  theta=0.5*h/(a[ip][iq]);
437  t=1.0/(fabs(theta)+sqrt(1.0+theta*theta));
438  if (theta < 0.0) t = -t;
439  }
    // c = 1/sqrt(1+t^2) and s = t*c: cosine and sine belonging to tangent t
440  c=1.0/sqrt(1+t*t);
441  s=t*c;
442  tau=s/(1.0+c);
    // shift the two diagonal values by t*a[ip][iq] and remember the change in z
443  h=t*a[ip][iq];
444  z[ip] -= h;
445  z[iq] += h;
446  d[ip] -= h;
447  d[iq] += h;
448  a[ip][iq]=0.0;
    // apply the rotation to the remaining elements above the diagonal that
    // involve index ip or iq ...
449  for (j=0; j<ip; j++) {
450  ROTATE(a,j,ip,j,iq)
451  }
452  for (j=ip+1; j<iq; j++) {
453  ROTATE(a,ip,j,j,iq)
454  }
455  for (j=iq+1; j<n; j++) {
456  ROTATE(a,ip,j,iq,j)
457  }
    // ... and to columns ip and iq of v
458  for (j=0; j<n; j++) {
459  ROTATE(v,j,ip,j,iq)
460  }
461  ++(*nrot);
462  }
463  }
464  }
    // end of sweep: fold the collected changes into b, reset d from b, clear z
465  for (ip=0; ip<n; ip++) {
466  b[ip] += z[ip];
467  d[ip] = b[ip];
468  z[ip] = 0.0;
469  }
470  }
    // only reached if 50 sweeps were not enough to bring sm to zero
471  runtime_assert(0);
472 }
473 
474 } //evaluation
475 } //protocols
476