Rosetta 3.5
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Minimizer.cc
Go to the documentation of this file.
1 // -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
2 // vi: set ts=2 noet:
3 //
4 // (c) Copyright Rosetta Commons Member Institutions.
5 // (c) This file is part of the Rosetta software suite and is made available under license.
6 // (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
7 // (c) For more information, see http://www.rosettacommons.org. Questions about this can be
8 // (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
9 
10 /// @file core/optimization/Minimizer.cc
11 /// @brief Minimizer class
12 /// @author Phil Bradley
13 
14 
15 // Unit headers
19 
20 #include <basic/Tracer.hh>
21 
22 #include <ObjexxFCL/FArray2D.hh>
23 #include <utility/exit.hh>
24 
25 #include <basic/options/option.hh>
26 #include <basic/options/keys/optimization.OptionKeys.gen.hh>
27 
28 
29 // C++ headers
30 #include <cmath>
31 #include <iostream>
32 #include <algorithm>
33 
34 #include <utility/vector1.hh>
35 
36 #ifdef WIN32
37 #include <functional>
38 #endif
39 
40 
namespace core {
namespace optimization {

using namespace ObjexxFCL;

// Tracer used for status/warning output by all minimizers in this file.
// NOTE(review): the channel name says "LineMinimizer" although this file is
// Minimizer.cc — confirm whether "core.optimization.Minimizer" was intended;
// changing it would alter log-filtering behavior, so it is left as-is here.
static basic::Tracer TR( "core.optimization.LineMinimizer" );
47 
48 // set the function and the options
50  Multifunc & func_in,
51  MinimizerOptions const & options_in
52 ) : func_( func_in ), options_( options_in ) {}
53 
54 /////////////////////////////////////////////////////////////////////////////
55 /// See @ref minimization_overview "Minimization overview and concepts" for details.
56 Real
58  Multivec & phipsi_inout // starting position, and solution is returned here
59 ) {
60  // parse options
61  std::string const type( options_.min_type() );
62 
63  Multivec phipsi( phipsi_inout ), dE_dphipsi( phipsi_inout );
64 
65  Real end_func;
66  DFPMinConvergedFractional fractional_converge_test( options_.minimize_tolerance() );
67  DFPMinConvergedAbsolute absolute_converge_test( options_.minimize_tolerance() );
68  int const ITMAX( options_.max_iter() );
69 
70  if ( type == "linmin" ) {
71  func_.dfunc( phipsi, dE_dphipsi );
72  linmin( phipsi, dE_dphipsi, end_func, ITMAX );
73  } else if ( type == "dfpmin" ) {
74  dfpmin( phipsi, end_func, fractional_converge_test, ITMAX );
75  } else if ( type == "dfpmin_armijo" ) {
76  LineMinimizationAlgorithmOP armijo_line_search( new ArmijoLineMinimization( func_, false, phipsi_inout.size() ) );
77  armijo_line_search->silent( options_.silent() );
78  dfpmin_armijo( phipsi, end_func, fractional_converge_test, armijo_line_search, ITMAX );
79  } else if ( type == "dfpmin_armijo_nonmonotone" ) {
80  LineMinimizationAlgorithmOP armijo_line_search( new ArmijoLineMinimization( func_, true, phipsi_inout.size() ) );
81  armijo_line_search->silent( options_.silent() );
82  dfpmin_armijo( phipsi, end_func, fractional_converge_test, armijo_line_search, ITMAX );
83  } else if ( type == "dfpmin_atol" ) {
84  dfpmin( phipsi, end_func, absolute_converge_test, ITMAX );
85  } else if ( type == "dfpmin_armijo_atol" ) {
86  LineMinimizationAlgorithmOP armijo_line_search( new ArmijoLineMinimization( func_, false, phipsi_inout.size() ) );
87  armijo_line_search->silent( options_.silent() );
88  dfpmin_armijo( phipsi, end_func, absolute_converge_test, armijo_line_search, ITMAX );
89  } else if ( type == "dfpmin_armijo_nonmonotone_atol" ) {
90  LineMinimizationAlgorithmOP armijo_line_search( new ArmijoLineMinimization( func_, true, phipsi_inout.size() ) );
91  armijo_line_search->silent( options_.silent() );
92  dfpmin_armijo( phipsi, end_func, absolute_converge_test, armijo_line_search, ITMAX );
93  } else if ( type == "dfpmin_strong_wolfe" ) {
94  LineMinimizationAlgorithmOP strong_wolfe_line_search( new StrongWolfeLineMinimization( func_, false, phipsi_inout.size() ) );
95  dfpmin_armijo( phipsi, end_func, fractional_converge_test, strong_wolfe_line_search, ITMAX );
96  } else if ( type == "dfpmin_strong_wolfe_atol" ) {
97  LineMinimizationAlgorithmOP strong_wolfe_line_search( new StrongWolfeLineMinimization( func_, false, phipsi_inout.size() ) );
98  dfpmin_armijo( phipsi, end_func, absolute_converge_test, strong_wolfe_line_search, ITMAX );
99  } else if ( type == "lbfgs_armijo" ) {
100  LineMinimizationAlgorithmOP armijo_line_search( new ArmijoLineMinimization( func_, false, phipsi_inout.size() ) );
101  armijo_line_search->silent( options_.silent() );
102  lbfgs( phipsi, end_func, fractional_converge_test, armijo_line_search, ITMAX );
103  } else if ( type == "lbfgs_armijo_atol" ) {
104  LineMinimizationAlgorithmOP armijo_line_search( new ArmijoLineMinimization( func_, false, phipsi_inout.size() ) );
105  armijo_line_search->silent( options_.silent() );
106  lbfgs( phipsi, end_func, absolute_converge_test, armijo_line_search, ITMAX );
107  } else if ( type == "lbfgs_armijo_nonmonotone" ) {
108  LineMinimizationAlgorithmOP armijo_line_search( new ArmijoLineMinimization( func_, true, phipsi_inout.size() ) );
109  armijo_line_search->silent( options_.silent() );
110  lbfgs( phipsi, end_func, fractional_converge_test, armijo_line_search, ITMAX );
111  } else if ( type == "lbfgs_armijo_nonmonotone_atol" ) {
112  LineMinimizationAlgorithmOP armijo_line_search( new ArmijoLineMinimization( func_, true, phipsi_inout.size() ) );
113  armijo_line_search->silent( options_.silent() );
114  lbfgs( phipsi, end_func, absolute_converge_test, armijo_line_search, ITMAX );
115  } else if ( type == "lbfgs_strong_wolfe" ) {
116  LineMinimizationAlgorithmOP strong_wolfe_line_search( new StrongWolfeLineMinimization( func_, false, phipsi_inout.size() ) );
117  lbfgs( phipsi, end_func, fractional_converge_test, strong_wolfe_line_search, ITMAX );
118  } else if ( type == "GA" ) {
120  gam.run(phipsi, ITMAX);
121  } else {
122  utility_exit_with_message("unknown type of minimization '"+type+"'");
123  }
124 
125  phipsi_inout = phipsi;
126  return func_( phipsi );
127 }
128 
129 ////////////////////////////////////////////////////////////////////////
130 // Convergence test stuff
131 ////////////////////////////////////////////////////////////////////////
132 
133 bool
135  Real Fnew,
136  Real Fold
137 ) {
138  return ( 2.0f*std::abs( Fnew - Fold ) <=
139  tolerance*( std::abs( Fnew ) + std::abs( Fold ) + eps )
140  );
141 }
142 
143 bool
145  Real Fnew,
146  Real Fold
147 ) {
148  return ( std::abs( Fnew - Fold ) <= tolerance );
149 }
150 
151 /////////////////////////////////////////////////////////////////////////////
152 // Skeleton algorithm for multivariate optimization
153 /////////////////////////////////////////////////////////////////////////////
155  Multivec & current_position
156 ) {
157  int const problem_size( current_position.size() );
158  int const max_iter( std::max( 200, problem_size/10 ));
159 
160  // reset storage variables for descent direction updates
162 
163  // Get the starting function value and gradient
164  Multivec descent_direction( problem_size, 0.0);
165  Real current_value( _func( current_position ) );
166  _func.dfunc( current_position, descent_direction );
167  // Convert to gradient (negative of the derivative) in-place
168  std::transform( descent_direction.begin(), descent_direction.end(),
169  descent_direction.begin(), std::negate<Real>() );
170 
171  // Iterate to convergence
172  int iter( 0 );
173  while( iter++ < max_iter ) {
174  Real previous_value = current_value;
175  current_value = _line_min( current_position, descent_direction );
176 
177  if( _converged(current_value, previous_value) ) return current_value;
178  // descent_direction = _get_direction();
179  }
180 
181  TR.Warning << "WARNING: Minimization has exceeded " << max_iter << "iterations but has not converged!" << std::endl;
182  return current_value;
183 }
184 
185 // wrapper functions around minimization routines to allow for more flexibility
186 //void
187 //Minimizer::dfpmin(
188 // Multivec & P,
189 // Real & FRET,
190 // ConvergenceTest & converge_test
191 // ) const
192 //{
193 // int const N( P.size() );
194 // int const ITMAX = std::max( 200, N/10 );
195 // Minimizer::dfpmin( P, FRET, converge_test, ITMAX );
196 //}
197 
198 /////////////////////////////////////////////////////////////////////////////
199 /////////////////////////////////////////////////////////////////////////////
200 // now cut and paste from minimize.cc
201 /////////////////////////////////////////////////////////////////////////////
202 /////////////////////////////////////////////////////////////////////////////
203 
204 /////////////////////////////////////////////////////////////////////////////
205 void
207  Multivec & P,
208  Real & FRET,
209  ConvergenceTest & converge_test,
210  int const ITMAX
211 ) const {
212  int const N( P.size() );
213  // int const ITMAX = std::max( 200, N/10 );
214 
215  // Grab a line minimizer
216  BrentLineMinimization test_brent( func_, N );
217  LineMinimizationAlgorithm * line_min = &test_brent; // this variable is not neccessary
218 
219  // should get rid of these FArrays
220  FArray2D< Real > HESSIN( N, N, 0.0 );
221  Multivec XI( N );
222  Multivec G( N );
223  Multivec DG( N );
224 
225  // get function and its gradient
226  Real FP;
227  FP = func_(P);
228  func_.dfunc(P,G);
229 
230  for ( int i = 1; i <= N; ++i ) {
231  HESSIN(i,i) = 1.0;
232  XI[i] = -G[i];
233  }
234 
235  Multivec HDG( N );
236 
237  for ( int ITER = 1; ITER <= ITMAX; ++ITER ) {
238  // Christophe added the following to allow premature end of minimization
239  // I probably need to do the same with line_min
240  if ( func_.abort_min(P) ) {
241  TR.Warning << "WARNING: ABORTING MINIMIZATION TRIGGERED BY abort_min" << std::endl;
242  return;
243  }
244  // End Christophe modifications
245 
246  // note that linmin modifes XI; afterward XI is the actual (vector)
247  // step taken during linmin
248  FRET = (*line_min)( P, XI );
249 
250  // check for convergence
251  if ( converge_test( FRET, FP ) ) {
252  //std::cout << "Called line minimization " << line_min->_num_linemin_calls << std::endl;
253  return;
254  }
255 
256  FP = FRET;
257  for ( int i = 1; i <= N; ++i ) {
258  DG[i] = G[i];
259  }
260 
261  // get function and its gradient
262  FRET = func_(P);
263  func_.dfunc(P,G);
264  for ( int i = 1; i <= N; ++i ) {
265  DG[i] = G[i]-DG[i];
266  }
267  for ( int i = 1; i <= N; ++i ) {
268  HDG[i] = 0.0;
269  for ( int j = 1; j <= N; ++j ) {
270  HDG[i] += HESSIN(i,j)*DG[j];
271  }
272  }
273 
274  Real FAC, FAE, FAD;
275  FAC = 0.;
276  FAE = 0.;
277  for ( int i = 1; i <= N; ++i ) {
278  FAC += DG[i]*XI[i];
279  FAE += DG[i]*HDG[i];
280  }
281  if ( FAC != 0.0 ) FAC = 1.0/FAC;
282  if ( FAE != 0.0 ) {
283  FAD = 1./FAE;
284  } else {
285  FAD = 0.;
286  }
287  for ( int i = 1; i <= N; ++i ) {
288  DG[i] = FAC*XI[i] - FAD*HDG[i];
289  }
290  for ( int i = 1; i <= N; ++i ) {
291  for ( int j = 1; j <= N; ++j ) {
292  HESSIN(i,j) += FAC*XI[i]*XI[j] - FAD*HDG[i]*HDG[j] + FAE*DG[i]*DG[j];
293  }
294  }
295  for ( int i = 1; i <= N; ++i ) {
296  XI[i] = 0.;
297  for ( int j = 1; j <= N; ++j ) {
298  XI[i] -= HESSIN(i,j)*G[j];
299  }
300  }
301  }
302 
303  if (!options_.silent())
304  TR.Warning << "WARNING: DFPMIN MAX CYCLES " << ITMAX << " EXCEEDED, BUT FUNC NOT CONVERGED!" << std::endl;
305 
306  // std::cout << "Called line minimization " << line_min->_num_linemin_calls << std::endl;
307  return;
308 
309 } // dfpmin
310  ////////////////////////////////////////////////////////////////////////
311 
////////////////////////////////////////////////////////////////////////
// dfpmin Armijo
////////////////////////////////////////////////////////////////////////

/// @brief DFP quasi-Newton minimization driven by an *inexact* line search
/// (Armijo or strong-Wolfe, passed in as line_min). Because an inexact search
/// can break positive-definiteness of the inverse-Hessian estimate, extra
/// safeguards (HOPT, below) keep XI a descent direction.
/// NOTE(review): the "Minimizer::dfpmin_armijo(" signature line and the
/// "LineMinimizationAlgorithmOP line_min," parameter line were dropped by the
/// doxygen listing this file was recovered from; the code below is otherwise
/// reproduced verbatim — restore the two lines from the repository source.
void
	Multivec & P,
	Real & FRET,
	ConvergenceTest & converge_test,
	int const ITMAX
) const {
	int const N( P.size() );
	Real const EPS( 1.E-5 ); // threshold for "line search made no progress"

	FArray2D< Real > HESSIN( N, N, 0.0 ); // inverse-Hessian estimate
	Multivec XI( N );  // search direction; after a line search, the step taken
	Multivec G( N );   // gradient at the current point
	Multivec DG( N );  // gradient difference (reused as scratch later)
	Multivec HDG( N ); // HESSIN * DG

	// Nonmonotone searches compare against the worst of the last 3 values.
	int const prior_func_memory_size( line_min->nonmonotone() ? 3 : 1 );
	Multivec prior_func_memory( prior_func_memory_size );

	if ( line_min->nonmonotone() ) line_min->_last_accepted_step = 0.005;

	// When inexact line search is used, HESSIN need not remain positive definite, so
	// additional safeguard must be added to ensure XI is a desc. direction (or inexact
	// line search would fail). Two options for safeguards are implemented below:
	// HOPT = 1 resets HESSIN to a multiple of identity when XI is not a desc. direction.
	// HOPT = 2 leaves HESSIN unchanged if stepsize XMIN fails Wolfe's condition
	// for ensuring new HESSIN to be positive definite.
	int const HOPT( 2 );

	// get function and its gradient
	int NF = 1; // number of func evaluations
	Real prior_func_value = func_(P);
	func_.dfunc(P,G);

	// Start the prior function memory storage
	int func_memory_filled( 1 );
	prior_func_memory[ 1 ] = prior_func_value;

	// Identity inverse-Hessian => first direction is steepest descent.
	for ( int i = 1; i <= N; ++i ) {
		HESSIN(i,i) = 1.0;
		XI[i] = -G[i];
	}

	Real FAC, FAE, FAD, FAF;

	for ( int ITER = 1; ITER <= ITMAX; ++ITER ) {
		// Directional derivative along XI, plus gradient norms for diagnostics.
		line_min->_deriv_sum = 0.0;
		Real Gmax = 0.0;
		Real Gnorm = 0.0;

		for ( int i = 1; i <= N; ++i ) {
			line_min->_deriv_sum += XI[i]*G[i];
			Gnorm += G[i]*G[i];
			if ( std::abs( G[i] ) > Gmax ) {
				Gmax=std::abs( G[i] );
			}
		}

		Gnorm = std::sqrt(Gnorm);

		// For nonmonotone searches, accept any step better than the worst
		// remembered value; for monotone, the single previous value.
		line_min->_func_to_beat = prior_func_memory[ 1 ];
		for( int i = 2 ; i <= func_memory_filled ; ++i ) {
			if( line_min->_func_to_beat < prior_func_memory[ i ] ) {
				line_min->_func_to_beat = prior_func_memory[ i ];
			}
		}

		// P is returned as new pt, and XI is returned as the change.
		FRET = (*line_min)( P, XI );

		if ( converge_test( FRET, prior_func_value ) ) {
			return;
		} else {
			// Line search made essentially no progress: try once with a reset,
			// near-identity inverse-Hessian before giving up.
			if (std::abs(FRET-prior_func_value)<=EPS ) {
				Real XInorm = 0.0;
				for ( int i = 1; i <= N; ++i ) {
					XInorm += XI[i]*XI[i];
				}
				// Large (negative) directional derivative but no progress:
				// the search itself failed; stop here.
				if ( line_min->_deriv_sum < -1e-3*Gnorm*XInorm ) {
					return;
				}
				// Not convergence yet. Reinitialize HESSIN to a diagonal matrix & update direction XI.
				// This requires G to be correctly the gradient of the function.

				if (!options_.silent())
					TR.Warning << ":( reset HESSIN from failed line search" << std::endl;

				line_min->_deriv_sum = 0.0;
				for ( int i = 1; i <= N; ++i ) {
					for ( int j = 1; j < i; ++j ) {
						HESSIN(i,j) = 0.0;
					}
					for ( int j = i+1; j <= N; ++j ) {
						HESSIN(i,j) = 0.0;
					}
					if ( HESSIN(i,i) < 0.01 ) HESSIN(i,i) = 0.01;
					XI[i] = -HESSIN(i,i)*G[i];
					line_min->_deriv_sum += XI[i]*G[i];
				}

				// Retry the line search along the reset direction.
				FRET = (*line_min)( P, XI );

				// Still no progress after the reset: give up.
				if (std::abs(FRET-prior_func_value)<=EPS)
				{
					return;
				}
			}
		}

		prior_func_value = FRET;

		// Update memory of function calls (shift window once it is full).
		if( func_memory_filled < prior_func_memory_size ) {
			func_memory_filled++;
		} else {
			for( int i = 1 ; i < func_memory_filled ; ++ i ) {
				prior_func_memory[ i ] = prior_func_memory[ i + 1 ];
			}
		}
		prior_func_memory[ func_memory_filled ] = prior_func_value;

		// Save old gradient before refreshing it at the new point.
		for ( int i = 1; i <= N; ++i ) {
			DG[i] = G[i];
		}

		// Some line minimization algorithms require a curvature
		// check that involves the derivative before they accept a
		// move - in these cases we don't need to recalculate
		if ( line_min->provide_stored_derivatives() ) {
			line_min->fetch_stored_derivatives( G );
		} else {
			//FRET = func_(P);
			func_.dfunc(P,G);
		}

		NF++;

		// Old and new directional derivatives along the step, for the
		// HOPT = 2 curvature (Wolfe-like) safeguard.
		line_min->_deriv_sum = 0.0; //needed if HOPT = 2
		Real DRVNEW = 0.0; //needed if HOPT = 2
		for ( int i = 1; i <= N; ++i ) {
			line_min->_deriv_sum += XI[i]*DG[i]; //needed if HOPT = 2
			DRVNEW += XI[i]*G[i]; //needed if HOPT = 2
			DG[i] = G[i]-DG[i];
		}

		// HOPT = 2: only update HESSIN when the curvature condition holds,
		// keeping the estimate positive definite.
		if ( HOPT == 1 || DRVNEW > 0.95*line_min->_deriv_sum ) { //needed if HOPT = 2
			for ( int i = 1; i <= N; ++i ) {
				HDG[i] = 0.0;
				for ( int j = 1; j <= N; ++j ) {
					HDG[i] += HESSIN(i,j)*DG[j];
				}
			}
			FAC = 0.0;
			FAE = 0.0;
			FAF = 0.0;
			for ( int i = 1; i <= N; ++i ) {
				FAC += DG[i]*XI[i];
				FAE += DG[i]*HDG[i];
				FAF += DG[i]*DG[i];
			}
			FAF = FAC/FAF;
			// NOTE(review): unlike dfpmin() above, these reciprocals are not
			// guarded against FAC or FAE being zero — confirm whether the
			// curvature test above is assumed to exclude that case.
			FAC = 1.0/FAC;
			FAD = 1.0/FAE;
			for ( int i = 1; i <= N; ++i ) {
				DG[i] = FAC*XI[i] - FAD*HDG[i];
			}
			for ( int i = 1; i <= N; ++i ) {
				for ( int j = 1; j <= N; ++j ) {
					HESSIN(i,j) += FAC*XI[i]*XI[j] - FAD*HDG[i]*HDG[j] + FAE*DG[i]*DG[j];
				}
			}
		} //needed if HOPT = 2

		// New search direction: XI = -HESSIN * G
		for ( int i = 1; i <= N; ++i ) {
			XI[i] = 0.0;
			for ( int j = 1; j <= N; ++j ) {
				XI[i] -= HESSIN(i,j)*G[j];
			}
		}

		if ( HOPT == 1 ) {
			DRVNEW=0.0;
			for ( int i = 1; i <= N; ++i ) {
				DRVNEW += XI[i]*G[i];
			}
			// If direc. deriv >0, reset the Hessian inverse estimate
			if (DRVNEW > -EPS) {
				if (FAF<0.01) FAF=0.01;
				for ( int i = 1; i <= N; ++i ) {
					for ( int j = 1; j <= N; ++j ) {
						HESSIN(i,j) = 0;
					}
					HESSIN(i,i) = FAF;
					XI[i] = -FAF*G[i];
				}
			}
		} // HOPT == 1
	} // for ITER

	if (!options_.silent())
		TR.Warning << "WARNING: DFPMIN (Armijo) MAX CYCLES " << ITMAX << " EXCEEDED, BUT FUNC NOT CONVERGED!" << std::endl;

	return;
}
543 
544  ////////////////////////////////////////////////////////////////////////
545  // * Limited memory BFGS (L-BFGS).
546  // *
547  // * Copyright (c) 1990, Jorge Nocedal
548  // * Copyright (c) 2007-2010 Naoaki Okazaki
549  // * All rights reserved.
550  // *
551  // * Permission is hereby granted, free of charge, to any person obtaining a copy
552  // * of this software and associated documentation files (the "Software"), to deal
553  // * in the Software without restriction, including without limitation the rights
554  // * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
555  // * copies of the Software, and to permit persons to whom the Software is
556  // * furnished to do so, subject to the following conditions:
557  // *
558  // * The above copyright notice and this permission notice shall be included in
559  // * all copies or substantial portions of the Software.
560  // *
561  // * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
562  // * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
563  // * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
564  // * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
565  // * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
566  // * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
567  // * THE SOFTWARE.
568  ////////////////////////////////////////////////////////////////////////
569 
570  // This library is a C port of the FORTRAN implementation of Limited-memory
571  // Broyden-Fletcher-Goldfarb-Shanno (L-BFGS) method written by Jorge Nocedal.
572  // The original FORTRAN source code is available at:
573  // http://www.ece.northwestern.edu/~nocedal/lbfgs.html
574  //
575  // The L-BFGS algorithm is described in:
576  // - Jorge Nocedal.
577  // Updating Quasi-Newton Matrices with Limited Storage.
578  // <i>Mathematics of Computation</i>, Vol. 35, No. 151, pp. 773--782, 1980.
579  // - Dong C. Liu and Jorge Nocedal.
580  // On the limited memory BFGS method for large scale optimization.
581  // <i>Mathematical Programming</i> B, Vol. 45, No. 3, pp. 503-528, 1989.
582 
/// @brief Limited-memory BFGS minimization (port of Nocedal's L-BFGS, via
/// Okazaki's libLBFGS; see the license block above), using the supplied
/// inexact line-search object. X is updated in place; FRET receives the
/// final function value.
/// NOTE(review): the "Minimizer::lbfgs(" signature line, the
/// "LineMinimizationAlgorithmOP line_min," parameter line, and the
/// declaration of the limited-memory store `lm` (used below, original line
/// 607) were dropped by the doxygen listing — restore them from the
/// repository source. The code below is otherwise reproduced verbatim.
void
	Multivec & X,
	Real & FRET,
	ConvergenceTest & converge_test,
	int const ITMAX
) const {
	int const N( X.size() );
	// Number of correction pairs kept, from the -optimization:lbfgs_M option.
	static int M( basic::options::option[ basic::options::OptionKeys::optimization::lbfgs_M ]() );
	// Nonmonotone searches compare against the worst of the last 3 values.
	int const PAST( line_min->nonmonotone() ? 3 : 1 );
	//Real const EPS( 1.E-5 );

	int K = 1; // number of func evaluations

	// Allocate working space.
	Multivec XP( N ); // previous position
	Multivec G( N );  // current gradient
	Multivec GP( N ); // previous gradient
	Multivec D( N );  // search direction / step
	Multivec W( N );

	// Allocate & initialize limited memory storage
	int CURPOS = 1; // pointer to current location in lm
	for (int i=1; i<=M; ++i) {
		lm[i].alpha = 0;
		lm[i].ys = 0;
		lm[i].s.resize(N,0.0);
		lm[i].y.resize(N,0.0);
	}

	// Allocate space for storing previous values of the objective function
	Multivec pf(PAST);

	// Evaluate the function value and its gradient
	int func_memory_filled( 1 );
	Real prior_func_value = func_(X);
	pf[1] = prior_func_value;
	func_.dfunc(X,G);

	// Compute the direction
	// we assume the initial hessian matrix H_0 as the identity matrix.
	Real invdnorm = 0.0;
	for ( int i = 1; i <= N; ++i ) {
		D[i] = -G[i];
		invdnorm += D[i]*D[i];
	}
	invdnorm = 1.0/sqrt( invdnorm );

	//if( line_min->nonmonotone() ) line_min->_last_accepted_step = 0.005;
	// initial stepsize = 1/sqrt ( dot(D,D) )
	line_min->_last_accepted_step = 2*invdnorm;

	for ( int ITER = 1; ITER <= ITMAX; ++ITER ) {
		// Store the current position and gradient vectors
		XP = X;
		GP = G;

		// line min
		line_min->_deriv_sum = 0.0;
		Real Gmax = 0.0;
		Real Gnorm = 0.0;
		for ( int i = 1; i <= N; ++i ) {
			line_min->_deriv_sum += D[i]*G[i];
			Gnorm += G[i]*G[i];
			if ( std::abs( G[i] ) > Gmax ) {
				Gmax=std::abs( G[i] );
			}
		}
		Gnorm = std::sqrt(Gnorm);

		// Value to beat: worst of the remembered previous values.
		line_min->_func_to_beat = pf[ 1 ];
		for( int i = 2 ; i <= func_memory_filled ; ++i ) {
			if( line_min->_func_to_beat < pf[ i ] )
				line_min->_func_to_beat = pf[ i ];
		}

		// X is returned as new pt, and D is returned as the change
		FRET = (*line_min)( X, D );

		if ( converge_test( FRET, prior_func_value ) || line_min->_last_accepted_step == 0) {
			return;
		} else {
			// NOTE(review): this recovery branch is unreachable — the condition
			// above already returns whenever _last_accepted_step == 0, so the
			// Hessian-reset retry below can never run. Confirm whether the
			// "|| _last_accepted_step == 0" in the if above was intended.
			if (line_min->_last_accepted_step == 0) { // failed line search
				//Real Dnorm = 0.0;
				//for ( int i = 1; i <= N; ++i ) {
				//	Dnorm += D[i]*D[i];
				//}
				//Dnorm = std::sqrt(Dnorm);

				//TR << " deriv_sum " << line_min->_deriv_sum << " -1e-3*Gnorm*Dnorm " << -1e-3*Gnorm*Dnorm << std::endl;
				//if ( line_min->_last_accepted_step != 0 && line_min->_deriv_sum < -1e-3*Gnorm*Dnorm ) {
				//	TR << "Failed line search while large _deriv_sum, quit! N= " << N << " ITER= " << ITER << std::endl;
				//	return;
				//}

				// Reset Hessian
				CURPOS = 1;
				K = 1;

				// reset line minimizer: fall back to steepest descent
				line_min->_deriv_sum = 0.0;
				for ( int i = 1; i <= N; ++i ) {
					D[i] = -G[i];
					line_min->_deriv_sum += D[i]*G[i];
				}

				if ( sqrt( -line_min->_deriv_sum ) > 1e-6 ) {
					invdnorm = 1.0/sqrt( -line_min->_deriv_sum );

					// delete prior function memory
					line_min->_last_accepted_step = 0.1*invdnorm; // start with a smaller initial step???
					func_memory_filled = 1;
					prior_func_value = FRET;
					pf[1] = prior_func_value;

					// line search in the direction of the gradient
					FRET = (*line_min)( X, D );
				} else {
					return;
				}

				// if the line minimzer fails again, abort
				if (line_min->_last_accepted_step == 0) {
					if (!options_.silent())
						TR << "Line search failed even after resetting Hessian; aborting at iter#" << ITER << std::endl;
					return;
				}
			}
		}

		prior_func_value = FRET;

		// Update memory of function calls (shift window once it is full).
		if ( func_memory_filled < PAST ) {
			func_memory_filled++;
		} else {
			for( int i = 1 ; i < PAST ; ++ i ) {
				pf[ i ] = pf[ i + 1 ];
			}
		}
		pf[ func_memory_filled ] = prior_func_value;

		// Some line minimization algorithms require a curvature
		// check that involves the derivative before they accept a
		// move - in these cases we don't need to recalculate
		if ( line_min->provide_stored_derivatives() ) {
			line_min->fetch_stored_derivatives( G );
		} else {
			//FRET = func_(X);
			func_.dfunc(X,G);
		}

		// LBFGS updates
		//
		// Update vectors s and y:
		// s_{k+1} = x_{k+1} - x_{k} = \step * d_{k}.
		// y_{k+1} = g_{k+1} - g_{k}.
		// Compute scalars ys and yy:
		// ys = y^t \cdot s = 1 / \rho.
		// yy = y^t \cdot y.
		// Notice that yy is used for scaling the hessian matrix H_0 (Cholesky factor).
		core::Real ys=0, yy=0;
		for ( int i = 1; i <= N; ++i ) {
			lm[CURPOS].s[i] = X[i] - XP[i];
			lm[CURPOS].y[i] = G[i] - GP[i];
			ys += lm[CURPOS].y[i]*lm[CURPOS].s[i];
			yy += lm[CURPOS].y[i]*lm[CURPOS].y[i];
		}
		lm[CURPOS].ys = ys;

		// underflow check
		//if (std::fabs(ys) < 1e-12) {
		//	TR << "Line search step leads to underflow (ys=" << ys << ")! Aborting at iter#" << ITER << std::endl;
		//	return;
		//}

		// Recursive formula to compute dir = -(H \cdot g).
		// This is described in page 779 of:
		// Jorge Nocedal.
		// Updating Quasi-Newton Matrices with Limited Storage.
		// Mathematics of Computation, Vol. 35, No. 151,
		// pp. 773--782, 1980.
		int bound = std::min( M,K );
		CURPOS++;
		K++;
		if (CURPOS > M) CURPOS = 1;

		// Compute the negative of gradients
		for ( int i = 1; i <= N; ++i ) {
			D[i] = -G[i];
		}

		// First (backward) loop of the two-loop recursion.
		int j = CURPOS;
		for ( int pts=0; pts<bound; ++pts ) {
			j--;
			if (j<=0) j=M; // wrap around

			// skip near-singular correction pairs
			if (std::fabs(lm[j].ys) < 1e-9) continue;

			// \alpha_{j} = \rho_{j} s^{t}_{j} \cdot q_{k+1}
			lm[j].alpha = 0;
			for ( int i = 1; i <= N; ++i ) {
				lm[j].alpha += lm[j].s[i] * D[i];
			}
			lm[j].alpha /= lm[j].ys;

			// q_{i} = q_{i+1} - \alpha_{i} y_{i}
			for ( int i = 1; i <= N; ++i ) {
				D[i] += -lm[j].alpha * lm[j].y[i];
			}
		}

		//for ( int i = 1; i <= N; ++i ) {
		//	D[i] *= ys / yy;
		//}

		// Second (forward) loop of the two-loop recursion.
		for ( int pts=0; pts<bound; ++pts ) {
			if (std::fabs(lm[j].ys) < 1e-9) continue;

			// \beta_{j} = \rho_{j} y^t_{j} \cdot \gamma_{i}
			core::Real beta=0.0;
			for ( int i = 1; i <= N; ++i ) {
				beta += lm[j].y[i] * D[i];
			}
			beta /= lm[j].ys;

			// \gamma_{i+1} = \gamma_{i} + (\alpha_{j} - \beta_{j}) s_{j}
			for ( int i = 1; i <= N; ++i ) {
				D[i] += (lm[j].alpha - beta) * lm[j].s[i];
			}

			j++;
			if (j>M) j=1; // wrap around
		}
	}

	if (!options_.silent())
		TR.Warning << "WARNING: LBFGS MAX CYCLES " << ITMAX << " EXCEEDED, BUT FUNC NOT CONVERGED!" << std::endl;

	return;
}
828 
829 /////////////////////////////////////////////////////////////////////////////
830 void
832  Multivec & P,
833  Multivec & XI,
834  Real & FRET,
835  int const
836 ) const {
837  //Try to use a line minimizer algorithm
838  BrentLineMinimization test_brent( func_, P.size() );
839 
840  // See if this is good enough
841  FRET = test_brent( P, XI );
842 }
843 
844 } // namespace optimization
845 } // namespace core