changelog shortlog tags changeset files revisions annotate raw

scripts/optimization/fminunc.m

changeset 9846: 1d90fc211872
parent:ecc2c556f844
author: John W. Eaton <jwe@octave.org>
date: Sat Nov 21 21:44:51 2009 -0500 (33 hours ago)
permissions: -rw-r--r--
description: configure.ac: report freetype, fontconfig, and fltk cflags and libs info
1## Copyright (C) 2008, 2009 VZLU Prague, a.s.
2##
3## This file is part of Octave.
4##
5## Octave is free software; you can redistribute it and/or modify it
6## under the terms of the GNU General Public License as published by
7## the Free Software Foundation; either version 3 of the License, or (at
8## your option) any later version.
9##
10## Octave is distributed in the hope that it will be useful, but
11## WITHOUT ANY WARRANTY; without even the implied warranty of
12## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13## General Public License for more details.
14##
15## You should have received a copy of the GNU General Public License
16## along with Octave; see the file COPYING. If not, see
17## <http://www.gnu.org/licenses/>.
18##
19## Author: Jaroslav Hajek <highegg@gmail.com>
20
21## -*- texinfo -*-
22## @deftypefn{Function File} {} fminunc (@var{fcn}, @var{x0}, @var{options})
23## @deftypefnx{Function File} {[@var{x}, @var{fval}, @var{info}, @var{output}, @var{grad}, @var{hess}]} = fminunc (@var{fcn}, @dots{})
24## Solve an unconstrained optimization problem defined by the function @var{fcn}.
25## @var{fcn} should accept a vector (array) defining the unknown variables,
26## and return the objective function value, optionally with gradient.
27## In other words, this function attempts to determine a vector @var{x} such
28## that @code{@var{fcn} (@var{x})} is a local minimum.
29## @var{x0} determines a starting guess. The shape of @var{x0} is preserved
30## in all calls to @var{fcn}, but otherwise it is treated as a column vector.
31## @var{options} is a structure specifying additional options.
32## Currently, @code{fminunc} recognizes these options:
33## @code{"FunValCheck"}, @code{"OutputFcn"}, @code{"TolX"},
34## @code{"TolFun"}, @code{"MaxIter"}, @code{"MaxFunEvals"},
35## @code{"GradObj"}, @code{"FinDiffType"}.
36##
37## If @code{"GradObj"} is @code{"on"}, it specifies that @var{fcn},
38## called with 2 output arguments, also returns the gradient of the
39## objective function at the requested point. @code{"TolX"} specifies
40## the termination tolerance in the unknown variables, while
41## @code{"TolFun"} is a tolerance for the objective function value. Default is
42## @code{sqrt (eps)} (about @code{1.5e-8}) for both @code{"TolX"} and @code{"TolFun"}.
43##
44## For description of the other options, see @code{optimset}.
45##
46## On return, @var{fval} contains the value of the function @var{fcn}
47## evaluated at @var{x}, and @var{info} may be one of the following values:
48##
49## @table @asis
50## @item 1
51## Converged to a solution point. Relative gradient error is less than specified
52## by TolFun.
53## @item 2
54## Last relative step size was less than TolX.
55## @item 3
56## Last relative decrease in function value was less than TolFun.
57## @item 0
58## Iteration limit exceeded.
59## @item -3
60## The trust region radius became excessively small.
61## @end table
62##
63## Optionally, fminunc can also yield a structure with convergence statistics
64## (@var{output}), the output gradient (@var{grad}) and approximate hessian
65## (@var{hess}).
66##
67## Note: If the objective is a function of a single variable, using
68## @code{fminbnd} is usually a much better idea.
69## @seealso{fminbnd, optimset}
70## @end deftypefn
71
72## PKG_ADD: __all_opts__ ("fminunc");
73
function [x, fval, info, output, grad, hess] = fminunc (fcn, x0, options = struct ())

  ## Trust-region quasi-Newton minimizer.
  ##
  ## Inputs:
  ##   fcn     - function handle or function name; called with an array
  ##             shaped like x0.  With "GradObj" = "on" it is called with
  ##             two outputs and must also return the gradient.
  ##   x0      - starting guess; its shape is used for every call to fcn
  ##             and for the returned x.
  ##   options - optional struct read through optimget.
  ##
  ## Outputs:
  ##   x, fval - final iterate and the objective value there.
  ##   info    - exit status: 1, 2, 3 (converged), 0 (iteration or
  ##             evaluation limit), -1 (stopped by OutputFcn), -3 (trust
  ##             region too small).
  ##   output  - struct with fields iterations, successful, funcCount.
  ##   grad    - last computed gradient as a column ([] if never computed).
  ##   hess    - approximate Hessian hesr'*hesr (only built on request).
  ##
  ## Calling fminunc ("defaults") returns the default option structure.

  ## Get default options if requested.
  if (nargin == 1 && ischar (fcn) && strcmp (fcn, 'defaults'))
    x = optimset ("MaxIter", 400, "MaxFunEvals", Inf, ...
                  "GradObj", "off", "TolX", 1.5e-8, "TolFun", 1.5e-8, ...
                  "OutputFcn", [], "FunValCheck", "off", ...
                  "FinDiffType", "central");
    return;
  endif

  if (nargin < 2 || nargin > 3 || ! ismatrix (x0))
    print_usage ();
  endif

  if (ischar (fcn))
    fcn = str2func (fcn, "global");
  endif

  xsiz = size (x0);
  n = numel (x0);

  has_grad = strcmpi (optimget (options, "GradObj", "off"), "on");
  cdif = strcmpi (optimget (options, "FinDiffType", "central"), "central");
  maxiter = optimget (options, "MaxIter", 400);
  maxfev = optimget (options, "MaxFunEvals", Inf);
  outfcn = optimget (options, "OutputFcn");

  funvalchk = strcmpi (optimget (options, "FunValCheck", "off"), "on");

  if (funvalchk)
    ## Replace fcn with a guarded version.
    fcn = @(x) guarded_eval (fcn, x);
  endif

  ## These defaults are rather stringent. I think that normally, user
  ## prefers accuracy to performance.

  macheps = eps (class (x0));

  tolx = optimget (options, "TolX", sqrt (macheps));
  tolf = optimget (options, "TolFun", sqrt (macheps));

  ## Initial trust-region radius is factor * max (scaled norm of x, 1).
  factor = 0.1;
  ## FIXME: TypicalX corresponds to user scaling (???)

  niter = 1;
  nfev = 0;

  ## Internally the unknowns are always handled as a column vector.
  x = x0(:);
  info = 0;

  ## Initial evaluation.
  ## NOTE: this evaluation is not added to nfev.
  fval = fcn (reshape (x, xsiz));

  if (! isempty (outfcn))
    optimvalues.iter = niter;
    optimvalues.funccount = nfev;
    optimvalues.fval = fval;
    optimvalues.searchdirection = zeros (n, 1);
    state = 'init';
    stop = outfcn (x, optimvalues, state);
    if (stop)
      ## No "break" here: we are not inside a loop.  A nonzero info
      ## makes the outer loop below a no-op, so we fall through to the
      ## common exit code and still fill in all outputs.
      info = -1;
    endif
  endif

  nsuciter = 0;
  lastratio = 0;

  grad = [];

  ## Cholesky-like factor of the Hessian approximation (hess = hesr'*hesr).
  ## Initialized here so that it is defined even if the loop never runs.
  hesr = eye (n);

  ## Outer loop.
  while (niter < maxiter && nfev < maxfev && ! info)

    grad0 = grad;

    ## Calculate function value and gradient (possibly via FD).
    if (has_grad)
      [fval, grad] = fcn (reshape (x, xsiz));
      grad = grad(:);
      nfev ++;
    else
      grad = __fdjac__ (fcn, reshape (x, xsiz), fval, cdif)(:);
      nfev += (1 + cdif) * length (x);
    endif

    if (niter > 1)
      ## Use the damped BFGS formula.  s and w = hesr*s come from the
      ## last pass through the inner loop.
      y = grad - grad0;
      sBs = sumsq (w);
      Bs = hesr'*w;
      sy = y'*s;
      theta = 0.8 / max (1 - sy / sBs, 0.8);
      r = theta * y + (1-theta) * Bs;
      hesr = cholupdate (hesr, r / sqrt (s'*r), "+");
      ## Keep the downdate status in its own variable.  Storing it in
      ## "info" would terminate the iteration and report a bogus
      ## convergence code whenever the downdate fails.
      [hesr, cholerr] = cholupdate (hesr, Bs / sqrt (sBs), "-");
      if (cholerr)
        ## Downdate failed; restart from the identity.
        hesr = eye (n);
      endif
    endif

    ## Second derivatives approximate the hessian.
    d2f = norm (hesr, 'columns').';
    if (niter == 1)
      dg = d2f;
      xn = norm (dg .* x);
      ## FIXME: something better?
      delta = factor * max (xn, 1);
    endif

    ## FIXME: maybe fixed lower and upper bounds?
    dg = max (0.1*dg, d2f);

    ## FIXME -- why tolf*n*xn? If abs (e) ~ abs(x) * eps is a vector
    ## of perturbations of x, then norm (hesr*e) <= eps*xn, i.e. by
    ## tolf ~ eps we demand as much accuracy as we can expect.
    if (norm (grad) <= tolf*n*xn)
      info = 1;
      break;
    endif

    suc = false;
    decfac = 0.5;

    ## Inner loop.
    while (! suc && niter <= maxiter && nfev < maxfev && ! info)

      s = - __doglegm__ (hesr, grad, dg, delta);

      sn = norm (dg .* s);
      if (niter == 1)
        delta = min (delta, sn);
      endif

      fval1 = fcn (reshape (x + s, xsiz)) (:);
      nfev ++;

      if (fval1 < fval)
        ## Scaled actual reduction.
        actred = (fval - fval1) / (abs (fval1) + abs (fval));
      else
        actred = -1;
      endif

      w = hesr*s;
      ## Scaled predicted reduction, and ratio.
      t = 1/2 * sumsq (w) + grad'*s;
      if (t < 0)
        prered = -t/(abs (fval) + abs (fval + t));
        ratio = actred / prered;
      else
        prered = 0;
        ratio = 0;
      endif

      ## Update delta.
      if (ratio < min(max(0.1, 0.8*lastratio), 0.9))
        ## Poor agreement: shrink the region, progressively faster on
        ## repeated failures.
        delta *= decfac;
        decfac ^= 1.4142;
        if (delta <= 1e1*macheps*xn)
          ## Trust region became uselessly small.
          info = -3;
          break;
        endif
      else
        lastratio = ratio;
        decfac = 0.5;
        if (abs (1-ratio) <= 0.1)
          delta = 1.4142*sn;
        elseif (ratio >= 0.5)
          delta = max (delta, 1.4142*sn);
        endif
      endif

      if (ratio >= 1e-4)
        ## Successful iteration.
        x += s;
        xn = norm (dg .* x);
        fval = fval1;
        nsuciter ++;
        suc = true;
      endif

      niter ++;

      ## FIXME: should outputfcn be only called after a successful iteration?
      if (! isempty (outfcn))
        optimvalues.iter = niter;
        optimvalues.funccount = nfev;
        optimvalues.fval = fval;
        optimvalues.searchdirection = s;
        state = 'iter';
        stop = outfcn (x, optimvalues, state);
        if (stop)
          info = -1;
          break;
        endif
      endif

      ## Tests for termination conditions. A mysterious place, anything
      ## can happen if you change something here...

      ## The rule of thumb (which I'm not sure M*b is quite following)
      ## is that for a tolerance that depends on scaling, only 0 makes
      ## sense as a default value. But 0 usually means uselessly long
      ## iterations, so we need scaling-independent tolerances wherever
      ## possible.

      ## The following tests done only after successful step.
      if (ratio >= 1e-4)
        ## This one is classic. Note that we use scaled variables again,
        ## but compare to scaled step, so nothing bad.
        if (sn <= tolx*xn)
          info = 2;
        ## Again a classic one.
        elseif (actred < tolf)
          info = 3;
        endif
      endif

    endwhile
  endwhile

  ## Restore original shapes.
  x = reshape (x, xsiz);

  output.iterations = niter;
  output.successful = nsuciter;
  output.funcCount = nfev;

  if (nargout > 5)
    hess = hesr'*hesr;
  endif

endfunction
316
317## An assistant function that evaluates a function handle and checks for
318## bad results.
## Evaluate the handle FUN at X and validate the result.
##
## With two requested outputs FUN is called with two outputs and both
## are returned; otherwise only the first output is requested and GX is
## set to [].  An error is raised if either result is not real, or if
## FX contains a NaN.
function [fx, gx] = guarded_eval (fun, x)
  if (nargout > 1)
    [fx, gx] = fun (x);
  else
    fx = fun (x);
    gx = [];
  endif

  ## The second output is named gx here (the original checked an
  ## undefined "jx" and an undefined "complexeqn" flag, which made
  ## every guarded call fail).
  if (! (isreal (fx) && isreal (gx)))
    error ("fminunc:notreal", "fminunc: non-real value encountered");
  elseif (any (isnan (fx(:))))
    error ("fminunc:isnan", "fminunc: NaN value encountered");
  endif
endfunction
335
## Tests: minimize the Rosenbrock function, whose minimizer is the
## all-ones vector with objective value 0, in 2 and 4 dimensions.
%!function f = rosenb (x)
%!  n = length (x);
%!  f = sumsq (1 - x(1:n-1)) + 100 * sumsq (x(2:n) - x(1:n-1).^2);
%!endfunction
%!
%!test
%! [x, fval, info, out] = fminunc (@rosenb, [5, -5]);
%! tol = 2e-5;
%! assert (info > 0);
%! assert (x, ones (1, 2), tol);
%! assert (fval, 0, tol);
%!
%!test
%! [x, fval, info, out] = fminunc (@rosenb, zeros (1, 4));
%! tol = 2e-5;
%! assert (info > 0);
%! assert (x, ones (1, 4), tol);
%! assert (fval, 0, tol);
351