changelog shortlog tags changeset files revisions annotate raw

scripts/optimization/fsolve.m

changeset 9846: 1d90fc211872
parent:73e6ad869f08
author: John W. Eaton <jwe@octave.org>
date: Sat Nov 21 21:44:51 2009 -0500 (33 hours ago)
permissions: -rw-r--r--
description: configure.ac: report freetype, fontconfig, and fltk cflags and libs info
1## Copyright (C) 2008, 2009 VZLU Prague, a.s.
2##
3## This file is part of Octave.
4##
5## Octave is free software; you can redistribute it and/or modify it
6## under the terms of the GNU General Public License as published by
7## the Free Software Foundation; either version 3 of the License, or (at
8## your option) any later version.
9##
10## Octave is distributed in the hope that it will be useful, but
11## WITHOUT ANY WARRANTY; without even the implied warranty of
12## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13## General Public License for more details.
14##
15## You should have received a copy of the GNU General Public License
16## along with Octave; see the file COPYING. If not, see
17## <http://www.gnu.org/licenses/>.
18##
19## Author: Jaroslav Hajek <highegg@gmail.com>
20
21## -*- texinfo -*-
22## @deftypefn {Function File} {} fsolve (@var{fcn}, @var{x0}, @var{options})
23## @deftypefnx {Function File} {[@var{x}, @var{fvec}, @var{info}, @var{output}, @var{fjac}] =} fsolve (@var{fcn}, @dots{})
24## Solve a system of nonlinear equations defined by the function @var{fcn}.
25## @var{fcn} should accepts a vector (array) defining the unknown variables,
26## and return a vector of left-hand sides of the equations. Right-hand sides
27## are defined to be zeros.
28## In other words, this function attempts to determine a vector @var{x} such
29## that @code{@var{fcn} (@var{x})} gives (approximately) all zeros.
30## @var{x0} determines a starting guess. The shape of @var{x0} is preserved
31## in all calls to @var{fcn}, but otherwise it is treated as a column vector.
32## @var{options} is a structure specifying additional options.
33## Currently, @code{fsolve} recognizes these options:
34## @code{"FunValCheck"}, @code{"OutputFcn"}, @code{"TolX"},
35## @code{"TolFun"}, @code{"MaxIter"}, @code{"MaxFunEvals"},
36## @code{"Jacobian"}, @code{"Updating"} and @code{"ComplexEqn"}.
37##
38## If @code{"Jacobian"} is @code{"on"}, it specifies that @var{fcn},
39## called with 2 output arguments, also returns the Jacobian matrix
40## of right-hand sides at the requested point. @code{"TolX"} specifies
41## the termination tolerance in the unknown variables, while
42## @code{"TolFun"} is a tolerance for equations. Default is @code{1e-7}
43## for both @code{"TolX"} and @code{"TolFun"}.
44## If @code{"Updating"} is "on", the function will attempt to use Broyden
45## updates to update the Jacobian, in order to reduce the amount of jacobian
46## calculations.
47## If your user function always calculates the Jacobian (regardless of number
48## of output arguments), this option provides no advantage and should be set to
49## false.
50##
51## @code{"ComplexEqn"} is @code{"on"}, @code{fsolve} will attempt to solve
52## complex equations in complex variables, assuming that the equations possess a
53## complex derivative (i.e., are holomorphic). If this is not what you want,
54## should unpack the real and imaginary parts of the system to get a real
55## system.
56##
57## For description of the other options, see @code{optimset}.
58##
59## On return, @var{fval} contains the value of the function @var{fcn}
60## evaluated at @var{x}, and @var{info} may be one of the following values:
61##
62## @table @asis
63## @item 1
64## Converged to a solution point. Relative residual error is less than specified
65## by TolFun.
66## @item 2
67## Last relative step size was less that TolX.
68## @item 3
69## Last relative decrease in residual was less than TolF.
70## @item 0
71## Iteration limit exceeded.
72## @item -3
73## The trust region radius became excessively small.
74## @end table
75##
76## Note: If you only have a single nonlinear equation of one variable, using
77## @code{fzero} is usually a much better idea.
78## @seealso{fzero, optimset}
79##
80## Note about user-supplied jacobians:
81## As an inherent property of the algorithm, jacobian is always requested for a
82## solution vector whose residual vector is already known, and it is the last
83## accepted successful step. Often this will be one of the last two calls, but
84## not always. If the savings by reusing intermediate results from residual
85## calculation in jacobian calculation are significant, the best strategy is to
86## employ OutputFcn: After a vector is evaluated for residuals, if OutputFcn is
87## called with that vector, then the intermediate results should be saved for
88## future jacobian evaluation, and should be kept until a jacobian evaluation
89## is requested or until outputfcn is called with a different vector, in which
90## case they should be dropped in favor of this most recent vector. A short
91## example how this can be achieved follows:
92##
93## @example
94## function [fvec, fjac] = user_func (x, optimvalues, state)
95## persistent sav = [], sav0 = [];
96## if (nargin == 1)
97## ## evaluation call
98## if (nargout == 1)
99## sav0.x = x; # mark saved vector
100## ## calculate fvec, save results to sav0.
101## elseif (nargout == 2)
102## ## calculate fjac using sav.
103## endif
104## else
105## ## outputfcn call.
106## if (all (x == sav0.x))
107## sav = sav0;
108## endif
109## ## maybe output iteration status, etc.
110## endif
111## endfunction
112##
113## ## @dots{}.
114##
115## fsolve (@@user_func, x0, optimset ("OutputFcn", @@user_func, @dots{}))
116## @end example
117###
118## @end deftypefn
119
120## PKG_ADD: __all_opts__ ("fsolve");
121
122function [x, fvec, info, output, fjac] = fsolve (fcn, x0, options = struct ())
123
124 ## Get default options if requested.
125 if (nargin == 1 && ischar (fcn) && strcmp (fcn, 'defaults'))
126 x = optimset ("MaxIter", 400, "MaxFunEvals", Inf, \
127 "Jacobian", "off", "TolX", 1.5e-8, "TolFun", 1.5e-8,
128 "OutputFcn", [], "Updating", "on", "FunValCheck", "off",
129 "ComplexEqn", "off", "FinDiffType", "central");
130 return;
131 endif
132
133 if (nargin < 2 || nargin > 3 || ! ismatrix (x0))
134 print_usage ();
135 endif
136
137 if (ischar (fcn))
138 fcn = str2func (fcn, "global");
139 endif
140
141 xsiz = size (x0);
142 n = numel (x0);
143
144 has_jac = strcmpi (optimget (options, "Jacobian", "off"), "on");
145 cdif = strcmpi (optimget (options, "FinDiffType", "central"), "central");
146 maxiter = optimget (options, "MaxIter", 400);
147 maxfev = optimget (options, "MaxFunEvals", Inf);
148 outfcn = optimget (options, "OutputFcn");
149 updating = strcmpi (optimget (options, "Updating", "on"), "on");
150 complexeqn = strcmpi (optimget (options, "ComplexEqn", "off"), "on");
151
152 funvalchk = strcmpi (optimget (options, "FunValCheck", "off"), "on");
153
154 if (funvalchk)
155 ## Replace fcn with a guarded version.
156 fcn = @(x) guarded_eval (fcn, x, complexeqn);
157 endif
158
159 ## These defaults are rather stringent. I think that normally, user
160 ## prefers accuracy to performance.
161
162 macheps = eps (class (x0));
163
164 tolx = optimget (options, "TolX", sqrt (macheps));
165 tolf = optimget (options, "TolFun", sqrt (macheps));
166
167 factor = 1;
168
169 niter = 1;
170 nfev = 1;
171
172 x = x0(:);
173 info = 0;
174
175 ## Initial evaluation.
176 ## Handle arbitrary shapes of x and f and remember them.
177 fvec = fcn (reshape (x, xsiz));
178 fsiz = size (fvec);
179 fvec = fvec(:);
180 fn = norm (fvec);
181 m = length (fvec);
182 n = length (x);
183
184 if (! isempty (outfcn))
185 optimvalues.iter = niter;
186 optimvalues.funccount = nfev;
187 optimvalues.fval = fn;
188 optimvalues.searchdirection = zeros (n, 1);
189 state = 'init';
190 stop = outfcn (x, optimvalues, state);
191 if (stop)
192 info = -1;
193 break;
194 endif
195 endif
196
197 nsuciter = 0;
198
199 ## Outer loop.
200 while (niter < maxiter && nfev < maxfev && ! info)
201
202 ## Calculate function value and Jacobian (possibly via FD).
203 if (has_jac)
204 [fvec, fjac] = fcn (reshape (x, xsiz));
205 ## If the jacobian is sparse, disable Broyden updating.
206 if (issparse (fjac))
207 updating = false;
208 endif
209 fvec = fvec(:);
210 nfev ++;
211 else
212 fjac = __fdjac__ (fcn, reshape (x, xsiz), fvec, cdif);
213 nfev += (1 + cdif) * length (x);
214 endif
215
216 ## For square and overdetermined systems, we update a QR
217 ## factorization of the jacobian to avoid solving a full system in each
218 ## step. In this case, we pass a triangular matrix to __dogleg__.
219 useqr = updating && m >= n && n > 10;
220
221 if (useqr)
222 ## FIXME: Currently, pivoting is mostly useless because the \ operator
223 ## cannot exploit the resulting props of the triangular factor.
224 ## Unpivoted QR is significantly faster so it doesn't seem right to pivot
225 ## just to get invariance. Original MINPACK didn't pivot either, at least
226 ## when qr updating was used.
227 [q, r] = qr (fjac, 0);
228 endif
229
230 ## Get column norms, use them as scaling factors.
231 jcn = norm (fjac, 'columns').';
232 if (niter == 1)
233 dg = jcn;
234 dg(dg == 0) = 1;
235 xn = norm (dg .* x);
236 ## FIXME: something better?
237 delta = factor * max (xn, 1);
238 endif
239
240 ## Rescale adaptively.
241 ## FIXME: the original minpack used the following rescaling strategy:
242 ## dg = max (dg, jcn);
243 ## but it seems not good if we start with a bad guess yielding jacobian
244 ## columns with large norms that later decrease, because the corresponding
245 ## variable will still be overscaled. So instead, we only give the old
246 ## scaling a small momentum, but do not honor it.
247
248 dg = max (0.1*dg, jcn);
249
250 ## It also seems that in the case of fast (and inhomogeneously) changing
251 ## jacobian, the Broyden updates are of little use, so maybe we could
252 ## skip them if a big disproportional change is expected. The question is,
253 ## of course, how to define the above terms :)
254
255 lastratio = 0;
256 nfail = 0;
257 nsuc = 0;
258 decfac = 0.5;
259
260 ## Inner loop.
261 while (niter <= maxiter && nfev < maxfev && ! info)
262
263 ## Get trust-region model (dogleg) minimizer.
264 if (useqr)
265 qtf = q'*fvec;
266 s = - __dogleg__ (r, qtf, dg, delta);
267 w = qtf + r * s;
268 else
269 s = - __dogleg__ (fjac, fvec, dg, delta);
270 w = fvec + fjac * s;
271 endif
272
273 sn = norm (dg .* s);
274 if (niter == 1)
275 delta = min (delta, sn);
276 endif
277
278 fvec1 = fcn (reshape (x + s, xsiz)) (:);
279 fn1 = norm (fvec1);
280 nfev ++;
281
282 if (fn1 < fn)
283 ## Scaled actual reduction.
284 actred = 1 - (fn1/fn)^2;
285 else
286 actred = -1;
287 endif
288
289 ## Scaled predicted reduction, and ratio.
290 t = norm (w);
291 if (t < fn)
292 prered = 1 - (t/fn)^2;
293 ratio = actred / prered;
294 else
295 prered = 0;
296 ratio = 0;
297 endif
298
299 ## Update delta.
300 if (ratio < min(max(0.1, 0.8*lastratio), 0.9))
301 nsuc = 0;
302 nfail ++;
303 delta *= decfac;
304 decfac ^= 1.4142;
305 if (delta <= 1e1*macheps*xn)
306 ## Trust region became uselessly small.
307 info = -3;
308 break;
309 endif
310 else
311 lastratio = ratio;
312 decfac = 0.5;
313 nfail = 0;
314 nsuc ++;
315 if (abs (1-ratio) <= 0.1)
316 delta = 1.4142*sn;
317 elseif (ratio >= 0.5 || nsuc > 1)
318 delta = max (delta, 1.4142*sn);
319 endif
320 endif
321
322 if (ratio >= 1e-4)
323 ## Successful iteration.
324 x += s;
325 xn = norm (dg .* x);
326 fvec = fvec1;
327 fn = fn1;
328 nsuciter ++;
329 endif
330
331 niter ++;
332
333 ## FIXME: should outputfcn be only called after a successful iteration?
334 if (! isempty (outfcn))
335 optimvalues.iter = niter;
336 optimvalues.funccount = nfev;
337 optimvalues.fval = fn;
338 optimvalues.searchdirection = s;
339 state = 'iter';
340 stop = outfcn (x, optimvalues, state);
341 if (stop)
342 info = -1;
343 break;
344 endif
345 endif
346
347 ## Tests for termination conditions. A mysterious place, anything
348 ## can happen if you change something here...
349
350 ## The rule of thumb (which I'm not sure M*b is quite following)
351 ## is that for a tolerance that depends on scaling, only 0 makes
352 ## sense as a default value. But 0 usually means uselessly long
353 ## iterations, so we need scaling-independent tolerances wherever
354 ## possible.
355
356 ## FIXME -- why tolf*n*xn? If abs (e) ~ abs(x) * eps is a vector
357 ## of perturbations of x, then norm (fjac*e) <= eps*n*xn, i.e. by
358 ## tolf ~ eps we demand as much accuracy as we can expect.
359 if (fn <= tolf*n*xn)
360 info = 1;
361 ## The following tests done only after successful step.
362 elseif (ratio >= 1e-4)
363 ## This one is classic. Note that we use scaled variables again,
364 ## but compare to scaled step, so nothing bad.
365 if (sn <= tolx*xn)
366 info = 2;
367 ## Again a classic one. It seems weird to use the same tolf
368 ## for two different tests, but that's what M*b manual appears
369 ## to say.
370 elseif (actred < tolf)
371 info = 3;
372 endif
373 endif
374
375 ## Criterion for recalculating jacobian.
376 if (! updating || nfail == 2 || nsuciter < 2)
377 break;
378 endif
379
380 ## Compute the scaled Broyden update.
381 if (useqr)
382 u = (fvec1 - q*w) / sn;
383 v = dg .* ((dg .* s) / sn);
384
385 ## Update the QR factorization.
386 [q, r] = qrupdate (q, r, u, v);
387 else
388 u = (fvec1 - w);
389 v = dg .* ((dg .* s) / sn);
390
391 ## update the jacobian
392 fjac += u * v';
393 endif
394 endwhile
395 endwhile
396
397 ## Restore original shapes.
398 x = reshape (x, xsiz);
399 fvec = reshape (fvec, fsiz);
400
401 output.iterations = niter;
402 output.successful = nsuciter;
403 output.funcCount = nfev;
404
405endfunction
406
407## An assistant function that evaluates a function handle and checks for
408## bad results.
409function [fx, jx] = guarded_eval (fun, x, complexeqn)
410 if (nargout > 1)
411 [fx, jx] = fun (x);
412 else
413 fx = fun (x);
414 jx = [];
415 endif
416
417 if (! complexeqn && ! (isreal (fx) && isreal (jx)))
418 error ("fsolve:notreal", "fsolve: non-real value encountered");
419 elseif (complexeqn && ! (isnumeric (fx) && isnumeric(jx)))
420 error ("fsolve:notnum", "fsolve: non-numeric value encountered");
421 elseif (any (isnan (fx(:))))
422 error ("fsolve:isnan", "fsolve: NaN value encountered");
423 endif
424endfunction
425
426%!function retval = f (p)
427%! x = p(1);
428%! y = p(2);
429%! z = p(3);
430%! retval = zeros (3, 1);
431%! retval(1) = sin(x) + y**2 + log(z) - 7;
432%! retval(2) = 3*x + 2**y -z**3 + 1;
433%! retval(3) = x + y + z - 5;
434%!test
435%! x_opt = [ 0.599054;
436%! 2.395931;
437%! 2.005014 ];
438%! tol = 1.0e-5;
439%! [x, fval, info] = fsolve (@f, [ 0.5; 2.0; 2.5 ]);
440%! assert (info > 0);
441%! assert (norm (x - x_opt, Inf) < tol);
442%! assert (norm (fval) < tol);
443
444%!function retval = f (p)
445%! x = p(1);
446%! y = p(2);
447%! z = p(3);
448%! w = p(4);
449%! retval = zeros (4, 1);
450%! retval(1) = 3*x + 4*y + exp (z + w) - 1.007;
451%! retval(2) = 6*x - 4*y + exp (3*z + w) - 11;
452%! retval(3) = x^4 - 4*y^2 + 6*z - 8*w - 20;
453%! retval(4) = x^2 + 2*y^3 + z - w - 4;
454%!test
455%! x_opt = [ -0.767297326653401, 0.590671081117440, 1.47190018629642, -1.52719341133957 ];
456%! tol = 1.0e-5;
457%! [x, fval, info] = fsolve (@f, [-1, 1, 2, -1]);
458%! assert (info > 0);
459%! assert (norm (x - x_opt, Inf) < tol);
460%! assert (norm (fval) < tol);
461
462%!function retval = f (p)
463%! x = p(1);
464%! y = p(2);
465%! z = p(3);
466%! retval = zeros (3, 1);
467%! retval(1) = sin(x) + y**2 + log(z) - 7;
468%! retval(2) = 3*x + 2**y -z**3 + 1;
469%! retval(3) = x + y + z - 5;
470%! retval(4) = x*x + y - z*log(z) - 1.36;
471%!test
472%! x_opt = [ 0.599054;
473%! 2.395931;
474%! 2.005014 ];
475%! tol = 1.0e-5;
476%! [x, fval, info] = fsolve (@f, [ 0.5; 2.0; 2.5 ]);
477%! assert (info > 0);
478%! assert (norm (x - x_opt, Inf) < tol);
479%! assert (norm (fval) < tol);
480
481%!function retval = f (p)
482%! x = p(1);
483%! y = p(2);
484%! z = p(3);
485%! retval = zeros (3, 1);
486%! retval(1) = sin(x) + y**2 + log(z) - 7;
487%! retval(2) = 3*x + 2**y -z**3 + 1;
488%! retval(3) = x + y + z - 5;
489%!test
490%! x_opt = [ 0.599054;
491%! 2.395931;
492%! 2.005014 ];
493%! tol = 1.0e-5;
494%! opt = optimset ("Updating", "qrp");
495%! [x, fval, info] = fsolve (@f, [ 0.5; 2.0; 2.5 ], opt);
496%! assert (info > 0);
497%! assert (norm (x - x_opt, Inf) < tol);
498%! assert (norm (fval) < tol);
499
500%!test
501%! b0 = 3;
502%! a0 = 0.2;
503%! x = 0:.5:5;
504%! noise = 1e-5 * sin (100*x);
505%! y = exp (-a0*x) + b0 + noise;
506%! c_opt = [a0, b0];
507%! tol = 1e-5;
508%!
509%! [c, fval, info, output] = fsolve (@(c) (exp(-c(1)*x) + c(2) - y), [0, 0]);
510%! assert (info > 0);
511%! assert (norm (c - c_opt, Inf) < tol);
512%! assert (norm (fval) < norm (noise));
513
514
515%!function y = cfun (x)
516%! y(1) = (1+i)*x(1)^2 - (1-i)*x(2) - 2;
517%! y(2) = sqrt (x(1)*x(2)) - (1-2i)*x(3) + (3-4i);
518%! y(3) = x(1) * x(2) - x(3)^2 + (3+2i);
519
520%!test
521%! x_opt = [-1+i, 1-i, 2+i];
522%! x = [i, 1, 1+i];
523%!
524%! [x, f, info] = fsolve (@cfun, x, optimset ("ComplexEqn", "on"));
525%! tol = 1e-5;
526%! assert (norm (f) < tol);
527%! assert (norm (x - x_opt, Inf) < tol);
528