1## Copyright (C) 2008, 2009 VZLU Prague, a.s.
3## This file is part of Octave.
5## Octave is free software; you can redistribute it and/or modify it
6## under the terms of the GNU General Public License as published by
7## the Free Software Foundation; either version 3 of the License, or (at
8## your option) any later version.
10## Octave is distributed in the hope that it will be useful, but
11## WITHOUT ANY WARRANTY; without even the implied warranty of
12## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13## General Public License for more details.
15## You should have received a copy of the GNU General Public License
16## along with Octave; see the file COPYING. If not, see
17## <http://www.gnu.org/licenses/>.
19## Author: Jaroslav Hajek <highegg@gmail.com>
22## @deftypefn {Function File} {} fsolve (@var{fcn}, @var{x0}, @var{options})
23## @deftypefnx {Function File} {[@var{x}, @var{fvec}, @var{info}, @var{output}, @var{fjac}] =} fsolve (@var{fcn}, @dots{})
24## Solve a system of nonlinear equations defined by the function @var{fcn}.
25## @var{fcn} should accepts a vector (array) defining the unknown variables,
26## and return a vector of left-hand sides of the equations. Right-hand sides
27## are defined to be zeros.
28## In other words, this function attempts to determine a vector @var{x} such
29## that @code{@var{fcn} (@var{x})} gives (approximately) all zeros.
30## @var{x0} determines a starting guess. The shape of @var{x0} is preserved
31## in all calls to @var{fcn}, but otherwise it is treated as a column vector.
32## @var{options} is a structure specifying additional options.
33## Currently, @code{fsolve} recognizes these options:
34## @code{"FunValCheck"}, @code{"OutputFcn"}, @code{"TolX"},
35## @code{"TolFun"}, @code{"MaxIter"}, @code{"MaxFunEvals"},
36## @code{"Jacobian"}, @code{"Updating"}, @code{"ComplexEqn"}
37## @code{"TypicalX"}, @code{"AutoScaling"} and @code{"FinDiffType"}.
39## If @code{"Jacobian"} is @code{"on"}, it specifies that @var{fcn},
40## called with 2 output arguments, also returns the Jacobian matrix
41## of right-hand sides at the requested point. @code{"TolX"} specifies
42## the termination tolerance in the unknown variables, while
43## @code{"TolFun"} is a tolerance for equations. Default is @code{1e-7}
44## for both @code{"TolX"} and @code{"TolFun"}.
46## If @code{"AutoScaling"} is on, the variables will be automatically scaled
47## according to the column norms of the (estimated) Jacobian. As a result,
48## TolF becomes scaling-independent. By default, this option is off, because
49## it may sometimes deliver unexpected (though mathematically correct) results.
51## If @code{"Updating"} is "on", the function will attempt to use Broyden
52## updates to update the Jacobian, in order to reduce the amount of jacobian
54## If your user function always calculates the Jacobian (regardless of number
55## of output arguments), this option provides no advantage and should be set to
58## @code{"ComplexEqn"} is @code{"on"}, @code{fsolve} will attempt to solve
59## complex equations in complex variables, assuming that the equations possess a
60## complex derivative (i.e., are holomorphic). If this is not what you want,
61## should unpack the real and imaginary parts of the system to get a real
64## For description of the other options, see @code{optimset}.
66## On return, @var{fval} contains the value of the function @var{fcn}
67## evaluated at @var{x}, and @var{info} may be one of the following values:
71## Converged to a solution point. Relative residual error is less than specified
74## Last relative step size was less that TolX.
76## Last relative decrease in residual was less than TolF.
78## Iteration limit exceeded.
80## The trust region radius became excessively small.
83## Note: If you only have a single nonlinear equation of one variable, using
84## @code{fzero} is usually a much better idea.
85## @seealso{fzero, optimset}
87## Note about user-supplied jacobians:
88## As an inherent property of the algorithm, jacobian is always requested for a
89## solution vector whose residual vector is already known, and it is the last
90## accepted successful step. Often this will be one of the last two calls, but
91## not always. If the savings by reusing intermediate results from residual
92## calculation in jacobian calculation are significant, the best strategy is to
93## employ OutputFcn: After a vector is evaluated for residuals, if OutputFcn is
94## called with that vector, then the intermediate results should be saved for
95## future jacobian evaluation, and should be kept until a jacobian evaluation
96## is requested or until outputfcn is called with a different vector, in which
97## case they should be dropped in favor of this most recent vector. A short
98## example how this can be achieved follows:
101## function [fvec, fjac] = user_func (x, optimvalues, state)
102## persistent sav = [], sav0 = [];
106## sav0.x = x; # mark saved vector
107## ## calculate fvec, save results to sav0.
108## elseif (nargout == 2)
109## ## calculate fjac using sav.
113## if (all (x == sav0.x))
116## ## maybe output iteration status, etc.
122## fsolve (@@user_func, x0, optimset ("OutputFcn", @@user_func, @dots{}))
127## PKG_ADD: __all_opts__ ("fsolve");
129function [x, fvec, info, output, fjac] = fsolve (fcn, x0, options = struct ())
131 ## Get default options if requested.
132 if (nargin == 1 && ischar (fcn) && strcmp (fcn, 'defaults'))
133 x = optimset ("MaxIter", 400, "MaxFunEvals", Inf, \
134 "Jacobian", "off", "TolX", 1e-7, "TolFun", 1e-7,
135 "OutputFcn", [], "Updating", "on", "FunValCheck", "off",
136 "ComplexEqn", "off", "FinDiffType", "central",
137 "TypicalX", [], "AutoScaling", "off");
141 if (nargin < 2 || nargin > 3 || ! ismatrix (x0))
146 fcn = str2func (fcn, "global");
147 elseif (iscell (fcn))
148 fcn = @(x) make_fcn_jac (x, fcn{1}, fcn{2});
154 has_jac = strcmpi (optimget (options, "Jacobian", "off"), "on");
155 cdif = strcmpi (optimget (options, "FinDiffType", "central"), "central");
156 maxiter = optimget (options, "MaxIter", 400);
157 maxfev = optimget (options, "MaxFunEvals", Inf);
158 outfcn = optimget (options, "OutputFcn");
159 updating = strcmpi (optimget (options, "Updating", "on"), "on");
160 complexeqn = strcmpi (optimget (options, "ComplexEqn", "off"), "on");
162 ## Get scaling matrix using the TypicalX option. If set to "auto", the
163 ## scaling matrix is estimated using the jacobian.
164 typicalx = optimget (options, "TypicalX");
165 if (isempty (typicalx))
166 typicalx = ones (n, 1);
168 autoscale = strcmpi (optimget (options, "AutoScaling", "off"), "on");
173 funvalchk = strcmpi (optimget (options, "FunValCheck", "off"), "on");
176 ## Replace fcn with a guarded version.
177 fcn = @(x) guarded_eval (fcn, x, complexeqn);
180 ## These defaults are rather stringent. I think that normally, user
181 ## prefers accuracy to performance.
183 macheps = eps (class (x0));
185 tolx = optimget (options, "TolX", 1e-7);
186 tolf = optimget (options, "TolFun", 1e-7);
196 ## Initial evaluation.
197 ## Handle arbitrary shapes of x and f and remember them.
198 fvec = fcn (reshape (x, xsiz));
205 if (! isempty (outfcn))
206 optimvalues.iter = niter;
207 optimvalues.funccount = nfev;
208 optimvalues.fval = fn;
209 optimvalues.searchdirection = zeros (n, 1);
211 stop = outfcn (x, optimvalues, state);
221 while (niter < maxiter && nfev < maxfev && ! info)
223 ## Calculate function value and Jacobian (possibly via FD).
225 [fvec, fjac] = fcn (reshape (x, xsiz));
226 ## If the jacobian is sparse, disable Broyden updating.
233 fjac = __fdjac__ (fcn, reshape (x, xsiz), fvec, typicalx, cdif);
234 nfev += (1 + cdif) * length (x);
237 ## For square and overdetermined systems, we update a QR
238 ## factorization of the jacobian to avoid solving a full system in each
239 ## step. In this case, we pass a triangular matrix to __dogleg__.
240 useqr = updating && m >= n && n > 10;
243 ## FIXME: Currently, pivoting is mostly useless because the \ operator
244 ## cannot exploit the resulting props of the triangular factor.
245 ## Unpivoted QR is significantly faster so it doesn't seem right to pivot
246 ## just to get invariance. Original MINPACK didn't pivot either, at least
247 ## when qr updating was used.
248 [q, r] = qr (fjac, 0);
252 ## Get column norms, use them as scaling factors.
253 jcn = norm (fjac, 'columns').';
258 ## Rescale adaptively.
259 ## FIXME: the original minpack used the following rescaling strategy:
260 ## dg = max (dg, jcn);
261 ## but it seems not good if we start with a bad guess yielding jacobian
262 ## columns with large norms that later decrease, because the corresponding
263 ## variable will still be overscaled. So instead, we only give the old
264 ## scaling a small momentum, but do not honor it.
266 dg = max (0.1*dg, jcn);
272 ## FIXME: something better?
273 delta = factor * max (xn, 1);
276 ## It also seems that in the case of fast (and inhomogeneously) changing
277 ## jacobian, the Broyden updates are of little use, so maybe we could
278 ## skip them if a big disproportional change is expected. The question is,
279 ## of course, how to define the above terms :)
287 while (niter <= maxiter && nfev < maxfev && ! info)
289 ## Get trust-region model (dogleg) minimizer.
292 s = - __dogleg__ (r, qtf, dg, delta);
295 s = - __dogleg__ (fjac, fvec, dg, delta);
301 delta = min (delta, sn);
304 fvec1 = fcn (reshape (x + s, xsiz)) (:);
309 ## Scaled actual reduction.
310 actred = 1 - (fn1/fn)^2;
315 ## Scaled predicted reduction, and ratio.
318 prered = 1 - (t/fn)^2;
319 ratio = actred / prered;
326 if (ratio < min(max(0.1, 0.8*lastratio), 0.9))
331 if (delta <= 1e1*macheps*xn)
332 ## Trust region became uselessly small.
341 if (abs (1-ratio) <= 0.1)
343 elseif (ratio >= 0.5 || nsuc > 1)
344 delta = max (delta, 1.4142*sn);
349 ## Successful iteration.
359 ## FIXME: should outputfcn be only called after a successful iteration?
360 if (! isempty (outfcn))
361 optimvalues.iter = niter;
362 optimvalues.funccount = nfev;
363 optimvalues.fval = fn;
364 optimvalues.searchdirection = s;
366 stop = outfcn (x, optimvalues, state);
373 ## Tests for termination conditions. A mysterious place, anything
374 ## can happen if you change something here...
376 ## The rule of thumb (which I'm not sure M*b is quite following)
377 ## is that for a tolerance that depends on scaling, only 0 makes
378 ## sense as a default value. But 0 usually means uselessly long
379 ## iterations, so we need scaling-independent tolerances wherever
382 ## FIXME -- why tolf*n*xn? If abs (e) ~ abs(x) * eps is a vector
383 ## of perturbations of x, then norm (fjac*e) <= eps*n*xn, i.e. by
384 ## tolf ~ eps we demand as much accuracy as we can expect.
387 ## The following tests done only after successful step.
388 elseif (ratio >= 1e-4)
389 ## This one is classic. Note that we use scaled variables again,
390 ## but compare to scaled step, so nothing bad.
393 ## Again a classic one. It seems weird to use the same tolf
394 ## for two different tests, but that's what M*b manual appears
396 elseif (actred < tolf)
401 ## Criterion for recalculating jacobian.
402 if (! updating || nfail == 2 || nsuciter < 2)
406 ## Compute the scaled Broyden update.
408 u = (fvec1 - q*w) / sn;
409 v = dg .* ((dg .* s) / sn);
411 ## Update the QR factorization.
412 [q, r] = qrupdate (q, r, u, v);
415 v = dg .* ((dg .* s) / sn);
417 ## update the jacobian
423 ## Restore original shapes.
424 x = reshape (x, xsiz);
425 fvec = reshape (fvec, fsiz);
427 output.iterations = niter;
428 output.successful = nsuciter;
429 output.funcCount = nfev;
433## An assistant function that evaluates a function handle and checks for
435function [fx, jx] = guarded_eval (fun, x, complexeqn)
443 if (! complexeqn && ! (isreal (fx) && isreal (jx)))
444 error ("fsolve:notreal", "fsolve: non-real value encountered");
445 elseif (complexeqn && ! (isnumeric (fx) && isnumeric(jx)))
446 error ("fsolve:notnum", "fsolve: non-numeric value encountered");
447 elseif (any (isnan (fx(:))))
448 error ("fsolve:isnan", "fsolve: NaN value encountered");
452function [fx, jx] = make_fcn_jac (x, fcn, fjac)
459%!function retval = f (p)
463%! retval = zeros (3, 1);
464%! retval(1) = sin(x) + y**2 + log(z) - 7;
465%! retval(2) = 3*x + 2**y -z**3 + 1;
466%! retval(3) = x + y + z - 5;
468%! x_opt = [ 0.599054;
472%! [x, fval, info] = fsolve (@f, [ 0.5; 2.0; 2.5 ]);
474%! assert (norm (x - x_opt, Inf) < tol);
475%! assert (norm (fval) < tol);
477%!function retval = f (p)
482%! retval = zeros (4, 1);
483%! retval(1) = 3*x + 4*y + exp (z + w) - 1.007;
484%! retval(2) = 6*x - 4*y + exp (3*z + w) - 11;
485%! retval(3) = x^4 - 4*y^2 + 6*z - 8*w - 20;
486%! retval(4) = x^2 + 2*y^3 + z - w - 4;
488%! x_opt = [ -0.767297326653401, 0.590671081117440, 1.47190018629642, -1.52719341133957 ];
490%! [x, fval, info] = fsolve (@f, [-1, 1, 2, -1]);
492%! assert (norm (x - x_opt, Inf) < tol);
493%! assert (norm (fval) < tol);
495%!function retval = f (p)
499%! retval = zeros (3, 1);
500%! retval(1) = sin(x) + y**2 + log(z) - 7;
501%! retval(2) = 3*x + 2**y -z**3 + 1;
502%! retval(3) = x + y + z - 5;
503%! retval(4) = x*x + y - z*log(z) - 1.36;
505%! x_opt = [ 0.599054;
509%! [x, fval, info] = fsolve (@f, [ 0.5; 2.0; 2.5 ]);
511%! assert (norm (x - x_opt, Inf) < tol);
512%! assert (norm (fval) < tol);
514%!function retval = f (p)
518%! retval = zeros (3, 1);
519%! retval(1) = sin(x) + y**2 + log(z) - 7;
520%! retval(2) = 3*x + 2**y -z**3 + 1;
521%! retval(3) = x + y + z - 5;
523%! x_opt = [ 0.599054;
527%! opt = optimset ("Updating", "qrp");
528%! [x, fval, info] = fsolve (@f, [ 0.5; 2.0; 2.5 ], opt);
530%! assert (norm (x - x_opt, Inf) < tol);
531%! assert (norm (fval) < tol);
537%! noise = 1e-5 * sin (100*x);
538%! y = exp (-a0*x) + b0 + noise;
542%! [c, fval, info, output] = fsolve (@(c) (exp(-c(1)*x) + c(2) - y), [0, 0]);
544%! assert (norm (c - c_opt, Inf) < tol);
545%! assert (norm (fval) < norm (noise));
548%!function y = cfun (x)
549%! y(1) = (1+i)*x(1)^2 - (1-i)*x(2) - 2;
550%! y(2) = sqrt (x(1)*x(2)) - (1-2i)*x(3) + (3-4i);
551%! y(3) = x(1) * x(2) - x(3)^2 + (3+2i);
554%! x_opt = [-1+i, 1-i, 2+i];
557%! [x, f, info] = fsolve (@cfun, x, optimset ("ComplexEqn", "on"));
559%! assert (norm (f) < tol);
560%! assert (norm (x - x_opt, Inf) < tol);
562## Solve the double dogleg trust-region least-squares problem:
563## Minimize norm(r*x-b) subject to the constraint norm(d.*x) <= delta,
564## x being a convex combination of the gauss-newton and scaled gradient.
567## TODO: handle singularity, or leave it up to mldivide?
569function x = __dogleg__ (r, b, d, delta)
570 ## Get Gauss-Newton direction.
574 ## GN is too big, get scaled gradient.
578 ## Normalize and rescale.
580 ## Get the line minimizer in s direction.
582 snm = (sn / tn) / tn;
584 ## Get the dogleg path minimizer.
586 dxn = delta/xn; snmd = snm/delta;
587 t = (bn/sn) * (bn/xn) * snmd;
588 t -= dxn * snmd^2 - sqrt ((t-dxn)^2 + (1-dxn^2)*(1-snmd^2));
589 alpha = dxn*(1-snmd^2) / t;
597 ## Form the appropriate convex combination.
598 x = alpha * x + ((1-alpha) * min (snm, delta)) * s;