changelog shortlog tags changeset files revisions annotate raw

scripts/optimization/sqp.m

changeset 10289: 4b124317dc38
parent:f0c3d3fc4903
author: John W. Eaton <jwe@octave.org>
date: Tue Feb 09 20:58:55 2010 -0500 (36 minutes ago)
permissions: -rw-r--r--
description: base_properties::set_children: account for hidden children
1## Copyright (C) 2005, 2006, 2007, 2008, 2009 John W. Eaton
2##
3## This file is part of Octave.
4##
5## Octave is free software; you can redistribute it and/or modify it
6## under the terms of the GNU General Public License as published by
7## the Free Software Foundation; either version 3 of the License, or (at
8## your option) any later version.
9##
10## Octave is distributed in the hope that it will be useful, but
11## WITHOUT ANY WARRANTY; without even the implied warranty of
12## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13## General Public License for more details.
14##
15## You should have received a copy of the GNU General Public License
16## along with Octave; see the file COPYING. If not, see
17## <http://www.gnu.org/licenses/>.
18
19## -*- texinfo -*-
20## @deftypefn {Function File} {[@var{x}, @var{obj}, @var{info}, @var{iter}, @var{nf}, @var{lambda}] =} sqp (@var{x}, @var{phi}, @var{g}, @var{h}, @var{lb}, @var{ub}, @var{maxiter}, @var{tolerance})
21## Solve the nonlinear program
22## @tex
23## $$
24## \min_x \phi (x)
25## $$
26## @end tex
27## @ifnottex
28##
29## @example
30## @group
31## min phi (x)
32## x
33## @end group
34## @end example
35##
36## @end ifnottex
37## subject to
38## @tex
39## $$
40## g(x) = 0 \qquad h(x) \geq 0 \qquad lb \leq x \leq ub
41## $$
42## @end tex
43## @ifnottex
44##
45## @example
46## @group
47## g(x) = 0
48## h(x) >= 0
49## lb <= x <= ub
50## @end group
51## @end example
52## @end ifnottex
53##
54## @noindent
55## using a successive quadratic programming method.
56##
57## The first argument is the initial guess for the vector @var{x}.
58##
59## The second argument is a function handle pointing to the objective
60## function. The objective function must be of the form
61##
62## @example
63## y = phi (x)
64## @end example
65##
66## @noindent
67## in which @var{x} is a vector and @var{y} is a scalar.
68##
69## The second argument may also be a 2- or 3-element cell array of
70## function handles. The first element should point to the objective
71## function, the second should point to a function that computes the
72## gradient of the objective function, and the third should point to a
73## function to compute the hessian of the objective function. If the
74## gradient function is not supplied, the gradient is computed by finite
75## differences. If the hessian function is not supplied, a BFGS update
76## formula is used to approximate the hessian.
77##
78## If supplied, the gradient function must be of the form
79##
80## @example
81## g = gradient (x)
82## @end example
83##
84## @noindent
85## in which @var{x} is a vector and @var{g} is a vector.
86##
87## If supplied, the hessian function must be of the form
88##
89## @example
90## h = hessian (x)
91## @end example
92##
93## @noindent
94## in which @var{x} is a vector and @var{h} is a matrix.
95##
96## The third and fourth arguments are function handles pointing to
97## functions that compute the equality constraints and the inequality
98## constraints, respectively.
99##
100## If your problem does not have equality (or inequality) constraints,
101## you may pass an empty matrix for @var{cef} (or @var{cif}).
102##
103## If supplied, the equality and inequality constraint functions must be
104## of the form
105##
106## @example
107## r = f (x)
108## @end example
109##
110## @noindent
111## in which @var{x} is a vector and @var{r} is a vector.
112##
113## The third and fourth arguments may also be 2-element cell arrays of
114## function handles. The first element should point to the constraint
115## function and the second should point to a function that computes the
116## gradient of the constraint function:
117##
118## @tex
119## $$
120## \Bigg( {\partial f(x) \over \partial x_1},
121## {\partial f(x) \over \partial x_2}, \ldots,
122## {\partial f(x) \over \partial x_N} \Bigg)^T
123## $$
124## @end tex
125## @ifnottex
126## @example
127## @group
128## [ d f(x) d f(x) d f(x) ]
129## transpose ( [ ------ ----- ... ------ ] )
130## [ dx_1 dx_2 dx_N ]
131## @end group
132## @end example
133## @end ifnottex
134##
135## The fifth and sixth arguments are vectors containing lower and upper bounds
136## on @var{x}. These must be consistent with equality and inequality
137## constraints @var{g} and @var{h}. If the bounds are not specified, or are
138## empty, they are set to -@var{realmax} and @var{realmax} by default.
139##
140## The seventh argument is max. number of iterations. If not specified,
141## the default value is 100.
142##
143## The eighth argument is tolerance for stopping criteria. If not specified,
144## the default value is @var{eps}.
145##
146## Here is an example of calling @code{sqp}:
147##
148## @example
149## function r = g (x)
150## r = [ sumsq(x)-10;
151## x(2)*x(3)-5*x(4)*x(5);
152## x(1)^3+x(2)^3+1 ];
153## endfunction
154##
155## function obj = phi (x)
156## obj = exp(prod(x)) - 0.5*(x(1)^3+x(2)^3+1)^2;
157## endfunction
158##
159## x0 = [-1.8; 1.7; 1.9; -0.8; -0.8];
160##
161## [x, obj, info, iter, nf, lambda] = sqp (x0, @@phi, @@g, [])
162##
163## x =
164##
165## -1.71714
166## 1.59571
167## 1.82725
168## -0.76364
169## -0.76364
170##
171## obj = 0.053950
172## info = 101
173## iter = 8
174## nf = 10
175## lambda =
176##
177## -0.0401627
178## 0.0379578
179## -0.0052227
180## @end example
181##
182## The value returned in @var{info} may be one of the following:
183## @table @asis
184## @item 101
185## The algorithm terminated because the norm of the last step was less
186## than @code{tol * norm (x))} (the value of tol is currently fixed at
187## @code{sqrt (eps)}---edit @file{sqp.m} to modify this value.
188## @item 102
189## The BFGS update failed.
190## @item 103
191## The maximum number of iterations was reached (the maximum number of
192## allowed iterations is currently fixed at 100---edit @file{sqp.m} to
193## increase this value).
194## @end table
195## @seealso{qp}
196## @end deftypefn
197
198function [x, obj, info, iter, nf, lambda] = sqp (x, objf, cef, cif, lb, ub, maxiter, tolerance)
199
200 global __sqp_nfun__;
201 global __sqp_obj_fun__;
202 global __sqp_ce_fun__;
203 global __sqp_ci_fun__;
204 global __sqp_cif__;
205 global __sqp_cifcn__;
206
207 if (nargin >= 2 && nargin <= 8 && nargin != 5)
208
209 ## Choose an initial NxN symmetric positive definite Hessan
210 ## approximation B.
211
212 n = length (x);
213
214 ## Evaluate objective function, constraints, and gradients at initial
215 ## value of x.
216 ##
217 ## obj_fun
218 ## obj_grad
219 ## ce_fun -- equality constraint functions
220 ## ci_fun -- inequality constraint functions
221 ## A == [grad_{x_1} cx_fun, grad_{x_2} cx_fun, ..., grad_{x_n} cx_fun]^T
222
223 obj_grd = @fd_obj_grd;
224 have_hess = 0;
225 if (iscell (objf))
226 if (length (objf) > 0)
227 __sqp_obj_fun__ = obj_fun = objf{1};
228 if (length (objf) > 1)
229 obj_grd = objf{2};
230 if (length (objf) > 2)
231 obj_hess = objf{3};
232 have_hess = 1;
233 endif
234 endif
235 else
236 error ("sqp: invalid objective function");
237 endif
238 else
239 __sqp_obj_fun__ = obj_fun = objf;
240 endif
241
242 ce_fun = @empty_cf;
243 ce_grd = @empty_jac;
244 if (nargin > 2)
245 ce_grd = @fd_ce_jac;
246 if (iscell (cef))
247 if (length (cef) > 0)
248 __sqp_ce_fun__ = ce_fun = cef{1};
249 if (length (cef) > 1)
250 ce_grd = cef{2};
251 endif
252 else
253 error ("sqp: invalid equality constraint function");
254 endif
255 elseif (! isempty (cef))
256 ce_fun = cef;
257 endif
258 endif
259 __sqp_ce_fun__ = ce_fun;
260
261 ci_fun = @empty_cf;
262 ci_grd = @empty_jac;
263
264 if (nargin > 3)
265 ## constraint function given by user with possibly gradient
266 __sqp_cif__ = cif;
267 ## constraint function given by user without gradient
268 __sqp_cifcn__ = @empty_cf;
269 if (iscell (__sqp_cif__))
270 if (length (__sqp_cif__) > 0)
271 __sqp_cifcn__ = __sqp_cif__{1};
272 endif
273 elseif (! isempty (__sqp_cif__))
274 __sqp_cifcn__ = __sqp_cif__;
275 endif
276
277 if (nargin < 5)
278 ci_grd = @fd_ci_jac;
279 if (iscell (cif))
280 if (length (cif) > 0)
281 __sqp_ci_fun__ = ci_fun = cif{1};
282 if (length (cif) > 1)
283 ci_grd = cif{2};
284 endif
285 else
286 error ("sqp: invalid equality constraint function");
287 endif
288 elseif (! isempty (cif))
289 ci_fun = cif;
290 endif
291 else
292 global __sqp_lb__;
293 if (isvector (lb))
294 __sqp_lb__ = lb;
295 elseif (isempty (lb))
296 if (isa (x, "single"))
297 __sqp_lb__ = -realmax ("single");
298 else
299 __sqp_lb__ = -realmax;
300 endif
301 else
302 error ("sqp: invalid lower bound");
303 endif
304
305 global __sqp_ub__;
306 if (isvector (ub))
307 __sqp_ub__ = ub;
308 elseif (isempty (lb))
309 if (isa (x, "single"))
310 __sqp_ub__ = realmax ("single");
311 else
312 __sqp_ub__ = realmax;
313 endif
314 else
315 error ("sqp: invalid upper bound");
316 endif
317
318 if (lb > ub)
319 error ("sqp: upper bound smaller than lower bound");
320 endif
321 __sqp_ci_fun__ = ci_fun = @cf_ub_lb;
322 ci_grd = @cigrad_ub_lb;
323 endif
324 __sqp_ci_fun__ = ci_fun;
325 endif
326
327 iter_max = 100;
328 if (nargin > 6 && ! isempty (maxiter))
329 if (isscalar (maxiter) && maxiter > 0 && round (maxiter) == maxiter)
330 iter_max = maxiter;
331 else
332 error ("sqp: invalid number of maximum iterations");
333 endif
334 endif
335
336 tol = sqrt (eps);
337 if (nargin > 7 && ! isempty (tolerance))
338 if (isscalar (tolerance) && tolerance > 0)
339 tol = tolerance;
340 else
341 error ("sqp: invalid value for tolerance");
342 endif
343 endif
344
345 iter = 0;
346
347 obj = feval (obj_fun, x);
348 __sqp_nfun__ = 1;
349
350 c = feval (obj_grd, x);
351
352 if (have_hess)
353 B = feval (obj_hess, x);
354 else
355 B = eye (n, n);
356 endif
357
358 ce = feval (ce_fun, x);
359 F = feval (ce_grd, x);
360
361 ci = feval (ci_fun, x);
362 C = feval (ci_grd, x);
363
364 A = [F; C];
365
366 ## Choose an initial lambda (x is provided by the caller).
367
368 lambda = 100 * ones (rows (A), 1);
369
370 qp_iter = 1;
371 alpha = 1;
372
373 ## report ();
374
375 ## report (iter, qp_iter, alpha, __sqp_nfun__, obj);
376
377 info = 0;
378
379 while (++iter < iter_max)
380
381 ## Check convergence. This is just a simple check on the first
382 ## order necessary conditions.
383
384 ## IDX is the indices of the active inequality constraints.
385
386 nr_f = rows (F);
387
388 lambda_e = lambda((1:nr_f)');
389 lambda_i = lambda((nr_f+1:end)');
390
391 con = [ce; ci];
392
393 t0 = norm (c - A' * lambda);
394 t1 = norm (ce);
395 t2 = all (ci >= 0);
396 t3 = all (lambda_i >= 0);
397 t4 = norm (lambda .* con);
398
399 if (t2 && t3 && max ([t0; t1; t4]) < tol)
400 break;
401 endif
402
403 ## Compute search direction p by solving QP.
404
405 g = -ce;
406 d = -ci;
407
408 ## Discard inequality constraints that have -Inf bounds since those
409 ## will never be active.
410 idx = isinf (d) & d < 0;
411 d(idx) = [];
412 C(idx,:) = [];
413
414 [p, obj_qp, INFO, lambda] = qp (x, B, c, F, g, [], [], d, C,
415 Inf * ones (size (d)));
416
417 info = INFO.info;
418
419 ## Check QP solution and attempt to recover if it has failed.
420
421 ## Choose mu such that p is a descent direction for the chosen
422 ## merit function phi.
423
424 [x_new, alpha, obj_new] = linesearch_L1 (x, p, obj_fun, obj_grd,
425 ce_fun, ci_fun, lambda, obj);
426
427 ## Evaluate objective function, constraints, and gradients at
428 ## x_new.
429
430 c_new = feval (obj_grd, x_new);
431
432 ce_new = feval (ce_fun, x_new);
433 F_new = feval (ce_grd, x_new);
434
435 ci_new = feval (ci_fun, x_new);
436 C_new = feval (ci_grd, x_new);
437
438 A_new = [F_new; C_new];
439
440 ## Set
441 ##
442 ## s = alpha * p
443 ## y = grad_x L (x_new, lambda) - grad_x L (x, lambda})
444
445 y = c_new - c;
446
447 if (! isempty (A))
448 t = ((A_new - A)'*lambda);
449 y -= t;
450 endif
451
452 delx = x_new - x;
453
454 if (norm (delx) < tol * norm (x))
455 info = 101;
456 break;
457 endif
458
459 if (have_hess)
460
461 B = feval (obj_hess, x);
462
463 else
464
465 ## Update B using a quasi-Newton formula.
466
467 delxt = delx';
468
469 ## Damped BFGS. Or maybe we would actually want to use the Hessian
470 ## of the Lagrangian, computed directly.
471
472 d1 = delxt*B*delx;
473
474 t1 = 0.2 * d1;
475 t2 = delxt*y;
476
477 if (t2 < t1)
478 theta = 0.8*d1/(d1 - t2);
479 else
480 theta = 1;
481 endif
482
483 r = theta*y + (1-theta)*B*delx;
484
485 d2 = delxt*r;
486
487 if (d1 == 0 || d2 == 0)
488 info = 102;
489 break;
490 endif
491
492 B = B - B*delx*delxt*B/d1 + r*r'/d2;
493
494 endif
495
496 x = x_new;
497
498 obj = obj_new;
499
500 c = c_new;
501
502 ce = ce_new;
503 F = F_new;
504
505 ci = ci_new;
506 C = C_new;
507
508 A = A_new;
509
510 ## report (iter, qp_iter, alpha, __sqp_nfun__, obj);
511
512 endwhile
513
514 if (iter >= iter_max)
515 info = 103;
516 endif
517
518 nf = __sqp_nfun__;
519
520 else
521
522 print_usage ();
523
524 endif
525
526endfunction
527
528
529function [merit, obj] = phi_L1 (obj, obj_fun, ce_fun, ci_fun, x, mu)
530
531 global __sqp_nfun__;
532
533 ce = feval (ce_fun, x);
534 ci = feval (ci_fun, x);
535
536 idx = ci < 0;
537
538 con = [ce; ci(idx)];
539
540 if (isempty (obj))
541 obj = feval (obj_fun, x);
542 __sqp_nfun__++;
543 endif
544
545 merit = obj;
546 t = norm (con, 1) / mu;
547
548 if (! isempty (t))
549 merit += t;
550 endif
551
552endfunction
553
554
555function [x_new, alpha, obj] = linesearch_L1 (x, p, obj_fun, obj_grd,
556 ce_fun, ci_fun, lambda, obj)
557
558 ## Choose parameters
559 ##
560 ## eta in the range (0, 0.5)
561 ## tau in the range (0, 1)
562
563 eta = 0.25;
564 tau = 0.5;
565
566 delta_bar = sqrt (eps);
567
568 if (isempty (lambda))
569 mu = 1 / delta_bar;
570 else
571 mu = 1 / (norm (lambda, Inf) + delta_bar);
572 endif
573
574 alpha = 1;
575
576 c = feval (obj_grd, x);
577 ce = feval (ce_fun, x);
578
579 [phi_x_mu, obj] = phi_L1 (obj, obj_fun, ce_fun, ci_fun, x, mu);
580
581 D_phi_x_mu = c' * p;
582 d = feval (ci_fun, x);
583 ## only those elements of d corresponding
584 ## to violated constraints should be included.
585 idx = d < 0;
586 t = - norm ([ce; d(idx)], 1) / mu;
587 if (! isempty (t))
588 D_phi_x_mu += t;
589 endif
590
591 while (1)
592 [p1, obj] = phi_L1 ([], obj_fun, ce_fun, ci_fun, x+alpha*p, mu);
593 p2 = phi_x_mu+eta*alpha*D_phi_x_mu;
594 if (p1 > p2)
595 ## Reset alpha = tau_alpha * alpha for some tau_alpha in the
596 ## range (0, tau).
597 tau_alpha = 0.9 * tau; ## ??
598 alpha = tau_alpha * alpha;
599 else
600 break;
601 endif
602 endwhile
603
604 ## Set x_new = x + alpha * p;
605
606 x_new = x + alpha * p;
607
608endfunction
609
610
611function report (iter, qp_iter, alpha, nfun, obj)
612
613 if (nargin == 0)
614 printf (" Itn ItQP Step Nfun Objective\n");
615 else
616 printf ("%5d %4d %8.1g %5d %13.6e\n", iter, qp_iter, alpha, nfun, obj);
617 endif
618
619endfunction
620
621
622function grd = fdgrd (f, x)
623
624 if (! isempty (f))
625 y0 = feval (f, x);
626 nx = length (x);
627 grd = zeros (nx, 1);
628 deltax = sqrt (eps);
629 for i = 1:nx
630 t = x(i);
631 x(i) += deltax;
632 grd(i) = (feval (f, x) - y0) / deltax;
633 x(i) = t;
634 endfor
635 else
636 grd = zeros (0, 1);
637 endif
638
639endfunction
640
641
642function jac = fdjac (f, x)
643 nx = length (x);
644 if (! isempty (f))
645 y0 = feval (f, x);
646 nf = length (y0);
647 nx = length (x);
648 jac = zeros (nf, nx);
649 deltax = sqrt (eps);
650 for i = 1:nx
651 t = x(i);
652 x(i) += deltax;
653 jac(:,i) = (feval (f, x) - y0) / deltax;
654 x(i) = t;
655 endfor
656 else
657 jac = zeros (0, nx);
658 endif
659
660endfunction
661
662
663function grd = fd_obj_grd (x)
664
665 global __sqp_obj_fun__;
666
667 grd = fdgrd (__sqp_obj_fun__, x);
668
669endfunction
670
671
672function res = empty_cf (x)
673
674 res = zeros (0, 1);
675
676endfunction
677
678
679function res = empty_jac (x)
680
681 res = zeros (0, length (x));
682
683endfunction
684
685
686function jac = fd_ce_jac (x)
687
688 global __sqp_ce_fun__;
689
690 jac = fdjac (__sqp_ce_fun__, x);
691
692endfunction
693
694
695function jac = fd_ci_jac (x)
696
697 global __sqp_cifcn__;
698 ## __sqp_cifcn__ = constraint function without gradients and lb or ub
699 jac = fdjac (__sqp_cifcn__, x);
700
701endfunction
702
703
704function res = cf_ub_lb (x)
705
706 ## combine constraint function with ub and lb
707 global __sqp_cifcn__ __sqp_lb__ __sqp_ub__
708
709 res = [x-__sqp_lb__; __sqp_ub__-x];
710
711 if (! isempty (__sqp_cifcn__))
712 res = [feval(__sqp_cifcn__,x); x-__sqp_lb__; __sqp_ub__-x];
713 endif
714
715endfunction
716
717
718function res = cigrad_ub_lb (x)
719
720 global __sqp_cif__
721
722 res = [eye(numel(x)); -eye(numel(x))];
723
724 cigradfcn = @fd_ci_jac;
725
726 if (iscell (__sqp_cif__) && length (__sqp_cif__) > 1)
727 cigradfcn = __sqp_cif__{2};
728 endif
729
730 if (! isempty (cigradfcn))
731 res = [feval(cigradfcn,x); eye(numel(x)); -eye(numel(x))];
732 endif
733
734endfunction
735
736%!function r = g (x)
737%! r = [sumsq(x)-10;
738%! x(2)*x(3)-5*x(4)*x(5);
739%! x(1)^3+x(2)^3+1 ];
740%!
741%!function obj = phi (x)
742%! obj = exp(prod(x)) - 0.5*(x(1)^3+x(2)^3+1)^2;
743%!
744%!test
745%! x0 = [-1.8; 1.7; 1.9; -0.8; -0.8];
746%!
747%! [x, obj, info, iter, nf, lambda] = sqp (x0, @phi, @g, []);
748%!
749%! x_opt = [-1.717143501952599;
750%! 1.595709610928535;
751%! 1.827245880097156;
752%! -0.763643103133572;
753%! -0.763643068453300];
754%!
755%! obj_opt = 0.0539498477702739;
756%!
757%! assert (all (abs (x-x_opt) < 5*sqrt (eps)) && abs (obj-obj_opt) < sqrt (eps));