org.apache.commons.math3.optim.nonlinear.scalar.gradient.BFGSOptimizer.java Source code

Introduction

Here is the source code for org.apache.commons.math3.optim.nonlinear.scalar.gradient.BFGSOptimizer.java
Source

/*----------------------------------------------------------------------------- 
 * GDSC SMLM Software
 * 
 * Copyright (C) 2014 Alex Herbert
 * Genome Damage and Stability Centre
 * University of Sussex, UK
 * 
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 * 
 * This code is based on the ideas expressed in Numerical Recipes in C++, 
 * The Art of Scientific Computing, Second Edition, W.H. Press, 
 * S.A. Teukolsky, W.T. Vetterling, B.P. Flannery (Cambridge University Press, 
 * Cambridge, 2002).
 *---------------------------------------------------------------------------*/

package org.apache.commons.math3.optim.nonlinear.scalar.gradient;

import java.util.Locale;

import org.apache.commons.math3.exception.MathUnsupportedOperationException;
import org.apache.commons.math3.exception.TooManyEvaluationsException;
import org.apache.commons.math3.exception.util.Localizable;
import org.apache.commons.math3.optim.ConvergenceChecker;
import org.apache.commons.math3.optim.OptimizationData;
import org.apache.commons.math3.optim.PointValuePair;
import org.apache.commons.math3.optim.PositionChecker;
import org.apache.commons.math3.optim.nonlinear.scalar.GradientMultivariateOptimizer;
import org.apache.commons.math3.util.FastMath;

/**
 * Implementation of the Broyden-Fletcher-Goldfarb-Shanno (BFGS) variant of the Davidson-Fletcher-Powell (DFP)
 * minimisation.
 * <p>
 * This is not part of the Apache Commons Math library but extends the same base classes to allow an easy swap with
 * existing code based on the Apache library.
 * <p>
 * Note that although rare, it may happen that the algorithm converges since the search direction no longer leads
 * downhill. In case of doubt restarting the algorithm should overcome this issue.
 * <p>
 * The implementation is based upon that presented in: Numerical Recipes in C++, The Art of Scientific Computing, Second
 * Edition, W.H. Press, S.A. Teukolsky, W.T. Vetterling, B.P. Flannery (Cambridge University Press, Cambridge, 2002).
 * The algorithm has been updated to support a bounded search and convergence checking on position and gradient.
 */
public class BFGSOptimizer extends GradientMultivariateOptimizer {
    /** Maximum step length used in line search. */
    private double[] maximumStepLength = null;

    /** Convergence tolerance on gradient */
    private double gradientTolerance;

    /** Maximum number of restarts on the convergence point */
    private int restarts = 0;

    /** Maximum number of restarts in the event of roundoff error */
    private int roundoffRestarts = 3;

    /** Convergence tolerance on position */
    private PositionChecker positionChecker = null;

    /** Flags to indicate if bounds are present */
    private boolean isLower, isUpper;
    private double[] lower, upper;

    private double sign = 0;

    /**
     * Specify the maximum step length in each dimension
     */
    public static class StepLength implements OptimizationData {
        private double[] step;

        /**
         * Build an instance
         * 
         * @param step
         *            The maximum step size in each dimension
         */
        public StepLength(double[] step) {
            this.step = step;
        }

        public double[] getStep() {
            return step;
        }
    }

    /**
     * Specify the tolerance on the gradient convergence with zero
     */
    public static class GradientTolerance implements OptimizationData {
        private double tolerance;

        /**
         * Build an instance
         * 
         * @param tolerance
         *            The tolerance on the gradient
         */
        public GradientTolerance(double tolerance) {
            this.tolerance = tolerance;
        }

        public double getTolerance() {
            return tolerance;
        }
    }

    /**
     * Specify the maximum number of restarts on the converged point in the event that the gradient has not yet
     * converged on zero.
     */
    public static class MaximumRestarts implements OptimizationData {
        private int restarts;

        /**
         * Build an instance
         * 
         * @param restarts
         *            The restarts on the gradient
         */
        public MaximumRestarts(int restarts) {
            this.restarts = restarts;
        }

        public int getRestarts() {
            return restarts;
        }
    }

    /**
     * Specify the maximum number of restarts in the event of roundoff error.
     */
    public static class MaximumRoundoffRestarts extends MaximumRestarts {
        /**
         * Build an instance
         * 
         * @param restarts
         *            The restarts on the gradient
         */
        public MaximumRoundoffRestarts(int restarts) {
            super(restarts);
        }
    }

    /**
     * Constructor
     */
    public BFGSOptimizer() {
        super(null);
    }

    /**
     * @param checker
     *            Convergence checker.
     */
    public BFGSOptimizer(ConvergenceChecker<PointValuePair> checker) {
        super(checker);
    }

    /**
     * {@inheritDoc}
     *
     * @param optData
     *            Optimization data. In addition to those documented in
     *            {@link GradientMultivariateOptimizer#parseOptimizationData(OptimizationData[])
     *            GradientMultivariateOptimizer}, this method will register the following data:
     *            <ul>
     *            <li>{@link MaximumStepLength}</li>
     *            </ul>
     * @return {@inheritDoc}
     * @throws TooManyEvaluationsException
     *             if the maximal number of
     *             evaluations (of the objective function) is exceeded.
     */
    @Override
    public PointValuePair optimize(OptimizationData... optData) throws TooManyEvaluationsException {
        // Set up base class and perform computation.
        return super.optimize(optData);
    }

    private int converged;
    private static final int CHECKER = 0;
    private static final int POSITION = 1;
    private static final int GRADIENT = 2;
    private static final int ROUNDOFF_ERROR = 3;

    /** {@inheritDoc} */
    @Override
    protected PointValuePair doOptimize() {
        final ConvergenceChecker<PointValuePair> checker = getConvergenceChecker();
        double[] p = getStartPoint();

        // Assume minimisation
        sign = -1;

        LineStepSearch lineSearch = new LineStepSearch();

        // In case there are no restarts
        if (restarts <= 0)
            return bfgsWithRoundoffCheck(checker, p, lineSearch);

        PointValuePair lastResult = null;
        PointValuePair result = null;
        //int lastConverge = 0;
        int iteration = 0;
        //int initialConvergenceIteration = 0;
        //int[] count = new int[3];
        while (iteration <= restarts) {
            iteration++;
            result = bfgsWithRoundoffCheck(checker, p, lineSearch);
            //count[converged]++;

            //if (lastResult == null)
            //   initialConvergenceIteration = getIterations();

            if (converged == GRADIENT) {
                // If no gradient remains then we cannot move anywhere so return
                break;
            }

            if (lastResult != null) {
                //// Check if the optimum was improved using the last convergence criteria
                //if (lastConverge == CHECKER)
                //{
                //   if (checker.converged(getIterations(), lastResult, result))
                //   {
                //      break;
                //   }
                //}
                //else
                //{
                //   if (positionChecker.converged(lastResult.getPointRef(), result.getPointRef()))
                //   {
                //      break;
                //   }
                //}

                // Check if the optimum was improved using the convergence criteria
                if (checker != null && checker.converged(getIterations(), lastResult, result)) {
                    break;
                }
                if (positionChecker.converged(lastResult.getPointRef(), result.getPointRef())) {
                    break;
                }
            }

            // Store the new optimum and repeat
            lastResult = result;
            //lastConverge = converged;
            p = lastResult.getPointRef();
        }

        //System.out.printf("Iter=%d (%d > %d): %s\n", iteration, initialConvergenceIteration, getIterations(),
        //      java.util.Arrays.toString(count));

        return result;
    }

    /**
     * Repeat the BFGS algorithm until it converges without roundoff error on the search direction
     * 
     * @param checker
     * @param p
     * @param lineSearch
     * @return
     */
    protected PointValuePair bfgsWithRoundoffCheck(ConvergenceChecker<PointValuePair> checker, double[] p,
            LineStepSearch lineSearch) {
        // Note: Position might converge if the hessian becomes singular or non-positive-definite
        // In this case the simple check is to restart the algorithm.
        int iteration = 0;

        PointValuePair result = bfgs(checker, p, lineSearch);

        // Allow restarts in the case of roundoff convergence
        while (converged == ROUNDOFF_ERROR && iteration < roundoffRestarts) {
            iteration++;
            p = result.getPointRef();
            result = bfgs(checker, p, lineSearch);
        }

        // If restarts did not work then this is a failure
        if (converged == ROUNDOFF_ERROR)
            throw new LineSearchRoundoffException();

        //if (iteration > 0)
        //   System.out.printf("Restarts for roundoff error = %d\n", iteration);

        return result;
    }

    protected PointValuePair bfgs(ConvergenceChecker<PointValuePair> checker, double[] p,
            LineStepSearch lineSearch) {
        final int n = p.length;

        final double EPS = epsilon;

        double[] hdg = new double[n];
        double[] xi = new double[n];
        double[][] hessian = new double[n][n];

        // Get the gradient for the the bounded point
        applyBounds(p);
        double[] g = computeObjectiveGradient(p);
        checkGradients(g, p);

        // Initialise the hessian and search direction
        for (int i = 0; i < n; i++) {
            hessian[i][i] = 1.0;
            xi[i] = -g[i];
        }

        PointValuePair current = null;

        while (true) {
            incrementIterationCount();

            // Get the value of the point
            double fp = computeObjectiveValue(p);

            if (checker != null) {
                PointValuePair previous = current;
                current = new PointValuePair(p, fp);
                if (previous != null && checker.converged(getIterations(), previous, current)) {
                    // We have found an optimum.
                    converged = CHECKER;
                    return current;
                }
            }

            // Move along the search direction.
            final double[] pnew;
            try {
                pnew = lineSearch.lineSearch(p, fp, g, xi);
            } catch (LineSearchRoundoffException e) {
                // This can happen if the Hessian is nearly singular or non-positive-definite.
                // In this case the algorithm should be restarted.
                converged = ROUNDOFF_ERROR;
                //System.out.printf("Roundoff error, iter=%d\n", getIterations());
                return new PointValuePair(p, fp);
            }

            // We assume the new point is on/within the bounds since the line search is constrained
            double fret = lineSearch.f;

            // Test for convergence on change in position
            if (positionChecker.converged(p, pnew)) {
                converged = POSITION;
                return new PointValuePair(pnew, fret);
            }

            // Update the line direction
            for (int i = 0; i < n; i++) {
                xi[i] = pnew[i] - p[i];
            }
            p = pnew;

            // Save the old gradient
            double[] dg = g;

            // Get the gradient for the new point
            g = computeObjectiveGradient(p);
            checkGradients(g, p);

            // If necessary recompute the function value. 
            // Doing this after the gradient evaluation allows the value to be cached when 
            // computing the objective gradient
            fp = fret;

            // Test for convergence on zero gradient.
            double test = 0;
            for (int i = 0; i < n; i++) {
                final double temp = Math.abs(g[i]) * FastMath.max(Math.abs(p[i]), 1);
                //final double temp = Math.abs(g[i]);
                if (test < temp)
                    test = temp;
            }
            // Compute the biggest gradient relative to the objective function
            test /= FastMath.max(Math.abs(fp), 1);
            if (test < gradientTolerance) {
                converged = GRADIENT;
                return new PointValuePair(p, fp);
            }

            for (int i = 0; i < n; i++)
                dg[i] = g[i] - dg[i];
            for (int i = 0; i < n; i++) {
                hdg[i] = 0.0;
                for (int j = 0; j < n; j++)
                    hdg[i] += hessian[i][j] * dg[j];
            }
            double fac = 0, fae = 0, sumdg = 0, sumxi = 0;
            for (int i = 0; i < n; i++) {
                fac += dg[i] * xi[i];
                fae += dg[i] * hdg[i];
                sumdg += dg[i] * dg[i];
                sumxi += xi[i] * xi[i];
            }
            if (fac > Math.sqrt(EPS * sumdg * sumxi)) {
                fac = 1.0 / fac;
                final double fad = 1.0 / fae;
                for (int i = 0; i < n; i++)
                    dg[i] = fac * xi[i] - fad * hdg[i];
                for (int i = 0; i < n; i++) {
                    for (int j = i; j < n; j++) {
                        hessian[i][j] += fac * xi[i] * xi[j] - fad * hdg[i] * hdg[j] + fae * dg[i] * dg[j];
                        hessian[j][i] = hessian[i][j];
                    }
                }
            }
            for (int i = 0; i < n; i++) {
                xi[i] = 0.0;
                for (int j = 0; j < n; j++)
                    xi[i] -= hessian[i][j] * g[j];
            }
        }
    }

    /**
     * Scans the list of (required and optional) optimization data that
     * characterize the problem.
     *
     * @param optData
     *            Optimization data.
     *            The following data will be looked for:
     *            <ul>
     *            <li>{@link GradientChecker}</li>
     *            <li>{@link PositionChecker}</li>
     *            <li>{@link MaximumStepLength}</li>
     *            </ul>
     */
    @Override
    protected void parseOptimizationData(OptimizationData... optData) {
        // Allow base class to register its own data.
        super.parseOptimizationData(optData);

        // The existing values (as set by the previous call) are reused if
        // not provided in the argument list.
        for (OptimizationData data : optData) {
            if (data instanceof PositionChecker) {
                positionChecker = (PositionChecker) data;
            } else if (data instanceof StepLength) {
                maximumStepLength = ((StepLength) data).getStep();
            } else if (data instanceof GradientTolerance) {
                gradientTolerance = ((GradientTolerance) data).getTolerance();
            } else if (data instanceof MaximumRestarts) {
                restarts = ((MaximumRestarts) data).getRestarts();
            } else if (data instanceof MaximumRoundoffRestarts) {
                roundoffRestarts = ((MaximumRoundoffRestarts) data).getRestarts();
            }
        }

        checkParameters();
    }

    /**
     * The minimum value between two doubles
     */
    private static double epsilon = calculateMachineEpsilonDouble();

    /**
     * @return The minimum value between two doubles
     * @see http://en.wikipedia.org/wiki/Machine_epsilon#Approximation_using_C.2B.2B
     */
    private static double calculateMachineEpsilonDouble() {
        double machEps = 1.0;

        do
            machEps /= 2.0;
        while ((1.0 + (machEps / 2.0)) != 1.0);

        // ISO standard is 2^-52 = 2.220446049e-16

        return machEps;
    }

    public static class LineSearchRoundoffException extends RuntimeException {
        private static final long serialVersionUID = -8974644703023090107L;
        private final double slope;

        public LineSearchRoundoffException(double slope) {
            super();
            this.slope = slope;
        }

        public LineSearchRoundoffException() {
            super();
            this.slope = 0;
        }

        @Override
        public String getMessage() {
            return (slope != 0) ? "Round-off problem. Slope = " + slope : "Round-off problem";
        }
    }

    /**
     * Internal class for a line search with backtracking
     * <p>
     * Adapted from NR::lnsrch, as discussed in Numerical Recipes section 9.7. The algorithm has been changed to support
     * bounds on the point, limits on the search direction in all dimensions and checking for bad function evaluations
     * when backtracking.
     */
    private class LineStepSearch {
        /**
         * Set to true when the the new point is too close to the old point. In a minimisation algorithm this signifies
         * convergence.
         */
        @SuppressWarnings("unused")
        boolean check;
        /**
         * The function value at the new point
         */
        double f;

        /**
         * Given an n-dimension point, the function value and gradient at that point find a new point
         * along the given search direction so that the function value has decreased sufficiently.
         * 
         * @param xOld
         *            The old point
         * @param fOld
         *            The old point function value
         * @param gradient
         *            The old point function gradient
         * @param searchDirection
         *            The search direction
         * @return The new point
         * @throws LineSearchRoundoffException
         *             if the slope of the line search is positive
         */
        double[] lineSearch(double[] xOld, final double fOld, double[] gradient, double[] searchDirection)
                throws LineSearchRoundoffException {
            final double ALF = 1.0e-4, TOLX = epsilon;
            double alam2 = 0.0, f2 = 0.0;

            // New point
            double[] x = new double[xOld.length];

            final int n = xOld.length;
            check = false;

            // Limit the search step size for each dimension
            if (maximumStepLength != null) {
                double scale = 1;
                for (int i = 0; i < n; i++) {
                    if (Math.abs(searchDirection[i]) * scale > maximumStepLength[i])
                        scale = maximumStepLength[i] / Math.abs(searchDirection[i]);
                }
                if (scale < 1) {
                    // Scale the entire search direction
                    for (int i = 0; i < n; i++)
                        searchDirection[i] *= scale;
                }
            }

            double slope = 0.0;
            for (int i = 0; i < n; i++)
                slope += gradient[i] * searchDirection[i];
            if (slope >= 0.0) {
                throw new LineSearchRoundoffException(slope);
            }

            // Compute lambda min
            double test = 0.0;
            for (int i = 0; i < n; i++) {
                final double temp = Math.abs(searchDirection[i]) / FastMath.max(Math.abs(xOld[i]), 1.0);
                if (temp > test)
                    test = temp;
            }
            double alamin = TOLX / test;

            // Always try the full step first
            double alam = 1.0;
            // Count the number of backtracking steps
            int backtracking = 0;
            for (;;) {
                if (alam < alamin) {
                    // Convergence (insignificant step).
                    // Since we use the old f and x then we do not need to compute the objective value
                    check = true;
                    f = fOld;
                    //System.out.printf("alam %f < alamin %f\n", alam, alamin);
                    return xOld;
                }

                for (int i = 0; i < n; i++)
                    x[i] = xOld[i] + alam * searchDirection[i];
                applyBounds(x);
                f = BFGSOptimizer.this.computeObjectiveValue(x);
                //System.out.printf("f=%f @ %f : %s\n", f, alam, java.util.Arrays.toString(x));
                if (f <= fOld + ALF * alam * slope) {
                    // Sufficient function decrease
                    //System.out.printf("f=%f < %f\n", f, fOld + ALF * alam * slope);
                    return x;
                } else {
                    // Check for bad function evaluation
                    if (f == Double.POSITIVE_INFINITY) {
                        // Reset backtracking
                        backtracking = 0;

                        alam *= 0.1;
                        continue;
                    }

                    // Backtrack
                    double tmplam;
                    if (backtracking++ == 0) {
                        // First backtrack iteration
                        tmplam = -slope / (2.0 * (f - fOld - slope));
                        // Ensure the lambda is reduced, i.e. we take a step smaller than last time
                        if (tmplam > 0.9 * alam)
                            tmplam = 0.9 * alam;
                    } else {
                        // Subsequent backtracks
                        final double rhs1 = f - fOld - alam * slope;
                        final double rhs2 = f2 - fOld - alam2 * slope;
                        final double a = (rhs1 / (alam * alam) - rhs2 / (alam2 * alam2)) / (alam - alam2);
                        final double b = (-alam2 * rhs1 / (alam * alam) + alam * rhs2 / (alam2 * alam2))
                                / (alam - alam2);
                        if (a == 0.0)
                            tmplam = -slope / (2.0 * b);
                        else {
                            final double disc = b * b - 3.0 * a * slope;
                            if (disc < 0.0)
                                tmplam = 0.5 * alam;
                            else if (b <= 0.0)
                                tmplam = (-b + Math.sqrt(disc)) / (3.0 * a);
                            else
                                tmplam = -slope / (b + Math.sqrt(disc));
                        }
                        // Ensure the lambda is <= 0.5 lamda1, i.e. we take a step smaller than last time
                        if (tmplam > 0.5 * alam)
                            tmplam = 0.5 * alam;
                    }

                    alam2 = alam;
                    f2 = f;
                    // Ensure the lambda is >= 0.1 lamda1, i.e. we take reasonable step
                    alam = FastMath.max(tmplam, 0.1 * alam);
                }
            }
        }
    }

    /**
     * Checks if there are lower or upper bounds that are not -Infinity or +Infinity
     * 
     * @throws MathUnsupportedOperationException
     *             if invalid bounds were passed to the {@link #optimize(OptimizationData[]) optimize} method.
     */
    private void checkParameters() {
        lower = getLowerBound();
        upper = getUpperBound();
        isLower = checkArray(lower, Double.NEGATIVE_INFINITY);
        isUpper = checkArray(upper, Double.POSITIVE_INFINITY);
        // Check that the upper bound is above the lower bound
        if (isUpper && isLower) {
            for (int i = 0; i < lower.length; i++)
                if (lower[i] > upper[i])
                    throw new MathUnsupportedOperationException(
                            createError("Lower bound must be below upper bound"));
        }

        // Numerical Recipes set the position convergence very low
        if (positionChecker == null)
            positionChecker = new PositionChecker(4 * epsilon, 0);

        // Ensure that the step length is strictly positive
        if (maximumStepLength == null) {
            for (int i = 0; i < maximumStepLength.length; i++) {
                if (maximumStepLength[i] <= 0)
                    throw new MathUnsupportedOperationException(
                            createError("Maximum step length must be strictly positive"));
            }
        }

        // Set a tolerance? If not then the routine will iterate until position convergence
        //if (gradientTolerance == 0)
        //   gradientTolerance = 1e-6;
    }

    private Localizable createError(final String message) {
        return new Localizable() {
            private static final long serialVersionUID = 1L;

            public String getSourceString() {
                return message;
            }

            public String getLocalizedString(Locale locale) {
                return message;
            }
        };
    }

    /**
     * Check if the array contains anything other than value
     * 
     * @param array
     * @param value
     * @return True if the array has another value
     */
    private boolean checkArray(double[] array, double value) {
        if (array == null)
            return false;
        for (double v : array)
            if (v != value)
                return true;
        return false;
    }

    /**
     * Check the point falls within the configured bounds truncating if necessary
     * 
     * @param point
     * @return true if the point was truncated
     */
    private boolean applyBounds(double[] point) {
        boolean truncated = false;
        if (isUpper) {
            for (int i = 0; i < point.length; i++)
                if (point[i] > upper[i]) {
                    point[i] = upper[i];
                    truncated = true;
                }
        }
        if (isLower) {
            for (int i = 0; i < point.length; i++)
                if (point[i] < lower[i]) {
                    point[i] = lower[i];
                    truncated = true;
                }
        }
        return truncated;
    }

    /**
     * Check if the point falls on or outside configured bounds truncating the gradient to zero
     * if it is moving further outside the bounds
     * 
     * @param r
     * @param point
     */
    private void checkGradients(double[] r, double[] point) {
        checkGradients(r, point, sign);
    }

    /**
     * Check if the point falls on or outside configured bounds truncating the gradient to zero
     * if it is moving further outside the bounds (defined by the sign of the search direction)
     * 
     * @param r
     * @param point
     * @param sign
     */
    private void checkGradients(double[] r, double[] point, final double sign) {
        if (isUpper) {
            for (int i = 0; i < point.length; i++)
                if (point[i] >= upper[i] && Math.signum(r[i]) == sign)
                    r[i] = 0;
        }
        if (isLower) {
            for (int i = 0; i < point.length; i++)
                if (point[i] <= lower[i] && Math.signum(r[i]) == -sign)
                    r[i] = 0;
        }
        //boolean isNaN = false;
        //for (int i = 0; i < point.length; i++)
        //   if (Double.isNaN(r[i]))
        //   {
        //      isNaN = true;
        //      r[i] = 0;
        //   }
    }
}