beast.evolution.sitemodel.SiteModel.java Source code

Java tutorial

Introduction

Here is the source code for beast.evolution.sitemodel.SiteModel.java

Source

/*
* File SiteModel.java
*
* Copyright (C) 2010 Remco Bouckaert remco@cs.auckland.ac.nz
*
* This file is part of BEAST2.
* See the NOTICE file distributed with this work for additional
* information regarding copyright ownership and licensing.
*
* BEAST is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
*  BEAST is distributed in the hope that it will be useful,
*  but WITHOUT ANY WARRANTY; without even the implied warranty of
*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
*  GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with BEAST; if not, write to the
* Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
* Boston, MA  02110-1301  USA
*/
package beast.evolution.sitemodel;

import org.apache.commons.math.distribution.GammaDistribution;
import org.apache.commons.math.distribution.GammaDistributionImpl;

import beast.core.Description;
import beast.core.Input;
import beast.core.parameter.RealParameter;
import beast.core.util.Log;
import beast.evolution.substitutionmodel.SubstitutionModel;
import beast.evolution.tree.Node;

/**
 * Site model with
 * o gamma site model,
 * o proportion of sites that are invariant
 * *
 */
@Description("Defines mutation rate " + "and gamma distributed rates across sites (optional) "
        + "and proportion of the sites invariant (also optional).")
public class SiteModel extends SiteModelInterface.Base {

    final public Input<RealParameter> muParameterInput = new Input<>("mutationRate",
            "mutation rate (defaults to 1.0)");
    final public Input<Integer> gammaCategoryCount = new Input<>("gammaCategoryCount",
            "gamma category count (default=zero for no gamma)", 0);
    final public Input<RealParameter> shapeParameterInput = new Input<>("shape",
            "shape parameter of gamma distribution. Ignored if gammaCategoryCount 1 or less");
    final public Input<RealParameter> invarParameterInput = new Input<>("proportionInvariant",
            "proportion of sites that is invariant: should be between 0 (default) and 1");
    //public Input<Boolean> useBeast1StyleGammaInput = new Input<>("useBeast1Gamma", "use BEAST1 style gamma categories -- for backward compatibility testing", false);

    RealParameter muParameter;
    RealParameter shapeParameter;
    RealParameter invarParameter;
    boolean useBeast1StyleGamma;

    @Override
    public void initAndValidate() {
        useBeast1StyleGamma = true; //useBeast1StyleGammaInput.get();
        muParameter = muParameterInput.get();
        if (muParameter == null) {
            muParameter = new RealParameter("1.0");
        }
        shapeParameter = shapeParameterInput.get();
        invarParameter = invarParameterInput.get();
        if (invarParameter == null) {
            invarParameter = new RealParameter("0.0");
            invarParameter.setBounds(Math.max(0.0, invarParameter.getLower()),
                    Math.min(1.0, invarParameter.getUpper()));
        }

        //if (muParameter != null) {
        muParameter.setBounds(Math.max(muParameter.getLower(), 0.0),
                Math.min(muParameter.getUpper(), Double.POSITIVE_INFINITY));
        //}
        if (shapeParameter != null) {
            // The quantile calculator fails when the shape parameter goes much below
            // 1E-3 so we have put a hard lower bound on it. If this is not there then
            // the category rates can go to 0 and cause a -Inf likelihood (whilst this
            // is not a problem as the state will be rejected, it could mask other issues
            // and this seems the better approach.
            shapeParameter.setBounds(Math.max(shapeParameter.getLower(), 1.0E-3),
                    Math.min(shapeParameter.getUpper(), 1.0E3));
        }

        if (/*invarParameter != null && */(invarParameter.getValue() < 0 || invarParameter.getValue() > 1)) {
            throw new IllegalArgumentException("proportion invariant should be between 0 and 1");
        }
        refresh();

        addCondition(muParameterInput);
        addCondition(invarParameterInput);
        addCondition(shapeParameterInput);
    }

    @Override
    protected void refresh() {
        if (shapeParameter != null) {
            categoryCount = gammaCategoryCount.get();
            if (categoryCount < 1) {
                if (categoryCount < 0) {
                    Log.warning.println("SiteModel: Invalid category count (" + categoryCount
                            + ") Setting category count to 1");
                }
                categoryCount = 1;
            }
        } else {
            categoryCount = 1;
        }

        if (/*invarParameter != null && */invarParameter.getValue() > 0) {
            if (invarParameter.getValue() >= 1.0) {
                throw new RuntimeException("Wrong value for parameter " + invarParameter.getID()
                        + ". Proportion invariant should be in bewteen 0 and 1 (exclusive)");
            }
            if (hasPropInvariantCategory) {
                categoryCount += 1;
            }
        }

        categoryRates = new double[categoryCount];
        categoryProportions = new double[categoryCount];
        calculateCategoryRates(null);
        //ratesKnown = false;
    }

    // *****************************************************************
    // Interface SiteModel
    // *****************************************************************

    @Override
    public boolean integrateAcrossCategories() {
        return true;
    }

    @Override
    public int getCategoryCount() {
        return categoryCount;
    }

    @Override
    public int getCategoryOfSite(final int site, final Node node) {
        throw new IllegalArgumentException("Integrating across categories");
    }

    @Override
    public double getRateForCategory(final int category, final Node node) {
        synchronized (this) {
            if (!ratesKnown) {
                calculateCategoryRates(node);
            }
        }

        //final double mu = (muParameter != null) ? muParameter.getValue() : 1.0;

        return categoryRates[category] * muParameter.getValue();
    }

    /**
     * return category rates
     *
     * @param node rates to which the rates apply. Typically, the rates will be uniform
     *             throughout the tree and the node argument is ignored.
     */
    @Override
    public double[] getCategoryRates(final Node node) {
        synchronized (this) {
            if (!ratesKnown) {
                calculateCategoryRates(node);
            }
        }

        final double mu = muParameter.getValue();//(muParameter != null) ? muParameter.getValue() : 1.0;

        final double[] rates = new double[categoryRates.length];
        for (int i = 0; i < rates.length; i++) {
            rates[i] = categoryRates[i] * mu;
        }

        return rates;
    }

    /**
     * @return substitution model *
     */
    @Override
    public SubstitutionModel getSubstitutionModel() {
        return substModelInput.get();
    }

    /**
     * Get the expected proportion of sites in this category.
     *
     * @param category the category number
     * @param node     node to which the proportions apply. Typically, proportions
     *                 will be uniform throughout the tree and this argument is ignored.
     * @return the proportion.
     */
    @Override
    public double getProportionForCategory(final int category, final Node node) {
        synchronized (this) {
            if (!ratesKnown) {
                calculateCategoryRates(node);
            }
        }

        return categoryProportions[category];
    }

    /**
     * Get an array of the expected proportion of sites in this category.
     *
     * @return an array of the proportion.
     */
    @Override
    public double[] getCategoryProportions(final Node node) {
        synchronized (this) {
            if (!ratesKnown) {
                calculateCategoryRates(node);
            }
        }

        return categoryProportions;
    }

    /**
     * discretisation of gamma distribution with equal proportions in each
     * category
     * @param node
     */
    protected void calculateCategoryRates(final Node node) {
        double propVariable = 1.0;
        int cat = 0;

        if (/*invarParameter != null && */invarParameter.getValue() > 0) {
            if (hasPropInvariantCategory) {
                categoryRates[0] = 0.0;
                categoryProportions[0] = invarParameter.getValue();
            }
            propVariable = 1.0 - invarParameter.getValue();
            if (hasPropInvariantCategory) {
                cat = 1;
            }
        }

        if (shapeParameter != null) {

            final double a = shapeParameter.getValue();
            double mean = 0.0;
            final int gammaCatCount = categoryCount - cat;

            final GammaDistribution g = new GammaDistributionImpl(a, 1.0 / a);
            for (int i = 0; i < gammaCatCount; i++) {
                try {
                    // RRB: alternative implementation that seems equally good in
                    // the first 5 significant digits, but uses a standard distribution object
                    if (useBeast1StyleGamma) {
                        categoryRates[i + cat] = GammaDistributionQuantile((2.0 * i + 1.0) / (2.0 * gammaCatCount),
                                a, 1.0 / a);
                    } else {
                        categoryRates[i + cat] = g
                                .inverseCumulativeProbability((2.0 * i + 1.0) / (2.0 * gammaCatCount));
                    }

                } catch (Exception e) {
                    e.printStackTrace();
                    Log.err.println("Something went wrong with the gamma distribution calculation");
                    System.exit(-1);
                }
                mean += categoryRates[i + cat];

                categoryProportions[i + cat] = propVariable / gammaCatCount;
            }

            mean = (propVariable * mean) / gammaCatCount;

            for (int i = 0; i < gammaCatCount; i++) {

                categoryRates[i + cat] /= mean;
            }
        } else {
            categoryRates[cat] = 1.0 / propVariable;
            categoryProportions[cat] = propVariable;
        }

        ratesKnown = true;
    }

    /**
     * CalculationNode methods *
     */
    @Override
    public void store() {
        super.store();
    } // no additional state needs storing

    @Override
    public void restore() {
        super.restore();
        ratesKnown = false;
    }

    @Override
    protected boolean requiresRecalculation() {
        // do explicit check whether any of the non-substitution model parameters changed
        if (categoryCount > 1) {
            if (shapeParameter != null && shapeParameter.somethingIsDirty() || muParameter.somethingIsDirty()
                    || invarParameter.somethingIsDirty()) {
                ratesKnown = false;
            }
        } else {
            if (muParameter.somethingIsDirty() || !hasPropInvariantCategory && invarParameter.somethingIsDirty()) {
                ratesKnown = false;
            }
        }
        //       ratesKnown = false;
        // we only get here if something is dirty in its inputs, so always return true
        return true;
    }

    protected boolean ratesKnown;

    protected int categoryCount;

    protected double[] categoryRates;

    protected double[] categoryProportions;

    /**
     * quantile (inverse cumulative density function) of the Gamma distribution
     *
     * @param y     argument
     * @param shape shape parameter
     * @param scale scale parameter
     * @return icdf value
     */
    protected double GammaDistributionQuantile(double y, double shape, double scale) {
        return 0.5 * scale * pointChi2(y, 2.0 * shape);
    }

    double pointChi2(double prob, double v) {
        // Returns z so that Prob{x<z}=prob where x is Chi2 distributed with df
        // = v
        // RATNEST FORTRAN by
        // Best DJ & Roberts DE (1975) The percentage points of the
        // Chi2 distribution. Applied Statistics 24: 385-388. (AS91)

        double e = 0.5e-6, aa = 0.6931471805, g;
        double xx, c, ch, a, q, p1, p2, t, x, b, s1, s2, s3, s4, s5, s6;

        if (prob < 0.000002 || prob > 0.999998 || v <= 0) {
            throw new IllegalArgumentException("Error SiteModel 102: Arguments out of range");
        }
        g = GammaFunctionlnGamma(v / 2);
        xx = v / 2;

        c = xx - 1;
        if (v < -1.24 * Math.log(prob)) {
            ch = Math.pow((prob * xx * Math.exp(g + xx * aa)), 1 / xx);
            if (ch - e < 0) {
                return ch;
            }
        } else {
            if (v > 0.32) {
                x = NormalDistributionQuantile(prob, 0, 1);
                p1 = 0.222222 / v;
                ch = v * Math.pow((x * Math.sqrt(p1) + 1 - p1), 3.0);
                if (ch > 2.2 * v + 6) {
                    ch = -2 * (Math.log(1 - prob) - c * Math.log(.5 * ch) + g);
                }
            } else {
                ch = 0.4;
                a = Math.log(1 - prob);

                do {
                    q = ch;
                    p1 = 1 + ch * (4.67 + ch);
                    p2 = ch * (6.73 + ch * (6.66 + ch));
                    t = -0.5 + (4.67 + 2 * ch) / p1 - (6.73 + ch * (13.32 + 3 * ch)) / p2;
                    ch -= (1 - Math.exp(a + g + .5 * ch + c * aa) * p2 / p1) / t;
                } while (Math.abs(q / ch - 1) - .01 > 0);
            }
        }
        do {
            q = ch;
            p1 = 0.5 * ch;
            if ((t = GammaFunctionincompleteGammaP(xx, p1, g)) < 0) {
                throw new IllegalArgumentException("Error SiteModel 101: Arguments out of range: t < 0");
            }
            p2 = prob - t;
            t = p2 * Math.exp(xx * aa + g + p1 - c * Math.log(ch));
            b = t / ch;
            a = 0.5 * t - b * c;

            s1 = (210 + a * (140 + a * (105 + a * (84 + a * (70 + 60 * a))))) / 420;
            s2 = (420 + a * (735 + a * (966 + a * (1141 + 1278 * a)))) / 2520;
            s3 = (210 + a * (462 + a * (707 + 932 * a))) / 2520;
            s4 = (252 + a * (672 + 1182 * a) + c * (294 + a * (889 + 1740 * a))) / 5040;
            s5 = (84 + 264 * a + c * (175 + 606 * a)) / 2520;
            s6 = (120 + c * (346 + 127 * c)) / 5040;
            ch += t * (1 + 0.5 * t * s1 - b * c * (s1 - b * (s2 - b * (s3 - b * (s4 - b * (s5 - b * s6))))));
        } while (Math.abs(q / ch - 1) > e);

        return (ch);
    }

    /**
     * log Gamma function: ln(gamma(alpha)) for alpha>0, accurate to 10 decimal places
     *
     * @param alpha argument
     * @return the log of the gamma function of the given alpha
     */
    double GammaFunctionlnGamma(final double alpha) {
        // Pike MC & Hill ID (1966) Algorithm 291: Logarithm of the gamma function.
        // Communications of the Association for Computing Machinery, 9:684

        double x = alpha, f = 0.0, z;

        if (x < 7) {
            f = 1;
            z = x - 1;
            while (++z < 7) {
                f *= z;
            }
            x = z;
            f = -Math.log(f);
        }
        z = 1 / (x * x);

        return f + (x - 0.5) * Math.log(x) - x + 0.918938533204673
                + (((-0.000595238095238 * z + 0.000793650793651) * z - 0.002777777777778) * z + 0.083333333333333)
                        / x;
    }

    /**
     * Incomplete Gamma function P(a,x) = 1-Q(a,x)
     * (a cleanroom implementation of Numerical Recipes gammp(a,x);
     * in Mathematica this function is 1-GammaRegularized)
     *
     * @param a        parameter
     * @param x        argument
     * @param lnGammaA precomputed lnGamma(a)
     * @return function value
     */
    double GammaFunctionincompleteGammaP(double a, double x, double lnGammaA) {
        return incompleteGamma(x, a, lnGammaA);
    }

    /**
     * Returns the incomplete gamma ratio I(x,alpha) where x is the upper
     * limit of the integration and alpha is the shape parameter.
     *
     * @param x              upper limit of integration
     * @param alpha          shape parameter
     * @param ln_gamma_alpha the log gamma function for alpha
     * @return the incomplete gamma ratio
     */
    double incompleteGamma(double x, double alpha, double ln_gamma_alpha) {
        // (1) series expansion     if (alpha>x || x<=1)
        // (2) continued fraction   otherwise
        // RATNEST FORTRAN by
        // Bhattacharjee GP (1970) The incomplete gamma integral.  Applied Statistics,
        // 19: 285-287 (AS32)

        double accurate = 1e-8, overflow = 1e30;
        double factor, gin, rn, a, b, an, dif, term;
        double pn0, pn1, pn2, pn3, pn4, pn5;

        if (x == 0.0) {
            return 0.0;
        }
        if (x < 0.0 || alpha <= 0.0) {
            throw new IllegalArgumentException("Error SiteModel 103: Arguments out of bounds");
        }

        factor = Math.exp(alpha * Math.log(x) - x - ln_gamma_alpha);

        if (x > 1 && x >= alpha) {
            // continued fraction
            a = 1 - alpha;
            b = a + x + 1;
            term = 0;
            pn0 = 1;
            pn1 = x;
            pn2 = x + 1;
            pn3 = x * b;
            gin = pn2 / pn3;

            do {
                a++;
                b += 2;
                term++;
                an = a * term;
                pn4 = b * pn2 - an * pn0;
                pn5 = b * pn3 - an * pn1;

                if (pn5 != 0) {
                    rn = pn4 / pn5;
                    dif = Math.abs(gin - rn);
                    if (dif <= accurate) {
                        if (dif <= accurate * rn) {
                            break;
                        }
                    }

                    gin = rn;
                }
                pn0 = pn2;
                pn1 = pn3;
                pn2 = pn4;
                pn3 = pn5;
                if (Math.abs(pn4) >= overflow) {
                    pn0 /= overflow;
                    pn1 /= overflow;
                    pn2 /= overflow;
                    pn3 /= overflow;
                }
            } while (true);
            gin = 1 - factor * gin;
        } else {
            // series expansion
            gin = 1;
            term = 1;
            rn = alpha;
            do {
                rn++;
                term *= x / rn;
                gin += term;
            } while (term > accurate);
            gin *= factor / alpha;
        }
        return gin;
    }

    double NormalDistributionQuantile(double z, double m, double sd) {
        return m + Math.sqrt(2.0) * sd * ErrorFunctionInverseErf(2.0 * z - 1.0);
    }

    /**
     * inverse error function
     *
     * @param z argument
     * @return function value
     */
    double ErrorFunctionInverseErf(double z) {
        return ErrorFunctionPointNormal(0.5 * z + 0.5) / Math.sqrt(2.0);
    }

    // Private

    // Returns z so that Prob{x<z}=prob where x ~ N(0,1) and (1e-12) < prob<1-(1e-12)

    double ErrorFunctionPointNormal(double prob) {
        // Odeh RE & Evans JO (1974) The percentage points of the normal distribution.
        // Applied Statistics 22: 96-97 (AS70)

        // Newer methods:
        // Wichura MJ (1988) Algorithm AS 241: the percentage points of the
        // normal distribution.  37: 477-484.
        // Beasley JD & Springer SG  (1977).  Algorithm AS 111: the percentage
        // points of the normal distribution.  26: 118-121.

        double a0 = -0.322232431088, a1 = -1, a2 = -0.342242088547, a3 = -0.0204231210245;
        double a4 = -0.453642210148e-4, b0 = 0.0993484626060, b1 = 0.588581570495;
        double b2 = 0.531103462366, b3 = 0.103537752850, b4 = 0.0038560700634;
        double y, z, p1;

        p1 = (prob < 0.5 ? prob : 1 - prob);
        if (p1 < 1e-20) {
            throw new IllegalArgumentException("Error SiteModel 104: Argument prob out of range");
        }

        y = Math.sqrt(Math.log(1 / (p1 * p1)));
        z = y + ((((y * a4 + a3) * y + a2) * y + a1) * y + a0) / ((((y * b4 + b3) * y + b2) * y + b1) * y + b0);
        return (prob < 0.5 ? -z : z);
    }

    @Override
    public double getProportionInvariant() {
        //if (invarParameter == null) {
        //   return 0;
        //}
        return invarParameter.getValue();
    }

} // class SiteModel