Java tutorial
/* * File SiteModel.java * * Copyright (C) 2010 Remco Bouckaert remco@cs.auckland.ac.nz * * This file is part of BEAST2. * See the NOTICE file distributed with this work for additional * information regarding copyright ownership and licensing. * * BEAST is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * BEAST is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with BEAST; if not, write to the * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, * Boston, MA 02110-1301 USA */ package beast.evolution.sitemodel; import org.apache.commons.math.distribution.GammaDistribution; import org.apache.commons.math.distribution.GammaDistributionImpl; import beast.core.Description; import beast.core.Input; import beast.core.parameter.RealParameter; import beast.core.util.Log; import beast.evolution.substitutionmodel.SubstitutionModel; import beast.evolution.tree.Node; /** * Site model with * o gamma site model, * o proportion of sites that are invariant * * */ @Description("Defines mutation rate " + "and gamma distributed rates across sites (optional) " + "and proportion of the sites invariant (also optional).") public class SiteModel extends SiteModelInterface.Base { final public Input<RealParameter> muParameterInput = new Input<>("mutationRate", "mutation rate (defaults to 1.0)"); final public Input<Integer> gammaCategoryCount = new Input<>("gammaCategoryCount", "gamma category count (default=zero for no gamma)", 0); final public Input<RealParameter> shapeParameterInput = new Input<>("shape", "shape parameter of gamma distribution. Ignored if gammaCategoryCount 1 or less"); final public Input<RealParameter> invarParameterInput = new Input<>("proportionInvariant", "proportion of sites that is invariant: should be between 0 (default) and 1"); //public Input<Boolean> useBeast1StyleGammaInput = new Input<>("useBeast1Gamma", "use BEAST1 style gamma categories -- for backward compatibility testing", false); RealParameter muParameter; RealParameter shapeParameter; RealParameter invarParameter; boolean useBeast1StyleGamma; @Override public void initAndValidate() { useBeast1StyleGamma = true; //useBeast1StyleGammaInput.get(); muParameter = muParameterInput.get(); if (muParameter == null) { muParameter = new RealParameter("1.0"); } shapeParameter = shapeParameterInput.get(); invarParameter = invarParameterInput.get(); if (invarParameter == null) { invarParameter = new RealParameter("0.0"); invarParameter.setBounds(Math.max(0.0, invarParameter.getLower()), Math.min(1.0, invarParameter.getUpper())); } //if (muParameter != null) { muParameter.setBounds(Math.max(muParameter.getLower(), 0.0), Math.min(muParameter.getUpper(), Double.POSITIVE_INFINITY)); //} if (shapeParameter != null) { // The quantile calculator fails when the shape parameter goes much below // 1E-3 so we have put a hard lower bound on it. If this is not there then // the category rates can go to 0 and cause a -Inf likelihood (whilst this // is not a problem as the state will be rejected, it could mask other issues // and this seems the better approach. shapeParameter.setBounds(Math.max(shapeParameter.getLower(), 1.0E-3), Math.min(shapeParameter.getUpper(), 1.0E3)); } if (/*invarParameter != null && */(invarParameter.getValue() < 0 || invarParameter.getValue() > 1)) { throw new IllegalArgumentException("proportion invariant should be between 0 and 1"); } refresh(); addCondition(muParameterInput); addCondition(invarParameterInput); addCondition(shapeParameterInput); } @Override protected void refresh() { if (shapeParameter != null) { categoryCount = gammaCategoryCount.get(); if (categoryCount < 1) { if (categoryCount < 0) { Log.warning.println("SiteModel: Invalid category count (" + categoryCount + ") Setting category count to 1"); } categoryCount = 1; } } else { categoryCount = 1; } if (/*invarParameter != null && */invarParameter.getValue() > 0) { if (invarParameter.getValue() >= 1.0) { throw new RuntimeException("Wrong value for parameter " + invarParameter.getID() + ". Proportion invariant should be in bewteen 0 and 1 (exclusive)"); } if (hasPropInvariantCategory) { categoryCount += 1; } } categoryRates = new double[categoryCount]; categoryProportions = new double[categoryCount]; calculateCategoryRates(null); //ratesKnown = false; } // ***************************************************************** // Interface SiteModel // ***************************************************************** @Override public boolean integrateAcrossCategories() { return true; } @Override public int getCategoryCount() { return categoryCount; } @Override public int getCategoryOfSite(final int site, final Node node) { throw new IllegalArgumentException("Integrating across categories"); } @Override public double getRateForCategory(final int category, final Node node) { synchronized (this) { if (!ratesKnown) { calculateCategoryRates(node); } } //final double mu = (muParameter != null) ? muParameter.getValue() : 1.0; return categoryRates[category] * muParameter.getValue(); } /** * return category rates * * @param node rates to which the rates apply. Typically, the rates will be uniform * throughout the tree and the node argument is ignored. */ @Override public double[] getCategoryRates(final Node node) { synchronized (this) { if (!ratesKnown) { calculateCategoryRates(node); } } final double mu = muParameter.getValue();//(muParameter != null) ? muParameter.getValue() : 1.0; final double[] rates = new double[categoryRates.length]; for (int i = 0; i < rates.length; i++) { rates[i] = categoryRates[i] * mu; } return rates; } /** * @return substitution model * */ @Override public SubstitutionModel getSubstitutionModel() { return substModelInput.get(); } /** * Get the expected proportion of sites in this category. * * @param category the category number * @param node node to which the proportions apply. Typically, proportions * will be uniform throughout the tree and this argument is ignored. * @return the proportion. */ @Override public double getProportionForCategory(final int category, final Node node) { synchronized (this) { if (!ratesKnown) { calculateCategoryRates(node); } } return categoryProportions[category]; } /** * Get an array of the expected proportion of sites in this category. * * @return an array of the proportion. */ @Override public double[] getCategoryProportions(final Node node) { synchronized (this) { if (!ratesKnown) { calculateCategoryRates(node); } } return categoryProportions; } /** * discretisation of gamma distribution with equal proportions in each * category * @param node */ protected void calculateCategoryRates(final Node node) { double propVariable = 1.0; int cat = 0; if (/*invarParameter != null && */invarParameter.getValue() > 0) { if (hasPropInvariantCategory) { categoryRates[0] = 0.0; categoryProportions[0] = invarParameter.getValue(); } propVariable = 1.0 - invarParameter.getValue(); if (hasPropInvariantCategory) { cat = 1; } } if (shapeParameter != null) { final double a = shapeParameter.getValue(); double mean = 0.0; final int gammaCatCount = categoryCount - cat; final GammaDistribution g = new GammaDistributionImpl(a, 1.0 / a); for (int i = 0; i < gammaCatCount; i++) { try { // RRB: alternative implementation that seems equally good in // the first 5 significant digits, but uses a standard distribution object if (useBeast1StyleGamma) { categoryRates[i + cat] = GammaDistributionQuantile((2.0 * i + 1.0) / (2.0 * gammaCatCount), a, 1.0 / a); } else { categoryRates[i + cat] = g .inverseCumulativeProbability((2.0 * i + 1.0) / (2.0 * gammaCatCount)); } } catch (Exception e) { e.printStackTrace(); Log.err.println("Something went wrong with the gamma distribution calculation"); System.exit(-1); } mean += categoryRates[i + cat]; categoryProportions[i + cat] = propVariable / gammaCatCount; } mean = (propVariable * mean) / gammaCatCount; for (int i = 0; i < gammaCatCount; i++) { categoryRates[i + cat] /= mean; } } else { categoryRates[cat] = 1.0 / propVariable; categoryProportions[cat] = propVariable; } ratesKnown = true; } /** * CalculationNode methods * */ @Override public void store() { super.store(); } // no additional state needs storing @Override public void restore() { super.restore(); ratesKnown = false; } @Override protected boolean requiresRecalculation() { // do explicit check whether any of the non-substitution model parameters changed if (categoryCount > 1) { if (shapeParameter != null && shapeParameter.somethingIsDirty() || muParameter.somethingIsDirty() || invarParameter.somethingIsDirty()) { ratesKnown = false; } } else { if (muParameter.somethingIsDirty() || !hasPropInvariantCategory && invarParameter.somethingIsDirty()) { ratesKnown = false; } } // ratesKnown = false; // we only get here if something is dirty in its inputs, so always return true return true; } protected boolean ratesKnown; protected int categoryCount; protected double[] categoryRates; protected double[] categoryProportions; /** * quantile (inverse cumulative density function) of the Gamma distribution * * @param y argument * @param shape shape parameter * @param scale scale parameter * @return icdf value */ protected double GammaDistributionQuantile(double y, double shape, double scale) { return 0.5 * scale * pointChi2(y, 2.0 * shape); } double pointChi2(double prob, double v) { // Returns z so that Prob{x<z}=prob where x is Chi2 distributed with df // = v // RATNEST FORTRAN by // Best DJ & Roberts DE (1975) The percentage points of the // Chi2 distribution. Applied Statistics 24: 385-388. (AS91) double e = 0.5e-6, aa = 0.6931471805, g; double xx, c, ch, a, q, p1, p2, t, x, b, s1, s2, s3, s4, s5, s6; if (prob < 0.000002 || prob > 0.999998 || v <= 0) { throw new IllegalArgumentException("Error SiteModel 102: Arguments out of range"); } g = GammaFunctionlnGamma(v / 2); xx = v / 2; c = xx - 1; if (v < -1.24 * Math.log(prob)) { ch = Math.pow((prob * xx * Math.exp(g + xx * aa)), 1 / xx); if (ch - e < 0) { return ch; } } else { if (v > 0.32) { x = NormalDistributionQuantile(prob, 0, 1); p1 = 0.222222 / v; ch = v * Math.pow((x * Math.sqrt(p1) + 1 - p1), 3.0); if (ch > 2.2 * v + 6) { ch = -2 * (Math.log(1 - prob) - c * Math.log(.5 * ch) + g); } } else { ch = 0.4; a = Math.log(1 - prob); do { q = ch; p1 = 1 + ch * (4.67 + ch); p2 = ch * (6.73 + ch * (6.66 + ch)); t = -0.5 + (4.67 + 2 * ch) / p1 - (6.73 + ch * (13.32 + 3 * ch)) / p2; ch -= (1 - Math.exp(a + g + .5 * ch + c * aa) * p2 / p1) / t; } while (Math.abs(q / ch - 1) - .01 > 0); } } do { q = ch; p1 = 0.5 * ch; if ((t = GammaFunctionincompleteGammaP(xx, p1, g)) < 0) { throw new IllegalArgumentException("Error SiteModel 101: Arguments out of range: t < 0"); } p2 = prob - t; t = p2 * Math.exp(xx * aa + g + p1 - c * Math.log(ch)); b = t / ch; a = 0.5 * t - b * c; s1 = (210 + a * (140 + a * (105 + a * (84 + a * (70 + 60 * a))))) / 420; s2 = (420 + a * (735 + a * (966 + a * (1141 + 1278 * a)))) / 2520; s3 = (210 + a * (462 + a * (707 + 932 * a))) / 2520; s4 = (252 + a * (672 + 1182 * a) + c * (294 + a * (889 + 1740 * a))) / 5040; s5 = (84 + 264 * a + c * (175 + 606 * a)) / 2520; s6 = (120 + c * (346 + 127 * c)) / 5040; ch += t * (1 + 0.5 * t * s1 - b * c * (s1 - b * (s2 - b * (s3 - b * (s4 - b * (s5 - b * s6)))))); } while (Math.abs(q / ch - 1) > e); return (ch); } /** * log Gamma function: ln(gamma(alpha)) for alpha>0, accurate to 10 decimal places * * @param alpha argument * @return the log of the gamma function of the given alpha */ double GammaFunctionlnGamma(final double alpha) { // Pike MC & Hill ID (1966) Algorithm 291: Logarithm of the gamma function. // Communications of the Association for Computing Machinery, 9:684 double x = alpha, f = 0.0, z; if (x < 7) { f = 1; z = x - 1; while (++z < 7) { f *= z; } x = z; f = -Math.log(f); } z = 1 / (x * x); return f + (x - 0.5) * Math.log(x) - x + 0.918938533204673 + (((-0.000595238095238 * z + 0.000793650793651) * z - 0.002777777777778) * z + 0.083333333333333) / x; } /** * Incomplete Gamma function P(a,x) = 1-Q(a,x) * (a cleanroom implementation of Numerical Recipes gammp(a,x); * in Mathematica this function is 1-GammaRegularized) * * @param a parameter * @param x argument * @param lnGammaA precomputed lnGamma(a) * @return function value */ double GammaFunctionincompleteGammaP(double a, double x, double lnGammaA) { return incompleteGamma(x, a, lnGammaA); } /** * Returns the incomplete gamma ratio I(x,alpha) where x is the upper * limit of the integration and alpha is the shape parameter. * * @param x upper limit of integration * @param alpha shape parameter * @param ln_gamma_alpha the log gamma function for alpha * @return the incomplete gamma ratio */ double incompleteGamma(double x, double alpha, double ln_gamma_alpha) { // (1) series expansion if (alpha>x || x<=1) // (2) continued fraction otherwise // RATNEST FORTRAN by // Bhattacharjee GP (1970) The incomplete gamma integral. Applied Statistics, // 19: 285-287 (AS32) double accurate = 1e-8, overflow = 1e30; double factor, gin, rn, a, b, an, dif, term; double pn0, pn1, pn2, pn3, pn4, pn5; if (x == 0.0) { return 0.0; } if (x < 0.0 || alpha <= 0.0) { throw new IllegalArgumentException("Error SiteModel 103: Arguments out of bounds"); } factor = Math.exp(alpha * Math.log(x) - x - ln_gamma_alpha); if (x > 1 && x >= alpha) { // continued fraction a = 1 - alpha; b = a + x + 1; term = 0; pn0 = 1; pn1 = x; pn2 = x + 1; pn3 = x * b; gin = pn2 / pn3; do { a++; b += 2; term++; an = a * term; pn4 = b * pn2 - an * pn0; pn5 = b * pn3 - an * pn1; if (pn5 != 0) { rn = pn4 / pn5; dif = Math.abs(gin - rn); if (dif <= accurate) { if (dif <= accurate * rn) { break; } } gin = rn; } pn0 = pn2; pn1 = pn3; pn2 = pn4; pn3 = pn5; if (Math.abs(pn4) >= overflow) { pn0 /= overflow; pn1 /= overflow; pn2 /= overflow; pn3 /= overflow; } } while (true); gin = 1 - factor * gin; } else { // series expansion gin = 1; term = 1; rn = alpha; do { rn++; term *= x / rn; gin += term; } while (term > accurate); gin *= factor / alpha; } return gin; } double NormalDistributionQuantile(double z, double m, double sd) { return m + Math.sqrt(2.0) * sd * ErrorFunctionInverseErf(2.0 * z - 1.0); } /** * inverse error function * * @param z argument * @return function value */ double ErrorFunctionInverseErf(double z) { return ErrorFunctionPointNormal(0.5 * z + 0.5) / Math.sqrt(2.0); } // Private // Returns z so that Prob{x<z}=prob where x ~ N(0,1) and (1e-12) < prob<1-(1e-12) double ErrorFunctionPointNormal(double prob) { // Odeh RE & Evans JO (1974) The percentage points of the normal distribution. // Applied Statistics 22: 96-97 (AS70) // Newer methods: // Wichura MJ (1988) Algorithm AS 241: the percentage points of the // normal distribution. 37: 477-484. // Beasley JD & Springer SG (1977). Algorithm AS 111: the percentage // points of the normal distribution. 26: 118-121. double a0 = -0.322232431088, a1 = -1, a2 = -0.342242088547, a3 = -0.0204231210245; double a4 = -0.453642210148e-4, b0 = 0.0993484626060, b1 = 0.588581570495; double b2 = 0.531103462366, b3 = 0.103537752850, b4 = 0.0038560700634; double y, z, p1; p1 = (prob < 0.5 ? prob : 1 - prob); if (p1 < 1e-20) { throw new IllegalArgumentException("Error SiteModel 104: Argument prob out of range"); } y = Math.sqrt(Math.log(1 / (p1 * p1))); z = y + ((((y * a4 + a3) * y + a2) * y + a1) * y + a0) / ((((y * b4 + b3) * y + b2) * y + b1) * y + b0); return (prob < 0.5 ? -z : z); } @Override public double getProportionInvariant() { //if (invarParameter == null) { // return 0; //} return invarParameter.getValue(); } } // class SiteModel