Java tutorial
/* * Copyright (C) 2010 Institute for Computational Biomedicine, * Weill Medical College of Cornell University * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ package edu.cornell.med.icb.goby.stats; import edu.cornell.med.icb.goby.R.FisherExact; import gominer.Fisher; import it.unimi.dsi.fastutil.objects.ObjectArrayList; import it.unimi.dsi.lang.MutableString; import org.apache.commons.math.MathException; import org.apache.commons.math.stat.inference.ChiSquareTest; import org.apache.commons.math.stat.inference.ChiSquareTestImpl; import org.junit.Test; import java.io.IOException; import java.io.PrintWriter; import java.util.Random; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; /** * @author Fabien Campagne * Date: Jan 12, 2010 * Time: 1:14:57 PM */ public class TestStatistics { @Test public void testFoldChange() { final Random randomEngine = new Random(); final DifferentialExpressionCalculator deCalc = new DifferentialExpressionCalculator() { @Override public double getNormalizedExpressionValue(final String sample, final NormalizationMethod method, final MutableString elementId) { if (sample.startsWith("A")) { return 2 * Math.abs(randomEngine.nextDouble()); } else { return Math.abs(randomEngine.nextDouble()); } // fold change A/B = 2 } }; deCalc.defineElement("id-1"); deCalc.defineElement("id-2"); deCalc.defineGroup("A"); deCalc.defineGroup("B"); final int numReplicates = 20000; deCalc.reserve(2, numReplicates * 2); for (int i = 0; i < numReplicates; i++) { deCalc.associateSampleToGroup("A-" + i, "A"); deCalc.associateSampleToGroup("B-" + i, "B"); } final DifferentialExpressionInfo info = new DifferentialExpressionInfo("id-1"); final DifferentialExpressionResults results = new DifferentialExpressionResults(); final FoldChangeCalculator foldChange = new FoldChangeCalculator(results); final NormalizationMethod normalizationMethod = new AlignedCountNormalization(); foldChange.evaluate(deCalc, normalizationMethod, results, info, "A", "B"); assertEquals("fold-change does not match", 2d, results.getStatistic(info, foldChange.statisticIds.get(0)), .1); } @Test public void testAverage() throws IOException { final Random randomEngine = new Random(); final DifferentialExpressionCalculator deCalc = new DifferentialExpressionCalculator() { @Override public double getNormalizedExpressionValue(final String sample, final NormalizationMethod method, final MutableString elementId) { if (sample.startsWith("A")) { return 2 * Math.abs(randomEngine.nextDouble()); } else { return Math.abs(randomEngine.nextDouble()); } // fold change A/B = 2 } }; deCalc.defineElement("id-1"); deCalc.defineElement("id-2"); deCalc.defineGroup("A"); deCalc.defineGroup("B"); final int numReplicates = 20000; deCalc.reserve(2, numReplicates * 2); for (int i = 0; i < numReplicates; i++) { deCalc.associateSampleToGroup("A-" + i, "A"); deCalc.associateSampleToGroup("B-" + i, "B"); } final DifferentialExpressionInfo info = new DifferentialExpressionInfo("id-1"); final DifferentialExpressionResults results = new DifferentialExpressionResults(); final AverageCalculator averageCalculator = new AverageCalculator(results); results.add(info); final NormalizationMethod normalizationMethod = new AlignedCountNormalization(); averageCalculator.evaluate(deCalc, normalizationMethod, results, info, "A", "B"); assertEquals("average A must be around 2", 1d, results.getStatistic(info, averageCalculator.getStatisticId("A", "RPKM", normalizationMethod)), .1); assertEquals("average B must be around 1", 0.5d, results.getStatistic(info, averageCalculator.getStatisticId("B", "RPKM", normalizationMethod)), .1); System.out.println(results); results.write(new PrintWriter("test-results/out-stats.tsv"), '\t', deCalc); } @Test public void testTwoStats() { final Random randomEngine = new Random(37); final DifferentialExpressionCalculator deCalc = new DifferentialExpressionCalculator() { @Override public double getNormalizedExpressionValue(final String sample, final NormalizationMethod method, final MutableString elementId) { if (sample.startsWith("A")) { return 2 * Math.abs(randomEngine.nextGaussian()); } else { return Math.abs(randomEngine.nextGaussian()); } // fold change A/B = 2 } @Override public int getOverlapCount(final String sample, final MutableString elementId) { final NormalizationMethod normalizationMethod = new AlignedCountNormalization(); return (int) (getNormalizedExpressionValue(sample, normalizationMethod, elementId) * 100); } }; deCalc.defineElement("id-1"); deCalc.defineElement("id-2"); deCalc.defineGroup("A"); deCalc.defineGroup("B"); final int numReplicates = 2000; deCalc.reserve(2, numReplicates * 2); for (int i = 0; i < numReplicates; i++) { deCalc.associateSampleToGroup("A-" + i, "A"); deCalc.associateSampleToGroup("B-" + i, "B"); } // observe the counts to populate internal data structures: for (final String sampleId : deCalc.samples()) { final MutableString id1 = new MutableString("id-1"); final MutableString id2 = new MutableString("id-2"); deCalc.observe(sampleId, "id-1", deCalc.getOverlapCount(sampleId, id1)); deCalc.observe(sampleId, "id-2", deCalc.getOverlapCount(sampleId, id2)); } //deCalc.associateSampleToGroup("A-", "A"); //deCalc.associateSampleToGroup("B-1", "B"); final DifferentialExpressionInfo info = new DifferentialExpressionInfo("id-1"); final DifferentialExpressionResults results = new DifferentialExpressionResults(); final FoldChangeCalculator foldChange = new FoldChangeCalculator(results); final TTestCalculator tTest = new TTestCalculator(results); final FisherExactTestCalculator fisher = new FisherExactTestCalculator(results); final NormalizationMethod normalizationMethod = new AlignedCountNormalization(); foldChange.evaluate(deCalc, normalizationMethod, results, info, "A", "B"); tTest.evaluate(deCalc, normalizationMethod, results, info, "A", "B"); fisher.evaluate(deCalc, normalizationMethod, results, info, "A", "B"); assertEquals("fold-change does not match", 2d, results.getStatistic(info, foldChange.statisticIds.get(0)), .1); assertTrue("T-test must be significant", results.getStatistic(info, tTest.statisticIds.get(0)) < 0.01); assertTrue("fisher test must not be significant", results.getStatistic(info, fisher.statisticIds.get(0)) > 0.05); } @Test public void testParalell() { final Random randomEngine = new Random(); final DifferentialExpressionCalculator deCalc = new DifferentialExpressionCalculator() { @Override public double getNormalizedExpressionValue(final String sample, final NormalizationMethod method, final MutableString elementId) { if (sample.startsWith("A")) { return 2 * Math.abs(randomEngine.nextGaussian()); } else { return Math.abs(randomEngine.nextGaussian()); } // fold change A/B = 2 } @Override public int getOverlapCount(final String sample, final MutableString elementId) { final NormalizationMethod normalizationMethod = new AlignedCountNormalization(); return (int) (getNormalizedExpressionValue(sample, normalizationMethod, elementId) * 100); } }; deCalc.setRunInParallel(true); deCalc.defineElement("id-1"); deCalc.defineElement("id-2"); deCalc.defineGroup("A"); deCalc.defineGroup("B"); final int numReplicates = 2000; deCalc.reserve(2, numReplicates * 2); for (int i = 0; i < numReplicates; i++) { deCalc.associateSampleToGroup("A-" + i, "A"); deCalc.associateSampleToGroup("B-" + i, "B"); } // observe the counts to populate internal data structures: for (final String sampleId : deCalc.samples()) { final MutableString id1 = new MutableString("id-1"); final MutableString id2 = new MutableString("id-2"); deCalc.observe(sampleId, "id-1", deCalc.getOverlapCount(sampleId, id1)); deCalc.observe(sampleId, "id-2", deCalc.getOverlapCount(sampleId, id2)); } //deCalc.associateSampleToGroup("A-", "A"); //deCalc.associateSampleToGroup("B-1", "B"); DifferentialExpressionResults list = new DifferentialExpressionResults(); DifferentialExpressionInfo info1 = new DifferentialExpressionInfo("id-1"); DifferentialExpressionInfo info2 = new DifferentialExpressionInfo("id-2"); final FoldChangeCalculator foldChange = new FoldChangeCalculator(list); final AverageCalculator average = new AverageCalculator(list); final TTestCalculator tTest = new TTestCalculator(list); final FisherExactRCalculator fisher = new FisherExactRCalculator(list); final NormalizationMethod normalizationMethod = new AlignedCountNormalization(); list.add(info1); list.add(info2); list = foldChange.evaluate(deCalc, normalizationMethod, list, "A", "B"); list = average.evaluate(deCalc, normalizationMethod, list, "A", "B"); list = tTest.evaluate(deCalc, normalizationMethod, list, "A", "B"); list = fisher.evaluate(deCalc, normalizationMethod, list, "A", "B"); final MutableString foldChangeIndex = foldChange.statisticIds.get(0); final DifferentialExpressionInfo info = list.get(0); assertEquals("fold-change must match", 2d, list.getStatistic(info, foldChangeIndex), .2); assertTrue("T-test must be significant", list.getStatistic(info1, tTest.statisticIds.get(0)) < 0.01); } //Test Disabled fails too often in cruisecontrol public void testParalellLarge() { final Random randomEngine = new Random(); final DifferentialExpressionCalculator deCalc = new DifferentialExpressionCalculator() { @Override public double getNormalizedExpressionValue(final String sample, final NormalizationMethod method, final MutableString elementId) { if (sample.startsWith("A")) { return 2 * Math.abs(randomEngine.nextGaussian() + 3); } else { return Math.abs(randomEngine.nextGaussian() + 20); } // fold change A/B = 2 } @Override public int getOverlapCount(final String sample, final MutableString elementId) { final NormalizationMethod normalizationMethod = new AlignedCountNormalization(); return (int) (getNormalizedExpressionValue(sample, normalizationMethod, elementId) * 100); } }; deCalc.setRunInParallel(true); int elementsPerSample = 1000; for (int i = 0; i < elementsPerSample; i++) { deCalc.defineElement("id-" + i); } deCalc.defineGroup("A"); deCalc.defineGroup("B"); final int numReplicates = 2; deCalc.reserve(elementsPerSample, numReplicates * 2); for (int i = 0; i < numReplicates; i++) { deCalc.associateSampleToGroup("A-" + i, "A"); deCalc.associateSampleToGroup("B-" + i, "B"); } // observe the counts to populate internal data structures: for (int i = 0; i < numReplicates; i++) { for (final String sampleId : deCalc.samples()) { MutableString elementId = new MutableString("id-" + i); deCalc.observe(sampleId, "id-" + i, deCalc.getOverlapCount(sampleId, elementId)); } } DifferentialExpressionResults list = new DifferentialExpressionResults(); final FoldChangeCalculator foldChange = new FoldChangeCalculator(list); final AverageCalculator average = new AverageCalculator(list); final TTestCalculator tTest = new TTestCalculator(list); final FisherExactRCalculator fisher = new FisherExactRCalculator(list); final NormalizationMethod normalizationMethod = new AlignedCountNormalization(); list = foldChange.evaluate(deCalc, normalizationMethod, list, "A", "B"); list = average.evaluate(deCalc, normalizationMethod, list, "A", "B"); list = tTest.evaluate(deCalc, normalizationMethod, list, "A", "B"); list = fisher.evaluate(deCalc, normalizationMethod, list, "A", "B"); assertTrue(list != null); } @Test public void testFisher() throws MathException { final DifferentialExpressionCalculator deCalc = new DifferentialExpressionCalculator(); final int numReplicates = 2; deCalc.defineElement("id-1"); deCalc.defineElement("id-2"); deCalc.defineGroup("A"); deCalc.defineGroup("B"); deCalc.reserve(2, numReplicates * 2); for (int i = 1; i <= numReplicates; i++) { deCalc.associateSampleToGroup("A-" + i, "A"); deCalc.associateSampleToGroup("B-" + i, "B"); } /** * Encode the following table in two genes: Fisher's Exact Test http://www.langsrud.com/fisher.htm ------------------------------------------ TABLE = [ 10 , 20 , 30 , 40 ] Left : p-value = 0.2533310713617698 Right : p-value = 0.8676419647894328 2-Tail : p-value = 0.5044757698516504 ------------------------------------------ */ deCalc.observe("A-1", "id-1", 7); deCalc.observe("A-2", "id-1", 3); // 7+3 = 10 deCalc.observe("B-1", "id-1", 15); deCalc.observe("B-2", "id-1", 5); // 15+5 =20 deCalc.observe("A-1", "id-2", 15); deCalc.observe("A-2", "id-2", 15); // 15+15=30 deCalc.observe("B-1", "id-2", 20); deCalc.observe("B-2", "id-2", 20); // 20+20=40 final DifferentialExpressionInfo info = new DifferentialExpressionInfo("id-1"); final DifferentialExpressionResults results = new DifferentialExpressionResults(); final FisherExactTestCalculator fisher = new FisherExactTestCalculator(results); final NormalizationMethod normalizationMethod = new AlignedCountNormalization(); fisher.evaluate(deCalc, normalizationMethod, results, info, "A", "B"); assertEquals("fisher test equal expected result", 0.5044757698516504, results.getStatistic(info, fisher.statisticIds.get(0)), 0.001); final Fisher fisherTest = new Fisher(); final int totalCountInA = 1700; final int totalCountInB = 170; // equal total in each group final int sumCountInA = 90; final int sumCountInB = 45; // half the counts in sample B fisherTest.fisher(totalCountInA, sumCountInA, totalCountInA + totalCountInB, sumCountInA + sumCountInB); final double pValue = fisherTest.getTwotail(); final double proportionTotalA = divide(totalCountInA, (totalCountInA + totalCountInB)); final double proportionTotalB = divide(totalCountInB, (totalCountInA + totalCountInB)); final ChiSquareTest chisquare = new ChiSquareTestImpl(); final double nGroups = 2; final double[] expected = { divide(sumCountInA + sumCountInB, nGroups) * proportionTotalA * nGroups, divide(sumCountInA + sumCountInB, nGroups) * proportionTotalB * nGroups }; final long[] observed = { sumCountInA, sumCountInB }; final double chiPValue = Math.abs(chisquare.chiSquareTest(expected, observed)); assertTrue("pValue: " + chiPValue, chiPValue < 0.001); // The Fisher implementation we are using return 1 for the above. This is wrong. Compare to the chi-square result // (results should be comparable since the counts in each cell are large) // assertTrue("pValue: " + pValue, pValue < 0.001); } @Test public void testFisherExact() throws MathException { final DifferentialExpressionCalculator deCalc = new DifferentialExpressionCalculator(); final int numReplicates = 2; deCalc.defineElement("id-1"); deCalc.defineElement("id-2"); deCalc.defineGroup("A"); deCalc.defineGroup("B"); deCalc.reserve(2, numReplicates * 2); for (int i = 1; i <= numReplicates; i++) { deCalc.associateSampleToGroup("A-" + i, "A"); deCalc.associateSampleToGroup("B-" + i, "B"); } /** * Encode the following table in two genes: Fisher's Exact Test http://www.langsrud.com/fisher.htm ------------------------------------------ TABLE = [ 10 , 20 , 30 , 40 ] Left : p-value = 0.2533310713617698 Right : p-value = 0.8676419647894328 2-Tail : p-value = 0.5044757698516504 ------------------------------------------ */ deCalc.observe("A-1", "id-1", 7); deCalc.observe("A-2", "id-1", 3); // 7+3 = 10 deCalc.observe("B-1", "id-1", 15); deCalc.observe("B-2", "id-1", 5); // 15+5 =20 deCalc.observe("A-1", "id-2", 15); deCalc.observe("A-2", "id-2", 15); // 15+15=30 deCalc.observe("B-1", "id-2", 20); deCalc.observe("B-2", "id-2", 20); // 20+20=40 final DifferentialExpressionInfo info = new DifferentialExpressionInfo("id-1"); final DifferentialExpressionResults results = new DifferentialExpressionResults(); final FisherExactRCalculator fisher = new FisherExactRCalculator(results); if (fisher.installed()) { final NormalizationMethod normalizationMethod = new AlignedCountNormalization(); fisher.evaluate(deCalc, normalizationMethod, results, info, "A", "B"); assertEquals("fisher test equal expected result", 0.5044757698516504, results.getStatistic(info, fisher.statisticIds.get(0)), 0.001); final int totalCountInA = 1700; final int totalCountInB = 170; // equal total in each group final int sumCountInA = 90; final int sumCountInB = 45; // half the counts in sample B final int sumCountNotInA = totalCountInA - sumCountInA; final int sumCountNotInB = totalCountInB - sumCountInB; final FisherExact.Result result = FisherExact.fexact(sumCountInA, sumCountNotInA, sumCountInB, sumCountNotInB); final double pValue = result.getPValue(); final double proportionTotalA = divide(totalCountInA, (totalCountInA + totalCountInB)); final double proportionTotalB = divide(totalCountInB, (totalCountInA + totalCountInB)); final ChiSquareTest chisquare = new ChiSquareTestImpl(); final double nGroups = 2; final double[] expected = { divide(sumCountInA + sumCountInB, nGroups) * proportionTotalA * nGroups, divide(sumCountInA + sumCountInB, nGroups) * proportionTotalB * nGroups }; final long[] observed = { sumCountInA, sumCountInB }; final double chiPValue = Math.abs(chisquare.chiSquareTest(expected, observed)); assertTrue("pValue: " + chiPValue, chiPValue < 0.001); // The Fisher implementation we are using return 1 for the above. This is wrong. Compare to // the chi-square result // (results should be comparable since the counts in each cell are large) assertTrue("pValue: " + pValue, pValue < 0.001); } } private double divide(final int a, final int b) { return ((double) a) / ((double) b); } private double divide(final int a, final double b) { return ((double) a) / b; } @Test public void testChiSquare() throws MathException { final DifferentialExpressionCalculator deCalc = new DifferentialExpressionCalculator(); final int numReplicates = 2; deCalc.defineElement("id-1"); deCalc.defineElement("id-2"); deCalc.defineGroup("A"); deCalc.defineGroup("B"); deCalc.reserve(2, numReplicates * 2); for (int i = 1; i <= numReplicates; i++) { deCalc.associateSampleToGroup("A-" + i, "A"); deCalc.associateSampleToGroup("B-" + i, "B"); } deCalc.observe("A-1", "id-1", 7); deCalc.observe("A-2", "id-1", 3); // 7+3 = 10 deCalc.observe("B-1", "id-1", 15); deCalc.observe("B-2", "id-1", 5); // 15+5 =20 deCalc.observe("A-1", "id-2", 15); deCalc.observe("A-2", "id-2", 15); // 15+15=30 deCalc.observe("B-1", "id-2", 20); deCalc.observe("B-2", "id-2", 20); // 20+20=40 final DifferentialExpressionInfo info = new DifferentialExpressionInfo("id-1"); final DifferentialExpressionResults results = new DifferentialExpressionResults(); final ChiSquareTestCalculator calc = new ChiSquareTestCalculator(results); final NormalizationMethod normalizationMethod = new AlignedCountNormalization(); calc.evaluate(deCalc, normalizationMethod, results, info, "A", "B"); assertEquals("chi square test equal expected result", 0.456056540250256, results.getStatistic(info, calc.statisticIds.get(0)), 0.001); final ChiSquareTest chisquare = new ChiSquareTestImpl(); final double[] expected = { 30, 12 }; final long[] observed = { 0, 100 }; final double chiPValue = chisquare.chiSquareTest(expected, observed); assertTrue("pValue: " + chiPValue, chiPValue < 0.001); // The Fisher implementation we are using return 1 for the above. This is wrong. Compare to the chi-square result // (results should be comparable since the counts in each cell are large) // assertTrue("pValue: " + pValue, pValue < 0.001); } @Test public void testChiSquareZeroCount() throws MathException { final DifferentialExpressionCalculator deCalc = new DifferentialExpressionCalculator(); final int numReplicates = 2; deCalc.defineElement("id-1"); deCalc.defineElement("id-2"); deCalc.defineGroup("A"); deCalc.defineGroup("B"); deCalc.reserve(2, numReplicates * 2); for (int i = 1; i <= numReplicates; i++) { deCalc.associateSampleToGroup("A-" + i, "A"); deCalc.associateSampleToGroup("B-" + i, "B"); } deCalc.observe("A-1", "id-1", 0); // ZERO counts should yield NaN p-value deCalc.observe("A-2", "id-1", 0); // 0+0 = 0 deCalc.observe("B-1", "id-1", 15); deCalc.observe("B-2", "id-1", 5); // 15+5 =20 deCalc.observe("A-1", "id-2", 15); deCalc.observe("A-2", "id-2", 15); // 15+15=30 deCalc.observe("B-1", "id-2", 20); deCalc.observe("B-2", "id-2", 20); // 20+20=40 final DifferentialExpressionInfo info = new DifferentialExpressionInfo("id-1"); final DifferentialExpressionResults results = new DifferentialExpressionResults(); final ChiSquareTestCalculator calc = new ChiSquareTestCalculator(results); final NormalizationMethod normalizationMethod = new AlignedCountNormalization(); calc.evaluate(deCalc, normalizationMethod, results, info, "A", "B"); assertTrue("chi square test result must be NaN (zero count)", Double.isNaN(results.getStatistic(info, calc.statisticIds.get(0)))); } @Test public void testTruncatedFDR() { final BenjaminiHochbergAdjustment fdr = new BenjaminiHochbergAdjustment(); // we construct an array with additional p-values than contained in p. Each additional // p-values is larger than some q-threshold we care about. double[] expandedPValues = new double[p.length + 100]; System.arraycopy(p, 0, expandedPValues, 0, p.length); for (int i = 0; i < 100; i++) { expandedPValues[p.length + i] = Math.min(1.0, 0.3 + i / 100.0); } // we run both the complete fdr test and the truncated version of the test: DifferentialExpressionResults expandedList = toList(expandedPValues); DifferentialExpressionResults truncatedList = toList(expandedPValues); final int numberAboveThreshold = truncate(truncatedList, 0.3, truncatedList.getStatisticIndex("p-value")); final DifferentialExpressionResults originalList = fdr.adjust(expandedList, "p-value"); fdr.setNumberAboveThreshold(numberAboveThreshold); final DifferentialExpressionResults truncatedListAdjusted = fdr.adjust(truncatedList, "p-value"); final int truncatedListStatisticIndex = truncatedListAdjusted.getStatisticIndex("p-value-BH-FDR-q-value"); final int originalListStatisticIndex = originalList.getStatisticIndex("p-value-BH-FDR-q-value"); // System.out.println("originalList:" + originalList); // System.out.println("truncatedList (truncated):" + truncatedListAdjusted); for (int i = 0; i < truncatedList.size(); i++) { final DifferentialExpressionInfo originalInfo = originalList.get(i); final DifferentialExpressionInfo truncatedInfo = truncatedListAdjusted.get(i); double truncatedQValue = truncatedInfo.statistics.getDouble(truncatedListStatisticIndex); double originalQValue = originalInfo.statistics.getDouble(originalListStatisticIndex); System.out.printf("truncated fdr=%g original=%g %n", truncatedQValue, originalQValue); assertEquals("original and truncated q-values must match", originalQValue, truncatedQValue, 0.001); } } private int truncate(DifferentialExpressionResults expandedList, double threshold, int index) { int numTruncated = 0; ObjectArrayList<DifferentialExpressionInfo> toRemove = new ObjectArrayList<DifferentialExpressionInfo>(); for (DifferentialExpressionInfo info : expandedList) { double pValue = info.statistics().getDouble(index); if (pValue > threshold) { numTruncated++; toRemove.add(info); } } expandedList.removeAll(toRemove); return numTruncated; } @Test public void testFDR() { final Random randomEngine = new Random(); randomEngine.setSeed(1013); final BonferroniAdjustment bonferroni = new BonferroniAdjustment(); final BenjaminiHochbergAdjustment fdr = new BenjaminiHochbergAdjustment(); final DifferentialExpressionResults list = new DifferentialExpressionResults(); final String statId = "t-test-P-value"; list.declareStatistic(statId); final int statIndex = list.getStatisticIndex(statId); final int numObservations = 100000; final double proportionOfNaN = .1; for (int i = 0; i < numObservations; i++) { final DifferentialExpressionInfo info = new DifferentialExpressionInfo("element-" + i); info.statistics.size(list.getNumberOfStatistics()); final double random1 = randomEngine.nextDouble(); final double random2 = randomEngine.nextDouble(); info.statistics.set(statIndex, random1 < proportionOfNaN ? Double.NaN : random2); list.add(info); } final String secondPValueId = "another-p-value"; list.declareStatistic(secondPValueId); final int statIndex2 = list.getStatisticIndex(secondPValueId); for (final DifferentialExpressionInfo info : list) { info.statistics.size(list.getNumberOfStatistics()); info.statistics.set(statIndex2, randomEngine.nextDouble()); } final NormalizationMethod normalizationMethod = new AlignedCountNormalization(); bonferroni.adjust(list, normalizationMethod, statId, secondPValueId); fdr.adjust(list, normalizationMethod, statId, secondPValueId); final int index1 = list.getStatisticIndex("t-test-P-value-BH-FDR-q-value"); final int index2 = list.getStatisticIndex(secondPValueId + "-BH-FDR-q-value"); final double significanceThreshold = 0.05; int numRejectedHypothesesTest1 = 0; int numRejectedHypothesesTest2 = 0; for (final DifferentialExpressionInfo info : list) { final boolean test1 = info.statistics.getDouble(index1) > significanceThreshold; if (!test1) { // System.out.println("info:" + info); numRejectedHypothesesTest1++; } final boolean test2 = info.statistics.getDouble(index2) > significanceThreshold; if (!test2) { // System.out.println("info:" + info); numRejectedHypothesesTest2++; } } assertTrue("No q-value should be significant after FDR adjustment", numRejectedHypothesesTest1 < significanceThreshold * numObservations); assertTrue("No q-value should be significant after FDR adjustment", numRejectedHypothesesTest2 < significanceThreshold * numObservations); // System.out.println("list.adjusted: " + list); final int n = p.length; for (int rank = p.length; rank >= 1; rank--) { final int index = rank - 1; assertEquals("rank: " + rank, adjusted_R_nocummin[index], p[index] * (((double) n) / (double) rank), 0.01); } { final DifferentialExpressionResults list3 = fdr.adjust(toList(p), "p-value"); System.out.println("list3:" + list3); final int index = list3.getStatisticIndex("p-value-BH-FDR-q-value"); for (final DifferentialExpressionInfo infoAdjusted : list3) { final int elementIndex = Integer.parseInt(infoAdjusted.getElementId().toString()); assertEquals("adjusted p-values must match for i=" + infoAdjusted.getElementId(), adjusted_R[elementIndex], infoAdjusted.statistics.get(index), 0.01); } } } final double[] p = { 2.354054e-07, 2.101590e-05, 2.576842e-05, 9.814783e-05, 1.052610e-04, 1.241481e-04, 1.325988e-04, 1.568503e-04, 2.254557e-04, 3.795380e-04, 6.114943e-04, 1.613954e-03, 3.302430e-03, 3.538342e-03, 5.236997e-03, 6.831909e-03, 7.059226e-03, 8.805129e-03, 9.401040e-03, 1.129798e-02, 2.115017e-02, 4.922736e-02, 6.053298e-02, 6.262239e-02, 7.395153e-02, 8.281103e-02, 8.633331e-02, 1.190654e-01, 1.890796e-01, 2.058494e-01, 2.209214e-01, 2.856000e-01, 3.048895e-01, 4.660682e-01, 4.830809e-01, 4.921755e-01, 5.319453e-01, 5.751550e-01, 5.783195e-01, 6.185894e-01, 6.363620e-01, 6.448587e-01, 6.558414e-01, 6.885884e-01, 7.189864e-01, 8.179539e-01, 8.274487e-01, 8.971300e-01, 9.118680e-01, 9.437890e-01 }; final double[] adjusted_R = { 1.177027e-05, 4.294736e-04, 4.294736e-04, 9.471343e-04, 9.471343e-04, 9.471343e-04, 9.471343e-04, 9.803146e-04, 1.252532e-03, 1.897690e-03, 2.779520e-03, 6.724807e-03, 1.263693e-02, 1.263693e-02, 1.745666e-02, 2.076243e-02, 2.076243e-02, 2.445869e-02, 2.473958e-02, 2.824495e-02, 5.035754e-02, 1.118804e-01, 1.304633e-01, 1.304633e-01, 1.479031e-01, 1.592520e-01, 1.598765e-01, 2.126168e-01, 3.259994e-01, 3.430823e-01, 3.563248e-01, 4.462501e-01, 4.619538e-01, 6.835770e-01, 6.835770e-01, 6.835770e-01, 7.188450e-01, 7.414352e-01, 7.414352e-01, 7.626063e-01, 7.626063e-01, 7.626063e-01, 7.626063e-01, 7.824868e-01, 7.988737e-01, 8.802645e-01, 8.802645e-01, 9.304775e-01, 9.304775e-01, 9.437890e-01 }; final double[] adjusted_R_nocummin = { 1.177027e-05, 5.253976e-04, 4.294736e-04, 1.226848e-03, 1.052610e-03, 1.034567e-03, 9.471343e-04, 9.803146e-04, 1.252532e-03, 1.897690e-03, 2.779520e-03, 6.724807e-03, 1.270165e-02, 1.263693e-02, 1.745666e-02, 2.134972e-02, 2.076243e-02, 2.445869e-02, 2.473958e-02, 2.824495e-02, 5.035754e-02, 1.118804e-01, 1.315934e-01, 1.304633e-01, 1.479031e-01, 1.592520e-01, 1.598765e-01, 2.126168e-01, 3.259994e-01, 3.430823e-01, 3.563248e-01, 4.462501e-01, 4.619538e-01, 6.853944e-01, 6.901156e-01, 6.835770e-01, 7.188450e-01, 7.567830e-01, 7.414352e-01, 7.732368e-01, 7.760512e-01, 7.676890e-01, 7.626063e-01, 7.824868e-01, 7.988737e-01, 8.890803e-01, 8.802645e-01, 9.345104e-01, 9.304775e-01, 9.437890e-01 }; private DifferentialExpressionResults toList(double[] p) { final DifferentialExpressionResults list2 = new DifferentialExpressionResults(); list2.declareStatistic("p-value"); int i = 0; for (final double pValue : p) { final DifferentialExpressionInfo info = new DifferentialExpressionInfo(String.valueOf(i++)); info.statistics.add(pValue); list2.add(info); } return list2; } }