edu.cornell.med.icb.goby.stats.ChiSquareTestCalculator.java Source code

Java tutorial

Introduction

Here is the source code for edu.cornell.med.icb.goby.stats.ChiSquareTestCalculator.java

Source

/*
 * Copyright (C) 2009-2010 Institute for Computational Biomedicine,
 *                    Weill Medical College of Cornell University
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

package edu.cornell.med.icb.goby.stats;

import it.unimi.dsi.fastutil.doubles.DoubleArrayList;
import it.unimi.dsi.fastutil.longs.LongArrayList;
import it.unimi.dsi.fastutil.objects.ObjectArraySet;
import it.unimi.dsi.lang.MutableString;
import org.apache.commons.math.MathException;
import org.apache.commons.math.MaxIterationsExceededException;
import org.apache.commons.math.stat.inference.ChiSquareTest;
import org.apache.commons.math.stat.inference.ChiSquareTestImpl;
import org.apache.log4j.Logger;

/**
 * Calculates the two-tailed chi square test P-value for an observed count difference between comparison groups
 * (can assess differences between multiple groups n>=2).
 *
 * @author Fabien Campagne
 *         Date: Jan 11, 2010
 *         Time: 7:06:31 PM
 */
public class ChiSquareTestCalculator extends StatisticCalculator {
    private static final Logger LOG = Logger.getLogger(ChiSquareTestCalculator.class);

    public ChiSquareTestCalculator(final DifferentialExpressionResults results) {
        this();
        setResults(results);
    }

    public ChiSquareTestCalculator() {
        super();
    }

    @Override
    public boolean canDo(final String[] group) {
        return group.length >= 2;
    }

    public MutableString getStatisticId(final String groupId, final NormalizationMethod normalizationMethod) {
        return new MutableString("chi-square-test " + groupId + "(" + normalizationMethod.getAbbreviation() + ")");
    }

    @Override
    public DifferentialExpressionInfo evaluate(
            final DifferentialExpressionCalculator differentialExpressionCalculator,
            final NormalizationMethod method, final DifferentialExpressionResults results,
            final DifferentialExpressionInfo info, final String... group) {

        // expected counts in each group, assuming the counts for the DE are spread  among the groups according to sample
        // global count proportions
        final double[] expectedCounts = new double[group.length];

        //  counts observed in each group:
        final long[] observedCounts = new long[group.length];
        final double[] groupProportions = new double[group.length];

        final int chiSquarePValuesStatIndex = defineStatisticId(results, "chi-square-test", method, group);

        int i = 0;

        double pValue = 1;
        // estimate the sumOfCountsForDE of counts over all the samples included in any group compared.
        final long sumOfCountsForDE = 0;
        int numSamples = 0;
        double sumObservedCounts = 0;

        for (final String oneGroupId : group) {
            final ObjectArraySet<String> samplesForGroup = differentialExpressionCalculator.getSamples(oneGroupId);

            for (final String sample : samplesForGroup) {
                final long observedCount = differentialExpressionCalculator.getOverlapCount(sample,
                        info.getElementId());
                final double sampleProportion = differentialExpressionCalculator.getSampleProportion(sample);
                observedCounts[i] += observedCount;
                groupProportions[i] += sampleProportion;
                sumObservedCounts += observedCount;
                numSamples++;
            }
            if (observedCounts[i] == 0) {
                // Chi Square is not defined if any observed counts are zero.
                info.statistics.size(results.getNumberOfStatistics());
                info.statistics.set(chiSquarePValuesStatIndex, Double.NaN);
                return info;
            }
            ++i;
        }

        i = 0;
        final double nGroups = group.length;
        for (int groupIndex = 0; groupIndex < nGroups; groupIndex++) {
            expectedCounts[groupIndex] += groupProportions[groupIndex] * sumObservedCounts;
        }

        final ChiSquareTest chisquare = new ChiSquareTestImpl();

        try {
            final double pValueRaw = chisquare.chiSquareTest(expectedCounts, observedCounts);
            // math commons can return negative p-values?
            pValue = Math.abs(pValueRaw);
        } catch (MaxIterationsExceededException e) {
            LOG.error("elementId:" + info.getElementId());
            LOG.error("expected:" + DoubleArrayList.wrap(expectedCounts).toString());
            LOG.error("observed:" + LongArrayList.wrap(observedCounts).toString());
            LOG.error(e);
            pValue = 1;
        } catch (MathException e) {
            e.printStackTrace();
        }

        info.statistics.size(results.getNumberOfStatistics());
        info.statistics.set(chiSquarePValuesStatIndex, pValue);
        return info;
    }
}