edu.cornell.med.icb.goby.stats.DifferentialExpressionAnalysis.java Source code

Java tutorial

Introduction

Here is the source code for edu.cornell.med.icb.goby.stats.DifferentialExpressionAnalysis.java

Source

/*
 * Copyright (C) 2009-2010 Institute for Computational Biomedicine,
 *                    Weill Medical College of Cornell University
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

package edu.cornell.med.icb.goby.stats;

import com.martiansoftware.jsap.JSAPResult;
import edu.cornell.med.icb.goby.algorithmic.data.GroupComparison;
import edu.cornell.med.icb.goby.alignments.AlignmentReaderImpl;
import it.unimi.dsi.fastutil.objects.*;
import org.apache.commons.io.FilenameUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import java.util.ArrayList;
import java.util.ServiceLoader;

/**
 * Class that organizes group comparisons and facilitates differential expression
 * statistical testing.
 *
 * @author Nyasha Chambwe
 *         Date: Mar 24, 2010
 *         Time: 3:29:26 PM
 */
public class DifferentialExpressionAnalysis {
    /**
     * Used to log debug and informational messages.
     */
    private static final Log LOG = LogFactory.getLog(DifferentialExpressionAnalysis.class);

    private String[] groups;
    private final Object2ObjectMap<String, Integer> groupSizes = new Object2ObjectOpenHashMap<String, Integer>();

    /**
     * Flag for an invalid t-test due to any group size < 2.
     */
    private boolean ttestflag;

    /**
     * The set of normalization methods to use for the comparison.
     */

    private ObjectArraySet<NormalizationMethod> normalizationMethods;

    private static final ServiceLoader<NormalizationMethod> normalizationMethodLoader = ServiceLoader
            .load(NormalizationMethod.class);
    private boolean runInParalell;

    public DifferentialExpressionAnalysis() {
        super();
    }

    public void parseGroupsDefinition(final String groupsDefinition,
            final DifferentialExpressionCalculator deCalculator, final String[] inputFilenames) {
        if (groupsDefinition == null) {
            // no groups definition to parse.
            return;
        }

        final String[] groupsTmp = groupsDefinition.split("/");
        ObjectSet<String> groupSet = new ObjectArraySet<String>();
        for (final String group : groupsTmp) {
            final String[] groupTokens = group.split("=");
            if (groupTokens.length < 2) {
                System.err.println("The --group argument must have the syntax groupId=basename");
                System.exit(1);
            }
            final String groupId = groupTokens[0];
            final String groupBasenames = groupTokens[1];
            assert groupTokens.length == 2 : "group definition must have only two elements separated by an equal sign.";
            deCalculator.defineGroup(groupId);
            groupSet.add(groupId);
            for (final String groupString : groupBasenames.split(",")) {

                final String sampleBasename = FilenameUtils.getName(AlignmentReaderImpl.getBasename(groupString));
                if (!isInputBasename(sampleBasename, inputFilenames)) {
                    System.err.printf("The group basename %s is not a valid input basename.%n", sampleBasename);
                    System.exit(1);
                }
                System.out.println("Associating basename: " + sampleBasename + " to group: " + groupId);
                deCalculator.associateSampleToGroup(sampleBasename, groupId);
            }
            final int groupSize = (groupBasenames.split(",")).length;
            groupSizes.put(groupId, groupSize);
        }
        groups = groupSet.toArray(new String[groupSet.size()]);
    }

    public String[] getGroups() {
        return groups;
    }

    /**
     * Return true if the basename is on the command line as an input basename.
     *
     * @param basename
     * @param inputFilenames
     * @return
     */
    private boolean isInputBasename(final String basename, final String[] inputFilenames) {
        for (final String inputFilename : inputFilenames) {
            if (FilenameUtils.getName(AlignmentReaderImpl.getBasename(inputFilename)).equals(basename)) {
                return true;
            }
        }
        return false;
    }

    private ArrayList<GroupComparison> groupComparisons;

    public ArrayList<GroupComparison> parseCompare(final String compare) {
        groupComparisons = new ArrayList<GroupComparison>();
        if (compare == null) {
            return groupComparisons;
        }
        String comparisonPairs[] = compare.split(",");
        ObjectSet<String> groupSet = new ObjectArraySet<String>();
        groupSet.addAll(ObjectArrayList.wrap(groups));

        int comparisonIndex = 0;
        for (String pair : comparisonPairs) {
            final String[] groupLanguageText = pair.split("/");
            if (groupLanguageText.length > 2) {
                System.err.println(
                        "Group comparison description" + pair + " used in --compare must name at most two groups. "
                                + "Please see the --groups option to define groups.");
                System.exit(1);
            }
            if (groupLanguageText.length == 1) {
                // comparisons with only one group are ignored.
                continue;
            }
            if (groupLanguageText.length < 1) {
                System.err.println(
                        "Group comparison description" + pair + " used in --compare must name at least one groups. "
                                + "Please see the --groups option to define groups.");
                System.exit(1);
            }
            if (groupLanguageText[0].equals(groupLanguageText[1])) {
                System.err.printf(
                        "Group names must be different when specifying two groups %s/%s. Use a single group %s or leave out the --compare option to disable group comparisons.%n",
                        groupLanguageText[0], groupLanguageText[0], groupLanguageText[0]);
                System.exit(1);
            } else {
                System.err.printf("Comparing %s/%s %n", groupLanguageText[0], groupLanguageText[1]);
            }
            int groupIndex = -1;
            for (final String groupId : groupLanguageText) {
                if (!groupSet.contains(groupId)) {
                    System.err.println("Group " + groupId + " used in --compare must be defined. "
                            + "Please see the --groups option to define groups.");
                    System.exit(1);
                }
            }

            int firstGroupIndex = 0;
            int secondGroupIndex = groupLanguageText.length - 1;
            groupComparisons.add(new GroupComparison(groupLanguageText[firstGroupIndex],
                    groupLanguageText[secondGroupIndex], groupIndex(groupLanguageText[firstGroupIndex]),
                    groupIndex(groupLanguageText[secondGroupIndex]), comparisonIndex++));
        }
        return groupComparisons;

    }

    private int groupIndex(final String groupName) {
        int index = 0;
        for (final String s : getGroups()) {
            if (groupName.equals(s)) {
                return index;
            }
            index++;
        }
        assert false : "group " + groupName + " must be found.";
        return -1;
    }

    public ObjectArraySet<NormalizationMethod> parseNormalization(final JSAPResult jsapResult) {
        final String normalizationMethodNames = jsapResult.getString("normalization-methods");
        final String[] methodIds = normalizationMethodNames.split(",");
        this.normalizationMethods = new ObjectArraySet<NormalizationMethod>();
        LOG.info("Looking up services");
        for (final String methodId : methodIds) {
            for (final NormalizationMethod aMethod : normalizationMethodLoader) {
                if (aMethod.getIdentifier().equals(methodId)) {
                    LOG.info("Adding " + aMethod.getIdentifier());
                    this.normalizationMethods.add(aMethod);
                }
            }
        }
        if (this.normalizationMethods.size() == 0) {
            LOG.error("Could not locate any normalization method with the names provided: "
                    + normalizationMethodNames);
            System.exit(1);
        }
        return normalizationMethods;
    }

    public boolean checkTtest() {
        boolean flag = true;
        for (final int size : groupSizes.values()) {
            if (size < 2) {
                flag = false;
                System.out.println("Insufficient data for t-test: need at least 2 samples per group.");
            }
        }
        return flag;
    }

    public DifferentialExpressionResults evaluateDifferentialExpressionStatistics(
            final DifferentialExpressionCalculator deCalculator, final boolean doComparison,
            final ObjectArraySet<NormalizationMethod> normalizationMethods) {
        DifferentialExpressionResults results = null;

        if (!doComparison) {
            deCalculator.createDefaultGroup();
            // make groupComparison be all samples
            groupComparisons.add(new GroupComparison("all-samples", "all-samples", 0, 0, 0));
        }
        results = null;
        deCalculator.setRunInParallel(runInParalell);
        LOG.info("Evaluating statistics..");
        if (eval("raw-counts")) {
            results = deCalculator.compare(results, null, new CountRawSampleIdsCalculator());
        }

        for (final NormalizationMethod method : normalizationMethods) {
            method.normalize(deCalculator, getGroupComparison());

            // evaluate per-sample statistics:

            if (eval("samples")) {
                results = deCalculator.compare(results, method, new SampleCountCalculator());
            }

            if (eval("counts")) {
                results = deCalculator.compare(results, method, new CountCalculator());
            }

            if (doComparison) {
                if (eval("group-averages")) {
                    results = deCalculator.compare(results, method, new AverageCalculator(), groups);
                }
                for (final GroupComparison groupComparison : groupComparisons) {

                    // evaluate differences between groups:
                    if (eval("fold-change")) {
                        results = deCalculator.compare(results, method, new FoldChangeCalculator(),
                                groupComparison);
                    }
                    //results.setOmitNonInformativeColumns(omitNonInformativeColumns);
                    if (eval("fold-change-magnitude")) {
                        results = deCalculator.compare(results, method, new FoldChangeMagnitudeCalculator(),
                                groupComparison);
                    }
                    if (eval("log2-fold-change")) {
                        results = deCalculator.compare(results, method, new Log2FoldChangeCalculator(),
                                groupComparison);
                    }

                    ttestflag = checkTtest();
                    if (ttestflag) {
                        if (eval("t-test")) {
                            results = deCalculator.compare(results, method, new TTestCalculator(), groupComparison);
                        }
                    }
                    if (eval("fisher")) {
                        results = deCalculator.compare(results, method, new FisherExactTestCalculator(),
                                groupComparison);
                    }
                    if (eval("fisher-r")) {
                        results = deCalculator.compare(results, method, new FisherExactRCalculator(),
                                groupComparison);
                    }
                    if (eval("chi-square")) {
                        results = deCalculator.compare(results, method, new ChiSquareTestCalculator(),
                                groupComparison);
                    }
                }
                final BenjaminiHochbergAdjustment benjaminiHochbergAdjustment = new BenjaminiHochbergAdjustment();
                final BonferroniAdjustment bonferroniAdjustment = new BonferroniAdjustment();

                if (eval("Bonferroni")) {
                    results = bonferroniAdjustment.adjust(results, method, "t-test", "fisher-exact-test",
                            "fisher-exact-R", "chi-square-test");
                }
                if (eval("BH")) {
                    results = benjaminiHochbergAdjustment.adjust(results, method, "t-test", "fisher-exact-test",
                            "fisher-exact-R", "chi-square-test");
                }

            }
        }
        return results;
    }

    private GroupComparison makeComparisonForGroup(String group) {
        return new GroupComparison(group, group, groupIndex(group), groupIndex(group), -1);
    }

    public boolean eval(final String evalName) {
        return evalSet.contains(evalName.toLowerCase());
    }

    private ObjectSet<String> evalSet;

    public void setEvalNames(final ObjectSet<String> evalSet) {
        this.evalSet = evalSet;
    }

    public void setRunInParallel(boolean parallel) {
        this.runInParalell = parallel;
    }

    public String[] getGroupComparison() {
        String[] result = new String[2];
        GroupComparison val = groupComparisons.get(0);
        result[0] = val.nameGroup1;
        result[1] = val.nameGroup2;
        return result;
    }
}