edu.cornell.med.icb.goby.stats.DifferentialExpressionCalculator.java Source code

Java tutorial

Introduction

Here is the source code for edu.cornell.med.icb.goby.stats.DifferentialExpressionCalculator.java

Source

/*
 * Copyright (C) 2009-2010 Institute for Computational Biomedicine,
 *                    Weill Medical College of Cornell University
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

package edu.cornell.med.icb.goby.stats;

import edu.cornell.med.icb.goby.algorithmic.data.GroupComparison;
import edu.cornell.med.icb.identifier.IndexedIdentifier;
import it.unimi.dsi.fastutil.ints.Int2IntAVLTreeMap;
import it.unimi.dsi.fastutil.ints.Int2IntMap;
import it.unimi.dsi.fastutil.ints.Int2IntOpenHashMap;
import it.unimi.dsi.fastutil.ints.IntArrayList;
import it.unimi.dsi.fastutil.objects.*;
import it.unimi.dsi.lang.MutableString;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import java.util.Map;
import java.util.Set;

/**
 * @author Fabien Campagne
 *         Date: Jan 11, 2010
 *         Time: 3:35:24 PM
 */
public class DifferentialExpressionCalculator {
    private final Set<String> groups;
    private final Map<String, String> sampleToGroupMap;
    private final IndexedIdentifier elementLabels;
    private int elementsPerSample;
    private int numberOfSamples;
    private final Map<String, IntArrayList> sampleToCounts;
    private Object2DoubleMap<String> sampleProportions;
    private final IntArrayList lengths;
    private final Int2IntMap elementLabelToElementType;
    private Object2IntMap<String> sampleToSumCount;
    private boolean runInParallel;

    public boolean isRunInParallel() {
        return runInParallel;
    }

    public void setRunInParallel(boolean runInParallel) {
        this.runInParallel = runInParallel;
    }

    /**
     * Used to log debug and informational messages.
     */
    private static final Log LOG = LogFactory.getLog(BullardUpperQuartileNormalization.class);
    private ObjectArrayList<String> allSamples;

    /**
     * Return the type of the element.
     *
     * @param elementId
     * @return ElementType
     */
    public ElementType getElementType(final MutableString elementId) {
        final int elementIndex = elementLabels.get(elementId);
        final int ordinal = elementLabelToElementType.get(elementIndex);
        if (ordinal == -1) {
            return ElementType.OTHER;
        } else {
            return ElementType.values()[ordinal];
        }
    }

    /**
     * Force the recalculation of SumOverlapCounts. This is useful to ensure that all the elements are counted for each sample.
     */
    public void resetSumOverlapCounts() {
        sampleToSumCount.clear();
    }

    /**
     * Associate each sample to the default group "all-samples".
     */
    public void createDefaultGroup() {

        for (final String sampleId : sampleToCounts.keySet()) {
            associateSampleToGroup(sampleId, "all-samples/all-samples");
        }
    }

    public enum ElementType {
        EXON, TRANSCRIPT, GENE, OTHER
    }

    /**
     * The number of alignment entries observed in each sample.
     */
    private final Object2LongMap<String> numAlignedInSample;

    public DifferentialExpressionCalculator() {
        super();
        groups = new ObjectArraySet<String>();
        elementLabels = new IndexedIdentifier(100000);
        sampleToGroupMap = new Object2ObjectOpenHashMap<String, String>();
        numAlignedInSample = new Object2LongOpenHashMap<String>();
        sampleToCounts = new Object2ObjectOpenHashMap<String, IntArrayList>();
        lengths = new IntArrayList();
        elementLabelToElementType = new Int2IntAVLTreeMap();
        elementLabelToElementType.defaultReturnValue(-1);
        sampleToSumCount = new Object2IntRBTreeMap<String>();
        sampleToSumCount.defaultReturnValue(-1);
    }

    public double calculateNormalized(final int readCountInt, final int annotLength,
            final double normalizationFactor) {
        final double readCount = readCountInt;
        final double length = annotLength; // in bases
        final double sampleReadCount = normalizationFactor; // in reads
        return readCount / (length / 1000.0d) / (normalizationFactor / 1E6d);
    }

    public double calculateNormalized(final double readCountInt, final int annotLength,
            final double normalizationFactor) {
        final double readCount = readCountInt;
        final double length = annotLength; // in bases
        final double sampleReadCount = normalizationFactor; // in reads
        return readCount / (length / 1000.0d) / (normalizationFactor / 1E6d);
    }

    public synchronized void defineGroup(final String label) {
        groups.add(label);
    }

    /**
     * Define an element but don't specify the type. The version with the type is the preferred version.
     *
     * @param label element-id
     * @return the index of the element
     */
    public synchronized int defineElement(final String label) {
        return defineElement(label, ElementType.OTHER);
    }

    /**
     * Define an element and it's type.
     *
     * @param label element-id
     * @param type  the type (gene, exon, ...)
     * @return the index of the element
     */
    public synchronized int defineElement(final String label, final ElementType type) {
        final MutableString elementLabel = new MutableString(label).compact();
        final int elementIndex = elementLabels.registerIdentifier(elementLabel);
        if (elementLabelToElementType.get(elementIndex) == -1) {
            // Don't REPLACE the ElementType for a label. If it was set before, leave it alone.
            elementLabelToElementType.put(elementIndex, type.ordinal());
        }
        return elementIndex;
    }

    public Int2IntMap getElementLabelToElementTypeMap() {
        return elementLabelToElementType;
    }

    /**
     * Define the number of sequence bases that this element spans. The bases do not need to be contiguous (i.e., multiple
     * exons of a transcript).
     *
     * @param elementIndex index of the element associated with length.
     * @param length       length of the element.
     */
    public synchronized void defineElementLength(final int elementIndex, final int length) {
        if (lengths.size() == elementIndex) {
            lengths.add(length);
        } else {
            if (lengths.size() < elementIndex) {
                lengths.size(elementIndex + 1);
            }
            lengths.set(elementIndex, length);
        }
    }

    public synchronized void associateSampleToGroup(final String sample, final String group) {
        sampleToGroupMap.put(sample, group);
    }

    /**
     * Return the length of an element.
     *
     * @param elementId
     * @return
     */
    public int getElementLength(final MutableString elementId) {
        final int elementIndex = elementLabels.getInt(elementId);
        return lengths.get(elementIndex);
    }

    /**
     * Observe counts for a specific element and sample.
     *
     * @param sample    sample id.
     * @param elementId element id.
     * @param count     Number of reads that can be assigned to the element.
     */
    public void observe(final String sample, final String elementId, final double count) {

        this.observe(sample, elementId, (int) Math.round(count));
    }

    /**
     * Observe counts for a sample.
     *
     * @param sample    sample id.
     * @param elementId element id.
     * @param count     Number of reads that can be assigned to the element.
     */
    public void observe(final String sample, final String elementId, final int count) {

        IntArrayList counts = sampleToCounts.get(sample);
        // the following looks a bit complicated. We are trying to avoid synchronizing every time observe is called.
        // This would slow the whole process too much. Instead, we synchronize only when we need to create a counts
        // data structure for a new sample, which should not happen too often.
        if (counts == null) {
            synchronized (this) {
                counts = sampleToCounts.get(sample);
                if (counts == null) {
                    counts = new IntArrayList(elementsPerSample);
                    counts.size(elementsPerSample);
                    sampleToCounts.put(sample, counts);
                }
            }
        }

        final int elementIndex = elementLabels.getInt(new MutableString(elementId));
        counts.set(elementIndex, count);
    }

    /**
     * Return the element index of element identified by id.
     *
     * @param elementId id of the element.
     * @return index of the element.
     */
    public int getElementIndex(final String elementId) {
        return elementLabels.getInt(new MutableString(elementId));

    }

    /**
     * Define the number of alignment entries found in each sample.
     *
     * @param sampleId            The sample
     * @param numAlignedInSamples The number of alignment entries observed in the sample.
     */
    public synchronized void setNumAlignedInSample(final String sampleId, final long numAlignedInSamples) {
        numAlignedInSample.put(sampleId, numAlignedInSamples);
    }

    /**
     * Return the number of alignment entries found in the sample.
     *
     * @param sampleId Identifier of the sample.
     * @return the number of alignment entries found in the sample.
     */
    public synchronized long getNumAlignedInSample(final String sampleId) {
        return numAlignedInSample.getLong(sampleId);
    }

    /**
     * Get the sampleToGroupMap object.
     *
     * @return map of sample id's to group names.
     */
    public Map<String, String> getSampleToGroupMap() {
        return sampleToGroupMap;
    }

    /**
     * Get the group that the sample belongs to.
     *
     * @param sampleId Id of the sample.
     * @return group id to which this sample belongs to.
     */
    public String getGroup(String sampleId) {
        return sampleToGroupMap.get(sampleId);
    }

    public DifferentialExpressionResults compare(DifferentialExpressionResults results,
            final NormalizationMethod method, final StatisticCalculator tester, final GroupComparison comparison) {
        return compare(results, method, tester, comparison.nameGroup1, comparison.nameGroup2);
    }

    public DifferentialExpressionResults compare(DifferentialExpressionResults results,
            final NormalizationMethod method, final StatisticCalculator tester, final String... group) {
        if (results == null) {
            results = new DifferentialExpressionResults();
        }
        if (tester.installed()) {
            if (tester.canDo(group)) {
                tester.setResults(results);
                return tester.evaluate(this, method, results, group);
            } else {
                LOG.warn("The number of groups to compare is not supported by the calculator ");
                return results;
            }
        } else {
            LOG.warn("Diffexp test=" + tester.getClass().getName() + " is not 'installed' and will not be run.");
            return results;
        }
    }

    public DifferentialExpressionResults compare(final StatisticCalculator tester, final NormalizationMethod method,
            final String... group) {
        final DifferentialExpressionResults results = new DifferentialExpressionResults();
        if (tester.installed()) {
            assert !tester
                    .canDo(group) : "The number of groups to compare is not supported by the specified calculator.";
            tester.setResults(results);
            return tester.evaluate(this, method, results, group);
        } else {
            return results;
        }
    }

    /**
     * Reserve storage for specified number of elements and samples. One DE test will be
     * conducted for each element.
     *
     * @param elementsPerSample
     * @param numberOfSamples
     */
    public void reserve(final int elementsPerSample, final int numberOfSamples) {
        this.elementsPerSample = elementsPerSample;
        this.numberOfSamples = numberOfSamples;
        lengths.size(elementsPerSample);
    }

    /**
     * Returns the sample ids that belong to a group.
     *
     * @param groupId Id of the group.
     * @return The set of samples that belong to group
     */
    public ObjectArraySet<String> getSamples(final String groupId) {
        final ObjectArraySet<String> samples = new ObjectArraySet<String>();
        for (final String sampleId : sampleToGroupMap.keySet()) {
            if (sampleToGroupMap.get(sampleId).equals(groupId)) {
                samples.add(sampleId);
            }
        }
        return samples;
    }

    public ObjectSet<MutableString> getElementIds() {
        return elementLabels.keySet();
    }

    /**
     * Get the normalized expression value an element in a given sample.  Equivalent
     * to calling the {@link NormalizationMethod#getNormalizedExpressionValue(DifferentialExpressionCalculator, String, it.unimi.dsi.lang.MutableString)}
     * method.
     *
     * @param sampleId            sample identifier
     * @param normalizationMethod Normalization method (adjusts the denominator of the RPKM value).
     * @param elementId           the element for which a normalized expression value is sought.
     * @return normalized expression value scaled by length and global normalization method.
     */
    public double getNormalizedExpressionValue(final String sampleId, final NormalizationMethod normalizationMethod,
            final MutableString elementId) {
        return normalizationMethod.getNormalizedExpressionValue(this, sampleId, elementId);

    }

    /**
     * Get the stored overlap count for an element in a given sample.
     *
     * @param sample
     * @param elementId
     * @return
     */
    public int getOverlapCount(final String sample, final MutableString elementId) {
        final IntArrayList counts = sampleToCounts.get(sample);
        if (counts == null) {
            return 0;
        }
        final int elementIndex = elementLabels.get(elementId);

        if (elementIndex < counts.size()) {
            return counts.get(elementIndex);
        } else {
            return 0;
        }
    }

    /**
     * Returns the sum of counts in a given sample.
     *
     * @param sample
     * @return Returns the sum of counts in a given sample.
     */
    public synchronized int getSumOverlapCounts(final String sample) {

        final int sumCountsCached = sampleToSumCount.getInt(sample);
        if (sumCountsCached != -1) {
            return sumCountsCached;
        }
        int sumCounts = 0;
        final IntArrayList counts = sampleToCounts.get(sample);
        if (counts == null) {
            return 0;
        }
        for (final int count : counts) {
            sumCounts += count;
        }
        sampleToSumCount.put(sample, sumCounts);
        return sumCounts;
    }

    public String[] samples() {
        final Set<String> sampleSet = sampleToGroupMap.keySet();
        return sampleSet.toArray(new String[sampleSet.size()]);
    }

    /**
     * Returns the proportion of counts that originate from a certain sample. The number of counts in the sample
     * is divided by the sum of counts over all the samples in the experiment.
     *
     * @param sample
     * @return
     */
    public double getSampleProportion(final String sample) {
        if (sampleProportions == null) {
            sampleProportions = new Object2DoubleArrayMap<String>();
            sampleProportions.defaultReturnValue(-1);
        }
        final double proportion;

        if (!sampleProportions.containsKey(sample)) {
            int sumOverSamples = 0;
            for (final String s : samples()) {
                sumOverSamples += getSumOverlapCounts(s);
            }
            for (final String s : samples()) {
                final double sampleProportion = ((double) getSumOverlapCounts(s)) / (double) sumOverSamples;
                this.sampleProportions.put(s, sampleProportion);
            }
        }
        proportion = sampleProportions.get(sample);
        assert proportion != -1 : " Proportion must be defined for sample " + sample;
        return proportion;
    }

}