chibi.gemmaanalysis.CoexpressionAnalysisService.java Source code

Java tutorial

Introduction

Here is the source code for chibi.gemmaanalysis.CoexpressionAnalysisService.java

Source

/*
 * The Gemma project
 * 
 * Copyright (c) 2008 Columbia University
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */
package chibi.gemmaanalysis;

import hep.aida.ref.Histogram1D;

import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;

import org.apache.commons.lang3.time.StopWatch;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import ubic.basecode.dataStructure.matrix.DenseDouble3dMatrix;
import ubic.basecode.dataStructure.matrix.DenseDoubleMatrix;
import ubic.basecode.dataStructure.matrix.DoubleMatrix;
import ubic.basecode.io.reader.HistogramReader;
import ubic.basecode.io.writer.HistogramWriter;
import ubic.basecode.math.CorrelationStats;
import ubic.basecode.math.Distance;
import ubic.basecode.math.distribution.HistogramSampler;
import ubic.basecode.math.metaanalysis.CorrelationEffectMetaAnalysis;
import ubic.gemma.analysis.preprocess.filter.ExpressionExperimentFilter;
import ubic.gemma.analysis.preprocess.filter.FilterConfig;
import ubic.gemma.datastructure.matrix.ExpressionDataDoubleMatrix;
import ubic.gemma.model.expression.arrayDesign.ArrayDesign;
import ubic.gemma.model.expression.arrayDesign.ArrayDesignService;
import ubic.gemma.model.expression.bioAssayData.DesignElementDataVectorService;
import ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector;
import ubic.gemma.model.expression.designElement.CompositeSequence;
import ubic.gemma.model.expression.designElement.CompositeSequenceService;
import ubic.gemma.model.expression.experiment.BioAssaySet;
import ubic.gemma.model.expression.experiment.ExpressionExperiment;
import ubic.gemma.expression.experiment.service.ExpressionExperimentService;
import ubic.gemma.model.genome.Gene;
import ubic.gemma.util.Settings;
import cern.colt.list.DoubleArrayList;

/**
 * Coexpression analysis
 * <p>
 * TODO DOCUMENT ME
 * 
 * @author Raymond
 * @version $Id$
 */
public class CoexpressionAnalysisService {
    protected static final int MIN_NUM_USED = 5;

    private static final int NUM_HISTOGRAM_SAMPLES = 10000;

    private static final int NUM_HISTOGRAM_BINS = 2000;

    private static Log log = LogFactory.getLog(CoexpressionAnalysisService.class.getName());

    private ExpressionExperimentService eeService;

    private DesignElementDataVectorService dedvService;

    private CorrelationEffectMetaAnalysis metaAnalysis;

    private ArrayDesignService adService;

    /**
     * @param csService the csService to set
     */
    protected void setCsService(CompositeSequenceService csService) {
        this.csService = csService;
    }

    private CompositeSequenceService csService;

    /**
     * Create an effect size service
     */
    public CoexpressionAnalysisService() {
        metaAnalysis = new CorrelationEffectMetaAnalysis(true, false);
    }

    /**
     * Create and populate the coexpression matrices (correlation matrix, sample size matrix, expression level matrix)
     * 
     * @param ees
     * @param queryGenes
     * @param targetGenes
     * @param filterConfig
     * @param correlationMethod
     * @return
     */
    public CoexpressionMatrices calculateCoexpressionMatrices(Collection<BioAssaySet> ees,
            Collection<Gene> queryGenes, Collection<Gene> targetGenes, FilterConfig filterConfig,
            CorrelationMethod correlationMethod) {
        if (correlationMethod == null)
            correlationMethod = CorrelationMethod.PEARSON;
        CoexpressionMatrices matrices = new CoexpressionMatrices(ees, queryGenes, targetGenes);
        DenseDouble3dMatrix<Gene, Gene, BioAssaySet> correlationMatrix = matrices.getCorrelationMatrix();
        DenseDouble3dMatrix<Gene, Gene, BioAssaySet> sampleSizeMatrix = matrices.getSampleSizeMatrix();
        int count = 1;
        int numEes = ees.size();
        // calculate correlations
        log.info("Calculating correlation and sample size matrices");
        StopWatch watch = new StopWatch();
        watch.start();
        for (BioAssaySet bas : ees) {
            ExpressionExperiment ee = (ExpressionExperiment) bas;
            log.info("Processing " + ee.getShortName() + " (" + count++ + " of " + numEes + ")");
            int slice = correlationMatrix.getSliceIndexByName(ee);

            // get all the composite sequences
            Collection<ArrayDesign> ads = eeService.getArrayDesignsUsed(ee);
            Collection<CompositeSequence> css = new HashSet<CompositeSequence>();
            for (ArrayDesign ad : ads) {
                css.addAll(adService.getCompositeSequences(ad));
            }
            Map<Gene, Collection<CompositeSequence>> gene2css = getGene2CsMap(css);

            ExpressionDataDoubleMatrix dataMatrix = getExpressionDataMatrix(ee, filterConfig);
            if (dataMatrix == null) {
                log.error("ERROR: cannot process " + ee.getShortName());
                continue;
            }
            for (Gene qGene : queryGenes) {
                int row = correlationMatrix.getRowIndexByName(qGene);
                for (Gene tGene : targetGenes) {
                    int col = correlationMatrix.getColIndexByName(tGene);
                    Collection<CompositeSequence> queryCss = gene2css.get(qGene);
                    Collection<CompositeSequence> targetCss = gene2css.get(tGene);

                    if (queryCss != null && targetCss != null) {
                        CorrelationSampleSize corr = calculateCorrelation(queryCss, targetCss, dataMatrix,
                                correlationMethod);
                        if (corr != null) {
                            correlationMatrix.set(slice, row, col, corr.correlation);
                            sampleSizeMatrix.set(slice, row, col, corr.sampleSize);
                        }
                    }
                }
            }
        }
        watch.stop();
        log.info("Calculated correlations of all " + numEes + " in " + watch);
        return matrices;
    }

    /**
     * Calculate an effect size matrix
     * 
     * @param correlationMatrix
     * @param sampleSizeMatrix
     * @return
     */
    public DoubleMatrix<Gene, Gene> calculateEffectSizeMatrix(
            DenseDouble3dMatrix<Gene, Gene, BioAssaySet> correlationMatrix,
            DenseDouble3dMatrix<Gene, Gene, BioAssaySet> sampleSizeMatrix) {
        DoubleMatrix<Gene, Gene> matrix = new DenseDoubleMatrix<Gene, Gene>(correlationMatrix.rows(),
                correlationMatrix.columns());
        matrix.setRowNames(correlationMatrix.getRowNames());
        matrix.setColumnNames(correlationMatrix.getColNames());

        for (Gene rowId : correlationMatrix.getRowNames()) {
            int rowIndex = matrix.getRowIndexByName(rowId);
            for (Gene colId : correlationMatrix.getColNames()) {
                int colIndex = matrix.getColIndexByName(colId);
                DoubleArrayList correlations = new DoubleArrayList(correlationMatrix.slices());
                DoubleArrayList sampleSizes = new DoubleArrayList(correlationMatrix.slices());
                for (BioAssaySet bas : correlationMatrix.getSliceNames()) {
                    ExpressionExperiment sliceId = (ExpressionExperiment) bas;
                    int sliceIndex = correlationMatrix.getSliceIndexByName(sliceId);
                    double correlation = correlationMatrix.get(sliceIndex, rowIndex, colIndex);
                    double sampleSize = sampleSizeMatrix.get(sliceIndex, rowIndex, colIndex);
                    correlations.add(correlation);
                    sampleSizes.add(sampleSize);
                }
                metaAnalysis.run(correlations, sampleSizes);
                double effectSize = metaAnalysis.getE();
                matrix.set(rowIndex, colIndex, effectSize);
            }
        }
        return matrix;
    }

    /**
     * Calculate the p-values for a max correlation matrix using empirical distributions stored in the gemmaData dir
     * 
     * @param maxCorrelationMatrix
     * @param n specifies which n'th maximum value of the sample to be taken
     * @param ees expression experiments to sample from the gemmaData dir
     * @return p-value matrix
     */
    public DoubleMatrix<String, String> calculateMaxCorrelationPValueMatrix(
            DoubleMatrix<String, String> maxCorrelationMatrix, int n, Collection<BioAssaySet> ees) {
        log.info("Calculating " + n + "-max p-value matrix");
        StopWatch watch = new StopWatch();
        watch.start();
        DoubleMatrix<String, String> pMatrix = new DenseDoubleMatrix<String, String>(maxCorrelationMatrix.rows(),
                maxCorrelationMatrix.columns());
        pMatrix.setRowNames(maxCorrelationMatrix.getRowNames());
        pMatrix.setColumnNames(maxCorrelationMatrix.getColNames());

        // fill matrix with NaNs
        for (int i = 0; i < pMatrix.rows(); i++)
            for (int j = 0; j < pMatrix.columns(); j++)
                pMatrix.set(i, j, Double.NaN);

        // fill a histogram with the empirical distribution of max correlations
        Histogram1D hist = new Histogram1D("Max correlation empirical distribution", NUM_HISTOGRAM_BINS, -1d, 1d);
        Collection<HistogramSampler> histSamplers = getHistogramSamplers(ees);
        for (int i = 0; i < NUM_HISTOGRAM_SAMPLES; i++) {
            DoubleArrayList samples = new DoubleArrayList(histSamplers.size());
            for (HistogramSampler sampler : histSamplers) {
                samples.add(sampler.nextSample());
            }
            samples.sort();
            if (samples.size() > n)
                hist.fill(samples.get(samples.size() - 1 - n));
        }

        HistogramWriter out = new HistogramWriter();
        try {
            out.write(hist, new FileWriter("hist.txt"));
        } catch (IOException e) {
        }

        // calculate the p-value
        for (int i = 0; i < maxCorrelationMatrix.rows(); i++) {
            for (int j = 0; j < maxCorrelationMatrix.columns(); j++) {
                double corr = maxCorrelationMatrix.get(i, j);
                if (Double.isNaN(corr) || corr == 0d)
                    pMatrix.set(i, j, Double.NaN);
                else {
                    double pVal = getPvalue(hist, corr);
                    pMatrix.set(i, j, pVal);
                }
            }
        }
        watch.stop();
        log.info("Finished calculating " + n + "-max p-value matrix in " + watch);
        return pMatrix;
    }

    /**
     * Filter the specified matrix so columns (expression experiments) of missing data are removed
     * 
     * @param matrix
     * @return
     */
    public DenseDouble3dMatrix<Gene, Gene, ExpressionExperiment> filterCoexpressionMatrix(
            DenseDouble3dMatrix<Gene, Gene, ExpressionExperiment> matrix) {
        log.info("Filtering expression experiments...");
        // find empty columns
        List<ExpressionExperiment> filteredEeIds = new ArrayList<ExpressionExperiment>();
        EE: for (ExpressionExperiment eeId : matrix.getSliceNames()) {
            int slice = matrix.getSliceIndexByName(eeId);
            for (int i = 0; i < matrix.rows(); i++)
                for (int j = 0; j < matrix.columns(); j++)
                    if (!matrix.isMissing(slice, i, j)) {
                        filteredEeIds.add(eeId);
                        continue EE;
                    }
        }
        log.info(filteredEeIds.size() + " of " + matrix.slices() + " passed");

        // create a new filtered matrix
        DenseDouble3dMatrix<Gene, Gene, ExpressionExperiment> filteredMatrix = new DenseDouble3dMatrix<Gene, Gene, ExpressionExperiment>(
                filteredEeIds.size(), matrix.rows(), matrix.columns());
        filteredMatrix.setSliceNames(filteredEeIds);
        for (int i = 0; i < filteredEeIds.size(); i++) {
            ExpressionExperiment eeId = filteredEeIds.get(i);
            int slice = filteredMatrix.getSliceIndexByName(eeId);
            for (int j = 0; j < matrix.rows(); j++) {
                for (int k = 0; k < matrix.columns(); k++) {
                    double val = matrix.get(matrix.getSliceIndexByName(eeId), j, k);
                    filteredMatrix.set(slice, j, k, val);
                }
            }
        }

        return filteredMatrix;

    }

    public ArrayDesignService getAdService() {
        return adService;
    }

    /**
     * Get expression data matrix for the specified expression experiment
     * 
     * @param ee
     * @param filterConfig
     * @return an expression data double matrix
     */
    public ExpressionDataDoubleMatrix getExpressionDataMatrix(ExpressionExperiment ee, FilterConfig filterConfig) {
        StopWatch watch = new StopWatch();
        watch.start();
        log.info(ee.getShortName() + ": Getting expression data matrix");

        // get dedvs to build expression data matrix
        Collection<ProcessedExpressionDataVector> dedvs = eeService.getProcessedDataVectors(ee);
        dedvService.thaw(dedvs);

        // build and filter expression data matrix
        Collection<ArrayDesign> arrayDesignsUsed = eeService.getArrayDesignsUsed(ee);
        ExpressionExperimentFilter filter = new ExpressionExperimentFilter(arrayDesignsUsed, filterConfig);
        ExpressionDataDoubleMatrix eeDoubleMatrix;
        try {
            eeDoubleMatrix = filter.getFilteredMatrix(dedvs);
        } catch (Exception e) {
            log.error(e.getMessage());
            return null;
        }
        watch.stop();
        log.info("Retrieved expression data matrix in " + watch);
        return eeDoubleMatrix;
    }

    /**
     * Get a gene to composite sequence map // FIXME This corresponds to an existing service method?
     * 
     * @param css
     * @return gene to composite sequences map
     */
    public Map<Gene, Collection<CompositeSequence>> getGene2CsMap(Collection<CompositeSequence> css) {
        Map<CompositeSequence, Collection<Gene>> cs2gene = csService.getGenes(css);
        // filter for specific cs 2 gene
        for (Iterator<Map.Entry<CompositeSequence, Collection<Gene>>> it = cs2gene.entrySet().iterator(); it
                .hasNext();) {
            Map.Entry<CompositeSequence, Collection<Gene>> entry = it.next();
            Collection<Gene> genes = entry.getValue();
            if (genes.size() > 1)
                it.remove();
        }

        Map<Gene, Collection<CompositeSequence>> gene2css = new HashMap<Gene, Collection<CompositeSequence>>();
        for (Map.Entry<CompositeSequence, Collection<Gene>> entry : cs2gene.entrySet()) {
            CompositeSequence cs = entry.getKey();
            Collection<Gene> genes = entry.getValue();
            for (Gene gene : genes) {
                Collection<CompositeSequence> c = gene2css.get(gene);
                if (c == null) {
                    c = new HashSet<CompositeSequence>();
                    gene2css.put(gene, c);
                }
                c.add(cs);
            }
        }
        return gene2css;
    }

    /**
     * Get the histogram samplers for the specified expression experiments
     * 
     * @param ees
     * @return a collection of histogram samplers
     */
    public Collection<HistogramSampler> getHistogramSamplers(Collection<BioAssaySet> ees) {
        Collection<HistogramSampler> histSamplers = new HashSet<HistogramSampler>();
        for (BioAssaySet bas : ees) {
            ExpressionExperiment ee = (ExpressionExperiment) bas;
            String fileName = Settings.getAnalysisStoragePath() + ee.getShortName() + ".correlDist.txt";
            try {
                HistogramSampler sampler = readHistogramFile(fileName);
                if (sampler == null)
                    log.error("ERROR: " + ee.getShortName() + " has an invalid correlation distribution");
                else
                    histSamplers.add(sampler);
            } catch (IOException e) {
                log.error(e.getMessage());
                log.error("ERROR: Unable to read correlation distribution file for " + ee.getShortName());
            }
        }
        return histSamplers;
    }

    /**
     * Fold the 3D correlation matrix to a 2D matrix with maximum correlations
     * 
     * @param matrix - correlation matrix
     * @param n - the Nth largest correlation
     * @return matrix with Nth largest correlations
     */
    public DoubleMatrix<Gene, Gene> getMaxCorrelationMatrix(
            DenseDouble3dMatrix<Gene, Gene, ExpressionExperiment> matrix, int n) {
        log.info("Calculating " + n + "-max matrix");
        StopWatch watch = new StopWatch();
        watch.start();
        DoubleMatrix<Gene, Gene> maxMatrix = new DenseDoubleMatrix<Gene, Gene>(matrix.rows(), matrix.columns());
        maxMatrix.setRowNames(matrix.getRowNames());
        maxMatrix.setColumnNames(matrix.getColNames());
        for (int i = 0; i < matrix.rows(); i++) {
            for (int j = 0; j < matrix.columns(); j++) {
                DoubleArrayList list = new DoubleArrayList();
                for (int k = 0; k < matrix.slices(); k++) {
                    double val = matrix.get(k, i, j);
                    if (!Double.isNaN(val)) {
                        list.add(val);
                    }
                }
                list.sort();
                double val = Double.NaN;
                if (list.size() > n)
                    val = list.get(list.size() - 1 - n);
                maxMatrix.set(i, j, val);
            }
        }
        watch.stop();
        log.info("Finished calculating " + n + "-max matrix in " + watch);
        return maxMatrix;
    }

    public void setAdService(ArrayDesignService adService) {
        this.adService = adService;
    }

    public void setDedvService(DesignElementDataVectorService dedvService) {
        this.dedvService = dedvService;
    }

    public void setEeService(ExpressionExperimentService eeService) {
        this.eeService = eeService;
    }

    /**
     * Calculates all pairwise correlations between the query and target composite sequences and then takes the median
     * correlation
     * 
     * @param queryCss
     * @param targetCss
     * @param dataMatrix
     * @return
     */
    private CorrelationSampleSize calculateCorrelation(Collection<CompositeSequence> queryCss,
            Collection<CompositeSequence> targetCss, ExpressionDataDoubleMatrix dataMatrix,
            CorrelationMethod method) {
        TreeMap<Double, Double> correlNumUsedMap = new TreeMap<Double, Double>();
        // calculate all pairwise correlations between cs groups
        for (CompositeSequence queryCs : queryCss) {
            for (CompositeSequence targetCs : targetCss) {
                Double[] queryVals = dataMatrix.getRow(queryCs);
                Double[] targetVals = dataMatrix.getRow(targetCs);
                if (queryVals != null && targetVals != null) {
                    double[] v1 = new double[queryVals.length];
                    double[] v2 = new double[targetVals.length];
                    for (int i = 0; i < queryVals.length; i++) {
                        if (queryVals[i] != null)
                            v1[i] = queryVals[i];
                        else
                            v1[i] = Double.NaN;
                    }
                    for (int i = 0; i < targetVals.length; i++) {
                        if (targetVals[i] != null)
                            v2[i] = targetVals[i];
                        else
                            v2[i] = Double.NaN;
                    }

                    int numUsed = 0;
                    for (int i = 0; i < v1.length && i < v2.length; i++)
                        if (!Double.isNaN(v1[i]) && !Double.isNaN(v2[i]))
                            numUsed++;
                    if (numUsed > MIN_NUM_USED) {
                        double correlation;
                        switch (method) {
                        case SPEARMAN:
                            correlation = Distance.spearmanRankCorrelation(new DoubleArrayList(v1),
                                    new DoubleArrayList(v2));
                            break;
                        case PEARSON:
                        default:
                            correlation = CorrelationStats.correl(v1, v2);

                        }
                        correlNumUsedMap.put(correlation, (double) numUsed);
                    }
                }
            }
        }
        if (correlNumUsedMap.size() == 0) {
            return null;
        }
        List<Double> correlations = new ArrayList<Double>(correlNumUsedMap.keySet());
        // take the median correlation
        Double correlation = correlations.get(correlations.size() / 2);
        Double sampleSize = correlNumUsedMap.get(correlation);
        CorrelationSampleSize c = new CorrelationSampleSize();
        c.correlation = correlation;
        c.sampleSize = sampleSize;
        return c;

    }

    /**
     * Calculates a p-value from a histogram
     * 
     * @param histogram
     * @param x
     * @return
     */
    private double getPvalue(Histogram1D histogram, double x) {
        int bin = histogram.xAxis().coordToIndex(x);
        double sum = 0.0d;
        for (int i = 0; i <= bin; i++) {
            sum += histogram.binHeight(i);
        }
        if (sum == 0d)
            return 0d;

        return sum / NUM_HISTOGRAM_SAMPLES;
    }

    /**
     * Read a correlation distribution
     * 
     * @param fileName
     * @return a histogram sampler for the read distribution
     * @throws IOException
     */
    private HistogramSampler readHistogramFile(String fileName) throws IOException {
        HistogramReader in = new HistogramReader(fileName);
        Histogram1D hist = in.read1D();
        HistogramSampler sampler = new HistogramSampler(hist);
        return sampler;
    }

    /**
     * Stores matrices related to coexpression analysis
     * 
     * @author raymond
     */
    public class CoexpressionMatrices {
        private DenseDouble3dMatrix<Gene, Gene, BioAssaySet> correlationMatrix;

        private DenseDouble3dMatrix<Gene, Gene, BioAssaySet> sampleSizeMatrix;

        private Map<ExpressionExperiment, String> eeNameMap;

        private Map<Gene, String> geneNameMap;

        /**
         * @param ees
         * @param queryGenes
         * @param targetGenes
         */
        public CoexpressionMatrices(Collection<BioAssaySet> ees, Collection<Gene> queryGenes,
                Collection<Gene> targetGenes) {
            List<BioAssaySet> eeList = new ArrayList<BioAssaySet>(ees);
            List<Gene> qGeneList = new ArrayList<Gene>(queryGenes);
            List<Gene> tGeneList = new ArrayList<Gene>(targetGenes);

            correlationMatrix = new DenseDouble3dMatrix<Gene, Gene, BioAssaySet>(eeList, qGeneList, tGeneList);
            sampleSizeMatrix = new DenseDouble3dMatrix<Gene, Gene, BioAssaySet>(eeList, qGeneList, tGeneList);
            // NaN matrices
            for (int k = 0; k < correlationMatrix.slices(); k++) {
                for (int i = 0; i < correlationMatrix.rows(); i++) {
                    for (int j = 0; j < correlationMatrix.columns(); j++) {
                        correlationMatrix.set(k, i, j, Double.NaN);
                        sampleSizeMatrix.set(k, i, j, Double.NaN);
                    }
                }
            }

            // generate name maps
            eeNameMap = new HashMap<ExpressionExperiment, String>();
            for (BioAssaySet bas : ees) {
                ExpressionExperiment ee = (ExpressionExperiment) bas;
                eeNameMap.put(ee, ee.getShortName());
            }

            geneNameMap = new HashMap<Gene, String>();
            for (Gene gene : queryGenes) {
                String name = gene.getOfficialSymbol();
                if (name == null)
                    name = gene.getId().toString();
                geneNameMap.put(gene, name);
            }
            for (Gene gene : targetGenes) {
                String name = gene.getOfficialSymbol();
                if (name == null)
                    name = gene.getId().toString();
                geneNameMap.put(gene, name);
            }
        }

        public DenseDouble3dMatrix<Gene, Gene, BioAssaySet> getCorrelationMatrix() {
            return correlationMatrix;
        }

        public Map<ExpressionExperiment, String> getEeNameMap() {
            return eeNameMap;
        }

        public Map<Gene, String> getGeneNameMap() {
            return geneNameMap;
        }

        public DenseDouble3dMatrix<Gene, Gene, BioAssaySet> getSampleSizeMatrix() {
            return sampleSizeMatrix;
        }

        public void setCorrelationMatrix(DenseDouble3dMatrix<Gene, Gene, BioAssaySet> correlationMatrix) {
            this.correlationMatrix = correlationMatrix;
        }

        public void setSampleSizeMatrix(DenseDouble3dMatrix<Gene, Gene, BioAssaySet> sampleSizeMatrix) {
            this.sampleSizeMatrix = sampleSizeMatrix;
        }
    }

    public static enum CorrelationMethod {
        SPEARMAN, PEARSON
    }

    /**
     * Stores pairs of genes
     * 
     * @author raymond
     */
    public class GenePair {
        private Gene gene1;

        private Gene gene2;

        public GenePair(Gene gene1, Gene gene2) {
            this.gene1 = gene1;
            this.gene2 = gene2;
        }

        @Override
        public String toString() {
            String s1 = gene1.getOfficialSymbol();
            String s2 = gene2.getOfficialSymbol();
            if (s1 == null)
                s1 = gene1.getId().toString();
            if (s2 == null)
                s2 = gene2.getId().toString();
            return s1 + ":" + s2;
        }
    }

    private class CorrelationSampleSize {
        double correlation;

        double sampleSize;
    }

}