ubic.gemma.core.analysis.expression.diff.GeneDifferentialExpressionServiceImpl.java Source code

Java tutorial

Introduction

Here is the source code for ubic.gemma.core.analysis.expression.diff.GeneDifferentialExpressionServiceImpl.java

Source

/*
 * The Gemma project
 *
 * Copyright (c) 2013 University of British Columbia
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
 * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations under the License.
 */
package ubic.gemma.core.analysis.expression.diff;

import cern.colt.list.DoubleArrayList;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.time.StopWatch;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import ubic.basecode.math.metaanalysis.MetaAnalysis;
import ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysisResult;
import ubic.gemma.model.analysis.expression.diff.DifferentialExpressionValueObject;
import ubic.gemma.model.common.description.Characteristic;
import ubic.gemma.model.expression.experiment.*;
import ubic.gemma.model.genome.Gene;
import ubic.gemma.model.genome.gene.GeneValueObject;
import ubic.gemma.persistence.service.analysis.expression.diff.DifferentialExpressionResultService;
import ubic.gemma.persistence.util.EntityUtils;

import java.util.*;

/**
 * Provides access to {@link DifferentialExpressionAnalysisResult}s and meta-analysis results.
 *
 * @author keshav, anton
 */
@Component
public class GeneDifferentialExpressionServiceImpl implements GeneDifferentialExpressionService {

    private static final String FV_SEP = ", ";
    private static final int MAX_PVAL = 1;
    private static final Log log = LogFactory.getLog(GeneDifferentialExpressionServiceImpl.class);

    private final DifferentialExpressionResultService differentialExpressionResultService;

    @Autowired
    public GeneDifferentialExpressionServiceImpl(
            DifferentialExpressionResultService differentialExpressionResultService) {
        this.differentialExpressionResultService = differentialExpressionResultService;
    }

    @Override
    public ExperimentalFactorValueObject configExperimentalFactorValueObject(ExperimentalFactor ef) {
        ExperimentalFactorValueObject efvo = new ExperimentalFactorValueObject(ef.getId());
        efvo.setName(ef.getName());
        efvo.setDescription(ef.getDescription());
        Characteristic category = ef.getCategory();
        if (category != null) {
            efvo.setCategory(category.getCategory());
            efvo.setCategoryUri(category.getCategoryUri());
        }
        Collection<FactorValue> fvs = ef.getFactorValues();
        StringBuilder factorValuesAsString = new StringBuilder(StringUtils.EMPTY);

        for (FactorValue fv : fvs) {
            String fvName = fv.toString();
            if (StringUtils.isNotBlank(fvName)) {
                factorValuesAsString.append(fvName).append(GeneDifferentialExpressionServiceImpl.FV_SEP);
            }
        }

        /* clean up the start and end of the string */
        factorValuesAsString = new StringBuilder(
                StringUtils.remove(factorValuesAsString.toString(), ef.getName() + ":"));
        factorValuesAsString = new StringBuilder(StringUtils.removeEnd(factorValuesAsString.toString(),
                GeneDifferentialExpressionServiceImpl.FV_SEP));

        /*
         * Preformat the factor name; due to Ext PropertyGrid limitations we can't do this on the client.
         */
        efvo.setName(ef.getName() + " (" + StringUtils.abbreviate(factorValuesAsString.toString(), 50) + ")");

        efvo.setFactorValues(factorValuesAsString.toString());
        return efvo;
    }

    @Override
    public ExpressionExperimentValueObject configExpressionExperimentValueObject(
            ExpressionExperimentValueObject ee) {

        // if ( ee instanceof ExpressionExperiment ) {
        // eevo.setShortName( ( ( ExpressionExperiment ) ee ).getShortName() );
        // } else if ( ee instanceof ExpressionExperimentSubSet ) {
        // eevo.setShortName( "Subset of "
        // + ( ( ExpressionExperimentSubSet ) ee ).getSourceExperiment().getShortName() );
        // eevo.setSourceExperiment( ( ( ExpressionExperimentSubSet ) ee ).getSourceExperiment().getId() );
        // }
        // ee.setExternalUri( AnchorTagUtil.getExpressionExperimentUrl( ee.getId() ) );
        return ee;
    }

    @Override
    public Collection<DifferentialExpressionValueObject> getDifferentialExpression(Gene gene,
            Collection<BioAssaySet> ees) {
        StopWatch timer = new StopWatch();
        timer.start();
        Collection<DifferentialExpressionValueObject> devos = new ArrayList<>();

        if (gene == null)
            return devos;

        Map<ExpressionExperimentValueObject, List<DifferentialExpressionValueObject>> results = differentialExpressionResultService
                .find(gene, EntityUtils.getIds(ees));
        timer.stop();
        if (timer.getTime() > 1000) {
            GeneDifferentialExpressionServiceImpl.log.info("Diff ex results: " + timer.getTime() + " ms");
        }
        return this.postProcessDiffExResults(results);
    }

    @Override
    public Collection<DifferentialExpressionValueObject> getDifferentialExpression(Gene gene,
            Collection<BioAssaySet> ees, double threshold, Integer limit) {
        StopWatch timer = new StopWatch();
        timer.start();
        Collection<DifferentialExpressionValueObject> devos = new ArrayList<>();

        if (gene == null)
            return devos;

        Map<ExpressionExperimentValueObject, List<DifferentialExpressionValueObject>> results = differentialExpressionResultService
                .find(gene, EntityUtils.getIds(ees), threshold, limit);
        timer.stop();
        if (timer.getTime() > 1000) {
            GeneDifferentialExpressionServiceImpl.log.info("Diff ex results: " + timer.getTime() + " ms");
        }
        return this.postProcessDiffExResults(results);
    }

    @Override
    public Collection<DifferentialExpressionValueObject> getDifferentialExpression(Gene gene, double threshold,
            Collection<DiffExpressionSelectedFactorCommand> factorMap) {
        StopWatch timer = new StopWatch();
        timer.start();
        Collection<DifferentialExpressionValueObject> result = new ArrayList<>();

        if (gene == null)
            return result;

        Map<ExpressionExperimentValueObject, List<DifferentialExpressionValueObject>> rawDiffEx = differentialExpressionResultService
                .find(gene, threshold, null);

        // Collection<DifferentialExpressionValueObject> rawProcResults = postProcessDiffExResults( gene, threshold,
        // rawDiffEx );

        Map<Long, DiffExpressionSelectedFactorCommand> eeId2FactorCommand = new HashMap<>();
        for (DiffExpressionSelectedFactorCommand dsfc : factorMap) {
            eeId2FactorCommand.put(dsfc.getEeId(), dsfc);
        }

        for (ExpressionExperimentValueObject ee : rawDiffEx.keySet()) {
            for (DifferentialExpressionValueObject raw : rawDiffEx.get(ee)) {
                if (eeId2FactorCommand.containsKey(raw.getExpressionExperiment().getId())) {
                    DiffExpressionSelectedFactorCommand factorCommandForEE = eeId2FactorCommand
                            .get(raw.getExpressionExperiment().getId());

                    assert !raw.getExperimentalFactors().isEmpty();

                    // interaction term?
                    if (raw.getExperimentalFactors().size() > 1) {
                        continue;
                    }

                    ExperimentalFactorValueObject efvo = raw.getExperimentalFactors().iterator().next();
                    if (factorCommandForEE.getEfId().equals(efvo.getId())) {
                        result.add(raw);
                    }
                }
            }
        }
        timer.stop();
        if (timer.getTime() > 1000) {
            GeneDifferentialExpressionServiceImpl.log.info("Diff ex results: " + timer.getTime() + " ms");
        }
        return result;
    }

    @Override
    public Collection<DifferentialExpressionValueObject> getDifferentialExpression(Gene gene, double threshold,
            Integer limit) {

        Collection<DifferentialExpressionValueObject> devos = new ArrayList<>();

        if (gene == null)
            return devos;

        StopWatch timer = new StopWatch();
        timer.start();

        Map<ExpressionExperimentValueObject, List<DifferentialExpressionValueObject>> results = differentialExpressionResultService
                .find(gene, threshold, limit);

        if (timer.getTime() > 1000) {
            GeneDifferentialExpressionServiceImpl.log.info("Diff raw ex results: " + timer.getTime() + " ms");
        }

        return this.postProcessDiffExResults(results);
    }

    @Override
    public DifferentialExpressionMetaAnalysisValueObject getDifferentialExpressionMetaAnalysis(double threshold,
            Gene g, Map<Long, Long> eeFactorsMap, Collection<BioAssaySet> activeExperiments) {

        StopWatch timer = new StopWatch();
        timer.start();

        /*
         * Get results for each active experiment on given gene. Handling the threshold check below since we ignore this
         * for the meta analysis. The results returned are for all factors, not just the factors we are seeking.
         */
        Map<ExpressionExperimentValueObject, List<DifferentialExpressionValueObject>> resultsMap = differentialExpressionResultService
                .find(g, EntityUtils.getIds(activeExperiments));

        GeneDifferentialExpressionServiceImpl.log
                .debug(resultsMap.size() + " results for " + g + " in " + activeExperiments);

        DifferentialExpressionMetaAnalysisValueObject mavo = new DifferentialExpressionMetaAnalysisValueObject();

        DoubleArrayList pvaluesToCombine = new DoubleArrayList();

        /* a gene can have multiple probes that map to it, so store one diff value object for each probe */
        Collection<DifferentialExpressionValueObject> devos = new ArrayList<>();

        Collection<Long> eesThatMetThreshold = new HashSet<>();

        for (ExpressionExperimentValueObject ee : resultsMap.keySet()) {

            ExpressionExperimentValueObject eevo = this.configExpressionExperimentValueObject(ee);

            Collection<DifferentialExpressionValueObject> proberesults = resultsMap.get(ee);

            Collection<DifferentialExpressionValueObject> filteredResults = new HashSet<>();
            for (DifferentialExpressionValueObject r : proberesults) {
                Collection<ExperimentalFactorValueObject> efs = r.getExperimentalFactors();

                assert efs.size() > 0;
                if (efs.size() > 1) {
                    // We always ignore interaction effects.
                    continue;
                }

                ExperimentalFactorValueObject ef = efs.iterator().next();

                /*
                 * note that we don't care about the reverse: the eefactorsmap can have stuff we don't need. We focus on
                 * the experiments because they are easy to select & secure. The eefactorsmap provides additional
                 * details.
                 */
                assert eeFactorsMap.containsKey(ee.getId()) : "eeFactorsMap does not contain ee=" + ee.getId();

                Long sfId = eeFactorsMap.get(ee.getId());
                if (!ef.getId().equals(sfId)) {
                    /*
                     * Screen out factors we're not using.
                     */
                    continue;
                }

                /* filtered result with chosen factor */
                filteredResults.add(r);

            }

            if (filteredResults.size() == 0) {
                GeneDifferentialExpressionServiceImpl.log.warn("No result for ee=" + ee);
                continue;
            }

            /*
             * For the diff expression meta analysis, ignore threshold. Select the 'best' penalized probe if multiple
             * probes map to the same gene.
             */
            DifferentialExpressionValueObject res = this.findMinPenalizedProbeResult(filteredResults);
            if (res == null)
                continue;

            Double p = res.getP();
            if (p == null)
                continue;

            /*
             * Moderate the pvalues by setting all values to be no smaller than PVALUE_CLIP_THRESHOLD
             */
            pvaluesToCombine.add(Math.max(p, GeneDifferentialExpressionService.PVALUE_CLIP_THRESHOLD));

            /* for each filtered result, set up a devo (contains only results with chosen factor) */

            for (DifferentialExpressionValueObject r : filteredResults) {

                Collection<ExperimentalFactorValueObject> efs = r.getExperimentalFactors();
                if (efs == null) {
                    // This should not happen any more, but just in case.
                    GeneDifferentialExpressionServiceImpl.log.warn(
                            "No experimentalFactor(s) for DifferentialExpressionAnalysisResult: " + r.getId());
                    continue;
                }

                Boolean metThreshold = r.getCorrP() != null && (r.getCorrP() <= threshold);
                r.setMetThreshold(metThreshold);

                if (metThreshold) {
                    eesThatMetThreshold.add(eevo.getId());
                }

                Boolean fisherContribution = r.equals(res);
                r.setFisherContribution(fisherContribution);

            }

        }

        /*
         * Meta-analysis part.
         */
        double fisherPval = MetaAnalysis.fisherCombinePvalues(pvaluesToCombine);
        mavo.setFisherPValue(fisherPval);
        mavo.setGene(new GeneValueObject(g));
        mavo.setActiveExperiments(activeExperiments);
        mavo.setProbeResults(devos);
        mavo.setNumMetThreshold(eesThatMetThreshold.size());
        mavo.setSortKey();

        timer.stop();
        if (timer.getTime() > 1000) {
            GeneDifferentialExpressionServiceImpl.log.info("Meta-analysis results: " + timer.getTime() + " ms");
        }

        return mavo;
    }

    /**
     * When n probes map to the sa
     *
     * @return the result with the min p value.
     */
    private DifferentialExpressionValueObject findMinPenalizedProbeResult(
            Collection<DifferentialExpressionValueObject> results) {

        DifferentialExpressionValueObject minResult = null;

        int numProbesForGene = results.size();
        if (numProbesForGene == 1) {
            return results.iterator().next();
        }

        double min = 0;
        int i = 0;
        for (DifferentialExpressionValueObject r : results) {

            /*
             * TODO use the contrasts.
             */
            //r.getContrasts();

            if (r.getP() == null)
                continue;
            /* penalize pvals */
            double pval = r.getP() * numProbesForGene;
            if (pval > GeneDifferentialExpressionServiceImpl.MAX_PVAL)
                pval = GeneDifferentialExpressionServiceImpl.MAX_PVAL;

            /* find the best pval */
            if (i == 0 || pval <= min) {
                min = pval;
                minResult = r;
                minResult.setP(min);
            }

            i++;
        }

        return minResult;

    }

    private Collection<DifferentialExpressionValueObject> postProcessDiffExResults(
            Map<ExpressionExperimentValueObject, List<DifferentialExpressionValueObject>> results) {
        StopWatch timer = new StopWatch();
        timer.start();

        Collection<DifferentialExpressionValueObject> devos = new ArrayList<>();

        /*
         * convert to DEVOs
         */
        for (ExpressionExperimentValueObject eevo : results.keySet()) {

            Collection<DifferentialExpressionValueObject> probeResults = results.get(eevo);

            assert probeResults != null && !probeResults.isEmpty();

            for (DifferentialExpressionValueObject r : probeResults) {

                // this doesn't do anything any more since we're already working with valueobjects.

                Collection<ExperimentalFactorValueObject> efs = r.getExperimentalFactors();

                if (efs == null) {
                    // This should not happen any more, but just in case.
                    GeneDifferentialExpressionServiceImpl.log.warn(
                            "No experimentalfactor(s) for DifferentialExpressionAnalysisResult: " + r.getId());
                    continue;
                }

                if (r.getCorrP() == null) {
                    GeneDifferentialExpressionServiceImpl.log
                            .warn("No p-value for DifferentialExpressionAnalysisResult: " + r.getId());
                    continue;
                }

                devos.add(r);

            }

        }
        timer.stop();
        if (timer.getTime() > 1000) {
            GeneDifferentialExpressionServiceImpl.log
                    .info("Postprocess Diff ex results: " + timer.getTime() + " ms");
        }

        return devos;
    }

}