ubic.gemma.persistence.service.analysis.expression.diff.DifferentialExpressionResultDaoImpl.java Source code

Java tutorial

Introduction

Here is the source code for ubic.gemma.persistence.service.analysis.expression.diff.DifferentialExpressionResultDaoImpl.java

Source

/*
 * The Gemma project.
 *
 * Copyright (c) 2006-2007 University of British Columbia
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */
package ubic.gemma.persistence.service.analysis.expression.diff;

import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.time.StopWatch;
import org.hibernate.*;
import org.hibernate.type.DoubleType;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.orm.hibernate3.HibernateTemplate;
import org.springframework.stereotype.Repository;
import ubic.basecode.io.ByteArrayConverter;
import ubic.basecode.math.distribution.Histogram;
import ubic.basecode.util.BatchIterator;
import ubic.basecode.util.SQLUtils;
import ubic.gemma.model.analysis.expression.diff.*;
import ubic.gemma.model.expression.designElement.CompositeSequence;
import ubic.gemma.model.expression.experiment.*;
import ubic.gemma.model.genome.Gene;
import ubic.gemma.model.genome.gene.GeneValueObject;
import ubic.gemma.persistence.service.AbstractDao;
import ubic.gemma.persistence.util.CommonQueries;
import ubic.gemma.persistence.util.EntityUtils;
import ubic.gemma.persistence.util.TaskCancelledException;

import java.math.BigInteger;
import java.util.*;

/**
 * This is a key class for queries to retrieve differential expression results (as well as standard CRUD aspects of
 * working with DifferentialExpressionResults).
 *
 * @author keshav
 */
@Repository
public class DifferentialExpressionResultDaoImpl extends AbstractDao<DifferentialExpressionAnalysisResult>
        implements DifferentialExpressionResultDao {

    /*
     * Temporary. For mimicing the effect of storing only 'significant' results.
     */
    private static final Double CORRECTED_PVALUE_THRESHOLD_TO_BE_CONSIDERED_DIFF_EX = 1.0;
    /*
     * This is a key query: get all results for a set of genes in a set of resultssets (basically, experiments)
     */
    private static final String fetchBatchDifferentialExpressionAnalysisResultsByResultSetsAndGeneQuery = "SELECT dear.PROBE_FK, dear.ID,"
            + " dear.RESULT_SET_FK, dear.CORRECTED_PVALUE, dear.PVALUE  "
            + " FROM DIFFERENTIAL_EXPRESSION_ANALYSIS_RESULT dear FORCE INDEX (probeResultSets) WHERE dear.RESULT_SET_FK IN (:rs_ids) AND "
            + " dear.PROBE_FK IN (:probe_ids) ";

    private static final String fetchResultsByExperimentsQuery = "select distinct e, r"
            + " from DifferentialExpressionAnalysis a, BioSequence2GeneProduct bs2gp"
            + " inner join a.experimentAnalyzed e  "
            + " inner join a.resultSets rs inner join rs.results r inner join fetch r.probe p "
            + "left join p.biologicalCharacteristic bs left join bs2gp.geneProduct gp left join gp.gene g"
            + " where bs2gp.bioSequence=bs and e.id in (:experimentsAnalyzed) and r.correctedPvalue < :threshold order by r.correctedPvalue";

    private static final String fetchResultsByGene = "select distinct e, r"
            + " from DifferentialExpressionAnalysis a, BioSequence2GeneProduct bs2gp"
            + " inner join a.experimentAnalyzed e  "
            + " inner join a.resultSets rs inner join rs.results r inner join r.probe  p "
            + "inner join p.biologicalCharacteristic bs inner join bs2gp.geneProduct gp inner join gp.gene g"
            + " where g=:gene"; // no order by clause, we add it later

    private static final String fetchResultsByGeneSQL = "select d.ID, d.PVALUE, d.CORRECTED_PVALUE, d.PROBE_FK, d.RESULT_SET_FK, "
            + " c.FACTOR_VALUE_FK, c.PVALUE, c.LOG_FOLD_CHANGE, c.SECOND_FACTOR_VALUE_FK, c.ID  "
            + " from DIFFERENTIAL_EXPRESSION_ANALYSIS_RESULT d, GENE2CS g2s, CONTRAST_RESULT c "
            + " where g2s.CS = d.PROBE_FK and c.DIFFERENTIAL_EXPRESSION_ANALYSIS_RESULT_FK = d.ID and g2s.GENE = :gene_id ";

    private static final String fetchResultsByGeneAndExperimentsQuery = "select distinct e, r"
            + " from DifferentialExpressionAnalysis a, BioSequence2GeneProduct bs2gp"
            + " inner join a.experimentAnalyzed e  "
            + " inner join a.resultSets rs inner join rs.results r inner join fetch r.probe p "
            + "inner join p.biologicalCharacteristic bs inner join bs2gp.geneProduct gp inner join gp.gene g"
            + " where bs2gp.bioSequence=bs and g=:gene and e.id in (:experimentsAnalyzed)"; // no order by clause, we
    // add
    // it later

    private static final String fetchResultsByResultSetAndGeneQuery = "SELECT dear.CORRECTED_PVALUE "
            + " FROM DIFFERENTIAL_EXPRESSION_ANALYSIS_RESULT dear, GENE2CS g2s FORCE KEY(GENE)  "
            + " WHERE g2s.CS = dear.PROBE_FK  AND dear.RESULT_SET_FK = :rs_id AND g2s.GENE = :gene_id "
            + " ORDER BY dear.CORRECTED_P_VALUE_BIN DESC";

    /**
     * No constraint on gene
     */
    private static final String fetchResultsByResultSetQuery = "select distinct rs, r "
            + " from DifferentialExpressionAnalysis a inner join a.experimentAnalyzed e  "
            + " inner join a.resultSets rs inner  join  rs.results r inner join fetch r.probe p "
            + " where rs in (:resultsSets)"; // no order by clause, we add it later; 'e' is not used in this query.

    private static final String fetchResultsBySingleResultSetQuery = "select distinct r "
            + " from DifferentialExpressionAnalysis a inner join a.experimentAnalyzed e  "
            + " inner join a.resultSets rs inner  join  rs.results r inner join fetch r.probe p "
            + " where rs in (:resultsSets)"; // no order by clause, we add it later; 'e' is not used in this query.

    private final DifferentialExpressionResultCache differentialExpressionResultCache;

    @Autowired
    public DifferentialExpressionResultDaoImpl(SessionFactory sessionFactory,
            DifferentialExpressionResultCache differentialExpressionResultCache) {
        super(DifferentialExpressionAnalysisResult.class, sessionFactory);
        this.differentialExpressionResultCache = differentialExpressionResultCache;
    }

    @Override
    public Map<ExpressionExperimentValueObject, List<DifferentialExpressionValueObject>> find(Gene gene,
            Collection<Long> experimentsAnalyzed, double threshold, Integer limit) {

        StopWatch timer = new StopWatch();
        timer.start();
        String qs = DifferentialExpressionResultDaoImpl.fetchResultsByGeneAndExperimentsQuery
                + " and r.correctedPvalue < :threshold";

        HibernateTemplate tpl = new HibernateTemplate(this.getSessionFactory());
        tpl.setQueryCacheRegion("diffExResult");
        tpl.setCacheQueries(true);

        if (limit != null) {
            tpl.setMaxResults(limit);
            qs += " order by r.correctedPvalue";
        }

        Map<ExpressionExperimentValueObject, List<DifferentialExpressionValueObject>> results = new HashMap<>();

        if (experimentsAnalyzed.size() == 0) {
            return results;
        }

        String[] paramNames = { "gene", "experimentsAnalyzed", "threshold" };
        Object[] objectValues = { gene, experimentsAnalyzed, threshold };

        List<?> qResult = tpl.findByNamedParam(qs, paramNames, objectValues);

        for (Object o : qResult) {

            Object[] oa = (Object[]) o;
            ExpressionExperimentValueObject ee = ((BioAssaySet) oa[0]).createValueObject();
            DifferentialExpressionAnalysisResult probeResult = (DifferentialExpressionAnalysisResult) oa[1];

            if (!results.containsKey(ee)) {
                results.put(ee, new ArrayList<DifferentialExpressionValueObject>());
            }

            results.get(ee).add(new DifferentialExpressionValueObject(probeResult));
        }

        AbstractDao.log.warn("Num experiments with probe analysis results (with limit = " + limit + ") : "
                + results.size() + ". Number of probes returned in total: " + qResult.size());

        timer.stop();
        if (timer.getTime() > 1000) {
            AbstractDao.log.info("Diff ex results: " + timer.getTime() + " ms");
        }

        return results;
    }

    @Override
    public Map<ExpressionExperimentValueObject, List<DifferentialExpressionValueObject>> find(
            Collection<Long> experiments, double qvalueThreshold, Integer limit) {

        Map<ExpressionExperimentValueObject, List<DifferentialExpressionValueObject>> results = new HashMap<>();

        if (experiments.size() == 0) {
            return results;
        }

        StopWatch timer = new StopWatch();
        timer.start();

        HibernateTemplate tpl = new HibernateTemplate(this.getSessionFactory());
        tpl.setQueryCacheRegion("diffExResult");
        tpl.setCacheQueries(true);

        if (limit != null) {
            tpl.setMaxResults(limit);
        }

        String[] paramNames = { "experimentsAnalyzed", "threshold" };
        Object[] objectValues = { experiments, qvalueThreshold };

        List<?> qResult = tpl.findByNamedParam(DifferentialExpressionResultDaoImpl.fetchResultsByExperimentsQuery,
                paramNames, objectValues);

        for (Object o : qResult) {

            Object[] oa = (Object[]) o;
            DifferentialExpressionAnalysisResult probeResult = (DifferentialExpressionAnalysisResult) oa[1];
            ExpressionExperimentValueObject eevo = ((BioAssaySet) oa[0]).createValueObject();
            if (!results.containsKey(eevo)) {
                results.put(eevo, new ArrayList<DifferentialExpressionValueObject>());
            }

            results.get(eevo).add(new DifferentialExpressionValueObject(probeResult));
        }

        timer.stop();
        if (timer.getTime() > 1000) {
            AbstractDao.log.info("Diff ex results: " + timer.getTime() + " ms");
        }

        return results;
    }

    @Override
    public Map<ExpressionExperimentValueObject, List<DifferentialExpressionValueObject>> find(Gene gene) {
        StopWatch timer = new StopWatch();
        timer.start();
        Map<ExpressionExperimentValueObject, List<DifferentialExpressionValueObject>> results = new HashMap<>();
        if (gene == null)
            return results;

        HibernateTemplate tpl = new HibernateTemplate(this.getSessionFactory());
        tpl.setCacheQueries(true);

        List<?> qResult = tpl.findByNamedParam(DifferentialExpressionResultDaoImpl.fetchResultsByGene, "gene",
                gene);

        for (Object o : qResult) {

            Object[] oa = (Object[]) o;
            ExpressionExperimentValueObject ee = ((BioAssaySet) oa[0]).createValueObject();
            DifferentialExpressionValueObject probeResult = new DifferentialExpressionValueObject(
                    (DifferentialExpressionAnalysisResult) oa[1]);

            if (!results.containsKey(ee)) {
                results.put(ee, new ArrayList<DifferentialExpressionValueObject>());
            }

            results.get(ee).add(probeResult);
        }
        timer.stop();
        if (timer.getTime() > 1000) {
            AbstractDao.log.info("Diff ex results: " + timer.getTime() + " ms");
        }
        return results;
    }

    @Override
    public Map<ExpressionExperimentValueObject, List<DifferentialExpressionValueObject>> find(Gene gene,
            Collection<Long> experimentsAnalyzed) {

        Map<ExpressionExperimentValueObject, List<DifferentialExpressionValueObject>> results = new HashMap<>();

        if (experimentsAnalyzed.size() == 0) {
            return results;
        }

        StopWatch timer = new StopWatch();
        timer.start();

        String[] paramNames = { "gene", "experimentsAnalyzed" };
        Object[] objectValues = { gene, experimentsAnalyzed };

        List<?> qResult = this.getHibernateTemplate().findByNamedParam(
                DifferentialExpressionResultDaoImpl.fetchResultsByGeneAndExperimentsQuery, paramNames,
                objectValues);

        for (Object o : qResult) {

            Object[] oa = (Object[]) o;
            ExpressionExperimentValueObject ee = new ExpressionExperimentValueObject((ExpressionExperiment) oa[0]);
            DifferentialExpressionValueObject probeResult = new DifferentialExpressionValueObject(
                    (DifferentialExpressionAnalysisResult) oa[1]);
            probeResult.setExpressionExperiment(ee);

            if (!results.containsKey(ee)) {
                results.put(ee, new ArrayList<DifferentialExpressionValueObject>());
            }

            results.get(ee).add(probeResult);
        }
        timer.stop();
        if (timer.getTime() > 1000) {
            AbstractDao.log.info("Diff ex results: " + timer.getTime() + " ms");
        }
        return results;
    }

    @Override
    public Map<Long, Map<Long, DiffExprGeneSearchResult>> findDiffExAnalysisResultIdsInResultSets(
            Collection<DiffExResultSetSummaryValueObject> resultSets, Collection<Long> geneIds) {

        Map<Long, Map<Long, DiffExprGeneSearchResult>> results = new HashMap<>();

        Session session = this.getSessionFactory().getCurrentSession();

        Map<Long, DiffExResultSetSummaryValueObject> resultSetIdsMap = EntityUtils.getIdMap(resultSets,
                "getResultSetId");

        Map<Long, Collection<Long>> foundInCache = this.fillFromCache(results, resultSetIdsMap.keySet(), geneIds);

        if (!foundInCache.isEmpty()) {
            AbstractDao.log.info("Results for " + foundInCache.size() + " resultsets found in cache");
        } else {
            AbstractDao.log.info("No results were in the cache");
        }

        Collection<Long> resultSetsNeeded = this.stripUnneededResultSets(foundInCache, resultSetIdsMap.keySet(),
                geneIds);

        // Are we finished?
        if (resultSetsNeeded.isEmpty()) {
            AbstractDao.log.info("All results were in the cache.");
            return results;
        }

        AbstractDao.log.info(foundInCache.size() + "/" + resultSetIdsMap.size()
                + " resultsSets had at least some cached results; still need to query " + resultSetsNeeded.size());

        assert !resultSetsNeeded.isEmpty();

        org.hibernate.SQLQuery queryObject = session.createSQLQuery(
                DifferentialExpressionResultDaoImpl.fetchBatchDifferentialExpressionAnalysisResultsByResultSetsAndGeneQuery);

        /*
         * These values have been tweaked to probe for performance issues.
         */
        int resultSetBatchSize = 50;
        int geneBatchSize = 100;

        if (resultSetsNeeded.size() > geneIds.size()) {
            resultSetBatchSize = Math.min(500, resultSetsNeeded.size());
            AbstractDao.log.info("Batching by result sets (" + resultSetsNeeded.size() + " resultSets); "
                    + geneIds.size() + " genes; batch size=" + resultSetBatchSize);

        } else {
            geneBatchSize = Math.min(200, geneIds.size());
            AbstractDao.log.info("Batching by genes (" + geneIds.size() + " genes); " + resultSetsNeeded.size()
                    + " resultSets; batch size=" + geneBatchSize);
        }

        final int numResultSetBatches = (int) Math.ceil(resultSetsNeeded.size() / resultSetBatchSize);

        queryObject.setFlushMode(FlushMode.MANUAL);

        StopWatch timer = new StopWatch();
        timer.start();
        int numResults = 0;
        long timeForFillingNonSig = 0;

        Map<Long, Map<Long, DiffExprGeneSearchResult>> resultsFromDb = new HashMap<>();

        int numResultSetBatchesDone = 0;

        // Iterate over batches of resultSets
        for (Collection<Long> resultSetIdBatch : new BatchIterator<>(resultSetsNeeded, resultSetBatchSize)) {

            if (AbstractDao.log.isDebugEnabled())
                AbstractDao.log.debug("Starting batch of resultsets: "
                        + StringUtils.abbreviate(StringUtils.join(resultSetIdBatch, ","), 100));

            /*
             * Get the probes using the CommonQueries gene2cs. Otherwise we (in effect) end up doing this over and over
             * again.
             */
            Map<Long, Collection<Long>> cs2GeneIdMap = this.getProbesForGenesInResultSetBatch(session, geneIds,
                    resultSetIdsMap, resultSetIdBatch);

            queryObject.setParameterList("rs_ids", resultSetIdBatch);

            int numGeneBatchesDone = 0;
            final int numGeneBatches = (int) Math.ceil(cs2GeneIdMap.size() / geneBatchSize);

            StopWatch innerQt = new StopWatch();

            // iterate over batches of probes (genes)
            for (Collection<Long> probeBatch : new BatchIterator<>(cs2GeneIdMap.keySet(), geneBatchSize)) {

                if (AbstractDao.log.isDebugEnabled())
                    AbstractDao.log.debug("Starting batch of probes: "
                            + StringUtils.abbreviate(StringUtils.join(probeBatch, ","), 100));

                // would it help to sort the probeBatch/
                List<Long> pbL = new Vector<>(probeBatch);
                Collections.sort(pbL);

                queryObject.setParameterList("probe_ids", pbL);

                innerQt.start();
                List<?> queryResult = queryObject.list();
                innerQt.stop();

                if (innerQt.getTime() > 2000) {
                    // show the actual query with params.
                    AbstractDao.log.info("Query time: " + innerQt.getTime() + "ms:\n "
                            + queryObject.getQueryString().replace(":probe_ids", StringUtils.join(probeBatch, ","))
                                    .replace(":rs_ids", StringUtils.join(resultSetIdBatch, ",")));
                }
                innerQt.reset();

                /*
                 * Each query tuple are the probe, result, resultsSet, qvalue, pvalue.
                 */
                for (Object o : queryResult) {
                    // Long resultSetId = ( ( BigInteger )((Object[])o)[2] ).longValue();
                    // if (!resultSetId.equals)
                    numResults += this.processResultTuple(o, resultsFromDb, cs2GeneIdMap);
                }

                if (timer.getTime() > 5000 && AbstractDao.log.isInfoEnabled()) {
                    AbstractDao.log.info("Batch time: " + timer.getTime() + "ms; Fetched DiffEx " + numResults
                            + " results so far. " + numResultSetBatchesDone + "/" + numResultSetBatches
                            + " resultset batches completed. " + numGeneBatchesDone + "/" + numGeneBatches
                            + " gene batches done.");
                    timer.reset();
                    timer.start();
                }

                // Check if task was cancelled.
                if (Thread.currentThread().isInterrupted()) {
                    throw new TaskCancelledException("Search was cancelled");
                }

                numGeneBatchesDone++;

                if (DifferentialExpressionResultDaoImpl.CORRECTED_PVALUE_THRESHOLD_TO_BE_CONSIDERED_DIFF_EX < 1.0) {
                    timeForFillingNonSig += this.fillNonSignificant(pbL, resultSetIdsMap, resultsFromDb,
                            resultSetIdBatch, cs2GeneIdMap, session);
                }
            } // over probes.

            // Check if task was cancelled.
            if (Thread.currentThread().isInterrupted()) {
                throw new TaskCancelledException("Search was cancelled");
            }

            numResultSetBatchesDone++;

        }

        if (timer.getTime() > 1000 && AbstractDao.log.isInfoEnabled()) {
            AbstractDao.log.info("Fetching DiffEx from DB took total of " + timer.getTime() + " ms : geneIds="
                    + StringUtils.abbreviate(StringUtils.join(geneIds, ","), 50) + " result set="
                    + StringUtils.abbreviate(StringUtils.join(resultSetsNeeded, ","), 50));
            if (timeForFillingNonSig > 100) {
                AbstractDao.log.info("Filling in non-significant values: " + timeForFillingNonSig + "ms in total");
            }
        }

        // Add the DB results to the cached results.
        this.addToCache(resultsFromDb, resultSetsNeeded, geneIds);

        for (Long resultSetId : resultsFromDb.keySet()) {
            Map<Long, DiffExprGeneSearchResult> geneResults = resultsFromDb.get(resultSetId);
            if (results.containsKey(resultSetId)) {
                results.get(resultSetId).putAll(geneResults);
            } else {
                results.put(resultSetId, geneResults);
            }
        }

        return results;
    }

    @Override
    public List<Double> findGeneInResultSets(Gene gene, ExpressionAnalysisResultSet resultSet,
            Collection<Long> arrayDesignIds, Integer limit) {

        StopWatch timer = new StopWatch();
        timer.start();

        List<Double> results;

        Session session = this.getSessionFactory().getCurrentSession();
        org.hibernate.SQLQuery queryObject = session
                .createSQLQuery(DifferentialExpressionResultDaoImpl.fetchResultsByResultSetAndGeneQuery);

        queryObject.setLong("gene_id", gene.getId());
        queryObject.setLong("rs_id", resultSet.getId());

        if (limit != null) {
            queryObject.setMaxResults(limit);
        }

        queryObject.addScalar("CORRECTED_PVALUE", new DoubleType());
        //noinspection unchecked
        results = queryObject.list();

        timer.stop();
        if (AbstractDao.log.isDebugEnabled())
            AbstractDao.log.debug("Fetching probeResults from resultSet " + resultSet.getId() + " for gene "
                    + gene.getId() + "and " + arrayDesignIds.size() + "arrays took : " + timer.getTime() + " ms");

        return results;
    }

    @Override
    public List<DifferentialExpressionValueObject> findInResultSet(ExpressionAnalysisResultSet resultSet,
            Double threshold, Integer limit, Integer minNumberOfResults) {

        if (minNumberOfResults == null || minNumberOfResults < 1) {
            throw new IllegalArgumentException("Minimum number of results must be positive");
        }

        List<DifferentialExpressionValueObject> results = new ArrayList<>();

        if (resultSet == null) {
            return results;
        }

        results = differentialExpressionResultCache.getTopHits(resultSet);
        if (results != null && results.size() >= minNumberOfResults) {
            AbstractDao.log.info("Top hits already in cache");
            return results;
        }

        results = new ArrayList<>();

        // get it.

        Collection<ExpressionAnalysisResultSet> resultsSets = new ArrayList<>();
        resultsSets.add(resultSet);

        StopWatch timer = new StopWatch();
        timer.start();
        String qs = DifferentialExpressionResultDaoImpl.fetchResultsBySingleResultSetQuery
                + " and r.correctedPvalue < :threshold order by r.correctedPvalue";

        HibernateTemplate tpl = new HibernateTemplate(this.getSessionFactory());

        if (limit != null) {
            tpl.setMaxResults(limit);
        }

        String[] paramNames = { "resultsSets", "threshold" };
        Object[] objectValues = { resultsSets, threshold };

        List<?> qResult = tpl.findByNamedParam(qs, paramNames, objectValues);

        // If too few probes meet threshold, redo and just get top results.
        if (qResult.size() < minNumberOfResults) {
            AbstractDao.log.info("No results met threshold, repeating to just get the top hits");
            qs = DifferentialExpressionResultDaoImpl.fetchResultsBySingleResultSetQuery
                    + " order by r.correctedPvalue";

            tpl = new HibernateTemplate(this.getSessionFactory());
            tpl.setMaxResults(minNumberOfResults);

            String[] paramName = { "resultsSets" };
            Object[] objectValue = { resultsSets };

            qResult = tpl.findByNamedParam(qs, paramName, objectValue);
        }

        for (Object o : qResult) {
            DifferentialExpressionValueObject probeResult = new DifferentialExpressionValueObject(
                    (DifferentialExpressionAnalysisResult) o);
            results.add(probeResult);
        }

        timer.stop();
        if (timer.getTime() > 1000) {
            AbstractDao.log.info("Diff ex results: " + timer.getTime() + " ms");
        }

        differentialExpressionResultCache.addToTopHitsCache(resultSet, results);

        return results;
    }

    /**
     * Given a list of result sets finds the results that met the given threshold
     *
     * @param limit - max number of results to return.
     */
    @Override
    public Map<ExpressionAnalysisResultSet, List<DifferentialExpressionAnalysisResult>> findInResultSets(
            Collection<ExpressionAnalysisResultSet> resultsAnalyzed, double threshold, Integer limit) {

        Map<ExpressionAnalysisResultSet, List<DifferentialExpressionAnalysisResult>> results = new HashMap<>();

        if (resultsAnalyzed.size() == 0) {
            return results;
        }

        // Integer bin = Math.log10(threshold);

        StopWatch timer = new StopWatch();
        timer.start();
        String qs = DifferentialExpressionResultDaoImpl.fetchResultsByResultSetQuery
                + " and r.correctedPvalue < :threshold order by r.correctedPvalue";

        HibernateTemplate tpl = new HibernateTemplate(this.getSessionFactory());

        if (limit != null) {
            tpl.setMaxResults(limit);
        }

        String[] paramNames = { "resultsAnalyzed", "threshold" };
        Object[] objectValues = { resultsAnalyzed, threshold };

        List<?> qResult = tpl.findByNamedParam(qs, paramNames, objectValues);

        for (Object o : qResult) {

            Object[] oa = (Object[]) o;
            ExpressionAnalysisResultSet ee = (ExpressionAnalysisResultSet) oa[0];
            DifferentialExpressionAnalysisResult probeResult = (DifferentialExpressionAnalysisResult) oa[1];

            if (!results.containsKey(ee)) {
                results.put(ee, new ArrayList<DifferentialExpressionAnalysisResult>());
            }

            results.get(ee).add(probeResult);
        }

        timer.stop();
        if (timer.getTime() > 1000) {
            AbstractDao.log.info("Diff ex results: " + timer.getTime() + " ms");
        }
        return results;
    }

    @Override
    public DifferentialExpressionAnalysis getAnalysis(ExpressionAnalysisResultSet rs) {
        return (DifferentialExpressionAnalysis) this.getHibernateTemplate()
                .findByNamedParam("select a from DifferentialExpressionAnalysis a join a.resultSets r where r=:r",
                        "r", rs)
                .iterator().next();
    }

    @Override
    public Collection<ExperimentalFactor> getExperimentalFactors(
            DifferentialExpressionAnalysisResult differentialExpressionAnalysisResult) {

        //noinspection unchecked
        return this.getSessionFactory().getCurrentSession()
                .createQuery("select ef from ExpressionAnalysisResultSet rs"
                        + " inner join rs.results r inner join rs.experimentalFactors ef where r=:differentialExpressionAnalysisResult")
                .setParameter("differentialExpressionAnalysisResult", differentialExpressionAnalysisResult).list();

    }

    @Override
    public Map<DifferentialExpressionAnalysisResult, Collection<ExperimentalFactor>> getExperimentalFactors(
            Collection<DifferentialExpressionAnalysisResult> differentialExpressionAnalysisResults) {
        StopWatch timer = new StopWatch();
        timer.start();
        Map<DifferentialExpressionAnalysisResult, Collection<ExperimentalFactor>> factorsByResult = new HashMap<>();
        if (differentialExpressionAnalysisResults.isEmpty()) {
            return factorsByResult;
        }

        //language=HQL
        final String queryString = "select rs.experimentalFactors, r from ExpressionAnalysisResultSet rs"
                + " inner join rs.results r where r in (:differentialExpressionAnalysisResults)";

        String[] paramNames = { "differentialExpressionAnalysisResults" };
        Object[] objectValues = { differentialExpressionAnalysisResults };

        List<?> qr = this.getHibernateTemplate().findByNamedParam(queryString, paramNames, objectValues);

        if (qr == null || qr.isEmpty())
            return factorsByResult;

        for (Object o : qr) {
            Object[] ar = (Object[]) o;
            ExperimentalFactor f = (ExperimentalFactor) ar[0];
            DifferentialExpressionAnalysisResult res = (DifferentialExpressionAnalysisResult) ar[1];

            assert differentialExpressionAnalysisResults.contains(res);

            if (!factorsByResult.containsKey(res)) {
                factorsByResult.put(res, new HashSet<ExperimentalFactor>());
            }

            factorsByResult.get(res).add(f);

            if (AbstractDao.log.isDebugEnabled())
                AbstractDao.log.debug(res);
        }
        timer.stop();
        if (timer.getTime() > 1000) {
            AbstractDao.log.info("factors by results: " + timer.getTime() + " ms");
        }
        return factorsByResult;

    }

    /**
     * Key method for getting contrasts associated with results.
     */
    @Override
    public Map<Long, ContrastsValueObject> loadContrastDetailsForResults(Collection<Long> ids) {
        //language=SQL
        final String queryString = "SELECT DISTINCT c.ID, c.LOG_FOLD_CHANGE, c.FACTOR_VALUE_FK,"
                + " c.DIFFERENTIAL_EXPRESSION_ANALYSIS_RESULT_FK, c.PVALUE FROM CONTRAST_RESULT c"
                + " WHERE c.DIFFERENTIAL_EXPRESSION_ANALYSIS_RESULT_FK IN (:ids)  ";

        Map<Long, ContrastsValueObject> probeResults = new HashMap<>();

        if (ids.isEmpty()) {
            return probeResults;
        }

        SQLQuery query = this.getSessionFactory().getCurrentSession().createSQLQuery(queryString);

        int BATCH_SIZE = 2000; // previously: 500, then 1000. New optimized query is plenty fast.
        StopWatch timer = new StopWatch();
        for (Collection<Long> batch : new BatchIterator<>(ids, BATCH_SIZE)) {
            timer.reset();
            timer.start();

            query.setParameterList("ids", batch);

            List<?> batchR = query.list();

            for (Object o : batchR) {
                Object[] ol = (Object[]) o;

                Long resultId = ((BigInteger) ol[3]).longValue();

                if (!probeResults.containsKey(resultId)) {
                    probeResults.put(resultId, new ContrastsValueObject(resultId));
                }

                ContrastsValueObject cvo = probeResults.get(resultId);

                Long contrastId = ((BigInteger) ol[0]).longValue();

                Double logFoldChange = ol[1] == null ? null : (Double) ol[1];

                Long factorValueId = ol[2] == null ? null : ((BigInteger) ol[2]).longValue();

                Double pvalue = ol[4] == null ? null : (Double) ol[4];

                cvo.addContrast(contrastId, factorValueId, logFoldChange, pvalue, null);

            }

            if (timer.getTime() > 2000) {
                AbstractDao.log.info("Fetch " + batch.size() + " results with contrasts: " + timer.getTime()
                        + "ms; query was\n " + queryString.replace(":ids", StringUtils.join(batch, ",")));
            }
        }

        return probeResults;
    }

    @Override
    public void thaw(final Collection<DifferentialExpressionAnalysisResult> results) {
        Session session = this.getSessionFactory().getCurrentSession();
        for (DifferentialExpressionAnalysisResult result : results) {
            session.buildLockRequest(LockOptions.NONE).lock(result);
            Hibernate.initialize(result);
            CompositeSequence cs = result.getProbe();
            Hibernate.initialize(cs);
            Hibernate.initialize(result.getContrasts());
        }

    }

    @Override
    public void thaw(final DifferentialExpressionAnalysisResult result) {
        Session session = this.getSessionFactory().getCurrentSession();

        session.buildLockRequest(LockOptions.NONE).lock(result);
        Hibernate.initialize(result);

        CompositeSequence cs = result.getProbe();
        Hibernate.initialize(cs);

        Collection<ContrastResult> contrasts = result.getContrasts();
        for (ContrastResult contrast : contrasts) {
            FactorValue f = contrast.getFactorValue();
            Hibernate.initialize(f);
            //noinspection ResultOfMethodCallIgnored
            f.getIsBaseline();
        }

    }

    @Override
    public Map<ExpressionExperimentValueObject, List<DifferentialExpressionValueObject>> find(Gene gene,
            double threshold, Integer limit) {

        StopWatch timer = new StopWatch();
        timer.start();

        Session session = this.getSessionFactory().getCurrentSession();
        String sql = DifferentialExpressionResultDaoImpl.fetchResultsByGeneSQL;

        if (threshold > 0.0) {
            sql = sql + " and d.CORRECTED_PVALUE < :threshold ";
        }

        if (limit != null) {
            sql = sql + "  order by d.PVALUE ASC ";
        }

        SQLQuery query = session.createSQLQuery(sql);

        query.setParameter("gene_id", gene.getId());
        if (limit != null) {
            query.setMaxResults(limit);
        }
        if (threshold > 0.0) {
            query.setParameter("threshold", threshold);
        }

        Map<ExpressionExperimentValueObject, List<DifferentialExpressionValueObject>> results = new HashMap<>();

        //noinspection unchecked
        List<Object[]> qResult = query.list();

        Set<Long> resultSets = new HashSet<>();

        Map<Long, DifferentialExpressionValueObject> resultsInter = new HashMap<>();
        Set<Long> probeIds = new HashSet<>();
        GeneValueObject genevo = new GeneValueObject(gene);

        for (Object[] rec : qResult) {
            Long id = SQLUtils.asId(rec[0]);

            if (!resultsInter.containsKey(id)) {

                DifferentialExpressionValueObject vo = new DifferentialExpressionValueObject(id);
                vo.setP((Double) rec[1]);
                vo.setCorrP((Double) rec[2]);
                vo.setProbeId(SQLUtils.asId(rec[3])); // fill in probe name later.
                probeIds.add(vo.getProbeId());
                vo.setResultSetId(SQLUtils.asId(rec[4]));
                if (threshold > 0)
                    vo.setMetThreshold(true);

                vo.setGene(genevo);

                // gather up result sets so we can fetch the experiments, experimental factors
                resultSets.add(vo.getResultSetId());

                resultsInter.put(id, vo);

            }

            resultsInter.get(id).addContrast(SQLUtils.asId(rec[9]), SQLUtils.asId(rec[5]), (Double) rec[6],
                    (Double) rec[7], SQLUtils.asId(rec[8]));
        }

        // gather up probe information
        Map<Long, String> probeNames = new HashMap<>();
        if (!probeIds.isEmpty()) {
            //noinspection unchecked
            for (Object[] rec : (List<Object[]>) session
                    .createQuery("select id,name from CompositeSequence where id in (:ids)")
                    .setParameterList("ids", probeIds).list()) {
                probeNames.put((Long) rec[0], (String) rec[1]);
            }
        }

        /*
         * load the result set information.
         */
        if (resultSets.isEmpty())
            return results;

        //noinspection unchecked
        List<Object[]> ees = session.createQuery(
                "select ee, rs from  ExpressionAnalysisResultSet rs join fetch rs.experimentalFactors join rs.analysis a join a.experimentAnalyzed ee"
                        + " where rs.id in (:rsids)")
                .setParameterList("rsids", resultSets).list();

        /*
         * Finish populating the objects
         */
        for (Object[] oa : ees) {
            ExpressionAnalysisResultSet rs = (ExpressionAnalysisResultSet) oa[1];
            ExpressionExperimentValueObject evo = ((BioAssaySet) oa[0]).createValueObject();

            if (!results.containsKey(evo)) {
                results.put(evo, new ArrayList<DifferentialExpressionValueObject>());
            }

            for (Iterator<DifferentialExpressionValueObject> it = resultsInter.values().iterator(); it.hasNext();) {

                DifferentialExpressionValueObject dvo = it.next();
                dvo.setExpressionExperiment(evo);
                dvo.setProbe(probeNames.get(dvo.getProbeId()));
                dvo.getExperimentalFactors().clear();

                for (ExperimentalFactor ef : rs.getExperimentalFactors()) {
                    dvo.getExperimentalFactors().add(new ExperimentalFactorValueObject(ef));

                }

                if (dvo.getResultSetId().equals(rs.getId())) {
                    results.get(evo).add(dvo);
                    it.remove();
                }

            }
        }

        AbstractDao.log.debug("Num experiments with probe analysis results (with limit = " + limit + ") : "
                + results.size() + ". Number of results (probes x contrasts) returned in total: " + qResult.size());

        timer.stop();
        if (timer.getTime() > 1000) {
            AbstractDao.log.info("Diff ex results: " + timer.getTime() + " ms");
        }

        return results;
    }

    @Override
    public Histogram loadPvalueDistribution(Long resultSetId) {

        List<?> pvds = this.getHibernateTemplate().findByNamedParam(
                "select rs.pvalueDistribution from ExpressionAnalysisResultSet rs where rs.id=:rsid ", "rsid",
                resultSetId);
        if (pvds.isEmpty()) {
            return null;
        }

        assert pvds.size() == 1;

        PvalueDistribution pvd = (PvalueDistribution) pvds.get(0);
        ByteArrayConverter bac = new ByteArrayConverter();
        double[] counts = bac.byteArrayToDoubles(pvd.getBinCounts());

        Integer numBins = pvd.getNumBins();
        assert numBins == counts.length;

        Histogram hist = new Histogram(resultSetId.toString(), numBins, 0.0, 1.0);
        for (int i = 0; i < numBins; i++) {
            hist.fill(i, (int) counts[i]);
        }

        return hist;

    }

    @Override
    public Collection<DifferentialExpressionAnalysisResult> load(Collection<Long> ids) {
        //language=HQL
        final String queryString = "from DifferentialExpressionAnalysisResult dea where dea.id in (:ids)";

        Collection<DifferentialExpressionAnalysisResult> probeResults = new HashSet<>();

        if (ids.isEmpty()) {
            return probeResults;
        }

        int BATCH_SIZE = 1000; // previously: 500.

        for (Collection<Long> batch : new BatchIterator<>(ids, BATCH_SIZE)) {
            StopWatch timer = new StopWatch();
            timer.start();
            //noinspection unchecked
            probeResults.addAll(this.getSessionFactory().getCurrentSession().createQuery(queryString)
                    .setParameterList("ids", batch).list());

            if (timer.getTime() > 1000) {
                AbstractDao.log.info("Fetch " + batch.size() + "/" + ids.size() + " results with contrasts: "
                        + timer.getTime() + "ms; query was\n " + queryString);
            }
        }

        return probeResults;
    }

    @Override
    public Collection<DifferentialExpressionAnalysisResult> loadAll() {
        throw new UnsupportedOperationException("Sorry, that would be nuts");
    }

    @Override
    public void remove(Collection<DifferentialExpressionAnalysisResult> entities) {
        if (entities == null || entities.size() < 1)
            return;
        Collection<Long> cIds = new HashSet<>();

        // Read contrast ids and wipe references
        for (DifferentialExpressionAnalysisResult r : entities) {
            cIds.addAll(EntityUtils.getIds(r.getContrasts()));
            r.setContrasts(new HashSet<ContrastResult>());
        }

        // Remove contrasts
        if (cIds.size() > 0) {
            AbstractDao.log.info("Removing contrasts...");
            this.getSessionFactory().getCurrentSession()
                    .createQuery("delete from ContrastResult e where e.id in (:ids)").setParameterList("ids", cIds)
                    .executeUpdate();

        }

        // Remove results
        AbstractDao.log.info("Removing DEA results");
        this.getSessionFactory().getCurrentSession()
                .createQuery("delete from DifferentialExpressionAnalysisResult e where e.id in (:ids)")
                .setParameterList("ids", EntityUtils.getIds(entities)).executeUpdate();
    }

    /**
     * @return how many results were added. Either 1 or 0.
     */
    private int processResultTuple(Object resultRow, Map<Long, Map<Long, DiffExprGeneSearchResult>> resultsFromDb,
            Map<Long, Collection<Long>> cs2GeneIdMap) {
        Object[] row = (Object[]) resultRow;
        Long probeId = ((BigInteger) row[0]).longValue();
        Long resultId = ((BigInteger) row[1]).longValue();
        Long resultSetId = ((BigInteger) row[2]).longValue();
        Double correctedPvalue = (Double) row[3];
        Double pvalue = (Double) row[4];

        if (pvalue == null || correctedPvalue == null) {
            return 0;
        }

        if (!resultsFromDb.containsKey(resultSetId)) {
            resultsFromDb.put(resultSetId, new HashMap<Long, DiffExprGeneSearchResult>());
        }

        assert cs2GeneIdMap.containsKey(probeId);

        /*
         * This is a hack to mimic the effect of storing only 'good' results.
         */
        if (correctedPvalue > DifferentialExpressionResultDaoImpl.CORRECTED_PVALUE_THRESHOLD_TO_BE_CONSIDERED_DIFF_EX) {
            return 0;
        }

        for (Long geneId : cs2GeneIdMap.get(probeId)) {
            this.processDiffExResultHit(resultsFromDb.get(resultSetId), resultSetId, geneId, resultId,
                    correctedPvalue, pvalue);
        }

        if (AbstractDao.log.isDebugEnabled())
            AbstractDao.log.debug("resultset=" + resultSetId + " probe=" + probeId + " qval="
                    + String.format("%.2g", correctedPvalue));

        return 1;
    }

    /**
     * This is to be called after a query for diff ex results is finished for a set of resultSets and genes. It assumes
     * that if a gene is missing from the results, there are none for that resultSet for that gene. It then stores a
     * dummy entry.
     *
     * @param results - which might be empty.
     * @param resultSetids - the ones which we searched for in the database. Put in dummy results if we have to.
     */
    private void addToCache(Map<Long, Map<Long, DiffExprGeneSearchResult>> results, Collection<Long> resultSetids,
            Collection<Long> geneIds) {
        StopWatch timer = new StopWatch();
        timer.start();
        int i = 0;
        for (Long resultSetId : resultSetids) {
            Map<Long, DiffExprGeneSearchResult> resultSetResults = results.get(resultSetId);

            for (Long geneId : geneIds) {

                if (resultSetResults != null && resultSetResults.containsKey(geneId)) {
                    this.differentialExpressionResultCache.addToCache(resultSetResults.get(geneId));
                } else {
                    // put in a dummy, so we don't bother searching for it later.
                    this.differentialExpressionResultCache.addToCache(new MissingResult(resultSetId, geneId));
                }
                i++;
            }
        }

        if (timer.getTime() > 10) {
            AbstractDao.log.info("Add " + i + " results to cache: " + timer.getTime() + "ms");
        }
    }

    /**
     * Get results that are already in the cache.
     */
    private Map<Long, Collection<Long>> fillFromCache(Map<Long, Map<Long, DiffExprGeneSearchResult>> results,
            Collection<Long> resultSetIds, Collection<Long> geneIds) {

        Map<Long, Collection<Long>> foundInCache = new HashMap<>();

        boolean useCache = true;
        //noinspection ConstantConditions // for debugging ... disable cache.
        if (!useCache) {
            AbstractDao.log.warn("Cache is disabled");
            return foundInCache;
        }

        StopWatch timer = new StopWatch();
        timer.start();
        int totalFound = 0;
        for (Long r : resultSetIds) {

            Collection<DiffExprGeneSearchResult> cached = differentialExpressionResultCache.get(r, geneIds);

            if (cached.isEmpty())
                continue;

            foundInCache.put(r, new HashSet<Long>());

            for (DiffExprGeneSearchResult result : cached) {
                if (!results.containsKey(r)) {
                    results.put(r, new HashMap<Long, DiffExprGeneSearchResult>());
                }

                results.get(r).put(result.getGeneId(), result);
                foundInCache.get(r).add(result.getGeneId());
                totalFound++;
            }

        }

        if (timer.getTime() > 100 && totalFound > 0) {
            AbstractDao.log.info("Fill " + totalFound + " results from cache: " + timer.getTime() + "ms");
        }

        return foundInCache;

    }

    /**
     * For any genes for which the result did not meet the threshold, but where the gene was tested, add a dummy value.
     *
     * @return ms taken
     */
    private long fillNonSignificant(List<Long> pbL, Map<Long, DiffExResultSetSummaryValueObject> resultSetIds,
            Map<Long, Map<Long, DiffExprGeneSearchResult>> resultsFromDb, Collection<Long> resultSetIdBatch,
            Map<Long, Collection<Long>> cs2GeneIdMap, Session session) {

        if (pbL.isEmpty())
            return 0;
        int d = 0;
        StopWatch t = new StopWatch();
        t.start();
        for (Long resultSetId : resultSetIdBatch) {

            /*
             * only include a dummy for probes which are from this result set.
             */
            Collection<Long> arrayDesignIds = resultSetIds.get(resultSetId).getArrayDesignsUsed();
            Collection<Long> probesForResultSet = CommonQueries.filterProbesByPlatform(pbL, arrayDesignIds,
                    session);

            for (Long probeId : probesForResultSet) {
                for (Long geneId : cs2GeneIdMap.get(probeId)) {

                    Map<Long, DiffExprGeneSearchResult> resultsForResultSet = resultsFromDb.get(resultSetId);
                    if (resultsForResultSet == null || resultsForResultSet.containsKey(geneId)) {
                        continue;
                    }

                    DiffExprGeneSearchResult dummy = new NonRetainedResult(resultSetId, geneId);

                    resultsForResultSet.put(geneId, dummy);
                    d++;
                }
            }
        }
        if (t.getTime() > 100) {
            AbstractDao.log.info("Fill in " + d + " non-significant values: " + t.getTime() + "ms");
        }
        return t.getTime();
    }

    /**
     * @return map of probe to genes.
     */
    private Map<Long, Collection<Long>> getProbesForGenesInResultSetBatch(Session session, Collection<Long> geneIds,
            Map<Long, DiffExResultSetSummaryValueObject> resultSetIds, Collection<Long> resultSetIdBatch) {
        Collection<Long> adUsed = new HashSet<>();
        for (Long rsid : resultSetIdBatch) {
            assert resultSetIds.containsKey(rsid);
            Collection<Long> arrayDesignsUsed = resultSetIds.get(rsid).getArrayDesignsUsed();
            assert arrayDesignsUsed != null;
            adUsed.addAll(arrayDesignsUsed);
        }
        return CommonQueries.getCs2GeneIdMap(geneIds, adUsed, session);
    }

    /**
     * @param results map of gene id to result, which the result gets added to.
     * @param resultId the specific DifferentialExpressionAnalysisResult, corresponds to an entry for one probe in the
     *        resultSet
     */
    private void processDiffExResultHit(Map<Long, DiffExprGeneSearchResult> results, Long resultSetId, Long geneId,
            Long resultId, Double correctedPvalue, Double uncorrectedPvalue) {

        assert correctedPvalue != null;
        DiffExprGeneSearchResult r = results.get(geneId);

        if (r == null) { // first encounter
            r = new DiffExprGeneSearchResult(resultSetId, geneId);
            r.setResultId(resultId);
            r.setNumberOfProbes(r.getNumberOfProbes() + 1);

            if (correctedPvalue <= DifferentialExpressionResultDaoImpl.CORRECTED_PVALUE_THRESHOLD_TO_BE_CONSIDERED_DIFF_EX) {
                // This check is only useful if we are only storing 'significant' results.
                r.setNumberOfProbesDiffExpressed(r.getNumberOfProbesDiffExpressed() + 1);
            }
            r.setCorrectedPvalue(correctedPvalue);
            r.setPvalue(uncorrectedPvalue);
            results.put(geneId, r);
        } else if (r.getCorrectedPvalue() == null || r.getCorrectedPvalue() > correctedPvalue) {
            // replace with the better value
            r.setResultId(resultId); // note this changes the hashcode of r.
            r.setCorrectedPvalue(correctedPvalue);
            r.setNumberOfProbes(r.getNumberOfProbes() + 1);
            r.setPvalue(uncorrectedPvalue);
            r.setNumberOfProbesDiffExpressed(r.getNumberOfProbesDiffExpressed() + 1);
        }
    }

    /**
     * Identify resultSets that are still need to be queried. Those would be the ones which don't have information for
     * all the genes requested in the cache. Note that those results could be 'dummies' that are represent missing
     * results.
     */
    private Collection<Long> stripUnneededResultSets(Map<Long, Collection<Long>> foundInCache,
            Collection<Long> resultSetIds, Collection<Long> geneIds) {
        StopWatch timer = new StopWatch();
        timer.start();
        Collection<Long> needToQuery = new HashSet<>();
        for (Long resultSetId : resultSetIds) {

            if (!foundInCache.containsKey(resultSetId) || !foundInCache.get(resultSetId).containsAll(geneIds)) {
                needToQuery.add(resultSetId);
            }

        }
        if (timer.getTime() > 1) {
            AbstractDao.log.info("Checking cache results: " + timer.getTime() + "ms; " + needToQuery.size()
                    + " result sets must be queried");
        }
        return needToQuery;
    }
}