ubic.gemma.analysis.report.ExpressionExperimentReportServiceImpl.java Source code

Java tutorial

Introduction

Here is the source code for ubic.gemma.analysis.report.ExpressionExperimentReportServiceImpl.java

Source

/*
 * The Gemma project
 * 
 * Copyright (c) 2007 Columbia University
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */
package ubic.gemma.analysis.report;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;

import net.sf.ehcache.Cache;
import net.sf.ehcache.CacheManager;
import net.sf.ehcache.Element;
import net.sf.ehcache.config.CacheConfiguration;
import net.sf.ehcache.config.NonstopConfiguration;
import net.sf.ehcache.config.TerracottaConfiguration;
import net.sf.ehcache.config.TimeoutBehaviorConfiguration;
import net.sf.ehcache.config.TimeoutBehaviorConfiguration.TimeoutBehaviorType;
import net.sf.ehcache.store.MemoryStoreEvictionPolicy;

import org.apache.commons.lang.time.StopWatch;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.springframework.beans.factory.InitializingBean;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.security.access.annotation.Secured;
import org.springframework.stereotype.Component;

import ubic.gemma.expression.experiment.service.ExpressionExperimentService;
import ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysisService;
import ubic.gemma.model.association.coexpression.Probe2ProbeCoexpressionService;
import ubic.gemma.model.common.Auditable;
import ubic.gemma.model.common.auditAndSecurity.AuditEvent;
import ubic.gemma.model.common.auditAndSecurity.AuditEventService;
import ubic.gemma.model.common.auditAndSecurity.Securable;
import ubic.gemma.model.common.auditAndSecurity.eventType.AuditEventType;
import ubic.gemma.model.common.auditAndSecurity.eventType.AutomatedAnnotationEvent;
import ubic.gemma.model.common.auditAndSecurity.eventType.BatchInformationFetchingEvent;
import ubic.gemma.model.common.auditAndSecurity.eventType.DifferentialExpressionAnalysisEvent;
import ubic.gemma.model.common.auditAndSecurity.eventType.LinkAnalysisEvent;
import ubic.gemma.model.common.auditAndSecurity.eventType.MissingValueAnalysisEvent;
import ubic.gemma.model.common.auditAndSecurity.eventType.PCAAnalysisEvent;
import ubic.gemma.model.common.auditAndSecurity.eventType.ProcessedVectorComputationEvent;
import ubic.gemma.model.common.auditAndSecurity.eventType.ValidatedAnnotations;
import ubic.gemma.model.expression.bioAssayData.ProcessedDataVectorCache;
import ubic.gemma.model.expression.experiment.ExpressionExperiment;
import ubic.gemma.model.expression.experiment.ExpressionExperimentValueObject;
import ubic.gemma.security.SecurityService;
import ubic.gemma.util.ConfigUtils;
import ubic.gemma.util.EntityUtils;
import ubic.gemma.visualization.ExperimentalDesignVisualizationService;

/**
 * Handles creation, serialization and/or marshaling of reports about expression experiments. Reports are stored in
 * ExpressionExperimentValueObjects.
 * 
 * @author jsantos
 * @author paul
 * @author klc
 * @version $Id: ExpressionExperimentReportServiceImpl.java,v 1.56 2013/04/18 22:53:15 paul Exp $
 */
@Component
public class ExpressionExperimentReportServiceImpl implements ExpressionExperimentReportService, InitializingBean {

    @Autowired
    private AuditEventService auditEventService;

    @Autowired
    private CacheManager cacheManager;

    @Autowired
    private DifferentialExpressionAnalysisService differentialExpressionAnalysisService;

    private String EESTATS_CACHE_NAME = "ExpressionExperimentReportsCache";

    /**
     * Batch of classes we can get events for all at once.
     */
    @SuppressWarnings("unchecked")
    private Class<? extends AuditEventType>[] eventTypes = new Class[] { LinkAnalysisEvent.class,
            MissingValueAnalysisEvent.class, ProcessedVectorComputationEvent.class, ValidatedAnnotations.class,
            DifferentialExpressionAnalysisEvent.class, AutomatedAnnotationEvent.class,
            BatchInformationFetchingEvent.class, PCAAnalysisEvent.class };

    @Autowired
    private ExperimentalDesignVisualizationService experimentalDesignVisualizationService;

    @Autowired
    private ExpressionExperimentService expressionExperimentService;

    private Log log = LogFactory.getLog(this.getClass());

    @Autowired
    private Probe2ProbeCoexpressionService probe2ProbeCoexpressionService;

    @Autowired
    private ProcessedDataVectorCache processedDataVectorCache;

    @Autowired
    private SecurityService securityService;

    /**
     * Cache to hold stats in memory. This is used to avoid hittinig the disk for reports too often.
     */
    private Cache statsCache;

    @Override
    public void afterPropertiesSet() throws Exception {
        /*
         * Initialize the cache; if it already exists it will not be recreated.
         */
        boolean terracottaEnabled = ConfigUtils.getBoolean("gemma.cache.clustered", false);
        boolean diskPersistent = ConfigUtils.getBoolean("gemma.cache.diskpersistent", false) && !terracottaEnabled;
        int maxElements = 5000;
        boolean eternal = false;
        boolean overFlowToDisk = false;
        int diskExpiryThreadIntervalSeconds = 600;
        int maxElementsOnDisk = 10000;
        int secondsToLive = 300;
        boolean terracottaCoherentReads = false;
        boolean clearOnFlush = false;

        if (terracottaEnabled) {
            CacheConfiguration config = new CacheConfiguration(EESTATS_CACHE_NAME, maxElements);
            config.setStatistics(false);
            config.setMemoryStoreEvictionPolicy(MemoryStoreEvictionPolicy.LRU.toString());
            config.setOverflowToDisk(false);
            config.setEternal(eternal);
            config.setTimeToIdleSeconds(0);
            config.setTimeToLiveSeconds(secondsToLive);
            config.setMaxElementsOnDisk(maxElementsOnDisk);
            config.addTerracotta(new TerracottaConfiguration());
            config.getTerracottaConfiguration().setCoherentReads(terracottaCoherentReads);
            config.clearOnFlush(clearOnFlush);

            config.getTerracottaConfiguration().setClustered(true);
            config.getTerracottaConfiguration().setValueMode("SERIALIZATION");
            NonstopConfiguration nonstopConfiguration = new NonstopConfiguration();
            TimeoutBehaviorConfiguration tobc = new TimeoutBehaviorConfiguration();
            tobc.setType(TimeoutBehaviorType.NOOP.getTypeName());
            nonstopConfiguration.addTimeoutBehavior(tobc);
            config.getTerracottaConfiguration().addNonstop(nonstopConfiguration);
            this.statsCache = new Cache(config);
        } else {
            this.statsCache = new Cache(EESTATS_CACHE_NAME, maxElements, MemoryStoreEvictionPolicy.LRU,
                    overFlowToDisk, null, eternal, secondsToLive, 0, diskPersistent,
                    diskExpiryThreadIntervalSeconds, null);
        }

        cacheManager.addCache(statsCache);
        this.statsCache = cacheManager.getCache(EESTATS_CACHE_NAME);

    }

    /*
     * (non-Javadoc)
     * 
     * @see ubic.gemma.analysis.report.ExpressionExperimentReportService#evictFromCache(java.lang.Long)
     */
    @Override
    public void evictFromCache(Long id) {
        this.statsCache.remove(id);

        processedDataVectorCache.clearCache(id);
        experimentalDesignVisualizationService.clearCaches(id);

    }

    /*
     * (non-Javadoc)
     * 
     * @see ubic.gemma.analysis.report.ExpressionExperimentReportService#generateSummaryObject(java.lang.Long)
     */
    @Override
    public ExpressionExperimentValueObject generateSummary(Long id) {
        assert id != null;
        Collection<Long> ids = new ArrayList<Long>();
        ids.add(id);
        Collection<ExpressionExperimentValueObject> results = generateSummaryObjects(ids);
        if (results.size() > 0) {
            return results.iterator().next();
        }
        return null;
    }

    /*
     * (non-Javadoc)
     * 
     * @see ubic.gemma.analysis.report.ExpressionExperimentReportService#generateSummaryObjects()
     */
    @Override
    @Secured({ "GROUP_AGENT" })
    public void generateSummaryObjects() {
        Collection<Long> ids = EntityUtils.getIds(expressionExperimentService.loadAll());
        Collection<ExpressionExperimentValueObject> vos = expressionExperimentService.loadValueObjects(ids, false);
        getStats(vos);
    }

    /*
     * (non-Javadoc)
     * 
     * @see ubic.gemma.analysis.report.ExpressionExperimentReportService#generateSummaryObjects(java.util.Collection)
     */
    @Override
    public Collection<ExpressionExperimentValueObject> generateSummaryObjects(Collection<Long> ids) {

        Collection<ExpressionExperimentValueObject> vos = expressionExperimentService.loadValueObjects(ids, false);
        getStats(vos);

        for (ExpressionExperimentValueObject vo : vos) {
            evictFromCache(vo.getId());
            statsCache.put(new Element(vo.getId(), vo));
        }
        return vos;
    }

    /**
     * Populate information about how many annotations there are, and how many factor values there are. Batch is not
     * counted towards the number of factors
     * 
     * @param vos
     */
    @Override
    public void getAnnotationInformation(Collection<ExpressionExperimentValueObject> vos) {

        StopWatch timer = new StopWatch();
        timer.start();

        Collection<Long> ids = new HashSet<Long>();
        for (ExpressionExperimentValueObject eeVo : vos) {
            Long id = eeVo.getId();
            ids.add(id);
        }

        Map<Long, Integer> annotationCounts = expressionExperimentService.getAnnotationCounts(ids);

        Map<Long, Integer> factorCounts = expressionExperimentService.getPopulatedFactorCountsExcludeBatch(ids);

        for (ExpressionExperimentValueObject eeVo : vos) {
            Long id = eeVo.getId();
            eeVo.setNumAnnotations(annotationCounts.get(id));
            eeVo.setNumPopulatedFactors(factorCounts.get(id));
        }

        if (timer.getTime() > 1000) {
            log.info("Fill annotation information: " + timer.getTime() + "ms");
        }

    }

    /**
     * Fills in event and security information from the database. This will only retrieve the latest event (if any).
     * This is rather slow so should be avoided if the information isn't needed.
     * 
     * @return Map of EE ids to the most recent update.
     */
    @Override
    public Map<Long, Date> getEventInformation(Collection<ExpressionExperimentValueObject> vos) {

        StopWatch timer = new StopWatch();
        timer.start();

        Collection<Long> ids = EntityUtils.getIds(vos);

        Map<Long, Date> results = new HashMap<Long, Date>();

        // do this ahead to avoid round trips - this also filters...
        Collection<ExpressionExperiment> ees = expressionExperimentService.loadMultiple(ids);

        if (ees.size() == 0) {
            return results;
        }

        Map<Long, ExpressionExperiment> eemap = EntityUtils.getIdMap(ees);

        Collection<Long> troubledEEs = getTroubled(ees);
        Map<Long, Date> lastArrayDesignUpdates = expressionExperimentService.getLastArrayDesignUpdate(ees);
        Collection<Class<? extends AuditEventType>> typesToGet = Arrays.asList(eventTypes);

        Map<Class<? extends AuditEventType>, Map<Auditable, AuditEvent>> events = getEvents(ees, typesToGet);

        Map<Auditable, AuditEvent> linkAnalysisEvents = events.get(LinkAnalysisEvent.class);
        Map<Auditable, AuditEvent> missingValueAnalysisEvents = events.get(MissingValueAnalysisEvent.class);
        Map<Auditable, AuditEvent> rankComputationEvents = events.get(ProcessedVectorComputationEvent.class);

        Collection<Long> validatedEEs = getValidated(vos);

        Map<Auditable, AuditEvent> differentialAnalysisEvents = events
                .get(DifferentialExpressionAnalysisEvent.class);
        Map<Auditable, AuditEvent> autotaggerEvents = events.get(AutomatedAnnotationEvent.class);
        Map<Auditable, AuditEvent> batchFetchEvents = events.get(BatchInformationFetchingEvent.class);
        Map<Auditable, AuditEvent> pcaAnalysisEvents = events.get(PCAAnalysisEvent.class);

        Map<Long, Collection<AuditEvent>> sampleRemovalEvents = getSampleRemovalEvents(ees);

        Map<Securable, Boolean> privacyInfo = securityService.arePrivate(ees);
        Map<Securable, Boolean> sharingInfo = securityService.areShared(ees);

        /*
         * add in the last events of interest for all eeVos This step is remarkably slow.
         */
        for (ExpressionExperimentValueObject eeVo : vos) {

            /*
             * Note that in the current incarnation, the last update date is already filled in, so the checks in this
             * loop are superfluous.
             */
            Date mostRecentDate = eeVo.getDateLastUpdated() == null ? new Date(0) : eeVo.getDateLastUpdated();

            Long id = eeVo.getId();

            ExpressionExperiment ee = eemap.get(id);

            if (linkAnalysisEvents.containsKey(ee)) {
                AuditEvent event = linkAnalysisEvents.get(ee);
                if (event != null) {
                    Date date = event.getDate();
                    eeVo.setDateLinkAnalysis(date);

                    eeVo.setLinkAnalysisEventType(event.getEventType().getClass().getSimpleName());
                }
            }

            if (missingValueAnalysisEvents.containsKey(ee)) {
                AuditEvent event = missingValueAnalysisEvents.get(ee);
                if (event != null) {
                    Date date = event.getDate();
                    eeVo.setDateMissingValueAnalysis(date);

                    eeVo.setMissingValueAnalysisEventType(event.getEventType().getClass().getSimpleName());
                }
            }

            if (rankComputationEvents.containsKey(ee)) {
                AuditEvent event = rankComputationEvents.get(ee);
                if (event != null) {
                    Date date = event.getDate();
                    eeVo.setDateProcessedDataVectorComputation(date);

                    eeVo.setProcessedDataVectorComputationEventType(
                            event.getEventType().getClass().getSimpleName());
                }
            }

            if (differentialAnalysisEvents.containsKey(ee)) {
                AuditEvent event = differentialAnalysisEvents.get(ee);
                if (event != null) {
                    Date date = event.getDate();
                    eeVo.setDateDifferentialAnalysis(date);

                }
            }

            if (pcaAnalysisEvents.containsKey(ee)) {
                AuditEvent event = pcaAnalysisEvents.get(ee);
                if (event != null) {
                    Date date = event.getDate();
                    eeVo.setDatePcaAnalysis(date);

                    eeVo.setPcaAnalysisEventType(event.getEventType().getClass().getSimpleName());
                }
            }

            if (batchFetchEvents.containsKey(ee)) {
                AuditEvent event = batchFetchEvents.get(ee);
                if (event != null) {
                    Date date = event.getDate();
                    eeVo.setDateBatchFetch(date);

                    eeVo.setBatchFetchEventType(event.getEventType().getClass().getSimpleName());
                }
            }

            if (lastArrayDesignUpdates.containsKey(id)) {
                Date date = lastArrayDesignUpdates.get(id);
                eeVo.setDateArrayDesignLastUpdated(date);
            }

            if (validatedEEs.contains(ee.getId())) {

                eeVo.setValidated(true);

            }

            if (autotaggerEvents.containsKey(ee)) {
                AuditEvent taggerEvent = autotaggerEvents.get(ee);

                if (taggerEvent.getDate().after(mostRecentDate)) {
                    mostRecentDate = taggerEvent.getDate();
                }

                eeVo.setAutoTagDate(taggerEvent.getDate());
            }

            if (privacyInfo.containsKey(ee)) {
                eeVo.setIsPublic(!privacyInfo.get(ee));
            }

            if (sharingInfo.containsKey(ee)) {
                eeVo.setIsShared(sharingInfo.get(ee));
            }

            /*
             * The following are keyed by ID
             */

            if (sampleRemovalEvents.containsKey(id)) {
                Collection<AuditEvent> removalEvents = sampleRemovalEvents.get(id);
                // we find we are getting lazy-load exceptions from this guy.
                eeVo.auditEvents2SampleRemovedFlags(removalEvents);

            }

            if (troubledEEs.contains(id)) {
                Collection<Long> tids = new HashSet<Long>();
                tids.add(id);
                Map<Long, AuditEvent> troublM = expressionExperimentService.getLastTroubleEvent(tids);
                if (!troublM.isEmpty()) {
                    eeVo.setTroubleDetails(troublM.get(id).getDate() + ": " + troublM.get(id).getNote());
                    eeVo.setTroubled(true);
                } else {
                    log.warn("Status was out of date? for EE=" + id);
                }
            }

            if (validatedEEs.contains(id)) {
                eeVo.setValidated(true);
            }

            if (mostRecentDate.after(new Date(0)))
                results.put(ee.getId(), mostRecentDate);
        }

        if (timer.getTime() > 1000)
            log.info("Retrieving audit events took " + timer.getTime() + "ms");

        return results;
    }

    /**
     * Fills in link analysis and other info from the report.
     * 
     * @return map of when the objects were most recently updated (or created)
     */
    @Override
    public Map<Long, Date> getReportInformation(Collection<ExpressionExperimentValueObject> vos) {
        StopWatch timer = new StopWatch();
        Map<Long, Date> result = new HashMap<Long, Date>();
        timer.start();

        List<Long> ids = new ArrayList<Long>();
        for (ExpressionExperimentValueObject vo : vos) {
            ids.add(vo.getId());
        }

        Collection<ExpressionExperimentValueObject> cachedVos = retrieveSummaryObjects(ids);
        Map<Long, ExpressionExperimentValueObject> id2cachedVo = EntityUtils.getIdMap(cachedVos);

        for (ExpressionExperimentValueObject eeVo : vos) {
            ExpressionExperimentValueObject cacheVo = id2cachedVo.get(eeVo.getId());
            if (cacheVo != null) {
                eeVo.setBioMaterialCount(cacheVo.getBioMaterialCount());
                eeVo.setProcessedExpressionVectorCount(cacheVo.getProcessedExpressionVectorCount());
                eeVo.setCoexpressionLinkCount(cacheVo.getCoexpressionLinkCount());
                eeVo.setDateCached(cacheVo.getDateCached());
                eeVo.setDifferentialExpressionAnalyses(cacheVo.getDifferentialExpressionAnalyses());

                if (eeVo.getDateCreated() == null) {
                    // should be filled in already.
                    log.warn("Create date was not populated: " + eeVo);
                    eeVo.setDateCreated(cacheVo.getDateCreated());
                    eeVo.setDateLastUpdated(cacheVo.getDateLastUpdated());
                }

                if (eeVo.getDateLastUpdated() != null) {
                    result.put(eeVo.getId(), eeVo.getDateLastUpdated());
                } else {
                    result.put(eeVo.getId(), eeVo.getDateCreated());
                }

            }
        }
        timer.stop();
        if (timer.getTime() > 1000) {
            log.info(vos.size() + " EE reports fetched in " + timer.getTime() + "ms");
        }

        return result;
    }

    /*
     * (non-Javadoc)
     * 
     * @see ubic.gemma.analysis.report.ExpressionExperimentReportService#retrieveSummaryObjects(java.util.Collection)
     */
    @Override
    public Collection<ExpressionExperimentValueObject> retrieveSummaryObjects(Collection<Long> ids) {
        Collection<ExpressionExperimentValueObject> eeValueObjects = new ArrayList<ExpressionExperimentValueObject>();
        Collection<Long> filteredIds = securityFilterExpressionExperimentIds(ids);

        int incache = 0;
        for (Long id : filteredIds) {

            Element cachedElement = this.statsCache.get(id);
            if (cachedElement != null) {
                incache++;
                Serializable el = cachedElement.getValue();
                assert el instanceof ExpressionExperimentValueObject;

                eeValueObjects.add((ExpressionExperimentValueObject) el);
                continue;
            }

            ExpressionExperimentValueObject valueObject = generateSummary(id);
            eeValueObjects.add(valueObject);
            continue;

        }
        if (ids.size() > 1) {
            log.info(incache + "/" + ids.size() + " reports were found in the cache");
        }
        return eeValueObjects;
    }

    /**
     * @param ees
     * @param types
     * @return
     */
    private Map<Class<? extends AuditEventType>, Map<Auditable, AuditEvent>> getEvents(
            Collection<ExpressionExperiment> ees, Collection<Class<? extends AuditEventType>> types) {

        return auditEventService.getLastEvents(ees, types);

    }

    /**
     * @param ees
     * @return
     */
    private Map<Long, Collection<AuditEvent>> getSampleRemovalEvents(Collection<ExpressionExperiment> ees) {
        Map<Long, Collection<AuditEvent>> result = new HashMap<Long, Collection<AuditEvent>>();
        Map<ExpressionExperiment, Collection<AuditEvent>> rawr = expressionExperimentService
                .getSampleRemovalEvents(ees);
        for (ExpressionExperiment e : rawr.keySet()) {
            result.put(e.getId(), rawr.get(e));
        }
        return result;
    }

    /**
     * Compute statistics for EEs, that aren't immediately part of the value object.
     * 
     * @param vos
     */
    private void getStats(Collection<ExpressionExperimentValueObject> vos) {
        log.debug("Getting stats for " + vos.size() + " value objects.");
        int count = 0;
        for (ExpressionExperimentValueObject object : vos) {
            getStats(object);

            if (++count % 10 == 0) {
                log.debug("Processed " + count + " reports.");
            }
        }
        log.debug("Done, processed " + count + " reports");
    }

    /**
     * Get the stats report for one EE
     * 
     * @param object
     */
    private void getStats(ExpressionExperimentValueObject eeVo) {
        Long id = eeVo.getId();
        assert id != null;

        eeVo.setDifferentialExpressionAnalyses(differentialExpressionAnalysisService.getAnalysisValueObjects(id));

        eeVo.setCoexpressionLinkCount(probe2ProbeCoexpressionService.countLinks(id));

        Date timestamp = new Date(System.currentTimeMillis());
        eeVo.setDateCached(timestamp);
        assert eeVo.getDateCreated() != null;
        assert eeVo.getDateLastUpdated() != null;

    }

    private Collection<Long> getTroubled(Collection<ExpressionExperiment> ees) {
        Collection<Long> ids = EntityUtils.getIds(ees);
        Collection<Long> untroubled = expressionExperimentService.getUntroubled(ids);
        ids.removeAll(untroubled);
        return ids;
    }

    private Collection<Long> getValidated(Collection<ExpressionExperimentValueObject> vos) {
        Collection<Long> result = new HashSet<Long>();
        for (ExpressionExperimentValueObject ee : vos) {

            if (ee.getValidated()) {
                result.add(ee.getId());
            }
        }
        return result;
    }

    /**
     * @param ids
     * @return
     */
    private Collection<Long> securityFilterExpressionExperimentIds(Collection<Long> ids) {
        /*
         * Because this method returns the results, we have to screen.
         */
        Collection<ExpressionExperiment> securityScreened = expressionExperimentService.loadMultiple(ids);

        Collection<Long> filteredIds = new HashSet<Long>();
        for (ExpressionExperiment ee : securityScreened) {
            filteredIds.add(ee.getId());
        }
        return filteredIds;
    }

}