ubic.gemma.analysis.report.DatabaseViewGeneratorImpl.java Source code

Java tutorial

Introduction

Here is the source code for ubic.gemma.analysis.report.DatabaseViewGeneratorImpl.java

Source

/*
 * The Gemma project
 * 
 * Copyright (c) 2009 University of British Columbia
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */
package ubic.gemma.analysis.report;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.util.Collection;
import java.util.zip.GZIPOutputStream;

import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import ubic.gemma.analysis.util.ExperimentalDesignUtils;
import ubic.gemma.expression.experiment.service.ExpressionExperimentService;
import ubic.gemma.model.analysis.expression.diff.ContrastResult;
import ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysis;
import ubic.gemma.model.analysis.expression.diff.DifferentialExpressionResultService;
import ubic.gemma.model.analysis.expression.diff.ExpressionAnalysisResultSet;
import ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysisResult;
import ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysisService;
import ubic.gemma.model.common.description.Characteristic;
import ubic.gemma.model.common.description.VocabCharacteristic;
import ubic.gemma.model.expression.arrayDesign.ArrayDesign;
import ubic.gemma.model.expression.arrayDesign.ArrayDesignService;
import ubic.gemma.model.expression.designElement.CompositeSequence;
import ubic.gemma.model.expression.designElement.CompositeSequenceService;
import ubic.gemma.model.expression.experiment.ExperimentalFactor;
import ubic.gemma.model.expression.experiment.ExpressionExperiment;
import ubic.gemma.model.expression.experiment.FactorValue;
import ubic.gemma.model.genome.Gene;
import ubic.gemma.model.genome.Taxon;

/**
 * Generates textual views of the database so other people can use the data.
 * <p>
 * Development of this was started due to the collaboration with NIF. See <a
 * href="http://www.chibi.ubc.ca/faculty/pavlidis/bugs/show_bug.cgi?id=1747">bug 1747</a>.
 * <p>
 * It is essential that these views be created by a principal with Anonymous status, so as not to create views of
 * private data (that could be done, but would be separate).
 * 
 * @author paul
 * @version $Id: DatabaseViewGeneratorImpl.java,v 1.6 2012/06/23 14:01:40 paul Exp $
 */
@Component
public class DatabaseViewGeneratorImpl implements DatabaseViewGenerator {

    /** Corrected p-value cutoff; results with a larger corrected p-value are left out of the diff-ex view. */
    private static final double THRESH_HOLD = 0.01;

    /** Class-wide logger; final so it cannot be reassigned after class initialization. */
    private static final Log log = LogFactory.getLog(DatabaseViewGeneratorImpl.class);

    /* Base names of the view files; getViewFile appends VIEW_FILE_SUFFIX to them. */
    private static final String DATASET_SUMMARY_VIEW_BASENAME = "DatasetSummary";
    private static final String DATASET_TISSUE_VIEW_BASENAME = "DatasetTissue";
    private static final String DATASET_DIFFEX_VIEW_BASENAME = "DatasetDiffEx";

    /** Loads and thaws data sets, and looks up their taxon and array designs. */
    @Autowired
    private ExpressionExperimentService expressionExperimentService;

    /** Maps probes (composite sequences) to genes. */
    @Autowired
    private CompositeSequenceService compositeSequenceService;

    /** Fetches the differential expression analyses of a data set. */
    @Autowired
    private DifferentialExpressionAnalysisService differentialExpressionAnalysisService;

    /** Thaws differential expression result sets before they are read. */
    @Autowired
    private DifferentialExpressionResultService differentialExpressionResultService;

    /** Thaws array designs so their design provider can be read. */
    @Autowired
    private ArrayDesignService arrayDesignService;

    /**
     * Generates all three views in order: dataset summary, dataset tissue, then differential expression.
     * 
     * @param limit maximum number of data sets to process per view; <code>null</code>, zero or a negative value
     *        means no limit
     * @throws RuntimeException wrapping the underlying {@link IOException} if a view cannot be written
     * @see ubic.gemma.analysis.report.DatabaseViewGenerator#runAll(java.lang.Integer)
     */
    @Override
    public void runAll(Integer limit) {
        // TODO: put the loading and thawing of EE's here and pass the EE in as a parameter so that the
        // EE's are not thawed multiple times (will this matter?)

        // runAll() passes null. Auto-unboxing null into the int parameters below would throw a
        // NullPointerException, so translate "no limit given" to 0, which the limit checks treat as unlimited.
        int effectiveLimit = limit == null ? 0 : limit.intValue();

        try {
            generateDatasetView(effectiveLimit);
            generateDatasetTissueView(effectiveLimit);
            generateDifferentialExpressionView(effectiveLimit);
        } catch (IOException e) {
            // FileNotFoundException is a subclass of IOException, so this single handler covers both.
            throw new RuntimeException(e);
        }
    }

    /**
     * Convenience overload: delegates to {@link #runAll(Integer)} with a <code>null</code> limit, i.e. without
     * requesting any limit on the number of data sets.
     * 
     * @see ubic.gemma.analysis.report.DatabaseViewGenerator#runAll()
     */
    @Override
    public void runAll() {
        final Integer noLimit = null;
        this.runAll(noLimit);
    }

    /**
     * Writes the dataset summary view: a gzip-compressed, tab-delimited file with a header line followed by one
     * line per data set (id, source database, accession, short name, name, description, taxon common name and a
     * comma-separated list of array design providers). Data sets for which no taxon is found are skipped.
     * 
     * @param limit maximum number of data sets to write; zero or a negative value means no limit
     * @throws FileNotFoundException if the output file cannot be opened for writing
     * @throws IOException if writing the view fails
     * @see ubic.gemma.analysis.report.DatabaseViewGenerator#generateDatasetView(int)
     */
    @Override
    public void generateDatasetView(int limit) throws FileNotFoundException, IOException {

        log.info("Generating dataset summary view");

        /*
         * Get handle to output file
         */
        File file = getViewFile(DATASET_SUMMARY_VIEW_BASENAME);
        log.info("Writing to " + file);
        // NOTE(review): no charset is specified, so the platform default encoding is used -- confirm this is intended.
        Writer writer = new OutputStreamWriter(new GZIPOutputStream(new FileOutputStream(file)));

        // Close the writer even when a service call or a write fails, so the file handle is not leaked and
        // the gzip stream is finished.
        try {
            /*
             * Load all the data sets
             */
            Collection<ExpressionExperiment> vos = expressionExperimentService.loadAll();

            writer.write("GemmaDsId\tSource\tSourceAccession\tShortName\tName\tDescription\ttaxon\tManufacturer\n");

            /*
             * Print out their names etc.
             */
            int i = 0;
            for (ExpressionExperiment vo : vos) {
                vo = expressionExperimentService.thawLite(vo);
                log.info("Processing: " + vo.getShortName());

                String acc = "";
                String source = "";

                if (vo.getAccession() != null && vo.getAccession().getAccession() != null) {
                    acc = vo.getAccession().getAccession();
                    source = vo.getAccession().getExternalDatabase().getName();
                }

                Long gemmaId = vo.getId();
                String shortName = vo.getShortName();
                String name = vo.getName();

                // Tabs and line breaks in the description would break the one-record-per-line layout.
                String description = vo.getDescription();
                description = StringUtils.replaceChars(description, '\t', ' ');
                description = StringUtils.replaceChars(description, '\n', ' ');
                description = StringUtils.replaceChars(description, '\r', ' ');

                Taxon taxon = expressionExperimentService.getTaxon(vo);

                if (taxon == null)
                    continue;

                Collection<ArrayDesign> ads = expressionExperimentService.getArrayDesignsUsed(vo);
                StringBuilder manufacturers = new StringBuilder();

                // TODO could cache the arrayDesigns to make faster, thawing ad is time consuming
                for (ArrayDesign ad : ads) {
                    ad = arrayDesignService.thawLite(ad);
                    if (ad.getDesignProvider() == null) {
                        log.debug("Array Design: " + ad.getShortName()
                                + " has no design provoider assoicated with it. Skipping");
                        continue;
                    }
                    manufacturers.append(ad.getDesignProvider().getName()).append(',');
                }

                // chomp drops the trailing comma left by the loop above.
                writer.write(String.format("%d\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n", gemmaId, source, acc, shortName, name,
                        description, taxon.getCommonName(), StringUtils.chomp(manufacturers.toString(), ",")));

                // Stop once 'limit' data sets have been written (">" here would let limit + 1 through).
                if (limit > 0 && ++i >= limit)
                    break;

            }
        } finally {
            writer.close();
        }
    }

    /**
     * Writes the dataset tissue view: a gzip-compressed, tab-delimited file with a header line followed by one
     * line (data set id, term, term URI) for each characteristic of a data set that has a non-blank value and the
     * category "OrganismPart". The URI column is empty unless the characteristic is a {@link VocabCharacteristic}
     * with a non-blank value URI.
     * 
     * @param limit maximum number of data sets to process; zero or a negative value means no limit
     * @throws FileNotFoundException if the output file cannot be opened for writing
     * @throws IOException if writing the view fails
     * @see ubic.gemma.analysis.report.DatabaseViewGenerator#generateDatasetTissueView(int)
     */
    @Override
    public void generateDatasetTissueView(int limit) throws FileNotFoundException, IOException {
        log.info("Generating dataset tissue view");

        /*
         * Get handle to output file
         */
        File file = getViewFile(DATASET_TISSUE_VIEW_BASENAME);
        log.info("Writing to " + file);
        // NOTE(review): no charset is specified, so the platform default encoding is used -- confirm this is intended.
        Writer writer = new OutputStreamWriter(new GZIPOutputStream(new FileOutputStream(file)));

        // Close the writer even when a service call or a write fails, so the file handle is not leaked and
        // the gzip stream is finished.
        try {
            /*
             * Load all the data sets
             */
            Collection<ExpressionExperiment> vos = expressionExperimentService.loadAll();

            /*
             * For all of their annotations... if it's a tissue, print out a line
             */
            writer.write("GemmaDsId\tTerm\tTermURI\n");
            int i = 0;
            for (ExpressionExperiment vo : vos) {
                vo = expressionExperimentService.thawLite(vo);

                log.info("Processing: " + vo.getShortName());

                Long gemmaId = vo.getId();

                for (Characteristic c : vo.getCharacteristics()) {

                    if (StringUtils.isBlank(c.getValue())) {
                        continue;
                    }

                    // Constant-first comparison: a characteristic without a category is skipped rather than
                    // causing a NullPointerException.
                    if ("OrganismPart".equals(c.getCategory())) { // or tissue? check URI

                        String uri = "";

                        /*
                         * check if vocab characteristic.
                         */
                        if (c instanceof VocabCharacteristic) {
                            VocabCharacteristic vocabCharacteristic = (VocabCharacteristic) c;
                            if (StringUtils.isNotBlank(vocabCharacteristic.getValueUri()))
                                uri = vocabCharacteristic.getValueUri();
                        }

                        writer.write(String.format("%d\t%s\t%s\n", gemmaId, c.getValue(), uri));

                    }

                }

                // Stop once 'limit' data sets have been processed (">" here would let limit + 1 through).
                if (limit > 0 && ++i >= limit)
                    break;

            }
        } finally {
            writer.close();
        }
    }

    /**
     * Writes the differential expression view: a gzip-compressed, tab-delimited file with a header line followed
     * by the lines produced by <code>formatDiffExResult</code> for every analysis result whose corrected p-value
     * is present and not greater than <code>THRESH_HOLD</code>.
     * <p>
     * Result sets without a baseline group, and result sets whose baseline belongs to a batch factor, are skipped.
     * Data sets without any differential expression analysis are skipped and do not count towards the limit.
     * 
     * @param limit maximum number of data sets (with analyses) to process; zero or a negative value means no limit
     * @throws FileNotFoundException if the output file cannot be opened for writing
     * @throws IOException if writing the view fails
     * @see ubic.gemma.analysis.report.DatabaseViewGenerator#generateDifferentialExpressionView(int)
     */
    @Override
    public void generateDifferentialExpressionView(int limit) throws FileNotFoundException, IOException {
        log.info("Generating dataset diffex view");

        /*
         * Get handle to output file
         */
        File file = getViewFile(DATASET_DIFFEX_VIEW_BASENAME);
        log.info("Writing to " + file);
        // NOTE(review): no charset is specified, so the platform default encoding is used -- confirm this is intended.
        Writer writer = new OutputStreamWriter(new GZIPOutputStream(new FileOutputStream(file)));

        // Close the writer even when a service call or a write fails, so the file handle is not leaked and
        // the gzip stream is finished.
        try {
            /*
             * Load all the data sets
             */
            Collection<ExpressionExperiment> experiments = expressionExperimentService.loadAll();

            /*
             * For each gene that is differentially expressed, print out a line per contrast
             */
            writer.write(
                    "GemmaDsId\tEEShortName\tGeneNCBIId\tGemmaGeneId\tFactor\tFactorURI\tBaseline\tContrasting\tDirection\n");
            int i = 0;
            for (ExpressionExperiment ee : experiments) {
                ee = expressionExperimentService.thawLite(ee);

                Collection<DifferentialExpressionAnalysis> results = differentialExpressionAnalysisService
                        .getAnalyses(ee);
                if (results == null || results.isEmpty()) {
                    log.warn("No differential expression results found for " + ee);
                    continue;
                }

                /*
                 * FIXME. Data sets with more than one analysis should probably be skipped for this purpose;
                 * currently all of their analyses are written.
                 */

                log.info("Processing: " + ee.getShortName());

                for (DifferentialExpressionAnalysis analysis : results) {

                    for (ExpressionAnalysisResultSet ears : analysis.getResultSets()) {

                        ears = differentialExpressionResultService.thaw(ears);

                        FactorValue baselineGroup = ears.getBaselineGroup();

                        if (baselineGroup == null) {
                            // log.warn( "No baseline defined for " + ee ); // interaction
                            continue;
                        }

                        if (ExperimentalDesignUtils.isBatch(baselineGroup.getExperimentalFactor())) {
                            continue;
                        }

                        String baselineDescription = ExperimentalDesignUtils.prettyString(baselineGroup);

                        // Comma-separated factor names and, for factors with a vocabulary category, category URIs.
                        StringBuilder factorNames = new StringBuilder();
                        StringBuilder factorURIs = new StringBuilder();

                        for (ExperimentalFactor ef : ears.getExperimentalFactors()) {
                            factorNames.append(ef.getName()).append(',');
                            if (ef.getCategory() instanceof VocabCharacteristic) {
                                factorURIs.append(((VocabCharacteristic) ef.getCategory()).getCategoryUri())
                                        .append(',');
                            }
                        }
                        // chomp drops the trailing comma left by the loop above.
                        String factorName = StringUtils.chomp(factorNames.toString(), ",");
                        String factorURI = StringUtils.chomp(factorURIs.toString(), ",");

                        if (ears.getResults() == null || ears.getResults().isEmpty()) {
                            log.warn("No  differential expression analysis results found for " + ee);
                            continue;
                        }

                        // Generate probe details
                        for (DifferentialExpressionAnalysisResult dear : ears.getResults()) {

                            if (dear == null) {
                                log.warn("Missing results for " + ee + " skipping to next. ");
                                continue;
                            }

                            if (dear.getCorrectedPvalue() == null || dear.getCorrectedPvalue() > THRESH_HOLD)
                                continue;

                            // null or blank means the probe was not reportable or had no contrasts.
                            String formatted = formatDiffExResult(ee, dear, factorName, factorURI,
                                    baselineDescription);

                            if (StringUtils.isNotBlank(formatted))
                                writer.write(formatted);

                        } // dear loop
                    } // ears loop
                } // analysis loop

                // Stop once 'limit' data sets have been processed (">" here would let limit + 1 through).
                if (limit > 0 && ++i >= limit)
                    break;

            } // EE loop
        } finally {
            writer.close();
        }
    }

    /**
     * Builds the view lines for one analysis result: one tab-delimited, newline-terminated line per contrast of
     * the result, carrying the data set id and short name, the gene's NCBI and Gemma ids, the factor name and URI,
     * the baseline and contrasting factor value descriptions, and the sign of the log fold change.
     * 
     * @param ee the data set the result belongs to
     * @param probeAnalysisResult the result to format
     * @param factorName text for the Factor column
     * @param factorURI text for the FactorURI column
     * @param baselineDescription text for the Baseline column
     * @return the formatted lines (an empty string if the result has no contrasts), or <code>null</code> if the
     *         probe does not map to exactly one gene or that gene has no NCBI gene id
     */
    private String formatDiffExResult(ExpressionExperiment ee,
            DifferentialExpressionAnalysisResult probeAnalysisResult, String factorName, String factorURI,
            String baselineDescription) {

        Collection<Gene> mappedGenes = compositeSequenceService.getGenes(probeAnalysisResult.getProbe());

        // Only probes that map to exactly one gene are reported.
        if (mappedGenes.size() != 1) {
            return null;
        }

        Gene gene = mappedGenes.iterator().next();
        if (gene.getNcbiGeneId() == null) {
            return null;
        }

        StringBuilder lines = new StringBuilder();
        for (ContrastResult contrast : probeAnalysisResult.getContrasts()) {
            FactorValue contrastingValue = contrast.getFactorValue();

            // "-" for a negative log fold change, "+" otherwise.
            String sign;
            if (contrast.getLogFoldChange() < 0) {
                sign = "-";
            } else {
                sign = "+";
            }

            String contrastingDescription = ExperimentalDesignUtils.prettyString(contrastingValue);

            String line = String.format("%d\t%s\t%s\t%d\t%s\t%s\t%s\t%s\t%s\n", ee.getId(), ee.getShortName(),
                    gene.getNcbiGeneId().toString(), gene.getId(), factorName, factorURI, baselineDescription,
                    contrastingDescription, sign);
            lines.append(line);
        }

        return lines.toString();
    }

    /**
     * Resolves the output file for a view by appending <code>VIEW_FILE_SUFFIX</code> to its base name and
     * passing the result to {@link #getOutputFile(String)}.
     * 
     * @param viewBaseName base name of the view, without suffix
     * @return the file the view is to be written to
     */
    private File getViewFile(String viewBaseName) {
        String viewFileName = viewBaseName + VIEW_FILE_SUFFIX;
        return getOutputFile(viewFileName);
    }

    /**
     * Returns the file at <code>VIEW_DIR + filename</code>. If the file does not exist yet, its parent
     * directories are created when missing; the file itself is not created here.
     * 
     * @param filename name of the file, appended directly to <code>VIEW_DIR</code>
     * @return the (possibly not yet existing) file
     * @see ubic.gemma.analysis.report.DatabaseViewGenerator#getOutputFile(java.lang.String)
     */
    @Override
    public File getOutputFile(String filename) {
        String fullFilePath = VIEW_DIR + filename;
        File f = new File(fullFilePath);

        if (f.exists()) {
            return f;
        }

        // getParentFile() returns null when the path names no parent directory; there is nothing to create then.
        File parentDir = f.getParentFile();
        if (parentDir != null && !parentDir.exists() && !parentDir.mkdirs()) {
            // Do not fail here: opening the file for writing will report the problem to the caller.
            log.warn("Could not create directory " + parentDir + " for " + f);
        }
        return f;
    }

}