ubic.gemma.apps.ExpressionExperimentManipulatingCLI.java Source code

Java tutorial

Introduction

Here is the source code for ubic.gemma.apps.ExpressionExperimentManipulatingCLI.java

Source

/*
 * The Gemma project
 * 
 * Copyright (c) 2008 University of British Columbia
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */
package ubic.gemma.apps;

import org.apache.commons.cli.Option;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.collections.Predicate;
import org.apache.commons.lang.StringUtils;
import ubic.gemma.expression.experiment.service.ExpressionExperimentService;
import ubic.gemma.expression.experiment.service.ExpressionExperimentSetService;
import ubic.gemma.genome.gene.service.GeneService;
import ubic.gemma.genome.taxon.service.TaxonService;
import ubic.gemma.model.analysis.expression.ExpressionExperimentSet;
import ubic.gemma.model.common.auditAndSecurity.AuditEventService;
import ubic.gemma.model.common.search.SearchSettingsImpl;
import ubic.gemma.model.expression.experiment.BioAssaySet;
import ubic.gemma.model.expression.experiment.ExpressionExperiment;
import ubic.gemma.model.genome.Gene;
import ubic.gemma.model.genome.Taxon;
import ubic.gemma.search.SearchResult;
import ubic.gemma.search.SearchService;
import ubic.gemma.util.AbstractCLIContextCLI;
import ubic.gemma.util.EntityUtils;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.Set;

/**
 * Base class for CLIs that need one or more expression experiments as input. It offers the following ways of reading
 * them in:
 * <ul>
 * <li>All EEs
 * <li>All EEs for a particular taxon.
 * <li>A specific ExpressionExperimentSet, identified by name</li>
 * <li>A comma-delimited list of one or more EEs identified by short name given on the command line
 * <li>From a file, with one short name per line.
 * <li>EEs matching a query string (e.g., 'brain')
 * <li>(Optional) 'Auto' mode, in which experiments to analyze are selected automatically based on their workflow state.
 * This can be enabled and modified by subclasses who override the "needToRun" method.
 * <li>All EEs that were last processed after a given date, similar to 'auto' otherwise.
 * </ul>
 * Some of these options can be (or should be) combined, and modified by a (optional) "force" option, and will have
 * customized behavior.
 * <p>
 * In addition, EEs can be excluded based on a list given in a separate file.
 * 
 * @author Paul
 * @version $Id: ExpressionExperimentManipulatingCLI.java,v 1.43 2013/02/18 21:43:51 paul Exp $
 */
public abstract class ExpressionExperimentManipulatingCLI extends AbstractCLIContextCLI {

    /** Experiment service; obtained via getBean(...) in {@link #processOptions()}. */
    protected ExpressionExperimentService eeService;

    /** Gene service; obtained via getBean(...) in {@link #processOptions()}. */
    protected GeneService geneService;

    /**
     * Search service used when experiments are selected by query ('-q').
     * NOTE(review): processOptions() does not assign this field -- confirm where it is expected to be set.
     */
    protected SearchService searchService;

    /** Taxon service; obtained via getBean(...) in {@link #processOptions()}. */
    protected TaxonService taxonService;

    /** Taxon looked up from the '-t' option by common name; stays null when the option is not given. */
    protected Taxon taxon = null;

    /** The experiments selected by the command-line options, after exclusion and filtering. */
    protected Set<BioAssaySet> expressionExperiments = new HashSet<BioAssaySet>();

    /** Experiments read from the file given with '-x'; only assigned when that option is present. */
    protected Collection<BioAssaySet> excludeExperiments;

    /** True when the "force" option was given on the command line. */
    protected boolean force = false;

    /** The experiment set named by the "eeset" option; only assigned when that option is used. */
    protected ExpressionExperimentSet expressionExperimentSet;

    /**
     * Registers the "force" option with the default explanation text.
     */
    protected void addForceOption() {
        addForceOption(null);
    }

    /**
     * Registers the "force" option.
     * 
     * @param explanation description shown for the option; when null a default explanation is used.
     */
    @SuppressWarnings("static-access")
    protected void addForceOption(String explanation) {
        String description = explanation;
        if (description == null) {
            description = "Ignore other reasons for skipping experiments (e.g., trouble) and overwrite existing data (see documentation for this tool to see exact behavior if not clear)";
        }
        addOption(OptionBuilder.withArgName("Force processing").withLongOpt("force").withDescription(description)
                .create("force"));
    }

    /**
     * Registers the options used to select expression experiments: '-e' (short names), '-f' (list file), 'eeset'
     * (experiment set name), '-t' (taxon), '-x' (exclusion file) and '-q' (search query).
     */
    @Override
    @SuppressWarnings("static-access")
    protected void buildOptions() {
        Option expOption = OptionBuilder.hasArg().withArgName("shortname").withDescription(
                "Expression experiment short name. Most tools recognize comma-delimited values given on the command line, "
                        + "and if this option is omitted (and none other provided), the tool will be applied to all expression experiments.")
                .withLongOpt("experiment").create('e');

        addOption(expOption);

        Option eeFileListOption = OptionBuilder.hasArg().withArgName("file").withDescription(
                "File with list of short names or IDs of expression experiments (one per line; use instead of '-e')")
                .withLongOpt("eeListfile").create('f');
        addOption(eeFileListOption);

        Option eeSetOption = OptionBuilder.hasArg().withArgName("eeSetName")
                .withDescription("Name of expression experiment set to use").create("eeset");

        addOption(eeSetOption);

        // "taxon name" is the argument name; it was previously passed to withDescription, where the second
        // withDescription call overwrote it and the argument name was never set.
        Option taxonOption = OptionBuilder.hasArg().withArgName("taxon name")
                .withDescription("Taxon of the expression experiments and genes").withLongOpt("taxon").create('t');
        addOption(taxonOption);

        Option excludeEeOption = OptionBuilder.hasArg().withArgName("file")
                .withDescription("File containing list of expression experiments to exclude")
                .withLongOpt("excludeEEFile").create('x');
        addOption(excludeEeOption);

        Option eeSearchOption = OptionBuilder.hasArg().withArgName("expressionQuery")
                .withDescription("Use a query string for defining which expression experiments to use")
                .withLongOpt("expressionQuery").create('q');
        addOption(eeSearchOption);

    }

    /**
     * Looks up genes by official symbol (inexact search) and picks the first one belonging to the given taxon.
     * 
     * @param symbol official symbol to search for
     * @param t taxon the gene must belong to
     * @return the first gene returned by the search whose taxon equals <code>t</code>, or null if there is none
     */
    protected Gene findGeneByOfficialSymbol(String symbol, Taxon t) {
        Gene match = null;
        for (Gene candidate : geneService.findByOfficialSymbolInexact(symbol)) {
            if (t.equals(candidate.getTaxon())) {
                match = candidate;
                break;
            }
        }
        return match;
    }

    /**
     * Checks a single experiment by running it through the same filter used for whole collections.
     * 
     * @param ee the experiment to check
     * @return true if the experiment was filtered out as troubled, false if it survived the filter
     */
    protected boolean isTroubled(BioAssaySet ee) {
        Collection<BioAssaySet> candidate = new HashSet<BioAssaySet>();
        candidate.add(ee);
        removeTroubledEes(candidate);
        // If the filter emptied the one-element collection, the experiment is troubled.
        return candidate.isEmpty();
    }

    /**
     * Finds an expression experiment by its short name.
     * <p>
     * A null name is recorded in <code>errorObjects</code>; an unknown name is logged and reported through
     * <code>bail(ErrorCode.INVALID_OPTION)</code>.
     * 
     * @param name short name of the experiment to find
     * @return the experiment, or null if the name was null or no experiment was found
     */
    protected ExpressionExperiment locateExpressionExperiment(String name) {
        if (name != null) {
            ExpressionExperiment found = eeService.findByShortName(name);
            if (found == null) {
                log.error("No experiment " + name + " found");
                bail(ErrorCode.INVALID_OPTION);
            }
            return found;
        }

        errorObjects.add("Expression experiment short name must be provided");
        return null;
    }

    /**
     * Resolves the services, reads the taxon and "force" options, and builds {@link #expressionExperiments} from the
     * first selection option present, in this order: 'eeset', '-e', '-f', '-q', then all experiments of the taxon,
     * then all experiments. Afterwards the '-x' exclusions are applied and, unless "force" is set, the auto-seek
     * filter (if requested) and the trouble filter are applied.
     * 
     * @throws RuntimeException if the experiment list file given with '-f' cannot be read
     * @throws IllegalStateException if the auto option is used but no 'autoSeekEventType' is set
     */
    @Override
    protected void processOptions() {
        super.processOptions();

        eeService = this.getBean(ExpressionExperimentService.class);
        geneService = this.getBean(GeneService.class);
        taxonService = getBean(TaxonService.class);
        this.auditEventService = getBean(AuditEventService.class);
        if (hasOption('t')) {
            String taxonName = getOptionValue('t');
            this.taxon = taxonService.findByCommonName(taxonName);
            if (taxon == null) {
                log.error("ERROR: Cannot find taxon " + taxonName);
                // Treat an unknown taxon as an invalid option (same handling as an unknown experiment name).
                // Otherwise a mistyped taxon would fall through to the "all experiments" branch below.
                bail(ErrorCode.INVALID_OPTION);
            }
        }

        if (hasOption("force")) {
            this.force = true;
        }

        // Only the first matching selection mode is used.
        if (this.hasOption("eeset")) {
            experimentsFromEeSet(getOptionValue("eeset"));
        } else if (this.hasOption('e')) {
            experimentsFromCliList();
        } else if (hasOption('f')) {
            String experimentListFile = getOptionValue('f');
            log.info("Reading experiment list from " + experimentListFile);
            try {
                this.expressionExperiments = readExpressionExperimentListFile(experimentListFile);
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        } else if (hasOption('q')) {
            log.info("Processing all experiments that match query " + getOptionValue('q'));
            this.expressionExperiments = this.findExpressionExperimentsByQuery(getOptionValue('q'));
        } else if (taxon != null) {
            // When a "dataFile" option is present, no experiments are loaded by default.
            if (!hasOption("dataFile")) {
                log.info("Processing all experiments for " + taxon.getCommonName());
                this.expressionExperiments = new HashSet<BioAssaySet>(eeService.findByTaxon(taxon));
            }
        } else {
            if (!hasOption("dataFile")) {
                log.info("Processing all experiments (futher filtering may modify)");
                this.expressionExperiments = new HashSet<BioAssaySet>(eeService.loadAll());
            }
        }

        if (hasOption('x')) {
            excludeFromFile();
        }

        if (expressionExperiments != null && expressionExperiments.size() > 0 && !force) {

            if (hasOption(AUTO_OPTION_NAME)) {
                this.autoSeek = true;
                if (this.autoSeekEventType == null) {
                    throw new IllegalStateException("Programming error: there is no 'autoSeekEventType' set");
                }
                log.info(
                        "Filtering for experiments lacking a " + this.autoSeekEventType.getSimpleName() + " event");
                auditEventService.retainLackingEvent(this.expressionExperiments, this.autoSeekEventType);
            }

            removeTroubledEes(expressionExperiments);
        }

        // Null-safe, consistent with the guard above: a null collection is reported as "nothing selected".
        int numSelected = expressionExperiments == null ? 0 : expressionExperiments.size();
        if (numSelected > 1) {
            log.info("Final list: " + numSelected + " expressionExperiments (futher filtering may modify)");
        } else if (numSelected == 0) {
            if (hasOption("dataFile")) {
                log.info("Expression matrix from data file selected");
            } else {
                log.info("No experiments selected");
            }
        }

    }

    /**
     * Read in a list of genes, one official symbol per line. Blank lines and lines starting with '#' (after
     * trimming) are skipped; symbols for which no gene is found in the given taxon are logged and skipped.
     * 
     * @param inFile - file name to read
     * @param t taxon used to choose among the genes matching each symbol
     * @return collection of genes, in file order
     * @throws IOException if the file cannot be opened or read
     */
    protected Collection<Gene> readGeneListFile(String inFile, Taxon t) throws IOException {
        log.info("Reading " + inFile);

        Collection<Gene> genes = new ArrayList<Gene>();
        BufferedReader in = new BufferedReader(new FileReader(inFile));
        try {
            String line;
            while ((line = in.readLine()) != null) {
                // Trim before testing so indented comment lines are recognized too.
                String s = line.trim();
                // Skip blank lines rather than passing an empty symbol to the inexact lookup.
                if (s.length() == 0 || s.startsWith("#")) {
                    continue;
                }
                Gene gene = findGeneByOfficialSymbol(s, t);
                if (gene == null) {
                    log.error("ERROR: Cannot find genes for " + s);
                    continue;
                }
                genes.add(gene);
            }
        } finally {
            // Always release the file handle, including when a read or lookup fails.
            in.close();
        }
        return genes;
    }

    /**
     * Reads the experiments listed in the file given with '-x' into {@link #excludeExperiments} and removes them
     * from {@link #expressionExperiments}, logging how many were removed.
     */
    private void excludeFromFile() {
        try {
            this.excludeExperiments = readExpressionExperimentListFile(getOptionValue('x'));
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
        assert !expressionExperiments.isEmpty();

        final int sizeBefore = expressionExperiments.size();
        expressionExperiments.removeAll(excludeExperiments);
        final int numExcluded = sizeBefore - expressionExperiments.size();

        if (numExcluded > 0) {
            log.info("Excluded " + numExcluded + " expression experiments");
        }
    }

    /**
     * Adds the experiments named by the comma-delimited '-e' option value to {@link #expressionExperiments}. Names
     * that cannot be located are skipped with a warning; if nothing was added, the error is reported through
     * <code>bail(ErrorCode.INVALID_OPTION)</code>.
     */
    private void experimentsFromCliList() {
        for (String token : this.getOptionValue('e').split(",")) {
            ExpressionExperiment located = locateExpressionExperiment(token);
            if (located != null) {
                expressionExperiments.add(eeService.thawLite(located));
            } else {
                log.warn(token + " not found");
            }
        }

        if (expressionExperiments.isEmpty()) {
            log.error("There were no valid experimnents specified");
            bail(ErrorCode.INVALID_OPTION);
        }
    }

    /**
     * Selects the experiments of the expression experiment set with the given name. The set is stored in
     * {@link #expressionExperimentSet} and a copy of its experiments replaces {@link #expressionExperiments}.
     * 
     * @param optionValue name of the experiment set
     * @throws IllegalArgumentException if the name is blank, or if zero or more than one set has that name
     */
    private void experimentsFromEeSet(String optionValue) {
        if (StringUtils.isBlank(optionValue)) {
            throw new IllegalArgumentException("Please provide an eeset name");
        }

        Collection<ExpressionExperimentSet> matches = this.getBean(ExpressionExperimentSetService.class)
                .findByName(optionValue);

        final int numMatches = matches.size();
        if (numMatches == 0) {
            throw new IllegalArgumentException("No EE set has name '" + optionValue + "'");
        }
        if (numMatches > 1) {
            throw new IllegalArgumentException("More than on EE set has name '" + optionValue + "'");
        }

        this.expressionExperimentSet = matches.iterator().next();
        this.expressionExperiments = new HashSet<BioAssaySet>(this.expressionExperimentSet.getExperiments());
    }

    /**
     * Use the search engine to locate expression experiments. When a taxon has been set, only experiments whose
     * taxon has the same common name (ignoring case) are kept; without a taxon all hits are returned.
     * 
     * @param query query string passed to the expression experiment search
     * @return the matching experiments; empty (never null) if there are none
     */
    private Set<BioAssaySet> findExpressionExperimentsByQuery(String query) {
        Set<BioAssaySet> ees = new HashSet<BioAssaySet>();

        // No visible code in this class assigns searchService, so resolve it on first use.
        if (searchService == null) {
            searchService = getBean(SearchService.class);
        }

        Collection<SearchResult> eeSearchResults = searchService
                .search(SearchSettingsImpl.expressionExperimentSearch(query)).get(ExpressionExperiment.class);

        // The lookup by result class can yield null when there is no entry for ExpressionExperiment.
        if (eeSearchResults == null) {
            log.info("0 Expression experiments matched '" + query + "'");
            return ees;
        }

        // Report the number of search hits (previously the still-empty result set was reported, i.e. always 0).
        log.info(eeSearchResults.size() + " Expression experiments matched '" + query + "'");

        for (SearchResult sr : eeSearchResults) {
            ExpressionExperiment ee = (ExpressionExperiment) sr.getResultObject();

            // No taxon restriction requested: keep every hit instead of failing on a null taxon.
            if (taxon == null) {
                ees.add(ee);
                continue;
            }

            // Filter out all the ee that are not of correct taxon
            Taxon t = eeService.getTaxon(ee);
            if (t != null && t.getCommonName().equalsIgnoreCase(taxon.getCommonName())) {
                ees.add(ee);
            }
        }
        return ees;

    }

    /**
     * Load expression experiments based on a list of short names or IDs in a file. Each entry is first tried as a
     * short name; if that finds nothing and the entry parses as a number, it is tried as an ID. Entries that cannot
     * be resolved either way are logged and skipped.
     * 
     * @param fileName file with one short name or ID per line
     * @return the experiments that could be resolved
     * @throws IOException if the file cannot be read
     */
    private Set<BioAssaySet> readExpressionExperimentListFile(String fileName) throws IOException {
        Set<BioAssaySet> resolved = new HashSet<BioAssaySet>();
        for (String identifier : readExpressionExperimentListFileToStrings(fileName)) {
            ExpressionExperiment found = eeService.findByShortName(identifier);

            if (found == null) {
                // Not a known short name; fall back to treating the entry as a numeric ID.
                try {
                    found = eeService.load(Long.valueOf(identifier));
                } catch (NumberFormatException e) {
                    // Not numeric either; 'found' stays null and is reported below.
                }
            }

            if (found == null) {
                log.error("No experiment " + identifier + " found");
                continue;
            }
            resolved.add(found);
        }
        return resolved;
    }

    /**
     * Reads the distinct, trimmed entries of an experiment list file. Blank lines and lines starting with '#' are
     * skipped.
     * 
     * @param fileName file with one entry per line
     * @return the distinct entries (no particular order)
     * @throws IOException if the file cannot be opened or read
     */
    private Collection<String> readExpressionExperimentListFileToStrings(String fileName) throws IOException {
        Collection<String> eeNames = new HashSet<String>();
        BufferedReader in = new BufferedReader(new FileReader(fileName));
        try {
            String line;
            // readLine() returning null is the end-of-file signal; ready() only says whether a read would block.
            while ((line = in.readLine()) != null) {
                String eeName = line.trim();
                // An empty entry can never identify an experiment, so do not pass it on.
                if (eeName.length() == 0 || eeName.startsWith("#")) {
                    continue;
                }
                eeNames.add(eeName);
            }
        } finally {
            // Always release the file handle, including when reading fails.
            in.close();
        }
        return eeNames;
    }

    /**
     * Removes, in place, every experiment whose id is not among those returned by
     * <code>eeService.getUntroubled</code> for the collection's ids (presumably the ids without an active trouble
     * flag -- confirm against the service). Each removed experiment is logged.
     * 
     * @param ees collection to filter; modified in place. A null or empty collection only produces a warning.
     */
    private void removeTroubledEes(Collection<BioAssaySet> ees) {
        if (ees == null || ees.size() == 0) {
            log.warn("No experiments to remove troubled from");
            return;
        }
        final Collection<Long> untroubled = eeService.getUntroubled(EntityUtils.getIds(ees));

        // Remember a lone experiment so it can still be named in the log after it has been filtered out.
        BioAssaySet theOnlyOne = null;
        if (ees.size() == 1) {
            theOnlyOne = ees.iterator().next();
        }
        int size = ees.size();

        CollectionUtils.filter(ees, new Predicate() {
            @Override
            public boolean evaluate(Object object) {
                boolean ok = untroubled.contains(((ExpressionExperiment) object).getId());
                if (!ok) {
                    log.info("Troubled: " + object);
                }
                return ok;
            }
        });
        int newSize = ees.size();
        if (newSize != size) {
            assert newSize < size;
            // Test the size AFTER filtering: 'size' is at least 1 here, so the old 'size == 0' test never matched
            // and the single-experiment message was unreachable.
            if (newSize == 0 && theOnlyOne != null) {
                log.info(theOnlyOne.getName() + " has an active trouble flag");
            } else {
                log.info("Removed " + (size - newSize) + " experiments with 'trouble' flags, leaving " + newSize);
            }
        }
    }

}