chibi.gemmaanalysis.ArrayDesignStatCli.java Source code

Java tutorial

Introduction

Here is the source code for chibi.gemmaanalysis.ArrayDesignStatCli.java

Source

/*
 * The Gemma project
 * 
 * Copyright (c) 2007 University of British Columbia
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */
package chibi.gemmaanalysis;

import java.io.File;
import java.io.FileWriter;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;

import org.apache.commons.cli.Option;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.lang.time.StopWatch;

import ubic.gemma.apps.ArrayDesignSequenceManipulatingCli;
import ubic.gemma.genome.gene.service.GeneService;
import ubic.gemma.genome.taxon.service.TaxonService;
import ubic.gemma.model.common.auditAndSecurity.Status;
import ubic.gemma.model.common.auditAndSecurity.StatusService;
import ubic.gemma.model.expression.arrayDesign.ArrayDesign;
import ubic.gemma.model.expression.arrayDesign.ArrayDesignService;
import ubic.gemma.model.expression.designElement.CompositeSequence;
import ubic.gemma.model.expression.designElement.CompositeSequenceService;
import ubic.gemma.model.genome.Gene;
import ubic.gemma.model.genome.Taxon;
import cern.colt.Arrays;

/**
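 * CLI that writes a tab-separated summary of statistics for each array design (platform): experiment, gene, probe
 * and sequence counts plus probe-to-gene and gene-to-probe histograms.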
 * 
 * @author xwan
 * @version $Id$
 */
public class ArrayDesignStatCli extends ArrayDesignSequenceManipulatingCli {

    // Array design service bean fetched in processOptions; doWork uses it to load all array designs when none
    // were selected.
    private ArrayDesignService adService;
    // Used in doWork to fetch all genes of a taxon.
    private GeneService geneService;
    // Number of bins in the histograms built by getStats (counts above it go into the last bin); also the number
    // of P2G_/G2P_ columns written by doWork.
    private final static int MAXIMUM_COUNT = 10;
    // Gene ids that getStats counts against; filled in doWork from geneService.getGenesByTaxon.
    private Collection<Long> geneIds = new HashSet<Long>();
    // Used in getCs2GeneMap to load composite sequences and the genes mapped to them.
    private CompositeSequenceService compositeSequenceService;
    // Used to look up the -t taxon by common name and to load each array design's primary taxon by id.
    private TaxonService taxonService;
    // Output file name used when the -o option is not given.
    private final static String DEFAULT_OUT_FILE = "arraydesignsummary.txt";
    // Name of the TSV file doWork writes to; set in processOptions.
    private String outFile;
    // Optional taxon filter from the -t option; when null, doWork does not filter by taxon.
    private Taxon taxon;
    // Used in doWork to obtain the status (troubled flag) of each array design.
    private StatusService statusService;
    // Placeholder written to the output where a value is not available.
    private final static String NA = "NA";

    /**
     * Registers this CLI's own options on top of those of the superclass:
     * <ul>
     * <li>{@code -o/--outfile}: name of the TSV output file</li>
     * <li>{@code -t/--taxon}: taxon to restrict the summary to</li>
     * </ul>
     */
    @Override
    @SuppressWarnings("static-access")
    protected void buildOptions() {
        super.buildOptions();

        Option outFileOption = OptionBuilder.hasArg().withArgName("outfile").withDescription("TSV output filename")
                .withLongOpt("outfile").create('o');
        addOption(outFileOption);

        // "taxon name" is the argument's display name. It was previously passed to withDescription(), where the
        // second withDescription() call silently overwrote it.
        Option taxonOption = OptionBuilder.hasArg().withArgName("taxon name")
                .withDescription("Taxon of the expression experiments and genes").withLongOpt("taxon").create('t');
        addOption(taxonOption);
    }

    /**
     * Looks up the service beans this CLI needs and reads the {@code -o} and {@code -t} options.
     * <p>
     * The output file falls back to {@link #DEFAULT_OUT_FILE} when {@code -o} is absent. When {@code -t} is given,
     * the taxon is resolved by common name.
     *
     * @throws IllegalArgumentException if {@code -t} names a taxon that cannot be found
     */
    @Override
    protected void processOptions() {
        super.processOptions();
        // FIXME: add HTML output option.

        this.adService = this.getBean(ArrayDesignService.class);
        this.compositeSequenceService = this.getBean(CompositeSequenceService.class);
        this.geneService = this.getBean(GeneService.class);
        this.taxonService = this.getBean(TaxonService.class);
        this.statusService = this.getBean(StatusService.class);

        if (hasOption('o')) {
            this.outFile = getOptionValue('o');
            log.info("Output will be written to " + outFile);
        } else {
            this.outFile = DEFAULT_OUT_FILE;
        }

        if (hasOption('t')) {
            String taxonName = getOptionValue('t');
            this.taxon = taxonService.findByCommonName(taxonName);
            if (this.taxon == null) {
                log.error("ERROR: Cannot find taxon " + taxonName);
                // A null taxon means "no filter" in doWork, so carrying on here would silently process every
                // taxon instead of the one the user asked for.
                throw new IllegalArgumentException("Cannot find taxon " + taxonName);
            }
        }

    }

    /**
     * Inverts a composite-sequence-to-genes id map into a gene-to-composite-sequences id map.
     *
     * @param csId2geneIds map from composite sequence id to the ids of the genes it maps to
     * @return a new map from each gene id to the set of composite sequence ids that listed it
     */
    Map<Long, Collection<Long>> getGeneId2CSIdsMap(Map<Long, Collection<Long>> csId2geneIds) {
        Map<Long, Collection<Long>> inverted = new HashMap<>();
        for (Map.Entry<Long, Collection<Long>> entry : csId2geneIds.entrySet()) {
            Long probeId = entry.getKey();
            for (Long geneId : entry.getValue()) {
                Collection<Long> probeIds = inverted.get(geneId);
                if (probeIds == null) {
                    // First time this gene is seen: start its probe set.
                    probeIds = new HashSet<>();
                    inverted.put(geneId, probeIds);
                }
                probeIds.add(probeId);
            }
        }
        return inverted;
    }

    /**
     * Builds a histogram of how many ids each key of {@code dataMap} is associated with.
     * <p>
     * Bin {@code i} (0-based) counts the keys associated with {@code i + 1} ids; anything above
     * {@link #MAXIMUM_COUNT} is lumped into the last bin, and keys with a count of zero are left out.
     *
     * @param dataMap   map from an id to a collection of associated ids
     * @param geneIdKey if {@code true}, keys are gene ids: keys not in {@link #geneIds} are skipped and the count
     *                  is the size of the value collection; if {@code false}, values are gene ids and only those
     *                  contained in {@link #geneIds} are counted
     * @return an array of length {@link #MAXIMUM_COUNT} holding the histogram
     */
    int[] getStats(Map<Long, Collection<Long>> dataMap, boolean geneIdKey) {
        int[] histogram = new int[MAXIMUM_COUNT];
        for (Map.Entry<Long, Collection<Long>> entry : dataMap.entrySet()) {
            Collection<Long> associated = entry.getValue();
            int count;
            if (geneIdKey) {
                if (!geneIds.contains(entry.getKey())) {
                    continue;
                }
                count = associated.size();
            } else {
                count = 0;
                for (Long geneId : associated) {
                    if (geneIds.contains(geneId)) {
                        count++;
                    }
                }
            }
            if (count > 0) {
                histogram[Math.min(count, MAXIMUM_COUNT) - 1]++;
            }
        }
        return histogram;
    }

    /**
     * Loads the composite sequences with the given ids, asks the composite sequence service for their genes and
     * reduces the result to ids.
     *
     * @param csIds ids of the composite sequences to look up
     * @return map from composite sequence id to the ids of its genes, with one entry per composite sequence
     *         returned by the service
     */
    private Map<Long, Collection<Long>> getCs2GeneMap(Collection<Long> csIds) {
        Map<CompositeSequence, Collection<Gene>> cs2genes = compositeSequenceService
                .getGenes(compositeSequenceService.loadMultiple(csIds));
        Map<Long, Collection<Long>> idMap = new HashMap<>();
        for (Map.Entry<CompositeSequence, Collection<Gene>> entry : cs2genes.entrySet()) {
            Long csId = entry.getKey().getId();
            Collection<Long> mappedGeneIds = new HashSet<>();
            for (Gene gene : entry.getValue()) {
                mappedGeneIds.add(gene.getId());
            }
            idMap.put(csId, mappedGeneIds);
        }
        return idMap;
    }

    /**
     * Writes one tab-separated row of summary statistics per array design to {@link #outFile}.
     * <p>
     * If no array designs were selected, all of them are loaded. Array designs are grouped by their primary taxon
     * (restricted to the {@code -t} taxon when one was given). For each array design a row is written with the
     * taxon, names, troubled status, experiment/merge/subsumption information, gene/probe/sequence counts and the
     * probe-to-gene (P2G) and gene-to-probe (G2P) histograms. The header and the leading columns of each row are
     * also echoed to standard error. Array designs that fail are logged, skipped and listed at the end.
     *
     * @param args the command-line arguments
     * @return the exception that stopped processing, or {@code null} if the run completed
     * @see ubic.gemma.util.AbstractCLI#doWork(java.lang.String[])
     */
    @Override
    protected Exception doWork(String[] args) {
        Exception err = processCommandLine(args);
        if (err != null) {
            return err;
        }
        if (arrayDesignsToProcess == null || arrayDesignsToProcess.isEmpty()) {
            this.arrayDesignsToProcess = adService.loadAll();
        }

        Collection<String> failedAds = new ArrayList<>();
        Map<Taxon, Collection<ArrayDesign>> taxon2arraydesign = groupArrayDesignsByTaxon(failedAds);

        StopWatch timer = new StopWatch();
        timer.start();
        int lineCount = 0;
        // try-with-resources closes the writer; no explicit close() is needed.
        try (FileWriter out = new FileWriter(new File(this.outFile))) {
            String header = "taxon\tshortName\tname\tisTroubled\texperiments\tmergees\tsubsumes\tsubsumedBy\tgenes\tprobes\tcsWithGenes\tcsBioSequences\tcsBlatResults\tP2G_0";
            out.write(header);
            for (int i = 1; i <= MAXIMUM_COUNT; i++) {
                out.write("\tP2G_" + i);
            }
            for (int i = 1; i <= MAXIMUM_COUNT; i++) {
                out.write("\tG2P_" + i);
            }
            out.write("\n");
            System.err.print(header + "\n");

            for (Map.Entry<Taxon, Collection<ArrayDesign>> entry : taxon2arraydesign.entrySet()) {
                Taxon adTaxon = entry.getKey();
                // Start from a clean set so the histograms only count genes of the current taxon, regardless of
                // the order in which taxa are visited.
                geneIds.clear();
                for (Gene gene : geneService.getGenesByTaxon(adTaxon)) {
                    geneIds.add(gene.getId());
                }
                for (ArrayDesign ad : entry.getValue()) {
                    try {
                        writeArrayDesignRow(out, adTaxon, ad);
                        lineCount++;
                    } catch (Exception e) {
                        // One bad array design must not abort the whole report.
                        log.error(e, e);
                        failedAds.add(ad.getShortName());
                    }
                }
            }
        } catch (Exception e) {
            return e;
        }

        // Only report success once the file has been closed without error.
        log.info("Skipped " + failedAds.size() + " array designs : " + Arrays.toString(failedAds.toArray()));
        log.info("Finished running in " + timer.getTime() + " ms.");
        log.info("Wrote " + lineCount + " lines to " + outFile);
        return null;
    }

    /**
     * Groups the array designs to process by their primary taxon, applying the optional {@code -t} taxon filter.
     *
     * @param failedAds receives the short names of array designs skipped because their taxon could not be loaded
     *                  or has no common name
     * @return map from taxon to the array designs that have it as primary taxon
     */
    private Map<Taxon, Collection<ArrayDesign>> groupArrayDesignsByTaxon(Collection<String> failedAds) {
        Map<Taxon, Collection<ArrayDesign>> taxon2arraydesign = new HashMap<>();
        for (ArrayDesign ad : this.arrayDesignsToProcess) {
            Taxon primaryTaxon = ad.getPrimaryTaxon();
            if (primaryTaxon == null) {
                System.err.println("ArrayDesign " + ad.getName() + " doesn't have a taxon");
                continue;
            }

            Taxon adTaxon = taxonService.load(primaryTaxon.getId());
            if (adTaxon == null) {
                // Previously fell through and was dereferenced below, or ended up as a null map key.
                log.warn(ad.getShortName() + " taxon could not be loaded");
                failedAds.add(ad.getShortName());
                continue;
            }
            if (adTaxon.getCommonName() == null) {
                log.warn(ad.getShortName() + " taxon common name is null");
                failedAds.add(ad.getShortName());
                continue;
            }

            // Apply the -t filter, if any.
            if (this.taxon != null && this.taxon.getCommonName() != null
                    && !adTaxon.getCommonName().equalsIgnoreCase(this.taxon.getCommonName())) {
                continue;
            }

            Collection<ArrayDesign> ads = taxon2arraydesign.get(adTaxon);
            if (ads == null) {
                ads = new HashSet<>();
                taxon2arraydesign.put(adTaxon, ads);
            }
            ads.add(ad);
        }
        return taxon2arraydesign;
    }

    /**
     * Computes the statistics of one array design and writes them as a single row; the leading (non-histogram)
     * columns are also echoed to standard error.
     *
     * @param out     writer for the output file
     * @param adTaxon taxon the array design was grouped under
     * @param ad      the array design to summarize; left untouched so the caller can still name it on failure
     * @throws Exception if a service call or the write fails; the caller logs it and skips the array design
     */
    private void writeArrayDesignRow(FileWriter out, Taxon adTaxon, ArrayDesign ad) throws Exception {
        Status status = statusService.getStatus(ad);
        String isTroubled = status != null ? Boolean.toString(status.getTroubled().booleanValue()) : NA;

        // Kept in its own variable rather than overwriting the caller's reference.
        ArrayDesign thawed = arrayDesignService.thawLite(ad);
        long mergees = thawed.getMergees().size();
        long subsumes = thawed.getSubsumedArrayDesigns().size();
        String subsumedBy = thawed.getSubsumingArrayDesign() != null
                ? thawed.getSubsumingArrayDesign().getShortName()
                : NA;
        long numEEs = arrayDesignService.getExpressionExperiments(thawed).size();
        long numProbes = getArrayDesignService().getCompositeSequenceCount(thawed).longValue();
        long numCsBioSequences = getArrayDesignService().numCompositeSequenceWithBioSequences(thawed);
        long numCsBlatResults = getArrayDesignService().numCompositeSequenceWithBlatResults(thawed);
        long numCsGenes = getArrayDesignService().numCompositeSequenceWithGenes(thawed);
        long numGenes = getArrayDesignService().numGenes(thawed);

        Collection<CompositeSequence> allCSs = getArrayDesignService().getCompositeSequences(thawed);
        Collection<Long> csIds = new HashSet<>();
        for (CompositeSequence cs : allCSs) {
            csIds.add(cs.getId());
        }
        // FIXME this used to provide only known genes.
        Map<Long, Collection<Long>> csId2geneIds = this.getCs2GeneMap(csIds);
        Map<Long, Collection<Long>> geneId2csIds = getGeneId2CSIdsMap(csId2geneIds);
        int[] csStats = getStats(csId2geneIds, false);
        int[] geneStats = getStats(geneId2csIds, true);
        // Composite sequences that have no entry in the cs-to-gene map (the P2G_0 column).
        int cs2NoneGene = allCSs.size() - csId2geneIds.keySet().size();

        String line = adTaxon.getCommonName() + "\t" + thawed.getShortName() + "\t" + thawed.getName() + "\t"
                + isTroubled + "\t" + numEEs + "\t" + mergees + "\t" + subsumes + "\t" + subsumedBy
                + "\t" + numGenes + "\t" + numProbes + "\t" + numCsGenes + "\t" + numCsBioSequences
                + "\t" + numCsBlatResults + "\t" + cs2NoneGene;

        // Assemble the full row first so it is written in one call.
        StringBuilder row = new StringBuilder(line);
        for (int count : csStats) {
            row.append('\t').append(count);
        }
        for (int count : geneStats) {
            row.append('\t').append(count);
        }
        row.append('\n');
        out.write(row.toString());
        System.err.print(line + "\n");
    }

    /**
     * Command-line entry point: runs the CLI and prints the stack trace of any exception that {@code doWork}
     * returns. Exceptions thrown while doing so are rethrown wrapped in a {@link RuntimeException}.
     *
     * @param args the command-line arguments
     */
    public static void main(String[] args) {
        final ArrayDesignStatCli cli = new ArrayDesignStatCli();
        try {
            final Exception failure = cli.doWork(args);
            if (failure == null) {
                return;
            }
            failure.printStackTrace();
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /*
     * (non-Javadoc)
     * 
     * Returns the fixed command name of this CLI, "platformStats".
     * 
     * @see ubic.gemma.util.AbstractCLI#getCommandName()
     */
    @Override
    public String getCommandName() {
        return "platformStats";
    }

}