ubic.gemma.loader.genome.gene.ncbi.homology.HomologeneServiceImpl.java Source code

Java tutorial

Introduction

Here is the source code for ubic.gemma.loader.genome.gene.ncbi.homology.HomologeneServiceImpl.java

Source

/*
 * The Gemma project
 * 
 * Copyright (c) 2009 University of British Columbia
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */

package ubic.gemma.loader.genome.gene.ncbi.homology;

import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicBoolean;

import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang.time.StopWatch;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import ubic.basecode.util.FileTools;
import ubic.gemma.genome.gene.service.GeneService;
import ubic.gemma.genome.taxon.service.TaxonService;
import ubic.gemma.model.common.description.LocalFile;
import ubic.gemma.model.genome.Gene;
import ubic.gemma.model.genome.Taxon;
import ubic.gemma.model.genome.gene.GeneValueObject;
import ubic.gemma.util.ConfigUtils;

/**
 * Reads in the NCBI Homologene file named in the Gemma properties and keeps two in-memory mappings: NCBI gene id to
 * homologene group id, and homologene group id to the NCBI gene ids in that group.
 * <p>
 * Loading happens in a background daemon thread started by {@link #init(boolean)}. Until the file has been parsed
 * completely the lookup methods return empty results (or null where documented).
 * 
 * @author kelsey
 * @version $Id: HomologeneServiceImpl.java,v 1.4 2013/03/19 18:34:00 paul Exp $
 */
@Component
public class HomologeneServiceImpl implements HomologeneService {

    protected static final Log log = LogFactory.getLog(HomologeneServiceImpl.class);

    private static final String COMMENT_CHARACTER = "#";

    private static final char DELIMITING_CHARACTER = '\t';

    /*
     * Taxon used by getHomologueValueObject when the caller does not name one.
     */
    private static final String DEFAULT_TAXON_COMMON_NAME = "mouse";

    private static final String HOMOLOGENE_FILE = "ncbi.homologene.fileName";

    private static final String LOAD_HOMOLOGENE = "load.homologene";

    // Set once loading has been requested and allowed by init(); not read inside this class.
    private final AtomicBoolean enabled = new AtomicBoolean(false);

    // NCBI gene ID to homologene group ID
    private final Map<Long, Long> gene2Group = new ConcurrentHashMap<Long, Long>();

    @Autowired
    private GeneService geneService;

    @Autowired
    private TaxonService taxonService;

    // Homologene group ID to the NCBI gene IDs in that group. The value collections are only written while parsing;
    // readers must copy them rather than modify them.
    private final Map<Long, Collection<Long>> group2Gene = new ConcurrentHashMap<Long, Collection<Long>>();

    /*
     * Name of file in NCBI
     */
    private String homologeneFileName = "homologene.data";

    // True once the homologene file has been parsed to the end.
    private final AtomicBoolean ready = new AtomicBoolean(false);

    // True while a load thread has been claimed/started and has not finished yet.
    private final AtomicBoolean running = new AtomicBoolean(false);

    /**
     * Finds the homologue of the given gene in the given taxon.
     * 
     * @param gene the gene to find a homologue for
     * @param taxon the taxon the homologue should belong to
     * @return the gene itself if it already belongs to the taxon, otherwise the first homologue found in that taxon,
     *         or null if there is none (or the data is not loaded yet).
     * @see ubic.gemma.loader.genome.gene.ncbi.homology.HomologeneService#getHomologue(ubic.gemma.model.genome.Gene,
     *      ubic.gemma.model.genome.Taxon)
     */
    @Override
    public Gene getHomologue(Gene gene, Taxon taxon) {
        // Ids are boxed Longs: they must be compared by value, not by reference.
        if (sameId(gene.getTaxon().getId(), taxon.getId())) {
            return gene;
        }

        Collection<Gene> homologues = this.getHomologues(gene);

        if (homologues == null || homologues.isEmpty()) {
            return null;
        }

        for (Gene g : homologues) {
            if (g.getTaxon() != null && sameId(g.getTaxon().getId(), taxon.getId())) {
                return g;
            }
        }

        return null;
    }

    /**
     * Finds the Gemma genes that share a homologene group with the given gene.
     * 
     * @param gene the query gene; its NCBI gene id is used for the lookup
     * @return the other genes of the group that are known to Gemma; empty if the gene is null, has no NCBI id, is in
     *         no group, or the data is not loaded yet. The query gene itself is not included.
     * @see ubic.gemma.loader.genome.gene.ncbi.homology.HomologeneService#getHomologues(ubic.gemma.model.genome.Gene)
     */
    @Override
    public Collection<Gene> getHomologues(Gene gene) {

        Collection<Gene> genes = new HashSet<Gene>();

        if (gene == null || !this.ready.get()) {
            return genes;
        }

        Integer ncbiGeneId = gene.getNcbiGeneId();
        if (ncbiGeneId == null) {
            return genes;
        }

        Long groupId = this.getHomologeneGroup(ncbiGeneId.longValue());
        if (groupId == null) {
            return genes;
        }

        // getGenesInGroup always builds a fresh collection, so removing from it does not touch the cache.
        genes = this.getGenesInGroup(groupId);
        genes.remove(gene); // remove the given gene from the list

        return genes;

    }

    /**
     * Finds the NCBI gene ids that share a homologene group with the given NCBI gene id.
     * 
     * @param ncbiId the query NCBI gene id
     * @return a new collection with the other NCBI gene ids of the group; empty if the id is null, is in no group, or
     *         the data is not loaded yet. The query id itself is not included.
     * @see ubic.gemma.loader.genome.gene.ncbi.homology.HomologeneService#getHomologues(java.lang.Long)
     */
    @Override
    public Collection<Long> getHomologues(Long ncbiId) {

        Collection<Long> ncbiGeneIds = new HashSet<Long>();

        if (ncbiId == null || !this.ready.get()) {
            return ncbiGeneIds;
        }

        Long groupId = this.getHomologeneGroup(ncbiId);
        if (groupId == null) {
            return ncbiGeneIds;
        }

        Collection<Long> members = this.getNCBIGeneIdsInGroup(groupId);
        if (members != null) {
            // Work on a copy: removing from the cached collection would drop the gene from its group for good.
            ncbiGeneIds.addAll(members);
        }
        ncbiGeneIds.remove(ncbiId); // remove the given gene from the list

        return ncbiGeneIds;

    }

    /**
     * Starts loading the homologene file in a background daemon thread, unless a load is already running or has
     * already completed.
     * 
     * @param force if true, load even when the "load.homologene" configuration setting is false
     * @see ubic.gemma.loader.genome.gene.ncbi.homology.HomologeneService#init(boolean)
     */
    @Override
    public synchronized void init(boolean force) {

        if (running.get() || ready.get()) {
            return;
        }

        boolean loadHomologene = ConfigUtils.getBoolean(LOAD_HOMOLOGENE, true);

        // Keep the built-in default file name when the property is missing or blank.
        String configuredFileName = ConfigUtils.getString(HOMOLOGENE_FILE);
        if (StringUtils.isNotBlank(configuredFileName)) {
            this.homologeneFileName = configuredFileName;
        }

        // if loading homologene is disabled in the configuration, return (unless the caller forces the load)
        if (!force && !loadHomologene) {
            log.info("Loading Homologene is disabled (force=" + force + ", load.homologene =" + loadHomologene
                    + ")");
            return;
        }

        enabled.set(true);

        // Claim the flag before the thread starts so that no second loader can be started in between.
        if (!running.compareAndSet(false, true)) {
            return;
        }

        // Load the homologene groups for searching

        Thread loadThread = new Thread(new Runnable() {
            @Override
            public void run() {
                try {
                    loadHomologeneFile();
                } catch (RuntimeException e) {
                    log.error("Loading Homologene failed: " + e, e);
                } finally {
                    // Always release the flag so that a later init() call can try again.
                    running.set(false);
                }
            }

        }, "Homologene_load_thread");

        loadThread.setDaemon(true); // So vm doesn't wait on these threads to shutdown (if shutting down)
        loadThread.start();

    }

    /**
     * Given an NCBI Homologene Group ID returns a list of all the NCBI Gene Ids in the given group
     * 
     * @param homologeneGrouopId
     * @return a copy of the NCBI Gene Ids in the group, or null if not ready or the group is unknown.
     */
    protected Collection<Long> getNCBIGeneIdsInGroup(long homologeneGrouopId) {

        if (!this.ready.get()) {
            return null;
        }

        Collection<Long> members = this.group2Gene.get(homologeneGrouopId);
        if (members == null) {
            return null;
        }

        // Defensive copy so callers cannot modify the cached group.
        return new ArrayList<Long>(members);
    }

    /**
     * Parses tab-delimited homologene data and fills the gene/group mappings. Blank lines and lines starting with
     * '#' are ignored. Columns used: 0 = group id, 1 = taxon id, 2 = NCBI gene id, 3 = gene symbol. Lines with too
     * few columns or non-numeric ids are logged and skipped. Marks the service as ready once the whole stream has
     * been read. The stream is not closed here; that is left to the caller.
     * 
     * @param is stream to read the homologene data from
     * @throws IOException if reading from the stream fails
     */
    protected void parseHomologGeneFile(InputStream is) throws IOException {

        BufferedReader br = new BufferedReader(new InputStreamReader(is));
        String line = null;

        while ((line = br.readLine()) != null) {

            if (StringUtils.isBlank(line) || line.startsWith(COMMENT_CHARACTER)) {
                continue;
            }
            String[] fields = StringUtils.splitPreserveAllTokens(line, DELIMITING_CHARACTER);

            if (fields.length < 4) {
                log.warn("Unparseable line from homologene: " + line);
                continue;
            }

            Integer taxonId;
            Long groupId;
            Long geneId;
            try {
                groupId = Long.parseLong(fields[0]);
                taxonId = Integer.parseInt(fields[1]);
                geneId = Long.parseLong(fields[2]);
            } catch (NumberFormatException e) {
                log.warn("Unparseable line from homologene: " + line);
                continue;
            }
            String geneSymbol = fields[3];

            Collection<Long> members = group2Gene.get(groupId);
            if (members == null) {
                members = new ArrayList<Long>();
                group2Gene.put(groupId, members);
            }
            members.add(geneId);

            if (!gene2Group.containsKey(geneId)) {
                gene2Group.put(geneId, groupId);
            } else {
                log.warn("Duplicate gene ID encountered.  Skipping: geneID=" + geneId + " ,taxonID = " + taxonId
                        + " ,geneSymbol = " + geneSymbol);
            }
        }
        ready.set(true);
        log.info("Gene Homology successfully loaded: " + gene2Group.keySet().size() + " genes covered in "
                + group2Gene.keySet().size() + " groups");

    }

    /**
     * Null-safe comparison of two entity ids by value.
     * 
     * @param a first id, may be null
     * @param b second id, may be null
     * @return true if both are null or both hold the same value
     */
    private static boolean sameId(Long a, Long b) {
        return a == null ? b == null : a.equals(b);
    }

    /**
     * Fetches the homologene file and parses it once. Runs in the load thread started by init(); failures are logged
     * and leave the service not ready.
     */
    private void loadHomologeneFile() {

        log.info("Loading Homologene...");
        StopWatch loadTime = new StopWatch();
        loadTime.start();

        HomologeneFetcher hf = new HomologeneFetcher();
        Collection<LocalFile> downloadedFiles = hf.fetch(homologeneFileName);

        if (downloadedFiles == null || downloadedFiles.isEmpty()) {
            log.warn("Unable to download Homologene File. Aborting");
            return;
        }

        if (downloadedFiles.size() > 1) {
            log.info("Downloaded more than 1 file for homologene.  Using 1st.  ");
        }

        File f = downloadedFiles.iterator().next().asFile();
        if (!f.canRead()) {
            log.warn("Downloaded Homologene File. But unable to read Aborting");
            return;
        }

        // Single attempt: retrying in a tight loop would spin forever on a persistent error and would append the
        // same genes to the groups again.
        InputStream is = null;
        try {
            is = FileTools.getInputStreamFromPlainOrCompressedFile(f.getAbsolutePath());
            parseHomologGeneFile(is);
            log.info("Homologene loading took " + loadTime.getTime() + " ms");
        } catch (IOException ioe) {
            log.error("Unable to parse homologene file. Error is " + ioe, ioe);
        } finally {
            if (is != null) {
                try {
                    is.close();
                } catch (IOException e) {
                    log.warn("Unable to close homologene file: " + e);
                }
            }
        }
    }

    /**
     * Given an NCBI Homologene Group ID returns a list of all the genes in gemma in that given group
     * 
     * @param homologeneGroupId NCBI Homologene group ID
     * @return a new collection with the genes of the group that could be found in Gemma; empty if the id is null, the
     *         group is unknown or the data is not loaded yet.
     */
    private Collection<Gene> getGenesInGroup(Long homologeneGroupId) {

        Collection<Gene> genes = new ArrayList<Gene>();

        if (homologeneGroupId == null || !this.ready.get()) {
            return genes;
        }

        Collection<Long> ncbiIds = this.group2Gene.get(homologeneGroupId);
        if (ncbiIds == null) {
            return genes;
        }

        Collection<Long> skippedNcbiIds = new ArrayList<Long>();

        for (Long ncbiId : ncbiIds) {
            Gene gene = this.geneService.findByNCBIId(ncbiId.intValue());
            if (gene == null) {
                skippedNcbiIds.add(ncbiId);
                continue;
            }

            genes.add(gene);
        }

        if (log.isDebugEnabled() && !skippedNcbiIds.isEmpty()) {
            log.debug("Skipped " + skippedNcbiIds.size()
                    + " homologous genes cause unable to find in Gemma. NCBI ids are:  " + skippedNcbiIds);
        }
        return genes;

    }

    /**
     * Given an NCBI Gene ID returns the NCBI homologene group
     * 
     * @param ncbiID
     * @return homologene group id, or null if not ready or the gene is in no group
     */
    private Long getHomologeneGroup(long ncbiID) {

        if (!this.ready.get()) {
            return null;
        }

        return this.gene2Group.get(ncbiID);
    }

    /**
     * Loads the gene with the given id and returns its homologue in the named taxon as a value object.
     * 
     * @param geneId id of the gene to load
     * @param taxonCommonName common name of the taxon to look in; "mouse" is used when this is null or blank
     * @return value object for the gene itself if it already belongs to the taxon, otherwise for its homologue in
     *         that taxon; null if the gene or the taxon cannot be found or there is no homologue.
     * @see ubic.gemma.loader.genome.gene.ncbi.homology.HomologeneService#getHomologueValueObject(java.lang.Long,
     *      java.lang.String)
     */
    @Override
    public GeneValueObject getHomologueValueObject(Long geneId, String taxonCommonName) {

        Gene gene = geneService.load(geneId);
        if (gene == null) {
            return null;
        }

        String commonName = StringUtils.isBlank(taxonCommonName) ? DEFAULT_TAXON_COMMON_NAME : taxonCommonName;
        final Taxon taxon = this.taxonService.findByCommonName(commonName);
        if (taxon == null) {
            return null;
        }

        // getHomologue returns the gene itself when it already belongs to the requested taxon.
        Gene geneToReturn = getHomologue(gene, taxon);

        if (geneToReturn == null) {
            return null;
        }
        return new GeneValueObject(geneToReturn);
    }

    /**
     * Loads the gene with the given id and returns all of its homologues as value objects.
     * 
     * @param geneId id of the gene to load
     * @return value objects for the homologues of the gene; empty if the gene cannot be found.
     * @see ubic.gemma.loader.genome.gene.ncbi.homology.HomologeneService#getHomologueValueObjects(java.lang.Long)
     */
    @Override
    public Collection<GeneValueObject> getHomologueValueObjects(Long geneId) {

        Gene gene = geneService.load(geneId);
        if (gene == null) {
            return new ArrayList<GeneValueObject>();
        }

        Collection<Gene> genes = getHomologues(gene);
        if (genes == null) {
            return null;
        }

        // Reuse the result instead of looking the homologues up a second time.
        return GeneValueObject.convert2ValueObjects(genes);

    }

}