ubic.gemma.loader.genome.gene.ExternalFileGeneLoaderServiceImpl.java Source code

Java tutorial

Introduction

Here is the source code for ubic.gemma.loader.genome.gene.ExternalFileGeneLoaderServiceImpl.java

Source

/*
 * The Gemma project
 * 
 * Copyright (c) 2006 University of British Columbia
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */
package ubic.gemma.loader.genome.gene;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.Collection;

import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;

import ubic.gemma.genome.gene.service.GeneService;
import ubic.gemma.genome.taxon.service.TaxonService;
import ubic.gemma.model.genome.Gene;
import ubic.gemma.model.genome.Taxon;
import ubic.gemma.model.genome.gene.GeneProduct;
import ubic.gemma.model.genome.gene.GeneProductService;
import ubic.gemma.model.genome.gene.GeneProductType;
import ubic.gemma.model.genome.gene.GeneProductValueObject;
import ubic.gemma.persistence.Persister;

/**
 * Loads genes from a tab-delimited file. Typical usage is for non-model organisms that do not have genes in NCBI.
 * Supports loading genes against a non-species taxon such as a family, e.g. Salmonids.
 * <p>
 * File format:
 * <ul>
 * <li>Blank lines and lines starting with <code>#</code> (e.g. an optional header) are ignored.</li>
 * <li>Every other line holds tab-separated fields: 'Gene Symbol', 'Gene Name' and, optionally, 'UniProt id'.</li>
 * </ul>
 * For each data line a gene (without an NCBI id) and one placeholder gene product are created. The gene's name and
 * official symbol are set to the gene symbol, its official name to the lower-cased gene name, and its description to a
 * fixed message stating that it was imported from an external file. The gene product bears the same name as the gene.
 * 
 * @author ldonnison
 * @version $Id: ExternalFileGeneLoaderServiceImpl.java,v 1.5 2013/04/29 01:41:40 paul Exp $
 */
@Component
public class ExternalFileGeneLoaderServiceImpl implements ExternalFileGeneLoaderService {
    private static final Log log = LogFactory.getLog(ExternalFileGeneLoaderServiceImpl.class.getName());

    @Autowired
    private GeneService geneService;

    @Autowired
    private Persister persisterHelper;

    @Autowired
    private TaxonService taxonService;

    @Autowired
    private GeneProductService geneProductService;

    /**
     * Loads genes from an already opened stream. The stream is decoded with the platform default charset and is NOT
     * closed by this method; it remains the caller's responsibility.
     * 
     * @param geneInputStream stream over the tab-delimited gene file
     * @param taxonName common name of the taxon the genes belong to
     * @return number of genes created or modified
     * @throws IllegalArgumentException if no taxon with that common name exists
     * @throws IOException if the stream cannot be read or a line has fewer than 2 columns
     */
    @Override
    public int load(InputStream geneInputStream, String taxonName) throws Exception {
        BufferedReader b = new BufferedReader(new InputStreamReader(geneInputStream));
        Taxon taxon = validateTaxon(taxonName);
        log.info("Taxon and file validation passed for taxon " + taxonName);
        return load(b, taxon);
    }

    /**
     * Loads genes from the file at the given path. The file is always closed before this method returns, whether
     * loading succeeds or fails.
     * 
     * @param geneFile path of the tab-delimited gene file
     * @param taxonName common name of the taxon the genes belong to
     * @return number of genes created or modified
     * @throws IllegalArgumentException if no taxon with that common name exists
     * @throws IOException if the file cannot be read or a line has fewer than 2 columns
     * @see ubic.gemma.loader.genome.gene.ExternalFileGeneLoaderService#load(java.lang.String, java.lang.String)
     */
    @Override
    public int load(String geneFile, String taxonName) throws Exception {

        log.info("Starting loading gene file " + geneFile + " for taxon " + taxonName);
        BufferedReader bufferedReaderGene = readFile(geneFile);
        try {
            Taxon taxon = validateTaxon(taxonName);
            log.info("Taxon and file validation passed for " + geneFile + " for taxon " + taxonName);
            return load(bufferedReaderGene, taxon);
        } finally {
            // We opened the reader, so we must release the file handle on every exit path.
            try {
                bufferedReaderGene.close();
            } catch (IOException e) {
                // Do not let a failure to close mask the outcome (or the original exception) of the load.
                log.warn("Could not close " + geneFile, e);
            }
        }
    }

    /**
     * Creates a gene whose name and official symbol are set to the gene symbol (from file) and whose official name is
     * set to the lower-cased gene name (from file). The description is a fixed message indicating that the gene was
     * imported from an external file. The UniProt id, if present, is currently only used in the warning logged for
     * invalid lines.
     * <p>
     * If the gene already exists, then it is not modified, unless it lacks a gene product. In that case we add one and
     * return it.
     * 
     * @param fields A string array containing gene symbol, gene name and (optionally) uniprot id; must have at least
     *        2 elements.
     * @param taxon Taxon relating to gene
     * @return Gene with associated gene product that was created or modified. Null if no gene was loaded (it already
     *         exists with products, or the fields were invalid).
     */
    private Gene createGene(String[] fields, Taxon taxon) {

        assert fields.length > 1;

        String geneSymbol = fields[0];
        String geneName = fields[1];
        String uniProt = "";
        if (fields.length > 2) {
            uniProt = fields[2];
        }

        // need at least the gene symbol and gene name
        if (StringUtils.isBlank(geneSymbol) || StringUtils.isBlank(geneName)) {
            log.warn("Line did not contain valid gene information; GeneSymbol=" + geneSymbol + " GeneName="
                    + geneName + " UniProt=" + uniProt);
            return null;
        }

        if (log.isDebugEnabled()) {
            log.debug("Creating gene " + geneSymbol);
        }
        Gene gene = geneService.findByOfficialSymbol(geneSymbol, taxon);

        if (gene != null) {
            Collection<GeneProductValueObject> existingProducts = geneService.getProducts(gene.getId());
            if (existingProducts.isEmpty()) {
                log.warn("Gene " + gene + " exists, but has no products; adding one");
                gene = geneService.thaw(gene);
                GeneProduct newgp = createGeneProduct(gene);
                newgp = geneProductService.create(newgp);
                gene.getProducts().add(newgp);
                geneService.update(gene);
                return gene;
            }
            log.info(gene + " already exists and is valid, will not update");
            return null; // no need to create it, though we ignore the name.
        }

        gene = Gene.Factory.newInstance();
        gene.setName(geneSymbol);
        gene.setOfficialSymbol(geneSymbol);
        gene.setOfficialName(StringUtils.lowerCase(geneName));
        gene.setDescription("Imported from external annotation file");
        gene.setTaxon(taxon);
        gene.getProducts().add(createGeneProduct(gene));
        return (Gene) persisterHelper.persistOrUpdate(gene);
    }

    /**
     * When loading genes with a file each gene will have just 1 gene product. The gene product is a filler taking its
     * name from the gene.
     * 
     * @param gene The gene associated to this gene product
     * @return A new, not yet persisted, RNA-type placeholder gene product linked to the gene.
     */
    private GeneProduct createGeneProduct(Gene gene) {
        GeneProduct geneProduct = GeneProduct.Factory.newInstance();
        geneProduct.setType(GeneProductType.RNA);
        geneProduct.setGene(gene);
        geneProduct.setName(gene.getName());
        geneProduct.setDescription("Gene product placeholder");
        return geneProduct;
    }

    /**
     * Reads the gene file line by line, creating a gene for each data line, and finally flags the taxon as having
     * usable genes. The reader is not closed here; that is left to whoever opened it.
     * 
     * @param bufferedReaderGene reader positioned at the start of the gene file
     * @param taxon taxon the genes belong to
     * @return number of genes created or modified
     * @throws IOException if reading fails or a data line has fewer than 2 columns
     */
    private int load(BufferedReader bufferedReaderGene, Taxon taxon) throws IOException {
        int loadedGeneCount = 0;
        int linesSkipped = 0;
        String line;
        while ((line = bufferedReaderGene.readLine()) != null) {
            String[] lineContents = readLine(line);
            if (lineContents == null) {
                // blank or comment line; not counted as skipped
                continue;
            }

            if (createGene(lineContents, taxon) != null) {
                loadedGeneCount++;
            } else {
                linesSkipped++;
            }
        }
        updateTaxonWithGenesLoaded(taxon);
        log.info("Genes loaded: " + loadedGeneCount + ", lines skipped: " + linesSkipped);
        return loadedGeneCount;
    }

    /**
     * Creates a bufferedReader for gene file. The caller is responsible for closing the returned reader.
     * 
     * @param geneFile GeneFile including full path
     * @return BufferedReader The bufferedReader for gene file.
     * @throws IOException File can not be opened for reading such as does not exist.
     */
    private BufferedReader readFile(String geneFile) throws IOException {
        File f = new File(geneFile);
        if (!f.canRead()) {
            throw new IOException("Cannot read from " + geneFile);
        }
        BufferedReader b = new BufferedReader(new FileReader(geneFile));
        log.info("File " + geneFile + " read successfully");
        return b;

    }

    /**
     * Splits a gene file line on tabs, preserving empty fields.
     * 
     * @param line A line from the gene file
     * @return Array of at least 2 strings representing a line in a gene file, or null if the line is blank or starts
     *         with '#' and should be ignored.
     * @throws IOException Thrown if the line has fewer than 2 tab-separated columns
     */
    private String[] readLine(String line) throws IOException {
        if (StringUtils.isBlank(line) || line.startsWith("#")) {
            return null;
        }

        String[] fields = StringUtils.splitPreserveAllTokens(line, '\t');
        if (fields.length < 2) {
            throw new IOException("Illegal format, expected at least 2 columns, got " + fields.length);
        }
        return fields;

    }

    /**
     * Method to update taxon to indicate that genes have been loaded for that taxon. If the taxon has children taxa
     * then those child genes should not be used and the flag for those child taxon set to false.
     * 
     * @param taxon The taxon to update
     */
    private void updateTaxonWithGenesLoaded(Taxon taxon) {
        Collection<Taxon> childTaxa = taxonService.findChildTaxaByParent(taxon);
        // if this taxon has children flag not to use their genes
        if (childTaxa != null) {
            for (Taxon childTaxon : childTaxa) {
                if (childTaxon != null && childTaxon.getIsGenesUsable()) {
                    childTaxon.setIsGenesUsable(false);
                    taxonService.update(childTaxon);
                    log.warn("Child taxa " + childTaxon + " genes have been loaded parent taxa should superseed");
                }
            }
        }
        // set taxon flag indicating that use this taxon's genes
        if (!taxon.getIsGenesUsable()) {
            taxon.setIsGenesUsable(true);
            taxonService.update(taxon);
            log.info("Updating taxon genes loaded to true for taxon " + taxon);
        }

    }

    /**
     * Method to validate that taxon is held in system.
     * 
     * @param taxonName Taxon common name
     * @return Full Taxon details
     * @throws IllegalArgumentException If taxon is not found in the system.
     */
    private Taxon validateTaxon(String taxonName) throws IllegalArgumentException {
        Taxon taxon = taxonService.findByCommonName(taxonName);
        if (taxon == null) {
            throw new IllegalArgumentException("No taxon with common name " + taxonName + " found");
        }
        return taxon;
    }

}