ubc.pavlab.aspiredb.server.biomartquery.BioMartQueryServiceImpl.java Source code

Java tutorial

Introduction

Here is the source code for ubc.pavlab.aspiredb.server.biomartquery.BioMartQueryServiceImpl.java

Source

/*
 * The aspiredb project
 * 
 * Copyright (c) 2012 University of British Columbia
 * 
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 * 
 * http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
 * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations under the License.
 */
package ubc.pavlab.aspiredb.server.biomartquery;

import java.io.StringWriter;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Timer;
import java.util.TimerTask;
import java.util.concurrent.atomic.AtomicBoolean;

import javax.annotation.PostConstruct;
import javax.ws.rs.core.MediaType;
import javax.ws.rs.core.MultivaluedMap;
import javax.ws.rs.core.Response;
import javax.xml.bind.JAXBContext;
import javax.xml.bind.JAXBException;
import javax.xml.bind.Marshaller;

import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang.time.StopWatch;
import org.apache.commons.lang3.RandomStringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;

import ubc.pavlab.aspiredb.server.exceptions.BioMartServiceException;
import ubc.pavlab.aspiredb.server.util.ConfigUtils;
import ubc.pavlab.aspiredb.shared.GeneValueObject;
import ubc.pavlab.aspiredb.shared.GenomicRange;

import com.sun.jersey.api.client.Client;
import com.sun.jersey.api.client.ClientHandlerException;
import com.sun.jersey.api.client.ClientResponse;
import com.sun.jersey.api.client.WebResource;
import com.sun.jersey.core.util.MultivaluedMapImpl;

/**
 * Simple wrapper that calls BioMart REST query service.
 * 
 * @author anton
 * @version $Id: BioMartQueryServiceImpl.java,v 1.13 2013/07/15 16:01:54 anton Exp $
 */
@Service
public class BioMartQueryServiceImpl implements BioMartQueryService {

    private static final String BIO_MART_URL = ConfigUtils.getString("aspiredb.biomart.url",
            "http://grch37.ensembl.org/biomart/martservice/results");

    private static final int BIOMART_TIMEOUT_SECONDS = 30;

    private static Log log = LogFactory.getLog(BioMartQueryServiceImpl.class.getName());

    protected AtomicBoolean cacheReady = new AtomicBoolean(false);

    private BioMartQueryServiceInitializationThread initializationThread;

    protected class BioMartQueryServiceInitializationThread extends Thread {

        @Override
        public void run() {
            try {
                updateCache();
                cacheReady.set(true);
            } catch (BioMartServiceException e) {
                log.error("Error initializing BioMartQueryServiceInitializationThread", e);
            }
        }
    }

    private static String sendRequest(String xmlQueryString) throws BioMartServiceException {
        Client client = Client.create();

        MultivaluedMap<String, String> queryData = new MultivaluedMapImpl();
        queryData.add("query", xmlQueryString);

        WebResource resource = client.resource(BIO_MART_URL).queryParams(queryData);
        client.setReadTimeout(1000 * BIOMART_TIMEOUT_SECONDS);

        ClientResponse response = resource.type(MediaType.APPLICATION_FORM_URLENCODED_TYPE)
                .get(ClientResponse.class);

        // Check return code
        if (Response.Status.fromStatusCode(response.getStatus()).getFamily() != Response.Status.Family.SUCCESSFUL) {
            String errorMessage = "Error occurred when accessing BioMart web service: "
                    + response.getEntity(String.class);
            log.error(errorMessage);

            throw new BioMartServiceException(errorMessage);
        }

        return response.getEntity(String.class);
    }

    @Autowired
    private BioMartCache bioMartCache;

    private Map<Integer, Collection<GeneValueObject>> geneCache;

    @Override
    public Collection<GeneValueObject> fetchGenesByGeneSymbols(Collection<String> geneSymbols)
            throws BioMartServiceException {
        updateCache();

        return bioMartCache.fetchGenesByGeneSymbols(geneSymbols);
    }

    @Override
    public Collection<GeneValueObject> fetchGenesByLocation(String chromosomeName, Long start, Long end)
            throws BioMartServiceException {
        updateCache();

        return bioMartCache.fetchGenesByLocation(chromosomeName, start, end);
    }

    @Override
    public Collection<GeneValueObject> fetchGenesByBin(int bin) throws BioMartServiceException {
        updateCache();

        return geneCache.get(bin);
    }

    @Override
    public Collection<GenomicRange> fetchGenomicRangesByGeneSymbols(Collection<String> geneSymbols)
            throws BioMartServiceException {
        Collection<GeneValueObject> genes = fetchGenesByGeneSymbols(geneSymbols);
        Collection<GenomicRange> genomicRanges = new HashSet<GenomicRange>(genes.size());

        for (GeneValueObject gene : genes) {
            genomicRanges.add(gene.getGenomicRange());
        }

        return genomicRanges;
    }

    @Override
    public Collection<GeneValueObject> findGenes(String queryString) throws BioMartServiceException {
        updateCache();

        return bioMartCache.findGenes(queryString);
    }

    /**
     * get the genes using the list of gene ids or list of gene symbols
     * 
     * @param List of gene strings
     * @return Gene value Objects associated with the given gene string list
     */
    @Override
    public List<GeneValueObject> getGenes(List<String> geneStrings) throws BioMartServiceException {
        updateCache();

        return bioMartCache.getGenes(geneStrings);
    }

    @SuppressWarnings("unused")
    @PostConstruct
    private void initialize() throws BioMartServiceException {
        initializationThread = new BioMartQueryServiceInitializationThread();
        initializationThread
                .setName(this.getClass().getName() + "_load_thread_" + RandomStringUtils.randomAlphanumeric(5));
        // To prevent VM from waiting on this thread to shutdown (if shutting down).
        initializationThread.setDaemon(true);
        initializationThread.start();
    }

    private void updateCache() throws BioMartServiceException {

        /**
         * Commented out code to check if cache hasExpired() because it takes ~8-10ms everytime this method is called.
         * Assuming cache never expires.
         */
        // if ( this.bioMartCache.hasExpired() ) {

        if (geneCache == null) {

            Dataset dataset = new Dataset("hsapiens_gene_ensembl");

            dataset.Filter.add(
                    new Filter("chromosome_name", "1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,X,Y"));

            dataset.Attribute.add(new Attribute("ensembl_gene_id"));
            dataset.Attribute.add(new Attribute("hgnc_symbol"));
            dataset.Attribute.add(new Attribute("description"));
            dataset.Attribute.add(new Attribute("gene_biotype"));
            dataset.Attribute.add(new Attribute("chromosome_name"));
            dataset.Attribute.add(new Attribute("start"));
            dataset.Attribute.add(new Attribute("end"));

            Query query = new Query();
            query.Dataset = dataset;

            StringWriter xmlQueryWriter = null;

            try {
                JAXBContext jaxbContext = JAXBContext.newInstance(Query.class, Dataset.class, Filter.class,
                        Attribute.class);
                Marshaller jaxbMarshaller = jaxbContext.createMarshaller();

                xmlQueryWriter = new StringWriter();
                jaxbMarshaller.marshal(query, xmlQueryWriter);
            } catch (JAXBException e) {
                String errorMessage = "Cannot initialize genes from BioMart";
                log.error(errorMessage, e);

                throw new BioMartServiceException(errorMessage);
            }

            final StopWatch timer = new StopWatch();
            timer.start();

            Timer uploadCheckerTimer = new Timer(true);
            uploadCheckerTimer.scheduleAtFixedRate(new TimerTask() {
                @Override
                public void run() {
                    log.info("Waiting for BioMart response ... " + timer.getTime() + " ms");
                }
            }, 0, 100 * 1000);

            String response = sendRequest(xmlQueryWriter.toString());
            uploadCheckerTimer.cancel();

            String[] rows = StringUtils.split(response, "\n");

            Collection<GeneValueObject> genes = new HashSet<GeneValueObject>();

            int rowsLength = rows.length;
            if (rowsLength <= 1) {
                String errorMessage = "Error: retrieved only " + rowsLength + " row of gene data from BioMart"
                        + (rowsLength == 1 ? "(Error message from BioMart: " + rows[0] + ")" : "");
                log.error(errorMessage);

                throw new BioMartServiceException(errorMessage);
            }

            geneCache = new HashMap<>(rowsLength);

            for (String row : rows) {
                String[] fields = row.split("\t");

                int index = 0;
                String ensemblId = fields[index++];
                String symbol = fields[index++];
                String name = fields[index++];
                String geneBiotype = fields[index++];
                String chromosome = fields[index++];
                String start = fields[index++];
                String end = fields[index++];

                // Ignore results that do not have required attributes.
                if (ensemblId.equals("") || symbol.equals("") || chromosome.equals("") || start.equals("")
                        || end.equals("")) {
                    continue;
                }

                int sourceIndex = name.indexOf(" [Source:");
                name = sourceIndex >= 0 ? name.substring(0, sourceIndex) : name;

                GeneValueObject gene = new GeneValueObject(ensemblId, symbol, name, geneBiotype, "human");
                int startBase = Integer.valueOf(start);
                int endBase = Integer.valueOf(end);
                if (startBase < endBase) {
                    gene.setGenomicRange(new GenomicRange(chromosome, startBase, endBase));
                } else {
                    gene.setGenomicRange(new GenomicRange(chromosome, endBase, startBase));
                }

                // organize genes by bin, this is for performance reasons, see Bug 4210
                int bin = gene.getGenomicRange().getBin();
                if (!geneCache.containsKey(bin)) {
                    geneCache.put(bin, new HashSet<GeneValueObject>());
                }
                geneCache.get(bin).add(gene);

                genes.add(gene);
            }

            this.bioMartCache.putAll(genes);

            log.info("BioMart request to (" + BIO_MART_URL + ") took " + timer.getTime() + " ms and loaded "
                    + genes.size() + " genes");

        }
    }
}