ubic.gemma.loader.entrez.pubmed.ExpressionExperimentBibRefFinder.java Source code

Java tutorial

Introduction

Here is the source code for ubic.gemma.loader.entrez.pubmed.ExpressionExperimentBibRefFinder.java

Source

/*
 * The Gemma project
 * 
 * Copyright (c) 2007 University of British Columbia
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */
package ubic.gemma.loader.entrez.pubmed;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import ubic.gemma.model.common.description.BibliographicReference;
import ubic.gemma.model.common.description.DatabaseEntry;
import ubic.gemma.model.common.description.ExternalDatabase;
import ubic.gemma.model.expression.experiment.ExpressionExperiment;

/**
 * Finds the primary publication of an {@link ExpressionExperiment}. If the experiment does not already carry one,
 * the GEO series page for its accession is fetched and scanned for a PubMed id, which is then passed to
 * {@link PubMedXMLFetcher#retrieveByHTTP(int)}.
 * <p>
 * Only experiments whose accession belongs to the external database named "GEO" are supported.
 * 
 * @author pavlidis
 * @version $Id: ExpressionExperimentBibRefFinder.java,v 1.3 2011/10/13 21:02:45 paul Exp $
 */
public class ExpressionExperimentBibRefFinder {

    private static final Log log = LogFactory.getLog(ExpressionExperimentBibRefFinder.class.getName());

    /** Name of the only external database this class knows how to query. */
    private static final String GEO_DATABASE_NAME = "GEO";

    /** Prefix of the GEO series page URL; the series accession (e.g. GSE1234) is appended to it. */
    private static final String GEO_SERIES_URL_BASE = "http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=";

    /** Matches the markup that carries the PubMed id on the GEO page; group 1 is the numeric id. */
    private static final String PUBMEDREF_REGEX = "class=\"pubmed_id\" id=\"(\\d+)";

    /** Compiled once: {@link Pattern} is immutable and safe to share between calls. */
    private static final Pattern PUBMEDREF_PATTERN = Pattern.compile(PUBMEDREF_REGEX);

    /**
     * Returns the primary publication of the given experiment, looking it up remotely when it is not already set.
     * 
     * @param ee the experiment to find a reference for
     * @return the experiment's existing primary publication if it has one; otherwise the result of
     *         {@link PubMedXMLFetcher#retrieveByHTTP(int)} for the PubMed id found on the GEO series page; or
     *         <code>null</code> if the experiment has no accession, the accession is not from GEO, or no PubMed id
     *         could be found
     * @throws RuntimeException if the GEO page cannot be retrieved or the PubMed id found on it is not a valid
     *         integer
     */
    public BibliographicReference locatePrimaryReference(ExpressionExperiment ee) {

        BibliographicReference existing = ee.getPrimaryPublication();
        if (existing != null) {
            return existing;
        }

        DatabaseEntry accession = ee.getAccession();
        if (accession == null) {
            // Without an accession there is nothing to look up; treat like any other unsupported experiment.
            log.warn("Experiment has no accession, cannot locate a reference");
            return null;
        }

        ExternalDatabase ed = accession.getExternalDatabase();

        // Constant-first comparison so that a missing database name does not cause a NullPointerException.
        if (ed == null || !GEO_DATABASE_NAME.equals(ed.getName())) {
            log.warn("Don't know how to get references for non-GEO data sets");
            return null;
        }

        int pubMedId = this.locatePubMedId(accession.getAccession());

        if (pubMedId < 0) {
            return null;
        }

        PubMedXMLFetcher fetcher = new PubMedXMLFetcher();
        return fetcher.retrieveByHTTP(pubMedId);
    }

    /**
     * Fetches the GEO page for a series and extracts the first PubMed id found on it.
     * 
     * @param geoSeries a GEO series accession of the form <code>GSE</code> followed by digits
     * @return the PubMed id, or -1 if <code>geoSeries</code> is not a series accession or the page contains no
     *         PubMed id
     * @throws RuntimeException if the URL is invalid, the page cannot be read, or the captured id does not fit in
     *         an <code>int</code>
     */
    private int locatePubMedId(String geoSeries) {
        if (geoSeries == null || !geoSeries.matches("GSE\\d+")) {
            log.warn(geoSeries + " is not a GEO Series Accession");
            return -1;
        }

        // Kept as a string so that the error message is meaningful even when URL construction itself fails.
        String urlString = GEO_SERIES_URL_BASE + geoSeries;

        try {
            URL url = new URL(urlString);

            URLConnection conn = url.openConnection();
            conn.connect();
            InputStream is = conn.getInputStream();
            BufferedReader br = new BufferedReader(new InputStreamReader(is));
            try {
                String line = null;
                while ((line = br.readLine()) != null) {
                    if (log.isDebugEnabled()) {
                        log.debug(line);
                    }
                    Matcher mat = PUBMEDREF_PATTERN.matcher(line);
                    if (mat.find()) {
                        String capturedAccession = mat.group(1);
                        if (StringUtils.isBlank(capturedAccession)) {
                            return -1;
                        }
                        return Integer.parseInt(capturedAccession);
                    }
                }
            } finally {
                // Runs on every exit path (match found, no match, exception) so the connection is not leaked.
                closeQuietly(br);
            }
        } catch (MalformedURLException e) {
            log.error(e, e);
            throw new RuntimeException("Invalid URL " + urlString, e);
        } catch (IOException e) {
            log.error(e, e);
            throw new RuntimeException("Could not get data from remote server", e);
        } catch (NumberFormatException e) {
            log.error(e, e);
            throw new RuntimeException("Could not determine valid pubmed id", e);
        }

        return -1;

    }

    /**
     * Closes the reader (and the stream it wraps), logging rather than propagating a failure so that a problem
     * during cleanup cannot replace the result or exception of the read itself.
     * 
     * @param reader the reader to close
     */
    private static void closeQuietly(BufferedReader reader) {
        try {
            reader.close();
        } catch (IOException e) {
            log.warn("Failed to close connection to GEO: " + e.getMessage(), e);
        }
    }
}