ubic.gemma.core.loader.entrez.pubmed.ExpressionExperimentBibRefFinder.java Source code

Java tutorial

Introduction

Here is the source code for ubic.gemma.core.loader.entrez.pubmed.ExpressionExperimentBibRefFinder.java

Source

/*
 * The Gemma project
 *
 * Copyright (c) 2007 University of British Columbia
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */
package ubic.gemma.core.loader.entrez.pubmed;

import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import ubic.gemma.model.common.description.BibliographicReference;
import ubic.gemma.model.common.description.DatabaseEntry;
import ubic.gemma.model.common.description.ExternalDatabase;
import ubic.gemma.model.expression.experiment.ExpressionExperiment;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * @author pavlidis
 */
public class ExpressionExperimentBibRefFinder {

    private static final Log log = LogFactory.getLog(ExpressionExperimentBibRefFinder.class.getName());

    private static final String GEO_SERIES_URL_BASE = "https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=";

    private static final String PUBMEDREF_REGEX = "class=\"pubmed_id\" id=\"(\\d+)";

    public BibliographicReference locatePrimaryReference(ExpressionExperiment ee) {

        if (ee.getPrimaryPublication() != null)
            return ee.getPrimaryPublication();

        DatabaseEntry accession = ee.getAccession();

        ExternalDatabase ed = accession.getExternalDatabase();

        if (!ed.getName().equals("GEO")) {
            ExpressionExperimentBibRefFinder.log.warn("Don't know how to get references for non-GEO data sets");
            return null;
        }

        String geoId = accession.getAccession();

        int pubMedId = this.locatePubMedId(geoId);

        if (pubMedId < 0)
            return null;

        PubMedXMLFetcher fetcher = new PubMedXMLFetcher();
        return fetcher.retrieveByHTTP(pubMedId);
    }

    private int locatePubMedId(String geoSeries) {
        if (!geoSeries.matches("GSE\\d+")) {
            ExpressionExperimentBibRefFinder.log.warn(geoSeries + " is not a GEO Series Accession");
            return -1;
        }
        URL url;

        Pattern pat = Pattern.compile(ExpressionExperimentBibRefFinder.PUBMEDREF_REGEX);

        URLConnection conn;
        try {
            url = new URL(ExpressionExperimentBibRefFinder.GEO_SERIES_URL_BASE + geoSeries);
            conn = url.openConnection();
            conn.connect();
        } catch (IOException e1) {
            ExpressionExperimentBibRefFinder.log.error(e1, e1);
            throw new RuntimeException("Could not get data from remote server", e1);
        }

        try (InputStream is = conn.getInputStream();
                BufferedReader br = new BufferedReader(new InputStreamReader(is))) {

            String line;
            while ((line = br.readLine()) != null) {
                Matcher mat = pat.matcher(line);
                ExpressionExperimentBibRefFinder.log.debug(line);
                if (mat.find()) {
                    String capturedAccession = mat.group(1);
                    if (StringUtils.isBlank(capturedAccession))
                        return -1;
                    return Integer.parseInt(capturedAccession);
                }
            }

        } catch (IOException e) {
            ExpressionExperimentBibRefFinder.log.error(e, e);
            throw new RuntimeException("Could not get data from remote server", e);
        } catch (NumberFormatException e) {
            ExpressionExperimentBibRefFinder.log.error(e, e);
            throw new RuntimeException("Could not determine valid pubmed id");
        }

        return -1;

    }
}