org.dspace.submit.lookup.ArXivService.java Source code

Java tutorial

Introduction

Here is the source code for org.dspace.submit.lookup.ArXivService.java

Source

/**
 * The contents of this file are subject to the license and copyright
 * detailed in the LICENSE and NOTICE files at the root of the source
 * tree and available online at
 *
 * http://www.dspace.org/license/
 */
package org.dspace.submit.lookup;

import gr.ekt.bte.core.Record;

import java.io.IOException;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;

import org.apache.commons.lang.StringUtils;
import org.apache.http.HttpException;
import org.apache.http.HttpResponse;
import org.apache.http.HttpStatus;
import org.apache.http.StatusLine;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.utils.URIBuilder;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.params.CoreConnectionPNames;
import org.apache.http.params.HttpParams;
import org.dspace.app.util.XMLUtils;
import org.w3c.dom.Document;
import org.w3c.dom.Element;

/**
 * Small client for the arXiv export query API. Each lookup issues one HTTP GET
 * against the query endpoint and converts every {@code entry} element of the
 * returned XML document into a {@link Record}.
 *
 * @author Andrea Bollini
 * @author Kostas Stamatis
 * @author Luigi Andrea Pascarelli
 * @author Panagiotis Koutsourakis
 */
public class ArXivService {
    /** Endpoint that receives every query issued by this service. */
    private static final String ARXIV_QUERY_URL = "http://export.arxiv.org/api/query";

    /**
     * Prefixes a user may paste in front of a bare arXiv identifier. They are
     * matched case-insensitively and removed before the identifier is sent.
     */
    private static final String[] ID_PREFIXES = {
        "http://arxiv.org/abs/",
        "https://arxiv.org/abs/",
        "arxiv:"
    };

    /** Connection timeout in milliseconds. */
    private int timeout = 1000;

    /**
     * How long to wait for a connection to be established.
     *
     * @param timeout milliseconds
     */
    public void setTimeout(int timeout) {
        this.timeout = timeout;
    }

    /**
     * Looks up records for a set of DOIs. The DOIs are joined with
     * {@code " OR "} into a single search query limited to 100 results.
     *
     * @param dois the DOIs to look for; may be {@code null} or empty
     * @return the matching records, or {@code null} when {@code dois} is
     *         {@code null} or empty (no request is made in that case)
     * @throws HttpException if the request URI cannot be built
     * @throws IOException if the HTTP exchange fails
     */
    public List<Record> getByDOIs(Set<String> dois) throws HttpException, IOException {
        if (dois == null || dois.isEmpty()) {
            // Kept as null (not an empty list) so existing callers see the same contract.
            return null;
        }
        String doisQuery = StringUtils.join(dois.iterator(), " OR ");
        return search(doisQuery, null, 100);
    }

    /**
     * Searches by title and/or author, returning at most 10 results. Blank
     * arguments are left out of the query; when both are blank the search is
     * still executed with an empty query string.
     *
     * @param title  quoted into a {@code ti:"..."} clause when not blank
     * @param author quoted into an {@code au:"..."} clause when not blank
     * @param year   currently ignored; retained for interface compatibility
     * @return the matching records (possibly empty)
     * @throws HttpException if the request URI cannot be built
     * @throws IOException if the HTTP exchange fails
     */
    public List<Record> searchByTerm(String title, String author, int year) throws HttpException, IOException {
        // Method-local buffer: no synchronization needed, so StringBuilder suffices.
        StringBuilder query = new StringBuilder();
        if (StringUtils.isNotBlank(title)) {
            query.append("ti:\"").append(title).append("\"");
        }
        if (StringUtils.isNotBlank(author)) {
            // Combine with the title clause only when one was actually emitted.
            if (query.length() > 0) {
                query.append(" AND ");
            }
            query.append("au:\"").append(author).append("\"");
        }
        return search(query.toString(), "", 10);
    }

    /**
     * Executes one request against the arXiv query endpoint and converts the
     * {@code entry} elements of the response into records. Entries for which
     * the conversion yields {@code null} are skipped.
     *
     * @param query      value of the {@code search_query} request parameter
     * @param arxivid    value of the {@code id_list} request parameter
     * @param max_result value of the {@code max_results} request parameter
     * @return the converted records, never {@code null}
     * @throws HttpException if the request URI cannot be built
     * @throws IOException if the HTTP exchange fails
     * @throws RuntimeException if the server answers with a status other than
     *         200, or if the response cannot be parsed or converted
     */
    protected List<Record> search(String query, String arxivid, int max_result) throws IOException, HttpException {
        List<Record> results = new ArrayList<Record>();
        HttpClient client = new DefaultHttpClient();
        HttpGet method = null;
        try {
            HttpParams params = client.getParams();
            params.setIntParameter(CoreConnectionPNames.CONNECTION_TIMEOUT, timeout);

            try {
                URIBuilder uriBuilder = new URIBuilder(ARXIV_QUERY_URL);
                uriBuilder.addParameter("id_list", arxivid);
                uriBuilder.addParameter("search_query", query);
                uriBuilder.addParameter("max_results", String.valueOf(max_result));
                method = new HttpGet(uriBuilder.build());
            } catch (URISyntaxException ex) {
                // Same message as before, but keep the cause for diagnostics.
                throw new HttpException(ex.getMessage(), ex);
            }

            // Execute the method.
            HttpResponse response = client.execute(method);
            StatusLine responseStatus = response.getStatusLine();
            int statusCode = responseStatus.getStatusCode();

            if (statusCode != HttpStatus.SC_OK) {
                if (statusCode == HttpStatus.SC_BAD_REQUEST) {
                    throw new RuntimeException("arXiv query is not valid");
                }
                throw new RuntimeException("Http call failed: " + responseStatus);
            }

            try {
                DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
                factory.setValidating(false);
                factory.setIgnoringComments(true);
                factory.setIgnoringElementContentWhitespace(true);
                // The document comes from the network: refuse DTDs and entity
                // expansion so a hostile response cannot trigger XXE.
                factory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
                factory.setXIncludeAware(false);
                factory.setExpandEntityReferences(false);

                DocumentBuilder db = factory.newDocumentBuilder();
                Document inDoc = db.parse(response.getEntity().getContent());

                Element xmlRoot = inDoc.getDocumentElement();
                List<Element> dataRoots = XMLUtils.getElementList(xmlRoot, "entry");

                for (Element dataRoot : dataRoots) {
                    Record crossitem = ArxivUtils.convertArxixDomToRecord(dataRoot);
                    if (crossitem != null) {
                        results.add(crossitem);
                    }
                }
            } catch (Exception e) {
                // Message unchanged for callers that display it; cause now preserved.
                throw new RuntimeException("ArXiv identifier is not valid or not exist", e);
            }
        } finally {
            if (method != null) {
                method.releaseConnection();
            }
            // The client is created per call, so its pooled connection must be
            // closed here or it lingers until garbage collection.
            client.getConnectionManager().shutdown();
        }

        return results;
    }

    /**
     * Fetches the record for a single arXiv identifier. Surrounding whitespace
     * is trimmed and a leading {@code http://arxiv.org/abs/},
     * {@code https://arxiv.org/abs/} or {@code arxiv:} prefix (any letter
     * case) is removed before the lookup.
     *
     * @param raw the identifier as entered by the user
     * @return the first record returned by the search, or {@code null} when
     *         {@code raw} is blank or nothing was found
     * @throws HttpException if the request URI cannot be built
     * @throws IOException if the HTTP exchange fails
     */
    public Record getByArXivIDs(String raw) throws HttpException, IOException {
        if (StringUtils.isBlank(raw)) {
            return null;
        }
        String id = stripKnownPrefix(raw.trim());
        List<Record> result = search("", id, 1);
        if (result != null && !result.isEmpty()) {
            return result.get(0);
        }
        return null;
    }

    /**
     * Removes the first matching entry of {@link #ID_PREFIXES} from the start
     * of {@code id}. Uses {@code regionMatches} with case folding so the
     * comparison does not depend on the JVM's default locale.
     */
    private static String stripKnownPrefix(String id) {
        for (String prefix : ID_PREFIXES) {
            if (id.regionMatches(true, 0, prefix, 0, prefix.length())) {
                return id.substring(prefix.length());
            }
        }
        return id;
    }
}