ixa.pipe.ned.DBpediaSpotlightClient.java Source code

Java tutorial

Introduction

Here is the source code for ixa.pipe.ned.DBpediaSpotlightClient.java

Source

/**
 * Copyright 2011 Pablo Mendes, Max Jakob
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package ixa.pipe.ned;

import java.io.IOException;
import java.io.InputStream;
import java.io.StringReader;
import java.nio.charset.Charset;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;

import org.apache.commons.httpclient.DefaultHttpMethodRetryHandler;
import org.apache.commons.httpclient.Header;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpException;
import org.apache.commons.httpclient.HttpMethod;
import org.apache.commons.httpclient.HttpStatus;
import org.apache.commons.httpclient.NameValuePair;
import org.apache.commons.httpclient.methods.PostMethod;
import org.apache.commons.httpclient.params.HttpMethodParams;
import org.apache.log4j.Logger;
import org.dbpedia.spotlight.exceptions.AnnotationException;
import org.dbpedia.spotlight.model.TextAdaptation;
import org.json.JSONException;
import org.json.JSONObject;
import org.springframework.util.StreamUtils;
import org.w3c.dom.Document;
import org.xml.sax.InputSource;

/**
 * class based on:
 * Simple web service-based annotation client for DBpedia Spotlight.
 *
 * @author pablomendes, Joachim Daiber
 */

public class DBpediaSpotlightClient {
    public Logger LOG = Logger.getLogger(this.getClass());

    private static final double CONFIDENCE = 0.0;
    private static final int SUPPORT = 0;
    private static final boolean COREFERENCE = false;

    // Create an instance of HttpClient.
    private static HttpClient client = new HttpClient();

    public String request(HttpMethod method) throws AnnotationException {

        String response = null;

        // Provide custom retry handler is necessary
        method.getParams().setParameter(HttpMethodParams.RETRY_HANDLER,
                new DefaultHttpMethodRetryHandler(3, false));

        try {
            // Execute the method.
            int statusCode = client.executeMethod(method);

            if (statusCode != HttpStatus.SC_OK) {
                LOG.error("Method failed: " + method.getStatusLine());
            }

            // Read the response body.
            // // Deal with the response.
            // // Use caution: ensure correct character encoding and is not binary data
            InputStream responseBody = method.getResponseBodyAsStream();
            response = StreamUtils.copyToString(responseBody, Charset.forName("UTF-8"));

        } catch (HttpException e) {
            LOG.error("Fatal protocol violation: " + e.getMessage());
            throw new AnnotationException("Protocol error executing HTTP request.", e);
        } catch (IOException e) {
            LOG.error("Fatal transport error: " + e.getMessage());
            LOG.error(method.getQueryString());
            throw new AnnotationException("Transport error executing HTTP request.", e);
        } finally {
            // Release the connection.
            method.releaseConnection();
        }
        return response;

    }

    /*
    http://spotlight.dbpedia.org/rest/candidates/?spotter=SpotXmlParser&text=
    <annotation text="Brazilian oil giant Petrobras and U.S. oilfield service
    company Halliburton have signed a technological cooperation agreement,
    Petrobras announced Monday. The two companies agreed on three projects:
    studies on contamination of fluids in oil wells, laboratory simulation of
    well production, and research on solidification of salt and carbon dioxide
    formations, said Petrobras. Twelve other projects are still under
    negotiation."><surfaceForm name="Brazilian" offset="0"/><surfaceForm name="
    oil" offset="10"/><surfaceForm name="giant" offset="14"/><surfaceForm name="
    Petrobras" offset="20"/><surfaceForm name="U.S." offset="34"/><surfaceForm
    name="oilfield" offset="39"/><surfaceForm name="service" offset="48"/>
    <surfaceForm name="company" offset="56"/><surfaceForm name="Halliburton"
    offset="64"/><surfaceForm name="signed" offset="81"/><surfaceForm name="
    technological" offset="90"/><surfaceForm name="cooperation" offset="104"/>
    <surfaceForm name="agreement" offset="116"/><surfaceForm name="Petrobras"
    offset="127"/><surfaceForm name="Monday" offset="147"/><surfaceForm name="
    companies" offset="163"/><surfaceForm name="projects" offset="189"/>
    <surfaceForm name="contamination" offset="210"/><surfaceForm name="fluids"
    offset="227"/><surfaceForm name="oil wells" offset="237"/><surfaceForm name
    ="laboratory" offset="248"/><surfaceForm name="simulation" offset="259"/>
    <surfaceForm name="production" offset="278"/><surfaceForm name="
    solidification" offset="306"/><surfaceForm name="salt" offset="324"/>
    <surfaceForm name="carbon dioxide" offset="333"/><surfaceForm name="
    Petrobras" offset="365"/><surfaceForm name="Twelve" offset="376"/>
    <surfaceForm name="projects" offset="389"/><surfaceForm name="negotiation"
    offset="414"/></annotation>
     */

    public Document extract(TextAdaptation text, String host, String port, String endpoint)
            throws AnnotationException {

        LOG.info("Querying API.");
        String spotlightResponse = "";
        Document doc = null;
        try {
            String url = host + ":" + port + "/rest/" + endpoint;

            PostMethod method = new PostMethod(url);
            method.setRequestHeader("Content-Type", "application/x-www-form-urlencoded;charset=utf-8");
            NameValuePair[] params = { new NameValuePair("text", text.text()),
                    new NameValuePair("spotter", "SpotXmlParser"),
                    new NameValuePair("confidence", Double.toString(CONFIDENCE)),
                    new NameValuePair("support", Integer.toString(SUPPORT)),
                    new NameValuePair("coreferenceResolution", Boolean.toString(COREFERENCE)) };
            method.setRequestBody(params);
            method.setRequestHeader(new Header("Accept", "text/xml"));
            spotlightResponse = request(method);
            doc = loadXMLFromString(spotlightResponse);
        } catch (javax.xml.parsers.ParserConfigurationException ex) {
        } catch (org.xml.sax.SAXException ex) {
        } catch (java.io.IOException ex) {
        }

        return doc;
    }

    public static Document loadXMLFromString(String xml)
            throws org.xml.sax.SAXException, java.io.IOException, javax.xml.parsers.ParserConfigurationException {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        DocumentBuilder builder = factory.newDocumentBuilder();
        InputSource is = new InputSource(new StringReader(xml));
        return builder.parse(is);
    }

    public JSONObject extractJSON(TextAdaptation text, String host, String port, String endpoint)
            throws AnnotationException {

        LOG.info("Querying API.");
        String spotlightResponse = "";
        Document doc = null;
        try {
            String url = host + ":" + port + "/rest/" + endpoint;

            PostMethod method = new PostMethod(url);
            method.setRequestHeader("Content-Type", "application/x-www-form-urlencoded;charset=utf-8");
            NameValuePair[] params = { new NameValuePair("text", text.text()),
                    new NameValuePair("spotter", "SpotXmlParser"),
                    new NameValuePair("confidence", Double.toString(CONFIDENCE)),
                    new NameValuePair("support", Integer.toString(SUPPORT)),
                    new NameValuePair("coreferenceResolution", Boolean.toString(COREFERENCE)) };
            method.setRequestBody(params);
            method.setRequestHeader(new Header("Accept", "application/json"));
            spotlightResponse = request(method);
        } catch (Exception e) {
            throw new AnnotationException("Could not encode text.", e);
        }

        assert spotlightResponse != null;
        JSONObject resultJSON = null;

        try {
            resultJSON = new JSONObject(spotlightResponse);
        } catch (JSONException e) {
            throw new AnnotationException("Received invalid response from DBpedia Spotlight API.");
        }

        return resultJSON;

    }

}