tr.edu.gsu.nerwip.tools.freebase.FbIdTools.java Source code

Java tutorial

Introduction

Here is the source code for tr.edu.gsu.nerwip.tools.freebase.FbIdTools.java

Source

package tr.edu.gsu.nerwip.tools.freebase;

/*
 * Nerwip - Named Entity Extraction in Wikipedia Pages
 * Copyright 2011 Yasa Akbulut, Burcu Küpelioğlu & Vincent Labatut
 * Copyright 2012 Burcu Küpelioğlu, Samet Atdağ & Vincent Labatut
 * Copyright 2013 Samet Atdağ & Vincent Labatut
 * Copyright 2014-15 Vincent Labatut
 * 
 * This file is part of Nerwip - Named Entity Extraction in Wikipedia Pages.
 * 
 * Nerwip - Named Entity Extraction in Wikipedia Pages is free software: you can 
 * redistribute it and/or modify it under the terms of the GNU General Public License 
 * as published by the Free Software Foundation, either version 2 of the License, or
 * (at your option) any later version.
 * 
 * Nerwip - Named Entity Extraction in Wikipedia Pages is distributed in the hope 
 * that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty 
 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public 
 * License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with Nerwip - Named Entity Extraction in Wikipedia Pages.  
 * If not, see <http://www.gnu.org/licenses/>.
 */

import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;

import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.ParseException;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.util.EntityUtils;
import org.json.simple.JSONArray;
import org.json.simple.JSONObject;
import org.json.simple.parser.JSONParser;

import tr.edu.gsu.nerwip.tools.file.FileNames;
import tr.edu.gsu.nerwip.tools.log.HierarchicalLogger;
import tr.edu.gsu.nerwip.tools.log.HierarchicalLoggerManager;

/**
 * This class handles Freebase ids, and more particularly
 * the mapping between Wikipedia article titles and Freebase
 * ids.
 * 
 * @author Vincent Labatut
 */
public class FbIdTools {
    /////////////////////////////////////////////////////////////////
    // LOGGING         /////////////////////////////////////////////
    /////////////////////////////////////////////////////////////////
    /** Common object used for logging */
    protected static HierarchicalLogger logger = HierarchicalLoggerManager.getHierarchicalLogger();

    /////////////////////////////////////////////////////////////////
    // WIKIPEDIA TO FREEBASE   /////////////////////////////////////
    /////////////////////////////////////////////////////////////////
    /** Memory cache for Freebase queries, created lazily on first use */
    private static FbCache wp2fb;

    /**
     * Returns the cache mapping Wikipedia titles to Freebase ids,
     * creating it from {@code FileNames.FI_IDS} on the first call.
     * 
     * @return
     *       The shared cache object.
     * 
     * @throws FileNotFoundException
     *       Problem while accessing the cache file. 
     * @throws UnsupportedEncodingException
     *       Problem while accessing the cache file. 
     */
    private static FbCache getCache() throws FileNotFoundException, UnsupportedEncodingException {
        if (wp2fb == null) {
            wp2fb = new FbCache(FileNames.FI_IDS);
        }
        return wp2fb;
    }

    /**
     * This method takes the title of a Wikipedia page,
     * and retrieves its Freebase id. If caching is enabled
     * ({@code FbCommonTools.cache}), the cache is looked up first,
     * and a successfully retrieved id is stored in it.
     * 
     * @param wikipediaTitle
     *       Title of the Wikipedia article.
     * @return
     *       A String describing the FB id, or {@code null} if
     *       no id could be extracted from the Freebase answer.
     * 
     * @throws IOException 
     *       Problem while retrieving the FB id.
     * @throws ClientProtocolException 
     *       Problem while retrieving the FB id.
     * @throws org.json.simple.parser.ParseException 
     *       Problem while parsing the Freebase answer.
     * @throws ParseException
     *       Problem while reading the Freebase answer.
     */
    public static String getId(String wikipediaTitle)
            throws ClientProtocolException, IOException, ParseException, org.json.simple.parser.ParseException {
        logger.increaseOffset();
        try {
            String result = null;

            // possibly get result from cache
            if (FbCommonTools.cache) {
                result = getCache().getValue(wikipediaTitle);
            }

            if (result == null) {
                result = queryId(wikipediaTitle);

                // possibly cache the result
                if (result != null && FbCommonTools.cache) {
                    getCache().putValue(wikipediaTitle, result);
                }
            }

            return result;
        } finally {
            // restore the log offset even if the query failed
            logger.decreaseOffset();
        }
    }

    /**
     * Queries Freebase through the MQL-read API in order to
     * get the id associated to the specified Wikipedia title.
     * The cache is not used by this method.
     * 
     * @param wikipediaTitle
     *       Title of the Wikipedia article.
     * @return
     *       The id found in the Freebase answer (the last one if the
     *       answer contains several entries), or {@code null} if none.
     * 
     * @throws IOException 
     *       Problem while retrieving the FB id.
     * @throws ClientProtocolException 
     *       Problem while retrieving the FB id.
     * @throws org.json.simple.parser.ParseException 
     *       Problem while parsing the Freebase answer.
     * @throws ParseException
     *       Problem while reading the Freebase answer.
     */
    private static String queryId(String wikipediaTitle)
            throws ClientProtocolException, IOException, ParseException, org.json.simple.parser.ParseException {
        // set Freebase query using the MQL-read API
        // the title is JSON-escaped, so that quotes or backslashes cannot break the query
        String query = "[{ \"name\": null, " + "\"id\": null, " + "\"key\": "
                + "[{ \"namespace\": \"/wikipedia/en\", " // TODO this part would be different for another source than WP
                + "\"value\": \"" + JSONObject.escape(wikipediaTitle) + "\" }] }]";
        String url = FbCommonTools.URL_MQL + "?key=" + FbCommonTools.getKey() + "&query="
                + URLEncoder.encode(query, "UTF-8");

        logger.log(query);
        // NOTE(review): the logged URL contains the API key
        logger.log(url);

        String result = null;
        HttpClient httpclient = new DefaultHttpClient();
        try {
            // get Freebase answer
            HttpGet request = new HttpGet(url);
            HttpResponse response = httpclient.execute(request);
            HttpEntity entity = response.getEntity();

            if (entity != null) {
                // builds object from answer (UTF-8 is used if the answer declares no charset)
                String str = EntityUtils.toString(entity, "UTF-8");
                JSONParser parser = new JSONParser();
                JSONObject jsonData = (JSONObject) parser.parse(str);
                JSONArray answer = (JSONArray) jsonData.get("result");

                // extract id from the answer
                if (answer != null) {
                    for (Object object : answer) { // process id
                        result = (String) ((JSONObject) object).get("id");
                    }
                }
            }
        } finally {
            // release the connections held by the client
            httpclient.getConnectionManager().shutdown();
        }

        return result;
    }

    /**
     * Sets the Freebase id of the specified article.
     * Does nothing if caching is disabled ({@code FbCommonTools.cache}).
     * 
     * @param wikipediaTitle
     *       Title of the concerned article.
     * @param freebaseId
     *       Corresponding Freebase id.
     * 
     * @throws FileNotFoundException
     *       Problem while accessing the cache file. 
     * @throws UnsupportedEncodingException
     *       Problem while accessing the cache file. 
     */
    public static void setId(String wikipediaTitle, String freebaseId)
            throws FileNotFoundException, UnsupportedEncodingException {
        if (FbCommonTools.cache) {
            getCache().putValue(wikipediaTitle, freebaseId);
        }
    }
}