tr.edu.gsu.nerwip.recognition.internal.modelless.subee.SubeeTools.java Source code

Java tutorial

Introduction

Here is the source code for tr.edu.gsu.nerwip.recognition.internal.modelless.subee.SubeeTools.java

Source

package tr.edu.gsu.nerwip.recognition.internal.modelless.subee;

/*
 * Nerwip - Named Entity Extraction in Wikipedia Pages
 * Copyright 2011 Yasa Akbulut, Burcu Küpelioğlu & Vincent Labatut
 * Copyright 2012 Burcu Küpelioğlu, Samet Atdağ & Vincent Labatut
 * Copyright 2013 Samet Atdağ & Vincent Labatut
 * Copyright 2014-15 Vincent Labatut
 * 
 * This file is part of Nerwip - Named Entity Extraction in Wikipedia Pages.
 * 
 * Nerwip - Named Entity Extraction in Wikipedia Pages is free software: you can 
 * redistribute it and/or modify it under the terms of the GNU General Public License 
 * as published by the Free Software Foundation, either version 2 of the License, or
 * (at your option) any later version.
 * 
 * Nerwip - Named Entity Extraction in Wikipedia Pages is distributed in the hope 
 * that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty 
 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public 
 * License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with Nerwip - Named Entity Extraction in Wikipedia Pages.  
 * If not, see <http://www.gnu.org/licenses/>.
 */

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import java.util.TreeSet;

import org.apache.http.client.ClientProtocolException;

import tr.edu.gsu.nerwip.data.entity.EntityType;
import tr.edu.gsu.nerwip.recognition.RecognizerException;
import tr.edu.gsu.nerwip.tools.freebase.FbTypeTools;
import tr.edu.gsu.nerwip.tools.log.HierarchicalLogger;
import tr.edu.gsu.nerwip.tools.log.HierarchicalLoggerManager;

/**
 * This class contains methods used for the maintenance of
 * the maps and list used by our tool Subee.
 * <br/>
 * It exposes a {@link #main(String[])} entry point, a manual test
 * of the Freebase type conversion, and a method updating the file
 * of unknown Freebase types.
 * 
 * @author Vincent Labatut
 */
@SuppressWarnings("unused")
public class SubeeTools {
    /**
     * Launches the maintenance of the Subee files.
     * <br/>
     * In its current state, only the type conversion test is executed:
     * the call to {@link #updateUnknownTypes()} is commented out and must
     * be re-enabled by hand when the unknown types file needs refreshing.
     * 
     * @param args
     *       None needed.
     * 
     * @throws Exception 
     *       Problem while loading/retrieving the FB types.
     */
    public static void main(String[] args) throws Exception {
        // maintenance
        //      updateUnknownTypes();

        // tests
        testTypeConverter();
    }

    /////////////////////////////////////////////////////////////////
    // LOGGING         /////////////////////////////////////////////
    /////////////////////////////////////////////////////////////////
    /** Common object used for logging */
    protected static HierarchicalLogger logger = HierarchicalLoggerManager.getHierarchicalLogger();

    /////////////////////////////////////////////////////////////////
    // TESTS            /////////////////////////////////////////
    /////////////////////////////////////////////////////////////////
    /**
     * Tests the feature allowing to automatically
     * convert a FB type to a {@link EntityType} value.
     * <br/>
     * The FB types of a hard-coded title are retrieved through
     * {@link FbTypeTools}, then handed to a freshly prepared {@link Subee}
     * instance, and the resulting entity type is logged.
     * <br/>
     * Each increase of the logger offset is paired with a decrease
     * placed in a {@code finally} clause, so that the logger indentation
     * is restored even when one of the steps throws.
     * 
     * @throws Exception
     *       Something went wrong...
     */
    private static void testTypeConverter() throws Exception {
        logger.setName("Test-TypeConverter");
        logger.log("Start converting types");
        logger.increaseOffset();
        try {
            String title = "World_War_II";

            // retrieve all types
            List<String> types = FbTypeTools.getAllTypes(title);
            logger.log("Types retrieved for " + title + ":");
            logger.increaseOffset();
            try {
                logger.log(types);
            } finally {
                logger.decreaseOffset();
            }

            // alternative: retrieve only the notable type
            //      String type = FbTypeTools.getNotableType(title);
            //      List<String> types = new ArrayList<String>();
            //      types.add(type);
            //      logger.log("Notable type for "+title+": "+type);

            // build Subee and loads the necessary files
            logger.log("Load the Subee files");
            Subee subee = new Subee(true, true, true, true, true);
            subee.prepareRecognizer();

            // convert types
            EntityType type = subee.retrieveEntityType(types);
            logger.log("Corresponding entity type: " + type);

            logger.log("Type conversion complete");
        } finally {
            // always undo the offset taken at the beginning of the test
            logger.decreaseOffset();
        }
    }

    /////////////////////////////////////////////////////////////////
    // TYPE MAPS          /////////////////////////////////////////
    /////////////////////////////////////////////////////////////////
    /**
     * Marker passed to {@link Subee#updateUnknownTypes(String)} right
     * before the newly retrieved types, during an update.
     */
    private static final String UNKNOWN_TYPES_SEPARATOR = "-NOT A TYPE^^---------------------";

    /**
     * Updates the file containing unknown types so that it contains
     * all types retrieved from Freebase not already present in the
     * existing type files.
     * <br/>
     * The already known types are the keys of {@code Subee.TYPE_MAP}
     * together with the content of {@code Subee.UNKNOWN_TYPES}. They are
     * removed from the set returned by
     * {@link FbTypeTools#retrieveDomainTypes()}, and what remains is
     * passed, one type at a time, to {@link Subee#updateUnknownTypes(String)},
     * after a separator marker. The marker is sent even if no new type 
     * was found.
     * 
     * @throws RecognizerException 
     *       Problem while loading/retrieving the FB types.
     * @throws org.json.simple.parser.ParseException 
     *       Problem while loading/retrieving the FB types.
     * @throws IOException 
     *       Problem while loading/retrieving the FB types.
     * @throws ClientProtocolException 
     *       Problem while loading/retrieving the FB types.
     */
    public static void updateUnknownTypes() throws RecognizerException, ClientProtocolException, IOException,
            org.json.simple.parser.ParseException {
        logger.setName("Updating-Subee-List");
        logger.log("Updating Subee unknown FB types list");

        // build Subee and loads the necessary files
        logger.log("Load the Subee files");
        Subee subee = new Subee(true, true, true, true, true);
        subee.prepareRecognizer();

        // retrieve the loaded lists
        logger.log("Get the existing type lists from these files");
        Set<String> knownTypes = new TreeSet<String>();
        knownTypes.addAll(Subee.TYPE_MAP.keySet());
        knownTypes.addAll(Subee.UNKNOWN_TYPES);

        // retrieve the last types from Freebase
        logger.log("Get the last type from Freebase");
        Set<String> fbTypes = FbTypeTools.retrieveDomainTypes();

        // retain only the unknown ones
        logger.log("Udpdate the 'unknown' file");
        fbTypes.removeAll(knownTypes);

        // append them to the 'unknown' file, after a separator marker
        subee.updateUnknownTypes(UNKNOWN_TYPES_SEPARATOR);
        for (String type : fbTypes) {
            subee.updateUnknownTypes(type);
        }
    }
}