cz.vse.fis.keg.entityclassifier.core.THDInstance.java Source code

Java tutorial

Introduction

Here is the source code for cz.vse.fis.keg.entityclassifier.core.THDInstance.java

Source

/*
 * #%L
 * Entityclassifier.eu NER CORE v3.9
 * %%
 * Copyright (C) 2015 Knowledge Engineering Group (KEG) and Web Intelligence Research Group (WIRG) - Milan Dojchinovski (milan.dojchinovski@fit.cvut.cz)
 * %%
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public
 * License along with this program.  If not, see
 * <http://www.gnu.org/licenses/gpl-3.0.html>.
 * #L%
 */
package cz.vse.fis.keg.entityclassifier.core;

import com.mongodb.BasicDBList;
import com.mongodb.BasicDBObject;
import com.mongodb.DBObject;
import cz.vse.fis.keg.entityclassifier.core.conf.PropertiesLoader;
import cz.vse.fis.keg.entityclassifier.core.conf.Settings;
import cz.vse.fis.keg.entityclassifier.core.mongodb.MongoDBClient;
import cz.vse.fis.keg.entityclassifier.core.ontologymapper.DBpediaOntologyManager;
import cz.vse.fis.keg.entityclassifier.core.ontologymapper.YagoOntologyManager;
import cz.vse.fis.keg.entityclassifier.core.vao.Article;
import cz.vse.fis.keg.entityclassifier.core.vao.Entity;
import cz.vse.fis.keg.entityclassifier.core.vao.Hypernym;
import gate.Gate;
import gate.Document;
import gate.creole.ExecutionException;
import gate.util.GateException;
import gate.Factory;
import gate.Annotation;
import gate.AnnotationSet;
import gate.FeatureMap;
import gate.Node;
import gate.Corpus;
import gate.ProcessingResource;
import gate.creole.ResourceInstantiationException;
import gate.creole.SerialAnalyserController;
import gate.CreoleRegister;
import gate.util.InvalidOffsetException;
import java.util.Iterator;
import java.io.File;
import java.io.*;
import java.net.*;
import java.util.Properties;
import java.util.ArrayList;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
 *
 * @author Milan Doj?inovski 
 * <dojcinovski.milan (at) gmail.com> 
 * Twitter: @m1ci 
 * www: http://dojchinovski.mk
 * 
 */
public class THDInstance {

    private static String gateHome = null;
    private static String pluginHome;

    private static String enEntityExtractionGrammar;
    private static String nlEntityExtractionGrammar;
    private static String deEntityExtractionGrammar;

    private static String enHypernymExtractionGrammar = null;
    private static String deHypernymExtractionGrammar = null;
    private static String nlHypernymExtractionGrammar = null;

    private static String nlTaggerBinary;
    private static String deTaggerBinary;

    private static SerialAnalyserController enEntityExtractionPipeline = null;
    private static SerialAnalyserController nlEntityExtractionPipeline = null;
    private static SerialAnalyserController deEntityExtractionPipeline = null;

    private static SerialAnalyserController hypernymExtractionPipelineEN = null;
    private static SerialAnalyserController hypernymExtractionPipelineDE = null;
    private static SerialAnalyserController hypernymExtractionPipelineNL = null;

    public static THDInstance thdInstance = null;
    private BasicDBObject queryObj;

    public static THDInstance getInstance() {

        if (thdInstance == null) {
            thdInstance = new THDInstance();
            init();
            initGate();
            assamblePipelines();
        }
        return thdInstance;
    }

    private static void assamblePipelines() {
        assambleEntityExtractionPipelineEN();
        assambleEntityExtractionPipelineDE();
        assambleEntityExtractionPipelineNL();
        assambleHypernymExtractionPipelineEN();
        assambleHypernymExtractionPipelineDE();
        assambleHypernymExtractionPipelineNL();
    }

    private DBObject resObj;

    //    public ArrayList<Hypernym> extractEntityHypernyms(String entity, String lang, String kb, String[] provenance) throws UnknownHostException{
    //        
    //        ArrayList<Hypernym> hypernymsList = new ArrayList<Hypernym>();
    //        ArrayList<Hypernym> thdHypernyms = new ArrayList<Hypernym>();
    //        ArrayList<Hypernym> dbpediaHypernyms = new ArrayList<Hypernym>();
    //        ArrayList<Hypernym> yagoHypernyms = new ArrayList<Hypernym>();
    //        
    //        switch (lang) {
    //            case "en":
    //                for(String prov : provenance) {
    //                    
    //                    if(prov.equals("thd")) {
    //                        
    //                        if(kb.equals("linkedHypernymsDataset")){
    //                            thdHypernyms = extractEntityHypernymsLHD(entity, lang, provenance);
    //                        }else{
    //                            thdHypernyms = extractEntityHypernymsEN(entity, kb);
    //                        }
    //                        
    //                    } else if(prov.equals("dbpedia")) {
    //                        dbpediaHypernyms = getDBpediaHypernyms(entity, lang);
    //                    } else if(prov.equals("yago")){
    //                        yagoHypernyms = getYAGOHypernyms(entity, lang);
    //                    }            
    //                }
    //                if(dbpediaHypernyms != null){
    //                    hypernymsList.addAll(dbpediaHypernyms);
    //                }
    //
    //                if(yagoHypernyms != null){
    //                    hypernymsList.addAll(yagoHypernyms);
    //                }
    //
    //                if(thdHypernyms != null) {
    //                    hypernymsList.addAll(thdHypernyms);
    //                }
    //                return hypernymsList;
    //            case "de":
    //                for(String prov : provenance){
    //                    if(prov.equals("thd")) {
    //                        thdHypernyms = extractEntityHypernymsDE(entity, kb);
    //                    } else if(prov.equals("dbpedia")) {
    //                        dbpediaHypernyms = getDBpediaHypernyms(entity, lang);
    //                    } else if(prov.equals("yago")){
    //                        yagoHypernyms = getYAGOHypernyms(entity, lang);
    //                    }            
    //                }
    //                if(dbpediaHypernyms != null){
    //                    hypernymsList.addAll(dbpediaHypernyms);
    //                }
    //
    //                if(yagoHypernyms != null){
    //                    hypernymsList.addAll(yagoHypernyms);
    //                }
    //
    //                if(thdHypernyms != null) {
    //                    hypernymsList.addAll(thdHypernyms);
    //                }
    //                return hypernymsList;
    //            case "nl":                
    //                for(String prov : provenance){
    //                    if(prov.equals("thd")) {
    //                        thdHypernyms = extractEntityHypernymsNL(entity, kb);
    //                    } else if(prov.equals("dbpedia")){
    //                        dbpediaHypernyms = getDBpediaHypernyms(entity, lang);
    //                    } else if(prov.equals("yago")){
    //                        yagoHypernyms = getYAGOHypernyms(entity, lang);
    //                    }            
    //                }
    //                if(dbpediaHypernyms != null){
    //                    hypernymsList.addAll(dbpediaHypernyms);
    //                }
    //
    //                if(yagoHypernyms != null){
    //                    hypernymsList.addAll(yagoHypernyms);
    //                }
    //
    //                if(thdHypernyms != null) {
    //                    hypernymsList.addAll(thdHypernyms);
    //                }
    //                return hypernymsList;
    //
    //        }
    //        return null;
    //    }

    public ArrayList<Hypernym> getDBpediaHypernyms(String entityTitle, String lang) throws UnknownHostException {

        ArrayList<Hypernym> hypernymsList = new ArrayList<Hypernym>();
        queryObj = new BasicDBObject();
        queryObj.append("label", entityTitle);
        queryObj.append("types",
                new BasicDBObject().append("$elemMatch", new BasicDBObject().append("origin", "dbpedia")));
        DBObject resObj;
        BasicDBList dbTypesList;
        switch (lang) {
        case "en":

            resObj = MongoDBClient.getDBInstance().getCollection("en_entities_dbpedia").findOne(queryObj);

            if (resObj != null) {

                dbTypesList = (BasicDBList) resObj.get("types");

                for (int i = 0; i < dbTypesList.size(); i++) {
                    DBObject obj = (DBObject) dbTypesList.get(i);

                    Hypernym hypernym = new Hypernym();
                    hypernym.setEntity(entityTitle);
                    hypernym.setEntityURL(resObj.get("uri").toString());
                    hypernym.setType(obj.get("label").toString());
                    hypernym.setTypeURL(obj.get("uri").toString());
                    hypernym.setOrigin("dbpedia");
                    if (obj.get("uri").toString().contains("http://dbpedia.org/ontology/")) {
                        hypernymsList.add(hypernym);
                    }
                }
            }
            return hypernymsList;

        case "de":
            resObj = MongoDBClient.getDBInstance().getCollection("de_entities_dbpedia").findOne(queryObj);

            if (resObj != null) {
                dbTypesList = (BasicDBList) resObj.get("types");

                for (int i = 0; i < dbTypesList.size(); i++) {

                    DBObject obj = (DBObject) dbTypesList.get(i);

                    Hypernym hypernym = new Hypernym();
                    hypernym.setEntity(entityTitle);
                    hypernym.setEntityURL(resObj.get("uri").toString());
                    hypernym.setType(obj.get("label").toString());
                    hypernym.setTypeURL(obj.get("uri").toString());
                    hypernym.setOrigin("dbpedia");
                    if (obj.get("uri").toString().contains("http://dbpedia.org/ontology/")) {
                        hypernymsList.add(hypernym);
                    }
                }
            }

            return hypernymsList;

        case "nl":
            resObj = MongoDBClient.getDBInstance().getCollection("nl_entities_dbpedia").findOne(queryObj);

            if (resObj != null) {
                dbTypesList = (BasicDBList) resObj.get("types");

                for (int i = 0; i < dbTypesList.size(); i++) {

                    DBObject obj = (DBObject) dbTypesList.get(i);

                    Hypernym hypernym = new Hypernym();
                    hypernym.setEntity(entityTitle);
                    hypernym.setEntityURL(resObj.get("uri").toString());
                    hypernym.setType(obj.get("label").toString());
                    hypernym.setTypeURL(obj.get("uri").toString());
                    hypernym.setOrigin("dbpedia");

                    if (obj.get("uri").toString().contains("http://dbpedia.org/ontology/")) {
                        hypernymsList.add(hypernym);
                    }
                }
            }

            return hypernymsList;

        default:
            return hypernymsList;
        }
    }

    public ArrayList<Hypernym> getYAGOHypernyms(String entityTitle, String lang) throws UnknownHostException {

        ArrayList<Hypernym> hypernymsList = new ArrayList<Hypernym>();

        switch (lang) {

        case "en":
            queryObj = new BasicDBObject();
            queryObj.append("labels.label", entityTitle).append("labels.lang", "en");
            resObj = MongoDBClient.getDBInstance().getCollection("entities_yago").findOne(queryObj);

            if (resObj != null) {
                hypernymsList.addAll(YagoOntologyManager.getInstance().getYagoHypernyms(entityTitle,
                        "http://yago-knowledge.org/resource/" + entityTitle.replaceAll(" ", "_"), "en", "yago"));
            }

            break;

        case "de":
            queryObj = new BasicDBObject();
            queryObj.append("labels.label", entityTitle).append("labels.lang", "de");
            resObj = MongoDBClient.getDBInstance().getCollection("entities_yago").findOne(queryObj);

            if (resObj != null) {
                hypernymsList.addAll(YagoOntologyManager.getInstance().getYagoHypernyms(entityTitle,
                        resObj.get("uri").toString(), "de", "yago"));
            }

            break;

        case "nl":
            break;
        }
        return hypernymsList;
    }

    //    public ArrayList<Hypernym> extractEntityHypernymsLHD(String entityTitle, String lang, String[] provs) throws UnknownHostException{
    //        
    //        ArrayList<Hypernym> hypernymsList = new ArrayList<Hypernym>();
    //        ArrayList<Hypernym> thdHypernyms = new ArrayList<Hypernym>();
    //        ArrayList<Hypernym> dbpediaHypernyms = new ArrayList<Hypernym>();
    //        ArrayList<Hypernym> yagoHypernyms = new ArrayList<Hypernym>();
    //        
    //        boolean thd_check = false;
    //        boolean yago_check = false;
    //        boolean dbpedia_check = false;
    //        
    //        for(String prov : provs){
    //            if(prov.equals("thd") && thd_check != true){
    //                thdHypernyms = getTHDHypernymsLHD(entityTitle, lang);
    //                thd_check = true;
    //            } else if(prov.equals("dbpedia") && dbpedia_check != true){
    //                dbpediaHypernyms = getDBpediaHypernyms(entityTitle, lang);
    //                dbpedia_check = true;
    //            }
    //            else if(prov.equals("yago") && yago_check != true){
    //                yagoHypernyms = getYAGOHypernyms(entityTitle, lang);
    //                yago_check = true;
    //            }            
    //        }
    //        
    //        if(dbpediaHypernyms != null){
    //            hypernymsList.addAll(dbpediaHypernyms);
    //        }
    //        
    //        if(yagoHypernyms != null){
    //            hypernymsList.addAll(yagoHypernyms);
    //        }
    //        
    //        if(thdHypernyms != null) {
    //            hypernymsList.addAll(thdHypernyms);
    //        }
    //        
    //        return hypernymsList;
    //    }

    //    public ArrayList<Hypernym> getTHDHypernymsLHD(String entityTitle, String lang) throws UnknownHostException{
    //        
    //        ArrayList<Hypernym> hypernymsList = new ArrayList<Hypernym>();
    //        BasicDBObject queryObj = new BasicDBObject();
    //        queryObj.append("label", entityTitle);
    //        queryObj.append("types", new BasicDBObject().append("$elemMatch", new BasicDBObject().append("origin", "thd")));
    //        
    //        switch (lang) {
    //            case "en":
    //                
    //                DBObject resObj = MongoDBClient.getDBInstance().getCollection("en_entities_thd").findOne(queryObj);                
    //                
    //                if(resObj != null){
    //                    
    //                    BasicDBList e = (BasicDBList)resObj.get("types");
    //                    DBObject type = (DBObject)e.get(0);
    //                    Hypernym hypernym = new Hypernym();
    //                    hypernym.setEntity(entityTitle);
    //                    hypernym.setEntityURL(resObj.get("uri").toString());
    //                    hypernym.setType(type.get("label").toString());
    //                    hypernym.setTypeURL(type.get("uri").toString());
    //                    hypernym.setOrigin(type.get("origin").toString());
    //                    hypernym.setAccuracy(type.get("accuracy").toString());
    //                    hypernym.setBounds(type.get("bounds").toString());
    //                    hypernymsList.add(hypernym);
    //                    
    //                    BasicDBList typesList = (BasicDBList)resObj.get("types");
    //                    DBObject firstType = (DBObject)typesList.get(0);
    //                    
    //                    // hypernym is mapped to dbpedia ontology
    //                    // creating hierarchy from the dbpedia ontology
    //                    if(firstType.get("mapping").equals("dbOnto")){
    //                        
    //                        OntoRecord initRecord = new OntoRecord();
    //                        initRecord.setUri(firstType.get("uri").toString());
    //                        
    //                        while(initRecord != null){
    //                            
    //                            initRecord = DBpediaOntologyManager.getInstance().getSubclass(initRecord.getUri(), lang);
    //                            
    //                            if(initRecord != null){
    //                                
    //                                Hypernym hypernymDerived = new Hypernym();
    //                                hypernymDerived.setEntity(entityTitle);
    //                                hypernymDerived.setEntityURL(resObj.get("uri").toString());
    //                                hypernymDerived.setType(initRecord.getLabel());
    //                                hypernymDerived.setTypeURL(initRecord.getUri());
    //                                hypernymDerived.setOrigin("thd-derived");
    //                                hypernymDerived.setAccuracy(type.get("accuracy").toString());
    //                                hypernymDerived.setBounds(type.get("bounds").toString());
    //                                hypernymsList.add(hypernymDerived);
    //                            }
    //                        }
    //                    }
    //                    // hypernym is not mapped to dbpedia ontology
    //                    // searching for superclass mapped to dbpedia ontology
    //                    // if found, for the superclass is created hierarchy
    //                    else if(firstType.get("mapping").equals("dbRes")){
    //                        
    //                        String initialUri = firstType.get("uri").toString();
    //                        
    //                        boolean continueSearching = true;
    //                        
    //                        while(continueSearching) {
    //                            // try to find dbonto for dbres
    //                            DBpediaMapping mapping = getSubclassConfirmed(initialUri);
    //                            
    //                            // if not found, try to find dbres for the dbres
    //                            if(mapping == null){
    //                                
    //                                initialUri = getSuperclass(initialUri, "en");
    //                                
    //                                if(initialUri == null){
    //                                    continueSearching = false;
    //                                    
    //                                } else {
    //                                    
    //                                    // superClass uri found
    //                                    // check if uri is dbOnto - create hierarchy
    //                                    // check if uri is dbRes - continue                                    
    //                                    if(initialUri.contains("/resource/")){
    //                                        // do nothing
    //                                        // continue to search
    //                                    } else if(initialUri.contains("/ontology/")) {
    //                                        // create hierarchy
    //                                        continueSearching = false;
    //                                        
    //                                        OntoRecord initRecord = new OntoRecord();
    //                                        initRecord.setUri(initialUri);
    //
    //                                        while(initRecord != null){
    //
    //                                            initRecord = DBpediaOntologyManager.getInstance().getSubclass(initRecord.getUri(), lang);
    //
    //                                            if(initRecord != null){
    //
    //                                                Hypernym hypernymDerived = new Hypernym();
    //                                                hypernymDerived.setEntity(entityTitle);
    //                                                hypernymDerived.setEntityURL(resObj.get("uri").toString());
    //                                                hypernymDerived.setType(initRecord.getLabel());
    //                                                hypernymDerived.setTypeURL(initRecord.getUri());
    //                                                hypernymDerived.setOrigin("thd-derived");
    //                                                hypernymDerived.setAccuracy(type.get("accuracy").toString());
    //                                                hypernymDerived.setBounds(type.get("bounds").toString());
    //                                                hypernymsList.add(hypernymDerived);
    //                                            }
    //                                        }
    //                                    } else {
    //                                        continueSearching = false;
    //                                    }
    //                                }
    //                            }
    //                            // if found, then create hierarchy
    //                            else {
    //                                continueSearching = false;
    //                                // creating hierarchy
    //                                OntoRecord initRecord = new OntoRecord();
    //                                initRecord.setUri(initialUri);
    //
    //                                while(initRecord != null){
    //
    //                                    initRecord = DBpediaOntologyManager.getInstance().getSubclass(initRecord.getUri(), lang);
    //
    //                                    if(initRecord != null) {
    //                                        Hypernym hypernymDerived = new Hypernym();
    //                                        hypernymDerived.setEntity(entityTitle);
    //                                        hypernymDerived.setEntityURL(resObj.get("uri").toString());
    //                                        hypernymDerived.setType(initRecord.getLabel());
    //                                        hypernymDerived.setTypeURL(initRecord.getUri());
    //                                        hypernymDerived.setOrigin("thd-derived");
    //                                        hypernymsList.add(hypernymDerived);
    //                                    }
    //                                }
    //                            }
    //                        }
    //                    }
    //                    
    //                    return hypernymsList;
    //                    
    //                } else {
    //                    return null;
    //                }
    //            case "de":
    //                
    //                resObj = MongoDBClient.getDBInstance().getCollection("de_entities_thd").findOne(queryObj);                
    //                
    //                if(resObj != null) {
    //                    hypernymsList.addAll(YagoOntologyManager.getInstance().getYagoHypernyms(entityTitle, "http://yago-knowledge.org/resource/"+entityTitle.replaceAll(" ", "_"), "de", "thd-derived"));
    //                    BasicDBList e = (BasicDBList)resObj.get("types");
    //                    DBObject type = (DBObject)e.get(0);
    //                    Hypernym hypernym = new Hypernym();
    //                    hypernym.setEntity(entityTitle);
    //                    hypernym.setEntityURL(resObj.get("uri").toString());
    //                    hypernym.setType(type.get("label").toString());
    //                    hypernym.setTypeURL(type.get("uri").toString());
    //                    hypernym.setOrigin(type.get("origin").toString());
    //                    hypernymsList.add(hypernym);
    //                    
    //                    BasicDBList typesList = (BasicDBList)resObj.get("types");
    //                    DBObject firstType = (DBObject)typesList.get(0);
    //                    
    //                    // hypernym is mapped to dbpedia ontology
    //                    // creating hierarchy from the dbpedia ontology
    //                    if(firstType.get("mapping").equals("dbOnto")){
    //                        
    //                        OntoRecord initRecord = new OntoRecord();
    //                        initRecord.setUri(firstType.get("uri").toString());
    //                        
    //                        while(initRecord != null){
    //                            
    //                            initRecord = DBpediaOntologyManager.getInstance().getSubclass(initRecord.getUri(), lang);
    //                            
    //                            if(initRecord != null){
    //                                
    //                                Hypernym hypernymDerived = new Hypernym();
    //                                hypernymDerived.setEntity(entityTitle);
    //                                hypernymDerived.setEntityURL(resObj.get("uri").toString());
    //                                hypernymDerived.setType(initRecord.getLabel());
    //                                hypernymDerived.setTypeURL(initRecord.getUri());
    //                                hypernymDerived.setOrigin("thd-derived");
    //                                hypernymsList.add(hypernymDerived);                                
    //                            }
    //                        }
    //                    }
    //                    // hypernym is not mapped to dbpedia ontology
    //                    // searching for superclass mapped to dbpedia ontology
    //                    // if found, for the superclass is created hierarchy
    //                    else if(firstType.get("mapping").equals("dbRes")){
    //                        
    //                        String initialUri = firstType.get("uri").toString();
    //                        
    //                        boolean continueSearching = true;
    //                        
    //                        while(continueSearching) {
    //                            
    //                            // try to find dbonto for dbres
    //                            DBpediaMapping mapping = getSubclassConfirmed(initialUri);
    //                            
    //                            // if not found, try to find dbres for the dbres
    //                            if(mapping == null){
    //                                
    //                                initialUri = getSuperclass(initialUri, "de");
    //                                
    //                                if(initialUri == null){
    //                                    continueSearching = false;
    //                                    
    //                                } else {
    //                                    // superClass uri found
    //                                    // check if uri is dbOnto - create hierarchy
    //                                    // check if uri is dbRes - continue
    //                                    if(initialUri.contains("/resource/")){
    //                                        // do nothing
    //                                        // continue to search
    //                                    } else if(initialUri.contains("/ontology/")) {
    //                                        // create hierarchy
    //                                        continueSearching = false;
    //                                        
    //                                        OntoRecord initRecord = new OntoRecord();
    //                                        initRecord.setUri(initialUri);
    //
    //                                        while(initRecord != null){
    //
    //                                            initRecord = DBpediaOntologyManager.getInstance().getSubclass(initRecord.getUri(), lang);
    //                                            if(initRecord != null){
    //
    //                                                Hypernym hypernymDerived = new Hypernym();
    //                                                hypernymDerived.setEntity(entityTitle);
    //                                                hypernymDerived.setEntityURL(resObj.get("uri").toString());
    //                                                hypernymDerived.setType(initRecord.getLabel());
    //                                                hypernymDerived.setTypeURL(initRecord.getUri());
    //                                                hypernymDerived.setOrigin("thd-derived");
    //                                                hypernymsList.add(hypernymDerived);
    //                                            }
    //                                        }
    //                                    } else {
    //                                        // some other uri
    //                                        continueSearching = false;
    //                                    }
    //                                }
    //                            }
    //                            // if found, then create hierarchy
    //                            else {
    //                                continueSearching = false;
    //                                // creating hierarchy
    //                                OntoRecord initRecord = new OntoRecord();
    //                                initRecord.setUri(initialUri);
    //
    //                                while(initRecord != null){
    //
    //                                    initRecord = DBpediaOntologyManager.getInstance().getSubclass(initRecord.getUri(), lang);
    //
    //                                    if(initRecord != null){
    //
    //                                        Hypernym hypernymDerived = new Hypernym();
    //                                        hypernymDerived.setEntity(entityTitle);
    //                                        hypernymDerived.setEntityURL(resObj.get("uri").toString());
    //                                        hypernymDerived.setType(initRecord.getLabel());
    //                                        hypernymDerived.setTypeURL(initRecord.getUri());
    //                                        hypernymDerived.setOrigin("thd-derived");
    //                                        hypernymsList.add(hypernymDerived);
    //                                    }
    //                                }
    //                            }
    //                        }
    //                    }
    //                    return hypernymsList;
    //                    
    //                } else {
    //                    return hypernymsList;
    //                }
    //                
    //            case "nl":
    //                
    //                resObj = MongoDBClient.getDBInstance().getCollection("nl_entities_thd").findOne(queryObj);                
    //
    //                if(resObj != null){
    //                    hypernymsList.addAll(YagoOntologyManager.getInstance().getYagoHypernyms(entityTitle, "http://yago-knowledge.org/resource/"+entityTitle.replaceAll(" ", "_"), "en", "thd-derived"));
    //                    BasicDBList e = (BasicDBList)resObj.get("types");
    //                    DBObject type = (DBObject)e.get(0);
    //
    //                    Hypernym hypernym = new Hypernym();
    //                    hypernym.setEntity(entityTitle);
    //                    hypernym.setEntityURL(resObj.get("uri").toString());
    //                    hypernym.setType(type.get("label").toString());
    //                    hypernym.setTypeURL(type.get("uri").toString());
    //                    hypernym.setOrigin(type.get("origin").toString());
    //                    hypernymsList.add(hypernym);
    //                    
    //                    BasicDBList typesList = (BasicDBList)resObj.get("types");
    //                    DBObject firstType = (DBObject)typesList.get(0);
    //                    
    //                    // hypernym is mapped to dbpedia ontology
    //                    // creating hierarchy from the dbpedia ontology
    //                    if(firstType.get("mapping").equals("dbOnto")){
    //                        
    //                        OntoRecord initRecord = new OntoRecord();
    //                        initRecord.setUri(firstType.get("uri").toString());
    //                        
    //                        while(initRecord != null){
    //                            
    //                            initRecord = DBpediaOntologyManager.getInstance().getSubclass(initRecord.getUri(), lang);
    //                            
    //                            if(initRecord != null){
    //                                
    //                                Hypernym hypernymDerived = new Hypernym();
    //                                hypernymDerived.setEntity(entityTitle);
    //                                hypernymDerived.setEntityURL(resObj.get("uri").toString());
    //                                hypernymDerived.setType(initRecord.getLabel());
    //                                hypernymDerived.setTypeURL(initRecord.getUri());
    //                                hypernymDerived.setOrigin("thd-derived");
    //                                hypernymsList.add(hypernymDerived);                                
    //                            }
    //                        }
    //                    }
    //                    // hypernym is not mapped to dbpedia ontology
    //                    // searching for superclass mapped to dbpedia ontology
    //                    // if found, for the superclass is created hierarchy
    //                    else if(firstType.get("mapping").equals("dbRes")){
    //                        
    //                        String initialUri = firstType.get("uri").toString();
    //                        
    //                        boolean continueSearching = true;
    //                        
    //                        while(continueSearching){
    //                            // try to find dbonto for dbres
    //                            DBpediaMapping mapping = getSubclassConfirmed(initialUri);
    //                            
    //                            // if not found, try to find dbres for the dbres
    //                            if(mapping == null){
    //                                
    //                                initialUri = getSuperclass(initialUri, "nl");
    //                                
    //                                if(initialUri == null){
    //                                    continueSearching = false;                                    
    //                                } else {
    //                                    
    //                                    // superClass uri found
    //                                    // check if uri is dbOnto - create hierarchy
    //                                    // check if uri is dbRes - continue                                    
    //                                    if(initialUri.contains("/resource/")){
    //                                        // do nothing
    //                                        // continue to search
    //                                    } else if(initialUri.contains("/ontology/")) {
    //                                        // create hierarchy
    //                                        continueSearching = false;
    //                                        
    //                                        OntoRecord initRecord = new OntoRecord();
    //                                        initRecord.setUri(initialUri);
    //
    //                                        while(initRecord != null){
    //
    //                                            initRecord = DBpediaOntologyManager.getInstance().getSubclass(initRecord.getUri(), lang);
    //
    //                                            if(initRecord != null){
    //
    //                                                Hypernym hypernymDerived = new Hypernym();
    //                                                hypernymDerived.setEntity(entityTitle);
    //                                                hypernymDerived.setEntityURL(resObj.get("uri").toString());
    //                                                hypernymDerived.setType(initRecord.getLabel());
    //                                                hypernymDerived.setTypeURL(initRecord.getUri());
    //                                                hypernymDerived.setOrigin("thd-derived");
    //                                                hypernymsList.add(hypernymDerived);
    //
    //                                            }
    //                                        }
    //                                    } else {
    //                                        // some other uri
    //                                        continueSearching = false;
    //                                    }
    //                                }
    //                            }
    //                            // if found, then create hierarchy
    //                            else {
    //                                continueSearching = false;
    //                                // creating hierarchy
    //                                OntoRecord initRecord = new OntoRecord();
    //                                initRecord.setUri(initialUri);
    //
    //                                while(initRecord != null){
    //
    //                                    initRecord = DBpediaOntologyManager.getInstance().getSubclass(initRecord.getUri(), lang);
    //
    //                                    if(initRecord != null) {
    //                                        
    //                                        Hypernym hypernymDerived = new Hypernym();
    //                                        hypernymDerived.setEntity(entityTitle);
    //                                        hypernymDerived.setEntityURL(resObj.get("uri").toString());
    //                                        hypernymDerived.setType(initRecord.getLabel());
    //                                        hypernymDerived.setTypeURL(initRecord.getUri());
    //                                        hypernymDerived.setOrigin("thd-derived");
    //                                        hypernymsList.add(hypernymDerived);
    //                                    }
    //                                }
    //                            }
    //                        }
    //                    }
    //                    
    //                    return hypernymsList;
    //                    
    //                } else {
    //                    return hypernymsList;
    //                }
    //            default:
    //                return null;
    //        }
    //    }

    //    public DBpediaMapping getSubclassConfirmed(String res) throws UnknownHostException{
    //        
    //        BasicDBObject queryObj2 = new BasicDBObject();
    //        queryObj2.append("label", res);
    //        DBObject resObj2 = MongoDBClient.getDBInstance().getCollection("test_en_entities_superclass").findOne(queryObj2);
    //        
    //        if(resObj2 != null){
    //            BasicDBList e = (BasicDBList)resObj.get("types");
    //            DBObject type = (DBObject)e.get(0);
    //            
    //            DBpediaMapping result = new DBpediaMapping();
    //            result.setLabel(type.get("label").toString());
    //            result.setUri(type.get("uri").toString());
    //            return result;
    //        } else {
    //            return null;
    //        }        
    //    }

    //    public String getSuperclass(String uri, String lang) throws UnknownHostException{
    //        
    //        BasicDBObject tempQueryObj = new BasicDBObject();
    //        tempQueryObj.append("uri", uri);
    //        tempQueryObj.append("types", new BasicDBObject().append("$elemMatch", new BasicDBObject().append("origin", "thd")));
    //        DBObject tempResObj = null;
    //        
    //        switch (lang) {
    //            case "en":
    //                tempResObj = MongoDBClient.getDBInstance().getCollection("test_en_entities").findOne(tempQueryObj);
    //            case "de":
    //                tempResObj = MongoDBClient.getDBInstance().getCollection("test_de_entities").findOne(tempQueryObj);           
    //        }
    //        
    //        if(tempResObj != null){
    //            BasicDBList typesList = (BasicDBList)tempResObj.get("types");
    //            DBObject tmpTypeObj = (DBObject) typesList.get(0);
    //            return tmpTypeObj.get("uri").toString();
    //        } else {
    //            return null;
    //        }        
    //    }

    public ArrayList<Hypernym> extractEntityHypernymsEN(String entity, String kb) {

        ArrayList<Hypernym> hypernymsList = new ArrayList<Hypernym>();
        try {
            URL url = null;
            String path = "";
            if (kb.equals("local")) {
                path = Settings.EN_WIKIPEDIA_LOCAL_EXPORT + entity.replace(" ", "_");
            } else if (kb.equals("live")) {
                path = Settings.EN_WIKIPEDIA_LIVE_EXPORT + entity.replace(" ", "_");
            }
            StringBuffer buffer = new StringBuffer();
            url = new URL(path);
            URLConnection connection = url.openConnection();
            InputStream is = connection.getInputStream();
            Reader isr = new InputStreamReader(is, "UTF-8");
            Reader in = new BufferedReader(isr);
            int ch;

            while ((ch = in.read()) > -1) {
                buffer.append((char) ch);
            }
            in.close();
            Article article = new Article(buffer.toString(), "en");
            Document document = Factory.newDocument(article.getFirstSection());
            Corpus corpus = Factory.newCorpus("");
            corpus.add(document);
            hypernymExtractionPipelineEN.setCorpus(corpus);
            hypernymExtractionPipelineEN.execute();

            Iterator corpus_iter = corpus.iterator();

            while (corpus_iter.hasNext()) {
                Document doc = (Document) corpus_iter.next();
                AnnotationSet as_all = doc.getAnnotations();
                AnnotationSet as_hearst = as_all.get("h");
                Iterator ann_iter = as_hearst.iterator();

                while (ann_iter.hasNext()) {
                    Annotation isaAnnot = (gate.Annotation) ann_iter.next();
                    Node isaStart = isaAnnot.getStartNode();
                    Node isaEnd = isaAnnot.getEndNode();
                    String hypernym = doc.getContent().getContent(isaStart.getOffset(), isaEnd.getOffset())
                            .toString();

                    Hypernym hypObj = new Hypernym();
                    hypObj.setEntity(entity);
                    hypObj.setEntityURL("http://dbpedia.org/resource/" + entity.replace(" ", "_"));
                    hypObj.setType(hypernym.substring(0, 1).toUpperCase() + hypernym.substring(1));
                    hypObj.setTypeURL("http://dbpedia.org/resource/" + hypernym.replace(" ", "_"));
                    hypObj.setOrigin("thd");
                    hypObj.setAccuracy("0.85");
                    hypObj.setBounds("+- 2.5%");
                    hypernymsList.add(hypObj);
                }
            }

        } catch (InvalidOffsetException ex) {
            Logger.getLogger(THDInstance.class.getName()).log(Level.SEVERE, null, ex);
        } catch (ExecutionException ex) {
            Logger.getLogger(THDInstance.class.getName()).log(Level.SEVERE, null, ex);
        } catch (ResourceInstantiationException ex) {
            Logger.getLogger(THDInstance.class.getName()).log(Level.SEVERE, null, ex);
        } catch (IOException ex) {
            Logger.getLogger(THDInstance.class.getName()).log(Level.SEVERE, null, ex);
        }
        return hypernymsList;
    }

    public ArrayList<Hypernym> extractEntityHypernymsDE(String entity, String kb) {

        ArrayList<Hypernym> hypernymsList = new ArrayList<Hypernym>();
        try {
            URL url = null;
            String path = "";
            if (kb.equals("local")) {
                path = Settings.DE_WIKIPEDIA_LOCAL_EXPORT + entity.replace(" ", "_");
            } else if (kb.equals("live")) {
                path = Settings.DE_WIKIPEDIA_LIVE_EXPORT + entity.replace(" ", "_");
            }

            StringBuffer buffer = new StringBuffer();
            url = new URL(path);
            URLConnection connection = url.openConnection();
            InputStream is = connection.getInputStream();
            Reader isr = new InputStreamReader(is, "UTF-8");
            Reader in = new BufferedReader(isr);
            int ch;

            while ((ch = in.read()) > -1) {
                buffer.append((char) ch);
            }

            in.close();
            Article article = new Article(buffer.toString(), "en");
            Document document = Factory.newDocument(article.getFirstSection());
            Corpus corpus = Factory.newCorpus("");
            corpus.add(document);

            hypernymExtractionPipelineDE.setCorpus(corpus);
            hypernymExtractionPipelineDE.execute();

            Iterator corpus_iter = corpus.iterator();

            while (corpus_iter.hasNext()) {
                Document doc = (Document) corpus_iter.next();
                AnnotationSet as_all = doc.getAnnotations();
                AnnotationSet as_hearst = as_all.get("h");
                Iterator ann_iter = as_hearst.iterator();
                while (ann_iter.hasNext()) {
                    Annotation isaAnnot = (gate.Annotation) ann_iter.next();
                    Node isaStart = isaAnnot.getStartNode();
                    Node isaEnd = isaAnnot.getEndNode();
                    String hypernym = doc.getContent().getContent(isaStart.getOffset(), isaEnd.getOffset())
                            .toString();

                    Hypernym hypObj = new Hypernym();
                    hypObj.setEntity(entity);
                    hypObj.setEntityURL("http://de.dbpedia.org/resource/" + entity.replace(" ", "_"));
                    hypObj.setType(hypernym.substring(0, 1).toUpperCase() + hypernym.substring(1));
                    hypObj.setTypeURL("http://de.dbpedia.org/resource/" + hypernym.replace(" ", "_"));
                    hypObj.setOrigin("thd");
                    hypObj.setAccuracy("0.77");
                    hypObj.setBounds("+- 2.5%");
                    hypernymsList.add(hypObj);
                }
            }

        } catch (InvalidOffsetException ex) {
            Logger.getLogger(THDInstance.class.getName()).log(Level.SEVERE, null, ex);
        } catch (ExecutionException ex) {
            Logger.getLogger(THDInstance.class.getName()).log(Level.SEVERE, null, ex);
        } catch (ResourceInstantiationException ex) {
            Logger.getLogger(THDInstance.class.getName()).log(Level.SEVERE, null, ex);
        } catch (IOException ex) {
            Logger.getLogger(THDInstance.class.getName()).log(Level.SEVERE, null, ex);
        }
        return hypernymsList;
    }

    public ArrayList<Hypernym> extractEntityHypernymsNL(String entity, String kb) {
        ArrayList<Hypernym> hypernymsList = new ArrayList<Hypernym>();
        try {
            URL url = null;
            String path = "";
            if (kb.equals("local")) {
                path = Settings.NL_WIKIPEDIA_LOCAL_EXPORT + entity.replace(" ", "_");
            } else if (kb.equals("live")) {
                path = Settings.NL_WIKIPEDIA_LIVE_EXPORT + entity.replace(" ", "_");
            }

            StringBuffer buffer = new StringBuffer();
            url = new URL(path);
            URLConnection connection = url.openConnection();
            InputStream is = connection.getInputStream();
            Reader isr = new InputStreamReader(is, "UTF-8");
            Reader in = new BufferedReader(isr);
            int ch;

            while ((ch = in.read()) > -1) {
                buffer.append((char) ch);
            }
            in.close();
            Article article = new Article(buffer.toString(), "en");
            Document document = Factory.newDocument(article.getFirstSection());
            Corpus corpus = Factory.newCorpus("");
            corpus.add(document);
            hypernymExtractionPipelineNL.setCorpus(corpus);
            hypernymExtractionPipelineNL.execute();

            Iterator corpus_iter = corpus.iterator();

            while (corpus_iter.hasNext()) {
                Document doc = (Document) corpus_iter.next();
                AnnotationSet as_all = doc.getAnnotations();
                AnnotationSet as_hearst = as_all.get("h");
                Iterator ann_iter = as_hearst.iterator();
                while (ann_iter.hasNext()) {
                    Annotation isaAnnot = (gate.Annotation) ann_iter.next();
                    Node isaStart = isaAnnot.getStartNode();
                    Node isaEnd = isaAnnot.getEndNode();
                    String hypernym = doc.getContent().getContent(isaStart.getOffset(), isaEnd.getOffset())
                            .toString();

                    Hypernym hypObj = new Hypernym();
                    hypObj.setEntity(entity);
                    hypObj.setEntityURL("http://nl.dbpedia.org/resource/" + entity.replace(" ", "_"));
                    hypObj.setType(hypernym.substring(0, 1).toUpperCase() + hypernym.substring(1));
                    hypObj.setTypeURL("http://nl.dbpedia.org/resource/" + hypernym.replace(" ", "_"));
                    hypObj.setOrigin("thd");
                    hypObj.setAccuracy("0.89");
                    hypObj.setBounds("+- 2%");
                    hypernymsList.add(hypObj);
                }
            }

        } catch (InvalidOffsetException ex) {
            Logger.getLogger(THDInstance.class.getName()).log(Level.SEVERE, null, ex);
        } catch (ExecutionException ex) {
            Logger.getLogger(THDInstance.class.getName()).log(Level.SEVERE, null, ex);
        } catch (ResourceInstantiationException ex) {
            Logger.getLogger(THDInstance.class.getName()).log(Level.SEVERE, null, ex);
        } catch (IOException ex) {
            Logger.getLogger(THDInstance.class.getName()).log(Level.SEVERE, null, ex);
        }
        return hypernymsList;
    }

    private static void assambleHypernymExtractionPipelineEN() {

        try {

            hypernymExtractionPipelineEN = (SerialAnalyserController) Factory
                    .createResource("gate.creole.RealtimeCorpusController");

            FeatureMap resetFeatureMap = Factory.newFeatureMap();
            ProcessingResource resetPR = (ProcessingResource) Factory
                    .createResource("gate.creole.annotdelete.AnnotationDeletePR", resetFeatureMap);
            hypernymExtractionPipelineEN.add(resetPR);

            FeatureMap tokenizerFeatureMap = Factory.newFeatureMap();
            ProcessingResource tokenizerPR = (ProcessingResource) Factory
                    .createResource("gate.creole.tokeniser.DefaultTokeniser", tokenizerFeatureMap);
            hypernymExtractionPipelineEN.add(tokenizerPR);

            FeatureMap sentenceSplitterFeatureMap = Factory.newFeatureMap();
            ProcessingResource sentenceSplitterPR = (ProcessingResource) Factory
                    .createResource("gate.creole.splitter.SentenceSplitter", sentenceSplitterFeatureMap);
            //ProcessingResource sentenceSplitterPR = (ProcessingResource) Factory.createResource("gate.creole.splitter.RegexSentenceSplitter", sentenceSplitterFeatureMap);
            hypernymExtractionPipelineEN.add(sentenceSplitterPR);

            FeatureMap posTaggerFeatureMap = Factory.newFeatureMap();
            ProcessingResource posTaggerPR = (ProcessingResource) Factory.createResource("gate.creole.POSTagger",
                    posTaggerFeatureMap);
            hypernymExtractionPipelineEN.add(posTaggerPR);

            URL url = thdInstance.getClass().getResource(enHypernymExtractionGrammar);
            File japeOrigFile = new File(url.getFile());
            java.net.URI japeURI = japeOrigFile.toURI();

            FeatureMap transducerFeatureMap = Factory.newFeatureMap();
            try {
                transducerFeatureMap.put("grammarURL", japeURI.toURL());
                transducerFeatureMap.put("encoding", "UTF-8");
            } catch (MalformedURLException e) {

            }
            ProcessingResource japeCandidatesPR = (ProcessingResource) Factory
                    .createResource("gate.creole.Transducer", transducerFeatureMap);

            hypernymExtractionPipelineEN.add(japeCandidatesPR);

        } catch (ResourceInstantiationException ex) {
            Logger.getLogger(THDInstance.class.getName()).log(Level.SEVERE, null, ex);
        }
    }

    private static void assambleHypernymExtractionPipelineDE() {

        try {

            hypernymExtractionPipelineDE = (SerialAnalyserController) Factory
                    .createResource("gate.creole.RealtimeCorpusController");

            FeatureMap resetFeatureMap = Factory.newFeatureMap();
            ProcessingResource resetPR = (ProcessingResource) Factory
                    .createResource("gate.creole.annotdelete.AnnotationDeletePR", resetFeatureMap);
            hypernymExtractionPipelineDE.add(resetPR);

            FeatureMap tokenizerFeatureMap = Factory.newFeatureMap();
            ProcessingResource tokenizerPR = (ProcessingResource) Factory
                    .createResource("gate.creole.tokeniser.DefaultTokeniser", tokenizerFeatureMap);
            hypernymExtractionPipelineDE.add(tokenizerPR);

            FeatureMap sentenceSplitterFeatureMap = Factory.newFeatureMap();
            ProcessingResource sentenceSplitterPR = (ProcessingResource) Factory
                    .createResource("gate.creole.splitter.SentenceSplitter", sentenceSplitterFeatureMap);
            //ProcessingResource sentenceSplitterPR = (ProcessingResource) Factory.createResource("gate.creole.splitter.RegexSentenceSplitter", sentenceSplitterFeatureMap);
            hypernymExtractionPipelineDE.add(sentenceSplitterPR);

            FeatureMap taggerFeatureMap = Factory.newFeatureMap();
            taggerFeatureMap.put("debug", "false");
            taggerFeatureMap.put("encoding", "ISO-8859-1");
            taggerFeatureMap.put("failOnUnmappableCharacter", "false");
            taggerFeatureMap.put("featureMapping", "lemma=3;category=2;string=1");
            taggerFeatureMap.put("inputAnnotationType", "Token");
            taggerFeatureMap.put("inputTemplate", "${string}");
            taggerFeatureMap.put("outputAnnotationType", "Token");
            taggerFeatureMap.put("regex", "(.+)   (.+)   (.+)");
            taggerFeatureMap.put("taggerBinary", deTaggerBinary);
            taggerFeatureMap.put("updateAnnotations", false);

            ProcessingResource genTag = (ProcessingResource) Factory
                    .createResource("gate.taggerframework.GenericTagger", taggerFeatureMap);
            hypernymExtractionPipelineDE.add(genTag);

            URL url = thdInstance.getClass().getResource(deHypernymExtractionGrammar);
            File japeOrigFile = new File(url.getFile());
            java.net.URI japeURI = japeOrigFile.toURI();

            FeatureMap transducerFeatureMap = Factory.newFeatureMap();
            try {
                transducerFeatureMap.put("grammarURL", japeURI.toURL());
                transducerFeatureMap.put("encoding", "UTF-8");
            } catch (MalformedURLException e) {
            }

            ProcessingResource japeCandidatesPR = (ProcessingResource) Factory
                    .createResource("gate.creole.Transducer", transducerFeatureMap);
            hypernymExtractionPipelineDE.add(japeCandidatesPR);

        } catch (ResourceInstantiationException ex) {
            Logger.getLogger(THDInstance.class.getName()).log(Level.SEVERE, null, ex);
        }

    }

    private static void assambleHypernymExtractionPipelineNL() {

        try {

            hypernymExtractionPipelineNL = (SerialAnalyserController) Factory
                    .createResource("gate.creole.RealtimeCorpusController");

            FeatureMap resetFeatureMap = Factory.newFeatureMap();
            ProcessingResource resetPR = (ProcessingResource) Factory
                    .createResource("gate.creole.annotdelete.AnnotationDeletePR", resetFeatureMap);
            hypernymExtractionPipelineNL.add(resetPR);

            FeatureMap tokenizerFeatureMap = Factory.newFeatureMap();
            ProcessingResource tokenizerPR = (ProcessingResource) Factory
                    .createResource("gate.creole.tokeniser.DefaultTokeniser", tokenizerFeatureMap);
            hypernymExtractionPipelineNL.add(tokenizerPR);

            FeatureMap sentenceSplitterFeatureMap = Factory.newFeatureMap();
            ProcessingResource sentenceSplitterPR = (ProcessingResource) Factory
                    .createResource("gate.creole.splitter.SentenceSplitter", sentenceSplitterFeatureMap);
            //ProcessingResource sentenceSplitterPR = (ProcessingResource) Factory.createResource("gate.creole.splitter.RegexSentenceSplitter", sentenceSplitterFeatureMap);
            hypernymExtractionPipelineNL.add(sentenceSplitterPR);

            FeatureMap taggerFeatureMap = Factory.newFeatureMap();
            taggerFeatureMap.put("debug", "false");
            taggerFeatureMap.put("encoding", "ISO-8859-1");
            taggerFeatureMap.put("failOnUnmappableCharacter", "false");
            taggerFeatureMap.put("featureMapping", "lemma=3;category=2;string=1");
            taggerFeatureMap.put("inputAnnotationType", "Token");
            taggerFeatureMap.put("inputTemplate", "${string}");
            taggerFeatureMap.put("outputAnnotationType", "Token");
            taggerFeatureMap.put("regex", "(.+)   (.+)   (.+)");
            taggerFeatureMap.put("taggerBinary", nlTaggerBinary);
            taggerFeatureMap.put("updateAnnotations", false);

            ProcessingResource genTag = (ProcessingResource) Factory
                    .createResource("gate.taggerframework.GenericTagger", taggerFeatureMap);
            hypernymExtractionPipelineNL.add(genTag);

            URL url = thdInstance.getClass().getResource(nlHypernymExtractionGrammar);
            File japeOrigFile = new File(url.getFile());
            java.net.URI japeURI = japeOrigFile.toURI();

            FeatureMap transducerFeatureMap = Factory.newFeatureMap();
            try {
                transducerFeatureMap.put("grammarURL", japeURI.toURL());
                transducerFeatureMap.put("encoding", "UTF-8");
            } catch (MalformedURLException e) {
            }
            ProcessingResource japeCandidatesPR = (ProcessingResource) Factory
                    .createResource("gate.creole.Transducer", transducerFeatureMap);

            hypernymExtractionPipelineNL.add(japeCandidatesPR);

        } catch (ResourceInstantiationException ex) {
            Logger.getLogger(THDInstance.class.getName()).log(Level.SEVERE, null, ex);
        }

    }

    private static void assambleEntityExtractionPipelineEN() {
        try {
            URL url = thdInstance.getClass().getResource(enEntityExtractionGrammar);
            File japeOrigFile = new File(url.getFile());
            java.net.URI japeURI = japeOrigFile.toURI();
            FeatureMap transducerFeatureMap = Factory.newFeatureMap();

            try {
                transducerFeatureMap.put("grammarURL", japeURI.toURL());
                transducerFeatureMap.put("encoding", "UTF-8");
            } catch (MalformedURLException e) {
            }

            FeatureMap tokenizerFeatureMap = Factory.newFeatureMap();
            ProcessingResource tokenizerPR = (ProcessingResource) Factory
                    .createResource("gate.creole.tokeniser.DefaultTokeniser", tokenizerFeatureMap);

            FeatureMap sentenceSplitterFeatureMap = Factory.newFeatureMap();
            ProcessingResource sentenceSplitterPR = (ProcessingResource) Factory
                    .createResource("gate.creole.splitter.SentenceSplitter", sentenceSplitterFeatureMap);

            FeatureMap posTaggerFeatureMap = Factory.newFeatureMap();
            ProcessingResource posTaggerPR = (ProcessingResource) Factory.createResource("gate.creole.POSTagger",
                    posTaggerFeatureMap);

            ProcessingResource japeCandidatesPR = (ProcessingResource) Factory
                    .createResource("gate.creole.Transducer", transducerFeatureMap);

            enEntityExtractionPipeline = (SerialAnalyserController) Factory
                    .createResource("gate.creole.SerialAnalyserController");

            enEntityExtractionPipeline.add(tokenizerPR);
            enEntityExtractionPipeline.add(sentenceSplitterPR);
            enEntityExtractionPipeline.add(posTaggerPR);
            enEntityExtractionPipeline.add(japeCandidatesPR);
        } catch (ResourceInstantiationException ex) {
            ;
            Logger.getLogger(THDInstance.class.getName()).log(Level.SEVERE, null, ex);
        }
    }

    private static void assambleEntityExtractionPipelineDE() {
        try {
            FeatureMap resetFeatureMap = Factory.newFeatureMap();
            ProcessingResource resetPR = (ProcessingResource) Factory
                    .createResource("gate.creole.annotdelete.AnnotationDeletePR", resetFeatureMap);

            FeatureMap tokenizerFeatureMap = Factory.newFeatureMap();
            ProcessingResource tokenizerPR = (ProcessingResource) Factory
                    .createResource("gate.creole.tokeniser.DefaultTokeniser", tokenizerFeatureMap);

            FeatureMap taggerFeatureMap = Factory.newFeatureMap();
            taggerFeatureMap.put("debug", "false");
            taggerFeatureMap.put("encoding", "ISO-8859-1");
            taggerFeatureMap.put("failOnUnmappableCharacter", "false");
            taggerFeatureMap.put("featureMapping", "lemma=3;category=2;string=1");
            taggerFeatureMap.put("inputAnnotationType", "Token");
            taggerFeatureMap.put("inputTemplate", "${string}");
            taggerFeatureMap.put("outputAnnotationType", "Token");
            taggerFeatureMap.put("regex", "(.+)   (.+)   (.+)");
            taggerFeatureMap.put("taggerBinary", deTaggerBinary);
            taggerFeatureMap.put("updateAnnotations", false);

            ProcessingResource genTag = (ProcessingResource) Factory
                    .createResource("gate.taggerframework.GenericTagger", taggerFeatureMap);

            //JAPE Entity Extraction grammar
            URL url = thdInstance.getClass().getResource(deEntityExtractionGrammar);

            File japeOrigFile = new File(url.getFile());
            java.net.URI japeURI = japeOrigFile.toURI();
            FeatureMap transducerFeatureMap = Factory.newFeatureMap();

            try {
                transducerFeatureMap.put("grammarURL", japeURI.toURL());
                transducerFeatureMap.put("encoding", "UTF-8");
            } catch (MalformedURLException e) {
            }

            ProcessingResource japeCandidatesPR = (ProcessingResource) Factory
                    .createResource("gate.creole.Transducer", transducerFeatureMap);

            deEntityExtractionPipeline = (SerialAnalyserController) Factory
                    .createResource("gate.creole.RealtimeCorpusController");
            deEntityExtractionPipeline.add(resetPR);
            deEntityExtractionPipeline.add(tokenizerPR);
            deEntityExtractionPipeline.add(genTag);
            deEntityExtractionPipeline.add(japeCandidatesPR);
        } catch (ResourceInstantiationException ex) {
            Logger.getLogger(THDInstance.class.getName()).log(Level.SEVERE, null, ex);
        }
    }

    private static void assambleEntityExtractionPipelineNL() {

        try {

            FeatureMap resetFeatureMap = Factory.newFeatureMap();
            ProcessingResource resetPR = (ProcessingResource) Factory
                    .createResource("gate.creole.annotdelete.AnnotationDeletePR", resetFeatureMap);

            FeatureMap tokenizerFeatureMap = Factory.newFeatureMap();
            ProcessingResource tokenizerPR = (ProcessingResource) Factory
                    .createResource("gate.creole.tokeniser.DefaultTokeniser", tokenizerFeatureMap);

            FeatureMap taggerFeatureMap = Factory.newFeatureMap();
            taggerFeatureMap.put("debug", "false");
            taggerFeatureMap.put("encoding", "ISO-8859-1");
            taggerFeatureMap.put("failOnUnmappableCharacter", "false");
            taggerFeatureMap.put("featureMapping", "lemma=3;category=2;string=1");
            taggerFeatureMap.put("inputAnnotationType", "Token");
            taggerFeatureMap.put("inputTemplate", "${string}");
            taggerFeatureMap.put("outputAnnotationType", "Token");
            taggerFeatureMap.put("regex", "(.+)   (.+)   (.+)");
            taggerFeatureMap.put("taggerBinary", nlTaggerBinary);
            taggerFeatureMap.put("updateAnnotations", false);

            ProcessingResource genTag = (ProcessingResource) Factory
                    .createResource("gate.taggerframework.GenericTagger", taggerFeatureMap);

            //JAPE Entity Extraction grammar
            URL url = thdInstance.getClass().getResource(nlEntityExtractionGrammar);
            File japeOrigFile = new File(url.getFile());
            java.net.URI japeURI = japeOrigFile.toURI();
            FeatureMap transducerFeatureMap = Factory.newFeatureMap();

            try {
                transducerFeatureMap.put("grammarURL", japeURI.toURL());
                transducerFeatureMap.put("encoding", "UTF-8");
            } catch (MalformedURLException e) {
            }

            ProcessingResource japeCandidatesPR = (ProcessingResource) Factory
                    .createResource("gate.creole.Transducer", transducerFeatureMap);

            nlEntityExtractionPipeline = (SerialAnalyserController) Factory
                    .createResource("gate.creole.RealtimeCorpusController");
            nlEntityExtractionPipeline.add(resetPR);
            nlEntityExtractionPipeline.add(tokenizerPR);
            nlEntityExtractionPipeline.add(genTag);
            nlEntityExtractionPipeline.add(japeCandidatesPR);
        } catch (ResourceInstantiationException ex) {
            Logger.getLogger(THDInstance.class.getName()).log(Level.SEVERE, null, ex);
        }
    }

    private static void init() {
        Properties prop = new PropertiesLoader().getProperties();
        readSettings(prop);
    }

    private static void readSettings(Properties prop) {

        gateHome = prop.getProperty("gateHome");
        pluginHome = prop.getProperty("pluginHome");

        enEntityExtractionGrammar = prop.getProperty("enEntityExtractionGrammar");
        nlEntityExtractionGrammar = prop.getProperty("nlEntityExtractionGrammar");
        deEntityExtractionGrammar = prop.getProperty("deEntityExtractionGrammar");

        enHypernymExtractionGrammar = prop.getProperty("enHypernymExtractionGrammar");
        deHypernymExtractionGrammar = prop.getProperty("deHypernymExtractionGrammar");
        nlHypernymExtractionGrammar = prop.getProperty("nlHypernymExtractionGrammar");

        nlTaggerBinary = prop.getProperty("nlTaggerBinary");
        deTaggerBinary = prop.getProperty("deTaggerBinary");

        DBpediaOntologyManager.setDbpediaOntologyFileLocation(prop.getProperty("dbpediaOntologyFileLocation"));
        YagoOntologyManager.setYagoOntologyFileLocation(prop.getProperty("yagoOntologyFileLocation"));

    }

    private static void initGate() {

        File gateHomeFile = new File(gateHome);
        Gate.setGateHome(gateHomeFile);
        File pluginsHome = new File(pluginHome);
        Gate.setPluginsHome(pluginsHome);
        Gate.setUserConfigFile(new File(gateHome, "user-gate.xml"));
        URL annieHome = null;
        URL taggerHome = null;
        try {
            annieHome = new File(pluginsHome, "ANNIE").toURL();
            taggerHome = new File(pluginsHome, "Tagger_Framework").toURL();
        } catch (java.net.MalformedURLException e) {
        }
        try {
            Gate.init();
            CreoleRegister register = Gate.getCreoleRegister();
            register.registerDirectories(annieHome);
            register.registerDirectories(taggerHome);
        } catch (GateException ge) {
            //SimpleDebug.printDebug(ge.toString());
        }
    }

    public THDInstance() {
        // THD constructor
    }

    public ArrayList<Entity> extractEntityCandidates(String query, String lang, String entity_type)
            throws ResourceInstantiationException, ExecutionException, InvalidOffsetException {

        ArrayList<Entity> candidates = null;
        switch (lang) {
        case "en":
            candidates = extractEntityCandidatesEN(query, lang, entity_type);
            break;
        case "de":
            candidates = extractEntityCandidatesDE(query, lang, entity_type);
            break;
        case "nl":
            candidates = extractEntityCandidatesNL(query, lang, entity_type);
            break;
        }

        return candidates;
    }

    public ArrayList<Entity> extractEntityCandidatesDE(String query, String lang, String entity_type) {
        try {
            Document doc = Factory.newDocument(query);
            doc.setName("Query_Document");
            Corpus corpus = Factory.newCorpus("");
            corpus.add(doc);

            deEntityExtractionPipeline.setCorpus(corpus);
            deEntityExtractionPipeline.execute();

            Document[] docs = (Document[]) corpus.toArray(new Document[corpus.size()]);
            ArrayList<Entity> candidates = new ArrayList<Entity>();

            Document d = docs[0];
            AnnotationSet as_all = d.getAnnotations();
            if (entity_type.equals("all") || entity_type.equals("ne")) {
                AnnotationSet as_entity = as_all.get("ne");

                Iterator anot = as_entity.iterator();
                while (anot.hasNext()) {
                    try {
                        Annotation isaAnnot = (gate.Annotation) anot.next();
                        Node annStart = isaAnnot.getStartNode();
                        Node annEnd = isaAnnot.getEndNode();
                        AnnotationSet as_token = as_all.get("Token", annStart.getOffset(), annEnd.getOffset());
                        String content = "";

                        if (as_token.size() > 1) {
                            content = d.getContent().getContent(annStart.getOffset(), annEnd.getOffset())
                                    .toString();
                        } else {
                            Iterator as_token_iter = as_token.iterator();

                            Annotation tok = (gate.Annotation) as_token_iter.next();
                            String lemma = tok.getFeatures().get("lemma").toString();

                            if (!lemma.equals("<unknown>")) {
                                content = lemma;
                            } else {
                                content = d.getContent().getContent(annStart.getOffset(), annEnd.getOffset())
                                        .toString();
                            }
                        }
                        candidates.add(new Entity(
                                d.getContent().getContent(annStart.getOffset(), annEnd.getOffset()).toString(),
                                annStart.getOffset(), annEnd.getOffset(), "named entity"));
                    } catch (InvalidOffsetException ex) {
                        Logger.getLogger(THDInstance.class.getName()).log(Level.SEVERE, null, ex);
                    }
                }
            }
            if (entity_type.equals("all") || entity_type.equals("ce")) {

                AnnotationSet as_common_entity = as_all.get("e");
                Iterator anot_e = as_common_entity.iterator();

                while (anot_e.hasNext()) {
                    try {
                        Annotation isaAnnot = (gate.Annotation) anot_e.next();
                        Node annStart = isaAnnot.getStartNode();
                        Node annEnd = isaAnnot.getEndNode();
                        AnnotationSet as_token = as_all.get("Token", annStart.getOffset(), annEnd.getOffset());

                        String content = "";

                        if (as_token.size() > 1) {
                            content = d.getContent().getContent(annStart.getOffset(), annEnd.getOffset())
                                    .toString();
                        } else {
                            Iterator as_token_iter = as_token.iterator();

                            Annotation tok = (gate.Annotation) as_token_iter.next();
                            String lemma = tok.getFeatures().get("lemma").toString();

                            if (!lemma.equals("<unknown>")) {
                                content = lemma;
                            } else {
                                content = d.getContent().getContent(annStart.getOffset(), annEnd.getOffset())
                                        .toString();
                            }
                        }
                        candidates.add(new Entity(
                                d.getContent().getContent(annStart.getOffset(), annEnd.getOffset()).toString(),
                                annStart.getOffset(), annEnd.getOffset(), "common entity"));
                    } catch (InvalidOffsetException ex) {
                        Logger.getLogger(THDInstance.class.getName()).log(Level.SEVERE, null, ex);
                    }
                }
            }
            return candidates;

        } catch (ExecutionException ex) {
            Logger.getLogger(THDInstance.class.getName()).log(Level.SEVERE, null, ex);
        } catch (ResourceInstantiationException ex) {
            Logger.getLogger(THDInstance.class.getName()).log(Level.SEVERE, null, ex);
        }
        return null;
    }

    public ArrayList<Entity> extractEntityCandidatesNL(String query, String lang, String entity_type) {

        try {
            Document doc = Factory.newDocument(query);
            doc.setName("Query_Document");
            Corpus corpus = Factory.newCorpus("");
            corpus.add(doc);

            nlEntityExtractionPipeline.setCorpus(corpus);
            nlEntityExtractionPipeline.execute();

            Document[] docs = (Document[]) corpus.toArray(new Document[corpus.size()]);
            ArrayList<Entity> candidates = new ArrayList<Entity>();

            Document d = docs[0];
            AnnotationSet as_all = d.getAnnotations();

            if (entity_type.equals("all") || entity_type.equals("ne")) {

                AnnotationSet as_entity = as_all.get("ne");
                Iterator anot = as_entity.iterator();
                while (anot.hasNext()) {
                    try {
                        Annotation isaAnnot = (gate.Annotation) anot.next();
                        Node annStart = isaAnnot.getStartNode();
                        Node annEnd = isaAnnot.getEndNode();
                        String content = d.getContent().getContent(annStart.getOffset(), annEnd.getOffset())
                                .toString();
                        candidates
                                .add(new Entity(content, annStart.getOffset(), annEnd.getOffset(), "named entity"));
                    } catch (InvalidOffsetException ex) {
                        Logger.getLogger(THDInstance.class.getName()).log(Level.SEVERE, null, ex);
                    }
                }
            }
            if (entity_type.equals("all") || entity_type.equals("ce")) {
                AnnotationSet as_common_entity = as_all.get("e");
                Iterator anot_e = as_common_entity.iterator();
                while (anot_e.hasNext()) {
                    try {
                        Annotation isaAnnot = (gate.Annotation) anot_e.next();
                        Node annStart = isaAnnot.getStartNode();
                        Node annEnd = isaAnnot.getEndNode();
                        String content = d.getContent().getContent(annStart.getOffset(), annEnd.getOffset())
                                .toString();
                        candidates.add(
                                new Entity(content, annStart.getOffset(), annEnd.getOffset(), "common entity"));
                    } catch (InvalidOffsetException ex) {
                        Logger.getLogger(THDInstance.class.getName()).log(Level.SEVERE, null, ex);
                    }
                }
            }
            return candidates;

        } catch (ExecutionException ex) {
            Logger.getLogger(THDInstance.class.getName()).log(Level.SEVERE, null, ex);
        } catch (ResourceInstantiationException ex) {
            Logger.getLogger(THDInstance.class.getName()).log(Level.SEVERE, null, ex);
        }
        return null;
    }

    public ArrayList<Entity> extractEntityCandidatesEN(String query, String lang, String entity_type) {

        try {

            Document doc = Factory.newDocument(query);
            doc.setName("Query_Document");
            Corpus corpus = Factory.newCorpus("");
            corpus.add(doc);

            enEntityExtractionPipeline.setCorpus(corpus);
            enEntityExtractionPipeline.execute();

            Document[] docs = (Document[]) corpus.toArray(new Document[corpus.size()]);
            ArrayList<Entity> candidates = new ArrayList<Entity>();

            Document d = docs[0];
            AnnotationSet as_all = d.getAnnotations();

            if (entity_type.equals("all") || entity_type.equals("ne")) {

                AnnotationSet as_named_entity = as_all.get("ne");
                Iterator anot = as_named_entity.iterator();

                while (anot.hasNext()) {
                    try {
                        Annotation isaAnnot = (gate.Annotation) anot.next();
                        Node annStart = isaAnnot.getStartNode();
                        Node annEnd = isaAnnot.getEndNode();
                        String content = d.getContent().getContent(annStart.getOffset(), annEnd.getOffset())
                                .toString();
                        candidates
                                .add(new Entity(content, annStart.getOffset(), annEnd.getOffset(), "named entity"));
                    } catch (InvalidOffsetException ex) {
                        Logger.getLogger(THDInstance.class.getName()).log(Level.SEVERE, null, ex);
                    }
                }
            }
            if (entity_type.equals("all") || entity_type.equals("ce")) {

                AnnotationSet as_common_nouns = as_all.get("e");
                Iterator cn_anot = as_common_nouns.iterator();

                while (cn_anot.hasNext()) {
                    try {
                        Annotation isaAnnot = (gate.Annotation) cn_anot.next();
                        Node annStart = isaAnnot.getStartNode();
                        Node annEnd = isaAnnot.getEndNode();
                        String content = d.getContent().getContent(annStart.getOffset(), annEnd.getOffset())
                                .toString();
                        candidates.add(
                                new Entity(content, annStart.getOffset(), annEnd.getOffset(), "common entity"));
                    } catch (InvalidOffsetException ex) {
                        Logger.getLogger(THDInstance.class.getName()).log(Level.SEVERE, null, ex);
                    }
                }
            }

            return candidates;

        } catch (ExecutionException ex) {
            Logger.getLogger(THDInstance.class.getName()).log(Level.SEVERE, null, ex);
        } catch (ResourceInstantiationException ex) {
            Logger.getLogger(THDInstance.class.getName()).log(Level.SEVERE, null, ex);
        }
        return null;
    }
}