cz.vse.fis.keg.entityclassifier.core.THDWorker.java Source code

Java tutorial

Introduction

Here is the source code for cz.vse.fis.keg.entityclassifier.core.THDWorker.java

Source

/*
 * #%L
 * Entityclassifier.eu NER CORE v3.9
 * %%
 * Copyright (C) 2015 Knowledge Engineering Group (KEG) and Web Intelligence Research Group (WIRG) - Milan Dojchinovski (milan.dojchinovski@fit.cvut.cz)
 * %%
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public
 * License along with this program.  If not, see
 * <http://www.gnu.org/licenses/gpl-3.0.html>.
 * #L%
 */
package cz.vse.fis.keg.entityclassifier.core;

import cz.vse.fis.keg.entityclassifier.core.conf.Settings;
import com.mongodb.BasicDBList;
import com.mongodb.BasicDBObject;
import com.mongodb.DBCursor;
import com.mongodb.DBObject;
import cz.vse.fis.keg.entityclassifier.core.entitylinking.AllVoting;
import cz.vse.fis.keg.entityclassifier.core.entitylinking.LinkedEntity;
import cz.vse.fis.keg.entityclassifier.core.entitylinking.LuceneSearch;
import cz.vse.fis.keg.entityclassifier.core.entitylinking.SFISearch;
import cz.vse.fis.keg.entityclassifier.core.entitylinking.SurfaceFormSimilarity;
import cz.vse.fis.keg.entityclassifier.core.mongodb.MongoDBClient;
import cz.vse.fis.keg.entityclassifier.core.ontologymapper.DBpediaMapping;
import cz.vse.fis.keg.entityclassifier.core.ontologymapper.DBpediaOntologyManager;
import cz.vse.fis.keg.entityclassifier.core.ontologymapper.DBpediaOntologyMapper;
import cz.vse.fis.keg.entityclassifier.core.ontologymapper.OntoRecord;
import cz.vse.fis.keg.entityclassifier.core.ontologymapper.TypeMapper;
import cz.vse.fis.keg.entityclassifier.core.ontologymapper.YagoOntologyManager;
import cz.vse.fis.keg.entityclassifier.core.redis.RedisClient;
import cz.vse.fis.keg.entityclassifier.core.vao.Article;
import cz.vse.fis.keg.entityclassifier.core.vao.Confidence;
import cz.vse.fis.keg.entityclassifier.core.vao.Entity;
import cz.vse.fis.keg.entityclassifier.core.vao.Hypernym;
import cz.vse.fis.keg.entityclassifier.core.vao.Type;
import cz.vse.fis.keg.entityclassifier.core.entitylinking.WikipediaSearch;
import cz.vse.fis.keg.entityspotting.SemiTagsSpotting;
import gate.Annotation;
import gate.AnnotationSet;
import gate.Corpus;
import gate.Document;
import gate.Factory;
import gate.FeatureMap;
import gate.Node;
import gate.ProcessingResource;
import gate.creole.ExecutionException;
import gate.creole.ResourceInstantiationException;
import gate.creole.SerialAnalyserController;
import gate.util.InvalidOffsetException;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.net.URLEncoder;
import java.net.UnknownHostException;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.TreeSet;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
 *
 * @author Milan
 */
public class THDWorker {

    private SerialAnalyserController enEntityExtractionPipeline = null;
    private SerialAnalyserController deEntityExtractionPipeline = null;
    private SerialAnalyserController nlEntityExtractionPipeline = null;

    private SerialAnalyserController hypernymExtractionPipelineEN = null;
    private SerialAnalyserController hypernymExtractionPipelineDE = null;
    private SerialAnalyserController hypernymExtractionPipelineNL = null;

    public THDWorker() {
        assamblePipelines();
    }

    public ArrayList<Entity> processTextAPI_MT(String query, String lang, String entity_type, String knowledge_base,
            String[] provenance, boolean priorityEntityLinking, String typesFilter, String spottingMethod,
            String linkingMethod, String tmpPar)
            throws ResourceInstantiationException, ExecutionException, UnknownHostException {

        ArrayList<Entity> resultEntities = new ArrayList<Entity>();
        ArrayList<Entity> extractedEntities = new ArrayList<Entity>();

        extractedEntities = extractEntityMentions(query, lang, entity_type, spottingMethod);

        System.out.println("Extracted candidates: " + extractedEntities.size());
        RedisClient redis = RedisClient.getInstance();

        for (Entity e : extractedEntities) {
            Entity foundEntity = new Entity();
            ;
            String entityString = e.getUnderlyingString().trim();

            LinkedEntity linkedEntity = null;
            boolean continueLinking = true;
            String longestEntityPage = null;
            String[] words = null;

            while (continueLinking) {

                if (knowledge_base.equals("linkedHypernymsDataset")) {

                    String linkedEntityStr = redis
                            .getValue(entityString + lang + "linkedHypernymsDataset" + linkingMethod);

                    if (linkedEntityStr == null) {

                        if (linkingMethod.equals("LuceneSearch")) {
                            linkedEntity = LuceneSearch.getInstance().findWikipediaArticle(entityString, lang,
                                    "local");
                        } else if (linkingMethod.equals("AllVoting")) {
                            linkedEntity = AllVoting.getInstance().findWikipediaArticle(entityString, lang,
                                    "local");
                        } else if (linkingMethod.equals("SurfaceFormSimilarity")) {
                            linkedEntity = SurfaceFormSimilarity.getInstance().findWikipediaArticle(entityString,
                                    lang, "local");
                        } else if (linkingMethod.equals("LuceneSearchSkipDisPage")) {
                            linkedEntity = LuceneSearch.getInstance()
                                    .findWikipediaArticleSkipDisambiguationPage(entityString, lang, "local");
                        } else if (linkingMethod.equals("SFISearch")) {
                            linkedEntity = SFISearch.getInstance().findWikipediaArticle(entityString, lang);
                        } else if (linkingMethod.equals("WikipediaSearch")) {
                            if (tmpPar.equals("irapi")) {
                                linkedEntity = WikipediaSearch.getInstance().findWikipediaArticle(entityString,
                                        lang, "live");
                            } else {
                                linkedEntity = WikipediaSearch.getInstance().findWikipediaArticle(entityString,
                                        lang, "local");
                            }
                        } else {
                        }
                        if (linkedEntity != null) {
                            redis.setKey(entityString + lang + "linkedHypernymsDataset" + linkingMethod,
                                    linkedEntity.toString());
                        }
                    } else {
                        // Linked entity found in cache.
                        linkedEntity = new LinkedEntity();
                        linkedEntity.setPageTitle(linkedEntityStr.split("\\+")[0]);
                        linkedEntity.setConfidence(Double.parseDouble(linkedEntityStr.split("\\+")[1]));
                    }
                } else if (knowledge_base.equals("live")) {

                    String linkedEntityStr = redis.getValue(entityString + lang + "live");

                    if (linkedEntityStr == null) {

                        linkedEntity = WikipediaSearch.getInstance().findWikipediaArticle(entityString, lang,
                                "live");
                        if (linkedEntity != null) {
                            redis.setKey(entityString + lang + "live" + linkingMethod, linkedEntity.toString());
                        }
                    } else {
                        // Linked entity found in cache.
                        linkedEntity = new LinkedEntity();
                        linkedEntity.setPageTitle(linkedEntityStr.split("\\+")[0]);
                        linkedEntity.setConfidence(Double.parseDouble(linkedEntityStr.split("\\+")[1]));
                    }

                } else if (knowledge_base.equals("local")) {
                    String linkedEntityStr = redis.getValue(entityString + lang + "local" + linkingMethod);

                    if (linkedEntityStr == null) {
                        if (linkingMethod.equals("LuceneSearch")) {
                            linkedEntity = LuceneSearch.getInstance().findWikipediaArticle(entityString, lang,
                                    "local");
                        } else if (linkingMethod.equals("LuceneSearchSkipDisPage")) {
                            linkedEntity = LuceneSearch.getInstance()
                                    .findWikipediaArticleSkipDisambiguationPage(entityString, lang, "local");
                        } else if (linkingMethod.equals("WikipediaSearch")) {
                            linkedEntity = WikipediaSearch.getInstance().findWikipediaArticle(entityString, lang,
                                    "local");
                        } else if (linkingMethod.equals("AllVoting")) {
                            linkedEntity = AllVoting.getInstance().findWikipediaArticle(entityString, lang,
                                    "local");
                        } else if (linkingMethod.equals("SurfaceFormSimilarity")) {
                            linkedEntity = SurfaceFormSimilarity.getInstance().findWikipediaArticle(entityString,
                                    lang, "local");
                        } else {
                        }
                        if (linkedEntity != null) {
                            redis.setKey(entityString + lang + "local" + linkingMethod, linkedEntity.toString());
                        }
                    } else {
                        // Linked entity found in cache.
                        linkedEntity = new LinkedEntity();
                        linkedEntity.setPageTitle(linkedEntityStr.split("\\+")[0]);
                        linkedEntity.setConfidence(Double.parseDouble(linkedEntityStr.split("\\+")[1]));
                    }
                }

                // ENTITY NOT MAPPED TO DBpedia
                if (linkedEntity == null) {

                    words = entityString.split("\\s+");
                    if (words.length > 1) {
                        entityString = entityString.split("\\s+", 2)[1];
                        // nothing, try to link again with shorter text
                    } else {
                        // ENTITY LINKED or CANNOT be LINKED
                        if (priorityEntityLinking) {
                            // priority linked
                            foundEntity = new Entity(e.getUnderlyingString(), e.getStartOffset(), e.getEndOffset(),
                                    e.getEntityType());
                            resultEntities.add(foundEntity);

                        } else {
                            // no priority linking
                            foundEntity = new Entity();
                            foundEntity.setStartOffset(e.getStartOffset());
                            foundEntity.setEndOffset(e.getEndOffset());
                            foundEntity.setEntityType(e.getEntityType());

                            if (longestEntityPage != null) {

                                foundEntity.setUnderlyingString(e.getUnderlyingString());
                                ArrayList<Type> entityTypes = new ArrayList<Type>();

                                Type t = new Type();
                                t.setEntityLabel(longestEntityPage);
                                switch (lang) {
                                case "en":
                                    t.setEntityURI(
                                            "http://dbpedia.org/resource/" + longestEntityPage.replace(" ", "_"));
                                    break;

                                case "de":
                                    t.setEntityURI("http://de.dbpedia.org/resource/"
                                            + longestEntityPage.replace(" ", "_"));
                                    break;

                                case "nl":
                                    t.setEntityURI("http://nl.dbpedia.org/resource/"
                                            + longestEntityPage.replace(" ", "_"));
                                    break;
                                }
                                t.setProvenance("thd");
                                entityTypes.add(t);

                                foundEntity.setTypes(entityTypes);
                            } else {
                                foundEntity.setUnderlyingString(e.getUnderlyingString());
                            }
                            resultEntities.add(foundEntity);
                        }
                        continueLinking = false;
                    }
                } else {
                    // entity mapped, checking types
                    HashSet<Hypernym> hypernymsList = null;
                    if (knowledge_base.equals("live")) {
                        hypernymsList = extractEntityTypes(linkedEntity.getPageTitle(), lang, "live", provenance);
                    } else if (knowledge_base.equals("local")) {
                        hypernymsList = extractEntityTypes(linkedEntity.getPageTitle(), lang, "local", provenance);
                    } else if (knowledge_base.equals("linkedHypernymsDataset")) {
                        hypernymsList = extractEntityTypes(linkedEntity.getPageTitle(), lang,
                                "linkedHypernymsDataset", provenance);
                    }

                    if (hypernymsList.size() > 0) {
                        // found types, ending linking
                        foundEntity = new Entity();
                        foundEntity.setUnderlyingString(e.getUnderlyingString());
                        foundEntity.setStartOffset(e.getStartOffset());
                        foundEntity.setEndOffset(e.getEndOffset());
                        foundEntity.setEntityType(e.getEntityType());

                        ArrayList<Type> entityTypes = new ArrayList<Type>();

                        for (Hypernym h : hypernymsList) {

                            Type type = new Type();
                            type.setEntityLabel(h.getEntity());
                            type.setEntityURI(h.getEntityURL());
                            type.setTypeLabel(h.getType());
                            type.setTypeURI(h.getTypeURL());
                            type.setProvenance(h.getOrigin());

                            Confidence classificationConf = new Confidence();
                            classificationConf.setValue(Double.parseDouble(h.getAccuracy()));
                            classificationConf.setType("classification");
                            type.setClassificationConfidence(classificationConf);

                            Confidence linkingConf = new Confidence();
                            linkingConf.setType("linking");

                            DecimalFormat df = new DecimalFormat("0.000");
                            double fConfidence = -1.0;
                            fConfidence = Double.parseDouble(df.format(linkedEntity.getConfidence()));

                            type.setLinkingConfidence(linkingConf);
                            linkingConf.setValue(fConfidence);

                            entityTypes.add(type);

                        }

                        foundEntity.setTypes(entityTypes);
                        resultEntities.add(foundEntity);
                        continueLinking = false;
                    } else {

                        // NO TYPES
                        // if priority, then end, otherwise continue
                        if (priorityEntityLinking) {
                            // PRIORITY ON
                            // priority linking
                            foundEntity = new Entity();
                            foundEntity.setUnderlyingString(e.getUnderlyingString());
                            foundEntity.setStartOffset(e.getStartOffset());
                            foundEntity.setEndOffset(e.getEndOffset());
                            foundEntity.setEntityType(e.getEntityType());
                            ArrayList<Type> entityTypes = new ArrayList<Type>();
                            Type t = new Type();
                            t.setProvenance("thd");
                            switch (lang) {
                            case "en":
                                t.setEntityURI("http://dbpedia.org/resource/"
                                        + linkedEntity.getPageTitle().replace(" ", "_"));
                                break;
                            case "de":
                                t.setEntityURI("http://de.dbpedia.org/resource/"
                                        + linkedEntity.getPageTitle().replace(" ", "_"));
                                break;
                            case "nl":
                                t.setEntityURI("http://nl.dbpedia.org/resource/"
                                        + linkedEntity.getPageTitle().replace(" ", "_"));
                                break;
                            }
                            t.setEntityLabel(linkedEntity.getPageTitle());

                            Confidence linkingConf = new Confidence();
                            linkingConf.setType("linking");

                            DecimalFormat df = new DecimalFormat("0.000");
                            double fConfidence = -1.0;
                            fConfidence = Double.parseDouble(df.format(linkedEntity.getConfidence()));

                            linkingConf.setValue(fConfidence);
                            t.setLinkingConfidence(linkingConf);

                            entityTypes.add(t);
                            foundEntity.setTypes(entityTypes);
                            resultEntities.add(foundEntity);

                            continueLinking = false;
                        } else {
                            // PRIORITY OFF, NO TYPES found
                            // no priority linking continue to search

                            words = entityString.split("\\s+");
                            if (words.length > 1) {
                                // PRIORITY OFF, MORE SEARCHING
                                entityString = entityString.split("\\s+", 2)[1];
                                if (longestEntityPage == null) {
                                    longestEntityPage = linkedEntity.getPageTitle();
                                }
                            } else {
                                // PRIORITY OFF, NO MORE SEARCH
                                foundEntity = new Entity();
                                foundEntity.setStartOffset(e.getStartOffset());
                                foundEntity.setEndOffset(e.getEndOffset());
                                foundEntity.setEntityType(e.getEntityType());
                                ArrayList<Type> entityTypes = new ArrayList<Type>();
                                Type t = new Type();
                                t.setProvenance("thd");

                                if (longestEntityPage != null) {
                                    foundEntity.setUnderlyingString(e.getUnderlyingString());
                                    switch (lang) {
                                    case "en":
                                        t.setEntityURI("http://dbpedia.org/resource/"
                                                + longestEntityPage.replace(" ", "_"));
                                        entityTypes.add(t);
                                        break;

                                    case "de":
                                        t.setEntityURI("http://de.dbpedia.org/resource/"
                                                + longestEntityPage.replace(" ", "_"));
                                        entityTypes.add(t);
                                        break;

                                    case "nl":
                                        t.setEntityURI("http://nl.dbpedia.org/resource/"
                                                + longestEntityPage.replace(" ", "_"));
                                        entityTypes.add(t);
                                        break;
                                    }

                                    Confidence linkingConf = new Confidence();
                                    linkingConf.setType("linking");

                                    DecimalFormat df = new DecimalFormat("0.000");
                                    double fConfidence = -1.0;
                                    fConfidence = Double.parseDouble(df.format(linkedEntity.getConfidence()));

                                    linkingConf.setValue(fConfidence);
                                    t.setLinkingConfidence(linkingConf);

                                    t.setEntityLabel(longestEntityPage);
                                } else {

                                    foundEntity.setUnderlyingString(e.getUnderlyingString());
                                    switch (lang) {
                                    case "en":
                                        t.setEntityURI("http://dbpedia.org/resource/"
                                                + linkedEntity.getPageTitle().replace(" ", "_"));
                                        entityTypes.add(t);
                                        break;

                                    case "de":
                                        t.setEntityURI("http://de.dbpedia.org/resource/"
                                                + linkedEntity.getPageTitle().replace(" ", "_"));
                                        entityTypes.add(t);
                                        break;

                                    case "nl":
                                        t.setEntityURI("http://nl.dbpedia.org/resource/"
                                                + linkedEntity.getPageTitle().replace(" ", "_"));
                                        entityTypes.add(t);
                                        break;
                                    }
                                    Confidence linkingConf = new Confidence();
                                    linkingConf.setType("linking");

                                    DecimalFormat df = new DecimalFormat("0.000");
                                    double fConfidence = -1.0;
                                    fConfidence = Double.parseDouble(df.format(linkedEntity.getConfidence()));

                                    linkingConf.setValue(fConfidence);
                                    t.setLinkingConfidence(linkingConf);

                                    t.setEntityLabel(linkedEntity.getPageTitle());
                                }
                                foundEntity.setTypes(entityTypes);
                                resultEntities.add(foundEntity);
                                continueLinking = false;
                            }
                        }
                    }
                }
            }
        }

        // filtering out types
        if (typesFilter.equals("dbo")) {

            for (Entity e : resultEntities) {

                String tmpEntityURI = "";
                String tmpEntityLabel = "";
                Confidence tmpLinkConf = null;

                if (e.getTypes() != null) {
                    ArrayList<Type> dboTypes = new ArrayList();
                    for (Type t : e.getTypes()) {
                        if (t.getTypeURI() != null) {
                            tmpEntityURI = t.getEntityURI();
                            tmpEntityLabel = t.getEntityLabel();
                            tmpLinkConf = t.getLinkingConfidence();
                            if (t.getTypeURI().contains("ontology")) {
                                dboTypes.add(t);
                            }
                        }
                    }
                    if (dboTypes.isEmpty()) {
                        Type t = new Type();
                        t.setEntityLabel(tmpEntityLabel);
                        t.setEntityURI(tmpEntityURI);
                        t.setLinkingConfidence(tmpLinkConf);
                        dboTypes.add(t);
                    }
                    e.setTypes(dboTypes);
                }
            }

        } else if (typesFilter.equals("dbinstance")) {

            for (Entity e : resultEntities) {
                String tmpEntityURI = "";
                String tmpEntityLabel = "";
                Confidence tmpLinkConf = null;

                ArrayList<Type> dboTypes = new ArrayList();

                if (e.getTypes() != null) {
                    for (Type t : e.getTypes()) {
                        if (t.getTypeURI() != null) {
                            tmpEntityURI = t.getEntityURI();
                            tmpEntityLabel = t.getEntityLabel();
                            tmpLinkConf = t.getLinkingConfidence();
                            if (t.getTypeURI().contains("resource")) {
                                dboTypes.add(t);
                            }
                        }
                    }
                }

                if (dboTypes.isEmpty()) {
                    Type t = new Type();
                    t.setEntityLabel(tmpEntityLabel);
                    t.setEntityURI(tmpEntityURI);
                    t.setLinkingConfidence(tmpLinkConf);
                    dboTypes.add(t);
                }
                e.setTypes(dboTypes);
            }

        } else if (typesFilter.equals("all")) {
            return resultEntities;
        }
        return resultEntities;
    }

    public ArrayList<Entity> classifyEntityAPI_MT(String entityURI, String lang, String knowledge_base,
            String[] provenance, String typesFilter)
            throws ResourceInstantiationException, ExecutionException, UnknownHostException {

        ArrayList<Entity> resultEntities = new ArrayList<Entity>();
        // entity mapped, checking types
        String pageTitle = entityURI.split("/")[entityURI.split("/").length - 1].replace("_", " ");
        Entity entity = new Entity();
        entity.setEntityLink(entityURI);
        entity.setUnderlyingString(pageTitle);

        HashSet<Hypernym> hypernymsList = null;
        if (knowledge_base.equals("live")) {
            hypernymsList = extractEntityTypes(pageTitle, lang, "live", provenance);
        } else if (knowledge_base.equals("local")) {
            hypernymsList = extractEntityTypes(pageTitle, lang, "local", provenance);
        } else if (knowledge_base.equals("linkedHypernymsDataset")) {
            hypernymsList = extractEntityTypes(pageTitle, lang, "linkedHypernymsDataset", provenance);
        }

        if (hypernymsList.size() > 0) {
            // TYPES FOUND
            // found types, ending linking
            ArrayList<Type> entityTypes = new ArrayList<Type>();

            for (Hypernym h : hypernymsList) {
                Type type = new Type();
                type.setEntityLabel(h.getEntity());
                type.setEntityURI(h.getEntityURL());
                type.setTypeLabel(h.getType());
                type.setTypeURI(h.getTypeURL());
                type.setProvenance(h.getOrigin());

                Confidence classificationConf = new Confidence();
                classificationConf.setValue(Double.parseDouble(h.getAccuracy()));
                classificationConf.setType("classification");
                type.setClassificationConfidence(classificationConf);
                entityTypes.add(type);
            }
            entity.setTypes(entityTypes);
            resultEntities.add(entity);
        }

        // filtering out types
        if (typesFilter.equals("dbo")) {

            for (Entity e : resultEntities) {
                if (e.getTypes() != null) {
                    ArrayList<Type> dboTypes = new ArrayList();
                    for (Type t : e.getTypes()) {
                        if (t.getTypeURI() != null) {
                            if (t.getTypeURI().contains("ontology")) {
                                dboTypes.add(t);
                            }
                        }
                    }
                    e.setTypes(dboTypes);
                }
            }

        } else if (typesFilter.equals("dbinstance")) {

            for (Entity e : resultEntities) {
                ArrayList<Type> dboTypes = new ArrayList();
                for (Type t : e.getTypes()) {
                    if (t.getTypeURI() != null) {
                        if (t.getTypeURI().contains("resource")) {
                            dboTypes.add(t);
                        }
                    }
                }
                e.setTypes(dboTypes);
            }

        } else if (typesFilter.equals("all")) {
            return resultEntities;
        }
        return resultEntities;
    }

    //    public ArrayList<Hypernym> processText_MT(String query, String lang, String entity_type, String knowledge_base, String[] provenance, boolean priorityEntityLinking, String typesFilter) throws ResourceInstantiationException, ExecutionException, UnknownHostException {
    //        try {
    //        ArrayList<Entity> extractedEntities = new ArrayList<Entity>();
    //
    //        extractedEntities = extractEntityMentions(query, lang, entity_type, "");
    //        System.out.println("Extracted candidates: " + extractedEntities.size());
    //        HashSet<Hypernym> resHypList = new HashSet();
    //        RedisClient redis = RedisClient.getInstance();
    //        for(Entity e : extractedEntities){
    //            String entityString = e.getUnderlyingString().trim();
    //                LinkedEntity linkedEntity = null;
    //                boolean continueLinking = true;
    //                String longestEntityPage = null;
    //                String[] words = null;
    //                
    //                while(continueLinking) {
    //                    
    //                    if(knowledge_base.equals("linkedHypernymsDataset")){
    //                        String linkedEntityStr = redis.getValue(entityString + lang + "linkedHypernymsDataset");
    //                        if(linkedEntityStr == null) {
    //                            linkedEntity = LuceneSearch.getInstance().findWikipediaArticle(entityString, lang, "local");
    //                            if(linkedEntity != null){
    //                                redis.setKey(entityString + lang + "linkedHypernymsDataset", linkedEntity.toString());
    //                            }
    //                        } else {
    //                            linkedEntity = new LinkedEntity();
    //                            linkedEntity.setPageTitle(linkedEntityStr.split("\\+")[0]);
    //                            linkedEntity.setConfidence(Double.parseDouble(linkedEntityStr.split("\\+")[1]));
    //                        }
    //                    }else if(knowledge_base.equals("live")) {
    //                        String linkedEntityStr = redis.getValue(entityString + lang + "live");
    //                        if(linkedEntityStr == null) {
    //                            linkedEntity = LuceneSearch.getInstance().findWikipediaArticle(entityString, lang, "live");
    //                            if(linkedEntityStr != null) {                                
    //                                redis.setKey(entityString + lang + "local", linkedEntity.toString());
    //                            }
    //                        }
    //                    } else if(knowledge_base.equals("local")) {
    //                        String linkedEntityStr = redis.getValue(entityString + lang + "local");
    //                        if(linkedEntityStr == null) {
    //                            linkedEntity = LuceneSearch.getInstance().findWikipediaArticle(entityString, lang, "local");
    //                            if(linkedEntityStr != null){
    //                                redis.setKey(entityString + lang + "local", linkedEntity.toString());
    //                            }
    //                        } else {
    //                            linkedEntity = new LinkedEntity();
    //                            linkedEntity.setPageTitle(linkedEntityStr.split("\\+")[0]);
    //                            linkedEntity.setConfidence(Double.parseDouble(linkedEntityStr.split("\\+")[1]));
    //                        }
    //                    }
    //
    //                    // ENTITY NOT MAPPED TO DBpedia
    //                    if ( linkedEntity == null ) {
    //                        words = entityString.split("\\s+");
    //                        if(words.length > 1) {
    //                            entityString = entityString.split("\\s+", 2)[1];
    //                            // nothing, try to link again with shorter text
    //                        } else {
    //                            // ENTITY LINKED or CANNOT be LINKED
    //                            if(priorityEntityLinking) {
    //                                // priority linked
    //                                Hypernym h = new Hypernym();
    //                                h.setAccuracy("-1");
    //                                h.setBounds("-1");
    //                                h.setStartOffset(e.getStartOffset());
    //                                h.setEndOffset(e.getEndOffset());
    //                                h.setOrigin("thd");
    //                                h.setType("");
    //                                h.setTypeURL("");
    //                                h.setEntityURL("");
    //                                h.setEntity(e.getUnderlyingString());
    //                                h.setUnderlyingEntityText(e.getUnderlyingString());
    //                                resHypList.add(h);
    //                                
    //                            } else {
    //                                // no priority linking
    //                                Hypernym h = new Hypernym();
    //                                h.setAccuracy("-1");
    //                                h.setBounds("-1");
    //                                h.setStartOffset(e.getStartOffset());
    //                                h.setEndOffset(e.getEndOffset());
    //                                h.setOrigin("thd");
    //                                h.setType("");
    //                                h.setTypeURL("");
    //                                h.setUnderlyingEntityText(e.getUnderlyingString());
    //                                
    //                                if(longestEntityPage != null){
    //                                    h.setEntity(longestEntityPage);
    //                                    switch(lang){
    //                                        case "en":
    //                                            h.setEntityURL("http://dbpedia.org/resource/"+longestEntityPage.replace(" ", "_"));
    //                                            break;
    //
    //                                        case "de":
    //                                            h.setEntityURL("http://de.dbpedia.org/resource/"+longestEntityPage.replace(" ", "_"));
    //                                            break;
    //
    //                                        case "nl":
    //                                            h.setEntityURL("http://nl.dbpedia.org/resource/"+longestEntityPage.replace(" ", "_"));
    //                                            break;
    //                                    }
    //                                }else{
    //                                    h.setEntity("");
    //                                    h.setEntityURL("");                                
    //                                }
    //                                resHypList.add(h);                            
    //                            }
    //                            continueLinking = false;
    //                        }
    //                    } else {
    //                        // entity mapped, checking types
    //                        HashSet<Hypernym> hypernymsList = null;
    //                    
    //                        if (knowledge_base.equals("live")) {
    //                            hypernymsList = extractEntityTypes(linkedEntity.getPageTitle(), lang, "live", provenance);                    
    //                        } else if (knowledge_base.equals("local")) {                    
    //                            hypernymsList = extractEntityTypes(linkedEntity.getPageTitle(), lang, "local", provenance);                    
    //                        } else if (knowledge_base.equals("linkedHypernymsDataset")) {
    //                            hypernymsList = extractEntityTypes(linkedEntity.getPageTitle(), lang, "linkedHypernymsDataset", provenance);
    //                        }
    //                        
    //                        if (hypernymsList.size() > 0) {
    //                            // TYPES FOUND
    //                            // found types, ending linking
    //                            continueLinking = false;
    //                            for(Hypernym h : hypernymsList) {
    //                                h.setUnderlyingEntityText(e.getUnderlyingString());
    //                                h.setStartOffset(e.getStartOffset());
    //                                h.setEndOffset(e.getEndOffset());
    //                                resHypList.add(h);
    //                            }
    //                        } else {
    //                            // NO TYPES
    //                            // if priority, then end, otherwise continue
    //                            if(priorityEntityLinking) {
    //                                // PRIORITY ON
    //                                // priority linking
    //                                Hypernym h = new Hypernym();
    //                                h.setAccuracy("-1");
    //                                h.setBounds("-1");
    //                                h.setStartOffset(e.getStartOffset());
    //                                h.setEndOffset(e.getEndOffset());
    //                                h.setOrigin("thd");
    //                                h.setType("");
    //                                h.setTypeURL("");
    //                                h.setEntity(linkedEntity.getPageTitle());
    //                                h.setUnderlyingEntityText(e.getUnderlyingString());
    //                                
    //                                switch(lang) {
    //                                        case "en":
    //                                            h.setEntityURL("http://dbpedia.org/resource/"+linkedEntity.getPageTitle().replace(" ", "_"));
    //                                            break;
    //
    //                                        case "de":
    //                                            h.setEntityURL("http://de.dbpedia.org/resource/"+linkedEntity.getPageTitle().replace(" ", "_"));
    //                                            break;
    //
    //                                        case "nl":
    //                                            h.setEntityURL("http://nl.dbpedia.org/resource/"+linkedEntity.getPageTitle().replace(" ", "_"));
    //                                            break;
    //                                    }
    //                                resHypList.add(h);
    //                                continueLinking = false;
    //                            } else {
    //                                // PRIORITY OFF, NO TYPES found
    //                                // no priority linking continue to search
    //                                words = entityString.split("\\s+");
    //                                if(words.length > 1) {
    //                                    // PRIORITY OFF, MORE SEARCHING
    //                                    entityString = entityString.split("\\s+", 2)[1];
    //                                    if(longestEntityPage == null){
    //                                        longestEntityPage = linkedEntity.getPageTitle();
    //                                    }
    //                                } else {
    //                                    // PRIORITY OFF, NO MORE SEARCH
    //                                    Hypernym h = new Hypernym();
    //                                    h.setAccuracy("-1");
    //                                    h.setBounds("-1");
    //                                    h.setStartOffset(e.getStartOffset());
    //                                    h.setEndOffset(e.getEndOffset());
    //                                    h.setOrigin("thd");
    //                                    h.setType("");
    //                                    h.setTypeURL("");                                    
    //                                    h.setUnderlyingEntityText(e.getUnderlyingString());
    //                                    if(longestEntityPage != null) {
    //                                        h.setEntity(longestEntityPage);
    //                                        switch(lang){
    //                                            case "en":
    //                                                h.setEntityURL("http://dbpedia.org/resource/"+longestEntityPage.replace(" ", "_"));
    //                                                break;
    //
    //                                            case "de":
    //                                                h.setEntityURL("http://de.dbpedia.org/resource/"+longestEntityPage.replace(" ", "_"));
    //                                                break;
    //
    //                                            case "nl":
    //                                                h.setEntityURL("http://nl.dbpedia.org/resource/"+longestEntityPage.replace(" ", "_"));
    //                                                break;
    //                                        }
    //                                    }else{
    //                                        h.setEntity(linkedEntity.getPageTitle());
    //                                        switch(lang){
    //                                            case "en":
    //                                                h.setEntityURL("http://dbpedia.org/resource/"+linkedEntity.getPageTitle().replace(" ", "_"));
    //                                                break;
    //
    //                                            case "de":
    //                                                h.setEntityURL("http://de.dbpedia.org/resource/"+linkedEntity.getPageTitle().replace(" ", "_"));
    //                                                break;
    //
    //                                            case "nl":
    //                                                h.setEntityURL("http://nl.dbpedia.org/resource/"+linkedEntity.getPageTitle().replace(" ", "_"));
    //                                                break;
    //                                        }
    //                                    }
    //                                    resHypList.add(h);
    //                                    continueLinking = false;
    //                                }
    //                            }
    //                        }
    //                    }
    //
    //                    // Align the DE/NL enity URIs with EN URI
    //                    switch(lang){
    //                        case "de":
    //                            // Spawning one more entity with just another localized URI
    //                            HashSet<Hypernym> resHypListEnde = new HashSet();
    //                            for(Hypernym h : resHypList) {
    //                                if(h.getEntityURL().startsWith("http://de.dbpedia.org/resource/")
    //                                        && (h.getOrigin().equals("thd")
    //                                        || h.getOrigin().equals("thd-derived")
    //                                        )){
    //                                    
    //                                    BasicDBObject queryObj = new BasicDBObject();
    //                                    queryObj.append("de_uri",h.getEntityURL());
    //                                    BasicDBObject projObj = new BasicDBObject();
    //                                    projObj.append("en_uri", 1);
    //                                    
    //                                    DBObject resObj = MongoDBClient.getDBInstance().getCollection("interlanguage_links").findOne(queryObj, projObj);
    //                                    
    //                                    if(resObj != null) {
    //                                        
    //                                        if(resObj.get("en_uri") != null) {
    //                                            String enInterLangLink = resObj.get("en_uri").toString();
    //                                            Hypernym h2 = new Hypernym();
    //                                            h2.setType(h.getType());
    //                                            h2.setTypeURL(h.getTypeURL());
    //                                            h2.setEntity(enInterLangLink.split("/")[enInterLangLink.split("/").length-1].replace("_", " "));
    //                                            h2.setEntityURL(enInterLangLink);
    //                                            h2.setAccuracy(h.getAccuracy());
    //                                            h2.setBounds(h.getBounds());
    //                                            h2.setStartOffset(h.getStartOffset());
    //                                            h2.setEndOffset(h.getEndOffset());
    //                                            h2.setOrigin("thd");
    //                                            h2.setUnderlyingEntityText(h.getUnderlyingEntityText());
    //                                            resHypListEnde.add(h2);
    //                                        }
    //                                        
    //                                    }
    //                                }
    //                            }
    //                            resHypList.addAll(resHypListEnde);
    //                            break;
    //                            
    //                        case "nl":
    //                            
    //                            HashSet<Hypernym> resHypListEnNl = new HashSet();
    //                            
    //                            // Spawning one more entity with just another localized URI
    //                            for(Hypernym h : resHypList){
    //                                if(h.getEntityURL().startsWith("http://nl.dbpedia.org/resource/")
    //                                        && (h.getOrigin().equals("thd")
    //                                        || h.getOrigin().equals("thd-derived")
    //                                        )) {
    //                                    
    //                                    BasicDBObject queryObj = new BasicDBObject();
    //                                    queryObj.append("nl_uri", h.getEntityURL());
    //                                    BasicDBObject projObj = new BasicDBObject();
    //                                    projObj.append("en_uri", 1);
    //                                    
    //                                    DBObject resObj = MongoDBClient.getDBInstance().getCollection("interlanguage_links").findOne(queryObj, projObj);
    //
    //                                    if(resObj != null) {
    //                                        
    //                                        if(resObj.get("en_uri") != null) {
    //                                            
    //                                            String enInterLangLink = resObj.get("en_uri").toString();
    //
    //                                            Hypernym h2 = new Hypernym();
    //                                            h2.setType(h.getType());
    //                                            h2.setTypeURL(h.getTypeURL());
    //                                            h2.setEntity(enInterLangLink.split("/")[enInterLangLink.split("/").length-1].replace("_", " "));
    //                                            h2.setEntityURL(enInterLangLink);
    //                                            h2.setAccuracy(h.getAccuracy());
    //                                            h2.setBounds(h.getBounds());
    //                                            h2.setStartOffset(h.getStartOffset());
    //                                            h2.setEndOffset(h.getEndOffset());
    //                                            h2.setOrigin("thd");
    //                                            h2.setUnderlyingEntityText(h.getUnderlyingEntityText());
    //
    //                                            resHypListEnNl.add(h2);
    //                                        }
    //                                    }
    //                                }
    //                            }
    //                            resHypList.addAll(resHypListEnNl);
    //
    //                            break;
    //                        }                    
    //                }
    //        }
    //        
    //        // filtering out types
    //        if(typesFilter.equals("dbo")) {
    //            
    //            HashSet dboHypList = new HashSet();
    //            for(Hypernym h  : resHypList){
    //                if(h.getTypeURL().contains("ontology")) {
    //                    dboHypList.add(h);
    //                }                
    //            }
    //            resHypList = dboHypList;
    //            
    //        } else if(typesFilter.equals("dbinstance")){
    //            
    //            HashSet dbinstanceHypList = new HashSet();
    //            for(Hypernym h  : resHypList){
    //                if(h.getTypeURL().contains("resource")) {
    //                    dbinstanceHypList.add(h);
    //                }                
    //            }
    //            resHypList = dbinstanceHypList;
    //        
    //        } else if(typesFilter.equals("all")){
    //            return new ArrayList<Hypernym>(resHypList);
    //        }
    //        
    //        return new ArrayList<Hypernym>(resHypList);
    //        
    //        } catch(Exception ex){
    //            Logger.getLogger(THDWorker.class.getName()).log(Level.SEVERE, null, ex);
    //        }
    //        return null;
    //    }

    public ArrayList<Entity> extractEntityMentions(String query, String lang, String entity_type,
            String spotting_method) throws ResourceInstantiationException, ExecutionException {

        ArrayList<Entity> candidates = null;
        switch (lang) {
        case "en":
            if (spotting_method.equals("CRF")) {
                candidates = SemiTagsSpotting.getInstance().getEntityMentions(query, lang);
            } else {
                candidates = extractEntityCandidatesEN(query, lang, entity_type);
            }
            break;
        case "de":
            if (spotting_method.equals("CRF")) {
                candidates = SemiTagsSpotting.getInstance().getEntityMentions(query, lang);
            } else {
                candidates = extractEntityCandidatesDE(query, lang, entity_type);
            }
            break;
        case "nl":
            if (spotting_method.equals("CRF")) {
                candidates = SemiTagsSpotting.getInstance().getEntityMentions(query, lang);
            } else {
                candidates = extractEntityCandidatesNL(query, lang, entity_type);
            }
            break;
        }

        return candidates;
    }

    public ArrayList<Entity> extractEntityCandidatesEN(String query, String lang, String entity_type)
            throws ResourceInstantiationException, ExecutionException {

        Document doc = Factory.newDocument(query);
        doc.setName("Query_Document");
        Corpus corpus = Factory.newCorpus("");
        corpus.add(doc);

        enEntityExtractionPipeline.setCorpus(corpus);
        enEntityExtractionPipeline.execute();

        Document[] docs = (Document[]) corpus.toArray(new Document[corpus.size()]);
        ArrayList<Entity> candidates = new ArrayList<Entity>();

        Document d = docs[0];
        AnnotationSet as_all = d.getAnnotations();

        if (entity_type.equals("all") || entity_type.equals("ne")) {

            AnnotationSet as_named_entity = as_all.get("ne");
            Iterator anot = as_named_entity.iterator();

            while (anot.hasNext()) {
                try {
                    Annotation isaAnnot = (gate.Annotation) anot.next();
                    Node annStart = isaAnnot.getStartNode();
                    Node annEnd = isaAnnot.getEndNode();
                    String content = d.getContent().getContent(annStart.getOffset(), annEnd.getOffset()).toString();
                    candidates.add(new Entity(content, annStart.getOffset(), annEnd.getOffset(), "named entity"));
                } catch (InvalidOffsetException ex) {
                    Logger.getLogger(THDInstance.class.getName()).log(Level.SEVERE, null, ex);
                }
            }
        }
        if (entity_type.equals("all") || entity_type.equals("ce")) {

            AnnotationSet as_common_nouns = as_all.get("e");
            Iterator cn_anot = as_common_nouns.iterator();

            while (cn_anot.hasNext()) {
                try {
                    Annotation isaAnnot = (gate.Annotation) cn_anot.next();
                    Node annStart = isaAnnot.getStartNode();
                    Node annEnd = isaAnnot.getEndNode();
                    String content = d.getContent().getContent(annStart.getOffset(), annEnd.getOffset()).toString();
                    candidates.add(new Entity(content, annStart.getOffset(), annEnd.getOffset(), "common entity"));
                } catch (InvalidOffsetException ex) {
                    Logger.getLogger(THDInstance.class.getName()).log(Level.SEVERE, null, ex);
                }
            }
        }
        Factory.deleteResource(doc);
        Factory.deleteResource(corpus);
        return candidates;
    }

    public ArrayList<Entity> extractEntityCandidatesDE(String query, String lang, String entity_type)
            throws ResourceInstantiationException, ExecutionException {

        Document doc = Factory.newDocument(query);
        doc.setName("Query_Document");
        Corpus corpus = Factory.newCorpus("");
        corpus.add(doc);

        deEntityExtractionPipeline.setCorpus(corpus);
        deEntityExtractionPipeline.execute();

        Document[] docs = (Document[]) corpus.toArray(new Document[corpus.size()]);
        ArrayList<Entity> candidates = new ArrayList<Entity>();

        Document d = docs[0];
        AnnotationSet as_all = d.getAnnotations();
        if (entity_type.equals("all") || entity_type.equals("ne")) {
            AnnotationSet as_entity = as_all.get("ne");

            Iterator anot = as_entity.iterator();
            while (anot.hasNext()) {
                try {
                    Annotation isaAnnot = (gate.Annotation) anot.next();
                    Node annStart = isaAnnot.getStartNode();
                    Node annEnd = isaAnnot.getEndNode();
                    AnnotationSet as_token = as_all.get("Token", annStart.getOffset(), annEnd.getOffset());
                    String content = "";

                    if (as_token.size() > 1) {
                        content = d.getContent().getContent(annStart.getOffset(), annEnd.getOffset()).toString();
                    } else {
                        Iterator as_token_iter = as_token.iterator();

                        Annotation tok = (gate.Annotation) as_token_iter.next();
                        String lemma = tok.getFeatures().get("lemma").toString();

                        if (!lemma.equals("<unknown>")) {
                            content = lemma;
                        } else {
                            content = d.getContent().getContent(annStart.getOffset(), annEnd.getOffset())
                                    .toString();
                        }
                    }
                    candidates.add(new Entity(
                            d.getContent().getContent(annStart.getOffset(), annEnd.getOffset()).toString(),
                            annStart.getOffset(), annEnd.getOffset(), "named entity"));
                } catch (InvalidOffsetException ex) {
                    Logger.getLogger(THDInstance.class.getName()).log(Level.SEVERE, null, ex);
                }
            }
        }
        if (entity_type.equals("all") || entity_type.equals("ce")) {
            AnnotationSet as_common_entity = as_all.get("e");
            Iterator anot_e = as_common_entity.iterator();

            while (anot_e.hasNext()) {
                try {
                    Annotation isaAnnot = (gate.Annotation) anot_e.next();
                    Node annStart = isaAnnot.getStartNode();
                    Node annEnd = isaAnnot.getEndNode();
                    AnnotationSet as_token = as_all.get("Token", annStart.getOffset(), annEnd.getOffset());

                    String content = "";

                    if (as_token.size() > 1) {
                        content = d.getContent().getContent(annStart.getOffset(), annEnd.getOffset()).toString();
                    } else {
                        Iterator as_token_iter = as_token.iterator();

                        Annotation tok = (gate.Annotation) as_token_iter.next();
                        String lemma = tok.getFeatures().get("lemma").toString();

                        if (!lemma.equals("<unknown>")) {
                            content = lemma;
                        } else {
                            content = d.getContent().getContent(annStart.getOffset(), annEnd.getOffset())
                                    .toString();
                        }
                    }
                    candidates.add(new Entity(
                            d.getContent().getContent(annStart.getOffset(), annEnd.getOffset()).toString(),
                            annStart.getOffset(), annEnd.getOffset(), "common entity"));
                } catch (InvalidOffsetException ex) {
                    Logger.getLogger(THDInstance.class.getName()).log(Level.SEVERE, null, ex);
                }
            }
        }
        Factory.deleteResource(doc);
        Factory.deleteResource(corpus);
        return candidates;

    }

    public ArrayList<Entity> extractEntityCandidatesNL(String query, String lang, String entity_type)
            throws ResourceInstantiationException, ExecutionException {

        Document doc = Factory.newDocument(query);
        doc.setName("Query_Document");
        Corpus corpus = Factory.newCorpus("");
        corpus.add(doc);

        nlEntityExtractionPipeline.setCorpus(corpus);
        nlEntityExtractionPipeline.execute();

        Document[] docs = (Document[]) corpus.toArray(new Document[corpus.size()]);
        ArrayList<Entity> candidates = new ArrayList<Entity>();

        Document d = docs[0];
        AnnotationSet as_all = d.getAnnotations();

        if (entity_type.equals("all") || entity_type.equals("ne")) {

            AnnotationSet as_entity = as_all.get("ne");
            Iterator anot = as_entity.iterator();
            while (anot.hasNext()) {
                try {
                    Annotation isaAnnot = (gate.Annotation) anot.next();
                    Node annStart = isaAnnot.getStartNode();
                    Node annEnd = isaAnnot.getEndNode();
                    String content = d.getContent().getContent(annStart.getOffset(), annEnd.getOffset()).toString();
                    candidates.add(new Entity(content, annStart.getOffset(), annEnd.getOffset(), "named entity"));
                } catch (InvalidOffsetException ex) {
                    Logger.getLogger(THDInstance.class.getName()).log(Level.SEVERE, null, ex);
                }
            }
        }
        if (entity_type.equals("all") || entity_type.equals("ce")) {
            AnnotationSet as_common_entity = as_all.get("e");
            Iterator anot_e = as_common_entity.iterator();
            while (anot_e.hasNext()) {
                try {
                    Annotation isaAnnot = (gate.Annotation) anot_e.next();
                    Node annStart = isaAnnot.getStartNode();
                    Node annEnd = isaAnnot.getEndNode();
                    String content = d.getContent().getContent(annStart.getOffset(), annEnd.getOffset()).toString();
                    candidates.add(new Entity(content, annStart.getOffset(), annEnd.getOffset(), "common entity"));
                } catch (InvalidOffsetException ex) {
                    Logger.getLogger(THDInstance.class.getName()).log(Level.SEVERE, null, ex);
                }
            }
        }
        Factory.deleteResource(doc);
        Factory.deleteResource(corpus);
        return candidates;

    }

    private void assamblePipelines() {
        assambleEntityExtractionPipelineEN();
        assambleEntityExtractionPipelineDE();
        assambleEntityExtractionPipelineNL();
        assambleHypernymExtractionPipelineEN();
        assambleHypernymExtractionPipelineDE();
        assambleHypernymExtractionPipelineNL();
    }

    private void assambleEntityExtractionPipelineEN() {

        try {
            URL url = THDController.getInstance().getClass().getResource(Settings.EN_ENTITY_EXTRACTION_GRAMMAR);
            File japeOrigFile = new File(url.getFile());
            java.net.URI japeURI = japeOrigFile.toURI();
            FeatureMap transducerFeatureMap = Factory.newFeatureMap();

            try {
                transducerFeatureMap.put("grammarURL", japeURI.toURL());
                transducerFeatureMap.put("encoding", "UTF-8");
            } catch (MalformedURLException e) {
            }

            FeatureMap tokenizerFeatureMap = Factory.newFeatureMap();
            ProcessingResource tokenizerPR = (ProcessingResource) Factory
                    .createResource("gate.creole.tokeniser.DefaultTokeniser", tokenizerFeatureMap);

            FeatureMap sentenceSplitterFeatureMap = Factory.newFeatureMap();
            ProcessingResource sentenceSplitterPR = (ProcessingResource) Factory
                    .createResource("gate.creole.splitter.SentenceSplitter", sentenceSplitterFeatureMap);

            FeatureMap posTaggerFeatureMap = Factory.newFeatureMap();
            ProcessingResource posTaggerPR = (ProcessingResource) Factory.createResource("gate.creole.POSTagger",
                    posTaggerFeatureMap);

            ProcessingResource japeCandidatesPR = (ProcessingResource) Factory
                    .createResource("gate.creole.Transducer", transducerFeatureMap);

            enEntityExtractionPipeline = (SerialAnalyserController) Factory
                    .createResource("gate.creole.SerialAnalyserController");

            enEntityExtractionPipeline.add(tokenizerPR);
            enEntityExtractionPipeline.add(sentenceSplitterPR);
            enEntityExtractionPipeline.add(posTaggerPR);
            enEntityExtractionPipeline.add(japeCandidatesPR);
        } catch (ResourceInstantiationException ex) {
            ;
            Logger.getLogger(THDInstance.class.getName()).log(Level.SEVERE, null, ex);
        }
    }

    private void assambleEntityExtractionPipelineDE() {
        try {

            FeatureMap resetFeatureMap = Factory.newFeatureMap();
            ProcessingResource resetPR = (ProcessingResource) Factory
                    .createResource("gate.creole.annotdelete.AnnotationDeletePR", resetFeatureMap);

            FeatureMap tokenizerFeatureMap = Factory.newFeatureMap();
            ProcessingResource tokenizerPR = (ProcessingResource) Factory
                    .createResource("gate.creole.tokeniser.DefaultTokeniser", tokenizerFeatureMap);

            FeatureMap taggerFeatureMap = Factory.newFeatureMap();
            taggerFeatureMap.put("debug", "false");
            taggerFeatureMap.put("encoding", "UTF-8");
            taggerFeatureMap.put("failOnUnmappableCharacter", false);
            taggerFeatureMap.put("featureMapping", "lemma=3;category=2;string=1");
            taggerFeatureMap.put("inputAnnotationType", "Token");
            taggerFeatureMap.put("inputTemplate", "${string}");
            taggerFeatureMap.put("outputAnnotationType", "Token");
            taggerFeatureMap.put("regex", "(.+)   (.+)   (.+)");
            taggerFeatureMap.put("taggerBinary", Settings.DE_TAGGER_BINARY);
            taggerFeatureMap.put("updateAnnotations", true);

            ProcessingResource genTag = (ProcessingResource) Factory
                    .createResource("gate.taggerframework.GenericTagger", taggerFeatureMap);

            //JAPE Entity Extraction grammar
            URL url = THDController.getInstance().getClass().getResource(Settings.DE_ENTITY_EXTRACTION_GRAMMAR);

            File japeOrigFile = new File(url.getFile());
            java.net.URI japeURI = japeOrigFile.toURI();
            FeatureMap transducerFeatureMap = Factory.newFeatureMap();

            try {
                transducerFeatureMap.put("grammarURL", japeURI.toURL());
                transducerFeatureMap.put("encoding", "UTF-8");
            } catch (MalformedURLException e) {
            }

            ProcessingResource japeCandidatesPR = (ProcessingResource) Factory
                    .createResource("gate.creole.Transducer", transducerFeatureMap);

            deEntityExtractionPipeline = (SerialAnalyserController) Factory
                    .createResource("gate.creole.RealtimeCorpusController");
            deEntityExtractionPipeline.add(resetPR);
            deEntityExtractionPipeline.add(tokenizerPR);
            deEntityExtractionPipeline.add(genTag);
            deEntityExtractionPipeline.add(japeCandidatesPR);
        } catch (ResourceInstantiationException ex) {
            Logger.getLogger(THDInstance.class.getName()).log(Level.SEVERE, null, ex);
        }
    }

    private void assambleEntityExtractionPipelineNL() {
        try {
            FeatureMap resetFeatureMap = Factory.newFeatureMap();
            ProcessingResource resetPR = (ProcessingResource) Factory
                    .createResource("gate.creole.annotdelete.AnnotationDeletePR", resetFeatureMap);

            FeatureMap tokenizerFeatureMap = Factory.newFeatureMap();
            ProcessingResource tokenizerPR = (ProcessingResource) Factory
                    .createResource("gate.creole.tokeniser.DefaultTokeniser", tokenizerFeatureMap);

            FeatureMap taggerFeatureMap = Factory.newFeatureMap();
            taggerFeatureMap.put("debug", "false");
            taggerFeatureMap.put("encoding", "UTF-8");
            taggerFeatureMap.put("failOnUnmappableCharacter", false);
            //            taggerFeatureMap.put("failOnMissingInputAnnotations", false);
            taggerFeatureMap.put("featureMapping", "lemma=3;category=2;string=1");
            taggerFeatureMap.put("inputAnnotationType", "Token");
            taggerFeatureMap.put("inputTemplate", "${string}");
            taggerFeatureMap.put("outputAnnotationType", "Token");
            taggerFeatureMap.put("regex", "(.+)   (.+)   (.+)");
            taggerFeatureMap.put("taggerBinary", Settings.NL_TAGGER_BINARY);
            taggerFeatureMap.put("updateAnnotations", true);

            ProcessingResource genTag = (ProcessingResource) Factory
                    .createResource("gate.taggerframework.GenericTagger", taggerFeatureMap);

            //JAPE Entity Extraction grammar
            URL url = THDController.getInstance().getClass().getResource(Settings.NL_ENTITY_EXTRACTION_GRAMMAR);
            File japeOrigFile = new File(url.getFile());
            java.net.URI japeURI = japeOrigFile.toURI();
            FeatureMap transducerFeatureMap = Factory.newFeatureMap();

            try {
                transducerFeatureMap.put("grammarURL", japeURI.toURL());
                transducerFeatureMap.put("encoding", "UTF-8");
            } catch (MalformedURLException e) {
            }

            ProcessingResource japeCandidatesPR = (ProcessingResource) Factory
                    .createResource("gate.creole.Transducer", transducerFeatureMap);

            nlEntityExtractionPipeline = (SerialAnalyserController) Factory
                    .createResource("gate.creole.RealtimeCorpusController");
            nlEntityExtractionPipeline.add(resetPR);
            nlEntityExtractionPipeline.add(tokenizerPR);
            nlEntityExtractionPipeline.add(genTag);
            nlEntityExtractionPipeline.add(japeCandidatesPR);

        } catch (ResourceInstantiationException ex) {
            Logger.getLogger(THDInstance.class.getName()).log(Level.SEVERE, null, ex);
        }
    }

    private void assambleHypernymExtractionPipelineEN() {

        try {

            hypernymExtractionPipelineEN = (SerialAnalyserController) Factory
                    .createResource("gate.creole.RealtimeCorpusController");

            FeatureMap resetFeatureMap = Factory.newFeatureMap();
            ProcessingResource resetPR = (ProcessingResource) Factory
                    .createResource("gate.creole.annotdelete.AnnotationDeletePR", resetFeatureMap);
            hypernymExtractionPipelineEN.add(resetPR);

            FeatureMap tokenizerFeatureMap = Factory.newFeatureMap();
            ProcessingResource tokenizerPR = (ProcessingResource) Factory
                    .createResource("gate.creole.tokeniser.DefaultTokeniser", tokenizerFeatureMap);
            hypernymExtractionPipelineEN.add(tokenizerPR);

            FeatureMap sentenceSplitterFeatureMap = Factory.newFeatureMap();
            ProcessingResource sentenceSplitterPR = (ProcessingResource) Factory
                    .createResource("gate.creole.splitter.SentenceSplitter", sentenceSplitterFeatureMap);
            //ProcessingResource sentenceSplitterPR = (ProcessingResource) Factory.createResource("gate.creole.splitter.RegexSentenceSplitter", sentenceSplitterFeatureMap);
            hypernymExtractionPipelineEN.add(sentenceSplitterPR);

            FeatureMap posTaggerFeatureMap = Factory.newFeatureMap();
            ProcessingResource posTaggerPR = (ProcessingResource) Factory.createResource("gate.creole.POSTagger",
                    posTaggerFeatureMap);
            hypernymExtractionPipelineEN.add(posTaggerPR);

            URL url = THDController.getInstance().getClass().getResource(Settings.EN_HYPERNYM_EXTRACTION_GRAMMAR);
            File japeOrigFile = new File(url.getFile());
            java.net.URI japeURI = japeOrigFile.toURI();

            FeatureMap transducerFeatureMap = Factory.newFeatureMap();
            try {
                transducerFeatureMap.put("grammarURL", japeURI.toURL());
                transducerFeatureMap.put("encoding", "UTF-8");
            } catch (MalformedURLException e) {
                // TODO
            }
            ProcessingResource japeCandidatesPR = (ProcessingResource) Factory
                    .createResource("gate.creole.Transducer", transducerFeatureMap);

            hypernymExtractionPipelineEN.add(japeCandidatesPR);

        } catch (ResourceInstantiationException ex) {
            Logger.getLogger(THDInstance.class.getName()).log(Level.SEVERE, null, ex);
        }
    }

    private void assambleHypernymExtractionPipelineDE() {

        try {

            hypernymExtractionPipelineDE = (SerialAnalyserController) Factory
                    .createResource("gate.creole.RealtimeCorpusController");

            FeatureMap resetFeatureMap = Factory.newFeatureMap();
            ProcessingResource resetPR = (ProcessingResource) Factory
                    .createResource("gate.creole.annotdelete.AnnotationDeletePR", resetFeatureMap);
            hypernymExtractionPipelineDE.add(resetPR);

            FeatureMap tokenizerFeatureMap = Factory.newFeatureMap();
            ProcessingResource tokenizerPR = (ProcessingResource) Factory
                    .createResource("gate.creole.tokeniser.DefaultTokeniser", tokenizerFeatureMap);
            hypernymExtractionPipelineDE.add(tokenizerPR);

            FeatureMap sentenceSplitterFeatureMap = Factory.newFeatureMap();
            ProcessingResource sentenceSplitterPR = (ProcessingResource) Factory
                    .createResource("gate.creole.splitter.SentenceSplitter", sentenceSplitterFeatureMap);
            //ProcessingResource sentenceSplitterPR = (ProcessingResource) Factory.createResource("gate.creole.splitter.RegexSentenceSplitter", sentenceSplitterFeatureMap);
            hypernymExtractionPipelineDE.add(sentenceSplitterPR);

            FeatureMap taggerFeatureMap = Factory.newFeatureMap();
            taggerFeatureMap.put("debug", "false");
            taggerFeatureMap.put("encoding", "UTF-8");
            taggerFeatureMap.put("failOnUnmappableCharacter", "false");
            taggerFeatureMap.put("featureMapping", "lemma=3;category=2;string=1");
            taggerFeatureMap.put("inputAnnotationType", "Token");
            taggerFeatureMap.put("inputTemplate", "${string}");
            taggerFeatureMap.put("outputAnnotationType", "Token");
            taggerFeatureMap.put("regex", "(.+)   (.+)   (.+)");
            taggerFeatureMap.put("taggerBinary", Settings.DE_TAGGER_BINARY);
            taggerFeatureMap.put("updateAnnotations", false);

            ProcessingResource genTag = (ProcessingResource) Factory
                    .createResource("gate.taggerframework.GenericTagger", taggerFeatureMap);
            hypernymExtractionPipelineDE.add(genTag);

            URL url = THDController.getInstance().getClass().getResource(Settings.DE_HYPERNYM_EXTRACTION_GRAMMAR);
            File japeOrigFile = new File(url.getFile());
            java.net.URI japeURI = japeOrigFile.toURI();

            FeatureMap transducerFeatureMap = Factory.newFeatureMap();
            try {
                transducerFeatureMap.put("grammarURL", japeURI.toURL());
                transducerFeatureMap.put("encoding", "UTF-8");
            } catch (MalformedURLException e) {
            }

            ProcessingResource japeCandidatesPR = (ProcessingResource) Factory
                    .createResource("gate.creole.Transducer", transducerFeatureMap);
            hypernymExtractionPipelineDE.add(japeCandidatesPR);

        } catch (ResourceInstantiationException ex) {
            Logger.getLogger(THDInstance.class.getName()).log(Level.SEVERE, null, ex);
        }

    }

    private void assambleHypernymExtractionPipelineNL() {

        try {

            hypernymExtractionPipelineNL = (SerialAnalyserController) Factory
                    .createResource("gate.creole.RealtimeCorpusController");

            FeatureMap resetFeatureMap = Factory.newFeatureMap();
            ProcessingResource resetPR = (ProcessingResource) Factory
                    .createResource("gate.creole.annotdelete.AnnotationDeletePR", resetFeatureMap);
            hypernymExtractionPipelineNL.add(resetPR);

            FeatureMap tokenizerFeatureMap = Factory.newFeatureMap();
            ProcessingResource tokenizerPR = (ProcessingResource) Factory
                    .createResource("gate.creole.tokeniser.DefaultTokeniser", tokenizerFeatureMap);
            hypernymExtractionPipelineNL.add(tokenizerPR);

            FeatureMap sentenceSplitterFeatureMap = Factory.newFeatureMap();
            ProcessingResource sentenceSplitterPR = (ProcessingResource) Factory
                    .createResource("gate.creole.splitter.SentenceSplitter", sentenceSplitterFeatureMap);
            //ProcessingResource sentenceSplitterPR = (ProcessingResource) Factory.createResource("gate.creole.splitter.RegexSentenceSplitter", sentenceSplitterFeatureMap);
            hypernymExtractionPipelineNL.add(sentenceSplitterPR);

            FeatureMap taggerFeatureMap = Factory.newFeatureMap();
            taggerFeatureMap.put("debug", "false");
            taggerFeatureMap.put("encoding", "UTF-8");
            taggerFeatureMap.put("failOnUnmappableCharacter", "false");
            taggerFeatureMap.put("featureMapping", "lemma=3;category=2;string=1");
            taggerFeatureMap.put("inputAnnotationType", "Token");
            taggerFeatureMap.put("inputTemplate", "${string}");
            taggerFeatureMap.put("outputAnnotationType", "Token");
            taggerFeatureMap.put("regex", "(.+)   (.+)   (.+)");
            taggerFeatureMap.put("taggerBinary", Settings.NL_TAGGER_BINARY);
            taggerFeatureMap.put("updateAnnotations", false);

            ProcessingResource genTag = (ProcessingResource) Factory
                    .createResource("gate.taggerframework.GenericTagger", taggerFeatureMap);
            hypernymExtractionPipelineNL.add(genTag);

            URL url = THDController.getInstance().getClass().getResource(Settings.NL_HYPERNYM_EXTRACTION_GRAMMAR);
            File japeOrigFile = new File(url.getFile());
            java.net.URI japeURI = japeOrigFile.toURI();

            FeatureMap transducerFeatureMap = Factory.newFeatureMap();
            try {
                transducerFeatureMap.put("grammarURL", japeURI.toURL());
                transducerFeatureMap.put("encoding", "UTF-8");
            } catch (MalformedURLException e) {
            }
            ProcessingResource japeCandidatesPR = (ProcessingResource) Factory
                    .createResource("gate.creole.Transducer", transducerFeatureMap);

            hypernymExtractionPipelineNL.add(japeCandidatesPR);

        } catch (Exception ex) {
            Logger.getLogger(THDInstance.class.getName()).log(Level.SEVERE, null, ex);
        }
    }

    public HashSet<Hypernym> extractEntityTypes(String entity, String lang, String kb, String[] provenance)
            throws UnknownHostException {
        HashSet<Hypernym> hypernymsList = new HashSet();
        HashSet<Hypernym> thdHypernyms = new HashSet();
        HashSet<Hypernym> dbpediaHypernyms = new HashSet();
        HashSet<Hypernym> yagoHypernyms = new HashSet();

        switch (lang) {
        case "en":
            for (String prov : provenance) {

                if (prov.equals("thd")) {

                    if (kb.equals("linkedHypernymsDataset")) {
                        thdHypernyms = extractEntityTypesLHD(entity, lang);
                    } else {
                        thdHypernyms = extractEntityTypesEN(entity, kb);
                    }

                } else if (prov.equals("dbpedia")) {
                    dbpediaHypernyms = getDBpediaHypernyms(entity, lang);
                } else if (prov.equals("yago")) {
                    yagoHypernyms = getYAGOHypernyms(entity, lang);
                }
            }
            if (dbpediaHypernyms != null) {
                hypernymsList.addAll(dbpediaHypernyms);
            }

            if (yagoHypernyms != null) {
                hypernymsList.addAll(yagoHypernyms);
            }

            if (thdHypernyms != null) {
                hypernymsList.addAll(thdHypernyms);
            }
            return hypernymsList;

        case "de":

            for (String prov : provenance) {
                if (prov.equals("thd")) {

                    if (kb.equals("linkedHypernymsDataset")) {
                        thdHypernyms = extractEntityTypesLHD(entity, lang);
                    } else {
                        thdHypernyms = extractEntityTypesDE(entity, kb);
                    }

                } else if (prov.equals("dbpedia")) {
                    HashSet hs = new HashSet();
                    HashSet tmp1 = getDBpediaHypernyms(entity, lang);
                    HashSet tmp2 = getDBpediaHypernyms(entity, "en");
                    hs.addAll(tmp1);
                    hs.addAll(tmp2);
                    dbpediaHypernyms.addAll(hs);
                } else if (prov.equals("yago")) {
                    HashSet hs = new HashSet();
                    HashSet tmp = getYAGOHypernyms(entity, lang);
                    hs.addAll(tmp);

                    String s = getInterlanguageLink(entity, lang, "en");
                    if (s != null) {
                        HashSet tmp2 = getYAGOHypernyms(s, "en");
                        hs.addAll(tmp2);
                        tmp.addAll(tmp2);
                    }
                    yagoHypernyms.addAll(hs);
                }
            }
            if (dbpediaHypernyms != null) {
                hypernymsList.addAll(dbpediaHypernyms);
            }

            if (yagoHypernyms != null) {
                hypernymsList.addAll(yagoHypernyms);
            }

            if (thdHypernyms != null) {
                hypernymsList.addAll(thdHypernyms);
            }
            return hypernymsList;

        case "nl":

            for (String prov : provenance) {
                if (prov.equals("thd")) {
                    if (kb.equals("linkedHypernymsDataset")) {
                        thdHypernyms = extractEntityTypesLHD(entity, lang);
                    } else {
                        thdHypernyms = extractEntityTypesNL(entity, kb);
                    }
                } else if (prov.equals("dbpedia")) {
                    HashSet hs = new HashSet();
                    HashSet tmp1 = getDBpediaHypernyms(entity, lang);
                    HashSet tmp2 = getDBpediaHypernyms(entity, "en");
                    hs.addAll(tmp1);
                    hs.addAll(tmp2);

                    dbpediaHypernyms.addAll(hs);

                } else if (prov.equals("yago")) {
                    HashSet hs = new HashSet();
                    yagoHypernyms = getYAGOHypernyms(entity, lang);
                    String s = getInterlanguageLink(entity, lang, "en");
                    if (s != null) {
                        HashSet tmp = getYAGOHypernyms(s, "en");
                        hs.addAll(tmp);
                    }

                    yagoHypernyms.addAll(hs);
                }
            }
            if (dbpediaHypernyms != null) {
                hypernymsList.addAll(dbpediaHypernyms);
            }

            if (yagoHypernyms != null) {
                hypernymsList.addAll(yagoHypernyms);
            }

            if (thdHypernyms != null) {
                hypernymsList.addAll(thdHypernyms);
            }
            return hypernymsList;
        }
        return hypernymsList;
    }

    public HashSet extractEntityTypesEN(String entity, String kb) {

        HashSet hypernymsList = new HashSet();
        try {
            URL url = null;
            String path = "";
            if (kb.equals("local")) {
                path = Settings.EN_WIKIPEDIA_LOCAL_EXPORT + entity.replace(" ", "_");
            } else if (kb.equals("live")) {
                path = Settings.EN_WIKIPEDIA_LIVE_EXPORT + entity.replace(" ", "_");
            }
            StringBuffer buffer = new StringBuffer();
            url = new URL(path);
            URLConnection connection = url.openConnection();
            InputStream is = connection.getInputStream();
            Reader isr = new InputStreamReader(is, "UTF-8");
            Reader in = new BufferedReader(isr);
            int ch;

            while ((ch = in.read()) > -1) {
                buffer.append((char) ch);
            }
            in.close();
            Article article = new Article(buffer.toString(), "en");
            Document document = Factory.newDocument(article.getFirstSection());

            Corpus corpus = Factory.newCorpus("");
            corpus.add(document);
            hypernymExtractionPipelineEN.setCorpus(corpus);
            hypernymExtractionPipelineEN.execute();

            Iterator corpus_iter = corpus.iterator();

            while (corpus_iter.hasNext()) {
                Document doc = (Document) corpus_iter.next();
                AnnotationSet as_all = doc.getAnnotations();
                AnnotationSet as_hearst = as_all.get("h");
                Iterator ann_iter = as_hearst.iterator();

                while (ann_iter.hasNext()) {

                    Annotation isaAnnot = (gate.Annotation) ann_iter.next();
                    Node isaStart = isaAnnot.getStartNode();
                    Node isaEnd = isaAnnot.getEndNode();
                    String hypernym = doc.getContent().getContent(isaStart.getOffset(), isaEnd.getOffset())
                            .toString();

                    Hypernym hypObj = new Hypernym();
                    hypObj.setEntity(entity);
                    hypObj.setEntityURL("http://dbpedia.org/resource/" + entity.replace(" ", "_"));
                    hypObj.setType(hypernym.substring(0, 1).toUpperCase() + hypernym.substring(1));
                    hypObj.setOrigin("thd");
                    hypObj.setAccuracy("0.85");
                    hypObj.setBounds("+- 2.5%");
                    RedisClient redis = RedisClient.getInstance();

                    LinkedEntity linkedEntity = null;

                    String linkedEntityStr = null;
                    linkedEntityStr = redis.getValue(hypernym + "en" + kb);

                    if (linkedEntityStr == null) {
                        linkedEntity = LuceneSearch.getInstance().findWikipediaArticle(hypernym, "en", kb);
                        if (linkedEntity != null) {
                            redis.setKey(hypernym + "en" + kb, linkedEntity.toString());
                        }
                    } else {
                        linkedEntity = new LinkedEntity();
                        linkedEntity.setPageTitle(linkedEntityStr.split("\\+")[0]);
                        linkedEntity.setConfidence(Double.parseDouble(linkedEntityStr.split("\\+")[1]));
                    }
                    if (linkedEntity != null) {
                        hypObj.setTypeURL(
                                "http://dbpedia.org/resource/" + linkedEntity.getPageTitle().replace(" ", "_"));
                    } else {
                        hypObj.setTypeURL("");
                    }
                    hypernymsList.add(hypObj);
                }
            }
        } catch (InvalidOffsetException ex) {
            Logger.getLogger(THDInstance.class.getName()).log(Level.SEVERE, null, ex);
        } catch (ExecutionException ex) {
            Logger.getLogger(THDInstance.class.getName()).log(Level.SEVERE, null, ex);
        } catch (ResourceInstantiationException ex) {
            Logger.getLogger(THDInstance.class.getName()).log(Level.SEVERE, null, ex);
        } catch (IOException ex) {
            Logger.getLogger(THDInstance.class.getName()).log(Level.SEVERE, null, ex);
        }
        return hypernymsList;
    }

    public HashSet extractEntityTypesDE(String entity, String kb) {
        HashSet hypernymsList = new HashSet();
        try {
            URL url = null;
            String path = "";
            if (kb.equals("local")) {
                path = Settings.DE_WIKIPEDIA_LOCAL_EXPORT + URLEncoder.encode(entity.replace(" ", "_"), "UTF-8");
            } else if (kb.equals("live")) {
                path = Settings.DE_WIKIPEDIA_LIVE_EXPORT + URLEncoder.encode(entity.replace(" ", "_"), "UTF-8");
            }

            StringBuffer buffer = new StringBuffer();
            url = new URL(path);
            URLConnection connection = url.openConnection();
            InputStream is = connection.getInputStream();
            Reader isr = new InputStreamReader(is, "UTF-8");
            Reader in = new BufferedReader(isr);
            int ch;

            while ((ch = in.read()) > -1) {
                buffer.append((char) ch);
            }

            in.close();
            Article article = new Article(buffer.toString(), "en");
            Document document = Factory.newDocument(article.getFirstSection());
            Corpus corpus = Factory.newCorpus("");
            corpus.add(document);

            hypernymExtractionPipelineDE.setCorpus(corpus);
            hypernymExtractionPipelineDE.execute();

            Iterator corpus_iter = corpus.iterator();

            while (corpus_iter.hasNext()) {
                Document doc = (Document) corpus_iter.next();
                AnnotationSet as_all = doc.getAnnotations();
                AnnotationSet as_hearst = as_all.get("h");
                Iterator ann_iter = as_hearst.iterator();

                while (ann_iter.hasNext()) {
                    Annotation isaAnnot = (gate.Annotation) ann_iter.next();
                    Node isaStart = isaAnnot.getStartNode();
                    Node isaEnd = isaAnnot.getEndNode();
                    String hypernym = doc.getContent().getContent(isaStart.getOffset(), isaEnd.getOffset())
                            .toString();

                    Hypernym hypObj = new Hypernym();
                    hypObj.setEntity(entity);
                    hypObj.setEntityURL("http://de.dbpedia.org/resource/" + entity.replace(" ", "_"));
                    hypObj.setType(hypernym.substring(0, 1).toUpperCase() + hypernym.substring(1));
                    RedisClient redis = RedisClient.getInstance();

                    LinkedEntity linkedEntity = null;

                    String linkedEntityStr = null;
                    linkedEntityStr = redis.getValue(hypernym + "de" + kb);
                    if (linkedEntityStr == null) {
                        linkedEntity = LuceneSearch.getInstance().findWikipediaArticle(hypernym, "de", kb);
                        if (linkedEntity != null) {
                            redis.setKey(hypernym + "de" + kb, linkedEntity.toString());
                        }
                    } else {
                        linkedEntity = new LinkedEntity();
                        linkedEntity.setPageTitle(linkedEntityStr.split("\\+")[0]);
                        linkedEntity.setConfidence(Double.parseDouble(linkedEntityStr.split("\\+")[1]));
                    }
                    if (linkedEntity != null) {
                        hypObj.setTypeURL(
                                "http://de.dbpedia.org/resource/" + linkedEntity.getPageTitle().replace(" ", "_"));
                    } else {
                        hypObj.setTypeURL("");
                    }
                    hypObj.setOrigin("thd");
                    hypObj.setAccuracy("0.77");
                    hypObj.setBounds("+- 2.5%");
                    hypernymsList.add(hypObj);
                }
            }

        } catch (InvalidOffsetException ex) {
            Logger.getLogger(THDInstance.class.getName()).log(Level.SEVERE, null, ex);
        } catch (ExecutionException ex) {
            Logger.getLogger(THDInstance.class.getName()).log(Level.SEVERE, null, ex);
        } catch (ResourceInstantiationException ex) {
            Logger.getLogger(THDInstance.class.getName()).log(Level.SEVERE, null, ex);
        } catch (IOException ex) {
            Logger.getLogger(THDInstance.class.getName()).log(Level.SEVERE, null, ex);
        }
        return hypernymsList;
    }

    public HashSet extractEntityTypesNL(String entity, String kb) {
        HashSet hypernymsList = new HashSet();
        try {
            URL url = null;
            String path = "";
            if (kb.equals("local")) {
                path = Settings.NL_WIKIPEDIA_LOCAL_EXPORT + URLEncoder.encode(entity.replace(" ", "_"), "UTF-8");
            } else if (kb.equals("live")) {
                path = Settings.NL_WIKIPEDIA_LIVE_EXPORT + URLEncoder.encode(entity.replace(" ", "_"), "UTF-8");
            }
            StringBuffer buffer = new StringBuffer();
            url = new URL(path);
            URLConnection connection = url.openConnection();
            InputStream is = connection.getInputStream();
            Reader isr = new InputStreamReader(is, "UTF-8");
            Reader in = new BufferedReader(isr);
            int ch;

            while ((ch = in.read()) > -1) {
                buffer.append((char) ch);
            }
            in.close();
            Article article = new Article(buffer.toString(), "nl");

            Document document = Factory.newDocument(article.getFirstSection());
            Corpus corpus = Factory.newCorpus("");
            corpus.add(document);
            hypernymExtractionPipelineNL.setCorpus(corpus);
            hypernymExtractionPipelineNL.execute();

            Iterator corpus_iter = corpus.iterator();

            while (corpus_iter.hasNext()) {
                Document doc = (Document) corpus_iter.next();
                AnnotationSet as_all = doc.getAnnotations();
                AnnotationSet as_hearst = as_all.get("h");
                Iterator ann_iter = as_hearst.iterator();
                while (ann_iter.hasNext()) {
                    Annotation isaAnnot = (gate.Annotation) ann_iter.next();
                    Node isaStart = isaAnnot.getStartNode();
                    Node isaEnd = isaAnnot.getEndNode();
                    String hypernym = doc.getContent().getContent(isaStart.getOffset(), isaEnd.getOffset())
                            .toString();

                    Hypernym hypObj = new Hypernym();
                    hypObj.setEntity(entity);
                    hypObj.setEntityURL("http://nl.dbpedia.org/resource/" + entity.replace(" ", "_"));
                    hypObj.setType(hypernym.substring(0, 1).toUpperCase() + hypernym.substring(1));
                    RedisClient redis = RedisClient.getInstance();

                    LinkedEntity linkedEntity = null;

                    String linkedEntityStr = null;
                    linkedEntityStr = redis.getValue(hypernym + "nl" + kb);

                    if (linkedEntityStr == null) {
                        linkedEntity = LuceneSearch.getInstance().findWikipediaArticle(hypernym, "nl", kb);
                        if (linkedEntity != null) {
                            redis.setKey(hypernym + "nl" + kb, linkedEntity.toString());
                        }
                    } else {
                        linkedEntity = new LinkedEntity();
                        linkedEntity.setPageTitle(linkedEntityStr.split("\\+")[0]);
                        linkedEntity.setConfidence(Double.parseDouble(linkedEntityStr.split("\\+")[1]));
                    }
                    if (linkedEntity != null) {
                        hypObj.setTypeURL(
                                "http://nl.dbpedia.org/resource/" + linkedEntity.getPageTitle().replace(" ", "_"));
                    } else {
                        hypObj.setTypeURL("");
                    }
                    hypObj.setOrigin("thd");
                    hypObj.setAccuracy("0.89");
                    hypObj.setBounds("+- 2%");
                    hypernymsList.add(hypObj);
                }
            }
        } catch (InvalidOffsetException ex) {
            Logger.getLogger(THDInstance.class.getName()).log(Level.SEVERE, null, ex);
        } catch (ExecutionException ex) {
            Logger.getLogger(THDInstance.class.getName()).log(Level.SEVERE, null, ex);
        } catch (ResourceInstantiationException ex) {
            Logger.getLogger(THDInstance.class.getName()).log(Level.SEVERE, null, ex);
        } catch (IOException ex) {
            Logger.getLogger(THDInstance.class.getName()).log(Level.SEVERE, null, ex);
        }
        return hypernymsList;
    }

    public HashSet<Hypernym> extractEntityTypesLHD(String entityTitle, String lang) throws UnknownHostException {

        HashSet<Hypernym> thdHypernyms = null;
        thdHypernyms = getTHDHypernymsLHDv2(entityTitle, lang);

        if (lang.contains("de") || lang.contains("nl")) {
            String s = getInterlanguageLink(entityTitle, lang, "en");
            if (s != null) {

                HashSet tmp = getTHDHypernymsLHDv2(s, "en");
                if (tmp != null) {
                    try {
                        thdHypernyms.addAll(tmp);
                    } catch (Exception ex) {
                        Logger.getLogger(THDWorker.class.getName()).log(Level.SEVERE, null, ex);
                    }
                } else {
                }
            }
        }
        return thdHypernyms;
    }

    public String getInterlanguageLink(String entityTitle, String langOrig, String langTo) {
        try {
            BasicDBObject queryObj = new BasicDBObject();
            switch (langOrig) {
            case "en":
                queryObj.append("en_uri", "http://dbpedia.org/resource/" + entityTitle.replace(" ", "_"));
                break;

            case "de":
                queryObj.append("de_uri", "http://de.dbpedia.org/resource/" + entityTitle.replace(" ", "_"));
                break;

            case "nl":
                queryObj.append("nl_uri", "http://nl.dbpedia.org/resource/" + entityTitle.replace(" ", "_"));
                break;

            }
            BasicDBObject projObj = new BasicDBObject();
            switch (langTo) {
            case "en":
                projObj.append("en_uri", 1);
                break;

            case "de":
                projObj.append("de_uri", 1);
                break;

            case "nl":
                projObj.append("nl_uri", 1);
                break;
            }

            DBObject resObj = MongoDBClient.getDBInstance().getCollection("interlanguage_links").findOne(queryObj,
                    projObj);
            String otherEntityTitle = null;
            if (resObj != null) {
                switch (langTo) {
                case "en":
                    Object temp = resObj.get("en_uri");
                    if (temp != null) {
                        otherEntityTitle = resObj.get("en_uri").toString();
                    }
                    break;

                case "de":
                    otherEntityTitle = resObj.get("de_uri").toString();
                    break;

                case "nl":
                    otherEntityTitle = resObj.get("nl_uri").toString();
                    break;
                }
            }
            if (otherEntityTitle != null) {
                otherEntityTitle = otherEntityTitle
                        .substring(otherEntityTitle.lastIndexOf('/') + 1, otherEntityTitle.length())
                        .replace("_", " ");
            }
            return otherEntityTitle;

        } catch (UnknownHostException ex) {
            Logger.getLogger(THDWorker.class.getName()).log(Level.SEVERE, null, ex);
            return null;
        }
    }

    public HashSet getTHDHypernymsLHDv2(String entityTitle, String lang) throws UnknownHostException {
        HashSet hypernymsList = new HashSet();
        try {
            BasicDBObject queryObj = new BasicDBObject();
            queryObj.append("types",
                    new BasicDBObject().append("$elemMatch", new BasicDBObject().append("origin", "thd")));

            switch (lang) {
            case "en":
                String encodedURIEn = "http://dbpedia.org/resource/"
                        + URLEncoder.encode(URLEncoder.encode(entityTitle.replace(" ", "_"), "UTF-8"), "UTF-8");
                String notEncodedURIEn = "http://dbpedia.org/resource/" + entityTitle.replace(" ", "_");
                queryObj.append("uri", encodedURIEn);

                DBObject resObj = MongoDBClient.getDBInstance().getCollection("en_entities_thd_lhd10")
                        .findOne(queryObj);

                if (resObj != null) {

                    BasicDBList e = (BasicDBList) resObj.get("types");

                    for (int i = 0; i < e.size(); i++) {

                        DBObject type = (DBObject) e.get(i);

                        String mappingStr = type.get("mapping").toString();

                        // hypernym is mapped to dbpedia ontology
                        // creating hierarchy from the dbpedia ontology
                        if (mappingStr.equals("dbOnto")) {

                            Hypernym hypernym = new Hypernym();
                            hypernym.setEntity(entityTitle);
                            hypernym.setEntityURL(notEncodedURIEn);
                            hypernym.setType(type.get("label").toString());
                            hypernym.setTypeURL(type.get("uri").toString());
                            hypernym.setOrigin(type.get("origin").toString());
                            hypernym.setAccuracy(type.get("accuracy").toString());
                            hypernym.setBounds(type.get("bounds").toString());
                            hypernymsList.add(hypernym);

                            OntoRecord initRecord = new OntoRecord();
                            initRecord.setUri(type.get("uri").toString());

                            while (initRecord != null) {

                                initRecord = DBpediaOntologyManager.getInstance().getSubclass(initRecord.getUri(),
                                        lang);

                                if (initRecord == null) {
                                    return hypernymsList;
                                } else {
                                    Hypernym hypernymDerived = new Hypernym();
                                    hypernymDerived.setEntity(entityTitle);
                                    hypernymDerived.setEntityURL(notEncodedURIEn);
                                    hypernymDerived.setType(initRecord.getLabel());
                                    hypernymDerived.setTypeURL(initRecord.getUri());
                                    hypernymDerived.setOrigin("thd-derived");
                                    hypernymDerived.setAccuracy(type.get("accuracy").toString());
                                    hypernymDerived.setBounds(type.get("bounds").toString());
                                    hypernymsList.add(hypernymDerived);
                                }
                            }
                        }
                        // the type is DBpedia instance, doesn't matter, add it it to the types list
                        else {
                            Hypernym hypernymInst = new Hypernym();
                            hypernymInst.setEntity(entityTitle);
                            hypernymInst.setEntityURL(notEncodedURIEn);
                            hypernymInst.setType(type.get("label").toString());
                            hypernymInst.setTypeURL(type.get("uri").toString());
                            hypernymInst.setOrigin(type.get("origin").toString());
                            hypernymInst.setAccuracy(type.get("accuracy").toString());
                            hypernymInst.setBounds(type.get("bounds").toString());
                            hypernymsList.add(hypernymInst);

                            // try to map the DBpedia instance type to a DBpedia Ontology type
                            String mappedType = TypeMapper.getInstance().getTypeMapping(lang,
                                    type.get("uri").toString());

                            if (mappedType != null) {

                                Hypernym hypernym2 = new Hypernym();
                                hypernym2.setEntity(entityTitle);
                                hypernym2.setEntityURL(notEncodedURIEn);
                                hypernym2.setType(mappedType.split("/")[mappedType.split("/").length - 1]);
                                hypernym2.setTypeURL(mappedType);
                                hypernym2.setOrigin(type.get("origin").toString());
                                hypernym2.setAccuracy(type.get("accuracy").toString());
                                hypernym2.setBounds(type.get("bounds").toString());
                                hypernymsList.add(hypernym2);

                                OntoRecord initRecord = new OntoRecord();
                                initRecord.setUri(mappedType);

                                while (initRecord != null) {

                                    initRecord = DBpediaOntologyManager.getInstance()
                                            .getSubclass(initRecord.getUri(), lang);

                                    if (initRecord == null) {
                                        return hypernymsList;
                                    } else {
                                        Hypernym hypernymDerived = new Hypernym();
                                        hypernymDerived.setEntity(entityTitle);
                                        hypernymDerived.setEntityURL(notEncodedURIEn);
                                        hypernymDerived.setType(initRecord.getLabel());
                                        hypernymDerived.setTypeURL(initRecord.getUri());
                                        hypernymDerived.setOrigin("thd-derived");
                                        hypernymDerived.setAccuracy(type.get("accuracy").toString());
                                        hypernymDerived.setBounds(type.get("bounds").toString());
                                        hypernymsList.add(hypernymDerived);
                                    }
                                }
                            }
                        }
                    }
                }
                return hypernymsList;

            case "de":

                String encodedURIDe = "http://de.dbpedia.org/resource/"
                        + URLEncoder.encode(entityTitle.replace(" ", "_"), "UTF-8");
                String notEncodedURIDe = "http://de.dbpedia.org/resource/" + entityTitle.replace(" ", "_");
                queryObj.append("uri", encodedURIDe);

                resObj = MongoDBClient.getDBInstance().getCollection("de_entities_thd_lhd10").findOne(queryObj);

                if (resObj != null) {

                    BasicDBList typesList = (BasicDBList) resObj.get("types");

                    for (int i = 0; i < typesList.size(); i++) {

                        DBObject type = (DBObject) typesList.get(i);

                        String mappingStr = type.get("mapping").toString();
                        // hypernym is mapped to dbpedia ontology
                        // creating hierarchy from the dbpedia ontology
                        if (mappingStr.equals("dbOnto")) {

                            Hypernym hypernym = new Hypernym();
                            hypernym.setEntity(entityTitle);
                            hypernym.setEntityURL(notEncodedURIDe);
                            hypernym.setType(type.get("label").toString());
                            hypernym.setTypeURL(type.get("uri").toString());
                            hypernym.setAccuracy(type.get("accuracy").toString());
                            hypernym.setBounds(type.get("bounds").toString());
                            hypernym.setOrigin(type.get("origin").toString());
                            hypernymsList.add(hypernym);

                            OntoRecord initRecord = new OntoRecord();
                            initRecord.setUri(type.get("uri").toString());

                            while (initRecord != null) {

                                initRecord = DBpediaOntologyManager.getInstance().getSubclass(initRecord.getUri(),
                                        lang);

                                if (initRecord != null) {

                                    Hypernym hypernymDerived = new Hypernym();
                                    hypernymDerived.setEntity(entityTitle);
                                    hypernymDerived.setEntityURL(notEncodedURIDe);
                                    hypernymDerived.setType(initRecord.getLabel());
                                    hypernymDerived.setTypeURL(initRecord.getUri());
                                    hypernymDerived.setAccuracy(type.get("accuracy").toString());
                                    hypernymDerived.setOrigin("thd-derived");
                                    hypernymsList.add(hypernymDerived);
                                }
                            }
                        }
                        // type is DBpedia instance, doesn't matter, add it to the types list
                        else {
                            Hypernym hypernymInst = new Hypernym();
                            hypernymInst.setEntity(entityTitle);
                            hypernymInst.setEntityURL(notEncodedURIDe);
                            hypernymInst.setType(type.get("label").toString());
                            hypernymInst.setTypeURL(type.get("uri").toString());
                            hypernymInst.setOrigin(type.get("origin").toString());
                            hypernymInst.setAccuracy(type.get("accuracy").toString());
                            hypernymInst.setBounds(type.get("bounds").toString());
                            hypernymsList.add(hypernymInst);

                            // try to map the DBpedia instance type to a DBpedia Ontology type
                            String mappedType = TypeMapper.getInstance().getTypeMapping("de",
                                    type.get("uri").toString());

                            if (mappedType != null) {
                                Hypernym hypernym2 = new Hypernym();
                                hypernym2.setEntity(entityTitle);
                                hypernym2.setEntityURL(notEncodedURIDe);
                                hypernym2.setType(mappedType.split("/")[mappedType.split("/").length - 1]);
                                hypernym2.setTypeURL(mappedType);
                                hypernym2.setOrigin(type.get("origin").toString());
                                hypernym2.setAccuracy(type.get("accuracy").toString());
                                hypernym2.setBounds(type.get("bounds").toString());
                                hypernymsList.add(hypernym2);

                                OntoRecord initRecord = new OntoRecord();
                                initRecord.setUri(mappedType);

                                while (initRecord != null) {

                                    initRecord = DBpediaOntologyManager.getInstance()
                                            .getSubclass(initRecord.getUri(), lang);

                                    if (initRecord == null) {
                                        return hypernymsList;
                                    } else {
                                        Hypernym hypernymDerived = new Hypernym();
                                        hypernymDerived.setEntity(entityTitle);
                                        hypernymDerived.setEntityURL(notEncodedURIDe);
                                        hypernymDerived.setType(initRecord.getLabel());
                                        hypernymDerived.setTypeURL(initRecord.getUri());
                                        hypernymDerived.setOrigin("thd-derived");
                                        hypernymDerived.setAccuracy(type.get("accuracy").toString());
                                        hypernymDerived.setBounds(type.get("bounds").toString());
                                        hypernymsList.add(hypernymDerived);
                                    }
                                }
                            }
                        }
                    }
                }
                return hypernymsList;

            case "nl":

                String encodedURINl = "http://nl.dbpedia.org/resource/"
                        + URLEncoder.encode(entityTitle.replace(" ", "_"), "UTF-8");
                String notEncodedURINl = "http://nl.dbpedia.org/resource/" + entityTitle.replace(" ", "_");
                queryObj.append("uri", encodedURINl);

                resObj = MongoDBClient.getDBInstance().getCollection("nl_entities_thd_lhd10").findOne(queryObj);

                if (resObj != null) {

                    BasicDBList typesList = (BasicDBList) resObj.get("types");

                    for (int i = 0; i < typesList.size(); i++) {

                        DBObject type = (DBObject) typesList.get(i);

                        String typeURI = type.get("mapping").toString();
                        // hypernym is mapped to dbpedia ontology
                        // creating hierarchy from the dbpedia ontology
                        if (typeURI.equals("dbOnto")) {

                            Hypernym hypernym = new Hypernym();
                            hypernym.setEntity(entityTitle);
                            hypernym.setEntityURL(notEncodedURINl);
                            hypernym.setType(type.get("label").toString());
                            hypernym.setTypeURL(type.get("uri").toString());
                            hypernym.setAccuracy(type.get("accuracy").toString());
                            hypernym.setBounds(type.get("bounds").toString());
                            hypernym.setOrigin(type.get("origin").toString());
                            hypernymsList.add(hypernym);

                            OntoRecord initRecord = new OntoRecord();
                            initRecord.setUri(type.get("uri").toString());

                            while (initRecord != null) {

                                initRecord = DBpediaOntologyManager.getInstance().getSubclass(initRecord.getUri(),
                                        lang);

                                if (initRecord != null) {

                                    Hypernym hypernymDerived = new Hypernym();
                                    hypernymDerived.setEntity(entityTitle);
                                    hypernymDerived.setEntityURL(notEncodedURINl);
                                    hypernymDerived.setType(initRecord.getLabel());
                                    hypernymDerived.setTypeURL(initRecord.getUri());
                                    hypernymDerived.setAccuracy(type.get("accuracy").toString());
                                    hypernymDerived.setOrigin("thd-derived");
                                    hypernymsList.add(hypernymDerived);
                                }
                            }
                        }
                        // type is DBpedia instance, doesn't matter, add it to the types list
                        else {
                            Hypernym hypernym = new Hypernym();
                            hypernym.setEntity(entityTitle);
                            hypernym.setEntityURL(notEncodedURINl);
                            hypernym.setType(type.get("label").toString());
                            hypernym.setTypeURL(type.get("uri").toString());
                            hypernym.setOrigin(type.get("origin").toString());
                            hypernym.setAccuracy(type.get("accuracy").toString());
                            hypernym.setBounds(type.get("bounds").toString());
                            hypernymsList.add(hypernym);

                            // try to map the DBpedia instance type to a DBpedia Ontology type
                            String mappedType = TypeMapper.getInstance().getTypeMapping("nl",
                                    type.get("uri").toString());

                            if (mappedType != null) {

                                Hypernym hypernym2 = new Hypernym();
                                hypernym2.setEntity(entityTitle);
                                hypernym2.setEntityURL(notEncodedURINl);
                                hypernym2.setType(mappedType.split("/")[mappedType.split("/").length - 1]);
                                hypernym2.setTypeURL(mappedType);
                                hypernym2.setOrigin(type.get("origin").toString());
                                hypernym2.setAccuracy(type.get("accuracy").toString());
                                hypernym2.setBounds(type.get("bounds").toString());
                                hypernymsList.add(hypernym2);

                                OntoRecord initRecord = new OntoRecord();
                                initRecord.setUri(mappedType);

                                while (initRecord != null) {

                                    initRecord = DBpediaOntologyManager.getInstance()
                                            .getSubclass(initRecord.getUri(), lang);

                                    if (initRecord == null) {
                                        return hypernymsList;
                                    } else {
                                        Hypernym hypernymDerived = new Hypernym();
                                        hypernymDerived.setEntity(entityTitle);
                                        hypernymDerived.setEntityURL(notEncodedURINl);
                                        hypernymDerived.setType(initRecord.getLabel());
                                        hypernymDerived.setTypeURL(initRecord.getUri());
                                        hypernymDerived.setOrigin("thd-derived");
                                        hypernymDerived.setAccuracy(type.get("accuracy").toString());
                                        hypernymDerived.setBounds(type.get("bounds").toString());
                                        hypernymsList.add(hypernymDerived);
                                    }
                                }
                            }
                        }
                    }
                }
                return hypernymsList;
            }
            return hypernymsList;
        } catch (UnsupportedEncodingException ex) {
            Logger.getLogger(THDWorker.class.getName()).log(Level.SEVERE, null, ex);
        }
        return hypernymsList;
    }
    //    public ArrayList<Hypernym> getTHDHypernymsLHD(String entityTitle, String lang) throws UnknownHostException {
    //
    //        ArrayList<Hypernym> hypernymsList = new ArrayList<Hypernym>();
    //        BasicDBObject queryObj = new BasicDBObject();
    //        queryObj.append("label", entityTitle);
    //        queryObj.append("types", new BasicDBObject().append("$elemMatch", new BasicDBObject().append("origin", "thd")));
    //        
    //        switch (lang) {
    //            case "en":
    //                
    //                DBObject resObj = MongoDBClient.getDBInstance().getCollection("en_entities_thd").findOne(queryObj);                
    //                if(resObj != null) {
    //                    
    //                    BasicDBList e = (BasicDBList)resObj.get("types");
    //                    DBObject type = (DBObject)e.get(0);
    //                    
    //                    String mappingStr = type.get("mapping").toString();
    //                    // hypernym is mapped to dbpedia ontology
    //                    // creating hierarchy from the dbpedia ontology
    //                    if(mappingStr.equals("dbOnto")) {
    //                        
    //                        Hypernym hypernym = new Hypernym();
    //                        hypernym.setEntity(entityTitle);
    //                        hypernym.setEntityURL(resObj.get("uri").toString());
    //                        hypernym.setType(type.get("label").toString());
    //                        hypernym.setTypeURL(type.get("uri").toString());
    //                        hypernym.setOrigin(type.get("origin").toString());
    //                        hypernym.setAccuracy(type.get("accuracy").toString());
    //                        hypernym.setBounds(type.get("bounds").toString());
    //                        hypernymsList.add(hypernym);
    //
    //                        OntoRecord initRecord = new OntoRecord();
    //                        initRecord.setUri(type.get("uri").toString());
    //                        
    //                        while(initRecord != null){
    //                            
    //                            initRecord = DBpediaOntologyManager.getInstance().getSubclass(initRecord.getUri(), lang);
    //                            if(initRecord == null){
    //                                return hypernymsList;
    //                            } else {
    //                                Hypernym hypernymDerived = new Hypernym();
    //                                hypernymDerived.setEntity(entityTitle);
    //                                hypernymDerived.setEntityURL(resObj.get("uri").toString());
    //                                hypernymDerived.setType(initRecord.getLabel());
    //                                hypernymDerived.setTypeURL(initRecord.getUri());
    //                                hypernymDerived.setOrigin("thd-derived");
    //                                hypernymDerived.setAccuracy(type.get("accuracy").toString());
    //                                hypernymDerived.setBounds(type.get("bounds").toString());
    //                                hypernymsList.add(hypernymDerived);
    //                            }
    //                        }
    //                    }
    //                    
    //                    // hypernym is not mapped to dbpedia ontology
    //                    // searching for superclass mapped to dbpedia ontology
    //                    // if found, for the superclass is created hierarchy
    //                    else if(mappingStr.equals("dbRes")){
    //                        
    //                        // try to find mapping
    //                        String mappedRes = DBpediaOntologyMapper.getInstance().mapEnResource(type.get("uri").toString());
    //                        
    //                        if(mappedRes != null) {
    //                            
    //                            Hypernym hypernym2 = new Hypernym();
    //                            hypernym2.setEntity(entityTitle);
    //                            hypernym2.setEntityURL(resObj.get("uri").toString());
    //                            hypernym2.setType(type.get("label").toString());
    //                            hypernym2.setTypeURL(mappedRes);
    //                            hypernym2.setOrigin(type.get("origin").toString());
    //                            hypernym2.setAccuracy(type.get("accuracy").toString());
    //                            hypernym2.setBounds(type.get("bounds").toString());
    //                            hypernymsList.add(hypernym2);
    //                            
    //                            // try to derive some more types
    //                            OntoRecord initRecord = new OntoRecord();
    //                            initRecord.setUri(mappedRes);
    //
    //                            while(initRecord != null){
    //
    //                                initRecord = DBpediaOntologyManager.getInstance().getSubclass(initRecord.getUri(), lang);
    //                                if(initRecord == null){
    //                                    return hypernymsList;
    //                                } else {
    //                                    Hypernym hypernymDerived = new Hypernym();
    //                                    hypernymDerived.setEntity(entityTitle);
    //                                    hypernymDerived.setEntityURL(resObj.get("uri").toString());
    //                                    hypernymDerived.setType(initRecord.getLabel());
    //                                    hypernymDerived.setTypeURL(initRecord.getUri());
    //                                    hypernymDerived.setOrigin("thd-derived");
    //                                    hypernymDerived.setAccuracy(type.get("accuracy").toString());
    //                                    hypernymDerived.setBounds(type.get("bounds").toString());
    //                                    hypernymsList.add(hypernymDerived);
    //                                }
    //                            }
    //                        } else {
    //                            String initialUri = type.get("uri").toString();
    //
    //                            boolean continueSearching = true;
    //
    //                            while(continueSearching) {
    //                                // try to find dbonto for dbres
    //                                DBpediaMapping mapping = getSubclassConfirmed(initialUri);
    //
    //                                // if not found, try to find dbres for the dbres
    //                                if(mapping == null) {
    //
    //                                    initialUri = getSuperclass(initialUri, "en");
    //
    //                                    if(initialUri == null){
    //                                        //System.out.println("No superclass found, finishing searching");
    //                                        continueSearching = false;
    //
    //                                    } else {
    //                                        // superClass uri found
    //                                        // check if uri is dbOnto - create hierarchy
    //                                        // check if uri is dbRes - continue
    //                                        if(initialUri.contains("/resource/")){
    //                                            //System.out.println("Found superclass is dbres");
    //                                            // do nothing
    //                                            // continue to search
    //                                        } else if(initialUri.contains("/ontology/")) {
    //                                            //System.out.println("Found superclass is dbonto, finishing searching and creating hierarchy");
    //                                            // create hierarchy
    //                                            continueSearching = false;
    //
    //                                            OntoRecord initRecord = new OntoRecord();
    //                                            initRecord.setUri(initialUri);
    //
    //                                            while(initRecord != null){
    //
    //                                                initRecord = DBpediaOntologyManager.getInstance().getSubclass(initRecord.getUri(), lang);
    //
    //                                                if(initRecord != null){
    //
    //                                                    Hypernym hypernymDerived = new Hypernym();
    //                                                    hypernymDerived.setEntity(entityTitle);
    //                                                    hypernymDerived.setEntityURL(resObj.get("uri").toString());
    //                                                    hypernymDerived.setType(initRecord.getLabel());
    //                                                    hypernymDerived.setTypeURL(initRecord.getUri());
    //                                                    hypernymDerived.setOrigin("thd-derived");
    //                                                    hypernymDerived.setAccuracy(type.get("accuracy").toString());
    //                                                    hypernymDerived.setBounds(type.get("bounds").toString());
    //                                                    hypernymsList.add(hypernymDerived);
    //                                                }
    //                                            }
    //                                        } else {
    //                                            // some other uri
    //                                            continueSearching = false;
    //                                        }
    //                                    }
    //                                }
    //                                // if found, then create hierarchy
    //                                else {
    //                                    continueSearching = false;
    //                                    // creating hierarchy
    //                                    OntoRecord initRecord = new OntoRecord();
    //                                    initRecord.setUri(initialUri);
    //
    //                                    while(initRecord != null){
    //
    //                                        initRecord = DBpediaOntologyManager.getInstance().getSubclass(initRecord.getUri(), lang);
    //
    //                                        if(initRecord != null){
    //
    //                                            Hypernym hypernymDerived = new Hypernym();
    //                                            hypernymDerived.setEntity(entityTitle);
    //                                            hypernymDerived.setEntityURL(resObj.get("uri").toString());
    //                                            hypernymDerived.setType(initRecord.getLabel());
    //                                            hypernymDerived.setTypeURL(initRecord.getUri());
    //                                            hypernymDerived.setOrigin("thd-derived");
    //                                            hypernymsList.add(hypernymDerived);
    //                                        }
    //                                    }
    //                                }
    //                            }
    //                        }
    //                    }
    //                    return hypernymsList;
    //                    
    //                } else {
    //                    return hypernymsList;
    //                }
    //            case "de":
    //                
    //                resObj = MongoDBClient.getDBInstance().getCollection("de_entities_thd").findOne(queryObj);                
    //                
    //                if(resObj != null){
    //                    
    //                    hypernymsList.addAll(YagoOntologyManager.getInstance().getYagoHypernyms(entityTitle, "http://yago-knowledge.org/resource/"+entityTitle.replaceAll(" ", "_"), "de", "thd-derived"));
    //
    //                    BasicDBList typesList = (BasicDBList)resObj.get("types");
    //                    DBObject firstType = (DBObject)typesList.get(0);
    //                    
    //                    String mappingStr = firstType.get("mapping").toString();
    //                    // hypernym is mapped to dbpedia ontology
    //                    // creating hierarchy from the dbpedia ontology
    //                    if(mappingStr.equals("dbOnto")){
    //                        
    //                        Hypernym hypernym = new Hypernym();
    //                        hypernym.setEntity(entityTitle);
    //                        hypernym.setEntityURL(resObj.get("uri").toString());
    //                        hypernym.setType(firstType.get("label").toString());
    //                        hypernym.setTypeURL(firstType.get("uri").toString());
    //                        hypernym.setAccuracy(firstType.get("accuracy").toString());
    //                        hypernym.setBounds(firstType.get("bounds").toString());
    //                        hypernym.setOrigin(firstType.get("origin").toString());
    //                        hypernymsList.add(hypernym);
    //
    //                        OntoRecord initRecord = new OntoRecord();
    //                        initRecord.setUri(firstType.get("uri").toString());
    //                        
    //                        while(initRecord != null){
    //                            
    //                            initRecord = DBpediaOntologyManager.getInstance().getSubclass(initRecord.getUri(), lang);
    //                            
    //                            if(initRecord != null){
    //                                
    //                                Hypernym hypernymDerived = new Hypernym();
    //                                hypernymDerived.setEntity(entityTitle);
    //                                hypernymDerived.setEntityURL(resObj.get("uri").toString());
    //                                hypernymDerived.setType(initRecord.getLabel());
    //                                hypernymDerived.setTypeURL(initRecord.getUri());
    //                                hypernymDerived.setOrigin("thd-derived");
    //                                hypernymsList.add(hypernymDerived);                                
    //                            }
    //                        }
    //                    }
    //                    // hypernym is not mapped to dbpedia ontology
    //                    // searching for superclass mapped to dbpedia ontology
    //                    // if found, for the superclass is created hierarchy
    //                    else if(mappingStr.equals("dbRes")) {
    //                        
    //                        String mappedRes = DBpediaOntologyMapper.getInstance().mapDeResource(firstType.get("uri").toString());
    //                        
    //                        if(mappedRes != null) {
    //                            
    //                            Hypernym hypernym2 = new Hypernym();
    //                            hypernym2.setEntity(entityTitle);
    //                            hypernym2.setEntityURL(resObj.get("uri").toString());
    //                            hypernym2.setType(firstType.get("label").toString());
    //                            hypernym2.setTypeURL(mappedRes);
    //                            hypernym2.setOrigin(firstType.get("origin").toString());
    //                            hypernym2.setAccuracy(firstType.get("accuracy").toString());
    //                            hypernym2.setBounds(firstType.get("bounds").toString());
    //                            hypernymsList.add(hypernym2);
    //                            
    //                            OntoRecord initRecord = new OntoRecord();
    //                            initRecord.setUri(firstType.get("uri").toString());
    //
    //                            while(initRecord != null){
    //
    //                                initRecord = DBpediaOntologyManager.getInstance().getSubclass(initRecord.getUri(), lang);
    //
    //                                if(initRecord != null){
    //
    //                                    Hypernym hypernymDerived = new Hypernym();
    //                                    hypernymDerived.setEntity(entityTitle);
    //                                    hypernymDerived.setEntityURL(resObj.get("uri").toString());
    //                                    hypernymDerived.setType(initRecord.getLabel());
    //                                    hypernymDerived.setTypeURL(initRecord.getUri());
    //                                    hypernymDerived.setOrigin("thd-derived");
    //                                    hypernymsList.add(hypernymDerived);                                
    //                                }
    //                            }
    //                            // try to derive some more types
    //                        } else {
    //                            String initialUri = firstType.get("uri").toString();
    //
    //                            boolean continueSearching = true;
    //
    //                            while(continueSearching){
    //                                // try to find dbonto for dbres
    //                                DBpediaMapping mapping = getSubclassConfirmed(initialUri);
    //
    //                                // if not found, try to find dbres for the dbres
    //                                if(mapping == null){
    //
    //                                    initialUri = getSuperclass(initialUri, "de");
    //
    //                                    if(initialUri == null){
    //                                        continueSearching = false;
    //
    //                                    } else {
    //                                        // superClass uri found
    //                                        // check if uri is dbOnto - create hierarchy
    //                                        // check if uri is dbRes - continue
    //
    //                                        if(initialUri.contains("/resource/")){
    //                                            // do nothing
    //                                            // continue to search
    //                                        } else if(initialUri.contains("/ontology/")) {
    //                                            // create hierarchy
    //                                            continueSearching = false;
    //
    //                                            OntoRecord initRecord = new OntoRecord();
    //                                            initRecord.setUri(initialUri);
    //
    //                                            while(initRecord != null){
    //
    //                                                initRecord = DBpediaOntologyManager.getInstance().getSubclass(initRecord.getUri(), lang);
    //
    //                                                if(initRecord != null){
    //
    //                                                    Hypernym hypernymDerived = new Hypernym();
    //                                                    hypernymDerived.setEntity(entityTitle);
    //                                                    hypernymDerived.setEntityURL(resObj.get("uri").toString());
    //                                                    hypernymDerived.setType(initRecord.getLabel());
    //                                                    hypernymDerived.setTypeURL(initRecord.getUri());
    //                                                    hypernymDerived.setOrigin("thd-derived");
    //                                                    hypernymsList.add(hypernymDerived);
    //
    //                                                }
    //                                            }
    //                                        } else {
    //                                            // some other uri
    //                                            continueSearching = false;
    //                                        }
    //                                    }
    //                                }
    //                                // if found, then create hierarchy
    //                                else {
    //                                    continueSearching = false;
    //                                    // creating hierarchy
    //                                    OntoRecord initRecord = new OntoRecord();
    //                                    initRecord.setUri(initialUri);
    //
    //                                    while(initRecord != null){
    //
    //                                        initRecord = DBpediaOntologyManager.getInstance().getSubclass(initRecord.getUri(), lang);
    //
    //                                        if(initRecord != null){
    //
    //                                            Hypernym hypernymDerived = new Hypernym();
    //                                            hypernymDerived.setEntity(entityTitle);
    //                                            hypernymDerived.setEntityURL(resObj.get("uri").toString());
    //                                            hypernymDerived.setType(initRecord.getLabel());
    //                                            hypernymDerived.setTypeURL(initRecord.getUri());
    //                                            hypernymDerived.setOrigin("thd-derived");
    //                                            hypernymsList.add(hypernymDerived);
    //
    //                                        }
    //                                    }
    //                                }
    //                            }
    //                        }
    //                    }
    //                    
    //                    return hypernymsList;
    //                    
    //                } else {
    //                    return hypernymsList;
    //                }
    //                
    //            case "nl":
    //                
    //                resObj = MongoDBClient.getDBInstance().getCollection("nl_entities_thd").findOne(queryObj);                
    //
    //                if(resObj != null){
    //                    
    //                    hypernymsList.addAll(YagoOntologyManager.getInstance().getYagoHypernyms(entityTitle, "http://yago-knowledge.org/resource/"+entityTitle.replaceAll(" ", "_"), "en", "thd-derived"));
    //
    //                    BasicDBList e = (BasicDBList)resObj.get("types");
    //                    DBObject type = (DBObject)e.get(0);
    //                    
    //                    BasicDBList typesList = (BasicDBList)resObj.get("types");
    //                    DBObject firstType = (DBObject)typesList.get(0);
    //                    
    //                    String typeURI = firstType.get("mapping").toString();
    //                    // hypernym is mapped to dbpedia ontology
    //                    // creating hierarchy from the dbpedia ontology
    //                    if(typeURI.equals("dbOnto")){
    //                        
    //                        Hypernym hypernym = new Hypernym();
    //                        hypernym.setEntity(entityTitle);
    //                        hypernym.setEntityURL(resObj.get("uri").toString());
    //                        hypernym.setType(type.get("label").toString());
    //                        hypernym.setTypeURL(type.get("uri").toString());
    //                        hypernym.setAccuracy(type.get("accuracy").toString());
    //                        hypernym.setBounds(type.get("bounds").toString());
    //                        hypernym.setOrigin(type.get("origin").toString());
    //                        hypernymsList.add(hypernym);
    //                    
    //                        OntoRecord initRecord = new OntoRecord();
    //                        initRecord.setUri(firstType.get("uri").toString());
    //                        
    //                        while(initRecord != null){
    //                            
    //                            initRecord = DBpediaOntologyManager.getInstance().getSubclass(initRecord.getUri(), lang);
    //                            
    //                            if(initRecord != null){
    //                                
    //                                Hypernym hypernymDerived = new Hypernym();
    //                                hypernymDerived.setEntity(entityTitle);
    //                                hypernymDerived.setEntityURL(resObj.get("uri").toString());
    //                                hypernymDerived.setType(initRecord.getLabel());
    //                                hypernymDerived.setTypeURL(initRecord.getUri());
    //                                hypernymDerived.setOrigin("thd-derived");
    //                                hypernymsList.add(hypernymDerived);                                
    //                            }
    //                        }
    //                    }
    //                    // hypernym is not mapped to dbpedia ontology
    //                    // searching for superclass mapped to dbpedia ontology
    //                    // if found, for the superclass is created hierarchy
    //                    else if(typeURI.equals("dbRes")) {
    //
    //                        String mappedRes = DBpediaOntologyMapper.getInstance().mapNlResource(type.get("uri").toString());
    //                        if(mappedRes != null) {
    //                            Hypernym hypernym = new Hypernym();
    //                            hypernym.setEntity(entityTitle);
    //                            hypernym.setEntityURL(resObj.get("uri").toString());
    //                            hypernym.setType(type.get("label").toString());
    //                            hypernym.setTypeURL(mappedRes);
    //                            hypernym.setAccuracy(type.get("accuracy").toString());
    //                            hypernym.setBounds(type.get("bounds").toString());
    //                            hypernym.setOrigin(type.get("origin").toString());
    //                            hypernymsList.add(hypernym);
    //
    //                            OntoRecord initRecord = new OntoRecord();
    //                            initRecord.setUri(mappedRes.toString());
    //
    //                            while(initRecord != null){
    //
    //                                initRecord = DBpediaOntologyManager.getInstance().getSubclass(initRecord.getUri(), lang);
    //
    //                                if(initRecord != null){
    //
    //                                    Hypernym hypernymDerived = new Hypernym();
    //                                    hypernymDerived.setEntity(entityTitle);
    //                                    hypernymDerived.setEntityURL(resObj.get("uri").toString());
    //                                    hypernymDerived.setType(initRecord.getLabel());
    //                                    hypernymDerived.setTypeURL(initRecord.getUri());
    //                                    hypernymDerived.setOrigin("thd-derived");
    //                                    hypernymsList.add(hypernymDerived);                                
    //                                }
    //                            }
    //                        } else {
    //                            
    //                            String initialUri = firstType.get("uri").toString();
    //
    //                            boolean continueSearching = true;
    //
    //                            while(continueSearching){
    //                                // try to find dbonto for dbres
    //                                DBpediaMapping mapping = getSubclassConfirmed(initialUri);
    //
    //                                // if not found, try to find dbres for the dbres
    //                                if(mapping == null){
    //
    //                                    initialUri = getSuperclass(initialUri, "nl");
    //
    //                                    if(initialUri == null){
    //                                        //System.out.println("No superclass found, finishing searching");
    //                                        continueSearching = false;
    //
    //                                    } else {
    //
    //                                        // superClass uri found
    //                                        // check if uri is dbOnto - create hierarchy
    //                                        // check if uri is dbRes - continue
    //
    //                                        if(initialUri.contains("/resource/")){
    //                                            // do nothing
    //                                            // continue to search
    //                                        } else if(initialUri.contains("/ontology/")) {
    //                                            // create hierarchy
    //                                            continueSearching = false;
    //
    //                                            OntoRecord initRecord = new OntoRecord();
    //                                            initRecord.setUri(initialUri);
    //
    //                                            while(initRecord != null){
    //
    //                                                initRecord = DBpediaOntologyManager.getInstance().getSubclass(initRecord.getUri(), lang);
    //
    //                                                if(initRecord != null){
    //
    //                                                    Hypernym hypernymDerived = new Hypernym();
    //                                                    hypernymDerived.setEntity(entityTitle);
    //                                                    hypernymDerived.setEntityURL(resObj.get("uri").toString());
    //                                                    hypernymDerived.setType(initRecord.getLabel());
    //                                                    hypernymDerived.setTypeURL(initRecord.getUri());
    //                                                    hypernymDerived.setOrigin("thd-derived");
    //                                                    hypernymsList.add(hypernymDerived);
    //
    //                                                }
    //                                            }
    //                                        } else {
    //                                            // some other uri
    //                                            continueSearching = false;
    //                                        }
    //                                    }
    //                                }
    //                                // if found, then create hierarchy
    //                                else {
    //                                    continueSearching = false;
    //                                    // creating hierarchy
    //                                    OntoRecord initRecord = new OntoRecord();
    //                                    initRecord.setUri(initialUri);
    //
    //                                    while(initRecord != null){
    //
    //                                        initRecord = DBpediaOntologyManager.getInstance().getSubclass(initRecord.getUri(), lang);
    //
    //                                        if(initRecord != null){
    //
    //                                            Hypernym hypernymDerived = new Hypernym();
    //                                            hypernymDerived.setEntity(entityTitle);
    //                                            hypernymDerived.setEntityURL(resObj.get("uri").toString());
    //                                            hypernymDerived.setType(initRecord.getLabel());
    //                                            hypernymDerived.setTypeURL(initRecord.getUri());
    //                                            hypernymDerived.setOrigin("thd-derived");
    //                                            hypernymsList.add(hypernymDerived);
    //                                        }
    //                                    }
    //                                }
    //                            }
    //                        }
    //                    }
    //                    
    //                    return hypernymsList;
    //                    
    //                } else {
    //                    return hypernymsList;
    //                }
    //        }
    //        return null;
    //    }    

    public HashSet getDBpediaHypernyms(String entityTitle, String lang) throws UnknownHostException {

        HashSet hypernymsList = new HashSet();
        try {
            BasicDBObject queryObj = new BasicDBObject();
            queryObj.append("types",
                    new BasicDBObject().append("$elemMatch", new BasicDBObject().append("origin", "dbpedia")));

            BasicDBList dbTypesList;
            switch (lang) {
            case "en":
                String uriEn = "http://dbpedia.org/resource/"
                        + URLEncoder.encode(entityTitle.replace(" ", "_"), "UTF-8");
                queryObj.append("uri", uriEn);
                DBCursor cursorEN = MongoDBClient.getDBInstance().getCollection("en_entities_dbpedia")
                        .find(queryObj);

                while (cursorEN.hasNext()) {
                    DBObject resObj = cursorEN.next();
                    if (resObj != null) {

                        dbTypesList = (BasicDBList) resObj.get("types");

                        for (int i = 0; i < dbTypesList.size(); i++) {
                            DBObject obj = (DBObject) dbTypesList.get(i);

                            Hypernym hypernym = new Hypernym();
                            hypernym.setEntity(entityTitle);
                            hypernym.setEntityURL(resObj.get("uri").toString());
                            hypernym.setType(obj.get("label").toString());
                            hypernym.setTypeURL(obj.get("uri").toString());
                            hypernym.setOrigin("dbpedia");
                            hypernym.setAccuracy("-1.0");
                            if (obj.get("uri").toString().contains("http://dbpedia.org/ontology/")) {
                                hypernymsList.add(hypernym);
                            }
                        }
                    }
                }
                cursorEN.close();

                return hypernymsList;

            case "de":

                String uriDe = "http://de.dbpedia.org/resource/" + entityTitle.replace(" ", "_");
                queryObj.append("uri", uriDe);
                DBCursor cursorDE = MongoDBClient.getDBInstance().getCollection("de_entities_dbpedia")
                        .find(queryObj);

                while (cursorDE.hasNext()) {
                    DBObject resObj = cursorDE.next();
                    if (resObj != null) {
                        dbTypesList = (BasicDBList) resObj.get("types");

                        for (int i = 0; i < dbTypesList.size(); i++) {

                            DBObject obj = (DBObject) dbTypesList.get(i);

                            Hypernym hypernym = new Hypernym();
                            hypernym.setEntity(entityTitle);
                            hypernym.setEntityURL(resObj.get("uri").toString());
                            hypernym.setType(obj.get("label").toString());
                            hypernym.setTypeURL(obj.get("uri").toString());
                            hypernym.setOrigin("dbpedia");
                            hypernym.setAccuracy("-1.0");
                            if (obj.get("uri").toString().contains("http://dbpedia.org/ontology/")) {
                                hypernymsList.add(hypernym);
                            }
                        }
                    }
                }
                cursorDE.close();

                return hypernymsList;

            case "nl":

                String uriNl = "http://nl.dbpedia.org/resource/" + entityTitle.replace(" ", "_");
                queryObj.append("uri", uriNl);
                DBCursor cursorNL = MongoDBClient.getDBInstance().getCollection("nl_entities_dbpedia")
                        .find(queryObj);

                while (cursorNL.hasNext()) {
                    DBObject resObj = cursorNL.next();
                    if (resObj != null) {
                        dbTypesList = (BasicDBList) resObj.get("types");

                        for (int i = 0; i < dbTypesList.size(); i++) {

                            DBObject obj = (DBObject) dbTypesList.get(i);

                            Hypernym hypernym = new Hypernym();
                            hypernym.setEntity(entityTitle);
                            hypernym.setEntityURL(resObj.get("uri").toString());
                            hypernym.setType(obj.get("label").toString());
                            hypernym.setTypeURL(obj.get("uri").toString());
                            hypernym.setOrigin("dbpedia");
                            hypernym.setAccuracy("-1.0");

                            if (obj.get("uri").toString().contains("http://dbpedia.org/ontology/")) {
                                hypernymsList.add(hypernym);
                            }
                        }
                    }
                }
                cursorNL.close();

                return hypernymsList;

            default:
                return hypernymsList;
            }
        } catch (UnsupportedEncodingException ex) {
            Logger.getLogger(THDWorker.class.getName()).log(Level.SEVERE, null, ex);
        }
        return hypernymsList;
    }

    public HashSet getYAGOHypernyms(String entityTitle, String lang) throws UnknownHostException {

        HashSet hypernymsList = new HashSet();
        BasicDBObject queryObj = new BasicDBObject();

        switch (lang) {

        case "en":
            queryObj.append("uri", "http://yago-knowledge.org/resource/" + entityTitle.replaceAll(" ", "_"))
                    .append("labels.lang", "en");
            DBObject resObj = MongoDBClient.getDBInstance().getCollection("entities_yago").findOne(queryObj);

            if (resObj != null) {
                hypernymsList = YagoOntologyManager.getInstance().getYagoHypernyms(entityTitle,
                        "http://yago-knowledge.org/resource/" + entityTitle.replaceAll(" ", "_"), "en", "yago");
            }
            return hypernymsList;

        case "de":
            queryObj = new BasicDBObject();
            queryObj.append("uri", "http://yago-knowledge.org/resource/" + entityTitle.replaceAll(" ", "_"))
                    .append("labels.lang", "de");

            resObj = MongoDBClient.getDBInstance().getCollection("entities_yago").findOne(queryObj);

            if (resObj != null) {
                hypernymsList = YagoOntologyManager.getInstance().getYagoHypernyms(entityTitle,
                        resObj.get("uri").toString(), "de", "yago");
            }
            return hypernymsList;

        case "nl":
            break;
        }
        return hypernymsList;
    }

    //    public DBpediaMapping getSubclassConfirmed(String res) throws UnknownHostException{
    //        
    //        BasicDBObject queryObj2 = new BasicDBObject();
    //        queryObj2.append("label", res);
    //        DBObject resObj2 = MongoDBClient.getDBInstance().getCollection("test_en_entities_superclass").findOne(queryObj2);
    //        
    //        if(resObj2 != null){
    //            BasicDBList e = (BasicDBList)resObj2.get("types");
    //            DBObject type = (DBObject)e.get(0);
    //            
    //            DBpediaMapping result = new DBpediaMapping();
    //            result.setLabel(type.get("label").toString());
    //            result.setUri(type.get("uri").toString());
    //            
    //            return result;
    //        } else {
    //            return null;
    //        }        
    //    }

    //    public String getSuperclass(String uri, String lang) throws UnknownHostException{
    //        
    //        BasicDBObject tempQueryObj = new BasicDBObject();
    //        tempQueryObj.append("uri", uri);
    //        tempQueryObj.append("types", new BasicDBObject().append("$elemMatch", new BasicDBObject().append("origin", "thd")));
    //        DBObject tempResObj = null;
    //        
    //        switch (lang) {
    //            case "en":
    //                tempResObj = MongoDBClient.getDBInstance().getCollection("test_en_entities").findOne(tempQueryObj);
    //            case "de":
    //                tempResObj = MongoDBClient.getDBInstance().getCollection("test_de_entities").findOne(tempQueryObj);           
    //        }
    //        
    //        if(tempResObj != null){
    //            BasicDBList typesList = (BasicDBList)tempResObj.get("types");
    //            DBObject tmpTypeObj = (DBObject) typesList.get(0);
    //            return tmpTypeObj.get("uri").toString();
    //        } else {
    //            return null;
    //        }
    //    }

    public ArrayList<Hypernym> removeDuplicates(List<Hypernym> l) {
        Set<Hypernym> s = new TreeSet<Hypernym>(new Comparator<Hypernym>() {

            @Override
            public int compare(Hypernym o1, Hypernym o2) {
                if (o1.getEntityURL().equals(o2.getEntityURL()) && o1.getOrigin().equals(o2.getOrigin())
                        && o1.getTypeURL().equals(o2.getTypeURL())) {
                    return 0;
                } else {
                    return -1;
                }
            }
        });
        s.addAll(l);
        return new ArrayList<Hypernym>(s);
    }
}