edu.ehu.galan.lite.utils.wikiminer.WikiminnerHelper.java Source code

Introduction

Here is the source code for edu.ehu.galan.lite.utils.wikiminer.WikiminnerHelper.java

Source

package edu.ehu.galan.lite.utils.wikiminer;

/*
 * Copyright (C) 2014 Angel Conde Manjon neuw84 at gmail.com
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
import edu.ehu.galan.lite.model.Topic;
import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
import com.google.gson.JsonParser;
import com.sleepycat.je.EnvironmentLockedException;
import edu.ehu.galan.lite.model.Document;
import edu.ehu.galan.lite.model.Term;
import edu.ehu.galan.lite.utils.Caches;
import edu.ehu.galan.lite.utils.ProgressTracker;
import edu.ehu.galan.lite.utils.wikiminer.gsonReaders.compare.GSonCompare;
import edu.ehu.galan.lite.utils.wikiminer.gsonReaders.compare.Interpretation;
import edu.ehu.galan.lite.utils.wikiminer.gsonReaders.data.articles.ArticleList;
import edu.ehu.galan.lite.utils.wikiminer.gsonReaders.data.articles.WikiDataArt;
import edu.ehu.galan.lite.utils.wikiminer.gsonReaders.disambiguate.DisamList;
import edu.ehu.galan.lite.utils.wikiminer.gsonReaders.disambiguate.Disambiguation;
import edu.ehu.galan.lite.utils.wikiminer.gsonReaders.disambiguate.DisambiguationDetailsList;
import edu.ehu.galan.lite.utils.wikiminer.gsonReaders.explore.GSonExplore;
import edu.ehu.galan.lite.utils.wikiminer.gsonReaders.listRelate.Comparison;
import edu.ehu.galan.lite.utils.wikiminer.gsonReaders.listRelate.Comparisons;
import edu.ehu.galan.lite.utils.wikiminer.gsonReaders.listSearch.ListSearch;
import edu.ehu.galan.lite.utils.wikiminer.gsonReaders.markUp.GSonMarkUp;
import edu.ehu.galan.lite.utils.wikiminer.gsonReaders.search.Label;
import edu.ehu.galan.lite.utils.wikiminer.gsonReaders.search.Search;
import edu.ehu.galan.lite.utils.wikiminer.gsonReaders.search.Sense;
import edu.ehu.galan.lite.utils.yago2.Char;
import gnu.trove.set.hash.TLongHashSet;
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.stream.Collectors;
import javax.xml.parsers.ParserConfigurationException;
import net.sf.ehcache.Cache;
import net.sf.ehcache.CacheManager;
import net.sf.ehcache.Element;
import org.apache.http.Header;
import org.apache.http.HttpResponse;
import org.apache.http.ParseException;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.HttpClient;
import org.apache.http.client.entity.GzipDecompressingEntity;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.impl.conn.PoolingClientConnectionManager;
import org.apache.http.util.EntityUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.wikipedia.miner.comparison.ArticleComparer;
import org.wikipedia.miner.comparison.LabelComparer;
import org.wikipedia.miner.model.Article;
import org.wikipedia.miner.model.Category;
import org.wikipedia.miner.model.Wikipedia;
import org.wikipedia.miner.util.NGrammer;
import org.wikipedia.miner.util.NGrammer.NGramSpan;
import org.wikipedia.miner.util.WikipediaConfiguration;
import org.xml.sax.SAXException;

/**
 * Helper class for interacting with the Wikiminer (Wikipedia Miner) services, either through the
 * remote REST API or through a local Wikipedia Miner installation (local mode).
 *
 * @author Angel Conde Manjon
 */
public class WikiminnerHelper {

    private final Cache cache;
    private HttpClient httpClient;
    private final Properties props;
    private String wikiminerUrl;
    private final Logger logger = LoggerFactory.getLogger(this.getClass());
    private String lang = "en";
    private int maxTopics = 50;
    private Caches caches;
    private boolean localMode = false;
    private Wikipedia wikipedia;
    private static volatile WikiminnerHelper instance = null;

    private WikiminnerHelper(String pPropDirs) {
        //PoolingHttpClientConnectionManager cm = new PoolingHttpClientConnectionManager();
        // Increase max total connection to 200
        // cm.setMaxTotal(100);
        // Increase default max connection per route to 20
        // cm.setDefaultMaxPerRoute(20);
        // Increase max connections for localhost:80 to 50
        PoolingClientConnectionManager pm = new PoolingClientConnectionManager();
        pm.setDefaultMaxPerRoute(20);
        pm.setMaxTotal(200);
        httpClient = new DefaultHttpClient(pm);
        //        ConnectionConfig connectionConfig = ConnectionConfig.custom()
        //                .setMalformedInputAction(CodingErrorAction.IGNORE)
        //                .setUnmappableInputAction(CodingErrorAction.IGNORE)
        //                .setCharset(Consts.UTF_8).build();
        //        cm.setDefaultConnectionConfig(connectionConfig);

        //        httpClient = HttpClients.custom()
        //                .setConnectionManager(cm)
        //                .build();
        httpClient.getParams().setParameter("http.protocol.content-charset", "UTF-8");
        cache = CacheManager.getInstance().getCache("LiteCache");
        props = new Properties();
        caches = new Caches();
        try (FileInputStream in = new FileInputStream(new File(pPropDirs + "lite/configs/general.conf"))) {
            props.load(in);
            wikiminerUrl = props.getProperty("serviceUrl");
            maxTopics = Integer.parseInt(props.getProperty("maxTopics"));
        } catch (IOException ex) {
            logger.error("Error while loading the WikiminerHelper properties file, check the directories", ex);
        }
        localMode = !"false".equals(props.getProperty("localMode"));
    }

    /**
     * Returns the singleton instance of this helper, creating it on first use.
     *
     * @param pPropDirs directory that contains the lite/configs/general.conf properties file
     * @return the singleton WikiminnerHelper instance
     */
    public static WikiminnerHelper getInstance(String pPropDirs) {
        if (instance == null) {
            synchronized (WikiminnerHelper.class) {
                if (instance == null) {
                    instance = new WikiminnerHelper(pPropDirs);
                }
            }
        }
        return instance;

    }
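
    /*
     * Usage sketch (not part of the original file): obtaining the singleton and releasing the
     * HTTP connection afterwards. The resource path below is hypothetical; it must point to a
     * directory containing lite/configs/general.conf.
     *
     *   WikiminnerHelper helper = WikiminnerHelper.getInstance("/path/to/resources/");
     *   helper.setLanguage("en"); // "en" is already the default
     *   Topic topic = helper.searchTopic("black hole");
     *   helper.closeConnection();
     */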

    /**
     * Reloads the helper properties (service URL and maximum topics per request) from the given
     * directory.
     *
     * @param pPropDirs directory that contains the lite/configs/general.conf properties file
     */
    public void setProperties(String pPropDirs) {
        try (FileInputStream in = new FileInputStream(new File(pPropDirs + "lite/configs/general.conf"))) {
            props.load(in);
            wikiminerUrl = props.getProperty("serviceUrl");
            maxTopics = Integer.parseInt(props.getProperty("maxTopics"));
        } catch (IOException ex) {
            logger.error("Error while loading the WikiminerHelper properties file, check the directories", ex);
        }

    }

    /**
     * Sets the language in which the Wikiminer calls will be made (the default is en).
     *
     * @param pLang the language code, for example "en"
     */
    public void setLanguage(String pLang) {
        lang = pLang;
    }

    /**
     * Compares two terms using the Wikiminer compare service, returning their relatedness and
     * disambiguation details.
     *
     * @param term1 the first term
     * @param term2 the second term
     * @return the comparison result, or null if either term cannot be recognized
     */
    public Compare compareTopics(String term1, String term2) {
        FileOutputStream st = null;
        Compare comp = new Compare();
        try {
            String ter1 = Char.encodeURIPathComponent(term1);
            String ter2 = Char.encodeURIPathComponent(term2);
            Element elem = cache.get(ter1 + ter2);
            if (elem == null) {
                HttpGet getRequest = new HttpGet(wikiminerUrl + "services/compare?term1=" + ter1.trim() + "&term2="
                        + ter2.trim() + "&wikipedia=" + lang + "&responseFormat=JSON");
                getRequest.addHeader("accept", "application/json");
                getRequest.addHeader("Accept-Encoding", "gzip");

                HttpResponse response = httpClient.execute(getRequest);
                if (response.getStatusLine().getStatusCode() != 200) {
                    throw new RuntimeException(
                            "Failed : HTTP error code : " + response.getStatusLine().getStatusCode());
                }
                Gson son = new GsonBuilder().create();
                GzipDecompressingEntity entity = new GzipDecompressingEntity(response.getEntity());
                String jsonText = EntityUtils.toString(entity, StandardCharsets.UTF_8);
                GSonCompare ex = son.fromJson(jsonText, GSonCompare.class);
                //                System.out.println("###");
                if (ex.getRelatedness() == null) {
                    logger.debug("unrecognized terms: {} {}", term1, term2);
                    return null;
                }

                List<Interpretation> interList = ex.getDisambiguationDetails().getInterpretations();
                if (interList.isEmpty()) {
                    return null;
                }
                comp.setDisambiguationConfidence(interList.get(0).getDisambiguationConfidence().floatValue());
                comp.setTerm1(interList.get(0).getTitle1());
                comp.setTerm2(interList.get(0).getTitle2());
                comp.setTerm1Id(interList.get(0).getId1());
                comp.setTerm2Id(interList.get(0).getId2());
                comp.setRelatedness(interList.get(0).getRelatedness().floatValue());
                elem = new Element(ter1 + ter2, comp);
                cache.put(elem);
                return (Compare) elem.getObjectValue();

            } else {
                elem = cache.get(ter1 + ter2);
                return (Compare) elem.getObjectValue();
            }
            //            System.out.println(comp);
        } catch (ClientProtocolException e) {
            logger.error(null, e);
        } catch (IOException e) {
            logger.error(null, e);
        } finally {
            try {
                if (st != null) {
                    st.close();
                }
            } catch (IOException ex) {
                logger.error(null, ex);
            }
        }
        return comp;
    }
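
    /*
     * Minimal sketch of comparing two terms in remote mode (helper is the singleton obtained via
     * getInstance). It assumes Compare exposes getters matching the setters used above
     * (getTerm1, getTerm2, getRelatedness); a null result means one of the terms was not
     * recognized.
     *
     *   Compare cmp = helper.compareTopics("star", "galaxy");
     *   if (cmp != null) {
     *       System.out.println(cmp.getTerm1() + " <-> " + cmp.getTerm2()
     *               + " relatedness=" + cmp.getRelatedness());
     *   }
     */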

    /**
     * Closes the connection to the Wikiminer REST services when using Wikiminer in remote mode
     */
    public void closeConnection() {
        if (httpClient != null) {
            httpClient.getConnectionManager().shutdown();
        }
    }

    /**
     * Opens the connection to the Wikiminer REST services when using Wikiminer in remote mode
     */
    public void openConnection() {

        httpClient = null;
        httpClient = new DefaultHttpClient();
        httpClient.getParams().setParameter("http.protocol.content-charset", "UTF-8");
    }

    /**
     * Maps a term to Wikipedia using the Wikiminer search service (only implemented in remote
     * mode).
     *
     * @param term1 the term to map
     * @return the mapped Topic, or null if the term has no senses or the call fails
     */
    public Topic searchTopic(String term1) {
        FileOutputStream st = null;
        //        System.out.println(term1);
        try {
            String term = Char.encodeURIPathComponent(term1);
            Element elem = cache.get(term);

            if (elem == null) {
                HttpGet getRequest = new HttpGet(
                        wikiminerUrl + "services/search?query=" + term + "&responseFormat=JSON");
                getRequest.addHeader("accept", "application/json");
                getRequest.addHeader("Accept-Encoding", "gzip");

                HttpResponse response = httpClient.execute(getRequest);
                if (response.getStatusLine().getStatusCode() != 200) {
                    logger.error("Search request failed for term: {}", term);
                    throw new RuntimeException(
                            "Failed : HTTP error code : " + response.getStatusLine().getStatusCode());
                }
                Gson son = new GsonBuilder().create();
                JsonParser parser = new JsonParser();
                GzipDecompressingEntity entity = new GzipDecompressingEntity(response.getEntity());
                String jsonText = EntityUtils.toString(entity, StandardCharsets.UTF_8);
                Search ex = son.fromJson(jsonText, Search.class);
                if (ex.getLabels().size() == 1 && ex.getLabels().get(0).getSenses().isEmpty()) {
                    return null;
                } else {
                    Topic top = new Topic(ex.getRequest().getQuery());

                    for (Label lab : ex.getLabels()) {
                        if (!lab.getSenses().isEmpty()) {
                            if (lab.getSenses().size() == 1) {
                                top.setId(lab.getSenses().get(0).getId());
                                top.setSourceTitle(lab.getSenses().get(0).getTitle());
                                //                                top.addSense(lab.getSenses().get(0).getTitle());
                            }
                            //                            top.addLabel(lab.getText());
                            for (Sense sen : lab.getSenses()) {
                                top.addSense(sen.getTitle());
                            }
                        }
                    }
                    //                    System.out.println(term1);
                    elem = new Element(term1, top);
                    cache.put(elem);
                    return top;

                }
            } else {
                return (Topic) elem.getObjectValue();
            }
        } catch (ClientProtocolException e) {
            logger.error(null, e);
        } catch (IOException e) {
            logger.error(null, e);
        } finally {
            try {
                if (st != null) {
                    st.close();
                }
            } catch (IOException ex) {
                logger.error(null, ex);
            }
        }
        return null;
    }
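
    /*
     * Usage sketch for searchTopic: the returned Topic carries the Wikipedia article id (when the
     * label has a single sense) plus the candidate sense titles. The example term is arbitrary.
     *
     *   Topic top = helper.searchTopic("neural network");
     *   if (top != null) {
     *       System.out.println(top.getId() + " senses: " + top.getSenseList());
     *   }
     */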

    /**
     * Given a Wikipedia page title, returns the article identifier associated with it.
     *
     * @param term1 the Wikipedia page title
     * @param pLang the Wikipedia language edition to query
     * @return the article id, or -1 if the title cannot be resolved
     */
    public int getIdFromTitle(String term1, String pLang) {
        FileOutputStream st = null;
        //        System.out.println(term1);
        try {
            String term = Char.encodeURIPathComponent(term1);
            Element elem = cache.get(term);

            if (elem == null) {
                HttpGet getRequest = new HttpGet(wikiminerUrl + "services/exploreArticle?title=" + term
                        + "&responseFormat=JSON&wikipedia=" + pLang);
                getRequest.addHeader("accept", "application/json");
                getRequest.addHeader("Accept-Encoding", "gzip");
                HttpResponse response = httpClient.execute(getRequest);
                if (response.getStatusLine().getStatusCode() != 200) {
                    logger.error("exploreArticle request failed for title: {}", term);
                    throw new RuntimeException(
                            "Failed : HTTP error code : " + response.getStatusLine().getStatusCode());
                }
                Gson son = new GsonBuilder().create();
                JsonParser parser = new JsonParser();
                GzipDecompressingEntity entity = new GzipDecompressingEntity(response.getEntity());
                String jsonText;
                jsonText = EntityUtils.toString(entity, StandardCharsets.UTF_8);

                WikiDataArt ex = son.fromJson(jsonText, WikiDataArt.class);
                List<ArticleList> list = ex.getArticleList();
                if (list.size() > 0) {
                    return list.get(0).getId();

                } else {
                    return -1;
                }
            }
        } catch (IOException | ParseException ex) {
            logger.error("Error while getting an id from a title", ex);
        }
        return -1;

    }
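
    /*
     * Usage sketch for getIdFromTitle; -1 signals that the title could not be resolved in the
     * requested Wikipedia edition.
     *
     *   int id = helper.getIdFromTitle("Machine learning", "en");
     *   if (id != -1) {
     *       List<String> labels = helper.exploreLabels(id);
     *   }
     */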

    /**
     * Maps a term to Wikipedia and accepts it only if it resolves to exactly one sense.
     *
     * @param term1 the term to validate
     * @return the Topic when it has a single sense, null otherwise
     */
    public Topic validateTopic(String term1) {
        Topic top = searchTopic(term1);
        if (top == null) {
            return null;
        }
        return top.getSenseList().size() == 1 ? top : null;
    }
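
    /*
     * Usage sketch for validateTopic: a non-null result means the term mapped to exactly one
     * Wikipedia sense, so it can be accepted without further disambiguation.
     *
     *   Topic unique = helper.validateTopic("astrophysics");
     *   boolean accepted = (unique != null);
     */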
    //return (top.getSenseList().size() == 1) ? top.getId() : -1;
    //TODO: In the original code we also accepted a term with several senses when one sense clearly dominated:
    //           while (iter.hasNext()) {
    //                Object object = iter.next();
    //                if (object instanceof Element) {
    //                    Element currentElement = (Element) object;
    //                    if (currentElement.getName().equalsIgnoreCase("label")) {
    //                        if (currentElement.getAttribute("linkDocCount").getIntValue() > 0) {
    //                            List<Element> childs = currentElement.getChildren();
    //                            if (childs.size() == 1) {
    //                                Element curr = childs.get(0);
    //                                top.setWikiId(curr.getAttribute("id").getIntValue());
    //                                top.addSense(curr.getAttributeValue("title"));
    //                                return true;
    //                            } else {
    //                                Iterator itr = childs.iterator();
    //                                while (itr.hasNext()) {
    //                                    Object obj = itr.next();
    //                                    if (obj instanceof Element) {
    //                                        Element curr = (Element) obj;
    //                                        if (curr.getName().equalsIgnoreCase("sense")) {
    //                                            if (curr.getAttributeValue("fromRedirect").equalsIgnoreCase("true")) {
    //                                                if (curr.getAttributeValue("fromTitle").equalsIgnoreCase("true")) {
    //                                                    String probability = curr.getAttributeValue("priorProbability");
    //                                                    float prob = Float.parseFloat(probability);
    //                                                    if (prob > 0.5) {
    //                                                        return true;
    //                                                    }
    //                                                }
    //                                            }
    //                                            top.addSense(curr.getAttributeValue("title"));
    //
    //                                        }
    //                                    }
    //                                }
    //
    //                            }
    //                            return valid;
    //}

    /**
     * Returns the list of labels for a Wikipedia article (only remote mode).
     *
     * @param wikiId the Wikipedia article id
     * @return the label texts, or null if the call fails
     */
    public List<String> exploreLabels(int wikiId) {
        FileOutputStream out = null;
        try {
            HttpGet getRequest = new HttpGet(
                    //&definitionmaxImageWidth=800&maxImageHeight=600&emphasisFormat=HTML&definitionLenght=LONG
                    wikiminerUrl + "/services/exploreArticle?id=" + wikiId
                            + "&labels&parentCategories&responseFormat=JSON");
            getRequest.addHeader("accept", "application/json");
            getRequest.addHeader("Accept-Encoding", "gzip");

            HttpResponse response = httpClient.execute(getRequest);
            if (response.getStatusLine().getStatusCode() != 200) {
                throw new RuntimeException(
                        "Failed : HTTP error code : " + response.getStatusLine().getStatusCode());
            }
            Gson son = new GsonBuilder().create();
            GzipDecompressingEntity entity = new GzipDecompressingEntity(response.getEntity());
            String jsonText = EntityUtils.toString(entity, StandardCharsets.UTF_8);
            GSonExplore ex = son.fromJson(jsonText, GSonExplore.class);
            List<String> labelList = new ArrayList<>();
            for (edu.ehu.galan.lite.utils.wikiminer.gsonReaders.explore.Label lab : ex.getLabels()) {
                labelList.add(lab.getText());
            }
            logger.debug("Explored labels for article {}", wikiId);
            return labelList;
        } catch (ClientProtocolException e) {
            logger.error(null, e);
        } catch (IOException e) {
            logger.error(null, e);
        } finally {
            if (out != null) {
                try {
                    out.close();
                } catch (IOException ex) {
                    logger.error(null, ex);
                }
            }
        }
        return null;
    }
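
    /*
     * Usage sketch for exploreLabels (remote mode only); the article id below is hypothetical.
     *
     *   List<String> labels = helper.exploreLabels(12345);
     *   if (labels != null) {
     *       labels.forEach(System.out::println);
     *   }
     */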

    /**
     * Maps a term list to Wikipedia using the Wikiminer services and returns a list of topics
     * (terms mapped to Wikipedia). Terms that cannot be mapped are discarded.
     *
     * @param termList the terms to map
     * @return the list of mapped topics
     */
    public List<Topic> parallelSearch(List<Term> termList) {
        List<String> lis = new ArrayList<>();
        long timeStart = System.nanoTime();
        List<Topic> topicList = new ArrayList<>();
        Gson son = new GsonBuilder().create();
        JsonParser parser = new JsonParser();
        List<Term> uncachedList = new ArrayList<>();
        int i = 0;
        int j = 0, h = 0, x = 0;
        ListSearch ex;
        if (!localMode) {
            try {

                for (Term term : termList) {
                    Element elem = cache.get(term.getTerm());
                    if (elem == null) {
                        uncachedList.add(term);
                    } else {
                        topicList.add((Topic) elem.getObjectValue());
                    }
                }
                logger.info("Mapping step start:");
                ProgressTracker tracker = new ProgressTracker((termList.size() / maxTopics) + 1, "....",
                        this.getClass());
                while (i < uncachedList.size()) {

                    String req = wikiminerUrl + "/services/search?queryList=";
                    String cacheElem = "";
                    int sum = 0;
                    // iterate over the uncached terms so the index stays consistent with the
                    // enclosing while (i < uncachedList.size()) loop
                    for (; i < uncachedList.size(); i++) {
                        String string = Char.encodeURIPathComponent(uncachedList.get(i).getTerm());
                        if (string.split("%20").length > 6) {
                            logger.debug("An invalid concept has been detected: " + uncachedList.get(i).getTerm());
                        } else {

                            //TODO: move this character blacklist to a configurable file (the web service is secured now)
                            if (!string.equals("%5B") && !string.equals("!") && !string.equals("")
                                    && !string.equals("%25") && !string.equals("+") && !string.equals("*")
                                    && !string.equals("+%20%2F") && !string.equals("_") && !string.equals("-")
                                    && !string.equals(".)") && !string.equals("%5D") && !string.equals("%5B")
                                    && !string.equals(",") && !string.equals(":") && !string.equals(";")
                                    && !string.equals(".") && !string.equals("/") && !string.equals("\\")
                                    && !string.equals("%2F") && !string.equals("=") && !string.equals("-")
                                    && !string.equals("%3A") && !string.equals("%3B") && !string.equals("%3E")
                                    && !string.equals("%3C") && !string.equals("%3F") && !string.equals("%C2%BF")
                                    && !string.equals("~") && !string.equals("&") && !string.equals("(")
                                    && !string.equals(")") && !string.equals(").")) {
                                cacheElem += string;
                                sum++;
                                h++;
                                string = string.replaceAll("[,;:]", "_");
                                req = req + string + ",";
                                if (sum == maxTopics) {
                                    break;
                                }
                            }
                        }
                    }
                    // strip the trailing comma left by the query-building loop
                    if (req.endsWith(",")) {
                        req = req.substring(0, req.length() - 1);
                    }
                    HttpGet getRequest = new HttpGet(req + "&wikipedia=" + lang + "&responseFormat=JSON");
                    getRequest.addHeader("accept", "application/json");
                    getRequest.addHeader("Accept-Encoding", "gzip");
                    HttpResponse response = httpClient.execute(getRequest);
                    GzipDecompressingEntity entity = new GzipDecompressingEntity(response.getEntity());
                    Header contentEncoding = response.getFirstHeader("Content-Encoding");
                    if (contentEncoding == null) {
                        EntityUtils.consume(entity);
                        logger.error("Some characters in the request have crashed the web service call. \n " + req);
                        logger.error("The response was: " + response.getStatusLine());
                        return topicList;
                    } else if (response.getStatusLine().getStatusCode() == 502) {
                        EntityUtils.consume(entity);

                        logger.error("The proxy has rejected the web service call (HTTP 502). \n" + req);
                        logger.error("The response was: " + response.getStatusLine());
                        return topicList;
                    }
                    tracker.update();
                    String jsonText = EntityUtils.toString(entity, StandardCharsets.UTF_8);
                    if (jsonText.contains("java.lang.ArrayIndexOutOfBoundsException")
                            || jsonText.contains("\"error\": \"Parameters missing\"")) {
                        logger.error("The current request has crashed the web service. Check the terms: " + req);
                    }
                    ex = son.fromJson(jsonText, ListSearch.class);
                    for (edu.ehu.galan.lite.utils.wikiminer.gsonReaders.listSearch.Label lab : ex.getLabels()) {
                        if (lab.getSenses().isEmpty()) {
                            //                        lis.add(lab.getText());
                            //                        System.out.println(lab.getText() + "\tinvalid");
                        } else {
                            Topic top = new Topic(lab.getText());
                            if (!lab.getSenses().isEmpty()) {
                                if (lab.getSenses().size() == 1) {
                                    top.setId(lab.getSenses().get(0).getId());

                                    //                                top.addSense(lab.getSenses().get(0).getTitle());
                                }
                                lab.getSenses().stream().forEach((sen) -> {
                                    top.addSense(sen.getTitle());
                                    top.addSenseId(sen.getId());
                                });

                            }
                            x++;
                            //System.out.println(lab.getText() + "\t" + top.getId() + "\t" + top.getSenseList());
                            topicList.add(top);
                            Element elen = new Element(top.getTopic(), top);
                            cache.put(elen);
                        }

                    }
                    i++;

                    //JsonArray Jarray = parser.parse(output.toString(),Example.class);
                }
                //
                //            System.out.println(topicList.size()+ " "+x+" "+h);
                //            System.exit(222);
                //           
                return topicList;
            } catch (IOException ex1) {
                logger.error("Web service call failed... check the config url or your web server status", ex1);
            }
            return topicList;
        } else {
            if (wikipedia != null) {
                logger.info("Mapping step start:");
                ProgressTracker tracker = new ProgressTracker((termList.size()), "Mapping articles....",
                        this.getClass());
                NGrammer nGrammer = new NGrammer(wikipedia.getConfig().getSentenceDetector(),
                        wikipedia.getConfig().getTokenizer());
                float minPriorProb = 0.01F;
                for (Term query : termList) {

                    try {
                        NGramSpan span = nGrammer.ngramPosDetect(query.getTerm())[0];
                        org.wikipedia.miner.model.Label label = wikipedia.getLabel(span, query.getTerm());
                        if (label.getSenses().length > 0) {
                            Topic top = new Topic(query.getTerm());
                            if (label.getSenses().length == 1) {
                                top.setId(label.getSenses()[0].getId());
                                //                                top.addSense(lab.getSenses().get(0).getTitle());
                            } else {
                                for (org.wikipedia.miner.model.Label.Sense sense : label.getSenses()) {
                                    if (sense.getPriorProbability() < minPriorProb) {
                                        break;
                                    }
                                    top.addSense((sense.getTitle()));
                                    top.addSenseId(sense.getId());
                                }
                                if (!top.getSenseList().isEmpty()) {
                                    topicList.add(top);
                                }
                            }
                        }
                    } catch (NullPointerException | ArrayIndexOutOfBoundsException ex3) {
                        //logger.info("query: " + query.getTerm(), ex3);
                    }
                    tracker.update();

                }
                return topicList;
            } else {
                logger.error("The local Wikipedia is not initialized; call the localMode method first");
                return topicList;
            }
        }
    }
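
    /*
     * Usage sketch for parallelSearch. It assumes Term offers a constructor taking the surface
     * string (not shown in this file); terms that cannot be mapped to Wikipedia are dropped from
     * the result.
     *
     *   List<Term> terms = Arrays.asList(new Term("black hole"), new Term("supernova"));
     *   List<Topic> mapped = helper.parallelSearch(terms);
     */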

    /**
     * Relates a list of topics to the important (gold) topics of the domain and returns the
     * pairwise comparisons.
     *
     * @param pTtopicList the topics to relate
     * @param cGold the article ids of the gold (domain) topics
     * @param relatedness the minimum relatedness threshold
     * @param minRelationship the minimum number of relationships (not used in this implementation)
     * @return the list of comparisons, or null on failure
     */
    public List<Comparison> parallelRelate(List<Topic> pTtopicList, List<Integer> cGold, float relatedness,
            int minRelationship) {
        List<Comparison> kust = new ArrayList<>();
        if (!localMode) {
            long timeStart = System.nanoTime();
            List<Topic> topicList = new ArrayList<>();
            Gson son = new GsonBuilder().create();
            JsonParser parser = new JsonParser();
            int i = 0;
            List<Integer> intList = new ArrayList<>();
            Comparisons ex = null;
            try {
                ProgressTracker tracker = new ProgressTracker((pTtopicList.size() / maxTopics) + 1, "....",
                        this.getClass());

                while (i < pTtopicList.size()) {
                    String cacheElem = "";
                    String req = wikiminerUrl + "services/compare?ids1=";
                    int sum = 0;
                    for (; i < pTtopicList.size(); i++) {
                        int id = (pTtopicList.get(i).getId());
                        cacheElem += id;
                        sum++;
                        req = req + id + ",";
                        if (sum == maxTopics) {
                            break;
                        }
                    }
                    req = req.substring(0, req.length() - 1);
                    req += "&ids2=";
                    for (Integer gold : cGold) {
                        req = req + gold.toString() + ",";
                        cacheElem += gold.toString();
                    }
                    req = req.substring(0, req.length() - 1);
                    //                Element elem = cache.get(cacheElem);
                    //                if (elem == null) {
                    HttpGet getRequest = new HttpGet(req + "&wikipedia=" + lang + "&responseFormat=JSON");
                    getRequest.addHeader("accept", "application/json");
                    getRequest.addHeader("Accept-Encoding", "gzip");
                    HttpResponse response = httpClient.execute(getRequest);
                    GzipDecompressingEntity entity = new GzipDecompressingEntity(response.getEntity());
                    String jsonText = EntityUtils.toString(entity, StandardCharsets.UTF_8);
                    EntityUtils.consume(entity);
                    ex = son.fromJson(jsonText, Comparisons.class);
                    //                    elem = new Element(cacheElem, ex);
                    //                    cache.put(elem);
                    //                } else {
                    //                    ex = (Comparisons) elem.getObjectValue();
                    //                }
                    for (Comparison comp : ex.getComparisons()) {
                        if (cGold.contains(comp.getHighId())) {
                            comp.setHighId(null);
                            kust.add(comp);
                        } else {
                            comp.setLowId(comp.getHighId());
                            comp.setHighId(null);
                            kust.add(comp);
                        }

                    }
                    //                for (Integer id : ex.getIds()) {
                    //                    intList.add(id);
                    ////                    System.out.println(id);
                    //                }
                    //            }
                    //            for (Integer integer : intList) {
                    //                for (Topic top : pTtopicList) {
                    //                    if (top.getId() == integer) {
                    //                        topicList.add(top);
                    //                        break;
                    //                    }
                    //                }
                    tracker.update();
                }
                long timeEnd = System.nanoTime();
                logger.debug("Parallel Relate processed in: " + ((timeEnd - timeStart) / 1000000) + " ms for size: "
                        + pTtopicList.size());
                return kust;
            } catch (IOException ex1) {
                logger.error(null, ex1);
            }
            return null;
        } else {
            if (wikipedia != null) {
                ArticleComparer artComparer = null;
                try {
                    artComparer = new ArticleComparer(wikipedia);
                } catch (Exception ex) {
                    logger.error("Error getting article comparer for this wikipedia");
                }
                if (artComparer == null) {
                    logger.error("No comparisons available for this Wikipedia");
                }
                //gather articles from ids1 ;
                TreeSet<Article> articles1 = new TreeSet<>();
                for (Topic id : pTtopicList) {
                    try {
                        Article art = (Article) wikipedia.getPageById(id.getId());
                        articles1.add(art);
                    } catch (Exception e) {
                        //msg.addInvalidId(id.);
                    }
                }

                //gather articles from ids2 ;
                TreeSet<Article> articles2 = new TreeSet<>();
                for (Integer id : cGold) {
                    try {
                        Article art = (Article) wikipedia.getPageById(id);
                        articles2.add(art);
                    } catch (Exception e) {
                        //msg.addInvalidId(id);
                    }
                }
                //if ids2 is not specified, then we want to compare each item in ids1 with every other one
                if (articles2.isEmpty()) {
                    articles2 = articles1;
                }
                TLongHashSet doneKeys = new TLongHashSet();
                float minRelatedness = relatedness;
                //  boolean showTitles = prmTitles.getValue(request);
                for (Article a1 : articles1) {
                    for (Article a2 : articles2) {
                        if (a1.equals(a2)) {
                            continue;
                        }
                        //relatedness is symmetric, so create a unique key for this pair of ids where order doesn't matter
                        Article min, max;
                        if (a1.getId() < a2.getId()) {
                            min = a1;
                            max = a2;
                        } else {
                            min = a2;
                            max = a1;
                        }
                        //long min = Math.min(a1.getId(), a2.getId()) ;
                        //long max = Math.max(a1.getId(), a2.getId()) ;
                        long key = ((long) min.getId()) + (((long) max.getId()) << 30);
                        if (doneKeys.contains(key)) {
                            continue;
                        }
                        double related = 0;
                        try {
                            related = artComparer.getRelatedness(a1, a2);
                        } catch (Exception ex) {
                        }
                        if (related >= minRelatedness) {
                            Comparison comp = new Comparison();
                            comp.setRelatedness(related);
                            comp.setHighId(max.getId());
                            comp.setLowId(min.getId());
                            if (cGold.contains(comp.getHighId())) {
                                comp.setHighId(null);
                                kust.add(comp);
                            } else {
                                comp.setLowId(comp.getHighId());
                                comp.setHighId(null);
                                kust.add(comp);
                            }
                        }

                        doneKeys.add(key);
                    }
                }

                return kust;
            } else {
                return null;
            }
        }
    }
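
    /*
     * Usage sketch for parallelRelate: relate the mapped topics to a set of "gold" article ids of
     * the domain. The ids and threshold below are hypothetical, and Comparison is assumed to
     * expose getters matching the setters used above.
     *
     *   List<Integer> goldIds = Arrays.asList(9532, 18105);
     *   List<Comparison> rels = helper.parallelRelate(mapped, goldIds, 0.3f, 1);
     *   if (rels != null) {
     *       rels.forEach(c -> System.out.println(c.getLowId() + " relatedness=" + c.getRelatedness()));
     *   }
     */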
    // private void disambiguateTopic(Topic top) {
    //        Compare comp;
    //        int count = 0;
    //        int lenght = cGold.size();
    ////        System.out.println("disambiguating: " + top.getTopic());
    //    //        for (String gold : cGold) {
    //            if (comp != null) {
    //                if (comp.getTerm1() != null) {
    //                    top.addProbableSense(comp.getTerm1(), comp.getTerm1Id());
    //                } else {
    //                    count++;
    //                }
    //            }
    //        }
    //        if (count == lenght) {
    //            top.setDisambiguationFail(true);
    //        }
    ////        System.out.println(top.getGoodSense() + "\t" + top.getWikiId());
    //    }

    /**
     * Disambiguates a list of topics against the gold (domain) terms, selecting the most probable
     * sense for each topic.
     *
     * @param pTtopicList the topics to disambiguate
     * @param cGold the gold (domain) terms used as disambiguation context
     * @return the topic list with senses assigned, or null on failure
     */
    public List<Topic> disambiguate(List<Topic> pTtopicList, List<String> cGold) {
        long timeStart = System.nanoTime();
        List<Topic> topicList = new ArrayList<>();
        if (!localMode) {
            Gson son = new GsonBuilder().create();
            JsonParser parser = new JsonParser();
            int i = 0;
            HashMap<String, List<List<edu.ehu.galan.lite.utils.wikiminer.gsonReaders.disambiguate.Interpretation>>> disDict = new HashMap<>();
            Disambiguation ex = null;
            logger.info("Disambiguation step start:");
            ProgressTracker tracker = new ProgressTracker((pTtopicList.size() / maxTopics) + 1, "....",
                    this.getClass());
            try {
                while (i < pTtopicList.size()) {
                    int sum = 0;
                    String cacheElem = "";
                    String req = wikiminerUrl + "/services/compare?term1List=";
                    for (; i < pTtopicList.size(); i++) {
                        //                    if(pTtopicList.get(i).getTopic().equalsIgnoreCase("ursa major")){
                        //                        System.out.println("dsdsadsadsa");
                        //                    }
                        String string = Char.encodeURIPathComponent(pTtopicList.get(i).getTopic());
                        if (!string.equals("%5B") && !string.equals("!") && !string.equals("")
                                && !string.equals("%25") && !string.equals("+") && !string.equals("*")
                                && !string.equals("+%20%2F") && !string.equals("_") && !string.equals("-")
                                && !string.equals(".)") && !string.equals("%5D") && !string.equals("%5B")
                                && !string.equals(",") && !string.equals(":") && !string.equals(";")
                                && !string.equals(".") && !string.equals("/") && !string.equals("\\")
                                && !string.equals("%2F") && !string.equals("=") && !string.equals("-")
                                && !string.equals("%3A") && !string.equals("%3B") && !string.equals("%3E")
                                && !string.equals("%3C") && !string.equals("%3F") && !string.equals("%C2%BF")
                                && !string.equals("~") && !string.equals("&") && !string.equals("(")
                                && !string.equals(")") && !string.equals(").")) {
                            cacheElem += string;
                            req = req + string + ",";
                            sum++;
                            if (sum == maxTopics) {
                                break;
                            }
                        }
                    }
                    req = req.substring(0, req.length() - 1);
                    req += "&term2List=";
                    for (String gold : cGold) {
                        req = req + Char.encodeURIPathComponent(gold) + ",";
                        cacheElem += Char.encodeURIPathComponent(gold);
                    }

                    req = req.substring(0, req.length() - 1);
                    Element elem = cache.get(cacheElem);
                    if (elem == null) {
                        HttpGet getRequest = new HttpGet(req + "&wikipedia=" + lang + "&responseFormat=JSON");
                        getRequest.addHeader("accept", "application/json");
                        getRequest.addHeader("Accept-Encoding", "gzip");
                        HttpResponse response = httpClient.execute(getRequest);
                        GzipDecompressingEntity entity = new GzipDecompressingEntity(response.getEntity());
                        Header contentEncoding = response.getFirstHeader("Content-Encoding");
                        if (contentEncoding == null) {
                            EntityUtils.consume(entity);
                            logger.error("Some characters in the request have crashed the web service call. \n " + req);
                            logger.error("The response was: " + response.getStatusLine());

                        } else if (response.getStatusLine().getStatusCode() == 502) {
                            EntityUtils.consume(entity);

                            logger.error("The proxy has rejected the web service call (HTTP 502). \n" + req);
                            logger.error("The response was: " + response.getStatusLine());
                            return topicList;
                        }
                        tracker.update();
                        String jsonText = EntityUtils.toString(entity, StandardCharsets.UTF_8);
                        ex = son.fromJson(jsonText, Disambiguation.class);
                        elem = new Element(cacheElem, ex);

                        cache.put(elem);
                    } else {
                        ex = (Disambiguation) elem.getObjectValue();
                    }
                    List<DisambiguationDetailsList> disam = ex.getDisambiguationDetailsList();

                    for (DisambiguationDetailsList disambiguationDetailsList : disam) {
                        String term = disambiguationDetailsList.getTerm1();
                        List<edu.ehu.galan.lite.utils.wikiminer.gsonReaders.disambiguate.Interpretation> interpretations = disambiguationDetailsList
                                .getInterpretations();
                        if (disDict.containsKey(term)) {
                            List<List<edu.ehu.galan.lite.utils.wikiminer.gsonReaders.disambiguate.Interpretation>> li = disDict
                                    .get(term);
                            li.add(interpretations);
                        } else {
                            List<List<edu.ehu.galan.lite.utils.wikiminer.gsonReaders.disambiguate.Interpretation>> li = new ArrayList<>();
                            li.add(interpretations);
                            disDict.put(term, li);
                        }
                    }
                }

                for (Topic top : pTtopicList) {
                    if (disDict.containsKey(top.getTopic())) {
                        List<List<edu.ehu.galan.lite.utils.wikiminer.gsonReaders.disambiguate.Interpretation>> li = disDict
                                .get(top.getTopic());
                        HashMap<String, List<Double>> count = new HashMap<>();
                        top.initializeSenseCount();
                        for (List<edu.ehu.galan.lite.utils.wikiminer.gsonReaders.disambiguate.Interpretation> list : li) {
                            if (list.size() > 0) {
                                edu.ehu.galan.lite.utils.wikiminer.gsonReaders.disambiguate.Interpretation in = list
                                        .get(0);
                                String title = in.getTitle1();
                                int id = in.getId1();
                                double disCong = in.getDisambiguationConfidence();
                                top.addProbableSense(title, id, disCong);
                            }
                        }
                        top.getGoodSense();

                    } else {
                        top.setDisambiguationFail(true);
                    }
                }

                long timeEnd = System.nanoTime();
                logger.info("Parallel Disamb processed in: " + ((timeEnd - timeStart) / 1000000) + " ms for size: "
                        + pTtopicList.size());

                return pTtopicList;
            } catch (IOException ex1) {
                logger.error(null, ex1);
            } catch (com.google.gson.JsonSyntaxException ex2) {
                logger.info("Some Topic crashed the web service: returning the topic list as-is");
                return pTtopicList;
            }
            return null;
        } else {
            if (wikipedia != null) {
                logger.info("Disambiguation step start:");
                ProgressTracker tracker = new ProgressTracker((pTtopicList.size() + 1), "Disambiguation...",
                        this.getClass());

                ArticleComparer artiComparer = null;
                try {
                    artiComparer = new ArticleComparer(wikipedia);
                } catch (Exception ex) {
                    logger.error("Error getting article comparer for this wikipedia");
                }
                if (artiComparer == null) {
                    logger.error("No comparisons available for this Wikipedia");
                }
                LabelComparer lbComparer = null;
                try {
                    lbComparer = new LabelComparer(wikipedia, artiComparer);
                } catch (Exception ex) {
                    logger.error("Error getting label comparer for this wikipedia ");
                }
                if (lbComparer == null) {
                    logger.error("Error getting label comparer for this wikipedia");
                }
                List<org.wikipedia.miner.model.Label> labels = new ArrayList<>();
                NGrammer nGrammer = new NGrammer(wikipedia.getConfig().getSentenceDetector(),
                        wikipedia.getConfig().getTokenizer());
                for (String string2 : cGold) {
                    NGrammer.NGramSpan span2 = nGrammer.ngramPosDetect(string2)[0];
                    org.wikipedia.miner.model.Label lab2 = wikipedia.getLabel(span2, string2);
                    labels.add(lab2);
                }
                List<String> invalidTerm = new ArrayList<>();
                for (Topic string1 : pTtopicList) {
                    //   System.out.println(string1);
                    NGrammer.NGramSpan span = nGrammer.ngramPosDetect(string1.getTopic())[0];
                    org.wikipedia.miner.model.Label lab1 = wikipedia.getLabel(span, string1.getTopic());
                    org.wikipedia.miner.model.Label.Sense[] sen1 = lab1.getSenses();
                    string1.initializeSenseCount();
                    if (sen1.length != 0) {
                        int j = 0;
                        for (org.wikipedia.miner.model.Label lab2 : labels) {
                            org.wikipedia.miner.model.Label.Sense[] sen2 = lab2.getSenses();
                            if (sen2.length != 0) {
                                try {
                                    LabelComparer.ComparisonDetails dets = lbComparer.compare(lab1, lab2);
                                    ArrayList<Interpretation> interpretations = new ArrayList<>();
                                    for (LabelComparer.SensePair sp : dets.getCandidateInterpretations()) {
                                        string1.addProbableSense(sp.getSenseA().getTitle(), sp.getSenseA().getId(),
                                                sp.getSenseRelatedness());
                                    }
                                } catch (Exception ex) {
                                    // java.util.logging.Logger.getLogger(WikiminnerHelper.class.getName()).log(Level.SEVERE, null, ex);
                                }

                            }
                            j++;
                        }
                        //System.out.println(string1);
                        string1.getGoodSense();

                    } else {
                        string1.setDisambiguationFail(true);
                    }
                    tracker.update();
                }

                return pTtopicList;

            } else {
                logger.error("The local Wikipedia is not initialized; call the localMode method first");

                return null;
            }

        }
    }
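
    /*
     * Usage sketch for disambiguate: the gold terms provide the disambiguation context, and each
     * Topic ends up with its most probable sense selected (or with its disambiguation-failed flag
     * set). The gold terms below are only examples.
     *
     *   List<Topic> resolved = helper.disambiguate(mapped, Arrays.asList("astronomy", "physics"));
     */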

    /**
     * Retrieves Wikipedia data (labels, parent categories, translations, definitions and
     * optionally links) for the topics mapped in the given document.
     *
     * @param pDoc the document whose topics will be enriched
     * @param links whether to also retrieve in/out links and link relatedness
     */
    public void getData(Document pDoc, boolean links) {
        caches.initializeId2TopicMap(pDoc);
        //        for(Document doc: pCorpus.getDocQueue()){
        if (!localMode) {
            HashMap<Integer, Topic> cacheId = caches.getId2TopicMap();
            List<Topic> topicList = pDoc.getTopicList();
            Gson son = new GsonBuilder().create();
            JsonParser parser = new JsonParser();
            int i = 0;
            List<Integer> invalidList = new ArrayList<>();
            i = 0;
            WikiDataArt ex;
            try {
                logger.info("Getting Wiki data from the mapped articles:");
                ProgressTracker tracker = new ProgressTracker((topicList.size() / maxTopics) + 1, "....",
                        this.getClass());
                while (i < topicList.size()) {
                    String req = wikiminerUrl + "/services/exploreArticle?ids=";
                    String cacheElem = "";
                    int sum = 0;
                    for (; i < topicList.size(); i++) {
                        int id = (topicList.get(i).getId());
                        cacheElem += id;
                        //                    if(id==18105){
                        //                        System.out.println(pTtopicList.get(i).toString());
                        //                    }
                        req = req + id + ",";
                        sum++;
                        if (sum == maxTopics) {
                            break;
                        }
                    }
                    req = req.substring(0, req.length() - 1);
                    Element elem = cache.get(cacheElem);
                    HttpGet getRequest = null;
                    if (elem == null) {
                        if (links) {
                            getRequest = new HttpGet(req + "&wikipedia=" + lang
                                    + "&parentCategories&translations&definition&labels&outLinks&inLinks&linkRelatedness&responseFormat=JSON");
                        } else {
                            getRequest = new HttpGet(req + "&wikipedia=" + lang
                                    + "&parentCategories&translations&definition&labels&responseFormat=JSON");
                        }

                        getRequest.addHeader("accept", "application/json");
                        getRequest.addHeader("Accept-Encoding", "gzip");
                        HttpResponse response = httpClient.execute(getRequest);
                        GzipDecompressingEntity entity = new GzipDecompressingEntity(response.getEntity());
                        String jsonText = EntityUtils.toString(entity, StandardCharsets.UTF_8);
                        EntityUtils.consume(entity);
                        ex = son.fromJson(jsonText, WikiDataArt.class);
                        elem = new Element(cacheElem, ex);
                        cache.put(elem);
                    } else {
                        ex = (WikiDataArt) elem.getObjectValue();
                    }
                    List<ArticleList> artiList = ex.getArticleList();
                    int count = 0;
                    for (ArticleList articleList : artiList) {
                        int id = articleList.getId();
                        if (cacheId.containsKey(id)) {
                            Topic top = cacheId.get(id);
                            count++;
                            addInfo2Article(top, articleList, cacheId);
                            //break; if more are disambiguated with the same we get errors
                            //....
                        }
                    }
                    List<Integer> invalids = ex.getInvalidList(); //may contain categories
                    for (Integer integer : invalids) {
                        invalidList.add(integer);
                        if (cacheId.containsKey(integer)) {
                            Topic top = cacheId.get(integer);
                            top.addLabel(top.getTopic());
                            top.addLabel(top.getSourceTitle());
                        }
                    }
                    tracker.update();
                }
                i = 0;

                while (i < invalidList.size()) {
                    int sum = 0;
                    String req = wikiminerUrl + "/services/exploreCategory?ids=";
                    for (; i < invalidList.size(); i++) {

                        int id = invalidList.get(i);
                        //                    if(id==18105){
                        //                        System.out.println(pTtopicList.get(i).toString());
                        //                    }
                        sum++;
                        req = req + id + ",";
                        if (sum == maxTopics) {
                            i++; // move past this id so it is not re-added at the start of the next batch
                            break;
                        }

                    }
                    req = req.substring(0, req.length() - 1);
                    HttpGet getRequest = new HttpGet(req + "&wikipedia=" + lang
                            + "&parentCategories&translations&definition&labels&responseFormat=JSON");
                    getRequest.addHeader("accept", "application/json");
                    getRequest.addHeader("Accept-Encoding", "gzip");
                    HttpResponse response = httpClient.execute(getRequest);
                    // Consume the entity so the pooled connection is released; the category data is not parsed yet
                    // (see the commented-out handling below).
                    EntityUtils.consume(response.getEntity());

                    //                WikiData ex = son.fromJson(response2String(response), WikiData.class);
                    //                List<ArticleList> artiList = ex.getArticleList();
                    //                for (ArticleList articleList : artiList) {
                    //                    int id = articleList.getId();
                    //                    for (Topic topic : topicList) {
                    //                        if (topic.getId() == id) {
                    //                            addInfo2Article(topic, articleList);
                    //                            break;
                    //                        }
                    //                    }
                    //                }
                }
            } catch (IOException ex1) {
                logger.error(null, ex1);
            }
            //}
        } else {
            if (wikipedia != null) {
                logger.info("Getting Wiki data from the mapped articles:");
                List<Topic> topicList = pDoc.getTopicList();

                List<Integer> validList = new ArrayList<>();
                for (Topic top : topicList) {
                    validList.add(top.getId());
                }
                ProgressTracker tracker = new ProgressTracker((validList.size()) + 1, "Getting data....",
                        this.getClass());
                Integer[] ids = validList.toArray(new Integer[validList.size()]);
                List<Integer> nullList = new ArrayList<>();
                List<Integer> invalidList = new ArrayList<>();
                List<Article> articleList = new ArrayList<>();
                List<Category> catList = new ArrayList<>();
                ArticleComparer artComparer = null;
                try {
                    artComparer = new ArticleComparer(wikipedia);
                } catch (Exception ex) {
                    logger.error("Error getting article comparer for this wikipedia");
                }
                if (artComparer == null) {
                    logger.error("No comparisons available for this Wikipedia");
                }

                for (int i = 0; i < ids.length; i++) {
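                    // Classify each mapped page id via the local Wikipedia Miner API: articles and categories are
                    // collected for further processing, disambiguation pages are skipped, and unresolvable ids are
                    // recorded in nullList.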
                    Integer integer = ids[i];
                    org.wikipedia.miner.model.Page pageIds = wikipedia.getPageById(integer);
                    if (pageIds == null) {
                        nullList.add(integer);
                        continue;
                    }
                    switch (pageIds.getType()) {
                    case disambiguation:
                        break;
                    case article:
                        articleList.add((Article) pageIds);
                        break;
                    default:
                        if (pageIds.getType() == org.wikipedia.miner.model.Page.PageType.category) {
                            catList.add((Category) pageIds);
                        } else {
                            nullList.add(integer);
                        }
                    }

                }
                for (Article art : articleList) {
                    Topic top = caches.getId2TopicMap().get(art.getId());
                    top.setIsIndividual(true);

                    String definition = art.getFirstParagraphMarkup();
                    top.setSourceDef(definition != null ? definition : "");
                    Article.Label[] labels = art.getLabels();
                    int total = 0;
                    for (Article.Label lbl : labels) {
                        total += lbl.getLinkOccCount();
                    }
                    for (Article.Label lbl : labels) {
                        long occ = lbl.getLinkOccCount();
                        if (occ > 0) {
                            top.addLabel(lbl.getText());
                        }
                    }
                    TreeMap<String, String> translations = art.getTranslations();
                    for (Map.Entry<String, String> entry : translations.entrySet()) {
                        top.addTranslation(entry.getKey(), entry.getValue());
                    }
                    Category[] parents = art.getParentCategories();
                    // logger.info("retrieving parents from " + parents.length + " total");
                    for (Category parent : parents) {
                        top.addParentCagegory(parent.getId(), parent.getTitle());
                    }
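                    // Process at most the first 300 outgoing and incoming links, scoring each against the current
                    // article with the ArticleComparer (when one could be built for this Wikipedia).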
                    int start = 0;
                    int max = 300;
                    if (max <= 0) {
                        max = Integer.MAX_VALUE;
                    } else {
                        max += start;
                    }
                    if (links) {
                        Article[] linksOut = art.getLinksOut();
                        //logger.info("retrieving out links [" + start + "," + max + "] from " + linksOut.length + " total");
                        for (int i = start; i < max && i < linksOut.length; i++) {
                            if (artComparer != null) {
                                try {
                                    top.addLinkOut(linksOut[i].getId(),
                                            artComparer.getRelatedness(art, linksOut[i]));
                                } catch (Exception ex) {
                                    //                                logger.debug("error comparing articles" + ex);
                                }
                            }
                        }
                        start = 0;
                        max = 300;
                        if (max <= 0) {
                            max = Integer.MAX_VALUE;
                        } else {
                            max += start;
                        }
                        Article[] linksIn = art.getLinksIn();
                        // logger.info("retrieving in links [" + start + "," + max + "] from " + linksIn.length + " total");
                        for (int i = start; i < max && i < linksIn.length; i++) {
                            if (artComparer != null) {
                                try {
                                    top.addLinkIn(linksIn[i].getId(), artComparer.getRelatedness(art, linksIn[i]));
                                } catch (Exception ex) {
                                    //    logger.debug("error comparing articles" + ex);
                                }
                            }
                        }
                    }
                    tracker.update();

                }

            }
        }
        caches.clearId2TopicMap();

    }

    /**
     * Retrieves the wiki markup and associated images for the topics mapped in the given document,
     * using either the remote Wikipedia Miner service or the local API.
     *
     * @param pDoc the document whose topic list will be enriched
     * @param maxImagewidth maximum width of the images requested from the service
     * @param maxImageheight maximum height of the images requested from the service
     */
    public void getMarkUpImages(Document pDoc, int maxImagewidth, int maxImageheight) {
        caches.initializeId2TopicMap(pDoc);
        //        for(Document doc: pCorpus.getDocQueue()){
        if (!localMode) {
            HashMap<Integer, Topic> cacheId = caches.getId2TopicMap();
            List<Topic> topicList = pDoc.getTopicList();
            Gson son = new GsonBuilder().create();
            int i = 0;
            List<Integer> invalidList = new ArrayList<>();
            GSonMarkUp ex;
            try {
                logger.info("Getting markup from the mapped articles:");
                ProgressTracker tracker = new ProgressTracker((topicList.size() / maxTopics) + 1, "....",
                        this.getClass());
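                // Topics are queried in batches of maxTopics / 2 ids, presumably because markup + image responses are
                // larger than plain article data.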
                while (i < topicList.size()) {
                    String req = wikiminerUrl + "/services/exploreArticle?ids=";
                    String cacheElem = "";
                    int sum = 0;
                    for (; i < topicList.size(); i++) {
                        int id = (topicList.get(i).getId());
                        cacheElem += id;
                        //                    if(id==18105){
                        //                        System.out.println(pTtopicList.get(i).toString());
                        //                    }
                        req = req + id + ",";
                        sum++;
                        if (sum == maxTopics / 2) {
                            i++; // move past this id so it is not re-added at the start of the next batch
                            break;
                        }
                    }
                    req = req.substring(0, req.length() - 1);
                    Element elem = cache.get(cacheElem);
                    HttpGet getRequest = null;
                    if (elem == null) {
                        getRequest = new HttpGet(req + "&wikipedia=" + lang + "&markUp&images&maxImageWidth="
                                + maxImagewidth + "&maxImageHeight=" + maxImageheight + "&responseFormat=JSON");
                        getRequest.addHeader("accept", "application/json");
                        getRequest.addHeader("Accept-Encoding", "gzip");
                        HttpResponse response = httpClient.execute(getRequest);
                        GzipDecompressingEntity entity = new GzipDecompressingEntity(response.getEntity());
                        String jsonText = EntityUtils.toString(entity, StandardCharsets.UTF_8);
                        EntityUtils.consume(entity);
                        ex = son.fromJson(jsonText, GSonMarkUp.class);
                        elem = new Element(cacheElem, ex);
                        cache.put(elem);
                    } else {
                        ex = (GSonMarkUp) elem.getObjectValue();
                    }
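                    // Attach the returned wiki markup and image URLs to each matching topic.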
                    List<edu.ehu.galan.lite.utils.wikiminer.gsonReaders.markUp.ArticleList> artiList = ex
                            .getArticleList();
                    int count = 0;
                    for (edu.ehu.galan.lite.utils.wikiminer.gsonReaders.markUp.ArticleList articleList : artiList) {
                        int id = articleList.getId();
                        if (cacheId.containsKey(id)) {
                            Topic top = cacheId.get(id);
                            count++;
                            addInfo2ArticleMarkUp(top, articleList, cacheId);
                            //no break here: several topics may be disambiguated to the same article, so breaking would cause errors
                        }
                    }
                }
            } catch (IOException ex1) {
                logger.error(null, ex1);
            }
            //}
        } else {
            if (wikipedia != null) {

                logger.info("Getting Wiki data from the mapped articles:");
                List<Topic> topicList = pDoc.getTopicList();

                List<Integer> validList = new ArrayList<>();
                for (Topic top : topicList) {
                    validList.add(top.getId());
                }
                ProgressTracker tracker = new ProgressTracker((validList.size()) + 1, "Getting data....",
                        this.getClass());
                Integer[] ids = validList.toArray(new Integer[validList.size()]);
                List<Integer> nullList = new ArrayList<>();
                List<Integer> invalidList = new ArrayList<>();
                List<Article> articleList = new ArrayList<>();
                List<Category> catList = new ArrayList<>();
                ArticleComparer artComparer = null;
                try {
                    artComparer = new ArticleComparer(wikipedia);
                } catch (Exception ex) {
                    logger.error("Error getting article comparer for this wikipedia");
                }
                if (artComparer == null) {
                    logger.error("No comparisons available for this Wikipedia");
                }

                for (int i = 0; i < ids.length; i++) {
                    Integer integer = ids[i];
                    org.wikipedia.miner.model.Page pageIds = wikipedia.getPageById(integer);
                    if (pageIds == null) {
                        nullList.add(integer);
                        continue;
                    }
                    switch (pageIds.getType()) {
                    case disambiguation:
                        break;
                    case article:
                        articleList.add((Article) pageIds);
                        break;
                    default:
                        if (pageIds.getType() == org.wikipedia.miner.model.Page.PageType.category) {
                            catList.add((Category) pageIds);
                        } else {
                            nullList.add(integer);
                        }
                    }

                }
                for (Article art : articleList) {
                    Topic top = caches.getId2TopicMap().get(art.getId());
                    top.setIsIndividual(true);

                    String definition = art.getFirstParagraphMarkup();
                    top.setSourceDef(definition != null ? definition : "");
                    Article.Label[] labels = art.getLabels();
                    int total = 0;
                    for (Article.Label lbl : labels) {
                        total += lbl.getLinkOccCount();
                    }
                    for (Article.Label lbl : labels) {
                        long occ = lbl.getLinkOccCount();
                        if (occ > 0) {
                            top.addLabel(lbl.getText());
                        }
                    }
                    TreeMap<String, String> translations = art.getTranslations();
                    for (Map.Entry<String, String> entry : translations.entrySet()) {
                        top.addTranslation(entry.getKey(), entry.getValue());
                    }
                    Category[] parents = art.getParentCategories();
                    // logger.info("retrieving parents from " + parents.length + " total");
                    for (Category parent : parents) {
                        top.addParentCagegory(parent.getId(), parent.getTitle());
                    }
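                    // Note: the link window computed below is never used in this method, since in/out links are not
                    // retrieved when only markup and images are requested.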
                    int start = 0;
                    int max = 300;
                    if (max <= 0) {
                        max = Integer.MAX_VALUE;
                    } else {
                        max += start;
                    }

                    tracker.update();

                }

            }
        }
        caches.clearId2TopicMap();

    }

    /**
     * Copies the data returned by the Wikipedia Miner service (labels, parent categories, translations,
     * in/out links, definition and title) onto the given topic.
     */
    private void addInfo2Article(Topic topic, ArticleList articleList, HashMap<Integer, Topic> pIdCache) {
        topic.setIsIndividual(true);

        for (edu.ehu.galan.lite.utils.wikiminer.gsonReaders.data.articles.Label object : articleList.getLabels()) {
            topic.addLabel(object.getText());
        }
        for (edu.ehu.galan.lite.utils.wikiminer.gsonReaders.data.articles.ParentCategory paren : articleList
                .getParentCategories()) {
            topic.addParentCagegory(paren.getId(), paren.getTitle());
        }
        for (edu.ehu.galan.lite.utils.wikiminer.gsonReaders.data.articles.Translation trans : articleList
                .getTranslations()) {
            topic.addTranslation(trans.getLang(), trans.getText());
        }
        for (edu.ehu.galan.lite.utils.wikiminer.gsonReaders.data.articles.Link link : articleList.getInLinks()) {
            if (pIdCache.containsKey(link.getId())) {
                topic.addLinkIn(link.getId(), link.getRelatedness());
            }
        }
        for (edu.ehu.galan.lite.utils.wikiminer.gsonReaders.data.articles.Link link : articleList.getOutLinks()) {
            if (pIdCache.containsKey(link.getId())) {
                topic.addLinkOut(link.getId(), link.getRelatedness());
            }
        }
        topic.setSourceDef(articleList.getDefinition());
        topic.setSourceTitle(articleList.getTitle());
        topic.addLabel(topic.getTopic());
        if (topic.getSourceDef() == null) {
            logger.debug("No definition returned for topic: {}", topic.getTopic());
        }
    }

    /**
     * If we are using Wikipedia Miner via the local API (local mode) the Wikipedia instance should be
     * closed after use to release its resources.
     */
    public void closeWikipedia() {
        if (wikipedia != null) {
            wikipedia.close();
        }
    }
    //    public void getDataTitles(Document doc) {
    //        List<Topic> topicList = doc.getTopicList();
    //        Gson son = new GsonBuilder().create();
    //        JsonParser parser = new JsonParser();
    //        int i = 0;
    //        int j = 0;
    //        List<Integer> invalidList = new ArrayList<>();
    //        i = 0;
    //        j = 0;
    //        WikiDataArt ex = null;
    //        for (Topic top : topicList) {
    //            try {
    //                String req = wikiminerUrl + "/services/exploreArticle?title=" + Char.encodeURIPathComponent(top.getSourceTitle());
    //                HttpGet getRequest = new HttpGet(req + "&wikipedia=" + lang + "&parentCategories&translations&definition&labels&responseFormat=JSON");
    //                getRequest.addHeader("accept", "application/json");
    //                getRequest.addHeader("Accept-Encoding", "gzip");
    //                HttpResponse response = httpClient.execute(getRequest);
    //                GzipDecompressingEntity entity = new GzipDecompressingEntity(response.getEntity());
    //                String jsonText = EntityUtils.toString(entity, StandardCharsets.UTF_8);
    //                EntityUtils.consume(entity);
    //
    //                ex = son.fromJson(jsonText, WikiDataArt.class);
    //                List<ArticleList> artiList = ex.getArticleList();
    //
    //                int count = 0;
    //                for (ArticleList articleList : artiList) {
    //                    int id = articleList.getId();
    //                    addInfo2Article(top, articleList, c);
    //                    top.setId(id);
    //                    //break; if more are disambiguated with the same we get errors
    //                    //....
    //
    //                }
    //                System.out.println(top.getTopic() + "\t" + top.getId() + "\t" + top.getSourceTitle());
    //////                List<Integer> invalids = ex.getInvalidList(); //may containg categories
    //////                for (Integer integer : invalids) {
    //////                    invalidList.add(integer);
    //////                    for (Topic topic : topicList) {
    //////                        if (topic.getId() == integer) {
    //////                            topic.addLabel(topic.getTopic());
    //////                            topic.addLabel(topic.getSourceTitle());
    //////                            break;
    //////                        }
    //////                    }
    ////                }
    //            } catch (IOException ex1) {
    //                java.util.logging.Logger.getLogger(WikiminnerHelper.class.getName()).log(Level.SEVERE, null, ex1);
    //            }
    //        }
    //    }

    /**
     * Enables or disables local mode. When enabled, a local Wikipedia Miner database is opened using
     * the configuration file located at configFile + "-" + lang + ".xml".
     *
     * @param enabled whether to use a local Wikipedia Miner installation instead of the remote service
     * @param configFile base path of the Wikipedia Miner configuration file (the language suffix and ".xml" are appended)
     */
    public void setLocalMode(boolean enabled, String configFile) {
        localMode = enabled;
        if (localMode) {
            try {
                WikipediaConfiguration conf = new WikipediaConfiguration(
                        new File(configFile + "-" + lang + ".xml"));
                conf.clearDatabasesToCache();
                wikipedia = new Wikipedia(conf, true);
            } catch (EnvironmentLockedException | ParserConfigurationException | SAXException | IOException
                    | ClassNotFoundException | InstantiationException | IllegalAccessException ex) {
                logger.error("Error loading Wikipedia miner Wikipedia, check the config dirs", ex);
            }
        }
    }

    /**
     * Enables or disables local mode using an already initialized Wikipedia Miner instance.
     *
     * @param enabled whether to use the given local Wikipedia instance instead of the remote service
     * @param wiki the Wikipedia Miner Wikipedia instance to use
     */
    public void setLocalModeWiki(boolean enabled, Wikipedia wiki) {
        localMode = enabled;
        wikipedia = wiki;
    }

    /**
     * Resolves a list of article titles against the Wikipedia Miner service and returns, for each
     * original id, the ids of the resolved article and its parent categories.
     *
     * @param inteList the original ids associated with each title (parallel to topicsEn)
     * @param topicsEn the article titles to query
     * @return a map from each original id to the resolved article id plus its parent category ids
     */
    public HashMap<Integer, List<Integer>> getDataTitles(List<Integer> inteList, List<String> topicsEn) {
        HashMap<String, Integer> mapper = new HashMap<>();
        HashMap<Integer, List<Integer>> titles = new HashMap<>();
        for (int i = 0; i < inteList.size(); i++) {
            Integer inte = inteList.get(i);
            String titl = topicsEn.get(i);
            mapper.put(titl, inte);
        }
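        // mapper lets us recover the original numeric id from the title returned by the service.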

        HashMap<Integer, List<Integer>> intesList;
        Gson son = new GsonBuilder().create();
        JsonParser parser = new JsonParser();
        int i = 0;
        List<Integer> invalidList = new ArrayList<>();
        WikiDataArt ex;
        try {
            logger.info("Getting Wiki data from the mapped articles:");
            ProgressTracker tracker = new ProgressTracker((topicsEn.size() / maxTopics) + 1, "....",
                    this.getClass());
            while (i < topicsEn.size()) {
                String req = wikiminerUrl + "/services/exploreArticle?titles=";
                String cacheElem = "";
                int sum = 0;
                for (; i < topicsEn.size(); i++) {
                    String string = Char.encodeURIPathComponent(topicsEn.get(i));
                    cacheElem += string;
                    req = req + string + ",";
                    sum++;
                    if (sum == maxTopics) {
                        i++; // move past this title so it is not re-added at the start of the next batch
                        break;
                    }
                }
                req = req.substring(0, req.length() - 1);
                HttpGet getRequest = new HttpGet(req + "&wikipedia=" + lang + "&parentCategories&responseFormat=JSON");
                getRequest.addHeader("accept", "application/json");
                getRequest.addHeader("Accept-Encoding", "gzip");
                HttpResponse response = httpClient.execute(getRequest);
                GzipDecompressingEntity entity = new GzipDecompressingEntity(response.getEntity());
                String jsonText = EntityUtils.toString(entity, StandardCharsets.UTF_8);
                EntityUtils.consume(entity);
                ex = son.fromJson(jsonText, WikiDataArt.class);
                List<ArticleList> artiList = ex.getArticleList();
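                // For each resolved article, collect its parent category ids plus its own id, keyed by the original id;
                // if the service normalizes a title (e.g. via a redirect), mapper.get(title) may return null here.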
                for (ArticleList articleList : artiList) {
                    int id = articleList.getId();
                    String title = articleList.getTitle();
                    List<Integer> catsIds = articleList.getParentCategories().stream().map(c -> c.getId())
                            .collect(Collectors.toList());
                    catsIds.add(id);
                    Integer origID = mapper.get(title);
                    titles.put(origID, catsIds);
                }
                tracker.update();

            }

        } catch (IOException ex1) {
            logger.error(null, ex1);
        }

        return titles;
    }

    /**
     * Copies the wiki markup and image URLs returned by the Wikipedia Miner service onto the given topic.
     */
    private void addInfo2ArticleMarkUp(Topic top,
            edu.ehu.galan.lite.utils.wikiminer.gsonReaders.markUp.ArticleList articleList,
            HashMap<Integer, Topic> cacheId) {
        top.setWikiMarkUp(articleList.getMarkup());
        articleList.getImages().stream().forEach(ac -> top.addImage(ac.getUrl()));

    }

}