de.tudarmstadt.ukp.wikipedia.api.Wikipedia.java Source code

Introduction

Here is the source code for de.tudarmstadt.ukp.wikipedia.api.Wikipedia.java
Source

/*******************************************************************************
 * Copyright 2016
 * Ubiquitous Knowledge Processing (UKP) Lab
 * Technische Universitt Darmstadt
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *******************************************************************************/
package de.tudarmstadt.ukp.wikipedia.api;

import de.tudarmstadt.ukp.wikipedia.api.exception.WikiApiException;
import de.tudarmstadt.ukp.wikipedia.api.exception.WikiInitializationException;
import de.tudarmstadt.ukp.wikipedia.api.exception.WikiPageNotFoundException;
import de.tudarmstadt.ukp.wikipedia.api.exception.WikiTitleParsingException;
import de.tudarmstadt.ukp.wikipedia.api.hibernate.WikiHibernateUtil;
import de.tudarmstadt.ukp.wikipedia.util.distance.LevenshteinStringDistance;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.hibernate.Session;
import org.hibernate.query.Query;
import org.hibernate.type.IntegerType;
import org.hibernate.type.StringType;

import java.util.*;
import java.util.Map.Entry;

/**
 * Provides access to Wikipedia articles and categories.
 *
 */
// TODO better JavaDocs!
public class Wikipedia implements WikiConstants {

    private final Log logger = LogFactory.getLog(getClass());
    private final Language language;
    private final DatabaseConfiguration dbConfig;

    /**
     * A mapping from page pageIDs to hibernateIDs.
     * It is a kind of cache. It is only filled, if a pageID was previously accessed.
     * The wikiapi startup time is way too long otherwise. */
    private final Map<Integer, Long> idMapPages;
    /**
     * A mapping from categories pageIDs to hibernateIDs.
     * It is a kind of cache. It is only filled, if a pageID was previously accessed.
     * The wikiapi startup time is way too long otherwise. */
    private final Map<Integer, Long> idMapCategories;

    private final MetaData metaData;

    /**
     * Creates a new Wikipedia object accessing the database indicated by the dbConfig parameter.
     * @param dbConfig A database configuration object telling the Wikipedida object where the data is stored and how it can be accessed.
     * @throws WikiInitializationException
     */
    public Wikipedia(DatabaseConfiguration dbConfig) throws WikiInitializationException {
        logger.info("Creating Wikipedia object.");

        this.language = dbConfig.getLanguage();
        this.dbConfig = dbConfig;

        this.idMapPages = new HashMap<Integer, Long>();
        this.idMapCategories = new HashMap<Integer, Long>();

        this.metaData = new MetaData(this);
    }

    /**
     * Gets the page with the given title.
     * If the title is a redirect, the corresponding page is returned.<br>
     * If the title start with a lowercase letter it converts it to an uppercase letter, as each Wikipedia article title starts with an uppercase letter.
     * Spaces in the title are converted to underscores, as this is a convention for Wikipedia article titles.
     *
     * For example, the article "Steam boat" could be queried with
     * - "Steam boat"
     * - "steam boat"
     * - "Steam_boat"
     * - "steam_boat"
     * and additionally all redirects that might point to that article.
     *
     * @param title The title of the page.
     * @return The page object for a given title.
     * @throws WikiApiException If no page or redirect with this title exists or the title could not be properly parsed.
     */
    public Page getPage(String title) throws WikiApiException {
        Page page = new Page(this, title, false);
        return page;
    }

    /**
     * Gets the page with the exactly the given title.<br>
     *
     * Note that when using this method you are responsible for converting a normal search string into the right wiki-style.<br>
     *
     * If the title is a redirect, the corresponding page is returned.<br>
     *
     * @param exactTitle The exact title of the page.
     * @return The page object for a given title.
     * @throws WikiApiException If no page or redirect with this title exists or the title could not be properly parsed.
     */
    public Page getPageByExactTitle(String exactTitle) throws WikiApiException {
        Page page = new Page(this, exactTitle, true);
        return page;
    }

    /**
     * Get all pages which match all lowercase/uppercase version of the given title.<br>
     * If the title is a redirect, the corresponding page is returned.<br>
     * Spaces in the title are converted to underscores, as this is a convention for Wikipedia article titles.
     *
     * @param title The title of the page.
     * @return A set of page objects matching this title.
     * @throws WikiApiException If no page or redirect with this title exists or the title could not be properly parsed.
     */
    public Set<Page> getPages(String title) throws WikiApiException {
        Set<Integer> ids = new HashSet<Integer>(getPageIdsCaseInsensitive(title));

        Set<Page> pages = new HashSet<Page>();
        for (Integer id : ids) {
            pages.add(new Page(this, id));
        }
        return pages;
    }

    /**
     * Gets the page for a given pageId.
     *
     * @param pageId The id of the page.
     * @return The page object for a given pageId.
     * @throws WikiApiException
     */
    public Page getPage(int pageId) throws WikiApiException {
        Page page = new Page(this, pageId);
        return page;
    }

    /**
     * Gets the title for a given pageId.
     *
     * @param pageId The id of the page.
     * @return The title for the given pageId.
     * @throws WikiApiException
     */
    public Title getTitle(int pageId) throws WikiApiException {
        Session session = this.__getHibernateSession();
        session.beginTransaction();
        Object returnValue = session.createNativeQuery("select p.name from PageMapLine as p where p.id = :pId")
                .setParameter("pId", pageId, IntegerType.INSTANCE).uniqueResult();
        session.getTransaction().commit();

        String title = (String) returnValue;
        if (title == null) {
            throw new WikiPageNotFoundException();
        }
        return new Title(title);
    }

    /**
     * Gets the page ids for a given title.<br>
     *
     *
     * @param title The title of the page.
     * @return The id for the page with the given title.
     * @throws WikiApiException
     */
    public List<Integer> getPageIds(String title) throws WikiApiException {
        Session session = this.__getHibernateSession();
        session.beginTransaction();
        Iterator results = session.createQuery("select p.pageID from PageMapLine as p where p.name = :pName")
                .setParameter("pName", title, StringType.INSTANCE).list().iterator();

        session.getTransaction().commit();

        if (!results.hasNext()) {
            throw new WikiPageNotFoundException();
        }
        List<Integer> resultList = new LinkedList<Integer>();
        while (results.hasNext()) {
            resultList.add((Integer) results.next());
        }
        return resultList;
    }

    /**
     * Gets the page ids for a given title with case insensitive matching.<br>
     *
     *
     * @param title The title of the page.
     * @return The ids of the pages with the given title.
     * @throws WikiApiException
     */
    public List<Integer> getPageIdsCaseInsensitive(String title) throws WikiApiException {
        title = title.toLowerCase();
        title = title.replaceAll(" ", "_");

        Session session = this.__getHibernateSession();
        session.beginTransaction();
        Iterator results = session.createQuery("select p.pageID from PageMapLine as p where lower(p.name) = :pName")
                .setParameter("pName", title, StringType.INSTANCE).list().iterator();

        session.getTransaction().commit();

        if (!results.hasNext()) {
            throw new WikiPageNotFoundException();
        }
        List<Integer> resultList = new LinkedList<Integer>();
        while (results.hasNext()) {
            resultList.add((Integer) results.next());
        }
        return resultList;
    }

    /**
     * Returns the article page for a given discussion page.
     *
     * @param discussionPage
     *            the discussion page object
     * @return The page object of the article associated with the discussion. If
     *         the parameter already was an article, it is returned directly.
     * @throws WikiApiException
     */
    public Page getArticleForDiscussionPage(Page discussionPage) throws WikiApiException {
        if (discussionPage.isDiscussion()) {
            String title = discussionPage.getTitle().getPlainTitle().replaceAll(WikiConstants.DISCUSSION_PREFIX,
                    "");

            if (title.contains("/")) {
                //If we have a discussion archive
                //TODO This does not support articles that contain slashes-
                //However, the rest of the API cannot cope with that as well, so this should not be any extra trouble
                title = title.split("/")[0];
            }
            return getPage(title);
        } else {
            return discussionPage;
        }

    }

    /**
     * Gets the discussion page for an article page with the given pageId.
     *
     * @param articlePageId The id of the page.
     * @return The page object for a given pageId.
     * @throws WikiApiException
     */
    public Page getDiscussionPage(int articlePageId) throws WikiApiException {
        //Retrieve discussion page with article title
        //TODO not the prettiest solution, but currently discussions are only marked in the title
        return getDiscussionPage(getPage(articlePageId));
    }

    /**
     * Gets the discussion page for the page with the given title.
     * The page retrieval works as defined in {@link #getPage(String title)}
     *
     * @param title The title of the page for which the discussions should be retrieved.
     * @return The page object for the discussion page.
     * @throws WikiApiException If no page or redirect with this title exists or title could not be properly parsed.
     */
    public Page getDiscussionPage(String title) throws WikiApiException {
        return getDiscussionPage(getPage(title));
    }

    /**
     * Gets the discussion page for the given article page
     * The provided page must not be a discussion page
     *
     * @param articlePage the article page for which a discussion page should be retrieved
     * @return The discussion page object for the given article page object
     * @throws WikiApiException If no page or redirect with this title exists or title could not be properly parsed.
     */
    public Page getDiscussionPage(Page articlePage) throws WikiApiException {
        String articleTitle = articlePage.getTitle().toString();
        if (articleTitle.startsWith(WikiConstants.DISCUSSION_PREFIX)) {
            return articlePage;
        } else {
            return new Page(this, WikiConstants.DISCUSSION_PREFIX + articleTitle);
        }
    }

    /**
    * Returns an iterable containing all archived discussion pages for
     * the page with the given title String. <br>
     * The page retrieval works as defined in {@link #getPage(int)}. <br>
     * The most recent discussion page is NOT included here!
     * It can be obtained with {@link #getDiscussionPage(Page)}.
     *
     * @param articlePageId The id of the page for which to the the discussion archives
     * @return The page object for the discussion page.
     * @throws WikiApiException If no page or redirect with this title exists or title could not be properly parsed.
     */
    public Iterable<Page> getDiscussionArchives(int articlePageId) throws WikiApiException {
        //Retrieve discussion archive pages with page id
        return getDiscussionArchives(getPage(articlePageId));
    }

    /**
    * Returns an iterable containing all archived discussion pages for
     * the page with the given title String. <br>
     * The page retrieval works as defined in {@link #getPage(String title)}.<br>
     * The most recent discussion page is NOT included here!
     * It can be obtained with {@link #getDiscussionPage(Page)}.
     *
     * @param title The title of the page for which the discussions should be retrieved.
     * @return The page object for the discussion page.
     * @throws WikiApiException If no page or redirect with this title exists or title could not be properly parsed.
     */
    public Iterable<Page> getDiscussionArchives(String title) throws WikiApiException {
        //Retrieve discussion archive pages with page title
        return getDiscussionArchives(getPage(title));
    }

    /**
     * Return an iterable containing all archived discussion pages for
     * the given article page. The most recent discussion page is not included.
     * The most recent discussion page can be obtained with {@link #getDiscussionPage(Page)}.
     * <br>
     * The provided page Object must not be a discussion page itself! If it is
     * a discussion page, is returned unchanged.
     *
     * @param articlePage the article page for which a discussion archives should be retrieved
     * @return An iterable with the discussion archive page objects for the given article page object
     * @throws WikiApiException If no page or redirect with this title exists or title could not be properly parsed.
     */
    public Iterable<Page> getDiscussionArchives(Page articlePage) throws WikiApiException {
        String articleTitle = articlePage.getTitle().getWikiStyleTitle();
        if (!articleTitle.startsWith(WikiConstants.DISCUSSION_PREFIX)) {
            articleTitle = WikiConstants.DISCUSSION_PREFIX + articleTitle;
        }

        Session session = this.__getHibernateSession();
        session.beginTransaction();

        List<Page> discussionArchives = new LinkedList<Page>();

        Query query = session.createQuery("SELECT pageID FROM PageMapLine where name like :name");
        query.setParameter("name", articleTitle + "/%", StringType.INSTANCE);
        Iterator results = query.list().iterator();

        session.getTransaction().commit();

        while (results.hasNext()) {
            int pageID = (Integer) results.next();
            discussionArchives.add(getPage(pageID));
        }

        return discussionArchives;

    }

    //// I do not want to make this public at the moment (TZ, March, 2007)
    /**
     * Gets the pages or redirects with a name similar to the pattern.
     * Calling this method is quite costly, as similarity is computed for all names.
     * @param pPattern The pattern.
     * @param pSize The maximum size of the result list. Only the most similar results will be included.
     * @return A map of pages with names similar to the pattern and their distance values. Smaller distances are more similar.
     * @throws WikiApiException
     */
    protected Map<Page, Double> getSimilarPages(String pPattern, int pSize) throws WikiApiException {
        Title title = new Title(pPattern);
        String pattern = title.getWikiStyleTitle();

        // a mapping of the most similar pages and their similarity values
        // It is returned by this method.
        Map<Page, Double> pageMap = new HashMap<Page, Double>();

        // holds a mapping of the best distance values to page IDs
        Map<Integer, Double> distanceMap = new HashMap<Integer, Double>();

        Session session = this.__getHibernateSession();
        session.beginTransaction();
        Iterator results = session.createQuery("select pml.pageID, pml.name from PageMapLine as pml").list()
                .iterator();
        while (results.hasNext()) {
            Object[] row = (Object[]) results.next();
            int pageID = (Integer) row[0];
            String pageName = (String) row[1];

            //// this returns a similarity - if we want to use it, we have to change the semantics the ordering of the results
            //            double distance = new Levenshtein().getSimilarity(pageName, pPattern);
            double distance = new LevenshteinStringDistance().distance(pageName, pattern);

            distanceMap.put(pageID, distance);

            // if there are more than "pSize" entries in the map remove the last one (it has the biggest distance)
            if (distanceMap.size() > pSize) {
                Set<Map.Entry<Integer, Double>> valueSortedSet = new TreeSet<Map.Entry<Integer, Double>>(
                        new ValueComparator());
                valueSortedSet.addAll(distanceMap.entrySet());
                Iterator it = valueSortedSet.iterator();
                // remove the first element
                if (it.hasNext()) {
                    // get the id of this entry and remove it in the distanceMap
                    Map.Entry entry = (Map.Entry) it.next();
                    distanceMap.remove(entry.getKey());
                }
            }

        }
        session.getTransaction().commit();

        for (int pageID : distanceMap.keySet()) {
            Page page = null;
            try {
                page = this.getPage(pageID);
            } catch (WikiPageNotFoundException e) {
                logger.error("Page with pageID " + pageID + " could not be found. Fatal error. Terminating.");
                e.printStackTrace();
                System.exit(1);
            }
            pageMap.put(page, distanceMap.get(pageID));
        }

        return pageMap;
    }

    /**
     * Gets the category for a given title.
     * If the category title start with a lowercase letter it converts it to an uppercase letter, as each Wikipedia category title starts with an uppercase letter.
     * Spaces in the title are converted to underscores, as this is a convention for Wikipedia category titles.
     *
     * For example, the (possible) category "Famous steamboats" could be queried with
     * - "Famous steamboats"
     * - "Famous_steamboats"
     * - "famous steamboats"
     * - "famous_steamboats"
     * @param title The title of the category.
     * @return The category object with the given title.
     * @throws WikiApiException If no category with the given title exists.
     */
    public Category getCategory(String title) throws WikiApiException {
        Category cat = new Category(this, title);
        return cat;
    }

    /**
     * Gets the category for a given pageId.
     * @param pageId The id of the category.
     * @return The category object or null if no category with this pageId exists.
     */
    public Category getCategory(int pageId) {
        long hibernateId = __getCategoryHibernateId(pageId);
        if (hibernateId == -1) {
            return null;
        }

        try {
            Category cat = new Category(this, hibernateId);
            return cat;
        } catch (WikiPageNotFoundException e) {
            return null;
        }
    }

    /**
     * This returns an iterable over all categories, as returning all category objects would be much too expensive.
     * @return An iterable over all categories.
     */
    public Iterable<Category> getCategories() {
        return new CategoryIterable(this);
    }

    /**
     * Get all wikipedia categories.
     * Returns only an iterable, as a collection may not fit into memory for a large wikipedia.
     * @param bufferSize The size of the internal page buffer.
     * @return An iterable over all categories.
     */
    protected Iterable<Category> getCategories(int bufferSize) {
        return new CategoryIterable(this, bufferSize);
    }

    /**
     * Protected method that is much faster than the public version, but exposes too much implementation details.
     * Get a set with all category pageIDs. Returning all category objects is much too expensive.
     * @return A set with all category pageIDs
     */
    protected Set<Integer> __getCategories() {
        // TODO this should be replaced with the buffered category iterator, as it might produce an HeapSpace Overflow, if there are too many categories.

        Session session = this.__getHibernateSession();
        session.beginTransaction();
        List<Integer> idList = session.createQuery("select cat.pageId from Category as cat").list();
        Set<Integer> categorySet = new HashSet<Integer>(idList);
        session.getTransaction().commit();

        return categorySet;
    }

    /**
     * Get all wikipedia pages.
     * Does not include redirects, as they are only pointers to real pages.
     * Returns only an iterable, as a collection may not fit into memory for a large wikipedia.
     * @return An iterable over all pages.
     */
    public Iterable<Page> getPages() {
        return new PageIterable(this, false);
    }

    /**
     * Get all wikipedia pages.
     * Does not include redirects, as they are only pointers to real pages.
     * Returns only an iterable, as a collection may not fit into memory for a large wikipedia.
     * @param bufferSize The size of the internal page buffer.
     * @return An iterable over all pages.
     */
    protected Iterable<Page> getPages(int bufferSize) {
        return new PageIterable(this, false, bufferSize);
    }

    /**
     * Protected method that is much faster than the public version, but exposes too much implementation details.
     * Get a set with all pageIDs. Returning all page objects is much too expensive.
     * Does not include redirects, as they are only pointers to real pages.
     *
     * As ids can be useful for several application (e.g. in combination with
     * the RevisionMachine, they have been made publically available via
     * {@link #getPageIds()}.
     *
     * @return A set with all pageIDs. Returning all pages is much to expensive.
     */
    protected Set<Integer> __getPages() {
        Session session = this.__getHibernateSession();
        session.beginTransaction();
        List<Integer> idList = session.createQuery("select page.pageId from Page as page").list();
        Set<Integer> pageSet = new HashSet<Integer>(idList);
        session.getTransaction().commit();

        return pageSet;
    }

    /**
     * @return an iterable over all pageids (without redirects)
     */
    public Iterable<Integer> getPageIds() {
        return this.__getPages();
    }

    /**
     * Get the pages that match the given query.
     * Does not include redirects, as they are only pointers to real pages.
     * Attention: may be running very slow, depending on the size of the Wikipedia!
     * @param query A query object containing the query conditions.
     * @return A set of pages that match the given query.
     * @throws WikiApiException
     */
    public Iterable<Page> getPages(PageQuery query) throws WikiApiException {
        return new PageQueryIterable(this, query);
    }

    /**
     * Get all articles (pages MINUS disambiguationPages MINUS redirects).
     * Returns only an iterable, as a collection may not fit into memory for a large wikipedia.
     * @return An iterable of all article pages.
     */
    public Iterable<Page> getArticles() {
        return new PageIterable(this, true);
    }

    /**
     * Get all titles including disambiguation pages and redirects).
     * Returns only an iterable, as a collection may not fit into memory for a large wikipedia.
     * @return An iterable of all article pages.
     */
    public Iterable<Title> getTitles() {
        return new TitleIterable(this);
    }

    /**
     * Returns the language of this Wikipedia.
     * @return The language of this Wikipedia.
     */
    public Language getLanguage() {
        return this.language;
    }

    /**
     * Tests, whether a page or redirect with the given title exists.
     * Trying to retrieve a page that does not exist in Wikipedia throws an exception.
     * You may catch the exception or use this test, depending on your task.
     * @param title The title of the page.
     * @return True, if a page or redirect with that title exits, false otherwise.
     */
    public boolean existsPage(String title) {

        if (title == null || title.length() == 0) {
            return false;
        }

        Title t;
        try {
            t = new Title(title);
        } catch (WikiTitleParsingException e) {
            return false;
        }
        String encodedTitle = t.getWikiStyleTitle();

        Session session = this.__getHibernateSession();
        session.beginTransaction();
        Object returnValue = session
                .createNativeQuery("select p.id from PageMapLine as p where p.name = :pName COLLATE utf8_bin")
                .setParameter("pName", encodedTitle, StringType.INSTANCE).uniqueResult();
        session.getTransaction().commit();

        return returnValue != null;
    }

    /**
     * Tests, whether a page with the given pageID exists.
     * Trying to retrieve a pageID that does not exist in Wikipedia throws an exception.
     *
     * @param pageID A pageID.
     * @return True, if a page with that pageID exits, false otherwise.
     */
    public boolean existsPage(int pageID) {

        // TODO carefully, this is a hack to provide a much quicker way to test whether a page exists.
        // Encoding the title in this way surpasses the normal way of creating a title first.
        // This should get a unit test to make sure the encoding function is in line with the title object.
        // Anyway, I do not like this hack :-|

        if (pageID < 0) {
            return false;
        }

        Session session = this.__getHibernateSession();
        session.beginTransaction();
        List returnList = session.createNativeQuery("select p.id from PageMapLine as p where p.pageID = :pageId")
                .setParameter("pageId", pageID, IntegerType.INSTANCE).list();
        session.getTransaction().commit();

        return returnList.size() != 0;
    }

    /**
     * Get the hibernate ID to a given pageID of a page.
     * We need different methods for pages and categories here, as a page and a category can have the same ID.
     *
     * @param pageID A pageID that should be mapped to the corresponding hibernate ID.
     * @return The hibernateID of the page with pageID or -1, if the pageID is not valid
     */
    protected long __getPageHibernateId(int pageID) {
        long hibernateID = -1;

        // first look in the id mapping cache
        if (idMapPages.containsKey(pageID)) {
            return idMapPages.get(pageID);
        }

        // The id was not found in the id mapping cache.
        // It may not be in the cahe or may not exist at all.
        Session session = this.__getHibernateSession();
        session.beginTransaction();
        Object retObjectPage = session.createQuery("select page.id from Page as page where page.pageId = :pageId")
                .setParameter("pageId", pageID, IntegerType.INSTANCE).uniqueResult();
        session.getTransaction().commit();
        if (retObjectPage != null) {
            hibernateID = (Long) retObjectPage;
            // add it to the cache
            idMapPages.put(pageID, hibernateID);
            return hibernateID;
        }

        return hibernateID;
    }

    /**
     * Get the hibernate ID to a given pageID of a category.
     * We need different methods for pages and categories here, as a page and a category can have the same ID.
     *
     * @param pageID A pageID that should be mapped to the corresponding hibernate ID.
     * @return The hibernateID of the page with pageID or -1, if the pageID is not valid
     */
    protected long __getCategoryHibernateId(int pageID) {
        long hibernateID = -1;

        // first look in the id mapping cache
        if (idMapCategories.containsKey(pageID)) {
            return idMapCategories.get(pageID);
        }

        // The id was not found in the id mapping cache.
        // It may not be in the cahe or may not exist at all.
        Session session = this.__getHibernateSession();
        session.beginTransaction();
        Object retObjectPage = session.createQuery("select cat.id from Category as cat where cat.pageId = :pageId")
                .setParameter("pageId", pageID, IntegerType.INSTANCE).uniqueResult();
        session.getTransaction().commit();
        if (retObjectPage != null) {
            hibernateID = (Long) retObjectPage;
            // add it to the cache
            idMapCategories.put(pageID, hibernateID);
        }

        return hibernateID;
    }

    /**
     * Returns a MetaData object containing all meta data about this instance of Wikipedia.
     * @return A MetaData object containing all meta data about this instance of Wikipedia.
     */
    public MetaData getMetaData() {
        return this.metaData;
    }

    /**
     * Returns the DatabaseConfiguration object that was used to create the Wikipedia object.
     * @return The DatabaseConfiguration object that was used to create the Wikipedia object.
     */
    public DatabaseConfiguration getDatabaseConfiguration() {
        return this.dbConfig;
    }

    /**
     * Shortcut for getting a hibernate session.
     * @return
     */
    protected Session __getHibernateSession() {
        return WikiHibernateUtil.getSessionFactory(this.dbConfig).getCurrentSession();
    }

    /**
     * The ID consists of the host, the database, and the language.
     * This should be unique in most cases.
     * @return Returns a unique ID for this Wikipedia object.
     */
    public String getWikipediaId() {
        StringBuilder sb = new StringBuilder();
        sb.append(this.getDatabaseConfiguration().getHost());
        sb.append("_");
        sb.append(this.getDatabaseConfiguration().getDatabase());
        sb.append("_");
        sb.append(this.getDatabaseConfiguration().getLanguage());
        return sb.toString();
    }

}

class ValueComparator implements Comparator<Map.Entry<Integer, Double>> {

    @Override
    public int compare(Entry<Integer, Double> e1, Entry<Integer, Double> e2) {

        double c1 = e1.getValue();
        double c2 = e2.getValue();

        if (c1 < c2) {
            return 1;
        } else if (c1 > c2) {
            return -1;
        } else {
            return 0;
        }
    }
}