it.polito.tellmefirst.web.rest.enhance.Enhancer.java Source code

Java tutorial

Introduction

Here is the source code for it.polito.tellmefirst.web.rest.enhance.Enhancer.java

Source

/**
 * TellMeFirst - A Knowledge Discovery Application
 *
 * Copyright (C) 2012 - 2015 Federico Cairo, Giuseppe Futia, Federico Benedetto, Alessio Melandri
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

package it.polito.tellmefirst.web.rest.enhance;

import it.polito.tellmefirst.enhancer.BBCEnhancer;
import it.polito.tellmefirst.enhancer.NYTimesEnhancer;
import it.polito.tellmefirst.lucene.KBIndexSearcher;
import it.polito.tellmefirst.web.rest.apimanager.*;
import it.polito.tellmefirst.web.rest.lodmanager.*;
import it.polito.tellmefirst.apimanager.NYTimesSearcher;
import it.polito.tellmefirst.lucene.SimpleSearcher;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;

import java.io.IOException;
import java.util.ArrayList;

/**
 * Collects additional material for a DBpedia resource URI by delegating to the project's
 * managers and to remote web APIs: an image (Wikipedia / Wikidata), coordinates, title and
 * abstract (DBpedia / local indexes), news (New York Times, BBC) and a video (YouTube).
 *
 * Every lookup that talks to a remote service through {@code restManager} is best-effort:
 * failures are logged and a neutral value ({@link #DEFAULT_IMAGE} or an empty string) is returned.
 */
public class Enhancer {

    private RestManager restManager;
    private DBpediaManager dBpediaManager;
    private NYTimesSearcher nyTimesSearcher;
    private VideoManager videoManager;
    private ArrayList<String> typesWhiteList;
    private SimpleSearcher italianSearcher;
    private SimpleSearcher englishSearcher;
    private KBIndexSearcher kbItalianSearcher;
    private KBIndexSearcher kbEnglishSearcher;
    static Log LOG = LogFactory.getLog(Enhancer.class);
    public final static String DEFAULT_IMAGE = "http://tellmefirst.polito.it/images/default_img.jpg";

    // NOTE(review): this is the GData (v2) YouTube endpoint, which Google has retired in favour of
    // the YouTube Data API v3 - confirm whether this lookup still returns anything.
    private static final String YOUTUBE_QUERY_PREFIX = "https://gdata.youtube.com/feeds/api/videos?q=";
    private static final String YOUTUBE_QUERY_SUFFIX = "&start-index=1&max-results=1&v=2&hd=true";

    private static final String TYPE_MUSICAL_WORK = "http://dbpedia.org/ontology/MusicalWork";
    private static final String TYPE_ATHLETE = "http://dbpedia.org/ontology/Athlete";
    private static final String TYPE_BAND = "http://dbpedia.org/ontology/Band";

    /**
     * Instantiate the enhancer taking as input all indexes involved at the enrichment phase.
     *
     * To improve when an enhancement git module is created. In the future, if better solutions
     * to get images are found, the Knowledge Base Indexes used to get entities related to a
     * specific URI will be removed. Language management should also be improved.
     *
     * @param is Italian Corpus Index Searcher.
     * @param es English Corpus Index Searcher.
     * @param kbIt Italian Knowledge Base Index Searcher.
     * @param kbEn English Knowledge Base Index Searcher.
     *
     * @since 3.0.0.0.
     */
    public Enhancer(SimpleSearcher is, SimpleSearcher es, KBIndexSearcher kbIt, KBIndexSearcher kbEn) {
        LOG.debug("[constructor] - BEGIN");
        typesWhiteList = createTypesWhiteList();
        restManager = new RestManager();
        dBpediaManager = new DBpediaManager();
        nyTimesSearcher = new NYTimesSearcher();
        videoManager = new VideoManager();
        italianSearcher = is;
        englishSearcher = es;
        kbItalianSearcher = kbIt;
        kbEnglishSearcher = kbEn;
        LOG.debug("[constructor] - END");
    }

    /**
     * Looks up an image for a resource, first through the Wikipedia {@code pageimages} API and,
     * when that yields no file name, through two Wikidata API calls.
     *
     * @param uri DBpedia URI of the resource; a URI starting with {@code http://dbpedia} selects
     *            the English Wikipedia, anything else the Italian one.
     * @param label page title used in the API queries (spaces are replaced by {@code +}).
     * @return a {@code Special:Redirect/file/} URL for the image found, or {@link #DEFAULT_IMAGE}
     *         when no image is found or any step throws.
     */
    public String getImageFromMediaWiki(String uri, String label) {
        LOG.debug("[getImageFromMediaWiki] - BEGIN");
        String imageUrl = DEFAULT_IMAGE;
        try {
            String lang = uri.startsWith("http://dbpedia") ? "en" : "it";
            // NOTE(review): only spaces are escaped here; a label containing '&', '#' or similar
            // would corrupt the query string. Whether restManager encodes URLs itself is not
            // visible from this file, so the original escaping is kept.
            String titleParam = label.replace(" ", "+");

            String query = "https://" + lang + ".wikipedia.org/w/api.php?action=query&prop=pageimages&titles="
                    + titleParam + "&format=xml";
            LOG.debug("Call to Wikimedia Commons service for the resource " + uri + ": " + query);
            Document doc = Jsoup.parse(restManager.getStringFromAPI(query));
            String imageFileName = doc.getElementsByTag("page").attr("pageimage");

            // Strings must be compared by content: '==' only tests reference identity.
            if (!imageFileName.isEmpty()) {
                imageUrl = "https://" + lang + ".wikipedia.org/wiki/Special:Redirect/file/" + imageFileName;
            } else {
                LOG.debug("No images at all from Wikipedia page " + uri + ". We'll search on Wikidata.");

                // Step 1: read the "title" attribute of the <entity> element for this page title.
                String findQid = "https://wikidata.org/w/api.php?action=wbgetentities&format=xml&sites=" + lang
                        + "wiki&titles=" + titleParam + "&props=info&format=xml";
                LOG.debug("Call to Wikimedia Commons service for the resource " + uri + ": " + findQid);
                doc = Jsoup.parse(restManager.getStringFromAPI(findQid));
                String qid = doc.getElementsByTag("entity").attr("title");

                // Step 2: list the images attached to that entity page.
                //XXX weak API but is the state of art; waiting for a better one https://phabricator.wikimedia.org/T95026
                String findImages = "https://www.wikidata.org/w/api.php?action=query&prop=images&titles="
                        + qid + "&format=xml";
                LOG.debug("Call to Wikimedia Commons service for the resource " + uri + ": " + findImages);
                doc = Jsoup.parse(restManager.getStringFromAPI(findImages));
                imageFileName = doc.getElementsByTag("im").attr("title").replace("File:", "");

                if (!imageFileName.isEmpty()) {
                    imageUrl = "https://commons.wikimedia.org/wiki/Special:Redirect/file/" + imageFileName;
                }
            }
        } catch (Exception e) {
            // Best-effort lookup: fall back to the default image.
            LOG.error("[getImageFromMediaWiki] - EXCEPTION: ", e);
        }
        LOG.debug("[getImageFromMediaWiki] - END");
        return imageUrl;
    }

    /**
     * Delegates to {@code DBpediaManager.getCoordinatesForAPlace}.
     *
     * @param uri DBpedia URI of the place.
     * @return whatever the DBpedia manager returns for this URI.
     */
    public String[] getCoordinatesFromDBpedia(String uri) {
        LOG.debug("[getCoordinatesFromDBpedia] - BEGIN");
        String[] result = dBpediaManager.getCoordinatesForAPlace(uri);
        LOG.debug("[getCoordinatesFromDBpedia] - END");
        return result;
    }

    /**
     * Fetches New York Times news for a resource through a fresh {@code NYTimesEnhancer}.
     *
     * @param uri DBpedia URI of the resource.
     * @return the enhancer's answer, or an empty-result JSON document when the answer is
     *         {@code null} or empty.
     */
    public String getNewsFromNYT(String uri) {
        LOG.debug("[getNewsFromNYT] - BEGIN");
        String result = new NYTimesEnhancer().getNewsFromNYTimes(uri);
        if (result == null || result.isEmpty()) {
            result = "{\"offset\" : \"0\" , \"results\" : []  , \"total\" : 0}";
        }
        LOG.debug("[getNewsFromNYT] - END");
        return result;
    }

    /**
     * Fetches BBC news for a resource through a fresh {@code BBCEnhancer}: reads the API key,
     * builds the request URL and asks for an {@code application/json} answer.
     *
     * @param uri DBpedia URI of the resource.
     * @return the raw answer returned by the BBC enhancer.
     * @throws IOException propagated from the BBC enhancer calls.
     */
    public String getNewsFromBBC(String uri) throws IOException {
        LOG.debug("[getNewsFromBBC] - BEGIN");
        BBCEnhancer bbcEnhancer = new BBCEnhancer();
        String apiKey = bbcEnhancer.getPropValues();
        String url = bbcEnhancer.createURL(uri, apiKey);
        String result = bbcEnhancer.getResultFromAPI(url, "application/json");
        LOG.debug("[getNewsFromBBC] - END");
        return result;
    }

    /**
     * Searches YouTube for a video about a resource, but only when one of the resource's DBpedia
     * types is in the white list. The search terms depend on the type: artist plus label for
     * musical works, "band"/"gruppo" plus label for bands, the plain label otherwise.
     *
     * @param uri DBpedia URI of the resource.
     * @param label label of the resource, used as search terms (spaces are replaced by {@code +}).
     * @return a {@code http://youtu.be/} URL, or an empty string when the resource has no
     *         white-listed type or any step throws.
     */
    public String getVideoFromYouTube(String uri, String label) {
        LOG.debug("[getVideoFromYouTube] - BEGIN");
        String result = "";
        try {
            //this method works only with english resources, but it's ok because resources contained only in DBpedia Italiana
            // are tricky when you try retrieving a video from Youtube
            ArrayList<String> typesArray = dBpediaManager.getTypes(uri);
            StringBuilder sb = new StringBuilder();
            for (String type : typesArray) {
                sb.append(type);
            }
            String mergedTypes = sb.toString();

            if (hasWhiteListedType(mergedTypes)) {
                String resultFromApi = "";
                if (mergedTypes.contains(TYPE_MUSICAL_WORK) && uri.startsWith("http://dbpedia")) {
                    String artist = dBpediaManager.getArtistFromEnglishDBpedia(uri);
                    resultFromApi = restManager.getStringFromAPI(
                            youTubeQueryUrl(artist.replace(" ", "+") + "+" + label.replace(" ", "+")));
                } else if (mergedTypes.contains(TYPE_MUSICAL_WORK) && uri.startsWith("http://it.dbpedia")) {
                    String artist = dBpediaManager.getArtistFromItalianDBpedia(uri);
                    resultFromApi = restManager.getStringFromAPI(
                            youTubeQueryUrl(artist.replace(" ", "+") + "+" + label.replace(" ", "+")));
                } else if (mergedTypes.contains(TYPE_ATHLETE) && uri.startsWith("http://dbpedia")) {
                    // Athletes are searched only when DBpedia knows a NYT URI for them.
                    if (!dBpediaManager.getNytUri(uri).equals("")) {
                        resultFromApi = restManager.getStringFromAPI(youTubeQueryUrl(label.replace(" ", "+")));
                    }
                } else if (mergedTypes.contains(TYPE_BAND)) {
                    // Prefix the query with the word for "band" in the resource's language.
                    String bandWord = uri.startsWith("http://dbpedia") ? "band+" : "gruppo+";
                    resultFromApi = restManager.getStringFromAPI(
                            youTubeQueryUrl(bandWord + label.replace(" ", "+")));
                } else {
                    resultFromApi = restManager.getStringFromAPI(youTubeQueryUrl(label.replace(" ", "+")));
                }
                // NOTE(review): in the Athlete branch resultFromApi can still be empty here; what
                // extractVideoIdFromResult does with an empty string is not visible from this file.
                result = "http://youtu.be/" + videoManager.extractVideoIdFromResult(resultFromApi);
            }
        } catch (Exception e) {
            // Best-effort lookup: report no video.
            LOG.error("[getVideoFromYouTube] - EXCEPTION: ", e);
        }
        LOG.debug("[getVideoFromYouTube] - END");
        return result;
    }

    /**
     * Looks up the Italian title of an English DBpedia resource through the Italian corpus index.
     *
     * @param uri DBpedia URI of the resource.
     * @param lang requested language; only {@code "italian"} triggers a lookup.
     * @return the Italian title, or an empty string when {@code lang} is not {@code "italian"},
     *         the URI does not start with {@code http://dbpedia}, or no Italian sameAs URI is found.
     * @throws IOException propagated from the index searcher.
     */
    public String getTitleFromDBpedia(String uri, String lang) throws IOException {
        LOG.debug("[getTitleFromDBpedia] - BEGIN");
        String result = "";
        if ("italian".equals(lang) && uri.startsWith("http://dbpedia")) {
            String itaUri = italianSearcher.getSameAsFromEngToIta(uri);
            if (itaUri != null && !itaUri.isEmpty()) {
                result = italianSearcher.getTitle(itaUri);
            }
        }
        LOG.debug("[getTitleFromDBpedia] - END");
        return result;
    }

    /**
     * Fetches the abstract of a resource from the DBpedia manager, retrying once when the first
     * answer is {@code null} or empty.
     *
     * @param uri DBpedia URI of the resource.
     * @param lang language passed through to the DBpedia manager.
     * @return the abstract as returned by the DBpedia manager (possibly still empty after the retry).
     */
    public String getAbstractFromDBpedia(String uri, String lang) {
        LOG.debug("[getAbstractFromDBpedia] - BEGIN");
        String result = dBpediaManager.getAbstract(uri, lang);
        // call twice to prevent DBpedia endpoint malfunctions
        if (result == null || result.isEmpty()) {
            result = dBpediaManager.getAbstract(uri, lang);
        }
        LOG.debug("[getAbstractFromDBpedia] - END");
        return result;
    }

    /** Builds the YouTube search URL for already '+'-joined search terms. */
    private static String youTubeQueryUrl(String searchTerms) {
        return YOUTUBE_QUERY_PREFIX + searchTerms + YOUTUBE_QUERY_SUFFIX;
    }

    /** Tells whether the concatenated type URIs contain at least one white-listed type. */
    private boolean hasWhiteListedType(String mergedTypes) {
        for (String goodType : typesWhiteList) {
            if (mergedTypes.contains(goodType)) {
                return true;
            }
        }
        return false;
    }

    /** Creates the list of type URIs for which a YouTube video is searched. */
    private ArrayList<String> createTypesWhiteList() {
        ArrayList<String> types = new ArrayList<String>();
        types.add("http://dbpedia.org/ontology/Actor");
        types.add("http://dbpedia.org/ontology/Activity");
        types.add("http://dbpedia.org/ontology/Band");
        types.add("http://dbpedia.org/ontology/Artist");
        types.add("http://dbpedia.org/ontology/Athlete");
        types.add("http://dbpedia.org/ontology/MusicalWork");
        types.add("http://dbpedia.org/ontology/Politician");
        types.add("http://umbel.org/umbel/rc/Actor");
        types.add("http://umbel.org/umbel/rc/Artist");
        types.add("http://umbel.org/umbel/rc/Politician");
        return types;
    }
}