com.moviejukebox.plugin.ScriptableScraperPlugin.java Source code

Java tutorial

Introduction

Here is the source code for com.moviejukebox.plugin.ScriptableScraperPlugin.java

Source

/*
 *      Copyright (c) 2004-2016 YAMJ Members
 *      https://github.com/orgs/YAMJ/people
 *
 *      This file is part of the Yet Another Movie Jukebox (YAMJ) project.
 *
 *      YAMJ is free software: you can redistribute it and/or modify
 *      it under the terms of the GNU General Public License as published by
 *      the Free Software Foundation, either version 3 of the License, or
 *      any later version.
 *
 *      YAMJ is distributed in the hope that it will be useful,
 *      but WITHOUT ANY WARRANTY; without even the implied warranty of
 *      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *      GNU General Public License for more details.
 *
 *      You should have received a copy of the GNU General Public License
 *      along with YAMJ.  If not, see <http://www.gnu.org/licenses/>.
 *
 *      Web: https://github.com/YAMJ/yamj-v2
 *
 */
package com.moviejukebox.plugin;

import com.moviejukebox.model.Movie;
import com.moviejukebox.model.scriptablescraper.*;
import com.moviejukebox.reader.ScriptableScraperXMLReader;
import com.moviejukebox.tools.PropertiesUtil;
import com.moviejukebox.tools.StringTools;
import com.moviejukebox.tools.WebBrowser;
import java.io.File;
import java.io.IOException;
import java.net.URL;
import java.util.*;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.math.NumberUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Plugin to retrieve movie data based on ScriptableScraper XML file
 */
public class ScriptableScraperPlugin extends ImdbPlugin {

    /** Class logger. */
    private static final Logger LOG = LoggerFactory.getLogger(ScriptableScraperPlugin.class);
    /** Identifier of this plugin; returned by getPluginID() and used as prefix of the property names read below. */
    public static final String SCRIPTABLESCRAPER_PLUGIN_ID = "scriptablescraper";
    /** Value of the "scriptablescraper.debug" property (default false); gates the LOG.debug calls of this class. */
    private final boolean debug = PropertiesUtil.getBooleanProperty(SCRIPTABLESCRAPER_PLUGIN_ID + ".debug",
            Boolean.FALSE);
    /** Value of the "scriptablescraper.info" property (default true); gates the start-up banner in the constructor. */
    private final boolean info = PropertiesUtil.getBooleanProperty(SCRIPTABLESCRAPER_PLUGIN_ID + ".info",
            Boolean.TRUE);
    /** Browser used by runSections to store cookies and request pages. */
    private WebBrowser webBrowser;
    /**
     * Scraper script model filled from the XML file named by the "scriptablescraper.data" property.
     * The constructor only assigns it when that property is valid and the file exists, so it can be null.
     */
    private ScriptableScraper ssData;
    /** Number of genres after which updateMediaInfo stops collecting (property "genres.max"). */
    private final int maxGenres;
    /** Limit passed to updatePersonInfo for directors. */
    private final int maxDirectors;
    /** Limit passed to updatePersonInfo for writers. */
    private final int maxWriters;
    /** Limit passed to updatePersonInfo for actors. */
    private final int maxActors;
    // Fallback values used by the constructor when the corresponding properties are not set
    private static final int DEFAULT_GENRES = 9;
    private static final int DEFAULT_ACTORS = 10;
    private static final int DEFAULT_WRITERS = 3;
    private static final int DEFAULT_DIRECTORS = 2;

    /**
     * Creates the plugin, reads the people/genre limits from the properties and loads the scraper
     * script named by the mandatory "scriptablescraper.data" property.
     *
     * <p>Problems while loading the script (property missing, file not found, parse failure) are
     * logged as errors and do not throw. When the property is missing or the file does not exist,
     * {@code ssData} stays {@code null}.
     *
     * <p>When the "scriptablescraper.info" property is enabled, a banner describing the loaded
     * script is logged, but only if the 8th stack frame counted from the bottom of the current
     * thread's stack is a method named {@code updateMovieData}.
     */
    public ScriptableScraperPlugin() {
        super();

        webBrowser = new WebBrowser();
        maxGenres = PropertiesUtil.getIntProperty("genres.max", DEFAULT_GENRES);
        maxDirectors = PropertiesUtil.getReplacedIntProperty("movie.director.maxCount",
                "plugin.people.maxCount.director", DEFAULT_DIRECTORS);
        maxWriters = PropertiesUtil.getReplacedIntProperty("movie.writer.maxCount", "plugin.people.maxCount.writer",
                DEFAULT_WRITERS);
        maxActors = PropertiesUtil.getReplacedIntProperty("movie.actor.maxCount", "plugin.people.maxCount.actor",
                DEFAULT_ACTORS);

        final ScriptableScraperXMLReader xmlReader = new ScriptableScraperXMLReader();
        final String xmlData = PropertiesUtil.getProperty("scriptablescraper.data", Movie.UNKNOWN);
        if (!StringTools.isValidString(xmlData)) {
            LOG.error("Failed mandatory parameter scriptablescraper.data");
            return;
        }

        final File xmlFile = new File(xmlData);
        if (!xmlFile.exists()) {
            LOG.error("File not found : {}", xmlData);
            return;
        }

        ssData = new ScriptableScraper();
        ssData.setDebug(debug);
        if (!xmlReader.parseXML(xmlFile, ssData)) {
            LOG.error("Reading error XML data file : {}", xmlData);
            return;
        }

        if (info) {
            final StackTraceElement[] stacktrace = Thread.currentThread().getStackTrace();
            // The stack may be shallower than 8 frames (e.g. when constructed directly from a
            // main method or a test); indexing blindly would throw ArrayIndexOutOfBoundsException.
            final int callerIndex = stacktrace.length - 8;
            if (callerIndex >= 0 && "updateMovieData".equals(stacktrace[callerIndex].getMethodName())) {
                LOG.info("");
                LOG.info("  -=[   ScriptableScraperPlugin   ]=-");
                LOG.info("Plugin         : {} - {}", ssData.getName(), ssData.getDescription());
                LOG.info("Plugin version : {} ({})", ssData.getVersion(), ssData.getPublished());
                LOG.info("Plugin authors : {}", ssData.getAuthor());
                LOG.info("Plugin language: {}", ssData.getLanguage());
                LOG.info("");
            }
        }
    }

    /**
     * Returns the identifier of this plugin.
     *
     * @return {@link #SCRIPTABLESCRAPER_PLUGIN_ID}
     */
    @Override
    public String getPluginID() {
        return SCRIPTABLESCRAPER_PLUGIN_ID;
    }

    /**
     * Scans a movie with the loaded scraper script.
     *
     * <p>If the movie carries no valid id for this plugin, one is searched for via
     * {@link #getMovieId(Movie)} and stored on the movie (whatever the search returned). The
     * details are then fetched only when the resulting id is valid.
     *
     * @param mediaFile the movie to update
     * @return the result of the detail update, or {@code false} when no valid id is available
     */
    @Override
    public boolean scan(Movie mediaFile) {
        String scraperId = mediaFile.getId(SCRIPTABLESCRAPER_PLUGIN_ID);

        if (StringTools.isNotValidString(scraperId)) {
            // NOTE: an earlier, now disabled, variant pre-scanned the movie here through
            // IMDb (movies) or TheTVDB (TV shows) before searching for the id.
            scraperId = getMovieId(mediaFile);
            if (debug) {
                LOG.debug("scan: movieId: {}", scraperId);
            }
            mediaFile.setId(SCRIPTABLESCRAPER_PLUGIN_ID, scraperId);
        }

        if (!StringTools.isValidString(scraperId)) {
            return false;
        }
        return updateMediaInfo(mediaFile);
    }

    /**
     * Returns the id of the movie for this plugin.
     *
     * @param movie the movie to look up
     * @return the id already stored on the movie, or, when that one is not valid, the result of
     *         searching by the movie's original title and year
     */
    public String getMovieId(Movie movie) {
        final String storedId = movie.getId(SCRIPTABLESCRAPER_PLUGIN_ID);
        if (StringTools.isNotValidString(storedId)) {
            return getMovieId(movie.getOriginalTitle(), movie.getYear());
        }
        return storedId;
    }

    /**
     * Runs the "search" action of the scraper script and returns the id of the first hit.
     *
     * <p>The title is exposed to the script as variable "search.title" and the year as
     * "search.year" ("0" when the year is not valid). After the search sections ran, the first
     * section providing a valid "movie[0].site_id" wins; that id is also stored as "movie.site_id"
     * on the script's main section.
     *
     * @param movieName title to search for
     * @param year      release year, may be invalid/unknown
     * @return the found id, or {@link Movie#UNKNOWN} when nothing was found or no script is loaded
     */
    public String getMovieId(String movieName, String year) {
        // The constructor leaves ssData null when the data file is not configured or missing;
        // report "not found" instead of failing with a NullPointerException.
        if (ssData == null) {
            LOG.warn("No scraper script loaded; unable to search for '{}'", movieName);
            return Movie.UNKNOWN;
        }

        ssData.getSection().setVariable("search.title", movieName);
        ssData.getSection().setVariable("search.year", StringTools.isValidString(year) ? year : "0");

        Collection<SectionContentSS> dataSections = ssData.getSections("action", "search");
        runSections(dataSections, "search");
        for (SectionContentSS section : dataSections) {
            String candidate = section.getVariable("movie[0].site_id");
            if (StringTools.isValidString(candidate)) {
                ssData.getSection().setVariable("movie.site_id", candidate);
                return candidate;
            }
        }

        return Movie.UNKNOWN;
    }

    /**
     * Runs the "get_details" action of the scraper script and copies the resulting "movie.*"
     * variables onto the movie.
     *
     * <p>All "get_details" sections are executed, but the variables are read back from the first
     * section only. Multi-valued variables are expected to be separated by '|'. A variable that
     * is not a valid string leaves the corresponding movie property untouched.
     *
     * @param movie the movie to update
     * @return {@code true} when the script has a "get_details" section and it was run,
     *         {@code false} when there is no such section or no script is loaded
     */
    private boolean updateMediaInfo(Movie movie) {
        // The constructor leaves ssData null when the data file is not configured or missing.
        if (ssData == null) {
            return false;
        }

        Collection<SectionContentSS> sections = ssData.getSections("action", "get_details");
        if (sections.isEmpty()) {
            return false;
        }

        runSections(sections, "get_details");

        SectionContentSS section = sections.iterator().next();

        // Title
        String value = section.getVariable("movie.title");
        if (StringTools.isValidString(value)) {
            movie.setTitle(value, SCRIPTABLESCRAPER_PLUGIN_ID);
        }

        // Year
        value = section.getVariable("movie.year");
        if (StringTools.isValidString(value)) {
            movie.setYear(value, SCRIPTABLESCRAPER_PLUGIN_ID);
        }

        // Plot (the summary is used for both the plot and the outline)
        value = section.getVariable("movie.summary");
        if (StringTools.isValidString(value)) {
            movie.setPlot(value, SCRIPTABLESCRAPER_PLUGIN_ID);
            movie.setOutline(value, SCRIPTABLESCRAPER_PLUGIN_ID);
        }

        // Genres: stop as soon as maxGenres entries were collected
        value = section.getVariable("movie.genres");
        if (StringTools.isValidString(value)) {
            List<String> newGenres = new ArrayList<>();
            for (String genre : value.split("\\|")) {
                newGenres.add(genre);
                if (newGenres.size() == maxGenres) {
                    break;
                }
            }

            movie.setGenres(newGenres, SCRIPTABLESCRAPER_PLUGIN_ID);
        }

        // Rating: accept a decimal comma; the parsed score is divided by 10 before conversion
        value = section.getVariable("movie.score");
        if (StringTools.isValidString(value)) {
            movie.addRating(SCRIPTABLESCRAPER_PLUGIN_ID,
                    StringTools.parseRating(NumberUtils.toFloat(value.replace(',', '.'), -1) / 10));
        }

        // Top 250
        // NOTE(review): the source is IMDB_PLUGIN_ID, unlike every other property here - confirm intended.
        value = section.getVariable("movie.top250");
        if (StringTools.isValidString(value)) {
            movie.setTop250(value, IMDB_PLUGIN_ID);
        }

        // Director
        updatePersonInfo(movie, "directors", section.getVariable("movie.directors"), maxDirectors);

        // Writer
        updatePersonInfo(movie, "writers", section.getVariable("movie.writers"), maxWriters);

        // Actors
        updatePersonInfo(movie, "actors", section.getVariable("movie.actors"), maxActors);

        // Studio/Company: literal replacement of the separator, no regex needed
        value = section.getVariable("movie.studios");
        if (StringTools.isValidString(value)) {
            movie.setCompany(value.replace("|", Movie.SPACE_SLASH_SPACE), SCRIPTABLESCRAPER_PLUGIN_ID);
        }

        // Run time
        value = section.getVariable("movie.runtime");
        if (StringTools.isValidString(value)) {
            movie.setRuntime(value, SCRIPTABLESCRAPER_PLUGIN_ID);
        }

        // Tagline
        value = section.getVariable("movie.tagline");
        if (StringTools.isValidString(value)) {
            movie.setTagline(value, SCRIPTABLESCRAPER_PLUGIN_ID);
        }

        // Certification
        value = section.getVariable("movie.certification");
        if (StringTools.isValidString(value)) {
            movie.setCertification(value, SCRIPTABLESCRAPER_PLUGIN_ID);
        }

        // Country
        value = section.getVariable("movie.country");
        if (StringTools.isValidString(value)) {
            List<String> countries = new ArrayList<>(Arrays.asList(value.split("\\|")));
            movie.setCountries(countries, SCRIPTABLESCRAPER_PLUGIN_ID);
        }

        // Quotes: only the first one is used; split() may return an empty array (e.g. for "|")
        value = section.getVariable("movie.quotes");
        if (StringTools.isValidString(value)) {
            String[] quotes = value.split("\\|");
            if (quotes.length > 0) {
                movie.setQuote(cleanStringEnding(quotes[0]), SCRIPTABLESCRAPER_PLUGIN_ID);
            }
        }

        // Did you know
        value = section.getVariable("movie.didyouknow");
        if (StringTools.isValidString(value)) {
            for (String dyk : value.split("\\|")) {
                movie.addDidYouKnow(dyk);
            }
        }

        return true;
    }

    /**
     * Adds the '|'-separated persons in {@code value} to the movie as directors, writers or actors.
     *
     * <p>Processing stops once {@code personMax} entries were handled. Entries are counted even
     * when {@code mode} is {@code null} or not one of the known modes (nothing is added then).
     *
     * @param movie     the movie to update
     * @param mode      "directors", "writers" or "actors"
     * @param value     '|'-separated list of names; ignored when not a valid string
     * @param personMax number of entries after which processing stops
     * @return the number of entries processed
     */
    private static int updatePersonInfo(Movie movie, String mode, String value, int personMax) {
        if (!StringTools.isValidString(value)) {
            return 0;
        }

        int processed = 0;
        for (String name : value.split("\\|")) {
            if ("directors".equals(mode)) {
                movie.addDirector(name, SCRIPTABLESCRAPER_PLUGIN_ID);
            } else if ("writers".equals(mode)) {
                movie.addWriter(name, SCRIPTABLESCRAPER_PLUGIN_ID);
            } else if ("actors".equals(mode)) {
                movie.addActor(name, SCRIPTABLESCRAPER_PLUGIN_ID);
            }

            processed++;
            if (processed == personMax) {
                break;
            }
        }
        return processed;
    }

    /**
     * Executes the items of the given script sections in order.
     *
     * <p>Supported item types:
     * <ul>
     * <li>"retrieve" - requests a page and stores it (line breaks flattened) as a global variable</li>
     * <li>"set" - stores a compiled value as a global variable</li>
     * <li>"parse" - applies a regex to an input and stores the result as a global variable</li>
     * <li>"math" - add/subtract/multiply/divide two numbers and stores the result</li>
     * <li>"content" - a nested "if" or "loop" section, executed recursively</li>
     * </ul>
     * Items of any other type are ignored.
     *
     * <p>An {@link IOException} aborts the remaining items of this call and is logged; it is not
     * propagated to the caller.
     *
     * @param sections    sections to execute; each element is cast to {@code SectionSS}
     * @param sectionName name of the action being run, used for logging only
     */
    private void runSections(Collection<SectionContentSS> sections, String sectionName) {
        try {
            if (debug) {
                LOG.debug("runSections: {}", sectionName);
            }
            for (SectionContentSS content : sections) {
                SectionSS cSection = (SectionSS) content;
                for (int looperItem = 0; looperItem < cSection.getItems().size(); looperItem++) {
                    ItemSS item = cSection.getItem(looperItem);
                    String type = item.getType();
                    String key = item.getKey();
                    if (debug) {
                        LOG.debug("item: {} : {}", type, key);
                    }
                    if ("retrieve".equals(type)) {
                        runRetrieveItem(cSection, key);
                    } else if ("set".equals(type)) {
                        String value = cSection.getSet(key);
                        if (debug) {
                            LOG.debug("getSet: key: {} value: {}", key, value);
                        }
                        key = cSection.compileValue(key);
                        value = cSection.compileValue(value);
                        cSection.setGlobalVariable(key, value);
                    } else if ("parse".equals(type)) {
                        ParseSS parse = cSection.getParse(key);
                        if (parse != null) {
                            key = cSection.compileValue(key);
                            String value = cSection.parseInput(cSection.compileValue(parse.getInput()),
                                    cSection.compileValue(parse.getRegex()));
                            cSection.setGlobalVariable(key, value);
                        }
                    } else if ("math".equals(type)) {
                        runMathItem(cSection, key);
                    } else if ("content".equals(type)) {
                        runContentItem(cSection, key, sectionName);
                    }
                }
            }
        } catch (IOException error) {
            LOG.error("Failed run section : {}", sectionName);
            // Pass the exception as last argument so that the stack trace is logged as well
            LOG.error("Error : {}", error.getMessage(), error);
        }
    }

    /**
     * Handles a "retrieve" item: registers the configured cookies, requests the page (with
     * retries) and stores it under the compiled key; an empty string is stored on failure.
     */
    private void runRetrieveItem(SectionSS cSection, String key) throws IOException {
        RetrieveSS retrieve = cSection.getRetrieve(key);
        if (retrieve == null) {
            return;
        }

        String url = cSection.compileValue(retrieve.getURL());
        if (StringUtils.isNotBlank(retrieve.getCookies())) {
            String domain = new URL(url).getHost();
            for (String cookie : retrieve.getCookies().split("&")) {
                // Limit of 2 keeps '=' characters inside the value and yields an empty value
                // for "name="; a plain split("=") dropped both and failed on get(1).
                String[] pair = cookie.split("=", 2);
                if (pair.length < 2) {
                    LOG.warn("Skipping malformed cookie '{}' for '{}'", cookie, key);
                    continue;
                }
                if (debug) {
                    LOG.debug("retrieve page from domain '{}' with name '{}' and value'{}'",
                            domain, pair[0], pair[1]);
                }
                webBrowser.putCookie(domain, pair[0], pair[1]);
            }
        }

        if (!StringTools.isValidString(url)) {
            return;
        }

        String page = "";
        // One initial attempt plus getRetries() retries, stopping at the first valid page
        for (int looper = 0; looper <= retrieve.getRetries(); looper++) {
            page = webBrowser.request(url, retrieve.getEncoding()).replaceAll("\\r", "")
                    .replaceAll("\\n", " ");
            if (StringTools.isValidString(page)) {
                break;
            }
        }
        if (StringTools.isNotValidString(page)) {
            LOG.error("Page does not retrieved for '{}' with URL {}", key, url);
            page = "";
        }
        cSection.setGlobalVariable(cSection.compileValue(key), page);
    }

    /**
     * Handles a "math" item: computes the operation on the two compiled operands and stores the
     * result under the compiled key, as float text or as a rounded integer depending on the
     * item's result type. Nothing is stored for an unknown operation or non-numeric operands.
     */
    private void runMathItem(SectionSS cSection, String key) {
        MathSS math = cSection.getMath(key);
        if (math == null) {
            return;
        }

        float value1;
        float value2;
        try {
            value1 = Float.parseFloat(cSection.compileValue(math.getValue1()));
            value2 = Float.parseFloat(cSection.compileValue(math.getValue2()));
        } catch (NumberFormatException ex) {
            // Operands typically come from scraped pages; do not let bad text abort the scan
            LOG.error("Non-numeric operand for math '{}': {}", key, ex.getMessage());
            return;
        }

        String typeName = math.getType();
        if (typeName == null) {
            return;
        }

        float res;
        switch (typeName) {
        case "add":
            res = value1 + value2;
            break;
        case "subtract":
            res = value1 - value2;
            break;
        case "multiply":
            res = value1 * value2;
            break;
        case "divide":
            // Only a zero divisor is special-cased (result 0); negative divisors are valid
            res = (value2 != 0f) ? value1 / value2 : 0f;
            break;
        default:
            LOG.error("Unknown math type: {}", typeName);
            return;
        }

        String value;
        if ("float".equals(math.getResultType())) {
            value = Float.toString(res);
        } else {
            value = Integer.toString(Math.round(res));
        }
        cSection.setGlobalVariable(cSection.compileValue(key), value);
    }

    /**
     * Handles a "content" item: the key is the index of a nested section, which is executed when
     * it is an "if" whose test holds, or iterated when it is a "loop".
     */
    private void runContentItem(SectionSS cSection, String key, String sectionName) {
        SectionSS section = (SectionSS) cSection.getContent(Integer.parseInt(key));
        String name = section.getName();
        if (debug) {
            LOG.debug("subsection: {}", name);
        }
        if ("if".equals(name)) {
            String condition = section.getAttribute("test");
            if (StringTools.isValidString(condition) && section.testCondition(condition)) {
                runNestedSection(section, sectionName);
            }
        } else if ("loop".equals(name)) {
            runLoopSection(section, sectionName);
        }
    }

    /**
     * Runs a "loop" section once per element of the variable named by its "on" attribute,
     * exposing the element under the loop's "name" and the zero-based index as "count". An
     * optional "limit" attribute caps the number of iterations.
     */
    private void runLoopSection(SectionSS section, String sectionName) {
        String sName = section.getAttribute("name");
        String on = section.getAttribute("on");
        if (!(StringTools.isValidString(sName) && StringTools.isValidString(on))) {
            LOG.error("Wrong attribute 'on' value '{}' of 'loop' name=\"{}\"", on, sName);
            return;
        }
        if (!section.hasVariable(on)) {
            LOG.error("Does not exist '{}' for 'loop' name=\"{}\"", on, sName);
            return;
        }

        String value = section.getVariable(on);
        if (debug) {
            LOG.debug("loop: value: {}", value);
        }
        if (!StringTools.isValidString(value)) {
            return;
        }

        String[] values = value.split(ScriptableScraper.ARRAY_GROUP_DIVIDER);
        int limit = values.length;
        String limitAttribute = section.getAttribute("limit");
        if (StringTools.isValidString(limitAttribute)) {
            limit = Math.min(limit, Integer.parseInt(limitAttribute));
        }

        for (int looper = 0; looper < limit; looper++) {
            if (debug) {
                LOG.debug("loop: {}: {}", sName, values[looper]);
            }
            section.setVariable(sName, values[looper]);
            section.setVariable("count", Integer.toString(looper));
            runNestedSection(section, sectionName);
        }
    }

    /** Runs a single nested section through {@link #runSections(Collection, String)}. */
    @SuppressWarnings({ "unchecked", "rawtypes" })
    private void runNestedSection(SectionSS section, String sectionName) {
        // Raw cast kept from the original code to pass a one-element list of SectionSS
        runSections((Collection) Arrays.asList(section), sectionName);
    }
}