Java tutorial
/* * Copyright 2012 - 2016 Manuel Laggner * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.tinymediamanager.scraper.ofdb; import java.io.InputStream; import java.net.URLDecoder; import java.net.URLEncoder; import java.util.ArrayList; import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.apache.commons.lang3.StringEscapeUtils; import org.apache.commons.lang3.StringUtils; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.tinymediamanager.scraper.MediaMetadata; import org.tinymediamanager.scraper.MediaProviderInfo; import org.tinymediamanager.scraper.MediaScrapeOptions; import org.tinymediamanager.scraper.MediaSearchOptions; import org.tinymediamanager.scraper.MediaSearchResult; import org.tinymediamanager.scraper.UnsupportedMediaTypeException; import org.tinymediamanager.scraper.entities.MediaCastMember; import org.tinymediamanager.scraper.entities.MediaGenres; import org.tinymediamanager.scraper.entities.MediaTrailer; import org.tinymediamanager.scraper.entities.MediaType; import org.tinymediamanager.scraper.http.Url; import org.tinymediamanager.scraper.mediaprovider.IMovieMetadataProvider; import org.tinymediamanager.scraper.mediaprovider.IMovieTrailerProvider; import org.tinymediamanager.scraper.util.MetadataUtil; import org.tinymediamanager.scraper.util.StrgUtils; import net.xeoh.plugins.base.annotations.PluginImplementation; /** * The Class OfdbMetadataProvider. A meta data provider for the site ofdb.de * * @author Myron Boyle (myron0815@gmx.net) */ @PluginImplementation public class OfdbMetadataProvider implements IMovieMetadataProvider, IMovieTrailerProvider { private static final Logger LOGGER = LoggerFactory.getLogger(OfdbMetadataProvider.class); private static final String BASE_URL = "http://www.ofdb.de"; private static MediaProviderInfo providerInfo = createMediaProviderInfo(); public OfdbMetadataProvider() { } private static MediaProviderInfo createMediaProviderInfo() { MediaProviderInfo providerInfo = new MediaProviderInfo("ofdb", "Online Filmdatenbank (OFDb.de)", "<html><h3>Online Filmdatenbank (OFDb)</h3><br />A german movie database driven by the community.<br /><br />Available languages: DE</html>", OfdbMetadataProvider.class.getResource("/ofdb_de.png")); providerInfo.setVersion(OfdbMetadataProvider.class); return providerInfo; } @Override public MediaProviderInfo getProviderInfo() { return providerInfo; } /* * <meta property="og:title" content="Bourne Vermachtnis, Das (2012)" /> <meta property="og:type" content="movie" /> <meta property="og:url" * content="http://www.ofdb.de/film/226745,Das-Bourne-Vermchtnis" /> <meta property="og:image" content="http://img.ofdb.de/film/226/226745.jpg" /> * <meta property="og:site_name" content="OFDb" /> <meta property="fb:app_id" content="198140443538429" /> <script * src="http://www.ofdb.de/jscripts/vn/immer_oben.js" type="text/javascript"></script> */ @Override public MediaMetadata getMetadata(MediaScrapeOptions options) throws Exception { LOGGER.debug("getMetadata() " + options.toString()); if (options.getType() != MediaType.MOVIE) { throw new UnsupportedMediaTypeException(options.getType()); } // we have 3 entry points here // a) getMetadata has been called with an ofdbId // b) getMetadata has been called with an imdbId // c) getMetadata has been called from a previous search String detailUrl = ""; // case a) and c) if (StringUtils.isNotBlank(options.getId(getProviderInfo().getId())) || options.getResult() != null) { if (StringUtils.isNotBlank(options.getId(getProviderInfo().getId()))) { detailUrl = "http://www.ofdb.de/view.php?page=film&fid=" + options.getId(getProviderInfo().getId()); } else { detailUrl = options.getResult().getUrl(); } } // case b) if (options.getResult() == null && StringUtils.isNotBlank(options.getId(MediaMetadata.IMDB))) { MediaSearchOptions searchOptions = new MediaSearchOptions(MediaType.MOVIE); searchOptions.setImdbId(options.getId(MediaMetadata.IMDB)); try { List<MediaSearchResult> results = search(searchOptions); if (results != null && !results.isEmpty()) { options.setResult(results.get(0)); detailUrl = options.getResult().getUrl(); } } catch (Exception e) { LOGGER.warn("failed IMDB search: " + e.getMessage()); } } // we can only work further if we got a search result on ofdb.de if (StringUtils.isBlank(detailUrl)) { throw new Exception("We did not get any useful movie url"); } MediaMetadata md = new MediaMetadata(providerInfo.getId()); // generic Elements used all over Elements el = null; String ofdbId = StrgUtils.substr(detailUrl, "film\\/(\\d+),"); if (StringUtils.isBlank(ofdbId)) { ofdbId = StrgUtils.substr(detailUrl, "fid=(\\d+)"); } Url url; try { LOGGER.trace("get details page"); url = new Url(detailUrl); InputStream in = url.getInputStream(); Document doc = Jsoup.parse(in, "UTF-8", ""); in.close(); if (doc.getAllElements().size() < 10) { throw new Exception("meh - we did not receive a valid web page"); } // parse details // IMDB ID "http://www.imdb.com/Title?1194173" el = doc.getElementsByAttributeValueContaining("href", "imdb.com"); if (!el.isEmpty()) { md.setId(MediaMetadata.IMDB, "tt" + StrgUtils.substr(el.first().attr("href"), "\\?(\\d+)")); } // title / year // <meta property="og:title" content="Bourne Vermchtnis, Das (2012)" /> el = doc.getElementsByAttributeValue("property", "og:title"); if (!el.isEmpty()) { String[] ty = parseTitle(el.first().attr("content")); md.setTitle(StrgUtils.removeCommonSortableName(ty[0])); try { md.setYear(Integer.parseInt(ty[1])); } catch (Exception ignored) { } } // another year position if (md.getYear() == 0) { // <a href="view.php?page=blaettern&Kat=Jahr&Text=2012">2012</a> el = doc.getElementsByAttributeValueContaining("href", "Kat=Jahr"); try { md.setYear(Integer.parseInt(el.first().text())); } catch (Exception ignored) { } } // original title (has to be searched with a regexp) // <tr valign="top"> // <td nowrap=""><font class="Normal" face="Arial,Helvetica,sans-serif" // size="2">Originaltitel:</font></td> // <td> </td> // <td width="99%"><font class="Daten" face="Arial,Helvetica,sans-serif" // size="2"><b>Brave</b></font></td> // </tr> String originalTitle = StrgUtils.substr(doc.body().html(), "(?s)Originaltitel.*?<b>(.*?)</b>"); if (!originalTitle.isEmpty()) { md.setOriginalTitle(StrgUtils.removeCommonSortableName(originalTitle)); } // Genre: <a href="view.php?page=genre&Genre=Action">Action</a> el = doc.getElementsByAttributeValueContaining("href", "page=genre"); for (Element g : el) { md.addGenre(getTmmGenre(g.text())); } // rating // <div itemtype="http://schema.org/AggregateRating" itemscope // itemprop="aggregateRating">Note: <span // itemprop="ratingValue">6.73</span><meta // itemprop="worstRating" content="1" /> el = doc.getElementsByAttributeValue("itemprop", "ratingValue"); if (!el.isEmpty()) { String r = el.text(); if (!r.isEmpty()) { try { md.setRating(Float.parseFloat(r)); } catch (Exception e) { LOGGER.debug("could not parse rating"); } } } // get PlotLink; open url and parse // <a href="plot/22523,31360,Die-Bourne-Identitt"><b>[mehr]</b></a> LOGGER.trace("parse plot"); el = doc.getElementsByAttributeValueMatching("href", "plot\\/\\d+,"); if (!el.isEmpty()) { String plotUrl = BASE_URL + "/" + el.first().attr("href"); try { url = new Url(plotUrl); in = url.getInputStream(); Document plot = Jsoup.parse(in, "UTF-8", ""); in.close(); Elements block = plot.getElementsByClass("Blocksatz"); // first // Blocksatz // is plot String p = block.first().text(); // remove all html stuff p = p.substring(p.indexOf("Mal gelesen") + 12); // remove "header" md.setPlot(p); } catch (Exception e) { LOGGER.error("failed to get plot page: " + e.getMessage()); } } // http://www.ofdb.de/view.php?page=film_detail&fid=226745 LOGGER.debug("parse actor detail"); String movieDetail = BASE_URL + "/view.php?page=film_detail&fid=" + ofdbId; doc = null; try { url = new Url(movieDetail); in = url.getInputStream(); doc = Jsoup.parse(in, "UTF-8", ""); in.close(); } catch (Exception e) { LOGGER.error("failed to get detail page: " + e.getMessage()); } if (doc != null) { parseCast(doc.getElementsContainingOwnText("Regie"), MediaCastMember.CastType.DIRECTOR, md); parseCast(doc.getElementsContainingOwnText("Darsteller"), MediaCastMember.CastType.ACTOR, md); parseCast(doc.getElementsContainingOwnText("Stimme/Sprecher"), MediaCastMember.CastType.ACTOR, md); parseCast(doc.getElementsContainingOwnText("Synchronstimme (deutsch)"), MediaCastMember.CastType.ACTOR, md); parseCast(doc.getElementsContainingOwnText("Drehbuchautor(in)"), MediaCastMember.CastType.WRITER, md); parseCast(doc.getElementsContainingOwnText("Produzent(in)"), MediaCastMember.CastType.PRODUCER, md); } } catch (Exception e) { LOGGER.error("Error parsing " + detailUrl); throw e; } return md; } // parse actors // find the header // go up until TR table row // get next TR for casts entries private void parseCast(Elements el, MediaCastMember.CastType type, MediaMetadata md) { if (el != null && !el.isEmpty()) { Element castEl = null; for (Element element : el) { if (!element.tagName().equals("option")) { // we get more, just do not take the optionbox castEl = element; } } if (castEl == null) { LOGGER.debug("meh, no " + type.name() + " found"); return; } // walk up to table TR... while (!((castEl == null) || (castEl.tagName().equalsIgnoreCase("tr")))) { castEl = castEl.parent(); } // ... and take the next table row ^^ Element tr = castEl.nextElementSibling(); if (tr != null) { for (Element a : tr.getElementsByAttributeValue("valign", "middle")) { String act = a.toString(); String aname = StrgUtils.substr(act, "alt=\"(.*?)\""); if (!aname.isEmpty()) { MediaCastMember cm = new MediaCastMember(); cm.setName(aname); String id = StrgUtils.substr(act, "id=(.*?)[^\"]\">"); if (!id.isEmpty()) { cm.setId(id); // thumb // http://www.ofdb.de/thumbnail.php?cover=images%2Fperson%2F7%2F7689.jpg&size=6 // fullsize ;) http://www.ofdb.de/images/person/7/7689.jpg try { String imgurl = URLDecoder .decode(StrgUtils.substr(act, "images%2Fperson%2F(.*?)&size"), "UTF-8"); if (!imgurl.isEmpty()) { imgurl = BASE_URL + "/images/person/" + imgurl; } cm.setImageUrl(imgurl); } catch (Exception e) { } } String arole = StrgUtils.substr(act, "\\.\\.\\. (.*?)</font>").replaceAll("<[^>]*>", ""); cm.setCharacter(arole); cm.setType(type); md.addCastMember(cm); } } } } } /* * Maps scraper Genres to internal TMM genres */ private MediaGenres getTmmGenre(String genre) { MediaGenres g = null; if (genre.isEmpty()) { return g; } // @formatter:off else if (genre.equals("Abenteuer")) { g = MediaGenres.ADVENTURE; } else if (genre.equals("Action")) { g = MediaGenres.ACTION; } else if (genre.equals("Amateur")) { g = MediaGenres.INDIE; } else if (genre.equals("Animation")) { g = MediaGenres.ANIMATION; } else if (genre.equals("Anime")) { g = MediaGenres.ANIMATION; } else if (genre.equals("Biographie")) { g = MediaGenres.BIOGRAPHY; } else if (genre.equals("Dokumentation")) { g = MediaGenres.DOCUMENTARY; } else if (genre.equals("Drama")) { g = MediaGenres.DRAMA; } else if (genre.equals("Eastern")) { g = MediaGenres.EASTERN; } else if (genre.equals("Erotik")) { g = MediaGenres.EROTIC; } else if (genre.equals("Essayfilm")) { g = MediaGenres.INDIE; } else if (genre.equals("Experimentalfilm")) { g = MediaGenres.INDIE; } else if (genre.equals("Fantasy")) { g = MediaGenres.FANTASY; } else if (genre.equals("Grusel")) { g = MediaGenres.HORROR; } else if (genre.equals("Hardcore")) { g = MediaGenres.EROTIC; } else if (genre.equals("Heimatfilm")) { g = MediaGenres.TV_MOVIE; } else if (genre.equals("Historienfilm")) { g = MediaGenres.HISTORY; } else if (genre.equals("Horror")) { g = MediaGenres.HORROR; } else if (genre.equals("Kampfsport")) { g = MediaGenres.SPORT; } else if (genre.equals("Katastrophen")) { g = MediaGenres.DISASTER; } else if (genre.equals("Kinder-/Familienfilm")) { g = MediaGenres.FAMILY; } else if (genre.equals("Komdie")) { g = MediaGenres.COMEDY; } else if (genre.equals("Krieg")) { g = MediaGenres.WAR; } else if (genre.equals("Krimi")) { g = MediaGenres.CRIME; } else if (genre.equals("Kurzfilm")) { g = MediaGenres.SHORT; } else if (genre.equals("Liebe/Romantik")) { g = MediaGenres.ROMANCE; } else if (genre.equals("Mondo")) { g = MediaGenres.DOCUMENTARY; } else if (genre.equals("Musikfilm")) { g = MediaGenres.MUSIC; } else if (genre.equals("Mystery")) { g = MediaGenres.MYSTERY; } else if (genre.equals("Science-Fiction")) { g = MediaGenres.SCIENCE_FICTION; } else if (genre.equals("Serial")) { g = MediaGenres.SERIES; } else if (genre.equals("Sex")) { g = MediaGenres.EROTIC; } else if (genre.equals("Splatter")) { g = MediaGenres.HORROR; } else if (genre.equals("Sportfilm")) { g = MediaGenres.SPORT; } else if (genre.equals("Stummfilm")) { g = MediaGenres.SILENT_MOVIE; } else if (genre.equals("TV-Film")) { g = MediaGenres.TV_MOVIE; } else if (genre.equals("TV-Mini-Serie")) { g = MediaGenres.SERIES; } else if (genre.equals("TV-Pilotfilm")) { g = MediaGenres.TV_MOVIE; } else if (genre.equals("TV-Serie")) { g = MediaGenres.SERIES; } else if (genre.equals("Thriller")) { g = MediaGenres.THRILLER; } else if (genre.equals("Tierfilm")) { g = MediaGenres.ANIMAL; } else if (genre.equals("Webserie")) { g = MediaGenres.SERIES; } else if (genre.equals("Western")) { g = MediaGenres.WESTERN; } // @formatter:on if (g == null) { g = MediaGenres.getGenre(genre); } return g; } /* * Removes all weird characters from search as well some "stopwords" as der|die|das|the|a */ private String cleanSearch(String q) { q = " " + q + " "; // easier regex // TODO: doppelte hintereinander funzen so nicht q = q.replaceAll("(?i)( a | the | der | die | das |\\(\\d+\\))", " "); q = q.replaceAll("[^A-Za-z0-9 ]", " "); q = q.replaceAll(" ", ""); return q.trim(); } @Override public List<MediaSearchResult> search(MediaSearchOptions options) throws Exception { LOGGER.debug("search() " + options.toString()); if (options.getMediaType() != MediaType.MOVIE) { throw new UnsupportedMediaTypeException(options.getMediaType()); } List<MediaSearchResult> resultList = new ArrayList<>(); String searchString = ""; String searchQuery = ""; String imdb = ""; Elements filme = null; int myear = options.getYear(); /* * Kat = All | Titel | Person | DTitel | OTitel | Regie | Darsteller | Song | Rolle | EAN| IMDb | Google * http://www.ofdb.de//view.php?page=suchergebnis &Kat=xxxxxxxxx&SText=yyyyyyyyyyy */ // 1. search with imdbId if (StringUtils.isNotEmpty(options.getImdbId()) && (filme == null || filme.isEmpty())) { try { imdb = options.getImdbId(); searchString = BASE_URL + "/view.php?page=suchergebnis&Kat=IMDb&SText=" + imdb; LOGGER.debug("search with imdbId: " + imdb); Url url = new Url(searchString); InputStream in = url.getInputStream(); Document doc = Jsoup.parse(in, "UTF-8", ""); in.close(); // only look for movie links filme = doc.getElementsByAttributeValueMatching("href", "film\\/\\d+,"); LOGGER.debug("found " + filme.size() + " search results"); } catch (Exception e) { LOGGER.error("failed to search for imdb Id " + imdb + ": " + e.getMessage()); } } // 2. search for search string if (StringUtils.isNotEmpty(options.getQuery()) && (filme == null || filme.isEmpty())) { try { String query = options.getQuery(); searchQuery = query; query = MetadataUtil.removeNonSearchCharacters(query); searchString = BASE_URL + "/view.php?page=suchergebnis&Kat=All&SText=" + URLEncoder.encode(cleanSearch(query), "UTF-8"); LOGGER.debug("search for everything: " + query); Url url = new Url(searchString); InputStream in = url.getInputStream(); Document doc = Jsoup.parse(in, "UTF-8", ""); in.close(); // only look for movie links filme = doc.getElementsByAttributeValueMatching("href", "film\\/\\d+,"); LOGGER.debug("found " + filme.size() + " search results"); } catch (Exception e) { LOGGER.error("failed to search for " + searchQuery + ": " + e.getMessage()); } } if (filme == null || filme.isEmpty()) { LOGGER.debug("nothing found :("); return resultList; } // <a href="film/22523,Die-Bourne-Identitt" // onmouseover="Tip('<img src="images/film/22/22523.jpg" // width="120" height="170">',SHADOW,true)">Bourne // Identitt, Die<font size="1"> / Bourne Identity, The</font> (2002)</a> HashSet<String> foundResultUrls = new HashSet<>(); for (Element a : filme) { try { MediaSearchResult sr = new MediaSearchResult(providerInfo.getId(), MediaType.MOVIE); if (StringUtils.isNotEmpty(imdb)) { sr.setIMDBId(imdb); } sr.setId(StrgUtils.substr(a.toString(), "film\\/(\\d+),")); // OFDB ID sr.setTitle(StringEscapeUtils.unescapeHtml4(StrgUtils .removeCommonSortableName(StrgUtils.substr(a.toString(), ".*>(.*?)(\\[.*?\\])?<font")))); LOGGER.debug("found movie " + sr.getTitle()); sr.setOriginalTitle(StringEscapeUtils.unescapeHtml4( StrgUtils.removeCommonSortableName(StrgUtils.substr(a.toString(), ".*> / (.*?)</font")))); try { sr.setYear(Integer.parseInt(StrgUtils.substr(a.toString(), "font> \\((.*?)\\)<\\/a"))); } catch (Exception ignored) { } sr.setUrl(BASE_URL + "/" + StrgUtils.substr(a.toString(), "href=\\\"(.*?)\\\"")); sr.setPosterUrl(BASE_URL + "/images" + StrgUtils.substr(a.toString(), "images(.*?)\\"")); // check if it has at least a title and url if (StringUtils.isBlank(sr.getTitle()) || StringUtils.isBlank(sr.getUrl())) { continue; } // OFDB could provide linke twice - check if that has been already added if (foundResultUrls.contains(sr.getUrl())) { continue; } foundResultUrls.add(sr.getUrl()); if (imdb.equals(sr.getIMDBId())) { // perfect match sr.setScore(1); } else { // compare score based on names float score = MetadataUtil.calculateScore(searchQuery, sr.getTitle()); if (yearDiffers(myear, sr.getYear())) { float diff = (float) Math.abs(myear - sr.getYear()) / 100; LOGGER.debug( "parsed year does not match search result year - downgrading score by " + diff); score -= diff; } sr.setScore(score); } resultList.add(sr); } catch (Exception e) { LOGGER.warn("error parsing movie result: " + e.getMessage()); } } Collections.sort(resultList); Collections.reverse(resultList); return resultList; } @Override public List<MediaTrailer> getTrailers(MediaScrapeOptions options) throws Exception { LOGGER.debug("getTrailers() " + options.toString()); List<MediaTrailer> trailers = new ArrayList<>(); if (!MetadataUtil.isValidImdbId(options.getImdbId())) { LOGGER.debug("IMDB id not found"); return trailers; } /* * function getTrailerData(ci) { switch (ci) { case 'http://de.clip-1.filmtrailer.com/9507_31566_a_1.flv?log_var=72|491100001 -1|-' : return * '<b>Trailer 1</b><br><i>(small)</i><br><br>» 160px<br><br>Download:<br>» <a href= * "http://de.clip-1.filmtrailer.com/9507_31566_a_1.wmv?log_var=72|491100001-1|-" >wmv</a><br>'; case * 'http://de.clip-1.filmtrailer.com/9507_31566_a_2.flv?log_var=72|491100001 -1|-' : return '<b>Trailer 1</b><br><i>(medium)</i><br><br>» * 240px<br><br>Download:<br>» <a href= "http://de.clip-1.filmtrailer.com/9507_31566_a_2.wmv?log_var=72|491100001-1|-" >wmv</a><br>'; case * 'http://de.clip-1.filmtrailer.com/9507_31566_a_3.flv?log_var=72|491100001 -1|-' : return '<b>Trailer 1</b><br><i>(large)</i><br><br>» * 320px<br><br>Download:<br>» <a href= "http://de.clip-1.filmtrailer.com/9507_31566_a_3.wmv?log_var=72|491100001-1|-" >wmv</a><br>» * <a href= "http://de.clip-1.filmtrailer.com/9507_31566_a_3.mp4?log_var=72|491100001-1|-" >mp4</a><br>» <a href= * "http://de.clip-1.filmtrailer.com/9507_31566_a_3.webm?log_var=72|491100001-1|-" >webm</a><br>'; case * 'http://de.clip-1.filmtrailer.com/9507_31566_a_4.flv?log_var=72|491100001 -1|-' : return '<b>Trailer 1</b><br><i>(xlarge)</i><br><br>» * 400px<br><br>Download:<br>» <a href= "http://de.clip-1.filmtrailer.com/9507_31566_a_4.wmv?log_var=72|491100001-1|-" >wmv</a><br>» * <a href= "http://de.clip-1.filmtrailer.com/9507_31566_a_4.mp4?log_var=72|491100001-1|-" >mp4</a><br>» <a href= * "http://de.clip-1.filmtrailer.com/9507_31566_a_4.webm?log_var=72|491100001-1|-" >webm</a><br>'; case * 'http://de.clip-1.filmtrailer.com/9507_31566_a_5.flv?log_var=72|491100001 -1|-' : return '<b>Trailer 1</b><br><i>(xxlarge)</i><br><br>» * 640px<br><br>Download:<br>» <a href= "http://de.clip-1.filmtrailer.com/9507_31566_a_5.wmv?log_var=72|491100001-1|-" >wmv</a><br>» * <a href= "http://de.clip-1.filmtrailer.com/9507_31566_a_5.mp4?log_var=72|491100001-1|-" >mp4</a><br>» <a href= * "http://de.clip-1.filmtrailer.com/9507_31566_a_5.webm?log_var=72|491100001-1|-" >webm</a><br>'; case * 'http://de.clip-1.filmtrailer.com/9507_39003_a_1.flv?log_var=72|491100001 -1|-' : return '<b>Trailer 2</b><br><i>(small)</i><br><br>» * 160px<br><br>Download:<br>» <a href= "http://de.clip-1.filmtrailer.com/9507_39003_a_1.wmv?log_var=72|491100001-1|-" >wmv</a><br>'; case * 'http://de.clip-1.filmtrailer.com/9507_39003_a_2.flv?log_var=72|491100001 -1|-' : return '<b>Trailer 2</b><br><i>(medium)</i><br><br>» * 240px<br><br>Download:<br>» <a href= "http://de.clip-1.filmtrailer.com/9507_39003_a_2.wmv?log_var=72|491100001-1|-" >wmv</a><br>'; case * 'http://de.clip-1.filmtrailer.com/9507_39003_a_3.flv?log_var=72|491100001 -1|-' : return '<b>Trailer 2</b><br><i>(large)</i><br><br>» * 320px<br><br>Download:<br>» <a href= "http://de.clip-1.filmtrailer.com/9507_39003_a_3.wmv?log_var=72|491100001-1|-" >wmv</a><br>» * <a href= "http://de.clip-1.filmtrailer.com/9507_39003_a_3.mp4?log_var=72|491100001-1|-" >mp4</a><br>» <a href= * "http://de.clip-1.filmtrailer.com/9507_39003_a_3.webm?log_var=72|491100001-1|-" >webm</a><br>'; case * 'http://de.clip-1.filmtrailer.com/9507_39003_a_4.flv?log_var=72|491100001 -1|-' : return '<b>Trailer 2</b><br><i>(xlarge)</i><br><br>» * 400px<br><br>Download:<br>» <a href= "http://de.clip-1.filmtrailer.com/9507_39003_a_4.wmv?log_var=72|491100001-1|-" >wmv</a><br>» * <a href= "http://de.clip-1.filmtrailer.com/9507_39003_a_4.mp4?log_var=72|491100001-1|-" >mp4</a><br>» <a href= * "http://de.clip-1.filmtrailer.com/9507_39003_a_4.webm?log_var=72|491100001-1|-" >webm</a><br>'; case * 'http://de.clip-1.filmtrailer.com/9507_39003_a_5.flv?log_var=72|491100001 -1|-' : return '<b>Trailer 2</b><br><i>(xxlarge)</i><br><br>» * 640px<br><br>Download:<br>» <a href= "http://de.clip-1.filmtrailer.com/9507_39003_a_5.wmv?log_var=72|491100001-1|-" >wmv</a><br>» * <a href= "http://de.clip-1.filmtrailer.com/9507_39003_a_5.mp4?log_var=72|491100001-1|-" >mp4</a><br>» <a href= * "http://de.clip-1.filmtrailer.com/9507_39003_a_5.webm?log_var=72|491100001-1|-" >webm</a><br>'; } } */ Url url = null; String searchString = BASE_URL + "/view.php?page=suchergebnis&Kat=IMDb&SText=" + options.getImdbId(); try { // search with IMDB url = new Url(searchString); InputStream in = url.getInputStream(); Document doc = Jsoup.parse(in, "UTF-8", ""); in.close(); Elements filme = doc.getElementsByAttributeValueMatching("href", "film\\/\\d+,"); if (filme == null || filme.isEmpty()) { LOGGER.debug("found no search results"); return trailers; } LOGGER.debug("found " + filme.size() + " search results"); // hopefully // only one LOGGER.debug("get (trailer) details page"); url = new Url(BASE_URL + "/" + StrgUtils.substr(filme.first().toString(), "href=\\\"(.*?)\\\"")); in = url.getInputStream(); doc = Jsoup.parse(in, "UTF-8", ""); in.close(); // OLD STYLE // <b>Trailer 1</b><br><i>(xxlarge)</i><br><br>» 640px<br><br>Download:<br>» <a href= // "http://de.clip-1.filmtrailer.com/9507_31566_a_5.wmv?log_var=72|491100001-1|-" >wmv</a><br>» <a href= // "http://de.clip-1.filmtrailer.com/9507_31566_a_5.mp4?log_var=72|491100001-1|-" >mp4</a><br>» <a href= // "http://de.clip-1.filmtrailer.com/9507_31566_a_5.webm?log_var=72|491100001-1|-" >webm</a><br> Pattern regex = Pattern.compile("return '(.*?)';"); Matcher m = regex.matcher(doc.toString()); while (m.find()) { String s = m.group(1); String tname = StrgUtils.substr(s, "<b>(.*?)</b>"); String tpix = StrgUtils.substr(s, "raquo; (.*?)x<br>"); // String tqual = StrgUtils.substr(s, "<i>\\((.*?)\\)</i>"); // url + format Pattern lr = Pattern.compile("<a href=\"(.*?)\">(.*?)</a>"); Matcher lm = lr.matcher(s); while (lm.find()) { String turl = lm.group(1); // String tformat = lm.group(2); MediaTrailer trailer = new MediaTrailer(); trailer.setName(tname); // trailer.setQuality(tpix + " (" + tformat + ")"); trailer.setQuality(tpix); trailer.setProvider("filmtrailer"); trailer.setUrl(turl); LOGGER.debug(trailer.toString()); trailers.add(trailer); } } // NEW STYLE (additional!) // <div class="clips" id="clips2" style="display: none;"> // <img src="images/flag_de.gif" align="left" vspace="3" width="18" height="12"> // <img src="images/trailer_6.gif" align="top" vspace="1" width="16" height="16" alt="freigegeben ab 6 Jahren"> // <i>Trailer 1:</i> // <a href="http://de.clip-1.filmtrailer.com/2845_6584_a_1.flv?log_var=67|491100001-1|-"> small </a> // <a href="http://de.clip-1.filmtrailer.com/2845_6584_a_2.flv?log_var=67|491100001-1|-"> medium </a> // <a href="http://de.clip-1.filmtrailer.com/2845_6584_a_3.flv?log_var=67|491100001-1|-"> large </a> // <a href="http://de.clip-1.filmtrailer.com/2845_6584_a_4.flv?log_var=67|491100001-1|-"> xlarge </a> // <a href="http://de.clip-1.filmtrailer.com/2845_6584_a_5.flv?log_var=67|491100001-1|-"> xxlarge </a> // <br> // <img src="images/flag_de.gif" align="left" vspace="3" width="18" height="12"> // <img src="images/trailer_6.gif" align="top" vspace="1" width="16" height="16" alt="freigegeben ab 6 Jahren"> // <i>Trailer 2:</i> // <a href="http://de.clip-1.filmtrailer.com/2845_8244_a_1.flv?log_var=67|491100001-1|-"> small </a> // <a href="http://de.clip-1.filmtrailer.com/2845_8244_a_2.flv?log_var=67|491100001-1|-"> medium </a> // <a href="http://de.clip-1.filmtrailer.com/2845_8244_a_3.flv?log_var=67|491100001-1|-"> large </a> // <a href="http://de.clip-1.filmtrailer.com/2845_8244_a_4.flv?log_var=67|491100001-1|-"> xlarge </a> // <a href="http://de.clip-1.filmtrailer.com/2845_8244_a_5.flv?log_var=67|491100001-1|-"> xxlarge </a> // <br> // <img src="images/flag_de.gif" align="left" vspace="3" width="18" height="12"> // <img src="images/trailer_6.gif" align="top" vspace="1" width="16" height="16" alt="freigegeben ab 6 Jahren"> // <i>Trailer 3:</i> // <a href="http://de.clip-1.filmtrailer.com/2845_14749_a_1.flv?log_var=67|491100001-1|-"> small </a> // <a href="http://de.clip-1.filmtrailer.com/2845_14749_a_2.flv?log_var=67|491100001-1|-"> medium </a> // <a href="http://de.clip-1.filmtrailer.com/2845_14749_a_3.flv?log_var=67|491100001-1|-"> large </a> // <a href="http://de.clip-1.filmtrailer.com/2845_14749_a_4.flv?log_var=67|491100001-1|-"> xlarge </a> // <a href="http://de.clip-1.filmtrailer.com/2845_14749_a_5.flv?log_var=67|491100001-1|-"> xxlarge </a> // <br> // <br> // </div> // new style size // 1 = 160 x 90 = small // 2 = 240 x 136 = medium // 3 = 320 x 180 = large // 4 = 400 x 226 = xlarge // 5 = 640 x 360 = xxlarge ; regex = Pattern.compile("<i>(.*?)</i>(.*?)<br>", Pattern.DOTALL); // get them as single trailer line m = regex.matcher(doc.getElementsByClass("clips").html()); while (m.find()) { // LOGGER.info(doc.getElementsByClass("clips").html()); // parse each line with 5 qualities String tname = m.group(1).trim(); tname = tname.replaceFirst(":$", ""); // replace ending colon String urls = m.group(2); // url + format Pattern lr = Pattern.compile("<a href=\"(.*?)\">(.*?)</a>"); Matcher lm = lr.matcher(urls); while (lm.find()) { String turl = lm.group(1); String tpix = ""; String tformat = lm.group(2).replaceAll(" ", "").trim(); switch (tformat) { case "small": tpix = "90p"; break; case "medium": tpix = "136p"; break; case "large": tpix = "180p"; break; case "xlarge": tpix = "226p"; break; case "xxlarge": tpix = "360p"; break; default: break; } MediaTrailer trailer = new MediaTrailer(); trailer.setName(tname); // trailer.setQuality(tpix + " (" + tformat + ")"); trailer.setQuality(tpix); trailer.setProvider("filmtrailer"); trailer.setUrl(turl); LOGGER.debug(trailer.toString()); trailers.add(trailer); } } } catch (Exception e) { if (url != null) { LOGGER.error("Error parsing {}", url.toString()); } else { LOGGER.error("Error parsing {}", searchString); } throw e; } return trailers; } /** * return a 2 element array. 0 = title; 1=date * * parses the title in the format Title YEAR or Title (YEAR) * * @param title * the title * @return the string[] */ private String[] parseTitle(String title) { String v[] = { "", "" }; if (title == null) return v; Pattern p = Pattern.compile("(.*)\\s+\\(?([0-9]{4})\\)?", Pattern.CASE_INSENSITIVE); Matcher m = p.matcher(title); if (m.find()) { v[0] = m.group(1); v[1] = m.group(2); } else { v[0] = title; } return v; } /** * Is i1 != i2 (when >0) */ private boolean yearDiffers(Integer i1, Integer i2) { return i1 != null && i1 != 0 && i2 != null && i2 != 0 && i1 != i2; } }