List of usage examples for the org.jsoup.select.Elements method first()
public Element first()
From source file:org.bonitasoft.web.designer.visitors.HtmlBuilderVisitorTest.java
@Test public void should_add_elements_to_the_container_rows() throws Exception { // we should have two div.col-xs-12 with two div.row containing added components Elements rows = toBody( visitor.visit(aContainer().with(aRow().with(aComponent().withWidgetId("pbLabel").build()), aRow().with(aComponent().withWidgetId("customLabel").build())).build())).select(".row"); assertThat(rows.size()).isEqualTo(2); assertThat(rows.first().select("pb-label").outerHtml()).isEqualTo("<pb-label></pb-label>"); assertThat(rows.last().select("custom-label").outerHtml()).isEqualTo("<custom-label></custom-label>"); }
From source file:org.coronastreet.gpxconverter.StravaForm.java
public void upload() { //httpClient = new DefaultHttpClient(); httpClient = HttpClientBuilder.create().build(); localContext = new BasicHttpContext(); cookieStore = new BasicCookieStore(); localContext.setAttribute(HttpClientContext.COOKIE_STORE, cookieStore); //httpClient.getParams().setParameter(ClientPNames.COOKIE_POLICY, CookiePolicy.BROWSER_COMPATIBILITY); if (doLogin()) { //log("Ok....logged in..."); try {/*from w ww . j av a 2 s.com*/ // Have to fetch the form to get the CSRF Token HttpGet get = new HttpGet(uploadFormURL); HttpResponse formResponse = httpClient.execute(get, localContext); //log("Fetched the upload form...: " + formResponse.getStatusLine()); org.jsoup.nodes.Document doc = Jsoup.parse(EntityUtils.toString(formResponse.getEntity())); String csrftoken, csrfparam; Elements metalinksParam = doc.select("meta[name=csrf-param]"); if (!metalinksParam.isEmpty()) { csrfparam = metalinksParam.first().attr("content"); } else { csrfparam = null; log("Missing csrf-param?"); } Elements metalinksToken = doc.select("meta[name=csrf-token]"); if (!metalinksToken.isEmpty()) { csrftoken = metalinksToken.first().attr("content"); } else { csrftoken = null; log("Missing csrf-token?"); } HttpPost request = new HttpPost(uploadURL); request.setHeader("X-CSRF-Token", csrftoken); MultipartEntity entity = new MultipartEntity(HttpMultipartMode.BROWSER_COMPATIBLE); entity.addPart("method", new StringBody("post")); entity.addPart("new_uploader", new StringBody("1")); entity.addPart(csrfparam, new StringBody(csrftoken)); entity.addPart("files[]", new InputStreamBody(document2InputStream(outDoc), "application/octet-stream", "temp.tcx")); // Need to do this bit because without it you can't disable chunked encoding, and Strava doesn't support chunked. 
ByteArrayOutputStream bArrOS = new ByteArrayOutputStream(); entity.writeTo(bArrOS); bArrOS.flush(); ByteArrayEntity bArrEntity = new ByteArrayEntity(bArrOS.toByteArray()); bArrOS.close(); bArrEntity.setChunked(false); bArrEntity.setContentEncoding(entity.getContentEncoding()); bArrEntity.setContentType(entity.getContentType()); request.setEntity(bArrEntity); HttpResponse response = httpClient.execute(request, localContext); if (response.getStatusLine().getStatusCode() != 200) { log("Failed to Upload"); HttpEntity en = response.getEntity(); if (en != null) { String output = EntityUtils.toString(en); log(output); } } else { HttpEntity ent = response.getEntity(); if (ent != null) { String output = EntityUtils.toString(ent); //log(output); JSONObject userInfo = new JSONArray(output).getJSONObject(0); //log("Object: " + userInfo.toString()); if (userInfo.get("workflow").equals("Error")) { log("Upload Error: " + userInfo.get("error")); } else { log("Successful Uploaded. ID is " + userInfo.get("id")); } } } httpClient.close(); } catch (Exception ex) { log("Exception? " + ex.getMessage()); ex.printStackTrace(); // handle exception here } } else { log("Failed to upload!"); } }
From source file:org.coronastreet.gpxconverter.StravaForm.java
protected boolean doLogin() { boolean ret = false; log("Authenticating athlete..."); try {//w w w . ja va 2 s . c om HttpGet get = new HttpGet(loginURL); HttpResponse response = httpClient.execute(get, localContext); //log("Fetched the login form...: " + response.getStatusLine()); org.jsoup.nodes.Document doc = Jsoup.parse(EntityUtils.toString(response.getEntity())); String csrftoken, csrfparam; Elements metalinksParam = doc.select("meta[name=csrf-param]"); if (!metalinksParam.isEmpty()) { csrfparam = metalinksParam.first().attr("content"); log("Setting csrf-param to " + csrfparam); } else { csrfparam = null; log("Missing csrf-param?"); } Elements metalinksToken = doc.select("meta[name=csrf-token]"); if (!metalinksToken.isEmpty()) { csrftoken = metalinksToken.first().attr("content"); log("Setting csrf-token to " + csrftoken); } else { csrftoken = null; log("Missing csrf-token?"); } HttpPost post = new HttpPost(sessionURL); post.setHeader("Referer", "https://www.strava.com/login"); List<NameValuePair> nvps = new ArrayList<NameValuePair>(); nvps.add(new BasicNameValuePair(csrfparam, csrftoken)); nvps.add(new BasicNameValuePair("plan", "")); nvps.add(new BasicNameValuePair("email", email)); nvps.add(new BasicNameValuePair("password", password)); post.setEntity(new UrlEncodedFormEntity(nvps, HTTP.UTF_8)); HttpResponse sessionResponse = httpClient.execute(post, localContext); if (sessionResponse.getStatusLine().getStatusCode() != 302) { log("Failed to Login. " + sessionResponse.getStatusLine().getStatusCode()); String output = EntityUtils.toString(sessionResponse.getEntity()); log(output); ret = false; } else { ret = true; } HttpEntity entity = sessionResponse.getEntity(); EntityUtils.consume(entity); } catch (Exception ex) { // handle exception here ex.printStackTrace(); } return ret; }
From source file:org.opens.tanaguru.crawler.CrawlerImpl.java
/** * Waiting for a better implementation, we parse here the html content * to detect the presence of the rel=canonical property. * @param content//from ww w.j a va 2s . co m * @return whether the current page defines a rel canonical Url and whether * this url is different from the current url. */ private boolean isRelCanonicalPage(Content content) { // @TODO make this implementation cleaner if (!treatRelCanonical) { return false; } if (!(content instanceof SSP)) { return false; } if (StringUtils.isBlank(((SSP) content).getSource())) { return false; } Elements relCanonical = Jsoup.parse(((SSP) content).getSource()).select(REL_CANONICAL_CSS_LIKE_QUERY); if (relCanonical.isEmpty() || relCanonical.size() > 1) { return false; } // At this step, we are sure that the rel canonical is defined and // is unique String href = relCanonical.first().attr("href"); if (href.equals(".")) { return false; } if (href.contains("//")) { href = href.substring(href.indexOf("//") + 2); } if (href.endsWith("/")) { href = href.substring(0, href.length() - 1); } String currentUrl = content.getURI(); if (currentUrl.endsWith("/")) { currentUrl = currentUrl.substring(0, currentUrl.length() - 1); } if (currentUrl.contains("//")) { currentUrl = currentUrl.substring(currentUrl.indexOf("//") + 2); } if (currentUrl.equals(href)) { LOGGER.info("rel canonical present but points to itself " + content.getURI()); return false; } return true; }
From source file:org.sbs.goodcrawler.extractor.selector.AbstractElementCssSelector.java
/** * ????/*from ww w.ja va2 s.co m*/ * @param elements * @return */ protected String getExtractText(Elements elements) { if (elements.size() == 0) return ""; String temp = ""; if (attr.equals("tostring")) { if (index == 0 || index > elements.size()) temp = elements.first().toString(); else temp = elements.get(index).toString(); } else { if (index == 0 || index > elements.size()) temp = elements.first().text(); else temp = elements.get(index).text(); } if (null != pattern) { Matcher m = pattern.matcher(temp); if (m.find()) { temp = m.group(1); } } return temp; }
From source file:org.sbs.goodcrawler.extractor.selector.AbstractElementCssSelector.java
/**
 * Extracts an attribute value from the selected elements.
 *
 * <p>The element at position {@code index} is used; when {@code index} is zero, negative,
 * or not a valid position in {@code elements}, the first element is used instead. If
 * {@code attr} equals {@code "tostring"} the element's {@code toString()} form is taken,
 * otherwise the value of the attribute named {@code attr}. When {@code pattern} is set and
 * matches, the result is replaced by capture group 1 of the first match.
 *
 * @param elements the selected elements
 * @param attr     attribute name to read, or {@code "tostring"} for the element's string form
 * @return the extracted string, or {@code ""} when {@code elements} is null or empty
 */
protected String getExtractAttr(Elements elements, String attr) {
    // Same guard as getExtractText: first() yields null on an empty selection.
    if (elements == null || elements.isEmpty()) {
        return "";
    }

    // Valid positions are 0 .. size-1; anything else (including index == size) falls back
    // to the first element instead of failing in elements.get(index).
    boolean useFirst = index <= 0 || index >= elements.size();

    String temp;
    if ("tostring".equals(attr)) {
        temp = useFirst ? elements.first().toString() : elements.get(index).toString();
    } else {
        temp = useFirst ? elements.first().attr(attr) : elements.get(index).attr(attr);
    }

    if (null != pattern) {
        Matcher m = pattern.matcher(temp);
        if (m.find()) {
            temp = m.group(1);
        }
    }
    return temp;
}
From source file:org.silverpeas.mobile.server.servlets.PublicationContentServlet.java
private void displayWysiwyg(String html, HttpServletRequest request, HttpServletResponse response, String instanceId) throws IOException { html = "<html><body>" + html + "</body></html>"; Document doc = Jsoup.parse(html); Elements body = doc.getElementsByTag("body"); if (!body.isEmpty()) { html = body.first().html(); }/*from w w w . j av a2 s .c om*/ Elements images = doc.getElementsByTag("img"); for (Element img : images) { String source = img.attr("src"); String newSource = source; if (source.contains("/silverpeas")) { // need to convert in dataurl newSource = convertSpImageUrlToDataUrl(source); } img.attr("src", newSource); } Elements embeds = doc.getElementsByTag("embed"); for (Element embed : embeds) { String htmlPart = embed.outerHtml(); if (htmlPart.contains("flash")) { String attachmentId = htmlPart .substring(htmlPart.indexOf("attachmentId/") + "attachmentId/".length()); attachmentId = attachmentId.substring(0, attachmentId.indexOf("/")); SimpleDocument attachment = AttachmentServiceProvider.getAttachmentService().searchDocumentById( new SimpleDocumentPK(attachmentId), getUserInSession(request).getUserPreferences().getLanguage()); String type = attachment.getContentType(); String url = getServletContext().getContextPath() + "/services/spmobile/Attachment"; url = url + "?id=" + attachmentId + "&instanceId=" + instanceId + "&lang=" + getUserInSession(request).getUserPreferences().getLanguage() + "&userId=" + getUserInSession(request).getId(); if (type.equals("audio/mpeg") || type.equals("audio/ogg") || type.equals("audio/wav")) { embed.parent().append("<audio controls><source src='" + url + "' type='" + type + "'></audio>"); embed.remove(); } else if (type.equals("video/mp4") || type.equals("video/ogg") || type.equals("video/webm")) { embed.parent() .append("<video controls='controls'><source src='" + url + "' type='" + type + "' />"); embed.remove(); } } } html = doc.outerHtml(); OutputStreamWriter out = new OutputStreamWriter(response.getOutputStream(), 
"UTF-8"); writeContainer(out, html); out.flush(); }
From source file:org.tinymediamanager.scraper.aebn.AebnMetadataProvider.java
/** * Get movie meta data from aebn.net./*www . j a v a 2s . c o m*/ * */ @Override public MediaMetadata getMetadata(MediaScrapeOptions options) throws Exception { LOGGER.debug("AEBN: getMetadata() {}", options); // check if there is already meta data present in the result if ((options.getResult() != null) && (options.getResult().getMediaMetadata() != null)) { LOGGER.debug("AEBN: return metadata from cache"); return options.getResult().getMediaMetadata(); } MediaMetadata md = new MediaMetadata(providerInfo.getId()); Elements elements = null; Element element = null; Integer aebnId = 0; // get AebnId from previous search result if ((options.getResult() != null) && (options.getResult().getId() != null)) { aebnId = Integer.parseInt(options.getResult().getId()); LOGGER.debug("AEBN: aebnId() from previous search result = {}", aebnId); // preset some values from search result (if there is one) // Use core.Utils.RemoveSortableName() if you want e.g. "Bourne Legacy, The" -> "The Bourne Legacy". md.storeMetadata(MediaMetadata.ORIGINAL_TITLE, StrgUtils.removeCommonSortableName(options.getResult().getOriginalTitle())); md.storeMetadata(MediaMetadata.TITLE, StrgUtils.removeCommonSortableName(options.getResult().getTitle())); } // or get AebnId from options if (!isValidAebnId(aebnId) && (options.getId(AEBNID) != null)) { LOGGER.debug("AEBN: aebnId() from options = {}", options.getId(AEBNID)); aebnId = Integer.parseInt(options.getId(AEBNID)); } if (!isValidAebnId(aebnId)) { LOGGER.warn("AEBN: no or incorrect aebnId, aborting"); return md; } // ID md.setId(providerInfo.getId(), aebnId); LOGGER.debug("AEBN: aebnId({})", aebnId); // Base download url for data scraping String downloadUrl = BASE_DATAURL + "/dispatcher/movieDetail?movieId=" + aebnId; String locale = options.getLanguage().name(); if (!StringUtils.isBlank(locale)) { downloadUrl = downloadUrl + "&locale=" + locale; LOGGER.debug("AEBN: used locale({})", locale); } // begin download and scrape try { LOGGER.debug("AEBN: 
download movie detail page"); Url url = new Url(downloadUrl); InputStream in = url.getInputStream(); Document document = Jsoup.parse(in, "UTF-8", ""); in.close(); // Title // <h1 itemprop="name" class="md-movieTitle" >Titelname</h1> LOGGER.debug("AEBN: parse title"); elements = document.getElementsByAttributeValue("class", "md-movieTitle"); if (elements.size() > 0) { LOGGER.debug("AEBN: {} elements found (should be one!)", elements.size()); element = elements.first(); String movieTitle = cleanString(element.text()); LOGGER.debug("AEBN: title({})", movieTitle); md.storeMetadata(MediaMetadata.TITLE, movieTitle); } // Poster // front cover: // http://pic.aebn.net/Stream/Movie/Boxcovers/a66568_xlf.jpg String posterUrl = BASE_IMGURL + "/Stream/Movie/Boxcovers/a" + aebnId.toString() + "_xlf.jpg"; md.storeMetadata(MediaMetadata.POSTER_URL, posterUrl); // Fanart/Background // http://pic.aebn.net/Stream/Movie/Scenes/a113324_s534541.jpg // <img class="sceneThumbnail" alt="Scene Thumbnail" title="Scene Thumbnail" onError="..." // src="http://pic.aebn.net/Stream/Movie/Scenes/a113324_s534544.jpg" onclick="..." 
/> LOGGER.debug("AEBN: parse fanart / scene thumbs"); elements = document.getElementsByAttributeValue("class", "SceneThumbnail"); LOGGER.debug("AEBN: {} elements found", elements.size()); int i = 1; for (Element anchor : elements) { String backgroundUrl = anchor.attr("src"); LOGGER.debug("AEBN: backgroundUrl{}({})", i, backgroundUrl); md.storeMetadata("backgroundUrl" + Integer.valueOf(i).toString(), backgroundUrl); i++; } // Runtime LOGGER.debug("AEBN: parse runtime"); elements = document.getElementsByAttributeValue("id", "md-details").select("[itemprop=duration]"); if (elements.size() > 0) { LOGGER.debug("AEBN: " + elements.size() + " elements found (should be one!)"); element = elements.first(); String movieRuntime = cleanString(element.attr("content")); movieRuntime = StrgUtils.substr(movieRuntime, "PT(\\d+)M"); LOGGER.debug("AEBN: runtime({})", movieRuntime); md.storeMetadata(MediaMetadata.RUNTIME, movieRuntime); } // Year LOGGER.debug("AEBN: parse year"); elements = document.getElementsByAttributeValue("id", "md-details").select("[itemprop=datePublished]"); if (elements.size() > 0) { LOGGER.debug("AEBN: " + elements.size() + " elements found (should be one!)"); element = elements.first(); String movieYear = cleanString(element.attr("content")); movieYear = StrgUtils.substr(movieYear, "(\\d+)-"); LOGGER.debug("AEBN: year({})", movieYear); md.storeMetadata(MediaMetadata.YEAR, movieYear); } // Series (Collection) LOGGER.debug("AEBN: parse collection"); elements = document.getElementsByAttributeValue("id", "md-details").select("[class=series]"); if (elements.size() > 0) { LOGGER.debug("AEBN: {} elements found (should be one!)", elements.size()); element = elements.first(); String movieCollection = cleanString(element.text()); // Fake a TMDB_SET based on the hash value of the collection name int movieCollectionHash = movieCollection.hashCode(); md.storeMetadata(MediaMetadata.COLLECTION_NAME, movieCollection); md.storeMetadata(MediaMetadata.TMDB_SET, 
movieCollectionHash); LOGGER.debug("AEBN: collection({}), hashcode({})", movieCollection, movieCollectionHash); } // Studio LOGGER.debug("AEBN: parse studio"); elements = document.getElementsByAttributeValue("id", "md-details") .select("[itemprop=productionCompany]"); if (elements.size() > 0) { LOGGER.debug("AEBN: {} elements found (should be one!)", elements.size()); String movieStudio = cleanString(elements.first().text()); LOGGER.debug("AEBN: studio({})", movieStudio); md.storeMetadata(MediaMetadata.PRODUCTION_COMPANY, movieStudio); } // Genre LOGGER.debug("AEBN: parse genre"); elements = document.getElementsByAttributeValue("id", "md-details").select("[itemprop=genre]"); for (Element g : elements) { md.addGenre(getTmmGenre(g.text())); } // add basic genre, since all genres at AEBN could be summarised // into this one md.addGenre(MediaGenres.EROTIC); // Certification // no data scrapeable---but obviously it's adult only, so simply // generate it String movieCertification = null; Certification certification = null; String country = options.getCountry().getAlpha2(); LOGGER.debug("AEBN: generate certification for {}", country); // @formatter:off if (country.equals("DE")) { movieCertification = "FSK 18"; } if (country.equals("US")) { movieCertification = "NC-17"; } if (country.equals("GB")) { movieCertification = "R18"; } if (country.equals("FR")) { movieCertification = "18"; } if (country.equals("ES")) { movieCertification = "PX"; } if (country.equals("JP")) { movieCertification = "R18+"; } if (country.equals("IT")) { movieCertification = "V.M.18"; } if (country.equals("NL")) { movieCertification = "16"; } // @formatter:on certification = Certification.getCertification(options.getCountry(), movieCertification); if (certification != null) { LOGGER.debug("AEBN: certification({})", certification); md.addCertification(certification); } // Plot and Tagline LOGGER.debug("AEBN: parse plot"); elements = document.getElementsByAttributeValue("id", 
"md-details").select("[itemprop=about]"); if (elements.size() > 0) { LOGGER.debug("AEBN: {} elements found (should be one!)", elements.size()); String moviePlot = cleanString(elements.first().text()); md.storeMetadata(MediaMetadata.PLOT, moviePlot); // no separate tagline available, so extract the first sentence // from the movie plot String movieTagline = StrgUtils.substr(moviePlot, "^(.*?[.!?:])"); LOGGER.debug("AEBN: tagline(" + movieTagline + ")"); md.storeMetadata(MediaMetadata.TAGLINE, movieTagline); } // Actors LOGGER.debug("AEBN: parse actors"); elements = document.getElementsByAttributeValue("id", "md-details").select("[itemprop=actor]"); LOGGER.debug("AEBN: {} actors found", elements.size()); for (Element anchor : elements) { String actorid = StrgUtils.substr(anchor.toString(), "starId=(\\d+)"); String actorname = cleanString(anchor.select("[itemprop=name]").first().text()); String actordetailsurl = BASE_DATAURL + anchor.attr("href"); if (!actorname.isEmpty()) { LOGGER.debug("AEBN: add actor id({}), name({}), details({})", actorid, actorname, actordetailsurl); MediaCastMember cm = new MediaCastMember(); cm.setType(MediaCastMember.CastType.ACTOR); cm.setName(actorname); if (!actorid.isEmpty()) { cm.setId(actorid); } // Actor detail page try { Url starurl = new Url(actordetailsurl); InputStream starurlstream = starurl.getInputStream(); Document stardocument = Jsoup.parse(starurlstream, "UTF-8", ""); starurlstream.close(); Elements elements2 = stardocument.getElementsByAttributeValue("class", "StarInfo"); if (elements2.size() == 0) { LOGGER.debug("AEBN: no additional actor details found"); } else { // Actor image String actorimage = elements2.select("[itemprop=image]").first().attr("src"); LOGGER.debug("AEBN: actor image({})", actorimage); if (!actorimage.isEmpty()) { cm.setImageUrl(actorimage); } // Actor 'fanart' images // unsure if this is ever shown in tmm elements2 = stardocument.getElementsByAttributeValue("class", "StarDetailGallery") .select("a"); 
LOGGER.debug("AEBN: {} gallery images found", elements2.size()); for (Element thumbnail : elements2) { LOGGER.debug("AEBN: add fanart image({})", thumbnail.attr("href")); cm.addFanart(thumbnail.attr("href")); } } } catch (Exception e) { LOGGER.error("AEBN: Error downloading {}: {}", actordetailsurl, e); } md.addCastMember(cm); } } // Director LOGGER.debug("AEBN: parse director"); elements = document.getElementsByAttributeValue("id", "md-details").select("[itemprop=director]"); if (elements.size() > 0) { LOGGER.debug("AEBN: {} elements found (should be one!)", elements.size()); String directorid = StrgUtils.substr(elements.toString(), "directorID=(\\d+)"); String directorname = cleanString(elements.select("[itemprop=name]").first().text()); if (!directorname.isEmpty()) { MediaCastMember cm = new MediaCastMember(CastType.DIRECTOR); cm.setName(directorname); if (!directorid.isEmpty()) { cm.setId(directorid); } cm.setImageUrl(""); md.addCastMember(cm); LOGGER.debug("AEBN: add director id({}), name({})", directorid, directorname); } } // Original Title // if we have no original title, just copy the title if (StringUtils.isBlank(md.getStringValue(MediaMetadata.ORIGINAL_TITLE))) { md.storeMetadata(MediaMetadata.ORIGINAL_TITLE, md.getStringValue(MediaMetadata.TITLE)); } } catch (Exception e) { LOGGER.error("AEBN: Error parsing {}: {}", options.getResult().getUrl(), e); } return md; }
From source file:org.tinymediamanager.scraper.imdb.ImdbMetadataProvider.java
@Override public MediaMetadata getMetadata(MediaScrapeOptions options) throws Exception { LOGGER.debug("getMetadata() " + options.toString()); // check if there is a md in the result if (options.getResult() != null && options.getResult().getMetadata() != null) { LOGGER.debug("IMDB: getMetadata from cache: " + options.getResult()); return options.getResult().getMetadata(); }//from w ww . j a va 2s.c om MediaMetadata md = new MediaMetadata(providerInfo.getId()); String imdbId = ""; // imdbId from searchResult if (options.getResult() != null) { imdbId = options.getResult().getIMDBId(); } // imdbid from scraper option if (!MetadataUtil.isValidImdbId(imdbId)) { imdbId = options.getImdbId(); } if (!MetadataUtil.isValidImdbId(imdbId)) { return md; } LOGGER.debug("IMDB: getMetadata(imdbId): " + imdbId); md.setId(MediaMetadata.IMDBID, imdbId); ExecutorCompletionService<Document> compSvcImdb = new ExecutorCompletionService<Document>(executor); ExecutorCompletionService<MediaMetadata> compSvcTmdb = new ExecutorCompletionService<MediaMetadata>( executor); // worker for imdb request (/combined) (everytime from akas.imdb.com) // StringBuilder sb = new StringBuilder(imdbSite.getSite()); StringBuilder sb = new StringBuilder(ImdbSiteDefinition.IMDB_COM.getSite()); sb.append("title/"); sb.append(imdbId); sb.append("/combined"); Callable<Document> worker = new ImdbWorker(sb.toString(), options.getLanguage().name(), options.getCountry().getAlpha2()); Future<Document> futureCombined = compSvcImdb.submit(worker); // worker for imdb request (/plotsummary) (from chosen site) Future<Document> futurePlotsummary = null; sb = new StringBuilder(imdbSite.getSite()); sb.append("title/"); sb.append(imdbId); sb.append("/plotsummary"); worker = new ImdbWorker(sb.toString(), options.getLanguage().name(), options.getCountry().getAlpha2()); futurePlotsummary = compSvcImdb.submit(worker); // worker for tmdb request Future<MediaMetadata> futureTmdb = null; if (options.isScrapeImdbForeignLanguage() || 
options.isScrapeCollectionInfo()) { Callable<MediaMetadata> worker2 = new TmdbWorker(imdbId, options.getLanguage(), options.getCountry()); futureTmdb = compSvcTmdb.submit(worker2); } Document doc; doc = futureCombined.get(); /* * title and year have the following structure * * <div id="tn15title"><h1>Merida - Legende der Highlands <span>(<a href="/year/2012/">2012</a>) <span class="pro-link">...</span> <span * class="title-extra">Brave <i>(original title)</i></span> </span></h1> </div> */ // parse title and year Element title = doc.getElementById("tn15title"); if (title != null) { Element element = null; // title Elements elements = title.getElementsByTag("h1"); if (elements.size() > 0) { element = elements.first(); String movieTitle = cleanString(element.ownText()); md.storeMetadata(MediaMetadata.TITLE, movieTitle); } // year elements = title.getElementsByTag("span"); if (elements.size() > 0) { element = elements.first(); String content = element.text(); // search year Pattern yearPattern = Pattern.compile("\\(([0-9]{4})|/\\)"); Matcher matcher = yearPattern.matcher(content); while (matcher.find()) { if (matcher.group(1) != null) { String movieYear = matcher.group(1); md.storeMetadata(MediaMetadata.YEAR, movieYear); break; } } } // original title elements = title.getElementsByAttributeValue("class", "title-extra"); if (elements.size() > 0) { element = elements.first(); String content = element.text(); content = content.replaceAll("\\(original title\\)", "").trim(); md.storeMetadata(MediaMetadata.ORIGINAL_TITLE, content); } } // poster Element poster = doc.getElementById("primary-poster"); if (poster != null) { String posterUrl = poster.attr("src"); posterUrl = posterUrl.replaceAll("SX[0-9]{2,4}_", "SX400_"); posterUrl = posterUrl.replaceAll("SY[0-9]{2,4}_", "SY400_"); processMediaArt(md, MediaArtworkType.POSTER, "Poster", posterUrl); } /* * <div class="starbar-meta"> <b>7.4/10</b> <a href="ratings" class="tn15more">52,871 votes</a> » </div> */ // rating and rating 
count Element ratingElement = doc.getElementById("tn15rating"); if (ratingElement != null) { Elements elements = ratingElement.getElementsByClass("starbar-meta"); if (elements.size() > 0) { Element div = elements.get(0); // rating comes in <b> tag Elements b = div.getElementsByTag("b"); if (b.size() == 1) { String ratingAsString = b.text(); Pattern ratingPattern = Pattern.compile("([0-9]\\.[0-9])/10"); Matcher matcher = ratingPattern.matcher(ratingAsString); while (matcher.find()) { if (matcher.group(1) != null) { float rating = 0; try { rating = Float.valueOf(matcher.group(1)); } catch (Exception e) { } md.storeMetadata(MediaMetadata.RATING, rating); break; } } } // count Elements a = div.getElementsByAttributeValue("href", "ratings"); if (a.size() == 1) { String countAsString = a.text().replaceAll("[.,]|votes", "").trim(); int voteCount = 0; try { voteCount = Integer.parseInt(countAsString); } catch (Exception e) { } md.storeMetadata(MediaMetadata.VOTE_COUNT, voteCount); } } // top250 elements = ratingElement.getElementsByClass("starbar-special"); if (elements.size() > 0) { Elements a = elements.get(0).getElementsByTag("a"); if (a.size() > 0) { Element anchor = a.get(0); Pattern topPattern = Pattern.compile("Top 250: #([0-9]{1,3})"); Matcher matcher = topPattern.matcher(anchor.ownText()); while (matcher.find()) { if (matcher.group(1) != null) { int top250 = 0; try { top250 = Integer.parseInt(matcher.group(1)); } catch (Exception e) { } md.storeMetadata(MediaMetadata.TOP_250, top250); } } } } } // parse all items coming by <div class="info"> Elements elements = doc.getElementsByClass("info"); for (Element element : elements) { // only parse divs if (!"div".equals(element.tag().getName())) { continue; } // elements with h5 are the titles of the values Elements h5 = element.getElementsByTag("h5"); if (h5.size() > 0) { Element firstH5 = h5.first(); String h5Title = firstH5.text(); // release date /* * <div class="info"><h5>Release Date:</h5><div 
class="info-content">5 January 1996 (USA)<a class="tn15more inline" * href="/title/tt0114746/releaseinfo" * onclick="(new Image()).src='/rg/title-tease/releasedates/images/b.gif?link=/title/tt0114746/releaseinfo';"> See more</a> </div></div> */ if (h5Title.matches("(?i)" + ImdbSiteDefinition.IMDB_COM.getReleaseDate() + ".*")) { Elements div = element.getElementsByClass("info-content"); if (div.size() > 0) { Element releaseDateElement = div.first(); String releaseDate = cleanString(releaseDateElement.ownText().replaceAll("", "")); Pattern pattern = Pattern.compile("(.*)\\(.*\\)"); Matcher matcher = pattern.matcher(releaseDate); if (matcher.find()) { try { SimpleDateFormat sdf = new SimpleDateFormat("d MMM yyyy"); Date parsedDate = sdf.parse(matcher.group(1)); sdf = new SimpleDateFormat("dd-MM-yyyy"); md.storeMetadata(MediaMetadata.RELEASE_DATE, sdf.format(parsedDate)); } catch (Exception e) { } } } } /* * <div class="info"><h5>Tagline:</h5><div class="info-content"> (7) To Defend Us... <a class="tn15more inline" * href="/title/tt0472033/taglines" onClick= "(new Image()).src='/rg/title-tease/taglines/images/b.gif?link=/title/tt0472033/taglines';" >See * more</a> » </div></div> */ // tagline if (h5Title.matches("(?i)" + ImdbSiteDefinition.IMDB_COM.getTagline() + ".*") && !options.isScrapeImdbForeignLanguage()) { Elements div = element.getElementsByClass("info-content"); if (div.size() > 0) { Element taglineElement = div.first(); String tagline = cleanString(taglineElement.ownText().replaceAll("", "")); md.storeMetadata(MediaMetadata.TAGLINE, tagline); } } /* * <div class="info-content"><a href="/Sections/Genres/Animation/">Animation</a> | <a href="/Sections/Genres/Action/">Action</a> | <a * href="/Sections/Genres/Adventure/">Adventure</a> | <a href="/Sections/Genres/Fantasy/">Fantasy</a> | <a * href="/Sections/Genres/Mystery/">Mystery</a> | <a href="/Sections/Genres/Sci-Fi/">Sci-Fi</a> | <a * href="/Sections/Genres/Thriller/">Thriller</a> <a class="tn15more inline" 
href="/title/tt0472033/keywords" onClick= * "(new Image()).src='/rg/title-tease/keywords/images/b.gif?link=/title/tt0472033/keywords';" > See more</a> » </div> */ // genres are only scraped from akas.imdb.com if (h5Title.matches("(?i)" + imdbSite.getGenre() + "(.*)")) { Elements div = element.getElementsByClass("info-content"); if (div.size() > 0) { Elements a = div.first().getElementsByTag("a"); for (Element anchor : a) { if (anchor.attr("href").matches("/Sections/Genres/.*")) { md.addGenre(getTmmGenre(anchor.ownText())); } } } } // } /* * <div class="info"><h5>Runtime:</h5><div class="info-content">162 min | 171 min (special edition) | 178 min (extended cut)</div></div> */ // runtime // if (h5Title.matches("(?i)" + imdbSite.getRuntime() + ".*")) { if (h5Title.matches("(?i)" + ImdbSiteDefinition.IMDB_COM.getRuntime() + ".*")) { Elements div = element.getElementsByClass("info-content"); if (div.size() > 0) { Element taglineElement = div.first(); String first = taglineElement.ownText().split("\\|")[0]; String runtimeAsString = cleanString(first.replaceAll("min", "")); int runtime = 0; try { runtime = Integer.parseInt(runtimeAsString); } catch (Exception e) { // try to filter out the first number we find Pattern runtimePattern = Pattern.compile("([0-9]{2,3})"); Matcher matcher = runtimePattern.matcher(runtimeAsString); if (matcher.find()) { runtime = Integer.parseInt(matcher.group(0)); } } md.storeMetadata(MediaMetadata.RUNTIME, runtime); } } /* * <div class="info"><h5>Country:</h5><div class="info-content"><a href="/country/fr">France</a> | <a href="/country/es">Spain</a> | <a * href="/country/it">Italy</a> | <a href="/country/hu">Hungary</a></div></div> */ // country if (h5Title.matches("(?i)Country.*")) { Elements a = element.getElementsByTag("a"); String countries = ""; for (Element anchor : a) { Pattern pattern = Pattern.compile("/country/(.*)"); Matcher matcher = pattern.matcher(anchor.attr("href")); if (matcher.matches()) { String country = matcher.group(1); 
if (StringUtils.isNotEmpty(countries)) { countries += ", "; } countries += country.toUpperCase(); } } md.storeMetadata(MediaMetadata.COUNTRY, countries); } /* * <div class="info"><h5>Language:</h5><div class="info-content"><a href="/language/en">English</a> | <a href="/language/de">German</a> | <a * href="/language/fr">French</a> | <a href="/language/it">Italian</a></div> */ // Spoken languages if (h5Title.matches("(?i)Language.*")) { Elements a = element.getElementsByTag("a"); String spokenLanguages = ""; for (Element anchor : a) { Pattern pattern = Pattern.compile("/language/(.*)"); Matcher matcher = pattern.matcher(anchor.attr("href")); if (matcher.matches()) { String langu = matcher.group(1); if (StringUtils.isNotEmpty(spokenLanguages)) { spokenLanguages += ", "; } spokenLanguages += langu; } } md.storeMetadata(MediaMetadata.SPOKEN_LANGUAGES, spokenLanguages); } /* * <div class="info"><h5>Certification:</h5><div class="info-content"><a href="/search/title?certificates=us:pg">USA:PG</a> <i>(certificate * #47489)</i> | <a href="/search/title?certificates=ca:pg">Canada:PG</a> <i>(Ontario)</i> | <a * href="/search/title?certificates=au:pg">Australia:PG</a> | <a href="/search/title?certificates=in:u">India:U</a> | <a * href="/search/title?certificates=ie:pg">Ireland:PG</a> ...</div></div> */ // certification // if (h5Title.matches("(?i)" + imdbSite.getCertification() + ".*")) { if (h5Title.matches("(?i)" + ImdbSiteDefinition.IMDB_COM.getCertification() + ".*")) { Elements a = element.getElementsByTag("a"); for (Element anchor : a) { // certification for the right country if (anchor.attr("href").matches( "(?i)/search/title\\?certificates=" + options.getCountry().getAlpha2() + ".*")) { Pattern certificationPattern = Pattern.compile(".*:(.*)"); Matcher matcher = certificationPattern.matcher(anchor.ownText()); Certification certification = null; while (matcher.find()) { if (matcher.group(1) != null) { certification = Certification.getCertification(options.getCountry(), 
matcher.group(1)); } } if (certification != null) { md.addCertification(certification); break; } } } } } /* * <div id="director-info" class="info"> <h5>Director:</h5> <div class="info-content"><a href="/name/nm0000416/" onclick= * "(new Image()).src='/rg/directorlist/position-1/images/b.gif?link=name/nm0000416/';" >Terry Gilliam</a><br/> </div> </div> */ // director if ("director-info".equals(element.id())) { Elements a = element.getElementsByTag("a"); for (Element anchor : a) { if (anchor.attr("href").matches("/name/nm.*")) { MediaCastMember cm = new MediaCastMember(CastType.DIRECTOR); cm.setName(anchor.ownText()); md.addCastMember(cm); } } } } /* * <table class="cast"> <tr class="odd"><td class="hs"><a href="http://pro.imdb.com/widget/resume_redirect/" onClick= * "(new Image()).src='/rg/resume/prosystem/images/b.gif?link=http://pro.imdb.com/widget/resume_redirect/';" ><img src= * "http://i.media-imdb.com/images/SF9113d6f5b7cb1533c35313ccd181a6b1/tn15/no_photo.png" width="25" height="31" border="0"></td><td class="nm"><a * href="/name/nm0577828/" onclick= "(new Image()).src='/rg/castlist/position-1/images/b.gif?link=/name/nm0577828/';" >Joseph Melito</a></td><td * class="ddd"> ... </td><td class="char"><a href="/character/ch0003139/">Young Cole</a></td></tr> <tr class="even"><td class="hs"><a * href="/name/nm0000246/" onClick= "(new Image()).src='/rg/title-tease/tinyhead/images/b.gif?link=/name/nm0000246/';" ><img src= * "http://ia.media-imdb.com/images/M/MV5BMjA0MjMzMTE5OF5BMl5BanBnXkFtZTcwMzQ2ODE3Mw@@._V1._SY30_SX23_.jpg" width="23" height="32" * border="0"></a><br></td><td class="nm"><a href="/name/nm0000246/" onclick= * "(new Image()).src='/rg/castlist/position-2/images/b.gif?link=/name/nm0000246/';" >Bruce Willis</a></td><td class="ddd"> ... 
</td><td * class="char"><a href="/character/ch0003139/">James Cole</a></td></tr> <tr class="odd"><td class="hs"><a href="/name/nm0781218/" onClick= * "(new Image()).src='/rg/title-tease/tinyhead/images/b.gif?link=/name/nm0781218/';" ><img src= * "http://ia.media-imdb.com/images/M/MV5BODI1MTA2MjkxM15BMl5BanBnXkFtZTcwMTcwMDg2Nw@@._V1._SY30_SX23_.jpg" width="23" height="32" * border="0"></a><br></td><td class="nm"><a href="/name/nm0781218/" onclick= * "(new Image()).src='/rg/castlist/position-3/images/b.gif?link=/name/nm0781218/';" >Jon Seda</a></td><td class="ddd"> ... </td><td * class="char"><a href="/character/ch0003143/">Jose</a></td></tr>...</table> */ // cast elements = doc.getElementsByClass("cast"); if (elements.size() > 0) { Elements tr = elements.get(0).getElementsByTag("tr"); for (Element row : tr) { Elements td = row.getElementsByTag("td"); MediaCastMember cm = new MediaCastMember(); for (Element column : td) { // actor thumb if (column.hasClass("hs")) { Elements img = column.getElementsByTag("img"); if (img.size() > 0) { String thumbUrl = img.get(0).attr("src"); if (thumbUrl.contains("no_photo.png")) { cm.setImageUrl(""); } else { thumbUrl = thumbUrl.replaceAll("SX[0-9]{2,4}_", "SX400_"); thumbUrl = thumbUrl.replaceAll("SY[0-9]{2,4}_", ""); cm.setImageUrl(thumbUrl); } } } // actor name if (column.hasClass("nm")) { cm.setName(cleanString(column.text())); } // character if (column.hasClass("char")) { cm.setCharacter(cleanString(column.text())); } } if (StringUtils.isNotEmpty(cm.getName()) && StringUtils.isNotEmpty(cm.getCharacter())) { cm.setType(CastType.ACTOR); md.addCastMember(cm); } } } Element content = doc.getElementById("tn15content"); if (content != null) { elements = content.getElementsByTag("table"); for (Element table : elements) { // writers if (table.text().contains(ImdbSiteDefinition.IMDB_COM.getWriter())) { Elements anchors = table.getElementsByTag("a"); for (Element anchor : anchors) { if (anchor.attr("href").matches("/name/nm.*")) { 
MediaCastMember cm = new MediaCastMember(CastType.WRITER); cm.setName(anchor.ownText()); md.addCastMember(cm); } } } // producers if (table.text().contains(ImdbSiteDefinition.IMDB_COM.getProducers())) { Elements rows = table.getElementsByTag("tr"); for (Element row : rows) { if (row.text().contains(ImdbSiteDefinition.IMDB_COM.getProducers())) { continue; } Elements columns = row.children(); if (columns.size() == 0) { continue; } MediaCastMember cm = new MediaCastMember(CastType.PRODUCER); String name = cleanString(columns.get(0).text()); if (StringUtils.isBlank(name)) { continue; } cm.setName(name); if (columns.size() >= 3) { cm.setPart(cleanString(columns.get(2).text())); } md.addCastMember(cm); } } } } // Production companies elements = doc.getElementsByClass("blackcatheader"); for (Element blackcatheader : elements) { if (blackcatheader.ownText().equals(ImdbSiteDefinition.IMDB_COM.getProductionCompanies())) { Elements a = blackcatheader.nextElementSibling().getElementsByTag("a"); StringBuilder productionCompanies = new StringBuilder(); for (Element anchor : a) { if (StringUtils.isNotEmpty(productionCompanies)) { productionCompanies.append(", "); } productionCompanies.append(anchor.ownText()); } md.storeMetadata(MediaMetadata.PRODUCTION_COMPANY, productionCompanies.toString()); break; } } /* * plot from /plotsummary */ // build the url doc = null; doc = futurePlotsummary.get(); // imdb.com has another site structure if (imdbSite == ImdbSiteDefinition.IMDB_COM) { Elements zebraList = doc.getElementsByClass("zebraList"); if (zebraList != null && !zebraList.isEmpty()) { Elements odd = zebraList.get(0).getElementsByClass("odd"); if (odd.isEmpty()) { odd = zebraList.get(0).getElementsByClass("even"); // sometimes imdb has even } if (odd.size() > 0) { Elements p = odd.get(0).getElementsByTag("p"); if (p.size() > 0) { String plot = cleanString(p.get(0).ownText()); md.storeMetadata(MediaMetadata.PLOT, plot); } } } } else { Element wiki = doc.getElementById("swiki.2.1"); 
if (wiki != null) { String plot = cleanString(wiki.ownText()); md.storeMetadata(MediaMetadata.PLOT, plot); } } // title also from chosen site if we are not scraping akas.imdb.com if (imdbSite != ImdbSiteDefinition.IMDB_COM) { title = doc.getElementById("tn15title"); if (title != null) { Element element = null; // title elements = title.getElementsByClass("main"); if (elements.size() > 0) { element = elements.first(); String movieTitle = cleanString(element.ownText()); md.storeMetadata(MediaMetadata.TITLE, movieTitle); } } } // } // get data from tmdb? if (options.isScrapeImdbForeignLanguage() || options.isScrapeCollectionInfo()) { MediaMetadata tmdbMd = futureTmdb.get(); if (options.isScrapeImdbForeignLanguage() && tmdbMd != null && StringUtils.isNotBlank(tmdbMd.getStringValue(MediaMetadata.PLOT))) { // tmdbid md.setId(MediaMetadata.TMDBID, tmdbMd.getId(MediaMetadata.TMDBID)); // title md.storeMetadata(MediaMetadata.TITLE, tmdbMd.getStringValue(MediaMetadata.TITLE)); // original title md.storeMetadata(MediaMetadata.ORIGINAL_TITLE, tmdbMd.getStringValue(MediaMetadata.ORIGINAL_TITLE)); // tagline md.storeMetadata(MediaMetadata.TAGLINE, tmdbMd.getStringValue(MediaMetadata.TAGLINE)); // plot md.storeMetadata(MediaMetadata.PLOT, tmdbMd.getStringValue(MediaMetadata.PLOT)); // collection info md.storeMetadata(MediaMetadata.COLLECTION_NAME, tmdbMd.getStringValue(MediaMetadata.COLLECTION_NAME)); md.storeMetadata(MediaMetadata.TMDBID_SET, tmdbMd.getIntegerValue(MediaMetadata.TMDBID_SET)); } if (options.isScrapeCollectionInfo() && tmdbMd != null) { md.storeMetadata(MediaMetadata.TMDBID_SET, tmdbMd.getIntegerValue(MediaMetadata.TMDBID_SET)); md.storeMetadata(MediaMetadata.COLLECTION_NAME, tmdbMd.getStringValue(MediaMetadata.COLLECTION_NAME)); } } // if we have still no original title, take the title if (StringUtils.isBlank(md.getStringValue(MediaMetadata.ORIGINAL_TITLE))) { md.storeMetadata(MediaMetadata.ORIGINAL_TITLE, md.getStringValue(MediaMetadata.TITLE)); } return md; }
From source file:org.tinymediamanager.scraper.imdb.ImdbMovieParser.java
MediaMetadata getMovieMetadata(MediaScrapeOptions options) throws Exception { MediaMetadata md = new MediaMetadata(providerInfo.getId()); // check if there is a md in the result if (options.getResult() != null && options.getResult().getMediaMetadata() != null) { LOGGER.debug("IMDB: getMetadata from cache: " + options.getResult()); return options.getResult().getMediaMetadata(); }// w ww. j a v a 2s .c o m String imdbId = ""; // imdbId from searchResult if (options.getResult() != null) { imdbId = options.getResult().getIMDBId(); } // imdbid from scraper option if (!MetadataUtil.isValidImdbId(imdbId)) { imdbId = options.getImdbId(); } if (!MetadataUtil.isValidImdbId(imdbId)) { return md; } LOGGER.debug("IMDB: getMetadata(imdbId): " + imdbId); md.setId(providerInfo.getId(), imdbId); ExecutorCompletionService<Document> compSvcImdb = new ExecutorCompletionService<>(executor); ExecutorCompletionService<MediaMetadata> compSvcTmdb = new ExecutorCompletionService<>(executor); // worker for imdb request (/reference) (everytime from www.imdb.com) // StringBuilder sb = new StringBuilder(imdbSite.getSite()); StringBuilder sb = new StringBuilder(ImdbSiteDefinition.IMDB_COM.getSite()); sb.append("title/"); sb.append(imdbId); sb.append("/reference"); Callable<Document> worker = new ImdbWorker(sb.toString(), options.getLanguage().getLanguage(), options.getCountry().getAlpha2(), imdbSite); Future<Document> futureReference = compSvcImdb.submit(worker); // worker for imdb request (/plotsummary) (from chosen site) Future<Document> futurePlotsummary; sb = new StringBuilder(imdbSite.getSite()); sb.append("title/"); sb.append(imdbId); sb.append("/plotsummary"); worker = new ImdbWorker(sb.toString(), options.getLanguage().getLanguage(), options.getCountry().getAlpha2(), imdbSite); futurePlotsummary = compSvcImdb.submit(worker); // worker for tmdb request Future<MediaMetadata> futureTmdb = null; if (ImdbMetadataProvider.providerInfo.getConfig().getValueAsBool("useTmdb") || 
ImdbMetadataProvider.providerInfo.getConfig().getValueAsBool("scrapeCollectionInfo")) { Callable<MediaMetadata> worker2 = new TmdbWorker(imdbId, options.getLanguage(), options.getCountry()); futureTmdb = compSvcTmdb.submit(worker2); } Document doc; doc = futureReference.get(); parseReferencePage(doc, options, md); /* * plot from /plotsummary */ // build the url doc = futurePlotsummary.get(); parsePlotsummaryPage(doc, options, md); // title also from chosen site if we are not scraping akas.imdb.com if (imdbSite != ImdbSiteDefinition.IMDB_COM) { Element title = doc.getElementById("tn15title"); if (title != null) { Element element; // title Elements elements = title.getElementsByClass("main"); if (elements.size() > 0) { element = elements.first(); String movieTitle = cleanString(element.ownText()); md.setTitle(movieTitle); } } } // get the release info page Future<Document> futureReleaseinfo; sb = new StringBuilder(imdbSite.getSite()); sb.append("title/"); sb.append(imdbId); sb.append("/releaseinfo"); worker = new ImdbWorker(sb.toString(), options.getLanguage().getLanguage(), options.getCountry().getAlpha2(), imdbSite); futureReleaseinfo = compSvcImdb.submit(worker); doc = futureReleaseinfo.get(); // parse original title here!! parseReleaseinfoPageAKAs(doc, options, md); // did we get a release date? if (md.getReleaseDate() == null || ImdbMetadataProvider.providerInfo.getConfig().getValueAsBool("localReleaseDate")) { parseReleaseinfoPage(doc, options, md); } // get data from tmdb? 
if (futureTmdb != null && (ImdbMetadataProvider.providerInfo.getConfig().getValueAsBool("useTmdb") || ImdbMetadataProvider.providerInfo.getConfig().getValueAsBool("scrapeCollectionInfo"))) { try { MediaMetadata tmdbMd = futureTmdb.get(); if (ImdbMetadataProvider.providerInfo.getConfig().getValueAsBool("useTmdb") && tmdbMd != null) { // tmdbid md.setId(MediaMetadata.TMDB, tmdbMd.getId(MediaMetadata.TMDB)); // title if (StringUtils.isNotBlank(tmdbMd.getTitle())) { md.setTitle(tmdbMd.getTitle()); } // original title if (StringUtils.isNotBlank(tmdbMd.getOriginalTitle())) { md.setOriginalTitle(tmdbMd.getOriginalTitle()); } // tagline if (StringUtils.isNotBlank(tmdbMd.getTagline())) { md.setTagline(tmdbMd.getTagline()); } // plot if (StringUtils.isNotBlank(tmdbMd.getPlot())) { md.setPlot(tmdbMd.getPlot()); } // collection info if (StringUtils.isNotBlank(tmdbMd.getCollectionName())) { md.setCollectionName(tmdbMd.getCollectionName()); md.setId(MediaMetadata.TMDB_SET, tmdbMd.getId(MediaMetadata.TMDB_SET)); } } if (ImdbMetadataProvider.providerInfo.getConfig().getValueAsBool("scrapeCollectionInfo") && tmdbMd != null) { md.setId(MediaMetadata.TMDB_SET, tmdbMd.getId(MediaMetadata.TMDB_SET)); md.setCollectionName(tmdbMd.getCollectionName()); } md.setId(tmdbMd.getProviderId(), tmdbMd.getId(tmdbMd.getProviderId())); } catch (Exception ignored) { } } // if we have still no original title, take the title if (StringUtils.isBlank(md.getOriginalTitle())) { md.setOriginalTitle(md.getTitle()); } // populate id md.setId(ImdbMetadataProvider.providerInfo.getId(), imdbId); return md; }