List of usage examples for the jsoup method org.jsoup.nodes.Element#getElementsByAttributeValueStarting
public Elements getElementsByAttributeValueStarting(String key, String valuePrefix)
From source file:biz.shadowservices.DegreesToolbox.DataFetcher.java
public FetchResult updateData(Context context, boolean force) { //Open database DBOpenHelper dbhelper = new DBOpenHelper(context); SQLiteDatabase db = dbhelper.getWritableDatabase(); // check for internet connectivity try {//from www .j ava 2 s.c om if (!isOnline(context)) { Log.d(TAG, "We do not seem to be online. Skipping Update."); return FetchResult.NOTONLINE; } } catch (Exception e) { exceptionReporter.reportException(Thread.currentThread(), e, "Exception during isOnline()"); } SharedPreferences sp = PreferenceManager.getDefaultSharedPreferences(context); if (!force) { try { if (sp.getBoolean("loginFailed", false) == true) { Log.d(TAG, "Previous login failed. Skipping Update."); DBLog.insertMessage(context, "i", TAG, "Previous login failed. Skipping Update."); return FetchResult.LOGINFAILED; } if (sp.getBoolean("autoupdates", true) == false) { Log.d(TAG, "Automatic updates not enabled. Skipping Update."); DBLog.insertMessage(context, "i", TAG, "Automatic updates not enabled. Skipping Update."); return FetchResult.NOTALLOWED; } if (!isBackgroundDataEnabled(context) && sp.getBoolean("obeyBackgroundData", true)) { Log.d(TAG, "Background data not enabled. Skipping Update."); DBLog.insertMessage(context, "i", TAG, "Background data not enabled. Skipping Update."); return FetchResult.NOTALLOWED; } if (!isAutoSyncEnabled() && sp.getBoolean("obeyAutoSync", true) && sp.getBoolean("obeyBackgroundData", true)) { Log.d(TAG, "Auto sync not enabled. Skipping Update."); DBLog.insertMessage(context, "i", TAG, "Auto sync not enabled. Skipping Update."); return FetchResult.NOTALLOWED; } if (isWifi(context) && !sp.getBoolean("wifiUpdates", true)) { Log.d(TAG, "On wifi, and wifi auto updates not allowed. Skipping Update"); DBLog.insertMessage(context, "i", TAG, "On wifi, and wifi auto updates not allowed. 
Skipping Update"); return FetchResult.NOTALLOWED; } else if (!isWifi(context)) { Log.d(TAG, "We are not on wifi."); if (!isRoaming(context) && !sp.getBoolean("2DData", true)) { Log.d(TAG, "Automatic updates on 2Degrees data not enabled. Skipping Update."); DBLog.insertMessage(context, "i", TAG, "Automatic updates on 2Degrees data not enabled. Skipping Update."); return FetchResult.NOTALLOWED; } else if (isRoaming(context) && !sp.getBoolean("roamingData", false)) { Log.d(TAG, "Automatic updates on roaming mobile data not enabled. Skipping Update."); DBLog.insertMessage(context, "i", TAG, "Automatic updates on roaming mobile data not enabled. Skipping Update."); return FetchResult.NOTALLOWED; } } } catch (Exception e) { exceptionReporter.reportException(Thread.currentThread(), e, "Exception while finding if to update."); } } else { Log.d(TAG, "Update Forced"); } try { String username = sp.getString("username", null); String password = sp.getString("password", null); if (username == null || password == null) { DBLog.insertMessage(context, "i", TAG, "Username or password not set."); return FetchResult.USERNAMEPASSWORDNOTSET; } // Find the URL of the page to send login data to. Log.d(TAG, "Finding Action. 
"); HttpGetter loginPageGet = new HttpGetter("https://secure.2degreesmobile.co.nz/web/ip/login"); String loginPageString = loginPageGet.execute(); if (loginPageString != null) { Document loginPage = Jsoup.parse(loginPageString, "https://secure.2degreesmobile.co.nz/web/ip/login"); Element loginForm = loginPage.getElementsByAttributeValue("name", "loginFrm").first(); String loginAction = loginForm.attr("action"); // Send login form List<NameValuePair> loginValues = new ArrayList<NameValuePair>(); loginValues.add(new BasicNameValuePair("externalURLRedirect", "")); loginValues.add(new BasicNameValuePair("hdnAction", "login_userlogin")); loginValues.add(new BasicNameValuePair("hdnAuthenticationType", "M")); loginValues.add(new BasicNameValuePair("hdnlocale", "")); loginValues.add(new BasicNameValuePair("userid", username)); loginValues.add(new BasicNameValuePair("password", password)); Log.d(TAG, "Sending Login "); HttpPoster sendLoginPoster = new HttpPoster(loginAction, loginValues); // Parse result String loginResponse = sendLoginPoster.execute(); Document loginResponseParsed = Jsoup.parse(loginResponse); // Determine if this is a pre-pay or post-paid account. 
boolean postPaid; if (loginResponseParsed .getElementById("p_CustomerPortalPostPaidHomePage_WAR_customerportalhomepage") == null) { Log.d(TAG, "Pre-pay account or no account."); postPaid = false; } else { Log.d(TAG, "Post-paid account."); postPaid = true; } String homepageUrl = "https://secure.2degreesmobile.co.nz/group/ip/home"; if (postPaid) { homepageUrl = "https://secure.2degreesmobile.co.nz/group/ip/postpaid"; } HttpGetter homepageGetter = new HttpGetter(homepageUrl); String homepageHTML = homepageGetter.execute(); Document homePage = Jsoup.parse(homepageHTML); Element accountSummary = homePage.getElementById("accountSummary"); if (accountSummary == null) { Log.d(TAG, "Login failed."); return FetchResult.LOGINFAILED; } db.delete("cache", "", null); /* This code fetched some extra details for postpaid users, but on reflection they aren't that useful. * Might reconsider this. * if (postPaid) { Element accountBalanceSummaryTable = accountSummary.getElementsByClass("tableBillSummary").first(); Elements rows = accountBalanceSummaryTable.getElementsByTag("tr"); int rowno = 0; for (Element row : rows) { if (rowno > 1) { break; } //Log.d(TAG, "Starting row"); //Log.d(TAG, row.html()); Double value; try { Element amount = row.getElementsByClass("tableBillamount").first(); String amountHTML = amount.html(); Log.d(TAG, amountHTML.substring(1)); value = Double.parseDouble(amountHTML.substring(1)); } catch (Exception e) { Log.d(TAG, "Failed to parse amount from row."); value = null; } String expiresDetails = ""; String expiresDate = null; String name = null; try { Element details = row.getElementsByClass("tableBilldetail").first(); name = details.ownText(); Element expires = details.getElementsByTag("em").first(); if (expires != null) { expiresDetails = expires.text(); } Log.d(TAG, expiresDetails); Pattern pattern; pattern = Pattern.compile("\\(payment is due (.*)\\)"); Matcher matcher = pattern.matcher(expiresDetails); if (matcher.find()) { /*Log.d(TAG, "matched 
expires"); Log.d(TAG, "group 0:" + matcher.group(0)); Log.d(TAG, "group 1:" + matcher.group(1)); Log.d(TAG, "group 2:" + matcher.group(2)); * String expiresDateString = matcher.group(1); Date expiresDateObj; if (expiresDateString != null) { if (expiresDateString.length() > 0) { try { expiresDateObj = DateFormatters.EXPIRESDATE.parse(expiresDateString); expiresDate = DateFormatters.ISO8601DATEONLYFORMAT.format(expiresDateObj); } catch (java.text.ParseException e) { Log.d(TAG, "Could not parse date: " + expiresDateString); } } } } } catch (Exception e) { Log.d(TAG, "Failed to parse details from row."); } String expirev = null; ContentValues values = new ContentValues(); values.put("name", name); values.put("value", value); values.put("units", "$NZ"); values.put("expires_value", expirev ); values.put("expires_date", expiresDate); db.insert("cache", "value", values ); rowno++; } } */ Element accountSummaryTable = accountSummary.getElementsByClass("tableAccountSummary").first(); Elements rows = accountSummaryTable.getElementsByTag("tr"); for (Element row : rows) { // We are now looking at each of the rows in the data table. 
//Log.d(TAG, "Starting row"); //Log.d(TAG, row.html()); Double value; String units; try { Element amount = row.getElementsByClass("tableBillamount").first(); String amountHTML = amount.html(); //Log.d(TAG, amountHTML); String[] amountParts = amountHTML.split(" ", 2); //Log.d(TAG, amountParts[0]); //Log.d(TAG, amountParts[1]); if (amountParts[0].contains("Included") || amountParts[0].equals("All You Need") || amountParts[0].equals("Unlimited Text*")) { value = Values.INCLUDED; } else { try { value = Double.parseDouble(amountParts[0]); } catch (NumberFormatException e) { exceptionReporter.reportException(Thread.currentThread(), e, "Decoding value."); value = 0.0; } } units = amountParts[1]; } catch (NullPointerException e) { //Log.d(TAG, "Failed to parse amount from row."); value = null; units = null; } Element details = row.getElementsByClass("tableBilldetail").first(); String name = details.getElementsByTag("strong").first().text(); Element expires = details.getElementsByTag("em").first(); String expiresDetails = ""; if (expires != null) { expiresDetails = expires.text(); } Log.d(TAG, expiresDetails); Pattern pattern; if (postPaid == false) { pattern = Pattern.compile("\\(([\\d\\.]*) ?\\w*? ?expiring on (.*)\\)"); } else { pattern = Pattern.compile("\\(([\\d\\.]*) ?\\w*? 
?will expire on (.*)\\)"); } Matcher matcher = pattern.matcher(expiresDetails); Double expiresValue = null; String expiresDate = null; if (matcher.find()) { /*Log.d(TAG, "matched expires"); Log.d(TAG, "group 0:" + matcher.group(0)); Log.d(TAG, "group 1:" + matcher.group(1)); Log.d(TAG, "group 2:" + matcher.group(2)); */ try { expiresValue = Double.parseDouble(matcher.group(1)); } catch (NumberFormatException e) { expiresValue = null; } String expiresDateString = matcher.group(2); Date expiresDateObj; if (expiresDateString != null) { if (expiresDateString.length() > 0) { try { expiresDateObj = DateFormatters.EXPIRESDATE.parse(expiresDateString); expiresDate = DateFormatters.ISO8601DATEONLYFORMAT.format(expiresDateObj); } catch (java.text.ParseException e) { Log.d(TAG, "Could not parse date: " + expiresDateString); } } } } ContentValues values = new ContentValues(); values.put("name", name); values.put("value", value); values.put("units", units); values.put("expires_value", expiresValue); values.put("expires_date", expiresDate); db.insert("cache", "value", values); } if (postPaid == false) { Log.d(TAG, "Getting Value packs..."); // Find value packs HttpGetter valuePacksPageGet = new HttpGetter( "https://secure.2degreesmobile.co.nz/group/ip/prevaluepack"); String valuePacksPageString = valuePacksPageGet.execute(); //DBLog.insertMessage(context, "d", "", valuePacksPageString); if (valuePacksPageString != null) { Document valuePacksPage = Jsoup.parse(valuePacksPageString); Elements enabledPacks = valuePacksPage.getElementsByClass("yellow"); for (Element enabledPack : enabledPacks) { Element offerNameElemt = enabledPack .getElementsByAttributeValueStarting("name", "offername").first(); if (offerNameElemt != null) { String offerName = offerNameElemt.val(); DBLog.insertMessage(context, "d", "", "Got element: " + offerName); ValuePack[] packs = Values.valuePacks.get(offerName); if (packs == null) { DBLog.insertMessage(context, "d", "", "Offer name: " + offerName + " not 
matched."); } else { for (ValuePack pack : packs) { ContentValues values = new ContentValues(); values.put("plan_startamount", pack.value); values.put("plan_name", offerName); DBLog.insertMessage(context, "d", "", "Pack " + pack.type.id + " start value set to " + pack.value); db.update("cache", values, "name = '" + pack.type.id + "'", null); } } } } } } SharedPreferences.Editor prefedit = sp.edit(); Date now = new Date(); prefedit.putString("updateDate", DateFormatters.ISO8601FORMAT.format(now)); prefedit.putBoolean("loginFailed", false); prefedit.putBoolean("networkError", false); prefedit.commit(); DBLog.insertMessage(context, "i", TAG, "Update Successful"); return FetchResult.SUCCESS; } } catch (ClientProtocolException e) { DBLog.insertMessage(context, "w", TAG, "Network error: " + e.getMessage()); return FetchResult.NETWORKERROR; } catch (IOException e) { DBLog.insertMessage(context, "w", TAG, "Network error: " + e.getMessage()); return FetchResult.NETWORKERROR; } finally { db.close(); } return null; }
From source file:org.tinymediamanager.scraper.imdb.ImdbMovieParser.java
private MediaMetadata parseReleaseinfoPage(Document doc, MediaScrapeOptions options, MediaMetadata md) { Date releaseDate = null;/*from w w w. jav a 2 s . c o m*/ Pattern pattern = Pattern.compile("/calendar/\\?region=(.{2})"); // old way Element tableReleaseDates = doc.getElementById("release_dates"); if (tableReleaseDates != null) { Elements rows = tableReleaseDates.getElementsByTag("tr"); // first round: check the release date for the first one with the requested country for (Element row : rows) { // get the anchor Element anchor = row.getElementsByAttributeValueStarting("href", "/calendar/").first(); if (anchor != null) { Matcher matcher = pattern.matcher(anchor.attr("href")); if (matcher.find() && options.getCountry().getAlpha2().equalsIgnoreCase(matcher.group(1))) { Element column = row.getElementsByClass("release_date").first(); if (column != null) { try { SimpleDateFormat sdf = new SimpleDateFormat("d MMMM yyyy", Locale.US); releaseDate = sdf.parse(column.text()); break; } catch (ParseException otherformat) { try { SimpleDateFormat sdf = new SimpleDateFormat("MMMM yyyy", Locale.US); releaseDate = sdf.parse(column.text()); break; } catch (ParseException ignored) { } } } } } } } // new way; iterating over class name items if (releaseDate == null) { Elements rows = doc.getElementsByClass("release-date-item"); for (Element row : rows) { Element anchor = row.getElementsByAttributeValueStarting("href", "/calendar/").first(); if (anchor != null) { Matcher matcher = pattern.matcher(anchor.attr("href")); if (matcher.find() && options.getCountry().getAlpha2().equalsIgnoreCase(matcher.group(1))) { Element column = row.getElementsByClass("release-date-item__date").first(); if (column != null) { try { SimpleDateFormat sdf = new SimpleDateFormat("d MMMM yyyy", Locale.US); releaseDate = sdf.parse(column.text()); break; } catch (ParseException otherformat) { try { SimpleDateFormat sdf = new SimpleDateFormat("MMMM yyyy", Locale.US); releaseDate = sdf.parse(column.text()); 
break; } catch (ParseException ignored) { } } } } else { LOGGER.trace("country {} does not match ours {}", matcher.group(1), options.getCountry().getAlpha2()); } } } } // no matching local release date found; take the first one if (releaseDate == null) { Element column = doc.getElementsByClass("release_date").first(); if (column == null) { column = doc.getElementsByClass("release-date-item__date").first(); } if (column != null) { try { SimpleDateFormat sdf = new SimpleDateFormat("d MMMM yyyy", Locale.US); releaseDate = sdf.parse(column.text()); } catch (ParseException otherformat) { try { SimpleDateFormat sdf = new SimpleDateFormat("MMMM yyyy", Locale.US); releaseDate = sdf.parse(column.text()); } catch (ParseException ignored) { } } } } if (releaseDate != null) { md.setReleaseDate(releaseDate); } return md; }
From source file:org.tinymediamanager.scraper.imdb.ImdbParser.java
protected MediaMetadata parseReferencePage(Document doc, MediaScrapeOptions options, MediaMetadata md) { /*/*from ww w .j a v a 2s. c o m*/ * title and year have the following structure * * <div id="tn15title"><h1>Merida - Legende der Highlands <span>(<a href="/year/2012/">2012</a>) <span class="pro-link">...</span> <span * class="title-extra">Brave <i>(original title)</i></span> </span></h1> </div> */ // title Element title = doc.getElementsByAttributeValue("name", "title").first(); if (title != null) { String movieTitle = cleanString(title.attr("content")); int yearStart = movieTitle.lastIndexOf("("); if (yearStart > 0) { movieTitle = movieTitle.substring(0, yearStart - 1).trim(); md.setTitle(movieTitle); } } // original title and year Element originalTitleYear = doc.getElementsByAttributeValue("property", "og:title").first(); if (originalTitleYear != null) { String content = originalTitleYear.attr("content"); int startOfYear = content.lastIndexOf("("); if (startOfYear > 0) { // noo - this is NOT the original title!!! (seems always english?) parse from AKAs page... 
// String originalTitle = content.substring(0, startOfYear - 1).trim(); // md.setOriginalTitle(originalTitle); String yearText = content.substring(startOfYear); // search year Pattern yearPattern = Pattern.compile("[1-2][0-9]{3}"); Matcher matcher = yearPattern.matcher(yearText); while (matcher.find()) { if (matcher.group(0) != null) { String movieYear = matcher.group(0); try { md.setYear(Integer.parseInt(movieYear)); break; } catch (Exception ignored) { } } } } } // poster Element poster = doc.getElementsByAttributeValue("property", "og:image").first(); if (poster != null) { String posterUrl = poster.attr("content"); int fileStart = posterUrl.lastIndexOf("/"); if (fileStart > 0) { int parameterStart = posterUrl.indexOf("_", fileStart); if (parameterStart > 0) { int startOfExtension = posterUrl.lastIndexOf("."); if (startOfExtension > parameterStart) { posterUrl = posterUrl.substring(0, parameterStart) + posterUrl.substring(startOfExtension); } } } processMediaArt(md, MediaArtwork.MediaArtworkType.POSTER, posterUrl); } /* * <div class="starbar-meta"> <b>7.4/10</b> <a href="ratings" class="tn15more">52,871 votes</a> » </div> */ // rating and rating count Element ratingElement = doc.getElementsByClass("ipl-rating-star__rating").first(); if (ratingElement != null) { String ratingAsString = ratingElement.ownText().replace(",", "."); try { md.setRating(Float.valueOf(ratingAsString)); } catch (Exception ignored) { } Element votesElement = doc.getElementsByClass("ipl-rating-star__total-votes").first(); if (votesElement != null) { String countAsString = votesElement.ownText().replaceAll("[.,()\\u00a0]", "").trim(); try { md.setVoteCount(Integer.parseInt(countAsString)); } catch (Exception ignored) { } } } // top250 Element topRatedElement = doc.getElementsByAttributeValue("href", "/chart/top").first(); if (topRatedElement != null) { Pattern topPattern = Pattern.compile("Top Rated Movies: #([0-9]{1,3})"); Matcher matcher = topPattern.matcher(topRatedElement.ownText()); 
while (matcher.find()) { if (matcher.group(1) != null) { try { String top250Text = matcher.group(1); md.setTop250(Integer.parseInt(top250Text)); } catch (Exception ignored) { } } } } // releasedate Element releaseDateElement = doc .getElementsByAttributeValue("href", "/title/" + options.getImdbId().toLowerCase() + "/releaseinfo") .first(); if (releaseDateElement != null) { String releaseDateText = releaseDateElement.ownText(); int startOfCountry = releaseDateText.indexOf("("); if (startOfCountry > 0) { releaseDateText = releaseDateText.substring(0, startOfCountry - 1).trim(); } try { SimpleDateFormat sdf = new SimpleDateFormat("d MMMM yyyy", Locale.US); Date parsedDate = sdf.parse(releaseDateText); md.setReleaseDate(parsedDate); } catch (ParseException otherformat) { try { SimpleDateFormat sdf = new SimpleDateFormat("MMMM yyyy", Locale.US); Date parsedDate = sdf.parse(releaseDateText); md.setReleaseDate(parsedDate); } catch (ParseException ignored) { } } } Elements elements = doc.getElementsByClass("ipl-zebra-list__label"); for (Element element : elements) { // only parse tds if (!"td".equals(element.tag().getName())) { continue; } String elementText = element.ownText(); if (elementText.equals("Taglines")) { if (!ImdbMetadataProvider.providerInfo.getConfig().getValueAsBool("useTmdb")) { Element taglineElement = element.nextElementSibling(); if (taglineElement != null) { String tagline = cleanString(taglineElement.ownText().replaceAll("", "")); md.setTagline(tagline); } } } if (elementText.equals("Genres")) { Element nextElement = element.nextElementSibling(); if (nextElement != null) { Elements genreElements = nextElement.getElementsByAttributeValueStarting("href", "/genre/"); for (Element genreElement : genreElements) { String genreText = genreElement.ownText(); md.addGenre(getTmmGenre(genreText)); } } } /* * Old HTML, but maybe the same content formart <div class="info"><h5>Runtime:</h5><div class="info-content">162 min | 171 min (special edition) * | 178 min 
(extended cut)</div></div> */ if (elementText.equals("Runtime")) { Element nextElement = element.nextElementSibling(); if (nextElement != null) { Element runtimeElement = nextElement.getElementsByClass("ipl-inline-list__item").first(); if (runtimeElement != null) { String first = runtimeElement.ownText().split("\\|")[0]; String runtimeAsString = cleanString(first.replaceAll("min", "")); int runtime = 0; try { runtime = Integer.parseInt(runtimeAsString); } catch (Exception e) { // try to filter out the first number we find Pattern runtimePattern = Pattern.compile("([0-9]{2,3})"); Matcher matcher = runtimePattern.matcher(runtimeAsString); if (matcher.find()) { runtime = Integer.parseInt(matcher.group(0)); } } md.setRuntime(runtime); } } } if (elementText.equals("Country")) { Element nextElement = element.nextElementSibling(); if (nextElement != null) { Elements countryElements = nextElement.getElementsByAttributeValueStarting("href", "/country/"); Pattern pattern = Pattern.compile("/country/(.*)"); for (Element countryElement : countryElements) { Matcher matcher = pattern.matcher(countryElement.attr("href")); if (matcher.matches()) { if (ImdbMetadataProvider.providerInfo.getConfig() .getValueAsBool("scrapeLanguageNames")) { md.addCountry(LanguageUtils.getLocalizedCountryForLanguage( options.getLanguage().getLanguage(), countryElement.text(), matcher.group(1))); } else { md.addCountry(matcher.group(1)); } } } } } if (elementText.equals("Language")) { Element nextElement = element.nextElementSibling(); if (nextElement != null) { Elements languageElements = nextElement.getElementsByAttributeValueStarting("href", "/language/"); Pattern pattern = Pattern.compile("/language/(.*)"); for (Element languageElement : languageElements) { Matcher matcher = pattern.matcher(languageElement.attr("href")); if (matcher.matches()) { if (ImdbMetadataProvider.providerInfo.getConfig() .getValueAsBool("scrapeLanguageNames")) { 
md.addSpokenLanguage(LanguageUtils.getLocalizedLanguageNameFromLocalizedString( options.getLanguage(), languageElement.text(), matcher.group(1))); } else { md.addSpokenLanguage(matcher.group(1)); } } } } } if (elementText.equals("Certification")) { Element nextElement = element.nextElementSibling(); if (nextElement != null) { String languageCode = options.getCountry().getAlpha2(); Elements certificationElements = nextElement.getElementsByAttributeValueStarting("href", "/search/title?certificates=" + languageCode); boolean done = false; for (Element certificationElement : certificationElements) { String certText = certificationElement.ownText(); int startOfCert = certText.indexOf(":"); if (startOfCert > 0 && certText.length() > startOfCert + 1) { certText = certText.substring(startOfCert + 1); } Certification certification = Certification.getCertification(options.getCountry(), certText); if (certification != null) { md.addCertification(certification); done = true; break; } } if (!done && languageCode.equals("DE")) { certificationElements = nextElement.getElementsByAttributeValueStarting("href", "/search/title?certificates=XWG"); for (Element certificationElement : certificationElements) { String certText = certificationElement.ownText(); int startOfCert = certText.indexOf(":"); if (startOfCert > 0 && certText.length() > startOfCert + 1) { certText = certText.substring(startOfCert + 1); } Certification certification = Certification.getCertification(options.getCountry(), certText); if (certification != null) { md.addCertification(certification); break; } } } } } } // director Element directorsElement = doc.getElementById("directors"); while (directorsElement != null && directorsElement.tag().getName() != "header") { directorsElement = directorsElement.parent(); } if (directorsElement != null) { directorsElement = directorsElement.nextElementSibling(); } if (directorsElement != null) { for (Element directorElement : directorsElement.getElementsByClass("name")) { String 
director = directorElement.text().trim(); MediaCastMember cm = new MediaCastMember(MediaCastMember.CastType.DIRECTOR); cm.setName(director); md.addCastMember(cm); } } // actors Element castTableElement = doc.getElementsByClass("cast_list").first(); if (castTableElement != null) { Elements tr = castTableElement.getElementsByTag("tr"); for (Element row : tr) { MediaCastMember cm = parseCastMember(row); if (cm != null && StringUtils.isNotEmpty(cm.getName()) && StringUtils.isNotEmpty(cm.getCharacter())) { cm.setType(MediaCastMember.CastType.ACTOR); md.addCastMember(cm); } } } // writers Element writersElement = doc.getElementById("writers"); while (writersElement != null && writersElement.tag().getName() != "header") { writersElement = writersElement.parent(); } if (writersElement != null) { writersElement = writersElement.nextElementSibling(); } if (writersElement != null) { Elements writersElements = writersElement.getElementsByAttributeValueStarting("href", "/name/"); for (Element writerElement : writersElements) { String writer = cleanString(writerElement.ownText()); MediaCastMember cm = new MediaCastMember(MediaCastMember.CastType.WRITER); cm.setName(writer); md.addCastMember(cm); } } // producers Element producersElement = doc.getElementById("producers"); while (producersElement != null && producersElement.tag().getName() != "header") { producersElement = producersElement.parent(); } if (producersElement != null) { producersElement = producersElement.nextElementSibling(); } if (producersElement != null) { Elements producersElements = producersElement.getElementsByAttributeValueStarting("href", "/name/"); for (Element producerElement : producersElements) { String producer = cleanString(producerElement.ownText()); MediaCastMember cm = new MediaCastMember(MediaCastMember.CastType.PRODUCER); cm.setName(producer); md.addCastMember(cm); } } // producers Elements prodCompHeaderElements = doc.getElementsByClass("ipl-list-title"); Element prodCompHeaderElement = null; for 
(Element possibleProdCompHeaderEl : prodCompHeaderElements) { if (possibleProdCompHeaderEl.ownText().equals("Production Companies")) { prodCompHeaderElement = possibleProdCompHeaderEl; break; } } while (prodCompHeaderElement != null && prodCompHeaderElement.tag().getName() != "header") { prodCompHeaderElement = prodCompHeaderElement.parent(); } if (prodCompHeaderElement != null) { prodCompHeaderElement = prodCompHeaderElement.nextElementSibling(); } if (prodCompHeaderElement != null) { Elements prodCompElements = prodCompHeaderElement.getElementsByAttributeValueStarting("href", "/company/"); for (Element prodCompElement : prodCompElements) { String prodComp = prodCompElement.ownText(); md.addProductionCompany(prodComp); } } return md; }
From source file:org.tinymediamanager.scraper.imdb.ImdbParser.java
protected MediaCastMember parseCastMember(Element row) { Element nameElement = row.getElementsByAttributeValueStarting("itemprop", "name").first(); if (nameElement == null) { return null; }//from w w w .j av a 2 s. c o m String name = cleanString(nameElement.ownText()); String characterName = ""; Element characterElement = row.getElementsByClass("character").first(); if (characterElement != null) { characterName = cleanString(characterElement.text()); // and now strip off trailing commentaries like - (120 episodes, // 2006-2014) characterName = characterName.replaceAll("\\(.*?\\)$", "").trim(); } String image = ""; Element imageElement = row.getElementsByTag("img").first(); if (imageElement != null) { String imageSrc = imageElement.attr("loadlate"); if (!StringUtils.isEmpty(imageSrc)) { int fileStart = imageSrc.lastIndexOf("/"); if (fileStart > 0) { // parse out the rescale/crop params int parameterStart = imageSrc.indexOf("._", fileStart); if (parameterStart > 0) { int startOfExtension = imageSrc.lastIndexOf("."); if (startOfExtension > parameterStart) { // rebuild the path - scaled to 632 px height as in tmdb scraper imageSrc = imageSrc.substring(0, parameterStart) + "._UY632" + imageSrc.substring(startOfExtension); } } } image = imageSrc; } } MediaCastMember cm = new MediaCastMember(); cm.setCharacter(characterName); cm.setName(name); cm.setImageUrl(image); return cm; }
From source file:org.tinymediamanager.scraper.imdb.ImdbTvShowParser.java
/**
 * Reads the episodes of one season from an episode list page and appends them to
 * {@code episodes}.
 *
 * <p>Rows are the {@code list_item} elements inside {@code eplist} elements. For season 0 a row
 * counts as an episode when its text contains "Unknown" and episodes are numbered in page
 * order; otherwise season and episode number are taken from the "S<n>, Ep<n>" text. A row that
 * fails to parse is logged and skipped.
 *
 * @param season   the season being parsed (0 = episodes listed as "Unknown")
 * @param episodes the list receiving the parsed episodes
 * @param doc      the parsed episode list page
 * @return {@code false} as soon as a row belongs to a different season than requested (for
 *         {@code season > 0}); {@code true} otherwise
 */
private boolean parseEpisodeList(int season, List<MediaEpisode> episodes, Document doc) {
    Pattern unknownPattern = Pattern.compile("Unknown");
    Pattern seasonEpisodePattern = Pattern.compile("S([0-9]*), Ep([0-9]*)");
    boolean specials = season == 0;
    int episodeCounter = 0;

    // parse episodes
    for (Element table : doc.getElementsByClass("eplist")) {
        for (Element row : table.getElementsByClass("list_item")) {
            Pattern rowPattern = specials ? unknownPattern : seasonEpisodePattern;
            Matcher matcher = rowPattern.matcher(row.text());
            if (!matcher.find() || (!specials && matcher.groupCount() < 2)) {
                continue;
            }

            try {
                // we found a row containing episode data
                MediaEpisode ep = new MediaEpisode(providerInfo.getId());

                // parse season and ep number
                if (specials) {
                    ep.season = season;
                    episodeCounter++;
                    ep.episode = episodeCounter;
                } else {
                    ep.season = Integer.parseInt(matcher.group(1));
                    ep.episode = Integer.parseInt(matcher.group(2));
                }

                // check if we have still valid data
                if (season > 0 && season != ep.season) {
                    return false;
                }

                // get ep title and id
                Elements titleLinks = row.getElementsByAttributeValueStarting("href", "/title/tt");
                for (Element link : titleLinks) {
                    if ("name".equals(link.attr("itemprop"))) {
                        ep.title = link.text();
                        break;
                    }
                }

                // the id comes from the first title link; the last regex match wins
                String imdbId = "";
                Matcher idMatcher = IMDB_ID_PATTERN.matcher(titleLinks.get(0).attr("href"));
                while (idMatcher.find()) {
                    if (idMatcher.group(1) != null) {
                        imdbId = idMatcher.group(1);
                    }
                }
                if (StringUtils.isNotBlank(imdbId)) {
                    ep.ids.put(providerInfo.getId(), imdbId);
                }

                // plot
                Element plotNode = row.getElementsByClass("item_description").first();
                if (plotNode != null) {
                    ep.plot = plotNode.ownText();
                }

                // rating and rating count
                Element ratingNode = row.getElementsByClass("ipl-rating-star__rating").first();
                if (ratingNode != null) {
                    try {
                        ep.rating = Float.valueOf(ratingNode.ownText().replace(",", "."));
                    } catch (Exception ignored) {
                    }

                    Element votesNode = row.getElementsByClass("ipl-rating-star__total-votes").first();
                    if (votesNode != null) {
                        try {
                            ep.voteCount = Integer.parseInt(votesNode.ownText().replaceAll("[.,()]", "").trim());
                        } catch (Exception ignored) {
                        }
                    }
                }

                // release date
                Element airdateNode = row.getElementsByClass("airdate").first();
                if (airdateNode != null) {
                    ep.firstAired = airdateNode.ownText();
                }

                // poster
                Element imgNode = row.getElementsByTag("img").first();
                if (imgNode != null) {
                    // drop the scale (UX/UY) and crop (CR) parameters from the image url
                    String thumbUrl = imgNode.attr("src")
                            .replaceAll("UX[0-9]{2,4}_", "")
                            .replaceAll("UY[0-9]{2,4}_", "")
                            .replaceAll("CR[0-9]{1,3},[0-9]{1,3},[0-9]{1,3},[0-9]{1,3}_", "");
                    if (StringUtils.isNotBlank(thumbUrl)) {
                        MediaArtwork thumb = new MediaArtwork(ImdbMetadataProvider.providerInfo.getId(),
                                MediaArtwork.MediaArtworkType.THUMB);
                        thumb.setPreviewUrl(thumbUrl);
                        thumb.setDefaultUrl(thumbUrl);
                        ep.artwork.add(thumb);
                    }
                }

                episodes.add(ep);
            } catch (Exception e) {
                LOGGER.warn("failed parsing: " + row.text() + " for ep data; " + e.getMessage());
            }
        }
    }
    return true;
}