Example usage for org.jsoup.nodes Element getElementsByAttributeValueStarting

List of usage examples for org.jsoup.nodes Element getElementsByAttributeValueStarting

Introduction

On this page you can find example usages of org.jsoup.nodes.Element.getElementsByAttributeValueStarting.

Prototype

public Elements getElementsByAttributeValueStarting(String key, String valuePrefix) 

Source Link

Document

Find elements that have an attribute with the given key whose value starts with the given value prefix.

Usage

From source file:biz.shadowservices.DegreesToolbox.DataFetcher.java

/**
 * Logs in to the 2degrees customer site, scrapes the account summary and stores one row per
 * summary line in the {@code cache} table.
 *
 * <p>Unless {@code force} is set, the update is skipped when the stored preferences or the
 * current connectivity state say that automatic updates are not wanted.
 *
 * @param context used for preferences, connectivity checks and database logging
 * @param force   when {@code true} the "is an automatic update allowed" checks are bypassed
 * @return the outcome of the update; {@code null} when the login page could not be retrieved
 *         (behavior kept from the original implementation, callers may rely on it)
 */
public FetchResult updateData(Context context, boolean force) {
    //Open database
    DBOpenHelper dbhelper = new DBOpenHelper(context);
    SQLiteDatabase db = dbhelper.getWritableDatabase();

    // Everything that can return runs inside this try so that the early "skip update"
    // returns close the database as well; previously only the fetch phase did.
    try {
        // check for internet connectivity
        try {
            if (!isOnline(context)) {
                Log.d(TAG, "We do not seem to be online. Skipping Update.");
                return FetchResult.NOTONLINE;
            }
        } catch (Exception e) {
            // Best effort: a failing connectivity check must not prevent the update attempt.
            exceptionReporter.reportException(Thread.currentThread(), e, "Exception during isOnline()");
        }

        SharedPreferences sp = PreferenceManager.getDefaultSharedPreferences(context);
        if (force) {
            Log.d(TAG, "Update Forced");
        } else {
            FetchResult skipReason = autoUpdateSkipReason(context, sp);
            if (skipReason != null) {
                return skipReason;
            }
        }

        return fetchAndCacheAccountData(context, sp, db);
    } catch (IOException e) {
        // ClientProtocolException is an IOException and was handled identically, so a single
        // handler covers both.
        DBLog.insertMessage(context, "w", TAG, "Network error: " + e.getMessage());
        return FetchResult.NETWORKERROR;
    } finally {
        db.close();
    }
}

/**
 * Decides whether an automatic (non-forced) update has to be skipped.
 *
 * @return the result to hand back to the caller when the update must be skipped, or
 *         {@code null} when the update may proceed (also when a check itself failed)
 */
private FetchResult autoUpdateSkipReason(Context context, SharedPreferences sp) {
    try {
        if (sp.getBoolean("loginFailed", false)) {
            Log.d(TAG, "Previous login failed. Skipping Update.");
            DBLog.insertMessage(context, "i", TAG, "Previous login failed. Skipping Update.");
            return FetchResult.LOGINFAILED;
        }
        if (!sp.getBoolean("autoupdates", true)) {
            Log.d(TAG, "Automatic updates not enabled. Skipping Update.");
            DBLog.insertMessage(context, "i", TAG, "Automatic updates not enabled. Skipping Update.");
            return FetchResult.NOTALLOWED;
        }
        if (!isBackgroundDataEnabled(context) && sp.getBoolean("obeyBackgroundData", true)) {
            Log.d(TAG, "Background data not enabled. Skipping Update.");
            DBLog.insertMessage(context, "i", TAG, "Background data not enabled. Skipping Update.");
            return FetchResult.NOTALLOWED;
        }
        if (!isAutoSyncEnabled() && sp.getBoolean("obeyAutoSync", true)
                && sp.getBoolean("obeyBackgroundData", true)) {
            Log.d(TAG, "Auto sync not enabled. Skipping Update.");
            DBLog.insertMessage(context, "i", TAG, "Auto sync not enabled. Skipping Update.");
            return FetchResult.NOTALLOWED;
        }

        if (isWifi(context)) {
            if (!sp.getBoolean("wifiUpdates", true)) {
                Log.d(TAG, "On wifi, and wifi auto updates not allowed. Skipping Update");
                DBLog.insertMessage(context, "i", TAG,
                        "On wifi, and wifi auto updates not allowed. Skipping Update");
                return FetchResult.NOTALLOWED;
            }
        } else {
            Log.d(TAG, "We are not on wifi.");
            boolean roaming = isRoaming(context);
            if (!roaming && !sp.getBoolean("2DData", true)) {
                Log.d(TAG, "Automatic updates on 2Degrees data not enabled. Skipping Update.");
                DBLog.insertMessage(context, "i", TAG,
                        "Automatic updates on 2Degrees data not enabled. Skipping Update.");
                return FetchResult.NOTALLOWED;
            }
            if (roaming && !sp.getBoolean("roamingData", false)) {
                Log.d(TAG, "Automatic updates on roaming mobile data not enabled. Skipping Update.");
                DBLog.insertMessage(context, "i", TAG,
                        "Automatic updates on roaming mobile data not enabled. Skipping Update.");
                return FetchResult.NOTALLOWED;
            }
        }
    } catch (Exception e) {
        // Best effort: if the checks themselves blow up, report it and carry on with the update.
        exceptionReporter.reportException(Thread.currentThread(), e,
                "Exception while finding if to update.");
    }
    return null;
}

/**
 * Performs the login, downloads the account home page and replaces the content of the
 * {@code cache} table with the scraped values.
 *
 * @return the outcome, or {@code null} when the login page request produced no content
 * @throws IOException when one of the HTTP requests fails
 */
private FetchResult fetchAndCacheAccountData(Context context, SharedPreferences sp, SQLiteDatabase db)
        throws IOException {
    String username = sp.getString("username", null);
    String password = sp.getString("password", null);
    if (username == null || password == null) {
        DBLog.insertMessage(context, "i", TAG, "Username or password not set.");
        return FetchResult.USERNAMEPASSWORDNOTSET;
    }

    // Find the URL of the page to send login data to.
    Log.d(TAG, "Finding Action. ");
    final String loginUrl = "https://secure.2degreesmobile.co.nz/web/ip/login";
    String loginPageString = new HttpGetter(loginUrl).execute();
    if (loginPageString == null) {
        // Kept from the original implementation: no login page means a null result.
        return null;
    }
    Document loginPage = Jsoup.parse(loginPageString, loginUrl);
    Element loginForm = loginPage.getElementsByAttributeValue("name", "loginFrm").first();
    if (loginForm == null) {
        // first() yields null when nothing matched; this used to surface as an uncaught
        // NullPointerException. NOTE(review): NETWORKERROR is the closest result visible
        // here for "unexpected page content" - confirm it suits the callers.
        DBLog.insertMessage(context, "w", TAG, "Login form not found on login page.");
        return FetchResult.NETWORKERROR;
    }
    String loginAction = loginForm.attr("action");

    // Send login form
    List<NameValuePair> loginValues = new ArrayList<NameValuePair>();
    loginValues.add(new BasicNameValuePair("externalURLRedirect", ""));
    loginValues.add(new BasicNameValuePair("hdnAction", "login_userlogin"));
    loginValues.add(new BasicNameValuePair("hdnAuthenticationType", "M"));
    loginValues.add(new BasicNameValuePair("hdnlocale", ""));
    loginValues.add(new BasicNameValuePair("userid", username));
    loginValues.add(new BasicNameValuePair("password", password));
    Log.d(TAG, "Sending Login ");
    String loginResponse = new HttpPoster(loginAction, loginValues).execute();
    if (loginResponse == null) {
        DBLog.insertMessage(context, "w", TAG, "No response to login request.");
        return FetchResult.NETWORKERROR;
    }
    Document loginResponseParsed = Jsoup.parse(loginResponse);

    // Determine if this is a pre-pay or post-paid account.
    boolean postPaid = loginResponseParsed
            .getElementById("p_CustomerPortalPostPaidHomePage_WAR_customerportalhomepage") != null;
    Log.d(TAG, postPaid ? "Post-paid account." : "Pre-pay account or no account.");

    String homepageUrl = postPaid
            ? "https://secure.2degreesmobile.co.nz/group/ip/postpaid"
            : "https://secure.2degreesmobile.co.nz/group/ip/home";
    String homepageHTML = new HttpGetter(homepageUrl).execute();
    if (homepageHTML == null) {
        DBLog.insertMessage(context, "w", TAG, "No response for account home page.");
        return FetchResult.NETWORKERROR;
    }
    Document homePage = Jsoup.parse(homepageHTML);

    Element accountSummary = homePage.getElementById("accountSummary");
    if (accountSummary == null) {
        Log.d(TAG, "Login failed.");
        return FetchResult.LOGINFAILED;
    }
    // Locate the table before wiping the cache so that an unexpected page layout does not
    // leave the user with an empty cache.
    Element accountSummaryTable = accountSummary.getElementsByClass("tableAccountSummary").first();
    if (accountSummaryTable == null) {
        DBLog.insertMessage(context, "w", TAG, "Account summary table not found.");
        return FetchResult.NETWORKERROR;
    }

    db.delete("cache", "", null);
    cacheAccountSummaryRows(db, accountSummaryTable.getElementsByTag("tr"), postPaid);

    if (!postPaid) {
        applyValuePackStartAmounts(context, db);
    }

    SharedPreferences.Editor prefedit = sp.edit();
    Date now = new Date();
    prefedit.putString("updateDate", DateFormatters.ISO8601FORMAT.format(now));
    prefedit.putBoolean("loginFailed", false);
    prefedit.putBoolean("networkError", false);
    prefedit.commit();
    DBLog.insertMessage(context, "i", TAG, "Update Successful");
    return FetchResult.SUCCESS;
}

/**
 * Inserts one {@code cache} row (name, value, units, expiry value, expiry date) for every
 * account summary table row that carries a name.
 */
private void cacheAccountSummaryRows(SQLiteDatabase db, Elements rows, boolean postPaid) {
    // The wording of the expiry note differs between account types; compile the matching
    // pattern once instead of once per row.
    Pattern expiryPattern = Pattern.compile(postPaid
            ? "\\(([\\d\\.]*) ?\\w*? ?will expire on (.*)\\)"
            : "\\(([\\d\\.]*) ?\\w*? ?expiring on (.*)\\)");

    for (Element row : rows) {
        // Amount cell: "<value>&nbsp;<units>". Both stay null when the row has no amount cell.
        Double value = null;
        String units = null;
        Element amount = row.getElementsByClass("tableBillamount").first();
        if (amount != null) {
            String[] amountParts = amount.html().split("&nbsp;", 2);
            if (amountParts[0].contains("Included") || amountParts[0].equals("All You Need")
                    || amountParts[0].equals("Unlimited Text*")) {
                value = Values.INCLUDED;
            } else {
                try {
                    value = Double.parseDouble(amountParts[0]);
                } catch (NumberFormatException e) {
                    exceptionReporter.reportException(Thread.currentThread(), e, "Decoding value.");
                    value = 0.0;
                }
            }
            // An amount without "&nbsp;" has no units part; indexing [1] blindly used to
            // throw ArrayIndexOutOfBoundsException out of the whole update.
            if (amountParts.length > 1) {
                units = amountParts[1];
            }
        }

        // Detail cell: name in <strong>, optional expiry note in <em>.
        Element details = row.getElementsByClass("tableBilldetail").first();
        Element nameElement = details == null ? null : details.getElementsByTag("strong").first();
        if (nameElement == null) {
            // Rows without a name cannot be stored meaningfully; this used to be an
            // uncaught NullPointerException.
            Log.d(TAG, "Row without name. Skipping row.");
            continue;
        }
        String name = nameElement.text();
        Element expires = details.getElementsByTag("em").first();
        String expiresDetails = expires != null ? expires.text() : "";
        Log.d(TAG, expiresDetails);

        Double expiresValue = null;
        String expiresDate = null;
        Matcher matcher = expiryPattern.matcher(expiresDetails);
        if (matcher.find()) {
            try {
                expiresValue = Double.parseDouble(matcher.group(1));
            } catch (NumberFormatException e) {
                // Group 1 may be empty when the note carries no amount.
                expiresValue = null;
            }
            String expiresDateString = matcher.group(2);
            if (expiresDateString != null && expiresDateString.length() > 0) {
                try {
                    Date expiresDateObj = DateFormatters.EXPIRESDATE.parse(expiresDateString);
                    expiresDate = DateFormatters.ISO8601DATEONLYFORMAT.format(expiresDateObj);
                } catch (java.text.ParseException e) {
                    Log.d(TAG, "Could not parse date: " + expiresDateString);
                }
            }
        }

        ContentValues values = new ContentValues();
        values.put("name", name);
        values.put("value", value);
        values.put("units", units);
        values.put("expires_value", expiresValue);
        values.put("expires_date", expiresDate);
        db.insert("cache", "value", values);
    }
}

/**
 * Downloads the pre-pay value pack page and, for every enabled pack that is known to
 * {@code Values.valuePacks}, writes the plan name and start amount into the matching
 * {@code cache} rows.
 *
 * @throws IOException when the HTTP request fails
 */
private void applyValuePackStartAmounts(Context context, SQLiteDatabase db) throws IOException {
    Log.d(TAG, "Getting Value packs...");
    // Find value packs
    HttpGetter valuePacksPageGet = new HttpGetter(
            "https://secure.2degreesmobile.co.nz/group/ip/prevaluepack");
    String valuePacksPageString = valuePacksPageGet.execute();
    if (valuePacksPageString == null) {
        return;
    }
    Document valuePacksPage = Jsoup.parse(valuePacksPageString);
    Elements enabledPacks = valuePacksPage.getElementsByClass("yellow");
    for (Element enabledPack : enabledPacks) {
        Element offerNameElemt = enabledPack
                .getElementsByAttributeValueStarting("name", "offername").first();
        if (offerNameElemt == null) {
            continue;
        }
        String offerName = offerNameElemt.val();
        DBLog.insertMessage(context, "d", "", "Got element: " + offerName);
        ValuePack[] packs = Values.valuePacks.get(offerName);
        if (packs == null) {
            DBLog.insertMessage(context, "d", "", "Offer name: " + offerName + " not matched.");
            continue;
        }
        for (ValuePack pack : packs) {
            ContentValues values = new ContentValues();
            values.put("plan_startamount", pack.value);
            values.put("plan_name", offerName);
            DBLog.insertMessage(context, "d", "",
                    "Pack " + pack.type.id + " start value set to " + pack.value);
            // Bind the id as an argument instead of splicing it into the SQL text.
            db.update("cache", values, "name = ?", new String[] { String.valueOf(pack.type.id) });
        }
    }
}

From source file:org.tinymediamanager.scraper.imdb.ImdbMovieParser.java

/**
 * Extracts a release date from an IMDb release info page and stores it in the metadata.
 *
 * <p>The date for the country requested in {@code options} is preferred; it is looked up
 * first in the old table layout ({@code #release_dates}) and then in the new
 * {@code release-date-item} layout. When no date for that country can be parsed, the first
 * release date found on the page is used instead.
 *
 * @param doc     the parsed release info page
 * @param options scrape options providing the requested country
 * @param md      the metadata to update; left untouched when no date could be parsed
 * @return {@code md}
 */
private MediaMetadata parseReleaseinfoPage(Document doc, MediaScrapeOptions options, MediaMetadata md) {
    Date releaseDate = null;
    Pattern pattern = Pattern.compile("/calendar/\\?region=(.{2})");

    // old way
    Element tableReleaseDates = doc.getElementById("release_dates");
    if (tableReleaseDates != null) {
        // check the release date for the first row with the requested country
        for (Element row : tableReleaseDates.getElementsByTag("tr")) {
            // get the anchor
            Element anchor = row.getElementsByAttributeValueStarting("href", "/calendar/").first();
            if (anchor == null) {
                continue;
            }
            Matcher matcher = pattern.matcher(anchor.attr("href"));
            if (matcher.find() && options.getCountry().getAlpha2().equalsIgnoreCase(matcher.group(1))) {
                Element column = row.getElementsByClass("release_date").first();
                if (column != null) {
                    releaseDate = parseImdbReleaseDateText(column.text());
                    if (releaseDate != null) {
                        break;
                    }
                }
            }
        }
    }

    // new way; iterating over class name items
    if (releaseDate == null) {
        for (Element row : doc.getElementsByClass("release-date-item")) {
            Element anchor = row.getElementsByAttributeValueStarting("href", "/calendar/").first();
            if (anchor == null) {
                continue;
            }
            Matcher matcher = pattern.matcher(anchor.attr("href"));
            if (!matcher.find()) {
                // No region in this link. Reading group(1) without a successful match throws
                // IllegalStateException, which the former trace call in the else branch did.
                continue;
            }
            String region = matcher.group(1);
            if (options.getCountry().getAlpha2().equalsIgnoreCase(region)) {
                Element column = row.getElementsByClass("release-date-item__date").first();
                if (column != null) {
                    releaseDate = parseImdbReleaseDateText(column.text());
                    if (releaseDate != null) {
                        break;
                    }
                }
            } else {
                LOGGER.trace("country {} does not match ours {}", region,
                        options.getCountry().getAlpha2());
            }
        }
    }

    // no matching local release date found; take the first one
    if (releaseDate == null) {
        Element column = doc.getElementsByClass("release_date").first();
        if (column == null) {
            column = doc.getElementsByClass("release-date-item__date").first();
        }
        if (column != null) {
            releaseDate = parseImdbReleaseDateText(column.text());
        }
    }

    if (releaseDate != null) {
        md.setReleaseDate(releaseDate);
    }
    return md;
}

/**
 * Parses a release date written as {@code "d MMMM yyyy"} or, failing that, {@code "MMMM yyyy"}
 * (English month names).
 *
 * @return the parsed date, or {@code null} when the text matches neither format
 */
private Date parseImdbReleaseDateText(String text) {
    String[] formats = { "d MMMM yyyy", "MMMM yyyy" };
    for (String format : formats) {
        try {
            // SimpleDateFormat is not thread-safe, so a fresh instance is used per attempt.
            return new SimpleDateFormat(format, Locale.US).parse(text);
        } catch (ParseException ignored) {
            // not in this format; try the next one
        }
    }
    return null;
}

From source file:org.tinymediamanager.scraper.imdb.ImdbParser.java

protected MediaMetadata parseReferencePage(Document doc, MediaScrapeOptions options, MediaMetadata md) {
    /*/*from  ww  w  .j  a v a  2s.  c o  m*/
     * title and year have the following structure
     * 
     * <div id="tn15title"><h1>Merida - Legende der Highlands <span>(<a href="/year/2012/">2012</a>) <span class="pro-link">...</span> <span
     * class="title-extra">Brave <i>(original title)</i></span> </span></h1> </div>
     */

    // title
    Element title = doc.getElementsByAttributeValue("name", "title").first();
    if (title != null) {
        String movieTitle = cleanString(title.attr("content"));
        int yearStart = movieTitle.lastIndexOf("(");
        if (yearStart > 0) {
            movieTitle = movieTitle.substring(0, yearStart - 1).trim();
            md.setTitle(movieTitle);
        }
    }

    // original title and year
    Element originalTitleYear = doc.getElementsByAttributeValue("property", "og:title").first();
    if (originalTitleYear != null) {
        String content = originalTitleYear.attr("content");
        int startOfYear = content.lastIndexOf("(");
        if (startOfYear > 0) {
            // noo - this is NOT the original title!!! (seems always english?) parse from AKAs page...
            // String originalTitle = content.substring(0, startOfYear - 1).trim();
            // md.setOriginalTitle(originalTitle);

            String yearText = content.substring(startOfYear);

            // search year
            Pattern yearPattern = Pattern.compile("[1-2][0-9]{3}");
            Matcher matcher = yearPattern.matcher(yearText);
            while (matcher.find()) {
                if (matcher.group(0) != null) {
                    String movieYear = matcher.group(0);
                    try {
                        md.setYear(Integer.parseInt(movieYear));
                        break;
                    } catch (Exception ignored) {
                    }
                }
            }
        }
    }

    // poster
    Element poster = doc.getElementsByAttributeValue("property", "og:image").first();
    if (poster != null) {
        String posterUrl = poster.attr("content");

        int fileStart = posterUrl.lastIndexOf("/");
        if (fileStart > 0) {
            int parameterStart = posterUrl.indexOf("_", fileStart);
            if (parameterStart > 0) {
                int startOfExtension = posterUrl.lastIndexOf(".");
                if (startOfExtension > parameterStart) {
                    posterUrl = posterUrl.substring(0, parameterStart) + posterUrl.substring(startOfExtension);

                }
            }
        }
        processMediaArt(md, MediaArtwork.MediaArtworkType.POSTER, posterUrl);
    }

    /*
     * <div class="starbar-meta"> <b>7.4/10</b> &nbsp;&nbsp;<a href="ratings" class="tn15more">52,871 votes</a>&nbsp;&raquo; </div>
     */

    // rating and rating count
    Element ratingElement = doc.getElementsByClass("ipl-rating-star__rating").first();
    if (ratingElement != null) {
        String ratingAsString = ratingElement.ownText().replace(",", ".");
        try {
            md.setRating(Float.valueOf(ratingAsString));
        } catch (Exception ignored) {
        }

        Element votesElement = doc.getElementsByClass("ipl-rating-star__total-votes").first();
        if (votesElement != null) {
            String countAsString = votesElement.ownText().replaceAll("[.,()\\u00a0]", "").trim();
            try {
                md.setVoteCount(Integer.parseInt(countAsString));
            } catch (Exception ignored) {
            }
        }
    }
    // top250
    Element topRatedElement = doc.getElementsByAttributeValue("href", "/chart/top").first();
    if (topRatedElement != null) {
        Pattern topPattern = Pattern.compile("Top Rated Movies: #([0-9]{1,3})");
        Matcher matcher = topPattern.matcher(topRatedElement.ownText());
        while (matcher.find()) {
            if (matcher.group(1) != null) {
                try {
                    String top250Text = matcher.group(1);
                    md.setTop250(Integer.parseInt(top250Text));
                } catch (Exception ignored) {
                }
            }
        }
    }

    // releasedate
    Element releaseDateElement = doc
            .getElementsByAttributeValue("href", "/title/" + options.getImdbId().toLowerCase() + "/releaseinfo")
            .first();
    if (releaseDateElement != null) {
        String releaseDateText = releaseDateElement.ownText();
        int startOfCountry = releaseDateText.indexOf("(");
        if (startOfCountry > 0) {
            releaseDateText = releaseDateText.substring(0, startOfCountry - 1).trim();
        }
        try {
            SimpleDateFormat sdf = new SimpleDateFormat("d MMMM yyyy", Locale.US);
            Date parsedDate = sdf.parse(releaseDateText);
            md.setReleaseDate(parsedDate);
        } catch (ParseException otherformat) {
            try {
                SimpleDateFormat sdf = new SimpleDateFormat("MMMM yyyy", Locale.US);
                Date parsedDate = sdf.parse(releaseDateText);
                md.setReleaseDate(parsedDate);
            } catch (ParseException ignored) {
            }
        }
    }

    Elements elements = doc.getElementsByClass("ipl-zebra-list__label");
    for (Element element : elements) {
        // only parse tds
        if (!"td".equals(element.tag().getName())) {
            continue;
        }

        String elementText = element.ownText();

        if (elementText.equals("Taglines")) {
            if (!ImdbMetadataProvider.providerInfo.getConfig().getValueAsBool("useTmdb")) {
                Element taglineElement = element.nextElementSibling();
                if (taglineElement != null) {
                    String tagline = cleanString(taglineElement.ownText().replaceAll("", ""));
                    md.setTagline(tagline);
                }
            }
        }

        if (elementText.equals("Genres")) {
            Element nextElement = element.nextElementSibling();
            if (nextElement != null) {
                Elements genreElements = nextElement.getElementsByAttributeValueStarting("href", "/genre/");

                for (Element genreElement : genreElements) {
                    String genreText = genreElement.ownText();
                    md.addGenre(getTmmGenre(genreText));
                }
            }
        }

        /*
         * Old HTML, but maybe the same content formart <div class="info"><h5>Runtime:</h5><div class="info-content">162 min | 171 min (special edition)
         * | 178 min (extended cut)</div></div>
         */
        if (elementText.equals("Runtime")) {
            Element nextElement = element.nextElementSibling();
            if (nextElement != null) {
                Element runtimeElement = nextElement.getElementsByClass("ipl-inline-list__item").first();
                if (runtimeElement != null) {
                    String first = runtimeElement.ownText().split("\\|")[0];
                    String runtimeAsString = cleanString(first.replaceAll("min", ""));
                    int runtime = 0;
                    try {
                        runtime = Integer.parseInt(runtimeAsString);
                    } catch (Exception e) {
                        // try to filter out the first number we find
                        Pattern runtimePattern = Pattern.compile("([0-9]{2,3})");
                        Matcher matcher = runtimePattern.matcher(runtimeAsString);
                        if (matcher.find()) {
                            runtime = Integer.parseInt(matcher.group(0));
                        }
                    }
                    md.setRuntime(runtime);
                }
            }
        }

        if (elementText.equals("Country")) {
            Element nextElement = element.nextElementSibling();
            if (nextElement != null) {
                Elements countryElements = nextElement.getElementsByAttributeValueStarting("href", "/country/");
                Pattern pattern = Pattern.compile("/country/(.*)");

                for (Element countryElement : countryElements) {
                    Matcher matcher = pattern.matcher(countryElement.attr("href"));
                    if (matcher.matches()) {
                        if (ImdbMetadataProvider.providerInfo.getConfig()
                                .getValueAsBool("scrapeLanguageNames")) {
                            md.addCountry(LanguageUtils.getLocalizedCountryForLanguage(
                                    options.getLanguage().getLanguage(), countryElement.text(),
                                    matcher.group(1)));
                        } else {
                            md.addCountry(matcher.group(1));
                        }
                    }
                }
            }
        }

        if (elementText.equals("Language")) {
            Element nextElement = element.nextElementSibling();
            if (nextElement != null) {
                Elements languageElements = nextElement.getElementsByAttributeValueStarting("href",
                        "/language/");
                Pattern pattern = Pattern.compile("/language/(.*)");

                for (Element languageElement : languageElements) {
                    Matcher matcher = pattern.matcher(languageElement.attr("href"));
                    if (matcher.matches()) {
                        if (ImdbMetadataProvider.providerInfo.getConfig()
                                .getValueAsBool("scrapeLanguageNames")) {
                            md.addSpokenLanguage(LanguageUtils.getLocalizedLanguageNameFromLocalizedString(
                                    options.getLanguage(), languageElement.text(), matcher.group(1)));
                        } else {
                            md.addSpokenLanguage(matcher.group(1));
                        }
                    }
                }
            }
        }

        if (elementText.equals("Certification")) {
            Element nextElement = element.nextElementSibling();
            if (nextElement != null) {
                String languageCode = options.getCountry().getAlpha2();
                Elements certificationElements = nextElement.getElementsByAttributeValueStarting("href",
                        "/search/title?certificates=" + languageCode);
                boolean done = false;
                for (Element certificationElement : certificationElements) {
                    String certText = certificationElement.ownText();
                    int startOfCert = certText.indexOf(":");
                    if (startOfCert > 0 && certText.length() > startOfCert + 1) {
                        certText = certText.substring(startOfCert + 1);
                    }

                    Certification certification = Certification.getCertification(options.getCountry(),
                            certText);
                    if (certification != null) {
                        md.addCertification(certification);
                        done = true;
                        break;
                    }
                }

                if (!done && languageCode.equals("DE")) {
                    certificationElements = nextElement.getElementsByAttributeValueStarting("href",
                            "/search/title?certificates=XWG");
                    for (Element certificationElement : certificationElements) {
                        String certText = certificationElement.ownText();
                        int startOfCert = certText.indexOf(":");
                        if (startOfCert > 0 && certText.length() > startOfCert + 1) {
                            certText = certText.substring(startOfCert + 1);
                        }

                        Certification certification = Certification.getCertification(options.getCountry(),
                                certText);
                        if (certification != null) {
                            md.addCertification(certification);
                            break;
                        }
                    }
                }

            }
        }
    }

    // director
    Element directorsElement = doc.getElementById("directors");
    while (directorsElement != null && directorsElement.tag().getName() != "header") {
        directorsElement = directorsElement.parent();
    }
    if (directorsElement != null) {
        directorsElement = directorsElement.nextElementSibling();
    }
    if (directorsElement != null) {
        for (Element directorElement : directorsElement.getElementsByClass("name")) {
            String director = directorElement.text().trim();

            MediaCastMember cm = new MediaCastMember(MediaCastMember.CastType.DIRECTOR);
            cm.setName(director);
            md.addCastMember(cm);
        }
    }

    // actors
    Element castTableElement = doc.getElementsByClass("cast_list").first();
    if (castTableElement != null) {
        Elements tr = castTableElement.getElementsByTag("tr");
        for (Element row : tr) {
            MediaCastMember cm = parseCastMember(row);
            if (cm != null && StringUtils.isNotEmpty(cm.getName())
                    && StringUtils.isNotEmpty(cm.getCharacter())) {
                cm.setType(MediaCastMember.CastType.ACTOR);
                md.addCastMember(cm);
            }
        }
    }

    // writers
    Element writersElement = doc.getElementById("writers");
    while (writersElement != null && writersElement.tag().getName() != "header") {
        writersElement = writersElement.parent();
    }
    if (writersElement != null) {
        writersElement = writersElement.nextElementSibling();
    }
    if (writersElement != null) {
        Elements writersElements = writersElement.getElementsByAttributeValueStarting("href", "/name/");

        for (Element writerElement : writersElements) {
            String writer = cleanString(writerElement.ownText());
            MediaCastMember cm = new MediaCastMember(MediaCastMember.CastType.WRITER);
            cm.setName(writer);
            md.addCastMember(cm);
        }
    }

    // producers
    Element producersElement = doc.getElementById("producers");
    while (producersElement != null && producersElement.tag().getName() != "header") {
        producersElement = producersElement.parent();
    }
    if (producersElement != null) {
        producersElement = producersElement.nextElementSibling();
    }
    if (producersElement != null) {
        Elements producersElements = producersElement.getElementsByAttributeValueStarting("href", "/name/");

        for (Element producerElement : producersElements) {
            String producer = cleanString(producerElement.ownText());
            MediaCastMember cm = new MediaCastMember(MediaCastMember.CastType.PRODUCER);
            cm.setName(producer);
            md.addCastMember(cm);
        }
    }

    // producers
    Elements prodCompHeaderElements = doc.getElementsByClass("ipl-list-title");
    Element prodCompHeaderElement = null;

    for (Element possibleProdCompHeaderEl : prodCompHeaderElements) {
        if (possibleProdCompHeaderEl.ownText().equals("Production Companies")) {
            prodCompHeaderElement = possibleProdCompHeaderEl;
            break;
        }
    }

    while (prodCompHeaderElement != null && prodCompHeaderElement.tag().getName() != "header") {
        prodCompHeaderElement = prodCompHeaderElement.parent();
    }
    if (prodCompHeaderElement != null) {
        prodCompHeaderElement = prodCompHeaderElement.nextElementSibling();
    }
    if (prodCompHeaderElement != null) {
        Elements prodCompElements = prodCompHeaderElement.getElementsByAttributeValueStarting("href",
                "/company/");

        for (Element prodCompElement : prodCompElements) {
            String prodComp = prodCompElement.ownText();
            md.addProductionCompany(prodComp);
        }
    }

    return md;
}

From source file:org.tinymediamanager.scraper.imdb.ImdbParser.java

/**
 * Builds a cast member from one row of the cast table.
 * <p>
 * The name is read from the first element whose {@code itemprop} attribute starts with
 * {@code "name"}; the character from the first element with class {@code character}; the image URL
 * from the {@code loadlate} attribute of the first {@code img} tag.
 *
 * @param row
 *          the table row to read
 * @return the populated cast member (character and image URL are empty strings when the row does
 *         not provide them), or {@code null} when the row has no name element
 */
protected MediaCastMember parseCastMember(Element row) {
    Element nameNode = row.getElementsByAttributeValueStarting("itemprop", "name").first();
    if (nameNode == null) {
        // without a name there is nothing useful to return
        return null;
    }
    String personName = cleanString(nameNode.ownText());

    String role = "";
    Element roleNode = row.getElementsByClass("character").first();
    if (roleNode != null) {
        // drop a trailing parenthesised remark such as "(120 episodes, 2006-2014)"
        role = cleanString(roleNode.text()).replaceAll("\\(.*?\\)$", "").trim();
    }

    Element imgNode = row.getElementsByTag("img").first();
    String rawSrc = imgNode == null ? "" : imgNode.attr("loadlate");

    String imageUrl = "";
    if (!StringUtils.isEmpty(rawSrc)) {
        imageUrl = rawSrc;

        // locate the "._<params>" section of the file name and the final extension
        int lastSlash = rawSrc.lastIndexOf("/");
        int paramsAt = lastSlash > 0 ? rawSrc.indexOf("._", lastSlash) : -1;
        int extensionAt = paramsAt > 0 ? rawSrc.lastIndexOf(".") : -1;

        if (paramsAt > 0 && extensionAt > paramsAt) {
            // replace the rescale/crop params with a fixed 632 px height (as in the tmdb scraper)
            imageUrl = rawSrc.substring(0, paramsAt) + "._UY632" + rawSrc.substring(extensionAt);
        }
    }

    MediaCastMember member = new MediaCastMember();
    member.setCharacter(role);
    member.setName(personName);
    member.setImageUrl(imageUrl);
    return member;
}

From source file:org.tinymediamanager.scraper.imdb.ImdbTvShowParser.java

/**
 * Reads the episode rows (class {@code list_item} inside elements of class {@code eplist}) from the
 * given document and appends one {@link MediaEpisode} per successfully parsed row to
 * {@code episodes}.
 * <p>
 * For {@code season == 0} only rows whose text contains "Unknown" are taken and they are numbered
 * with a running counter; otherwise season and episode number are read from the
 * "S&lt;n&gt;, Ep&lt;m&gt;" marker in the row text. Rows that cannot be parsed are logged and
 * skipped.
 *
 * @param season
 *          the season being parsed; {@code 0} selects the "Unknown" rows
 * @param episodes
 *          the list the parsed episodes are added to
 * @param doc
 *          the document containing the episode list
 * @return {@code false} as soon as a row belongs to a different season than the requested one
 *         ({@code season > 0}); {@code true} otherwise
 */
private boolean parseEpisodeList(int season, List<MediaEpisode> episodes, Document doc) {
    Pattern unknownPattern = Pattern.compile("Unknown");
    Pattern seasonEpisodePattern = Pattern.compile("S([0-9]*), Ep([0-9]*)");
    int episodeCounter = 0;

    // parse episodes
    Elements tables = doc.getElementsByClass("eplist");
    for (Element table : tables) {
        Elements rows = table.getElementsByClass("list_item");
        for (Element row : rows) {
            Matcher matcher = season == 0 ? unknownPattern.matcher(row.text())
                    : seasonEpisodePattern.matcher(row.text());
            if (matcher.find() && (season == 0 || matcher.groupCount() >= 2)) {
                try {
                    // we found a row containing episode data
                    MediaEpisode ep = new MediaEpisode(providerInfo.getId());

                    // parse season and ep number
                    if (season == 0) {
                        ep.season = season;
                        ep.episode = ++episodeCounter;
                    } else {
                        // an empty capture ("S, Ep") makes parseInt throw; the outer catch skips the row
                        ep.season = Integer.parseInt(matcher.group(1));
                        ep.episode = Integer.parseInt(matcher.group(2));
                    }

                    // check if we have still valid data
                    if (season > 0 && season != ep.season) {
                        return false;
                    }

                    // get ep title and id
                    Elements anchors = row.getElementsByAttributeValueStarting("href", "/title/tt");
                    if (anchors.isEmpty()) {
                        // without a title link no id can be extracted; skip the row explicitly
                        // instead of relying on an IndexOutOfBoundsException from anchors.get(0)
                        LOGGER.warn("failed parsing: " + row.text() + " for ep data; no title link found");
                        continue;
                    }

                    for (Element anchor : anchors) {
                        if ("name".equals(anchor.attr("itemprop"))) {
                            ep.title = anchor.text();
                            break;
                        }
                    }

                    // the id is taken from the first title link; the last non-null match wins
                    String id = "";
                    Matcher idMatcher = IMDB_ID_PATTERN.matcher(anchors.get(0).attr("href"));
                    while (idMatcher.find()) {
                        if (idMatcher.group(1) != null) {
                            id = idMatcher.group(1);
                        }
                    }

                    if (StringUtils.isNotBlank(id)) {
                        ep.ids.put(providerInfo.getId(), id);
                    }

                    // plot
                    Element plot = row.getElementsByClass("item_description").first();
                    if (plot != null) {
                        ep.plot = plot.ownText();
                    }

                    // rating and rating count
                    Element ratingElement = row.getElementsByClass("ipl-rating-star__rating").first();
                    if (ratingElement != null) {
                        // accept a decimal comma as well as a decimal point
                        String ratingAsString = ratingElement.ownText().replace(",", ".");
                        try {
                            ep.rating = Float.valueOf(ratingAsString);
                        } catch (NumberFormatException ignored) {
                            // best effort: a non-numeric rating text leaves ep.rating untouched
                        }

                        Element votesElement = row.getElementsByClass("ipl-rating-star__total-votes").first();
                        if (votesElement != null) {
                            // strip thousands separators and the surrounding parentheses
                            String countAsString = votesElement.ownText().replaceAll("[.,()]", "").trim();
                            try {
                                ep.voteCount = Integer.parseInt(countAsString);
                            } catch (NumberFormatException ignored) {
                                // best effort: a non-numeric vote count leaves ep.voteCount untouched
                            }
                        }
                    }

                    // release date
                    Element releaseDate = row.getElementsByClass("airdate").first();
                    if (releaseDate != null) {
                        ep.firstAired = releaseDate.ownText();
                    }

                    // poster
                    Element image = row.getElementsByTag("img").first();
                    if (image != null) {
                        // remove the UX/UY/CR tokens from the URL (presumably resize/crop parameters)
                        String posterUrl = image.attr("src");
                        posterUrl = posterUrl.replaceAll("UX[0-9]{2,4}_", "");
                        posterUrl = posterUrl.replaceAll("UY[0-9]{2,4}_", "");
                        posterUrl = posterUrl.replaceAll("CR[0-9]{1,3},[0-9]{1,3},[0-9]{1,3},[0-9]{1,3}_", "");

                        if (StringUtils.isNotBlank(posterUrl)) {
                            MediaArtwork ma = new MediaArtwork(ImdbMetadataProvider.providerInfo.getId(),
                                    MediaArtwork.MediaArtworkType.THUMB);
                            ma.setPreviewUrl(posterUrl);
                            ma.setDefaultUrl(posterUrl);
                            ep.artwork.add(ma);
                        }
                    }

                    episodes.add(ep);
                } catch (Exception e) {
                    // one broken row must not abort the whole list; log it and go on with the next row
                    LOGGER.warn("failed parsing: " + row.text() + " for ep data; " + e.getMessage());
                }
            }
        }
    }
    return true;
}