Example usage for org.jsoup.nodes Element nextElementSibling

List of usage examples for org.jsoup.nodes Element nextElementSibling

Introduction

In this page you can find the example usage for org.jsoup.nodes Element nextElementSibling.

Prototype

public Element nextElementSibling() 

Source Link

Document

Gets the next sibling element of this element.

Usage

From source file:de.geeksfactory.opacclient.apis.Zones.java

@Override
public AccountData account(Account acc) throws IOException, JSONException, OpacErrorException {
    Document login = login(acc);//from w ww .  j a va  2s .c  o m
    if (login == null) {
        return null;
    }

    AccountData res = new AccountData(acc.getId());

    String lentLink = null;
    String resLink = null;
    int lent_cnt = -1;
    int res_cnt = -1;
    for (Element td : login.select(".AccountSummaryCounterNameCell, .AccountSummaryCounterNameCellStripe, "
            + ".CAccountDetailFieldNameCellStripe, .CAccountDetailFieldNameCell")) {
        String section = td.text().trim();
        if (section.contains("Entliehene Medien")) {
            lentLink = td.select("a").attr("href");
            lent_cnt = Integer.parseInt(td.nextElementSibling().text().trim());
        } else if (section.contains("Vormerkungen")) {
            resLink = td.select("a").attr("href");
            res_cnt = Integer.parseInt(td.nextElementSibling().text().trim());
        } else if (section.contains("Kontostand")) {
            res.setPendingFees(td.nextElementSibling().text().trim());
        } else if (section.matches("Ausweis g.ltig bis")) {
            res.setValidUntil(td.nextElementSibling().text().trim());
        }
    }
    for (Element a : login.select("a.AccountMenuLink")) {
        if (a.text().contains("Ausleihen")) {
            lentLink = a.attr("href");
        } else if (a.text().contains("Vormerkungen")) {
            resLink = a.attr("href");
        }
    }
    if (lentLink == null) {
        return null;
    }

    List<LentItem> lentItems = new ArrayList<>();
    String lentUrl = opac_url + "/" + lentLink.replace("utf-8?Method", "utf-8&Method");
    String lentHtml = httpGet(lentUrl, getDefaultEncoding());
    Document lentDoc = Jsoup.parse(lentHtml);
    lentDoc.setBaseUri(lentUrl);
    loadMediaList(lentDoc, lentItems);
    res.setLent(lentItems);

    // In Koeln, the reservations link only doesn't show on the overview page
    if (resLink == null) {
        for (Element a : lentDoc.select("a.AccountMenuLink")) {
            if (a.text().contains("Vormerkungen")) {
                resLink = a.attr("href");
            }
        }
    }

    List<ReservedItem> reservedItems = new ArrayList<>();
    String resHtml = httpGet(opac_url + "/" + resLink, getDefaultEncoding());
    Document resDoc = Jsoup.parse(resHtml);
    loadResList(resDoc, reservedItems);
    res.setReservations(reservedItems);

    return res;
}

From source file:me.vertretungsplan.parser.UntisCommonParser.java

void parseDay(SubstitutionScheduleDay day, Element next, SubstitutionSchedule v, String klasse)
        throws JSONException, CredentialInvalidException {
    if (next.className().equals("subst") || next.select(".list").size() > 0
            || next.text().contains("Vertretungen sind nicht freigegeben")
            || next.text().contains("Keine Vertretungen")) {
        //Vertretungstabelle
        if (next.text().contains("Vertretungen sind nicht freigegeben")) {
            return;
        }/*  ww  w. jav a 2 s .  co m*/
        parseSubstitutionScheduleTable(next, scheduleData.getData(), day, klasse);
    } else {
        //Nachrichten
        parseMessages(next, day);
        next = next.nextElementSibling().nextElementSibling();
        parseSubstitutionScheduleTable(next, scheduleData.getData(), day, klasse);
    }
    v.addDay(day);
}

From source file:me.vertretungsplan.parser.ESchoolParser.java

private void parseTable(Element table, SubstitutionScheduleDay day) {
    for (Element th : table.select("th[colspan=10]")) {
        String lesson;/*from   w w w .  j a v  a  2s.co m*/

        Pattern pattern = Pattern.compile("(\\d+)\\. Stunde");
        Matcher matcher = pattern.matcher(th.text());
        if (matcher.find()) {
            lesson = matcher.group(1);
        } else {
            lesson = th.text();
        }

        // skip over table headers
        Element row = th.parent().nextElementSibling().nextElementSibling();
        while (row != null && row.select("th").size() == 0) {
            Substitution subst = new Substitution();
            subst.setLesson(lesson);

            Elements columns = row.select("td");

            String[] classes = columns.get(0).text().split(", |\\+");
            subst.setClasses(new HashSet<>(Arrays.asList(classes)));

            subst.setPreviousTeacher(getPreviousValue(columns.get(1)));
            subst.setTeacher(getNewValue(columns.get(1)));
            subst.setPreviousSubject(getPreviousValue(columns.get(2)));
            subst.setSubject(getNewValue(columns.get(2)));
            subst.setPreviousRoom(getPreviousValue(columns.get(3)));
            subst.setRoom(getNewValue(columns.get(3)));
            if (columns.get(4).text().isEmpty()) {
                subst.setType("Vertretung");
                subst.setColor(colorProvider.getColor("Vertretung"));
            } else {
                String desc = columns.get(4).text();
                subst.setDesc(desc);
                String recognizedType = recognizeType(desc);
                if (recognizedType == null)
                    recognizedType = "Vertretung";
                subst.setType(recognizedType);
                subst.setColor(colorProvider.getColor(recognizedType));
            }

            day.addSubstitution(subst);

            row = row.nextElementSibling();
        }
    }
}

From source file:cn.edu.hfut.dmic.contentextractor.ContentExtractor.java

/**
 * ??:/*  w ww. j  a  v  a2s . c  o m*/
 * 1. ???
 * 2. ???????
 * 3. ??
 * 4. ?? ??
 * 5. ?
 *
 * @return
 * @throws XpathSyntaxErrorException
 */
private String getAuthor() throws XpathSyntaxErrorException {
    String author = "";
    if (StringUtils.isBlank(srcTime)) {
        author = getAuthor(doc.body().html());
        return author;
    }
    Element cur = doc.body().select("*:containsOwn(" + srcTime + ")").first();
    if (cur == null) {
        LOG.warn("?srcTime=" + srcTime);
        author = getAuthor(doc.body().html());
        return author;
    }

    if (!noText(cur)) {
        String arr[] = cur.html().split(srcTime);
        for (String text : arr) {
            author = getShortText(text);
            if (!StringUtils.isBlank(author))
                return author;
        }
    }
    Element parent = cur.parent();
    while (parent != null && noText(parent)) {
        cur = parent;
        parent = parent.parent();
    }
    author = getAuthor(parent.html());
    if (!StringUtils.isBlank(author))
        return author;

    Element pre = cur.previousElementSibling();
    while (pre != null && noText(pre)) {
        pre = pre.previousElementSibling();
    }
    if (pre != null) {
        author = getShortText(pre.text());
    }
    if (!StringUtils.isBlank(author))
        return author;
    Element next = cur.nextElementSibling();
    while (next != null && noText(next)) {
        next = next.nextElementSibling();
    }
    if (next != null) {
        author = getShortText(next.text());
    }
    if (!StringUtils.isBlank(author))
        return author;

    author = getShortText(parent.html().replace(srcTime, " "));
    if (!StringUtils.isBlank(author))
        return author;

    author = getAuthor(doc.body().html());
    if (StringUtils.isBlank(author)) {
        return author_bak;
    }
    return author;
}

From source file:de.geeksfactory.opacclient.apis.BiBer1992.java

@Override
public List<SearchField> getSearchFields() throws IOException {
    List<SearchField> fields = new ArrayList<>();

    HttpGet httpget;//from  w w w  .  ja  va2  s . c o  m
    if (opacDir.contains("opax")) {
        httpget = new HttpGet(opacUrl + "/" + opacDir + "/de/qsel.html.S");
    } else {
        httpget = new HttpGet(opacUrl + "/" + opacDir + "/de/qsel_main.S");
    }

    HttpResponse response = http_client.execute(httpget);

    if (response.getStatusLine().getStatusCode() == 500) {
        throw new NotReachableException(response.getStatusLine().getReasonPhrase());
    }
    String html = convertStreamToString(response.getEntity().getContent());
    HttpUtils.consume(response.getEntity());

    Document doc = Jsoup.parse(html);

    // get text fields
    Elements text_opts = doc.select("form select[name=REG1] option");
    for (Element opt : text_opts) {
        TextSearchField field = new TextSearchField();
        field.setId(opt.attr("value"));
        field.setDisplayName(opt.text());
        field.setHint("");
        fields.add(field);
    }

    // get media types
    Elements mt_opts = doc.select("form input[name~=(MT|MS)]");
    if (mt_opts.size() > 0) {
        DropdownSearchField mtDropdown = new DropdownSearchField();
        mtDropdown.setId(mt_opts.get(0).attr("name"));
        mtDropdown.setDisplayName("Medientyp");
        for (Element opt : mt_opts) {
            if (!opt.val().equals("")) {
                String text = opt.text();
                if (text.length() == 0) {
                    // text is empty, check layouts:
                    // Essen: <input name="MT"><img title="mediatype">
                    // Schaffenb: <input name="MT"><img alt="mediatype">
                    Element img = opt.nextElementSibling();
                    if (img != null && img.tagName().equals("img")) {
                        text = img.attr("title");
                        if (text.equals("")) {
                            text = img.attr("alt");
                        }
                    }
                }
                if (text.length() == 0) {
                    // text is still empty, check table layout, Example
                    // Friedrichshafen
                    // <td><input name="MT"></td> <td><img
                    // title="mediatype"></td>
                    Element td1 = opt.parent();
                    Element td2 = td1.nextElementSibling();
                    if (td2 != null) {
                        Elements td2Children = td2.select("img[title]");
                        if (td2Children.size() > 0) {
                            text = td2Children.get(0).attr("title");
                        }
                    }
                }
                if (text.length() == 0) {
                    // text is still empty, check images in label layout, Example
                    // Wiedenst
                    // <input type="radio" name="MT" id="MTYP1" value="MTYP1">
                    // <label for="MTYP1"><img src="http://www.wiedenest.de/bib/image/books
                    // .png" alt="Bcher" title="Bcher"></label>
                    Element label = opt.nextElementSibling();
                    if (label != null) {
                        Elements td2Children = label.select("img[title]");
                        if (td2Children.size() > 0) {
                            text = td2Children.get(0).attr("title");
                        }
                    }
                }
                if (text.length() == 0) {
                    // text is still empty: missing end tag like Offenburg
                    text = parse_option_regex(opt);
                }
                mtDropdown.addDropdownValue(opt.val(), text);
            }
        }
        fields.add(mtDropdown);
    }

    // get branches
    Elements br_opts = doc.select("form select[name=ZW] option");
    if (br_opts.size() > 0) {
        DropdownSearchField brDropdown = new DropdownSearchField();
        brDropdown.setId(br_opts.get(0).parent().attr("name"));
        brDropdown.setDisplayName(br_opts.get(0).parent().parent().previousElementSibling().text()
                .replace("\u00a0", "").replace("?", "").trim());
        for (Element opt : br_opts) {
            brDropdown.addDropdownValue(opt.val(), opt.text());
        }
        fields.add(brDropdown);
    }

    return fields;
}

From source file:net.niyonkuru.koodroid.html.SubscribersHandler.java

@Override
public ArrayList<ContentProviderOperation> parse(Document doc, ContentResolver resolver)
        throws HandlerException {
    final ArrayList<ContentProviderOperation> batch = new ArrayList<ContentProviderOperation>();

    Element subscriberLi = doc.select("div#banSelector li:has(div)").first();
    while (subscriberLi != null) {
        String text = subscriberLi.text();

        /* this assumes the name and phone number are separated by a space */
        int separator = text.lastIndexOf(' ') + 1;

        String subscriberId = text.substring(separator).replaceAll("\\D", "");
        if (subscriberId.length() != 10)
            throw new HandlerException(getString(R.string.parser_error_unexpected_input));

        final ContentProviderOperation.Builder builder;

        final Uri subscriberUri = Subscribers.buildSubscriberUri(subscriberId);
        if (subscriberExists(subscriberUri, resolver)) {
            builder = ContentProviderOperation.newUpdate(subscriberUri);
            builder.withValue(Subscribers.UPDATED, System.currentTimeMillis());
        } else {/*ww  w  .ja  v  a  2  s .  c  om*/
            builder = ContentProviderOperation.newInsert(Subscribers.CONTENT_URI);
        }
        builder.withValue(Subscribers.SUBSCRIBER_ID, subscriberId);

        String fullName = "";
        String[] names = text.substring(0, separator).split("\\s");
        for (String name : names) {
            fullName += ParserUtils.capitalize(name) + " ";
        }
        builder.withValue(Subscribers.SUBSCRIBER_FULL_NAME, fullName.trim());

        if (subscriberLi.hasAttr("onClick")) {
            String switchUrl = subscriberLi.attr("onClick");

            /* extract only the url */
            switchUrl = switchUrl.substring(switchUrl.indexOf('/'), switchUrl.lastIndexOf('\''));
            builder.withValue(Subscribers.SUBSCRIBER_SWITCHER, switchUrl);
        } else { /* this is the default subscriber as it doesn't have a switcher url */
            ContentValues cv = new ContentValues(1);
            cv.put(Settings.SUBSCRIBER, subscriberId);

            resolver.insert(Settings.CONTENT_URI, cv);
        }
        builder.withValue(Subscribers.SUBSCRIBER_EMAIL, mParent);

        batch.add(builder.build());

        subscriberLi = subscriberLi.nextElementSibling();
    }
    if (batch.size() == 0)
        throw new HandlerException(getString(R.string.parser_error_unexpected_input));

    JSONObject metadata = new JSONObject();
    try {
        metadata.put("subscribers", batch.size());
        metadata.put("language", getString(R.string.locale));
    } catch (JSONException ignored) {
    }
    Crittercism.setMetadata(metadata);
    Crittercism.setUsername(mParent);

    return batch;
}

From source file:org.tinymediamanager.scraper.imdb.ImdbMetadataProvider.java

@Override
public MediaMetadata getMetadata(MediaScrapeOptions options) throws Exception {
    LOGGER.debug("getMetadata() " + options.toString());
    // check if there is a md in the result
    if (options.getResult() != null && options.getResult().getMetadata() != null) {
        LOGGER.debug("IMDB: getMetadata from cache: " + options.getResult());
        return options.getResult().getMetadata();
    }//w w  w. j a v a2 s  .  c  o  m

    MediaMetadata md = new MediaMetadata(providerInfo.getId());
    String imdbId = "";

    // imdbId from searchResult
    if (options.getResult() != null) {
        imdbId = options.getResult().getIMDBId();
    }

    // imdbid from scraper option
    if (!MetadataUtil.isValidImdbId(imdbId)) {
        imdbId = options.getImdbId();
    }

    if (!MetadataUtil.isValidImdbId(imdbId)) {
        return md;
    }

    LOGGER.debug("IMDB: getMetadata(imdbId): " + imdbId);
    md.setId(MediaMetadata.IMDBID, imdbId);

    ExecutorCompletionService<Document> compSvcImdb = new ExecutorCompletionService<Document>(executor);
    ExecutorCompletionService<MediaMetadata> compSvcTmdb = new ExecutorCompletionService<MediaMetadata>(
            executor);

    // worker for imdb request (/combined) (everytime from akas.imdb.com)
    // StringBuilder sb = new StringBuilder(imdbSite.getSite());
    StringBuilder sb = new StringBuilder(ImdbSiteDefinition.IMDB_COM.getSite());
    sb.append("title/");
    sb.append(imdbId);
    sb.append("/combined");
    Callable<Document> worker = new ImdbWorker(sb.toString(), options.getLanguage().name(),
            options.getCountry().getAlpha2());
    Future<Document> futureCombined = compSvcImdb.submit(worker);

    // worker for imdb request (/plotsummary) (from chosen site)
    Future<Document> futurePlotsummary = null;
    sb = new StringBuilder(imdbSite.getSite());
    sb.append("title/");
    sb.append(imdbId);
    sb.append("/plotsummary");

    worker = new ImdbWorker(sb.toString(), options.getLanguage().name(), options.getCountry().getAlpha2());
    futurePlotsummary = compSvcImdb.submit(worker);

    // worker for tmdb request
    Future<MediaMetadata> futureTmdb = null;
    if (options.isScrapeImdbForeignLanguage() || options.isScrapeCollectionInfo()) {
        Callable<MediaMetadata> worker2 = new TmdbWorker(imdbId, options.getLanguage(), options.getCountry());
        futureTmdb = compSvcTmdb.submit(worker2);
    }

    Document doc;
    doc = futureCombined.get();

    /*
     * title and year have the following structure
     * 
     * <div id="tn15title"><h1>Merida - Legende der Highlands <span>(<a href="/year/2012/">2012</a>) <span class="pro-link">...</span> <span
     * class="title-extra">Brave <i>(original title)</i></span> </span></h1> </div>
     */

    // parse title and year
    Element title = doc.getElementById("tn15title");
    if (title != null) {
        Element element = null;
        // title
        Elements elements = title.getElementsByTag("h1");
        if (elements.size() > 0) {
            element = elements.first();
            String movieTitle = cleanString(element.ownText());
            md.storeMetadata(MediaMetadata.TITLE, movieTitle);
        }

        // year
        elements = title.getElementsByTag("span");
        if (elements.size() > 0) {
            element = elements.first();
            String content = element.text();

            // search year
            Pattern yearPattern = Pattern.compile("\\(([0-9]{4})|/\\)");
            Matcher matcher = yearPattern.matcher(content);
            while (matcher.find()) {
                if (matcher.group(1) != null) {
                    String movieYear = matcher.group(1);
                    md.storeMetadata(MediaMetadata.YEAR, movieYear);
                    break;
                }
            }
        }

        // original title
        elements = title.getElementsByAttributeValue("class", "title-extra");
        if (elements.size() > 0) {
            element = elements.first();
            String content = element.text();
            content = content.replaceAll("\\(original title\\)", "").trim();
            md.storeMetadata(MediaMetadata.ORIGINAL_TITLE, content);
        }
    }

    // poster
    Element poster = doc.getElementById("primary-poster");
    if (poster != null) {
        String posterUrl = poster.attr("src");
        posterUrl = posterUrl.replaceAll("SX[0-9]{2,4}_", "SX400_");
        posterUrl = posterUrl.replaceAll("SY[0-9]{2,4}_", "SY400_");
        processMediaArt(md, MediaArtworkType.POSTER, "Poster", posterUrl);
    }

    /*
     * <div class="starbar-meta"> <b>7.4/10</b> &nbsp;&nbsp;<a href="ratings" class="tn15more">52,871 votes</a>&nbsp;&raquo; </div>
     */

    // rating and rating count
    Element ratingElement = doc.getElementById("tn15rating");
    if (ratingElement != null) {
        Elements elements = ratingElement.getElementsByClass("starbar-meta");
        if (elements.size() > 0) {
            Element div = elements.get(0);

            // rating comes in <b> tag
            Elements b = div.getElementsByTag("b");
            if (b.size() == 1) {
                String ratingAsString = b.text();
                Pattern ratingPattern = Pattern.compile("([0-9]\\.[0-9])/10");
                Matcher matcher = ratingPattern.matcher(ratingAsString);
                while (matcher.find()) {
                    if (matcher.group(1) != null) {
                        float rating = 0;
                        try {
                            rating = Float.valueOf(matcher.group(1));
                        } catch (Exception e) {
                        }
                        md.storeMetadata(MediaMetadata.RATING, rating);
                        break;
                    }
                }
            }

            // count
            Elements a = div.getElementsByAttributeValue("href", "ratings");
            if (a.size() == 1) {
                String countAsString = a.text().replaceAll("[.,]|votes", "").trim();
                int voteCount = 0;
                try {
                    voteCount = Integer.parseInt(countAsString);
                } catch (Exception e) {
                }
                md.storeMetadata(MediaMetadata.VOTE_COUNT, voteCount);
            }
        }

        // top250
        elements = ratingElement.getElementsByClass("starbar-special");
        if (elements.size() > 0) {
            Elements a = elements.get(0).getElementsByTag("a");
            if (a.size() > 0) {
                Element anchor = a.get(0);
                Pattern topPattern = Pattern.compile("Top 250: #([0-9]{1,3})");
                Matcher matcher = topPattern.matcher(anchor.ownText());
                while (matcher.find()) {
                    if (matcher.group(1) != null) {
                        int top250 = 0;
                        try {
                            top250 = Integer.parseInt(matcher.group(1));
                        } catch (Exception e) {
                        }
                        md.storeMetadata(MediaMetadata.TOP_250, top250);
                    }
                }
            }
        }
    }

    // parse all items coming by <div class="info">
    Elements elements = doc.getElementsByClass("info");
    for (Element element : elements) {
        // only parse divs
        if (!"div".equals(element.tag().getName())) {
            continue;
        }

        // elements with h5 are the titles of the values
        Elements h5 = element.getElementsByTag("h5");
        if (h5.size() > 0) {
            Element firstH5 = h5.first();
            String h5Title = firstH5.text();

            // release date
            /*
             * <div class="info"><h5>Release Date:</h5><div class="info-content">5 January 1996 (USA)<a class="tn15more inline"
             * href="/title/tt0114746/releaseinfo"
             * onclick="(new Image()).src='/rg/title-tease/releasedates/images/b.gif?link=/title/tt0114746/releaseinfo';"> See more</a>&nbsp;</div></div>
             */
            if (h5Title.matches("(?i)" + ImdbSiteDefinition.IMDB_COM.getReleaseDate() + ".*")) {
                Elements div = element.getElementsByClass("info-content");
                if (div.size() > 0) {
                    Element releaseDateElement = div.first();
                    String releaseDate = cleanString(releaseDateElement.ownText().replaceAll("", ""));
                    Pattern pattern = Pattern.compile("(.*)\\(.*\\)");
                    Matcher matcher = pattern.matcher(releaseDate);
                    if (matcher.find()) {
                        try {
                            SimpleDateFormat sdf = new SimpleDateFormat("d MMM yyyy");
                            Date parsedDate = sdf.parse(matcher.group(1));
                            sdf = new SimpleDateFormat("dd-MM-yyyy");
                            md.storeMetadata(MediaMetadata.RELEASE_DATE, sdf.format(parsedDate));
                        } catch (Exception e) {
                        }
                    }
                }
            }

            /*
             * <div class="info"><h5>Tagline:</h5><div class="info-content"> (7) To Defend Us... <a class="tn15more inline"
             * href="/title/tt0472033/taglines" onClick= "(new Image()).src='/rg/title-tease/taglines/images/b.gif?link=/title/tt0472033/taglines';" >See
             * more</a>&nbsp;&raquo; </div></div>
             */
            // tagline
            if (h5Title.matches("(?i)" + ImdbSiteDefinition.IMDB_COM.getTagline() + ".*")
                    && !options.isScrapeImdbForeignLanguage()) {
                Elements div = element.getElementsByClass("info-content");
                if (div.size() > 0) {
                    Element taglineElement = div.first();
                    String tagline = cleanString(taglineElement.ownText().replaceAll("", ""));
                    md.storeMetadata(MediaMetadata.TAGLINE, tagline);
                }
            }

            /*
             * <div class="info-content"><a href="/Sections/Genres/Animation/">Animation</a> | <a href="/Sections/Genres/Action/">Action</a> | <a
             * href="/Sections/Genres/Adventure/">Adventure</a> | <a href="/Sections/Genres/Fantasy/">Fantasy</a> | <a
             * href="/Sections/Genres/Mystery/">Mystery</a> | <a href="/Sections/Genres/Sci-Fi/">Sci-Fi</a> | <a
             * href="/Sections/Genres/Thriller/">Thriller</a> <a class="tn15more inline" href="/title/tt0472033/keywords" onClick=
             * "(new Image()).src='/rg/title-tease/keywords/images/b.gif?link=/title/tt0472033/keywords';" > See more</a>&nbsp;&raquo; </div>
             */
            // genres are only scraped from akas.imdb.com
            if (h5Title.matches("(?i)" + imdbSite.getGenre() + "(.*)")) {
                Elements div = element.getElementsByClass("info-content");
                if (div.size() > 0) {
                    Elements a = div.first().getElementsByTag("a");
                    for (Element anchor : a) {
                        if (anchor.attr("href").matches("/Sections/Genres/.*")) {
                            md.addGenre(getTmmGenre(anchor.ownText()));
                        }
                    }
                }
            }
            // }

            /*
             * <div class="info"><h5>Runtime:</h5><div class="info-content">162 min | 171 min (special edition) | 178 min (extended cut)</div></div>
             */
            // runtime
            // if (h5Title.matches("(?i)" + imdbSite.getRuntime() + ".*")) {
            if (h5Title.matches("(?i)" + ImdbSiteDefinition.IMDB_COM.getRuntime() + ".*")) {
                Elements div = element.getElementsByClass("info-content");
                if (div.size() > 0) {
                    Element taglineElement = div.first();
                    String first = taglineElement.ownText().split("\\|")[0];
                    String runtimeAsString = cleanString(first.replaceAll("min", ""));
                    int runtime = 0;
                    try {
                        runtime = Integer.parseInt(runtimeAsString);
                    } catch (Exception e) {
                        // try to filter out the first number we find
                        Pattern runtimePattern = Pattern.compile("([0-9]{2,3})");
                        Matcher matcher = runtimePattern.matcher(runtimeAsString);
                        if (matcher.find()) {
                            runtime = Integer.parseInt(matcher.group(0));
                        }
                    }
                    md.storeMetadata(MediaMetadata.RUNTIME, runtime);
                }
            }

            /*
             * <div class="info"><h5>Country:</h5><div class="info-content"><a href="/country/fr">France</a> | <a href="/country/es">Spain</a> | <a
             * href="/country/it">Italy</a> | <a href="/country/hu">Hungary</a></div></div>
             */
            // country
            if (h5Title.matches("(?i)Country.*")) {
                Elements a = element.getElementsByTag("a");
                String countries = "";
                for (Element anchor : a) {
                    Pattern pattern = Pattern.compile("/country/(.*)");
                    Matcher matcher = pattern.matcher(anchor.attr("href"));
                    if (matcher.matches()) {
                        String country = matcher.group(1);
                        if (StringUtils.isNotEmpty(countries)) {
                            countries += ", ";
                        }
                        countries += country.toUpperCase();
                    }
                }
                md.storeMetadata(MediaMetadata.COUNTRY, countries);
            }

            /*
             * <div class="info"><h5>Language:</h5><div class="info-content"><a href="/language/en">English</a> | <a href="/language/de">German</a> | <a
             * href="/language/fr">French</a> | <a href="/language/it">Italian</a></div>
             */
            // Spoken languages
            if (h5Title.matches("(?i)Language.*")) {
                Elements a = element.getElementsByTag("a");
                String spokenLanguages = "";
                for (Element anchor : a) {
                    Pattern pattern = Pattern.compile("/language/(.*)");
                    Matcher matcher = pattern.matcher(anchor.attr("href"));
                    if (matcher.matches()) {
                        String langu = matcher.group(1);
                        if (StringUtils.isNotEmpty(spokenLanguages)) {
                            spokenLanguages += ", ";
                        }
                        spokenLanguages += langu;
                    }
                }
                md.storeMetadata(MediaMetadata.SPOKEN_LANGUAGES, spokenLanguages);
            }

            /*
             * <div class="info"><h5>Certification:</h5><div class="info-content"><a href="/search/title?certificates=us:pg">USA:PG</a> <i>(certificate
             * #47489)</i> | <a href="/search/title?certificates=ca:pg">Canada:PG</a> <i>(Ontario)</i> | <a
             * href="/search/title?certificates=au:pg">Australia:PG</a> | <a href="/search/title?certificates=in:u">India:U</a> | <a
             * href="/search/title?certificates=ie:pg">Ireland:PG</a> ...</div></div>
             */
            // certification
            // if (h5Title.matches("(?i)" + imdbSite.getCertification() + ".*")) {
            if (h5Title.matches("(?i)" + ImdbSiteDefinition.IMDB_COM.getCertification() + ".*")) {
                Elements a = element.getElementsByTag("a");
                for (Element anchor : a) {
                    // certification for the right country
                    if (anchor.attr("href").matches(
                            "(?i)/search/title\\?certificates=" + options.getCountry().getAlpha2() + ".*")) {
                        Pattern certificationPattern = Pattern.compile(".*:(.*)");
                        Matcher matcher = certificationPattern.matcher(anchor.ownText());
                        Certification certification = null;
                        while (matcher.find()) {
                            if (matcher.group(1) != null) {
                                certification = Certification.getCertification(options.getCountry(),
                                        matcher.group(1));
                            }
                        }

                        if (certification != null) {
                            md.addCertification(certification);
                            break;
                        }
                    }
                }
            }
        }

        /*
         * <div id="director-info" class="info"> <h5>Director:</h5> <div class="info-content"><a href="/name/nm0000416/" onclick=
         * "(new Image()).src='/rg/directorlist/position-1/images/b.gif?link=name/nm0000416/';" >Terry Gilliam</a><br/> </div> </div>
         */
        // director
        if ("director-info".equals(element.id())) {
            Elements a = element.getElementsByTag("a");
            for (Element anchor : a) {
                if (anchor.attr("href").matches("/name/nm.*")) {
                    MediaCastMember cm = new MediaCastMember(CastType.DIRECTOR);
                    cm.setName(anchor.ownText());
                    md.addCastMember(cm);
                }
            }
        }
    }

    /*
     * <table class="cast"> <tr class="odd"><td class="hs"><a href="http://pro.imdb.com/widget/resume_redirect/" onClick=
     * "(new Image()).src='/rg/resume/prosystem/images/b.gif?link=http://pro.imdb.com/widget/resume_redirect/';" ><img src=
     * "http://i.media-imdb.com/images/SF9113d6f5b7cb1533c35313ccd181a6b1/tn15/no_photo.png" width="25" height="31" border="0"></td><td class="nm"><a
     * href="/name/nm0577828/" onclick= "(new Image()).src='/rg/castlist/position-1/images/b.gif?link=/name/nm0577828/';" >Joseph Melito</a></td><td
     * class="ddd"> ... </td><td class="char"><a href="/character/ch0003139/">Young Cole</a></td></tr> <tr class="even"><td class="hs"><a
     * href="/name/nm0000246/" onClick= "(new Image()).src='/rg/title-tease/tinyhead/images/b.gif?link=/name/nm0000246/';" ><img src=
     * "http://ia.media-imdb.com/images/M/MV5BMjA0MjMzMTE5OF5BMl5BanBnXkFtZTcwMzQ2ODE3Mw@@._V1._SY30_SX23_.jpg" width="23" height="32"
     * border="0"></a><br></td><td class="nm"><a href="/name/nm0000246/" onclick=
     * "(new Image()).src='/rg/castlist/position-2/images/b.gif?link=/name/nm0000246/';" >Bruce Willis</a></td><td class="ddd"> ... </td><td
     * class="char"><a href="/character/ch0003139/">James Cole</a></td></tr> <tr class="odd"><td class="hs"><a href="/name/nm0781218/" onClick=
     * "(new Image()).src='/rg/title-tease/tinyhead/images/b.gif?link=/name/nm0781218/';" ><img src=
     * "http://ia.media-imdb.com/images/M/MV5BODI1MTA2MjkxM15BMl5BanBnXkFtZTcwMTcwMDg2Nw@@._V1._SY30_SX23_.jpg" width="23" height="32"
     * border="0"></a><br></td><td class="nm"><a href="/name/nm0781218/" onclick=
     * "(new Image()).src='/rg/castlist/position-3/images/b.gif?link=/name/nm0781218/';" >Jon Seda</a></td><td class="ddd"> ... </td><td
     * class="char"><a href="/character/ch0003143/">Jose</a></td></tr>...</table>
     */
    // cast
    elements = doc.getElementsByClass("cast");
    if (elements.size() > 0) {
        Elements tr = elements.get(0).getElementsByTag("tr");
        for (Element row : tr) {
            Elements td = row.getElementsByTag("td");
            MediaCastMember cm = new MediaCastMember();
            for (Element column : td) {
                // actor thumb
                if (column.hasClass("hs")) {
                    Elements img = column.getElementsByTag("img");
                    if (img.size() > 0) {
                        String thumbUrl = img.get(0).attr("src");
                        if (thumbUrl.contains("no_photo.png")) {
                            cm.setImageUrl("");
                        } else {
                            thumbUrl = thumbUrl.replaceAll("SX[0-9]{2,4}_", "SX400_");
                            thumbUrl = thumbUrl.replaceAll("SY[0-9]{2,4}_", "");
                            cm.setImageUrl(thumbUrl);
                        }
                    }
                }
                // actor name
                if (column.hasClass("nm")) {
                    cm.setName(cleanString(column.text()));
                }
                // character
                if (column.hasClass("char")) {
                    cm.setCharacter(cleanString(column.text()));
                }
            }
            if (StringUtils.isNotEmpty(cm.getName()) && StringUtils.isNotEmpty(cm.getCharacter())) {
                cm.setType(CastType.ACTOR);
                md.addCastMember(cm);
            }
        }
    }

    Element content = doc.getElementById("tn15content");
    if (content != null) {
        elements = content.getElementsByTag("table");
        for (Element table : elements) {
            // writers
            if (table.text().contains(ImdbSiteDefinition.IMDB_COM.getWriter())) {
                Elements anchors = table.getElementsByTag("a");
                for (Element anchor : anchors) {
                    if (anchor.attr("href").matches("/name/nm.*")) {
                        MediaCastMember cm = new MediaCastMember(CastType.WRITER);
                        cm.setName(anchor.ownText());
                        md.addCastMember(cm);
                    }
                }
            }

            // producers
            if (table.text().contains(ImdbSiteDefinition.IMDB_COM.getProducers())) {
                Elements rows = table.getElementsByTag("tr");
                for (Element row : rows) {
                    if (row.text().contains(ImdbSiteDefinition.IMDB_COM.getProducers())) {
                        continue;
                    }
                    Elements columns = row.children();
                    if (columns.size() == 0) {
                        continue;
                    }
                    MediaCastMember cm = new MediaCastMember(CastType.PRODUCER);
                    String name = cleanString(columns.get(0).text());
                    if (StringUtils.isBlank(name)) {
                        continue;
                    }
                    cm.setName(name);
                    if (columns.size() >= 3) {
                        cm.setPart(cleanString(columns.get(2).text()));
                    }
                    md.addCastMember(cm);
                }
            }
        }
    }

    // Production companies
    elements = doc.getElementsByClass("blackcatheader");
    for (Element blackcatheader : elements) {
        if (blackcatheader.ownText().equals(ImdbSiteDefinition.IMDB_COM.getProductionCompanies())) {
            Elements a = blackcatheader.nextElementSibling().getElementsByTag("a");
            StringBuilder productionCompanies = new StringBuilder();
            for (Element anchor : a) {
                if (StringUtils.isNotEmpty(productionCompanies)) {
                    productionCompanies.append(", ");
                }
                productionCompanies.append(anchor.ownText());
            }
            md.storeMetadata(MediaMetadata.PRODUCTION_COMPANY, productionCompanies.toString());
            break;
        }
    }

    /*
     * plot from /plotsummary
     */
    // build the url
    doc = null;
    doc = futurePlotsummary.get();

    // imdb.com has another site structure
    if (imdbSite == ImdbSiteDefinition.IMDB_COM) {
        Elements zebraList = doc.getElementsByClass("zebraList");
        if (zebraList != null && !zebraList.isEmpty()) {
            Elements odd = zebraList.get(0).getElementsByClass("odd");
            if (odd.isEmpty()) {
                odd = zebraList.get(0).getElementsByClass("even"); // sometimes imdb has even
            }
            if (odd.size() > 0) {
                Elements p = odd.get(0).getElementsByTag("p");
                if (p.size() > 0) {
                    String plot = cleanString(p.get(0).ownText());
                    md.storeMetadata(MediaMetadata.PLOT, plot);
                }
            }
        }
    } else {
        Element wiki = doc.getElementById("swiki.2.1");
        if (wiki != null) {
            String plot = cleanString(wiki.ownText());
            md.storeMetadata(MediaMetadata.PLOT, plot);
        }
    }

    // title also from chosen site if we are not scraping akas.imdb.com
    if (imdbSite != ImdbSiteDefinition.IMDB_COM) {
        title = doc.getElementById("tn15title");
        if (title != null) {
            Element element = null;
            // title
            elements = title.getElementsByClass("main");
            if (elements.size() > 0) {
                element = elements.first();
                String movieTitle = cleanString(element.ownText());
                md.storeMetadata(MediaMetadata.TITLE, movieTitle);
            }
        }
    }
    // }

    // get data from tmdb?
    if (options.isScrapeImdbForeignLanguage() || options.isScrapeCollectionInfo()) {
        MediaMetadata tmdbMd = futureTmdb.get();
        if (options.isScrapeImdbForeignLanguage() && tmdbMd != null
                && StringUtils.isNotBlank(tmdbMd.getStringValue(MediaMetadata.PLOT))) {
            // tmdbid
            md.setId(MediaMetadata.TMDBID, tmdbMd.getId(MediaMetadata.TMDBID));
            // title
            md.storeMetadata(MediaMetadata.TITLE, tmdbMd.getStringValue(MediaMetadata.TITLE));
            // original title
            md.storeMetadata(MediaMetadata.ORIGINAL_TITLE, tmdbMd.getStringValue(MediaMetadata.ORIGINAL_TITLE));
            // tagline
            md.storeMetadata(MediaMetadata.TAGLINE, tmdbMd.getStringValue(MediaMetadata.TAGLINE));
            // plot
            md.storeMetadata(MediaMetadata.PLOT, tmdbMd.getStringValue(MediaMetadata.PLOT));
            // collection info
            md.storeMetadata(MediaMetadata.COLLECTION_NAME,
                    tmdbMd.getStringValue(MediaMetadata.COLLECTION_NAME));
            md.storeMetadata(MediaMetadata.TMDBID_SET, tmdbMd.getIntegerValue(MediaMetadata.TMDBID_SET));
        }
        if (options.isScrapeCollectionInfo() && tmdbMd != null) {
            md.storeMetadata(MediaMetadata.TMDBID_SET, tmdbMd.getIntegerValue(MediaMetadata.TMDBID_SET));
            md.storeMetadata(MediaMetadata.COLLECTION_NAME,
                    tmdbMd.getStringValue(MediaMetadata.COLLECTION_NAME));
        }
    }

    // if we have still no original title, take the title
    if (StringUtils.isBlank(md.getStringValue(MediaMetadata.ORIGINAL_TITLE))) {
        md.storeMetadata(MediaMetadata.ORIGINAL_TITLE, md.getStringValue(MediaMetadata.TITLE));
    }

    return md;
}

From source file:org.tinymediamanager.scraper.imdb.ImdbParser.java

protected MediaMetadata parseReferencePage(Document doc, MediaScrapeOptions options, MediaMetadata md) {
    /*/*w  ww  .  j av  a  2s . c  om*/
     * title and year have the following structure
     * 
     * <div id="tn15title"><h1>Merida - Legende der Highlands <span>(<a href="/year/2012/">2012</a>) <span class="pro-link">...</span> <span
     * class="title-extra">Brave <i>(original title)</i></span> </span></h1> </div>
     */

    // title
    Element title = doc.getElementsByAttributeValue("name", "title").first();
    if (title != null) {
        String movieTitle = cleanString(title.attr("content"));
        int yearStart = movieTitle.lastIndexOf("(");
        if (yearStart > 0) {
            movieTitle = movieTitle.substring(0, yearStart - 1).trim();
            md.setTitle(movieTitle);
        }
    }

    // original title and year
    Element originalTitleYear = doc.getElementsByAttributeValue("property", "og:title").first();
    if (originalTitleYear != null) {
        String content = originalTitleYear.attr("content");
        int startOfYear = content.lastIndexOf("(");
        if (startOfYear > 0) {
            // noo - this is NOT the original title!!! (seems always english?) parse from AKAs page...
            // String originalTitle = content.substring(0, startOfYear - 1).trim();
            // md.setOriginalTitle(originalTitle);

            String yearText = content.substring(startOfYear);

            // search year
            Pattern yearPattern = Pattern.compile("[1-2][0-9]{3}");
            Matcher matcher = yearPattern.matcher(yearText);
            while (matcher.find()) {
                if (matcher.group(0) != null) {
                    String movieYear = matcher.group(0);
                    try {
                        md.setYear(Integer.parseInt(movieYear));
                        break;
                    } catch (Exception ignored) {
                    }
                }
            }
        }
    }

    // poster
    Element poster = doc.getElementsByAttributeValue("property", "og:image").first();
    if (poster != null) {
        String posterUrl = poster.attr("content");

        int fileStart = posterUrl.lastIndexOf("/");
        if (fileStart > 0) {
            int parameterStart = posterUrl.indexOf("_", fileStart);
            if (parameterStart > 0) {
                int startOfExtension = posterUrl.lastIndexOf(".");
                if (startOfExtension > parameterStart) {
                    posterUrl = posterUrl.substring(0, parameterStart) + posterUrl.substring(startOfExtension);

                }
            }
        }
        processMediaArt(md, MediaArtwork.MediaArtworkType.POSTER, posterUrl);
    }

    /*
     * <div class="starbar-meta"> <b>7.4/10</b> &nbsp;&nbsp;<a href="ratings" class="tn15more">52,871 votes</a>&nbsp;&raquo; </div>
     */

    // rating and rating count
    Element ratingElement = doc.getElementsByClass("ipl-rating-star__rating").first();
    if (ratingElement != null) {
        String ratingAsString = ratingElement.ownText().replace(",", ".");
        try {
            md.setRating(Float.valueOf(ratingAsString));
        } catch (Exception ignored) {
        }

        Element votesElement = doc.getElementsByClass("ipl-rating-star__total-votes").first();
        if (votesElement != null) {
            String countAsString = votesElement.ownText().replaceAll("[.,()\\u00a0]", "").trim();
            try {
                md.setVoteCount(Integer.parseInt(countAsString));
            } catch (Exception ignored) {
            }
        }
    }
    // top250
    Element topRatedElement = doc.getElementsByAttributeValue("href", "/chart/top").first();
    if (topRatedElement != null) {
        Pattern topPattern = Pattern.compile("Top Rated Movies: #([0-9]{1,3})");
        Matcher matcher = topPattern.matcher(topRatedElement.ownText());
        while (matcher.find()) {
            if (matcher.group(1) != null) {
                try {
                    String top250Text = matcher.group(1);
                    md.setTop250(Integer.parseInt(top250Text));
                } catch (Exception ignored) {
                }
            }
        }
    }

    // releasedate
    Element releaseDateElement = doc
            .getElementsByAttributeValue("href", "/title/" + options.getImdbId().toLowerCase() + "/releaseinfo")
            .first();
    if (releaseDateElement != null) {
        String releaseDateText = releaseDateElement.ownText();
        int startOfCountry = releaseDateText.indexOf("(");
        if (startOfCountry > 0) {
            releaseDateText = releaseDateText.substring(0, startOfCountry - 1).trim();
        }
        try {
            SimpleDateFormat sdf = new SimpleDateFormat("d MMMM yyyy", Locale.US);
            Date parsedDate = sdf.parse(releaseDateText);
            md.setReleaseDate(parsedDate);
        } catch (ParseException otherformat) {
            try {
                SimpleDateFormat sdf = new SimpleDateFormat("MMMM yyyy", Locale.US);
                Date parsedDate = sdf.parse(releaseDateText);
                md.setReleaseDate(parsedDate);
            } catch (ParseException ignored) {
            }
        }
    }

    Elements elements = doc.getElementsByClass("ipl-zebra-list__label");
    for (Element element : elements) {
        // only parse tds
        if (!"td".equals(element.tag().getName())) {
            continue;
        }

        String elementText = element.ownText();

        if (elementText.equals("Taglines")) {
            if (!ImdbMetadataProvider.providerInfo.getConfig().getValueAsBool("useTmdb")) {
                Element taglineElement = element.nextElementSibling();
                if (taglineElement != null) {
                    String tagline = cleanString(taglineElement.ownText().replaceAll("", ""));
                    md.setTagline(tagline);
                }
            }
        }

        if (elementText.equals("Genres")) {
            Element nextElement = element.nextElementSibling();
            if (nextElement != null) {
                Elements genreElements = nextElement.getElementsByAttributeValueStarting("href", "/genre/");

                for (Element genreElement : genreElements) {
                    String genreText = genreElement.ownText();
                    md.addGenre(getTmmGenre(genreText));
                }
            }
        }

        /*
         * Old HTML, but maybe the same content formart <div class="info"><h5>Runtime:</h5><div class="info-content">162 min | 171 min (special edition)
         * | 178 min (extended cut)</div></div>
         */
        if (elementText.equals("Runtime")) {
            Element nextElement = element.nextElementSibling();
            if (nextElement != null) {
                Element runtimeElement = nextElement.getElementsByClass("ipl-inline-list__item").first();
                if (runtimeElement != null) {
                    String first = runtimeElement.ownText().split("\\|")[0];
                    String runtimeAsString = cleanString(first.replaceAll("min", ""));
                    int runtime = 0;
                    try {
                        runtime = Integer.parseInt(runtimeAsString);
                    } catch (Exception e) {
                        // try to filter out the first number we find
                        Pattern runtimePattern = Pattern.compile("([0-9]{2,3})");
                        Matcher matcher = runtimePattern.matcher(runtimeAsString);
                        if (matcher.find()) {
                            runtime = Integer.parseInt(matcher.group(0));
                        }
                    }
                    md.setRuntime(runtime);
                }
            }
        }

        if (elementText.equals("Country")) {
            Element nextElement = element.nextElementSibling();
            if (nextElement != null) {
                Elements countryElements = nextElement.getElementsByAttributeValueStarting("href", "/country/");
                Pattern pattern = Pattern.compile("/country/(.*)");

                for (Element countryElement : countryElements) {
                    Matcher matcher = pattern.matcher(countryElement.attr("href"));
                    if (matcher.matches()) {
                        if (ImdbMetadataProvider.providerInfo.getConfig()
                                .getValueAsBool("scrapeLanguageNames")) {
                            md.addCountry(LanguageUtils.getLocalizedCountryForLanguage(
                                    options.getLanguage().getLanguage(), countryElement.text(),
                                    matcher.group(1)));
                        } else {
                            md.addCountry(matcher.group(1));
                        }
                    }
                }
            }
        }

        if (elementText.equals("Language")) {
            Element nextElement = element.nextElementSibling();
            if (nextElement != null) {
                Elements languageElements = nextElement.getElementsByAttributeValueStarting("href",
                        "/language/");
                Pattern pattern = Pattern.compile("/language/(.*)");

                for (Element languageElement : languageElements) {
                    Matcher matcher = pattern.matcher(languageElement.attr("href"));
                    if (matcher.matches()) {
                        if (ImdbMetadataProvider.providerInfo.getConfig()
                                .getValueAsBool("scrapeLanguageNames")) {
                            md.addSpokenLanguage(LanguageUtils.getLocalizedLanguageNameFromLocalizedString(
                                    options.getLanguage(), languageElement.text(), matcher.group(1)));
                        } else {
                            md.addSpokenLanguage(matcher.group(1));
                        }
                    }
                }
            }
        }

        if (elementText.equals("Certification")) {
            Element nextElement = element.nextElementSibling();
            if (nextElement != null) {
                String languageCode = options.getCountry().getAlpha2();
                Elements certificationElements = nextElement.getElementsByAttributeValueStarting("href",
                        "/search/title?certificates=" + languageCode);
                boolean done = false;
                for (Element certificationElement : certificationElements) {
                    String certText = certificationElement.ownText();
                    int startOfCert = certText.indexOf(":");
                    if (startOfCert > 0 && certText.length() > startOfCert + 1) {
                        certText = certText.substring(startOfCert + 1);
                    }

                    Certification certification = Certification.getCertification(options.getCountry(),
                            certText);
                    if (certification != null) {
                        md.addCertification(certification);
                        done = true;
                        break;
                    }
                }

                if (!done && languageCode.equals("DE")) {
                    certificationElements = nextElement.getElementsByAttributeValueStarting("href",
                            "/search/title?certificates=XWG");
                    for (Element certificationElement : certificationElements) {
                        String certText = certificationElement.ownText();
                        int startOfCert = certText.indexOf(":");
                        if (startOfCert > 0 && certText.length() > startOfCert + 1) {
                            certText = certText.substring(startOfCert + 1);
                        }

                        Certification certification = Certification.getCertification(options.getCountry(),
                                certText);
                        if (certification != null) {
                            md.addCertification(certification);
                            break;
                        }
                    }
                }

            }
        }
    }

    // director
    Element directorsElement = doc.getElementById("directors");
    while (directorsElement != null && directorsElement.tag().getName() != "header") {
        directorsElement = directorsElement.parent();
    }
    if (directorsElement != null) {
        directorsElement = directorsElement.nextElementSibling();
    }
    if (directorsElement != null) {
        for (Element directorElement : directorsElement.getElementsByClass("name")) {
            String director = directorElement.text().trim();

            MediaCastMember cm = new MediaCastMember(MediaCastMember.CastType.DIRECTOR);
            cm.setName(director);
            md.addCastMember(cm);
        }
    }

    // actors
    Element castTableElement = doc.getElementsByClass("cast_list").first();
    if (castTableElement != null) {
        Elements tr = castTableElement.getElementsByTag("tr");
        for (Element row : tr) {
            MediaCastMember cm = parseCastMember(row);
            if (cm != null && StringUtils.isNotEmpty(cm.getName())
                    && StringUtils.isNotEmpty(cm.getCharacter())) {
                cm.setType(MediaCastMember.CastType.ACTOR);
                md.addCastMember(cm);
            }
        }
    }

    // writers
    Element writersElement = doc.getElementById("writers");
    while (writersElement != null && writersElement.tag().getName() != "header") {
        writersElement = writersElement.parent();
    }
    if (writersElement != null) {
        writersElement = writersElement.nextElementSibling();
    }
    if (writersElement != null) {
        Elements writersElements = writersElement.getElementsByAttributeValueStarting("href", "/name/");

        for (Element writerElement : writersElements) {
            String writer = cleanString(writerElement.ownText());
            MediaCastMember cm = new MediaCastMember(MediaCastMember.CastType.WRITER);
            cm.setName(writer);
            md.addCastMember(cm);
        }
    }

    // producers
    Element producersElement = doc.getElementById("producers");
    while (producersElement != null && producersElement.tag().getName() != "header") {
        producersElement = producersElement.parent();
    }
    if (producersElement != null) {
        producersElement = producersElement.nextElementSibling();
    }
    if (producersElement != null) {
        Elements producersElements = producersElement.getElementsByAttributeValueStarting("href", "/name/");

        for (Element producerElement : producersElements) {
            String producer = cleanString(producerElement.ownText());
            MediaCastMember cm = new MediaCastMember(MediaCastMember.CastType.PRODUCER);
            cm.setName(producer);
            md.addCastMember(cm);
        }
    }

    // producers
    Elements prodCompHeaderElements = doc.getElementsByClass("ipl-list-title");
    Element prodCompHeaderElement = null;

    for (Element possibleProdCompHeaderEl : prodCompHeaderElements) {
        if (possibleProdCompHeaderEl.ownText().equals("Production Companies")) {
            prodCompHeaderElement = possibleProdCompHeaderEl;
            break;
        }
    }

    while (prodCompHeaderElement != null && prodCompHeaderElement.tag().getName() != "header") {
        prodCompHeaderElement = prodCompHeaderElement.parent();
    }
    if (prodCompHeaderElement != null) {
        prodCompHeaderElement = prodCompHeaderElement.nextElementSibling();
    }
    if (prodCompHeaderElement != null) {
        Elements prodCompElements = prodCompHeaderElement.getElementsByAttributeValueStarting("href",
                "/company/");

        for (Element prodCompElement : prodCompElements) {
            String prodComp = prodCompElement.ownText();
            md.addProductionCompany(prodComp);
        }
    }

    return md;
}

From source file:org.tinymediamanager.scraper.ofdb.OfdbMetadataProvider.java

private void parseCast(Elements el, MediaCastMember.CastType type, MediaMetadata md) {
    if (el != null && !el.isEmpty()) {
        Element castEl = null;
        for (Element element : el) {
            if (!element.tagName().equals("option")) { // we get more, just do not take the optionbox
                castEl = element;/*  w  ww.j a v a  2s.co m*/
            }
        }
        if (castEl == null) {
            LOGGER.debug("meh, no " + type.name() + " found");
            return;
        }
        // walk up to table TR...
        while (!((castEl == null) || (castEl.tagName().equalsIgnoreCase("tr")))) {
            castEl = castEl.parent();
        }
        // ... and take the next table row ^^
        Element tr = castEl.nextElementSibling();

        if (tr != null) {
            for (Element a : tr.getElementsByAttributeValue("valign", "middle")) {
                String act = a.toString();
                String aname = StrgUtils.substr(act, "alt=\"(.*?)\"");
                if (!aname.isEmpty()) {
                    MediaCastMember cm = new MediaCastMember();
                    cm.setName(aname);
                    String id = StrgUtils.substr(act, "id=(.*?)[^\"]\">");
                    if (!id.isEmpty()) {
                        cm.setId(id);
                        // thumb
                        // http://www.ofdb.de/thumbnail.php?cover=images%2Fperson%2F7%2F7689.jpg&size=6
                        // fullsize ;) http://www.ofdb.de/images/person/7/7689.jpg
                        try {
                            String imgurl = URLDecoder
                                    .decode(StrgUtils.substr(act, "images%2Fperson%2F(.*?)&amp;size"), "UTF-8");
                            if (!imgurl.isEmpty()) {
                                imgurl = BASE_URL + "/images/person/" + imgurl;
                            }
                            cm.setImageUrl(imgurl);
                        } catch (Exception e) {
                        }
                    }
                    String arole = StrgUtils.substr(act, "\\.\\.\\. (.*?)</font>").replaceAll("<[^>]*>", "");
                    cm.setCharacter(arole);
                    cm.setType(type);
                    md.addCastMember(cm);
                }
            }
        }
    }
}

From source file:solarrecorder.SolarRecorder.java

private void getProdData() throws IOException {
    org.jsoup.nodes.Document doc = Jsoup.connect("http://envoy/production").get();

    Element h1 = doc.getElementsByTag("h1").first();
    Element table = h1.nextElementSibling();
    Elements alltr = table.getElementsByTag("tbody").first().getElementsByTag("tr");
    for (Element tr : alltr) {
        Elements alltd = tr.getElementsByTag("td");

        if (alltd.size() == 2) {
            String name = alltd.first().text();
            String value = alltd.last().text();
            switch (name) {
            case "Currently":
            case "Today":
                envoyData.add(new EnvoyData(name, value));
                break;
            }/*w  ww. j  a v a  2 s .co m*/
        }
    }
}