Example usage for org.jsoup.select Elements first

List of usage examples for org.jsoup.select Elements first

Introduction

This page collects usage examples of the method org.jsoup.select.Elements.first().

Prototype

public Element first() 

Source Link

Document

Get the first matched element.

Usage

From source file:org.tinymediamanager.scraper.ofdb.OfdbMetadataProvider.java

/**
 * Scrapes the metadata of a single movie from ofdb.de.
 * <p>
 * The details page is resolved from (a) the provider's own id, (b) a search by IMDB id or
 * (c) a previous search result carried in {@code options}. The IMDB id, title, year, original
 * title, genres and rating are read from that page. The plot and the cast live on two further
 * pages which are fetched on a best-effort basis: a failure there is logged and does not abort
 * the scrape.
 *
 * @param options the scrape options; must be of type {@code MediaType.MOVIE}
 * @return the metadata collected from the pages
 * @throws UnsupportedMediaTypeException if the options are not for a movie
 * @throws Exception if no details URL can be determined, or the details page cannot be
 *                   fetched or parsed
 */
@Override
public MediaMetadata getMetadata(MediaScrapeOptions options) throws Exception {
    LOGGER.debug("getMetadata() {}", options);

    if (options.getType() != MediaType.MOVIE) {
        throw new UnsupportedMediaTypeException(options.getType());
    }

    // we have 3 entry points here
    // a) getMetadata has been called with an ofdbId
    // b) getMetadata has been called with an imdbId
    // c) getMetadata has been called from a previous search

    String detailUrl = "";

    // case a) and c) - an explicit ofdb id wins over a previous search result
    String providerId = options.getId(getProviderInfo().getId());
    if (StringUtils.isNotBlank(providerId)) {
        detailUrl = "http://www.ofdb.de/view.php?page=film&fid=" + providerId;
    } else if (options.getResult() != null) {
        detailUrl = options.getResult().getUrl();
    }

    // case b) - only when there is no search result yet
    if (options.getResult() == null && StringUtils.isNotBlank(options.getId(MediaMetadata.IMDB))) {
        MediaSearchOptions searchOptions = new MediaSearchOptions(MediaType.MOVIE);
        searchOptions.setImdbId(options.getId(MediaMetadata.IMDB));
        try {
            List<MediaSearchResult> results = search(searchOptions);
            if (results != null && !results.isEmpty()) {
                options.setResult(results.get(0));
                detailUrl = options.getResult().getUrl();
            }
        } catch (Exception e) {
            // best effort: a failed search simply leaves detailUrl as it was
            LOGGER.warn("failed IMDB search: {}", e.getMessage());
        }
    }

    // we can only work further if we got a search result on ofdb.de
    if (StringUtils.isBlank(detailUrl)) {
        throw new Exception("We did not get any useful movie url");
    }

    MediaMetadata md = new MediaMetadata(providerInfo.getId());

    // the ofdb id is part of the details URL in one of two forms
    String ofdbId = StrgUtils.substr(detailUrl, "film\\/(\\d+),");
    if (StringUtils.isBlank(ofdbId)) {
        ofdbId = StrgUtils.substr(detailUrl, "fid=(\\d+)");
    }

    try {
        LOGGER.trace("get details page");
        Document doc;
        // try-with-resources: the stream is closed even when parsing fails
        try (InputStream in = new Url(detailUrl).getInputStream()) {
            doc = Jsoup.parse(in, "UTF-8", "");
        }

        if (doc.getAllElements().size() < 10) {
            throw new Exception("meh - we did not receive a valid web page");
        }

        // parse details

        // IMDB ID "http://www.imdb.com/Title?1194173"
        Elements imdbLinks = doc.getElementsByAttributeValueContaining("href", "imdb.com");
        if (!imdbLinks.isEmpty()) {
            String imdbNumber = StrgUtils.substr(imdbLinks.first().attr("href"), "\\?(\\d+)");
            // do not store a bare "tt" when the link carries no numeric id
            if (StringUtils.isNotBlank(imdbNumber)) {
                md.setId(MediaMetadata.IMDB, "tt" + imdbNumber);
            }
        }

        // title / year
        // <meta property="og:title" content="Bourne Vermächtnis, Das (2012)" />
        Elements ogTitle = doc.getElementsByAttributeValue("property", "og:title");
        if (!ogTitle.isEmpty()) {
            String[] ty = parseTitle(ogTitle.first().attr("content"));
            md.setTitle(StrgUtils.removeCommonSortableName(ty[0]));
            try {
                md.setYear(Integer.parseInt(ty[1]));
            } catch (Exception ignored) {
                // no usable year in the title; the fallback below tries another position
            }
        }
        // another year position
        if (md.getYear() == 0) {
            // <a href="view.php?page=blaettern&Kat=Jahr&Text=2012">2012</a>
            Elements yearLinks = doc.getElementsByAttributeValueContaining("href", "Kat=Jahr");
            if (!yearLinks.isEmpty()) {
                try {
                    md.setYear(Integer.parseInt(yearLinks.first().text()));
                } catch (NumberFormatException ignored) {
                    // link text is not a number; leave the year unset
                }
            }
        }

        // original title (has to be searched with a regexp)
        // <tr valign="top">
        // <td nowrap=""><font class="Normal" face="Arial,Helvetica,sans-serif"
        // size="2">Originaltitel:</font></td>
        // <td>&nbsp;&nbsp;</td>
        // <td width="99%"><font class="Daten" face="Arial,Helvetica,sans-serif"
        // size="2"><b>Brave</b></font></td>
        // </tr>
        String originalTitle = StrgUtils.substr(doc.body().html(), "(?s)Originaltitel.*?<b>(.*?)</b>");
        if (!originalTitle.isEmpty()) {
            md.setOriginalTitle(StrgUtils.removeCommonSortableName(originalTitle));
        }

        // Genre: <a href="view.php?page=genre&Genre=Action">Action</a>
        Elements genreLinks = doc.getElementsByAttributeValueContaining("href", "page=genre");
        for (Element g : genreLinks) {
            md.addGenre(getTmmGenre(g.text()));
        }

        // rating
        // <div itemtype="http://schema.org/AggregateRating" itemscope
        // itemprop="aggregateRating">Note: <span
        // itemprop="ratingValue">6.73</span><meta
        // itemprop="worstRating" content="1" />
        Elements ratingValue = doc.getElementsByAttributeValue("itemprop", "ratingValue");
        if (!ratingValue.isEmpty()) {
            String r = ratingValue.text();
            if (!r.isEmpty()) {
                try {
                    md.setRating(Float.parseFloat(r));
                } catch (NumberFormatException e) {
                    LOGGER.debug("could not parse rating");
                }
            }
        }

        // get PlotLink; open url and parse
        // <a href="plot/22523,31360,Die-Bourne-Identitt"><b>[mehr]</b></a>
        LOGGER.trace("parse plot");
        Elements plotLinks = doc.getElementsByAttributeValueMatching("href", "plot\\/\\d+,");
        if (!plotLinks.isEmpty()) {
            String plotUrl = BASE_URL + "/" + plotLinks.first().attr("href");
            try (InputStream in = new Url(plotUrl).getInputStream()) {
                Document plot = Jsoup.parse(in, "UTF-8", "");
                // the first "Blocksatz" element holds the plot
                Elements blocks = plot.getElementsByClass("Blocksatz");
                if (blocks.isEmpty()) {
                    LOGGER.debug("no plot block found on plot page");
                } else {
                    String plotText = blocks.first().text(); // text() drops all markup
                    // cut everything up to and including the "Mal gelesen" marker plus the
                    // character after it; when the marker is missing keep the text untouched
                    // (an unconditional indexOf + 12 would cut the first 11 characters)
                    int headerStart = plotText.indexOf("Mal gelesen");
                    if (headerStart >= 0) {
                        plotText = plotText.substring(Math.min(headerStart + 12, plotText.length()));
                    }
                    md.setPlot(plotText);
                }
            } catch (Exception e) {
                // best effort: metadata without a plot is still returned
                LOGGER.error("failed to get plot page: {}", e.getMessage());
            }
        }

        // http://www.ofdb.de/view.php?page=film_detail&fid=226745
        LOGGER.debug("parse actor detail");
        String movieDetail = BASE_URL + "/view.php?page=film_detail&fid=" + ofdbId;
        Document castDoc = null;
        try (InputStream in = new Url(movieDetail).getInputStream()) {
            castDoc = Jsoup.parse(in, "UTF-8", "");
        } catch (Exception e) {
            // best effort: metadata without cast is still returned
            LOGGER.error("failed to get detail page: {}", e.getMessage());
        }

        if (castDoc != null) {
            parseCast(castDoc.getElementsContainingOwnText("Regie"), MediaCastMember.CastType.DIRECTOR, md);
            parseCast(castDoc.getElementsContainingOwnText("Darsteller"), MediaCastMember.CastType.ACTOR, md);
            parseCast(castDoc.getElementsContainingOwnText("Stimme/Sprecher"), MediaCastMember.CastType.ACTOR, md);
            parseCast(castDoc.getElementsContainingOwnText("Synchronstimme (deutsch)"),
                    MediaCastMember.CastType.ACTOR, md);
            parseCast(castDoc.getElementsContainingOwnText("Drehbuchautor(in)"), MediaCastMember.CastType.WRITER,
                    md);
            parseCast(castDoc.getElementsContainingOwnText("Produzent(in)"), MediaCastMember.CastType.PRODUCER, md);
        }
    } catch (Exception e) {
        LOGGER.error("Error parsing {}", detailUrl);
        throw e;
    }

    return md;
}

From source file:org.tinymediamanager.scraper.ofdb.OfdbMetadataProvider.java

/**
 * Collects the trailers ofdb.de lists for a movie.
 * <p>
 * The movie is looked up by its IMDB id; the first search hit's details page is then scanned
 * for trailer links in two markups: the "old style" JavaScript {@code return '...';} snippets
 * and the "new style" {@code clips} elements. Trailers found in both styles are all returned.
 *
 * @param options the scrape options carrying the IMDB id
 * @return the trailers found; empty when the IMDB id is invalid or the search has no hits
 * @throws Exception if a page cannot be fetched or parsed (the failing URL is logged first)
 */
@Override
public List<MediaTrailer> getTrailers(MediaScrapeOptions options) throws Exception {
    LOGGER.debug("getTrailers() {}", options);
    List<MediaTrailer> trailers = new ArrayList<>();
    if (!MetadataUtil.isValidImdbId(options.getImdbId())) {
        LOGGER.debug("IMDB id not found");
        return trailers;
    }

    // both page styles use the same link markup; compile the pattern once, not per match
    Pattern linkPattern = Pattern.compile("<a href=\"(.*?)\">(.*?)</a>");

    Url url = null;
    String searchString = BASE_URL + "/view.php?page=suchergebnis&Kat=IMDb&SText=" + options.getImdbId();
    try {
        // search with IMDB
        url = new Url(searchString);
        Document doc;
        // try-with-resources: the stream is closed even when parsing fails
        try (InputStream in = url.getInputStream()) {
            doc = Jsoup.parse(in, "UTF-8", "");
        }
        Elements filme = doc.getElementsByAttributeValueMatching("href", "film\\/\\d+,");
        if (filme.isEmpty()) {
            LOGGER.debug("found no search results");
            return trailers;
        }
        LOGGER.debug("found {} search results", filme.size()); // hopefully only one

        LOGGER.debug("get (trailer) details page");
        // read the href attribute directly instead of regex-matching the element's HTML,
        // which would hand back an HTML-escaped value
        url = new Url(BASE_URL + "/" + filme.first().attr("href"));
        try (InputStream in = url.getInputStream()) {
            doc = Jsoup.parse(in, "UTF-8", "");
        }

        // OLD STYLE - one JavaScript case per trailer and size, e.g.
        // case 'http://de.clip-1.filmtrailer.com/9507_31566_a_5.flv?log_var=72|491100001-1|-' :
        // return '<b>Trailer 1</b><br><i>(xxlarge)</i><br><br>&raquo; 640px<br><br>Download:<br>&raquo; <a href=
        // "http://de.clip-1.filmtrailer.com/9507_31566_a_5.wmv?log_var=72|491100001-1|-" >wmv</a><br>&raquo; <a href=
        // "http://de.clip-1.filmtrailer.com/9507_31566_a_5.mp4?log_var=72|491100001-1|-" >mp4</a><br>&raquo; <a href=
        // "http://de.clip-1.filmtrailer.com/9507_31566_a_5.webm?log_var=72|491100001-1|-" >webm</a><br>';
        Matcher m = Pattern.compile("return '(.*?)';").matcher(doc.toString());
        while (m.find()) {
            String s = m.group(1);
            String tname = StrgUtils.substr(s, "<b>(.*?)</b>");
            String tpix = StrgUtils.substr(s, "raquo; (.*?)x<br>");

            // one trailer entry per download link
            Matcher lm = linkPattern.matcher(s);
            while (lm.find()) {
                MediaTrailer trailer = new MediaTrailer();
                trailer.setName(tname);
                trailer.setQuality(tpix);
                trailer.setProvider("filmtrailer");
                trailer.setUrl(lm.group(1));
                LOGGER.debug(trailer.toString());
                trailers.add(trailer);
            }
        }

        // NEW STYLE (additional!) - one line per trailer, five sizes each
        // <div class="clips" id="clips2" style="display: none;">
        // <img src="images/flag_de.gif" align="left" vspace="3" width="18" height="12">&nbsp;
        // <img src="images/trailer_6.gif" align="top" vspace="1" width="16" height="16" alt="freigegeben ab 6 Jahren">&nbsp;
        // <i>Trailer 1:</i>
        // <a href="http://de.clip-1.filmtrailer.com/2845_6584_a_1.flv?log_var=67|491100001-1|-">&nbsp;small&nbsp;</a> &nbsp;
        // <a href="http://de.clip-1.filmtrailer.com/2845_6584_a_2.flv?log_var=67|491100001-1|-">&nbsp;medium&nbsp;</a> &nbsp;
        // ... (large, xlarge, xxlarge)
        // <br>
        // ... (same structure for "Trailer 2:", "Trailer 3:")
        // </div>

        // new style size
        // 1 = 160 x 90 = small
        // 2 = 240 x 136 = medium
        // 3 = 320 x 180 = large
        // 4 = 400 x 226 = xlarge
        // 5 = 640 x 360 = xxlarge

        // get them as single trailer line
        m = Pattern.compile("<i>(.*?)</i>(.*?)<br>", Pattern.DOTALL)
                .matcher(doc.getElementsByClass("clips").html());
        while (m.find()) {
            // parse each line with 5 qualities
            String tname = m.group(1).trim().replaceFirst(":$", ""); // replace ending colon

            Matcher lm = linkPattern.matcher(m.group(2));
            while (lm.find()) {
                String tformat = lm.group(2).replaceAll("&nbsp;", "").trim();
                String tpix = "";
                switch (tformat) {
                case "small":
                    tpix = "90p";
                    break;

                case "medium":
                    tpix = "136p";
                    break;

                case "large":
                    tpix = "180p";
                    break;

                case "xlarge":
                    tpix = "226p";
                    break;

                case "xxlarge":
                    tpix = "360p";
                    break;

                default:
                    // unknown size label: keep the quality empty
                    break;
                }
                MediaTrailer trailer = new MediaTrailer();
                trailer.setName(tname);
                trailer.setQuality(tpix);
                trailer.setProvider("filmtrailer");
                trailer.setUrl(lm.group(1));
                LOGGER.debug(trailer.toString());
                trailers.add(trailer);
            }
        }
    } catch (Exception e) {
        // url is still null when constructing the very first Url failed
        LOGGER.error("Error parsing {}", url != null ? url.toString() : searchString);
        throw e;
    }
    return trailers;
}

From source file:org.xlrnet.metadict.engines.woxikon.WoxikonEngine.java

/**
 * Determines the entry type from the first word-type node inside the given element.
 *
 * @param element the element to search for nodes with the {@code CLASS_WORDTYPE} class
 * @return the type mapped to the node's text in {@code ENTRY_TYPE_MAP}, or
 *         {@link EntryType#UNKNOWN} when there is no such node or its text has no mapping
 */
private EntryType detectEntryType(@NotNull Element element) {
    Elements wordTypeNodes = element.getElementsByClass(CLASS_WORDTYPE);

    if (wordTypeNodes.isEmpty()) {
        LOGGER.debug("No wordType node found - defaulting to {}", EntryType.UNKNOWN);
        return EntryType.UNKNOWN;
    }

    String wordTypeText = wordTypeNodes.first().text();
    EntryType entryType = ENTRY_TYPE_MAP.getOrDefault(wordTypeText, EntryType.UNKNOWN);

    if (entryType == EntryType.UNKNOWN) {
        // log the text that failed to resolve; entryType itself is always UNKNOWN here
        LOGGER.debug("Unable to resolve entry type \"{}\"", wordTypeText);
    }

    return entryType;
}

From source file:Search.DataManipulation.DataParser.java

/**
 * Downloads the cover image referenced in the document and returns it Base64-encoded.
 *
 * @param dom the parsed page
 * @return the Base64 string of the image bytes, or {@code null} when the page has no matching
 *         cover image or the download yields no bytes
 * @throws IOException declared by the original signature; presumably raised by the image
 *                     download - confirm against {@code dataHandler.imageDownloader}
 */
public String getIcon(Document dom) throws IOException {
    Elements coverImages = dom.getElementsByClass("cover-container")
            .select("img.cover-image[alt=Cover art]");
    // first() returns null on an empty selection, so check before dereferencing
    if (coverImages.isEmpty()) {
        log.warn("No cover image element found in document, not adding to appData");
        return null;
    }

    byte[] iconBytes = dataHandler.imageDownloader(coverImages.first().attr("src"));
    if (iconBytes.length == 0) {
        log.warn("Invalid Icon url found by Search.DataManipulation.DataValidator, not adding to appData");
        return null;
    }
    return Base64.getEncoder().encodeToString(iconBytes);
}

From source file:Search.DataManipulation.DataParser.java

/**
 * Reads the application name from the document.
 *
 * @param dom the parsed page
 * @return the own text of the first child of the first {@code document-title} element
 */
public String getName(Document dom) {
    return dom.getElementsByClass("document-title")
            .first()
            .child(0)
            .ownText();
}

From source file:Search.DataManipulation.DataParser.java

/**
 * Reads the bundle id from the document.
 *
 * @param dom the parsed page
 * @return the {@code data-docid} attribute of the first {@code buy-button-container} element
 */
public String getBundleId(Document dom) {
    return dom.getElementsByClass("buy-button-container")
            .first()
            .attr("data-docid");
}

From source file:Search.DataManipulation.DataParser.java

/**
 * Reads the description from the document.
 *
 * @param dom the parsed page
 * @return the own text of the first {@code id-app-orig-desc} element
 */
public String getDescription(Document dom) {
    return dom.getElementsByClass("id-app-orig-desc")
            .first()
            .ownText();
}

From source file:Search.DataManipulation.DataParser.java

/**
 * Reads the price label from the document's price button.
 * <p>
 * The label is the own text of the last {@code span} inside the first {@code button.price}.
 *
 * @param dom the parsed page
 * @return {@code "Free"} when the label reads "install" (case-insensitive), otherwise the
 *         first whitespace-separated token of the label
 */
public String getPrice(Document dom) {
    String label = dom.select("button.price")
            .first()
            .getElementsByTag("span")
            .last()
            .ownText();

    if (!label.equalsIgnoreCase("install")) {
        // keep only the leading token of the label
        return StringUtils.split(label)[0];
    }
    return "Free";
}

From source file:Search.DataManipulation.DataParser.java

/**
 * Reads the category from the document.
 *
 * @param dom the parsed page
 * @return the own text of the first element matching
 *         {@code a.document-subtitle.category span[itemprop=genre]}
 */
public String getCategory(Document dom) {
    return dom.select("a.document-subtitle.category span[itemprop=genre]")
            .first()
            .ownText();
}

From source file:Search.DataManipulation.DataParser.java

/**
 * Downloads every thumbnail image referenced in the document and returns them as a JSON
 * array of Base64 strings.
 * <p>
 * Thumbnails that cannot be used are skipped: children of the {@code thumbnails} container
 * without an {@code img} tag, and images whose download yields no bytes. When the page has no
 * {@code thumbnails} container at all, the (empty) list is serialised as is.
 *
 * @param dom the parsed page
 * @return the JSON serialisation of the list of Base64-encoded images
 * @throws IOException declared by the original signature; presumably raised by the image
 *                     download - confirm against {@code dataHandler.imageDownloader}
 */
public String getThumbnails(Document dom) throws IOException {
    List<String> encodedImages = new ArrayList<>();

    Elements containers = dom.getElementsByClass("thumbnails");
    // first() returns null on an empty selection, so check before dereferencing
    if (containers.isEmpty()) {
        return JSONValue.toJSONString(encodedImages);
    }

    for (Element thumbnail : containers.first().children()) {
        Elements imgTags = thumbnail.getElementsByTag("img");
        if (imgTags.isEmpty()) {
            continue;
        }

        byte[] imageBytes = dataHandler.imageDownloader(imgTags.first().attr("src"));
        if (imageBytes.length == 0) {
            continue;
        }

        encodedImages.add(Base64.getEncoder().encodeToString(imageBytes));
    }

    return JSONValue.toJSONString(encodedImages);
}