Example usage for org.jsoup.select Elements html

List of usage examples for org.jsoup.select.Elements#html()

Introduction

On this page you can find usage examples for the html() method of org.jsoup.select.Elements.

Prototype

public String html() 

Source Link

Document

Get the combined inner HTML of all matched elements.

Usage

From source file:io.sightly.tck.html.HTMLExtractor.java

/**
 * Retrieves the content of an element, without its own markup tags, identified by the {@code selector} from the given {@code markup}.
 * The {@code url} is used only for caching purposes, to avoid parsing multiple times the markup returned for the same resource.
 *
 * @param url      the url that identifies the markup
 * @param markup   the markup/*  ww  w.j  av a2 s .  com*/
 * @param selector the selector used for retrieval
 * @return the contents of the selected element
 */
public static String innerHTML(String url, String markup, String selector) {
    ensureMarkup(url, markup);
    Document document = documents.get(url);
    Elements elements = document.select(selector);
    return elements.html();
}

From source file:com.geecko.QuickLyric.lyrics.Genius.java

/**
 * Downloads a lyrics page and extracts the lyrics text from its {@code div.lyrics} element.
 *
 * @param url    address of the lyrics page to fetch
 * @param artist artist name; when {@code null}, artist and title are both read from the page
 * @param title  track title; replaced by the page's title when {@code artist} is {@code null}
 * @return a {@code Lyrics} created with {@code NO_RESULT} on an HTTP status error, with
 *         {@code ERROR} on any other I/O failure or when the page has no {@code div.lyrics}
 *         element, with {@code NEGATIVE_RESULT} when the text is exactly {@code [Instrumental]},
 *         and with {@code POSITIVE_RESULT} otherwise
 */
public static Lyrics fromURL(String url, String artist, String title) {
    Document lyricsPage;
    String text;
    try {
        lyricsPage = Jsoup.connect(url).get();
        Elements lyricsDiv = lyricsPage.select("div.lyrics");
        if (lyricsDiv.isEmpty()) {
            // No lyrics container on the page: report an error directly instead of
            // throwing and catching an exception purely for control flow.
            return new Lyrics(Lyrics.ERROR);
        }
        // Strip every tag except <br> so that line breaks survive the cleaning.
        text = Jsoup.clean(lyricsDiv.html(), Whitelist.none().addTags("br")).trim();
    } catch (HttpStatusException e) {
        return new Lyrics(Lyrics.NO_RESULT);
    } catch (IOException e) {
        e.printStackTrace();
        return new Lyrics(Lyrics.ERROR);
    }

    if (artist == null) {
        // Fall back to the page's own metadata; keep the caller's values when the
        // expected elements are missing rather than failing on get(0).
        Elements titleElements = lyricsPage.getElementsByClass("text_title");
        if (!titleElements.isEmpty()) {
            title = titleElements.get(0).text();
        }
        Elements artistElements = lyricsPage.getElementsByClass("text_artist");
        if (!artistElements.isEmpty()) {
            artist = artistElements.get(0).text();
        }
    }

    Lyrics result = "[Instrumental]".equals(text)
            ? new Lyrics(Lyrics.NEGATIVE_RESULT)
            : new Lyrics(Lyrics.POSITIVE_RESULT);
    result.setArtist(artist);
    result.setTitle(title);
    result.setText(text);
    result.setURL(url);
    result.setSource("Genius");
    return result;
}

From source file:model.SongMeaningsScraper.java

/**
 * Downloads a song page and extracts the lyrics from its {@code #textblock} element.
 * Nested {@code div} elements and comments are removed, a fixed set of inline tags is
 * stripped, and {@code &lt;}/{@code &gt;} entities are decoded.
 *
 * @param songURL address of the song page to fetch
 * @return the lyrics prefixed with a single space, or an empty string when the page cannot be
 *         loaded, has no {@code #textblock} element, or carries a copyright/publisher notice
 */
private static String scrapeLyricsPage(String songURL) {
    String lyrics = "";

    // Try to load page using Jsoup
    try {
        // Load page into Document
        Document doc = Jsoup.connect(songURL).get();
        // Get lyricBox from page
        Elements lyricBox = doc.select("#textblock");
        if (lyricBox.isEmpty()) {
            // Without this guard get(0) below would throw on an unexpected page layout.
            System.out.println("Lyrics not found!");
            return "";
        }
        // Remove ads
        lyricBox.get(0).getElementsByTag("div").remove();
        // Remove comments
        ParseUtils.removeComments(lyricBox.get(0));

        // We now have almost perfect lyrics.
        lyrics = lyricBox.html();

        // Remove minimal HTML tags, leaving newlines intact.
        // These are literal strings, so replace() is used instead of regex replaceAll().
        lyrics = lyrics.replace("<br />", "");
        lyrics = lyrics.replace("<i>", "");
        lyrics = lyrics.replace("</i>", "");
        lyrics = lyrics.replace("<b>", "");
        lyrics = lyrics.replace("</b>", "");
        lyrics = lyrics.replace("<p>", "");
        lyrics = lyrics.replace("</p>", "");

        lyrics = lyrics.replace("&lt;", "<");
        lyrics = lyrics.replace("&gt;", ">");
        // The pattern here used to be an empty string, which made replaceAll insert a quote
        // between every character. It looks like a non-ASCII apostrophe was lost to an
        // encoding error. NOTE(review): assumed to be the right single quotation mark
        // (U+2019) - confirm against the site's actual markup.
        lyrics = lyrics.replace("\u2019", "'");

        if (lyrics.contains("Due to copyright restrictions") || lyrics.contains("Due to a publisher block")) {
            Logger.LogToStatusBar("Copyright restrictions on this track, bailing out!");
            return "";
        }

        lyrics = " " + lyrics;
    } catch (IOException e) {
        // The page could not be fetched; fall through and return the empty default.
        System.out.println("Lyrics not found!");
    }
    System.out.println("Done");
    return lyrics;
}

From source file:com.manisha.allmybooksarepacked.service.BookParser.java

/**
 * Reads the book price from the element(s) selected by {@code PathMapping.PRICE}.
 *
 * @return the price parsed from the selection's inner HTML after removing {@code $} signs and
 *         commas, or {@code null} when that text is blank or is not a valid number
 */
private Double findPrice() {
    Elements price = doc.select(PathMapping.PRICE);
    // Literal replacements: no regex is needed to drop the currency sign and the commas.
    String str = price.html().replace("$", "").replace(",", "");
    if (!StringUtils.isNotBlank(str)) {
        return null;
    }
    try {
        return Double.valueOf(str);
    } catch (NumberFormatException ignored) {
        // Best effort: an unparseable price is reported as "no price" rather than as a failure.
        return null;
    }
}

From source file:com.gorsini.searcher.CanalplaySearcher.java

/**
 * Fetches the search result page for a fixed query and compares the HTML of its
 * {@code div.list_movie} selection with the copy stored in {@code CHECK_FILENAME}.
 * When no stored copy exists yet, the current HTML is saved as the reference.
 *
 * @throws HTMLChangeException when a stored copy exists and differs from the current HTML; the
 *                             current HTML is first written to {@code CHECK_FILENAME + ".new"}
 * @throws IOException         when the page cannot be fetched or a file cannot be read or written
 */
public void check() throws HTMLChangeException, IOException {
    final String searchUrl = makeURL("intouchables");
    final Document page = Jsoup.connect(searchUrl).referrer("http://vod.canalplay.com/").get();
    final String currentHtml = page.select("div.list_movie").html();
    LOG.finest(currentHtml);

    final File reference = new File(CHECK_FILENAME);
    String storedHtml = null;
    if (!reference.exists()) {
        LOG.log(Level.INFO, "sauvegarde check");
        FileUtils.writeStringToFile(reference, currentHtml);
    } else {
        storedHtml = FileUtils.readFileToString(reference);
    }

    final boolean unchanged = storedHtml == null || currentHtml.equals(storedHtml);
    if (unchanged) {
        LOG.log(Level.INFO, "no change detected into HTML response");
        return;
    }

    // Save the new version so that it can be compared with the reference.
    FileUtils.writeStringToFile(new File(CHECK_FILENAME + ".new"), currentHtml);
    throw new HTMLChangeException();
}

From source file:com.manisha.allmybooksarepacked.service.BookParser.java

/**
 * Extracts the book title from the element(s) selected by {@code PathMapping.TITLE}.
 * {@code span} elements found in that selection are removed from the document first.
 *
 * @return the trimmed inner HTML of the title selection
 */
private String findTitle() {
    final Elements matched = doc.select(PathMapping.TITLE);
    final Elements spans = matched.select("span");
    spans.remove();
    final String innerHtml = matched.html();
    return innerHtml.trim();
}

From source file:com.manisha.allmybooksarepacked.service.BookParser.java

/**
 * Extracts the ISBN-10 from the element(s) selected by {@code PathMapping.ISBN_10}.
 * {@code b} elements found in that selection are removed from the document first.
 *
 * @return the trimmed inner HTML of the ISBN-10 selection
 */
private String findIsbn10() {
    final Elements matched = doc.select(PathMapping.ISBN_10);
    final Elements boldParts = matched.select("b");
    boldParts.remove();
    final String innerHtml = matched.html();
    return innerHtml.trim();
}

From source file:com.manisha.allmybooksarepacked.service.BookParser.java

/**
 * Extracts the book language from the element(s) selected by {@code PathMapping.LANGUAGE}.
 * {@code b} elements found in that selection are removed from the document first.
 *
 * @return the trimmed inner HTML of the language selection
 */
private String findLanguage() {
    final Elements matched = doc.select(PathMapping.LANGUAGE);
    final Elements boldParts = matched.select("b");
    boldParts.remove();
    final String innerHtml = matched.html();
    return innerHtml.trim();
}

From source file:com.manisha.allmybooksarepacked.service.BookParser.java

/**
 * Extracts the publisher name from the element(s) selected by {@code PathMapping.PUBLISHER}.
 * {@code b} elements found in that selection are removed from the document first. The text is
 * then cut at the first {@code (} and, after trimming, at the last {@code ;}.
 *
 * @return the publisher text before the first {@code (} and before the last {@code ;}
 */
private String findPublisher() {
    Elements publisher = doc.select(PathMapping.PUBLISHER);
    publisher.select("b").remove();
    String html = publisher.html();
    // Cut at the first "(" only when there is one; substring(0, -1) would throw otherwise.
    int parenIndex = html.indexOf('(');
    String str = (parenIndex >= 0 ? html.substring(0, parenIndex) : html).trim();
    int semicolonIndex = str.lastIndexOf(';');
    if (semicolonIndex != -1) {
        str = str.substring(0, semicolonIndex);
    }
    return str;
}

From source file:com.manisha.allmybooksarepacked.service.BookParser.java

/**
 * Reads the page count from the element(s) selected by {@code PathMapping.PAGES_HARDCOVER},
 * falling back to {@code PathMapping.PAGES_PAPERBACK} when the hardcover text is blank.
 * {@code b} elements found in the selection are removed from the document first.
 *
 * @return the number parsed from the first space-separated token of the text (commas
 *         removed), or {@code null} when that token is missing or not a valid integer
 */
private Integer findPages() {
    Elements pages = doc.select(PathMapping.PAGES_HARDCOVER);
    pages.select("b").remove();
    String text = pages.html();
    if (!StringUtils.isNotBlank(text)) {
        // No hardcover entry: try the paperback one instead.
        pages = doc.select(PathMapping.PAGES_PAPERBACK);
        pages.select("b").remove();
        text = pages.html();
    }
    return parseLeadingPageCount(text);
}

/** Parses the first space-separated token of {@code text} as an integer, ignoring commas; returns {@code null} on failure. */
private Integer parseLeadingPageCount(String text) {
    String[] tokens = text.split(" ");
    if (tokens.length == 0) {
        // split() drops trailing empty strings, so text made only of spaces yields no tokens.
        return null;
    }
    try {
        return Integer.valueOf(tokens[0].replace(",", ""));
    } catch (NumberFormatException ignored) {
        // Best effort: an unparseable page count is reported as "unknown".
        return null;
    }
}