Example usage for org.jsoup.select Elements traverse

List of usage examples for org.jsoup.select Elements traverse

Introduction

On this page you can find example usages of the traverse method of org.jsoup.select.Elements.

Prototype

public Elements traverse(NodeVisitor nodeVisitor) 

Source Link

Document

Perform a depth-first traversal on each of the selected elements.

Usage

From source file: hello.Scraper.java

/**
 * Parses the HTML body of the response and collects the elements found by
 * traversing every {@code li} that sits under a {@code ul} inside {@code body}.
 *
 * <p>The traversal is depth-first over each selected {@code li}, so the result
 * holds the {@code li} elements themselves as well as any elements nested inside
 * them. NOTE(review): an {@code li} nested inside another selected {@code li}
 * would be visited once per selected ancestor and therefore appear more than
 * once in the result; confirm whether that is intended.
 *
 * @param payload response whose body is the HTML text to parse
 * @return every {@link Element} visited during the traversal, in visit order
 */
@Splitter(inputChannel = "channel1", outputChannel = "channel2")
public List<Element> scrape(ResponseEntity<String> payload) {
    final List<Element> collected = new ArrayList<Element>();

    // Keeps only Element nodes; every other node kind is skipped.
    final NodeVisitor collector = new NodeVisitor() {
        @Override
        public void head(Node node, int depth) {
            if (!(node instanceof Element)) {
                return;
            }
            collected.add((Element) node);
        }

        @Override
        public void tail(Node node, int depth) {
            // No work is needed in this callback.
        }
    };

    final Document parsed = Jsoup.parse(payload.getBody());

    // Narrow the selection step by step: body -> ul -> li.
    final Elements bodies = parsed.select("body");
    final Elements lists = bodies.select("ul");
    final Elements items = lists.select("li");
    items.traverse(collector);

    return collected;
}

From source file: crawler.AScraper.java

/**
 * Parses the HTML body of the response and collects the elements, found by
 * traversing every {@code a} under a {@code div} whose id starts with
 * {@code read}, whose text contains {@code ANCHOR_TEXT_PATTERN} (ignoring case).
 *
 * <p>The traversal is depth-first over each selected anchor, so elements nested
 * inside a matching anchor are tested as well and may also be added to the
 * result. NOTE(review): confirm whether only the anchors themselves were meant
 * to be returned.
 *
 * @param payload response whose body is the HTML text to parse
 * @return the matching elements in visit order, or {@code null} if the
 *         re-encoding step fails because a charset is unsupported
 */
@Splitter(inputChannel = "channel1", outputChannel = "channel2")
public List<Element> scrape(ResponseEntity<String> payload) {
    String html = payload.getBody();
    final Document htmlDoc;
    try {
        // Re-interpret the text as GBK by round-tripping through ISO-8859-1 bytes.
        // Presumably the body was decoded as ISO-8859-1 upstream although the page
        // is GBK-encoded; verify against the HTTP client configuration.
        htmlDoc = Jsoup.parse(new String(html.getBytes("ISO-8859-1"), "GBK"));
    } catch (UnsupportedEncodingException e) {
        // Pass the exception along so the log keeps the cause and stack trace.
        LOG.error("Unsupported page encoding.", e);
        return null;
    }
    final Elements anchorNodes = htmlDoc.select("body").select("div[id^=read]").select("a");
    final List<Element> anchorList = new ArrayList<>();
    anchorNodes.traverse(new NodeVisitor() {
        @Override
        public void head(Node node, int depth) {
            // Only Element nodes carry text() to match against; skip everything else.
            if (node instanceof Element) {
                Element e = (Element) node;
                if (StringUtils.containsIgnoreCase(e.text(), ANCHOR_TEXT_PATTERN)) {
                    anchorList.add(e);
                }
            }
        }

        @Override
        public void tail(Node node, int depth) {
            // No work is needed in this callback.
        }
    });
    return anchorList;
}