Java tutorial
/*
 * @(#)Html 1.0 2015-01-06
 *
 * Copyright 2009 chinabank payment All Rights Reserved.
 * PROPRIETARY/CONFIDENTIAL. Use is subject to license terms.
 * Author Email: yfchenyun@jd.com
 */
package org.thorn.emma.model;

import org.apache.commons.lang3.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.util.HashSet;
import java.util.Set;

/**
 * Thin wrapper around a jsoup {@link Document} that exposes a few
 * CSS-selector based lookups over a parsed HTML string.
 *
 * @author chenyun313@gmail.com, 2015-01-06.
 * @version 1.0
 * @since 1.0
 */
public class Html {

    /** Parsed representation of the HTML passed to the constructor. */
    private final Document document;

    /**
     * Parses the given markup with {@link Jsoup#parse(String)}.
     *
     * @param content raw HTML text to parse
     */
    public Html(String content) {
        this.document = Jsoup.parse(content);
    }

    /**
     * Returns the inner HTML of the elements matching a CSS selector,
     * as produced by {@link Elements#html()}.
     *
     * @param selector jsoup CSS selector
     * @return result of {@code Elements.html()} for the matched elements
     */
    public String fetchHtml(String selector) {
        Elements elements = this.document.select(selector);
        return elements.html();
    }

    /**
     * Returns an attribute value from the elements matching a CSS selector,
     * as produced by {@link Elements#attr(String)}.
     *
     * @param selector jsoup CSS selector
     * @param attr     attribute name to read
     * @return result of {@code Elements.attr(attr)} for the matched elements
     */
    public String fetchAttr(String selector, String attr) {
        Elements elements = this.document.select(selector);
        return elements.attr(attr);
    }

    /**
     * Collects the URLs referenced by the document: the {@code href} of every
     * {@code <a href>} and the {@code src} of every element carrying a
     * {@code src} attribute. Values ending in {@code .js} or {@code .css}
     * (case-insensitive) and blank values are left out.
     *
     * <p>The values are returned exactly as written in the markup; they are
     * not resolved against a base URI.
     *
     * @return a new mutable set of the distinct URL strings found; never {@code null}
     */
    public Set<String> fetchAllUrl() {
        Set<String> urls = new HashSet<String>();

        // Each selector must be paired with the attribute it matched on:
        // reading "href" from a [src] element would only yield an empty string.
        collectUrls("a[href]", "href", urls);
        collectUrls("[src]", "src", urls);

        return urls;
    }

    /** Adds the {@code attr} value of every element matching {@code selector} to {@code urls}, skipping blanks and script/stylesheet links. */
    private void collectUrls(String selector, String attr, Set<String> urls) {
        for (Element element : this.document.select(selector)) {
            String url = element.attr(attr);

            if (StringUtils.isBlank(url)) {
                continue;
            }

            // Scripts and stylesheets are not wanted in the result.
            if (StringUtils.endsWithIgnoreCase(url, ".js")
                    || StringUtils.endsWithIgnoreCase(url, ".css")) {
                continue;
            }

            urls.add(url);
        }
    }
}