org.thorn.emma.model.Html.java Source code

Java tutorial

Introduction

Here is the source code for org.thorn.emma.model.Html.java

Source

/*
 * @(#)Html  1.0 2015-01-06
 *
 * Copyright 2009 chinabank payment All Rights Reserved.
 * PROPRIETARY/CONFIDENTIAL. Use is subject to license terms.
 * Author Email: yfchenyun@jd.com
 */
package org.thorn.emma.model;

import org.apache.commons.lang3.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.util.HashSet;
import java.util.Set;

/**
 * Convenience wrapper around a jsoup {@link Document} that exposes a few
 * CSS-selector based lookups (inner HTML, attribute values, contained URLs).
 *
 * @author chenyun313@gmail.com, 2015-01-06.
 * @version 1.0
 * @since 1.0
 */
public class Html {

    /** DOM produced by parsing the content handed to the constructor. */
    private final Document document;

    /**
     * Parses the given markup with {@link Jsoup#parse(String)} and keeps the
     * resulting document for later queries.
     *
     * @param content HTML markup to parse
     */
    public Html(String content) {
        this.document = Jsoup.parse(content);
    }

    /**
     * Returns the inner HTML of the elements matched by a CSS selector.
     *
     * @param selector jsoup CSS selector
     * @return the result of {@link Elements#html()} on the matched elements
     */
    public String fetchHtml(String selector) {
        Elements elements = this.document.select(selector);
        return elements.html();
    }

    /**
     * Returns an attribute value from the elements matched by a CSS selector.
     *
     * @param selector jsoup CSS selector
     * @param attr     name of the attribute to read
     * @return the result of {@link Elements#attr(String)} on the matched elements
     */
    public String fetchAttr(String selector, String attr) {
        Elements elements = this.document.select(selector);
        return elements.attr(attr);
    }

    /**
     * Collects the URLs referenced by the document: the {@code href} of every
     * anchor and the {@code src} of every element carrying that attribute.
     * Blank values and values ending in {@code .js} or {@code .css}
     * (case-insensitive) are left out.
     *
     * @return a new mutable set with the collected URLs, never {@code null}
     */
    public Set<String> fetchAllUrl() {

        Set<String> urls = new HashSet<String>();

        // Each selector must be paired with the attribute it selects on:
        // reading "href" from "[src]" elements only ever produced empty strings.
        collectUrls(this.document.select("a[href]"), "href", urls);
        collectUrls(this.document.select("[src]"), "src", urls);

        return urls;
    }

    /**
     * Adds the given attribute of each element to {@code urls}, skipping blank
     * values and script/stylesheet references.
     */
    private static void collectUrls(Elements elements, String attr, Set<String> urls) {
        for (Element element : elements) {
            String url = element.attr(attr);

            if (StringUtils.isBlank(url)) {
                continue;
            }

            if (StringUtils.endsWithIgnoreCase(url, ".js") || StringUtils.endsWithIgnoreCase(url, ".css")) {
                continue;
            }

            urls.add(url);
        }
    }

}