Java HTML Parse Jsoup extractRssUrl(String html, URI base)

Description

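Extracts the URL of the first RSS or Atom feed advertised by an HTML page through a <link rel="alternate"> element, skipping comment feeds, and resolves it against the given base URI.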

License

Open Source License

Declaration

public static String extractRssUrl(String html, URI base)

Method Source Code


//package com.java2s;
import java.net.URI;

import java.util.regex.Pattern;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;

import org.jsoup.select.Elements;

/**
 * Helper for discovering the syndication feed (RSS or Atom) that an HTML
 * page advertises through {@code <link rel="alternate" ...>} elements.
 */
public class Main {
    final static String HREF = "href";
    final static String LINK = "link";
    final static String RSS = "application/rss+xml";
    final static String ATOM = "application/atom+xml";
    final static String TITLE = "title";
    final static String TYPE = "type";
    final static String REL = "rel";
    final static String ALTERNATE = "alternate";
    /** Matches the word "comment" anywhere, case-insensitively; used to skip comment feeds. */
    final static Pattern comment = Pattern.compile("comment", Pattern.CASE_INSENSITIVE);

    /**
     * Returns the absolute URL of the first usable feed link found in the page.
     *
     * <p>A {@code <link>} element is a candidate when its {@code rel} attribute
     * equals {@code alternate} and its {@code type} attribute is the RSS or
     * Atom media type (both compared case-insensitively). Candidates are
     * skipped when their {@code href} is missing or blank, when the
     * {@code href} or {@code title} contains the word "comment", or when the
     * {@code href} cannot be parsed as a URI reference.
     *
     * @param html the HTML markup to scan
     * @param base the URI against which a relative {@code href} is resolved;
     *             must not be {@code null}
     * @return the resolved feed URL as a string, or {@code null} when the page
     *         advertises no usable feed
     */
    public static String extractRssUrl(String html, URI base) {
        Document d = Jsoup.parse(html);
        Elements links = d.getElementsByTag(LINK);

        for (Element link : links) {
            if (!ALTERNATE.equalsIgnoreCase(link.attr(REL))) {
                continue;
            }
            String type = link.attr(TYPE);
            if (!RSS.equalsIgnoreCase(type) && !ATOM.equalsIgnoreCase(type)) {
                continue;
            }

            // Jsoup reports an absent attribute as "" rather than null, so a
            // null check would never fire. A blank href must be rejected
            // explicitly, otherwise resolve("") hands back the page's own URL.
            String href = link.attr(HREF).trim();
            if (href.isEmpty()) {
                continue;
            }

            // Ignore comment feeds, identified by either the href or the title.
            String title = link.attr(TITLE);
            if (comment.matcher(href).find() || comment.matcher(title).find()) {
                continue;
            }

            try {
                // Return the first candidate that resolves cleanly.
                return base.resolve(href).toString();
            } catch (IllegalArgumentException malformedHref) {
                // URI.resolve(String) rejects syntactically invalid references;
                // keep scanning so one bad link does not hide a later valid one.
                continue;
            }
        }
        return null;
    }
}

Java HTML Parse Jsoup extractRssUrl(String html, URI base)

Description

License

Declaration

Method Source Code

Related