HTML parser based on HTMLEditorKit.ParserCallback
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import javax.swing.text.MutableAttributeSet;
import javax.swing.text.html.HTML;
import javax.swing.text.html.HTMLEditorKit;
import javax.swing.text.html.parser.ParserDelegator;
/**
 * Command-line entry point: fetches the document behind a URL and feeds it to
 * the Swing HTML parser, with an {@code HTMLParse} callback receiving the
 * parse events.
 */
public class Main {
    /**
     * Parses the HTML document located at the URL given as the first argument.
     *
     * @param args command-line arguments; {@code args[0]} is the URL to fetch,
     *             any further arguments are ignored
     * @throws IllegalArgumentException if no URL argument is supplied
     * @throws Exception if the URL is malformed or the document cannot be read
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 1) {
            throw new IllegalArgumentException("usage: java Main <url>");
        }
        URL url = new URL(args[0]);
        // openStream() always yields the raw byte stream, whereas getContent()
        // may return a non-stream object depending on the content type.
        // try-with-resources closes both the stream and the reader, even when
        // parsing fails part-way through.
        try (InputStream in = url.openStream();
             // Decode with an explicit charset so the result does not depend on
             // the platform default. UTF-8 is an assumption about the document.
             Reader reader = new InputStreamReader(in, StandardCharsets.UTF_8)) {
            // ignoreCharSet = true: the charset is fixed above and the stream is
            // not re-decoded, so a charset declared in a <meta> tag must not
            // abort the parse with a ChangedCharSetException.
            new ParserDelegator().parse(reader, new HTMLParse(), true);
        }
    }
}
/**
 * Parser callback that writes each text and tag event to standard output, one
 * event per line. Start tags are prefixed with {@code +}, simple tags with
 * {@code *} and end tags with {@code -}; text is printed as-is.
 */
class HTMLParse extends HTMLEditorKit.ParserCallback {

    /** Prints the characters of a text run on a line of their own. */
    @Override
    public void handleText(char[] data, int pos) {
        String text = new String(data);
        System.out.println(text);
    }

    /** Prints an opening tag as {@code +name}. */
    @Override
    public void handleStartTag(HTML.Tag t, MutableAttributeSet a, int pos) {
        printTag("+", t);
    }

    /** Prints a simple tag as {@code *name}. */
    @Override
    public void handleSimpleTag(HTML.Tag t, MutableAttributeSet a, int pos) {
        printTag("*", t);
    }

    /** Prints a closing tag as {@code -name}. */
    @Override
    public void handleEndTag(HTML.Tag t, int pos) {
        printTag("-", t);
    }

    /** Writes the marker immediately followed by the tag's string form. */
    private void printTag(String marker, HTML.Tag tag) {
        String line = marker + tag.toString();
        System.out.println(line);
    }
}
Related examples in the same category