Getting the Text in an HTML Document
import java.io.InputStreamReader;
import java.io.Reader;
import java.net.URI;
import java.net.URL;
import java.net.URLConnection;
import javax.swing.text.EditorKit;
import javax.swing.text.html.HTMLDocument;
import javax.swing.text.html.HTMLEditorKit;
public class Main {
public static void main(String[] argv) throws Exception {
HTMLDocument doc = new HTMLDocument() {
public HTMLEditorKit.ParserCallback getReader(int pos) {
return new HTMLEditorKit.ParserCallback() {
public void handleText(char[] data, int pos) {
System.out.println(data);
}
};
}
};
URL url = new URI("http://www.google.com").toURL();
URLConnection conn = url.openConnection();
Reader rd = new InputStreamReader(conn.getInputStream());
EditorKit kit = new HTMLEditorKit();
kit.read(rd, doc, 0);
}
}
Related examples in the same category