Getting the Text in an HTML Document - Java Swing

Java examples for Swing:Swing HTML

Description

Getting the Text in an HTML Document

Demo Code

import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;

import javax.swing.text.BadLocationException;
import javax.swing.text.EditorKit;
import javax.swing.text.html.HTMLDocument;
import javax.swing.text.html.HTMLEditorKit;

/**
 * Demonstrates extracting the plain text of an HTML document with the Swing
 * HTML parser.
 */
public class Main {
  /** Length of the {@code "charset="} prefix inside a Content-Type parameter. */
  private static final int CHARSET_PREFIX_LENGTH = "charset=".length();

  /**
   * Fetches the HTML document at {@code uriStr} and returns its text content.
   *
   * <p>Each run of text reported by the parser is appended to the result
   * followed by a newline character. The method is best-effort: it never
   * throws for a bad address or an unreadable document, and instead returns
   * whatever text was collected before the failure (possibly an empty
   * string).
   *
   * @param uriStr absolute URI of the HTML document, e.g. {@code "https://..."}
   *     or {@code "file:/..."}; {@code null} yields an empty string
   * @return the text found in the document, one parser text run per line;
   *     never {@code null}
   */
  public static String getText(String uriStr) {
    // A local, thread-confined accumulator: no need for a synchronized StringBuffer.
    final StringBuilder buf = new StringBuilder(1000);
    if (uriStr == null) {
      return buf.toString();
    }

    // An HTML document whose parser callback appends all text to buf
    // instead of building the usual element structure.
    HTMLDocument doc = new HTMLDocument() {
      @Override
      public HTMLEditorKit.ParserCallback getReader(int pos) {
        return new HTMLEditorKit.ParserCallback() {
          // Called by the parser whenever text is encountered in the HTML file.
          @Override
          public void handleText(char[] data, int pos) {
            buf.append(data);
            buf.append('\n');
          }
        };
      }
    };
    // The stream is decoded with an explicit charset below, so a
    // <meta http-equiv="Content-Type" ...> tag must not abort the parse with
    // a ChangedCharSetException (which would silently truncate the result).
    doc.putProperty("IgnoreCharsetDirective", Boolean.TRUE);

    try {
      URL url = new URI(uriStr).toURL();
      URLConnection conn = url.openConnection();

      // try-with-resources closes the reader (and the connection's stream)
      // even when parsing fails part-way through.
      try (Reader rd = new InputStreamReader(conn.getInputStream(), charsetOf(conn))) {
        EditorKit kit = new HTMLEditorKit();
        kit.read(rd, doc, 0);
      }
    } catch (IOException | URISyntaxException | BadLocationException
        | IllegalArgumentException ignored) {
      // Deliberately best-effort: a malformed or non-absolute URI, a failed
      // connection, or a read error simply ends extraction; the text gathered
      // so far is still returned to the caller.
    }
    return buf.toString();
  }

  /**
   * Picks the charset used to decode the response body: the {@code charset}
   * parameter of the connection's Content-Type when it names a supported
   * charset, otherwise UTF-8.
   */
  private static Charset charsetOf(URLConnection conn) {
    String contentType = conn.getContentType();
    if (contentType != null) {
      for (String part : contentType.split(";")) {
        String param = part.trim();
        if (param.regionMatches(true, 0, "charset=", 0, CHARSET_PREFIX_LENGTH)) {
          String name = param.substring(CHARSET_PREFIX_LENGTH).trim();
          if (name.length() >= 2 && name.startsWith("\"") && name.endsWith("\"")) {
            name = name.substring(1, name.length() - 1);
          }
          try {
            return Charset.forName(name);
          } catch (IllegalArgumentException ignored) {
            // Illegal or unsupported charset name: fall back to the default below.
          }
        }
      }
    }
    return StandardCharsets.UTF_8;
  }
}

Related Tutorials