ElementIterator Class : HTML Document « Swing « Java Tutorial






import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;

import javax.swing.text.AttributeSet;
import javax.swing.text.html.HTML;
import javax.swing.text.html.HTMLDocument;
import javax.swing.text.html.HTMLEditorKit;
import javax.swing.text.html.parser.ParserDelegator;

/**
 * Downloads an HTML page, parses it into a Swing {@link HTMLDocument}, and
 * prints every anchor ({@code <a>}) element's HREF attribute followed by the
 * anchor's text.
 */
public class MainClass {

  /**
   * Fetches {@code http://www.google.com}, loads it into an
   * {@link HTMLDocument}, and writes one line per anchor tag to standard
   * output in the form {@code <href>  <link text>}.
   *
   * @param args command-line arguments (unused)
   * @throws Exception if the page cannot be fetched or parsed, or if a text
   *         range reported by the iterator cannot be read from the document
   */
  public static void main(String args[]) throws Exception {
    URL url = new URL("http://www.google.com");
    URLConnection connection = url.openConnection();

    HTMLEditorKit htmlKit = new HTMLEditorKit();
    HTMLDocument htmlDoc = (HTMLDocument) htmlKit.createDefaultDocument();
    HTMLEditorKit.Parser parser = new ParserDelegator();
    HTMLEditorKit.ParserCallback callback = htmlDoc.getReader(0);

    // try-with-resources closes the reader (and the underlying network
    // stream) even when parsing fails.
    try (BufferedReader br =
        new BufferedReader(new InputStreamReader(connection.getInputStream()))) {
      // Third argument 'true' tells the parser to ignore charset directives
      // found inside the document.
      parser.parse(br, callback, true);
      // The callback buffers parsed elements; flush() must be the last call
      // so that pending content is actually inserted into htmlDoc.
      callback.flush();
    }

    for (HTMLDocument.Iterator iterator = htmlDoc.getIterator(HTML.Tag.A); iterator.isValid(); iterator
        .next()) {

      AttributeSet attributes = iterator.getAttributes();
      // An anchor without an HREF attribute yields null here, which is
      // printed as the text "null".
      String srcString = (String) attributes.getAttribute(HTML.Attribute.HREF);
      System.out.print(srcString);

      // The iterator reports the document offsets that the anchor covers;
      // use them to pull the link text out of the document.
      int startOffset = iterator.getStartOffset();
      int endOffset = iterator.getEndOffset();
      int length = endOffset - startOffset;
      String text = htmlDoc.getText(startOffset, length);
      System.out.println("  " + text);
    }
  }
}
url?sa=p&pref=ig&pval=3&q=http://www.google.ca/ig%3Fhl%3Den&usg=__o-KrRDBI3nbRElKzYEMqfOl3_t0= – Personalize this page
https://www.google.com/accounts/Login?continue=http://www.google.ca/&hl=en – Sign in
  http://images.google.ca/imghp?ie=ISO-8859-1&oe=ISO-8859-1&hl=en&tab=wi – Images
  http://groups.google.ca/grphp?ie=ISO-8859-1&oe=ISO-8859-1&hl=en&tab=wg – Groups
  http://news.google.ca/nwshp?ie=ISO-8859-1&oe=ISO-8859-1&hl=en&tab=wn – News
  /maps?ie=ISO-8859-1&oe=ISO-8859-1&hl=en&tab=wl – Maps
  http://scholar.google.com/schhp?ie=ISO-8859-1&oe=ISO-8859-1&hl=en&tab=ws – Scholar
  /intl/en/options/ – more »
  /advanced_search?hl=en – Advanced Search
  /preferences?hl=en – Preferences
  /language_tools?hl=en – Language Tools








14.37.HTML Document
14.37.1.HTML Tag Constants
14.37.2.Iterating Across HTML Documents for Links
14.37.3.Look for specific tag types, such as HTML.Tag.H1, HTML.Tag.H2
14.37.4.ElementIterator Class
14.37.5.Finding out interested element
14.37.6.HyperlinkListener Example