Java examples for Swing:Swing HTML
Getting the Links in an HTML Document
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.List;

import javax.swing.text.AttributeSet;
import javax.swing.text.BadLocationException;
import javax.swing.text.EditorKit;
import javax.swing.text.SimpleAttributeSet;
import javax.swing.text.html.HTML;
import javax.swing.text.html.HTMLDocument;
import javax.swing.text.html.HTMLEditorKit;

/**
 * Example: extracting the link targets from an HTML document using the Swing
 * HTML parser.
 */
public class Main {

    /**
     * Returns the {@code href} value of every {@code <a>} element found in the
     * HTML document located at the given URI.
     *
     * <p>This is a best-effort operation: if the URI is {@code null}, malformed
     * or not absolute, or if the document cannot be fetched or parsed, an empty
     * array is returned instead of an exception being thrown.
     *
     * @param uriStr the absolute URI of the HTML document (for example
     *               {@code http://...} or {@code file:/...})
     * @return the links in document order; never {@code null}, possibly empty
     */
    public static String[] getLinks(String uriStr) {
        List<String> links = new ArrayList<>();
        if (uriStr == null) {
            return new String[0];
        }

        try {
            // Create a reader on the HTML content.
            URL url = new URI(uriStr).toURL();
            URLConnection conn = url.openConnection();

            // Closing the reader also closes the connection's input stream.
            // The content is decoded with the default charset, as before.
            try (Reader rd = new InputStreamReader(conn.getInputStream())) {
                collectLinks(rd, links);
            }
        } catch (MalformedURLException | URISyntaxException | IllegalArgumentException ignored) {
            // Not a usable absolute URL (URI.toURL() throws
            // IllegalArgumentException for non-absolute URIs): no links.
        } catch (IOException | BadLocationException ignored) {
            // The document could not be fetched or parsed: no links.
        }

        // Return all found links.
        return links.toArray(new String[0]);
    }

    /** Parses the HTML supplied by {@code rd} and appends every anchor's href to {@code links}. */
    private static void collectLinks(Reader rd, List<String> links)
            throws IOException, BadLocationException {
        // Parse the HTML.
        EditorKit kit = new HTMLEditorKit();
        HTMLDocument doc = (HTMLDocument) kit.createDefaultDocument();

        // Without this property, a <meta> charset declaration makes the parser
        // abort with ChangedCharSetException (an IOException), which would
        // silently yield no links for most real-world pages.
        doc.putProperty("IgnoreCharsetDirective", Boolean.TRUE);
        kit.read(rd, doc, 0);

        // Find all the A elements in the HTML document.
        HTMLDocument.Iterator it = doc.getIterator(HTML.Tag.A);
        while (it.isValid()) {
            AttributeSet attrs = it.getAttributes();
            Object href = (attrs == null) ? null : attrs.getAttribute(HTML.Attribute.HREF);
            if (href instanceof String) {
                // Anchors without an href (e.g. <a name="...">) are skipped.
                links.add((String) href);
            }
            it.next();
        }
    }
}