List of usage examples for javax.swing.text.EditorKit.createDefaultDocument()
/**
 * Abstract method that takes no arguments and returns a {@code Document};
 * the implementation is supplied by concrete subclasses.
 * NOTE(review): presumably the returned Document is an empty model suited to
 * the kit's content type -- confirm against the EditorKit Javadoc.
 *
 * @return a {@code Document} instance
 */
public abstract Document createDefaultDocument();
From source file:Main.java
/**
 * Downloads the page at http://www.google.com, parses it into an
 * {@code HTMLDocument} and prints the HREF attribute of every anchor
 * ({@code A}) tag to standard output, one per line.
 *
 * @param argv command-line arguments (unused)
 * @throws Exception if the URI is invalid, the connection cannot be opened
 *         or read, or the HTML cannot be parsed
 */
public static void main(String[] argv) throws Exception {
    URL url = new URI("http://www.google.com").toURL();
    URLConnection conn = url.openConnection();

    EditorKit kit = new HTMLEditorKit();
    HTMLDocument doc = (HTMLDocument) kit.createDefaultDocument();
    // Without this property the parser aborts with ChangedCharSetException as
    // soon as it meets a <meta> charset declaration in the page.
    doc.putProperty("IgnoreCharsetDirective", Boolean.TRUE);

    // try-with-resources closes the reader (and the connection's stream)
    // even when parsing fails.
    // NOTE(review): decoding still uses the platform default charset, as before.
    try (Reader rd = new InputStreamReader(conn.getInputStream())) {
        kit.read(rd, doc, 0);
    }

    for (HTMLDocument.Iterator it = doc.getIterator(HTML.Tag.A); it.isValid(); it.next()) {
        // Read through the returned attribute set directly; no downcast to a
        // concrete AttributeSet implementation is needed to look up HREF.
        Object link = it.getAttributes().getAttribute(HTML.Attribute.HREF);
        if (link != null) {
            System.out.println(link);
        }
    }
}
From source file:HTML.java
/**
 * Strips the markup from an HTML string and returns the remaining text.
 *
 * @param html the HTML markup to convert
 * @return the text of the parsed document, or an empty string if any
 *         exception is raised while reading or extracting the text
 */
public static final String html2text(String html) {
    final EditorKit htmlKit = new HTMLEditorKit();
    final Document parsed = htmlKit.createDefaultDocument();
    // Tell the parser to ignore any charset declaration inside the markup.
    parsed.putProperty("IgnoreCharsetDirective", Boolean.TRUE);

    String text;
    try {
        htmlKit.read(new StringReader(html), parsed, 0);
        text = parsed.getText(0, parsed.getLength());
    } catch (Exception ignored) {
        // Best effort: any failure yields an empty result.
        text = "";
    }
    return text;
}
From source file:org.python.pydev.core.docutils.StringUtils.java
/** * Given some html, extracts its text.//from w w w.j av a 2 s .c om */ public static String extractTextFromHTML(String html) { try { EditorKit kit = new HTMLEditorKit(); Document doc = kit.createDefaultDocument(); // The Document class does not yet handle charset's properly. doc.putProperty("IgnoreCharsetDirective", Boolean.TRUE); // Create a reader on the HTML content. Reader rd = new StringReader(html); // Parse the HTML. kit.read(rd, doc, 0); // The HTML text is now stored in the document return doc.getText(0, doc.getLength()); } catch (Exception e) { } return ""; }