List of usage examples for javax.swing.text.EditorKit.read(Reader, Document, int)
/**
 * Reads content from a character stream into a document.
 *
 * <p>In the usage examples that follow, the reader supplies HTML text and the
 * call is made with {@code pos} set to 0 on a freshly created document.
 *
 * @param in  the character stream to read content from
 * @param doc the document that receives the content
 * @param pos the offset within {@code doc} at which the content is placed
 * @throws IOException          if reading from {@code in} fails
 * @throws BadLocationException presumably when {@code pos} is not a valid
 *                              offset in {@code doc} (confirm against the API docs)
 */
public abstract void read(Reader in, Document doc, int pos) throws IOException, BadLocationException;
From source file:Main.java
public static void main(String[] argv) throws Exception { HTMLDocument doc = new HTMLDocument() { public HTMLEditorKit.ParserCallback getReader(int pos) { return new HTMLEditorKit.ParserCallback() { public void handleText(char[] data, int pos) { System.out.println(data); }//from w w w . j a v a2 s . c om }; } }; URL url = new URI("http://www.google.com").toURL(); URLConnection conn = url.openConnection(); Reader rd = new InputStreamReader(conn.getInputStream()); EditorKit kit = new HTMLEditorKit(); kit.read(rd, doc, 0); }
From source file:Main.java
public static void main(String[] argv) throws Exception { URL url = new URI("http://www.google.com").toURL(); URLConnection conn = url.openConnection(); Reader rd = new InputStreamReader(conn.getInputStream()); EditorKit kit = new HTMLEditorKit(); HTMLDocument doc = (HTMLDocument) kit.createDefaultDocument(); kit.read(rd, doc, 0); HTMLDocument.Iterator it = doc.getIterator(HTML.Tag.A); while (it.isValid()) { SimpleAttributeSet s = (SimpleAttributeSet) it.getAttributes(); String link = (String) s.getAttribute(HTML.Attribute.HREF); if (link != null) { System.out.println(link); }// w ww . ja v a 2s . co m it.next(); } }
From source file:HTML.java
/** * Utility method to convert HTML to text. * @param html The string containing HTML. * @return a String containing the derived text . *///from w w w.ja v a2s . c o m public static final String html2text(String html) { EditorKit kit = new HTMLEditorKit(); Document doc = kit.createDefaultDocument(); doc.putProperty("IgnoreCharsetDirective", Boolean.TRUE); try { Reader reader = new StringReader(html); kit.read(reader, doc, 0); return doc.getText(0, doc.getLength()); } catch (Exception e) { return ""; } }
From source file:org.python.pydev.core.docutils.StringUtils.java
/** * Given some html, extracts its text.// w w w .j a v a2s . c o m */ public static String extractTextFromHTML(String html) { try { EditorKit kit = new HTMLEditorKit(); Document doc = kit.createDefaultDocument(); // The Document class does not yet handle charset's properly. doc.putProperty("IgnoreCharsetDirective", Boolean.TRUE); // Create a reader on the HTML content. Reader rd = new StringReader(html); // Parse the HTML. kit.read(rd, doc, 0); // The HTML text is now stored in the document return doc.getText(0, doc.getLength()); } catch (Exception e) { } return ""; }