List of usage examples for javax.swing.text.DefaultStyledDocument.getText
public String getText(int offset, int length) throws BadLocationException
From source file:com.liferay.portal.util.LuceneFields.java
public static Field getFile(String field, File file, String fileExt) throws IOException { fileExt = fileExt.toLowerCase();//from w w w . j av a2s.c o m FileInputStream fis = new FileInputStream(file); Reader reader = new BufferedReader(new InputStreamReader(fis)); String text = null; if (fileExt.equals(".doc")) { try { WordDocument wordDocument = new WordDocument(fis); StringWriter stringWriter = new StringWriter(); wordDocument.writeAllText(stringWriter); text = stringWriter.toString(); stringWriter.close(); } catch (Exception e) { _log.error(e.getMessage()); } } else if (fileExt.equals(".htm") || fileExt.equals(".html")) { try { DefaultStyledDocument dsd = new DefaultStyledDocument(); HTMLEditorKit htmlEditorKit = new HTMLEditorKit(); htmlEditorKit.read(reader, dsd, 0); text = dsd.getText(0, dsd.getLength()); } catch (Exception e) { _log.error(e.getMessage()); } } else if (fileExt.equals(".pdf")) { try { PDFParser parser = new PDFParser(fis); parser.parse(); PDDocument pdDoc = parser.getPDDocument(); StringWriter stringWriter = new StringWriter(); PDFTextStripper stripper = new PDFTextStripper(); stripper.setLineSeparator("\n"); stripper.writeText(pdDoc, stringWriter); text = stringWriter.toString(); stringWriter.close(); pdDoc.close(); } catch (Exception e) { _log.error(e.getMessage()); } } else if (fileExt.equals(".rtf")) { try { DefaultStyledDocument dsd = new DefaultStyledDocument(); RTFEditorKit rtfEditorKit = new RTFEditorKit(); rtfEditorKit.read(reader, dsd, 0); text = dsd.getText(0, dsd.getLength()); } catch (Exception e) { _log.error(e.getMessage()); } } else if (fileExt.equals(".xls")) { try { XLSTextStripper stripper = new XLSTextStripper(fis); text = stripper.getText(); } catch (Exception e) { _log.error(e.getMessage()); } } if (text != null) { return new Field(field, text, Field.Store.YES, Field.Index.NOT_ANALYZED); } else { return new Field(field, reader); } }
From source file:com.stimulus.archiva.extraction.RTFExtractor.java
public Reader getText(InputStream is, Charset charset, IndexInfo indexInfo) throws ExtractionException { Reader reader = null;//from w w w.j a va2s . c om FileWriter writer = null; File file = null; try { reader = new InputStreamReader(is); file = File.createTempFile("extract_rtf", ".tmp"); indexInfo.addDeleteFile(file); writer = new FileWriter(file); DefaultStyledDocument doc = new DefaultStyledDocument(); new RTFEditorKit().read(reader, doc, 0); writer.write(doc.getText(0, doc.getLength())); } catch (Throwable ioe) { throw new ExtractionException("failed to parse rtf document", ioe, logger); } finally { if (reader != null) { try { reader.close(); } catch (IOException ioe) { } } if (writer != null) { try { writer.close(); } catch (IOException ioe) { } } } try { Reader outReader = new FileReader(file); indexInfo.addReader(outReader); return outReader; } catch (Exception ex) { throw new ExtractionException("failed to extract text from powerpoint document", ex, logger, ChainedException.Level.DEBUG); } }
From source file:framework.retrieval.engine.index.create.impl.file.parse.RTFFileContentParser.java
/**
 * Reads the RTF file behind {@code document} and returns its plain text.
 *
 * @param document    wrapper providing the file to parse
 * @param charsetName not used by this implementation
 * @return the document text, or an empty string when reading or parsing fails
 *         (the failure is logged through {@code RetrievalUtil.errorLog})
 */
public String getContent(RFileDocument document, String charsetName) {
    InputStream rtfStream = null;
    try {
        rtfStream = new FileInputStream(document.getFile());

        DefaultStyledDocument parsed = new DefaultStyledDocument();
        new RTFEditorKit().read(rtfStream, parsed, 0);

        return parsed.getText(0, parsed.getLength());
    } catch (Exception e) {
        RetrievalUtil.errorLog(log, document.getFile().getAbsolutePath(), e);
        return "";
    } finally {
        if (rtfStream != null) {
            try {
                rtfStream.close();
            } catch (Exception e) {
                RetrievalUtil.errorLog(log, e);
            }
        }
    }
}
From source file:com.aurel.track.lucene.index.associatedFields.textExctractor.HTMLExtractor.java
/** * Gets the text from file content /*from ww w . jav a2 s . c om*/ * @param file * @param fileExtension * @return */ @Override public String getText(File file, String fileExtension) { FileInputStream fis = null; Reader reader = null; try { try { fis = new FileInputStream(file); } catch (FileNotFoundException e) { LOGGER.info("File " + file.getName() + " not found. " + e.getMessage()); LOGGER.debug(ExceptionUtils.getStackTrace(e)); return null; } reader = new BufferedReader(new InputStreamReader(fis)); DefaultStyledDocument dsd = new DefaultStyledDocument(); HTMLEditorKit htmlEditorKit = new HTMLEditorKit(); htmlEditorKit.read(reader, dsd, 0); return dsd.getText(0, dsd.getLength()); } catch (Exception e) { LOGGER.debug("Extracting text from the .htm or .html file " + file.getName() + " failed with " + e.getMessage()); LOGGER.error(ExceptionUtils.getStackTrace(e)); } finally { try { if (reader != null) { reader.close(); } } catch (Exception e) { LOGGER.debug("Closing the reader for file " + file.getName() + " failed with " + e.getMessage()); } try { if (fis != null) { fis.close(); } } catch (Exception e) { LOGGER.debug("Closing the FileInputStream for file " + file.getName() + " failed with " + e.getMessage()); } } return null; }
From source file:com.croer.javaorange.diviner.SimpleOrangeTextPane.java
/**
 * Queues a task on the AWT event queue that restyles the document token by
 * token: each token returned by {@code StringUtils.split} gets the next entry
 * of {@code styles}, wrapping around when the array is exhausted.
 *
 * @param document document whose text is read and whose character attributes are replaced
 */
protected void colorStyledDocument(final DefaultStyledDocument document) {
    final Runnable colorizer = new Runnable() {
        @Override
        public void run() {
            String text;
            try {
                text = document.getText(0, document.getLength());
            } catch (BadLocationException ex) {
                Logger.getLogger(SimpleOrangeTextPane.class.getName()).log(Level.SEVERE, null, ex);
                text = "";
            }

            // Working copy: every token is blanked out once handled, so a later
            // identical token is located at its own position rather than an earlier one.
            StringBuilder remaining = new StringBuilder(text);
            String[] tokens = StringUtils.split(remaining.toString());
            for (int k = 0; k < tokens.length; k++) {
                String token = tokens[k];
                int length = token.length();
                int offset = remaining.indexOf(token);
                remaining.replace(offset, offset + length, StringUtils.repeat(" ", length));
                document.setCharacterAttributes(offset, length, styles[k % styles.length], true);
            }
        }
    };
    EventQueue.invokeLater(colorizer);
}
From source file:edu.ur.ir.index.DefaultRtfTextExtractor.java
/** * Extract text from the Rich text file document * @throws Exception //from w ww .j av a 2s.c o m * * @see edu.ur.ir.index.FileTextExtractor#getText(java.io.File) */ public String getText(File f) throws Exception { String text = null; // don't even try if the file is too large if (isFileTooLarge(f) || f.length() <= 0l) { return text; } DefaultStyledDocument styledDoc = new DefaultStyledDocument(); RTFEditorKit editorKit = new RTFEditorKit(); FileInputStream inputStream = null; try { inputStream = new FileInputStream(f); editorKit.read(inputStream, styledDoc, 0); String myText = styledDoc.getText(0, styledDoc.getLength()); if (myText != null && !myText.trim().equals("")) { text = myText; } } catch (OutOfMemoryError oome) { text = null; log.error("could not extract text", oome); throw (oome); } catch (Exception e) { text = null; log.error("could not get text for rich text document " + f.getAbsolutePath(), e); throw (e); } finally { closeInputStream(inputStream); editorKit = null; } return text; }
From source file:com.aurel.track.lucene.index.associatedFields.textExctractor.RTFExtractor.java
/** * Gets the text from file content /*from w ww.j a va 2s .c o m*/ * @param file * @param fileExtension * @return */ @Override public String getText(File file, String fileExtension) { FileInputStream fis = null; Reader reader = null; try { DefaultStyledDocument dsd = new DefaultStyledDocument(); RTFEditorKit rtfEditorKit = new RTFEditorKit(); try { fis = new FileInputStream(file); } catch (FileNotFoundException e) { LOGGER.info("File " + file.getName() + " not found. " + e.getMessage()); LOGGER.debug(ExceptionUtils.getStackTrace(e)); return null; } reader = new BufferedReader(new InputStreamReader(fis)); rtfEditorKit.read(reader, dsd, 0); return dsd.getText(0, dsd.getLength()); } catch (Exception e) { if (LOGGER.isDebugEnabled()) { LOGGER.debug( "Extracting text from the .rtf file " + file.getName() + " failed with " + e.getMessage()); LOGGER.debug(ExceptionUtils.getStackTrace(e)); } return null; } finally { if (reader != null) { try { reader.close(); } catch (IOException e) { LOGGER.debug( "Closing the reader for file " + file.getName() + " failed with " + e.getMessage()); LOGGER.debug(ExceptionUtils.getStackTrace(e)); } } if (fis != null) { try { fis.close(); } catch (IOException e) { LOGGER.debug("Closing the FileInputStream for file " + file.getName() + " failed with " + e.getMessage()); LOGGER.debug(ExceptionUtils.getStackTrace(e)); } } } }
From source file:org.alder.fotobuchconvert.scribus.RtfToScribusConverter.java
void output(XmlBuilder xml, DefaultStyledDocument doc, ScribusWriter scribus) { log.debug("Starting conversion of RTF data"); if (log.isTraceEnabled()) doc.dump(System.err);/*from w w w . j ava 2 s . co m*/ try { Element section = doc.getDefaultRootElement(); log.trace(section); assert section.getName().equals("section"); final int nj = section.getElementCount(); for (int j = 0; j < nj; j++) { Element paragraph = section.getElement(j); log.trace(paragraph); assert section.getName().equals("paragraph"); // boolean firstInPara = true; AttributeSet attr = paragraph.getAttributes(); Integer alignment = (Integer) attr.getAttribute(StyleConstants.Alignment); boolean elementsInThisLine = false; final int ni = paragraph.getElementCount(); for (int i = 0; i < ni; i++) { Element content = paragraph.getElement(i); assert section.getName().equals("content"); int start = content.getStartOffset(); int end = content.getEndOffset(); attr = content.getAttributes(); Boolean italic = (Boolean) attr.getAttribute(StyleConstants.Italic); Boolean bold = (Boolean) attr.getAttribute(StyleConstants.Bold); Boolean underline = (Boolean) attr.getAttribute(StyleConstants.Underline); String family = (String) attr.getAttribute(StyleConstants.Family); Integer fontSize = (Integer) attr.getAttribute(StyleConstants.Size); Color color = (Color) attr.getAttribute(StyleConstants.ColorConstants.Foreground); String text = doc.getText(start, end - start); // if (firstInPara && text.trim().isEmpty() && family == // null // && fontSize == null) // continue; // else // firstInPara = false; if (i == ni - 1 && text.trim().isEmpty() && text.length() < 3) continue; elementsInThisLine = true; while (text.endsWith("\n") || text.endsWith("\r")) text = text.substring(0, text.length() - 1); log.debug(italic + " " + bold + " " + underline + " " + family + " " + fontSize + " " + color + "\t\"" + text + "\""); XmlBuilder el = xml.add(C.EL_ITEXT).set(C.CH, text); if (bold == Boolean.TRUE && italic == Boolean.TRUE) 
el.set(C.FONT, family + " Bold Italic"); else if (bold == Boolean.TRUE) el.set(C.FONT, family + " Bold"); else if (italic == Boolean.TRUE) el.set(C.FONT, family + " Italic"); else el.set(C.FONT, family + " Regular"); if (fontSize != null) el.set(C.FONTSIZE, fontSize); if (color != null && color.equals(Color.BLACK) && scribus != null) { String colname = scribus.colorManager.getColorName(color); el.set(C.FCOLOR, colname); } } if (!elementsInThisLine && j == nj - 1) break; // don't convert last line if empty XmlBuilder el = xml.add(C.EL_PARA); if (alignment != null) switch (alignment) { case StyleConstants.ALIGN_LEFT: el.set(C.ALIGN, 0); break; case StyleConstants.ALIGN_CENTER: el.set(C.ALIGN, 1); break; case StyleConstants.ALIGN_RIGHT: el.set(C.ALIGN, 2); break; case StyleConstants.ALIGN_JUSTIFIED: el.set(C.ALIGN, 3); break; } } } catch (BadLocationException e) { throw new RuntimeException("This error should not occour", e); } }
From source file:simplealbum.mvc.autocomplete.JTextPaneX.java
protected void colorStyledDocument(final DefaultStyledDocument document) { EventQueue.invokeLater(new Runnable() { @Override// w ww.j a v a2 s .c o m public void run() { String input = ""; try { input = document.getText(0, document.getLength()); } catch (BadLocationException ex) { Logger.getLogger(JTextPaneX.class.getName()).log(Level.SEVERE, null, ex); } StringBuilder inputMut = new StringBuilder(input); String[] split = StringUtils.split(inputMut.toString()); int i = 0; for (String string : split) { int start = inputMut.indexOf(string); int end = start + string.length(); inputMut.replace(start, end, StringUtils.repeat(" ", string.length())); document.setCharacterAttributes(start, string.length(), styles[i++ % styles.length], true); } } }); }