List of usage examples for org.apache.poi.hwpf HWPFDocument getRange
@Override
public Range getRange()
From source file:de.uni_siegen.wineme.come_in.thumbnailer.util.mime.DocFileIdentifier.java
License:Open Source License
@Override public String identify(String mimeType, byte[] bytes, File file) { if (isOfficeFile(mimeType) && !DOC_MIME_TYPE.equals(mimeType)) { try {//from w w w. j av a 2 s .c om FileInputStream stream = new FileInputStream(file); HWPFDocument document = new HWPFDocument(stream); if (document.getRange().getEndOffset() > 0) { return DOC_MIME_TYPE; } } catch (Throwable e) { } } return mimeType; }
From source file:File.DOC.WriteDoc.java
/**
 * Opens the hard-coded template document, inserts {@code content} in bold
 * before the existing text, and writes the result to
 * {@code path + namafile + ".doc"}.
 *
 * <p>Any failure is reported by printing the exception message to standard
 * output; nothing is thrown to the caller.
 *
 * @param path     directory prefix of the output file (concatenated as-is, so
 *                 it must already end with a separator)
 * @param namafile output file name without the {@code .doc} extension
 * @param content  text to insert; each {@code \n} is replaced by the
 *                 character {@code \013} before insertion
 */
public void Write(String path, String namafile, String content) {
    // NOTE(review): the template location is hard-coded; kept as-is so
    // existing callers see the same behavior.
    File file = new File("D:\\xyz.doc");
    try (FileInputStream in = new FileInputStream(file)) {
        POIFSFileSystem fs = new POIFSFileSystem(in);
        HWPFDocument doc = new HWPFDocument(fs);
        Range range = doc.getRange();
        CharacterRun run = range.insertBefore(content.replace("\n", "\013"));
        run.setBold(true);
        // The original called close() on an undeclared name "out"; the output
        // stream is now closed automatically, including on write failure.
        try (OutputStream outa = new FileOutputStream(new File(path + namafile + ".doc"))) {
            doc.write(outa);
        }
    } catch (Exception e) {
        System.out.println(e.getMessage());
    }
}
From source file:javaapplication1.HWPFTest.java
/**
 * Walks every section, paragraph and character run of the document and, for
 * each run whose text contains {@code findText}, asks the run to replace
 * {@code findText} with {@code replaceText}.
 *
 * @param doc         the document to modify in place
 * @param findText    the text to look for inside each character run
 * @param replaceText the text to put in its place
 * @return the same {@code doc} instance that was passed in
 */
private static HWPFDocument replaceText(HWPFDocument doc, String findText, String replaceText) {
    Range wholeDocument = doc.getRange();
    for (int sectionIdx = 0; sectionIdx < wholeDocument.numSections(); sectionIdx++) {
        Section section = wholeDocument.getSection(sectionIdx);
        for (int paragraphIdx = 0; paragraphIdx < section.numParagraphs(); paragraphIdx++) {
            Paragraph paragraph = section.getParagraph(paragraphIdx);
            // Matching is done per run, so text split across runs is not found.
            for (int runIdx = 0; runIdx < paragraph.numCharacterRuns(); runIdx++) {
                CharacterRun candidate = paragraph.getCharacterRun(runIdx);
                if (!candidate.text().contains(findText)) {
                    continue;
                }
                candidate.replaceText(findText, replaceText);
            }
        }
    }
    return doc;
}
From source file:mj.ocraptor.extraction.tika.parser.microsoft.WordExtractor.java
License:Apache License
protected void parse(DirectoryNode root, XHTMLContentHandler xhtml) throws IOException, SAXException, TikaException { HWPFDocument document; try {/* w w w .j a v a2s . c o m*/ document = new HWPFDocument(root); } catch (OldWordFileFormatException e) { parseWord6(root, xhtml); return; } org.apache.poi.hwpf.extractor.WordExtractor wordExtractor = new org.apache.poi.hwpf.extractor.WordExtractor( document); // mj extractImageText(xhtml, document); HeaderStories headerFooter = new HeaderStories(document); // Grab the list of pictures. As far as we can tell, // the pictures should be in order, and may be directly // placed or referenced from an anchor PicturesTable pictureTable = document.getPicturesTable(); PicturesSource pictures = new PicturesSource(document); // Do any headers, if present Range[] headers = new Range[] { headerFooter.getFirstHeaderSubrange(), headerFooter.getEvenHeaderSubrange(), headerFooter.getOddHeaderSubrange() }; handleHeaderFooter(headers, "header", document, pictures, pictureTable, xhtml); // Do the main paragraph text Range r = document.getRange(); for (int i = 0; i < r.numParagraphs(); i++) { Paragraph p = r.getParagraph(i); i += handleParagraph(p, 0, r, document, FieldsDocumentPart.MAIN, pictures, pictureTable, xhtml); } // Do everything else for (String paragraph : wordExtractor.getMainTextboxText()) { xhtml.element("p", paragraph); } for (String paragraph : wordExtractor.getFootnoteText()) { xhtml.element("p", paragraph); } for (String paragraph : wordExtractor.getCommentsText()) { xhtml.element("p", paragraph); } for (String paragraph : wordExtractor.getEndnoteText()) { xhtml.element("p", paragraph); } // Do any footers, if present Range[] footers = new Range[] { headerFooter.getFirstFooterSubrange(), headerFooter.getEvenFooterSubrange(), headerFooter.getOddFooterSubrange() }; handleHeaderFooter(footers, "footer", document, pictures, pictureTable, xhtml); // Handle any pictures that we haven't output yet for (Picture p = 
pictures.nextUnclaimed(); p != null;) { handlePictureCharacterRun(null, p, pictures, xhtml); p = pictures.nextUnclaimed(); } // Handle any embeded office documents try { DirectoryEntry op = (DirectoryEntry) root.getEntry("ObjectPool"); for (Entry entry : op) { if (entry.getName().startsWith("_") && entry instanceof DirectoryEntry) { handleEmbeddedOfficeDoc((DirectoryEntry) entry, xhtml); } } } catch (FileNotFoundException e) { } }
From source file:Modelo.EscribirWord.java
/**
 * Replaces {@code findText} with {@code replaceText} in every character run
 * of the document whose text contains {@code findText}.
 *
 * <p>If {@code replaceText} is {@code null} when the first matching run is
 * found, the word "null" is printed to standard output and an empty string is
 * used as the replacement from then on.
 *
 * @param doc         the document to modify in place
 * @param findText    the text to look for inside each character run
 * @param replaceText the replacement text, may be {@code null}
 * @return the same {@code doc} instance that was passed in
 */
private HWPFDocument replaceText(HWPFDocument doc, String findText, String replaceText) {
    Range documentRange = doc.getRange();
    int sectionPos = 0;
    while (sectionPos < documentRange.numSections()) {
        Section section = documentRange.getSection(sectionPos);
        int paragraphPos = 0;
        while (paragraphPos < section.numParagraphs()) {
            Paragraph paragraph = section.getParagraph(paragraphPos);
            int runPos = 0;
            while (runPos < paragraph.numCharacterRuns()) {
                CharacterRun characterRun = paragraph.getCharacterRun(runPos);
                String runText = characterRun.text();
                if (runText.contains(findText)) {
                    if (replaceText == null) {
                        System.out.println("null");
                        replaceText = "";
                    }
                    characterRun.replaceText(findText, replaceText);
                }
                runPos++;
            }
            paragraphPos++;
        }
        ++sectionPos;
    }
    return doc;
}
From source file:org.apache.tika.parser.microsoft.WordExtractor.java
License:Apache License
protected void parse(DirectoryNode root, XHTMLContentHandler xhtml) throws IOException, SAXException, TikaException { HWPFDocument document; try {/* w ww . ja v a 2 s . c om*/ document = new HWPFDocument(root); } catch (OldWordFileFormatException e) { parseWord6(root, xhtml); return; } org.apache.poi.hwpf.extractor.WordExtractor wordExtractor = new org.apache.poi.hwpf.extractor.WordExtractor( document); HeaderStories headerFooter = new HeaderStories(document); // Grab the list of pictures. As far as we can tell, // the pictures should be in order, and may be directly // placed or referenced from an anchor PicturesTable pictureTable = document.getPicturesTable(); PicturesSource pictures = new PicturesSource(document); // Do any headers, if present Range[] headers = new Range[] { headerFooter.getFirstHeaderSubrange(), headerFooter.getEvenHeaderSubrange(), headerFooter.getOddHeaderSubrange() }; handleHeaderFooter(headers, "header", document, pictures, pictureTable, xhtml); // Do the main paragraph text Range r = document.getRange(); ListManager listManager = new ListManager(document); for (int i = 0; i < r.numParagraphs(); i++) { Paragraph p = r.getParagraph(i); i += handleParagraph(p, 0, r, document, FieldsDocumentPart.MAIN, pictures, pictureTable, listManager, xhtml); } // Do everything else for (String paragraph : wordExtractor.getMainTextboxText()) { xhtml.element("p", paragraph); } for (String paragraph : wordExtractor.getFootnoteText()) { xhtml.element("p", paragraph); } for (String paragraph : wordExtractor.getCommentsText()) { xhtml.element("p", paragraph); } for (String paragraph : wordExtractor.getEndnoteText()) { xhtml.element("p", paragraph); } // Do any footers, if present Range[] footers = new Range[] { headerFooter.getFirstFooterSubrange(), headerFooter.getEvenFooterSubrange(), headerFooter.getOddFooterSubrange() }; handleHeaderFooter(footers, "footer", document, pictures, pictureTable, xhtml); // Handle any pictures that we haven't output yet for 
(Picture p = pictures.nextUnclaimed(); p != null;) { handlePictureCharacterRun(null, p, pictures, xhtml); p = pictures.nextUnclaimed(); } // Handle any embeded office documents try { DirectoryEntry op = (DirectoryEntry) root.getEntry("ObjectPool"); for (Entry entry : op) { if (entry.getName().startsWith("_") && entry instanceof DirectoryEntry) { handleEmbeddedOfficeDoc((DirectoryEntry) entry, xhtml); } } } catch (FileNotFoundException e) { } }
From source file:org.docx4j.convert.in.Doc.java
License:Apache License
/** * This method is private, since the fact that conversion is (currently) * performed using POI's HWPF should be encapsulated. * //from ww w . j av a 2 s .c o m * @param doc * @param wordMLPackage * @return success or failure */ private static void convert(HWPFDocument doc, WordprocessingMLPackage wordMLPackage) throws Exception { // Convert styles org.apache.poi.hwpf.model.StyleSheet stylesheet = doc.getStyleSheet(); // TODO - higher priority // At present, a default set of styles are defined in the output // document. // Convert lists org.apache.poi.hwpf.model.ListTables listTables = doc.getListTables(); // TODO // Convert document properties org.apache.poi.hwpf.model.DocumentProperties docProps = doc.getDocProperties(); // TODO // Convert main document part MainDocumentPart documentPart = wordMLPackage.getMainDocumentPart(); org.docx4j.wml.ObjectFactory factory = new org.docx4j.wml.ObjectFactory(); Range r = doc.getRange(); for (int x = 0; x < r.numSections(); x++) { Section s = r.getSection(x); // TODO - convert section for (int y = 0; y < s.numParagraphs(); y++) { Paragraph p = s.getParagraph(y); if (p.isInTable()) { Table t = s.getTable(p); int cl = numCol(t); log.info("Found " + t.numRows() + "x" + cl + " table - TODO - convert"); handleTable(wordMLPackage, doc, t, stylesheet, documentPart, factory); // addTODO(factory, wmlP, "[TABLE " + + t.numRows() + "x" + // cl // + " - can't convert tables yet]"); y += t.numParagraphs() - 1; } else { org.docx4j.wml.P paraToAdd = handleP(wordMLPackage, doc, p, stylesheet, documentPart, factory); documentPart.addObject(paraToAdd); } } } }
From source file:org.esmerilprogramming.pdfcake.DocumentReplace.java
License:Open Source License
/**
 * Scans every paragraph of the document for placeholders of the form
 * {@code $$$<key>$$$} and replaces each occurrence with the value the
 * template holds for that key.
 *
 * <p>Every occurrence of a placeholder in a paragraph is replaced. Text
 * inserted by a replacement is not scanned again for the same placeholder, so
 * a value that contains its own placeholder cannot cause an endless loop.
 *
 * @param document the document to modify in place
 * @param template the source of the attribute keys and their values
 * @return the same {@code document} instance that was passed in
 */
private static HWPFDocument replaceKeys(HWPFDocument document, DocumentTemplate template) {
    Range range = document.getRange();
    for (int i = 0; i < range.numParagraphs(); i++) {
        Paragraph p = range.getParagraph(i);
        for (Enumeration<String> e = template.getAttributes().keys(); e.hasMoreElements();) {
            String key = e.nextElement();
            String attributeKey = "$$$" + key + "$$$";

            String text;
            try {
                text = p.text();
            } catch (Exception ex) {
                // The paragraph text is unavailable: skip this key instead of
                // searching in text left over from a previous key.
                continue;
            }
            if (text == null) {
                continue;
            }

            int offset = text.indexOf(attributeKey);
            while (offset > -1) {
                String replacement = template.getAttributes().get(key);
                p.replaceText(attributeKey, replacement, offset);
                // Mirror the single replacement in the local copy so that the
                // offsets of later occurrences stay aligned with the paragraph.
                text = text.substring(0, offset) + replacement
                        + text.substring(offset + attributeKey.length());
                // Continue after the inserted value to find further occurrences.
                offset = text.indexOf(attributeKey, offset + replacement.length());
            }
        }
    }
    return document;
}
From source file:org.exoplatform.services.document.impl.MSWordDocumentReader.java
License:Open Source License
/** * Returns only a text from .doc file content. * /* ww w. ja va2 s. c o m*/ * @param is an input stream with .doc file content. * @return The string only with text from file content. */ public String getContentAsText(final InputStream is) throws IOException, DocumentReadException { if (is == null) { throw new IllegalArgumentException("InputStream is null."); } String text = ""; try { if (is.available() == 0) { return ""; } HWPFDocument doc; try { doc = SecurityHelper.doPrivilegedIOExceptionAction(new PrivilegedExceptionAction<HWPFDocument>() { public HWPFDocument run() throws Exception { return new HWPFDocument(is); } }); } catch (IOException e) { throw new DocumentReadException("Can't open document.", e); } Range range = doc.getRange(); text = range.text(); } finally { if (is != null) { try { is.close(); } catch (IOException e) { if (LOG.isTraceEnabled()) { LOG.trace("An exception occurred: " + e.getMessage()); } } } } return text.trim(); }
From source file:org.modeshape.sequencer.msoffice.word.WordMetadataReader.java
License:Apache License
public static WordMetadata instance(InputStream stream) throws IOException { WordMetadata metadata = new WordMetadata(); List<WordMetadata.WordHeading> headings = new ArrayList<WordMetadata.WordHeading>(); HWPFDocument document = new HWPFDocument(stream); Range range = document.getRange(); StyleSheet stylesheet = document.getStyleSheet(); for (int i = 0; i < range.numParagraphs(); i++) { Paragraph paragraph = range.getParagraph(i); String styleName = stylesheet.getStyleDescription(paragraph.getStyleIndex()).getName(); if (styleName.startsWith(HEADER_PREFIX)) { String rawLevelNum = styleName.substring(HEADER_PREFIX.length() + 1).trim(); int levelNum = 0; try { levelNum = Integer.parseInt(rawLevelNum); } catch (NumberFormatException nfe) { log.debug("Could not parse heading level from: " + styleName); }//from w w w . j a va 2 s .c o m String text = Paragraph.stripFields(paragraph.text()); if ('\r' == text.charAt(text.length() - 1)) { text = text.substring(0, text.length() - 1); } headings.add(new WordMetadata.WordHeading(text, levelNum)); } } metadata.setHeadings(headings); metadata.setMetadata(document.getSummaryInformation()); return metadata; }