List of usage examples for org.apache.poi.xwpf.usermodel XWPFDocument XWPFDocument
public XWPFDocument(InputStream is) throws IOException
From source file:org.exoplatform.services.document.impl.MSXWordDocumentReader.java
License:Open Source License
/** * @see org.exoplatform.services.document.DocumentReader#getProperties(java.io.InputStream) */// w ww .j av a 2 s .c o m public Properties getProperties(final InputStream is) throws IOException, DocumentReadException { POIPropertiesReader reader = new POIPropertiesReader(); reader.readDCProperties( SecurityHelper.doPrivilegedIOExceptionAction(new PrivilegedExceptionAction<XWPFDocument>() { public XWPFDocument run() throws Exception { return new XWPFDocument(is); } })); return reader.getProperties(); }
From source file:org.isisaddons.module.xdocreport.dom.example.Generate2Pdf.java
License:Apache License
public static void main(String[] args) throws IOException, XDocReportException { // 1) Load DOCX into XWPFDocument InputStream in = new FileInputStream(new File("target/Project.docx")); XWPFDocument document = new XWPFDocument(in); // 2) Prepare Pdf options PdfOptions options = PdfOptions.create(); // 3) Convert XWPFDocument to Pdf OutputStream out = new FileOutputStream(new File("target/Project.pdf")); PdfConverter.getInstance().convert(document, out, options); }
From source file:org.isisaddons.module.xdocreport.dom.service.XDocReportService.java
License:Apache License
/**
 * Renders DOCX content as PDF entirely in memory.
 *
 * @param docxBytes the raw bytes of the DOCX document
 * @return the bytes written by the PDF converter
 * @throws IOException if the DOCX bytes cannot be loaded
 */
private byte[] toPdf(final byte[] docxBytes) throws IOException {
    final ByteArrayInputStream docxStream = new ByteArrayInputStream(docxBytes);
    final XWPFDocument wordDocument = new XWPFDocument(docxStream);

    final ByteArrayOutputStream pdfBuffer = new ByteArrayOutputStream();
    PdfConverter.getInstance().convert(wordDocument, pdfBuffer, pdfOptions);

    return pdfBuffer.toByteArray();
}
From source file:org.joeffice.wordprocessor.reader.POIDocxReader.java
License:Apache License
/**
 * Loads a DOCX stream into this reader's target document.
 *
 * <p>The stream is parsed into an {@link XWPFDocument}, its body elements are passed to
 * {@code iteratePart}, and the parsed document is stored on the target document under the
 * {@code "XWPFDocument"} property.
 *
 * @param in stream to read the DOCX content from
 * @param offset value assigned to {@code currentOffset}
 */
public void read(InputStream in, int offset) throws IOException, BadLocationException {
    final XWPFDocument parsed = new XWPFDocument(in);
    poiDocument = parsed;

    iteratePart(parsed.getBodyElements());

    // NOTE(review): the offset is recorded only after the body has been iterated - confirm
    // that iteratePart is not expected to start from it.
    currentOffset = offset;

    document.putProperty("XWPFDocument", poiDocument);
}
From source file:org.kimios.kernel.index.filters.ExcelXFilter.java
License:Open Source License
/**
 * Extracts the text of a DOCX document.
 *
 * @param in stream the {@link XWPFDocument} is built from; it is not closed by this method
 * @return the text reported by an {@code XWPFWordExtractor} over the document
 * @throws IOException if the document cannot be loaded
 */
public String getBody(InputStream in) throws IOException {
    final XWPFWordExtractor extractor = new XWPFWordExtractor(new XWPFDocument(in));
    return extractor.getText();
}
From source file:org.kino.server.api.contractgenerator.java
/**
 * Loads a DOCX template, applies {@code replaceInParagraphs} to its paragraphs and writes the
 * result out.
 *
 * <p>The replacement is applied to the paragraphs returned by the document itself and to the
 * paragraphs of every cell of every table returned by {@code getTables()}. Neither stream is
 * closed here.
 *
 * @param src stream the template is read from
 * @param dststrem stream the processed document is written to
 * @param replacementMap map handed unchanged to {@code replaceInParagraphs}
 */
static void writeDocxTemplate(InputStream src, OutputStream dststrem, Map<String, String> replacementMap)
        throws InvalidFormatException, IOException {
    final XWPFDocument template = new XWPFDocument(src);

    // Paragraphs of the document body.
    replaceInParagraphs(replacementMap, template.getParagraphs());

    // Paragraphs inside table cells.
    for (final XWPFTable table : template.getTables()) {
        for (final XWPFTableRow tableRow : table.getRows()) {
            for (final XWPFTableCell tableCell : tableRow.getTableCells()) {
                replaceInParagraphs(replacementMap, tableCell.getParagraphs());
            }
        }
    }

    template.write(dststrem);
}
From source file:org.knime.ext.textprocessing.nodes.source.parser.word.WordDocumentParser.java
License:Open Source License
/**
 * Parses the Word file identified by {@code m_docPath} from the given stream into a
 * {@code Document}.
 *
 * <p>A fresh {@code DocumentBuilder} is configured from the parser's fields, the paragraphs of
 * the file are added to it, author and publication date are set from the file's metadata, and
 * a title is chosen before the document is created. Paths ending in {@code ".doc"} are read
 * through {@code HWPFDocument}; paths ending in {@code ".docx"} or {@code ".docm"} are read
 * through {@code XWPFDocument}. For any other path no paragraphs or metadata are read.
 *
 * @param is stream with the file content; it is closed in the {@code finally} block
 * @return the document created by the builder
 * @throws Exception if reading or parsing fails
 */
private Document parseInternal(final InputStream is) throws Exception {
    m_currentDoc = new DocumentBuilder(m_tokenizerName);
    m_currentDoc.setDocumentFile(new File(m_docPath));
    m_currentDoc.setDocumentType(m_type);
    m_currentDoc.addDocumentCategory(m_category);
    m_currentDoc.addDocumentSource(m_source);

    // Everything that may need closing is declared up front so the finally block can reach it.
    POIFSFileSystem poifs = null;
    HWPFDocument hdoc = null;
    XWPFDocument hdoc2 = null;
    WordExtractor extractor = null;

    try {
        // doc files (the extension check is case-sensitive)
        if (m_docPath.endsWith(".doc")) {
            // Copy the content of the input stream into a byte array, because the content has
            // to be read twice: once for the text and once for the meta information.
            final ByteArrayOutputStream baos = new ByteArrayOutputStream();
            final byte[] buf = new byte[1024];
            int i = 0;
            while ((i = is.read(buf)) >= 0) {
                baos.write(buf, 0, i);
            }
            final byte[] content = baos.toByteArray();

            // open stream with copied content to read text
            InputStream copiedInput = new ByteArrayInputStream(content);
            hdoc = new HWPFDocument(copiedInput);
            extractor = new WordExtractor(hdoc);
            for (String p : extractor.getParagraphText()) {
                // paragraphs are trimmed before the whitespace check and before being added
                p = p.trim();
                if (!onlyWhitepscaes(p)) {
                    m_currentDoc.addParagraph(p);
                }
            }

            // open stream again with copied content to read meta info
            copiedInput = new ByteArrayInputStream(content);
            poifs = new POIFSFileSystem(copiedInput);
            final DirectoryEntry dir = poifs.getRoot();
            final DocumentEntry siEntry = (DocumentEntry) dir.getEntry(SummaryInformation.DEFAULT_STREAM_NAME);
            final PropertySet ps = new PropertySet(new DocumentInputStream(siEntry));
            final SummaryInformation si = new SummaryInformation(ps);

            setAuthor(si.getAuthor());
            setPublicationDate(si.getCreateDateTime());

            // docx files
        } else if (m_docPath.endsWith(".docx") || m_docPath.endsWith(".docm")) {
            hdoc2 = new XWPFDocument(is);
            final List<XWPFParagraph> paragraphs = hdoc2.getParagraphs();
            for (final XWPFParagraph paragraph : paragraphs) {
                // unlike the .doc branch, the paragraph text is added untrimmed
                final String text = paragraph.getText();
                if (!onlyWhitepscaes(text)) {
                    m_currentDoc.addParagraph(text);
                }
            }

            setAuthor(hdoc2.getProperties().getCoreProperties().getCreator());
            setPublicationDate(hdoc2.getProperties().getCoreProperties().getCreated());
        }

        m_currentDoc.createNewSection(SectionAnnotation.CHAPTER);

        // Find the title: either the (trimmed) document path, or the text of the first sentence
        // of the first paragraph of the first section.
        String title = null;
        if (m_filenameAsTitle) {
            title = m_docPath.trim();
        } else {
            final List<Section> sections = m_currentDoc.getSections();
            // if there are no sections, title stays null and is passed to checkTitle below
            if (sections.size() > 0) {
                try {
                    title = sections.get(0).getParagraphs().get(0).getSentences().get(0).getText().trim();
                } catch (IndexOutOfBoundsException e) {
                    // a section without paragraphs or sentences yields an empty title
                    LOGGER.debug("Parsed word document " + m_docPath + " is empty.");
                    title = "";
                }
            }
        }
        // fall back to the document path when checkTitle rejects the candidate
        if (!checkTitle(title)) {
            title = m_docPath.toString();
        }
        m_currentDoc.addTitle(title);

        return m_currentDoc.createDocument();
    } finally {
        // NOTE(review): if is.close() throws, the remaining close calls below are skipped.
        is.close();
        if (poifs != null) {
            poifs.close();
        }
        if (hdoc != null) {
            hdoc.close();
        }
        if (hdoc2 != null) {
            hdoc2.close();
        }
        if (extractor != null) {
            extractor.close();
        }
    }
}
From source file:org.nuclos.server.common.ooxml.WordXMLReader.java
License:Open Source License
/**
 * Creates a reader from a stream by building an {@link XWPFDocument} from it and delegating
 * to another constructor of this class with that document.
 *
 * @param is stream the document is built from; it is not closed by this constructor
 * @throws IOException if the document cannot be built from the stream
 */
public WordXMLReader(InputStream is) throws IOException {
    this(new XWPFDocument(is));
}
From source file:org.nuxeo.ecm.platform.template.tests.TestOOoConvert.java
License:Apache License
/**
 * Converts {@code data/testMe.html} to DOCX through the converter the service resolves for
 * {@code text/html} (expected to be {@code any2docx}) and checks that the extracted text is
 * non-empty and contains {@code "Titre 1"}.
 *
 * <p>The test is skipped via {@code assumeTrue} when the converter is not available. The
 * temporary DOCX file is deleted in a {@code finally} block and the stream reading it is
 * closed, so a failing assertion no longer leaves the file or an open handle behind.
 */
@Test
public void testOfficeConverter4() throws Exception {
    ConversionService cs = Framework.getService(ConversionService.class);
    BlobHolder bh = getBlobFromPath("data/testMe.html", "text/html");
    String converterName = cs.getConverterName(bh.getBlob().getMimeType(),
            "application/vnd.openxmlformats-officedocument.wordprocessingml.document");
    assertEquals("any2docx", converterName);

    boolean isAvailable = cs.isConverterAvailable(converterName).isAvailable();
    assumeTrue(isAvailable);

    BlobHolder result = cs.convert(converterName, bh, null);
    File docxFile = Framework.createTempFile("docxfile", "docx");
    try {
        result.getBlob().transferTo(docxFile);

        String text;
        // close the file stream before asserting, so the handle is released on every path
        try (FileInputStream in = new FileInputStream(docxFile)) {
            XWPFDocument doc = new XWPFDocument(in);
            XWPFWordExtractor extractor = new XWPFWordExtractor(doc);
            text = extractor.getText();
        }

        assertTrue(text.length() > 0);
        assertTrue(text.contains("Titre 1"));
    } finally {
        // remove the temporary file even when conversion or an assertion fails
        docxFile.delete();
    }
}
From source file:org.obeonetwork.m2doc.api.POIServices.java
License:Open Source License
/** * Get XWPFDocument from template file./* w w w.j av a 2 s. c o m*/ * * @param templateFile * IFile * @return XWPFDocument * @throws IOException * IOException */ public XWPFDocument getXWPFDocument(IFile templateFile) throws IOException { OPCPackage oPackage = getOPCPackage(templateFile); XWPFDocument document = new XWPFDocument(oPackage); return document; }