Example usage for org.apache.poi.xwpf.usermodel XWPFDocument XWPFDocument

List of usage examples for the org.apache.poi.xwpf.usermodel.XWPFDocument constructor XWPFDocument(InputStream)

Introduction

On this page you can find example usages of the org.apache.poi.xwpf.usermodel.XWPFDocument constructor XWPFDocument(InputStream).

Prototype

public XWPFDocument(InputStream is) throws IOException 

Source Link

Usage

From source file:org.exoplatform.services.document.impl.MSXWordDocumentReader.java

License:Open Source License

/**
 * Opens the given stream as an {@link XWPFDocument} inside a privileged action, hands the
 * document to {@code POIPropertiesReader#readDCProperties} and returns the properties the
 * reader collected.
 *
 * @param is stream containing the DOCX content
 * @return the properties gathered by the {@code POIPropertiesReader}
 * @throws IOException if the privileged open of the document fails with an I/O error
 * @throws DocumentReadException declared by this method's contract
 * @see org.exoplatform.services.document.DocumentReader#getProperties(java.io.InputStream)
 */
public Properties getProperties(final InputStream is) throws IOException, DocumentReadException {
    final POIPropertiesReader propertiesReader = new POIPropertiesReader();

    // The document is constructed inside the privileged action, not before it.
    final PrivilegedExceptionAction<XWPFDocument> openDocument = new PrivilegedExceptionAction<XWPFDocument>() {
        public XWPFDocument run() throws Exception {
            return new XWPFDocument(is);
        }
    };

    propertiesReader.readDCProperties(SecurityHelper.doPrivilegedIOExceptionAction(openDocument));
    return propertiesReader.getProperties();
}

From source file:org.isisaddons.module.xdocreport.dom.example.Generate2Pdf.java

License:Apache License

/**
 * Converts {@code target/Project.docx} to {@code target/Project.pdf}.
 * <p>
 * Both file streams are opened with try-with-resources so their handles are released even when
 * loading the document or converting it fails.
 *
 * @param args command-line arguments (not used)
 * @throws IOException if the DOCX file cannot be read or the PDF file cannot be written
 * @throws XDocReportException declared for failures reported by the XDocReport conversion
 */
public static void main(String[] args) throws IOException, XDocReportException {

    // 1) Load DOCX into XWPFDocument
    try (InputStream in = new FileInputStream(new File("target/Project.docx"))) {
        XWPFDocument document = new XWPFDocument(in);

        // 2) Prepare Pdf options
        PdfOptions options = PdfOptions.create();

        // 3) Convert XWPFDocument to Pdf
        // The output file is opened only after the DOCX loaded successfully, so a failed load
        // does not leave an empty PDF behind.
        try (OutputStream out = new FileOutputStream(new File("target/Project.pdf"))) {
            PdfConverter.getInstance().convert(document, out, options);
        }
    }
}

From source file:org.isisaddons.module.xdocreport.dom.service.XDocReportService.java

License:Apache License

/**
 * Converts an in-memory DOCX document to PDF.
 * <p>
 * The bytes are parsed as an {@link XWPFDocument} and converted with {@code PdfConverter},
 * using this object's {@code pdfOptions}, into an in-memory buffer.
 *
 * @param docxBytes the complete DOCX file content
 * @return the bytes written by the PDF converter
 * @throws IOException if the DOCX content cannot be parsed or the conversion reports an I/O error
 */
private byte[] toPdf(final byte[] docxBytes) throws IOException {
    final ByteArrayOutputStream pdfBuffer = new ByteArrayOutputStream();
    final XWPFDocument wordDocument = new XWPFDocument(new ByteArrayInputStream(docxBytes));

    PdfConverter.getInstance().convert(wordDocument, pdfBuffer, pdfOptions);
    return pdfBuffer.toByteArray();
}

From source file:org.joeffice.wordprocessor.reader.POIDocxReader.java

License:Apache License

/**
 * Reads the content of the specified stream into the document.
 * <p>
 * The stream is parsed as an {@link XWPFDocument} and its body elements are passed to
 * {@code iteratePart}. Afterwards the current offset is set to {@code offset} and the parsed
 * POI document is stored on the target document under the property key {@code "XWPFDocument"}.
 *
 * @param in stream containing the DOCX content
 * @param offset value assigned to the reader's current offset after the body has been iterated
 * @throws IOException if the stream cannot be parsed as a DOCX document
 * @throws BadLocationException declared by this method; presumably raised while content is
 *             inserted into the target document (confirm against {@code iteratePart})
 */
public void read(InputStream in, int offset) throws IOException, BadLocationException {
    final XWPFDocument parsed = new XWPFDocument(in);
    poiDocument = parsed;

    iteratePart(parsed.getBodyElements());

    currentOffset = offset;
    document.putProperty("XWPFDocument", poiDocument);
}

From source file:org.kimios.kernel.index.filters.ExcelXFilter.java

License:Open Source License

/**
 * Extracts the text of a DOCX document read from the given stream.
 * <p>
 * The stream is parsed as an {@link XWPFDocument} and its text is obtained through an
 * {@link XWPFWordExtractor}. The extractor is closed before returning so that the resources
 * held by the parsed document are released instead of being left to the garbage collector.
 *
 * @param in stream containing the DOCX content
 * @return the text returned by the extractor
 * @throws IOException if the stream cannot be parsed or the extractor cannot be closed
 */
public String getBody(InputStream in) throws IOException {
    try (XWPFWordExtractor extractor = new XWPFWordExtractor(new XWPFDocument(in))) {
        return extractor.getText();
    }
}

From source file:org.kino.server.api.contractgenerator.java

/**
 * Loads a DOCX template, applies the replacements and writes the resulting document.
 * <p>
 * {@code replaceInParagraphs} is applied to the paragraphs returned by
 * {@code XWPFDocument#getParagraphs()} and then to the paragraphs of every cell of every row
 * of the tables returned by {@code XWPFDocument#getTables()}.
 *
 * @param src stream containing the DOCX template
 * @param dststrem stream the processed document is written to
 * @param replacementMap replacements handed to {@code replaceInParagraphs}
 * @throws InvalidFormatException declared by this method's contract
 * @throws IOException if the template cannot be read or the result cannot be written
 */
static void writeDocxTemplate(InputStream src, OutputStream dststrem, Map<String, String> replacementMap)
        throws InvalidFormatException, IOException {
    final XWPFDocument template = new XWPFDocument(src);

    // Paragraphs reported directly by the document come first ...
    replaceInParagraphs(replacementMap, template.getParagraphs());

    // ... then the paragraphs nested in table cells.
    for (final XWPFTable table : template.getTables()) {
        for (final XWPFTableRow tableRow : table.getRows()) {
            for (final XWPFTableCell tableCell : tableRow.getTableCells()) {
                replaceInParagraphs(replacementMap, tableCell.getParagraphs());
            }
        }
    }

    template.write(dststrem);
}

From source file:org.knime.ext.textprocessing.nodes.source.parser.word.WordDocumentParser.java

License:Open Source License

/**
 * Parses a Word document from the given stream into a {@code Document}.
 * <p>
 * A new {@code DocumentBuilder} is created and filled with the file, type, category and source
 * held by this parser. The parsing path is chosen from the suffix of {@code m_docPath}:
 * <ul>
 * <li>{@code .doc}: the stream is buffered into a byte array, paragraphs are read through a
 * {@code WordExtractor} (trimmed), and author and creation date are read from the summary
 * information stream.</li>
 * <li>{@code .docx} / {@code .docm}: paragraphs are read from an {@code XWPFDocument} (not
 * trimmed), and author and creation date are read from its core properties.</li>
 * </ul>
 * For any other suffix no paragraphs are added. Paragraphs for which {@code onlyWhitepscaes}
 * returns {@code true} are skipped. Finally a chapter section is created and the title is set.
 * <p>
 * The input stream and all opened POI resources are closed in the {@code finally} block.
 *
 * @param is stream containing the Word document; closed by this method
 * @return the document created by the builder
 * @throws Exception if reading or parsing the document fails
 */
private Document parseInternal(final InputStream is) throws Exception {
    m_currentDoc = new DocumentBuilder(m_tokenizerName);
    m_currentDoc.setDocumentFile(new File(m_docPath));
    m_currentDoc.setDocumentType(m_type);
    m_currentDoc.addDocumentCategory(m_category);
    m_currentDoc.addDocumentSource(m_source);

    // declared outside the try block so the finally block can close whichever were opened
    POIFSFileSystem poifs = null;
    HWPFDocument hdoc = null;
    XWPFDocument hdoc2 = null;
    WordExtractor extractor = null;

    try {
        // doc files
        if (m_docPath.endsWith(".doc")) {
            // copy content of input stream into byte array since content has to be read twice unfortunately.
            final ByteArrayOutputStream baos = new ByteArrayOutputStream();
            final byte[] buf = new byte[1024];
            int i = 0;
            while ((i = is.read(buf)) >= 0) {
                baos.write(buf, 0, i);
            }
            final byte[] content = baos.toByteArray();

            // open stream with copied content to read text
            InputStream copiedInput = new ByteArrayInputStream(content);
            hdoc = new HWPFDocument(copiedInput);
            extractor = new WordExtractor(hdoc);
            for (String p : extractor.getParagraphText()) {
                p = p.trim();
                if (!onlyWhitepscaes(p)) {
                    m_currentDoc.addParagraph(p);
                }
            }

            // open stream again with copied content to read meta info
            copiedInput = new ByteArrayInputStream(content);
            poifs = new POIFSFileSystem(copiedInput);
            final DirectoryEntry dir = poifs.getRoot();
            final DocumentEntry siEntry = (DocumentEntry) dir.getEntry(SummaryInformation.DEFAULT_STREAM_NAME);
            final PropertySet ps = new PropertySet(new DocumentInputStream(siEntry));

            final SummaryInformation si = new SummaryInformation(ps);

            setAuthor(si.getAuthor());
            setPublicationDate(si.getCreateDateTime());

            // docx files
        } else if (m_docPath.endsWith(".docx") || m_docPath.endsWith(".docm")) {
            hdoc2 = new XWPFDocument(is);
            final List<XWPFParagraph> paragraphs = hdoc2.getParagraphs();
            for (final XWPFParagraph paragraph : paragraphs) {
                // unlike the .doc branch, the paragraph text is not trimmed here
                final String text = paragraph.getText();
                if (!onlyWhitepscaes(text)) {
                    m_currentDoc.addParagraph(text);
                }
            }

            setAuthor(hdoc2.getProperties().getCoreProperties().getCreator());
            setPublicationDate(hdoc2.getProperties().getCoreProperties().getCreated());
        }

        m_currentDoc.createNewSection(SectionAnnotation.CHAPTER);

        // find title
        String title = null;

        if (m_filenameAsTitle) {
            title = m_docPath.trim();
        } else {
            // use the first sentence of the first paragraph of the first section as title
            final List<Section> sections = m_currentDoc.getSections();
            if (sections.size() > 0) {
                try {
                    title = sections.get(0).getParagraphs().get(0).getSentences().get(0).getText().trim();
                } catch (IndexOutOfBoundsException e) {
                    // first section has no paragraph, or its first paragraph has no sentence
                    LOGGER.debug("Parsed word document " + m_docPath + " is empty.");
                    title = "";
                }
            }
            // NOTE(review): title is still null here when there are no sections, so
            // checkTitle receives null -- confirm that it handles that.
        }
        // fall back to the document path when checkTitle rejects the title
        if (!checkTitle(title)) {
            title = m_docPath.toString();
        }
        m_currentDoc.addTitle(title);

        return m_currentDoc.createDocument();
    } finally {
        // NOTE(review): if any close() call below throws, the remaining resources are not closed.
        is.close();
        if (poifs != null) {
            poifs.close();
        }
        if (hdoc != null) {
            hdoc.close();
        }
        if (hdoc2 != null) {
            hdoc2.close();
        }
        if (extractor != null) {
            extractor.close();
        }
    }
}

From source file:org.nuclos.server.common.ooxml.WordXMLReader.java

License:Open Source License

/**
 * Creates a reader from a stream by parsing it as an {@link XWPFDocument} and delegating to
 * another constructor of this class with the parsed document.
 *
 * @param is stream containing the DOCX content
 * @throws IOException if the stream cannot be parsed as an {@code XWPFDocument}
 */
public WordXMLReader(InputStream is) throws IOException {
    this(new XWPFDocument(is));
}

From source file:org.nuxeo.ecm.platform.template.tests.TestOOoConvert.java

License:Apache License

/**
 * Converts the HTML test resource to DOCX with the {@code any2docx} converter and checks that
 * the text extracted from the resulting file is non-empty and contains {@code "Titre 1"}.
 * <p>
 * The converted blob is written to a temporary file. The file stream is closed before the
 * assertions run, and the temporary file is deleted in a {@code finally} block so it is
 * removed even when an assertion fails.
 *
 * @throws Exception if the conversion or reading the converted document fails
 */
@Test
public void testOfficeConverter4() throws Exception {
    ConversionService cs = Framework.getService(ConversionService.class);

    BlobHolder bh = getBlobFromPath("data/testMe.html", "text/html");
    String converterName = cs.getConverterName(bh.getBlob().getMimeType(),
            "application/vnd.openxmlformats-officedocument.wordprocessingml.document");
    assertEquals("any2docx", converterName);

    // the rest of the test only makes sense when the converter can actually run
    boolean isAvailable = cs.isConverterAvailable(converterName).isAvailable();
    assumeTrue(isAvailable);

    BlobHolder result = cs.convert(converterName, bh, null);
    File docxFile = Framework.createTempFile("docxfile", "docx");
    try {
        result.getBlob().transferTo(docxFile);

        String text;
        // Release the file handle before asserting; an open handle can block deletion.
        try (FileInputStream docxStream = new FileInputStream(docxFile)) {
            XWPFDocument doc = new XWPFDocument(docxStream);
            XWPFWordExtractor extractor = new XWPFWordExtractor(doc);
            text = extractor.getText();
        }

        assertTrue(text.length() > 0);
        assertTrue(text.contains("Titre 1"));
    } finally {
        // clean up the temp file whether or not the assertions passed
        docxFile.delete();
    }
}

From source file:org.obeonetwork.m2doc.api.POIServices.java

License:Open Source License

/**
 * Get XWPFDocument from template file./* w w w.j av  a  2  s. c o m*/
 * 
 * @param templateFile
 *            IFile
 * @return XWPFDocument
 * @throws IOException
 *             IOException
 */
public XWPFDocument getXWPFDocument(IFile templateFile) throws IOException {
    OPCPackage oPackage = getOPCPackage(templateFile);
    XWPFDocument document = new XWPFDocument(oPackage);
    return document;
}