Example usage for org.apache.poi.xwpf.usermodel XWPFDocument getProperties

Usage examples for org.apache.poi.xwpf.usermodel.XWPFDocument.getProperties()

Introduction

On this page you can find usage examples for org.apache.poi.xwpf.usermodel.XWPFDocument.getProperties().

Prototype

public POIXMLProperties getProperties() 

Source Link

Document

Get the document properties.

Usage

From source file:org.knime.ext.textprocessing.nodes.source.parser.word.WordDocumentParser.java

License:Open Source License

/**
 * Parses the Word document provided by the given stream into a {@link Document}.
 * <p>
 * A fresh {@code DocumentBuilder} is initialised with the configured file path, type, category and source.
 * Depending on the suffix of {@code m_docPath} the content is read as a binary {@code .doc} file or as an
 * OOXML {@code .docx}/{@code .docm} file; every paragraph that does not consist of whitespace only is added,
 * and author and creation date are taken from the document meta data. For any other suffix no paragraphs and
 * no meta data are read.
 * <p>
 * The title is the document path if {@code m_filenameAsTitle} is set, otherwise the text of the first sentence
 * of the first paragraph of the first section. If {@code checkTitle} rejects that title, the document path is
 * used instead.
 *
 * @param is the stream to read the Word document from; it is always closed by this method
 * @return the document created from the parsed content
 * @throws Exception if reading or parsing the document fails, or if closing one of the resources fails
 */
private Document parseInternal(final InputStream is) throws Exception {
    m_currentDoc = new DocumentBuilder(m_tokenizerName);
    m_currentDoc.setDocumentFile(new File(m_docPath));
    m_currentDoc.setDocumentType(m_type);
    m_currentDoc.addDocumentCategory(m_category);
    m_currentDoc.addDocumentSource(m_source);

    POIFSFileSystem poifs = null;
    HWPFDocument hdoc = null;
    XWPFDocument hdoc2 = null;
    WordExtractor extractor = null;

    try {
        // doc files
        if (m_docPath.endsWith(".doc")) {
            // copy content of input stream into byte array since the content has to be read twice:
            // once for the text and once for the meta information.
            final ByteArrayOutputStream baos = new ByteArrayOutputStream();
            final byte[] buf = new byte[1024];
            int i = 0;
            while ((i = is.read(buf)) >= 0) {
                baos.write(buf, 0, i);
            }
            final byte[] content = baos.toByteArray();

            // open stream with copied content to read text
            InputStream copiedInput = new ByteArrayInputStream(content);
            hdoc = new HWPFDocument(copiedInput);
            extractor = new WordExtractor(hdoc);
            for (String p : extractor.getParagraphText()) {
                p = p.trim();
                if (!onlyWhitepscaes(p)) {
                    m_currentDoc.addParagraph(p);
                }
            }

            // open stream again with copied content to read meta info
            copiedInput = new ByteArrayInputStream(content);
            poifs = new POIFSFileSystem(copiedInput);
            final DirectoryEntry dir = poifs.getRoot();
            final DocumentEntry siEntry = (DocumentEntry) dir.getEntry(SummaryInformation.DEFAULT_STREAM_NAME);
            final PropertySet ps = new PropertySet(new DocumentInputStream(siEntry));

            final SummaryInformation si = new SummaryInformation(ps);

            setAuthor(si.getAuthor());
            setPublicationDate(si.getCreateDateTime());

            // docx files
        } else if (m_docPath.endsWith(".docx") || m_docPath.endsWith(".docm")) {
            hdoc2 = new XWPFDocument(is);
            final List<XWPFParagraph> paragraphs = hdoc2.getParagraphs();
            for (final XWPFParagraph paragraph : paragraphs) {
                final String text = paragraph.getText();
                if (!onlyWhitepscaes(text)) {
                    m_currentDoc.addParagraph(text);
                }
            }

            setAuthor(hdoc2.getProperties().getCoreProperties().getCreator());
            setPublicationDate(hdoc2.getProperties().getCoreProperties().getCreated());
        }

        m_currentDoc.createNewSection(SectionAnnotation.CHAPTER);

        // find title
        String title = null;

        if (m_filenameAsTitle) {
            title = m_docPath.trim();
        } else {
            final List<Section> sections = m_currentDoc.getSections();
            if (sections.size() > 0) {
                try {
                    title = sections.get(0).getParagraphs().get(0).getSentences().get(0).getText().trim();
                } catch (IndexOutOfBoundsException e) {
                    // first section has no paragraph or sentence to take a title from
                    LOGGER.debug("Parsed word document " + m_docPath + " is empty.");
                    title = "";
                }
            }
        }
        if (!checkTitle(title)) {
            title = m_docPath.toString();
        }
        m_currentDoc.addTitle(title);

        return m_currentDoc.createDocument();
    } finally {
        // Close the resources in the same order as before, but chained through nested finally blocks so that
        // a failing close() does not prevent the remaining resources from being released.
        try {
            is.close();
        } finally {
            try {
                if (poifs != null) {
                    poifs.close();
                }
            } finally {
                try {
                    if (hdoc != null) {
                        hdoc.close();
                    }
                } finally {
                    try {
                        if (hdoc2 != null) {
                            hdoc2.close();
                        }
                    } finally {
                        if (extractor != null) {
                            extractor.close();
                        }
                    }
                }
            }
        }
    }
}

From source file:org.obeonetwork.m2doc.generator.test.VariousTest.java

License:Open Source License

/**
 * Opens the properties test template and reads the list of its custom properties.
 * <p>
 * The test currently only checks that the template can be opened and its custom properties can be listed
 * without an exception; the per-property checks are still to be written.
 */
@Test
public void testPropertiesAccess()
        throws InvalidFormatException, IOException, DocumentParserException, DocumentGenerationException {
    // try-with-resources releases the file handle and the document even if reading the properties fails;
    // closing the document is expected to close the package it was created from as well.
    try (FileInputStream is = new FileInputStream("templates/propertiesTest.docx");
            XWPFDocument document = new XWPFDocument(OPCPackage.open(is))) {
        CustomProperties props = document.getProperties().getCustomProperties();
        List<CTProperty> properties = props.getUnderlyingProperties().getPropertyList();
        for (CTProperty property : properties) {
            // TODO Finish this.
        }
    }
}

From source file:org.obeonetwork.m2doc.properties.TemplateCustomProperties.java

License:Open Source License

/**
 * Reads the {@link CustomProperties} of the given document and files every named property under the first
 * category it matches: namespace URI, service import, service token or variable declaration.
 * 
 * @param doc
 *            the {@link XWPFDocument}
 */
private void parseProperties(XWPFDocument doc) {
    final CustomProperties customProperties = doc.getProperties().getCustomProperties();
    for (CTProperty customProperty : customProperties.getUnderlyingProperties().getPropertyList()) {
        final String rawName = customProperty.getName();
        if (rawName != null) {
            final String name = rawName.trim();

            // The categories are probed one after another; the first one that recognises the name wins.
            final String uri = getNsURI(name);
            if (uri != null) {
                nsURIs.add(uri);
            } else {
                final String importedService = getServiceImport(name);
                if (importedService != null) {
                    serviceClasses.add(importedService);
                } else {
                    final String token = getServiceToken(name);
                    if (token != null) {
                        serviceTokens.add(token);
                    } else {
                        final String variable = getVariableName(name);
                        if (variable != null && isValidVariableName(variable)) {
                            // the string value of the property is stored as the variable's value
                            variables.put(variable, customProperty.getLpwstr());
                        }
                    }
                }
            }
        }
    }
}

From source file:org.obeonetwork.m2doc.properties.TemplateInfo.java

License:Open Source License

/**
 * Collects service tokens and variable declarations from the custom properties of the given document.
 * <p>
 * Properties whose name starts with the service prefix contribute their string value, split at the service
 * token separator, to {@code serviceTokens}. Properties whose name starts with the variable prefix and is
 * longer than that prefix are stored in {@code variables}, keyed by the remainder of the name and mapped to
 * the property's string value. Properties without a name are skipped.
 *
 * @param document
 *            the document whose custom properties are read
 */
private void extractMetaData(XWPFDocument document) {
    final List<CTProperty> customProperties = document.getProperties().getCustomProperties()
            .getUnderlyingProperties().getPropertyList();
    for (CTProperty customProperty : customProperties) {
        final String propertyName = customProperty.getName();
        final int prefixLength = M2DocCustomProperties.VAR_PROPERTY_PREFIX.length();
        if (propertyName == null) {
            continue;
        }
        if (propertyName.startsWith(M2DocCustomProperties.SERVICE_PROPERTY_PREFIX)) {
            final String[] declaredTokens = customProperty.getLpwstr()
                    .split(M2DocCustomProperties.SERVICETOKEN_SEPARATOR);
            serviceTokens.addAll(Lists.newArrayList(declaredTokens));
        } else if (propertyName.startsWith(M2DocCustomProperties.VAR_PROPERTY_PREFIX)
                && propertyName.length() > prefixLength) {
            // NOTE(review): the name is cut one character past the prefix, presumably to skip a separator
            // that is not part of VAR_PROPERTY_PREFIX - confirm against the constant's value.
            variables.put(propertyName.substring(prefixLength + 1), customProperty.getLpwstr());
        }
    }
}

From source file:org.obeonetwork.m2doc.util.test.TemplateConfigUtilTest.java

License:Open Source License

/**
 * Stores a template configuration with two package mappings, one structured type and three variables into an
 * empty document and checks the custom properties that were written.
 */
@Test
public void testStore() {
    // given
    final XWPFDocument document = new XWPFDocument();
    final TplconfFactory factory = TplconfFactory.eINSTANCE;
    final String testUri = "http://www.test.com/some/test/uri";
    final TemplateConfig templateConfig = factory.createTemplateConfig();

    // mapping bound to the Ecore package
    final EPackageMapping mappingForEcore = factory.createEPackageMapping();
    mappingForEcore.setName(EcorePackage.eNAME);
    mappingForEcore.setUri(EcorePackage.eNS_URI);
    mappingForEcore.setEPackage(EcorePackage.eINSTANCE);
    templateConfig.getMappings().add(mappingForEcore);

    // mapping that only carries a URI
    final EPackageMapping mappingForTestUri = factory.createEPackageMapping();
    mappingForTestUri.setUri(testUri);
    templateConfig.getMappings().add(mappingForTestUri);

    // variable with a plain type name
    final TemplateVariable stringVariable = factory.createTemplateVariable();
    stringVariable.setName("v1");
    stringVariable.setTypeName("string");
    templateConfig.getVariables().add(stringVariable);

    // structured type registered under its qualified name
    final StructuredType classifierType = factory.createStructuredType();
    classifierType.setName("EClassifier");
    classifierType.setEClassifier(EcorePackage.eINSTANCE.getEClassifier());
    classifierType.setMappingName(EcorePackage.eNAME);
    classifierType.setMapping(mappingForEcore);
    templateConfig.getTypesByName().put("ecore::EClassifier", classifierType);

    // variable bound to the structured type
    final TemplateVariable classifierVariable = factory.createTemplateVariable();
    classifierVariable.setName("v2");
    classifierVariable.setTypeName("ecore::EClassifier");
    classifierVariable.setType(classifierType);
    templateConfig.getVariables().add(classifierVariable);

    // variable whose type name has no type object set
    final TemplateVariable unboundVariable = factory.createTemplateVariable();
    unboundVariable.setName("v3");
    unboundVariable.setTypeName("test::UnboundType");
    templateConfig.getVariables().add(unboundVariable);

    // when
    TemplateConfigUtil.store(templateConfig, document);

    // then
    final CustomProperties stored = document.getProperties().getCustomProperties();
    assertEquals(4, stored.getUnderlyingProperties().sizeOfPropertyArray());

    // both URIs end up in the single "m:uri" property, with a comma somewhere after the first character
    final String uriValue = stored.getProperty("m:uri").getLpwstr();
    assertTrue(uriValue.contains(EcorePackage.eNS_URI));
    assertTrue(uriValue.contains(testUri));
    assertTrue(uriValue.indexOf(",") > 0);

    // each variable is stored under "m:var:<name>" with its type name as value
    assertEquals("string", stored.getProperty("m:var:v1").getLpwstr());
    assertEquals("ecore::EClassifier", stored.getProperty("m:var:v2").getLpwstr());
    assertEquals("test::UnboundType", stored.getProperty("m:var:v3").getLpwstr());
}