Usage examples for org.apache.poi.xwpf.usermodel.XWPFDocument#getProperties()
public POIXMLProperties getProperties()
From source file:org.knime.ext.textprocessing.nodes.source.parser.word.WordDocumentParser.java
License:Open Source License
private Document parseInternal(final InputStream is) throws Exception { m_currentDoc = new DocumentBuilder(m_tokenizerName); m_currentDoc.setDocumentFile(new File(m_docPath)); m_currentDoc.setDocumentType(m_type); m_currentDoc.addDocumentCategory(m_category); m_currentDoc.addDocumentSource(m_source); POIFSFileSystem poifs = null;// www. ja v a 2 s . c o m HWPFDocument hdoc = null; XWPFDocument hdoc2 = null; WordExtractor extractor = null; try { // doc files if (m_docPath.endsWith(".doc")) { // copy content of input stream into byte array since content have to be red twice unfortunately. final ByteArrayOutputStream baos = new ByteArrayOutputStream(); final byte[] buf = new byte[1024]; int i = 0; while ((i = is.read(buf)) >= 0) { baos.write(buf, 0, i); } final byte[] content = baos.toByteArray(); // open stream with copied content to read text InputStream copiedInput = new ByteArrayInputStream(content); hdoc = new HWPFDocument(copiedInput); extractor = new WordExtractor(hdoc); for (String p : extractor.getParagraphText()) { p = p.trim(); if (!onlyWhitepscaes(p)) { m_currentDoc.addParagraph(p); } } // open stream again with copied content to read meta info copiedInput = new ByteArrayInputStream(content); poifs = new POIFSFileSystem(copiedInput); final DirectoryEntry dir = poifs.getRoot(); final DocumentEntry siEntry = (DocumentEntry) dir.getEntry(SummaryInformation.DEFAULT_STREAM_NAME); final PropertySet ps = new PropertySet(new DocumentInputStream(siEntry)); final SummaryInformation si = new SummaryInformation(ps); setAuthor(si.getAuthor()); setPublicationDate(si.getCreateDateTime()); // docx files } else if (m_docPath.endsWith(".docx") || m_docPath.endsWith(".docm")) { hdoc2 = new XWPFDocument(is); final List<XWPFParagraph> paragraphs = hdoc2.getParagraphs(); for (final XWPFParagraph paragraph : paragraphs) { final String text = paragraph.getText(); if (!onlyWhitepscaes(text)) { m_currentDoc.addParagraph(text); } } 
setAuthor(hdoc2.getProperties().getCoreProperties().getCreator()); setPublicationDate(hdoc2.getProperties().getCoreProperties().getCreated()); } m_currentDoc.createNewSection(SectionAnnotation.CHAPTER); // find title String title = null; if (m_filenameAsTitle) { title = m_docPath.trim(); } else { final List<Section> sections = m_currentDoc.getSections(); if (sections.size() > 0) { try { title = sections.get(0).getParagraphs().get(0).getSentences().get(0).getText().trim(); } catch (IndexOutOfBoundsException e) { LOGGER.debug("Parsed word document " + m_docPath + " is empty."); title = ""; } } } if (!checkTitle(title)) { title = m_docPath.toString(); } m_currentDoc.addTitle(title); return m_currentDoc.createDocument(); } finally { is.close(); if (poifs != null) { poifs.close(); } if (hdoc != null) { hdoc.close(); } if (hdoc2 != null) { hdoc2.close(); } if (extractor != null) { extractor.close(); } } }
From source file:org.obeonetwork.m2doc.generator.test.VariousTest.java
License:Open Source License
@Test public void testPropertiesAccess() throws InvalidFormatException, IOException, DocumentParserException, DocumentGenerationException { FileInputStream is = new FileInputStream("templates/propertiesTest.docx"); OPCPackage oPackage = OPCPackage.open(is); XWPFDocument document = new XWPFDocument(oPackage); CustomProperties props = document.getProperties().getCustomProperties(); List<CTProperty> properties = props.getUnderlyingProperties().getPropertyList(); for (CTProperty property : properties) { // TODO Finish this. }//w ww . ja v a 2 s . co m }
From source file:org.obeonetwork.m2doc.properties.TemplateCustomProperties.java
License:Open Source License
/**
 * Reads the custom properties of the given document and sorts each named property into the
 * matching collection of this instance.
 *
 * @param doc
 *            the {@link XWPFDocument} whose {@link CustomProperties} are parsed
 */
private void parseProperties(XWPFDocument doc) {
    final List<CTProperty> customProperties = doc.getProperties().getCustomProperties()
            .getUnderlyingProperties().getPropertyList();
    for (CTProperty customProperty : customProperties) {
        final String rawName = customProperty.getName();
        if (rawName != null) {
            registerProperty(rawName.trim(), customProperty);
        }
    }
}

/**
 * Registers a single custom property. The first interpretation that yields a non-null result
 * wins, in this order: namespace URI, service import, service token, variable.
 *
 * @param name
 *            the trimmed property name
 * @param property
 *            the property the name was read from
 */
private void registerProperty(String name, CTProperty property) {
    final String uri = getNsURI(name);
    if (uri != null) {
        nsURIs.add(uri);
        return;
    }

    final String serviceImport = getServiceImport(name);
    if (serviceImport != null) {
        serviceClasses.add(serviceImport);
        return;
    }

    final String token = getServiceToken(name);
    if (token != null) {
        serviceTokens.add(token);
        return;
    }

    final String variable = getVariableName(name);
    if (variable != null && isValidVariableName(variable)) {
        // the string value of the property is stored as-is under the variable name
        variables.put(variable, property.getLpwstr());
    }
}
From source file:org.obeonetwork.m2doc.properties.TemplateInfo.java
License:Open Source License
private void extractMetaData(XWPFDocument document) { CustomProperties props = document.getProperties().getCustomProperties(); List<CTProperty> properties = props.getUnderlyingProperties().getPropertyList(); for (CTProperty property : properties) { String name = property.getName(); int variablePrefixLength = M2DocCustomProperties.VAR_PROPERTY_PREFIX.length(); if (name != null) { if (name.startsWith(M2DocCustomProperties.SERVICE_PROPERTY_PREFIX)) { String[] tokens = property.getLpwstr().split(M2DocCustomProperties.SERVICETOKEN_SEPARATOR); serviceTokens.addAll(Lists.newArrayList(tokens)); } else if (name.startsWith(M2DocCustomProperties.VAR_PROPERTY_PREFIX) && name.length() > variablePrefixLength) { String variableName = name.substring(variablePrefixLength + 1); String type = property.getLpwstr(); variables.put(variableName, type); }//from www. j ava 2 s. c om } } }
From source file:org.obeonetwork.m2doc.util.test.TemplateConfigUtilTest.java
License:Open Source License
/**
 * Stores a template configuration with two package mappings, one structured type and three
 * variables into an empty document, then checks the custom properties written to that document.
 */
@Test
public void testStore() {
    // given
    final XWPFDocument document = new XWPFDocument();
    final TemplateConfig templateConfig = TplconfFactory.eINSTANCE.createTemplateConfig();

    // mapping for the Ecore package, fully resolved
    final EPackageMapping ecoreMapping = TplconfFactory.eINSTANCE.createEPackageMapping();
    ecoreMapping.setName(EcorePackage.eNAME);
    ecoreMapping.setUri(EcorePackage.eNS_URI);
    ecoreMapping.setEPackage(EcorePackage.eINSTANCE);
    templateConfig.getMappings().add(ecoreMapping);

    // mapping that only carries a URI
    final EPackageMapping uriOnlyMapping = TplconfFactory.eINSTANCE.createEPackageMapping();
    uriOnlyMapping.setUri("http://www.test.com/some/test/uri");
    templateConfig.getMappings().add(uriOnlyMapping);

    // variable with a plain type name
    final TemplateVariable stringVariable = TplconfFactory.eINSTANCE.createTemplateVariable();
    stringVariable.setName("v1");
    stringVariable.setTypeName("string");
    templateConfig.getVariables().add(stringVariable);

    // structured type bound to the Ecore mapping
    final StructuredType classifierType = TplconfFactory.eINSTANCE.createStructuredType();
    classifierType.setName("EClassifier");
    classifierType.setEClassifier(EcorePackage.eINSTANCE.getEClassifier());
    classifierType.setMappingName(EcorePackage.eNAME);
    classifierType.setMapping(ecoreMapping);
    templateConfig.getTypesByName().put("ecore::EClassifier", classifierType);

    // variable using the structured type
    final TemplateVariable classifierVariable = TplconfFactory.eINSTANCE.createTemplateVariable();
    classifierVariable.setName("v2");
    classifierVariable.setTypeName("ecore::EClassifier");
    classifierVariable.setType(classifierType);
    templateConfig.getVariables().add(classifierVariable);

    // variable whose type name has no type attached
    final TemplateVariable unboundVariable = TplconfFactory.eINSTANCE.createTemplateVariable();
    unboundVariable.setName("v3");
    unboundVariable.setTypeName("test::UnboundType");
    templateConfig.getVariables().add(unboundVariable);

    // when
    TemplateConfigUtil.store(templateConfig, document);

    // then
    final CustomProperties stored = document.getProperties().getCustomProperties();
    assertEquals(4, stored.getUnderlyingProperties().sizeOfPropertyArray());

    final String uris = stored.getProperty("m:uri").getLpwstr();
    assertTrue(uris.contains(EcorePackage.eNS_URI));
    assertTrue(uris.contains("http://www.test.com/some/test/uri"));
    assertTrue(uris.indexOf(",") > 0);

    final CTProperty storedV1 = stored.getProperty("m:var:v1");
    assertEquals("string", storedV1.getLpwstr());

    final CTProperty storedV2 = stored.getProperty("m:var:v2");
    assertEquals("ecore::EClassifier", storedV2.getLpwstr());

    final CTProperty storedV3 = stored.getProperty("m:var:v3");
    assertEquals("test::UnboundType", storedV3.getLpwstr());
}