List of usage examples for org.apache.poi.xwpf.usermodel XWPFDocument getParagraphs
@Override
public List<XWPFParagraph> getParagraphs()
From source file:orcamentotraducao.OrcamentoTraducao.java
/** * @param args the command line arguments *///from ww w . ja va2 s . com public static void main(String[] args) { // TODO code application logic here Scanner scan = new Scanner(System.in); System.out.println("Informe o nome do arquivo:"); String filename = scan.nextLine(); String typeFile = filename.substring(filename.length() - 3, filename.length()); if (!typeFile.matches("ocx") && !typeFile.matches("doc")) { System.out.println("Este formato de arquivo no suportado\n"); System.exit(0); } try { File file = new File(filename); FileInputStream fis = new FileInputStream(file.getAbsolutePath()); String allText = ""; int lines = 0; if (typeFile.matches("ocx")) { XWPFDocument document = new XWPFDocument(fis); List<XWPFParagraph> paragraphs = document.getParagraphs(); for (XWPFParagraph para : paragraphs) { allText += para.getText() + " "; lines++; } fis.close(); } else if (typeFile.matches("doc")) { WordExtractor extractor = new WordExtractor(new HWPFDocument(fis)); allText = extractor.getText(); } String allTextExploded[] = allText.split(" "); int words = allTextExploded.length; int characters = allText.length(); System.out.println("H " + words + " palavras"); System.out.println("H " + characters + " caracteres"); System.out.println("H " + lines + " linhas"); System.out.println("O oramento estimado de R$" + calculate(characters, words, lines)); } catch (Exception e) { e.printStackTrace(); } }
From source file:org.ArticleEditor.OptionsView.MenuOptionsTopComponent.java
/**
 * Returns the number of paragraphs reported by the given document.
 *
 * @param document the document whose paragraphs are counted
 * @return the size of the list returned by {@code document.getParagraphs()}
 * @throws FileNotFoundException declared in the signature; not raised by the visible code
 * @throws IOException declared in the signature; not raised by the visible code
 */
public int getNumParagraphs(XWPFDocument document) throws FileNotFoundException, IOException {
    return document.getParagraphs().size();
}
From source file:org.kino.server.api.contractgenerator.java
/**
 * Loads a .docx document from {@code src}, applies {@code replaceInParagraphs} to the
 * document's own paragraphs and to the paragraphs of every table cell, then writes the
 * document to {@code dststrem}.
 *
 * @param src            stream the document is read from
 * @param dststrem       stream the processed document is written to
 * @param replacementMap map handed unchanged to {@code replaceInParagraphs}
 * @throws InvalidFormatException declared in the signature
 * @throws IOException            if reading or writing the document fails
 */
static void writeDocxTemplate(InputStream src, OutputStream dststrem, Map<String, String> replacementMap)
        throws InvalidFormatException, IOException {
    XWPFDocument template = new XWPFDocument(src);

    // Paragraphs returned by the document itself are processed first.
    replaceInParagraphs(replacementMap, template.getParagraphs());

    // Table content is reached cell by cell.
    for (XWPFTable table : template.getTables()) {
        for (XWPFTableRow tableRow : table.getRows()) {
            for (XWPFTableCell tableCell : tableRow.getTableCells()) {
                replaceInParagraphs(replacementMap, tableCell.getParagraphs());
            }
        }
    }

    template.write(dststrem);
}
From source file:org.knime.ext.textprocessing.nodes.source.parser.word.WordDocumentParser.java
License:Open Source License
/**
 * Builds a {@code Document} from a Word file read from the given stream.
 *
 * <p>The branch taken depends on the suffix of {@code m_docPath}:
 * <ul>
 * <li>{@code .doc}: the stream is copied into a byte array and read twice, once to
 * extract paragraph text and once to read author and creation date from the summary
 * information entry.</li>
 * <li>{@code .docx} / {@code .docm}: paragraphs are read directly from the stream and
 * author and creation date are taken from the core properties.</li>
 * </ul>
 * Paragraphs for which {@code onlyWhitepscaes(...)} returns true are not added.
 * The title is {@code m_docPath} when {@code m_filenameAsTitle} is set, otherwise the
 * text of the first sentence of the first paragraph of the first section; if
 * {@code checkTitle} rejects the result, {@code m_docPath} is used instead.
 *
 * @param is the stream to read the Word file from; it is closed by this method
 * @return the document created by the builder
 * @throws Exception if reading or parsing fails
 */
private Document parseInternal(final InputStream is) throws Exception {
    m_currentDoc = new DocumentBuilder(m_tokenizerName);
    m_currentDoc.setDocumentFile(new File(m_docPath));
    m_currentDoc.setDocumentType(m_type);
    m_currentDoc.addDocumentCategory(m_category);
    m_currentDoc.addDocumentSource(m_source);

    // Declared outside the try block so the finally block can close whichever were opened.
    POIFSFileSystem poifs = null;
    HWPFDocument hdoc = null;
    XWPFDocument hdoc2 = null;
    WordExtractor extractor = null;

    try {
        // doc files
        if (m_docPath.endsWith(".doc")) {
            // Copy the content of the input stream into a byte array, since the
            // content has to be read twice.
            final ByteArrayOutputStream baos = new ByteArrayOutputStream();
            final byte[] buf = new byte[1024];
            int i = 0;
            while ((i = is.read(buf)) >= 0) {
                baos.write(buf, 0, i);
            }
            final byte[] content = baos.toByteArray();

            // open stream with copied content to read text
            InputStream copiedInput = new ByteArrayInputStream(content);
            hdoc = new HWPFDocument(copiedInput);
            extractor = new WordExtractor(hdoc);
            for (String p : extractor.getParagraphText()) {
                p = p.trim();
                if (!onlyWhitepscaes(p)) {
                    m_currentDoc.addParagraph(p);
                }
            }

            // open stream again with copied content to read meta info
            copiedInput = new ByteArrayInputStream(content);
            poifs = new POIFSFileSystem(copiedInput);
            final DirectoryEntry dir = poifs.getRoot();
            final DocumentEntry siEntry = (DocumentEntry) dir.getEntry(SummaryInformation.DEFAULT_STREAM_NAME);
            final PropertySet ps = new PropertySet(new DocumentInputStream(siEntry));
            final SummaryInformation si = new SummaryInformation(ps);

            setAuthor(si.getAuthor());
            setPublicationDate(si.getCreateDateTime());

            // docx files
        } else if (m_docPath.endsWith(".docx") || m_docPath.endsWith(".docm")) {
            hdoc2 = new XWPFDocument(is);
            final List<XWPFParagraph> paragraphs = hdoc2.getParagraphs();
            for (final XWPFParagraph paragraph : paragraphs) {
                final String text = paragraph.getText();
                if (!onlyWhitepscaes(text)) {
                    m_currentDoc.addParagraph(text);
                }
            }

            setAuthor(hdoc2.getProperties().getCoreProperties().getCreator());
            setPublicationDate(hdoc2.getProperties().getCoreProperties().getCreated());
        }

        // All paragraphs added so far are grouped into one CHAPTER section.
        m_currentDoc.createNewSection(SectionAnnotation.CHAPTER);

        // find title
        String title = null;
        if (m_filenameAsTitle) {
            title = m_docPath.trim();
        } else {
            final List<Section> sections = m_currentDoc.getSections();
            if (sections.size() > 0) {
                try {
                    // First sentence of the first paragraph of the first section.
                    title = sections.get(0).getParagraphs().get(0).getSentences().get(0).getText().trim();
                } catch (IndexOutOfBoundsException e) {
                    // One of the lists above was empty.
                    LOGGER.debug("Parsed word document " + m_docPath + " is empty.");
                    title = "";
                }
            }
        }
        // Fall back to the document path when checkTitle rejects the candidate
        // (title may still be null here if no sections were found).
        if (!checkTitle(title)) {
            title = m_docPath.toString();
        }
        m_currentDoc.addTitle(title);

        return m_currentDoc.createDocument();
    } finally {
        // NOTE(review): if is.close() throws, the remaining resources below are not closed.
        is.close();
        if (poifs != null) {
            poifs.close();
        }
        if (hdoc != null) {
            hdoc.close();
        }
        if (hdoc2 != null) {
            hdoc2.close();
        }
        if (extractor != null) {
            extractor.close();
        }
    }
}
From source file:org.obeonetwork.m2doc.generator.test.TemplateParsingValidatorTest.java
License:Open Source License
/** * Tests that parsing errors from AQL template tag (conditional here) are placed next to the start tag. * The tested tag is <{m:wrong->.}ajout de value1{m:endif}> * The expected tag is : <{m:wrong->.} Expression wrong->. is invalid ajout de value1{m:endif}> * After the run with the end '}' char, the following runs must be present : * A run must contains blanks char./*from w w w.ja v a 2s . com*/ * The next one must contains the error message. * The next one must contains blank char and the next one the static content of the conditional. * * @throws InvalidFormatException * @throws IOException * @throws DocumentParserException * @throws DocumentGenerationException */ @Test public void testErrorInStartTag() throws InvalidFormatException, IOException, DocumentParserException, DocumentGenerationException { FileInputStream is = new FileInputStream("templates/testParsingErrorStartTag.docx"); OPCPackage oPackage = OPCPackage.open(is); XWPFDocument document = new XWPFDocument(oPackage); BodyParser parser = new BodyParser(document, env); Template template = parser.parseTemplate(); TemplateValidationGenerator validator = new TemplateValidationGenerator(); validator.doSwitch(template); createDestinationDocument(document, "results/testParsingErrorStartTag.docx"); // scan the destination document assertEquals(2, document.getParagraphs().size()); assertEquals(16, document.getParagraphs().get(0).getRuns().size()); assertEquals(1, document.getParagraphs().get(1).getRuns().size()); assertEquals(" ", document.getParagraphs().get(0).getRuns().get(5).getText(0)); assertEquals("<---", document.getParagraphs().get(0).getRuns().get(6).getText(0)); assertEquals("FF0000", document.getParagraphs().get(0).getRuns().get(6).getColor()); assertEquals(16, document.getParagraphs().get(0).getRuns().get(6).getFontSize()); assertEquals(STHighlightColor.LIGHT_GRAY, document.getParagraphs().get(0).getRuns().get(7).getCTR().getRPr().getHighlight().getVal()); assertEquals("Expression \"wrong->.\" is 
invalid: missing collection service call", document.getParagraphs().get(0).getRuns().get(7).getText(0)); assertEquals("FF0000", document.getParagraphs().get(0).getRuns().get(7).getColor()); assertEquals(16, document.getParagraphs().get(0).getRuns().get(7).getFontSize()); assertEquals(STHighlightColor.LIGHT_GRAY, document.getParagraphs().get(0).getRuns().get(7).getCTR().getRPr().getHighlight().getVal()); assertEquals(" ", document.getParagraphs().get(0).getRuns().get(8).getText(0)); assertEquals("ajout de value1", document.getParagraphs().get(0).getRuns().get(9).getText(0)); assertEquals("Unexpected tag m:endif at this location", document.getParagraphs().get(0).getRuns().get(13).getText(0)); }
From source file:org.obeonetwork.m2doc.generator.test.TemplateParsingValidatorTest.java
License:Open Source License
/** * Tests that parsing errors from AQL template tag (conditional here) are placed next to the start tag. * The tested tag is <{m:diagram provider:"noExistingProvider" width:"500" height:"500" title="representationTitle"}Some text> * The expected tag is : <{m:diagram provider:"noExistingProvider" width:"500" height:"500" title="representationTitle"}<---The image * tag is referencing an unknown diagram provider : 'noExistingProvider' Some text> * After the run with the end '}' char, the following runs must be present : * A run must contains blanks char.//ww w.j av a2s.co m * The next one must contains the error message. * The next one is a blank separator. * The next one must contains the other error message. * The next one must contains blank char and the next one the static content after the tag in the original template. * * @throws InvalidFormatException * @throws IOException * @throws DocumentParserException * @throws DocumentGenerationException */ @Test public void testErrorInSimpleTag() throws InvalidFormatException, IOException, DocumentParserException, DocumentGenerationException { FileInputStream is = new FileInputStream("templates/testParsingErrorSimpleTag.docx"); OPCPackage oPackage = OPCPackage.open(is); XWPFDocument document = new XWPFDocument(oPackage); BodyParser parser = new BodyParser(document, env); Template template = parser.parseTemplate(); TemplateValidationGenerator validator = new TemplateValidationGenerator(); validator.doSwitch(template); createDestinationDocument(document, "results/testParsingErrorSimpleTag.docx"); // scan the destination document assertEquals(2, document.getParagraphs().size()); assertEquals(11, document.getParagraphs().get(0).getRuns().size()); assertEquals(1, document.getParagraphs().get(1).getRuns().size()); assertEquals(" ", document.getParagraphs().get(0).getRuns().get(2).getText(0)); assertEquals("<---", document.getParagraphs().get(0).getRuns().get(3).getText(0)); assertEquals("FF0000", 
document.getParagraphs().get(0).getRuns().get(3).getColor()); assertEquals(16, document.getParagraphs().get(0).getRuns().get(3).getFontSize()); assertEquals(STHighlightColor.LIGHT_GRAY, document.getParagraphs().get(0).getRuns().get(5).getCTR().getRPr().getHighlight().getVal()); assertEquals("The image tag is referencing an unknown diagram provider : 'noExistingProvider'", document.getParagraphs().get(0).getRuns().get(5).getText(0)); assertEquals("FF0000", document.getParagraphs().get(0).getRuns().get(5).getColor()); assertEquals(16, document.getParagraphs().get(0).getRuns().get(5).getFontSize()); assertEquals(STHighlightColor.LIGHT_GRAY, document.getParagraphs().get(0).getRuns().get(5).getCTR().getRPr().getHighlight().getVal()); assertEquals("Some text", document.getParagraphs().get(0).getRuns().get(10).getText(0)); }
From source file:org.obeonetwork.m2doc.generator.test.TemplateParsingValidatorTest.java
License:Open Source License
/** * Tests that parsing errors from AQL template tag (conditional here) are placed next to the start tag. * The tested tag is <{m:diagram provider:"noExistingProvider" width:"500" height:"500" title="representationTitle"}Some text> * The expected tag is : <{m:diagram provider:"noExistingProvider" width:"500" height:"500" title="representationTitle"}<---The image * tag is referencing an unknown diagram provider : 'noExistingProvider' <---The start of an option's key has been read but the end of * it and the value were missing : ' title="representationTitle"'. Some text> * After the run with the end '}' char, the following runs must be present : * A run must contains blanks char.//from w ww . ja v a 2s . com * The next one must contains the error message. * The next one is a blank separator. * The next one must contains the other error message. * The next one must contains blank char and the next one the static content after the tag in the original template. * * @throws InvalidFormatException * @throws IOException * @throws DocumentParserException * @throws DocumentGenerationException */ @Test public void testMultiErrorInSimpleTag() throws InvalidFormatException, IOException, DocumentParserException, DocumentGenerationException { FileInputStream is = new FileInputStream("templates/testMultiParsingErrorSimpleTag.docx"); OPCPackage oPackage = OPCPackage.open(is); XWPFDocument document = new XWPFDocument(oPackage); BodyParser parser = new BodyParser(document, env); Template template = parser.parseTemplate(); TemplateValidationGenerator validator = new TemplateValidationGenerator(); validator.doSwitch(template); createDestinationDocument(document, "results/testMultiParsingErrorSimpleTag.docx"); // scan the destination document assertEquals(1, document.getParagraphs().size()); assertEquals(14, document.getParagraphs().get(0).getRuns().size()); assertEquals(" ", document.getParagraphs().get(0).getRuns().get(2).getText(0)); assertEquals("<---", 
document.getParagraphs().get(0).getRuns().get(3).getText(0)); assertEquals("FF0000", document.getParagraphs().get(0).getRuns().get(3).getColor()); assertEquals(16, document.getParagraphs().get(0).getRuns().get(3).getFontSize()); assertEquals(STHighlightColor.LIGHT_GRAY, document.getParagraphs().get(0).getRuns().get(3).getCTR().getRPr().getHighlight().getVal()); assertEquals("The image tag is referencing an unknown diagram provider : 'noExistingProvider'", document.getParagraphs().get(0).getRuns().get(6).getText(0)); assertEquals("FF0000", document.getParagraphs().get(0).getRuns().get(6).getColor()); assertEquals(16, document.getParagraphs().get(0).getRuns().get(6).getFontSize()); assertEquals(STHighlightColor.LIGHT_GRAY, document.getParagraphs().get(0).getRuns().get(6).getCTR().getRPr().getHighlight().getVal()); assertEquals(" ", document.getParagraphs().get(0).getRuns().get(10).getText(0)); assertEquals("<---", document.getParagraphs().get(0).getRuns().get(11).getText(0)); assertEquals("FF0000", document.getParagraphs().get(0).getRuns().get(11).getColor()); assertEquals(16, document.getParagraphs().get(0).getRuns().get(11).getFontSize()); assertEquals(STHighlightColor.LIGHT_GRAY, document.getParagraphs().get(0).getRuns().get(11).getCTR().getRPr().getHighlight().getVal()); assertEquals( "The start of an option's key has been read but the end of it and the value were missing : ' title=\"representationTitle\"'.", document.getParagraphs().get(0).getRuns().get(12).getText(0)); assertEquals("FF0000", document.getParagraphs().get(0).getRuns().get(12).getColor()); assertEquals(16, document.getParagraphs().get(0).getRuns().get(12).getFontSize()); assertEquals(STHighlightColor.LIGHT_GRAY, document.getParagraphs().get(0).getRuns().get(11).getCTR().getRPr().getHighlight().getVal()); assertEquals("Some text", document.getParagraphs().get(0).getRuns().get(13).getText(0)); }
From source file:org.obeonetwork.m2doc.generator.test.TemplateParsingValidatorTest.java
License:Open Source License
/** * Tests that parsing errors from AQL template tag (conditional here) are placed next to the start tag. * The tested tag is <{m:diagram provider:"noExistingProvider" width:"500" height:"500" title="representationTitle"}Some text> * The expected tag is : <{m:diagram provider:"noExistingProvider" width:"500" height:"500" title="representationTitle"}<---The image * tag is referencing an unknown diagram provider : 'noExistingProvider' Some text> * After the run with the end '}' char, the following runs must be present : * A run must contains blanks char./* ww w. j a va2 s . co m*/ * The next one must contains the error message. * The next one is a blank separator. * The next one must contains the other error message. * The next one must contains blank char and the next one the static content after the tag in the original template. * * @throws InvalidFormatException * @throws IOException * @throws DocumentParserException * @throws DocumentGenerationException */ @Test public void testErrorInEndTag() throws InvalidFormatException, IOException, DocumentParserException, DocumentGenerationException { FileInputStream is = new FileInputStream("templates/testParsingErrorEndTag.docx"); OPCPackage oPackage = OPCPackage.open(is); XWPFDocument document = new XWPFDocument(oPackage); BodyParser parser = new BodyParser(document, env); Template template = parser.parseTemplate(); TemplateValidationGenerator validator = new TemplateValidationGenerator(); validator.doSwitch(template); createDestinationDocument(document, "results/testParsingErrorEndTag.docx"); // scan the destination document assertEquals(1, document.getParagraphs().size()); assertEquals(24, document.getParagraphs().get(0).getRuns().size()); assertEquals(" ", document.getParagraphs().get(0).getRuns().get(9).getText(0)); assertEquals("<---", document.getParagraphs().get(0).getRuns().get(10).getText(0)); assertEquals("FF0000", document.getParagraphs().get(0).getRuns().get(10).getColor()); assertEquals(16, 
document.getParagraphs().get(0).getRuns().get(10).getFontSize()); assertEquals(STHighlightColor.LIGHT_GRAY, document.getParagraphs().get(0).getRuns().get(10).getCTR().getRPr().getHighlight().getVal()); assertEquals("Unexpected tag m:endlet at this location", document.getParagraphs().get(0).getRuns().get(11).getText(0)); assertEquals("FF0000", document.getParagraphs().get(0).getRuns().get(11).getColor()); assertEquals(16, document.getParagraphs().get(0).getRuns().get(11).getFontSize()); assertEquals(STHighlightColor.LIGHT_GRAY, document.getParagraphs().get(0).getRuns().get(11).getCTR().getRPr().getHighlight().getVal()); assertEquals("<---", document.getParagraphs().get(0).getRuns().get(10).getText(0)); assertEquals("FF0000", document.getParagraphs().get(0).getRuns().get(10).getColor()); assertEquals(16, document.getParagraphs().get(0).getRuns().get(10).getFontSize()); assertEquals(STHighlightColor.LIGHT_GRAY, document.getParagraphs().get(0).getRuns().get(10).getCTR().getRPr().getHighlight().getVal()); assertEquals("Unexpected tag m:endlet at this location", document.getParagraphs().get(0).getRuns().get(11).getText(0)); assertEquals("FF0000", document.getParagraphs().get(0).getRuns().get(11).getColor()); assertEquals(16, document.getParagraphs().get(0).getRuns().get(11).getFontSize()); assertEquals(STHighlightColor.LIGHT_GRAY, document.getParagraphs().get(0).getRuns().get(11).getCTR().getRPr().getHighlight().getVal()); assertEquals(" ", document.getParagraphs().get(0).getRuns().get(15).getText(0)); assertEquals("<---", document.getParagraphs().get(0).getRuns().get(16).getText(0)); assertEquals("FF0000", document.getParagraphs().get(0).getRuns().get(16).getColor()); assertEquals(16, document.getParagraphs().get(0).getRuns().get(16).getFontSize()); assertEquals(STHighlightColor.LIGHT_GRAY, document.getParagraphs().get(0).getRuns().get(16).getCTR().getRPr().getHighlight().getVal()); assertEquals("gd:elseif, gd:else or gd:endif expected here.", 
document.getParagraphs().get(0).getRuns().get(17).getText(0)); assertEquals("FF0000", document.getParagraphs().get(0).getRuns().get(17).getColor()); assertEquals(16, document.getParagraphs().get(0).getRuns().get(17).getFontSize()); assertEquals(STHighlightColor.LIGHT_GRAY, document.getParagraphs().get(0).getRuns().get(17).getCTR().getRPr().getHighlight().getVal()); assertEquals("Some", document.getParagraphs().get(0).getRuns().get(18).getText(0)); assertEquals(" t", document.getParagraphs().get(0).getRuns().get(19).getText(0)); assertEquals("ext", document.getParagraphs().get(0).getRuns().get(20).getText(0)); assertEquals("<---", document.getParagraphs().get(0).getRuns().get(22).getText(0)); assertEquals("Unexpected tag EOF at this location", document.getParagraphs().get(0).getRuns().get(23).getText(0)); }
From source file:org.obeonetwork.m2doc.generator.test.TemplateParsingValidatorTest.java
License:Open Source License
/** * Tests that parsing errors from AQL template tag (conditional here) are placed next to the start tag when no following text exists. * The tested tag is <{m:diagram provider:"noExistingProvider" width:"500" height:"500" title="representationTitle"}> * The expected tag is : <{m:diagram provider:"noExistingProvider" width:"500" height:"500" title="representationTitle"}<---The image * tag is referencing an unknown diagram provider : 'noExistingProvider' > * After the run with the end '}' char, the following runs must be present : * A run must contains blanks char.// w w w . ja v a 2 s .c om * The next one must contains the error message. * The next one is a blank separator. * The next one must contains the other error message. * The next one must contains blank char and the next one the static content after the tag in the original template. * * @throws InvalidFormatException * @throws IOException * @throws DocumentParserException * @throws DocumentGenerationException */ @Test public void testErrorInSimpleTagWithoutFollowing() throws InvalidFormatException, IOException, DocumentParserException, DocumentGenerationException { FileInputStream is = new FileInputStream("templates/testParsingErrorSimpleTagWithoutFollowingText.docx"); OPCPackage oPackage = OPCPackage.open(is); XWPFDocument document = new XWPFDocument(oPackage); BodyParser parser = new BodyParser(document, env); Template template = parser.parseTemplate(); TemplateValidationGenerator validator = new TemplateValidationGenerator(); validator.doSwitch(template); createDestinationDocument(document, "results/testParsingErrorSimpleTagWithoutFollowingText.docx"); // scan the destination document assertEquals(1, document.getParagraphs().size()); assertEquals(11, document.getParagraphs().get(0).getRuns().size()); assertEquals(" ", document.getParagraphs().get(0).getRuns().get(2).getText(0)); assertEquals("<---", document.getParagraphs().get(0).getRuns().get(3).getText(0)); assertEquals("FF0000", 
document.getParagraphs().get(0).getRuns().get(3).getColor()); assertEquals(16, document.getParagraphs().get(0).getRuns().get(3).getFontSize()); assertEquals(STHighlightColor.LIGHT_GRAY, document.getParagraphs().get(0).getRuns().get(5).getCTR().getRPr().getHighlight().getVal()); assertEquals("The image tag is referencing an unknown diagram provider : 'noExistingProvider'", document.getParagraphs().get(0).getRuns().get(5).getText(0)); assertEquals("FF0000", document.getParagraphs().get(0).getRuns().get(5).getColor()); assertEquals(16, document.getParagraphs().get(0).getRuns().get(5).getFontSize()); assertEquals(STHighlightColor.LIGHT_GRAY, document.getParagraphs().get(0).getRuns().get(5).getCTR().getRPr().getHighlight().getVal()); }
From source file:org.obeonetwork.m2doc.generator.test.TemplateProcessorTest.java
License:Open Source License
/** * Test the replacement of a variable in a doc. * /* w ww . j ava2 s. c o m*/ * @throws InvalidFormatException * @throws IOException * @throws DocumentParserException */ @Test public void testVarRefProcessing() throws InvalidFormatException, IOException, DocumentParserException { FileInputStream is = new FileInputStream("templates/testVar.docx"); OPCPackage oPackage = OPCPackage.open(is); XWPFDocument document = new XWPFDocument(oPackage); BodyParser parser = new BodyParser(document, env); Template template = parser.parseTemplate(); Map<String, Object> definitions = new HashMap<String, Object>(); definitions.put("x", "valueofx"); XWPFDocument destinationDoc = createDestinationDocument("templates/testVar.docx"); TemplateProcessor processor = new TemplateProcessor(definitions, "", env, destinationDoc); processor.doSwitch(template); // scan the destination document assertEquals(2, destinationDoc.getParagraphs().size()); System.out.println(destinationDoc.getParagraphs().get(0).getText()); assertEquals("Template de test pour les balises de rfrence une variable\u00a0: valueofx", destinationDoc.getParagraphs().get(0).getText()); assertEquals("Fin du gabarit", destinationDoc.getParagraphs().get(1).getText()); }