List of usage examples for the method org.apache.poi.xwpf.usermodel.XWPFParagraph.getText()
public String getText()
From source file:org.ArticleEditor.OptionsView.MenuOptionsTopComponent.java
public int getPosWord(Vector words, int NumParagraphs, XWPFDocument Document) { int Pos = -1; int i;/*from w ww.j a va 2 s .c o m*/ if (NumParagraphs == 1 && Document.getParagraphArray(0).equals("")) { return Pos; } else { Pos = 0; for (i = 0; i < NumParagraphs; i++) { XWPFParagraph parrafo = Document.getParagraphArray(i); String Parrafo = parrafo.getText().toLowerCase(); int longitud = words.get(0).toString().length() + 1; if (isWord(words, Parrafo) == true && parrafo.getText().length() <= longitud) { Pos = Pos + parrafo.getText().length(); i = NumParagraphs; } else { Pos = Pos + parrafo.getText().length() + 1; } } if (i == NumParagraphs) { return -1; } else { return Pos; } } }
From source file:org.knime.ext.textprocessing.nodes.source.parser.word.WordDocumentParser.java
License:Open Source License
private Document parseInternal(final InputStream is) throws Exception { m_currentDoc = new DocumentBuilder(m_tokenizerName); m_currentDoc.setDocumentFile(new File(m_docPath)); m_currentDoc.setDocumentType(m_type); m_currentDoc.addDocumentCategory(m_category); m_currentDoc.addDocumentSource(m_source); POIFSFileSystem poifs = null;//from w w w.ja v a 2s . c om HWPFDocument hdoc = null; XWPFDocument hdoc2 = null; WordExtractor extractor = null; try { // doc files if (m_docPath.endsWith(".doc")) { // copy content of input stream into byte array since content have to be red twice unfortunately. final ByteArrayOutputStream baos = new ByteArrayOutputStream(); final byte[] buf = new byte[1024]; int i = 0; while ((i = is.read(buf)) >= 0) { baos.write(buf, 0, i); } final byte[] content = baos.toByteArray(); // open stream with copied content to read text InputStream copiedInput = new ByteArrayInputStream(content); hdoc = new HWPFDocument(copiedInput); extractor = new WordExtractor(hdoc); for (String p : extractor.getParagraphText()) { p = p.trim(); if (!onlyWhitepscaes(p)) { m_currentDoc.addParagraph(p); } } // open stream again with copied content to read meta info copiedInput = new ByteArrayInputStream(content); poifs = new POIFSFileSystem(copiedInput); final DirectoryEntry dir = poifs.getRoot(); final DocumentEntry siEntry = (DocumentEntry) dir.getEntry(SummaryInformation.DEFAULT_STREAM_NAME); final PropertySet ps = new PropertySet(new DocumentInputStream(siEntry)); final SummaryInformation si = new SummaryInformation(ps); setAuthor(si.getAuthor()); setPublicationDate(si.getCreateDateTime()); // docx files } else if (m_docPath.endsWith(".docx") || m_docPath.endsWith(".docm")) { hdoc2 = new XWPFDocument(is); final List<XWPFParagraph> paragraphs = hdoc2.getParagraphs(); for (final XWPFParagraph paragraph : paragraphs) { final String text = paragraph.getText(); if (!onlyWhitepscaes(text)) { m_currentDoc.addParagraph(text); } } 
setAuthor(hdoc2.getProperties().getCoreProperties().getCreator()); setPublicationDate(hdoc2.getProperties().getCoreProperties().getCreated()); } m_currentDoc.createNewSection(SectionAnnotation.CHAPTER); // find title String title = null; if (m_filenameAsTitle) { title = m_docPath.trim(); } else { final List<Section> sections = m_currentDoc.getSections(); if (sections.size() > 0) { try { title = sections.get(0).getParagraphs().get(0).getSentences().get(0).getText().trim(); } catch (IndexOutOfBoundsException e) { LOGGER.debug("Parsed word document " + m_docPath + " is empty."); title = ""; } } } if (!checkTitle(title)) { title = m_docPath.toString(); } m_currentDoc.addTitle(title); return m_currentDoc.createDocument(); } finally { is.close(); if (poifs != null) { poifs.close(); } if (hdoc != null) { hdoc.close(); } if (hdoc2 != null) { hdoc2.close(); } if (extractor != null) { extractor.close(); } } }
From source file:org.obeonetwork.m2doc.generator.test.TableClientProcessorTest.java
License:Open Source License
protected void checkParagraph(XWPFParagraph paragraph, String expectedTitle) { assertEquals(expectedTitle, paragraph.getText()); assertEquals(0, paragraph.getSpacingAfter()); List<XWPFRun> runs = paragraph.getRuns(); assertEquals(1, runs.size());//from w w w . j a va 2s. c o m }
From source file:org.shareok.data.documentProcessor.WordHandler.java
private String[] readDocxFile(FileInputStream fs) throws IOException { String[] paragraphs = null;//ww w. j av a 2 s. c o m try { // XWPFDocument doc = new XWPFDocument(); // XWPFParagraph p1 = doc.createParagraph(); // p1.setAlignment(ParagraphAlignment.CENTER); // p1.setBorderBottom(Borders.DOUBLE); // p1.setBorderTop(Borders.DOUBLE); // // p1.setBorderRight(Borders.DOUBLE); // p1.setBorderLeft(Borders.DOUBLE); // p1.setBorderBetween(Borders.SINGLE); // // p1.setVerticalAlignment(TextAlignment.TOP); // // XWPFRun r1 = p1.createRun(); // r1.setBold(true); // r1.setText("The quick brown fox"); // r1.setBold(true); // r1.setFontFamily("Courier"); // r1.setUnderline(UnderlinePatterns.DOT_DOT_DASH); // r1.setTextPosition(100); // // XWPFParagraph p2 = doc.createParagraph(); // p2.setAlignment(ParagraphAlignment.RIGHT); // // //BORDERS // p2.setBorderBottom(Borders.DOUBLE); // p2.setBorderTop(Borders.DOUBLE); // p2.setBorderRight(Borders.DOUBLE); // p2.setBorderLeft(Borders.DOUBLE); // p2.setBorderBetween(Borders.SINGLE); // // XWPFRun r2 = p2.createRun(); // r2.setText("jumped over the lazy dog"); // r2.setStrike(true); // r2.setFontSize(20); // // XWPFRun r3 = p2.createRun(); // r3.setText("and went away"); // r3.setStrike(true); // r3.setFontSize(20); // r3.setSubscript(VerticalAlign.SUPERSCRIPT); // // // XWPFParagraph p3 = doc.createParagraph(); // p3.setWordWrap(true); // p3.setPageBreak(true); // // //p3.setAlignment(ParagraphAlignment.DISTRIBUTE); // p3.setAlignment(ParagraphAlignment.BOTH); // p3.setSpacingLineRule(LineSpacingRule.EXACT); // // p3.setIndentationFirstLine(600); // // // XWPFRun r4 = p3.createRun(); // r4.setTextPosition(20); // r4.setText("To be, or not to be: that is the question: " // + "Whether 'tis nobler in the mind to suffer " // + "The slings and arrows of outrageous fortune, " // + "Or to take arms against a sea of troubles, " // + "And by opposing end them? 
To die: to sleep; "); // r4.addBreak(BreakType.PAGE); // r4.setText("No more; and by a sleep to say we end " // + "The heart-ache and the thousand natural shocks " // + "That flesh is heir to, 'tis a consummation " // + "Devoutly to be wish'd. To die, to sleep; " // + "To sleep: perchance to dream: ay, there's the rub; " // + "......."); // r4.setItalic(true); ////This would imply that this break shall be treated as a simple line break, and break the line after that word: // // XWPFRun r5 = p3.createRun(); // r5.setTextPosition(-10); // r5.setText("For in that sleep of death what dreams may come"); // r5.addCarriageReturn(); // r5.setText("When we have shuffled off this mortal coil," // + "Must give us pause: there's the respect" // + "That makes calamity of so long life;"); // r5.addBreak(); // r5.setText("For who would bear the whips and scorns of time," // + "The oppressor's wrong, the proud man's contumely,"); // // r5.addBreak(BreakClear.ALL); // r5.setText("The pangs of despised love, the law's delay," // + "The insolence of office and the spurns" + "......."); // // FileOutputStream out = new FileOutputStream("simple.docx"); // doc.write(out); // out.close(); XWPFDocument document = new XWPFDocument(OPCPackage.open("simple.docx")); List<XWPFParagraph> paragraphList = document.getParagraphs(); paragraphs = new String[paragraphList.size()]; int i = 0; for (XWPFParagraph para : paragraphList) { paragraphs[i] = para.getText(); } } catch (Exception e) { e.printStackTrace(); } finally { fs.close(); } return paragraphs; }
From source file:util.DocumentFunction.java
public static String readDocxFile(String fileName) { StringBuilder text = new StringBuilder(); try {/*from w w w . jav a2s.c o m*/ File file = new File(fileName); FileInputStream fis = new FileInputStream(file.getAbsolutePath()); XWPFDocument document = new XWPFDocument(fis); List<XWPFParagraph> paragraphs = document.getParagraphs(); //System.out.println("Total no of paragraph "+paragraphs.size()); for (XWPFParagraph para : paragraphs) { text.append(para.getText() + "\n"); } fis.close(); } catch (Exception e) { e.printStackTrace(); } return text.toString(); }