List of usage examples for the org.apache.poi.xwpf.usermodel.XWPFDocument constructor
public XWPFDocument(InputStream is) throws IOException
From source file:eu.modelwriter.ide.ui.command.ExtractTextHandler.java
License:Open Source License
/**
 * Extracts text from the given .docx {@link IFile} and stores it in a workspace
 * file with the same path but a {@code txt} extension.
 * <p>
 * An already existing text file at that path is deleted before the new one is
 * created. I/O and workspace failures are not propagated; they are written to
 * the plug-in log.
 *
 * @param file
 *            the .docx {@link IFile}
 */
private void exctractDocx(final IFile file) {
    // try-with-resources closes extractor, document and stream (in that order)
    // even when reading the document or writing the text file fails.
    try (FileInputStream fis = new FileInputStream(file.getLocation().toFile());
            XWPFDocument docx = new XWPFDocument(fis);
            XWPFWordExtractor we = new XWPFWordExtractor(docx)) {
        final IPath textPath = file.getFullPath().removeFileExtension().addFileExtension("txt");
        final IFile textFile = ResourcesPlugin.getWorkspace().getRoot().getFile(textPath);
        if (textFile.exists()) {
            textFile.delete(true, new NullProgressMonitor());
        }
        // NOTE(review): getBytes() uses the platform default charset — confirm this
        // matches the encoding the workspace expects for the generated file.
        textFile.create(new ByteArrayInputStream(we.getText().getBytes()), true, new NullProgressMonitor());
    } catch (IOException | CoreException e) {
        Activator.getDefault().getLog().log(new Status(IStatus.ERROR, Activator.PLUGIN_ID,
                UNABLE_TO_EXTRACT_TEXT_FROM + file.getFullPath(), e));
    }
}
From source file:eu.transkribus.languageresources.extractor.docx.DocxExtractor.java
@Override public String extractTextFromDocument(String pathToFile, String splitCharacter) { try {//from w ww.j av a 2 s . c o m XWPFDocument docx = new XWPFDocument(new FileInputStream(pathToFile)); XWPFWordExtractor we = new XWPFWordExtractor(docx); return we.getText(); } catch (IOException ex) { throw new RuntimeException("Could not find docx for given path: " + pathToFile); } }
From source file:File.DOCX.ReadDocx.java
/**
 * Prints the text of every paragraph of a .docx file to standard output.
 * <p>
 * Any failure is caught and its stack trace is printed; nothing is thrown to
 * the caller.
 *
 * @param path
 *            prefix of the file location; it is concatenated with
 *            {@code filename} as-is, without adding a separator
 * @param filename
 *            file name without the {@code .docx} extension
 */
public void ReadParagraph(String path, String filename) {
    // try-with-resources releases the document and the file handle on every path.
    try (FileInputStream fis = new FileInputStream(path + filename + ".docx");
            XWPFDocument xdoc = new XWPFDocument(OPCPackage.open(fis))) {
        for (XWPFParagraph paragraph : xdoc.getParagraphs()) {
            System.out.println(paragraph.getText());
        }
    } catch (Exception ex) {
        ex.printStackTrace();
    }
}
From source file:File.DOCX.ReadDocx.java
/**
 * Prints the row count and the text of every table found among the body
 * elements of a .docx file to standard output.
 * <p>
 * Any failure is caught and its stack trace is printed; nothing is thrown to
 * the caller.
 *
 * @param path
 *            prefix of the file location; it is concatenated with
 *            {@code filename} as-is, without adding a separator
 * @param filename
 *            file name without the {@code .docx} extension
 */
public void ReadTable(String path, String filename) {
    // try-with-resources releases the document and the file handle on every path.
    try (FileInputStream fis = new FileInputStream(path + filename + ".docx");
            XWPFDocument xdoc = new XWPFDocument(OPCPackage.open(fis))) {
        for (IBodyElement element : xdoc.getBodyElements()) {
            if (element instanceof XWPFTable) {
                // Print the table element itself. Going through
                // element.getBody().getTables() would list every table of the
                // containing body again for each table element encountered.
                XWPFTable table = (XWPFTable) element;
                System.out.println("Total Number of Rows of Table:" + table.getNumberOfRows());
                System.out.println(table.getText());
            }
        }
    } catch (Exception ex) {
        ex.printStackTrace();
    }
}
From source file:File.DOCX.ReadDocx.java
/**
 * Prints the complete text of a .docx file to standard output.
 * <p>
 * Any failure is caught and its stack trace is printed; nothing is thrown to
 * the caller.
 *
 * @param path
 *            prefix of the file location; it is concatenated with
 *            {@code filename} as-is, without adding a separator
 * @param filename
 *            file name without the {@code .docx} extension
 */
public void ReadAll(String path, String filename) {
    // XWPFDocument reads the OOXML (.docx) format, so use the same ".docx"
    // extension as the other readers in this class.
    try (FileInputStream fis = new FileInputStream(path + filename + ".docx");
            XWPFDocument xdoc = new XWPFDocument(OPCPackage.open(fis));
            XWPFWordExtractor extractor = new XWPFWordExtractor(xdoc)) {
        System.out.println(extractor.getText());
    } catch (Exception ex) {
        ex.printStackTrace();
    }
}
From source file:FilesHandlers.WordHandler.java
public String[] getDocContentByLine(String docName) throws IOException { XWPFDocument docx = new XWPFDocument(new FileInputStream(workingDirectory.concat(docName))); //using XWPFWordExtractor Class XWPFWordExtractor we = new XWPFWordExtractor(docx); String content = we.getText(); int total = countOccurrences(content, '\n'); String[] res = new String[total]; int latest = 0; String row = ""; for (int i = 0; i < content.length(); i++) { if (content.charAt(i) == '\n') { res[latest] = row;/* ww w.j av a 2 s .com*/ row = ""; latest++; } else { row = row.concat("" + content.charAt(i)); } } return res; }
From source file:fr.opensagres.poi.xwpf.converter.core.styles.A.java
License:Open Source License
@Test public void testParagraphStyles() throws Exception { // 1) Load docx with Apache POI XWPFDocument document = new XWPFDocument(Data.class.getResourceAsStream("DocxStructures.docx")); // Create styles engine XWPFStylesDocument stylesDocument = new XWPFStylesDocument(document); // Loop for each paragraph List<IBodyElement> elements = document.getBodyElements(); for (IBodyElement element : elements) { if (element.getElementType() == BodyElementType.PARAGRAPH) { testParagraph((XWPFParagraph) element, stylesDocument); }//from ww w . j a va 2 s . com } }
From source file:fr.opensagres.poi.xwpf.converter.core.styles.FontStylesBasedOnTestCase.java
License:Open Source License
@Test public void testParagraphStyles() throws Exception { // 1) Load docx with Apache POI XWPFDocument document = new XWPFDocument(Data.class.getResourceAsStream("TestFontStylesBasedOn.docx")); // Create styles engine XWPFStylesDocument stylesDocument = new XWPFStylesDocument(document); // Loop for each paragraph List<IBodyElement> elements = document.getBodyElements(); for (IBodyElement element : elements) { if (element.getElementType() == BodyElementType.PARAGRAPH) { testParagraph((XWPFParagraph) element, stylesDocument); }//from w w w. j a v a 2 s .c o m } }
From source file:fr.opensagres.poi.xwpf.converter.core.styles.run.FontSizeDocDefaultsTestCase.java
License:Open Source License
private void internalTest(Float size, String docName) throws Exception { XWPFDocument document = new XWPFDocument(Data.class.getResourceAsStream(docName)); XWPFStylesDocument stylesDocument = new XWPFStylesDocument(document); List<IBodyElement> elements = document.getBodyElements(); boolean ran = false; for (IBodyElement element : elements) { if (element.getElementType() == BodyElementType.PARAGRAPH) { for (XWPFRun docxRun : ((XWPFParagraph) element).getRuns()) { Object sizeFromStyle = stylesDocument.getFontSize(docxRun); ran = true;// w w w. ja va 2s . c o m assertEquals(sizeFromStyle, size); } } } assertTrue(ran); }
From source file:fr.opensagres.poi.xwpf.converter.core.styles.TableCellVerticalAlignmentTestCase.java
License:Open Source License
@Test public void testParagraphStyles() throws Exception { // 1) Load docx with Apache POI XWPFDocument document = new XWPFDocument(Data.class.getResourceAsStream("TableCellVerticalAlignment.docx")); // Create styles engine XWPFStylesDocument stylesDocument = new XWPFStylesDocument(document); // Loop for each paragraph List<IBodyElement> elements = document.getBodyElements(); for (IBodyElement element : elements) { if (element.getElementType() == BodyElementType.TABLE) { testTable((XWPFTable) element, stylesDocument); }// ww w . jav a 2s . co m } }