Example usage of the org.apache.poi.xwpf.usermodel.XWPFDocument constructor XWPFDocument(InputStream)

Introduction

On this page you can find example usages of the XWPFDocument(InputStream) constructor of org.apache.poi.xwpf.usermodel.XWPFDocument.

Prototype

public XWPFDocument(InputStream is) throws IOException

Source Link

Usage

From source file:eu.modelwriter.ide.ui.command.ExtractTextHandler.java

License:Open Source License

/**
 * Extracts the text of the given .docx {@link IFile} into a workspace file that has
 * the same path with the extension replaced by "txt". An existing text file at that
 * path is deleted first.
 * <p>
 * Failures are not propagated: an {@link IOException} or {@link CoreException} is
 * logged as an error status through the plug-in log.
 *
 * @param file
 *            the .docx {@link IFile}
 */
private void exctractDocx(final IFile file) {
    // try-with-resources closes the extractor, the document and the stream (in that
    // order) even when extraction or the workspace update fails part-way through.
    try (FileInputStream fis = new FileInputStream(file.getLocation().toFile());
            XWPFDocument docx = new XWPFDocument(fis);
            XWPFWordExtractor we = new XWPFWordExtractor(docx)) {
        final IPath textPath = file.getFullPath().removeFileExtension().addFileExtension("txt");
        final IFile textFile = ResourcesPlugin.getWorkspace().getRoot().getFile(textPath);
        if (textFile.exists()) {
            textFile.delete(true, new NullProgressMonitor());
        }
        // NOTE(review): getBytes() uses the platform default charset; confirm that this
        // matches the charset the workspace expects for the created file.
        textFile.create(new ByteArrayInputStream(we.getText().getBytes()), true, new NullProgressMonitor());
    } catch (IOException | CoreException e) {
        Activator.getDefault().getLog().log(new Status(IStatus.ERROR, Activator.PLUGIN_ID,
                UNABLE_TO_EXTRACT_TEXT_FROM + file.getFullPath(), e));
    }
}

From source file:eu.transkribus.languageresources.extractor.docx.DocxExtractor.java

/**
 * Extracts the full text of the .docx file at the given path.
 *
 * @param pathToFile     path of the .docx file to read
 * @param splitCharacter not used by this implementation
 * @return the text produced by {@link XWPFWordExtractor#getText()} for the document
 * @throws RuntimeException if the file cannot be opened or read; the underlying
 *                          {@link IOException} is attached as the cause
 */
@Override
public String extractTextFromDocument(String pathToFile, String splitCharacter) {
    // All three resources are closed automatically, also when reading fails.
    try (FileInputStream in = new FileInputStream(pathToFile);
            XWPFDocument docx = new XWPFDocument(in);
            XWPFWordExtractor we = new XWPFWordExtractor(docx)) {
        return we.getText();
    } catch (IOException ex) {
        // Keep the original exception as the cause so the real failure stays visible.
        throw new RuntimeException("Could not find docx for given path: " + pathToFile, ex);
    }
}

From source file:File.DOCX.ReadDocx.java

/**
 * Prints the text of every paragraph of the document {@code path + filename + ".docx"}
 * to standard output, one paragraph per line.
 * <p>
 * Any failure is caught and its stack trace is printed; nothing is rethrown.
 *
 * @param path     prefix concatenated directly with {@code filename} (no separator is added)
 * @param filename document name without the ".docx" extension
 */
public void ReadParagraph(String path, String filename) {
    // try-with-resources releases the document and the file stream on every path.
    try (FileInputStream fis = new FileInputStream(path + filename + ".docx");
            XWPFDocument xdoc = new XWPFDocument(OPCPackage.open(fis))) {
        for (XWPFParagraph paragraph : xdoc.getParagraphs()) {
            System.out.println(paragraph.getText());
        }
    } catch (Exception ex) {
        ex.printStackTrace();
    }
}

From source file:File.DOCX.ReadDocx.java

/**
 * Prints, for every table among the body elements of the document
 * {@code path + filename + ".docx"}, its row count followed by its text.
 * <p>
 * Any failure is caught and its stack trace is printed; nothing is rethrown.
 *
 * @param path     prefix concatenated directly with {@code filename} (no separator is added)
 * @param filename document name without the ".docx" extension
 */
public void ReadTable(String path, String filename) {
    // try-with-resources releases the document and the file stream on every path.
    try (FileInputStream fis = new FileInputStream(path + filename + ".docx");
            XWPFDocument xdoc = new XWPFDocument(OPCPackage.open(fis))) {
        Iterator<IBodyElement> bodyElementIterator = xdoc.getBodyElementsIterator();
        while (bodyElementIterator.hasNext()) {
            IBodyElement element = bodyElementIterator.next();
            if (element instanceof XWPFTable) {
                // Report only the table being visited. Listing every table of the
                // enclosing body here would print each table once per table element.
                XWPFTable table = (XWPFTable) element;
                System.out.println("Total Number of Rows of Table:" + table.getNumberOfRows());
                System.out.println(table.getText());
            }
        }
    } catch (Exception ex) {
        ex.printStackTrace();
    }
}

From source file:File.DOCX.ReadDocx.java

/**
 * Prints the complete text of the document {@code path + filename + ".doc"} to
 * standard output.
 * <p>
 * Any failure is caught and its stack trace is printed; nothing is rethrown.
 *
 * @param path     prefix concatenated directly with {@code filename} (no separator is added)
 * @param filename document name without the extension
 */
public void ReadAll(String path, String filename) {
    // NOTE(review): this opens "<name>.doc" while the sibling readers open "<name>.docx",
    // and the file is parsed as an OPC package. Confirm whether ".docx" was intended.
    try (FileInputStream fis = new FileInputStream(path + filename + ".doc");
            XWPFDocument xdoc = new XWPFDocument(OPCPackage.open(fis));
            XWPFWordExtractor extractor = new XWPFWordExtractor(xdoc)) {
        System.out.println(extractor.getText());
    } catch (Exception ex) {
        ex.printStackTrace();
    }
}

From source file:FilesHandlers.WordHandler.java

/**
 * Reads the document named {@code docName} (appended to the working directory) and
 * returns its extracted text split into lines.
 *
 * @param docName document name, concatenated directly to the working directory
 * @return one entry per {@code '\n'}-terminated line, without the terminator; any
 *         text after the last {@code '\n'} is not included
 * @throws IOException if the document cannot be opened or read
 */
public String[] getDocContentByLine(String docName) throws IOException {
    final String content;
    // Close the extractor, document and stream as soon as the text has been read.
    try (FileInputStream in = new FileInputStream(workingDirectory.concat(docName));
            XWPFDocument docx = new XWPFDocument(in);
            XWPFWordExtractor we = new XWPFWordExtractor(docx)) {
        content = we.getText();
    }

    String[] res = new String[countOccurrences(content, '\n')];

    // Cut each line out with substring instead of growing a String one character at
    // a time, which was quadratic in the length of the text.
    int lineStart = 0;
    int filled = 0;
    int lineEnd;
    while ((lineEnd = content.indexOf('\n', lineStart)) >= 0) {
        res[filled] = content.substring(lineStart, lineEnd);
        filled++;
        lineStart = lineEnd + 1;
    }

    return res;
}

From source file:fr.opensagres.poi.xwpf.converter.core.styles.A.java

License:Open Source License

/**
 * Loads "DocxStructures.docx" and runs {@code testParagraph} on every paragraph body
 * element, using a styles engine built from that document.
 */
@Test
public void testParagraphStyles() throws Exception {
    // Load the docx with Apache POI and build the styles engine on top of it.
    XWPFDocument docx = new XWPFDocument(Data.class.getResourceAsStream("DocxStructures.docx"));
    XWPFStylesDocument styles = new XWPFStylesDocument(docx);

    // Only paragraph elements are checked; every other body element is skipped.
    for (IBodyElement bodyElement : docx.getBodyElements()) {
        if (bodyElement.getElementType() != BodyElementType.PARAGRAPH) {
            continue;
        }
        testParagraph((XWPFParagraph) bodyElement, styles);
    }
}

From source file:fr.opensagres.poi.xwpf.converter.core.styles.FontStylesBasedOnTestCase.java

License:Open Source License

/**
 * Loads "TestFontStylesBasedOn.docx" and runs {@code testParagraph} on every
 * paragraph body element, using a styles engine built from that document.
 */
@Test
public void testParagraphStyles() throws Exception {
    // Load the docx with Apache POI.
    XWPFDocument docx = new XWPFDocument(Data.class.getResourceAsStream("TestFontStylesBasedOn.docx"));

    // Styles engine used to resolve the paragraph styles.
    XWPFStylesDocument styles = new XWPFStylesDocument(docx);

    // Walk the body elements by index and check the paragraphs only.
    List<IBodyElement> bodyElements = docx.getBodyElements();
    for (int i = 0; i < bodyElements.size(); i++) {
        IBodyElement current = bodyElements.get(i);
        boolean isParagraph = current.getElementType() == BodyElementType.PARAGRAPH;
        if (isParagraph) {
            testParagraph((XWPFParagraph) current, styles);
        }
    }
}

From source file:fr.opensagres.poi.xwpf.converter.core.styles.run.FontSizeDocDefaultsTestCase.java

License:Open Source License

/**
 * Asserts that every run of every paragraph in the given document resolves to the
 * given font size, and that at least one run was checked.
 *
 * @param size    font size to compare each run's resolved size against
 * @param docName resource name of the docx, resolved relative to {@code Data}
 */
private void internalTest(Float size, String docName) throws Exception {
    XWPFDocument docx = new XWPFDocument(Data.class.getResourceAsStream(docName));
    XWPFStylesDocument styles = new XWPFStylesDocument(docx);

    // Counts the runs that were visited so the test fails when none is found.
    int visitedRuns = 0;
    for (IBodyElement bodyElement : docx.getBodyElements()) {
        if (bodyElement.getElementType() != BodyElementType.PARAGRAPH) {
            continue;
        }
        XWPFParagraph paragraph = (XWPFParagraph) bodyElement;
        for (XWPFRun run : paragraph.getRuns()) {
            Object resolvedSize = styles.getFontSize(run);
            visitedRuns++;
            assertEquals(resolvedSize, size);
        }
    }
    assertTrue(visitedRuns > 0);
}

From source file:fr.opensagres.poi.xwpf.converter.core.styles.TableCellVerticalAlignmentTestCase.java

License:Open Source License

/**
 * Loads "TableCellVerticalAlignment.docx" and runs {@code testTable} on every table
 * body element, using a styles engine built from that document.
 */
@Test
public void testParagraphStyles() throws Exception {
    // Load the docx with Apache POI and create the styles engine for it.
    XWPFDocument docx = new XWPFDocument(Data.class.getResourceAsStream("TableCellVerticalAlignment.docx"));
    XWPFStylesDocument styles = new XWPFStylesDocument(docx);

    // Visit the body elements and check the tables; other elements are skipped.
    for (IBodyElement bodyElement : docx.getBodyElements()) {
        if (bodyElement.getElementType() != BodyElementType.TABLE) {
            continue;
        }
        XWPFTable table = (XWPFTable) bodyElement;
        testTable(table, styles);
    }
}