Example usage for org.apache.poi.xwpf.usermodel XWPFDocument XWPFDocument

List of usage examples for org.apache.poi.xwpf.usermodel XWPFDocument XWPFDocument

Introduction

On this page you can find example usages of the org.apache.poi.xwpf.usermodel.XWPFDocument constructor XWPFDocument(InputStream).

Prototype

public XWPFDocument(InputStream is) throws IOException 

Source Link

Usage

From source file:com.qwazr.library.poi.DocxParser.java

License:Apache License

/**
 * Reads a DOCX document from the given stream and feeds its metadata and text
 * into the result builder.
 *
 * <p>The MIME type is always set on the metas. The core properties (title, creator,
 * dates, subject, description, keywords) are added only when the extractor exposes
 * them. The extracted text is stored in a new result document together with the
 * outcome of the language detection.
 *
 * @param parameters    request parameters (not used by this implementation)
 * @param inputStream   stream holding the DOCX content
 * @param extension     file extension passed on to the MIME type lookup
 * @param mimeType      MIME type passed on to the MIME type lookup
 * @param resultBuilder receiver of the extracted metas and document fields
 * @throws IOException if the document cannot be read or the extractor cannot be closed
 */
@Override
public void parseContent(final MultivaluedMap<String, String> parameters, final InputStream inputStream,
        final String extension, final String mimeType, final ParserResultBuilder resultBuilder)
        throws IOException {

    try (XWPFWordExtractor extractor = new XWPFWordExtractor(new XWPFDocument(inputStream))) {

        final ParserFieldsBuilder metaFields = resultBuilder.metas();
        metaFields.set(MIME_TYPE, findMimeType(extension, mimeType, this::findMimeTypeUsingDefault));

        final CoreProperties coreProperties = extractor.getCoreProperties();
        if (coreProperties != null) {
            metaFields.add(TITLE, coreProperties.getTitle());
            metaFields.add(CREATOR, coreProperties.getCreator());
            metaFields.add(CREATION_DATE, coreProperties.getCreated());
            metaFields.add(MODIFICATION_DATE, coreProperties.getModified());
            metaFields.add(SUBJECT, coreProperties.getSubject());
            metaFields.add(DESCRIPTION, coreProperties.getDescription());
            metaFields.add(KEYWORDS, coreProperties.getKeywords());
        }

        final ParserFieldsBuilder contentFields = resultBuilder.newDocument();
        final String text = extractor.getText();
        contentFields.add(CONTENT, text);
        // Language detection runs on the CONTENT field that was just added.
        contentFields.add(LANG_DETECTION, languageDetection(contentFields, CONTENT, 10000));
    }
}

From source file:com.raaz.doc.converter.ConvertDocxBigToXHTML.java

License:LGPL

/**
 * Converts the {@code DocxBig.docx} classpath resource to XHTML, writes it to
 * {@code target/DocxBig.htm} and prints the elapsed time.
 *
 * <p>Any failure is printed to standard error; the timing line is printed regardless.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) {
    long startTime = System.currentTimeMillis();

    try {
        // 1) Load docx with POI XWPFDocument
        XWPFDocument document = new XWPFDocument(Data.class.getResourceAsStream("DocxBig.docx"));

        // 2) Convert POI XWPFDocument to XHTML
        File outFile = new File("target/DocxBig.htm");
        outFile.getParentFile().mkdirs();

        // try-with-resources so the output file is flushed and closed even if the conversion fails
        try (OutputStream out = new FileOutputStream(outFile)) {
            XHTMLConverter.getInstance().convert(document, out, null);
        }
    } catch (Throwable e) {
        e.printStackTrace();
    }

    System.out.println("Generate DocxBig.htm with " + (System.currentTimeMillis() - startTime) + " ms.");
}

From source file:com.raaz.doc.converter.ConvertDocxStructuresToXHTML.java

License:LGPL

/**
 * Converts the {@code DocxStructures.docx} classpath resource to XHTML, writes it to
 * {@code target/DocxStructures.htm} and prints the elapsed time.
 *
 * <p>Any failure is printed to standard error; the timing line is printed regardless.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) {
    long startTime = System.currentTimeMillis();

    try {
        // 1) Load docx with POI XWPFDocument
        XWPFDocument document = new XWPFDocument(Data.class.getResourceAsStream("DocxStructures.docx"));

        // 2) Convert POI XWPFDocument to XHTML
        File outFile = new File("target/DocxStructures.htm");
        outFile.getParentFile().mkdirs();

        // try-with-resources so the output file is flushed and closed even if the conversion fails
        try (OutputStream out = new FileOutputStream(outFile)) {
            XHTMLConverter.getInstance().convert(document, out, null);
        }
    } catch (Throwable e) {
        e.printStackTrace();
    }

    System.out.println("Generate DocxStructures.htm with " + (System.currentTimeMillis() - startTime) + " ms.");
}

From source file:com.raaz.doc.converter.ConvertOoxmlToXHTML.java

License:LGPL

/**
 * Converts the {@code ooxml.docx} classpath resource to XHTML, writes it to
 * {@code target/ooxml.htm} and prints the elapsed time.
 *
 * <p>Any failure is printed to standard error; the timing line is printed regardless.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) {
    long startTime = System.currentTimeMillis();

    try {
        // 1) Load docx with POI XWPFDocument
        XWPFDocument document = new XWPFDocument(Data.class.getResourceAsStream("ooxml.docx"));

        // 2) Convert POI XWPFDocument to XHTML
        File outFile = new File("target/ooxml.htm");
        outFile.getParentFile().mkdirs();

        // try-with-resources so the output file is flushed and closed even if the conversion fails
        try (OutputStream out = new FileOutputStream(outFile)) {
            XHTMLConverter.getInstance().convert(document, out, null);
        }
    } catch (Throwable e) {
        e.printStackTrace();
    }

    System.out.println("Generate ooxml.htm with " + (System.currentTimeMillis() - startTime) + " ms.");
}

From source file:com.raghav.plot.ConvertWord.java

/**
 * Converts the DOCX file at {@code outputlFolderPath + docName} to XHTML and writes
 * the result to the file returned by {@code htmlPath()}.
 *
 * <p>Images are extracted to, and resolved from, {@code target/images/<docName>}.
 * I/O failures (including a missing input file) are printed to standard error and
 * not propagated.
 */
public void ConvertWordToHtml() {

    // Both streams are closed automatically, even when loading or conversion fails.
    try (InputStream doc = new FileInputStream(new File(outputlFolderPath + docName));
            OutputStream out = new FileOutputStream(new File(htmlPath()))) {

        // 1) Load DOCX into XWPFDocument
        System.out.println("InputStream" + doc);
        XWPFDocument document = new XWPFDocument(doc);

        // 2) Prepare XHTML options
        XHTMLOptions options = XHTMLOptions.create();

        // Extract images into a per-document folder. The folder name is derived from the
        // document name; concatenating the stream object itself would produce an
        // identity-based name such as "java.io.FileInputStream@1b2c3d".
        String root = "target";
        File imageFolder = new File(root + "/images/" + docName);
        options.setExtractor(new FileImageExtractor(imageFolder));
        // URI resolver pointing at the same folder so generated <img> links match the extracted files
        options.URIResolver(new FileURIResolver(imageFolder));

        XHTMLConverter.getInstance().convert(document, out, options);

        System.out.println("OutputStream " + out.toString());
    } catch (IOException ex) {
        // FileNotFoundException is an IOException, so a missing input/output path lands here too.
        ex.printStackTrace();
    }
}

From source file:com.raghav.plot.DocxToHtml.java

/**
 * Converts a hard-coded DOCX file to HTML, post-processes the markup, writes it to
 * {@code /home/raghav/yxAxis.html} and returns it.
 *
 * <p>Post-processing adds a line break after each table, strips {@code <style>} blocks
 * and margin declarations, forces widths to 100%, wraps the body in a padded
 * {@code <div>}, and gives every tag starting with {@code <p} an id of the form
 * {@code version_<documentVersionId>_<n>} plus an {@code ng-dblclick} handler.
 *
 * <p>I/O failures are printed to standard error. In that case the empty string (or
 * whatever was produced so far) is written to the output file and returned.
 *
 * @param documentVersionId identifier embedded in the generated paragraph ids
 * @return the post-processed HTML, or an empty string if the conversion failed early
 */
public String convertDocToHtml(long documentVersionId) {

    String result = "";
    // try-with-resources: the stream is closed only if it was actually opened, so a
    // missing input file no longer triggers a NullPointerException during cleanup.
    try (InputStream in = new FileInputStream(new File("/home/raghav/Desktop/Axis-Flat.docxr.docx"))) {
        XWPFDocument document = new XWPFDocument(in);
        XHTMLOptions options = XHTMLOptions.create()
                .URIResolver(new FileURIResolver(new File("/home/raghav/yzzzzAxis-Flat_docxb.html")));
        OutputStream out = new ByteArrayOutputStream();
        XHTMLConverter.getInstance().convert(document, out, options);
        // NOTE(review): toString() decodes with the platform default charset — confirm it
        // matches the encoding the converter writes.
        String htmlCopy = out.toString();

        htmlCopy = htmlCopy.replaceAll("</table>", "</table><br>");

        htmlCopy = htmlCopy.replaceAll("<style>.*?</style>", "");
        htmlCopy = htmlCopy.replaceAll("style=\\\"width:.*?pt", "style=\\\"width:100%");

        htmlCopy = htmlCopy.replaceAll("style=\"", "style=\"word-wrap: break-word; ");

        // Table classes whose cells get explicit borders/padding, e.g. {"Table3", "Table5", "Table8"}.
        // Currently empty, so the loop below is a no-op.
        String[] bd = {};

        for (String tableClass : bd) {
            htmlCopy = htmlCopy.replaceAll("<td class=\"TableNormal " + tableClass + "\" style=\".*?\"",
                    "<td class=\"TableNormal " + tableClass + "\"");
            // "<tid" is a temporary marker so already-rewritten cells are not matched again.
            htmlCopy = htmlCopy.replaceAll("<td class=\"TableNormal " + tableClass + "\"",
                    "<tid align=\"left\" class=\"TableNormal " + tableClass
                            + "\" style=\"padding:10px 10px 10px 10px;border:1px solid black;word-break: break-all;\"");
        }

        htmlCopy = htmlCopy.replaceAll("<tid ", "<td ");
        htmlCopy = htmlCopy.replaceAll("margin.*?;", "");
        htmlCopy = htmlCopy.replaceAll("<body>", "<body><div style=\"margin-left:20px;margin-right:20px\">");
        htmlCopy = htmlCopy.replaceAll("</body>", "</div></body>");

        result = htmlCopy.replaceAll(">", ">\n");

        int ind = 1;

        // Tag each "<p" occurrence in turn. The replacement uses the placeholder "<_p" so the
        // loop condition eventually becomes false; placeholders are restored afterwards.
        while (result.contains("<p")) {

            String id = "version_" + documentVersionId + "_" + ind;

            String customText = "<_p id='" + id + "' ng-dblclick='showCommentBox(\"" + id + "\")'";

            ind++;

            result = result.replaceFirst("<p", customText);
        }

        result = result.replaceAll("<_p", "<p");

    } catch (IOException ex) {
        // Covers FileNotFoundException as well as failures while reading or closing the input.
        ex.printStackTrace();
    }

    Path outputPath = Paths.get("/home/raghav/yxAxis.html");

    try (BufferedWriter writer = Files.newBufferedWriter(outputPath)) {
        writer.append(result);
    } catch (Exception ex) {
        ex.printStackTrace();
    }

    return result;

}

From source file:com.raghav.plot.ReadDOCX.java

/**
 * Prints the text of every run in the body paragraphs and in the table cells of
 * {@code /home/raghav/Desktop/Axis-LB.docx} to standard output.
 *
 * <p>Body-paragraph runs are printed even when their text is {@code null}; table-cell
 * runs with {@code null} text are skipped. Any failure is printed to standard error.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) {
    // try-with-resources so the file handle is released once the document has been read
    try (InputStream in = new FileInputStream(new File("/home/raghav/Desktop/Axis-LB.docx"))) {
        XWPFDocument doc = new XWPFDocument(in);

        // Runs of the top-level paragraphs
        doc.getParagraphs().stream()
                .map((p) -> p.getRuns())
                .filter((runs) -> (runs != null))
                .forEach((runs) -> runs.forEach((r) -> {
                    String text = r.getText(0);
                    System.out.println(text);
                }));

        // Runs nested in tables: table -> row -> cell -> paragraph -> run
        doc.getTables().forEach((tbl) -> {
            tbl.getRows().forEach((row) -> {
                row.getTableCells().forEach((cell) -> {
                    cell.getParagraphs().forEach((p) -> {
                        p.getRuns().stream().filter((r) -> (r != null)).forEach((r) -> {
                            String text = r.getText(0);
                            if (text != null) {
                                System.out.println(text);
                            }
                        });
                    });
                });
            });
        });

    } catch (Exception ex) {
        ex.printStackTrace();
    }
}

From source file:com.siemens.sw360.licenseinfo.outputGenerators.DocxGenerator.java

License:Open Source License

/**
 * Builds a DOCX document from the {@code /templateFrontpageContent.docx} classpath
 * template, fills it via {@code fillDocument} and returns the serialized bytes.
 *
 * @param projectLicenseInfoResults license info results passed on to {@code fillDocument}
 * @param projectName               project name passed on to {@code fillDocument}
 * @return the generated DOCX file content
 * @throws SW360Exception if reading the template or writing the document fails; the
 *                        underlying {@link IOException} is attached as the cause
 */
@Override
public byte[] generateOutputFile(Collection<LicenseInfoParsingResult> projectLicenseInfoResults,
        String projectName) throws SW360Exception {
    try (ByteArrayOutputStream docxOutputStream = new ByteArrayOutputStream()) {
        XWPFDocument document = new XWPFDocument(
                this.getClass().getResourceAsStream("/templateFrontpageContent.docx"));

        fillDocument(document, projectLicenseInfoResults, projectName);

        document.write(docxOutputStream);
        return docxOutputStream.toByteArray();
    } catch (IOException ioe) {
        SW360Exception failure = new SW360Exception(
                "Got IOException when generating docx document: " + ioe.getMessage());
        // Keep the original stack trace reachable for diagnosis. initCause is used because
        // only the message-taking constructor is known to exist here.
        failure.initCause(ioe);
        throw failure;
    }
}

From source file:com.swg.parse.docx.MSDocConvTest.java

/***
 * @return String containing the content of the .docx file from POI apache
 * @throws FileNotFoundException/*from w ww .  j av a 2  s . com*/
 * @throws IOException 
 */
private String getPOI() throws FileNotFoundException, IOException {

    FileInputStream inputTest = new FileInputStream(path + "SCD_2009_E-04 2009.08.21.docx");
    XWPFDocument docxTest = new XWPFDocument(inputTest);
    XWPFWordExtractor ContentTest = new XWPFWordExtractor(docxTest);
    String contentIn = ContentTest.getText();
    return contentIn;
}

From source file:com.swg.parse.docx.MSDocConvTest2.java

/**
 * Extracts the text of {@code CAD_2013_RE-06.docx} (resolved against {@code path})
 * using Apache POI.
 *
 * @return String containing the content of the .docx file from POI apache
 * @throws FileNotFoundException if the file cannot be opened
 * @throws IOException if the document cannot be read or the stream cannot be closed
 */
private String getPOI() throws FileNotFoundException, IOException {

    // try-with-resources so the file handle is released even if parsing fails
    try (FileInputStream inputTest = new FileInputStream(path + "CAD_2013_RE-06.docx")) {
        XWPFDocument docxTest = new XWPFDocument(inputTest);
        XWPFWordExtractor extractor = new XWPFWordExtractor(docxTest);
        return extractor.getText();
    }
}