Usage examples for the org.apache.poi.xwpf.usermodel.XWPFDocument constructor XWPFDocument(InputStream)
public XWPFDocument(InputStream is) throws IOException
From source file:com.qwazr.library.poi.DocxParser.java
License:Apache License
/**
 * Parses a DOCX stream: stores the MIME type and the core properties as metadata, then adds
 * the extracted text and its detected language to a new result document.
 *
 * @param parameters    request parameters (not read by this method)
 * @param inputStream   the DOCX content
 * @param extension     file extension used to resolve the MIME type
 * @param mimeType      MIME type hint used to resolve the MIME type
 * @param resultBuilder receives the metadata and the parsed document
 * @throws IOException if the document cannot be read
 */
@Override
public void parseContent(final MultivaluedMap<String, String> parameters, final InputStream inputStream,
        final String extension, final String mimeType, final ParserResultBuilder resultBuilder)
        throws IOException {
    final XWPFDocument docx = new XWPFDocument(inputStream);
    try (XWPFWordExtractor extractor = new XWPFWordExtractor(docx)) {
        final ParserFieldsBuilder metaFields = resultBuilder.metas();
        metaFields.set(MIME_TYPE, findMimeType(extension, mimeType, this::findMimeTypeUsingDefault));
        copyCoreProperties(extractor.getCoreProperties(), metaFields);

        final ParserFieldsBuilder contentFields = resultBuilder.newDocument();
        contentFields.add(CONTENT, extractor.getText());
        contentFields.add(LANG_DETECTION, languageDetection(contentFields, CONTENT, 10000));
    }
}

/** Copies the core properties, when present, into the metadata fields. */
private void copyCoreProperties(final CoreProperties properties, final ParserFieldsBuilder metaFields) {
    if (properties == null) {
        return;
    }
    metaFields.add(TITLE, properties.getTitle());
    metaFields.add(CREATOR, properties.getCreator());
    metaFields.add(CREATION_DATE, properties.getCreated());
    metaFields.add(MODIFICATION_DATE, properties.getModified());
    metaFields.add(SUBJECT, properties.getSubject());
    metaFields.add(DESCRIPTION, properties.getDescription());
    metaFields.add(KEYWORDS, properties.getKeywords());
}
From source file:com.raaz.doc.converter.ConvertDocxBigToXHTML.java
License:LGPL
public static void main(String[] args) { long startTime = System.currentTimeMillis(); try {/*from w w w . j ava 2 s . c o m*/ // 1) Load docx with POI XWPFDocument XWPFDocument document = new XWPFDocument(Data.class.getResourceAsStream("DocxBig.docx")); // 2) Convert POI XWPFDocument 2 PDF with iText File outFile = new File("target/DocxBig.htm"); outFile.getParentFile().mkdirs(); OutputStream out = new FileOutputStream(outFile); XHTMLConverter.getInstance().convert(document, out, null); } catch (Throwable e) { e.printStackTrace(); } System.out.println("Generate DocxBig.htm with " + (System.currentTimeMillis() - startTime) + " ms."); }
From source file:com.raaz.doc.converter.ConvertDocxStructuresToXHTML.java
License:LGPL
public static void main(String[] args) { long startTime = System.currentTimeMillis(); try {/*from w w w . j a va2s.com*/ // 1) Load docx with POI XWPFDocument XWPFDocument document = new XWPFDocument(Data.class.getResourceAsStream("DocxStructures.docx")); // 2) Convert POI XWPFDocument 2 PDF with iText File outFile = new File("target/DocxStructures.htm"); outFile.getParentFile().mkdirs(); OutputStream out = new FileOutputStream(outFile); XHTMLConverter.getInstance().convert(document, out, null); } catch (Throwable e) { e.printStackTrace(); } System.out.println("Generate DocxStructures.htm with " + (System.currentTimeMillis() - startTime) + " ms."); }
From source file:com.raaz.doc.converter.ConvertOoxmlToXHTML.java
License:LGPL
public static void main(String[] args) { long startTime = System.currentTimeMillis(); try {/* ww w . ja v a2 s .c om*/ // 1) Load docx with POI XWPFDocument XWPFDocument document = new XWPFDocument(Data.class.getResourceAsStream("ooxml.docx")); // 2) Convert POI XWPFDocument 2 PDF with iText File outFile = new File("target/ooxml.htm"); outFile.getParentFile().mkdirs(); OutputStream out = new FileOutputStream(outFile); XHTMLConverter.getInstance().convert(document, out, null); } catch (Throwable e) { e.printStackTrace(); } System.out.println("Generate ooxml.htm with " + (System.currentTimeMillis() - startTime) + " ms."); }
From source file:com.raghav.plot.ConvertWord.java
public void ConvertWordToHtml() { try {/*from ww w.j a v a 2s. c om*/ // 1) Load DOCX into XWPFDocument InputStream doc = new FileInputStream(new File(outputlFolderPath + docName)); System.out.println("InputStream" + doc); XWPFDocument document = new XWPFDocument(doc); // 2) Prepare XHTML options (here we set the IURIResolver to load images from a "word/media" folder) XHTMLOptions options = XHTMLOptions.create(); //.URIResolver(new FileURIResolver(new File("word/media")));; // Extract image String root = "target"; File imageFolder = new File(root + "/images/" + doc); options.setExtractor(new FileImageExtractor(imageFolder)); // URI resolver options.URIResolver(new FileURIResolver(imageFolder)); OutputStream out = new FileOutputStream(new File(htmlPath())); XHTMLConverter.getInstance().convert(document, out, options); System.out.println("OutputStream " + out.toString()); } catch (FileNotFoundException ex) { ex.printStackTrace(); } catch (IOException ex) { ex.printStackTrace(); } }
From source file:com.raghav.plot.DocxToHtml.java
public String convertDocToHtml(long documentVersionId) { // DocumentVersion documentVersion = documentVersionService.getDocumentVersion(documentVersionId); InputStream in = null;//from ww w . ja v a2s . co m String result = ""; try { in = new FileInputStream(new File("/home/raghav/Desktop/Axis-Flat.docxr.docx")); XWPFDocument document = new XWPFDocument(in); XHTMLOptions options = XHTMLOptions.create() .URIResolver(new FileURIResolver(new File("/home/raghav/yzzzzAxis-Flat_docxb.html"))); OutputStream out = new ByteArrayOutputStream(); XHTMLConverter.getInstance().convert(document, out, options); String html = out.toString(); //String pattern = "_tag_\\[(.*?)\\]"; String pattern = "_tag_"; Pattern r = Pattern.compile(pattern); Matcher m = r.matcher(html); String htmlCopy = html; int index = 0; htmlCopy = htmlCopy.replaceAll("</table>", "</table><br>"); htmlCopy = htmlCopy.replaceAll("<style>.*?</style>", ""); htmlCopy = htmlCopy.replaceAll("style=\\\"width:.*?pt", "style=\\\"width:100%"); htmlCopy = htmlCopy.replaceAll("style=\"", "style=\"word-wrap: break-word; "); // String[] bd = {"Table3", "Table5", "Table8"}; String[] bd = {}; for (int o = 0; o < bd.length; o++) { htmlCopy = htmlCopy.replaceAll("<td class=\"TableNormal " + bd[o] + "\" style=\".*?\"", "<td class=\"TableNormal " + bd[o] + "\""); htmlCopy = htmlCopy.replaceAll("<td class=\"TableNormal " + bd[o] + "\"", "<tid align=\"left\" class=\"TableNormal " + bd[o] + "\" style=\"padding:10px 10px 10px 10px;border:1px solid black;word-break: break-all;\""); } // htmlCopy = htmlCopy.replaceAll("<td ", "<td align=\"left\" style= \"padding:10px 10px 10px 0px;\" "); htmlCopy = htmlCopy.replaceAll("<tid ", "<td "); htmlCopy = htmlCopy.replaceAll("margin.*?;", ""); htmlCopy = htmlCopy.replaceAll("<body>", "<body><div style=\"margin-left:20px;margin-right:20px\">"); htmlCopy = htmlCopy.replaceAll("</body>", "</div></body>"); result = htmlCopy.replaceAll(">", ">\n"); int ind = 1; while (result.contains("<p")) { String 
id = "version_" + documentVersionId + "_" + ind; String customText = "<_p id='" + id + "' ng-dblclick='showCommentBox(\"" + id + "\")'"; ind++; result = result.replaceFirst("<p", customText); } result = result.replaceAll("<_p", "<p"); } catch (FileNotFoundException ex) { ex.printStackTrace(); } catch (IOException ex) { ex.printStackTrace(); } finally { try { in.close(); } catch (IOException ex) { ex.printStackTrace(); } } Path outputPath = Paths.get("/home/raghav/yxAxis.html"); try (BufferedWriter writer = Files.newBufferedWriter(outputPath)) { writer.append(result); } catch (Exception ex) { ex.printStackTrace(); } return result; }
From source file:com.raghav.plot.ReadDOCX.java
public static void main(String[] args) { InputStream in = null;//w w w . java 2 s . c om String result = ""; try { in = new FileInputStream(new File("/home/raghav/Desktop/Axis-LB.docx")); XWPFDocument doc = new XWPFDocument(in); doc.getParagraphs().stream().map((p) -> p.getRuns()).filter((runs) -> (runs != null)) .forEach((runs) -> { runs.stream().forEach((r) -> { String text = r.getText(0); System.out.println(text); }); }); doc.getTables().stream().forEach((tbl) -> { tbl.getRows().stream().forEach((row) -> { row.getTableCells().stream().forEach((cell) -> { cell.getParagraphs().stream().forEach((p) -> { p.getRuns().stream().filter((r) -> (r != null)).forEach((r) -> { String text = r.getText(0); if (text != null) { System.out.println(text); } }); }); }); }); }); } catch (Exception ex) { ex.printStackTrace(); } }
From source file:com.siemens.sw360.licenseinfo.outputGenerators.DocxGenerator.java
License:Open Source License
@Override public byte[] generateOutputFile(Collection<LicenseInfoParsingResult> projectLicenseInfoResults, String projectName) throws SW360Exception { try {//from ww w .jav a 2 s.c o m XWPFDocument document = new XWPFDocument( this.getClass().getResourceAsStream("/templateFrontpageContent.docx")); fillDocument(document, projectLicenseInfoResults, projectName); ByteArrayOutputStream docxOutputStream = new ByteArrayOutputStream(); document.write(docxOutputStream); docxOutputStream.close(); return docxOutputStream.toByteArray(); } catch (IOException ioe) { throw new SW360Exception("Got IOException when generating docx document: " + ioe.getMessage()); } }
From source file:com.swg.parse.docx.MSDocConvTest.java
/*** * @return String containing the content of the .docx file from POI apache * @throws FileNotFoundException/*from w ww . j av a 2 s . com*/ * @throws IOException */ private String getPOI() throws FileNotFoundException, IOException { FileInputStream inputTest = new FileInputStream(path + "SCD_2009_E-04 2009.08.21.docx"); XWPFDocument docxTest = new XWPFDocument(inputTest); XWPFWordExtractor ContentTest = new XWPFWordExtractor(docxTest); String contentIn = ContentTest.getText(); return contentIn; }
From source file:com.swg.parse.docx.MSDocConvTest2.java
/**
 * Extracts the text of the test .docx file with Apache POI.
 *
 * @return String containing the content of the .docx file from POI apache
 * @throws FileNotFoundException if the test file cannot be opened
 * @throws IOException if the document cannot be read
 */
private String getPOI() throws FileNotFoundException, IOException {
    // try-with-resources closes the file; the original never closed it.
    try (FileInputStream inputTest = new FileInputStream(path + "CAD_2013_RE-06.docx")) {
        XWPFDocument docxTest = new XWPFDocument(inputTest);
        XWPFWordExtractor contentTest = new XWPFWordExtractor(docxTest);
        return contentTest.getText();
    }
}