Usage examples for the org.apache.poi.xwpf.usermodel.XWPFDocument constructor XWPFDocument(InputStream)
public XWPFDocument(InputStream is) throws IOException
From source file:rocky.sizecounter.SizeCounterUtil.java
License:Apache License
/** * Count Word's number of page from input directory. * //from ww w . ja v a 2 s . com * @param filePath . * @return Number of A4 pages */ public static int countWordFile(String filePath) { FileInputStream fis = null; int page = 0; try { fis = new FileInputStream(filePath); if (CommonUtil.getExtension(filePath).equals("doc")) { // When file is .DOC HWPFDocument doc = new HWPFDocument(fis); page = doc.getDocProperties().getCPg(); } else if (CommonUtil.getExtension(filePath).equals("docx")) { // When file is .DOCX XWPFDocument doc = new XWPFDocument(fis); XWPFWordExtractor ex = new XWPFWordExtractor(doc); page = ex.getExtendedProperties().getUnderlyingProperties().getPages(); } } catch (FileNotFoundException ex) { LOG.warn("File " + filePath + " not found", ex); } catch (IOException ex) { LOG.warn("Invalid when reading file.", ex); } catch (Exception ex) { LOG.warn("Can not count file " + filePath, ex); } finally { if (fis != null) { try { fis.close(); } catch (IOException ex) { LOG.warn("Close the file input stream", ex); } } } return page; }
From source file:ru.lisaprog.parser.ExtractText.java
License:Open Source License
public static String parseDOCX(String file) { try {/*from w ww. j av a 2 s. c om*/ BufferedInputStream isr = new BufferedInputStream(new FileInputStream(file)); XWPFWordExtractor word = new XWPFWordExtractor(new XWPFDocument(isr)); return word.getText(); } catch (Exception e) { // Common.createLog(e); return ""; } }
From source file:service.GenerationLettres.CreerPiecesManquantes.java
/** * * @param filename - Nom du fichier modle de demande des pices manquantes. * @param idDossier - Identifiant du dossier pour lequel l est cr * @throws InvalidFormatException/*from w w w. ja va2 s .com*/ * @throws IOException */ public void replacePiecesManquantes(String filename, Formation formation, String sexe, String nom, String prenom, String adresse, Adresse adresseEntite, List<Justificatif> justificatifsOk) throws InvalidFormatException, IOException { List<Justificatif> lesJustificatifs = formation.getLesJustificatifs(); Date dateActuelle = new Date(); DateFormat dateForm = new SimpleDateFormat("dd MMMM yyyy", Locale.FRANCE); String date = dateForm.format(dateActuelle); String codePostal = adresseEntite.getCodePostal(); String ville = adresseEntite.getVille(); String civilite = ""; if (sexe.equals("Masculin")) civilite = "Monsieur"; if (sexe.equals("Feminin")) civilite = "Madame"; String intitule = formation.getIntitule(); if (justificatifsOk != null) { for (Justificatif just : justificatifsOk) { lesJustificatifs.remove(just); } } String newFileName = nom + prenom + " Lettre piecesManquantes.docx"; File file = new File(PATH_MODELS + "/" + filename); FileInputStream fis = new FileInputStream(file.getAbsolutePath()); XWPFDocument doc = new XWPFDocument(fis); doc.write(new FileOutputStream(PATH_TARGET + "/" + newFileName)); doc.close(); doc = new XWPFDocument(OPCPackage.open(PATH_TARGET + "/" + newFileName)); for (XWPFParagraph p : doc.getParagraphs()) { int numberOfRuns = p.getRuns().size(); StringBuilder sb = new StringBuilder(); for (XWPFRun r : p.getRuns()) { int pos = r.getTextPosition(); if (r.getText(pos) != null) { sb.append(r.getText(pos)); } } if (sb.length() > 0 && sb.toString().contains("$formation")) { for (int i = numberOfRuns - 1; i > 0; i--) { p.removeRun(i); } String text = sb.toString().replace("$formation", intitule); XWPFRun run = p.getRuns().get(0); run.setText(text, 0); System.out.println("Changement de la formation effectue"); } } for 
(XWPFParagraph p : doc.getParagraphs()) { int numberOfRuns = p.getRuns().size(); StringBuilder sb = new StringBuilder(); for (XWPFRun r : p.getRuns()) { int pos = r.getTextPosition(); if (r.getText(pos) != null) { sb.append(r.getText(pos)); } } if (sb.length() > 0 && sb.toString().contains("$date")) { for (int i = numberOfRuns - 1; i > 0; i--) { p.removeRun(i); } String text = sb.toString().replace("$date", date); XWPFRun run = p.getRuns().get(0); run.setText(text, 0); System.out.println("Changement de la date effectue"); } } for (XWPFParagraph p : doc.getParagraphs()) { int numberOfRuns = p.getRuns().size(); StringBuilder sb = new StringBuilder(); for (XWPFRun r : p.getRuns()) { int pos = r.getTextPosition(); if (r.getText(pos) != null) { sb.append(r.getText(pos)); } } if (sb.length() > 0 && sb.toString().contains("$civilite")) { for (int i = numberOfRuns - 1; i > 0; i--) { p.removeRun(i); } String text = sb.toString().replace("$civilite", civilite); XWPFRun run = p.getRuns().get(0); run.setText(text, 0); System.out.println("Changement de la civilite effectue"); } } for (XWPFParagraph p : doc.getParagraphs()) { int numberOfRuns = p.getRuns().size(); StringBuilder sb = new StringBuilder(); for (XWPFRun r : p.getRuns()) { int pos = r.getTextPosition(); if (r.getText(pos) != null) { sb.append(r.getText(pos)); } } if (sb.length() > 0 && sb.toString().contains("$prenom")) { for (int i = numberOfRuns - 1; i > 0; i--) { p.removeRun(i); } String text = sb.toString().replace("$prenom", prenom); XWPFRun run = p.getRuns().get(0); run.setText(text, 0); System.out.println("Changement du prenom effectue"); } } for (XWPFParagraph p : doc.getParagraphs()) { int numberOfRuns = p.getRuns().size(); StringBuilder sb = new StringBuilder(); for (XWPFRun r : p.getRuns()) { int pos = r.getTextPosition(); if (r.getText(pos) != null) { sb.append(r.getText(pos)); } } if (sb.length() > 0 && sb.toString().contains("$nom")) { for (int i = numberOfRuns - 1; i > 0; i--) { p.removeRun(i); } 
String text = sb.toString().replace("$nom", nom); XWPFRun run = p.getRuns().get(0); run.setText(text, 0); System.out.println("Changement du nom effectue"); } } for (XWPFParagraph p : doc.getParagraphs()) { int numberOfRuns = p.getRuns().size(); StringBuilder sb = new StringBuilder(); for (XWPFRun r : p.getRuns()) { int pos = r.getTextPosition(); if (r.getText(pos) != null) { sb.append(r.getText(pos)); } } if (sb.length() > 0 && sb.toString().contains("$adresse")) { for (int i = numberOfRuns - 1; i > 0; i--) { p.removeRun(i); } String text = sb.toString().replace("$adresse", adresse); XWPFRun run = p.getRuns().get(0); run.setText(text, 0); System.out.println("Changement de l'adresse effectue"); } } for (XWPFParagraph p : doc.getParagraphs()) { int numberOfRuns = p.getRuns().size(); StringBuilder sb = new StringBuilder(); for (XWPFRun r : p.getRuns()) { int pos = r.getTextPosition(); if (r.getText(pos) != null) { sb.append(r.getText(pos)); } } if (sb.length() > 0 && sb.toString().contains("$codePostal")) { for (int i = numberOfRuns - 1; i > 0; i--) { p.removeRun(i); } String text = sb.toString().replace("$codePostal", codePostal); XWPFRun run = p.getRuns().get(0); run.setText(text, 0); System.out.println("Changement du code postal effectue"); } } for (XWPFParagraph p : doc.getParagraphs()) { int numberOfRuns = p.getRuns().size(); StringBuilder sb = new StringBuilder(); for (XWPFRun r : p.getRuns()) { int pos = r.getTextPosition(); if (r.getText(pos) != null) { sb.append(r.getText(pos)); } } if (sb.length() > 0 && sb.toString().contains("$ville")) { for (int i = numberOfRuns - 1; i > 0; i--) { p.removeRun(i); } String text = sb.toString().replace("$ville", ville); XWPFRun run = p.getRuns().get(0); run.setText(text, 0); System.out.println("Changement de la ville effectue"); } } XWPFTable table = doc.createTable(lesJustificatifs.size(), 2); table.setCellMargins(200, 250, 0, 250); int i = 0; for (XWPFTableRow r : table.getRows()) { XWPFTableCell cell = r.getCell(0); 
cell.setText(lesJustificatifs.get(i).getTitre()); cell = r.getCell(1); cell.setText(lesJustificatifs.get(i).getDescription()); i++; } doc.write(new FileOutputStream(PATH_TARGET + "/temp.docx")); new File(PATH_TARGET + "/temp.docx").delete(); doc.close(); //copyTempToFile(filename); System.out.println("replaceLettrePiecesManquantes DONE"); }
From source file:steffen.haertlein.file.FileObject.java
License:Apache License
private void readWordDocument() { try {//from w w w . j av a 2 s .com FileInputStream fs = new FileInputStream(f); XWPFDocument document; document = new XWPFDocument(OPCPackage.open(fs)); XWPFWordExtractor docxReader = new XWPFWordExtractor(document); String text = docxReader.getText(); docxReader.close(); String[] docxLines = text.split("\n"); for (String line : docxLines) { lines.add(line); } fs.close(); } catch (InvalidFormatException e) { JOptionPane.showMessageDialog(null, "InvalidFormatException in readWordDocument", "Fehler", JOptionPane.ERROR_MESSAGE); e.printStackTrace(); } catch (FileNotFoundException e) { JOptionPane.showMessageDialog(null, "FileNotFoundException in readWordDocument", "Fehler", JOptionPane.ERROR_MESSAGE); e.printStackTrace(); } catch (IOException e) { JOptionPane.showMessageDialog(null, "IOException in readWordDocument", "Fehler", JOptionPane.ERROR_MESSAGE); e.printStackTrace(); } }
From source file:test.poi.ConvertDocxResumeToPDF.java
License:LGPL
private static void create() { long startTime = System.currentTimeMillis(); try {//from www. j a v a 2s . c om // 1) Load docx with POI XWPFDocument XWPFDocument document = new XWPFDocument( ConvertDocxResumeToPDF.class.getClassLoader().getResourceAsStream("DocxResume.docx")); // 2) Convert POI XWPFDocument 2 PDF with iText File outFile = new File("d:/DocxResume.pdf"); outFile.getParentFile().mkdirs(); OutputStream out = new FileOutputStream(outFile); PdfOptions options = PdfOptions.create(); // options.fontProvider(new IFontProvider() { @Override public Font getFont(String familyName, String encoding, float size, int style, Color color) { try { BaseFont bfChinese = BaseFont.createFont("c:/Windows/Fonts/arialuni.ttf", BaseFont.IDENTITY_H, BaseFont.EMBEDDED); Font fontChinese = new Font(bfChinese, size, style, color); if (familyName != null) fontChinese.setFamily(familyName); return fontChinese; } catch (Throwable e) { e.printStackTrace(); return ITextFontRegistry.getRegistry().getFont(familyName, encoding, size, style, color); } } }); PdfConverter.getInstance().convert(document, out, options); } catch (Throwable e) { e.printStackTrace(); } System.out.println("Generate DocxResume.pdf with " + (System.currentTimeMillis() - startTime) + " ms."); }
From source file:test.poi.ConvertDocxResumeToXHTML.java
License:LGPL
public static void main(String[] args) { long startTime = System.currentTimeMillis(); try {/* w w w . ja v a2 s . c om*/ // 1) Load docx with POI XWPFDocument XWPFDocument document = new XWPFDocument( ConvertDocxResumeToXHTML.class.getClassLoader().getResourceAsStream("DocxResume.docx")); // 2) Convert POI XWPFDocument 2 PDF with iText File outFile = new File("d:/aa/DocxResume.html"); outFile.getParentFile().mkdirs(); OutputStream out = new FileOutputStream(outFile); XHTMLConverter.getInstance().convert(document, out, null); } catch (Throwable e) { e.printStackTrace(); } System.out.println("Generate DocxResume.htm with " + (System.currentTimeMillis() - startTime) + " ms."); }
From source file:test.poi.ImageDocxToHtml.java
License:LGPL
public static void main(String[] args) { long startTime = System.currentTimeMillis(); try {/*w w w . j av a 2 s .com*/ // 1) Load docx with POI XWPFDocument XWPFDocument document = new XWPFDocument( ImageDocxToHtml.class.getClassLoader().getResourceAsStream("images.docx")); // 2) Convert POI XWPFDocument 2 docx with iText String root = "d:/bb"; File outFile = new File(root, "docximages.html"); outFile.getParentFile().mkdirs(); File imageFolder = new File(root, "images"); // 3) ? //imageFolder.getParentFile().mkdirs(); XHTMLOptions options = XHTMLOptions.create(); options.setExtractor(new FileImageExtractor(imageFolder)); options.URIResolver(new BasicURIResolver("images")); //htmlurl OutputStream out = new FileOutputStream(outFile); XHTMLConverter.getInstance().convert(document, out, options); } catch (Throwable e) { e.printStackTrace(); } System.out.println("Generate DocxResume.htm with " + (System.currentTimeMillis() - startTime) + " ms."); }
From source file:textextractor.WordManager.java
public ArrayList extractDocx(FileInputStream fis) throws IOException { listDocx = new ArrayList(); XWPFDocument docx = new XWPFDocument(fis); List<XWPFParagraph> pragraphList = docx.getParagraphs(); pragraphList.stream().forEach((pragraph) -> { if (pragraph.getStyle() != null) { System.out.println(pragraph.getStyle()); }//from w w w. j a va 2 s . c o m System.out.println(pragraph.getText()); String[] ary = pragraph.getText().split(" "); listDocx.addAll(Arrays.asList(ary)); }); return listDocx; }
From source file:uk.ac.ebi.biostudies.file.thumbnails.DOCXThumbnail.java
License:Apache License
@Override public void generateThumbnail(String sourceFilePath, File thumbnailFile) throws IOException { //TODO: Confirm licence //convert word to pdf String tempPDFFilePath = thumbnailFile.getAbsolutePath() + ".pdf"; FileInputStream in = new FileInputStream(sourceFilePath); FileOutputStream out = new FileOutputStream(tempPDFFilePath); XWPFDocument wordDoc = new XWPFDocument(in); PdfConverter.getInstance().convert(wordDoc, out, PdfOptions.create()); in.close();// w ww . j ava 2s . c om out.close(); //convert pdf to image PDPage page = (PDPage) PDDocument.load(tempPDFFilePath).getDocumentCatalog().getAllPages().get(0); BufferedImage image = page.convertToImage(BufferedImage.TYPE_INT_RGB, 96); ImageIOUtil.writeImage(image, thumbnailFile.getAbsolutePath(), 96); new File(tempPDFFilePath).delete(); }
From source file:util.DocumentFunction.java
public static String readDocxFile(String fileName) { StringBuilder text = new StringBuilder(); try {/* w ww.j a v a 2s .com*/ File file = new File(fileName); FileInputStream fis = new FileInputStream(file.getAbsolutePath()); XWPFDocument document = new XWPFDocument(fis); List<XWPFParagraph> paragraphs = document.getParagraphs(); //System.out.println("Total no of paragraph "+paragraphs.size()); for (XWPFParagraph para : paragraphs) { text.append(para.getText() + "\n"); } fis.close(); } catch (Exception e) { e.printStackTrace(); } return text.toString(); }