Example usage for org.apache.poi.xwpf.usermodel XWPFDocument XWPFDocument

List of usage examples for org.apache.poi.xwpf.usermodel XWPFDocument XWPFDocument

Introduction

This page collects usage examples for the XWPFDocument(InputStream) constructor of org.apache.poi.xwpf.usermodel.XWPFDocument.

Prototype

public XWPFDocument(InputStream is) throws IOException 

Source Link

Usage

From source file:org.sleuthkit.autopsy.modules.embeddedfileextractor.ImageExtractor.java

License:Open Source License

/**
 * Extract images from docx format files.
 *
 * Every picture reported by the document is written below the output folder
 * derived from {@code parentFileName} and recorded as an
 * {@code ExtractedImage}.
 *
 * @param af the file from which images are to be extracted.
 *
 * @return list of extracted images. Returns null in case no images were
 *         extracted, the container could not be opened, the output folder
 *         could not be determined, or reading a picture failed.
 */
private List<ExtractedImage> extractImagesFromDocx(AbstractFile af) {
    XWPFDocument docx;
    try {
        docx = new XWPFDocument(new ReadContentInputStream(af));
    } catch (Throwable ex) {
        // instantiating POI containers throw RuntimeExceptions
        logger.log(Level.WARNING, NbBundle.getMessage(this.getClass(),
                "EmbeddedFileExtractorIngestModule.ImageExtractor.docxContainer.init.err", af.getName()), ex); //NON-NLS
        return null;
    }
    List<XWPFPictureData> listOfAllPictures;
    try {
        listOfAllPictures = docx.getAllPictures();
    } catch (Exception ex) {
        // log internal Java and Apache errors as WARNING
        logger.log(Level.WARNING, NbBundle.getMessage(this.getClass(),
                "EmbeddedFileExtractorIngestModule.ImageExtractor.processing.err", af.getName()), ex); //NON-NLS
        return null;
    }

    // if no images are found in the DOCX, return null, else initialize
    // the output folder for image extraction.
    if (listOfAllPictures.isEmpty()) {
        return null;
    }
    String outputFolderPath = getOutputFolderPath(this.parentFileName);
    if (outputFolderPath == null) {
        logger.log(Level.WARNING, NbBundle.getMessage(this.getClass(),
                "EmbeddedFileExtractorIngestModule.ImageExtractor.extractImageFrom.outputPath.exception.msg",
                af.getName())); //NON-NLS
        return null;
    }
    List<ExtractedImage> listOfExtractedImages = new ArrayList<>();
    for (XWPFPictureData xwpfPicture : listOfAllPictures) {
        String fileName = xwpfPicture.getFileName();
        byte[] data;
        try {
            data = xwpfPicture.getData();
        } catch (Exception ex) {
            // log internal Java and Apache errors as WARNING
            logger.log(Level.WARNING,
                    NbBundle.getMessage(this.getClass(),
                            "EmbeddedFileExtractorIngestModule.ImageExtractor.processing.err", af.getName()),
                    ex); //NON-NLS
            return null;
        }
        writeExtractedImage(Paths.get(outputFolderPath, fileName).toString(), data);
        // Reuse the bytes read above: a second getData() call would run
        // outside the guarded try block and bypass the error handling.
        listOfExtractedImages.add(
                new ExtractedImage(fileName, getFileRelativePath(fileName), data.length, af));
    }
    return listOfExtractedImages;
}

From source file:org.terrier.indexing.POIDocument.java

License:Mozilla Public License

/**
 * Selects the POI text extractor that matches the extension of the given
 * file name and wraps the document stream in it.
 *
 * The extension is matched case-insensitively, so {@code "REPORT.DOCX"} is
 * handled the same way as {@code "report.docx"}.
 *
 * @param filename  name of the document; only its suffix is inspected
 * @param docStream stream positioned at the start of the document content
 * @return an extractor for the document, or null when the extension is not
 *         one of .doc, .docx, .ppt, .pptx, .pub, .xls, .xlsx or .vsd
 * @throws IOException if the underlying POI container cannot be read
 */
protected POITextExtractor getExtractor(String filename, InputStream docStream) throws IOException {
    // Locale.ROOT keeps the lower-casing independent of the default locale.
    final String lowerName = filename.toLowerCase(java.util.Locale.ROOT);
    //Word .doc: 
    if (lowerName.endsWith(".doc")) {
        return new WordExtractor(docStream);
    }
    //Word .docx:
    if (lowerName.endsWith(".docx")) {
        return new XWPFWordExtractor(new XWPFDocument(docStream));
    }
    //Powerpoint .ppt: 
    if (lowerName.endsWith(".ppt")) {
        return new PowerPointExtractor(docStream);
    }
    //Powerpoint .pptx: 
    if (lowerName.endsWith(".pptx")) {
        return new XSLFPowerPointExtractor(new XMLSlideShow(docStream));
    }
    //Publisher .pub: 
    if (lowerName.endsWith(".pub")) {
        return new PublisherTextExtractor(docStream);
    }
    //Excel: .xls:
    if (lowerName.endsWith(".xls")) {
        return new ExcelExtractor(new POIFSFileSystem(docStream));
    }
    //Excel: .xlsx:
    if (lowerName.endsWith(".xlsx")) {
        return new org.apache.poi.xssf.extractor.XSSFExcelExtractor(new XSSFWorkbook(docStream));
    }
    //Visio: .vsd:
    if (lowerName.endsWith(".vsd")) {
        return new VisioTextExtractor(docStream);
    }
    return null;
}

From source file:org.wandora.utils.MSOfficeBox.java

License:Open Source License

/**
 * Extracts the text of a .docx file.
 *
 * @param file the .docx file to read
 * @return the text reported by the POI word extractor, or null when the
 *         file cannot be opened or parsed (the stack trace is printed)
 */
public static String getDocxText(File file) {
    // try-with-resources closes the file stream even when parsing fails.
    try (FileInputStream in = new FileInputStream(file)) {
        XWPFDocument docx = new XWPFDocument(in);
        XWPFWordExtractor extractor = new XWPFWordExtractor(docx);
        return extractor.getText();
    } catch (Exception e) {
        e.printStackTrace();
    }
    return null;
}

From source file:org.wso2.carbon.apimgt.impl.indexing.indexer.DocumentIndexer.java

License:Open Source License

/**
 * Fetches the textual content of a document artifact.
 *
 * For FILE documents the associated file resource is loaded and its text is
 * extracted according to the file extension (PDF, Word, Excel, PowerPoint,
 * or plain text/WSDL/XML). For INLINE documents the associated content
 * resource is read as text. Text read line by line is concatenated without
 * line separators.
 *
 * @param registry         registry used to resolve the artifact, its
 *                         associations and the content resources
 * @param documentResource registry resource of the document artifact
 * @return the extracted content, or null when the source type is neither
 *         FILE nor INLINE, the file extension is not handled, or an inline
 *         document has no (existing) content resource
 * @throws RegistryException      if a registry operation fails
 * @throws IOException            if reading or parsing the content fails
 * @throws APIManagementException if a FILE document has no file association
 *                                or the associated resource does not exist
 */
private String fetchDocumentContent(Registry registry, Resource documentResource)
        throws RegistryException, IOException, APIManagementException {
    GenericArtifactManager docArtifactManager = APIUtil.getArtifactManager(registry,
            APIConstants.DOCUMENTATION_KEY);
    GenericArtifact documentArtifact = docArtifactManager.getGenericArtifact(documentResource.getUUID());
    String sourceType = documentArtifact.getAttribute(APIConstants.DOC_SOURCE_TYPE);

    String contentString = null;
    if (Documentation.DocumentSourceType.FILE.name().equals(sourceType)) {
        Association fileAssociations[] = registry.getAssociations(documentResource.getPath(),
                APIConstants.DOCUMENTATION_FILE_ASSOCIATION);
        Association fileAssociation;

        if (fileAssociations.length < 1) {
            String error = "No document associated to API";
            log.error(error);
            throw new APIManagementException(error);
        }

        //a file document can have one file association
        fileAssociation = fileAssociations[0];
        String contentPath = fileAssociation.getDestinationPath();

        if (!registry.resourceExists(contentPath)) {
            String error = "API not found at " + contentPath;
            log.error(error);
            throw new APIManagementException(error);
        }

        Resource contentResource = registry.get(contentPath);

        String fileName = ((ResourceImpl) contentResource).getName();
        String extension = FilenameUtils.getExtension(fileName);
        InputStream inputStream = null;
        try {
            inputStream = contentResource.getContentStream();
            switch (extension) {
            case APIConstants.PDF_EXTENSION:
                PDFParser pdfParser = new PDFParser(inputStream);
                pdfParser.parse();
                COSDocument cosDocument = pdfParser.getDocument();
                PDFTextStripper stripper = new PDFTextStripper();
                PDDocument pdDocument = new PDDocument(cosDocument);
                try {
                    contentString = stripper.getText(pdDocument);
                } finally {
                    // Release the parsed PDF once its text has been taken.
                    pdDocument.close();
                }
                break;
            case APIConstants.DOC_EXTENSION: {
                POIFSFileSystem pfs = new POIFSFileSystem(inputStream);
                WordExtractor msWord2003Extractor = new WordExtractor(pfs);
                contentString = msWord2003Extractor.getText();
                break;
            }
            case APIConstants.DOCX_EXTENSION:
                XWPFDocument doc = new XWPFDocument(inputStream);
                XWPFWordExtractor msWord2007Extractor = new XWPFWordExtractor(doc);
                contentString = msWord2007Extractor.getText();
                break;
            case APIConstants.XLS_EXTENSION: {
                POIFSFileSystem pfs = new POIFSFileSystem(inputStream);
                ExcelExtractor extractor = new ExcelExtractor(pfs);
                contentString = extractor.getText();
                break;
            }
            case APIConstants.XLSX_EXTENSION:
                XSSFWorkbook xssfSheets = new XSSFWorkbook(inputStream);
                XSSFExcelExtractor xssfExcelExtractor = new XSSFExcelExtractor(xssfSheets);
                contentString = xssfExcelExtractor.getText();
                break;
            case APIConstants.PPT_EXTENSION: {
                POIFSFileSystem fs = new POIFSFileSystem(inputStream);
                PowerPointExtractor extractor = new PowerPointExtractor(fs);
                contentString = extractor.getText();
                break;
            }
            case APIConstants.PPTX_EXTENSION:
                XMLSlideShow xmlSlideShow = new XMLSlideShow(inputStream);
                XSLFPowerPointExtractor xslfPowerPointExtractor = new XSLFPowerPointExtractor(xmlSlideShow);
                contentString = xslfPowerPointExtractor.getText();
                break;
            case APIConstants.TXT_EXTENSION:
            case APIConstants.WSDL_EXTENSION:
            case APIConstants.XML_DOC_EXTENSION:
                // The reader is not closed here; the underlying stream is
                // closed by the finally block below.
                BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream));
                String line;
                StringBuilder contentBuilder = new StringBuilder();
                while ((line = reader.readLine()) != null) {
                    contentBuilder.append(line);
                }
                contentString = contentBuilder.toString();
                break;
            }
        } finally {
            IOUtils.closeQuietly(inputStream);
        }

    } else if (Documentation.DocumentSourceType.INLINE.name().equals(sourceType)) {
        Association contentAssociations[] = registry.getAssociations(documentResource.getPath(),
                APIConstants.DOCUMENTATION_CONTENT_ASSOCIATION);
        Association contentAssociation;

        //an inline document can have one or no content associations
        if (contentAssociations.length == 1) {
            contentAssociation = contentAssociations[0];
            String contentPath = contentAssociation.getDestinationPath();

            if (registry.resourceExists(contentPath)) {
                Resource contentResource = registry.get(contentPath);

                InputStream instream = null;
                BufferedReader reader = null;
                String line;
                try {
                    instream = contentResource.getContentStream();
                    reader = new BufferedReader(new InputStreamReader(instream));
                    StringBuilder contentBuilder = new StringBuilder();
                    while ((line = reader.readLine()) != null) {
                        contentBuilder.append(line);
                    }
                    contentString = contentBuilder.toString();
                } finally {
                    if (reader != null) {
                        IOUtils.closeQuietly(reader);
                    }
                }
            }
        }
    }
    return contentString;
}

From source file:org.wso2.carbon.pc.core.DocumentIndexer.java

License:Open Source License

/**
 * Builds the index document for a Word file.
 *
 * The bytes are first parsed as a binary Word document; when POI reports
 * an Office Open XML file instead, the bytes are parsed again as .docx.
 * Any other extraction failure is logged and the document is indexed with
 * null text.
 *
 * @param fileData path, media type and raw bytes of the file to index
 * @return the index document carrying the extracted text plus the "path"
 *         and media-type fields
 * @throws SolrException     if the .docx fallback fails with an I/O error;
 *                           the original exception is attached as cause
 * @throws RegistryException declared by the overridden method
 */
@Override
public IndexDocument getIndexedDocument(AsyncIndexer.File2Index fileData)
        throws SolrException, RegistryException {
    try {
        String wordText = null;
        try {
            //Extract MSWord 2003 document files
            POIFSFileSystem fs = new POIFSFileSystem(new ByteArrayInputStream(fileData.data));

            WordExtractor msWord2003Extractor = new WordExtractor(fs);
            wordText = msWord2003Extractor.getText();

        } catch (OfficeXmlFileException e) {
            //if 2003 extraction failed, try with MSWord 2007 document files extractor
            XWPFDocument doc = new XWPFDocument(new ByteArrayInputStream(fileData.data));

            XWPFWordExtractor msWord2007Extractor = new XWPFWordExtractor(doc);
            wordText = msWord2007Extractor.getText();

        } catch (Exception e) {
            //The reason for not throwing an exception is that since this is an indexer that runs in the background
            //throwing an exception might lead to adverse behaviors in the client side and might lead to
            //other files not being indexed
            String msg = "Failed to extract the document while indexing";
            log.error(msg, e);
        }
        IndexDocument indexDoc = new IndexDocument(fileData.path, wordText, null);

        Map<String, List<String>> fields = new HashMap<String, List<String>>();
        fields.put("path", Arrays.asList(fileData.path));
        if (fileData.mediaType != null) {
            fields.put(IndexingConstants.FIELD_MEDIA_TYPE, Arrays.asList(fileData.mediaType));
        } else {
            // NOTE(review): the fallback media type is PDF although this
            // indexer extracts Word documents - confirm this is intended.
            fields.put(IndexingConstants.FIELD_MEDIA_TYPE, Arrays.asList("application/pdf"));
        }

        indexDoc.setFields(fields);

        return indexDoc;

    } catch (IOException e) {
        String msg = "Failed to write to the index";
        log.error(msg, e);
        // Keep the original exception as cause so callers see the root failure.
        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, msg, e);
    }
}

From source file:org.wurtele.ifttt.watchers.TrainingScheduleWatcher.java

License:Open Source License

/**
 * Parses a training schedule from a Word file, stores the parsed entries
 * and notifies the registered push devices.
 *
 * The document text is split into lines. A line whose first tab-separated
 * field parses as a "MMM dd, yyyy" date starts a new event; a line that
 * starts with a tab contributes additional fields to the most recent
 * event. The resulting entries are sorted, serialized to
 * {@code processedPath(path)} and announced through a push notification.
 * Any failure is logged and the path is added to {@code FAILED}.
 *
 * @param path location of the .docx schedule file
 */
private void processWordFile(Path path) {
    try {
        String text;
        // Close the file stream as soon as the text has been extracted.
        try (java.io.InputStream in = Files.newInputStream(path)) {
            XWPFDocument doc = new XWPFDocument(in);
            XWPFWordExtractor extractor = new XWPFWordExtractor(doc);
            text = extractor.getText();
        }
        List<List<String>> data = new ArrayList<>();
        DateFormat df1 = new SimpleDateFormat("MMM dd, yyyy");
        DateFormat df2 = new SimpleDateFormat("MMM dd, yyyy HH:mm");
        for (String line : text.split("\n")) {
            String[] fields = line.split("\t");
            try {
                df1.parse(fields[0]);
                data.add(new ArrayList<>(Arrays.asList(fields)));
            } catch (ParseException ignored) {
                // The line does not begin with a date, so it does not start
                // a new event.
            }
            if (line.startsWith("\t")) {
                // Continuation line: append its fields to the latest event.
                data.get(data.size() - 1).addAll(Arrays.asList(line.substring(1).split("\t")));
            }
        }
        List<TrainingScheduleEntry> entries = new ArrayList<>();
        for (List<String> event : data) {
            TrainingScheduleEntry entry = new TrainingScheduleEntry();
            entry.setStart(df2.parse(event.get(0) + " " + event.get(1)));
            entry.setEnd(df2.parse(event.get(0) + " " + event.get(2)));
            entry.setGroup(event.get(4));
            entry.setTitle(event.get(5));
            entry.setNotes(event.get(6).length() > 6 ? event.get(6).substring(6) : event.get(6));
            if (event.size() > 13) {
                // Fields beyond the 13 expected ones are appended to the notes.
                for (int i = 7; i < 7 + event.size() - 13; i++) {
                    entry.setNotes(entry.getNotes() + " " + event.get(i));
                }
            }
            // The trailing fields are addressed relative to the end of the row.
            entry.setInstructor(event.get(event.size() - 6).trim());
            entry.setUniform(event.get(event.size() - 5));
            entry.setLocation(event.get(event.size() - 2));
            entries.add(entry);
        }

        if (!entries.isEmpty()) {
            Collections.sort(entries);

            try (OutputStream os = Files.newOutputStream(processedPath(path));
                    ObjectOutputStream oos = new ObjectOutputStream(os)) {
                oos.writeObject(entries);
            }
            logger.info("Processed " + path);
            Date start = DateUtils.truncate(entries.get(0).getStart(), Calendar.DATE);
            Date end = DateUtils.truncate(entries.get(entries.size() - 1).getEnd(), Calendar.DATE);
            DateFormat df = new SimpleDateFormat("MMM d, yyyy");
            String payload = APNS.newPayload().category("scheduleCategory")
                    .alertTitle("Training Schedule Received")
                    .alertBody(entries.size() + " events found for "
                            + (start.before(end) ? df.format(start) + " - " + df.format(end)
                                    : df.format(start)))
                    .sound("default").customField("schedule", path.getParent().getFileName().toString() + "/"
                            + FilenameUtils.getBaseName(path.getFileName().toString()))
                    .build();
            PushDevices.getDevices().stream().forEach((device) -> {
                PushUtils.getService().push(device, payload);
            });
        }
    } catch (Exception e) {
        logger.error("Failed to process training schedule file: " + path, e);
        FAILED.add(path);
    }
}

From source file:pe.gob.onpe.rae.controller.registro.registroController.java

/**
 * Generates the official letter (.docx) for an expediente and writes it to
 * the HTTP response.
 *
 * The letter is built on top of the classpath template "/ejemplo.docx":
 * a footer is added when the template defines no default header, first-page
 * header or default footer, and the letter paragraphs are appended with
 * data taken from the expediente and its ambito. Any failure is logged at
 * SEVERE level and nothing further is written to the response.
 *
 * NOTE(review): several Spanish literals below look like they have lost
 * their accented characters (e.g. "Seor", "CREACIN") - verify against the
 * original source encoding.
 *
 * @param request       current request, used to reach the servlet context
 * @param codExpediente identifier of the expediente to report on
 * @param response      response that receives the generated document
 */
@RequestMapping(value = "generateFVDoc/{codExpediente}", method = RequestMethod.GET)
public void generateFVDoc(HttpServletRequest request, @PathVariable("codExpediente") int codExpediente,
        HttpServletResponse response) {
    try {
        ServletContext sc = request.getSession().getServletContext();

        Expediente expediente = new Expediente(codExpediente);
        expediente = expedienteDAO.find(expediente);

        Ambito amb = new Ambito(expediente.getAmbito().getId());
        amb = ambitoDAO.find(amb);

        int totalElectoresRemitidos = expedientePadronDAO.getCountByExpediente(expediente);
        int totalElectoresIncorporados = expedientePadronDAO.getCountByExpedienteAndEstado(expediente,
                Parametros.ESTADO_ELECTOR_ACTIVO);

        JsonParser jsonParser = new JsonParser();
        JsonObject jsonObject = (JsonObject) jsonParser.parse(amb.getInformacion());
        String nombre = jsonObject.get("nombres").toString() + " "
                + jsonObject.get("apellidoPaterno").toString() + " "
                + jsonObject.get("apellidoMaterno").toString();

        // Load the template and release the classpath stream right away.
        XWPFDocument document;
        try (InputStream is = registroController.class.getResourceAsStream("/ejemplo.docx")) {
            document = new XWPFDocument(is);
        }

        XWPFHeaderFooterPolicy policy = document.getHeaderFooterPolicy();
        if (policy == null) {
            CTSectPr sectPr = document.getDocument().getBody().addNewSectPr();
            policy = new XWPFHeaderFooterPolicy(document, sectPr);
        }

        if (policy.getDefaultHeader() == null && policy.getFirstPageHeader() == null
                && policy.getDefaultFooter() == null) {
            XWPFFooter footerD = policy.getFooter(1);// createFooter(policy.DEFAULT);
            XWPFRun run = footerD.getParagraphs().get(0).createRun();
            run.setText("usuario");
            XWPFParagraph paragraph = footerD.createParagraph();
            paragraph.setAlignment(ParagraphAlignment.DISTRIBUTE);
            run = paragraph.createRun();
            run.setFontFamily("Arial");
            run.setFontSize(8);
            run.setText(
                    "Jr.Washington N 1894, Cercado de Lima. Central Telefonica: 417-0630 www.onpe.gob.pe informes@onpe.gob.pe");

        }

        // Place and date line.
        XWPFParagraph paragraph = document.createParagraph();

        XWPFRun run = paragraph.createRun();
        run.setFontSize(11);
        run.setFontFamily("Arial");
        run.setText("Lima,");
        run.addBreak();

        // Letter number.
        paragraph = document.createParagraph();
        run = paragraph.createRun();
        run.setFontSize(11);
        run.setFontFamily("Arial");
        run.setBold(true);
        run.setText("OFICIO N       -2016-GPP/ONPE");
        run.setUnderline(UnderlinePatterns.SINGLE);
        run.addBreak();

        // Addressee block: name, position and address.
        paragraph = document.createParagraph();
        run = paragraph.createRun();
        run.setFontSize(11);
        run.setFontFamily("Arial");
        run.setText("Seor");

        XWPFRun run1 = paragraph.createRun();
        run1.setFontSize(11);
        run1.setFontFamily("Arial");
        // JsonElement.toString() keeps the JSON quotes, so strip them.
        run1.setText(nombre.replace("\"", ""));
        run1.setBold(true);
        run1.addBreak();

        XWPFRun run2 = paragraph.createRun();
        run2.setFontSize(11);
        run2.setFontFamily("Arial");
        run2.setText(jsonObject.get("cargo").toString().replace("\"", ""));
        run2.addBreak();
        run2.setText("Centro Poblado " + amb.getNombreAmbito());
        run2.addBreak();
        run2.setText("Av. 28 de Julio S/N Centro Cvico Huacrachuco - Municipalidad Provincial de "
                + amb.getProvincia());
        run2.addBreak();
        run2.setText(amb.getDepartamento() + " - " + amb.getProvincia() + " - " + amb.getDistrito());
        run2.addBreak();

        run2 = paragraph.createRun();
        run2.setFontSize(11);
        run2.setFontFamily("Arial");
        run2.setUnderline(UnderlinePatterns.WORDS);
        run2.setText("Presente");

        run2 = paragraph.createRun();
        run2.setFontSize(11);
        run2.setFontFamily("Arial");
        run2.setText(".-");

        // Subject line.
        paragraph = document.createParagraph();
        // This break is added to the previous run (the "Seor" run), because
        // `run` is only reassigned on the next line.
        run.addBreak();
        run = paragraph.createRun();
        run.setFontSize(11);
        run.setFontFamily("Arial");
        run.addBreak();
        run.setText("Asunto");
        run.addTab();
        run.addTab();
        run.setText(": SOLICITUD DE CREACIN DE MESA DE SUFRAGIO.");
        run.addBreak();

        // Reference line.
        paragraph = document.createParagraph();
        run = paragraph.createRun();
        run.setFontSize(11);
        run.setFontFamily("Arial");
        run.setText("Referencia");
        run.addTab();
        run.setText(": OFICIO N 087-2016/M-CP.CHOCOBAMBA (16AGO2016) - Exp. " + expediente.getExpediente());
        run.addBreak();

        // Body paragraphs.
        paragraph = document.createParagraph();
        paragraph.setAlignment(ParagraphAlignment.THAI_DISTRIBUTE);
        run = paragraph.createRun();
        run.setFontSize(11);
        run.setFontFamily("Arial");
        run.setText(
                "Me dirijo a usted con relacin al documento de la referencia con la finalidad de hacer de su "
                        + "conocimiento que se ha cumplido con todos los requisitos que dan inicio al trmite de "
                        + "instalacin de mesas de sufragio en el Centro Poblado " + amb.getNombreAmbito()
                        + ", distrito " + amb.getDistrito() + ", " + "provincia " + amb.getProvincia()
                        + ", departamento " + amb.getDepartamento() + ".");
        paragraph = document.createParagraph();
        paragraph.setAlignment(ParagraphAlignment.THAI_DISTRIBUTE);
        run = paragraph.createRun();
        run.setFontSize(11);
        run.setFontFamily("Arial");
        run.addBreak();
        run.setText("Al respecto, el mencionado expediente contiene un listado de electores que solicitan ser "
                + "parte de la mesa de sufragio de la localidad " + amb.getNombreAmbito()
                + ", el cual, luego de la validacin " + "realizada, se informa que podrn ser incorporados "
                + totalElectoresIncorporados + " electores del total de " + totalElectoresRemitidos
                + " registros "
                + "de electores remitidos. Se adjunta un cuadro resumen con las observaciones mencionadas.");
        paragraph = document.createParagraph();
        paragraph.setAlignment(ParagraphAlignment.THAI_DISTRIBUTE);
        run = paragraph.createRun();
        run.setFontSize(11);
        run.setFontFamily("Arial");
        run.addBreak();
        run.setText(
                "Asimismo, se programar un viaje para la verificacin de rutas, tiempos y servicios de la "
                        + "localidad, la cual se coordinar previamente con las autoridades del centro poblado a fin de "
                        + "programarla adecuadamente; luego de lo cual se emitir un informe de respuesta al "
                        + "resultado de la solicitud, que de ser positivo, conllevara a la instalacin de mesas de sufragio "
                        + "en el centro poblado en mencin, con miras a las ");
        run = paragraph.createRun();
        run.setFontSize(11);
        run.setFontFamily("Arial");
        run.setBold(true);
        run.setText("Elecciones Regionales y Municipales de 2018.");
        paragraph = document.createParagraph();
        paragraph.setAlignment(ParagraphAlignment.THAI_DISTRIBUTE);
        run = paragraph.createRun();
        run.setFontSize(11);
        run.setFontFamily("Arial");
        run.addBreak();
        run.setText("Finalmente, de requerir mayor informacin, agradeceremos se comunique con nosotros al "
                + "telefono 417-0630 anexo 8484 o al 8481.");
        paragraph = document.createParagraph();
        paragraph.setAlignment(ParagraphAlignment.THAI_DISTRIBUTE);
        run = paragraph.createRun();
        run.setFontSize(11);
        run.setFontFamily("Arial");
        run.addBreak();
        run.setText("Sin otro particular.");

        // Closing.
        paragraph = document.createParagraph();
        paragraph.setAlignment(ParagraphAlignment.THAI_DISTRIBUTE);
        run = paragraph.createRun();
        run.setFontSize(11);
        run.setFontFamily("Arial");
        run.addBreak();
        run.addBreak();
        run.setText("Atentamente,");
        response.setContentType("application/vnd.openxmlformats-officedocument.wordprocessingml.document");
        document.write(response.getOutputStream());
    } catch (Exception ex) {
        Logger.getLogger(registroController.class.getName()).log(Level.SEVERE, null, ex);
    }
}

From source file:persistentie.PixelMapper.java

/**
 * Reads a .docx file and rebuilds {@code letterLijst} from its paragraphs.
 *
 * Every paragraph is split on single spaces; each resulting word is added
 * followed by a " " entry, and each paragraph is terminated by a "\n"
 * entry. When reading fails the stack trace is printed and whatever was
 * collected so far is returned.
 *
 * @param bestandsNaam path of the .docx file to read
 * @return the freshly filled {@code letterLijst}
 */
public List<String> leesDocFile(String bestandsNaam) {
    letterLijst = new ArrayList<>();
    // try-with-resources closes the file stream even when parsing fails.
    try (java.io.InputStream in = Files.newInputStream(Paths.get(bestandsNaam))) {
        XWPFDocument document = new XWPFDocument(in);
        for (XWPFParagraph par : document.getParagraphs()) {
            // Split each paragraph on spaces and put every word in the letter list.
            for (String woord : par.getParagraphText().split(" ")) {
                letterLijst.add(woord);
                letterLijst.add(" ");
            }
            letterLijst.add("\n");
        }
    } catch (Exception exep) {
        exep.printStackTrace();
    }

    return letterLijst;
}

From source file:poi.xssf.usermodel.examples.EmbeddedObjects.java

License:Apache License

/**
 * Opens the workbook named by the first argument and opens every embedded
 * object with the POI class matching its content type; unknown content
 * types are reported on standard output.
 *
 * @param args args[0] is the path of the .xlsx workbook to inspect
 * @throws Exception if the workbook or one of its embedded parts cannot be opened
 */
public static void main(String[] args) throws Exception {
    OPCPackage pkg = OPCPackage.open(args[0]);
    try {
        XSSFWorkbook workbook = new XSSFWorkbook(pkg);
        for (PackagePart pPart : workbook.getAllEmbedds()) {
            String contentType = pPart.getContentType();
            // Excel Workbook - either binary or OpenXML
            if (contentType.equals("application/vnd.ms-excel")) {
                HSSFWorkbook embeddedWorkbook = new HSSFWorkbook(pPart.getInputStream());
            }
            // Excel Workbook - OpenXML file format
            else if (contentType.equals("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")) {
                XSSFWorkbook embeddedWorkbook = new XSSFWorkbook(pPart.getInputStream());
            }
            // Word Document - binary (OLE2CDF) file format
            else if (contentType.equals("application/msword")) {
                HWPFDocument document = new HWPFDocument(pPart.getInputStream());
            }
            // Word Document - OpenXML file format
            else if (contentType
                    .equals("application/vnd.openxmlformats-officedocument.wordprocessingml.document")) {
                XWPFDocument document = new XWPFDocument(pPart.getInputStream());
            }
            // PowerPoint Document - binary file format
            else if (contentType.equals("application/vnd.ms-powerpoint")) {
                HSLFSlideShow slideShow = new HSLFSlideShow(pPart.getInputStream());
            }
            // PowerPoint Document - OpenXML file format
            else if (contentType
                    .equals("application/vnd.openxmlformats-officedocument.presentationml.presentation")) {
                OPCPackage docPackage = OPCPackage.open(pPart.getInputStream());
                XSLFSlideShow slideShow = new XSLFSlideShow(docPackage);
            }
            // Any other type of embedded object.
            else {
                System.out.println("Unknown Embedded Document: " + contentType);
                InputStream inputStream = pPart.getInputStream();
            }
        }
    } finally {
        // Close the package even when an embedded part fails to open.
        pkg.close();
    }
}

From source file:ro.dabuno.office.integration.MailMerge.java

/**
 * Merges the rows of a data file into a Word template and writes the
 * combined document to the given output file.
 *
 * @param wordTemplate the .docx template to fill
 * @param dataFile     the CSV or XLS(X) file providing the data rows
 * @param outputFile   path of the document to write
 * @throws Exception if reading the inputs, applying the rows or writing
 *                   the result fails
 */
private void merge(File wordTemplate, File dataFile, String outputFile) throws Exception {
    log.info("Merging data from " + wordTemplate + " and " + dataFile + " into " + outputFile);

    // load every data row from the CSV or XLS(X) file up front
    Data rows = new Data();
    rows.read(dataFile);

    // open the template; both resources are closed in reverse order of declaration
    try (InputStream templateStream = new FileInputStream(wordTemplate);
            XWPFDocument template = new XWPFDocument(templateStream)) {
        // fill in the rows, concatenating the results inside the document
        applyLines(rows, template);

        log.info("Writing overall result to " + outputFile);
        try (OutputStream target = new FileOutputStream(outputFile)) {
            template.write(target);
        }
    }
}