Example usage for the org.apache.poi.xwpf.usermodel.XWPFDocument constructor XWPFDocument(InputStream)

List of usage examples for the org.apache.poi.xwpf.usermodel.XWPFDocument constructor XWPFDocument(InputStream)

Introduction

On this page you can find example usages of the XWPFDocument(InputStream) constructor of org.apache.poi.xwpf.usermodel.XWPFDocument.

Prototype

public XWPFDocument(InputStream is) throws IOException 

Source Link

Usage

From source file:com.swg.parse.docx.NewExtract.java

/**
 * Writes every picture embedded in a .docx file to an output folder as a JPEG and records the
 * absolute path of each written file in the "BeforePOJO" lists.
 *
 * <p>The output folder is the hard-coded results directory with {@code cnt} appended to its
 * name. Any failure is reported on stderr and then terminates the JVM with exit code -1.
 *
 * @param src path of the .docx file to read
 * @param cnt suffix appended to the output folder name
 */
private void extractImages(String src, int cnt) {
    // Built once so the folder and every image file are guaranteed to share the same base path.
    String folderPath =
            "C:\\Users\\KXK3\\Documents\\NetBeansProjects\\ParseSuite2\\ParseDocx\\build\\test\\unit\\results\\NewPicFolder"
                    + cnt;

    // try-with-resources closes the input stream on both the normal and the failure path.
    try (FileInputStream fs = new FileInputStream(src)) {
        XWPFDocument docx = new XWPFDocument(fs);
        List<XWPFPictureData> piclist = docx.getAllPictures();
        new File(folderPath).mkdir();

        int i = 0;
        for (XWPFPictureData pic : piclist) {
            // ImageIO.read returns null for formats it cannot decode; ImageIO.write then
            // throws IllegalArgumentException, which is handled by the catch below.
            BufferedImage imag = ImageIO.read(new ByteArrayInputStream(pic.getData()));
            File createdImageFile = new File(folderPath + "\\imagefromword" + i + ".jpg");
            ImageIO.write(imag, "jpg", createdImageFile);
            i++;

            // Labels are 1-based (i was already incremented) while file names are 0-based.
            System.out.println("path to image " + i + " = " + createdImageFile.getAbsolutePath());
            labelBeforePOJO.add("path to image " + i);
            ValueBeforePOJO.add(createdImageFile.getAbsolutePath());
            sectionBeforePOJO.add(section);
        }
    } catch (Exception e) {
        // Report the cause before terminating so the failure is diagnosable.
        e.printStackTrace();
        System.exit(-1);
    }
}

From source file:com.swg.parse.docx.OpenFolderAction.java

/**
 * Reads a .docx file with Apache POI and returns its text content as a single string.
 * The returned string may be missing parts of the document due to the POI library.
 *
 * @param f the .docx file
 * @return POI content of the .docx
 * @throws FileNotFoundException if the file cannot be opened for reading
 * @throws IOException if reading the document fails
 */
private String getPOI(File f) throws FileNotFoundException, IOException {
    // try-with-resources guarantees the file handle is released even when parsing fails.
    try (FileInputStream inputTest = new FileInputStream(f.getAbsolutePath())) {
        XWPFDocument docxTest = new XWPFDocument(inputTest);
        XWPFWordExtractor contentTest = new XWPFWordExtractor(docxTest);
        return contentTest.getText();
    }
}

From source file:com.swg.parse.docx.OpenWord.java

/**
 * Reads the currently selected .docx file ({@code selectedFile}) with Apache POI and returns
 * its text content as a single string.
 * The returned string may be missing parts of the document due to the POI library.
 *
 * @return POI content of the .docx
 * @throws FileNotFoundException if the selected file cannot be opened for reading
 * @throws IOException if reading the document fails
 */
private String getPOI() throws FileNotFoundException, IOException {
    // try-with-resources guarantees the file handle is released even when parsing fails.
    try (FileInputStream inputTest = new FileInputStream(selectedFile.getAbsolutePath())) {
        XWPFDocument docxTest = new XWPFDocument(inputTest);
        XWPFWordExtractor contentTest = new XWPFWordExtractor(docxTest);
        return contentTest.getText();
    }
}

From source file:com.swg.parse.docx.V2Test.java

/**
 * Reads the fixed test document {@code CAD_2013_RE-02.docx}, located under {@code path}, with
 * Apache POI and returns its text content.
 *
 * @return String containing the content of the .docx file as extracted by Apache POI
 * @throws FileNotFoundException if the document cannot be opened for reading
 * @throws IOException if reading the document fails
 */
private String getPOI() throws FileNotFoundException, IOException {
    // try-with-resources guarantees the file handle is released even when parsing fails.
    try (FileInputStream inputTest = new FileInputStream(path + "CAD_2013_RE-02.docx")) {
        XWPFDocument docxTest = new XWPFDocument(inputTest);
        XWPFWordExtractor contentTest = new XWPFWordExtractor(docxTest);
        return contentTest.getText();
    }
}

From source file:com.unsa.view.MainView.java

License:Creative Commons License

/**
 * Handles the "procesar" button: analyses every selected Word file, collects one
 * {@code Metadata} entry per file, hands the collected list to an {@code ExcelController} and
 * fills the output table with one row per metadata entry.
 *
 * <p>Files whose name ends in "x" are opened as .docx (XWPF). All other files are first opened
 * as legacy .doc (HWPF) and, if that fails, retried as .docx. A file that cannot be opened by
 * any of these attempts gets the metadata returned by {@code loadMetadataFail()}.
 *
 * @param evt the action event (unused)
 * @throws SQLException declared for the helpers called from here
 * @throws IOException declared for the helpers called from here
 */
private void btnProcesarActionPerformed(java.awt.event.ActionEvent evt) throws SQLException, IOException {//GEN-FIRST:event_btnProcesarActionPerformed
    // NOTE(review): the original condition tested lblInstitucion twice; a second, different
    // label was probably intended for one of the two checks -- confirm against the form.
    if (lblInstitucion.getText().equals("") || lblIdioma.getText().equals("")) {
        JOptionPane.showMessageDialog(null, "Una de las opciones generales est vacio");
        return;
    }
    if (jTextField1.getText().equals("")) {
        JOptionPane.showMessageDialog(null, "No se especific la ruta donde guardar la metadata");
        return;
    }

    jProgressBar1.setValue(0);
    jProgressBar1.setStringPainted(true);

    File[] listOfFiles = file.getSelectedFiles();
    int count = 0;

    for (File selected : listOfFiles) {
        if (!selected.isFile()) {
            continue;
        }
        System.out.println(selected.getName());

        boolean archivo_daniado = false;
        // Every stream below is opened in try-with-resources so the file handle is released
        // whether or not POI manages to parse the document.
        if (selected.getName().endsWith("x")) { //is a docx
            try (FileInputStream in = new FileInputStream(selected)) {
                XWPFDocument doc = new XWPFDocument(in);
                alg = new AlgorithmsWord(doc.getParagraphs());
            } catch (Exception e) {
                archivo_daniado = true;
            }
        } else { //is not a docx
            try (FileInputStream in = new FileInputStream(selected)) {
                HWPFDocument doc = new HWPFDocument(in);
                Range r = doc.getRange();
                alg = new AlgorithmsWord(r);
            } catch (Exception e) {
                // The extension may be misleading: retry the same file as .docx.
                try (FileInputStream retry = new FileInputStream(selected)) {
                    XWPFDocument doc = new XWPFDocument(retry);
                    alg = new AlgorithmsWord(doc.getParagraphs());
                } catch (Exception ex) {
                    archivo_daniado = true;
                }
            }
        }

        Metadata metadata = archivo_daniado ? loadMetadataFail() : loadMetadata(alg);
        metadata.setFileName(selected.getName());
        listMetaData.add(metadata);

        // Progress is based on the number of files processed so far.
        int val_calculate = (count + 1) * 100 / listOfFiles.length;
        jProgressBar1.setValue(val_calculate);
        count++;
    }

    String name = jTextField1.getText();

    // Kept for its construction side effects; the instance itself is not used afterwards.
    // NOTE(review): presumably the constructor writes the spreadsheet -- confirm.
    ExcelController excel = new ExcelController(name, "UNSA", listMetaData);

    String[] lnames = { "Nombre Archivo", "Obs. Dudosa", "Obs. Critica", "Abrir Archivo" };
    DefaultTableModel model = new DefaultTableModel(lnames, 0);
    tableSalida.setModel(model);

    // NOTE(review): rows are named by indexing listOfFiles with the position in listMetaData.
    // The two can drift apart (non-file entries are skipped above, and listMetaData is not
    // reset in this method), which would mislabel rows or overrun the array -- verify.
    int contador = 0;
    for (Metadata meta : listMetaData) {
        Object[] data = new Object[4];
        data[0] = listOfFiles[contador].getName();

        if (meta.getFailGeneral()) {
            data[1] = "Fail";
            data[2] = "Fail";
        } else {
            data[1] = meta.getStadistic().getObservationGeneral() ? "Observacion" : "";
            data[2] = meta.getObservacionGeneral() ? "Falta" : "";
        }
        data[3] = "abrir";

        model.addRow(data);
        contador++;
    }

    btnAbrirMetadata.setEnabled(true);

}

From source file:com.unsa.view.MainView.java

License:Creative Commons License

/**
 * Handles the "procesar" menu item: converts every selected Word file to a PDF written next to
 * the source file, with the source extension replaced by {@code .pdf}.
 *
 * <p>Files whose name ends in "x" are converted as .docx. All other files go through
 * {@code DocConverterPDF(file)} first and, if that fails, are retried as .docx. A file that
 * cannot be converted is reported on stderr and skipped.
 *
 * @param evt the action event (unused)
 * @throws FileNotFoundException kept for signature compatibility with existing callers
 */
private void menuProcesarActionPerformed(java.awt.event.ActionEvent evt) throws FileNotFoundException {//GEN-FIRST:event_menuProcesarActionPerformed
    if (txtArchivos.getText().equals("")) {
        return;
    }

    PdfOptions options = PdfOptions.create().fontEncoding("DOCX");

    File[] listOfFiles = file.getSelectedFiles();
    for (File selected : listOfFiles) {
        System.out.println(selected.getName());
        if (!selected.isFile()) {
            continue;
        }

        // Replace the real extension, whatever its length. Cutting a fixed four characters
        // turned "foo.doc" into "foopdf".
        String sourceName = selected.getName();
        int dot = sourceName.lastIndexOf('.');
        String baseName = dot > 0 ? sourceName.substring(0, dot) : sourceName;
        File outFile = new File(selected.getAbsoluteFile().getParentFile(), baseName + ".pdf");

        if (sourceName.endsWith("x")) {
            try {
                writeDocxAsPdf(selected, outFile, options);
            } catch (Exception e) {
                System.err.println("Could not convert " + sourceName + " to PDF: " + e);
            }
        } else { //is not a docx
            try {
                DocConverterPDF(selected);
            } catch (Exception e) {
                // The extension may be misleading: retry the same file as .docx.
                try {
                    writeDocxAsPdf(selected, outFile, options);
                } catch (Exception ex) {
                    System.err.println("Could not convert " + sourceName + " to PDF: " + ex);
                }
            }
        }
    }

}

/**
 * Converts one .docx file to PDF. The output file is only created once the document has been
 * parsed successfully, and both streams are closed on every path.
 */
private void writeDocxAsPdf(File docx, File pdf, PdfOptions options) throws Exception {
    try (FileInputStream in = new FileInputStream(docx)) {
        XWPFDocument doc = new XWPFDocument(in);
        try (OutputStream out = new FileOutputStream(pdf)) {
            PdfConverter.getInstance().convert(doc, out, options);
        }
    }
}

From source file:com.validation.manager.core.tool.table.extractor.TableExtractor.java

License:Apache License

/**
 * Opens {@code source} as a Word (.docx) document and returns the tables it contains.
 * The input stream is closed before this method returns.
 *
 * @return the tables reported by the document
 * @throws FileNotFoundException if {@code source} cannot be opened for reading
 * @throws IOException if reading the document fails
 */
private List<XWPFTable> extractTablesFromWord() throws FileNotFoundException, IOException {
    try (InputStream wordStream = new FileInputStream(source)) {
        return new XWPFDocument(wordStream).getTables();
    }
}

From source file:com.viettel.util.doc.DocsUtility.java

/**
 * Renders the document to PDF: saves it as a temporary Word file via {@code saveToWord()},
 * converts that file to a PDF under {@code pathTemp}, and deletes the temporary Word file.
 *
 * <p>The PDF name is {@code pathTemp} + timestamp ({@code yyyyMMddHHmmss}) + a random number
 * in [1, 1000] + {@code .pdf}.
 *
 * @return the path of the generated PDF file
 * @throws IOException if the Word file cannot be read or the PDF cannot be written
 */
public String saveToPdf() throws IOException {
    String pathWord = saveToWord();

    int n = new Random().nextInt(1000) + 1;
    // "HH" is the 24-hour field. "hh" (12-hour) produced the same stamp twice a day.
    SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMddHHmmss");
    String fileName = pathTemp + sdf.format(new Date()) + n + ".pdf";

    try (InputStream in = new FileInputStream(new File(pathWord))) {
        XWPFDocument documentPdf = new XWPFDocument(in);
        try (OutputStream out = new FileOutputStream(fileName)) {
            PdfOptions options = PdfOptions.create();
            PdfConverter.getInstance().convert(documentPdf, out, options);
        }
    } finally {
        // Runs after the input stream is closed, so the delete is not blocked by an open
        // handle, and it also cleans up the temporary file when conversion fails.
        new File(pathWord).delete();
    }
    return fileName;
}

From source file:cv_extractor.DocReader.java

/**
 * Reads a .docx file and, for every paragraph whose text contains a match of the regex in
 * {@code pattern}, adds the first match to {@code data}.
 *
 * <p>Any failure is printed as a stack trace and otherwise ignored.
 *
 * @param localFile the .docx file to scan
 */
protected static void readDocxFile(File localFile) {
    // try-with-resources closes the stream even when parsing or matching throws; the original
    // close() call was skipped on any exception.
    try (FileInputStream fis = new FileInputStream(localFile.getAbsolutePath())) {
        //For reading docx files
        XWPFDocument document = new XWPFDocument(fis);

        List<XWPFParagraph> paragraphs = document.getParagraphs();

        System.out.println("Total no of paragraph " + paragraphs.size());

        // Compiled once: the regex does not change between paragraphs.
        Pattern r = Pattern.compile(pattern);

        for (XWPFParagraph para : paragraphs) {
            Matcher m = r.matcher(para.getText());

            if (m.find()) {
                // Only the first match in each paragraph is recorded.
                data.add(m.group());
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}

From source file:de.catma.document.source.contenthandler.DOCXContentHandler.java

License:Open Source License

/**
 * Loads a .docx document from the given stream and stores its extracted text as this
 * handler's content. Text that {@code FileOSType} classifies as UNIX is first passed through
 * {@code FileOSType.convertUnixToDos}. The stream is not closed here.
 *
 * @param is stream positioned at the start of the .docx data
 * @throws IOException if the document cannot be read
 */
@Override
public void load(InputStream is) throws IOException {
    String text = new XWPFWordExtractor(new XWPFDocument(is)).getText();

    // The source is a Microsoft format, so UNIX-classified text is converted to DOS form.
    boolean classifiedAsUnix = FileOSType.getFileOSType(text).equals(FileOSType.UNIX);
    setContent(classifiedAsUnix ? FileOSType.convertUnixToDos(text) : text);
}