Example usage for org.apache.poi.xwpf.usermodel XWPFDocument getParagraphs

Introduction

On this page you can find example usages of org.apache.poi.xwpf.usermodel.XWPFDocument.getParagraphs().

Prototype

@Override
public List<XWPFParagraph> getParagraphs()

Source Link

Usage

From source file:com.project3.utils.poi.ApachePOIChecker.java

/**
 * Runs the formatting checks (questions 1, 2, 3, 5, 7 and 8) against the Word
 * document at {@code filename}.
 *
 * <p>For each question the expected strings and/or expected properties are handed
 * to {@code DocumentPropertyChecker}; the outcome is printed to standard output and
 * forwarded to {@code addResultsToList}. The static {@code resultList} is replaced
 * with a fresh list at the start of every call.
 *
 * <p>An {@link IOException} raised while opening or reading the file is logged at
 * SEVERE level and not rethrown.
 *
 * @param filename path of the .docx file to check
 */
public static void checkDocument(String filename) {
    resultList = new ArrayList<ResultModel>();

    // try-with-resources releases the file handle even when parsing or one of the
    // checks throws; previously the stream was never closed.
    try (FileInputStream in = new FileInputStream(new File(filename))) {
        XWPFDocument docx1 = new XWPFDocument(in);

        // TODO: move the strings and expected properties below into an XML file.
        // NOTE(review): sl is never cleared, so each question also re-checks the
        // strings of all earlier questions - confirm this accumulation is intended.

        // Question 1: strings to find
        List<String> sl = new ArrayList<String>();
        String[] tl = { "Melissa Martin", "555 West Main St.", "Sampaloc, Metro Manila", "Phone: 312-312-3123",
                "E-mail: TeachMartin@email.com" };
        sl.addAll(Arrays.asList(tl));

        // Properties these strings should have
        Map<String, String> properties = new HashMap<String, String>();
        properties.put("FONT FAMILY", "MV Boli");
        properties.put("FONT SIZE", "12");
        // We go through all paragraphs of the document and check for the presence of the strings
        // If they are present, check if the properties given above are present
        // Result is displayed as String = {Property1 = Score1, Property2 = Score2, ...}
        // Scores are determined by the number of elements within the paragraph which follows the given formatting
        Map<String, HashMap> results;
        results = DocumentPropertyChecker.checkRunPropertiesOfParagraphs(docx1.getParagraphs(), sl, properties);
        System.out.println("1. " + results.toString());
        addResultsToList(results, properties);

        // Question 2
        tl = new String[] { "Summary", "Educational Background", "Related Work Experience",
                "Additional Work Experience" };
        sl.addAll(Arrays.asList(tl));

        properties = new HashMap<String, String>();
        properties.put("BOLD", "true");

        results = DocumentPropertyChecker.checkRunPropertiesOfParagraphs(docx1.getParagraphs(), sl, properties);
        System.out.println("2. " + results.toString());
        addResultsToList(results, properties);

        // Question 3
        tl = new String[] { "Holds Bachelor's Degree in Music and Education with TEFL certification",
                "5 years experience in teaching Englsih to Spanish speaking students ages 12 and up",
                "Exceptional skills in teaching English and Spanish language",
                "Bachelor of Music; Univeristy of Sto. Tomas 2004",
                "Bachelor of Science in Education; Univerity of the Philippines 2008" };
        sl.addAll(Arrays.asList(tl));
        properties = new HashMap<String, String>();
        properties.put("LINE SPACING", "1.5");

        results = DocumentPropertyChecker.checkPropertiesOfParagraphs(docx1.getParagraphs(), sl, properties);
        System.out.println("3. " + results.toString());
        addResultsToList(results, properties);

        // Question 5
        tl = new String[] { "St. Peter's University", "2011  Present",
                "Teaches English and Spanish to students ages 15 and up",
                "Creates course materials, including exams, quizzes and visual aids used by all teachers throughout the organization",
                "Initiates programs focused in improving grammar and active listening, writing and speaking skills of students" };
        sl.addAll(Arrays.asList(tl));
        properties = new HashMap<String, String>();
        properties.put("NUMBERING FORMAT", "bullet");

        results = DocumentPropertyChecker.checkPropertiesOfParagraphs(docx1.getParagraphs(), sl, properties);
        System.out.println("5. " + results.toString());
        addResultsToList(results, properties);

        // Question 7: document-level page margins
        properties = new HashMap<String, String>();
        properties.put("MARGIN TOP", "2");
        properties.put("MARGIN BOTTOM", "2");
        properties.put("MARGIN LEFT", "2");
        properties.put("MARGIN RIGHT", "2");

        Map<String, Object> results2;
        results2 = DocumentPropertyChecker.checkPropertiesOfDocument(docx1, properties);
        HashMap<String, String> temp = new HashMap<String, String>();
        for (Entry<String, Object> r : results2.entrySet()) {
            // String.valueOf tolerates a null score instead of throwing
            temp.put(r.getKey(), String.valueOf(r.getValue()));
        }
        System.out.println("7. " + results2.toString());
        // Use fresh maps rather than clear(): the previous maps were already handed to
        // addResultsToList and must not be emptied in case it kept a reference to them.
        results = new HashMap<String, HashMap>();
        results.put("Page Format", temp);
        addResultsToList(results, properties);

        // Question 8: alignment of all paragraphs
        properties = new HashMap<String, String>();
        properties.put("ALIGN", "both");

        results2 = DocumentPropertyChecker.checkPropertiesOfAllParagraphs(docx1.getParagraphs(), properties);
        System.out.println("8. " + results2.toString());

        temp = new HashMap<String, String>();
        for (Entry<String, Object> r : results2.entrySet()) {
            temp.put(r.getKey(), String.valueOf(r.getValue()));
        }

        results = new HashMap<String, HashMap>();
        results.put("Page Format", temp);
        addResultsToList(results, properties);

    } catch (IOException ex) {
        Logger.getLogger(ApachePOIChecker.class.getName()).log(Level.SEVERE, null, ex);
    }
}

From source file:com.project3.utils.poiold.ApachePOIChecker.java

/**
 * Legacy variant of the document check: runs questions 1-8 against a Word document
 * and prints each result to standard output.
 *
 * <p>An {@link IOException} raised while opening or reading the file is logged at
 * SEVERE level and not rethrown.
 *
 * @param filename path of the .docx file to check; when {@code null} or empty the
 *                 historical hard-coded sample document is opened instead
 */
public static void checkDocument1(String filename) {
    // The parameter used to be ignored in favour of a developer-local absolute path.
    // Honour it now, keeping the old path only as a fallback for callers that pass nothing.
    String path = (filename == null || filename.isEmpty())
            ? "C:\\Users\\Noel\\Documents\\NetBeansProjects\\ApachePOITest\\resume_only.docx"
            : filename;

    // try-with-resources releases the file handle even when parsing or a check throws;
    // previously the stream was never closed.
    try (FileInputStream in = new FileInputStream(new File(path))) {
        XWPFDocument docx1 = new XWPFDocument(in);

        // TODO: move the strings and expected properties below into an XML file.
        // NOTE(review): sl is never cleared, so each question also re-checks the
        // strings of all earlier questions - confirm this accumulation is intended.

        // Question 1: strings to find
        ArrayList<String> sl = new ArrayList<String>();
        String[] tl = { "Melissa Martin", "555 West Main St.", "Sampaloc, Metro Manila", "Phone: 312-312-3123",
                "E-mail: TeachMartin@email.com" };
        sl.addAll(Arrays.asList(tl));

        // Properties these strings should have
        Map<String, String> properties = new HashMap<String, String>();
        properties.put("FONT FAMILY", "MV Boli");
        properties.put("FONT SIZE", "12");
        // We go through all paragraphs of the document and check for the presence of the strings
        // If they are present, check if the properties given above are present
        // Result is displayed as String = {Property1 = Score1, Property2 = Score2, ...}
        // Scores are determined by the number of elements within the paragraph which follows the given formatting
        Map<String, HashMap> results;
        results = DocumentPropertyCheckerOld.checkRunPropertiesOfParagraphs(docx1.getParagraphs(), sl,
                properties);
        System.out.println("1. " + results.toString());
        System.out.println("");

        // Question 2
        tl = new String[] { "Summary", "Educational Background", "Related Work Experience",
                "Additional Work Experience" };
        sl.addAll(Arrays.asList(tl));

        properties = new HashMap<String, String>();
        properties.put("BOLD", "true");

        results = DocumentPropertyCheckerOld.checkRunPropertiesOfParagraphs(docx1.getParagraphs(), sl,
                properties);
        System.out.println("2. " + results.toString());
        System.out.println("");

        // Question 3
        tl = new String[] { "Holds Bachelor's Degree in Music and Education with TEFL certification",
                "5 years experience in teaching Englsih to Spanish speaking students ages 12 and up",
                "Exceptional skills in teaching English and Spanish language",
                "Bachelor of Music; Univeristy of Sto. Tomas 2004",
                "Bachelor of Science in Education; Univerity of the Philippines 2008" };
        sl.addAll(Arrays.asList(tl));
        properties = new HashMap<String, String>();
        properties.put("LINE SPACING", "1.5");

        results = DocumentPropertyCheckerOld.checkPropertiesOfParagraphs(docx1.getParagraphs(), sl, properties);
        System.out.println("3. " + results.toString());
        System.out.println("");

        // Question 4: presence check only
        tl = new String[] { "2008-2011" };
        sl.addAll(Arrays.asList(tl));
        results = DocumentPropertyCheckerOld.checkIfStringExistsInParagraphs(docx1.getParagraphs(), sl);
        System.out.println("4. " + results.toString());
        System.out.println("");

        // Question 5
        tl = new String[] { "St. Peter's University", "2011  Present",
                "Teaches English and Spanish to students ages 15 and up",
                "Creates course materials, including exams, quizzes and visual aids used by all teachers throughout the organization",
                "Initiates programs focused in improving grammar and active listening, writing and speaking skills of students" };
        sl.addAll(Arrays.asList(tl));
        properties = new HashMap<String, String>();
        properties.put("NUMBERING FORMAT", "bullet");

        results = DocumentPropertyCheckerOld.checkPropertiesOfParagraphs(docx1.getParagraphs(), sl, properties);
        System.out.println("5. " + results.toString());
        System.out.println("");

        // Question 6: presence check only
        tl = new String[] { "Black Pen Movement \u00AE" };
        sl.addAll(Arrays.asList(tl));
        results = DocumentPropertyCheckerOld.checkIfStringExistsInParagraphs(docx1.getParagraphs(), sl);
        System.out.println("6. " + results.toString());
        System.out.println("");

        // Question 7: document-level page margins
        properties = new HashMap<String, String>();
        properties.put("MARGIN TOP", "2");
        properties.put("MARGIN BOTTOM", "2");
        properties.put("MARGIN LEFT", "2");
        properties.put("MARGIN RIGHT", "2");

        System.out.println(
                "7. " + DocumentPropertyCheckerOld.checkPropertiesOfDocument(docx1, properties).toString());
        System.out.println("");

        // Question 8: alignment of all paragraphs
        properties = new HashMap<String, String>();
        properties.put("ALIGN", "both");

        System.out.println("8. " + DocumentPropertyCheckerOld
                .checkPropertiesOfAllParagraphs(docx1.getParagraphs(), properties).toString());

    } catch (IOException ex) {
        Logger.getLogger(ApachePOIChecker.class.getName()).log(Level.SEVERE, null, ex);
    }
}

From source file:com.raghav.plot.ReadDOCX.java

/**
 * Prints the text of every run in the body paragraphs, then of every run inside
 * table cells, of a fixed .docx file to standard output.
 *
 * <p>Runs whose first text element is absent ({@code getText(0)} returns
 * {@code null}) are skipped in both passes. Any exception is printed via
 * {@code printStackTrace()} and not rethrown.
 *
 * @param args unused
 */
public static void main(String[] args) {
    // try-with-resources closes the file even when parsing fails; previously the
    // stream was never closed.
    try (InputStream in = new FileInputStream(new File("/home/raghav/Desktop/Axis-LB.docx"))) {
        XWPFDocument doc = new XWPFDocument(in);

        // Body paragraphs. Null texts are filtered here too, matching the table pass
        // below; the original printed the literal "null" for such runs.
        doc.getParagraphs().stream()
                .map(p -> p.getRuns())
                .filter(runs -> runs != null)
                .flatMap(runs -> runs.stream())
                .map(r -> r.getText(0))
                .filter(text -> text != null)
                .forEach(text -> System.out.println(text));

        // Paragraphs nested in table cells, in table / row / cell order.
        doc.getTables().stream()
                .flatMap(tbl -> tbl.getRows().stream())
                .flatMap(row -> row.getTableCells().stream())
                .flatMap(cell -> cell.getParagraphs().stream())
                .flatMap(p -> p.getRuns().stream())
                .filter(r -> r != null)
                .map(r -> r.getText(0))
                .filter(text -> text != null)
                .forEach(text -> System.out.println(text));

    } catch (Exception ex) {
        ex.printStackTrace();
    }
}

From source file:com.unsa.view.MainView.java

License:Creative Commons License

/**
 * Handler for the "Procesar" button.
 *
 * <p>Validates the general options and the metadata output path, then for every
 * selected regular file tries to parse it as a Word document, derives a
 * {@code Metadata} entry (via {@code loadMetadata} or {@code loadMetadataFail}),
 * appends it to {@code listMetaData} and advances the progress bar. Afterwards an
 * {@code ExcelController} is constructed with the collected metadata, the output
 * table is filled with one row per metadata entry and the "open metadata" button
 * is enabled.
 *
 * @param evt the action event (unused)
 * @throws SQLException declared by the signature; presumably raised by helpers not visible here
 * @throws IOException  declared by the signature; presumably raised by helpers not visible here
 */
private void btnProcesarActionPerformed(java.awt.event.ActionEvent evt) throws SQLException, IOException {//GEN-FIRST:event_btnProcesarActionPerformed
    // NOTE(review): the original tested lblInstitucion twice in this condition; a
    // different label was probably intended for one of the checks - confirm which.
    if (lblInstitucion.getText().equals("") || lblIdioma.getText().equals("")) {
        JOptionPane.showMessageDialog(null, "Una de las opciones generales est vacio");
        return;
    }
    if (jTextField1.getText().equals("")) {
        JOptionPane.showMessageDialog(null, "No se especific la ruta donde guardar la metadata");
        return;
    }

    jProgressBar1.setValue(0);
    jProgressBar1.setStringPainted(true);

    File[] listOfFiles = file.getSelectedFiles();
    int count = 0;

    for (File selected : listOfFiles) {
        if (!selected.isFile()) {
            continue;
        }
        System.out.println(selected.getName());

        boolean loaded = false;

        // Names not ending in "x" are first tried as legacy binary .doc files.
        if (!selected.getName().endsWith("x")) {
            // try-with-resources: every attempt closes its own stream (previously leaked)
            try (FileInputStream in = new FileInputStream(selected)) {
                HWPFDocument doc = new HWPFDocument(in);
                Range r = doc.getRange();
                alg = new AlgorithmsWord(r);
                loaded = true;
            } catch (Exception ignored) {
                // Not readable as .doc; fall through to the .docx attempt below.
            }
        }

        // .docx attempt: the primary path for "*x" names, the fallback for the rest.
        if (!loaded) {
            try (FileInputStream in = new FileInputStream(selected)) {
                XWPFDocument doc = new XWPFDocument(in);
                alg = new AlgorithmsWord(doc.getParagraphs());
                loaded = true;
            } catch (Exception ignored) {
                // Unreadable in both formats; recorded as a failed file below.
            }
        }

        Metadata metadata;
        if (loaded) {
            metadata = loadMetadata(alg);
        } else {
            metadata = loadMetadataFail();
        }
        metadata.setFileName(selected.getName());
        listMetaData.add(metadata);

        int val_calculate = (count + 1) * 100 / listOfFiles.length;
        jProgressBar1.setValue(val_calculate);
        count++;
    }

    String name = jTextField1.getText();

    // Constructed for its side effects only - presumably writes the spreadsheet; verify.
    new ExcelController(name, "UNSA", listMetaData);

    String[] lnames = { "Nombre Archivo", "Obs. Dudosa", "Obs. Critica", "Abrir Archivo" };
    DefaultTableModel model = new DefaultTableModel(lnames, 0);
    tableSalida.setModel(model);

    // NOTE(review): rows are labelled with listOfFiles[contador], which assumes
    // listMetaData lines up index-for-index with listOfFiles. That breaks when a
    // selected entry is not a regular file (skipped above) or when listMetaData still
    // holds entries from an earlier run - confirm and label rows from the metadata.
    int contador = 0;
    for (Metadata meta : listMetaData) {
        Object[] data = new Object[4];
        data[0] = listOfFiles[contador].getName();

        if (meta.getFailGeneral()) {
            data[1] = "Fail";
            data[2] = "Fail";
        } else {
            data[1] = meta.getStadistic().getObservationGeneral() ? "Observacion" : "";
            data[2] = meta.getObservacionGeneral() ? "Falta" : "";
        }
        data[3] = "abrir";

        model.addRow(data);
        contador++;
    }

    btnAbrirMetadata.setEnabled(true);

}

From source file:cv_extractor.DocReader.java

/**
 * Scans every paragraph of a .docx file and, for each paragraph whose text contains
 * a match of the class-level {@code pattern}, adds the first match to {@code data}.
 *
 * <p>The paragraph count is printed to standard output. Any exception is printed
 * via {@code printStackTrace()} and not rethrown.
 *
 * @param localFile the .docx file to read
 */
protected static void readDocxFile(File localFile) {
    // try-with-resources closes the stream on every path; the original only closed
    // it when no exception occurred.
    try (FileInputStream fis = new FileInputStream(localFile.getAbsolutePath())) {
        // For reading docx files
        XWPFDocument document = new XWPFDocument(fis);

        List<XWPFParagraph> paragraphs = document.getParagraphs();

        System.out.println("Total no of paragraph " + paragraphs.size());

        // The regex does not change between paragraphs, so compile it once.
        Pattern compiled = Pattern.compile(pattern);

        for (XWPFParagraph para : paragraphs) {
            Matcher m = compiled.matcher(para.getText());

            // Only the first match per paragraph is kept.
            if (m.find()) {
                data.add(m.group());
            }
        }

    } catch (Exception e) {
        e.printStackTrace();
    }
}

From source file:DocxProcess.ReadWordDocx.java

/**
 * Dumps a document's text to standard output: first the text of each body
 * paragraph under a "Paragraph" heading, then, under a "TableCell" heading, each
 * table row as a blank line followed by its cell texts separated by single spaces.
 *
 * @param doc the document to print
 * @throws IOException declared by the signature; nothing in this body throws it directly
 */
public void ReadByDocx(XWPFDocument doc) throws IOException {

    final List<XWPFParagraph> bodyParagraphs = doc.getParagraphs();
    System.out.println("Paragraph");
    for (XWPFParagraph bodyParagraph : bodyParagraphs) {
        System.out.println(bodyParagraph.getText());
    }

    final List<XWPFTable> allTables = doc.getTables();
    System.out.println("TableCell");
    for (XWPFTable currentTable : allTables) {
        for (XWPFTableRow currentRow : currentTable.getRows()) {
            // Blank separator line before every row
            System.out.println("");
            for (XWPFTableCell currentCell : currentRow.getTableCells()) {
                System.out.print(currentCell.getText() + " ");
            }
            System.out.println();
        }
    }

}

From source file:easyoffice.word.WordMaker.java

/**
 * Replaces placeholder runs in the document's paragraphs with values from {@code data}.
 *
 * <p>For every key, each run of each paragraph returned by
 * {@code doc.getParagraphs()} is inspected; when the run's string form
 * ({@code run.toString()}) equals the key ignoring case, the run's text at
 * position 0 is set to the mapped value.
 *
 * @param doc  the document to modify in place
 * @param data placeholder text mapped to its replacement
 */
private static void replaceText(XWPFDocument doc, HashMap<String, String> data) {

    for (String key : data.keySet()) {
        // Look the replacement up once per key rather than once per matching run.
        String replacement = data.get(key);

        for (XWPFParagraph p : doc.getParagraphs()) {
            for (XWPFRun run : p.getRuns()) {
                // equalsIgnoreCase is locale-independent and also lets keys that contain
                // upper-case letters match. The previous toLowerCase().equals(key) depended
                // on the default locale and could never match a non-lower-case key.
                if (run.toString().equalsIgnoreCase(key)) {
                    run.setText(replacement, 0);
                }
            }
        }
    }
}

From source file:edu.gatech.pmase.capstone.awesome.impl.output.DisasterResponseTradeStudyOutputer.java

License:Open Source License

/**
 * Populates the report details paragraph.
 *
 * <p>Four runs are appended to the paragraph found at
 * {@code REPORT_DETAILS_ROW_INDEX}: a bold label and a plain value for the
 * generation date (followed by a line break), then a bold label and a plain value
 * for the default locale's display country.
 *
 * @param xdoc the document holding the report details paragraph
 */
private void createReportDetails(final XWPFDocument xdoc) {
    final Locale defaultLocale = Locale.getDefault();
    LOGGER.debug("Creating report details");

    final XWPFParagraph detailsParagraph = xdoc.getParagraphs().get(REPORT_DETAILS_ROW_INDEX);

    // Line 1: "Date Report Generated: <formatted timestamp>"
    final XWPFRun dateLabel = detailsParagraph.createRun();
    dateLabel.setBold(true);
    dateLabel.setText("Date Report Generated: ");

    final XWPFRun dateValue = detailsParagraph.createRun();
    dateValue.setBold(false);
    dateValue.setText(outputFileFormatter.format(now));
    dateValue.addBreak();

    // Line 2: "Country Report Generated: <country of the default locale>"
    final XWPFRun countryLabel = detailsParagraph.createRun();
    countryLabel.setBold(true);
    countryLabel.setText("Country Report Generated: ");

    final XWPFRun countryValue = detailsParagraph.createRun();
    countryValue.setBold(false);
    countryValue.setText(defaultLocale.getDisplayCountry());
}

From source file:File.DOCX.ReadDocx.java

/**
 * Prints the text of every paragraph of the document at
 * {@code path + filename + ".docx"} to standard output.
 *
 * <p>Any exception is printed via {@code printStackTrace()} and not rethrown.
 *
 * @param path     location prefix, concatenated as-is in front of the file name
 * @param filename file name without the ".docx" extension
 */
public void ReadParagraph(String path, String filename) {
    // try-with-resources closes the file on every path; previously it was never closed.
    try (FileInputStream fis = new FileInputStream(path + filename + ".docx")) {
        XWPFDocument xdoc = new XWPFDocument(OPCPackage.open(fis));
        for (XWPFParagraph paragraph : xdoc.getParagraphs()) {
            System.out.println(paragraph.getText());
        }
    } catch (Exception ex) {
        ex.printStackTrace();
    }
}

From source file:fr.univrouen.poste.services.WordParser.java

License:Apache License

/**
 * Fills bookmarked regions of a Word document with text and writes the result.
 *
 * <p>For each bookmark start found in a paragraph, every key of {@code textMap} is
 * normalised (accents stripped, spaces turned into underscores, remaining non-word
 * characters removed) and compared case-insensitively with the bookmark name. On a
 * match, the nodes following the bookmark start are removed up to the first node
 * whose name contains "bookmarkEnd", and a new run carrying the mapped text is
 * inserted in their place.
 *
 * <p>Any exception is logged at error level and not rethrown. Neither {@code docx}
 * nor {@code out} is closed by this method.
 *
 * @param docx    stream containing the source .docx
 * @param textMap replacement text keyed by (un-normalised) bookmark name
 * @param out     stream the modified document is written to
 */
public void modifyWord(InputStream docx, Map<String, String> textMap, OutputStream out) {
    try {
        XWPFDocument doc = new XWPFDocument(OPCPackage.open(docx));

        // attempt using the {{}}-style names
        for (XWPFParagraph p : doc.getParagraphs()) {

            for (CTBookmark bookmark : p.getCTP().getBookmarkStartList()) {
                log.trace(bookmark.getName());
                for (String key : textMap.keySet()) {
                    // Normalise the key into the form it is compared against the bookmark name
                    String cleanKey = StringUtils.stripAccents(key);
                    cleanKey = cleanKey.replaceAll(" ", "_");
                    cleanKey = cleanKey.replaceAll("\\W", "");
                    if (bookmark.getName().equalsIgnoreCase(cleanKey)) {
                        // Drop the existing content between bookmark start and bookmark end.
                        // The sibling is re-read from the bookmark after each removal because
                        // the removed node is no longer part of the paragraph.
                        Node nextNode = bookmark.getDomNode().getNextSibling();
                        while (nextNode != null && nextNode.getNodeName() != null
                                && !(nextNode.getNodeName().contains("bookmarkEnd"))) {
                            p.getCTP().getDomNode().removeChild(nextNode);
                            nextNode = bookmark.getDomNode().getNextSibling();
                        }
                        // createRun() adds the run to the paragraph; insertBefore then places
                        // its node in front of nextNode (the bookmark end, or null if the loop
                        // ran out of siblings).
                        XWPFRun run = p.createRun();
                        run.setText(textMap.get(key));
                        p.getCTP().getDomNode().insertBefore(run.getCTR().getDomNode(), nextNode);
                    }
                }
            }
        }

        doc.write(out);
    } catch (Exception e) {
        // Failure is logged and swallowed; the caller gets no signal and out may be left unwritten
        log.error("Pb durant la modification du document word", e);
    }

}