Example usage for org.apache.poi.xwpf.usermodel XWPFParagraph getText

List of usage examples for org.apache.poi.xwpf.usermodel XWPFParagraph getText

Introduction

On this page you can find example usages of org.apache.poi.xwpf.usermodel XWPFParagraph getText.

Prototype

public String getText() 

Source Link

Document

Return the textual content of the paragraph, including text from pictures and sdt elements in it.

Usage

From source file:FindFile.java

/**
 * Opens the .docx file at the given path and prints every space-separated
 * token that contains the search word held in {@code srcword}, prefixed by a
 * running match counter.
 * <p>
 * Any failure (missing file, unreadable document, ...) is reported through
 * {@code printStackTrace()} and otherwise ignored, as before.
 *
 * @param gUrl file-system path of the .docx document to scan
 */
public void search(String gUrl) {
    // try-with-resources closes both the stream and the document, also when
    // reading fails half way through; previously neither was ever closed.
    try (FileInputStream fis = new FileInputStream(gUrl);
            XWPFDocument docx = new XWPFDocument(OPCPackage.open(fis))) {
        List<XWPFParagraph> paragraphs = docx.getParagraphs();
        int matchCount = 0;
        for (XWPFParagraph paragraph : paragraphs) {
            String text = paragraph.getText().trim();
            // Tokens are split on single spaces only, so consecutive spaces
            // yield empty tokens (which can only match an empty srcword).
            for (String token : text.split(" ")) {
                if (token.contains(srcword)) {
                    matchCount++;
                    System.out.println(matchCount + "----->" + token);
                }
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    }

}

From source file:cn.afterturn.easypoi.word.parse.ParseWord07.java

License:Apache License

/**
 * Walks the given paragraphs in order and passes each one whose text contains
 * {@code START_STR} to {@code parseThisParagraph}.
 *
 * @param paragraphs paragraphs to inspect
 * @param map        value map handed through to {@code parseThisParagraph}
 * @throws Exception whatever {@code parseThisParagraph} throws
 * @author JueYue
 * 2013-11-17
 */
private void parseAllParagraphic(List<XWPFParagraph> paragraphs, Map<String, Object> map) throws Exception {
    for (int index = 0; index < paragraphs.size(); index++) {
        XWPFParagraph current = paragraphs.get(index);
        // Skip paragraphs that do not contain the start marker.
        if (current.getText().indexOf(START_STR) < 0) {
            continue;
        }
        parseThisParagraph(current, map);
    }
}

From source file:com.bxf.hradmin.testgen.service.impl.DocxTestGenerator.java

License:Open Source License

/**
 * Replaces each {@code ${key}} placeholder found in the paragraph text with
 * the value mapped to {@code key} in {@code data}; a key without a mapping is
 * replaced by the empty string.
 * <p>
 * A placeholder may be spread over several runs. The value is written in
 * place of the dollar sign, and everything from the opening brace up to and
 * including the closing brace is removed from whichever runs hold it.
 *
 * @param p    paragraph whose runs are edited in place
 * @param data placeholder key to replacement text
 */
private void doReplace(XWPFParagraph p, Map<String, String> data) {
    String pText = p.getText(); // complete paragraph as string
    if (pText.contains("${")) { // only paragraphs containing a placeholder start need work
        Pattern pattern = Pattern.compile("\\$\\{(.+?)\\}");
        Matcher matcher = pattern.matcher(pText);
        while (matcher.find()) { // for all patterns in the paragraph
            String key = matcher.group(1); // extract key start and end pos
            int start = matcher.start(1);
            int end = matcher.end(1);
            String value = data.get(key);
            if (value == null) {
                value = "";
            }
            // get runs which contain the pattern: start - 2 is the position of '$',
            // end + 1 is one past the key, i.e. just after '}'.
            // NOTE(review): getPosToRuns presumably maps text positions to runs; it is
            // re-evaluated per match while start/end refer to the original pText - confirm.
            SortedMap<Integer, XWPFRun> range = getPosToRuns(p).subMap(start - 2, true, end + 1, true);
            boolean isFoundDollarSign = false;
            boolean isFoundLeftBracket = false;
            boolean isFoundRightBracket = false;
            XWPFRun prevRun = null; // previous run handled in the loop
            XWPFRun found2Run = null; // run in which { was found
            int found2Pos = -1; // pos of { within above run
            for (XWPFRun run : range.values()) {
                if (run == prevRun) {
                    continue; // this run has already been handled
                }
                if (isFoundRightBracket) {
                    break; // done working on current key pattern
                }
                prevRun = run;
                for (int k = 0;; k++) { // iterate over texts of run r
                    if (isFoundRightBracket) {
                        break;
                    }
                    String txt = null;
                    try {
                        txt = run.getText(k); // note: should return null, but throws exception if the text does not exist
                    } catch (Exception ignored) {
                        // deliberately ignored: txt stays null, which ends the loop below
                    }
                    if (txt == null) {
                        break; // no more texts in the run, exit loop
                    }
                    if (txt.contains("$") && !isFoundDollarSign) { // found $, replace it with value from data map
                        // quoteReplacement makes the value literal; without it a '$' or '\'
                        // inside the value is parsed as a group reference / escape and
                        // replaceFirst throws or mangles the text.
                        txt = txt.replaceFirst("\\$", Matcher.quoteReplacement(value));
                        isFoundDollarSign = true;
                    }
                    if (txt.contains("{") && !isFoundLeftBracket && isFoundDollarSign) {
                        found2Run = run; // found { replace it with empty string and remember location
                        found2Pos = txt.indexOf('{');
                        txt = txt.replaceFirst("\\{", "");
                        isFoundLeftBracket = true;
                    }

                    // find } and set all chars between { and } to blank
                    if (isFoundDollarSign && isFoundLeftBracket && !isFoundRightBracket) {
                        if (txt.contains("}")) {
                            if (run == found2Run) { // complete pattern was within a single run
                                txt = txt.substring(0, found2Pos) + txt.substring(txt.indexOf('}'));
                            } else { // pattern spread across multiple runs
                                txt = txt.substring(txt.indexOf('}'));
                            }
                        } else if (run == found2Run) { // same run as { but no }, drop everything from {
                            txt = txt.substring(0, found2Pos);
                        } else {
                            txt = ""; // run between { and }, set text to blank
                        }
                    }
                    if (txt.contains("}") && !isFoundRightBracket) {
                        txt = txt.replaceFirst("\\}", "");
                        isFoundRightBracket = true;
                    }
                    run.setText(txt, k);
                }
            }
        }
    }
}

From source file:com.deepoove.poi.resolver.TemplateResolver.java

License:Apache License

/**
 * Rearranges the runs of a paragraph so that each template tag found in the
 * paragraph text sits in a run of its own, and collects the
 * {@code RunTemplate} produced for each such run.
 * <p>
 * Tag positions come from {@code calcTagPosInParagraph} and
 * {@code calcRunPosInParagraph}; each pair holds the left and right edge of
 * one tag. Pairs are processed from last to first, so the returned list is
 * in reverse pair order.
 *
 * @param paragraph paragraph whose runs are split, merged and removed in place
 * @return {@code null} if the paragraph has no runs; an empty list if no tag
 *         pairs were found; otherwise the non-null results of
 *         {@code parseRun(XWPFRun)} for the tag runs
 */
public static List<RunTemplate> parseRun(XWPFParagraph paragraph) {
    List<XWPFRun> runs = paragraph.getRuns();
    if (null == runs || runs.isEmpty())
        return null;
    String text = paragraph.getText();
    logger.debug("Paragrah's text is:" + text);
    // pairs and tags are filled by calcTagPosInParagraph (presumably one entry per tag - confirm)
    List<Pair<RunEdge, RunEdge>> pairs = new ArrayList<Pair<RunEdge, RunEdge>>();
    List<String> tags = new ArrayList<String>();
    calcTagPosInParagraph(text, pairs, tags);

    List<RunTemplate> rts = new ArrayList<RunTemplate>();
    if (pairs.isEmpty())
        return rts;
    RunTemplate runTemplate;
    calcRunPosInParagraph(runs, pairs);
    for (Pair<RunEdge, RunEdge> pai : pairs) {
        logger.debug(pai.getLeft().toString());
        logger.debug(pai.getRight().toString());
    }
    // split and merge
    Pair<RunEdge, RunEdge> pair2 = pairs.get(0);
    int length = pairs.size();
    // tagIndex is pre-decremented on use, so tags are consumed last-to-first in step with n
    int tagIndex = length;
    // Walk the pairs backwards; presumably so that inserting/removing runs for a
    // later tag does not shift the run indices of earlier tags - confirm.
    for (int n = length - 1; n >= 0; n--) {
        pair2 = pairs.get(n);
        RunEdge left2 = pair2.getLeft();
        RunEdge right2 = pair2.getRight();
        int left_r = left2.getRunPos();
        int right_r = right2.getRunPos();
        int runEdge = left2.getRunEdge();
        int runEdge2 = right2.getRunEdge();
        // NOTE(review): getText(0) results are dereferenced below without a null check - confirm they cannot be null here
        String text1 = runs.get(left_r).getText(0);
        String text2 = runs.get(right_r).getText(0);
        // Right edge: nothing follows the tag inside the right run ...
        if (runEdge2 + 1 >= text2.length()) {
            // ... so a separate right run is dropped entirely
            if (left_r != right_r)
                paragraph.removeRun(right_r);
        } else {
            // text after the tag's right edge must be kept
            String substring = text2.substring(runEdge2 + 1, text2.length());
            if (left_r == right_r) {
                // tag starts and ends in one run: move the trailing text into a new run right after it
                XWPFRun insertNewRun = paragraph.insertNewRun(right_r + 1);
                styleRun(insertNewRun, runs.get(right_r));
                insertNewRun.setText(substring, 0);
            } else
                // separate right run: it keeps only the trailing text
                runs.get(right_r).setText(substring, 0);
        }
        // remove the runs strictly between the left and right run
        for (int m = right_r - 1; m > left_r; m--) {
            paragraph.removeRun(m);
        }
        if (runEdge <= 0) {
            // tag begins at the start of the left run: the run's text becomes the tag
            runs.get(left_r).setText(tags.get(--tagIndex), 0);
            runTemplate = parseRun(runs.get(left_r));
        } else {
            // text precedes the tag: the left run keeps that prefix and the tag
            // goes into a new run inserted directly after it
            String substring = text1.substring(0, runEdge);
            XWPFRun xwpfRun = runs.get(left_r);
            runs.get(left_r).setText(substring, 0);
            XWPFRun insertNewRun = paragraph.insertNewRun(left_r + 1);
            styleRun(insertNewRun, xwpfRun);
            insertNewRun.setText(tags.get(--tagIndex), 0);
            runTemplate = parseRun(runs.get(left_r + 1));
        }

        if (null != runTemplate) {
            rts.add(runTemplate);
        }
    }
    return rts;
}

From source file:com.example.office.DOCDocumentParse.java

/**
 * Converts the given file to a .docx stored next to it (same path with an
 * {@code x} appended) and strips a leading/trailing paragraph that mentions
 * both "Evaluation" and "Aspose" from the result.
 * <p>
 * Nothing is done when the target file already exists. Failures are logged
 * and not propagated.
 *
 * @param docFile source document
 * @return the target .docx file (which may not exist if conversion failed)
 */
@SuppressWarnings("unused")
private File doc2docxOld(File docFile) {
    String docxFilePath = docFile.getPath() + "x";
    File docxFile = new File(docxFilePath);
    if (!docxFile.exists()) {
        XWPFDocument document = null;
        try {
            // Convert first. The target does not exist at this point, so opening an
            // input stream on it before the save (as was done previously) always
            // failed with FileNotFoundException.
            Document doc = new Document(docFile.getPath());
            doc.save(docxFile.getPath());

            // Read the converted file completely and release the stream before the
            // same path is opened for writing (opening it for output truncates it).
            try (InputStream ins = new FileInputStream(docxFile)) {
                document = new XWPFDocument(ins);
            }

            // Pass 0 inspects the last body element, pass 1 the first. The list is
            // re-read on every pass so a removal in pass 0 cannot leave a stale
            // index, and an empty body no longer throws.
            for (int pass = 0; pass < 2; pass++) {
                List<IBodyElement> elements = document.getBodyElements();
                if (elements.isEmpty()) {
                    break;
                }
                int index = (pass == 0) ? elements.size() - 1 : 0;
                IBodyElement element = elements.get(index);
                if (element.getElementType() != BodyElementType.PARAGRAPH) {
                    continue;
                }
                String text = ((XWPFParagraph) element).getText();
                if (StringUtils.isNotBlank(text) && text.contains("Evaluation") && text.contains("Aspose")) {
                    document.removeBodyElement(index);
                }
            }

            try (OutputStream out = new FileOutputStream(docxFile)) {
                document.write(out);
            }
        } catch (Exception e) {
            LogUtils.writeWarnExceptionLog(log, e);
        } finally {
            try {
                if (document != null)
                    document.close();
            } catch (IOException e) {
                LogUtils.writeDebugExceptionLog(log, e);
            }
        }
    }
    return docxFile;
}

From source file:com.example.office.DOCDocumentParse.java

/**
 * Rewrites the given .docx in place after removing its first and/or last
 * body element when that element is a paragraph mentioning both
 * "Evaluation" and "Aspose".
 * <p>
 * Failures are logged and not propagated.
 *
 * @param docxFile the .docx file to clean; it is overwritten
 */
private void deleteAsposeInfo(File docxFile) {
    XWPFDocument document = null;
    try {
        // Load the document and close the input stream BEFORE opening the output
        // stream: creating a FileOutputStream on the same path truncates the file,
        // which previously emptied it before it could be read.
        try (InputStream ins = new FileInputStream(docxFile)) {
            document = new XWPFDocument(ins);
        }

        // Pass 0 inspects the last body element, pass 1 the first. The list is
        // re-read on every pass so a removal in pass 0 cannot leave a stale index,
        // and an empty body no longer throws.
        for (int pass = 0; pass < 2; pass++) {
            List<IBodyElement> elements = document.getBodyElements();
            if (elements.isEmpty()) {
                break;
            }
            int index = (pass == 0) ? elements.size() - 1 : 0;
            IBodyElement element = elements.get(index);
            if (element.getElementType() != BodyElementType.PARAGRAPH) {
                continue;
            }
            String text = ((XWPFParagraph) element).getText();
            if (StringUtils.isNotBlank(text) && text.contains("Evaluation") && text.contains("Aspose")) {
                document.removeBodyElement(index);
            }
        }

        try (OutputStream out = new FileOutputStream(docxFile)) {
            document.write(out);
        }
    } catch (Exception e) {
        LogUtils.writeWarnExceptionLog(log, e);
    } finally {
        if (document != null) {
            try {
                document.close();
            } catch (IOException e) {
                LogUtils.writeDebugExceptionLog(log, e);
            }
        }

    }
}

From source file:com.qihang.winter.poi.word.parse.ParseWord07.java

License:Apache License

/**
 * Visits the given paragraphs in order and forwards each one whose text
 * contains "{{" to {@code parseThisParagraph}.
 *
 * @author Zerrion
 * @date 2013-11-17
 * @param paragraphs paragraphs to inspect
 * @param map        value map handed through to {@code parseThisParagraph}
 * @throws Exception whatever {@code parseThisParagraph} throws
 */
private void parseAllParagraphic(List<XWPFParagraph> paragraphs, Map<String, Object> map) throws Exception {
    int position = 0;
    while (position < paragraphs.size()) {
        XWPFParagraph candidate = paragraphs.get(position);
        position++;
        boolean hasMarker = candidate.getText().contains("{{");
        if (hasMarker) {
            parseThisParagraph(candidate, map);
        }
    }
}

From source file:cv_extractor.DocReader.java

/**
 * Reads a .docx file and, for every paragraph in which the regular
 * expression held in {@code pattern} matches, adds the first match to
 * {@code data}.
 * <p>
 * Failures are reported through {@code printStackTrace()} and otherwise
 * ignored, as before.
 *
 * @param localFile the .docx file to read
 */
protected static void readDocxFile(File localFile) {
    // try-with-resources closes the stream and the document even when reading
    // fails; previously the stream leaked on any exception and the document
    // was never closed.
    try (FileInputStream fis = new FileInputStream(localFile.getAbsolutePath());
            XWPFDocument document = new XWPFDocument(fis)) {

        List<XWPFParagraph> paragraphs = document.getParagraphs();

        System.out.println("Total no of paragraph " + paragraphs.size());

        // Compile once; the expression does not change between paragraphs.
        Pattern r = Pattern.compile(pattern);

        for (XWPFParagraph para : paragraphs) {
            // Check if any part of the paragraph text matches the compiled pattern
            Matcher m = r.matcher(para.getText());

            if (m.find()) {
                // m.group() returns the input subsequence matched by the previous match
                data.add(m.group());
            }
        }

    } catch (Exception e) {
        e.printStackTrace();
    }
}

From source file:DocxProcess.DocxTemplateReplacer.java

/**
 * Replaces each {@code ${key}} placeholder found in the paragraph text with
 * the value mapped to {@code key} in {@code data}; a key without a mapping is
 * replaced by the empty string.
 * <p>
 * A placeholder may be spread over several runs. The value is written in
 * place of the dollar sign, and everything from the opening brace up to and
 * including the closing brace is removed from whichever runs hold it.
 *
 * @param p    paragraph whose runs are edited in place
 * @param data placeholder key to replacement text
 */
private void replaceParagraph(XWPFParagraph p, Map<String, String> data) {

    String pText = p.getText(); // complete paragraph as string
    if (pText.contains("${")) { // only paragraphs containing a placeholder start need work
        // presumably maps text positions to the run holding them - confirm against getPosToRuns
        TreeMap<Integer, XWPFRun> posRuns = getPosToRuns(p);
        Pattern pat = Pattern.compile("\\$\\{(.+?)\\}");
        Matcher m = pat.matcher(pText);
        while (m.find()) { // for all patterns in the paragraph
            String key = m.group(1); // extract key start and end pos
            int s = m.start(1);
            int e = m.end(1);
            String x = data.get(key);
            if (x == null) {
                x = "";
            }
            // get runs which contain the pattern: s - 2 is the position of '$',
            // e + 1 is one past the key, i.e. just after '}'
            SortedMap<Integer, XWPFRun> range = posRuns.subMap(s - 2, true, e + 1, true);
            boolean found1 = false; // found $
            boolean found2 = false; // found {
            boolean found3 = false; // found }
            XWPFRun prevRun = null; // previous run handled in the loop
            XWPFRun found2Run = null; // run in which { was found
            int found2Pos = -1; // pos of { within above run
            for (XWPFRun r : range.values()) {
                if (r == prevRun) {
                    continue; // this run has already been handled
                }
                if (found3) {
                    break; // done working on current key pattern
                }
                prevRun = r;
                for (int k = 0;; k++) { // iterate over texts of run r
                    if (found3) {
                        break;
                    }
                    String txt = null;
                    try {
                        txt = r.getText(k); // note: should return null, but throws exception if the text does not exist
                    } catch (Exception ignored) {
                        // deliberately ignored: txt stays null, which ends the loop below
                    }
                    if (txt == null) {
                        break; // no more texts in the run, exit loop
                    }
                    if (txt.contains("$") && !found1) { // found $, replace it with value from data map
                        // quoteReplacement makes the value literal; without it a '$' or '\'
                        // inside the value is parsed as a group reference / escape and
                        // replaceFirst throws or mangles the text.
                        txt = txt.replaceFirst("\\$", Matcher.quoteReplacement(x));
                        found1 = true;
                    }
                    if (txt.contains("{") && !found2 && found1) {
                        found2Run = r; // found { replace it with empty string and remember location
                        found2Pos = txt.indexOf('{');
                        txt = txt.replaceFirst("\\{", "");
                        found2 = true;
                    }
                    if (found1 && found2 && !found3) { // find } and set all chars between { and } to blank
                        if (txt.contains("}")) {
                            if (r == found2Run) { // complete pattern was within a single run
                                txt = txt.substring(0, found2Pos) + txt.substring(txt.indexOf('}'));
                            } else { // pattern spread across multiple runs
                                txt = txt.substring(txt.indexOf('}'));
                            }
                        } else if (r == found2Run) { // same run as { but no }, remove all text starting at {
                            txt = txt.substring(0, found2Pos);
                        } else {
                            txt = ""; // run between { and }, set text to blank
                        }
                    }
                    if (txt.contains("}") && !found3) {
                        txt = txt.replaceFirst("\\}", "");
                        found3 = true;
                    }
                    r.setText(txt, k);
                }
            }
        }

    }

}

From source file:DocxProcess.ReadWordDocx.java

/**
 * Dumps the document to standard output: first the text of every paragraph
 * under a "Paragraph" heading, then, under a "TableCell" heading, every table
 * row as one line of cell texts each followed by a space.
 *
 * @param doc document to print
 * @throws IOException declared by the signature; nothing in the body throws it directly
 */
public void ReadByDocx(XWPFDocument doc) throws IOException {

    List<XWPFParagraph> paragraphList = doc.getParagraphs();
    System.out.println("Paragraph");
    for (XWPFParagraph paragraph : paragraphList) {
        String paragraphText = paragraph.getText();
        System.out.println(paragraphText);
    }

    List<XWPFTable> tableList = doc.getTables();
    System.out.println("TableCell");
    for (XWPFTable currentTable : tableList) {
        List<XWPFTableRow> tableRows = currentTable.getRows();
        for (XWPFTableRow currentRow : tableRows) {
            // blank line before each row
            System.out.println("");
            List<XWPFTableCell> rowCells = currentRow.getTableCells();
            for (XWPFTableCell currentCell : rowCells) {
                System.out.print(currentCell.getText() + " ");
            }
            System.out.println();
        }
    }

}