List of usage examples for the method org.apache.poi.xwpf.usermodel.XWPFParagraph#getText()
public String getText()
From source file:FindFile.java
public void search(String gUrl) { try {//from w ww. ja v a 2 s . c om FileInputStream fis = new FileInputStream(gUrl); XWPFDocument docx = new XWPFDocument(OPCPackage.open(fis)); List<XWPFParagraph> parag = docx.getParagraphs(); int cntrl = 0; for (XWPFParagraph paraglist : parag) { String gelen = paraglist.getText().trim(); String[] temp = gelen.split(" "); for (String temp2 : temp) { if (temp2.contains(srcword)) { cntrl++; System.out.println(cntrl + "----->" + temp2); } } } } catch (Exception e) { e.printStackTrace(); } }
From source file:cn.afterturn.easypoi.word.parse.ParseWord07.java
License:Apache License
/**
 * Walks the given paragraphs in order and hands every paragraph whose text contains
 * {@code START_STR} to {@code parseThisParagraph}.
 *
 * @param paragraphs paragraphs to inspect
 * @param map        passed through unchanged to {@code parseThisParagraph}
 * @throws Exception whatever {@code parseThisParagraph} throws
 * @author JueYue
 *  2013-11-17
 */
private void parseAllParagraphic(List<XWPFParagraph> paragraphs, Map<String, Object> map) throws Exception {
    int index = 0;
    // size() is re-read on every step, so the traversal behaves the same as the
    // original indexed loop if the list changes while a paragraph is processed.
    while (index < paragraphs.size()) {
        XWPFParagraph current = paragraphs.get(index++);
        if (current.getText().indexOf(START_STR) < 0) {
            continue; // no start marker in this paragraph
        }
        parseThisParagraph(current, map);
    }
}
From source file:com.bxf.hradmin.testgen.service.impl.DocxTestGenerator.java
License:Open Source License
private void doReplace(XWPFParagraph p, Map<String, String> data) { String pText = p.getText(); // complete paragraph as string if (pText.contains("${")) { // if paragraph does not include our pattern, ignore Pattern pattern = Pattern.compile("\\$\\{(.+?)\\}"); Matcher matcher = pattern.matcher(pText); while (matcher.find()) { // for all patterns in the paragraph String key = matcher.group(1); // extract key start and end pos int start = matcher.start(1); int end = matcher.end(1); String value = data.get(key); if (value == null) { value = ""; }//from w w w . j av a 2 s.com // get runs which contain the pattern SortedMap<Integer, XWPFRun> range = getPosToRuns(p).subMap(start - 2, true, end + 1, true); boolean isFoundDollarSign = false; boolean isFoundLeftBracket = false; boolean isFoundRightBracket = false; XWPFRun prevRun = null; // previous run handled in the loop XWPFRun found2Run = null; // run in which { was found int found2Pos = -1; // pos of { within above run for (XWPFRun run : range.values()) { if (run == prevRun) { continue; // this run has already been handled } if (isFoundRightBracket) { break; // done working on current key pattern } prevRun = run; for (int k = 0;; k++) { // iterate over texts of run r if (isFoundRightBracket) { break; } String txt = null; try { txt = run.getText(k); // note: should return null, but throws exception if the text does not exist } catch (Exception e) { } if (txt == null) { break; // no more texts in the run, exit loop } if (txt.contains("$") && !isFoundDollarSign) { // found $, replace it with value from data map txt = txt.replaceFirst("\\$", value); isFoundDollarSign = true; } if (txt.contains("{") && !isFoundLeftBracket && isFoundDollarSign) { found2Run = run; // found { replace it with empty string and remember location found2Pos = txt.indexOf('{'); txt = txt.replaceFirst("\\{", ""); isFoundLeftBracket = true; } // find } and set all chars between { and } to blank if (isFoundDollarSign && isFoundLeftBracket && 
!isFoundRightBracket) { if (txt.contains("}")) { if (run == found2Run) { // complete pattern was within a single run txt = txt.substring(0, found2Pos) + txt.substring(txt.indexOf('}')); } else { txt = txt.substring(txt.indexOf('}')); } } else if (run == found2Run) { txt = txt.substring(0, found2Pos); } else { txt = ""; // run between { and }, set text to blank } } if (txt.contains("}") && !isFoundRightBracket) { txt = txt.replaceFirst("\\}", ""); isFoundRightBracket = true; } run.setText(txt, k); } } } } }
From source file:com.deepoove.poi.resolver.TemplateResolver.java
License:Apache License
/** * running string Algorithm//from w w w .j av a 2 s . co m * * @param paragraph * @return */ public static List<RunTemplate> parseRun(XWPFParagraph paragraph) { List<XWPFRun> runs = paragraph.getRuns(); if (null == runs || runs.isEmpty()) return null; String text = paragraph.getText(); logger.debug("Paragrah's text is:" + text); List<Pair<RunEdge, RunEdge>> pairs = new ArrayList<Pair<RunEdge, RunEdge>>(); List<String> tags = new ArrayList<String>(); calcTagPosInParagraph(text, pairs, tags); List<RunTemplate> rts = new ArrayList<RunTemplate>(); if (pairs.isEmpty()) return rts; RunTemplate runTemplate; calcRunPosInParagraph(runs, pairs); for (Pair<RunEdge, RunEdge> pai : pairs) { logger.debug(pai.getLeft().toString()); logger.debug(pai.getRight().toString()); } // split and merge Pair<RunEdge, RunEdge> pair2 = pairs.get(0); int length = pairs.size(); int tagIndex = length; for (int n = length - 1; n >= 0; n--) { pair2 = pairs.get(n); RunEdge left2 = pair2.getLeft(); RunEdge right2 = pair2.getRight(); int left_r = left2.getRunPos(); int right_r = right2.getRunPos(); int runEdge = left2.getRunEdge(); int runEdge2 = right2.getRunEdge(); String text1 = runs.get(left_r).getText(0); String text2 = runs.get(right_r).getText(0); if (runEdge2 + 1 >= text2.length()) { if (left_r != right_r) paragraph.removeRun(right_r); } else { String substring = text2.substring(runEdge2 + 1, text2.length()); if (left_r == right_r) { XWPFRun insertNewRun = paragraph.insertNewRun(right_r + 1); styleRun(insertNewRun, runs.get(right_r)); insertNewRun.setText(substring, 0); } else runs.get(right_r).setText(substring, 0); } for (int m = right_r - 1; m > left_r; m--) { paragraph.removeRun(m); } if (runEdge <= 0) { runs.get(left_r).setText(tags.get(--tagIndex), 0); runTemplate = parseRun(runs.get(left_r)); } else { String substring = text1.substring(0, runEdge); XWPFRun xwpfRun = runs.get(left_r); runs.get(left_r).setText(substring, 0); XWPFRun insertNewRun = paragraph.insertNewRun(left_r + 1); 
styleRun(insertNewRun, xwpfRun); insertNewRun.setText(tags.get(--tagIndex), 0); runTemplate = parseRun(runs.get(left_r + 1)); } if (null != runTemplate) { rts.add(runTemplate); } } return rts; }
From source file:com.example.office.DOCDocumentParse.java
@SuppressWarnings("unused") private File doc2docxOld(File docFile) { String docxFilePath = docFile.getPath() + "x"; File docxFile = new File(docxFilePath); if (!docxFile.exists()) { XWPFDocument document = null;/*from w ww . jav a2 s .c om*/ try (InputStream ins = new FileInputStream(docxFile); OutputStream out = new FileOutputStream(docxFile);) { Document doc = new Document(docFile.getPath()); doc.save(docxFile.getPath()); document = new XWPFDocument(ins); // document.removeBodyElement(0) List<IBodyElement> elements = document.getBodyElements(); IBodyElement element = elements.get(elements.size() - 1); if (StringUtils.equals(BodyElementType.PARAGRAPH.name(), element.getElementType().name())) { XWPFParagraph xp = ((XWPFParagraph) element); String text = xp.getText(); if (StringUtils.isNotBlank(text)) { if (text.contains("Evaluation") && text.contains("Aspose")) { document.removeBodyElement(elements.size() - 1); } } } IBodyElement element0 = elements.get(0); if (StringUtils.equals(BodyElementType.PARAGRAPH.name(), element0.getElementType().name())) { XWPFParagraph xp = ((XWPFParagraph) element0); String text = xp.getText(); if (StringUtils.isNotBlank(text)) { if (text.contains("Evaluation") && text.contains("Aspose")) { document.removeBodyElement(0); } } } document.write(out); } catch (Exception e) { LogUtils.writeWarnExceptionLog(log, e); } finally { try { if (document != null) document.close(); } catch (IOException e) { LogUtils.writeDebugExceptionLog(log, e); } } } return docxFile; }
From source file:com.example.office.DOCDocumentParse.java
private void deleteAsposeInfo(File docxFile) { XWPFDocument document = null;//w w w . j a v a 2s . c o m try (InputStream ins = new FileInputStream(docxFile); OutputStream out = new FileOutputStream(docxFile);) { document = new XWPFDocument(ins); List<IBodyElement> elements = document.getBodyElements(); IBodyElement element = elements.get(elements.size() - 1); if (StringUtils.equals(BodyElementType.PARAGRAPH.name(), element.getElementType().name())) { XWPFParagraph xp = ((XWPFParagraph) element); String text = xp.getText(); if (StringUtils.isNotBlank(text)) { if (text.contains("Evaluation") && text.contains("Aspose")) { document.removeBodyElement(elements.size() - 1); } } } IBodyElement element0 = elements.get(0); if (StringUtils.equals(BodyElementType.PARAGRAPH.name(), element0.getElementType().name())) { XWPFParagraph xp = ((XWPFParagraph) element0); String text = xp.getText(); if (StringUtils.isNotBlank(text)) { if (text.contains("Evaluation") && text.contains("Aspose")) { document.removeBodyElement(0); } } } document.write(out); } catch (Exception e) { LogUtils.writeWarnExceptionLog(log, e); } finally { if (document != null) { try { document.close(); } catch (IOException e) { LogUtils.writeDebugExceptionLog(log, e); } } } }
From source file:com.qihang.winter.poi.word.parse.ParseWord07.java
License:Apache License
/**
 * Passes every paragraph whose text contains <code>{{</code> to
 * {@code parseThisParagraph}, in list order.
 *
 * @author Zerrion
 * @date 2013-11-17
 * @param paragraphs paragraphs to inspect
 * @param map        passed through unchanged to {@code parseThisParagraph}
 * @throws Exception whatever {@code parseThisParagraph} throws
 */
private void parseAllParagraphic(List<XWPFParagraph> paragraphs, Map<String, Object> map) throws Exception {
    int position = 0;
    // The list size is re-read on every step, as in the original indexed loop.
    while (position < paragraphs.size()) {
        XWPFParagraph candidate = paragraphs.get(position);
        boolean hasMarker = candidate.getText().contains("{{");
        if (hasMarker) {
            parseThisParagraph(candidate, map);
        }
        position++;
    }
}
From source file:cv_extractor.DocReader.java
protected static void readDocxFile(File localFile) { try {/*from w ww . j av a2 s . co m*/ //Create a input stream to read file FileInputStream fis = new FileInputStream(localFile.getAbsolutePath()); //For reading docx files XWPFDocument document = new XWPFDocument(fis); List<XWPFParagraph> paragraphs = document.getParagraphs(); System.out.println("Total no of paragraph " + paragraphs.size()); for (XWPFParagraph para : paragraphs) { //Compile the regex defined above Pattern r = Pattern.compile(pattern); //Check if any string matches the compiled pattern Matcher m = r.matcher(para.getText()); if (m.find()) { //m.group() Returns the input subsequence matched by the previous match data.add(m.group()); } } fis.close(); } catch (Exception e) { e.printStackTrace(); } }
From source file:DocxProcess.DocxTemplateReplacer.java
private void replaceParagraph(XWPFParagraph p, Map<String, String> data) { String pText = p.getText(); // complete paragraph as string if (pText.contains("${")) { // if paragraph does not include our pattern, ignore TreeMap<Integer, XWPFRun> posRuns = getPosToRuns(p); Pattern pat = Pattern.compile("\\$\\{(.+?)\\}"); Matcher m = pat.matcher(pText); while (m.find()) { // for all patterns in the paragraph String g = m.group(1); // extract key start and end pos int s = m.start(1); int e = m.end(1); String key = g;//from w w w . jav a 2 s . c o m String x = data.get(key); if (x == null) x = ""; SortedMap<Integer, XWPFRun> range = posRuns.subMap(s - 2, true, e + 1, true); // get runs which contain the pattern boolean found1 = false; // found $ boolean found2 = false; // found { boolean found3 = false; // found } XWPFRun prevRun = null; // previous run handled in the loop XWPFRun found2Run = null; // run in which { was found int found2Pos = -1; // pos of { within above run for (XWPFRun r : range.values()) { if (r == prevRun) continue; // this run has already been handled if (found3) break; // done working on current key pattern prevRun = r; for (int k = 0;; k++) { // iterate over texts of run r if (found3) break; String txt = null; try { txt = r.getText(k); // note: should return null, but throws exception if the text does not exist } catch (Exception ex) { } if (txt == null) break; // no more texts in the run, exit loop if (txt.contains("$") && !found1) { // found $, replaceAll it with value from data map txt = txt.replaceFirst("\\$", x); found1 = true; } if (txt.contains("{") && !found2 && found1) { found2Run = r; // found { replaceAll it with empty string and remember location found2Pos = txt.indexOf('{'); txt = txt.replaceFirst("\\{", ""); found2 = true; } if (found1 && found2 && !found3) { // find } and set all chars between { and } to blank if (txt.contains("}")) { if (r == found2Run) { // complete pattern was within a single run txt = txt.substring(0, found2Pos) + 
txt.substring(txt.indexOf('}')); } else // pattern spread across multiple runs txt = txt.substring(txt.indexOf('}')); } else if (r == found2Run) // same run as { but no }, remove all text starting at { txt = txt.substring(0, found2Pos); else txt = ""; // run between { and }, set text to blank } if (txt.contains("}") && !found3) { txt = txt.replaceFirst("\\}", ""); found3 = true; } r.setText(txt, k); } } } // System.out.println(p.getText()); } }
From source file:DocxProcess.ReadWordDocx.java
public void ReadByDocx(XWPFDocument doc) throws IOException { List<XWPFParagraph> paras = doc.getParagraphs(); System.out.println("Paragraph"); for (XWPFParagraph para : paras) { System.out.println(para.getText()); }//from w w w.j a va 2s . c om List<XWPFTable> tables = doc.getTables(); List<XWPFTableRow> rows; List<XWPFTableCell> cells; System.out.println("TableCell"); for (XWPFTable table : tables) { rows = table.getRows(); for (XWPFTableRow row : rows) { System.out.println(""); cells = row.getTableCells(); for (XWPFTableCell cell : cells) { System.out.print(cell.getText()); System.out.print(" "); } System.out.println(); } } }