List of usage examples for org.apache.poi.xwpf.usermodel.XWPFDocument#getParagraphs()
@Override
public List<XWPFParagraph> getParagraphs()
From source file:com.project3.utils.poi.ApachePOIChecker.java
public static void checkDocument(String filename) { resultList = new ArrayList<ResultModel>(); try {//from w w w . ja v a 2 s. co m // Open document to check /* Writer fw = new FileWriter("C:\\Users\\Noel\\Documents\\NetBeansProjects\\ApachePOITest\\test1.json"); JsonObject jo = new JsonObject().add( "name", "John" ).add( "age", 23 ); JsonArray ja = new JsonArray().add( "John" ).add( 23 ); jo.writeTo(fw); ja.writeTo(fw); fw.close(); */ XWPFDocument docx1 = new XWPFDocument(new FileInputStream(new File(filename))); // Put the following to an XML file that contains strings to check with respective properties to check // Question 1 in Level 1 // Initialize strings to find List<String> sl = new ArrayList<String>(); String[] tl = { "Melissa Martin", "555 West Main St.", "Sampaloc, Metro Manila", "Phone: 312-312-3123", "E-mail: TeachMartin@email.com" }; sl.addAll(Arrays.asList(tl)); // Initialize properties these strings should have Map<String, String> properties = new HashMap<String, String>(); properties.put("FONT FAMILY", "MV Boli"); properties.put("FONT SIZE", "12"); // We go through all paragraphs of the document and check for the presence of the strings // If they are present, check if the properties given above are present // Result is displayed as String = {Property1 = Score1, Property2 = Score2, ...} // Scores are determined by the number of elements within the paragraph which follows the given formatting Map<String, HashMap> results; results = DocumentPropertyChecker.checkRunPropertiesOfParagraphs(docx1.getParagraphs(), sl, properties); System.out.println("1. 
" + results.toString()); addResultsToList(results, properties); //2 tl = new String[] { "Summary", "Educational Background", "Related Work Experience", "Additional Work Experience" }; sl.addAll(Arrays.asList(tl)); //properties properties = new HashMap(); properties.put("BOLD", "true"); results = DocumentPropertyChecker.checkRunPropertiesOfParagraphs(docx1.getParagraphs(), sl, properties); System.out.println("2. " + results.toString()); addResultsToList(results, properties); //3 tl = new String[] { "Holds Bachelor's Degree in Music and Education with TEFL certification", "5 years experience in teaching Englsih to Spanish speaking students ages 12 and up", "Exceptional skills in teaching English and Spanish language", "Bachelor of Music; Univeristy of Sto. Tomas 2004", "Bachelor of Science in Education; Univerity of the Philippines 2008" }; sl.addAll(Arrays.asList(tl)); properties = new HashMap(); properties.put("LINE SPACING", "1.5"); results = DocumentPropertyChecker.checkPropertiesOfParagraphs(docx1.getParagraphs(), sl, properties); System.out.println("3. " + results.toString()); addResultsToList(results, properties); //5 tl = new String[] { "St. Peter's University", "2011 Present", "Teaches English and Spanish to students ages 15 and up", "Creates course materials, including exams, quizzes and visual aids used by all teachers throughout the organization", "Initiates programs focused in improving grammar and active listening, writing and speaking skills of students" }; sl.addAll(Arrays.asList(tl)); properties = new HashMap(); properties.put("NUMBERING FORMAT", "bullet"); results = DocumentPropertyChecker.checkPropertiesOfParagraphs(docx1.getParagraphs(), sl, properties); System.out.println("5. 
" + results.toString()); addResultsToList(results, properties); //7 properties = new HashMap(); properties.put("MARGIN TOP", "2"); properties.put("MARGIN BOTTOM", "2"); properties.put("MARGIN LEFT", "2"); properties.put("MARGIN RIGHT", "2"); Map<String, Object> results2; results2 = DocumentPropertyChecker.checkPropertiesOfDocument(docx1, properties); HashMap<String, String> temp = new HashMap<String, String>(); for (Entry<String, Object> r : results2.entrySet()) { temp.put(r.getKey(), r.getValue().toString()); } System.out.println("7. " + results2.toString()); results.clear(); results.put("Page Format", temp); addResultsToList(results, properties); //8 temp.clear(); properties = new HashMap<String, String>(); properties.put("ALIGN", "both"); results2 = DocumentPropertyChecker.checkPropertiesOfAllParagraphs(docx1.getParagraphs(), properties); System.out.println("8. " + results2.toString()); for (Entry<String, Object> r : results2.entrySet()) { temp.put(r.getKey(), r.getValue().toString()); } results.clear(); results.put("Page Format", temp); addResultsToList(results, properties); } catch (IOException ex) { Logger.getLogger(ApachePOIChecker.class.getName()).log(Level.SEVERE, null, ex); } }
From source file:com.project3.utils.poiold.ApachePOIChecker.java
public static void checkDocument1(String filename) { try {/* www .java 2 s .co m*/ // Open document to check /* Writer fw = new FileWriter("C:\\Users\\Noel\\Documents\\NetBeansProjects\\ApachePOITest\\test1.json"); JsonObject jo = new JsonObject().add( "name", "John" ).add( "age", 23 ); JsonArray ja = new JsonArray().add( "John" ).add( 23 ); jo.writeTo(fw); ja.writeTo(fw); fw.close(); */ XWPFDocument docx1 = new XWPFDocument(new FileInputStream( new File("C:\\Users\\Noel\\Documents\\NetBeansProjects\\ApachePOITest\\resume_only.docx"))); // Put the following to an XML file that contains strings to check with respective properties to check // Question 1 in Level 1 // Initialize strings to find ArrayList<String> sl = new ArrayList(); String[] tl = { "Melissa Martin", "555 West Main St.", "Sampaloc, Metro Manila", "Phone: 312-312-3123", "E-mail: TeachMartin@email.com" }; sl.addAll(Arrays.asList(tl)); // Initialize properties these strings should have Map<String, String> properties = new HashMap(); properties.put("FONT FAMILY", "MV Boli"); properties.put("FONT SIZE", "12"); // We go through all paragraphs of the document and check for the presence of the strings // If they are present, check if the properties given above are present // Result is displayed as String = {Property1 = Score1, Property2 = Score2, ...} // Scores are determined by the number of elements within the paragraph which follows the given formatting Map<String, HashMap> results; results = DocumentPropertyCheckerOld.checkRunPropertiesOfParagraphs(docx1.getParagraphs(), sl, properties); System.out.println("1. " + results.toString()); System.out.println(""); //2 tl = new String[] { "Summary", "Educational Background", "Related Work Experience", "Additional Work Experience" }; sl.addAll(Arrays.asList(tl)); //properties properties = new HashMap(); properties.put("BOLD", "true"); results = DocumentPropertyCheckerOld.checkRunPropertiesOfParagraphs(docx1.getParagraphs(), sl, properties); System.out.println("2. 
" + results.toString()); System.out.println(""); //3 tl = new String[] { "Holds Bachelor's Degree in Music and Education with TEFL certification", "5 years experience in teaching Englsih to Spanish speaking students ages 12 and up", "Exceptional skills in teaching English and Spanish language", "Bachelor of Music; Univeristy of Sto. Tomas 2004", "Bachelor of Science in Education; Univerity of the Philippines 2008" }; sl.addAll(Arrays.asList(tl)); properties = new HashMap(); properties.put("LINE SPACING", "1.5"); results = DocumentPropertyCheckerOld.checkPropertiesOfParagraphs(docx1.getParagraphs(), sl, properties); System.out.println("3. " + results.toString()); System.out.println(""); //4 tl = new String[] { "2008-2011" }; sl.addAll(Arrays.asList(tl)); results = DocumentPropertyCheckerOld.checkIfStringExistsInParagraphs(docx1.getParagraphs(), sl); System.out.println("4. " + results.toString()); System.out.println(""); //5 tl = new String[] { "St. Peter's University", "2011 Present", "Teaches English and Spanish to students ages 15 and up", "Creates course materials, including exams, quizzes and visual aids used by all teachers throughout the organization", "Initiates programs focused in improving grammar and active listening, writing and speaking skills of students" }; sl.addAll(Arrays.asList(tl)); properties = new HashMap(); properties.put("NUMBERING FORMAT", "bullet"); results = DocumentPropertyCheckerOld.checkPropertiesOfParagraphs(docx1.getParagraphs(), sl, properties); System.out.println("5. " + results.toString()); System.out.println(""); //6 tl = new String[] { "Black Pen Movement \u00AE" }; sl.addAll(Arrays.asList(tl)); results = DocumentPropertyCheckerOld.checkIfStringExistsInParagraphs(docx1.getParagraphs(), sl); System.out.println("6. 
" + results.toString()); System.out.println(""); //7 properties = new HashMap(); properties.put("MARGIN TOP", "2"); properties.put("MARGIN BOTTOM", "2"); properties.put("MARGIN LEFT", "2"); properties.put("MARGIN RIGHT", "2"); System.out.println( "7. " + DocumentPropertyCheckerOld.checkPropertiesOfDocument(docx1, properties).toString()); System.out.println(""); //8 properties = new HashMap(); properties.put("ALIGN", "both"); System.out.println("8. " + DocumentPropertyCheckerOld .checkPropertiesOfAllParagraphs(docx1.getParagraphs(), properties).toString()); } catch (IOException ex) { Logger.getLogger(ApachePOIChecker.class.getName()).log(Level.SEVERE, null, ex); } }
From source file:com.raghav.plot.ReadDOCX.java
public static void main(String[] args) { InputStream in = null;//from ww w . j a v a 2 s. c om String result = ""; try { in = new FileInputStream(new File("/home/raghav/Desktop/Axis-LB.docx")); XWPFDocument doc = new XWPFDocument(in); doc.getParagraphs().stream().map((p) -> p.getRuns()).filter((runs) -> (runs != null)) .forEach((runs) -> { runs.stream().forEach((r) -> { String text = r.getText(0); System.out.println(text); }); }); doc.getTables().stream().forEach((tbl) -> { tbl.getRows().stream().forEach((row) -> { row.getTableCells().stream().forEach((cell) -> { cell.getParagraphs().stream().forEach((p) -> { p.getRuns().stream().filter((r) -> (r != null)).forEach((r) -> { String text = r.getText(0); if (text != null) { System.out.println(text); } }); }); }); }); }); } catch (Exception ex) { ex.printStackTrace(); } }
From source file:com.unsa.view.MainView.java
License:Creative Commons License
private void btnProcesarActionPerformed(java.awt.event.ActionEvent evt) throws SQLException, IOException {//GEN-FIRST:event_btnProcesarActionPerformed // TODO add your handling code here: if (lblInstitucion.getText().equals("") || lblInstitucion.getText().equals("") || lblIdioma.getText().equals("")) { JOptionPane.showMessageDialog(null, "Una de las opciones generales est vacio"); return;/*from w ww. ja va 2 s . c o m*/ } if (jTextField1.getText().equals("")) { JOptionPane.showMessageDialog(null, "No se especific la ruta donde guardar la metadata"); return; } jProgressBar1.setValue(0); jProgressBar1.setStringPainted(true); File[] listOfFiles = file.getSelectedFiles(); int count = 0; for (File file : listOfFiles) { boolean archivo_daniado = false; if (file.isFile()) { System.out.println(file.getName()); if (file.getName().substring(file.getName().length() - 1).equals("x")) { //is a docx try { XWPFDocument doc = new XWPFDocument(new FileInputStream(file)); alg = new AlgorithmsWord(doc.getParagraphs()); } catch (Exception e) { archivo_daniado = true; } } else { //is not a docx try { HWPFDocument doc = new HWPFDocument(new FileInputStream(file)); Range r = doc.getRange(); alg = new AlgorithmsWord(r); } catch (Exception e) { try { XWPFDocument doc = new XWPFDocument(new FileInputStream(file)); alg = new AlgorithmsWord(doc.getParagraphs()); } catch (Exception ex) { archivo_daniado = true; } } } Metadata metadata = null; if (archivo_daniado == true) { metadata = loadMetadataFail(); } else { metadata = loadMetadata(alg); } metadata.setFileName(file.getName()); listMetaData.add(metadata); int val_calculate = (count + 1) * 100 / listOfFiles.length; jProgressBar1.setValue(val_calculate); count++; } } String name = jTextField1.getText(); ExcelController excel = new ExcelController(name, "UNSA", listMetaData); String[] lnames = { "Nombre Archivo", "Obs. Dudosa", "Obs. 
Critica", "Abrir Archivo" }; DefaultTableModel model = new DefaultTableModel(lnames, 0); tableSalida.setModel(model); int contador = 0; for (Metadata meta : listMetaData) { Object[] data = new Object[4]; data[0] = listOfFiles[contador].getName(); if (meta.getFailGeneral()) { data[1] = "Fail"; data[2] = "Fail"; } else { data[1] = meta.getStadistic().getObservationGeneral() ? "Observacion" : ""; data[2] = meta.getObservacionGeneral() ? "Falta" : ""; } data[3] = "abrir"; model.addRow(data); contador++; } btnAbrirMetadata.setEnabled(true); }
From source file:cv_extractor.DocReader.java
protected static void readDocxFile(File localFile) { try {//from w w w. j a v a 2s. co m //Create a input stream to read file FileInputStream fis = new FileInputStream(localFile.getAbsolutePath()); //For reading docx files XWPFDocument document = new XWPFDocument(fis); List<XWPFParagraph> paragraphs = document.getParagraphs(); System.out.println("Total no of paragraph " + paragraphs.size()); for (XWPFParagraph para : paragraphs) { //Compile the regex defined above Pattern r = Pattern.compile(pattern); //Check if any string matches the compiled pattern Matcher m = r.matcher(para.getText()); if (m.find()) { //m.group() Returns the input subsequence matched by the previous match data.add(m.group()); } } fis.close(); } catch (Exception e) { e.printStackTrace(); } }
From source file:DocxProcess.ReadWordDocx.java
/**
 * Dumps a document to standard output: a "Paragraph" heading followed by the text of
 * each body paragraph, then a "TableCell" heading followed by every table row, with the
 * cells of a row printed on one line and each cell followed by a single space.
 *
 * @param doc the document to print
 * @throws IOException declared by the signature; nothing in this body throws it directly
 */
public void ReadByDocx(XWPFDocument doc) throws IOException {
    List<XWPFParagraph> bodyParagraphs = doc.getParagraphs();
    System.out.println("Paragraph");
    for (XWPFParagraph paragraph : bodyParagraphs) {
        System.out.println(paragraph.getText());
    }

    List<XWPFTable> allTables = doc.getTables();
    System.out.println("TableCell");
    for (XWPFTable currentTable : allTables) {
        for (XWPFTableRow currentRow : currentTable.getRows()) {
            // Blank line before each row.
            System.out.println("");
            for (XWPFTableCell currentCell : currentRow.getTableCells()) {
                System.out.print(currentCell.getText() + " ");
            }
            System.out.println();
        }
    }
}
From source file:easyoffice.word.WordMaker.java
private static void replaceText(XWPFDocument doc, HashMap<String, String> data) { Set<String> keySet = data.keySet(); for (String key : keySet) { for (XWPFParagraph p : doc.getParagraphs()) { List<XWPFRun> runs = p.getRuns(); for (XWPFRun run : runs) { if (run.toString().toLowerCase().equals(key)) { run.setText(data.get(key), 0); }//from w w w . ja v a 2 s. co m } } } }
From source file:edu.gatech.pmase.capstone.awesome.impl.output.DisasterResponseTradeStudyOutputer.java
License:Open Source License
/**
 * Fills in the report details paragraph with the report generation date and country.
 *
 * <p>Four runs are appended to the paragraph at {@code REPORT_DETAILS_ROW_INDEX}: a bold
 * label and a plain value for the date (followed by a line break), then a bold label and
 * a plain value for the display country of the default locale.
 *
 * @param xdoc the document containing the report details paragraph
 */
private void createReportDetails(final XWPFDocument xdoc) {
    final Locale defaultLocale = Locale.getDefault();
    LOGGER.debug("Creating report details");

    final XWPFParagraph detailsParagraph = xdoc.getParagraphs().get(REPORT_DETAILS_ROW_INDEX);

    // Date line: bold label, plain value, then a break before the next line.
    final XWPFRun dateLabel = detailsParagraph.createRun();
    dateLabel.setBold(true);
    dateLabel.setText("Date Report Generated: ");

    final XWPFRun dateValue = detailsParagraph.createRun();
    dateValue.setBold(false);
    dateValue.setText(outputFileFormatter.format(now));
    dateValue.addBreak();

    // Country line: bold label, plain value.
    final XWPFRun countryLabel = detailsParagraph.createRun();
    countryLabel.setBold(true);
    countryLabel.setText("Country Report Generated: ");

    final XWPFRun countryValue = detailsParagraph.createRun();
    countryValue.setBold(false);
    countryValue.setText(defaultLocale.getDisplayCountry());
}
From source file:File.DOCX.ReadDocx.java
/** * @param args the command line arguments *///from ww w .ja va 2 s . co m public void ReadParagraph(String path, String filename) { try { FileInputStream fis = new FileInputStream(path + filename + ".docx"); XWPFDocument xdoc = new XWPFDocument(OPCPackage.open(fis)); List<XWPFParagraph> paragraphList = xdoc.getParagraphs(); for (XWPFParagraph paragraph : paragraphList) { System.out.println(paragraph.getText()); } } catch (Exception ex) { ex.printStackTrace(); } }
From source file:fr.univrouen.poste.services.WordParser.java
License:Apache License
/**
 * Fills bookmarked regions of a Word document with text and writes the result to
 * {@code out}.
 *
 * <p>For every bookmark start in every body paragraph, each key of {@code textMap} is
 * normalized (accents stripped, spaces replaced by underscores, non-word characters
 * removed) and compared case-insensitively with the bookmark name. On a match, the
 * sibling nodes following the bookmark start are removed up to the first node whose name
 * contains "bookmarkEnd", and a new run holding the mapped text is inserted at that
 * position.
 *
 * <p>Any exception is logged and not rethrown. Neither {@code docx} nor {@code out} is
 * closed by this method.
 *
 * @param docx    stream of the source .docx document
 * @param textMap replacement text keyed by (un-normalized) bookmark name
 * @param out     stream the modified document is written to
 */
public void modifyWord(InputStream docx, Map<String, String> textMap, OutputStream out) {
    try {
        XWPFDocument doc = new XWPFDocument(OPCPackage.open(docx));
        // attempt using the {{}} names
        for (XWPFParagraph p : doc.getParagraphs()) {
            for (CTBookmark bookmark : p.getCTP().getBookmarkStartList()) {
                log.trace(bookmark.getName());
                for (String key : textMap.keySet()) {
                    // Normalize the key into the form it is compared against the bookmark name.
                    String cleanKey = StringUtils.stripAccents(key);
                    cleanKey = cleanKey.replaceAll(" ", "_");
                    cleanKey = cleanKey.replaceAll("\\W", "");
                    if (bookmark.getName().equalsIgnoreCase(cleanKey)) {
                        // Remove everything between the bookmark start and its end marker.
                        // The next sibling is re-read after each removal because the
                        // removed node is no longer linked.
                        Node nextNode = bookmark.getDomNode().getNextSibling();
                        while (nextNode != null && nextNode.getNodeName() != null
                                && !(nextNode.getNodeName().contains("bookmarkEnd"))) {
                            p.getCTP().getDomNode().removeChild(nextNode);
                            nextNode = bookmark.getDomNode().getNextSibling();
                        }
                        // The run is created through the paragraph, then placed before
                        // nextNode (the end marker, or null when no sibling was left).
                        XWPFRun run = p.createRun();
                        run.setText(textMap.get(key));
                        p.getCTP().getDomNode().insertBefore(run.getCTR().getDomNode(), nextNode);
                    }
                }
            }
        }
        doc.write(out);
    } catch (Exception e) {
        log.error("Pb durant la modification du document word", e);
    }
}