List of usage examples for the org.apache.poi.xwpf.usermodel.XWPFDocument constructor XWPFDocument(InputStream)
public XWPFDocument(InputStream is) throws IOException
From source file:com.swg.parse.docx.NewExtract.java
private void extractImages(String src, int cnt) { try {/*from w w w.j a v a2s . c o m*/ FileInputStream fs = new FileInputStream(src); XWPFDocument docx = new XWPFDocument(fs); List<XWPFPictureData> piclist = docx.getAllPictures(); Iterator<XWPFPictureData> iterator = piclist.iterator(); int i = 0; new File( "C:\\Users\\KXK3\\Documents\\NetBeansProjects\\ParseSuite2\\ParseDocx\\build\\test\\unit\\results\\NewPicFolder" + cnt).mkdir(); while (iterator.hasNext()) { XWPFPictureData pic = iterator.next(); byte[] bytepic = pic.getData(); BufferedImage imag = ImageIO.read(new ByteArrayInputStream(bytepic)); File CreatedImageFile = new File( "C:\\Users\\KXK3\\Documents\\NetBeansProjects\\ParseSuite2\\ParseDocx\\build\\test\\unit\\results\\NewPicFolder" + cnt + "\\imagefromword" + i + ".jpg"); ImageIO.write(imag, "jpg", new File( "C:\\Users\\KXK3\\Documents\\NetBeansProjects\\ParseSuite2\\ParseDocx\\build\\test\\unit\\results\\NewPicFolder" + cnt + "\\imagefromword" + i + ".jpg")); i++; System.out.println("path to image " + i + " = " + CreatedImageFile.getAbsolutePath()); labelBeforePOJO.add("path to image " + i); ValueBeforePOJO.add(CreatedImageFile.getAbsolutePath()); sectionBeforePOJO.add(section); } } catch (Exception e) {//I can handle the image advance conversion here? System.exit(-1); } }
From source file:com.swg.parse.docx.OpenFolderAction.java
/**
 * Reads a .docx file with Apache POI and returns its text content as a single string.
 * The returned text may have missing parts due to the POI library.
 *
 * @param f the .docx file to read
 * @return the text POI extracted from the document
 * @throws FileNotFoundException if the file cannot be opened
 * @throws IOException if the document cannot be read
 */
private String getPOI(File f) throws FileNotFoundException, IOException {
    // try-with-resources releases the file handle even when parsing fails.
    try (FileInputStream inputTest = new FileInputStream(f.getAbsolutePath())) {
        XWPFDocument docxTest = new XWPFDocument(inputTest);
        XWPFWordExtractor contentTest = new XWPFWordExtractor(docxTest);
        return contentTest.getText();
    }
}
From source file:com.swg.parse.docx.OpenWord.java
/**
 * Reads the .docx file referenced by {@code selectedFile} with Apache POI and returns its
 * text content as a single string.
 * The returned text may have missing parts due to the POI library.
 *
 * @return the text POI extracted from the document
 * @throws FileNotFoundException if the file cannot be opened
 * @throws IOException if the document cannot be read
 */
private String getPOI() throws FileNotFoundException, IOException {
    // try-with-resources releases the file handle even when parsing fails.
    try (FileInputStream inputTest = new FileInputStream(selectedFile.getAbsolutePath())) {
        XWPFDocument docxTest = new XWPFDocument(inputTest);
        XWPFWordExtractor contentTest = new XWPFWordExtractor(docxTest);
        return contentTest.getText();
    }
}
From source file:com.swg.parse.docx.V2Test.java
/**
 * Reads the fixed test document {@code path + "CAD_2013_RE-02.docx"} with Apache POI.
 *
 * @return the text content POI extracted from the .docx file
 * @throws FileNotFoundException if the file cannot be opened
 * @throws IOException if the document cannot be read
 */
private String getPOI() throws FileNotFoundException, IOException {
    // try-with-resources releases the file handle even when parsing fails.
    try (FileInputStream inputTest = new FileInputStream(path + "CAD_2013_RE-02.docx")) {
        XWPFDocument docxTest = new XWPFDocument(inputTest);
        XWPFWordExtractor contentTest = new XWPFWordExtractor(docxTest);
        return contentTest.getText();
    }
}
From source file:com.unsa.view.MainView.java
License:Creative Commons License
private void btnProcesarActionPerformed(java.awt.event.ActionEvent evt) throws SQLException, IOException {//GEN-FIRST:event_btnProcesarActionPerformed // TODO add your handling code here: if (lblInstitucion.getText().equals("") || lblInstitucion.getText().equals("") || lblIdioma.getText().equals("")) { JOptionPane.showMessageDialog(null, "Una de las opciones generales est vacio"); return;// w ww. j av a 2s . c o m } if (jTextField1.getText().equals("")) { JOptionPane.showMessageDialog(null, "No se especific la ruta donde guardar la metadata"); return; } jProgressBar1.setValue(0); jProgressBar1.setStringPainted(true); File[] listOfFiles = file.getSelectedFiles(); int count = 0; for (File file : listOfFiles) { boolean archivo_daniado = false; if (file.isFile()) { System.out.println(file.getName()); if (file.getName().substring(file.getName().length() - 1).equals("x")) { //is a docx try { XWPFDocument doc = new XWPFDocument(new FileInputStream(file)); alg = new AlgorithmsWord(doc.getParagraphs()); } catch (Exception e) { archivo_daniado = true; } } else { //is not a docx try { HWPFDocument doc = new HWPFDocument(new FileInputStream(file)); Range r = doc.getRange(); alg = new AlgorithmsWord(r); } catch (Exception e) { try { XWPFDocument doc = new XWPFDocument(new FileInputStream(file)); alg = new AlgorithmsWord(doc.getParagraphs()); } catch (Exception ex) { archivo_daniado = true; } } } Metadata metadata = null; if (archivo_daniado == true) { metadata = loadMetadataFail(); } else { metadata = loadMetadata(alg); } metadata.setFileName(file.getName()); listMetaData.add(metadata); int val_calculate = (count + 1) * 100 / listOfFiles.length; jProgressBar1.setValue(val_calculate); count++; } } String name = jTextField1.getText(); ExcelController excel = new ExcelController(name, "UNSA", listMetaData); String[] lnames = { "Nombre Archivo", "Obs. Dudosa", "Obs. 
Critica", "Abrir Archivo" }; DefaultTableModel model = new DefaultTableModel(lnames, 0); tableSalida.setModel(model); int contador = 0; for (Metadata meta : listMetaData) { Object[] data = new Object[4]; data[0] = listOfFiles[contador].getName(); if (meta.getFailGeneral()) { data[1] = "Fail"; data[2] = "Fail"; } else { data[1] = meta.getStadistic().getObservationGeneral() ? "Observacion" : ""; data[2] = meta.getObservacionGeneral() ? "Falta" : ""; } data[3] = "abrir"; model.addRow(data); contador++; } btnAbrirMetadata.setEnabled(true); }
From source file:com.unsa.view.MainView.java
License:Creative Commons License
private void menuProcesarActionPerformed(java.awt.event.ActionEvent evt) throws FileNotFoundException {//GEN-FIRST:event_menuProcesarActionPerformed // TODO add your handling code here: if (txtArchivos.getText().equals("")) { return;/*from w ww. ja v a 2 s. c o m*/ } File[] listOfFiles = file.getSelectedFiles(); for (File file : listOfFiles) { boolean archivo_daniado = false; String nameFile = file.getAbsolutePath().substring(0, file.getAbsolutePath().length() - 4) + "pdf"; File outFile = new File(nameFile); OutputStream out = new FileOutputStream(outFile); PdfOptions options = PdfOptions.create().fontEncoding("DOCX"); // //PdfOptions options =PdfOptions.create().fontEncoding("windows-1250"); System.out.println(file.getName()); if (file.isFile()) { if (file.getName().substring(file.getName().length() - 1).equals("x")) { try { XWPFDocument doc = new XWPFDocument(new FileInputStream(file)); PdfConverter.getInstance().convert(doc, out, options); } catch (Exception e) { archivo_daniado = true; } } else { //is not a docx try { //HWPFDocument doc = new HWPFDocument(new FileInputStream(file)); DocConverterPDF(file); } catch (Exception e) { try { XWPFDocument doc = new XWPFDocument(new FileInputStream(file)); PdfConverter.getInstance().convert(doc, out, options); } catch (Exception ex) { archivo_daniado = true; } } } } } }
From source file:com.validation.manager.core.tool.table.extractor.TableExtractor.java
License:Apache License
private List<XWPFTable> extractTablesFromWord() throws FileNotFoundException, IOException { List<XWPFTable> tables; try ( //Word documents InputStream fis = new FileInputStream(source)) { XWPFDocument doc = new XWPFDocument(fis); tables = doc.getTables();/* ww w . ja v a2 s .c om*/ } return tables; }
From source file:com.viettel.util.doc.DocsUtility.java
public String saveToPdf() throws IOException { //configSource.getMessage("path.tempFolder", null, null) String pathWord = saveToWord(); Random rand = new Random(); int n = rand.nextInt(1000) + 1; SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMddhhmmss"); String fileName = pathTemp + sdf.format(new Date()) + n + ".pdf"; InputStream in = new FileInputStream(new File(pathWord)); XWPFDocument documentPdf = new XWPFDocument(in); OutputStream out = new FileOutputStream(fileName); PdfOptions options = PdfOptions.create(); PdfConverter.getInstance().convert(documentPdf, out, options); new File(pathWord).delete(); return fileName; }
From source file:cv_extractor.DocReader.java
protected static void readDocxFile(File localFile) { try {/* ww w . j ava 2 s . c o m*/ //Create a input stream to read file FileInputStream fis = new FileInputStream(localFile.getAbsolutePath()); //For reading docx files XWPFDocument document = new XWPFDocument(fis); List<XWPFParagraph> paragraphs = document.getParagraphs(); System.out.println("Total no of paragraph " + paragraphs.size()); for (XWPFParagraph para : paragraphs) { //Compile the regex defined above Pattern r = Pattern.compile(pattern); //Check if any string matches the compiled pattern Matcher m = r.matcher(para.getText()); if (m.find()) { //m.group() Returns the input subsequence matched by the previous match data.add(m.group()); } } fis.close(); } catch (Exception e) { e.printStackTrace(); } }
From source file:de.catma.document.source.contenthandler.DOCXContentHandler.java
License:Open Source License
@Override public void load(InputStream is) throws IOException { XWPFDocument doc = new XWPFDocument(is); XWPFWordExtractor wordExtractor = new XWPFWordExtractor(doc); String buf = wordExtractor.getText(); //it's still microsoft after all if (FileOSType.getFileOSType(buf).equals(FileOSType.UNIX)) { buf = FileOSType.convertUnixToDos(buf); }// www . j a va 2 s . c o m setContent(buf); }