List of usage examples for the org.apache.poi.hwpf.HWPFDocument constructor
public HWPFDocument(DirectoryNode directory) throws IOException
From source file:com.artech.prototype2.bardakov.utils.impl.MultiParserImpl.java
/** * doc/docx//from w w w . ja v a 2 s. c om * @param FilePath - * @return ?? ? */ private ArrayList<String> getListOfWordsFromDoc(String FilePath) { FileInputStream fis; List<String> result = new ArrayList<String>(); if (FilePath.substring(FilePath.length() - 1).equals("x")) { //is a docx try { fis = new FileInputStream(new File(FilePath)); XWPFDocument doc = new XWPFDocument(fis); XWPFWordExtractor extract = new XWPFWordExtractor(doc); // System.out.println(extract.getText()); StringBuilder builder = new StringBuilder(); builder.append(extract.getText()); String[] words = builder.toString().split(" "); for (String s : words) { result.add(s); } } catch (IOException e) { e.printStackTrace(); } } else { //is not a docx try { fis = new FileInputStream(new File(FilePath)); HWPFDocument doc = new HWPFDocument(fis); WordExtractor extractor = new WordExtractor(doc); StringBuilder builder = new StringBuilder(); builder.append(extractor.getText()); String[] words = builder.toString().split(" "); for (String s : words) { result.add(s); } } catch (IOException e) { e.printStackTrace(); } } return (ArrayList<String>) result; }
From source file:com.duroty.lucene.parser.MSWordParser.java
License:Open Source License
/** * DOCUMENT ME!/*from ww w .j a v a 2 s . c om*/ * * @return DOCUMENT ME! * * @throws ParserException DOCUMENT ME! */ private String getContents() throws ParserException { String contents = ""; try { HWPFDocument doc = new HWPFDocument(input); Range r = doc.getRange(); StringBuffer buffer = new StringBuffer(); for (int x = 0; x < r.numSections(); x++) { Section s = r.getSection(x); for (int y = 0; y < s.numParagraphs(); y++) { Paragraph p = null; try { p = s.getParagraph(y); } catch (Exception e) { buffer.append("\n"); } if (p != null) { for (int z = 0; z < p.numCharacterRuns(); z++) { try { //character run CharacterRun run = p.getCharacterRun(z); //character run text buffer.append(run.text()); } catch (Exception e) { buffer.append(" "); } } } /*if (sleep > 0) { try { Thread.sleep(sleep); } catch (Exception ex) { } }*/ // use a new line at the paragraph break buffer.append("\n"); } } contents = buffer.toString(); } catch (Exception ex) { throw new ParserException(ex); } return contents; }
From source file:com.example.minireader.WordViewActivity.java
License:Apache License
private void getRange() { FileInputStream in = null;//from w w w. j a v a2 s . c o m POIFSFileSystem pfs = null; try { in = new FileInputStream(nameStr); pfs = new POIFSFileSystem(in); hwpf = new HWPFDocument(pfs); } catch (Exception e) { } range = hwpf.getRange(); // pictures = hwpf.getPicturesTable().getAllPictures(); tableIterator = new TableIterator(range); }
From source file:com.google.gdt.handler.impl.WordHandler.java
License:Open Source License
/** * /*ww w . j a va2 s.c om*/ * @param inputFile * @param pLevel * @throws IOException * @throws InvalidFormatException */ @Override public void handle(String inputFile, ProgressLevel pLevel) throws IOException, InvalidFormatException { String outPutFile = getOuputFileName(inputFile); OutputStream outputStream = new FileOutputStream(outPutFile); InputStream inputStream = new FileInputStream(inputFile); HWPFDocument hDocument = new HWPFDocument(inputStream); Range range = hDocument.getRange(); pLevel.setTrFileName(outPutFile); pLevel.setValue(0); pLevel.setStringPainted(true); pLevel.setMaxValue(range.numParagraphs()); int count = 0; for (int i = 0; i < range.numParagraphs(); i++) { Paragraph paragraph = range.getParagraph(i); int numCharRuns = paragraph.numCharacterRuns(); for (int j = 0; j < numCharRuns; j++) { if (isInterrupted) { outputStream.close(); new File(outPutFile).delete(); pLevel.setString("cancelled"); return; } CharacterRun charRun = paragraph.getCharacterRun(j); String inputText = charRun.text(); if ((null == inputText) || (inputText.trim().equals(""))) continue; String translatedTxt = inputText; //in http post method, all key value pairs are seperated with & if (preferenceModel.getTranslatorType() == TranslatorType.HTTP) inputText = inputText.replaceAll("&", "and"); try { translatedTxt = translator.translate(translatedTxt); charRun.replaceText(inputText, translatedTxt); } catch (Exception e) { logger.log(Level.SEVERE, "Input File : " + inputFile + " cannot translate the text : " + inputText, e); } } count++; pLevel.setValue(count); } pLevel.setString("done"); hDocument.write(outputStream); outputStream.close(); }
From source file:com.icebreak.p2p.front.controller.trade.download.WordParse.java
@Transactional(rollbackFor = Exception.class, value = "transactionManager") public void readwriteWord(HttpServletResponse response, HttpSession session, String _file, Map<String, String> map, List<Map<String, Text>> lst, LoanDemandDO loan, String downType) { //?word?//from w ww . ja v a 2 s . c o m FileInputStream in; HWPFDocument hdt = null; String filePath = _file; ServletContext application = session.getServletContext(); String serverRealPath = application.getRealPath("/"); String fileTemp = AppConstantsUtil.getYrdUploadFolder() + File.separator + "doc"; File fileDir = new File(fileTemp); if (!fileDir.exists()) { fileDir.mkdir(); } try { in = new FileInputStream(new File(serverRealPath + filePath)); hdt = new HWPFDocument(in); } catch (Exception e1) { logger.error("??", e1); } //??word? Range range = hdt.getRange(); TableIterator it = new TableIterator(range); Table tb = null; while (it.hasNext()) { tb = it.next(); break; } if (lst.size() > 0) { for (int i = 1; i <= lst.size(); i++) { Map<String, Text> replaces = lst.get(i - 1); TableRow tr = tb.getRow(i); // 0 for (int j = 0; j < tr.numCells(); j++) { TableCell td = tr.getCell(j);// ?? // ?? for (int k = 0; k < td.numParagraphs(); k++) { Paragraph para = td.getParagraph(k); String s = para.text(); final String old = s; for (String key : replaces.keySet()) { if (s.contains(key)) { s = s.replace(key, replaces.get(key).getText()); } } if (!old.equals(s)) {// ? 
para.replaceText(old, s); s = para.text(); } } // end for } } for (int n = lst.size() + 1; n < tb.numRows(); n++) { TableRow tr = tb.getRow(n); tr.delete(); } } for (Map.Entry<String, String> entry : map.entrySet()) { range.replaceText(entry.getKey(), entry.getValue()); } //String fileName = f[f.length-1]; String fileName = System.currentTimeMillis() + _file.substring(_file.lastIndexOf("."), _file.length()); ByteArrayOutputStream ostream = new ByteArrayOutputStream(); try { FileOutputStream out = new FileOutputStream(fileTemp + fileName);//?word hdt.write(ostream); out.write(ostream.toByteArray()); out.flush(); out.close(); } catch (Exception e) { logger.error("?word", e); } Doc2Pdf doc2pdf = new Doc2Pdf(); String pdfAddress = doc2pdf.createPDF(fileTemp + fileName);//wordpdf try { String fileType = ""; if (lst.size() > 0) {//?? fileType = "contract"; } else {//? fileType = "letter"; } DownloadAndPrivewFileTread downThread = new DownloadAndPrivewFileTread(); //this.downloadAndPreviewFile(response, loan.getLoanName(), pdfAddress, downType, fileType);// downThread.setDownType(downType); downThread.setFilePath(pdfAddress); downThread.setResponse(response); downThread.setFileType(fileType); downThread.setProName(loan.getLoanName()); downThread.run(); File pdfFile = new File(pdfAddress); pdfFile.delete(); } catch (Exception e) { logger.error("pdf", e); } }
From source file:com.isotrol.impe3.idx.oc.extractors.ExtractorMsWord.java
License:Open Source License
/** * Extrae el texto de un fichero word.// www. j a va2 s . c o m * @param in * @return String. Devuelve el texto crudo * @throws Exception */ public static String extractText(InputStream in) throws Exception { String result = ""; HWPFDocument doc = new HWPFDocument(in); WordExtractor we = new WordExtractor(doc); result = we.getText(); // Eliminamos los caracteres que no nos sirven para indexar. result = ExtractorUtil.removeControlChars(result); return result; }
From source file:com.jgaap.generics.DocumentHelper.java
License:Open Source License
/** * Extracts text from a Word document and stores it in the document. * /* w w w .j ava2 s . c o m*/ * @param inputStream * An input stream pointing to the Word document to be read. * @throws IOException */ static private char[] loadMSWord(InputStream inputStream) throws IOException { POIFSFileSystem fs = new POIFSFileSystem(inputStream); HWPFDocument doc = new HWPFDocument(fs); WordExtractor we = new WordExtractor(doc); char[] origText = we.getText().toCharArray(); return origText; }
From source file:com.pdf.GetPdf.java
public static void docConvert(Document document, String url, String type) throws IOException, DocumentException { WordExtractor we;// ww w. j ava 2s. com if (type.equals("doc")) { HWPFDocument wordDoc = new HWPFDocument(new URL(url).openStream()); we = new WordExtractor(wordDoc); String[] paragraphs = we.getParagraphText(); for (int i = 0; i < paragraphs.length; i++) { paragraphs[i] = paragraphs[i].replaceAll("\\cM?\r?\n", ""); document.add(new Paragraph(paragraphs[i])); } } else { XWPFDocument wordDoc = new XWPFDocument(new URL(url).openStream()); List<IBodyElement> contents = wordDoc.getBodyElements(); for (IBodyElement content : contents) { if (content.getElementType() == BodyElementType.PARAGRAPH) { List<XWPFParagraph> paras = content.getBody().getParagraphs(); for (XWPFParagraph para : paras) { document.add(new Paragraph(para.getParagraphText())); } } else if (content.getElementType() == BodyElementType.TABLE) { List<XWPFTable> tables = content.getBody().getTables(); for (XWPFTable table : tables) { List<XWPFTableRow> rows = table.getRows(); for (XWPFTableRow row : rows) { List<XWPFTableCell> tablecells = row.getTableCells(); } } } } } }
From source file:com.thuvienkhoahoc.wordtomwtext.examples.WordToMwtext.java
License:Apache License
/**
 * Runs {@code WordToMwtext} on a Word file, passing it an output stream to a wikitext file.
 *
 * @param args {@code args[0]} is the Word file to read; the optional {@code args[1]} is the
 *        output file, which defaults to {@code c:/test.wikitext} (written with backslashes in
 *        the code)
 */
public static void main(String[] args) {
    if (args.length < 1) {
        System.err.println("Usage: WordToMwtext <input.doc> [output file]");
        return;
    }
    String outputPath = args.length > 1 ? args[1] : "c:\\test.wikitext";
    // The input is opened first so that a missing input no longer leaves an empty output file;
    // both streams are closed even when the conversion fails.
    try (FileInputStream in = new FileInputStream(args[0]);
            OutputStream out = new FileOutputStream(outputPath)) {
        new WordToMwtext(new HWPFDocument(in), out);
    } catch (Throwable t) {
        t.printStackTrace();
    }
}
From source file:com.unsa.view.MainView.java
License:Creative Commons License
private void btnProcesarActionPerformed(java.awt.event.ActionEvent evt) throws SQLException, IOException {//GEN-FIRST:event_btnProcesarActionPerformed // TODO add your handling code here: if (lblInstitucion.getText().equals("") || lblInstitucion.getText().equals("") || lblIdioma.getText().equals("")) { JOptionPane.showMessageDialog(null, "Una de las opciones generales est vacio"); return;/*from w ww . j av a 2 s.co m*/ } if (jTextField1.getText().equals("")) { JOptionPane.showMessageDialog(null, "No se especific la ruta donde guardar la metadata"); return; } jProgressBar1.setValue(0); jProgressBar1.setStringPainted(true); File[] listOfFiles = file.getSelectedFiles(); int count = 0; for (File file : listOfFiles) { boolean archivo_daniado = false; if (file.isFile()) { System.out.println(file.getName()); if (file.getName().substring(file.getName().length() - 1).equals("x")) { //is a docx try { XWPFDocument doc = new XWPFDocument(new FileInputStream(file)); alg = new AlgorithmsWord(doc.getParagraphs()); } catch (Exception e) { archivo_daniado = true; } } else { //is not a docx try { HWPFDocument doc = new HWPFDocument(new FileInputStream(file)); Range r = doc.getRange(); alg = new AlgorithmsWord(r); } catch (Exception e) { try { XWPFDocument doc = new XWPFDocument(new FileInputStream(file)); alg = new AlgorithmsWord(doc.getParagraphs()); } catch (Exception ex) { archivo_daniado = true; } } } Metadata metadata = null; if (archivo_daniado == true) { metadata = loadMetadataFail(); } else { metadata = loadMetadata(alg); } metadata.setFileName(file.getName()); listMetaData.add(metadata); int val_calculate = (count + 1) * 100 / listOfFiles.length; jProgressBar1.setValue(val_calculate); count++; } } String name = jTextField1.getText(); ExcelController excel = new ExcelController(name, "UNSA", listMetaData); String[] lnames = { "Nombre Archivo", "Obs. Dudosa", "Obs. 
Critica", "Abrir Archivo" }; DefaultTableModel model = new DefaultTableModel(lnames, 0); tableSalida.setModel(model); int contador = 0; for (Metadata meta : listMetaData) { Object[] data = new Object[4]; data[0] = listOfFiles[contador].getName(); if (meta.getFailGeneral()) { data[1] = "Fail"; data[2] = "Fail"; } else { data[1] = meta.getStadistic().getObservationGeneral() ? "Observacion" : ""; data[2] = meta.getObservacionGeneral() ? "Falta" : ""; } data[3] = "abrir"; model.addRow(data); contador++; } btnAbrirMetadata.setEnabled(true); }