List of usage examples for org.apache.poi.hwpf.HWPFDocument.getRange()
@Override
public Range getRange()
From source file:at.tugraz.sss.serv.SSFileU.java
License:Apache License
public static void writePDFFromDoc(final String docFilePath, final String pdfFilePath) throws Exception { final Document document = new Document(); final POIFSFileSystem fs = new POIFSFileSystem(openFileForRead(docFilePath)); final HWPFDocument word = new HWPFDocument(fs); final WordExtractor we = new WordExtractor(word); final OutputStream out = openOrCreateFileWithPathForWrite(pdfFilePath); final PdfWriter writer = PdfWriter.getInstance(document, out); final Range range = word.getRange(); document.open();/* www . j a va2s . co m*/ writer.setPageEmpty(true); document.newPage(); writer.setPageEmpty(true); String[] paragraphs = we.getParagraphText(); for (int i = 0; i < paragraphs.length; i++) { org.apache.poi.hwpf.usermodel.Paragraph pr = range.getParagraph(i); // CharacterRun run = pr.getCharacterRun(i); // run.setBold(true); // run.setCapitalized(true); // run.setItalic(true); paragraphs[i] = paragraphs[i].replaceAll("\\cM?\r?\n", ""); System.out.println("Length:" + paragraphs[i].length()); System.out.println("Paragraph" + i + ": " + paragraphs[i].toString()); // add the paragraph to the document document.add(new Paragraph(paragraphs[i])); } document.close(); }
From source file:at.tugraz.sss.serv.util.SSFileU.java
License:Apache License
public static void writePDFFromDoc(final String docFilePath, final String pdfFilePath) throws SSErr { try {// w w w . j ava 2 s . co m final Document document = new Document(); final POIFSFileSystem fs = new POIFSFileSystem(openFileForRead(docFilePath)); final HWPFDocument word = new HWPFDocument(fs); final WordExtractor we = new WordExtractor(word); final OutputStream out = openOrCreateFileWithPathForWrite(pdfFilePath); final PdfWriter writer = PdfWriter.getInstance(document, out); final Range range = word.getRange(); document.open(); writer.setPageEmpty(true); document.newPage(); writer.setPageEmpty(true); String[] paragraphs = we.getParagraphText(); for (int i = 0; i < paragraphs.length; i++) { org.apache.poi.hwpf.usermodel.Paragraph pr = range.getParagraph(i); // CharacterRun run = pr.getCharacterRun(i); // run.setBold(true); // run.setCapitalized(true); // run.setItalic(true); paragraphs[i] = paragraphs[i].replaceAll("\\cM?\r?\n", ""); System.out.println("Length:" + paragraphs[i].length()); System.out.println("Paragraph" + i + ": " + paragraphs[i].toString()); // add the paragraph to the document document.add(new Paragraph(paragraphs[i])); } document.close(); } catch (Exception error) { SSServErrReg.regErrThrow(error); } }
From source file:com.duroty.lucene.parser.MSWordParser.java
License:Open Source License
/** * DOCUMENT ME!/*ww w .j a v a 2 s .co m*/ * * @return DOCUMENT ME! * * @throws ParserException DOCUMENT ME! */ private String getContents() throws ParserException { String contents = ""; try { HWPFDocument doc = new HWPFDocument(input); Range r = doc.getRange(); StringBuffer buffer = new StringBuffer(); for (int x = 0; x < r.numSections(); x++) { Section s = r.getSection(x); for (int y = 0; y < s.numParagraphs(); y++) { Paragraph p = null; try { p = s.getParagraph(y); } catch (Exception e) { buffer.append("\n"); } if (p != null) { for (int z = 0; z < p.numCharacterRuns(); z++) { try { //character run CharacterRun run = p.getCharacterRun(z); //character run text buffer.append(run.text()); } catch (Exception e) { buffer.append(" "); } } } /*if (sleep > 0) { try { Thread.sleep(sleep); } catch (Exception ex) { } }*/ // use a new line at the paragraph break buffer.append("\n"); } } contents = buffer.toString(); } catch (Exception ex) { throw new ParserException(ex); } return contents; }
From source file:com.google.gdt.handler.impl.WordHandler.java
License:Open Source License
/** * // ww w .j a v a 2 s . co m * @param inputFile * @param pLevel * @throws IOException * @throws InvalidFormatException */ @Override public void handle(String inputFile, ProgressLevel pLevel) throws IOException, InvalidFormatException { String outPutFile = getOuputFileName(inputFile); OutputStream outputStream = new FileOutputStream(outPutFile); InputStream inputStream = new FileInputStream(inputFile); HWPFDocument hDocument = new HWPFDocument(inputStream); Range range = hDocument.getRange(); pLevel.setTrFileName(outPutFile); pLevel.setValue(0); pLevel.setStringPainted(true); pLevel.setMaxValue(range.numParagraphs()); int count = 0; for (int i = 0; i < range.numParagraphs(); i++) { Paragraph paragraph = range.getParagraph(i); int numCharRuns = paragraph.numCharacterRuns(); for (int j = 0; j < numCharRuns; j++) { if (isInterrupted) { outputStream.close(); new File(outPutFile).delete(); pLevel.setString("cancelled"); return; } CharacterRun charRun = paragraph.getCharacterRun(j); String inputText = charRun.text(); if ((null == inputText) || (inputText.trim().equals(""))) continue; String translatedTxt = inputText; //in http post method, all key value pairs are seperated with & if (preferenceModel.getTranslatorType() == TranslatorType.HTTP) inputText = inputText.replaceAll("&", "and"); try { translatedTxt = translator.translate(translatedTxt); charRun.replaceText(inputText, translatedTxt); } catch (Exception e) { logger.log(Level.SEVERE, "Input File : " + inputFile + " cannot translate the text : " + inputText, e); } } count++; pLevel.setValue(count); } pLevel.setString("done"); hDocument.write(outputStream); outputStream.close(); }
From source file:com.icebreak.p2p.front.controller.trade.download.WordParse.java
@Transactional(rollbackFor = Exception.class, value = "transactionManager") public void readwriteWord(HttpServletResponse response, HttpSession session, String _file, Map<String, String> map, List<Map<String, Text>> lst, LoanDemandDO loan, String downType) { //?word?/*from www .j av a2 s . c o m*/ FileInputStream in; HWPFDocument hdt = null; String filePath = _file; ServletContext application = session.getServletContext(); String serverRealPath = application.getRealPath("/"); String fileTemp = AppConstantsUtil.getYrdUploadFolder() + File.separator + "doc"; File fileDir = new File(fileTemp); if (!fileDir.exists()) { fileDir.mkdir(); } try { in = new FileInputStream(new File(serverRealPath + filePath)); hdt = new HWPFDocument(in); } catch (Exception e1) { logger.error("??", e1); } //??word? Range range = hdt.getRange(); TableIterator it = new TableIterator(range); Table tb = null; while (it.hasNext()) { tb = it.next(); break; } if (lst.size() > 0) { for (int i = 1; i <= lst.size(); i++) { Map<String, Text> replaces = lst.get(i - 1); TableRow tr = tb.getRow(i); // 0 for (int j = 0; j < tr.numCells(); j++) { TableCell td = tr.getCell(j);// ?? // ?? for (int k = 0; k < td.numParagraphs(); k++) { Paragraph para = td.getParagraph(k); String s = para.text(); final String old = s; for (String key : replaces.keySet()) { if (s.contains(key)) { s = s.replace(key, replaces.get(key).getText()); } } if (!old.equals(s)) {// ? 
para.replaceText(old, s); s = para.text(); } } // end for } } for (int n = lst.size() + 1; n < tb.numRows(); n++) { TableRow tr = tb.getRow(n); tr.delete(); } } for (Map.Entry<String, String> entry : map.entrySet()) { range.replaceText(entry.getKey(), entry.getValue()); } //String fileName = f[f.length-1]; String fileName = System.currentTimeMillis() + _file.substring(_file.lastIndexOf("."), _file.length()); ByteArrayOutputStream ostream = new ByteArrayOutputStream(); try { FileOutputStream out = new FileOutputStream(fileTemp + fileName);//?word hdt.write(ostream); out.write(ostream.toByteArray()); out.flush(); out.close(); } catch (Exception e) { logger.error("?word", e); } Doc2Pdf doc2pdf = new Doc2Pdf(); String pdfAddress = doc2pdf.createPDF(fileTemp + fileName);//wordpdf try { String fileType = ""; if (lst.size() > 0) {//?? fileType = "contract"; } else {//? fileType = "letter"; } DownloadAndPrivewFileTread downThread = new DownloadAndPrivewFileTread(); //this.downloadAndPreviewFile(response, loan.getLoanName(), pdfAddress, downType, fileType);// downThread.setDownType(downType); downThread.setFilePath(pdfAddress); downThread.setResponse(response); downThread.setFileType(fileType); downThread.setProName(loan.getLoanName()); downThread.run(); File pdfFile = new File(pdfAddress); pdfFile.delete(); } catch (Exception e) { logger.error("pdf", e); } }
From source file:com.thuvienkhoahoc.wordtomwtext.examples.WordToMwtext.java
License:Apache License
public WordToMwtext(HWPFDocument doc, OutputStream stream) throws IOException, UnsupportedEncodingException { // bagd/*from w w w . java2 s . c o m*/ OutputStreamWriter out = new OutputStreamWriter(stream, "UTF-8"); _out = out; _doc = doc; init(); openDocument(); openBody(); Range r = doc.getRange(); StyleSheet styleSheet = doc.getStyleSheet(); int sectionLevel = 0; int lenParagraph = r.numParagraphs(); boolean inCode = false; for (int x = 0; x < lenParagraph; x++) { Paragraph p = r.getParagraph(x); String text = p.text(); if (text.trim().length() == 0) { continue; } StyleDescription paragraphStyle = styleSheet.getStyleDescription(p.getStyleIndex()); String styleName = paragraphStyle.getName(); if (styleName.startsWith("Heading")) { if (inCode) { closeSource(); inCode = false; } int headerLevel = Integer.parseInt(styleName.substring(8)); if (headerLevel > sectionLevel) { openSection(); } else { for (int y = 0; y < (sectionLevel - headerLevel) + 1; y++) { closeSection(); } openSection(); } sectionLevel = headerLevel; openTitle(sectionLevel); writePlainText(text.trim()); closeTitle(sectionLevel); } else { int cruns = p.numCharacterRuns(); CharacterRun run = p.getCharacterRun(0); String fontName = run.getFontName(); if (fontName.startsWith("Courier")) { if (!inCode) { openSource(); inCode = true; } writePlainText(p.text()); } else { if (inCode) { inCode = false; closeSource(); } openParagraph(); writePlainText(p.text()); closeParagraph(); } } } for (int x = 0; x < sectionLevel; x++) { closeSection(); } closeBody(); closeDocument(); _out.flush(); }
From source file:com.unsa.view.MainView.java
License:Creative Commons License
/**
 * Converts the text of a Word ({@code .doc}) file into a PDF written next to it: the last three
 * characters of the absolute path are replaced by "pdf". One PDF paragraph is added per
 * paragraph string returned by the Word extractor. Failures are printed to standard error.
 *
 * @param file1 the Word document to convert
 */
private void DocConverterPDF(File file1) {
    NPOIFSFileSystem fs = null;
    FileInputStream in = null;
    OutputStream fileout = null;
    com.lowagie.text.Document document = new com.lowagie.text.Document();
    try {
        System.out.println(file1.getAbsolutePath());
        in = new FileInputStream(file1.getAbsolutePath());
        fs = new NPOIFSFileSystem(in);
        HWPFDocument doc = new HWPFDocument(fs.getRoot());
        WordExtractor we = new WordExtractor(doc);

        // assumes a three-letter extension such as "doc"
        String output = file1.getAbsolutePath().substring(0, file1.getAbsolutePath().length() - 3);
        fileout = new FileOutputStream(new File(output + "pdf"));
        PdfWriter writer = PdfWriter.getInstance(document, fileout);

        document.open();
        writer.setPageEmpty(true);
        document.newPage();
        writer.setPageEmpty(true);

        String[] paragraphs = we.getParagraphText();
        for (int i = 0; i < paragraphs.length; i++) {
            // remove CR/LF line terminators from the extracted paragraph text
            paragraphs[i] = paragraphs[i].replaceAll("\\cM?\r?\n", "");
            document.add(new Paragraph(paragraphs[i]));
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        try {
            document.close();
        } finally {
            // release every handle that was opened, regardless of how far the conversion got
            if (fileout != null) {
                try {
                    fileout.close();
                } catch (java.io.IOException e) {
                    e.printStackTrace();
                }
            }
            if (fs != null) {
                try {
                    fs.close();
                } catch (java.io.IOException e) {
                    e.printStackTrace();
                }
            }
            if (in != null) {
                try {
                    in.close();
                } catch (java.io.IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }
}
From source file:com.weibo.datasys.parser.office.extractor.WordParse.java
License:Open Source License
/**
 * Reads a Word ({@code .doc}) file and returns its name and full text.
 *
 * <p>If the file cannot be read or parsed the error is logged and the returned object carries
 * only the file name.
 *
 * @param filePath the Word document to read
 * @return a {@code FileData} with the file name set and, on success, the document text as content
 */
public FileData extractorDoc(File filePath) {
    FileData fData = new FileData();
    fData.setName(filePath.getName());

    FileInputStream in = null;
    try {
        in = new FileInputStream(filePath);
        HWPFDocument doc = new HWPFDocument(in);
        fData.setContent(doc.getRange().text());
    } catch (Exception e) {
        LOG.error("Failed to extract text from " + filePath, e);
    } finally {
        // do not rely on the parser to release the file handle
        if (in != null) {
            try {
                in.close();
            } catch (Exception e) {
                LOG.error("Failed to close " + filePath, e);
            }
        }
    }
    return fData;
}
From source file:com.xpn.xwiki.plugin.lucene.textextraction.MSWordTextExtractor.java
License:Apache License
/**
 * Returns the text of a Word document held in memory.
 *
 * @param data the raw bytes of the Word document
 * @return the text of the document's overall range
 * @throws Exception if the bytes cannot be parsed as a Word document
 */
public String getText(byte[] data) throws Exception {
    final ByteArrayInputStream rawDocument = new ByteArrayInputStream(data);
    return new HWPFDocument(rawDocument).getRange().text();
}
From source file:com.zhch.example.poi.Word2Forrest.java
License:Apache License
@SuppressWarnings("unused") public Word2Forrest(HWPFDocument doc, OutputStream stream) throws IOException { OutputStreamWriter out = new OutputStreamWriter(stream, Charset.forName("UTF-8")); _out = out;/*from ww w .jav a 2s .c o m*/ _doc = doc; init(); openDocument(); openBody(); Range r = doc.getRange(); StyleSheet styleSheet = doc.getStyleSheet(); int sectionLevel = 0; int lenParagraph = r.numParagraphs(); boolean inCode = false; for (int x = 0; x < lenParagraph; x++) { Paragraph p = r.getParagraph(x); String text = p.text(); if (text.trim().length() == 0) { continue; } StyleDescription paragraphStyle = styleSheet.getStyleDescription(p.getStyleIndex()); String styleName = paragraphStyle.getName(); if (styleName.startsWith("Heading")) { if (inCode) { closeSource(); inCode = false; } int headerLevel = Integer.parseInt(styleName.substring(8)); if (headerLevel > sectionLevel) { openSection(); } else { for (int y = 0; y < (sectionLevel - headerLevel) + 1; y++) { closeSection(); } openSection(); } sectionLevel = headerLevel; openTitle(); writePlainText(text); closeTitle(); } else { int cruns = p.numCharacterRuns(); CharacterRun run = p.getCharacterRun(0); String fontName = run.getFontName(); if (fontName.startsWith("Courier")) { if (!inCode) { openSource(); inCode = true; } writePlainText(p.text()); } else { if (inCode) { inCode = false; closeSource(); } openParagraph(); writePlainText(p.text()); closeParagraph(); } } } for (int x = 0; x < sectionLevel; x++) { closeSection(); } closeBody(); closeDocument(); _out.flush(); }