List of usage examples for org.apache.poi.hwpf HWPFDocument getRange
@Override
public Range getRange()
From source file:org.nuxeo.typeDocPkg.WordDoc.java
License:Apache License
/** * return a number of page of document./* w ww.java2s . co m*/ * * @param filename * name of the file * @return number of pages */ public Integer NrPages(String filename) { try { Integer result = 0; Integer counterChar = 0; HWPFDocument doc = getHWPFDocument(filename); Range r = doc.getRange(); for (int k = 0; k < r.numParagraphs(); k++) { Paragraph p = r.getParagraph(k); counterChar += p.text().length(); if (counterChar > LengthOfPage) { result++; counterChar = 0; } } return result == 0 ? 1 : result; } catch (Exception e) { log.error("Error during the NrPages method: ", e); return 1; } }
From source file:org.nuxeo.typeDocPkg.WordDoc.java
License:Apache License
/**
 * Returns the text of the page selected by {@code CurrentPage}.
 *
 * <p>Pages are approximated by character counts: paragraph lengths are accumulated and a new
 * page starts once the running total exceeds {@code LengthOfPage}. Both {@code CurrentPage}
 * and {@code LengthOfPage} are defined outside this method. The paragraph that pushes the
 * total over the limit still belongs to the page it ends.
 *
 * @param filename name of file
 * @return the concatenated paragraph text of the selected page; an empty string when the
 *         document has no such page or when any exception occurs while reading it
 */
public String ExtractStrFromDoc(String filename) {
    try {
        StringBuilder pageText = new StringBuilder();
        // Primitive counters: comparing a boxed Integer with == could compare references.
        int charsOnPage = 0;
        int page = 1;
        HWPFDocument doc = getHWPFDocument(filename);
        Range r = doc.getRange();
        for (int k = 0; k < r.numParagraphs(); k++) {
            String text = r.getParagraph(k).text();
            charsOnPage += text.length();
            if (page == CurrentPage) {
                pageText.append(text);
            }
            if (charsOnPage > LengthOfPage) {
                page++;
                if (page > CurrentPage) {
                    // The requested page has just been completed.
                    return pageText.toString();
                }
                charsOnPage = 0;
                pageText.setLength(0);
            }
        }
        return pageText.toString();
    } catch (Exception e) {
        log.error("Error during the ExtractStrFromDoc method: ", e);
        return "";
    }
}
From source file:org.opencrx.kernel.text.WordToText.java
License:BSD License
/** * Get the text from the word file, as an array with one String * per paragraph/*w ww . j a v a 2 s. c o m*/ */ public String[] getParagraphText(HWPFDocument doc) { String[] ret = new String[] {}; try { Range r = doc.getRange(); ret = new String[r.numParagraphs()]; for (int i = 0; i < ret.length; i++) { Paragraph p = r.getParagraph(i); ret[i] = p.text(); if (ret[i].endsWith("\r")) { ret[i] = ret[i] + "\n"; } } } catch (Exception e) { // Something's up with turning the text pieces into paragraphs // Fall back to ripping out the text pieces ret = new String[1]; ret[0] = this.getTextFromPieces(doc); } return ret; }
From source file:org.paxle.parser.msoffice.impl.MsWordParser.java
License:Open Source License
@Override protected void extractText(POIFSFileSystem fs, IParserDocument parserDoc) throws ParserException, IOException { // extract plain text final HWPFDocument doc = new HWPFDocument(fs); final Range r = doc.getRange(); for (int i = 0; i < r.numParagraphs(); i++) { // get next paragraph final Paragraph p = r.getParagraph(i); // append paragraph text parserDoc.append(p.text());//w w w . ja v a 2 s . com // we know that this is the end of a block of text, so we can include a separator parserDoc.append(' '); } }
From source file:org.wandora.utils.MSOfficeBox.java
License:Open Source License
/** * Get the text from the word file, as an array with one String * per paragraph//from www . j a v a2s . co m */ public static String[] getWordParagraphText(HWPFDocument doc) { String[] ret; // Extract using the model code try { Range r = doc.getRange(); ret = new String[r.numParagraphs()]; for (int i = 0; i < ret.length; i++) { Paragraph p = r.getParagraph(i); ret[i] = p.text(); // Fix the line ending if (ret[i].endsWith("\r")) { ret[i] = ret[i] + "\n"; } } } catch (Exception e) { // Something's up with turning the text pieces into paragraphs // Fall back to ripping out the text pieces ret = new String[1]; ret[0] = getWordTextFromPieces(doc); } return ret; }
From source file:poi.hslf.examples.DataExtraction.java
License:Apache License
public static void main(String args[]) throws Exception { if (args.length == 0) { usage();/*from w ww.jav a2 s. com*/ return; } FileInputStream is = new FileInputStream(args[0]); SlideShow ppt = new SlideShow(is); is.close(); //extract all sound files embedded in this presentation SoundData[] sound = ppt.getSoundData(); for (int i = 0; i < sound.length; i++) { String type = sound[i].getSoundType(); //*.wav String name = sound[i].getSoundName(); //typically file name byte[] data = sound[i].getData(); //raw bytes //save the sound on disk FileOutputStream out = new FileOutputStream(name + type); out.write(data); out.close(); } //extract embedded OLE documents Slide[] slide = ppt.getSlides(); for (int i = 0; i < slide.length; i++) { Shape[] shape = slide[i].getShapes(); for (int j = 0; j < shape.length; j++) { if (shape[j] instanceof OLEShape) { OLEShape ole = (OLEShape) shape[j]; ObjectData data = ole.getObjectData(); String name = ole.getInstanceName(); if ("Worksheet".equals(name)) { //read xls HSSFWorkbook wb = new HSSFWorkbook(data.getData()); } else if ("Document".equals(name)) { HWPFDocument doc = new HWPFDocument(data.getData()); //read the word document Range r = doc.getRange(); for (int k = 0; k < r.numParagraphs(); k++) { Paragraph p = r.getParagraph(k); System.out.println(p.text()); } //save on disk FileOutputStream out = new FileOutputStream(name + "-(" + (j) + ").doc"); doc.write(out); out.close(); } else { FileOutputStream out = new FileOutputStream(ole.getProgID() + "-" + (j + 1) + ".dat"); InputStream dis = data.getData(); byte[] chunk = new byte[2048]; int count; while ((count = dis.read(chunk)) >= 0) { out.write(chunk, 0, count); } is.close(); out.close(); } } } } //Pictures for (int i = 0; i < slide.length; i++) { Shape[] shape = slide[i].getShapes(); for (int j = 0; j < shape.length; j++) { if (shape[j] instanceof Picture) { Picture p = (Picture) shape[j]; PictureData data = p.getPictureData(); String name = p.getPictureName(); int type = 
data.getType(); String ext; switch (type) { case Picture.JPEG: ext = ".jpg"; break; case Picture.PNG: ext = ".png"; break; case Picture.WMF: ext = ".wmf"; break; case Picture.EMF: ext = ".emf"; break; case Picture.PICT: ext = ".pict"; break; case Picture.DIB: ext = ".dib"; break; default: continue; } FileOutputStream out = new FileOutputStream("pict-" + j + ext); out.write(data.getData()); out.close(); } } } }
From source file:poi.hwpf.Word2Forrest.java
License:Apache License
public Word2Forrest(HWPFDocument doc, OutputStream stream) throws IOException, UnsupportedEncodingException { OutputStreamWriter out = new OutputStreamWriter(stream, "UTF-8"); _out = out;// ww w . j a v a 2s. c om _doc = doc; init(); openDocument(); openBody(); Range r = doc.getRange(); StyleSheet styleSheet = doc.getStyleSheet(); int sectionLevel = 0; int lenParagraph = r.numParagraphs(); boolean inCode = false; for (int x = 0; x < lenParagraph; x++) { Paragraph p = r.getParagraph(x); String text = p.text(); if (text.trim().length() == 0) { continue; } StyleDescription paragraphStyle = styleSheet.getStyleDescription(p.getStyleIndex()); String styleName = paragraphStyle.getName(); if (styleName.startsWith("Heading")) { if (inCode) { closeSource(); inCode = false; } int headerLevel = Integer.parseInt(styleName.substring(8)); if (headerLevel > sectionLevel) { openSection(); } else { for (int y = 0; y < (sectionLevel - headerLevel) + 1; y++) { closeSection(); } openSection(); } sectionLevel = headerLevel; openTitle(); writePlainText(text); closeTitle(); } else { int cruns = p.numCharacterRuns(); CharacterRun run = p.getCharacterRun(0); String fontName = run.getFontName(); if (fontName.startsWith("Courier")) { if (!inCode) { openSource(); inCode = true; } writePlainText(p.text()); } else { if (inCode) { inCode = false; closeSource(); } openParagraph(); writePlainText(p.text()); closeParagraph(); } } } for (int x = 0; x < sectionLevel; x++) { closeSection(); } closeBody(); closeDocument(); _out.flush(); }
From source file:rzd.vivc.astzpte.beans.pagebean.ReportBean.java
public String generateReport(User usr) { HWPFDocument doc; Ticket ticket = usr.getTickets().get(0); List<UserAnswer> answers = usr.getTickets().get(0).getAnswers(); ArrayList<UserAnswerModel> questions = new ArrayList<>(); for (int i = 0; i < answers.size(); i++) { if (answers.get(i).getAnswer() != null) { questions.add(new UserAnswerModel(answers.get(i), i)); }//from w w w .j av a 2s. c om } SimpleDateFormat format = new SimpleDateFormat("dd/MM/yyyy"); SimpleDateFormat format1 = new SimpleDateFormat("hh:mm"); try (FileInputStream fis = new FileInputStream("c:\\rep\\templ.doc")) { doc = new HWPFDocument(fis); doc.getRange().getParagraph(3).replaceText("(dtBeg)", format.format(ticket.getDt_create())); doc.getRange().getParagraph(9).replaceText("(timeBeg)", format1.format(ticket.getDt_create())); doc.getRange().getParagraph(11).replaceText("(timeFin)", format1.format(ticket.getFinish())); long num = usr.getNum(); /* for (int i = 1; i <= 13; i++) { long mod = num % 10;*/ doc.getRange().replaceText("(num)"/* + (13 - i + 1) + ")"*/, num + ""); /* num = num / 10; }*/ doc.getRange().getParagraph(24).replaceText("(allow1)", usr.getAllowNum() + " " + format.format(usr.getAllowDat())); doc.getRange().replaceText("(tickNum)", ticket.getAnswers().get(0).getQuestion().getTicketTemplate().getNum() + ""); doc.getRange().replaceText("(themeNum)", ticket.getAnswers().get(0).getQuestion().getTicketTemplate().getTheme().getId() + ""); doc.getRange().replaceText("(themeName)", ticket.getAnswers().get(0).getQuestion().getTicketTemplate().getTheme().getName()); int count = 0; for (int i = 1; i <= 50; i++) { UserAnswerModel answerModel = questions.get(i - 1); if (i < 10) { doc.getRange().replaceText("T0" + i, answerModel.getQuestion().getText()); doc.getRange().replaceText("C0" + i, answerModel.givenNumber() + ""); boolean cor = answerModel.correctNumber() == answerModel.givenNumber(); if (cor) { count++; } doc.getRange().replaceText("Y0" + i, cor ? 
" " : " "); doc.getRange().replaceText("B0" + i, cor ? 1 + "" : 0 + ""); } else { doc.getRange().replaceText("T" + i, answerModel.getQuestion().getText()); doc.getRange().replaceText("C" + i, answerModel.givenNumber() + ""); boolean cor = answerModel.correctNumber() == answerModel.givenNumber(); if (cor) { count++; } doc.getRange().replaceText("Y" + i, cor ? " " : " "); doc.getRange().replaceText("B" + i, cor ? 1 + "" : 0 + ""); } } doc.getRange().replaceText("BT", count + ""); doc.getRange().replaceText("BT", count + ""); FileOutputStream fos = new FileOutputStream("c:\\rep\\" + ticket.getId() + ".doc"); doc.write(fos); fos.close(); } catch (FileNotFoundException ex) { Logger.getLogger(ReportBean.class.getName()).log(Level.SEVERE, null, ex); } catch (IOException ex) { Logger.getLogger(ReportBean.class.getName()).log(Level.SEVERE, null, ex); } return ticket.getId() + ".doc"; }
From source file:textextractor.WordManager.java
/**
 * Reads a Word document and collects the space-separated tokens of all its paragraphs.
 *
 * <p>The tokens are stored in the {@code listDoc} field, which is reset at the start of every
 * call, and the same list is returned. While reading, the text of every paragraph is printed
 * to standard output, preceded by the style name when the style is not "Normal".
 *
 * @param fis stream positioned at the start of the Word document; it is not closed here
 * @return the tokens of all paragraphs in document order; empty when the document has no
 *         paragraphs
 * @throws IOException if the document cannot be read
 */
public ArrayList extractDoc(FileInputStream fis) throws IOException {
    HWPFDocument doc = new HWPFDocument(fis);
    Range range = doc.getRange();

    // Created once per call: re-creating the list for each paragraph would discard
    // everything but the last paragraph. The raw type follows the method signature.
    listDoc = new ArrayList();

    for (int i = 0; i < range.numParagraphs(); i++) {
        Paragraph p = range.getParagraph(i);
        StyleDescription style = doc.getStyleSheet().getStyleDescription(p.getStyleIndex());
        if (!"Normal".equals(style.getName())) {
            System.out.println(style.getName());
        }
        String text = p.text();
        System.out.println(text);
        listDoc.addAll(Arrays.asList(text.split(" ")));
    }
    return listDoc;
}