Example usage of org.apache.poi.hwpf.HWPFDocument.getRange()

Introduction

This page collects example usages of the getRange() method of org.apache.poi.hwpf.HWPFDocument.

Prototype

@Override
public Range getRange()

Source Link

Document

Returns the range which covers the whole of the document, but excludes any headers and footers.

Usage

From source file:org.nuxeo.typeDocPkg.WordDoc.java

License:Apache License

/**
* return a number of page of document./*  w  ww.java2s . co m*/
*
* @param filename
*             name of the file
* @return number of pages
*/
public Integer NrPages(String filename) {
    try {
        Integer result = 0;
        Integer counterChar = 0;

        HWPFDocument doc = getHWPFDocument(filename);
        Range r = doc.getRange();
        for (int k = 0; k < r.numParagraphs(); k++) {
            Paragraph p = r.getParagraph(k);
            counterChar += p.text().length();
            if (counterChar > LengthOfPage) {
                result++;
                counterChar = 0;
            }
        }
        return result == 0 ? 1 : result;
    } catch (Exception e) {
        log.error("Error during the NrPages method: ", e);
        return 1;
    }
}

From source file:org.nuxeo.typeDocPkg.WordDoc.java

License:Apache License

/**
 * Returns the text of the page selected by {@code CurrentPage}.
 * <p>
 * Pages are estimated, not read from the file: paragraph lengths are accumulated
 * in document order and a new page starts whenever the running total exceeds
 * {@code LengthOfPage}. Pages are numbered from 1.
 *
 * @param filename
 *             name of file
 * @return concatenated text of the paragraphs on the current page; an empty
 *         string when the page does not exist or the document cannot be read
 *         (the error is logged)
 */
public String ExtractStrFromDoc(String filename) {
    try {
        // Primitive counters: with a boxed Integer, "==" against a boxed CurrentPage
        // would compare references and fail outside the Integer cache range.
        int charsOnPage = 0;
        int pageNumber = 1;
        StringBuilder pageText = new StringBuilder();

        HWPFDocument doc = getHWPFDocument(filename);
        Range r = doc.getRange();

        for (int k = 0; k < r.numParagraphs(); k++) {
            String paragraphText = r.getParagraph(k).text();
            charsOnPage += paragraphText.length();
            if (pageNumber == CurrentPage) {
                pageText.append(paragraphText);
            }
            if (charsOnPage > LengthOfPage) {
                pageNumber++;
                if (pageNumber > CurrentPage) {
                    // The requested page is complete.
                    return pageText.toString();
                }
                // Nothing has been collected yet at this point (text is only
                // appended while on the requested page), so only the counter
                // needs resetting.
                charsOnPage = 0;
            }
        }

        return pageText.toString();
    } catch (Exception e) {
        log.error("Error during the ExtractStrFromDoc method: ", e);
        return "";
    }
}

From source file:org.opencrx.kernel.text.WordToText.java

License:BSD License

/**
 * Get the text from the word file, as an array with one String
 *  per paragraph/*w  ww  .  j  a v  a  2  s. c  o  m*/
 */
public String[] getParagraphText(HWPFDocument doc) {
    String[] ret = new String[] {};
    try {
        Range r = doc.getRange();
        ret = new String[r.numParagraphs()];
        for (int i = 0; i < ret.length; i++) {
            Paragraph p = r.getParagraph(i);
            ret[i] = p.text();
            if (ret[i].endsWith("\r")) {
                ret[i] = ret[i] + "\n";
            }
        }
    } catch (Exception e) {
        // Something's up with turning the text pieces into paragraphs
        // Fall back to ripping out the text pieces
        ret = new String[1];
        ret[0] = this.getTextFromPieces(doc);
    }
    return ret;
}

From source file:org.paxle.parser.msoffice.impl.MsWordParser.java

License:Open Source License

/**
 * Appends the plain text of every paragraph of the Word document stored in
 * {@code fs} to {@code parserDoc}, writing a single space after each paragraph.
 */
@Override
protected void extractText(POIFSFileSystem fs, IParserDocument parserDoc) throws ParserException, IOException {
    final HWPFDocument wordDoc = new HWPFDocument(fs);
    final Range body = wordDoc.getRange();

    int index = 0;
    while (index < body.numParagraphs()) {
        final String paragraphText = body.getParagraph(index).text();
        parserDoc.append(paragraphText);
        // a paragraph ends a block of text, so a separator follows it
        parserDoc.append(' ');
        index++;
    }
}

From source file:org.wandora.utils.MSOfficeBox.java

License:Open Source License

/**
 * Reads a Word document into an array with one String per paragraph.
 * Paragraphs ending in "\r" are returned with "\n" appended. When the
 * paragraph-based extraction throws, a one-element array holding the result
 * of {@code getWordTextFromPieces} is returned instead.
 */
public static String[] getWordParagraphText(HWPFDocument doc) {
    try {
        // Preferred path: walk the paragraph model.
        final Range body = doc.getRange();
        final String[] texts = new String[body.numParagraphs()];

        for (int pos = 0; pos != texts.length; pos++) {
            String current = body.getParagraph(pos).text();
            if (current.endsWith("\r")) {
                // normalise the line ending
                current = current + "\n";
            }
            texts[pos] = current;
        }
        return texts;
    } catch (Exception e) {
        // Paragraph extraction failed; use the raw text pieces as a fallback.
        return new String[] { getWordTextFromPieces(doc) };
    }
}

From source file:poi.hslf.examples.DataExtraction.java

License:Apache License

/**
 * Extracts embedded content from a PowerPoint file and writes it to disk:
 * sound data, embedded OLE objects (worksheets are only parsed, Word documents
 * are printed paragraph by paragraph and saved, anything else is dumped raw)
 * and pictures.
 *
 * @param args command-line arguments; {@code args[0]} is the presentation to read.
 *             With no arguments {@code usage()} is called and nothing is extracted.
 * @throws Exception if the presentation cannot be read or an output file cannot be written
 */
public static void main(String args[]) throws Exception {

    if (args.length == 0) {
        usage();
        return;
    }

    // try-with-resources closes the input even when parsing fails
    SlideShow ppt;
    try (FileInputStream is = new FileInputStream(args[0])) {
        ppt = new SlideShow(is);
    }

    //extract all sound files embedded in this presentation
    SoundData[] sound = ppt.getSoundData();
    for (int i = 0; i < sound.length; i++) {
        String type = sound[i].getSoundType(); //*.wav
        String name = sound[i].getSoundName(); //typically file name
        byte[] data = sound[i].getData(); //raw bytes

        //save the sound on disk
        try (FileOutputStream out = new FileOutputStream(name + type)) {
            out.write(data);
        }
    }

    //extract embedded OLE documents
    Slide[] slide = ppt.getSlides();
    for (int i = 0; i < slide.length; i++) {
        Shape[] shape = slide[i].getShapes();
        for (int j = 0; j < shape.length; j++) {
            if (shape[j] instanceof OLEShape) {
                OLEShape ole = (OLEShape) shape[j];
                ObjectData data = ole.getObjectData();
                String name = ole.getInstanceName();
                if ("Worksheet".equals(name)) {

                    //read xls (the workbook is only parsed here, nothing is written)
                    try (InputStream xls = data.getData()) {
                        HSSFWorkbook wb = new HSSFWorkbook(xls);
                    }

                } else if ("Document".equals(name)) {
                    HWPFDocument doc;
                    try (InputStream word = data.getData()) {
                        doc = new HWPFDocument(word);
                    }
                    //read the word document
                    Range r = doc.getRange();
                    for (int k = 0; k < r.numParagraphs(); k++) {
                        Paragraph p = r.getParagraph(k);
                        System.out.println(p.text());
                    }

                    //save on disk
                    try (FileOutputStream out = new FileOutputStream(name + "-(" + (j) + ").doc")) {
                        doc.write(out);
                    }
                } else {
                    // Dump any other OLE object as raw bytes. The embedded data
                    // stream is closed here; the original closed the presentation
                    // input stream again instead and leaked this one.
                    try (FileOutputStream out = new FileOutputStream(ole.getProgID() + "-" + (j + 1) + ".dat");
                            InputStream dis = data.getData()) {
                        byte[] chunk = new byte[2048];
                        int count;
                        while ((count = dis.read(chunk)) >= 0) {
                            out.write(chunk, 0, count);
                        }
                    }
                }
            }

        }
    }

    //Pictures
    for (int i = 0; i < slide.length; i++) {
        Shape[] shape = slide[i].getShapes();
        for (int j = 0; j < shape.length; j++) {
            if (shape[j] instanceof Picture) {
                Picture p = (Picture) shape[j];
                PictureData data = p.getPictureData();
                // read for illustration only; the output file name below does not use it
                String name = p.getPictureName();
                int type = data.getType();
                String ext;
                switch (type) {
                case Picture.JPEG:
                    ext = ".jpg";
                    break;
                case Picture.PNG:
                    ext = ".png";
                    break;
                case Picture.WMF:
                    ext = ".wmf";
                    break;
                case Picture.EMF:
                    ext = ".emf";
                    break;
                case Picture.PICT:
                    ext = ".pict";
                    break;
                case Picture.DIB:
                    ext = ".dib";
                    break;
                default:
                    // unknown picture format: skip this shape
                    continue;
                }
                // NOTE(review): the name depends only on the shape index j, so pictures at
                // the same index on different slides overwrite each other - confirm intent.
                try (FileOutputStream out = new FileOutputStream("pict-" + j + ext)) {
                    out.write(data.getData());
                }
            }

        }
    }

}

From source file:poi.hwpf.Word2Forrest.java

License:Apache License

/**
 * Converts {@code doc} and writes the result to {@code stream} as UTF-8 text.
 * <p>
 * Paragraphs are processed in document order; blank paragraphs are skipped.
 * <ul>
 * <li>A paragraph whose style name starts with "Heading" opens a new section
 *     (closing sections of the same or a deeper level first) and is written as
 *     the section title. The heading level is parsed from the style name after
 *     its first eight characters.</li>
 * <li>A paragraph whose first character run uses a font whose name starts with
 *     "Courier" is written inside a source block; consecutive such paragraphs
 *     share one block.</li>
 * <li>Any other paragraph is written as a regular paragraph.</li>
 * </ul>
 *
 * @param doc    the Word document to convert
 * @param stream destination of the converted output; it is flushed but not closed
 * @throws IOException if writing to {@code stream} fails
 * @throws UnsupportedEncodingException if UTF-8 is not supported
 */
public Word2Forrest(HWPFDocument doc, OutputStream stream) throws IOException, UnsupportedEncodingException {
    OutputStreamWriter out = new OutputStreamWriter(stream, "UTF-8");
    _out = out;
    _doc = doc;

    init();
    openDocument();
    openBody();

    Range r = doc.getRange();
    StyleSheet styleSheet = doc.getStyleSheet();

    int sectionLevel = 0;
    int lenParagraph = r.numParagraphs();
    boolean inCode = false;
    for (int x = 0; x < lenParagraph; x++) {
        Paragraph p = r.getParagraph(x);
        String text = p.text();
        if (text.trim().length() == 0) {
            continue;
        }
        StyleDescription paragraphStyle = styleSheet.getStyleDescription(p.getStyleIndex());
        String styleName = paragraphStyle.getName();
        if (styleName.startsWith("Heading")) {
            // A heading always terminates a running source block.
            if (inCode) {
                closeSource();
                inCode = false;
            }

            // NOTE(review): assumes the style name is "Heading " followed by a number;
            // any other name starting with "Heading" makes this line throw.
            int headerLevel = Integer.parseInt(styleName.substring(8));
            if (headerLevel > sectionLevel) {
                openSection();
            } else {
                // Close the current section and every enclosing one down to the new level.
                for (int y = 0; y < (sectionLevel - headerLevel) + 1; y++) {
                    closeSection();
                }
                openSection();
            }
            sectionLevel = headerLevel;
            openTitle();
            writePlainText(text);
            closeTitle();
        } else {
            // The font of the first character run decides whether this is code.
            CharacterRun run = p.getCharacterRun(0);
            String fontName = run.getFontName();
            if (fontName.startsWith("Courier")) {
                if (!inCode) {
                    openSource();
                    inCode = true;
                }
                writePlainText(text);
            } else {
                if (inCode) {
                    inCode = false;
                    closeSource();
                }
                openParagraph();
                writePlainText(text);
                closeParagraph();
            }
        }
    }
    // A document that ends inside a source block must still close it before the
    // enclosing sections are closed, as is done before every heading above.
    if (inCode) {
        closeSource();
    }
    for (int x = 0; x < sectionLevel; x++) {
        closeSection();
    }
    closeBody();
    closeDocument();
    _out.flush();

}

From source file:rzd.vivc.astzpte.beans.pagebean.ReportBean.java

/**
 * Fills the Word template {@code templ.doc} in the {@code c:\rep} directory with
 * the data of the user's first ticket and saves the result next to it as
 * {@code <ticket id>.doc}.
 * <p>
 * Only answered questions are reported. They fill the numbered placeholders
 * (T01/C01/Y01/B01 ... T50/C50/Y50/B50) in answer order; placeholders beyond the
 * number of answered questions are left untouched. I/O failures are logged and
 * do not propagate.
 *
 * @param usr the user whose first ticket is reported
 * @return the file name of the report ({@code <ticket id>.doc}), returned even
 *         when writing the report failed
 */
public String generateReport(User usr) {
    Ticket ticket = usr.getTickets().get(0);
    List<UserAnswer> answers = ticket.getAnswers();

    // Keep only answered questions, remembering their position in the ticket.
    List<UserAnswerModel> questions = new ArrayList<>();
    for (int i = 0; i < answers.size(); i++) {
        UserAnswer answer = answers.get(i);
        if (answer.getAnswer() != null) {
            questions.add(new UserAnswerModel(answer, i));
        }
    }

    SimpleDateFormat format = new SimpleDateFormat("dd/MM/yyyy");
    // NOTE(review): "hh" is the 12-hour clock and no AM/PM marker is printed;
    // "HH:mm" may have been intended - confirm against the expected report.
    SimpleDateFormat format1 = new SimpleDateFormat("hh:mm");

    try (FileInputStream fis = new FileInputStream("c:\\rep\\templ.doc")) {
        HWPFDocument doc = new HWPFDocument(fis);

        // NOTE(review): a fresh range is requested for every replacement, as in the
        // original code; presumably because replacing text changes the document
        // length - confirm before caching the range.
        doc.getRange().getParagraph(3).replaceText("(dtBeg)", format.format(ticket.getDt_create()));
        doc.getRange().getParagraph(9).replaceText("(timeBeg)", format1.format(ticket.getDt_create()));
        doc.getRange().getParagraph(11).replaceText("(timeFin)", format1.format(ticket.getFinish()));
        long num = usr.getNum();
        doc.getRange().replaceText("(num)", num + "");

        doc.getRange().getParagraph(24).replaceText("(allow1)",
                usr.getAllowNum() + "  " + format.format(usr.getAllowDat()));

        doc.getRange().replaceText("(tickNum)",
                ticket.getAnswers().get(0).getQuestion().getTicketTemplate().getNum() + "");
        doc.getRange().replaceText("(themeNum)",
                ticket.getAnswers().get(0).getQuestion().getTicketTemplate().getTheme().getId() + "");
        doc.getRange().replaceText("(themeName)",
                ticket.getAnswers().get(0).getQuestion().getTicketTemplate().getTheme().getName());

        // The template offers 50 numbered slots. Fill only as many as there are
        // answered questions, so a partially answered ticket no longer fails with
        // an IndexOutOfBoundsException.
        int filledSlots = Math.min(50, questions.size());
        int count = 0;
        for (int i = 1; i <= filledSlots; i++) {
            UserAnswerModel answerModel = questions.get(i - 1);
            // Slot numbers are always two digits: 01..09, 10..50.
            String slot = (i < 10 ? "0" : "") + i;

            doc.getRange().replaceText("T" + slot, answerModel.getQuestion().getText());
            doc.getRange().replaceText("C" + slot, answerModel.givenNumber() + "");
            // NOTE(review): if these methods return boxed values, "==" compares
            // references - confirm they return primitives.
            boolean cor = answerModel.correctNumber() == answerModel.givenNumber();
            if (cor) {
                count++;
            }
            // NOTE(review): both literals are whitespace only (one vs. two spaces);
            // presumably the original text was lost - confirm against the template.
            doc.getRange().replaceText("Y" + slot,
                    cor ? " " : "  ");
            doc.getRange().replaceText("B" + slot, cor ? "1" : "0");
        }
        // One call is enough: the two-argument replaceText replaces every occurrence.
        doc.getRange().replaceText("BT", count + "");

        // try-with-resources closes the output file even when writing fails
        try (FileOutputStream fos = new FileOutputStream("c:\\rep\\" + ticket.getId() + ".doc")) {
            doc.write(fos);
        }
    } catch (IOException ex) {
        // Also covers FileNotFoundException, which was handled identically.
        Logger.getLogger(ReportBean.class.getName()).log(Level.SEVERE, null, ex);
    }
    return ticket.getId() + ".doc";
}

From source file:textextractor.WordManager.java

/**
 * Reads a Word document and collects the space-separated tokens of all of its
 * paragraphs, in document order, into {@code listDoc}.
 * <p>
 * As a side effect the text of every paragraph is printed to standard output,
 * preceded by the paragraph's style name when that style is not "Normal".
 *
 * @param fis stream positioned at the start of a Word (.doc) file; it is not closed here
 * @return {@code listDoc}, holding the tokens of every paragraph; empty when the
 *         document has no paragraphs
 * @throws IOException if the document cannot be read
 */
public ArrayList extractDoc(FileInputStream fis) throws IOException {
    HWPFDocument doc = new HWPFDocument(fis);
    Range range = doc.getRange();

    // Create the result list once, before the loop. Recreating it per paragraph
    // discarded everything except the last paragraph, and a document without
    // paragraphs returned whatever a previous call had left in the field.
    // The raw type is kept because the field's declared type is not visible here.
    listDoc = new ArrayList();

    for (int i = 0; i < range.numParagraphs(); i++) {
        Paragraph p = range.getParagraph(i);
        StyleDescription style = doc.getStyleSheet().getStyleDescription(p.getStyleIndex());
        if (!"Normal".equals(style.getName())) {
            System.out.println(style.getName());
        }
        String paragraphText = p.text();
        System.out.println(paragraphText);
        listDoc.addAll(Arrays.asList(paragraphText.split(" ")));
    }
    return listDoc;

}