Example usage of org.apache.poi.hwpf.HWPFDocument.getRange()

Introduction

This page collects example usages of the getRange() method of org.apache.poi.hwpf.HWPFDocument.

Prototype

@Override
public Range getRange()

Source Link

Document

Returns the range which covers the whole of the document, but excludes any headers and footers.

Usage

From source file:at.tugraz.sss.serv.SSFileU.java

License:Apache License

/**
 * Converts a binary Word document into a PDF that contains one PDF paragraph for every
 * paragraph of text extracted from the Word document.
 *
 * <p>The length and the content of each extracted paragraph are also printed to standard
 * output.
 *
 * @param docFilePath path of the Word document to read
 * @param pdfFilePath path of the PDF file to write
 * @throws Exception if the Word document cannot be read or the PDF cannot be written
 */
public static void writePDFFromDoc(final String docFilePath, final String pdfFilePath) throws Exception {

    // NOTE(review): assumes openFileForRead returns an InputStream (or a subtype) - confirm.
    final java.io.InputStream in = openFileForRead(docFilePath);

    try {
        final HWPFDocument word = new HWPFDocument(new POIFSFileSystem(in));
        final String[] paragraphs = new WordExtractor(word).getParagraphText();
        final OutputStream out = openOrCreateFileWithPathForWrite(pdfFilePath);

        try {
            final Document document = new Document();
            final PdfWriter writer = PdfWriter.getInstance(document, out);

            document.open();
            writer.setPageEmpty(true);
            document.newPage();
            writer.setPageEmpty(true);

            for (int i = 0; i < paragraphs.length; i++) {
                // remove the line terminators (optional CRs followed by LF) from the text
                paragraphs[i] = paragraphs[i].replaceAll("\\cM?\r?\n", "");
                System.out.println("Length:" + paragraphs[i].length());
                System.out.println("Paragraph" + i + ": " + paragraphs[i]);

                // add the paragraph to the document
                document.add(new Paragraph(paragraphs[i]));
            }

            document.close();
        } finally {
            // releases the output file even when PDF generation fails half-way
            out.close();
        }
    } finally {
        in.close();
    }
}

From source file:at.tugraz.sss.serv.util.SSFileU.java

License:Apache License

/**
 * Converts a binary Word document into a PDF that contains one PDF paragraph for every
 * paragraph of text extracted from the Word document.
 *
 * <p>The length and the content of each extracted paragraph are also printed to standard
 * output. Every failure is handed to {@code SSServErrReg.regErrThrow}.
 *
 * @param docFilePath path of the Word document to read
 * @param pdfFilePath path of the PDF file to write
 * @throws SSErr declared for failures reported through {@code SSServErrReg.regErrThrow}
 */
public static void writePDFFromDoc(final String docFilePath, final String pdfFilePath) throws SSErr {

    try {
        // NOTE(review): assumes openFileForRead returns an InputStream (or a subtype) - confirm.
        final java.io.InputStream in = openFileForRead(docFilePath);

        try {
            final HWPFDocument word = new HWPFDocument(new POIFSFileSystem(in));
            final String[] paragraphs = new WordExtractor(word).getParagraphText();
            final OutputStream out = openOrCreateFileWithPathForWrite(pdfFilePath);

            try {
                final Document document = new Document();
                final PdfWriter writer = PdfWriter.getInstance(document, out);

                document.open();
                writer.setPageEmpty(true);
                document.newPage();
                writer.setPageEmpty(true);

                for (int i = 0; i < paragraphs.length; i++) {
                    // remove the line terminators (optional CRs followed by LF) from the text
                    paragraphs[i] = paragraphs[i].replaceAll("\\cM?\r?\n", "");
                    System.out.println("Length:" + paragraphs[i].length());
                    System.out.println("Paragraph" + i + ": " + paragraphs[i]);

                    // add the paragraph to the document
                    document.add(new Paragraph(paragraphs[i]));
                }

                document.close();
            } finally {
                // releases the output file even when PDF generation fails half-way
                out.close();
            }
        } finally {
            in.close();
        }
    } catch (Exception error) {
        SSServErrReg.regErrThrow(error);
    }
}

From source file:com.duroty.lucene.parser.MSWordParser.java

License:Open Source License

/**
 * DOCUMENT ME!/*ww w  .j  a  v a  2  s  .co m*/
 *
 * @return DOCUMENT ME!
 *
 * @throws ParserException DOCUMENT ME!
 */
private String getContents() throws ParserException {
    String contents = "";

    try {
        HWPFDocument doc = new HWPFDocument(input);
        Range r = doc.getRange();
        StringBuffer buffer = new StringBuffer();

        for (int x = 0; x < r.numSections(); x++) {
            Section s = r.getSection(x);

            for (int y = 0; y < s.numParagraphs(); y++) {
                Paragraph p = null;

                try {
                    p = s.getParagraph(y);
                } catch (Exception e) {
                    buffer.append("\n");
                }

                if (p != null) {
                    for (int z = 0; z < p.numCharacterRuns(); z++) {
                        try {
                            //character run
                            CharacterRun run = p.getCharacterRun(z);

                            //character run text
                            buffer.append(run.text());
                        } catch (Exception e) {
                            buffer.append(" ");
                        }
                    }
                }

                /*if (sleep > 0) {
                    try {
                        Thread.sleep(sleep);
                    } catch (Exception ex) {
                    }
                }*/
                // use a new line at the paragraph break
                buffer.append("\n");
            }
        }

        contents = buffer.toString();
    } catch (Exception ex) {
        throw new ParserException(ex);
    }

    return contents;
}

From source file:com.google.gdt.handler.impl.WordHandler.java

License:Open Source License

/**
 * Translates the text of a Word document one character run at a time and writes the
 * result to the output file derived from the input file name.
 *
 * <p>Progress is reported through {@code pLevel} after every paragraph. When the handler
 * has been interrupted, the output file is deleted and the progress string is set to
 * "cancelled". A run whose translation fails is logged and left as it was.
 *
 * @param inputFile path of the Word document to translate
 * @param pLevel progress indicator to update
 * @throws IOException if the document cannot be read or the result cannot be written
 * @throws InvalidFormatException not raised by the code in this method itself
 */
@Override
public void handle(String inputFile, ProgressLevel pLevel) throws IOException, InvalidFormatException {
    String outPutFile = getOuputFileName(inputFile);

    // Parse the input before the output file is created, and release it right away.
    HWPFDocument hDocument;
    InputStream inputStream = new FileInputStream(inputFile);
    try {
        hDocument = new HWPFDocument(inputStream);
    } finally {
        inputStream.close();
    }
    Range range = hDocument.getRange();

    OutputStream outputStream = new FileOutputStream(outPutFile);
    try {
        pLevel.setTrFileName(outPutFile);
        pLevel.setValue(0);
        pLevel.setStringPainted(true);
        pLevel.setMaxValue(range.numParagraphs());
        for (int i = 0; i < range.numParagraphs(); i++) {
            Paragraph paragraph = range.getParagraph(i);
            int numCharRuns = paragraph.numCharacterRuns();
            for (int j = 0; j < numCharRuns; j++) {
                if (isInterrupted) {
                    // close before deleting so the file is no longer held open
                    outputStream.close();
                    new File(outPutFile).delete();
                    pLevel.setString("cancelled");
                    return;
                }
                CharacterRun charRun = paragraph.getCharacterRun(j);
                String originalText = charRun.text();
                if ((null == originalText) || (originalText.trim().equals("")))
                    continue;
                // in http post method, all key value pairs are separated with &,
                // so the text sent to the translator must not contain one
                String textToTranslate = originalText;
                if (preferenceModel.getTranslatorType() == TranslatorType.HTTP)
                    textToTranslate = originalText.replaceAll("&", "and");
                try {
                    String translatedTxt = translator.translate(textToTranslate);
                    // The run still holds the original text, so that is what has to be
                    // searched for; an identical translation needs no replacement.
                    // NOTE(review): Range.replaceText(String, String) appears to re-scan the
                    // text after each replacement - confirm it terminates when the new value
                    // contains the placeholder.
                    if (!originalText.equals(translatedTxt)) {
                        charRun.replaceText(originalText, translatedTxt);
                    }
                } catch (Exception e) {
                    logger.log(Level.SEVERE,
                            "Input File : " + inputFile + " cannot translate the text : " + textToTranslate, e);
                }
            }
            pLevel.setValue(i + 1);
        }
        pLevel.setString("done");
        hDocument.write(outputStream);
    } finally {
        // also covers failures while translating or writing
        outputStream.close();
    }
}

From source file:com.icebreak.p2p.front.controller.trade.download.WordParse.java

/**
 * Fills a Word template with the supplied values, converts the result to PDF and passes
 * the PDF to the download/preview helper.
 *
 * <p>Steps: load the template located under the web application's real path, fill the rows
 * of the first table from {@code lst} (entry {@code i} fills row {@code i + 1}) and delete
 * the rows after them, replace every key of {@code map} throughout the document, write the
 * document to a temporary file, convert that file to PDF, run the download helper and
 * finally delete the PDF.
 *
 * @param response HTTP response handed to the download helper
 * @param session used to resolve the web application's real path
 * @param _file template path, appended to the web application's real path
 * @param map placeholder/value pairs replaced throughout the whole document
 * @param lst one placeholder map per table row to fill; may be empty
 * @param loan supplies the loan name passed to the download helper
 * @param downType passed through to the download helper
 * @throws IllegalStateException if the template cannot be loaded, or if rows have to be
 *         filled but the template contains no table
 */
@Transactional(rollbackFor = Exception.class, value = "transactionManager")
public void readwriteWord(HttpServletResponse response, HttpSession session, String _file,
        Map<String, String> map, List<Map<String, Text>> lst, LoanDemandDO loan, String downType) {
    ServletContext application = session.getServletContext();
    String serverRealPath = application.getRealPath("/");
    String fileTemp = AppConstantsUtil.getYrdUploadFolder() + File.separator + "doc";
    File fileDir = new File(fileTemp);
    if (!fileDir.exists()) {
        fileDir.mkdirs();
    }

    // Load the Word template; the stream is released whether or not loading succeeds.
    HWPFDocument hdt;
    FileInputStream in = null;
    try {
        in = new FileInputStream(new File(serverRealPath + _file));
        hdt = new HWPFDocument(in);
    } catch (Exception e1) {
        logger.error("??", e1);
        // fail with the real cause instead of a NullPointerException further down
        throw new IllegalStateException("Cannot load Word template: " + _file, e1);
    } finally {
        if (in != null) {
            try {
                in.close();
            } catch (Exception closeError) {
                logger.error("Cannot close Word template: " + _file, closeError);
            }
        }
    }

    // Only the first table of the template is filled.
    Range range = hdt.getRange();
    TableIterator it = new TableIterator(range);
    Table tb = it.hasNext() ? it.next() : null;
    if (lst.size() > 0) {
        if (tb == null) {
            throw new IllegalStateException("Word template contains no table to fill: " + _file);
        }
        // Row 0 is left untouched; list entry i - 1 fills row i.
        for (int i = 1; i <= lst.size(); i++) {
            Map<String, Text> replaces = lst.get(i - 1);
            TableRow tr = tb.getRow(i);
            for (int j = 0; j < tr.numCells(); j++) {
                TableCell td = tr.getCell(j);
                for (int k = 0; k < td.numParagraphs(); k++) {
                    Paragraph para = td.getParagraph(k);
                    final String old = para.text();
                    String s = old;
                    for (Map.Entry<String, Text> entry : replaces.entrySet()) {
                        if (s.contains(entry.getKey())) {
                            s = s.replace(entry.getKey(), entry.getValue().getText());
                        }
                    }
                    // touch the paragraph only when a placeholder was actually replaced
                    if (!old.equals(s)) {
                        para.replaceText(old, s);
                    }
                }
            }
        }
        // Remove the rows that follow the filled ones.
        for (int n = lst.size() + 1; n < tb.numRows(); n++) {
            TableRow tr = tb.getRow(n);
            tr.delete();
        }
    }

    for (Map.Entry<String, String> entry : map.entrySet()) {
        range.replaceText(entry.getKey(), entry.getValue());
    }

    // NOTE(review): fileTemp and fileName are joined without a separator, so the file is
    // created next to the "doc" directory rather than inside it - confirm this is intended.
    String fileName = System.currentTimeMillis() + _file.substring(_file.lastIndexOf("."));
    String docAddress = fileTemp + fileName;
    try {
        FileOutputStream out = new FileOutputStream(docAddress);
        try {
            hdt.write(out);
            out.flush();
        } finally {
            out.close();
        }
    } catch (Exception e) {
        logger.error("?word", e);
    }

    Doc2Pdf doc2pdf = new Doc2Pdf();
    String pdfAddress = doc2pdf.createPDF(docAddress);
    try {
        String fileType = lst.size() > 0 ? "contract" : "letter";
        DownloadAndPrivewFileTread downThread = new DownloadAndPrivewFileTread();
        downThread.setDownType(downType);
        downThread.setFilePath(pdfAddress);
        downThread.setResponse(response);
        downThread.setFileType(fileType);
        downThread.setProName(loan.getLoanName());
        // run() is called directly, so it completes on this thread before the PDF is deleted
        downThread.run();
        File pdfFile = new File(pdfAddress);
        pdfFile.delete();

    } catch (Exception e) {
        logger.error("pdf", e);
    }
}

From source file:com.thuvienkhoahoc.wordtomwtext.examples.WordToMwtext.java

License:Apache License

/**
 * Converts the given Word document and writes the result to {@code stream} as UTF-8 text.
 *
 * <p>Blank paragraphs are skipped. A paragraph whose style is a numbered "Heading" style
 * closes and opens sections as needed and is written as a title. Any other paragraph is
 * written inside a source block when the font of its first character run starts with
 * "Courier", and as an ordinary paragraph otherwise.
 *
 * @param doc the Word document to convert
 * @param stream destination of the converted text; flushed but not closed here
 * @throws IOException if writing to {@code stream} fails
 * @throws UnsupportedEncodingException if UTF-8 is not supported
 */
public WordToMwtext(HWPFDocument doc, OutputStream stream) throws IOException, UnsupportedEncodingException {

    OutputStreamWriter out = new OutputStreamWriter(stream, "UTF-8");
    _out = out;
    _doc = doc;

    init();
    openDocument();
    openBody();

    Range r = doc.getRange();
    StyleSheet styleSheet = doc.getStyleSheet();

    int sectionLevel = 0;
    int lenParagraph = r.numParagraphs();
    boolean inCode = false;
    for (int x = 0; x < lenParagraph; x++) {
        Paragraph p = r.getParagraph(x);
        String text = p.text();
        if (text.trim().length() == 0) {
            continue;
        }
        StyleDescription paragraphStyle = styleSheet.getStyleDescription(p.getStyleIndex());
        int headerLevel = parseHeadingLevel(paragraphStyle == null ? null : paragraphStyle.getName());
        if (headerLevel >= 0) {
            if (inCode) {
                closeSource();
                inCode = false;
            }

            // NOTE(review): a heading that skips levels (e.g. 1 -> 3) opens a single section,
            // while sectionLevel sections are closed at the end - confirm the counts match.
            if (headerLevel > sectionLevel) {
                openSection();
            } else {
                for (int y = 0; y < (sectionLevel - headerLevel) + 1; y++) {
                    closeSection();
                }
                openSection();
            }
            sectionLevel = headerLevel;
            openTitle(sectionLevel);
            writePlainText(text.trim());
            closeTitle(sectionLevel);
        } else {
            // the font of the first run decides whether the paragraph is treated as code
            CharacterRun run = p.getCharacterRun(0);
            String fontName = run.getFontName();
            if (fontName != null && fontName.startsWith("Courier")) {
                if (!inCode) {
                    openSource();
                    inCode = true;
                }
                writePlainText(p.text());
            } else {
                if (inCode) {
                    inCode = false;
                    closeSource();
                }
                openParagraph();
                writePlainText(p.text());
                closeParagraph();
            }
        }
    }
    // a document that ends inside a code block still needs the block closed
    if (inCode) {
        closeSource();
    }
    for (int x = 0; x < sectionLevel; x++) {
        closeSection();
    }
    closeBody();
    closeDocument();
    _out.flush();

}

/**
 * Returns the numeric level of a "Heading N" style name, or -1 when the name is missing,
 * is not a "Heading" style, or carries no non-negative number.
 */
private static int parseHeadingLevel(String styleName) {
    if (styleName == null || !styleName.startsWith("Heading")) {
        return -1;
    }
    try {
        int level = Integer.parseInt(styleName.substring("Heading".length()).trim());
        return level < 0 ? -1 : level;
    } catch (NumberFormatException notNumbered) {
        // e.g. a custom "Heading Appendix" style: treat it as ordinary text
        return -1;
    }
}

From source file:com.unsa.view.MainView.java

License:Creative Commons License

/**
 * Converts a Word document to a PDF that contains one PDF paragraph per paragraph of
 * extracted text.
 *
 * <p>The PDF path is the absolute path of {@code file1} with its last three characters
 * replaced by "pdf". Failures are printed with their stack trace and not rethrown.
 *
 * @param file1 the Word document to convert
 */
private void DocConverterPDF(File file1) {
    com.lowagie.text.Document document = new com.lowagie.text.Document();
    FileInputStream docIn = null;
    NPOIFSFileSystem fs = null;
    OutputStream fileout = null;

    try {
        String docPath = file1.getAbsolutePath();
        System.out.println(docPath);
        docIn = new FileInputStream(docPath);
        fs = new NPOIFSFileSystem(docIn);
        HWPFDocument doc = new HWPFDocument(fs.getRoot());
        String[] paragraphs = new WordExtractor(doc).getParagraphText();

        String output = docPath.substring(0, docPath.length() - 3);
        fileout = new FileOutputStream(new File(output + "pdf"));

        PdfWriter writer = PdfWriter.getInstance(document, fileout);

        document.open();
        writer.setPageEmpty(true);
        document.newPage();
        writer.setPageEmpty(true);

        for (int i = 0; i < paragraphs.length; i++) {
            // remove the line terminators (optional CRs followed by LF) from the text
            paragraphs[i] = paragraphs[i].replaceAll("\\cM?\r?\n", "");
            document.add(new Paragraph(paragraphs[i]));
        }

    } catch (Exception e) {

        e.printStackTrace();
    } finally {

        try {
            document.close();
        } finally {
            // release the files even when the conversion failed before the PDF was complete
            closeAndReport(fileout);
            closeAndReport(fs);
            closeAndReport(docIn);
        }
    }

}

/** Closes the given resource if it is not null and prints the stack trace of any close failure. */
private static void closeAndReport(java.io.Closeable resource) {
    if (resource == null) {
        return;
    }
    try {
        resource.close();
    } catch (java.io.IOException e) {
        e.printStackTrace();
    }
}

From source file:com.weibo.datasys.parser.office.extractor.WordParse.java

License:Open Source License

/**
 * Reads a Word document and returns its name together with the text of its main range.
 *
 * <p>If the document cannot be read the error is logged and the returned object carries
 * the file name only.
 *
 * @param filePath the Word document to read
 * @return a {@code FileData} holding the file name and, when extraction succeeded, the text
 */
public FileData extractorDoc(File filePath) {
    FileData fData = new FileData();
    fData.setName(filePath.getName());
    try {
        FileInputStream in = new FileInputStream(filePath);
        try {
            fData.setContent(new HWPFDocument(in).getRange().text());
        } finally {
            // the file handle must not stay open once the document has been parsed
            in.close();
        }
    } catch (Exception e) {
        LOG.error("", e);
    }
    return fData;
}

From source file:com.xpn.xwiki.plugin.lucene.textextraction.MSWordTextExtractor.java

License:Apache License

/**
 * Returns the text of the main range of the Word document contained in {@code data}.
 *
 * @param data the raw bytes of a Word document
 * @return the text of the document's range
 * @throws Exception if the bytes cannot be parsed as a Word document
 */
public String getText(byte[] data) throws Exception {
    final ByteArrayInputStream source = new ByteArrayInputStream(data);
    return new HWPFDocument(source).getRange().text();
}

From source file:com.zhch.example.poi.Word2Forrest.java

License:Apache License

/**
 * Converts the given Word document and writes the result to {@code stream} as UTF-8 text.
 *
 * <p>Blank paragraphs are skipped. A paragraph whose style is a numbered "Heading" style
 * closes and opens sections as needed and is written as a title. Any other paragraph is
 * written inside a source block when the font of its first character run starts with
 * "Courier", and as an ordinary paragraph otherwise.
 *
 * @param doc the Word document to convert
 * @param stream destination of the converted text; flushed but not closed here
 * @throws IOException if writing to {@code stream} fails
 */
public Word2Forrest(HWPFDocument doc, OutputStream stream) throws IOException {
    OutputStreamWriter out = new OutputStreamWriter(stream, Charset.forName("UTF-8"));
    _out = out;
    _doc = doc;

    init();
    openDocument();
    openBody();

    Range r = doc.getRange();
    StyleSheet styleSheet = doc.getStyleSheet();

    int sectionLevel = 0;
    int lenParagraph = r.numParagraphs();
    boolean inCode = false;
    for (int x = 0; x < lenParagraph; x++) {
        Paragraph p = r.getParagraph(x);
        String text = p.text();
        if (text.trim().length() == 0) {
            continue;
        }
        StyleDescription paragraphStyle = styleSheet.getStyleDescription(p.getStyleIndex());
        int headerLevel = parseHeadingLevel(paragraphStyle == null ? null : paragraphStyle.getName());
        if (headerLevel >= 0) {
            if (inCode) {
                closeSource();
                inCode = false;
            }

            // NOTE(review): a heading that skips levels (e.g. 1 -> 3) opens a single section,
            // while sectionLevel sections are closed at the end - confirm the counts match.
            if (headerLevel > sectionLevel) {
                openSection();
            } else {
                for (int y = 0; y < (sectionLevel - headerLevel) + 1; y++) {
                    closeSection();
                }
                openSection();
            }
            sectionLevel = headerLevel;
            openTitle();
            writePlainText(text);
            closeTitle();
        } else {
            // the font of the first run decides whether the paragraph is treated as code
            CharacterRun run = p.getCharacterRun(0);
            String fontName = run.getFontName();
            if (fontName != null && fontName.startsWith("Courier")) {
                if (!inCode) {
                    openSource();
                    inCode = true;
                }
                writePlainText(p.text());
            } else {
                if (inCode) {
                    inCode = false;
                    closeSource();
                }
                openParagraph();
                writePlainText(p.text());
                closeParagraph();
            }
        }
    }
    // a document that ends inside a code block still needs the block closed
    if (inCode) {
        closeSource();
    }
    for (int x = 0; x < sectionLevel; x++) {
        closeSection();
    }
    closeBody();
    closeDocument();
    _out.flush();

}

/**
 * Returns the numeric level of a "Heading N" style name, or -1 when the name is missing,
 * is not a "Heading" style, or carries no non-negative number.
 */
private static int parseHeadingLevel(String styleName) {
    if (styleName == null || !styleName.startsWith("Heading")) {
        return -1;
    }
    try {
        int level = Integer.parseInt(styleName.substring("Heading".length()).trim());
        return level < 0 ? -1 : level;
    } catch (NumberFormatException notNumbered) {
        // e.g. a custom "Heading Appendix" style: treat it as ordinary text
        return -1;
    }
}