Example usage for org.apache.poi.hwpf HWPFDocument getRange

List of usage examples for org.apache.poi.hwpf HWPFDocument getRange

Introduction

This page collects usage examples for the getRange() method of org.apache.poi.hwpf.HWPFDocument.

Prototype

@Override
public Range getRange() 

Source Link

Document

Returns the range which covers the whole of the document, but excludes any headers and footers.

Usage

From source file:de.uni_siegen.wineme.come_in.thumbnailer.util.mime.DocFileIdentifier.java

License:Open Source License

/**
 * Refines the MIME type of an Office file by checking whether it can be opened as an
 * HWPF (binary Word) document.
 *
 * @param mimeType the MIME type detected so far
 * @param bytes    not used by this method
 * @param file     the file to probe
 * @return {@code DOC_MIME_TYPE} if the file opens as an {@code HWPFDocument} whose main range
 *         has an end offset greater than zero; otherwise {@code mimeType} unchanged
 */
@Override
public String identify(String mimeType, byte[] bytes, File file) {

    if (isOfficeFile(mimeType) && !DOC_MIME_TYPE.equals(mimeType)) {
        // try-with-resources: the stream was previously never closed, leaking a file handle
        // on every probe (successful or not).
        try (FileInputStream stream = new FileInputStream(file)) {
            HWPFDocument document = new HWPFDocument(stream);

            if (document.getRange().getEndOffset() > 0) {
                return DOC_MIME_TYPE;
            }
        } catch (Throwable ignored) {
            // Deliberate best-effort probe: any failure to open or parse the file simply
            // means it is not treated as a .doc, and the incoming MIME type is returned.
        }
    }

    return mimeType;
}

From source file:File.DOC.WriteDoc.java

/**
 * Opens the source Word document, inserts {@code content} in bold before the start of its
 * main range, and writes the result to {@code path + namafile + ".doc"}.
 *
 * <p>The source document location is hard-coded (see the {@code new File(...)} below).
 * Any failure is reported by printing the exception message to standard output; nothing is
 * thrown to the caller.
 *
 * @param path     prefix of the output location; it is concatenated directly with
 *                 {@code namafile}, so no separator is added here
 * @param namafile output file name without the {@code .doc} extension
 * @param content  text to insert; every line feed is replaced by the character \013
 */
public void Write(String path, String namafile, String content) {
    File file = new File("D:\\xyz.doc");
    // Both streams are managed by try-with-resources. Previously the input stream was never
    // closed and the output stream was never closed either (the code called close() on a
    // different name, "out", instead of the stream it had opened).
    try (FileInputStream in = new FileInputStream(file)) {
        POIFSFileSystem fs = new POIFSFileSystem(in);
        HWPFDocument doc = new HWPFDocument(fs);
        Range range = doc.getRange();
        CharacterRun run = range.insertBefore(content.replace("\n", "\013"));
        run.setBold(true);
        try (OutputStream outa = new FileOutputStream(new File(path + namafile + ".doc"))) {
            doc.write(outa);
        }
    } catch (Exception e) {
        System.out.println(e.getMessage());
    }
}

From source file:javaapplication1.HWPFTest.java

/**
 * Walks every section, paragraph and character run of the document's main range and calls
 * {@code replaceText(findText, replaceText)} on each run whose text contains {@code findText}.
 *
 * <p>Matching is done per character run, so text that is split across two runs is not matched.
 *
 * @param doc         the document to modify in place
 * @param findText    the text to look for
 * @param replaceText the text to put in its place
 * @return the same {@code doc} instance that was passed in
 */
private static HWPFDocument replaceText(HWPFDocument doc, String findText, String replaceText) {
    Range body = doc.getRange();

    for (int sectionIdx = 0; sectionIdx < body.numSections(); sectionIdx++) {
        Section section = body.getSection(sectionIdx);

        for (int paragraphIdx = 0; paragraphIdx < section.numParagraphs(); paragraphIdx++) {
            Paragraph paragraph = section.getParagraph(paragraphIdx);

            for (int runIdx = 0; runIdx < paragraph.numCharacterRuns(); runIdx++) {
                CharacterRun candidate = paragraph.getCharacterRun(runIdx);
                if (!candidate.text().contains(findText)) {
                    continue;
                }
                candidate.replaceText(findText, replaceText);
            }
        }
    }
    return doc;
}

From source file:mj.ocraptor.extraction.tika.parser.microsoft.WordExtractor.java

License:Apache License

/**
 * Extracts the content of the Word document stored under {@code root} and emits it through
 * {@code xhtml}.
 *
 * <p>Output is produced in this order: image text, headers, main paragraphs, textboxes,
 * footnotes, comments, endnotes, footers, pictures not yet output, and finally embedded
 * office documents found under the "ObjectPool" entry.
 *
 * <p>If {@code HWPFDocument} rejects the file with {@code OldWordFileFormatException}, the
 * whole job is delegated to {@code parseWord6} instead.
 *
 * @param root  directory node holding the Word document
 * @param xhtml handler that receives the extracted content
 * @throws IOException   declared; not caught in this method
 * @throws SAXException  declared; not caught in this method
 * @throws TikaException declared; not caught in this method
 */
protected void parse(DirectoryNode root, XHTMLContentHandler xhtml)
        throws IOException, SAXException, TikaException {
    HWPFDocument document;
    try {
        document = new HWPFDocument(root);
    } catch (OldWordFileFormatException e) {
        // Older Word format: hand over to the dedicated parser and stop here
        parseWord6(root, xhtml);
        return;
    }

    org.apache.poi.hwpf.extractor.WordExtractor wordExtractor = new org.apache.poi.hwpf.extractor.WordExtractor(
            document);

    // mj: addition specific to this copy of the extractor - runs before any other output
    extractImageText(xhtml, document);

    HeaderStories headerFooter = new HeaderStories(document);

    // Grab the list of pictures. As far as we can tell,
    // the pictures should be in order, and may be directly
    // placed or referenced from an anchor
    PicturesTable pictureTable = document.getPicturesTable();
    PicturesSource pictures = new PicturesSource(document);

    // Do any headers, if present
    Range[] headers = new Range[] { headerFooter.getFirstHeaderSubrange(), headerFooter.getEvenHeaderSubrange(),
            headerFooter.getOddHeaderSubrange() };
    handleHeaderFooter(headers, "header", document, pictures, pictureTable, xhtml);

    // Do the main paragraph text
    Range r = document.getRange();
    for (int i = 0; i < r.numParagraphs(); i++) {
        Paragraph p = r.getParagraph(i);
        // The return value is added to the index on top of the loop's own i++;
        // presumably it is the number of extra paragraphs the handler consumed - TODO confirm
        i += handleParagraph(p, 0, r, document, FieldsDocumentPart.MAIN, pictures, pictureTable, xhtml);
    }

    // Do everything else
    for (String paragraph : wordExtractor.getMainTextboxText()) {
        xhtml.element("p", paragraph);
    }

    for (String paragraph : wordExtractor.getFootnoteText()) {
        xhtml.element("p", paragraph);
    }

    for (String paragraph : wordExtractor.getCommentsText()) {
        xhtml.element("p", paragraph);
    }

    for (String paragraph : wordExtractor.getEndnoteText()) {
        xhtml.element("p", paragraph);
    }

    // Do any footers, if present
    Range[] footers = new Range[] { headerFooter.getFirstFooterSubrange(), headerFooter.getEvenFooterSubrange(),
            headerFooter.getOddFooterSubrange() };
    handleHeaderFooter(footers, "footer", document, pictures, pictureTable, xhtml);

    // Handle any pictures that we haven't output yet
    // (the loop has no update clause: the next picture is fetched at the end of the body)
    for (Picture p = pictures.nextUnclaimed(); p != null;) {
        handlePictureCharacterRun(null, p, pictures, xhtml);
        p = pictures.nextUnclaimed();
    }

    // Handle any embedded office documents
    try {
        DirectoryEntry op = (DirectoryEntry) root.getEntry("ObjectPool");
        for (Entry entry : op) {
            // Only sub-directories whose name starts with "_" are treated as embedded documents
            if (entry.getName().startsWith("_") && entry instanceof DirectoryEntry) {
                handleEmbeddedOfficeDoc((DirectoryEntry) entry, xhtml);
            }
        }
    } catch (FileNotFoundException e) {
        // Intentionally ignored - presumably raised when there is no "ObjectPool" entry,
        // i.e. the document has no embedded objects (TODO confirm)
    }
}

From source file:Modelo.EscribirWord.java

/**
 * Visits every character run in the document's main range (section by section, paragraph by
 * paragraph) and calls {@code replaceText(findText, replaceText)} on each run whose text
 * contains {@code findText}.
 *
 * <p>A {@code null} replacement is tolerated: the first time a match is found with a
 * {@code null} replacement, "null" is printed to standard output and the empty string is
 * used from then on.
 *
 * @param doc         the document to modify in place
 * @param findText    the text to look for inside each run
 * @param replaceText the replacement text; may be {@code null}
 * @return the same {@code doc} instance that was passed in
 */
private HWPFDocument replaceText(HWPFDocument doc, String findText, String replaceText) {
    final Range wholeDocument = doc.getRange();

    for (int sec = 0; sec < wholeDocument.numSections(); sec++) {
        final Section section = wholeDocument.getSection(sec);

        for (int par = 0; par < section.numParagraphs(); par++) {
            final Paragraph paragraph = section.getParagraph(par);

            for (int idx = 0; idx < paragraph.numCharacterRuns(); idx++) {
                final CharacterRun run = paragraph.getCharacterRun(idx);
                if (!run.text().contains(findText)) {
                    continue;
                }

                // Lazily normalise a null replacement, only once a match actually exists
                if (replaceText == null) {
                    System.out.println("null");
                    replaceText = "";
                }
                run.replaceText(findText, replaceText);
            }
        }
    }
    return doc;
}

From source file:org.apache.tika.parser.microsoft.WordExtractor.java

License:Apache License

/**
 * Extracts the content of the Word document stored under {@code root} and emits it through
 * {@code xhtml}.
 *
 * <p>Output is produced in this order: headers, main paragraphs, textboxes, footnotes,
 * comments, endnotes, footers, pictures not yet output, and finally embedded office
 * documents found under the "ObjectPool" entry.
 *
 * <p>If {@code HWPFDocument} rejects the file with {@code OldWordFileFormatException}, the
 * whole job is delegated to {@code parseWord6} instead.
 *
 * @param root  directory node holding the Word document
 * @param xhtml handler that receives the extracted content
 * @throws IOException   declared; not caught in this method
 * @throws SAXException  declared; not caught in this method
 * @throws TikaException declared; not caught in this method
 */
protected void parse(DirectoryNode root, XHTMLContentHandler xhtml)
        throws IOException, SAXException, TikaException {
    HWPFDocument document;
    try {
        document = new HWPFDocument(root);
    } catch (OldWordFileFormatException e) {
        // Older Word format: hand over to the dedicated parser and stop here
        parseWord6(root, xhtml);
        return;
    }
    org.apache.poi.hwpf.extractor.WordExtractor wordExtractor = new org.apache.poi.hwpf.extractor.WordExtractor(
            document);
    HeaderStories headerFooter = new HeaderStories(document);

    // Grab the list of pictures. As far as we can tell,
    //  the pictures should be in order, and may be directly
    //  placed or referenced from an anchor
    PicturesTable pictureTable = document.getPicturesTable();
    PicturesSource pictures = new PicturesSource(document);

    // Do any headers, if present
    Range[] headers = new Range[] { headerFooter.getFirstHeaderSubrange(), headerFooter.getEvenHeaderSubrange(),
            headerFooter.getOddHeaderSubrange() };
    handleHeaderFooter(headers, "header", document, pictures, pictureTable, xhtml);

    // Do the main paragraph text
    Range r = document.getRange();
    // One ListManager is created per document and shared by all main-range paragraphs
    ListManager listManager = new ListManager(document);
    for (int i = 0; i < r.numParagraphs(); i++) {
        Paragraph p = r.getParagraph(i);
        // The return value is added to the index on top of the loop's own i++;
        // presumably it is the number of extra paragraphs the handler consumed - TODO confirm
        i += handleParagraph(p, 0, r, document, FieldsDocumentPart.MAIN, pictures, pictureTable, listManager,
                xhtml);
    }

    // Do everything else
    for (String paragraph : wordExtractor.getMainTextboxText()) {
        xhtml.element("p", paragraph);
    }

    for (String paragraph : wordExtractor.getFootnoteText()) {
        xhtml.element("p", paragraph);
    }

    for (String paragraph : wordExtractor.getCommentsText()) {
        xhtml.element("p", paragraph);
    }

    for (String paragraph : wordExtractor.getEndnoteText()) {
        xhtml.element("p", paragraph);
    }

    // Do any footers, if present
    Range[] footers = new Range[] { headerFooter.getFirstFooterSubrange(), headerFooter.getEvenFooterSubrange(),
            headerFooter.getOddFooterSubrange() };
    handleHeaderFooter(footers, "footer", document, pictures, pictureTable, xhtml);

    // Handle any pictures that we haven't output yet
    // (the loop has no update clause: the next picture is fetched at the end of the body)
    for (Picture p = pictures.nextUnclaimed(); p != null;) {
        handlePictureCharacterRun(null, p, pictures, xhtml);
        p = pictures.nextUnclaimed();
    }

    // Handle any embedded office documents
    try {
        DirectoryEntry op = (DirectoryEntry) root.getEntry("ObjectPool");
        for (Entry entry : op) {
            // Only sub-directories whose name starts with "_" are treated as embedded documents
            if (entry.getName().startsWith("_") && entry instanceof DirectoryEntry) {
                handleEmbeddedOfficeDoc((DirectoryEntry) entry, xhtml);
            }
        }
    } catch (FileNotFoundException e) {
        // Intentionally ignored - presumably raised when there is no "ObjectPool" entry,
        // i.e. the document has no embedded objects (TODO confirm)
    }
}

From source file:org.docx4j.convert.in.Doc.java

License:Apache License

/**
 * Walks the main range of an HWPF document section by section and feeds its content into the
 * main document part of {@code wordMLPackage}: paragraphs outside tables are converted with
 * {@code handleP} and added to the part, tables are passed to {@code handleTable}.
 *
 * <p>This method is private so that the use of POI's HWPF for the conversion (as currently
 * implemented) stays encapsulated. The list tables and document properties are fetched but
 * not converted yet, and sections themselves are not converted either (all TODO).
 *
 * @param doc           the source HWPF document
 * @param wordMLPackage the package whose main document part receives the content
 * @throws Exception declared; nothing is caught in this method
 */
private static void convert(HWPFDocument doc, WordprocessingMLPackage wordMLPackage) throws Exception {

    // Styles: TODO (higher priority) - at present a default set of styles is defined in the
    // output document; the source stylesheet is only passed through to the handlers.
    org.apache.poi.hwpf.model.StyleSheet stylesheet = doc.getStyleSheet();

    // Lists: TODO
    org.apache.poi.hwpf.model.ListTables listTables = doc.getListTables();

    // Document properties: TODO
    org.apache.poi.hwpf.model.DocumentProperties docProps = doc.getDocProperties();

    // Main document part
    MainDocumentPart documentPart = wordMLPackage.getMainDocumentPart();
    org.docx4j.wml.ObjectFactory factory = new org.docx4j.wml.ObjectFactory();

    Range body = doc.getRange();

    for (int sectionIdx = 0; sectionIdx < body.numSections(); sectionIdx++) {
        Section section = body.getSection(sectionIdx);

        // TODO - convert section

        for (int paragraphIdx = 0; paragraphIdx < section.numParagraphs(); paragraphIdx++) {
            Paragraph paragraph = section.getParagraph(paragraphIdx);

            if (!paragraph.isInTable()) {
                // Ordinary paragraph: convert it and append it to the output part
                org.docx4j.wml.P converted = handleP(wordMLPackage, doc, paragraph, stylesheet, documentPart,
                        factory);
                documentPart.addObject(converted);
                continue;
            }

            // First paragraph of a table: process the table as a whole
            Table table = section.getTable(paragraph);
            int columnCount = numCol(table);

            log.info("Found " + table.numRows() + "x" + columnCount + " table - TODO - convert");

            handleTable(wordMLPackage, doc, table, stylesheet, documentPart, factory);

            // Skip the table's remaining paragraphs; the loop increment steps past the last one
            paragraphIdx += table.numParagraphs() - 1;
        }
    }

}

From source file:org.esmerilprogramming.pdfcake.DocumentReplace.java

License:Open Source License

/**
 * Scans every paragraph of the document's main range for placeholders of the form
 * {@code $$$<key>$$$} and replaces each occurrence with the value the template holds for
 * that key.
 *
 * <p>The paragraph text is re-read after every replacement, so all occurrences of a key in a
 * paragraph are replaced (previously only the first one was, because the local text copy had
 * every occurrence stripped after the first replacement). The search resumes after the
 * inserted value, so a value that itself contains the placeholder cannot cause an endless loop.
 *
 * @param document the document to modify in place
 * @param template supplies the keys and their replacement values via {@code getAttributes()}
 * @return the same {@code document} instance that was passed in
 */
private static HWPFDocument replaceKeys(HWPFDocument document, DocumentTemplate template) {
    Range range = document.getRange();
    for (int i = 0; i < range.numParagraphs(); i++) {
        Paragraph p = range.getParagraph(i);
        for (Enumeration<String> e = template.getAttributes().keys(); e.hasMoreElements();) {
            String key = e.nextElement();
            String attributeKey = "$$$" + key + "$$$";
            int searchFrom = 0;
            while (true) {
                String text;
                try {
                    text = p.text();
                } catch (Exception ignored) {
                    // Best effort, as before: a paragraph whose text cannot be read is left
                    // untouched for this key (and no stale text from another key is reused).
                    break;
                }
                int offset = (text == null) ? -1 : text.indexOf(attributeKey, searchFrom);
                if (offset < 0) {
                    break;
                }
                String replacement = template.getAttributes().get(key);
                p.replaceText(attributeKey, replacement, offset);
                // Continue after the text just inserted
                searchFrom = offset + replacement.length();
            }
        }
    }
    return document;
}

From source file:org.exoplatform.services.document.impl.MSWordDocumentReader.java

License:Open Source License

/**
 * Returns only a text from .doc file content.
 * /* ww w.  ja  va2  s. c o  m*/
 * @param is an input stream with .doc file content.
 * @return The string only with text from file content.
 */
public String getContentAsText(final InputStream is) throws IOException, DocumentReadException {
    if (is == null) {
        throw new IllegalArgumentException("InputStream is null.");
    }
    String text = "";
    try {
        if (is.available() == 0) {
            return "";
        }

        HWPFDocument doc;
        try {
            doc = SecurityHelper.doPrivilegedIOExceptionAction(new PrivilegedExceptionAction<HWPFDocument>() {
                public HWPFDocument run() throws Exception {
                    return new HWPFDocument(is);
                }
            });
        } catch (IOException e) {
            throw new DocumentReadException("Can't open document.", e);
        }

        Range range = doc.getRange();
        text = range.text();
    } finally {
        if (is != null) {
            try {
                is.close();
            } catch (IOException e) {
                if (LOG.isTraceEnabled()) {
                    LOG.trace("An exception occurred: " + e.getMessage());
                }
            }
        }
    }
    return text.trim();
}

From source file:org.modeshape.sequencer.msoffice.word.WordMetadataReader.java

License:Apache License

/**
 * Reads a Word document from {@code stream} and builds its {@code WordMetadata}: the list of
 * headings found in the main range plus the document's summary information.
 *
 * <p>A paragraph counts as a heading when the name of its style starts with
 * {@code HEADER_PREFIX}. The heading level is parsed from the part of the style name that
 * follows the prefix and one separator character; if that part is missing or not a number,
 * the level is 0.
 *
 * <p>The stream is not closed by this method.
 *
 * @param stream stream positioned at the start of the Word document
 * @return the populated metadata object
 * @throws IOException if the document cannot be read
 */
public static WordMetadata instance(InputStream stream) throws IOException {
    WordMetadata metadata = new WordMetadata();
    List<WordMetadata.WordHeading> headings = new ArrayList<WordMetadata.WordHeading>();

    HWPFDocument document = new HWPFDocument(stream);
    Range range = document.getRange();

    StyleSheet stylesheet = document.getStyleSheet();

    for (int i = 0; i < range.numParagraphs(); i++) {
        Paragraph paragraph = range.getParagraph(i);

        String styleName = stylesheet.getStyleDescription(paragraph.getStyleIndex()).getName();

        if (styleName.startsWith(HEADER_PREFIX)) {
            // Guarded substring: a style named exactly HEADER_PREFIX used to throw
            // StringIndexOutOfBoundsException here. It now yields an empty level string,
            // which falls through to the "could not parse" path below (level 0).
            int levelStart = HEADER_PREFIX.length() + 1;
            String rawLevelNum = levelStart <= styleName.length() ? styleName.substring(levelStart).trim() : "";
            int levelNum = 0;

            try {
                levelNum = Integer.parseInt(rawLevelNum);
            } catch (NumberFormatException nfe) {
                log.debug("Could not parse heading level from: " + styleName);
            }

            String text = Paragraph.stripFields(paragraph.text());

            // Drop a trailing carriage return; endsWith is safe on an empty string, whereas the
            // previous charAt(length - 1) threw for an empty heading paragraph.
            if (text.endsWith("\r")) {
                text = text.substring(0, text.length() - 1);
            }

            headings.add(new WordMetadata.WordHeading(text, levelNum));
        }
    }

    metadata.setHeadings(headings);
    metadata.setMetadata(document.getSummaryInformation());
    return metadata;
}