Usage examples for org.apache.poi.xwpf.usermodel.XWPFParagraph.getFootnoteText()
public String getFootnoteText()
From source file:apachepoitest.DocumentPropertyEnumerator.java
public static void showParagraphPropertiesOnly(List<XWPFParagraph> lp) { int i1 = 1;/*from ww w . j av a 2 s.c om*/ for (XWPFParagraph p : lp) { //System.out.println(p.getStyleID() + " " + sl1.getStyle(p.getStyleID()).getCTStyle().xmlText()); System.out.println("____________________________________"); if (p.getParagraphText().trim().length() > 0) { System.out.println("\n#" + i1++ + " LINE: " + p.getParagraphText()); System.out.println("ALIGNMENT: " + p.getAlignment().toString()); //Uncomment to display other properties System.out.println("BORDER BETWEEN: " + p.getBorderBetween().toString()); System.out.println("BORDER BOTTOM: " + p.getBorderBottom().toString()); System.out.println("BORDER LEFT: " + p.getBorderLeft().toString()); System.out.println("BORDER RIGHT: " + p.getBorderRight().toString()); System.out.println("BORDER TOP: " + p.getBorderTop().toString()); System.out.println("BODY ELEMENT TYPE: " + p.getElementType().toString()); System.out.println("FOOTNOTE: " + p.getFootnoteText()); System.out.println("INDENTATION 1ST LINE: " + p.getIndentationFirstLine()); System.out.println("INDENTATION HANGING: " + p.getIndentationHanging()); System.out.println("INDENTATION LEFT: " + p.getIndentationLeft()); System.out.println("INDENTATION RIGHT: " + p.getIndentationRight()); System.out.println("NUMBERING FORMAT: " + p.getNumFmt()); System.out.println("NUMERIC STYLE ILVL: " + p.getNumIlvl()); System.out.println("STYLE: " + p.getBody().getXWPFDocument().getStyles().getStyle(p.getStyleID())); XWPFParagraphClone pc; pc = new XWPFParagraphClone(p.getCTP(), p.getBody()); System.out.println("SPACING VALUE: " + pc.getCTSpacing(false).getLine().floatValue() / 240); System.out.println("SPACING AFTER: " + p.getSpacingAfter()); System.out.println("SPACING AFTER LINES: " + p.getSpacingAfterLines()); System.out.println("SPACING BEFORE: " + p.getSpacingBefore()); System.out.println("SPACING BEFORE LINES: " + p.getSpacingBeforeLines()); System.out.println("SPACING LINE RULE: " + 
p.getSpacingLineRule()); System.out.println("VERTICAL ALIGNMENT: " + p.getVerticalAlignment()); } // can also use .searchText to look for a string else { // Uncomment to display lines //System.out.println("\n#" + i1++ + " LINE: <SPACE>"); } } }
From source file:com.project3.utils.poiold.DocumentPropertyEnumerator.java
/**
 * Dumps all reported paragraph properties to standard output, one labelled line per property.
 *
 * <p>Every paragraph in {@code lp} produces a separator line. Only paragraphs with
 * non-blank text (after {@code trim()}) are numbered and have their properties printed.
 *
 * @param lp the paragraphs to report on
 */
public static void showAllParagraphProperties(List<XWPFParagraph> lp) {
    int counter = 1;
    for (XWPFParagraph para : lp) {
        System.out.println("____________________________________");

        String text = para.getParagraphText();
        boolean hasVisibleText = text.trim().length() > 0;
        if (!hasVisibleText) {
            // Blank paragraph: separator only, counter is not advanced.
            continue;
        }

        System.out.println("\n#" + counter++ + " LINE: " + para.getParagraphText());
        System.out.println("ALIGNMENT: " + para.getAlignment().toString());
        System.out.println("BORDER BETWEEN: " + para.getBorderBetween().toString());
        System.out.println("BORDER BOTTOM: " + para.getBorderBottom().toString());
        System.out.println("BORDER LEFT: " + para.getBorderLeft().toString());
        System.out.println("BORDER RIGHT: " + para.getBorderRight().toString());
        System.out.println("BORDER TOP: " + para.getBorderTop().toString());
        System.out.println("BODY ELEMENT TYPE: " + para.getElementType().toString());
        System.out.println("FOOTNOTE: " + para.getFootnoteText());
        System.out.println("INDENTATION 1ST LINE: " + para.getIndentationFirstLine());
        System.out.println("INDENTATION HANGING: " + para.getIndentationHanging());
        System.out.println("INDENTATION LEFT: " + para.getIndentationLeft());
        System.out.println("INDENTATION RIGHT: " + para.getIndentationRight());
        System.out.println("NUMBERING FORMAT: " + para.getNumFmt());
        System.out.println("NUMERIC STYLE ILVL: " + para.getNumIlvl());
        System.out.println("STYLE: "
                + para.getBody().getXWPFDocument().getStyles().getStyle(para.getStyleID()));

        // The line-spacing value is obtained via XWPFParagraphClone and scaled by 1/240
        // (presumably 240ths of a line -> line multiple; verify against XWPFParagraphClone).
        XWPFParagraphClone cloneForSpacing = new XWPFParagraphClone(para.getCTP(), para.getBody());
        System.out.println("SPACING VALUE: " + cloneForSpacing.getCTSpacing(false).getLine().floatValue() / 240);
        System.out.println("SPACING AFTER: " + para.getSpacingAfter());
        System.out.println("SPACING AFTER LINES: " + para.getSpacingAfterLines());
        System.out.println("SPACING BEFORE: " + para.getSpacingBefore());
        System.out.println("SPACING BEFORE LINES: " + para.getSpacingBeforeLines());
        System.out.println("SPACING LINE RULE: " + para.getSpacingLineRule());
        System.out.println("VERTICAL ALIGNMENT: " + para.getVerticalAlignment());
    }
}
From source file:mj.ocraptor.extraction.tika.parser.microsoft.ooxml.XWPFWordExtractorDecorator.java
License:Apache License
private void extractParagraph(XWPFParagraph paragraph, XHTMLContentHandler xhtml) throws SAXException, XmlException, IOException { // If this paragraph is actually a whole new section, then // it could have its own headers and footers // Check and handle if so XWPFHeaderFooterPolicy headerFooterPolicy = null; if (paragraph.getCTP().getPPr() != null) { CTSectPr ctSectPr = paragraph.getCTP().getPPr().getSectPr(); if (ctSectPr != null) { headerFooterPolicy = new XWPFHeaderFooterPolicy(document, ctSectPr); extractHeaders(xhtml, headerFooterPolicy); }/*from w ww . j a va 2 s . c o m*/ } // Is this a paragraph, or a heading? String tag = "p"; String styleClass = null; if (paragraph.getStyleID() != null) { XWPFStyle style = styles.getStyle(paragraph.getStyleID()); if (style != null && style.getName() != null) { TagAndStyle tas = WordExtractor.buildParagraphTagAndStyle(style.getName(), paragraph.getPartType() == BodyType.TABLECELL); tag = tas.getTag(); styleClass = tas.getStyleClass(); } } if (styleClass == null) { xhtml.startElement(tag); } else { xhtml.startElement(tag, "class", styleClass); } // Output placeholder for any embedded docs: // TODO: replace w/ XPath/XQuery: for (XWPFRun run : paragraph.getRuns()) { XmlCursor c = run.getCTR().newCursor(); c.selectPath("./*"); while (c.toNextSelection()) { XmlObject o = c.getObject(); if (o instanceof CTObject) { XmlCursor c2 = o.newCursor(); c2.selectPath("./*"); while (c2.toNextSelection()) { XmlObject o2 = c2.getObject(); XmlObject embedAtt = o2.selectAttribute(new QName("Type")); if (embedAtt != null && embedAtt.getDomNode().getNodeValue().equals("Embed")) { // Type is "Embed" XmlObject relIDAtt = o2.selectAttribute(new QName( "http://schemas.openxmlformats.org/officeDocument/2006/relationships", "id")); if (relIDAtt != null) { String relID = relIDAtt.getDomNode().getNodeValue(); AttributesImpl attributes = new AttributesImpl(); attributes.addAttribute("", "class", "class", "CDATA", "embedded"); 
attributes.addAttribute("", "id", "id", "CDATA", relID); xhtml.startElement("div", attributes); xhtml.endElement("div"); } } } c2.dispose(); } } c.dispose(); } // Attach bookmarks for the paragraph // (In future, we might put them in the right place, for now // we just put them in the correct paragraph) for (CTBookmark bookmark : paragraph.getCTP().getBookmarkStartList()) { xhtml.startElement("a", "name", bookmark.getName()); xhtml.endElement("a"); } TmpFormatting fmtg = new TmpFormatting(false, false); // Do the iruns for (IRunElement run : paragraph.getIRuns()) { if (run instanceof XWPFSDT) { fmtg = closeStyleTags(xhtml, fmtg); processSDTRun((XWPFSDT) run, xhtml); // for now, we're ignoring formatting in sdt // if you hit an sdt reset to false fmtg.setBold(false); fmtg.setItalic(false); } else { fmtg = processRun((XWPFRun) run, paragraph, xhtml, fmtg); } } closeStyleTags(xhtml, fmtg); // Now do any comments for the paragraph XWPFCommentsDecorator comments = new XWPFCommentsDecorator(paragraph, null); String commentText = comments.getCommentText(); if (commentText != null && commentText.length() > 0) { xhtml.characters(commentText); } String footnameText = paragraph.getFootnoteText(); if (footnameText != null && footnameText.length() > 0) { xhtml.characters(footnameText + "\n"); } // Also extract any paragraphs embedded in text boxes: for (XmlObject embeddedParagraph : paragraph.getCTP().selectPath( "declare namespace w='http://schemas.openxmlformats.org/wordprocessingml/2006/main' declare namespace wps='http://schemas.microsoft.com/office/word/2010/wordprocessingShape' .//*/wps:txbx/w:txbxContent/w:p")) { extractParagraph(new XWPFParagraph(CTP.Factory.parse(embeddedParagraph.xmlText()), paragraph.getBody()), xhtml); } // Finish this paragraph xhtml.endElement(tag); if (headerFooterPolicy != null) { extractFooters(xhtml, headerFooterPolicy); } }
From source file:org.apache.tika.parser.microsoft.ooxml.XWPFWordExtractorDecorator.java
License:Apache License
private void extractParagraph(XWPFParagraph paragraph, XWPFListManager listManager, XHTMLContentHandler xhtml) throws SAXException, XmlException, IOException { // If this paragraph is actually a whole new section, then // it could have its own headers and footers // Check and handle if so XWPFHeaderFooterPolicy headerFooterPolicy = null; if (paragraph.getCTP().getPPr() != null) { CTSectPr ctSectPr = paragraph.getCTP().getPPr().getSectPr(); if (ctSectPr != null) { headerFooterPolicy = new XWPFHeaderFooterPolicy(document, ctSectPr); extractHeaders(xhtml, headerFooterPolicy, listManager); }/*from w ww .j av a2 s.c o m*/ } // Is this a paragraph, or a heading? String tag = "p"; String styleClass = null; if (paragraph.getStyleID() != null) { XWPFStyle style = styles.getStyle(paragraph.getStyleID()); if (style != null && style.getName() != null) { TagAndStyle tas = WordExtractor.buildParagraphTagAndStyle(style.getName(), paragraph.getPartType() == BodyType.TABLECELL); tag = tas.getTag(); styleClass = tas.getStyleClass(); } } if (styleClass == null) { xhtml.startElement(tag); } else { xhtml.startElement(tag, "class", styleClass); } writeParagraphNumber(paragraph, listManager, xhtml); // Output placeholder for any embedded docs: // TODO: replace w/ XPath/XQuery: for (XWPFRun run : paragraph.getRuns()) { XmlCursor c = run.getCTR().newCursor(); c.selectPath("./*"); while (c.toNextSelection()) { XmlObject o = c.getObject(); if (o instanceof CTObject) { XmlCursor c2 = o.newCursor(); c2.selectPath("./*"); while (c2.toNextSelection()) { XmlObject o2 = c2.getObject(); XmlObject embedAtt = o2.selectAttribute(new QName("Type")); if (embedAtt != null && embedAtt.getDomNode().getNodeValue().equals("Embed")) { // Type is "Embed" XmlObject relIDAtt = o2.selectAttribute(new QName( "http://schemas.openxmlformats.org/officeDocument/2006/relationships", "id")); if (relIDAtt != null) { String relID = relIDAtt.getDomNode().getNodeValue(); AttributesImpl attributes = new AttributesImpl(); 
attributes.addAttribute("", "class", "class", "CDATA", "embedded"); attributes.addAttribute("", "id", "id", "CDATA", relID); xhtml.startElement("div", attributes); xhtml.endElement("div"); } } } c2.dispose(); } } c.dispose(); } // Attach bookmarks for the paragraph // (In future, we might put them in the right place, for now // we just put them in the correct paragraph) for (int i = 0; i < paragraph.getCTP().sizeOfBookmarkStartArray(); i++) { CTBookmark bookmark = paragraph.getCTP().getBookmarkStartArray(i); xhtml.startElement("a", "name", bookmark.getName()); xhtml.endElement("a"); } TmpFormatting fmtg = new TmpFormatting(false, false); // Do the iruns for (IRunElement run : paragraph.getIRuns()) { if (run instanceof XWPFSDT) { fmtg = closeStyleTags(xhtml, fmtg); processSDTRun((XWPFSDT) run, xhtml); //for now, we're ignoring formatting in sdt //if you hit an sdt reset to false fmtg.setBold(false); fmtg.setItalic(false); } else { fmtg = processRun((XWPFRun) run, paragraph, xhtml, fmtg); } } closeStyleTags(xhtml, fmtg); // Now do any comments for the paragraph XWPFCommentsDecorator comments = new XWPFCommentsDecorator(paragraph, null); String commentText = comments.getCommentText(); if (commentText != null && commentText.length() > 0) { xhtml.characters(commentText); } String footnameText = paragraph.getFootnoteText(); if (footnameText != null && footnameText.length() > 0) { xhtml.characters(footnameText + "\n"); } // Also extract any paragraphs embedded in text boxes: for (XmlObject embeddedParagraph : paragraph.getCTP().selectPath( "declare namespace w='http://schemas.openxmlformats.org/wordprocessingml/2006/main' declare namespace wps='http://schemas.microsoft.com/office/word/2010/wordprocessingShape' .//*/wps:txbx/w:txbxContent/w:p")) { extractParagraph(new XWPFParagraph(CTP.Factory.parse(embeddedParagraph.xmlText()), paragraph.getBody()), listManager, xhtml); } // Finish this paragraph xhtml.endElement(tag); if (headerFooterPolicy != null) { 
extractFooters(xhtml, headerFooterPolicy, listManager); } }