org.apache.commons.digester.NodeCreateRule.java Source code

Introduction

Here is the source code for org.apache.commons.digester.NodeCreateRule.java
Source

/* $Id: NodeCreateRule.java 992060 2010-09-02 19:09:47Z simonetripodi $
 *
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.commons.digester;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;

import org.w3c.dom.Attr;
import org.w3c.dom.DOMException;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

/**
 * A rule implementation that creates a DOM
 * {@link org.w3c.dom.Node Node} containing the XML at the element that matched
 * the rule. Two concrete types of nodes can be created by this rule:
 * <ul>
 *   <li>the default is to create an {@link org.w3c.dom.Element Element} node.
 *   The created element will correspond to the element that matched the rule,
 *   containing all XML content underneath that element.</li>
 *   <li>alternatively, this rule can create nodes of type
 *   {@link org.w3c.dom.DocumentFragment DocumentFragment}, which will contain
 *   only the XML content under the element the rule was triggered on.</li>
 * </ul>
 * The created node will be normalized, meaning it will not contain text nodes
 * that only contain white space characters.
 *
 * <p>The created <code>Node</code> will be pushed on Digester's object stack
 * when done. To use it in the context of another DOM
 * {@link org.w3c.dom.Document Document}, it must be imported first, using the
 * Document method
 * {@link org.w3c.dom.Document#importNode(org.w3c.dom.Node, boolean) importNode()}.
 * </p>
 *
 * <p><strong>Important Note:</strong> This is implemented by replacing the SAX
 * {@link org.xml.sax.ContentHandler ContentHandler} in the parser used by 
 * Digester, and resetting it when the matched element is closed. As a side 
 * effect, rules that would match XML nodes under the element that matches 
 * a <code>NodeCreateRule</code> will never be triggered by Digester, which 
 * usually is the behavior one would expect.</p>
 * 
 * <p><strong>Note</strong> that the current implementation does not set the namespace prefixes
 * in the exported nodes. The (usually more important) namespace URIs are set,
 * of course.</p>
 *
 * @since Digester 1.4
 */

public class NodeCreateRule extends Rule {

    // ---------------------------------------------------------- Inner Classes

    /**
     * The SAX content handler that does all the actual work of assembling the
     * DOM node tree from the SAX events.
     *
     * <p>An instance is installed as Digester's custom content handler by
     * {@link NodeCreateRule#begin(String, String, Attributes)} and removes
     * itself again in {@link #endElement(String, String, String)} once the
     * element it was installed for is closed.</p>
     */
    private class NodeBuilder extends DefaultHandler {

        // ------------------------------------------------- Instance Variables

        /**
         * The content handler used by Digester before it was set to this
         * content handler; restored when the builder is done.
         */
        private final ContentHandler oldContentHandler;

        /**
         * Depth of the current node, relative to the element where the content
         * handler was put into action.
         */
        private int depth = 0;

        /**
         * A DOM Document used to create the various Node instances.
         */
        private final Document doc;

        /**
         * The DOM node that will be pushed on Digester's stack.
         */
        private final Node root;

        /**
         * The current top DOM node, i.e. the node new children are appended to.
         */
        private Node top;

        /**
         * The text content collected so far for the current top DOM node.
         * SAX may deliver character data in several chunks, so it is
         * accumulated here and flushed by {@link #addTextIfPresent()}.
         */
        private final StringBuilder topText = new StringBuilder();

        // ------------------------------------------------------- Constructors

        /**
         * Constructor.
         *
         * <p>Stores the content handler currently used by Digester so it can
         * be reset when done, and records the DOM objects needed to
         * build the node.</p>
         *
         * @param doc the document to use to create nodes
         * @param root the root node; it is also the initial insertion point
         */
        public NodeBuilder(Document doc, Node root) {

            this.doc = doc;
            this.root = root;
            this.top = root;
            this.oldContentHandler = getDigester().getCustomContentHandler();

        }

        // ----------------------------------------------------- Helper Methods

        /**
         * Appends a {@link org.w3c.dom.Text Text} node to the current node
         * if the content reported by the parser is not purely whitespace.
         * The text buffer is emptied in either case.
         *
         * @throws SAXException if the DOM implementation throws an exception
         */
        private void addTextIfPresent() throws SAXException {
            if (topText.length() == 0) {
                return;
            }

            String str = topText.toString();
            topText.setLength(0);

            if (str.trim().length() > 0) {
                // The contained text is not *pure* whitespace, so create
                // a text node to hold it. Note that the "untrimmed" text
                // is stored in the node.
                try {
                    top.appendChild(doc.createTextNode(str));
                } catch (DOMException e) {
                    throw toSaxException(e);
                }
            }
        }

        /**
         * Wraps a DOM failure in a {@link SAXException}, keeping the original
         * exception as the cause so its stack trace is not lost.
         *
         * @param e the DOM exception to wrap
         * @return a SAXException carrying the same message and the cause
         */
        private SAXException toSaxException(DOMException e) {
            return new SAXException(e.getMessage(), e);
        }

        // --------------------------------------------- ContentHandler Methods

        /**
         * Handle notification about text embedded within the current node.
         * <p>
         * An xml parser calls this when text is found. We need to ensure that this
         * text gets attached to the new Node we are creating - except in the case
         * where the only text in the node is whitespace.
         * <p>
         * There is a catch, however. According to the sax specification, a parser
         * does not need to pass all of the text content of a node in one go; it can
         * make multiple calls passing part of the data on each call. In particular,
         * when the body of an element includes xml entity-references, at least some
         * parsers make a separate call to this method to pass just the entity content.
         * <p>
         * In this method, we therefore just append the provided text to a
         * "current text" buffer. When the element end, a child element or a
         * processing instruction is found, we can check whether we have
         * all-whitespace. See method addTextIfPresent.
         *
         * @param ch the characters from the XML document
         * @param start the start position in the array
         * @param length the number of characters to read from the array
         * @throws SAXException declared by the SAX interface; not thrown here
         */
        @Override
        public void characters(char[] ch, int start, int length) throws SAXException {

            topText.append(ch, start, length);
        }

        /**
         * Closes the current element and checks whether control needs to be
         * returned to Digester.
         *
         * <p>When the element that triggered the rule is closed (depth 0),
         * the previous content handler is restored, the built node is pushed
         * on Digester's object stack and the event is forwarded to Digester
         * so that it can perform its own end-of-element processing.</p>
         *
         * @param namespaceURI the namespace URI
         * @param localName the local name
         * @param qName the qualified (prefixed) name
         * @throws SAXException if the DOM implementation throws an exception
         */
        @Override
        public void endElement(String namespaceURI, String localName, String qName) throws SAXException {

            // Flush any text collected for the element being closed.
            addTextIfPresent();

            try {
                if (depth == 0) {
                    getDigester().setCustomContentHandler(oldContentHandler);
                    getDigester().push(root);
                    getDigester().endElement(namespaceURI, localName, qName);
                }

                top = top.getParentNode();
                depth--;
            } catch (DOMException e) {
                throw toSaxException(e);
            }

        }

        /**
         * Adds a new
         * {@link org.w3c.dom.ProcessingInstruction ProcessingInstruction} to
         * the current node.
         *
         * <p>Any text buffered so far is flushed first, so that text preceding
         * the processing instruction stays in front of it in the resulting
         * DOM instead of being merged with the text that follows.</p>
         *
         * @param target the processing instruction target
         * @param data the processing instruction data, or null if none was
         *   supplied
         * @throws SAXException if the DOM implementation throws an exception
         */
        @Override
        public void processingInstruction(String target, String data) throws SAXException {

            addTextIfPresent();

            try {
                top.appendChild(doc.createProcessingInstruction(target, data));
            } catch (DOMException e) {
                throw toSaxException(e);
            }

        }

        /**
         * Adds a new child {@link org.w3c.dom.Element Element} to the current
         * node and makes it the new current node.
         *
         * <p>Elements and attributes are created namespace-aware when the
         * parser reports a non-empty local name, and by qualified name
         * otherwise.</p>
         *
         * @param namespaceURI the namespace URI
         * @param localName the local name
         * @param qName the qualified (prefixed) name
         * @param atts the list of attributes
         * @throws SAXException if the DOM implementation throws an exception
         */
        @Override
        public void startElement(String namespaceURI, String localName, String qName, Attributes atts)
                throws SAXException {

            // Text seen before this child belongs to the parent; flush it first.
            addTextIfPresent();

            try {
                Element child;
                if ((localName == null) || (localName.length() == 0)) {
                    child = doc.createElement(qName);
                } else {
                    child = doc.createElementNS(namespaceURI, localName);
                }

                for (int i = 0; i < atts.getLength(); i++) {
                    String attLocalName = atts.getLocalName(i);
                    if ((attLocalName == null) || (attLocalName.length() == 0)) {
                        Attr attr = doc.createAttribute(atts.getQName(i));
                        attr.setNodeValue(atts.getValue(i));
                        child.setAttributeNode(attr);
                    } else {
                        Attr attr = doc.createAttributeNS(atts.getURI(i), attLocalName);
                        attr.setNodeValue(atts.getValue(i));
                        child.setAttributeNodeNS(attr);
                    }
                }

                // Only descend once the child is fully built and attached, so
                // a DOM failure never leaves 'top' pointing at a detached node.
                top.appendChild(child);
                top = child;
                depth++;
            } catch (DOMException e) {
                throw toSaxException(e);
            }

        }

    }

    // ----------------------------------------------------------- Constructors

    /**
     * Default constructor. Creates an instance of this rule that will create a
     * DOM {@link org.w3c.dom.Element Element}.
     *
     * @throws ParserConfigurationException if a default JAXP
     *   <code>DocumentBuilder</code> could not be created
     */
    public NodeCreateRule() throws ParserConfigurationException {

        this(Node.ELEMENT_NODE);

    }

    /**
     * Constructor. Creates an instance of this rule that will create a DOM
     * {@link org.w3c.dom.Element Element}, but lets you specify the JAXP
     * <code>DocumentBuilder</code> that should be used when constructing the
     * node tree.
     *
     * @param documentBuilder the JAXP <code>DocumentBuilder</code> to use
     */
    public NodeCreateRule(DocumentBuilder documentBuilder) {

        this(Node.ELEMENT_NODE, documentBuilder);

    }

    /**
     * Constructor. Creates an instance of this rule that will create either a
     * DOM {@link org.w3c.dom.Element Element} or a DOM
     * {@link org.w3c.dom.DocumentFragment DocumentFragment}, depending on the
     * value of the <code>nodeType</code> parameter.
     *
     * @param nodeType the type of node to create, which can be either
     *   {@link org.w3c.dom.Node#ELEMENT_NODE Node.ELEMENT_NODE} or
     *   {@link org.w3c.dom.Node#DOCUMENT_FRAGMENT_NODE Node.DOCUMENT_FRAGMENT_NODE}
     * @throws ParserConfigurationException if a default JAXP
     *   <code>DocumentBuilder</code> could not be created
     * @throws IllegalArgumentException if the node type is not supported
     */
    public NodeCreateRule(int nodeType) throws ParserConfigurationException {

        this(nodeType, DocumentBuilderFactory.newInstance().newDocumentBuilder());

    }

    /**
     * Constructor. Creates an instance of this rule that will create either a
     * DOM {@link org.w3c.dom.Element Element} or a DOM
     * {@link org.w3c.dom.DocumentFragment DocumentFragment}, depending on the
     * value of the <code>nodeType</code> parameter. This constructor lets you
     * specify the JAXP <code>DocumentBuilder</code> that should be used when
     * constructing the node tree.
     *
     * @param nodeType the type of node to create, which can be either
     *   {@link org.w3c.dom.Node#ELEMENT_NODE Node.ELEMENT_NODE} or
     *   {@link org.w3c.dom.Node#DOCUMENT_FRAGMENT_NODE Node.DOCUMENT_FRAGMENT_NODE}
     * @param documentBuilder the JAXP <code>DocumentBuilder</code> to use
     * @throws IllegalArgumentException if the node type is not supported
     */
    public NodeCreateRule(int nodeType, DocumentBuilder documentBuilder) {

        if ((nodeType != Node.DOCUMENT_FRAGMENT_NODE) && (nodeType != Node.ELEMENT_NODE)) {
            throw new IllegalArgumentException("Can only create nodes of type DocumentFragment and Element");
        }
        this.nodeType = nodeType;
        this.documentBuilder = documentBuilder;

    }

    // ----------------------------------------------------- Instance Variables

    /**
     * The JAXP <code>DocumentBuilder</code> to use.
     */
    private final DocumentBuilder documentBuilder;

    /**
     * The type of the node that should be created. Must be one of the
     * constants defined in {@link org.w3c.dom.Node Node}, but currently only
     * {@link org.w3c.dom.Node#ELEMENT_NODE Node.ELEMENT_NODE} and
     * {@link org.w3c.dom.Node#DOCUMENT_FRAGMENT_NODE Node.DOCUMENT_FRAGMENT_NODE}
     * are allowed values.
     */
    private final int nodeType;

    // ----------------------------------------------------------- Rule Methods

    /**
     * When this method fires, the digester is told to forward all SAX
     * ContentHandler events to the builder object, resulting in a DOM being
     * built instead of normal digester rule-handling occurring. When the
     * end of the current xml element is encountered, the original content
     * handler is restored (expected to be NULL, allowing normal Digester
     * operations to continue).
     *
     * @param namespaceURI the namespace URI of the matching element, or an
     *   empty string if the parser is not namespace aware or the element has
     *   no namespace
     * @param name the local name if the parser is namespace aware, or just
     *   the element name otherwise
     * @param attributes The attribute list of this element
     * @throws Exception indicates a JAXP configuration problem
     */
    @Override
    public void begin(String namespaceURI, String name, Attributes attributes) throws Exception {

        Document doc = documentBuilder.newDocument();

        Node rootNode;
        if (nodeType == Node.ELEMENT_NODE) {
            rootNode = createRootElement(doc, namespaceURI, name, attributes);
        } else {
            rootNode = doc.createDocumentFragment();
        }

        // the NodeBuilder constructor saves the original value of the
        // digester's custom content handler (expected to be null, but we
        // save it just in case). So now we just need to tell the digester
        // to forward events to the builder.
        NodeBuilder builder = new NodeBuilder(doc, rootNode);
        getDigester().setCustomContentHandler(builder);
    }

    /**
     * Creates the element that mirrors the element the rule matched,
     * including its attributes. Namespace-aware DOM methods are used when the
     * digester reports that it is namespace aware.
     *
     * @param doc the document used as node factory
     * @param namespaceURI the namespace URI of the matching element
     * @param name the local (or plain) name of the matching element
     * @param attributes the attribute list of the matching element
     * @return the newly created, still detached element
     */
    private Element createRootElement(Document doc, String namespaceURI, String name, Attributes attributes) {
        Element element;
        if (getDigester().getNamespaceAware()) {
            element = doc.createElementNS(namespaceURI, name);
            for (int i = 0; i < attributes.getLength(); i++) {
                element.setAttributeNS(attributes.getURI(i), attributes.getQName(i), attributes.getValue(i));
            }
        } else {
            element = doc.createElement(name);
            for (int i = 0; i < attributes.getLength(); i++) {
                element.setAttribute(attributes.getQName(i), attributes.getValue(i));
            }
        }
        return element;
    }

    /**
     * Pop the Node off the top of the stack.
     *
     * @throws Exception declared by the overridden method; the visible code
     *   only calls <code>pop()</code> on the digester
     */
    @Override
    public void end() throws Exception {

        getDigester().pop();

    }

}