edu.indiana.lib.twinpeaks.util.DomUtils.java Source code

Java tutorial

Introduction

Here is the source code for edu.indiana.lib.twinpeaks.util.DomUtils.java

Source

/**********************************************************************************
*
 * Copyright (c) 2003, 2004, 2007, 2008 The Sakai Foundation
 *
 * Licensed under the Educational Community License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *       http://www.opensource.org/licenses/ECL-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
*
**********************************************************************************/
package edu.indiana.lib.twinpeaks.util;

import java.io.*;
import java.util.*;

import javax.xml.parsers.*;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.*;

import org.w3c.dom.*;
import org.w3c.dom.html.*;
import org.xml.sax.*;

public class DomUtils {
    /** Class-wide logger, obtained through the project's LogUtils helper. */
    private static final org.apache.commons.logging.Log _log = LogUtils.getLog(DomUtils.class);
    /**
     * Default encoding property name (NekoHTML).  Within this class it is
     * only referenced by the commented-out NekoHTML parser setup.
     */
    private static final String ENCODING_OPTION = "http://cyberneko.org/html/properties/default-encoding";

    /**
     * Private constructor: every operational member is static, so the
     * class is not meant to be instantiated.
     */
    private DomUtils() {
    }

    /**
     * Default input encoding name.  Within this class it is only referenced
     * by the commented-out NekoHTML parser setup.
     */
    public final static String INPUT_ENCODING = "iso-8859-1";
    /** Character encoding name used by {@link #serialize(Object)}. */
    public final static String ENCODING = "UTF-8";

    /**
     * Create a new element.  The element is created by (and owned by) the
     * given Document but is not appended anywhere in the tree.
     * @param document Document to contain the new element
     * @param name the element name
     * @return new Element
     */
    public static Element createElement(Document document, String name) {
        return document.createElement(name);
    }

    /**
     * Create a new element and append it as the last child of the given parent.
     * The element is created by the parent's owner Document.
     * @param parent the parent Element
     * @param name the child name
     * @return the newly created (and already appended) Element
     */
    public static Element createElement(Element parent, String name) {
        Element child = parent.getOwnerDocument().createElement(name);

        parent.appendChild(child);
        return child;
    }

    /**
     * Append a new Text node to an Element.  The Text node is created by
     * the element's owner Document.
     * @param element the containing element
     * @param text the text to add
     */
    public static void addText(Element element, String text) {
        Document owner = element.getOwnerDocument();
        Text textNode = owner.createTextNode(text);

        element.appendChild(textNode);
    }

    /**
     * Append an entity reference to a specified Element.
     *      (eg <code>DomUtils.addEntity(element, "nbsp");</code>)
     * The reference node is created by the element's owner Document.
     * @param element the containing element
     * @param entity the name of the entity to reference
     */
    public static void addEntity(Element element, String entity) {
        Document owner = element.getOwnerDocument();
        EntityReference reference = owner.createEntityReference(entity);

        element.appendChild(reference);
    }

    /**
     * "Normalize" XML text node content to create a simple string.  This is
     * the two-argument {@link #normalizeText(String, String)} with no
     * original text.
     * @param update Text to normalize (may be null)
     * @return The normalized text (an empty string when update is null)
     */
    public static String normalizeText(String update) {
        final String noOriginalText = null;

        return normalizeText(noOriginalText, update);
    }

    /**
     * "Normalize" XML text node content to create a simple string.
     * <p>
     * Both pieces are trimmed, joined with a single space separator, and the
     * result is passed through <code>StringUtils.replace(text, "\\s", " ")</code>.
     * A null <code>update</code> contributes nothing to the result (it used
     * to cause a NullPointerException when <code>original</code> was non-null).
     *
     * @param original Original text (may be null)
     * @param update Text to add to the original string (may be null)
     * @return Concatenated contents; an empty string when both arguments
     *         are null
     */
    public static String normalizeText(String original, String update) {
        if (original == null) {
            return (update == null) ? "" : StringUtils.replace(update.trim(), "\\s", " ");
        }

        StringBuilder result = new StringBuilder(original.trim());

        // Only add the separator when there really is something to append
        if (update != null) {
            result.append(' ');
            result.append(update.trim());
        }
        return StringUtils.replace(result.toString(), "\\s", " ");
    }

    /**
     * Get the text held directly by this node (its own text children only;
     * child elements are not searched).
     * @param parent the node containing text
     * @return Text (trimmed of leading/trailing whitespace, null if none)
     */
    public static String getText(Node parent) {
        final boolean searchChildElements = false;

        return textSearch(parent, searchChildElements);
    }

    /**
     * Get the text held directly by a named element.  The element is the
     * first match returned by {@link #getElement(Element, String)}.
     * @param parent the node to search; it is cast to Element, so it must
     *               actually be one
     * @param elementName Element with the text we want to fetch
     * @return Text (trimmed of leading/trailing whitespace), or null if the
     *         element is not found or has no text
     */
    public static String getText(Node parent, String elementName) {
        Element match = getElement((Element) parent, elementName);

        return (match == null) ? null : textSearch(match, false);
    }

    /**
     * Get the text held directly by a named element - namespace aware.
     * The element is the first match returned by
     * {@link #getElementNS(String, Element, String)}.
     *
     * @param namespace Namespace URI
     * @param parent the node to search; it is cast to Element, so it must
     *               actually be one
     * @param elementName Element with the text we want to fetch
     * @return Text (trimmed of leading/trailing whitespace), or null if the
     *         element is not found or has no text
     */
    public static String getTextNS(String namespace, Node parent, String elementName) {
        Element match = getElementNS(namespace, (Element) parent, elementName);

        return (match == null) ? null : textSearch(match, false);
    }

    /**
     * Get the text associated with a specified element, at this level
     * only - namespace aware
     *
     * @param namespace Namespace URI
     * @param parent the node containing text
     * @param elementName Element with the text we want to fetch
     * @return Text (trimmed of leading/trailing whitespace, null if none)
     */
    /*
      public static String getTextNS(String namespace, Element parent, String elementName)
      {
        Element element = getElementNS(namespace, parent, elementName);
        
        if (element == null)
        {
          return null;
        }
        return textSearch(element, false);
      }
    */
    /**
     * Get the text associated with this element, including subordinate
     * levels.  Delegates to {@link #textSearch(Node, boolean)} with the
     * recursive flag set.
     * @param parent the node containing text
     * @return Text (trimmed of leading/trailing whitespace, null if none)
     */
    public static String getAllTextAtNode(Node parent) {
        final boolean searchChildElements = true;

        return textSearch(parent, searchChildElements);
    }

    /**
     * Get the text associated with this element at this level only, or
     * recursively, searching through all child elements.
     * <p>
     * Text nodes are combined (in document order) using
     * {@link #normalizeText(String, String)}.  When searching recursively,
     * every descendant element is visited (not just the immediate children),
     * and elements that hold no text are skipped rather than handed to
     * normalizeText() as null.
     *
     * @param parent the node containing text (null is allowed)
     * @param recursiveSearch Search all child elements?
     * @return Text (trimmed of leading/trailing whitespace, null if none)
     */
    public static String textSearch(Node parent, boolean recursiveSearch) {
        String text = null;

        if (parent != null) {
            for (Node child = parent.getFirstChild(); child != null; child = child.getNextSibling()) {

                switch (child.getNodeType()) {
                case Node.TEXT_NODE:
                    text = normalizeText(text, child.getNodeValue());
                    break;

                case Node.ELEMENT_NODE:
                    if (recursiveSearch) {
                        // Descend the whole subtree, not just one level
                        String childText = textSearch(child, true);

                        // A text-free subtree yields null: nothing to add
                        if (childText != null) {
                            text = normalizeText(text, childText);
                        }
                    }
                    break;

                default:
                    break;
                }
            }
        }
        return text == null ? text : text.trim();
    }

    /**
     * Get the first text node directly beneath this node.
     * @param parent the node containing text
     * @return Text (trimmed of leading/trailing whitespace, null if none)
     */
    public static String getFirstText(Node parent) {
        final int firstTextNode = 1;

        return getTextNodeByNumber(parent, firstTextNode);
    }

    /**
     * Get the specified text node directly beneath this node.  Only text
     * nodes are counted; other child node types are skipped.
     * @param parent the node containing text (null is allowed)
     * @param number The text node to fetch (1st, 2nd, etc)
     * @return Text (trimmed of leading/trailing whitespace), or null if the
     *         parent is null or has fewer text nodes than requested
     */
    public static String getTextNodeByNumber(Node parent, int number) {
        if (parent == null) {
            return null;
        }

        int textNodesSeen = 0;

        for (Node child = parent.getFirstChild(); child != null; child = child.getNextSibling()) {
            if (child.getNodeType() != Node.TEXT_NODE) {
                continue;
            }
            if (++textNodesSeen == number) {
                return child.getNodeValue().trim();
            }
        }
        return null;
    }

    /**
     * Get any text associated with this element and its child elements.
     * Text nodes directly beneath the parent are combined with the text
     * that {@link #getText(Node)} reports for each child element.  Unlike
     * the text search methods, the result is not trimmed again here.
     * @param parent the node containing text (null is allowed)
     * @return Text, or null if none
     */
    public static String getAllText(Node parent) {
        if (parent == null) {
            return null;
        }

        String collected = null;

        for (Node child = parent.getFirstChild(); child != null; child = child.getNextSibling()) {
            short nodeType = child.getNodeType();

            if (nodeType == Node.TEXT_NODE) {
                collected = normalizeText(collected, child.getNodeValue());
            } else if (nodeType == Node.ELEMENT_NODE) {
                String childText = getText(child);

                // Child elements without text contribute nothing
                if (childText != null) {
                    collected = normalizeText(collected, childText);
                }
            }
        }
        return collected;
    }

    /**
     * Get an Attribute from an Element.  This is a direct pass-through to
     * <code>Element.getAttribute()</code>, which yields an empty String if
     * the attribute is not present.
     * @param element the containing Element
     * @param name the attribute name
     * @return Attribute value as a String
     */
    public static String getAttribute(Element element, String name) {
        String attributeValue = element.getAttribute(name);

        return attributeValue;
    }

    /**
     * Set an Attribute in an Element (a direct pass-through to
     * <code>Element.setAttribute()</code>).
     * @param element the containing Element
     * @param name the attribute name
     * @param value the attribute value
     */
    public static void setAttribute(Element element, String name, String value) {
        final Element target = element;

        target.setAttribute(name, value);
    }

    /**
     * Return a list of named Elements, as found by
     * <code>Element.getElementsByTagName()</code> (all descendants of the
     * given element are considered).
     * @param element the containing Element
     * @param name the tag name
     * @return NodeList of matching elements
     */
    public static NodeList getElementList(Element element, String name) {
        NodeList matches = element.getElementsByTagName(name);

        return matches;
    }

    /**
     * Return a list of specified namespace:Elements, as found by
     * <code>Element.getElementsByTagNameNS()</code>.
     * @param namespace Namespace URI
     * @param element the containing Element
     * @param name the tag name (local name)
     * @return NodeList of matching elements
     */
    public static NodeList getElementListNS(String namespace, Element element, String name) {
        NodeList matches = element.getElementsByTagNameNS(namespace, name);

        return matches;
    }

    /**
     * Return a list of named Elements with a specific attribute value.
     * Every match is returned (the "first match only" option is off).
     * @param element the containing Element
     * @param name the tag name
     * @param attribute Attribute name
     * @param value Attribute value
     * @return List of matching elements (empty if none)
     */
    public static List selectElementsByAttributeValue(Element element, String name, String attribute,
            String value) {
        final boolean firstMatchOnly = false;

        return selectElementsByAttributeValue(element, name, attribute, value, firstMatchOnly);
    }

    /**
     * Return the first named Element with a specific attribute value.
     * @param element the containing Element
     * @param name the tag name
     * @param attribute Attribute name
     * @param value Attribute value
     * @return The first matching Element (null if none)
     */
    public static Element selectFirstElementByAttributeValue(Element element, String name, String attribute,
            String value) {
        // The helper only promises a List, so don't downcast to ArrayList
        List resultList = selectElementsByAttributeValue(element, name, attribute, value, true);

        return resultList.isEmpty() ? null : (Element) resultList.get(0);
    }

    /**
     * Return a list of named Elements with a specific attribute value.
     * Candidates come from <code>Element.getElementsByTagName()</code>; a
     * candidate matches when its attribute value equals the requested value.
     * @param element the containing Element
     * @param name the tag name
     * @param attribute Attribute name
     * @param value Attribute value
     * @param returnFirst Return only the first matching value?
     * @return List of matching elements (empty if none)
     */
    public static List selectElementsByAttributeValue(Element element, String name, String attribute, String value,
            boolean returnFirst) {
        NodeList candidates = element.getElementsByTagName(name);
        List matches = new ArrayList();

        for (int index = 0; index < candidates.getLength(); index++) {
            Element candidate = (Element) candidates.item(index);

            if (!getAttribute(candidate, attribute).equals(value)) {
                continue;
            }

            matches.add(candidate);
            if (returnFirst) {
                break;
            }
        }
        return matches;
    }

    /**
     * Return a list of named Elements with a specific attribute
     * value (namespace aware).  Every match is returned (the "first match
     * only" option is off).
     *
     * @param namespace Namespace URI
     * @param element the containing Element
     * @param name the tag name
     * @param attribute Attribute name
     * @param value Attribute value
     * @return List of matching elements (empty if none)
     */
    public static List selectElementsByAttributeValueNS(String namespace, Element element, String name,
            String attribute, String value) {
        final boolean firstMatchOnly = false;

        return selectElementsByAttributeValueNS(namespace, element, name, attribute, value, firstMatchOnly);
    }

    /**
     * Return the first named Element with a specific attribute
     * value (namespace aware)
     *
     * @param namespace The namespace URI
     * @param element the containing Element
     * @param name the tag name
     * @param attribute Attribute name
     * @param value Attribute value
     * @return The first matching Element (null if none)
     */
    public static Element selectFirstElementByAttributeValueNS(String namespace, Element element, String name,
            String attribute, String value) {
        // The helper only promises a List, so don't downcast to ArrayList
        List resultList = selectElementsByAttributeValueNS(namespace, element, name, attribute, value, true);

        return resultList.isEmpty() ? null : (Element) resultList.get(0);
    }

    /**
     * Return a list of named Elements with a specific attribute
     * value (namespace aware).  Candidates come from
     * <code>Element.getElementsByTagNameNS()</code>; the attribute itself is
     * looked up by plain name through {@link #getAttribute(Element, String)}.
     *
     * @param namespace Namespace URI
     * @param element the containing Element
     * @param name the tag name
     * @param attribute Attribute name
     * @param value Attribute value
     * @param returnFirst Return only the first matching value?
     * @return List of matching elements (empty if none)
     */
    public static List selectElementsByAttributeValueNS(String namespace, Element element, String name,
            String attribute, String value, boolean returnFirst) {
        NodeList candidates = element.getElementsByTagNameNS(namespace, name);
        List matches = new ArrayList();

        for (int index = 0; index < candidates.getLength(); index++) {
            Element candidate = (Element) candidates.item(index);

            if (!getAttribute(candidate, attribute).equals(value)) {
                continue;
            }

            matches.add(candidate);
            if (returnFirst) {
                break;
            }
        }
        return matches;
    }

    /**
     * Return the first named Element found by
     * {@link #getElementList(Element, String)}.
     * @param element the containing Element
     * @param name the tag name
     * @return matching Element (null if none)
     */
    public static Element getElement(Element element, String name) {
        NodeList matches = getElementList(element, name);

        if (matches.getLength() == 0) {
            return null;
        }
        return (Element) matches.item(0);
    }

    /**
     * Return the first named Element found by
     * {@link #getElementListNS(String, Element, String)} - namespace aware.
     * @param namespace Namespace URI
     * @param element the containing Element
     * @param name the tag name
     * @return matching Element (null if none)
     */
    public static Element getElementNS(String namespace, Element element, String name) {
        NodeList matches = getElementListNS(namespace, element, name);

        if (matches.getLength() == 0) {
            return null;
        }
        return (Element) matches.item(0);
    }

    /**
     * Remove this node from its parent.
     * <p>
     * Declared static like every other helper here: the class has only a
     * private constructor, so an instance method could never be invoked.
     * A node with no parent is returned untouched.
     *
     * @param node the node to remove
     * @return Node removed (the same node that was passed in)
     */
    public static Node removeNode(Node node) {
        Node parent = node.getParentNode();

        // Already detached: there is nothing to remove it from
        if (parent == null) {
            return node;
        }
        return parent.removeChild(node);
    }

    /**
     * Search up the tree (through successive parents) for a node with the
     * given name.  The starting node itself is never a candidate.
     * @param currentNode Starting point for our search
     * @param tagName Node name to look up
     * @return matching ancestor Node (null if none)
     */
    public static Node getPreviousNodeByName(Node currentNode, String tagName) {
        for (Node ancestor = currentNode.getParentNode(); ancestor != null; ancestor = ancestor.getParentNode()) {
            if (ancestor.getNodeName().equals(tagName)) {
                return ancestor;
            }
        }
        return null;
    }

    /**
     * Search earlier siblings (nearest first) for a node with the given
     * name.  The starting node itself is never a candidate.
     * @param currentNode Starting point for our search
     * @param tagName Node name to look up
     * @return matching Node (null if none)
     */
    public static Node getPreviousSiblingByName(Node currentNode, String tagName) {
        for (Node sibling = currentNode.getPreviousSibling(); sibling != null; sibling = sibling
                .getPreviousSibling()) {
            if (sibling.getNodeName().equals(tagName)) {
                return sibling;
            }
        }
        return null;
    }

    /**
     * Search later siblings (nearest first) for a node with the given
     * name.  The starting node itself is never a candidate.
     * @param currentNode Starting point for our search
     * @param tagName Node name to look up
     * @return matching Node (null if none)
     */
    public static Node getNextSiblingByName(Node currentNode, String tagName) {
        for (Node sibling = currentNode.getNextSibling(); sibling != null; sibling = sibling.getNextSibling()) {
            if (sibling.getNodeName().equals(tagName)) {
                return sibling;
            }
        }
        return null;
    }

    /**
     * Search across the tree for a given sibling.  This is a plain alias
     * for {@link #getNextSiblingByName(Node, String)}.
     * @param currentNode Starting point for our search
     * @param tagName Node name to look up
     * @return matching Node (null if none)
     * @deprecated  Replaced by {@link #getNextSiblingByName(Node, String)}
     */
    @Deprecated
    public static Node getNextNodeByName(Node currentNode, String tagName) {
        return getNextSiblingByName(currentNode, tagName);
    }

    /**
     * Search the immediate children of a node (in document order) for one
     * with the given name.  Deeper descendants are not examined.
     * @param currentNode Starting point for our search
     * @param tagName Node name to look up
     * @return matching child Node (null if none)
     */
    public static Node getChildSiblingByName(Node currentNode, String tagName) {
        for (Node child = currentNode.getFirstChild(); child != null; child = child.getNextSibling()) {
            if (child.getNodeName().equals(tagName)) {
                return child;
            }
        }
        return null;
    }

    /**
     * Get a DOM Document builder that is not namespace aware.
     * @return The DocumentBuilder
     * @throws DomException if the builder cannot be created
     */
    public static DocumentBuilder getXmlDocumentBuilder() throws DomException {
        final boolean namespaceAware = false;

        return getXmlDocumentBuilder(namespaceAware);
    }

    /**
     * Get a DOM Document builder - namespace aware
     * @return The DocumentBuilder
     * @throws DomException if the builder cannot be created
     */
    public static DocumentBuilder getXmlDocumentBuilderNS() throws DomException {
        final boolean namespaceAware = true;

        return getXmlDocumentBuilder(namespaceAware);
    }

    /**
     * Get a DOM Document builder.  A new factory is obtained on every call
     * and the namespace setting is logged at debug level.
     * <p>
     * NOTE(review): no secure-processing / external-entity restrictions are
     * configured on the factory - confirm that the XML parsed through these
     * builders comes from trusted sources.
     * <p>
     * NOTE(review): only the text of the underlying exception is passed on
     * to DomException; the original cause is not chained.
     *
     * @param namespaceAware true if we're to handle namespace details
     * @return The DocumentBuilder
     * @throws DomException if the builder cannot be created
     */
    public static DocumentBuilder getXmlDocumentBuilder(boolean namespaceAware) throws DomException {
        try {
            DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance();

            builderFactory.setNamespaceAware(namespaceAware);
            _log.debug("DOM parse: namespace aware = " + namespaceAware);

            DocumentBuilder builder = builderFactory.newDocumentBuilder();
            return builder;

        } catch (Exception e) {
            throw new DomException(e.toString());
        }
    }

    /**
     * Start a new XML Document whose root element is named "xml".
     * @return the Document
     * @throws DomException if the document cannot be created
     */
    public static Document createXmlDocument() throws DomException {
        final String defaultRootName = "xml";

        return createXmlDocument(defaultRootName);
    }

    /**
     * Start a new XML Document.  The document is produced by a
     * non-namespace-aware builder and already contains its root element.
     * @param rootName The name of the Document root Element (created here)
     * @return the Document
     * @throws DomException if the builder, the document or the root element
     *         cannot be created
     */
    public static Document createXmlDocument(String rootName) throws DomException {
        try {
            Document document = getXmlDocumentBuilder().newDocument();

            document.appendChild(document.createElement(rootName));
            return document;

        } catch (Exception e) {
            throw new DomException(e.toString());
        }
    }

    /**
     * Copy an XML document, adding it as a child of the target document root.
     * The source root element is deep-imported into the target, so the
     * source document is left as it was.
     * @param source Document to copy
     * @param target Document to contain copy (must already have a root element)
     */
    public static void copyDocument(Document source, Document target) {
        Element sourceRoot = source.getDocumentElement();
        Node importedCopy = target.importNode(sourceRoot, true);
        Element targetRoot = target.getDocumentElement();

        targetRoot.appendChild(importedCopy);
    }

    /**
     * Copy a Node from one source document, adding it to the document
     * root of a different, target Document.  The node is deep-imported, so
     * the source node stays where it is.
     * @param source Node to copy
     * @param target Document to contain copy (must already have a root element)
     */
    public static void copyDocumentNode(Node source, Document target) {
        Node importedCopy = target.importNode(source, true);
        Element targetRoot = target.getDocumentElement();

        targetRoot.appendChild(importedCopy);
    }

    /**
     * Parse XML text (from an input stream) into a Document, using a
     * non-namespace-aware builder.  The stream is not closed here.
     * @param xmlStream The XML text stream
     * @return DOM Document
     * @throws DomException on any parse or builder failure
     */
    public static Document parseXmlStream(InputStream xmlStream) throws DomException {
        try {
            DocumentBuilder builder = getXmlDocumentBuilder();
            InputSource input = new InputSource(xmlStream);

            return builder.parse(input);

        } catch (Exception e) {
            throw new DomException(e.toString());
        }
    }

    /**
     * Parse XML text (from an input stream) into a Document - namespace aware.
     * The stream is not closed here.
     * @param xmlStream The XML text stream
     * @return DOM Document
     * @throws DomException on any parse or builder failure
     */
    public static Document parseXmlStreamNS(InputStream xmlStream) throws DomException {
        try {
            DocumentBuilder builder = getXmlDocumentBuilderNS();
            InputSource input = new InputSource(xmlStream);

            return builder.parse(input);

        } catch (Exception e) {
            throw new DomException(e.toString());
        }
    }

    /**
     * Parse XML text (from a Reader) into a Document, using a
     * non-namespace-aware builder.  The Reader is not closed here.
     * @param xmlReader The XML Reader
     * @return DOM Document
     * @throws DomException on any parse or builder failure
     */
    public static Document parseXmlReader(Reader xmlReader) throws DomException {
        try {
            DocumentBuilder builder = getXmlDocumentBuilder();
            InputSource input = new InputSource(xmlReader);

            return builder.parse(input);

        } catch (Exception e) {
            throw new DomException(e.toString());
        }
    }

    /**
     * Parse XML text (from a raw byte array) into a Document.  The bytes
     * are wrapped in an in-memory stream and handed to
     * {@link #parseXmlStream(InputStream)}.
     * @param xml The XML text
     * @return DOM Document
     * @throws DomException on any parse or builder failure
     */
    public static Document parseXmlBytes(byte[] xml) throws DomException {
        InputStream xmlStream = new ByteArrayInputStream(xml);

        return parseXmlStream(xmlStream);
    }

    /**
     * Parse XML text (from a raw byte array) into a Document - namespace aware.
     * The bytes are wrapped in an in-memory stream and handed to
     * {@link #parseXmlStreamNS(InputStream)}.
     * @param xml The XML text
     * @return DOM Document
     * @throws DomException on any parse or builder failure
     */
    public static Document parseXmlBytesNS(byte[] xml) throws DomException {
        InputStream xmlStream = new ByteArrayInputStream(xml);

        return parseXmlStreamNS(xmlStream);
    }

    /**
     * Parse XML text (from a string) into a Document.
     * <p>
     * The text is parsed as a character stream.  It is deliberately not
     * converted to bytes first: <code>String.getBytes()</code> encodes with
     * the platform default charset, which need not be the encoding the
     * parser assumes for a byte stream.
     *
     * @param xml The XML text
     * @return DOM Document
     * @throws DomException on any parse or builder failure
     */
    public static Document parseXmlString(String xml) throws DomException {
        // Tolerate a byte-order mark left at the front of an already-decoded string
        String text = ((xml.length() > 0) && (xml.charAt(0) == '\uFEFF')) ? xml.substring(1) : xml;

        return parseXmlReader(new StringReader(text));
    }

    /**
     * Parse an XML file into a Document, using a non-namespace-aware builder.
     * The name is handed directly to <code>DocumentBuilder.parse(String)</code>.
     * @param filename - The filename to parse
     * @return DOM Document
     * @throws DomException on any parse or builder failure
     */
    public static Document parseXmlFile(String filename) throws DomException {
        try {
            DocumentBuilder builder = getXmlDocumentBuilder();

            return builder.parse(filename);
        } catch (Exception exception) {
            throw new DomException(exception.toString());
        }
    }

    /**
     * Set up and configure an HTML DOM parser.  We specify a
     * default encoding value to be used when no encoding information
     * is available in the HTML document itself.
     *
     * An appropriate META tag will override this default:
     * <code>
     *   <meta http-equiv="Content-Type" content="text/html; charset=XXXX">
     * </code>
     *
     * @return The parser
     */

    /*******************************************************************************
        
      * We originally used the Neko HTML parser here.  This was a boon as it
      * gracefully handled both HTML and XML (which it wrapped in HTML and
      * BODY tags).  Sadly, it is closely tied to Xerces.
      *
      * At a future date, we'll look for an appropriate substitute.  At present,
      * parsing only XML is good enough (the Sirsi Web2 Bridge is the only
      * supported search source, and it's an XML API to SingleSearch).
        
      private static org.cyberneko.html.parsers.DOMParser newHtmlDomParser()
             throws SAXNotRecognizedException, SAXNotSupportedException {
        org.cyberneko.html.parsers.DOMParser domParser;
        
        domParser = new org.cyberneko.html.parsers.DOMParser();
        domParser.setProperty(ENCODING_OPTION, INPUT_ENCODING);
        
        return domParser;
      }
        
    *******************************************************************************/

    /**
     * Parse HTML from a Reader
     * @param reader Reader input
     * @return DOM Document
     * @throws DomException
     */

    /*******************************************************************************
      *
      * See notes on Neko HTML (above)
      *
        
      public static Document parseHtmlReader(Reader reader) throws DomException {
        return parseHtmlFromInputSource(new InputSource(reader));
      }
        
    *******************************************************************************/

    /**
     * Parse HTML from an InputSource
     * @param in InputSource
     * @return DOM Document
     * @throws DomException
     */

    /*******************************************************************************
      *
      * See notes on Neko HTML (above)
      *
        
      public static Document parseHtmlFromInputSource(InputSource in) throws DomException {
        try {
          org.cyberneko.html.parsers.DOMParser domParser;
        
          domParser = newHtmlDomParser();
          domParser.parse(in);
          return domParser.getDocument();
        
        } catch (Exception e) {
          throw new DomException(e.toString());
        }
      }
        
    *******************************************************************************/

    /**
     * Parse HTML text (from a raw byte array) into a Document.  The input
     * is currently handled by the namespace-aware XML stream parser.
     * @param html The HTML text
     * @return DOM Document
     * @throws DomException on any parse or builder failure
     *<p>
     * This used to be:
     *    <code>parseHtmlStream(new ByteArrayInputStream(html));</code>
     */
    public static Document parseHtmlBytes(byte[] html) throws DomException {
        InputStream htmlStream = new ByteArrayInputStream(html);

        return parseXmlStreamNS(htmlStream);
    }

    /**
     * Parse HTML text (from a String) into a Document.  The input is
     * currently handled by the (non-namespace-aware) XML Reader parser.
     * @param html The HTML text
     * @return DOM Document
     * @throws DomException on any parse or builder failure
     *<p>
     * This used to be:
     *    <code>return parseHtmlReader(new StringReader(html));</code>
     */
    public static Document parseHtmlString(String html) throws DomException {
        Reader htmlReader = new StringReader(html);

        return parseXmlReader(htmlReader);
    }

    /**
     * Write formatted (indented) XML text to the supplied OutputStream,
     * using a freshly created identity Transformer.  The stream is not
     * closed here.
     * @param node Node to write
     * @param target stream to write to
     * @throws DomException on any transformer failure
     */
    public static void serializeXml(Node node, OutputStream target) throws DomException {
        try {
            TransformerFactory transformerFactory = TransformerFactory.newInstance();
            Transformer transformer = transformerFactory.newTransformer();
            DOMSource input = new DOMSource(node);
            StreamResult output = new StreamResult(target);

            transformer.setOutputProperty(OutputKeys.INDENT, "yes");
            transformer.transform(input, output);

        } catch (Exception e) {
            throw new DomException(e.toString());
        }
    }

    /**
     * Write formatted (indented) XML text to the supplied Writer, using a
     * freshly created identity Transformer.  The Writer is not closed here.
     * @param node the Node to write
     * @param writer Writer the document is written to
     * @throws DomException on any transformer failure
     */
    public static void serializeXml(Node node, Writer writer) throws DomException {
        try {
            TransformerFactory transformerFactory = TransformerFactory.newInstance();
            Transformer transformer = transformerFactory.newTransformer();
            DOMSource input = new DOMSource(node);
            StreamResult output = new StreamResult(writer);

            transformer.setOutputProperty(OutputKeys.INDENT, "yes");
            transformer.transform(input, output);

        } catch (Exception e) {
            throw new DomException(e.toString());
        }
    }

    /**
     * Write formatted XML text to a String.
     * <p>
     * For a Document, only the document element (and everything beneath it)
     * is serialized.  The text is written through a writer that encodes as
     * {@link #ENCODING} and is decoded again with that same encoding, so the
     * result does not depend on the platform default charset.
     *
     * @param object The XML Document, HTML Document, or Element to write
     * @return String containing the formatted document text
     * @throws DomException on any serialization failure, including an
     *         object that is neither a Document nor an Element (null included)
     */
    public static String serialize(Object object) throws DomException {
        ByteArrayOutputStream stream = null;
        Writer writer = null;

        try {
            stream = new ByteArrayOutputStream();
            writer = new OutputStreamWriter(stream, ENCODING);

            if (object instanceof Document) {
                serializeXml((Node) ((Document) object).getDocumentElement(), writer);
            } else if (object instanceof Element) {
                serializeXml((Node) object, writer);
            } else {
                // String concatenation copes with a null object (no NPE)
                throw new IllegalArgumentException("Unexpected object for serialzation: " + object);
            }

            // Make sure nothing is left behind in the writer's encoder buffer
            writer.flush();
            // Decode with the charset the writer encoded with, not the platform default
            return stream.toString(ENCODING);

        } catch (Exception e) {
            throw new DomException(e.toString());

        } finally {
            // Best-effort cleanup of in-memory resources; a failure to close
            // must not mask the result or the original exception
            try {
                if (writer != null)
                    writer.close();
            } catch (Exception ignore) {
            }
            try {
                if (stream != null)
                    stream.close();
            } catch (Exception ignore) {
            }
        }
    }
}