com.opendoorlogistics.core.utils.XMLUtils.java Source code

Java tutorial

Introduction

Here is the source code for com.opendoorlogistics.core.utils.XMLUtils.java

Source

/*******************************************************************************
 * Copyright (c) 2014 Open Door Logistics (www.opendoorlogistics.com)
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the GNU Lesser Public License v3
 * which accompanies this distribution, and is available at http://www.gnu.org/licenses/lgpl.txt
 ******************************************************************************/
package com.opendoorlogistics.core.utils;

import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.StringReader;
import java.io.StringWriter;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import org.apache.commons.io.IOUtils;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.xml.sax.InputSource;

import com.sun.org.apache.xml.internal.serialize.OutputFormat;
import com.sun.org.apache.xml.internal.serialize.XMLSerializer;

/**
 * Static helpers for reading XML into DOM {@link Document}s and for turning DOM nodes back into strings.
 *
 * <p>
 * Documents produced by {@link #load(File)} and {@link #parse(String)} have whitespace-only text nodes stripped
 * from every parent that also holds element, CDATA or comment children, and are parsed with external entity and
 * external DTD loading switched off.
 */
public final class XMLUtils {
    /** SAX feature controlling whether external general entities ({@code &name;}) are resolved. */
    private static final String FEATURE_EXTERNAL_GENERAL_ENTITIES = "http://xml.org/sax/features/external-general-entities";

    /** SAX feature controlling whether external parameter entities ({@code %name;}) are resolved. */
    private static final String FEATURE_EXTERNAL_PARAMETER_ENTITIES = "http://xml.org/sax/features/external-parameter-entities";

    /** Xerces feature controlling whether an external DTD is fetched by a non-validating parser. */
    private static final String FEATURE_LOAD_EXTERNAL_DTD = "http://apache.org/xml/features/nonvalidating/load-external-dtd";

    private XMLUtils() {
    }

    /**
     * Serialises a DOM node to a string with a default {@link Transformer}. No output properties are set, so the
     * transformer's own defaults decide indentation and the XML declaration.
     *
     * @param node
     *            the node (or whole document) to serialise
     * @return the XML text
     * @throws RuntimeException
     *             wrapping any exception raised while creating or running the transformer
     */
    public static String toString(Node node) {
        try {
            Transformer transformer = TransformerFactory.newInstance().newTransformer();
            StringWriter writer = new StringWriter();
            transformer.transform(new DOMSource(node), new StreamResult(writer));
            return writer.toString();
        } catch (Exception e) {
            // Exception rather than Throwable: JVM Errors must propagate untouched.
            throw new RuntimeException(e);
        }
    }

    /**
     * Creates a new output format for pretty printing: indenting switched on with an indent of 2, a line width of
     * 120 and UTF-8 encoding.
     *
     * @return a fresh format instance; callers may modify it freely
     */
    public static OutputFormat getPrettyPrintFormat() {
        OutputFormat format = new OutputFormat();
        format.setLineWidth(120);
        format.setIndenting(true);
        format.setIndent(2);
        format.setEncoding("UTF-8");
        return format;
    }

    /**
     * Serialises a DOM node to a string using the supplied output format.
     *
     * @param doc
     *            the node (or whole document) to serialise
     * @param format
     *            the formatting options, e.g. from {@link #getPrettyPrintFormat()}
     * @return the XML text
     * @throws RuntimeException
     *             wrapping any exception raised by the serialiser
     */
    public static String toString(Node doc, OutputFormat format) {
        try {
            StringWriter stringOut = new StringWriter();
            XMLSerializer serial = new XMLSerializer(stringOut, format);
            serial.serialize(doc);
            return stringOut.toString();
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Parses an XML file into a DOM document and strips its whitespace-only text nodes.
     *
     * @param file
     *            the file to read
     * @return the parsed document, or {@code null} if the file does not exist
     * @throws RuntimeException
     *             wrapping any exception raised while reading or parsing (including a {@code null} file)
     */
    public static Document load(File file) {
        try {
            if (!file.exists()) {
                // Missing files are reported with null rather than an exception; callers rely on this.
                return null;
            }
            Document doc = newHardenedDocumentBuilder().parse(file);
            TextNodesRemover.cleanEmptyTextNodes(doc);
            return doc;
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Parses XML text into a DOM document and strips its whitespace-only text nodes.
     *
     * <p>
     * The text is handed to the parser as a character stream, so an {@code encoding} pseudo-attribute in the XML
     * declaration cannot cause the already-decoded characters to be re-decoded with the wrong charset.
     *
     * @param xml
     *            the XML text
     * @return the parsed document
     * @throws RuntimeException
     *             wrapping any exception raised while parsing (including a {@code null} or malformed input)
     */
    public static Document parse(String xml) {
        try {
            Document doc = newHardenedDocumentBuilder().parse(new InputSource(new StringReader(xml)));
            TextNodesRemover.cleanEmptyTextNodes(doc);
            return doc;
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Creates a document builder that does not resolve external entities or fetch external DTDs, closing the
     * XML external entity (XXE) hole of a default-configured factory. Documents with a DOCTYPE and internal
     * entities are still accepted.
     *
     * @throws ParserConfigurationException
     *             if the parser implementation does not support one of the hardening features
     */
    private static DocumentBuilder newHardenedDocumentBuilder() throws ParserConfigurationException {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        factory.setFeature(FEATURE_EXTERNAL_GENERAL_ENTITIES, false);
        factory.setFeature(FEATURE_EXTERNAL_PARAMETER_ENTITIES, false);
        factory.setFeature(FEATURE_LOAD_EXTERNAL_DTD, false);
        return factory.newDocumentBuilder();
    }

    /**
     * See http://stackoverflow.com/questions/16641835/strange-xml-indentation. Removes text nodes that contain only
     * whitespace. A whitespace-only text node is removed only if its parent has at least one child of one of the
     * following types: ELEMENT, CDATA section or COMMENT. A parent whose children are all text is left untouched,
     * so an element such as {@code <a>   </a>} keeps its content.
     *
     * The purpose of this is to make the output of the toString methods more consistent regarding indenting and
     * line breaks.
     */
    private static class TextNodesRemover {

        private TextNodesRemover() {
        }

        /**
         * Cleans the subtree below {@code parentNode}: recurses into every element child, then strips
         * whitespace-only text children of {@code parentNode} if it has any structural child.
         */
        private static void cleanEmptyTextNodes(Node parentNode) {
            boolean hasStructuralChild = false;
            for (Node child = parentNode.getFirstChild(); child != null; child = child.getNextSibling()) {
                if (child.getNodeType() == Node.ELEMENT_NODE) {
                    // Only the child's own children are modified, so this sibling walk stays valid.
                    cleanEmptyTextNodes(child);
                }
                hasStructuralChild |= isStructuralNode(child);
            }

            if (hasStructuralChild) {
                removeEmptyTextNodes(parentNode);
            }
        }

        /** Removes every direct text child of {@code parentNode} whose value is empty after trimming. */
        private static void removeEmptyTextNodes(Node parentNode) {
            Node child = parentNode.getFirstChild();
            while (child != null) {
                // Grab the next sibling before the child is possibly detached from its parent.
                Node next = child.getNextSibling();
                if (child.getNodeType() == Node.TEXT_NODE && child.getNodeValue().trim().isEmpty()) {
                    parentNode.removeChild(child);
                }
                child = next;
            }
        }

        /** Returns true for the node types whose presence makes sibling whitespace insignificant. */
        private static boolean isStructuralNode(Node node) {
            short nodeType = node.getNodeType();
            return nodeType == Node.ELEMENT_NODE || nodeType == Node.CDATA_SECTION_NODE
                    || nodeType == Node.COMMENT_NODE;
        }

    }
}