com.adaptris.core.util.XmlHelper.java Source code

Java tutorial

Introduction

Here is the source code for com.adaptris.core.util.XmlHelper.java

Source

/*
 * Copyright 2015 Adaptris Ltd.
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *     http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
*/

package com.adaptris.core.util;

import static org.apache.commons.lang.StringUtils.defaultIfBlank;
import static org.apache.commons.lang.StringUtils.isBlank;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.PrintStream;
import java.io.StringReader;

import javax.xml.namespace.NamespaceContext;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;

import org.apache.xerces.util.XMLChar;
import org.w3c.dom.Document;
import org.xml.sax.ErrorHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;

import com.adaptris.core.AdaptrisMessage;
import com.adaptris.core.CoreException;
import com.adaptris.util.XmlUtils;

/**
 * Helper class for handling XML within an {@linkplain com.adaptris.core.AdaptrisMessage}
 *
 * @author lchan
 * @author $Author: $
 */
public class XmlHelper {
    // Reference character ranges taken from the XML specifications. The XML10 / XML11 declarations
    // below are commented out and therefore unused; only ILLEGAL_XML is live.

    // XML 1.0
    // #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
    // private static final String XML10 = "[^" + "\u0009\r\n" + "\u0020-\uD7FF" + "\uE000-\uFFFD" + "\ud800\udc00-\udbff\udfff"
    // + "]";

    // XML 1.1
    // [#x1-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
    // private static final String XML11 = "[^" + "\u0001-\uD7FF" + "\uE000-\uFFFD" + "\ud800\udc00-\udbff\udfff" + "]+";

    // Regular expression (a negated character class) that matches a single character which is NOT in the
    // XML 1.0 range listed above: tab, CR, LF, U+0020-U+D7FF, U+E000-U+FFFD and the supplementary range
    // U+10000-U+10FFFF (written as its first and last surrogate pairs).
    // Used with String#replaceAll by safeElementName and stripIllegalXmlCharacters.
    private static final String ILLEGAL_XML = "[^" + "\u0009\r\n" + "\u0020-\uD7FF" + "\uE000-\uFFFD"
            + "\ud800\udc00-\udbff\udfff" + "]";

    // Each entry is used as a regular expression by safeElementName; every entry matches one literal
    // character (backslash, ?, *, :, space, |, &, double quote, single quote, <, >, ), (, / and #)
    // which is replaced by ELEM_REPL_VALUE.
    private static final String[] INVALID_ELEMENT_CHARS = { "\\\\", "\\?", "\\*", "\\:", " ", "\\|", "&", "\\\"",
            "\\'", "<", ">", "\\)", "\\(", "\\/", "#" };
    // Replacement text used by safeElementName, both for the characters above and as a prefix when the
    // first character of the name is not a valid XML name start character.
    private static final String ELEM_REPL_VALUE = "_";

    /**
     * Create an {@link XmlUtils} instance from an AdaptrisMessage without a namespace context.
     * <p>
     * Simply delegates to {@link #createXmlUtils(AdaptrisMessage, NamespaceContext)} passing a {@code null}
     * context.
     * </p>
     * 
     * @param msg the Adaptris message
     * @return an XmlUtils instance
     * @throws CoreException if the msg does not contain valid XML.
     * @deprecated Consider using
     *             {@link #createXmlUtils(AdaptrisMessage, NamespaceContext, DocumentBuilderFactoryBuilder)} instead.
     */
    @Deprecated
    public static XmlUtils createXmlUtils(AdaptrisMessage msg) throws CoreException {
        return createXmlUtils(msg, null);
    }

    /**
     * Create an {@link XmlUtils} instance from an AdaptrisMessage, using a freshly created
     * {@link DocumentBuilderFactoryBuilder} for the parser configuration.
     * 
     * @param msg the Adaptris message
     * @param ctx the NamespaceContext.
     * @return an XmlUtils instance
     * @throws CoreException if the msg does not contain valid XML.
     * @deprecated since 3.1.0 use {@link #createDocument(AdaptrisMessage, DocumentBuilderFactoryBuilder)} instead.
     */
    @Deprecated
    public static XmlUtils createXmlUtils(AdaptrisMessage msg, NamespaceContext ctx) throws CoreException {
        DocumentBuilderFactoryBuilder defaultBuilder = DocumentBuilderFactoryBuilder.newInstance();
        return createXmlUtils(msg, ctx, defaultBuilder);
    }

    /**
     * Create an {@link XmlUtils} instance whose source is the payload of an AdaptrisMessage.
     * <p>
     * While the payload is being parsed {@code System.out} and {@code System.err} are temporarily replaced by an
     * in-memory buffer so that parser diagnostics such as {@code "[Fatal Error] :1:1: Content is not allowed in prolog"}
     * are captured rather than printed. The streams are restored before this method returns. Note that
     * {@code System.setOut} / {@code System.setErr} are JVM-wide, so anything other threads print during the parse
     * is captured as well.
     * </p>
     * 
     * @param msg the Adaptris message
     * @param ctx the NamespaceContext.
     * @param builder configuration for the underlying {@link DocumentBuilderFactory} instance.
     * @return an XmlUtils instance
     * @throws CoreException if the msg does not contain valid XML; when parsing failed with an exception that
     *         exception is attached as the cause.
     */
    public static XmlUtils createXmlUtils(AdaptrisMessage msg, NamespaceContext ctx,
            DocumentBuilderFactoryBuilder builder) throws CoreException {
        XmlUtils result = null;
        // Remembered so that the root cause is not lost when the CoreException is raised below.
        Exception failure = null;
        DivertConsoleOutput dc = new DivertConsoleOutput();

        try (InputStream input = msg.getInputStream()) {
            DocumentBuilderFactory dbf = DocumentBuilderFactoryBuilder.newInstance(builder).withNamespaceAware(ctx)
                    .build();
            result = new XmlUtils(ctx, dbf);
            InputSource in = new InputSource(input);
            // Divert the console for the duration of the parse only, so the parser's own
            // System.err messages end up in the buffer instead of on the console.
            dc.divert();
            result.setSource(in);
        } catch (Exception e) {
            result = null;
            failure = e;
        } finally {
            // Always restore the real streams, even if divert() was never reached.
            dc.resume();
        }
        String console = dc.consoleOutput();
        // A parse can fail without throwing, in which case the only evidence is the captured console output.
        if (console.contains("[Fatal Error]") || result == null) {
            String description = "Document " + msg.getUniqueId()
                    + " does not appear to be an XML Document, error was [" + console + "]";
            throw failure != null ? new CoreException(description, failure) : new CoreException(description);
        }
        return result;
    }

    /**
     * Create a document from an AdaptrisMessage using a parser that is not namespace aware.
     * 
     * @param msg the AdaptrisMessage
     * @return the Document element
     * @throws ParserConfigurationException if the underlying parser could not be created.
     * @throws IOException if the message payload could not be read.
     * @throws SAXException if the message payload could not be parsed.
     * @see #createDocument(AdaptrisMessage, boolean)
     * @deprecated Consider using {@link #createDocument(AdaptrisMessage, DocumentBuilderFactoryBuilder)} instead.
     */
    @Deprecated
    public static Document createDocument(AdaptrisMessage msg)
            throws ParserConfigurationException, IOException, SAXException {
        final boolean namespaceAware = false;
        return createDocument(msg, namespaceAware);
    }

    /**
     * Create a document from an AdaptrisMessage, choosing whether the parser is namespace aware.
     * 
     * @param msg the AdaptrisMessage
     * @param namespaceAware whether or not the document builder factory used will be namespace aware.
     * @return the Document element
     * @throws ParserConfigurationException if the underlying parser could not be created.
     * @throws IOException if the message payload could not be read.
     * @throws SAXException if the message payload could not be parsed.
     * @deprecated since 3.1.0 use {@link #createDocument(AdaptrisMessage, DocumentBuilderFactoryBuilder)} instead.
     */
    @Deprecated
    public static Document createDocument(AdaptrisMessage msg, boolean namespaceAware)
            throws ParserConfigurationException, IOException, SAXException {
        DocumentBuilderFactoryBuilder cfg = DocumentBuilderFactoryBuilder.newInstance();
        return createDocument(msg, cfg.withNamespaceAware(namespaceAware));
    }

    /**
     * Create a document from the payload of an AdaptrisMessage; a parse failure is reported as an exception.
     * 
     * @param msg the AdaptrisMessage
     * @param builder configuration for the underlying {@link DocumentBuilderFactory} instance.
     * @return the Document element
     * @throws ParserConfigurationException if the underlying parser could not be created.
     * @throws IOException if the message payload could not be read.
     * @throws SAXException if the message payload could not be parsed.
     */
    public static Document createDocument(AdaptrisMessage msg, DocumentBuilderFactoryBuilder builder)
            throws ParserConfigurationException, IOException, SAXException {
        InputStream payload = msg.getInputStream();
        return createDocument(payload, builder, false);
    }

    /**
     * Create a document from the payload of an AdaptrisMessage, optionally falling back to a new empty document.
     * 
     * @param msg the AdaptrisMessage
     * @param builder configuration for the underlying {@link DocumentBuilderFactory} instance.
     * @param newDocOnFailure create a new Document if the msg failed to parse.
     * @return the Document element
     * @throws ParserConfigurationException if the underlying parser could not be created.
     * @throws IOException if the payload could not be read and {@code newDocOnFailure} is false.
     * @throws SAXException if the payload could not be parsed and {@code newDocOnFailure} is false.
     */
    public static Document createDocument(AdaptrisMessage msg, DocumentBuilderFactoryBuilder builder,
            boolean newDocOnFailure) throws ParserConfigurationException, IOException, SAXException {
        InputStream payload = msg.getInputStream();
        return createDocument(payload, builder, newDocOnFailure);
    }

    /**
     * Convenience method to create a document from an AdaptrisMessage.
     * <p>
     * The namespace context is only inspected for presence: a non-null context makes the parser namespace aware.
     * </p>
     * 
     * @param msg the AdaptrisMessage
     * @param namespaceContext the name space context, if null, then we will not be namespace aware.
     * @return the Document element
     * @throws ParserConfigurationException if the underlying parser could not be created.
     * @throws IOException if the message payload could not be read.
     * @throws SAXException if the message payload could not be parsed.
     * @deprecated since 3.1.0 use {@link #createDocument(AdaptrisMessage, DocumentBuilderFactoryBuilder)} instead.
     */
    @Deprecated
    public static Document createDocument(AdaptrisMessage msg, NamespaceContext namespaceContext)
            throws ParserConfigurationException, IOException, SAXException {
        boolean namespaceAware = namespaceContext != null;
        return createDocument(msg, namespaceAware);
    }

    /**
     * Build a {@link DocumentBuilder} from the supplied configuration and install a {@code DefaultErrorHandler}
     * on it, so that parser warnings, errors and fatal errors are all raised as exceptions.
     *
     * @param cfg configuration for the underlying {@link DocumentBuilderFactory} instance.
     * @return a new DocumentBuilder
     * @throws ParserConfigurationException if the DocumentBuilder could not be created.
     */
    private static DocumentBuilder newDocumentBuilder(DocumentBuilderFactoryBuilder cfg)
            throws ParserConfigurationException {
        DocumentBuilderFactory factory = DocumentBuilderFactoryBuilder.newInstance(cfg).build();
        DocumentBuilder docBuilder = factory.newDocumentBuilder();
        docBuilder.setErrorHandler(new DefaultErrorHandler());
        return docBuilder;
    }

    /**
     * Create a document from a String using a parser that is not namespace aware.
     *
     * @param s the string containing XML
     * @return the Document element
     * @throws ParserConfigurationException if the underlying parser could not be created.
     * @throws IOException if the string could not be read.
     * @throws SAXException if the string could not be parsed.
     * @deprecated since 3.1.0 use {@link #createDocument(String, DocumentBuilderFactoryBuilder)} instead.
     */
    @Deprecated
    public static Document createDocument(String s) throws ParserConfigurationException, IOException, SAXException {
        final boolean namespaceAware = false;
        return createDocument(s, namespaceAware);
    }

    /**
     * Convenience method to create a document from a String.
     * <p>
     * The namespace context is only inspected for presence: a non-null context makes the parser namespace aware.
     * </p>
     *
     * @param s the string containing XML
     * @param namespaceContext the namespace context, if null, then we will not be namespace aware.
     * @return the Document element
     * @throws ParserConfigurationException if the underlying parser could not be created.
     * @throws IOException if the string could not be read.
     * @throws SAXException if the string could not be parsed.
     * @deprecated since 3.1.0 use {@link #createDocument(String, DocumentBuilderFactoryBuilder)} instead.
     */
    @Deprecated
    public static Document createDocument(String s, NamespaceContext namespaceContext)
            throws ParserConfigurationException, IOException, SAXException {
        boolean namespaceAware = namespaceContext != null;
        return createDocument(s, namespaceAware);
    }

    /**
     * Create a document from a String, choosing whether the parser is namespace aware.
     *
     * @param s the string containing XML
     * @param namespaceAware whether or not the document builder factory used will be namespace aware.
     * @return the Document element
     * @throws ParserConfigurationException if the underlying parser could not be created.
     * @throws IOException if the string could not be read.
     * @throws SAXException if the string could not be parsed.
     * @deprecated since 3.1.0 use {@link #createDocument(String, DocumentBuilderFactoryBuilder)} instead.
     */
    @Deprecated
    public static Document createDocument(String s, boolean namespaceAware)
            throws ParserConfigurationException, IOException, SAXException {
        DocumentBuilderFactoryBuilder cfg = DocumentBuilderFactoryBuilder.newInstance();
        return createDocument(s, cfg.withNamespaceAware(namespaceAware));
    }

    /**
     * Create a document from a String; a parse failure is reported as an exception.
     * 
     * @param s the string containing XML
     * @param builder configuration for the underlying {@link DocumentBuilderFactory} instance.
     * @return the Document element
     * @throws ParserConfigurationException if the underlying parser could not be created.
     * @throws IOException if the string could not be read.
     * @throws SAXException if the string could not be parsed.
     */
    public static Document createDocument(String s, DocumentBuilderFactoryBuilder builder)
            throws ParserConfigurationException, IOException, SAXException {
        final boolean newDocOnFailure = false;
        return createDocument(s, builder, newDocOnFailure);
    }

    /**
     * Create a document from a String, optionally falling back to a new empty document.
     * 
     * @param s the string containing XML
     * @param builder configuration for the underlying {@link DocumentBuilderFactory} instance.
     * @param newDocOnFailure return a new document if the String is not XML.
     * @return the parsed Document, or a new Document when parsing failed and {@code newDocOnFailure} is true.
     * @throws ParserConfigurationException if the underlying parser could not be created.
     * @throws IOException if the string could not be read and {@code newDocOnFailure} is false.
     * @throws SAXException if the string could not be parsed and {@code newDocOnFailure} is false.
     */
    public static Document createDocument(String s, DocumentBuilderFactoryBuilder builder, boolean newDocOnFailure)
            throws ParserConfigurationException, IOException, SAXException {
        DocumentBuilder parser = newDocumentBuilder(builder);
        try (StringReader reader = new StringReader(s)) {
            return parser.parse(new InputSource(reader));
        } catch (IOException | SAXException e) {
            // Only swallow the failure when the caller explicitly asked for a fallback document.
            if (!newDocOnFailure) {
                throw e;
            }
            return parser.newDocument();
        }
    }

    /**
     * Create a document from an {@code InputStream}; a parse failure is reported as an exception.
     * 
     * @param in the inputstream
     * @param builder configuration for the underlying {@link DocumentBuilderFactory} instance.
     * @return the Document element
     * @throws ParserConfigurationException if the underlying parser could not be created.
     * @throws IOException if the stream could not be read.
     * @throws SAXException if the stream content could not be parsed.
     */
    public static Document createDocument(InputStream in, DocumentBuilderFactoryBuilder builder)
            throws ParserConfigurationException, IOException, SAXException {
        final boolean newDocOnFailure = false;
        return createDocument(in, builder, newDocOnFailure);
    }

    /**
     * Create a document from an {@code InputStream}.
     * <p>
     * The stream is always closed by this method, including when the parser could not be configured.
     * </p>
     * 
     * @param in the inputstream
     * @param builder configuration for the underlying {@link DocumentBuilderFactory} instance.
     * @param newDocOnFailure return a new document if the input stream failed to parse.
     * @return the parsed Document, or a new Document when parsing failed and {@code newDocOnFailure} is true.
     * @throws ParserConfigurationException if the underlying parser could not be created.
     * @throws IOException if the stream could not be read and {@code newDocOnFailure} is false.
     * @throws SAXException if the stream content could not be parsed and {@code newDocOnFailure} is false.
     */
    public static Document createDocument(InputStream in, DocumentBuilderFactoryBuilder builder,
            boolean newDocOnFailure) throws ParserConfigurationException, IOException, SAXException {
        DocumentBuilder docBuilder = null;
        Document result = null;
        try (InputStream docIn = in) {
            // Created inside the try-with-resources so that a ParserConfigurationException no longer
            // leaves the caller's stream open.
            docBuilder = newDocumentBuilder(builder);
            result = docBuilder.parse(new InputSource(docIn));
        } catch (IOException | SAXException e) {
            // docBuilder is never null here: the caught types can only come from parse() or from closing
            // the stream after the body completed, both of which happen after the builder was created.
            if (newDocOnFailure) {
                result = docBuilder.newDocument();
            } else {
                throw e;
            }
        }
        return result;
    }

    /**
     * Make a safe element name by stripping out illegal XML characters and illegal element characters.
     * <p>
     * The input is trimmed and has all characters that are illegal in XML 1.0 removed. If nothing is left the
     * default is returned as-is. Otherwise:
     * </p>
     * <ul>
     * <li>the name is prefixed with an underscore if its first character may not start an XML name;</li>
     * <li>each of the characters <code>\ ? * : space | &amp; " ' &lt; &gt; ) ( / #</code> is replaced by an
     * underscore;</li>
     * <li>any remaining character that {@code XMLChar.isName} rejects (for instance a tab, a newline,
     * <code>$</code>, <code>!</code> or <code>=</code>) is also replaced by an underscore.</li>
     * </ul>
     * 
     * @param input the input string.
     * @param defaultIfBlank the default element name if the input resolves to the blank string (whitespace is trimmed).
     * @return a safe element name that conforms to the specification.
     */
    public static String safeElementName(String input, String defaultIfBlank) {
        String name = defaultIfBlank(input, "").trim();
        name = name.replaceAll(ILLEGAL_XML, "");
        if (isBlank(name)) {
            return defaultIfBlank;
        }
        // The name-start check is made against the raw first character, before any replacement,
        // so that names which were already produced correctly keep exactly the same shape.
        if (!XMLChar.isNameStart((name.charAt(0)))) {
            name = ELEM_REPL_VALUE + name;
        }
        for (String invalid : INVALID_ELEMENT_CHARS) {
            name = name.replaceAll(invalid, ELEM_REPL_VALUE);
        }
        // The fixed list above does not cover every character that is illegal in a name, so make a
        // final sweep by code point and replace whatever is still not a valid name character.
        StringBuilder safe = new StringBuilder(name.length());
        for (int i = 0; i < name.length();) {
            int codePoint = name.codePointAt(i);
            i += Character.charCount(codePoint);
            if (XMLChar.isName(codePoint)) {
                safe.appendCodePoint(codePoint);
            } else {
                safe.append(ELEM_REPL_VALUE);
            }
        }
        return safe.toString();
    }

    /**
     * Strip illegal XML characters from data.
     * <p>
     * Every character that is not valid in XML 1.0 is removed; the characters that are kept are tab, carriage
     * return, line feed, U+0020 to U+D7FF, U+E000 to U+FFFD and U+10000 to U+10FFFF.
     * </p>
     * 
     * @param input the input string
     * @return a string where all the invalid characters have been removed.
     */
    public static String stripIllegalXmlCharacters(String input) {
        final String cleaned = input.replaceAll(ILLEGAL_XML, "");
        return cleaned;
    }

    /**
     * Write an XML document to the message payload and record the encoding used on the message.
     * <p>
     * The encoding is resolved by {@link #getXmlEncoding(AdaptrisMessage, String)}; the message's content encoding
     * is only updated once the document has been written and the output stream closed.
     * </p>
     * 
     * @param doc the document
     * @param msg the message
     * @param encoding will default to "UTF-8" if not specified, and the msg does not have a declared content encoding.
     * @throws Exception if the document could not be written.
     */
    public static void writeXmlDocument(Document doc, AdaptrisMessage msg, String encoding) throws Exception {
        final String charset = getXmlEncoding(msg, encoding);
        try (OutputStream payload = msg.getOutputStream()) {
            XmlUtils writer = new XmlUtils();
            writer.writeDocument(doc, payload, charset);
        }
        msg.setContentEncoding(charset);
    }

    /**
     * Figure out what encoding to use when writing a document.
     * 
     * @param msg the message
     * @param enc the configured encoding, if any.
     * @return {@code enc} if it is not blank, otherwise {@code AdaptrisMessage#getContentEncoding()} if that is not
     *         blank, otherwise 'UTF-8'.
     */
    public static String getXmlEncoding(AdaptrisMessage msg, String enc) {
        // An explicitly configured encoding always wins.
        if (!isBlank(enc)) {
            return enc;
        }
        // Next preference is whatever the message itself declares.
        if (!isBlank(msg.getContentEncoding())) {
            return msg.getContentEncoding();
        }
        return "UTF-8";
    }

    /**
     * Strict {@link ErrorHandler}: every notification from the parser, whatever its severity, is rethrown so
     * that the parse is aborted with the reported exception.
     */
    private static class DefaultErrorHandler implements ErrorHandler {

        /** Warnings are not tolerated; the reported exception is thrown unchanged. */
        @Override
        public void warning(SAXParseException problem) throws SAXException {
            throw problem;
        }

        /** Recoverable errors are thrown unchanged. */
        @Override
        public void error(SAXParseException problem) throws SAXException {
            throw problem;
        }

        /** Fatal errors are thrown unchanged. */
        @Override
        public void fatalError(SAXParseException problem) throws SAXException {
            throw problem;
        }
    }

    /**
     * Temporarily redirects {@code System.out} and {@code System.err} into an in-memory buffer.
     * <p>
     * The streams that were current when the instance was constructed are remembered and put back by
     * {@link #resume()}; whatever was printed in between is available from {@link #consoleOutput()}.
     * </p>
     */
    private static final class DivertConsoleOutput {

        private final PrintStream originalErr;
        private final PrintStream originalOut;
        private final ByteArrayOutputStream captured;
        private final PrintStream capture;

        /** Remember the current console streams and prepare an auto-flushing capture stream. */
        DivertConsoleOutput() {
            originalErr = System.err;
            originalOut = System.out;
            captured = new ByteArrayOutputStream();
            capture = new PrintStream(captured, true);
        }

        /** Point both System.err and System.out at the capture buffer. */
        void divert() {
            try {
                System.setErr(capture);
                System.setOut(capture);
            } catch (SecurityException ignored) {
                // Best effort only: if the streams may not be replaced, carry on without diverting.
            }
        }

        /** Put back the streams remembered at construction time and flush the capture stream. */
        void resume() {
            try {
                System.setErr(originalErr);
                System.setOut(originalOut);
            } catch (SecurityException ignored) {
                // Best effort only, as in divert().
            }
            capture.flush();
        }

        /** @return everything written to the capture buffer so far. */
        String consoleOutput() {
            return captured.toString();
        }
    }
}