org.mycore.common.xml.MCRXMLHelper.java Source code

Java tutorial

Introduction

Here is the source code for org.mycore.common.xml.MCRXMLHelper.java

Source

/*
 * 
 * $Revision$ $Date$
 *
 * This file is part of ***  M y C o R e  ***
 * See http://www.mycore.de/ for details.
 *
 * This program is free software; you can use it, redistribute it
 * and / or modify it under the terms of the GNU General Public License
 * (GPL) as published by the Free Software Foundation; either version 2
 * of the License or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program, in a file called gpl.txt or license.txt.
 * If not, write to the Free Software Foundation Inc.,
 * 59 Temple Place - Suite 330, Boston, MA  02111-1307 USA
 */

package org.mycore.common.xml;

import java.io.IOException;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Objects;

import javax.xml.XMLConstants;
import javax.xml.transform.TransformerException;
import javax.xml.validation.Schema;
import javax.xml.validation.SchemaFactory;
import javax.xml.validation.Validator;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.jdom2.Attribute;
import org.jdom2.Comment;
import org.jdom2.Content;
import org.jdom2.DocType;
import org.jdom2.Document;
import org.jdom2.Element;
import org.jdom2.Namespace;
import org.jdom2.Parent;
import org.jdom2.ProcessingInstruction;
import org.jdom2.Text;
import org.jdom2.Verifier;
import org.jdom2.output.Format;
import org.jdom2.output.XMLOutputter;
import org.jdom2.transform.JDOMSource;
import org.mycore.common.MCRConstants;
import org.mycore.common.MCRException;
import org.mycore.common.content.MCRByteContent;
import org.mycore.common.content.streams.MCRByteArrayOutputStream;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;

import com.google.gson.JsonArray;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import com.google.gson.JsonPrimitive;

/**
 * This class provides some static utility methods to deal with XML/DOM
 * elements, nodes etc.
 * 
 * @author Detlev Degenhardt
 * @author Frank Lützenkirchen
 * @author Thomas Scheffler (yagee)
 */
public class MCRXMLHelper {

    private static final Logger LOGGER = LogManager.getLogger(MCRXMLHelper.class);

    /**
     * Removes characters that are illegal in XML text nodes or attribute
     * values.
     * <p>
     * The text is filtered by Unicode code point, not by UTF-16 code unit, so
     * a valid supplementary character (stored as a surrogate pair) is kept as
     * a whole, while an unpaired surrogate is dropped like any other illegal
     * character.
     * </p>
     * 
     * @param text
     *            the String that should be used in XML elements or attributes
     * @return the String with all illegal characters removed;
     *         <code>null</code> and the empty String are returned unchanged
     */
    public static String removeIllegalChars(String text) {
        // Do not use trim() here: it strips every char up to U+0020, so a text made
        // only of illegal control characters would look "blank" and slip through.
        if (text == null || text.isEmpty()) {
            return text;
        }
        if (org.jdom2.Verifier.checkCharacterData(text) == null) {
            return text;
        }

        // It seems we have to filter out invalid XML characters...
        StringBuilder sb = new StringBuilder(text.length());
        int i = 0;
        while (i < text.length()) {
            // read a full code point so that both halves of a surrogate pair are judged together
            int codePoint = text.codePointAt(i);
            if (Verifier.isXMLCharacter(codePoint)) {
                sb.appendCodePoint(codePoint);
            }
            i += Character.charCount(codePoint);
        }
        return sb.toString();
    }

    /**
     * Validates <code>doc</code> against the XML Schema found at
     * <code>schemaURI</code>.
     * <p>
     * The schema source is obtained via {@link MCRURIResolver}. If resolving
     * fails with a {@link TransformerException}, its cause is rethrown when it
     * is a {@link SAXException} or an {@link IOException}; in every other case
     * the exception itself is wrapped in an {@link IOException}.
     * </p>
     * 
     * @param doc document to be validated
     * @param schemaURI URI of XML Schema document
     * @throws SAXException if validation fails
     * @throws IOException if resolving resources fails
     */
    public static void validate(Document doc, String schemaURI) throws SAXException, IOException {
        SchemaFactory schemaFactory = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI);
        schemaFactory.setResourceResolver(MCREntityResolver.instance());
        Schema xmlSchema;
        try {
            xmlSchema = schemaFactory.newSchema(MCRURIResolver.instance().resolve(schemaURI, null));
        } catch (TransformerException resolveFailure) {
            // instanceof is false for null, so a missing cause ends up in the generic wrapper below
            Throwable rootCause = resolveFailure.getCause();
            if (rootCause instanceof SAXException) {
                throw (SAXException) rootCause;
            }
            if (rootCause instanceof IOException) {
                throw (IOException) rootCause;
            }
            throw new IOException(resolveFailure);
        }
        Validator schemaValidator = xmlSchema.newValidator();
        schemaValidator.setResourceResolver(MCREntityResolver.instance());
        schemaValidator.validate(new JDOMSource(doc));
    }

    /**
     * Serializes JDOM content to a gson JSON element.
     * <p>
     * Elements are turned into JSON objects and text nodes into JSON
     * primitives; any other kind of content yields <code>null</code>.
     * </p>
     * 
     * @see JDOMtoGSONSerializer
     * 
     * @param content the jdom content to serialize
     * @return a gson element, or <code>null</code> if the content is neither
     *         an element nor a text node
     */
    public static JsonElement jsonSerialize(Content content) {
        return JDOMtoGSONSerializer.serialize(content);
    }

    /**
     * Serializes a JDOM element to a gson JSON object.
     * <p>
     * Attributes and the namespaces introduced by the element are written as
     * properties whose names start with an underscore, non-blank text is
     * written as <code>$text</code>, and child elements are written under
     * their (prefixed) name. Children sharing the same name and namespace are
     * collected in a JSON array.
     * </p>
     * 
     * @see JDOMtoGSONSerializer#serializeElement(Element)
     * 
     * @param element the jdom element to serialize
     * @return a gson object
     */
    public static JsonObject jsonSerialize(Element element) {
        return JDOMtoGSONSerializer.serializeElement(element);
    }

    /**
     * Checks whether two documents are deep equal.
     * <p>
     * Both documents are serialized and parsed again, then their root elements
     * are compared recursively (name, attributes and content). Content outside
     * of the root element does not take part in the comparison. If anything
     * goes wrong while comparing, a warning is logged and the documents are
     * reported as not equal.
     * </p>
     * 
     * @param d1
     *            first Document to compare
     * @param d2
     *            second Document to compare
     * @return true, if d1 and d2 are deep equal
     * @see Document#equals(java.lang.Object)
     */
    public static boolean deepEqual(Document d1, Document d2) {
        boolean same;
        try {
            Element firstRoot = canonicalElement(d1);
            Element secondRoot = canonicalElement(d2);
            same = JDOMEquivalent.equivalent(firstRoot, secondRoot);
        } catch (Exception failure) {
            LOGGER.warn("Could not compare documents.", failure);
            same = false;
        }
        return same;
    }

    /**
     * Checks whether two elements are deep equal.
     * <p>
     * Both elements are serialized and parsed again, then compared recursively
     * (name, attributes and content). If anything goes wrong while comparing,
     * a warning is logged and the elements are reported as not equal.
     * </p>
     * 
     * @param e1
     *            first Element to compare
     * @param e2
     *            second Element to compare
     * @return true, if e1 and e2 are deep equal
     * @see Document#equals(java.lang.Object)
     */
    public static boolean deepEqual(Element e1, Element e2) {
        boolean same;
        try {
            Element firstCanonical = canonicalElement(e1);
            Element secondCanonical = canonicalElement(e2);
            same = JDOMEquivalent.equivalent(firstCanonical, secondCanonical);
        } catch (Exception failure) {
            LOGGER.warn("Could not compare elements.", failure);
            same = false;
        }
        return same;
    }

    /**
     * Writes the given element or document in compact format to an in-memory
     * buffer, parses that buffer again with the non validating parser and
     * returns the root element of the parsed document.
     * 
     * @param e the element or document to normalize
     * @return root element of the re-parsed XML
     * @throws IOException if writing the XML fails
     * @throws SAXParseException if the written XML cannot be parsed
     */
    private static Element canonicalElement(Parent e) throws IOException, SAXParseException {
        XMLOutputter compactOutputter = new XMLOutputter(Format.getCompactFormat());
        MCRByteArrayOutputStream buffer = new MCRByteArrayOutputStream();
        if (e instanceof Element) {
            compactOutputter.output((Element) e, buffer);
        } else {
            // anything that is not an element is expected to be a document
            compactOutputter.output((Document) e, buffer);
        }
        Document reparsed = MCRXMLParserFactory.getNonValidatingParser()
                .parseXML(new MCRByteContent(buffer.getBuffer(), 0, buffer.size()));
        return reparsed.getRootElement();
    }

    /**
     * Compares JDOM content for structural equivalence.
     * <p>
     * Two elements are equivalent when their namespace and local name match,
     * every attribute of one element has a counterpart with the same local
     * name, namespace and value in the other, and their content lists are
     * equivalent item by item in document order. Differences are reported on
     * debug level.
     * </p>
     */
    private static class JDOMEquivalent {

        private JDOMEquivalent() {
        }

        /**
         * Compares two elements including their attributes and content.
         * 
         * @param e1 first element
         * @param e2 second element
         * @return true, if name, attributes and content are equivalent
         */
        public static boolean equivalent(Element e1, Element e2) {
            return equivalentName(e1, e2) && equivalentAttributes(e1, e2)
                    && equivalentContent(e1.getContent(), e2.getContent());
        }

        /**
         * Compares the values of two text nodes.
         * 
         * @param t1 first text node
         * @param t2 second text node
         * @return true, if both values are equal
         */
        public static boolean equivalent(Text t1, Text t2) {
            boolean equals = t1.getValue().equals(t2.getValue());
            if (!equals) {
                LOGGER.debug("Text differs \"{}\"!=\"{}\"", t1, t2);
            }
            return equals;
        }

        /**
         * Compares the public and system identifiers of two document types.
         * 
         * @param d1 first document type
         * @param d2 second document type
         * @return true, if both identifiers are equal
         */
        public static boolean equivalent(DocType d1, DocType d2) {
            // an identifier may be absent, so the comparison has to be null-safe
            boolean equals = Objects.equals(d1.getPublicID(), d2.getPublicID())
                    && Objects.equals(d1.getSystemID(), d2.getSystemID());
            if (!equals) {
                LOGGER.debug("DocType differs \"{}\"!=\"{}\"", d1, d2);
            }
            return equals;
        }

        /**
         * Compares the values of two comments.
         * 
         * @param c1 first comment
         * @param c2 second comment
         * @return true, if both values are equal
         */
        public static boolean equivalent(Comment c1, Comment c2) {
            boolean equals = c1.getValue().equals(c2.getValue());
            if (!equals) {
                LOGGER.debug("Comment differs \"{}\"!=\"{}\"", c1, c2);
            }
            return equals;
        }

        /**
         * Compares target and data of two processing instructions.
         * 
         * @param p1 first processing instruction
         * @param p2 second processing instruction
         * @return true, if target and data are equal
         */
        public static boolean equivalent(ProcessingInstruction p1, ProcessingInstruction p2) {
            boolean equals = p1.getTarget().equals(p2.getTarget()) && p1.getData().equals(p2.getData());
            if (!equals) {
                LOGGER.debug("ProcessingInstruction differs \"{}\"!=\"{}\"", p1, p2);
            }
            return equals;
        }

        /**
         * Compares the attributes of two elements, ignoring their order.
         * <p>
         * Attributes are matched by local name and namespace (not by their
         * prefixed name), in line with how {@link #equivalentName(Element, Element)}
         * compares element names.
         * </p>
         * 
         * @param e1 first element
         * @param e2 second element
         * @return true, if both elements carry the same attributes with the same values
         */
        public static boolean equivalentAttributes(Element e1, Element e2) {
            List<Attribute> aList1 = e1.getAttributes();
            List<Attribute> aList2 = e2.getAttributes();
            if (aList1.size() != aList2.size()) {
                LOGGER.debug("Number of attributes differ \"{}\"!=\"{}\" for element {}", aList1, aList2,
                        e1.getName());
                return false;
            }
            // Both lists have the same size, so finding an equal counterpart for
            // every attribute of e1 is sufficient.
            for (Attribute attr : aList1) {
                Attribute counterpart = e2.getAttribute(attr.getName(), attr.getNamespace());
                if (counterpart == null || !attr.getValue().equals(counterpart.getValue())) {
                    LOGGER.debug("Attributes differ \"{}\"!=\"{}\"", aList1, aList2);
                    return false;
                }
            }
            return true;
        }

        /**
         * Compares two content lists item by item in list order.
         * 
         * @param l1 first content list
         * @param l2 second content list
         * @return true, if both lists have the same size and all items at the
         *         same position are equivalent
         */
        public static boolean equivalentContent(List<Content> l1, List<Content> l2) {
            if (l1.size() != l2.size()) {
                LOGGER.debug("Number of content list elements differ {}!={}", l1.size(), l2.size());
                return false;
            }
            Iterator<Content> i1 = l1.iterator();
            Iterator<Content> i2 = l2.iterator();
            while (i1.hasNext() && i2.hasNext()) {
                if (!equivalentNode(i1.next(), i2.next())) {
                    return false;
                }
            }
            return true;
        }

        /**
         * Compares two content items of the same kind; items of different or
         * unsupported kinds are never equivalent.
         */
        private static boolean equivalentNode(Content o1, Content o2) {
            if (o1 instanceof Element && o2 instanceof Element) {
                return equivalent((Element) o1, (Element) o2);
            }
            if (o1 instanceof Text && o2 instanceof Text) {
                return equivalent((Text) o1, (Text) o2);
            }
            if (o1 instanceof Comment && o2 instanceof Comment) {
                return equivalent((Comment) o1, (Comment) o2);
            }
            if (o1 instanceof ProcessingInstruction && o2 instanceof ProcessingInstruction) {
                return equivalent((ProcessingInstruction) o1, (ProcessingInstruction) o2);
            }
            if (o1 instanceof DocType && o2 instanceof DocType) {
                return equivalent((DocType) o1, (DocType) o2);
            }
            return false;
        }

        /**
         * Compares namespace and local name of two elements.
         * 
         * @param e1 first element
         * @param e2 second element
         * @return true, if namespace and local name are equal
         */
        public static boolean equivalentName(Element e1, Element e2) {
            Namespace ns1 = e1.getNamespace();
            String localName1 = e1.getName();

            Namespace ns2 = e2.getNamespace();
            String localName2 = e2.getName();

            return ns1.equals(ns2) && localName1.equals(localName2);
        }
    }

    /**
     * Helper class to serialize jdom XML to gson JSON.
     * <p>
     * To support fast javascript dot property access its decided to use the underscore (_)
     * for attributes and the dollar sign ($) for text nodes. The colon sign (:) is used for
     * namespaces (you have to use square brackets in javascript for accessing those).
     * </p>
     * 
     * <ul>
     *   <li><b>_version</b> -&gt; version attribute</li>
     *   <li><b>$text</b> -&gt; text node</li>
     *   <li><b>_xmlns:mods</b> -&gt; mods namespace</li>
     *   <li><b>_mods:title</b> -&gt; title attribute with mods namespace</li>
     * </ul>
     * 
     * <b>Example</b>
     * <pre>
     * {
     *   "_version": "3.0",
     *   "_xmlns:mods": "http://www.loc.gov/mods/v3",
     *   "mods:titleInfo": {
     *     "mods:title": {
     *       "$text": "hello xml serializer"
     *     }
     *   }
     * }
     * </pre>
     * <ul>
     *   <li><b>get the version</b> -&gt; mods._version -&gt; "3.0"</li>
     *   <li><b>get the text of the title</b> -&gt; mods["mods:titleInfo"]["mods:title"].$text -&gt; "hello xml serializer"</li>
     * </ul>
     * <b>BE AWARE THAT MIXED CONTENT IS NOT SUPPORTED!</b>
     * 
     * @author Matthias Eichner
     */
    private static class JDOMtoGSONSerializer {

        private JDOMtoGSONSerializer() {
        }

        /**
         * This method is capable of serializing Elements and Text nodes.
         * Return null otherwise.
         * 
         * @param content the content to serialize
         * @return the serialized content, or null if the type is not supported
         */
        public static JsonElement serialize(Content content) {
            if (content instanceof Element) {
                return serializeElement((Element) content);
            }
            if (content instanceof Text) {
                return serializeText((Text) content);
            }
            return null;
        }

        /**
         * Serializes a text node to a JSON primitive holding its text.
         * 
         * @param text the text node to serialize
         * @return the text as JSON primitive
         */
        public static JsonPrimitive serializeText(Text text) {
            return new JsonPrimitive(text.getText());
        }

        /**
         * Serializes an element with its text, attributes, introduced
         * namespaces and child elements.
         * <p>
         * A child name that occurs once is written as a JSON object, a child
         * name that occurs several times is written as a JSON array of objects.
         * </p>
         * 
         * @param element the element to serialize
         * @return the element as JSON object
         */
        public static JsonObject serializeElement(Element element) {
            JsonObject json = new JsonObject();

            // text
            String text = element.getText();
            if (text != null && text.trim().length() > 0) {
                json.addProperty("$text", text);
            }

            // attributes
            element.getAttributes().forEach(attr -> json.addProperty(getName(attr), attr.getValue()));

            // namespaces
            element.getNamespacesIntroduced().forEach(ns -> json.addProperty(getName(ns), ns.getURI()));

            // children: handle every distinct (namespace, name) combination exactly once;
            // distinct() relies on Pair implementing both equals and hashCode
            element.getChildren().stream().map(e -> new Pair<>(e.getNamespace(), e.getName())).distinct()
                    .forEach(pair -> {
                        String name = getName(pair.y, pair.x);
                        List<Element> children = element.getChildren(pair.y, pair.x);
                        if (children.size() == 1) {
                            json.add(name, serializeElement(children.get(0)));
                        } else if (children.size() >= 2) {
                            JsonArray arr = new JsonArray();
                            children.forEach(child -> arr.add(serializeElement(child)));
                            json.add(name, arr);
                        } else {
                            throw new MCRException("Unexcpected error while parsing children of element '"
                                    + element.getName() + "'");
                        }
                    });
            return json;
        }

        /** Builds the property name of a namespace declaration: <code>_xmlns:</code> plus prefix. */
        private static String getName(Namespace ns) {
            return "_xmlns:" + getCononicalizedPrefix(ns);
        }

        /** Builds the property name of an attribute: underscore plus (prefixed) attribute name. */
        private static String getName(Attribute attribute) {
            return "_" + getName(attribute.getName(), attribute.getNamespace());
        }

        /** Prepends the namespace prefix and a colon unless the namespace is null or has an empty URI. */
        private static String getName(String name, Namespace namespace) {
            if (namespace != null && !namespace.getURI().isEmpty()) {
                return getCononicalizedPrefix(namespace) + ":" + name;
            }
            return name;
        }

        /**
         * Returns the prefix of the first standard namespace that equals the
         * given one, or the namespace's own prefix if there is none.
         */
        private static String getCononicalizedPrefix(Namespace namespace) {
            // sequential findFirst keeps the result deterministic if several standard namespaces match
            return MCRConstants.getStandardNamespaces().stream().filter(namespace::equals).findFirst()
                    .map(Namespace::getPrefix).orElseGet(namespace::getPrefix);
        }

        /**
         * Immutable pair of two values with value based equals and hashCode.
         */
        private static final class Pair<X, Y> {
            public final X x;

            public final Y y;

            public Pair(X x, Y y) {
                this.x = x;
                this.y = y;
            }

            @Override
            public boolean equals(Object obj) {
                if (this == obj) {
                    return true;
                }
                // instanceof also rejects null
                if (!(obj instanceof Pair)) {
                    return false;
                }
                Pair<?, ?> other = (Pair<?, ?>) obj;
                return Objects.equals(this.x, other.x) && Objects.equals(this.y, other.y);
            }

            @Override
            public int hashCode() {
                return Objects.hash(x, y);
            }

        }

    }
}