org.kitodo.dataaccess.format.xml.Namespaces.java Source code

Java tutorial

Introduction

Here is the source code for org.kitodo.dataaccess.format.xml.Namespaces.java

Source

/*
 * (c) Kitodo. Key to digital objects e. V. <contact@kitodo.org>
 *
 * This file is part of the Kitodo project.
 *
 * It is licensed under GNU General Public License version 3 or later.
 *
 * For the full copyright and license information, please read the
 * GPL3-License.txt file that was distributed with this source code.
 */

package org.kitodo.dataaccess.format.xml;

import java.io.IOException;
import java.net.InetAddress;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;

import org.apache.commons.collections4.BidiMap;
import org.apache.commons.collections4.bidimap.DualHashBidiMap;
import org.kitodo.dataaccess.RDF;
import org.kitodo.dataaccess.RDFS;

/**
 * Class to handle namespaces.
 *
 * <p>
 * Technically, a namespace is an arbitrary abbreviation of a URI. For example,
 * the URL {@code https://www.kitodo.org/software/kitodoproduction} can be
 * abbreviated as {@code a:oduction}, given the namespace prefix {@code a:} has
 * been declared as {@code https://www.kitodo.org/software/kitodopr}. However,
 * such abbreviations are not very useful.
 *
 * <p>
 * Compared to files in directories, the namespace can be considered as the
 * path. There are two variants of namespaces that must be distinguished: URIs
 * where the simple name is separated by a number sign ({@code #}), and URIs
 * where it is separated by a slash ({@code /}).
 *
 * <p>
 * <em>In Linked Data,</em> namespaces are syntactical sugar. Some Linked Data
 * formats do not even make use of namespaces at all. For the others, the
 * namespace should be considered as a part of the interface. Namespaces must be
 * declared the way that, concatenated with the simple name, they form the
 * complete URI.
 *
 * <p>
 * <em>For XML files,</em> if namespaces are to be used (they are optional),
 * they <strong>must</strong> be declared using namespace prefixes.
 * ({@code <mods:mods>} is a valid XML tag, while
 * {@code <http://www.loc.gov/mods/v3#mods>} is not.) For XML attributes, that
 * belong to the same namespace as the XML element on which they are declared,
 * the prefix can be omitted. ({@code <mets:structMap TYPE="LOGICAL">} is equal
 * to {@code <mets:structMap mets:TYPE="LOGICAL">}. The attribute {@code TYPE}
 * implicitly is a member of the namespace abbreviated as {@code mets:}, while
 * the value {@code "LOGICAL"} is not. The value is a plain string. This is an
 * oddity of the XML format.) Unlike any other format, namespaces ending with a
 * number sign are declared <strong>without</strong> the trailing number sign,
 * while namespaces ending in a slash are declared <strong>with</strong> the
 * trailing slash.
 */
public class Namespaces extends DualHashBidiMap<String, String> {
    /**
     * Maps the namespace URLs of the reserved namespaces "xml:" and "xmlns:"
     * to their reserved prefixes (key: namespace URL, value: prefix without
     * colon). The map is populated in a static block below.
     */
    private static final Map<String, String> CONSTANTS = new HashMap<>(3);

    /**
     * The default "rdf:" prefix, which may however be overridden. In this
     * class, all prefix Strings are without the colon.
     */
    private static final String DEFAULT_PREFIX_RDF = "rdf";

    /**
     * The default "rdfs:" prefix, which may however be overridden. In this
     * class, all prefix Strings are without the colon.
     */
    private static final String DEFAULT_PREFIX_RDFS = "rdfs";

    /**
     * The "xml:" prefix which is reserved and must neither be used otherwise
     * nor be named differently. In this class, all prefix Strings are without
     * the colon.
     */
    private static final String PREFIX_XML = "xml";

    /**
     * The "xmlns:" prefix which is reserved and must neither be used otherwise
     * nor be named differently. In this class, all prefix Strings are without
     * the colon.
     */
    private static final String PREFIX_XMLNS = "xmlns";

    /**
     * Version identifier used by Java serialization.
     */
    private static final long serialVersionUID = 1L;
    /**
     * The XMLNS namespace URL. It is registered with the reserved prefix
     * "xmlns" in the static block below.
     */
    public static final String XMLNS_NAMESPACE = "http://www.w3.org/2000/xmlns/";

    /**
     * Populates the map with the constant namespaces "xml:" and "xmlns:".
     */
    static {
        CONSTANTS.put(XMLNS_NAMESPACE, PREFIX_XMLNS);
        // XML.NAMESPACE is declared outside this file; presumably the
        // reserved XML namespace URL -- confirm against the XML class.
        CONSTANTS.put(XML.NAMESPACE, PREFIX_XML);
    }

    /**
     * Counter to create new namespace prefixes. It is incremented before each
     * use, so the first generated prefix is derived from the value 1.
     */
    private long next;

    /**
     * The map of used namespaces in serializing an XML document. Mapping
     * direction is namespace to prefix, {@code #} namespaces are stored without
     * the {@code #} at the end, {@code /} namespaces are stored with a
     * {@code /} as last character.
     */
    private final Map<String, String> used;

    /**
     * Creates a new namespace handler without any presets. Delegates to
     * {@link #Namespaces(Map)} with {@code null}, so that only the default
     * prefixes registered by that constructor are available initially.
     */
    public Namespaces() {
        this(null);
    }

    /**
     * Creates a new namespace handler. The default prefixes {@code rdf} and
     * {@code rdfs} are registered first and the presets are put afterwards,
     * so a preset may replace a default.
     *
     * @param presets
     *            Presets of namespace shortcuts. They are put into this map
     *            next to the default entries, which are keyed by prefix
     *            (without colon) and valued by namespace, so presets need to
     *            use that same direction to be found. May be null or empty.
     */
    public Namespaces(Map<String, String> presets) {
        used = new HashMap<>();
        super.put(DEFAULT_PREFIX_RDF, RDF.NAMESPACE);
        super.put(DEFAULT_PREFIX_RDFS, RDFS.NAMESPACE);
        if (presets == null) {
            return;
        }
        super.putAll(presets);
    }

    /**
     * Abbreviates an attribute key for use in an XML file. A key without a
     * namespace is returned as is. If the attribute lives in the same
     * namespace as the element that holds it, only the local name is
     * returned; otherwise the local name is preceded by the namespace prefix
     * and a colon.
     *
     * @param element
     *            node the attribute is on
     * @param attribute
     *            attribute key to convert
     * @return abbreviated or basic attribute
     */
    String abbreviateAttribute(String element, String attribute) {
        final String attributeNamespace = namespaceOfForXMLFile(attribute);
        if (attributeNamespace.isEmpty()) {
            return attribute;
        }
        available(attributeNamespace);

        // a number-sign namespace is held without its '#', so skip that
        // character in addition to the namespace itself
        int localNameStart = attributeNamespace.length();
        if (!attributeNamespace.endsWith("/")) {
            localNameStart++;
        }
        final String localName = attribute.substring(localNameStart);

        if (namespaceOfForXMLFile(element).equals(attributeNamespace)) {
            return localName;
        }
        return used.get(attributeNamespace) + ':' + localName;
    }

    /**
     * Abbreviates an element URL to the form {@code prefix:localName} for
     * use in an XML file. A URL without a namespace is returned as is. A
     * prefix for the namespace is made available if there is none yet.
     *
     * @param element
     *            URL to abbreviate for an element name
     * @return the abbreviated node name
     */
    String abbreviateElement(String element) {
        final String namespace = namespaceOfForXMLFile(element);
        if (namespace.isEmpty()) {
            return element;
        }
        available(namespace);

        // a number-sign namespace is held without its '#', which therefore
        // has to be skipped when cutting out the local name
        final int separatorLength = namespace.endsWith("/") ? 0 : 1;
        final String localName = element.substring(namespace.length() + separatorLength);
        return used.get(namespace) + ':' + localName;
    }

    /**
     * Returns a sequence of lower-case letters for a positive whole number,
     * counting like spreadsheet column names.
     *
     * @param value
     *            number to convert, starting at 1
     * @return a, b, c, ..., x, y, z, aa, ab, ac, ...
     */
    private static String asLetters(long value) {
        StringBuilder reversed = new StringBuilder();
        long remaining = value;
        do {
            // there is no zero digit in this notation, hence shift by one
            // before taking the least significant letter
            remaining--;
            reversed.append((char) ('a' + remaining % 26));
            remaining /= 26;
        } while (remaining != 0);
        // letters were collected least significant first
        return reversed.reverse().toString();
    }

    /**
     * Grants that a prefix is available for a namespace. Nothing happens if
     * the namespace already has a prefix in use. Otherwise the prefix is
     * taken from the constants, or else from the entries of this map, or
     * else a new prefix is created alphabetically ascending that collides
     * neither with a key of this map nor with a reserved prefix.
     *
     * @param namespace
     *            the namespace, in the form used for XML files (number-sign
     *            namespaces without the trailing {@code #})
     */
    private void available(String namespace) {
        if (used.containsKey(namespace)) {
            return;
        }
        if (CONSTANTS.containsKey(namespace)) {
            used.put(namespace, CONSTANTS.get(namespace));
            return;
        }

        // the look-up below is done with the terminating '#' restored, as
        // the XML form of a number-sign namespace comes without it
        boolean terminated = namespace.endsWith("/") || namespace.endsWith("#");
        String lookupKey = terminated ? namespace : namespace.concat("#");
        BidiMap<String, String> namespaceToPrefix = super.inverseBidiMap();
        if (namespaceToPrefix.containsKey(lookupKey)) {
            used.put(namespace, namespaceToPrefix.get(lookupKey));
            return;
        }

        // create the next free generated prefix
        String candidate = asLetters(++next);
        while (super.containsKey(candidate) || CONSTANTS.containsValue(candidate)) {
            candidate = asLetters(++next);
        }
        used.put(namespace, candidate);
    }

    /**
     * Recomposes a URL from a namespace and a local name. A namespace that
     * ends in neither a slash nor a number sign gets a number sign appended
     * before the local name is added.
     *
     * @param namespace
     *            namespace for URL
     * @param localName
     *            local name for URL
     * @return combined URL
     */
    public static String concat(String namespace, String localName) {
        if (namespace.endsWith("/") || namespace.endsWith("#")) {
            return namespace.concat(localName);
        }
        return namespace.concat("#").concat(localName);
    }

    /**
     * Expands an abbreviated URL using the prefixes stored in this map.
     * Delegates to {@link #expand(String, Map)}, passing this instance as the
     * map of prefixes.
     *
     * @param abbreviatedUrl
     *            URL to expand, in the form {@code prefix:localName}
     * @return expanded URL, or the input unchanged if its prefix is not a key
     *         of this map
     */
    public String expand(String abbreviatedUrl) {
        return expand(abbreviatedUrl, this);
    }

    /**
     * Expands an abbreviated URL referencing a map of prefixes. The part
     * before the first colon is looked up as prefix; if a namespace is found
     * for it, the namespace and the part after the colon are combined to the
     * full URL. Input that cannot be expanded is returned unchanged: this
     * applies both to an unknown prefix and to input that does not contain a
     * colon at all.
     *
     * @param abbreviatedUrl
     *            URL to expand
     * @param prefixes
     *            map of prefixes, mapped prefix (without colon) to namespace
     * @return expanded URL, or {@code abbreviatedUrl} itself if it cannot be
     *         expanded
     */
    public static String expand(String abbreviatedUrl, Map<String, String> prefixes) {
        int colon = abbreviatedUrl.indexOf(':');
        if (colon < 0) {
            // without a colon there is no prefix to look up; cutting the
            // string at index -1 would fail with an index exception
            return abbreviatedUrl;
        }
        String namespace = prefixes.get(abbreviatedUrl.substring(0, colon));
        if (namespace == null) {
            return abbreviatedUrl;
        }
        return concat(namespace, abbreviatedUrl.substring(colon + 1));
    }

    /**
     * Returns the prefix of an abbreviated URL, that is everything before the
     * first colon. The colon is not part of the result.
     *
     * @param abbreviatedUrl
     *            abbreviated URL
     * @return the prefix
     * @throws StringIndexOutOfBoundsException
     *             if the abbreviated URL does not contain a colon
     */
    public static String getPrefix(String abbreviatedUrl) {
        final int colon = abbreviatedUrl.indexOf(':');
        return abbreviatedUrl.substring(0, colon);
    }

    /**
     * Returns the local name part of a URL. If the URL contains a number
     * sign, the local name is everything after the first one; otherwise it is
     * everything after the last slash. A URL with neither of them is returned
     * completely.
     *
     * @param url
     *            URL to return the local name from
     * @return the local name
     */
    public static String localNameOf(String url) {
        int separator = url.indexOf('#');
        if (separator < 0) {
            // -1 if there is no slash either, which makes the cut start at 0
            separator = url.lastIndexOf('/');
        }
        return url.substring(separator + 1);
    }

    /**
     * Converts a URI to a globally unique namespace. The result always uses
     * an HTTP scheme (the original scheme is kept only if it starts with
     * "http", ignoring case) and an empty fragment, so that it ends with a
     * number sign. If the URI has no host, the canonical host name of the
     * machine the program is running on is used instead. If, in that case,
     * the path starts with two slashes, the first path segment is taken as
     * the name of a remote host and removed from the path; a remote name
     * without a dot is completed with the part of the local canonical host
     * name that follows the local short host name.
     *
     * @param uri
     *            URI to create a globally unique URI for
     * @return globally unique namespace
     * @throws IOException
     *             if looking up the local host fails
     * @throws IllegalArgumentException
     *             if the parts do not form a syntactically valid URI
     */
    public static String namespaceFromURI(URI uri) throws IOException {
        try {
            String hostName = uri.getHost();
            String resourcePath = uri.getPath();
            if (hostName == null) {
                final String canonicalLocalName = InetAddress.getLocalHost().getCanonicalHostName();
                hostName = canonicalLocalName;
                final boolean startsWithRemoteHost = (resourcePath != null) && resourcePath.startsWith("//");
                if (startsWithRemoteHost) {
                    // NOTE(review): nextSlash is -1 if nothing follows the
                    // remote host name, which makes substring() throw --
                    // confirm whether such input can occur.
                    final int nextSlash = resourcePath.indexOf('/', 2);
                    final String remoteHost = resourcePath.substring(2, nextSlash);
                    resourcePath = resourcePath.substring(nextSlash);
                    if (remoteHost.contains(".")) {
                        hostName = remoteHost;
                    } else {
                        // borrow what follows the short name in the local
                        // canonical name
                        final int shortNameLength = InetAddress.getLocalHost().getHostName().length();
                        hostName = remoteHost.concat(canonicalLocalName.substring(shortNameLength));
                    }
                }
            }

            final String originalScheme = uri.getScheme();
            final boolean httpLike = (originalScheme != null) && originalScheme.toLowerCase().startsWith("http");
            final String scheme = httpLike ? originalScheme : "http";

            // the empty fragment yields the trailing number sign
            final URI namespace = new URI(scheme, uri.getUserInfo(), hostName, uri.getPort(), resourcePath,
                    uri.getQuery(), "");
            return namespace.toASCIIString();
        } catch (URISyntaxException e) {
            String message = e.getMessage();
            if (message == null) {
                message = e.getClass().getName();
            }
            throw new IllegalArgumentException(message, e);
        }
    }

    /**
     * Returns the namespace part of a URL, including its terminating
     * character. If the URL contains a number sign, the namespace reaches up
     * to and including the first one; otherwise it reaches up to and
     * including the last slash. A URL with neither of them has an empty
     * namespace.
     *
     * @param url
     *            url to return the namespace from
     * @return the namespace
     */
    public static String namespaceOf(String url) {
        int terminator = url.indexOf('#');
        if (terminator < 0) {
            // -1 if there is no slash either, which makes the result empty
            terminator = url.lastIndexOf('/');
        }
        return url.substring(0, terminator + 1);
    }

    /**
     * Returns the namespace part of a URL in the form used in XML files. If
     * the URL contains a number sign, the namespace is everything before the
     * first one, without the number sign itself; otherwise it reaches up to
     * and including the last slash. A URL with neither of them has an empty
     * namespace.
     *
     * @param url
     *            url to return the namespace from
     * @return the namespace
     */
    static String namespaceOfForXMLFile(String url) {
        final int hash = url.indexOf('#');
        return hash >= 0 ? url.substring(0, hash) : url.substring(0, url.lastIndexOf('/') + 1);
    }

    /**
     * Returns all namespace declarations to be added to the document head.
     * Each entry maps the declaring attribute name, which is {@code xmlns:}
     * followed by the prefix, to the namespace. {@code #} namespaces come
     * without the {@code #} at the end, {@code /} namespaces come with a
     * {@code /} as last character. The method must be called after the
     * document has been converted, because only namespaces that have been
     * used by then are contained.
     *
     * @return all namespaces
     */
    Set<Entry<String, String>> namespaceSetForXMLFile() {
        final int capacity = (int) Math.ceil(used.size() / 0.75);
        final Map<String, String> declarations = new HashMap<>(capacity);
        for (Entry<String, String> usedNamespace : used.entrySet()) {
            // 'used' maps namespace to prefix; the declaration is the other
            // way round
            final String attributeName = PREFIX_XMLNS + ':' + usedNamespace.getValue();
            declarations.put(attributeName, usedNamespace.getKey());
        }
        return declarations.entrySet();
    }
}