org.dspace.content.crosswalk.MODSDisseminationCrosswalk.java Source code

Java tutorial

Introduction

Here is the source code for org.dspace.content.crosswalk.MODSDisseminationCrosswalk.java

Source

/**
 * The contents of this file are subject to the license and copyright
 * detailed in the LICENSE and NOTICE files at the root of the source
 * tree and available online at
 *
 * http://www.dspace.org/license/
 */
package org.dspace.content.crosswalk;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.StringReader;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Properties;

import org.apache.commons.lang.ArrayUtils;
import org.apache.log4j.Logger;
import org.dspace.authorize.AuthorizeException;
import org.dspace.content.DCValue;
import org.dspace.content.DSpaceObject;
import org.dspace.content.Collection;
import org.dspace.content.Community;
import org.dspace.content.Item;
import org.dspace.content.Site;
import org.dspace.core.ConfigurationManager;
import org.dspace.core.Constants;
import org.dspace.core.SelfNamedPlugin;
import org.jdom.Attribute;
import org.jdom.Document;
import org.jdom.Element;
import org.jdom.JDOMException;
import org.jdom.Namespace;
import org.jdom.Text;
import org.jdom.Verifier;
import org.jdom.input.SAXBuilder;
import org.jdom.output.XMLOutputter;
import org.jdom.xpath.XPath;

/**
 * Configurable MODS Crosswalk
 * <p>
 * This class supports multiple dissemination crosswalks from DSpace
 * internal data to the MODS XML format
 *  (see <a href="http://www.loc.gov/standards/mods/">http://www.loc.gov/standards/mods/</a>.)
 * <p>
 * It registers multiple Plugin names, which it reads from
 * the DSpace configuration as follows:
 *
 * <h3>Configuration</h3>
 * Every key starting with <code>"crosswalk.mods.properties."</code> describes a
 * MODS crosswalk.  Everything after the last period is the <em>plugin name</em>,
 * and the value is the pathname (relative to <code><em>dspace.dir</em>/config</code>)
 * of the crosswalk configuration file.
 * <p>
 * You can have two names point to the same crosswalk,
 * just add two configuration entries with the same value, e.g.
 * <pre>
 *    crosswalk.mods.properties.MODS = crosswalks/mods.properties
 *    crosswalk.mods.properties.default = crosswalks/mods.properties
 * </pre>
 * The first line creates a plugin with the name <code>"MODS"</code>
 * which is configured from the file <em>dspace-dir</em><code>/config/crosswalks/mods.properties</code>.
 * <p>
 * Since there is significant overhead in reading the properties file to
 * configure the crosswalk, and a crosswalk instance may be used any number
 * of times, we recommend caching one instance of the crosswalk for each
 * name and simply reusing those instances.  The PluginManager does this
 * by default.
 *
 * @author Larry Stone
 * @author Scott Phillips
 * @version $Revision$
 */
public class MODSDisseminationCrosswalk extends SelfNamedPlugin implements DisseminationCrosswalk {
    /** log4j category */
    private static Logger log = Logger.getLogger(MODSDisseminationCrosswalk.class);

    /**
     * Prefix shared by every DSpace configuration key that declares a MODS
     * crosswalk; whatever follows the prefix in the key is the plugin name.
     */
    private static final String CONFIG_PREFIX = "crosswalk.mods.properties.";

    /**
     * Plugin alias table.  It is filled in once, when the class is loaded,
     * by scanning the DSpace configuration: every property name that begins
     * with the MODS crosswalk prefix contributes the remainder of its name
     * as one alias.
     */
    private static String aliases[] = null;
    static {
        final int prefixLength = CONFIG_PREFIX.length();
        List<String> found = new ArrayList<String>();
        Enumeration<String> keys = (Enumeration<String>) ConfigurationManager.propertyNames();
        while (keys.hasMoreElements()) {
            String name = keys.nextElement();
            if (!name.startsWith(CONFIG_PREFIX)) {
                // Not a MODS crosswalk declaration.
                continue;
            }
            found.add(name.substring(prefixLength));
        }
        aliases = found.toArray(new String[found.size()]);
    }

    /**
     * Report the plugin names under which this crosswalk is registered.
     *
     * @return a copy of the alias table, so that callers cannot modify
     *         the table held by this class
     */
    public static String[] getPluginNames() {
        Object[] copy = ArrayUtils.clone(aliases);
        return (String[]) copy;
    }

    /**
     * MODS namespace.
     */
    public static final Namespace MODS_NS = Namespace.getNamespace("mods", "http://www.loc.gov/mods/v3");

    /**
     * XLink namespace; registered with each configured XPath and declared on
     * the wrapper element used to parse configured XML fragments.
     */
    private static final Namespace XLINK_NS = Namespace.getNamespace("xlink", "http://www.w3.org/1999/xlink");

    /** Namespaces handed out (as a copy) by getNamespaces(). */
    private static final Namespace namespaces[] = { MODS_NS, XLINK_NS };

    /**  URL of MODS XML Schema */
    public static final String MODS_XSD = "http://www.loc.gov/standards/mods/v3/mods-3-1.xsd";

    /** Schema location value: the MODS namespace URI, one space, then the MODS schema URL. */
    private static final String schemaLocation = MODS_NS.getURI() + " " + MODS_XSD;

    /** Outputter used to render elements as text inside log messages. */
    private static XMLOutputter outputUgly = new XMLOutputter();
    /**
     * Parser for the XML fragments found in the crosswalk configuration.
     * NOTE(review): this single instance is shared by all crosswalk instances
     * without synchronization -- confirm it is safe for concurrent use.
     */
    private static SAXBuilder builder = new SAXBuilder();

    /** Mapping table keyed by qualified DC field name; stays null until initMap() has loaded it. */
    private Map<String, modsTriple> modsMap = null;

    /**
     * Container for crosswalk mapping: expressed as "triple" of:
     * 1. QDC field name (really field.qualifier).
     * 2. XML subtree to add to MODS record.
     * 3. XPath expression showing places to plug in the value.
     */
    static class modsTriple {
        /** Qualified DC field name this mapping applies to. */
        public String qdc = null;
        /** Prototype XML element; callers clone it before filling in values. */
        public Element xml = null;
        /** Compiled XPath selecting the nodes of {@link #xml} that receive the value. */
        public XPath xpath = null;

        /**
         * Initialize from text versions of QDC, XML and XPath.
         * The DC stays a string; parse the XML with appropriate
         * namespaces; "compile" the XPath.
         *
         * @param qdc   qualified DC field name
         * @param xml   XML fragment, parsed inside a wrapper element that
         *              declares the MODS and XLink namespace prefixes
         * @param xpath XPath expression evaluated relative to the fragment
         * @return the populated triple, or null (after logging an error) when
         *         the XPath or the XML cannot be parsed, or when the XML
         *         fragment does not contain any element
         */
        public static modsTriple create(String qdc, String xml, String xpath) {
            modsTriple result = new modsTriple();

            final String prolog = "<mods xmlns:" + MODS_NS.getPrefix() + "=\"" + MODS_NS.getURI() + "\" " + "xmlns:"
                    + XLINK_NS.getPrefix() + "=\"" + XLINK_NS.getURI() + "\">";
            final String postlog = "</mods>";
            try {
                result.qdc = qdc;
                result.xpath = XPath.newInstance(xpath);
                result.xpath.addNamespace(MODS_NS.getPrefix(), MODS_NS.getURI());
                result.xpath.addNamespace(XLINK_NS);
                Document d = builder.build(new StringReader(prolog + xml + postlog));

                // Take the first child *element* of the wrapper.  Blindly casting
                // content item 0 would throw an unchecked exception for an empty
                // fragment or one that starts with text or a comment.
                List<?> children = d.getRootElement().getChildren();
                if (children.isEmpty()) {
                    logFailure(qdc, xml, xpath, "no element in XML fragment");
                    return null;
                }
                result.xml = (Element) children.get(0);
            } catch (JDOMException je) {
                logFailure(qdc, xml, xpath, je.toString());
                return null;
            } catch (IOException je) {
                logFailure(qdc, xml, xpath, je.toString());
                return null;
            }
            return result;
        }

        /** Log one initialization failure in the common message format. */
        private static void logFailure(String qdc, String xml, String xpath, String problem) {
            log.error("Error initializing modsTriple(\"" + qdc + "\",\"" + xml + "\",\"" + xpath + "\"): got "
                    + problem);
        }
    }

    /**
     * Initialize Crosswalk table from a properties file
     * which itself is the value of the DSpace configuration property
     * "crosswalk.mods.properties.X", where "X" is the alias name of this instance.
     * Each instance may be configured with a separate mapping table.
     *
     * The MODS crosswalk configuration properties follow the format:
     *
     *  {field-name} = {XML-fragment} | {XPath}
     *
     *  1. qualified DC field name is of the form
     *       {MDschema}.{element}.{qualifier}
     *
     *      e.g.  dc.contributor.author
     *
     *  2. XML fragment is prototype of metadata element, with empty or "%s"
     *     placeholders for value(s).  NOTE: Leave the %s's in because
     *     it's much easier then to see if something is broken.
     *
     *  3. XPath expression listing point(s) in the above XML where
     *     the value is to be inserted.  Context is the element itself.
     *
     * Example properties line:
     *
     *  dc.description.abstract = <mods:abstract>%s</mods:abstract> | text()
     *
     * Does nothing when the table has already been loaded.  Lines that do not
     * split into an XML fragment and an XPath are logged and skipped, as are
     * lines whose fragment or XPath cannot be parsed.
     *
     * @throws CrosswalkInternalException if this instance has no plugin name,
     *         if no configuration file is configured for that name, or if
     *         the configuration file cannot be opened or read
     */
    private void initMap() throws CrosswalkInternalException {
        if (modsMap != null) {
            return;
        }
        String myAlias = getPluginInstanceName();
        if (myAlias == null) {
            // Without a name there is no way to locate the mapping file.  Fail
            // here rather than leave the table null for the caller to trip over.
            String msg = "Must use PluginManager to instantiate MODSDisseminationCrosswalk so the class knows its name.";
            log.error(msg);
            throw new CrosswalkInternalException(msg);
        }
        String cmPropName = CONFIG_PREFIX + myAlias;
        String propsFilename = ConfigurationManager.getProperty(cmPropName);
        if (propsFilename == null) {
            String msg = "MODS crosswalk missing configuration file for crosswalk named \"" + myAlias + "\"";
            log.error(msg);
            throw new CrosswalkInternalException(msg);
        }

        String parent = ConfigurationManager.getProperty("dspace.dir") + File.separator + "config"
                + File.separator;
        File propsFile = new File(parent, propsFilename);
        Properties modsConfig = new Properties();
        FileInputStream pfs = null;
        try {
            pfs = new FileInputStream(propsFile);
            modsConfig.load(pfs);
        } catch (IOException e) {
            log.error("Error opening or reading MODS properties file: " + propsFile.toString() + ": "
                    + e.toString());
            throw new CrosswalkInternalException("MODS crosswalk cannot open config file: " + e.toString(), e);
        } finally {
            if (pfs != null) {
                try {
                    pfs.close();
                } catch (IOException ioe) {
                    // The properties are already loaded (or the load failure is
                    // already being reported), so a failed close is only noted.
                    log.warn("Error closing MODS properties file: " + propsFile.toString() + ": "
                            + ioe.toString());
                }
            }
        }

        // Build the table locally and publish it only once complete, so that a
        // failure part-way through cannot leave a half-filled table behind.
        Map<String, modsTriple> loaded = new HashMap<String, modsTriple>();
        Enumeration<String> pe = (Enumeration<String>) modsConfig.propertyNames();
        while (pe.hasMoreElements()) {
            String qdc = pe.nextElement();
            String val = modsConfig.getProperty(qdc);
            // Split into at most two parts around the first whitespace-delimited "|".
            String pair[] = val.split("\\s+\\|\\s+", 2);
            if (pair.length < 2) {
                log.warn("Illegal MODS mapping in " + propsFile.toString() + ", line = " + qdc + " = " + val);
            } else {
                modsTriple trip = modsTriple.create(qdc, pair[0], pair[1]);
                if (trip != null) {
                    loaded.put(qdc, trip);
                }
            }
        }
        modsMap = loaded;
    }

    /**
     * Report the XML namespaces used by this crosswalk's output.
     *
     * @return a copy of the namespace table (MODS and XLink)
     */
    public Namespace[] getNamespaces() {
        Object[] copy = ArrayUtils.clone(namespaces);
        return (Namespace[]) copy;
    }

    /**
     * Return the MODS schema location.
     *
     * @return the MODS namespace URI and the MODS schema URL, separated by
     *         a single space
     */
    public String getSchemaLocation() {
        return schemaLocation;
    }

    /**
     * Produce the object's metadata in MODS format as a list of XML
     * elements.  Because the elements are returned without an enclosing
     * root, each one is given the schemaLocation attribute itself.
     *
     * @param dso the object whose metadata is disseminated
     * @return one element per metadata value that has a MODS mapping
     */
    public List<Element> disseminateList(DSpaceObject dso)
            throws CrosswalkException, IOException, SQLException, AuthorizeException {
        final boolean schemaOnEachElement = true;
        return disseminateListInternal(dso, schemaOnEachElement);
    }

    /**
     * Disseminate an Item, Collection, Community, or Site to MODS.
     * The result is a single "mods" root element that carries the
     * schemaLocation attribute and holds the mapped metadata elements.
     *
     * @param dso the object whose metadata is disseminated
     * @return the MODS root element
     */
    public Element disseminateElement(DSpaceObject dso)
            throws CrosswalkException, IOException, SQLException, AuthorizeException {
        Element modsRoot = new Element("mods", MODS_NS);
        modsRoot.setAttribute("schemaLocation", schemaLocation, XSI_NS);

        // The root already names the schema, so the children are left untagged.
        List<Element> children = disseminateListInternal(dso, false);
        modsRoot.addContent(children);
        return modsRoot;
    }

    /**
     * Crosswalk the metadata of an Item, Collection, Community, or Site into
     * a list of MODS elements.  For every metadata value that has an entry in
     * the mapping table, the entry's prototype element is cloned and the
     * value is written into each node selected by the entry's XPath.  Values
     * with no mapping are logged and skipped, as are values whose XPath
     * evaluation fails.
     *
     * @param dso       the object whose metadata is disseminated
     * @param addSchema when true, each returned element is given the
     *                  schemaLocation attribute
     * @return the generated elements, in the order of the metadata values
     * @throws CrosswalkObjectNotSupported if the object is of any other type
     * @throws CrosswalkInternalException if the mapping table cannot be loaded
     */
    private List<Element> disseminateListInternal(DSpaceObject dso, boolean addSchema)
            throws CrosswalkException, IOException, SQLException, AuthorizeException {
        final int type = dso.getType();
        DCValue[] dcvs;
        if (type == Constants.ITEM) {
            dcvs = item2Metadata((Item) dso);
        } else if (type == Constants.COLLECTION) {
            dcvs = collection2Metadata((Collection) dso);
        } else if (type == Constants.COMMUNITY) {
            dcvs = community2Metadata((Community) dso);
        } else if (type == Constants.SITE) {
            dcvs = site2Metadata((Site) dso);
        } else {
            throw new CrosswalkObjectNotSupported(
                    "MODSDisseminationCrosswalk can only crosswalk Items, Collections, Communities, or Sites");
        }
        initMap();
        if (modsMap == null) {
            // Report a missing table as a crosswalk error instead of failing
            // with a NullPointerException on the first lookup below.
            throw new CrosswalkInternalException(
                    "MODS crosswalk mapping table is not initialized; was the crosswalk instantiated by the PluginManager?");
        }

        List<Element> result = new ArrayList<Element>(dcvs.length);

        for (DCValue dcv : dcvs) {
            // Mapping key: schema.element, plus .qualifier when one is present.
            String qdc = dcv.schema + "." + dcv.element;
            if (dcv.qualifier != null) {
                qdc += "." + dcv.qualifier;
            }
            String value = dcv.value;

            modsTriple trip = modsMap.get(qdc);
            if (trip == null) {
                log.warn("WARNING: " + getPluginInstanceName() + ": No MODS mapping for \"" + qdc + "\"");
                continue;
            }

            try {
                // Work on a copy so the prototype in the table stays pristine.
                Element me = (Element) trip.xml.clone();
                if (addSchema) {
                    me.setAttribute("schemaLocation", schemaLocation, XSI_NS);
                }
                Iterator<?> ni = trip.xpath.selectNodes(me).iterator();
                if (!ni.hasNext()) {
                    log.warn("XPath \"" + trip.xpath.getXPath() + "\" found no elements in \""
                            + outputUgly.outputString(me) + "\", qdc=" + qdc);
                }
                while (ni.hasNext()) {
                    Object what = ni.next();
                    if (what instanceof Element) {
                        ((Element) what).setText(checkedString(value));
                    } else if (what instanceof Attribute) {
                        ((Attribute) what).setValue(checkedString(value));
                    } else if (what instanceof Text) {
                        ((Text) what).setText(checkedString(value));
                    } else {
                        log.warn("Got unknown object from XPath, class=" + what.getClass().getName());
                    }
                }
                result.add(me);
            } catch (JDOMException je) {
                log.error("Error following XPath in modsTriple: context=" + outputUgly.outputString(trip.xml)
                        + ", xpath=" + trip.xpath.getXPath() + ", exception=" + je.toString());
            }
        }
        return result;
    }

    /**
     * ModsCrosswalk can disseminate: Items, Collections, Communities, and Site.
     *
     * @param dso the object to check
     * @return true when the object's type is one of the four listed above
     */
    public boolean canDisseminate(DSpaceObject dso) {
        final int type = dso.getType();
        if (type == Constants.ITEM || type == Constants.COLLECTION) {
            return true;
        }
        return type == Constants.COMMUNITY || type == Constants.SITE;
    }

    /**
     * ModsCrosswalk prefers element form over list.
     *
     * @return always false
     */
    public boolean preferList() {
        return false;
    }

    /**
     * Generate a list of metadata elements for the given DSpace
     * site.  The result holds, in this order and only when the underlying
     * value is not null: an identifier.uri built from the site handle, an
     * identifier.uri holding the site URL, and the title.
     *
     * @param site
     *            The site to derive metadata from
     * @return the generated "dc" metadata values
     */
    protected DCValue[] site2Metadata(Site site) {
        List<DCValue> metadata = new ArrayList<DCValue>();

        String handle = site.getHandle();
        String title = site.getName();
        String url = site.getURL();

        // Test the handle itself: the concatenated URI can never be null, so
        // testing it would let "http://hdl.handle.net/null" through.
        if (handle != null) {
            metadata.add(createDCValue("identifier", "uri", "http://hdl.handle.net/" + handle));
        }

        //FIXME: adding two URIs for now (site handle and URL), in case site isn't using handles
        if (url != null) {
            metadata.add(createDCValue("identifier", "uri", url));
        }

        if (title != null) {
            metadata.add(createDCValue("title", null, title));
        }

        return metadata.toArray(new DCValue[metadata.size()]);
    }

    /**
     * Generate a list of metadata elements for the given DSpace
     * community.  Each of the following is added only when its source value
     * is not null: description (introductory text), description.abstract
     * (short description), description.tableofcontents (side bar text),
     * identifier.uri (built from the handle), rights (copyright text) and
     * title (name).
     *
     * @param community
     *            The community to derive metadata from
     * @return the generated "dc" metadata values
     */
    protected DCValue[] community2Metadata(Community community) {
        List<DCValue> metadata = new ArrayList<DCValue>();

        String description = community.getMetadata("introductory_text");
        String description_abstract = community.getMetadata("short_description");
        String description_table = community.getMetadata("side_bar_text");
        String handle = community.getHandle();
        String rights = community.getMetadata("copyright_text");
        String title = community.getMetadata("name");

        if (description != null) {
            metadata.add(createDCValue("description", null, description));
        }

        if (description_abstract != null) {
            metadata.add(createDCValue("description", "abstract", description_abstract));
        }

        if (description_table != null) {
            metadata.add(createDCValue("description", "tableofcontents", description_table));
        }

        // Test the handle itself: the concatenated URI can never be null, so
        // testing it would let "http://hdl.handle.net/null" through.
        if (handle != null) {
            metadata.add(createDCValue("identifier", "uri", "http://hdl.handle.net/" + handle));
        }

        if (rights != null) {
            metadata.add(createDCValue("rights", null, rights));
        }

        if (title != null) {
            metadata.add(createDCValue("title", null, title));
        }

        return metadata.toArray(new DCValue[metadata.size()]);
    }

    /**
     * Generate a list of metadata elements for the given DSpace
     * collection.  Each of the following is added only when its source value
     * is not null: description (introductory text), description.abstract
     * (short description), description.tableofcontents (side bar text),
     * identifier.uri (built from the handle), provenance (provenance
     * description), rights (copyright text), rights.license (license) and
     * title (name).
     *
     * @param collection
     *            The collection to derive metadata from
     * @return the generated "dc" metadata values
     */
    protected DCValue[] collection2Metadata(Collection collection) {
        List<DCValue> metadata = new ArrayList<DCValue>();

        String description = collection.getMetadata("introductory_text");
        String description_abstract = collection.getMetadata("short_description");
        String description_table = collection.getMetadata("side_bar_text");
        String handle = collection.getHandle();
        String provenance = collection.getMetadata("provenance_description");
        String rights = collection.getMetadata("copyright_text");
        String rights_license = collection.getMetadata("license");
        String title = collection.getMetadata("name");

        if (description != null) {
            metadata.add(createDCValue("description", null, description));
        }

        if (description_abstract != null) {
            metadata.add(createDCValue("description", "abstract", description_abstract));
        }

        if (description_table != null) {
            metadata.add(createDCValue("description", "tableofcontents", description_table));
        }

        // Test the handle itself: the concatenated URI can never be null, so
        // testing it would let "http://hdl.handle.net/null" through.
        if (handle != null) {
            metadata.add(createDCValue("identifier", "uri", "http://hdl.handle.net/" + handle));
        }

        if (provenance != null) {
            metadata.add(createDCValue("provenance", null, provenance));
        }

        if (rights != null) {
            metadata.add(createDCValue("rights", null, rights));
        }

        // Element and qualifier are passed separately; the qualified name
        // that results is still dc.rights.license.
        if (rights_license != null) {
            metadata.add(createDCValue("rights", "license", rights_license));
        }

        if (title != null) {
            metadata.add(createDCValue("title", null, title));
        }

        return metadata.toArray(new DCValue[metadata.size()]);
    }

    /**
     * Collect the metadata values of the given DSpace item, using the
     * Item.ANY wildcard for all four arguments of the item's metadata query.
     *
     * @param item
     *            The item to derive metadata from
     * @return the values returned by the item's metadata query
     */
    protected DCValue[] item2Metadata(Item item) {
        return item.getMetadata(Item.ANY, Item.ANY, Item.ANY, Item.ANY);
    }

    /**
     * Build a metadata value in the "dc" schema.
     *
     * @param element   element name
     * @param qualifier qualifier, or null for an unqualified element
     * @param value     the metadata value
     * @return a new DCValue carrying the given fields
     */
    private DCValue createDCValue(String element, String qualifier, String value) {
        DCValue created = new DCValue();
        created.value = value;
        created.qualifier = qualifier;
        created.element = element;
        created.schema = "dc";
        return created;
    }

    /**
     * Check a string for non-XML characters and strip any that are found.
     * <p>
     * The string is scanned by Unicode code point rather than by UTF-16
     * char, so a supplementary character (stored as a surrogate pair) is
     * judged as the single character it represents.  Judged one char at a
     * time, both halves of every pair would be discarded.
     *
     * @param value the text to check; may be null
     * @return null when value is null; value itself when the JDOM Verifier
     *         accepts it as character data; otherwise a copy of value without
     *         the code points the Verifier does not consider XML characters
     */
    private String checkedString(String value) {
        if (value == null) {
            return null;
        }
        String reason = Verifier.checkCharacterData(value);
        if (reason == null) {
            return value;
        }
        if (log.isDebugEnabled()) {
            log.debug("Filtering out non-XML characters in string, reason=" + reason);
        }
        StringBuilder result = new StringBuilder(value.length());
        for (int i = 0; i < value.length();) {
            // An unpaired surrogate comes back as its own code point here and
            // is then rejected by the Verifier.
            int codePoint = value.codePointAt(i);
            if (Verifier.isXMLCharacter(codePoint)) {
                result.appendCodePoint(codePoint);
            }
            i += Character.charCount(codePoint);
        }
        return result.toString();
    }

}