dk.netarkivet.common.utils.XmlUtils.java Source code

Java tutorial

Introduction

Here is the source code for dk.netarkivet.common.utils.XmlUtils.java

Source

/* File:        $Id$
 * Revision:    $Revision$
 * Author:      $Author$
 * Date:        $Date$
 *
 * The Netarchive Suite - Software to harvest and preserve websites
 * Copyright 2004-2012 The Royal Danish Library, the Danish State and
 * University Library, the National Library of France and the Austrian
 * National Library.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 */

package dk.netarkivet.common.utils;

import javax.xml.XMLConstants;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.Source;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamSource;
import javax.xml.validation.Schema;
import javax.xml.validation.SchemaFactory;
import javax.xml.validation.Validator;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringReader;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Node;
import org.dom4j.io.SAXReader;
import org.xml.sax.SAXException;

import dk.netarkivet.common.exceptions.ArgumentNotValid;
import dk.netarkivet.common.exceptions.IOFailure;

/**
 * Utilities for handling XML-files.
 *
 */
public class XmlUtils {

    private static Log log = LogFactory.getLog(XmlUtils.class.getName());

    /** Read and parse an XML-file, and return
      * a Document object representing this object.
      * @param f a given xml file
      * @return a Document representing the xml file
      * @throws IOFailure if unable to read the xml file
      *          or unable to parse the file as XML
      */
    public static Document getXmlDoc(File f) throws IOFailure {
        ArgumentNotValid.checkNotNull(f, "File f");
        SAXReader reader = new SAXReader();
        if (!f.canRead()) {
            log.debug("Could not read file: '" + f + "'");
            throw new IOFailure("Could not read file: '" + f + "'");
        }

        try {
            return reader.read(f);
        } catch (DocumentException e) {
            log.warn("Could not parse the file as XML: '" + f + "'", e);
            throw new IOFailure("Could not parse the file as XML: '" + f + "'", e);
        }
    }

    /** Read and parse an XML stream, and return
     * a Document object representing this object.
     * @param resourceAsStream a given xml document
     * @return a Document representing the xml document
     * @throws IOFailure if unable to read the xml document
     *          or unable to parse the document as XML
     */
    public static Document getXmlDoc(InputStream resourceAsStream) {
        ArgumentNotValid.checkNotNull(resourceAsStream, "InputStream resourceAsStream");
        SAXReader reader = new SAXReader();
        try {
            return reader.read(resourceAsStream);
        } catch (DocumentException e) {
            log.warn("Could not parse inputstream as XML: " + resourceAsStream, e);
            throw new IOFailure("Could not parse inputstream as XML:" + resourceAsStream, e);
        }
    }

    /**
     * Set a XmlNode defined by the given XPath to the given value.
     *
     * @param doc   the Document, which is being modified
     * @param xpath the given XPath
     * @param value the given value
     * @throws IOFailure If the given XPath was not found in the document
     */
    public static void setNode(Document doc, String xpath, String value) {
        ArgumentNotValid.checkNotNull(doc, "Document doc");
        ArgumentNotValid.checkNotNullOrEmpty(xpath, "String xpath");
        ArgumentNotValid.checkNotNull(value, "String value");

        Node xpathNode = doc.selectSingleNode(xpath);
        if (xpathNode == null) {
            throw new IOFailure("Element '" + xpath + "' could not be found in the document '"
                    + doc.getRootElement().getName() + "'!");
        }
        xpathNode.setText(value);
    }

    /**
     * Set a List of XmlNodes defined by the given XPath to the given value.
     *
     * @param doc   the Document, which is being modified
     * @param xpath the given XPath
     * @param value the given value
     * @throws IOFailure If the given XPath was not found in the document
     */
    public static void setNodes(Document doc, String xpath, String value) {
        ArgumentNotValid.checkNotNull(doc, "Document doc");
        ArgumentNotValid.checkNotNullOrEmpty(xpath, "String xpath");
        ArgumentNotValid.checkNotNull(value, "String value");
        List<Node> xpathNodes = doc.selectNodes(xpath);
        if (xpathNodes == null) {
            throw new IOFailure("Element '" + xpath + "' could not be found in the document '"
                    + doc.getRootElement().getName() + "'!");
        }
        for (int i = 0; i < xpathNodes.size(); ++i) {
            xpathNodes.get(i).setText(value);
        }
    }

    /**
     * Validate that the settings xml files conforms to the XSD.
     *
     * @param xsdFile Schema to check settings against.
     * @throws ArgumentNotValid if unable to validate the settings files
     * @throws IOFailure If unable to read the settings files and/or 
     * the xsd file.
     */
    public static void validateWithXSD(File xsdFile) {
        ArgumentNotValid.checkNotNull(xsdFile, "File xsdFile");
        List<File> settingsFiles = Settings.getSettingsFiles();
        for (File settingsFile : settingsFiles) {
            try {
                DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance();
                builderFactory.setNamespaceAware(true);
                DocumentBuilder parser = builderFactory.newDocumentBuilder();
                org.w3c.dom.Document document = parser.parse(settingsFile);

                // create a SchemaFactory capable of understanding WXS schemas
                SchemaFactory factory = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI);

                // load a WXS schema, represented by a Schema instance
                Source schemaFile = new StreamSource(xsdFile);
                Schema schema = factory.newSchema(schemaFile);

                // create a Validator instance, which can be used to validate an
                // instance document
                Validator validator = schema.newValidator();

                // validate the DOM tree
                try {
                    validator.validate(new DOMSource(document));
                } catch (SAXException e) {
                    // instance document is invalid!
                    final String msg = "Settings file '" + settingsFile + "' does not validate using '" + xsdFile
                            + "'";
                    log.warn(msg, e);
                    throw new ArgumentNotValid(msg, e);
                }
            } catch (IOException e) {
                throw new IOFailure("Error while validating: ", e);
            } catch (ParserConfigurationException e) {
                final String msg = "Error validating settings file '" + settingsFile + "'";
                log.warn(msg, e);
                throw new ArgumentNotValid(msg, e);
            } catch (SAXException e) {
                final String msg = "Error validating settings file '" + settingsFile + "'";
                log.warn(msg, e);
                throw new ArgumentNotValid(msg, e);
            }
        }
    }

    /**
     * Write document tree to file.
     *
     * @param doc the document tree to save.
     * @param f the file to write the document to.
     * @throws IOFailure On trouble writing XML file to disk.
     */
    public static void writeXmlToFile(Document doc, File f) throws IOFailure {
        FileOutputStream fos = null;
        try {
            try {
                fos = new FileOutputStream(f);
                StreamUtils.writeXmlToStream(doc, fos);
            } finally {
                if (fos != null) {
                    fos.close();
                }
            }
        } catch (IOException e) {
            throw new IOFailure("Unable to write XML to file '" + f.getAbsolutePath() + "'", e);
        }
    }

    /** Parses a given string to produce a {@link org.w3c.dom.Document} 
     * instance.
     * @param xml Some XML text.
     * @return a {@link org.w3c.dom.Document} parsed from the given xml. 
     * @throws DocumentException If unable to parse the given text as XML.
     */
    public static Document documentFromString(String xml) throws DocumentException {
        Document doc;
        try {
            SAXReader reader = new SAXReader();
            StringReader in = new StringReader(xml);
            doc = reader.read(in);
            in.close();
        } catch (DocumentException e) {
            log.warn("Failed to read the contents of the string as XML:" + xml);
            throw e;
        }
        return doc;
    }

}