info.magnolia.importexport.DataTransporter.java Source code

Java tutorial

Introduction

Here is the source code for info.magnolia.importexport.DataTransporter.java

Source

/**
 * This file Copyright (c) 2003-2012 Magnolia International
 * Ltd.  (http://www.magnolia-cms.com). All rights reserved.
 *
 *
 * This file is dual-licensed under both the Magnolia
 * Network Agreement and the GNU General Public License.
 * You may elect to use one or the other of these licenses.
 *
 * This file is distributed in the hope that it will be
 * useful, but AS-IS and WITHOUT ANY WARRANTY; without even the
 * implied warranty of MERCHANTABILITY or FITNESS FOR A
 * PARTICULAR PURPOSE, TITLE, or NONINFRINGEMENT.
 * Redistribution, except as permitted by whichever of the GPL
 * or MNA you select, is prohibited.
 *
 * 1. For the GPL license (GPL), you can redistribute and/or
 * modify this file under the terms of the GNU General
 * Public License, Version 3, as published by the Free Software
 * Foundation.  You should have received a copy of the GNU
 * General Public License, Version 3 along with this program;
 * if not, write to the Free Software Foundation, Inc., 51
 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * 2. For the Magnolia Network Agreement (MNA), this file
 * and the accompanying materials are made available under the
 * terms of the MNA which accompanies this distribution, and
 * is available at http://www.magnolia-cms.com/mna.html
 *
 * Any modifications to this file must keep this entire header
 * intact.
 *
 */
package info.magnolia.importexport;

import info.magnolia.cms.beans.runtime.Document;
import info.magnolia.cms.core.Content;
import info.magnolia.cms.core.HierarchyManager;
import info.magnolia.cms.core.ItemType;
import info.magnolia.cms.core.SystemProperty;
import info.magnolia.cms.util.ContentUtil;
import info.magnolia.cms.util.NodeDataUtil;
import info.magnolia.context.MgnlContext;
import info.magnolia.importexport.filters.ImportXmlRootFilter;
import info.magnolia.importexport.filters.MagnoliaV2Filter;
import info.magnolia.importexport.filters.MetadataUuidFilter;
import info.magnolia.importexport.filters.RemoveMixversionableFilter;
import info.magnolia.importexport.filters.VersionFilter;
import info.magnolia.importexport.postprocessors.MetaDataImportPostProcessor;
import info.magnolia.jcr.util.NodeUtil;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.net.URLEncoder;
import java.text.MessageFormat;
import java.util.Iterator;
import java.util.List;
import java.util.Properties;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.zip.DeflaterOutputStream;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
import java.util.zip.ZipInputStream;
import java.util.zip.ZipOutputStream;

import javax.jcr.ImportUUIDBehavior;
import javax.jcr.Node;
import javax.jcr.NodeIterator;
import javax.jcr.PathNotFoundException;
import javax.jcr.RepositoryException;
import javax.jcr.Session;
import javax.jcr.Workspace;
import javax.xml.transform.Source;
import javax.xml.transform.sax.SAXTransformerFactory;
import javax.xml.transform.stream.StreamSource;

import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.xml.serialize.OutputFormat;
import org.apache.xml.serialize.XMLSerializer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLFilter;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.XMLReaderFactory;

/**
 * Utility class for manipulation of XML files (mainly JCR XML).
 * @author <a href="mailto:niko@macnica.com">Nicolas Modrzyk</a>
 * @author Oliver Lietz
 *
 * @version $Id$
 */
public class DataTransporter {

    /**
     * One chunk of an export path: a run of non-dot characters, a run of dots, a run of non-dot characters.
     * Anything that is not a dot is treated as part of a name, so characters such as spaces, <code>%</code> or
     * non-ASCII letters survive {@link #revertExportPath(String)}.
     */
    private static final Pattern DOT_NAME_PATTERN = Pattern.compile("[^.]*\\.*[^.]*");

    /** Number of spaces used per indentation level when formatting exported XML. */
    private static final int INDENT_VALUE = 2;

    /** Maximum line width used when formatting exported XML. */
    private static final int LINE_WIDTH = 120;

    private static final Logger log = LoggerFactory.getLogger(DataTransporter.class);

    /** UUID collision behavior used by {@link #executeBootstrapImport(File, String)}. */
    static final int BOOTSTRAP_IMPORT_MODE = ImportUUIDBehavior.IMPORT_UUID_COLLISION_REPLACE_EXISTING;

    public static final String ZIP = ".zip";

    public static final String GZ = ".gz";

    public static final String XML = ".xml";

    public static final String PROPERTIES = ".properties";

    public static final String DOT = ".";

    public static final String SLASH = "/";

    public static final String UTF8 = "UTF-8";

    public static final String JCR_ROOT = "jcr:root";

    /**
     * Imports an uploaded XML document into the repository by delegating to
     * {@link #importFile(File, String, String, boolean, int, boolean, boolean)} with the document's file.
     * @param xmlDocument uploaded file
     * @param repositoryName selected repository
     * @param basepath base path in repository
     * @param keepVersionHistory if <code>false</code> version info will be stripped before importing the document
     * @param importMode a valid value for ImportUUIDBehavior
     * @param saveAfterImport if <code>true</code> the session is saved once the import is done
     * @param createBasepathIfNotExist if <code>true</code> the base path is created when it does not exist
     * @throws IOException if the file can't be opened or the session can't be saved
     * @see ImportUUIDBehavior
     */
    public static synchronized void importDocument(Document xmlDocument, String repositoryName, String basepath,
            boolean keepVersionHistory, int importMode, boolean saveAfterImport, boolean createBasepathIfNotExist)
            throws IOException {
        importFile(xmlDocument.getFile(), repositoryName, basepath, keepVersionHistory, importMode, saveAfterImport,
                createBasepathIfNotExist);
    }

    /**
     * Opens the given (optionally zipped/gzipped) XML file and imports its content into the repository.
     * The absolute path of the file is used as the import name.
     * @param xmlFile (zipped/gzipped) XML file to import
     * @param repositoryName selected repository
     * @param basepath base path in repository
     * @param keepVersionHistory if <code>false</code> version info will be stripped before importing the document
     * @param importMode a valid value for ImportUUIDBehavior
     * @param saveAfterImport if <code>true</code> the session is saved once the import is done
     * @param createBasepathIfNotExist if <code>true</code> the base path is created when it does not exist
     * @throws IOException if the file can't be opened or the session can't be saved
     * @see ImportUUIDBehavior
     */
    public static synchronized void importFile(File xmlFile, String repositoryName, String basepath,
            boolean keepVersionHistory, int importMode, boolean saveAfterImport, boolean createBasepathIfNotExist)
            throws IOException {
        String name = xmlFile.getAbsolutePath();

        InputStream xmlStream = getInputStreamForFile(xmlFile);
        try {
            importXmlStream(xmlStream, repositoryName, basepath, name, keepVersionHistory, importMode,
                    saveAfterImport, createBasepathIfNotExist);
        } finally {
            // closing twice is harmless; this guarantees the file handle is released whatever happens above
            IOUtils.closeQuietly(xmlStream);
        }
    }

    /**
     * Imports a bootstrap file. The target base path is derived from the file name: the extension(s), the first
     * dot-separated segment and the last dot-separated segment are dropped, and the remaining dots become slashes
     * (e.g. <code>website.foo.bar.xml</code> and <code>website.foo.bar.xml.gz</code> are both imported below
     * <code>/foo</code>). Files ending in <code>.properties</code> are handed to
     * {@link #importProperties(Properties, String)} instead.
     * @param xmlFile bootstrap file (<code>.xml</code>, <code>.xml.gz</code>, <code>.xml.zip</code> or <code>.properties</code>)
     * @param repositoryName repository to import into
     * @throws IOException if the file can't be read or the session can't be saved
     */
    public static void executeBootstrapImport(File xmlFile, String repositoryName) throws IOException {
        String filenameWithoutExt = StringUtils.substringBeforeLast(xmlFile.getName(), DOT);
        if (filenameWithoutExt.endsWith(XML)) {
            // file ends in .xml.gz or .xml.zip (the inner .xml is kept in the file name so the content can be
            // viewed after decompression): strip the inner extension as well, otherwise the last node name
            // would end up in the base path
            filenameWithoutExt = StringUtils.substringBeforeLast(filenameWithoutExt, DOT);
        }
        String pathName = StringUtils.substringAfter(StringUtils.substringBeforeLast(filenameWithoutExt, DOT), DOT);

        pathName = decodePath(pathName, UTF8);

        String basepath = SLASH + StringUtils.replace(pathName, DOT, SLASH);

        if (xmlFile.getName().endsWith(PROPERTIES)) {
            Properties properties = new Properties();
            FileInputStream stream = new FileInputStream(xmlFile);
            try {
                properties.load(stream);
            } finally {
                IOUtils.closeQuietly(stream);
            }
            importProperties(properties, repositoryName);
        } else {
            DataTransporter.importFile(xmlFile, repositoryName, basepath, false, BOOTSTRAP_IMPORT_MODE, true, true);
        }
    }

    /**
     * Sets node data from properties. Each key is split at its last dot: the part before it (with dots replaced by
     * slashes) is the node path, the part after it is the node data name. Keys whose node can't be found are
     * skipped; failures to set a value are logged and do not stop the loop.
     * @param properties properties to import
     * @param repositoryName repository to import into
     * @deprecated since 4.0 - use the PropertiesImportExport class instead.
     */
    @Deprecated
    public static void importProperties(Properties properties, String repositoryName) {
        for (Iterator<Object> iter = properties.keySet().iterator(); iter.hasNext();) {
            String key = (String) iter.next();
            String value = (String) properties.get(key);

            String name = StringUtils.substringAfterLast(key, ".");
            String path = StringUtils.substringBeforeLast(key, ".").replace('.', '/');
            Content node = ContentUtil.getContent(repositoryName, path);
            if (node != null) {
                try {
                    NodeDataUtil.getOrCreate(node, name).setValue(value);
                    node.save();
                } catch (RepositoryException e) {
                    log.error("can't set property " + key, e);
                }
            }
        }

    }

    /**
     * Imports XML stream into repository.
     * If <code>keepVersionHistory</code> is <code>false</code> the XML is filtered by <code>MagnoliaV2Filter</code>,
     * <code>VersionFilter</code>, <code>RemoveMixversionableFilter</code> and <code>ImportXmlRootFilter</code>, and
     * the nodes that were not below the base path before the import are post processed. If <code>name</code>
     * denotes an existing file and a file with the same name but the extension <code>.xsl</code> exists next to it,
     * that stylesheet is applied to the XML first.
     * The stream is closed by this method once it has been verified to be non-null.
     * @param xmlStream XML stream to import
     * @param repositoryName selected repository
     * @param basepath base path in repository
     * @param name (absolute path of <code>File</code>)
     * @param keepVersionHistory if <code>false</code> version info will be stripped before importing the document
     * @param importMode a valid value for ImportUUIDBehavior
     * @param saveAfterImport if <code>true</code> the session is saved once the import is done
     * @param createBasepathIfNotExist if <code>true</code> the base path is created when it does not exist
     * @throws IOException if the stream is <code>null</code> or the session can't be saved
     * @throws IllegalStateException if there is no hierarchy manager for the repository
     * @throws RuntimeException wrapping any failure that occurs while importing
     * @see ImportUUIDBehavior
     * @see ImportXmlRootFilter
     * @see VersionFilter
     * @see MagnoliaV2Filter
     */
    public static synchronized void importXmlStream(InputStream xmlStream, String repositoryName, String basepath,
            String name, boolean keepVersionHistory, int importMode, boolean saveAfterImport,
            boolean createBasepathIfNotExist) throws IOException {

        // TODO hopefully this will be fixed with a more useful message with the Bootstrapper refactoring
        if (xmlStream == null) {
            throw new IOException("Can't import a null stream into repository: " + repositoryName + ", basepath: "
                    + basepath + ", name: " + name);
        }

        HierarchyManager hm = MgnlContext.getHierarchyManager(repositoryName);
        if (hm == null) {
            // every other exit of this method closes the stream, so do it here too
            IOUtils.closeQuietly(xmlStream);
            throw new IllegalStateException(
                    "Can't import " + name + " since repository " + repositoryName + " does not exist.");
        }
        Workspace ws = hm.getWorkspace();

        if (log.isDebugEnabled()) {
            log.debug("Importing content into repository: [{}] from: [{}] into path: [{}]",
                    new Object[] { repositoryName, name, basepath });
        }

        if (!hm.isExist(basepath) && createBasepathIfNotExist) {
            try {
                ContentUtil.createPath(hm, basepath, ItemType.CONTENT);
            } catch (RepositoryException e) {
                // not fatal here: looking up the base path below fails if it is really missing
                log.error("can't create path [" + basepath + "]", e);
            }
        }

        Session session = ws.getSession();
        InputStream xslStream = null;

        try {

            // Collects a list with all nodes at the basepath before import so we can see exactly which nodes were imported afterwards
            List<Node> nodesBeforeImport = NodeUtil
                    .asList(NodeUtil.asIterable(session.getNode(basepath).getNodes()));

            if (keepVersionHistory) {
                // do not manipulate
                session.importXML(basepath, xmlStream, importMode);
            } else {
                // create readers/filters and chain
                XMLReader initialReader = XMLReaderFactory
                        .createXMLReader(org.apache.xerces.parsers.SAXParser.class.getName());
                try {
                    initialReader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
                } catch (SAXException e) {
                    log.error("could not set parser feature", e);
                }

                XMLFilter magnoliaV2Filter = null;

                // if stream is from regular file, test for belonging XSL file to apply XSL transformation to XML
                File sourceFile = name == null ? null : new File(name);
                if (sourceFile != null && sourceFile.isFile()) {
                    xslStream = getXslStreamForXmlFile(sourceFile);
                    if (xslStream != null) {
                        Source xslSource = new StreamSource(xslStream);
                        SAXTransformerFactory saxTransformerFactory = (SAXTransformerFactory) SAXTransformerFactory
                                .newInstance();
                        // NOTE(review): initialReader (with DOCTYPE declarations disallowed) is not set as the
                        // parent of this filter, so the XSL path does not appear to use it - confirm and
                        // consider xslFilter.setParent(initialReader)
                        XMLFilter xslFilter = saxTransformerFactory.newXMLFilter(xslSource);
                        magnoliaV2Filter = new MagnoliaV2Filter(xslFilter);
                    }
                }

                if (magnoliaV2Filter == null) {
                    magnoliaV2Filter = new MagnoliaV2Filter(initialReader);
                }

                XMLFilter versionFilter = new VersionFilter(magnoliaV2Filter);

                // enable this to strip useless "name" properties from dialogs
                // versionFilter = new UselessNameFilter(versionFilter);

                // enable this to strip mix:versionable from pre 3.6 xml files
                versionFilter = new RemoveMixversionableFilter(versionFilter);

                ImportXmlRootFilter finalReader = new ImportXmlRootFilter(versionFilter);

                ContentHandler handler = session.getImportContentHandler(basepath, importMode);
                finalReader.setContentHandler(handler);

                // parse XML, import is done by handler from session
                try {
                    finalReader.parse(new InputSource(xmlStream));
                } finally {
                    IOUtils.closeQuietly(xmlStream);
                }

                if (finalReader.rootNodeFound) {
                    moveDummyRootChildren(session, basepath);
                }

                postProcessNewNodes(session, basepath, nodesBeforeImport);
            }
        } catch (Exception e) {
            throw new RuntimeException("Error importing " + name + ": " + e.getMessage(), e);
        } finally {
            IOUtils.closeQuietly(xslStream);
            IOUtils.closeQuietly(xmlStream);
        }

        if (saveAfterImport) {
            try {
                session.save();
            } catch (RepositoryException e) {
                log.error(MessageFormat.format(
                        "Unable to save changes to the [{0}] repository due to a {1} Exception: {2}.",
                        new Object[] { repositoryName, e.getClass().getName(), e.getMessage() }), e);
                // keep the original failure attached so callers can see the root cause
                IOException ioException = new IOException(e.getMessage());
                ioException.initCause(e);
                throw ioException;
            }
        }
    }

    /**
     * Moves the children of the <code>jcr:root</code> node found below the base path up to the base path itself,
     * replacing items that already exist at the target path, and then removes the <code>jcr:root</code> node.
     */
    private static void moveDummyRootChildren(Session session, String basepath) throws RepositoryException {
        String path = basepath.endsWith(SLASH) ? basepath : basepath + SLASH;

        Node dummyRoot = (Node) session.getItem(path + JCR_ROOT);
        for (NodeIterator children = dummyRoot.getNodes(); children.hasNext();) {
            Node child = children.nextNode();
            String targetPath = path + child.getName();

            if (session.itemExists(targetPath)) {
                session.getItem(targetPath).remove();
            }

            // move child to real root
            session.move(child.getPath(), targetPath);
        }
        // delete the dummy node
        dummyRoot.remove();
    }

    /**
     * Post processes every child of the base path that is not the same node as one of the given nodes.
     */
    private static void postProcessNewNodes(Session session, String basepath, List<Node> nodesBeforeImport)
            throws RepositoryException {
        NodeIterator nodesAfterImport = session.getNode(basepath).getNodes();
        while (nodesAfterImport.hasNext()) {
            Node nodeAfterImport = nodesAfterImport.nextNode();
            boolean existedBeforeImport = false;
            for (Node nodeBeforeImport : nodesBeforeImport) {
                if (NodeUtil.isSame(nodeAfterImport, nodeBeforeImport)) {
                    existedBeforeImport = true;
                    break;
                }
            }
            if (!existedBeforeImport) {
                postProcessAfterImport(nodeAfterImport);
            }
        }
    }

    /**
     * Runs the {@link MetaDataImportPostProcessor} on the node; failures are rethrown with the node path added
     * to the message.
     */
    private static void postProcessAfterImport(Node node) throws RepositoryException {
        try {
            new MetaDataImportPostProcessor().postProcessNode(node);
        } catch (RepositoryException e) {
            throw new RepositoryException("Failed to post process imported nodes at path "
                    + NodeUtil.getNodePathIfPossible(node) + ": " + e.getMessage(), e);
        }
    }

    /**
     * Looks for a stylesheet next to the given file: same absolute path with everything after the last dot
     * replaced by <code>xsl</code>.
     * @param file the XML file
     * @return XSL stream for Xml file or <code>null</code>; the caller has to close it
     */
    protected static InputStream getXslStreamForXmlFile(File file) {
        InputStream xslStream = null;
        String xslFilename = StringUtils.substringBeforeLast(file.getAbsolutePath(), ".") + ".xsl";
        File xslFile = new File(xslFilename);
        if (xslFile.exists()) {
            try {
                xslStream = new FileInputStream(xslFile);
                log.info("XSL file for [" + file.getName() + "] found (" + xslFile.getName() + ")");
            } catch (FileNotFoundException e) { // should never happen (xslFile.exists())
                log.error("Could not open XSL file " + xslFile.getAbsolutePath(), e);
            }
        }
        return xslStream;
    }

    /**
     * Creates a stream from the (zipped/gzipped) XML file. For a zip file the returned stream is positioned on
     * the first entry that is not a directory.
     * @param xmlFile file to open; the kind of stream is chosen from the file name's extension
     * @return stream of the file
     * @throws IOException if the file can't be opened or a zip file contains no file entry
     */
    private static InputStream getInputStreamForFile(File xmlFile) throws IOException {
        String fileName = xmlFile.getName();
        FileInputStream fileStream = new FileInputStream(xmlFile);
        try {
            if (fileName.endsWith(ZIP)) {
                ZipInputStream zipStream = new ZipInputStream(fileStream);
                // a ZipInputStream yields no data until it has been positioned on an entry
                java.util.zip.ZipEntry entry = zipStream.getNextEntry();
                while (entry != null && entry.isDirectory()) {
                    entry = zipStream.getNextEntry();
                }
                if (entry == null) {
                    throw new IOException("Zip file " + xmlFile.getAbsolutePath() + " does not contain any file");
                }
                return zipStream;
            }
            if (fileName.endsWith(GZ)) {
                return new GZIPInputStream(fileStream);
            }
            // if(fileName.endsWith(XML))
            return fileStream;
        } catch (IOException e) {
            // the wrapper could not be set up: don't leave the file handle open
            IOUtils.closeQuietly(fileStream);
            throw e;
        } catch (RuntimeException e) {
            IOUtils.closeQuietly(fileStream);
            throw e;
        }
    }

    /**
     * Exports the content below the base path as system view XML to the given stream, compressing it when
     * <code>ext</code> ends with <code>.zip</code> or <code>.gz</code>. On success the base stream is flushed and
     * closed. Failures that occur while exporting are rethrown as <code>RuntimeException</code> and leave the
     * base stream open.
     * @param baseOutputStream stream to write to
     * @param keepVersionHistory if <code>false</code> the exported XML is passed through a <code>VersionFilter</code>
     * @param format only evaluated if <code>keepVersionHistory</code> is <code>true</code>: if <code>false</code>
     * the system view is written as is, otherwise it is formatted
     * @param session the session to use to export the data from the repository
     * @param basepath the basepath in the repository
     * @param repository the repository to export
     * @param ext extension of the export file, selects the compression
     * @throws IOException if the compressed stream can't be set up or finished
     */
    public static void executeExport(OutputStream baseOutputStream, boolean keepVersionHistory, boolean format,
            Session session, String basepath, String repository, String ext) throws IOException {
        OutputStream outputStream = baseOutputStream;
        if (ext.endsWith(ZIP)) {
            ZipOutputStream zipStream = new ZipOutputStream(baseOutputStream);
            // a ZipOutputStream rejects writes until an entry has been started
            zipStream.putNextEntry(new java.util.zip.ZipEntry(zipEntryName(repository, basepath)));
            outputStream = zipStream;
        } else if (ext.endsWith(GZ)) {
            outputStream = new GZIPOutputStream(baseOutputStream);
        }

        try {
            if (keepVersionHistory) {
                // use exportSystemView in order to preserve property types
                // http://issues.apache.org/jira/browse/JCR-115
                if (!format) {
                    session.exportSystemView(basepath, outputStream, false, false);
                } else {
                    parseAndFormat(outputStream, null, repository, basepath, session, false);
                }
            } else {
                // use XMLSerializer and a SAXFilter in order to rewrite the
                // file
                XMLReader reader = new VersionFilter(
                        XMLReaderFactory.createXMLReader(org.apache.xerces.parsers.SAXParser.class.getName()));
                parseAndFormat(outputStream, reader, repository, basepath, session, false);
            }
        } catch (IOException e) {
            throw new RuntimeException(e);
        } catch (SAXException e) {
            throw new RuntimeException(e);
        } catch (RepositoryException e) {
            throw new RuntimeException(e);
        }

        // finish the stream properly if zip stream
        // this is not done by the IOUtils
        if (outputStream instanceof DeflaterOutputStream) {
            ((DeflaterOutputStream) outputStream).finish();
        }

        baseOutputStream.flush();
        IOUtils.closeQuietly(baseOutputStream);
    }

    /**
     * Builds the name of the single entry of a zipped export: repository name, export path and <code>.xml</code>.
     */
    private static String zipEntryName(String repository, String basepath) {
        String exportPath = createExportPath(StringUtils.defaultString(basepath));
        if (DOT.equals(exportPath)) {
            // export of the root node: the name is just the repository
            exportPath = StringUtils.EMPTY;
        }
        return StringUtils.defaultString(repository) + exportPath + XML;
    }

    /**
     * Exports the content of the repository, and format it if necessary.
     * The system view is first written to a temporary file, which is then read back through the reader and
     * formatted into the stream; the temporary file is deleted afterwards, also if the export fails.
     * @param stream the stream to write the content to
     * @param reader the reader to use to parse the xml content (so that we can perform filtering), if null instanciate
     * a default one
     * @param repository the repository to export
     * @param basepath the basepath in the repository
     * @param session the session to use to export the data from the repository
     * @param noRecurse passed on to <code>Session.exportSystemView</code>
     * @throws IOException
     * @throws SAXException
     * @throws RepositoryException
     * @throws PathNotFoundException
     */
    public static void parseAndFormat(OutputStream stream, XMLReader reader, String repository, String basepath,
            Session session, boolean noRecurse)
            throws IOException, SAXException, PathNotFoundException, RepositoryException {

        if (reader == null) {
            reader = XMLReaderFactory.createXMLReader(org.apache.xerces.parsers.SAXParser.class.getName());
        }

        // write to a temp file and then re-read it to remove version history
        File tempFile = File.createTempFile("export-" + repository + session.getUserID(), ".xml");
        try {
            OutputStream fileStream = new FileOutputStream(tempFile);
            try {
                session.exportSystemView(basepath, fileStream, false, noRecurse);
            } finally {
                IOUtils.closeQuietly(fileStream);
            }

            readFormatted(reader, tempFile, stream);
        } finally {
            if (!tempFile.delete()) {
                log.warn("Could not delete temporary export file {}", tempFile.getAbsolutePath());
            }
        }
    }

    /**
     * Reads the file through the reader and writes the formatted XML to the output stream.
     * @param reader reader used to parse the file
     * @param inputFile XML file to read
     * @param outputStream stream to write the formatted XML to; it is not closed
     * @throws FileNotFoundException
     * @throws IOException
     * @throws SAXException
     */
    protected static void readFormatted(XMLReader reader, File inputFile, OutputStream outputStream)
            throws FileNotFoundException, IOException, SAXException {
        InputStream fileInputStream = new FileInputStream(inputFile);
        try {
            readFormatted(reader, fileInputStream, outputStream);
        } finally {
            IOUtils.closeQuietly(fileInputStream);
        }
    }

    /**
     * Reads the stream through a <code>MetadataUuidFilter</code> on top of the reader and serializes the result,
     * indented, to the output stream. Extra namespaces are removed unless the system property
     * <code>magnolia.export.keep_extra_namespaces</code> is set to true.
     * @param reader reader used to parse the stream
     * @param inputStream XML to read; it is closed by this method
     * @param outputStream stream to write the formatted XML to; it is not closed
     * @throws FileNotFoundException
     * @throws IOException
     * @throws SAXException
     */
    protected static void readFormatted(XMLReader reader, InputStream inputStream, OutputStream outputStream)
            throws FileNotFoundException, IOException, SAXException {
        try {
            OutputFormat outputFormat = new OutputFormat();

            outputFormat.setPreserveSpace(false); // this is ok, doesn't affect text nodes??
            outputFormat.setIndenting(true);
            outputFormat.setIndent(INDENT_VALUE);
            outputFormat.setLineWidth(LINE_WIDTH); // need to be set after setIndenting()!

            final boolean removeUnwantedNamespaces = !SystemProperty
                    .getBooleanProperty("magnolia.export.keep_extra_namespaces"); // MAGNOLIA-2960
            MetadataUuidFilter metadataUuidFilter = new MetadataUuidFilter(reader, removeUnwantedNamespaces); // MAGNOLIA-1650
            metadataUuidFilter.setContentHandler(new XMLSerializer(outputStream, outputFormat));
            metadataUuidFilter.parse(new InputSource(inputStream));
        } finally {
            IOUtils.closeQuietly(inputStream);
        }
    }

    /**
     * URL-encodes a path, leaving the separator untouched. The path is encoded character by character, where a
     * surrogate pair counts as one character.
     * @param path path to encode
     * @param separator "." (dot) or "/", it will be not encoded if found
     * @param enc charset
     * @return the path encoded, or the path itself if the charset is not supported
     */
    public static String encodePath(String path, String separator, String enc) {
        try {
            if (!StringUtils.contains(path, separator)) {
                return URLEncoder.encode(path, enc);
            }
            StringBuilder pathEncoded = new StringBuilder();
            int index = 0;
            while (index < path.length()) {
                // step by code point: encoding the two halves of a surrogate pair separately corrupts the character
                int codePoint = path.codePointAt(index);
                String ch = new String(Character.toChars(codePoint));
                if (separator.equals(ch)) {
                    pathEncoded.append(ch);
                } else {
                    pathEncoded.append(URLEncoder.encode(ch, enc));
                }
                index += Character.charCount(codePoint);
            }
            return pathEncoded.toString();
        } catch (UnsupportedEncodingException e) {
            log.warn("Unsupported encoding [" + enc + "], path [" + path + "] is not encoded", e);
            return path;
        }
    }

    /**
     * decode a path (ex. %D0%9D%D0%B0.%B2%D0%BE%D0%BB%D0%BD)
     * @param path path to decode
     * @param enc charset
     * @return the path decoded, or the path itself if the charset is not supported
     */
    public static String decodePath(String path, String enc) {
        try {
            return URLDecoder.decode(path, enc);
        } catch (UnsupportedEncodingException e) {
            log.warn("Unsupported encoding [" + enc + "], path [" + path + "] is not decoded", e);
            return path;
        }
    }

    /**
     * Prior to 4.5 Magnolia used to produce export xml filenames where the / (slash) separating sub nodes was replaced by a dot.
     * Since 4.5, Magnolia enables dots in path names, therefore dots which are part of the node name have to be escaped by doubling them.
     * I.e. given a path like this <code>/foo/bar.baz/test../dir/baz..bar</code>, this method will produce
     * <code>.foo.bar..baz.test.....dir.baz....bar</code>.
     */
    public static String createExportPath(String path) {
        //TODO if someone is smarter than me (not an impossible thing) and can do this with one single elegant regex, please do it.
        // order matters: double the dots of the names first, then turn the slashes into single dots
        String newPath = path.replace(".", "..");
        newPath = newPath.replace("/", ".");
        return newPath;
    }

    /**
     * The opposite of {@link #createExportPath(String)}.
     * I.e. given a path like this <code>.foo.bar..baz.test.....dir.baz....bar</code>, this method will produce <code>/foo/bar.baz/test../dir/baz..bar</code>.
     */
    public static String revertExportPath(String exportPath) {
        if (".".equals(exportPath)) {
            return "/";
        }

        //TODO I have a feeling there's a simpler way to achieve our goal.
        Matcher matcher = DOT_NAME_PATTERN.matcher(exportPath);

        StringBuilder reversed = new StringBuilder(exportPath.length());

        while (matcher.find()) {
            String group = matcher.group();
            int dotsNumber = StringUtils.countMatches(group, ".");
            if (dotsNumber == 1) {
                // a single dot is a plain path separator
                reversed.append(group.replaceFirst("\\.", "/"));
            } else {
                // every pair of dots is one escaped dot of a node name
                String dots = StringUtils.substringBeforeLast(group, ".").replace("..", ".");
                String name = StringUtils.substringAfterLast(group, ".");
                reversed.append(dots);
                //if number is odd, the last dot has to be replaced with a slash
                if (dotsNumber % 2 != 0) {
                    reversed.append("/");
                }
                reversed.append(name);
            }
        }
        return reversed.toString();
    }

}