org.alfresco.extension.bulkimport.source.fs.FilesystemSourceUtils.java Source code

Java tutorial

Introduction

Here is the source code for org.alfresco.extension.bulkimport.source.fs.FilesystemSourceUtils.java

Source

/*
 * Copyright (C) 2007-2016 Peter Monks.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *     http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * 
 * This file is part of an unsupported extension to Alfresco.
 * 
 */

package org.alfresco.extension.bulkimport.source.fs;

import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.math.BigDecimal;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.io.FilenameUtils;
import org.apache.commons.io.IOUtils;
import org.alfresco.repo.content.ContentStore;
import org.alfresco.repo.content.encoding.ContentCharsetFinder;
import org.alfresco.repo.content.filestore.FileContentStore;
import org.alfresco.service.cmr.repository.ContentData;
import org.alfresco.service.cmr.repository.MimetypeService;
import org.alfresco.extension.bulkimport.source.BulkImportItemVersion;

/**
 * This class is a miscellaneous grab bag of filesystem methods that are
 * intended to be statically imported.  All methods are static and the class
 * holds no mutable state.
 *
 * @author Peter Monks (pmonks@gmail.com)
 *
 */
public final class FilesystemSourceUtils {
    private final static String DEFAULT_TEXT_ENCODING = "UTF-8";
    private final static int MAX_CONTENT_URL_LENGTH = 255;

    // Regexes for matching version files
    public final static String VERSION_LABEL_REGEX = "([\\d]+)(\\.([\\d]+))?"; // Group 0 = version label, Group 1 = major version #, group 3 (if not null) = minor version #
    private final static String VERSION_SUFFIX_REGEX = "\\.v(" + VERSION_LABEL_REGEX + ")\\z"; // Note: group numbers are one greater than shown above
    private final static String VERSION_FILENAME_REGEX = ".+" + VERSION_SUFFIX_REGEX;
    // Both patterns are compiled once so that the per-file checks do not recompile the regexes
    private final static Pattern VERSION_SUFFIX_PATTERN = Pattern.compile(VERSION_SUFFIX_REGEX);
    private final static Pattern VERSION_FILENAME_PATTERN = Pattern.compile(VERSION_FILENAME_REGEX);

    /**
     * Returns true if the suspectedChild is within the given directory.  This
     * overload converts both files to their absolute paths and delegates to
     * {@link #isInDirectory(String, String)}, which normalises the two paths
     * before comparing them.
     * 
     * @param directory      The directory in which to check <i>(may be null, although doing so will always return false)</i>.
     * @param suspectedChild The suspect child to check for <i>(may be null, although doing so will always return false)</i>.
     * @return true if and only if suspectedChild is "within" directory.  Note that this comparison is done solely at a "path string"
     *         level.  It will attempt to remove relative path elements (".." especially) to avoid incorrect results, but YMMV.
     */
    public final static boolean isInDirectory(final File directory, final File suspectedChild) {
        // Honour the documented contract: a null argument yields false rather than a NullPointerException
        if (directory == null || suspectedChild == null) {
            return (false);
        }

        return (isInDirectory(directory.getAbsolutePath(), suspectedChild.getAbsolutePath()));
    }

    /**
     * Returns true if the suspectedChild is within the given directory.  This
     * method is preferred over the File version, as it forces normalisation
     * of the two paths
     * (see <a href="http://commons.apache.org/proper/commons-io/javadocs/api-2.4/org/apache/commons/io/FilenameUtils.html#normalize(java.lang.String, boolean)">this reference</a>)
     * first.
     * 
     * @param directoryPath      The directory in which to check <i>(may be null, although doing so will always return false)</i>.
     * @param suspectedChildPath The suspect child to check for <i>(may be null, although doing so will always return false)</i>.
     * @return true if and only if suspectedChild is "within" directory.  Note that this comparison is done solely at a "path string"
     *         level.  Returns false if either path is null or cannot be normalised.
     */
    public final static boolean isInDirectory(final String directoryPath, final String suspectedChildPath) {
        final String normalisedDirectoryPath = FilenameUtils.normalize(directoryPath, true);
        final String normalisedSuspectedChildPath = FilenameUtils.normalize(suspectedChildPath, true);

        // normalize() returns null for null input and for paths that cannot be normalised
        // (e.g. ".." steps that climb above the root); new File((String)null) would throw
        if (normalisedDirectoryPath == null || normalisedSuspectedChildPath == null) {
            return (false);
        }

        return (isInDirectoryImpl(new File(normalisedDirectoryPath), new File(normalisedSuspectedChildPath)));
    }

    /**
     * Walks up the ancestry of suspectedChild (starting with suspectedChild itself)
     * looking for a path that equals directory.
     * 
     * @param directory      The directory to look for <i>(may be null, in which case false is returned)</i>.
     * @param suspectedChild The file whose ancestry is walked <i>(may be null, in which case false is returned)</i>.
     * @return true if suspectedChild, or any of its ancestors, equals directory.
     */
    private final static boolean isInDirectoryImpl(final File directory, final File suspectedChild) {
        if (directory == null) {
            return (false);
        }

        // getParentFile() returns null once the top of the path is reached, which terminates the walk
        for (File current = suspectedChild; current != null; current = current.getParentFile()) {
            if (current.equals(directory)) {
                return (true);
            }
        }

        return (false);
    }

    /**
     * Determines whether the given file is already located in an Alfresco managed content store.  Used to determine
     * whether to perform a streaming or in-place import.
     * 
     * @param contentStore The content store Alfresco is configured to use <i>(must not be null)</i>.
     * @param source The file to test.  Typically this would be the source directory for the import <i>(must not be null)</i>.
     * @return True if the given file is in an Alfresco managed content store, false otherwise.
     */
    public final static boolean isInContentStore(final ContentStore contentStore, final File source) {
        boolean result = false;
        final String contentStoreRoot = contentStore.getRootLocation();

        if (contentStoreRoot != null && contentStoreRoot.trim().length() > 0) {
            final File contentStoreRootFile = new File(contentStoreRoot);

            // If the content store root doesn't exist as a file, we're probably dealing with a non-filesystem content store
            if (contentStoreRootFile.exists() && contentStoreRootFile.isDirectory()) {
                result = isInDirectory(contentStoreRoot, source.getAbsolutePath());
            }
        }

        return (result);
    }

    /**
     * @param file The file to get the name of <i>(may be null)</i>.
     * @return A human readable rendition of the file (its absolute path) <i>(null when file is null)</i>.
     */
    public final static String getFileName(final File file) {
        String result = null;

        if (file != null) {
            result = file.getAbsolutePath();
        }

        return (result);
    }

    /**
     * This method does the magic of constructing the content URL for
     * "in-place" content.
     * 
     * @param mimeTypeService The Alfresco MimetypeService <i>(must not be null)</i>.
     * @param contentStore    The content store Alfresco is configured to use <i>(must not be null)</i>.
     * @param contentFile     The content file to build a content URL for <i>(must not be null)</i>.
     * @return The constructed <code>ContentData</code>, or null if the contentFile cannot be in-place imported for any reason
     *         (either path cannot be normalised, the file is not under the content store root, or the content URL would
     *         be too long).
     */
    public final static ContentData buildContentProperty(final MimetypeService mimeTypeService,
            final ContentStore contentStore, final File contentFile) {
        ContentData result = null;

        final String normalisedFilename = FilenameUtils.normalize(contentFile.getAbsolutePath(), true);
        String normalisedContentStoreRoot = FilenameUtils.normalize(contentStore.getRootLocation(), true);

        // normalize() returns null for null or un-normalisable paths; in that case we can't in-place import
        if (normalisedFilename == null || normalisedContentStoreRoot == null) {
            return (null);
        }

        // Ensure content store root ends with a single / character
        if (!normalisedContentStoreRoot.endsWith("/")) {
            normalisedContentStoreRoot = normalisedContentStoreRoot + "/";
        }

        // If, after normalisation, the filename doesn't start with the content store root, we can't in-place import
        if (normalisedFilename.startsWith(normalisedContentStoreRoot)) {
            final String contentStoreRelativeFilename = normalisedFilename
                    .substring(normalisedContentStoreRoot.length());
            final String contentUrl = FileContentStore.STORE_PROTOCOL + ContentStore.PROTOCOL_DELIMITER
                    + contentStoreRelativeFilename;

            // If the resulting content URL would be too long, we can't in-place import
            if (contentUrl.length() <= MAX_CONTENT_URL_LENGTH) {
                final String mimeType = mimeTypeService.guessMimetype(contentFile.getName());
                final String encoding = guessEncoding(mimeTypeService, contentFile, mimeType);

                result = new ContentData(contentUrl, mimeType, contentFile.length(), encoding);
            }
        }

        return (result);
    }

    /**
     * Attempt to guess the encoding of a text file, falling back to {@link #DEFAULT_TEXT_ENCODING}.
     * Files whose MIME type is not reported as text by the MimetypeService are not opened at all
     * and are assigned the default encoding.
     *
     * @param mimeTypeService The Alfresco MimetypeService <i>(must not be null)</i>.
     * @param file            The {@link java.io.File} to test <i>(must not be null)</i>.
     * @param mimeType        The file MIME type. Used to first distinguish between binary and text files <i>(must not be null)</i>.
     * @return The text encoding as a {@link String}.
     */
    public final static String guessEncoding(final MimetypeService mimeTypeService, final File file,
            final String mimeType) {
        String result = DEFAULT_TEXT_ENCODING;
        InputStream is = null;
        final ContentCharsetFinder charsetFinder = mimeTypeService.getContentCharsetFinder();

        if (mimeTypeService.isText(mimeType)) {
            try {
                is = new BufferedInputStream(new FileInputStream(file));
                result = charsetFinder.getCharset(is, mimeType).name();
            } catch (final IOException ioe) {
                // Deliberate best-effort: if the file can't be read, fall back to the default encoding
                result = DEFAULT_TEXT_ENCODING;
            } finally {
                IOUtils.closeQuietly(is);
            }
        }

        return (result);
    }

    /**
     * Strips the version suffix (if any) from a filename.
     * 
     * @param fileName The filename to strip the version suffix from <i>(must not be null, empty or blank)</i>.
     * @return The filename with the version suffix (if any) stripped.
     */
    public static String stripVersionSuffix(final String fileName) {
        String result = fileName;

        if (isVersionFile(result)) {
            // Uses the precompiled pattern rather than String.replaceFirst, which recompiles the regex on each call
            result = VERSION_SUFFIX_PATTERN.matcher(result).replaceFirst("");
        }

        return (result);
    }

    /**
     * Get the name of the parent file for this file, i.e. the filename with any
     * version suffix and any metadata suffix removed.
     * 
     * @param metadataLoader The configured <code>MetadataLoader</code> <i>(when null, no metadata suffix is removed)</i>.
     * @param fileName       The filename to check <i>(must not be null, empty or blank)</i>.
     * @return The name of the parent file of this file.
     */
    public static String getParentName(final MetadataLoader metadataLoader, final String fileName) {
        String result = stripVersionSuffix(fileName);

        if (isMetadataFile(metadataLoader, result)) {
            result = result.substring(0, result.length()
                    - (MetadataLoader.METADATA_SUFFIX + metadataLoader.getMetadataFileExtension()).length());
        }

        return (result);
    }

    /**
     * @param fileName The filename to check <i>(must not be null, empty or blank)</i>.
     * @return True if the given filename represents a version file, false otherwise.
     */
    public static boolean isVersionFile(final String fileName) {
        return (VERSION_FILENAME_PATTERN.matcher(fileName).matches());
    }

    /**
     * @param metadataLoader The configured <code>MetadataLoader</code> <i>(may be null, in which case false is returned)</i>.
     * @param fileName       The filename to check <i>(must not be null, empty or blank)</i>.
     * @return True if the given filename represents a metadata file, false otherwise.
     */
    public static boolean isMetadataFile(final MetadataLoader metadataLoader, final String fileName) {
        boolean result = false;

        if (metadataLoader != null) {
            // Versioned metadata files carry the version suffix after the metadata suffix, so strip it first
            final String tmpFileName = stripVersionSuffix(fileName);

            result = tmpFileName
                    .endsWith(MetadataLoader.METADATA_SUFFIX + metadataLoader.getMetadataFileExtension());
        }

        return (result);
    }

    /**
     * @param fileName The filename to check <i>(may be null, in which case null is returned)</i>.
     * @return The version number parsed from the filename's version suffix,
     *         <code>BulkImportItemVersion.VERSION_HEAD</code> if the filename doesn't have one,
     *         or null if fileName is null.
     */
    public static BigDecimal getVersionNumber(final String fileName) {
        BigDecimal result = null;

        if (fileName != null) {
            final Matcher m = VERSION_FILENAME_PATTERN.matcher(fileName);

            if (m.matches()) {
                result = new BigDecimal(m.group(1)); // Group 1 = version label, including full stop separator for decimal version numbers
            } else {
                result = BulkImportItemVersion.VERSION_HEAD; // Filename doesn't include a version label, so its version is HEAD
            }
        }

        return (result);
    }

}