org.pentaho.hadoop.shim.common.DistributedCacheUtilImpl.java Source code

Introduction

Here is the source code for org.pentaho.hadoop.shim.common.DistributedCacheUtilImpl.java, a Pentaho Big Data shim utility that installs the Kettle environment on a Hadoop file system and registers its files with Hadoop's Distributed Cache.

Source

/*******************************************************************************
 *
 * Pentaho Big Data
 *
 * Copyright (C) 2002-2017 by Hitachi Vantara : http://www.pentaho.com
 *
 *******************************************************************************
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 ******************************************************************************/

package org.pentaho.hadoop.shim.common;

import org.apache.commons.io.IOUtils;
import org.apache.commons.vfs2.AllFileSelector;
import org.apache.commons.vfs2.FileDepthSelector;
import org.apache.commons.vfs2.FileObject;
import org.apache.commons.vfs2.FileSelectInfo;
import org.apache.commons.vfs2.FileSelector;
import org.apache.commons.vfs2.FileSystemException;
import org.apache.commons.vfs2.FileType;
import org.apache.commons.vfs2.FileTypeSelector;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.util.VersionInfo;
import org.pentaho.di.core.Const;
import org.pentaho.di.core.exception.KettleFileException;
import org.pentaho.di.core.plugins.PluginFolder;
import org.pentaho.di.core.plugins.PluginFolderInterface;
import org.pentaho.di.core.vfs.KettleVFS;
import org.pentaho.di.i18n.BaseMessages;
import org.pentaho.hadoop.shim.HadoopConfiguration;

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.regex.Pattern;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;

/**
 * Utility to work with Hadoop's Distributed Cache
 */
public class DistributedCacheUtilImpl implements org.pentaho.hadoop.shim.api.DistributedCacheUtil {

    /**
     * Path within the installation directory to deploy libraries
     */
    private static final String PATH_LIB = "lib";

    /**
     * Pentaho MapReduce library path within a Hadoop configuration
     */
    private static final String PATH_PMR = "pmr";

    /**
     * Client-only library path within a Hadoop configuration
     */
    private static final String PATH_CLIENT = "client";

    /**
     * Path within the installation directory to deploy plugins
     */
    private static final String PATH_PLUGINS = "plugins";

    /**
     * Default buffer size used when extracting archives.
     */
    private static final int DEFAULT_BUFFER_SIZE = 8192;

    /**
     * Pattern to match all files that are not in the lib/ directory. Matches any string that does not contain /lib
     */
    private static final Pattern NOT_LIB_FILES = Pattern.compile("^((?!/lib).)*$");

    /**
     * Default permission for cached files
     * <p/>
     * Not using FsPermission.createImmutable due to EOFExceptions when using it with Hadoop 0.20.2
     */
    private static final FsPermission CACHED_FILE_PERMISSION = new FsPermission((short) 0755);

    /**
     * Public permission for cached files, required so that
     * org.apache.hadoop.mapreduce.filecache.ClientDistributedCacheManager#isPublic(org.apache.hadoop.conf.Configuration,
     * java.net.URI, java.util.Map) reports them as publicly visible.
     * <p/>
     * Not using FsPermission.createImmutable due to EOFExceptions when using it with Hadoop 0.20.2
     */
    private static final FsPermission PUBLIC_CACHED_FILE_PERMISSION = new FsPermission((short) 0777);

    /**
     * Name of the Big Data Plugin folder
     */
    public static final String PENTAHO_BIG_DATA_PLUGIN_FOLDER_NAME = "pentaho-big-data-plugin";

    /**
     * Name of the Shim configuration file
     */
    private static final String CONFIG_PROPERTIES = "config.properties";

    /**
     * Prefix for properties we want to omit when copying to the cluster
     */
    private static final String AUTH_PREFIX = "pentaho.authentication";

    /**
     * The Hadoop Configuration this Distributed Cache Utility is part of
     */
    private HadoopConfiguration configuration;

    public DistributedCacheUtilImpl(HadoopConfiguration configurationParam) {
        if (configurationParam == null) {
            throw new NullPointerException("A HadoopConfiguration is required");
        }
        this.configuration = configurationParam;
    }

    /**
     * Creates the path to a lock file within the provided directory
     *
     * @param dir Directory to generate lock file path within
     * @return Path to lock file within {@code dir}
     */
    public Path getLockFileAt(Path dir) {
        return new Path(dir, ".lock");
    }

    /**
     * This validates that the Kettle Environment is installed. "Installed" means the kettle engine and supporting
     * jars/plugins exist in the provided file system at the path provided.
     *
     * @param fs   File System to check for the Kettle Environment in
     * @param root Root path the Kettle Environment should reside within
     * @return True if the Kettle Environment is installed at {@code root}.
     * @throws IOException Error investigating installation
     */
    public boolean isKettleEnvironmentInstalledAt(FileSystem fs, Path root) throws IOException {
        // These directories must exist
        Path[] directories = new Path[] { new Path(root, PATH_LIB), new Path(root, PATH_PLUGINS),
                new Path(new Path(root, PATH_PLUGINS), PENTAHO_BIG_DATA_PLUGIN_FOLDER_NAME) };
        // This file must not exist
        Path lock = getLockFileAt(root);
        for (Path dir : directories) {
            if (!(fs.exists(dir) && fs.getFileStatus(dir).isDir())) {
                return false;
            }
        }
        // Finally, the lock file must not exist
        return !fs.exists(lock);
    }
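
    // Illustrative check (a sketch, not part of the original class; fs and installDir are hypothetical):
    //
    //   FileSystem fs = FileSystem.get(conf);
    //   Path installDir = new Path("/opt/pentaho/mapreduce/kettle");
    //   boolean ready = util.isKettleEnvironmentInstalledAt(fs, installDir);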

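    /**
     * Installs a Kettle environment into the provided file system: extracts the Pentaho MapReduce archive into
     * {@code destination}, stages the Big Data plugin and any additional plugins, and guards the whole operation
     * with a lock file that is removed on success.
     *
     * @param pmrArchive        Zip archive containing the Pentaho MapReduce libraries to install
     * @param fs                File system to install the Kettle environment into
     * @param destination       Root directory of the Kettle environment installation
     * @param bigDataPlugin     Local folder of the Pentaho Big Data plugin
     * @param additionalPlugins Comma-separated list of additional plugin folders to stage (may be empty)
     * @throws IOException         Error copying to the file system
     * @throws KettleFileException Error extracting the archive or resolving local files
     */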
    public void installKettleEnvironment(FileObject pmrArchive, FileSystem fs, Path destination,
            FileObject bigDataPlugin, String additionalPlugins) throws IOException, KettleFileException {
        if (pmrArchive == null) {
            throw new NullPointerException("pmrArchive is required");
        }
        if (destination == null) {
            throw new NullPointerException("destination is required");
        }
        if (bigDataPlugin == null) {
            throw new NullPointerException("big data plugin required");
        }

        FileObject extracted = extractToTemp(pmrArchive);

        // Write lock file while we're installing
        Path lockFile = getLockFileAt(destination);
        FSDataOutputStream out = fs.create(lockFile, true);
        // We must close the output stream, otherwise the file stays locked on Windows
        out.close();

        stageForCache(extracted, fs, destination, true, false);

        stageBigDataPlugin(fs, destination, bigDataPlugin);

        if (!Const.isEmpty(additionalPlugins)) {
            stagePluginsForCache(fs, new Path(destination, PATH_PLUGINS), additionalPlugins);
        }

        // Delete the lock file now that we're done. It is intentional that we're not doing this in a try/finally. If the
        // staging fails for some reason we require the user to forcibly overwrite the (partial) installation
        fs.delete(lockFile, true);
    }
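
    // Illustrative call (a sketch with hypothetical paths, not part of the original class):
    //
    //   FileObject pmrArchive = KettleVFS.getFileObject("/opt/pentaho/pentaho-mapreduce-libraries.zip");
    //   FileObject bigDataPlugin = KettleVFS.getFileObject("/opt/pentaho/plugins/pentaho-big-data-plugin");
    //   util.installKettleEnvironment(pmrArchive, fs, new Path("/opt/pentaho/mapreduce/kettle"), bigDataPlugin, null);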

    /**
     * Stage the Big Data plugin and the currently active Hadoop configuration into the Kettle environment
     * installation, overwriting any files that may already exist there. The configuration's client-only and pmr
     * libraries are excluded here; pmr libraries are staged into the top-level lib directory instead.
     *
     * @param fs           File system to write to
     * @param dest         Root of the Kettle environment installation to stage into
     * @param pluginFolder Local Big Data plugin folder
     * @throws KettleFileException Error resolving local files
     * @throws IOException         Error copying to the file system
     */
    private void stageBigDataPlugin(FileSystem fs, Path dest, FileObject pluginFolder)
            throws KettleFileException, IOException {
        Path pluginsDir = new Path(dest, PATH_PLUGINS);
        Path libDir = new Path(dest, PATH_LIB);
        Path bigDataPluginDir = new Path(pluginsDir, pluginFolder.getName().getBaseName());

        // Stage everything except the hadoop-configurations and pmr libraries
        for (FileObject f : pluginFolder.findFiles(new FileDepthSelector(1, 1))) {
            if (!"hadoop-configurations".equals(f.getName().getBaseName())
                    && !"pentaho-mapreduce-libraries.zip".equals(f.getName().getBaseName())) {
                stageForCache(f, fs, new Path(bigDataPluginDir, f.getName().getBaseName()), true, false);
            }
        }

        // Stage the current Hadoop configuration without its client-only or pmr libraries (these will be copied into the
        // lib dir)
        Path hadoopConfigDir = new Path(new Path(bigDataPluginDir, "hadoop-configurations"),
                configuration.getIdentifier());
        for (FileObject f : configuration.getLocation().findFiles(new FileSelector() {
            @Override
            public boolean includeFile(FileSelectInfo info) throws Exception {
                return FileType.FILE.equals(info.getFile().getType());
            }

            @Override
            public boolean traverseDescendents(FileSelectInfo info) throws Exception {
                String name = info.getFile().getName().getBaseName();
                return !((PATH_PMR.equals(name) || PATH_CLIENT.equals(name))
                        && PATH_LIB.equals(info.getFile().getParent().getName().getBaseName()));
            }
        })) {
            // Create relative path to write to
            String relPath = configuration.getLocation().getName().getRelativeName(f.getName());
            stageForCache(f, fs, new Path(hadoopConfigDir, relPath), true, false);
        }

        // Stage all pmr libraries for the Hadoop configuration into the root library path for the Kettle environment
        for (FileObject f : configuration.getLocation().resolveFile(PATH_LIB).resolveFile(PATH_PMR)
                .findFiles(new FileTypeSelector(FileType.FILE))) {
            stageForCache(f, fs, new Path(libDir, f.getName().getBaseName()), true, false);
        }
    }

    /**
     * Stage a comma-separated list of plugin folders into a directory in HDFS.
     *
     * @param fs                File System to write to
     * @param pluginsDir        Root plugins directory in HDFS to copy folders into
     * @param pluginFolderNames Comma-separated list of plugin folders to copy. These are relative to a root Kettle plugin
     *                          folder (as defined by {@link Const#PLUGIN_BASE_FOLDERS_PROP})
     * @throws KettleFileException Error locating a plugin folder
     * @throws IOException         Error copying
     */
    public void stagePluginsForCache(FileSystem fs, Path pluginsDir, String pluginFolderNames)
            throws KettleFileException, IOException {
        if (pluginFolderNames == null) {
            throw new IllegalArgumentException("pluginFolderNames required");
        }
        if (!fs.exists(pluginsDir)) {
            fs.mkdirs(pluginsDir);
        }
        for (String localPluginPath : pluginFolderNames.split(",")) {
            localPluginPath = localPluginPath.trim();
            Object[] localFileTuple = findPluginFolder(localPluginPath);
            if (localFileTuple == null || !((FileObject) localFileTuple[0]).exists()) {
                throw new KettleFileException(BaseMessages.getString(DistributedCacheUtilImpl.class,
                        "DistributedCacheUtil.PluginDirectoryNotFound", localPluginPath));
            }
            FileObject localFile = (FileObject) localFileTuple[0];
            String relativePath = (String) localFileTuple[1];
            Path pluginDir = new Path(pluginsDir, relativePath);
            stageForCache(localFile, fs, pluginDir, true, false);
        }
    }
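
    // Illustrative call (folder names are hypothetical; they are resolved against Const.PLUGIN_BASE_FOLDERS_PROP):
    //
    //   util.stagePluginsForCache(fs, new Path(installDir, "plugins"), "my-plugin,another-plugin");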

    /**
     * Configure the provided configuration to use the Distributed Cache and include all files in {@code
     * kettleInstallDir}. All jar files in lib/ will be added to the classpath.
     *
     * @param conf             Configuration to update
     * @param fs               File system to load Kettle Environment installation from
     * @param kettleInstallDir Directory that contains the Kettle installation to use in the file system provided
     * @throws KettleFileException
     * @throws IOException
     */
    public void configureWithKettleEnvironment(Configuration conf, FileSystem fs, Path kettleInstallDir)
            throws KettleFileException, IOException {
        Path libDir = new Path(kettleInstallDir, PATH_LIB);
        // Add all files to the classpath found in the lib directory
        List<Path> libraryJars = findFiles(fs, libDir, null);
        addCachedFilesToClasspath(libraryJars, conf);

        List<Path> nonLibFiles = findFiles(fs, kettleInstallDir, NOT_LIB_FILES);
        addCachedFiles(nonLibFiles, conf);
    }
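
    // Typical job-submission flow (a sketch; conf, fs and installDir are assumed to exist):
    //
    //   if (util.isKettleEnvironmentInstalledAt(fs, installDir)) {
    //     util.configureWithKettleEnvironment(conf, fs, installDir);
    //   }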

    /**
     * Register a list of files from a Hadoop file system to be available and placed on the classpath when the
     * configuration is used to submit Hadoop jobs
     *
     * @param files Paths to add to the classpath of the configuration provided
     * @param conf  Configuration to modify
     * @throws IOException
     */
    public void addCachedFilesToClasspath(List<Path> files, Configuration conf) throws IOException {
        DistributedCache.createSymlink(conf);
        for (Path file : files) {
            // We need to disqualify the path so Distributed Cache in 0.20.2 can properly add the resources to
            // the classpath: https://issues.apache.org/jira/browse/MAPREDUCE-752
            addFileToClassPath(disqualifyPath(file), conf);
        }
    }

    /**
     * Add a file path to the current set of classpath entries. The file is also added to the distributed cache.
     * <p/>
     * This is copied from Hadoop 0.20.2 o.a.h.filecache.DistributedCache so we can inject the correct path separator for
     * the environment the cluster is executing in. See {@link #getClusterPathSeparator()}.
     *
     * @param file Path of the file to be added
     * @param conf Configuration that contains the classpath setting
     */
    public void addFileToClassPath(Path file, Configuration conf) throws IOException {

        // TODO Replace this with a Hadoop shim if we end up having version-specific implementations scattered around

        // Save off the classloader, to make sure the version info can be loaded successfully from the hadoop-common JAR
        ClassLoader cl = Thread.currentThread().getContextClassLoader();
        Thread.currentThread().setContextClassLoader(VersionInfo.class.getClassLoader());

        // Get the version string or set to a default value
        String version;
        try {
            version = VersionInfo.getVersion();
        } catch (Throwable t) {
            version = "unknown";
        }

        // Restore the original classloader
        Thread.currentThread().setContextClassLoader(cl);

        String classpath = conf.get("mapred.job.classpath.files");
        conf.set("mapred.job.classpath.files",
                classpath == null ? file.toString() : classpath + getClusterPathSeparator() + file.toString());
        FileSystem fs = FileSystem.get(conf);
        URI uri = fs.makeQualified(file).toUri();

        DistributedCache.addCacheFile(uri, conf);
    }
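
    // For example (hypothetical paths), adding /pentaho/lib/a.jar and then /pentaho/lib/b.jar with the default
    // separator leaves conf.get("mapred.job.classpath.files") as "/pentaho/lib/a.jar:/pentaho/lib/b.jar".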

    /**
     * Register a list of paths from a Hadoop file system to be available when the configuration is used to submit Hadoop
     * jobs
     *
     * @param paths Paths to add to the list of cached paths for the configuration provided
     * @param conf  Configuration to modify
     * @throws IOException
     */
    public void addCachedFiles(List<Path> paths, Configuration conf) throws IOException {
        DistributedCache.createSymlink(conf);
        for (Path path : paths) {
            // Build a URI and set the path's short name in the fragment so the file is copied properly
            DistributedCache.addCacheFile(URI.create(path.toUri() + "#" + path.getName()), conf);
        }
    }
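
    // For example (hypothetical URI), hdfs://nn:8020/pentaho/plugins/foo.xml is registered as
    // hdfs://nn:8020/pentaho/plugins/foo.xml#foo.xml, so the task-side symlink keeps the short name foo.xml.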

    /**
     * Removes the scheme, host, and authority portion of a path's URI.
     *
     * @param path Path to cleanse
     * @return New path relative to the root of the filesystem
     */
    public Path disqualifyPath(Path path) {
        return new Path(path.toUri().getPath());
    }
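
    // For example (hypothetical URI): disqualifyPath(new Path("hdfs://nn:8020/pentaho/lib/a.jar"))
    // returns a Path for "/pentaho/lib/a.jar".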

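    /**
     * @deprecated Use {@link #stageForCache(FileObject, FileSystem, Path, boolean, boolean)} instead.
     */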
    @Deprecated
    public void stageForCache(FileObject source, FileSystem fs, Path dest, boolean overwrite)
            throws IOException, KettleFileException {
        stageForCache(source, fs, dest, overwrite, false);
    }

    /**
     * Stages the source file or folder to a Hadoop file system and sets its permission and replication value
     * appropriately to be used with the Distributed Cache. WARNING: This will delete the contents of dest before
     * staging the archive.
     *
     * @param source    File or folder to copy to the file system. If it is a folder all contents will be copied into
     *                  dest.
     * @param fs        Hadoop file system to store the contents of the archive in
     * @param dest      Destination to copy source into. If source is a file, the new file name will be exactly dest. If
     *                  source is a folder its contents will be copied into dest. For more info see {@link
     *                  FileSystem#copyFromLocalFile(org.apache.hadoop.fs.Path, org.apache.hadoop.fs.Path)}.
     * @param overwrite Should an existing file or folder be overwritten? If not, an exception will be thrown.
     * @param isPublic  If {@code true}, stage the files with world-readable (0777) permissions so the Distributed
     *                  Cache treats them as public; otherwise use 0755.
     * @throws IOException         Error copying to the file system
     * @throws KettleFileException Source does not exist, or destination exists and overwrite is false.
     */
    public void stageForCache(FileObject source, FileSystem fs, Path dest, boolean overwrite, boolean isPublic)
            throws IOException, KettleFileException {
        if (!source.exists()) {
            throw new KettleFileException(BaseMessages.getString(DistributedCacheUtilImpl.class,
                    "DistributedCacheUtil.SourceDoesNotExist", source));
        }

        if (fs.exists(dest)) {
            if (overwrite) {
                // Clear out the existing destination (file or directory) so it can be overwritten
                fs.delete(dest, true);
            } else {
                throw new KettleFileException(BaseMessages.getString(DistributedCacheUtilImpl.class,
                        "DistributedCacheUtil.DestinationExists", dest.toUri().getPath()));
            }
        }

        // Use the same replication we'd use for submitting jobs
        short replication = (short) fs.getConf().getInt("mapred.submit.replication", 10);

        if (source.getURL().toString().endsWith(CONFIG_PROPERTIES)) {
            copyConfigProperties(source, fs, dest);
        } else {
            Path local = new Path(source.getURL().getPath());
            fs.copyFromLocalFile(local, dest);
        }

        if (isPublic) {
            fs.setPermission(dest, PUBLIC_CACHED_FILE_PERMISSION);
        } else {
            fs.setPermission(dest, CACHED_FILE_PERMISSION);
        }
        fs.setReplication(dest, replication);
    }

    private void copyConfigProperties(FileObject source, FileSystem fs, Path dest) {
        try (FSDataOutputStream output = fs.create(dest);
                InputStream input = source.getContent().getInputStream()) {

            List<String> lines = IOUtils.readLines(input);
            for (String line : lines) {
                if (!line.startsWith(AUTH_PREFIX)) {
                    IOUtils.write(line, output);
                    IOUtils.write(String.format("%n"), output);
                }
            }
        } catch (IOException e) {
            throw new RuntimeException("Error copying modified version of config.properties", e);
        }
    }

    /**
     * Recursively searches for all files starting at the directory provided with the extension provided. If no extension
     * is provided all files will be returned.
     *
     * @param root      Directory to start the search for files in
     * @param extension File extension to search for. If null all files will be returned.
     * @return List of absolute path names to all files found in {@code root} and its subdirectories.
     * @throws FileSystemException Error resolving files
     */
    public List<String> findFiles(FileObject root, final String extension) throws FileSystemException {
        FileObject[] files = root.findFiles(new FileSelector() {
            @Override
            public boolean includeFile(FileSelectInfo fileSelectInfo) throws Exception {
                return extension == null || extension.equals(fileSelectInfo.getFile().getName().getExtension());
            }

            @Override
            public boolean traverseDescendents(FileSelectInfo fileSelectInfo) throws Exception {
                return FileType.FOLDER.equals(fileSelectInfo.getFile().getType());
            }
        });

        if (files == null) {
            return Collections.emptyList();
        }

        List<String> paths = new ArrayList<String>();
        for (FileObject file : files) {
            try {
                paths.add(file.getURL().toURI().getPath());
            } catch (URISyntaxException ex) {
                throw new FileSystemException("Error getting URI of file: " + file.getURL().getPath());
            }
        }
        return paths;
    }

    /**
     * Looks for all files in the path within the given file system that match the pattern provided. Only the direct
     * descendants of {@code path} will be evaluated; this is not recursive.
     *
     * @param fs              File system to search within
     * @param path            Path to search in
     * @param fileNamePattern Pattern of file name to match. If {@code null}, all files will be matched.
     * @return All {@link Path}s that match the provided pattern.
     * @throws IOException Error retrieving listing status of a path from the file system
     */
    public List<Path> findFiles(FileSystem fs, Path path, Pattern fileNamePattern) throws IOException {
        FileStatus[] files = fs.listStatus(path);
        List<Path> found = new ArrayList<Path>(files.length);
        for (FileStatus file : files) {
            if (fileNamePattern == null || fileNamePattern.matcher(file.getPath().toString()).matches()) {
                found.add(file.getPath());
            }
        }
        return found;
    }
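
    // For example, findFiles(fs, kettleInstallDir, NOT_LIB_FILES) returns the direct children of the installation
    // whose paths do not contain /lib; configureWithKettleEnvironment registers those as plain cached files.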

    /**
     * Delete a directory and all of its contents
     *
     * @param dir Directory to delete
     * @return True if the directory was deleted successfully
     */
    public boolean deleteDirectory(FileObject dir) throws FileSystemException {
        dir.delete(new AllFileSelector());
        return !dir.exists();
    }

    /**
     * Extract a zip archive to a temp directory.
     *
     * @param archive Zip archive to extract
     * @return Directory the zip was extracted into
     * @throws IOException
     * @throws KettleFileException
     * @see DistributedCacheUtilImpl#extract(org.apache.commons.vfs2.FileObject, org.apache.commons.vfs2.FileObject)
     */
    public FileObject extractToTemp(FileObject archive) throws IOException, KettleFileException {
        if (archive == null) {
            throw new NullPointerException("archive is required");
        }
        // Ask KettleVFS for a temporary file name without extension and use that as our temporary folder to extract into
        FileObject dest = KettleVFS.createTempFile("", "", System.getProperty("java.io.tmpdir"));
        return extract(archive, dest);
    }

    /**
     * Extract a zip archive to a directory.
     *
     * @param archive Zip archive to extract
     * @param dest    Destination directory. This must not exist!
     * @return Directory the zip was extracted into
     * @throws IllegalArgumentException when the archive file does not exist or the destination directory already exists
     * @throws IOException
     * @throws KettleFileException
     */
    public FileObject extract(FileObject archive, FileObject dest) throws IOException, KettleFileException {
        if (!archive.exists()) {
            throw new IllegalArgumentException("archive does not exist: " + archive.getURL().getPath());
        }

        if (dest.exists()) {
            throw new IllegalArgumentException("destination already exists");
        }
        dest.createFolder();

        try (ZipInputStream zis = new ZipInputStream(archive.getContent().getInputStream())) {
            byte[] buffer = new byte[DEFAULT_BUFFER_SIZE];
            int len;
            ZipEntry ze;
            while ((ze = zis.getNextEntry()) != null) {
                FileObject entry = KettleVFS.getFileObject(dest + Const.FILE_SEPARATOR + ze.getName());
                FileObject parent = entry.getParent();
                if (parent != null) {
                    parent.createFolder();
                }
                if (ze.isDirectory()) {
                    entry.createFolder();
                    continue;
                }

                // Stream the entry's bytes into the destination file
                try (OutputStream os = KettleVFS.getOutputStream(entry, false)) {
                    while ((len = zis.read(buffer)) > 0) {
                        os.write(buffer, 0, len);
                    }
                }
            }
        } catch (Exception ex) {
            // Try to clean up the temp directory and all files
            if (!deleteDirectory(dest)) {
                throw new KettleFileException("Could not clean up temp dir after error extracting", ex);
            }
            throw new KettleFileException("error extracting archive", ex);
        }

        return dest;
    }

    /**
     * Attempts to find a plugin's installation folder on disk within all known plugin folder locations
     *
     * @param pluginFolderName Name of plugin folder
     * @return Tuple of [(FileObject) Location of the first plugin folder found as a direct descendant of one of the known
     * plugin folder locations, (String) Relative path from parent]
     * @throws KettleFileException Error getting plugin folders
     */
    protected Object[] findPluginFolder(final String pluginFolderName) throws KettleFileException {
        List<PluginFolderInterface> pluginFolders = PluginFolder.populateFolders(null);
        if (pluginFolders != null) {
            for (PluginFolderInterface pluginFolder : pluginFolders) {
                FileObject folder = KettleVFS.getFileObject(pluginFolder.getFolder());

                try {
                    if (folder.exists()) {
                        FileObject[] files = folder.findFiles(new FileSelector() {
                            @Override
                            public boolean includeFile(FileSelectInfo fileSelectInfo) throws Exception {
                                if (fileSelectInfo.getFile().equals(fileSelectInfo.getBaseFolder())) {
                                    // Do not consider the base folders
                                    return false;
                                }
                                // Determine relative name to compare
                                int baseNameLength = fileSelectInfo.getBaseFolder().getName().getPath().length()
                                        + 1;
                                String relativeName = fileSelectInfo.getFile().getName().getPath()
                                        .substring(baseNameLength);
                                // Compare plugin folder name with the relative name
                                return pluginFolderName.equals(relativeName);
                            }

                            @Override
                            public boolean traverseDescendents(FileSelectInfo fileSelectInfo) throws Exception {
                                return true;
                            }
                        });
                        if (files != null && files.length > 0) {
                            return new Object[] { files[0], folder.getName().getRelativeName(files[0].getName()) }; // Return the first match
                        }
                    }
                } catch (FileSystemException ex) {
                    throw new KettleFileException("Error searching for folder '" + pluginFolderName + "'", ex);
                }
            }
        }
        return null;
    }

    /**
     * Determine the classpath separator of the cluster. There is currently no way to determine the remote cluster's
     * path separator, nor would we ultimately want to. It can be configured externally via the system property
     * "hadoop.cluster.path.separator", which defaults to ":" if not set.
     * <p/>
     * This is not necessary for Hadoop 0.21.x. See https://issues.apache.org/jira/browse/HADOOP-4864.
     *
     * @return Path separator to use when building up the classpath to use for the Distributed Cache
     */
    public String getClusterPathSeparator() {
        return System.getProperty("hadoop.cluster.path.separator", ":");
    }
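
    // For example, a client could start its JVM with -Dhadoop.cluster.path.separator=";" to target a cluster
    // whose classpath separator differs from the ":" default.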

    // Wrapping/delegating methods
    @Override
    public boolean isKettleEnvironmentInstalledAt(org.pentaho.hadoop.shim.api.fs.FileSystem fs,
            org.pentaho.hadoop.shim.api.fs.Path kettleEnvInstallDir) throws IOException {
        return isKettleEnvironmentInstalledAt(ShimUtils.asFileSystem(fs), ShimUtils.asPath(kettleEnvInstallDir));
    }

    @Override
    public void configureWithKettleEnvironment(org.pentaho.hadoop.shim.api.Configuration conf,
            org.pentaho.hadoop.shim.api.fs.FileSystem fs, org.pentaho.hadoop.shim.api.fs.Path kettleEnvInstallDir)
            throws KettleFileException, IOException {
        configureWithKettleEnvironment(ShimUtils.asConfiguration(conf), ShimUtils.asFileSystem(fs),
                ShimUtils.asPath(kettleEnvInstallDir));
    }

    @Override
    public void installKettleEnvironment(FileObject pmrLibArchive, org.pentaho.hadoop.shim.api.fs.FileSystem fs,
            org.pentaho.hadoop.shim.api.fs.Path destination, FileObject bigDataPluginFolder,
            String additionalPlugins) throws KettleFileException, IOException {
        installKettleEnvironment(pmrLibArchive, ShimUtils.asFileSystem(fs), ShimUtils.asPath(destination),
                bigDataPluginFolder, additionalPlugins);
    }

    @Override
    public void stageForCache(FileObject source, org.pentaho.hadoop.shim.api.fs.FileSystem fs,
            org.pentaho.hadoop.shim.api.fs.Path dest, boolean overwrite, boolean isPublic) throws IOException {
        try {
            stageForCache(source, ShimUtils.asFileSystem(fs), ShimUtils.asPath(dest), overwrite, isPublic);
        } catch (KettleFileException e) {
            throw new IOException(e);
        }
    }

    @Override
    public void addCachedFilesToClasspath(org.pentaho.hadoop.shim.api.Configuration conf,
            org.pentaho.hadoop.shim.api.fs.FileSystem fs, org.pentaho.hadoop.shim.api.fs.Path source,
            Pattern fileNamePattern) throws IOException {
        List<Path> libraryJars = findFiles(ShimUtils.asFileSystem(fs), ShimUtils.asPath(source), fileNamePattern);
        addCachedFilesToClasspath(libraryJars, ShimUtils.asConfiguration(conf));
    }

    @Override
    public void addCachedFiles(org.pentaho.hadoop.shim.api.Configuration conf,
            org.pentaho.hadoop.shim.api.fs.FileSystem fs, org.pentaho.hadoop.shim.api.fs.Path source,
            Pattern fileNamePattern) throws IOException {

        List<Path> nonLibFiles = findFiles(ShimUtils.asFileSystem(fs), ShimUtils.asPath(source), fileNamePattern);
        addCachedFiles(nonLibFiles, ShimUtils.asConfiguration(conf));
    }
}
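
Example

A minimal end-to-end sketch of how this utility is typically driven. It assumes a HadoopConfiguration obtained from the shim's configuration provider and hypothetical installation paths; none of this code appears in the original file.

import org.apache.commons.vfs2.FileObject;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.pentaho.di.core.vfs.KettleVFS;
import org.pentaho.hadoop.shim.HadoopConfiguration;
import org.pentaho.hadoop.shim.common.DistributedCacheUtilImpl;

public class DistributedCacheUtilExample {

    /**
     * Installs the Kettle environment on first use, then wires it into the job configuration.
     * The hadoopConfig argument would come from the shim's HadoopConfiguration provider at runtime.
     */
    public static void prepare(HadoopConfiguration hadoopConfig) throws Exception {
        DistributedCacheUtilImpl util = new DistributedCacheUtilImpl(hadoopConfig);
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path installDir = new Path("/opt/pentaho/mapreduce/kettle"); // hypothetical location

        if (!util.isKettleEnvironmentInstalledAt(fs, installDir)) {
            // Hypothetical local artifacts; adjust to the actual client installation
            FileObject pmrArchive = KettleVFS.getFileObject("/opt/pentaho/pentaho-mapreduce-libraries.zip");
            FileObject bigDataPlugin = KettleVFS.getFileObject("/opt/pentaho/plugins/pentaho-big-data-plugin");
            util.installKettleEnvironment(pmrArchive, fs, installDir, bigDataPlugin, null);
        }

        // Adds lib/ jars to the classpath and registers everything else as plain cached files
        util.configureWithKettleEnvironment(conf, fs, installDir);
    }
}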