/*******************************************************************************
 *
 * Pentaho Big Data
 *
 * Copyright (C) 2002-2017 by Hitachi Vantara : http://www.pentaho.com
 *
 *******************************************************************************
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 ******************************************************************************/

package org.pentaho.hadoop.shim.common;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import static org.junit.Assume.assumeTrue;

import java.io.IOException;
import java.util.List;
import java.util.regex.Pattern;

import org.apache.commons.vfs2.AllFileSelector;
import org.apache.commons.vfs2.FileObject;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
import org.pentaho.di.core.Const;
import org.pentaho.di.core.vfs.KettleVFS;
import org.pentaho.hadoop.shim.HadoopConfiguration;
import org.pentaho.hadoop.shim.spi.MockHadoopShim;

/**
 * These tests exercise DistributedCacheUtil against the Hadoop local file system implementation, so they
 * require additional setup to run on Windows: the <b>hadoop.home.dir</b> system property must point to a
 * directory containing <i>\bin\winutils.exe</i>.
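 * <p>
 * For example, the property can be set before any Hadoop classes are loaded (a sketch; the
 * <code>C:\hadoop</code> location below is only a placeholder for an actual winutils installation):
 *
 * <pre>
 * <code>
 * // Point Hadoop at a local directory that contains bin\winutils.exe (placeholder path)
 * System.setProperty( "hadoop.home.dir", "C:\\hadoop" );
 * </code>
 * </pre>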
 * <p>
 * Depending on possible issues with the Hadoop file system on Windows, any of these tests can be skipped,
 * e.g. using the following code:
 *
 * <pre>
 * <code>
 * // Don't run this test on Windows env
 * assumeTrue( !isWindows() );
 * </code>
 * </pre>
 */
public class DistributedCacheUtilImplOSDependentTest {

  private static HadoopConfiguration TEST_CONFIG;
  private static String PLUGIN_BASE = null;
  private static final String OS_NAME = System.getProperty("os.name", "unknown");

  protected static boolean isWindows() {
    return OS_NAME.startsWith("Windows");
  }

  @BeforeClass
  public static void setup() throws Exception {
    // Create some Hadoop configuration specific pmr libraries
    TEST_CONFIG = new HadoopConfiguration(
        DistributedCacheTestUtil.createTestHadoopConfiguration(
            "bin/test/" + DistributedCacheUtilImplOSDependentTest.class.getSimpleName()),
        "test-config", "name", new MockHadoopShim());

    PLUGIN_BASE = System.getProperty(Const.PLUGIN_BASE_FOLDERS_PROP);
    // Fake out the "plugins" directory for the project's root directory
    System.setProperty(Const.PLUGIN_BASE_FOLDERS_PROP,
        KettleVFS.getFileObject(".").getURL().toURI().getPath());
  }

  @AfterClass
  public static void teardown() {
    if (PLUGIN_BASE != null) {
      System.setProperty(Const.PLUGIN_BASE_FOLDERS_PROP, PLUGIN_BASE);
    }
  }

  @Test
  public void stageForCache() throws Exception {
    DistributedCacheUtilImpl ch = new DistributedCacheUtilImpl(TEST_CONFIG);

    // Copy the contents of test folder
    FileObject source = DistributedCacheTestUtil.createTestFolderWithContent();
    try {
      Path root = new Path("bin/test/stageArchiveForCacheTest");
      Path dest = new Path(root, "org/pentaho/mapreduce/");
      Configuration conf = new Configuration();
      FileSystem fs = DistributedCacheTestUtil.getLocalFileSystem(conf);
      DistributedCacheTestUtil.stageForCacheTester(ch, source, fs, root, dest, 6, 6);
    } finally {
      source.delete(new AllFileSelector());
    }
  }

  @Test
  public void stageForCache_destination_exists() throws Exception {
    DistributedCacheUtilImpl ch = new DistributedCacheUtilImpl(TEST_CONFIG);

    Configuration conf = new Configuration();
    FileSystem fs = DistributedCacheTestUtil.getLocalFileSystem(conf);

    FileObject source = DistributedCacheTestUtil.createTestFolderWithContent();
    try {
      Path root = new Path("bin/test/stageForCache_destination_exists");
      Path dest = new Path(root, "dest");
      fs.mkdirs(dest);
      assertTrue(fs.exists(dest));
      assertTrue(fs.getFileStatus(dest).isDir());
      DistributedCacheTestUtil.stageForCacheTester(ch, source, fs, root, dest, 6, 6);
    } finally {
      source.delete(new AllFileSelector());
    }
  }

  @Test
  public void stagePluginsForCache() throws Exception {
    DistributedCacheUtilImpl ch = new DistributedCacheUtilImpl(TEST_CONFIG);
    Configuration conf = new Configuration();
    FileSystem fs = DistributedCacheTestUtil.getLocalFileSystem(conf);

    Path pluginsDir = new Path("bin/test/plugins-installation-dir");
    FileObject pluginDir = DistributedCacheTestUtil.createTestFolderWithContent();
    try {
      ch.stagePluginsForCache(fs, pluginsDir, "bin/test/sample-folder");
      Path pluginInstallPath = new Path(pluginsDir, "bin/test/sample-folder");
      assertTrue(fs.exists(pluginInstallPath));
      ContentSummary summary = fs.getContentSummary(pluginInstallPath);
      assertEquals(6, summary.getFileCount());
      assertEquals(6, summary.getDirectoryCount());
    } finally {
      pluginDir.delete(new AllFileSelector());
      fs.delete(pluginsDir, true);
    }
  }

  @Test
  public void findFiles_hdfs_native() throws Exception {
    DistributedCacheUtilImpl ch = new DistributedCacheUtilImpl(TEST_CONFIG);

    // Copy the contents of test folder
    FileObject source = DistributedCacheTestUtil.createTestFolderWithContent();
    Path root = new Path("bin/test/stageArchiveForCacheTest");
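    // Stage the fixture, then exercise findFiles() both without a filter (null pattern) and with
    // regex filters; the expected counts below reflect the layout produced by createTestFolderWithContent()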
    Configuration conf = new Configuration();
    FileSystem fs = DistributedCacheTestUtil.getLocalFileSystem(conf);
    Path dest = new Path(root, "org/pentaho/mapreduce/");
    try {
      try {
        ch.stageForCache(source, fs, dest, true);

        List<Path> files = ch.findFiles(fs, dest, null);
        assertEquals(5, files.size());

        files = ch.findFiles(fs, dest, Pattern.compile(".*jar$"));
        assertEquals(2, files.size());

        files = ch.findFiles(fs, dest, Pattern.compile(".*folder$"));
        assertEquals(1, files.size());
      } finally {
        fs.delete(root, true);
      }
    } finally {
      source.delete(new AllFileSelector());
    }
  }

  @Test
  public void installKettleEnvironment() throws Exception {
    DistributedCacheUtilImpl ch = new DistributedCacheUtilImpl(TEST_CONFIG);

    Configuration conf = new Configuration();
    FileSystem fs = DistributedCacheTestUtil.getLocalFileSystem(conf);

    // This "empty pmr" contains a lib/ folder but with no content
    FileObject pmrArchive = KettleVFS.getFileObject(getClass().getResource("/empty-pmr.zip").toURI().getPath());

    FileObject bigDataPluginDir = DistributedCacheTestUtil
        .createTestFolderWithContent(DistributedCacheUtilImpl.PENTAHO_BIG_DATA_PLUGIN_FOLDER_NAME);

    Path root = new Path("bin/test/installKettleEnvironment");
    try {
      ch.installKettleEnvironment(pmrArchive, fs, root, bigDataPluginDir, null);
      assertTrue(ch.isKettleEnvironmentInstalledAt(fs, root));
    } finally {
      bigDataPluginDir.delete(new AllFileSelector());
      fs.delete(root, true);
    }
  }

  @Test
  public void installKettleEnvironment_additional_plugins() throws Exception {
    DistributedCacheUtilImpl ch = new DistributedCacheUtilImpl(TEST_CONFIG);

    Configuration conf = new Configuration();
    FileSystem fs = DistributedCacheTestUtil.getLocalFileSystem(conf);

    // This "empty pmr" contains a lib/ folder but with no content
    FileObject pmrArchive = KettleVFS.getFileObject(getClass().getResource("/empty-pmr.zip").toURI().getPath());

    FileObject bigDataPluginDir = DistributedCacheTestUtil
        .createTestFolderWithContent(DistributedCacheUtilImpl.PENTAHO_BIG_DATA_PLUGIN_FOLDER_NAME);

    String pluginName = "additional-plugin";
    FileObject additionalPluginDir = DistributedCacheTestUtil.createTestFolderWithContent(pluginName);

    Path root = new Path("bin/test/installKettleEnvironment");
    try {
      ch.installKettleEnvironment(pmrArchive, fs, root, bigDataPluginDir, "bin/test/" + pluginName);
      assertTrue(ch.isKettleEnvironmentInstalledAt(fs, root));
      assertTrue(fs.exists(new Path(root, "plugins/bin/test/" + pluginName)));
    } finally {
      bigDataPluginDir.delete(new AllFileSelector());
      additionalPluginDir.delete(new AllFileSelector());
      fs.delete(root, true);
    }
  }

  @Test
  public void isPmrInstalledAt() throws IOException {
    DistributedCacheUtilImpl ch = new DistributedCacheUtilImpl(TEST_CONFIG);

    Configuration conf = new Configuration();
    FileSystem fs = DistributedCacheTestUtil.getLocalFileSystem(conf);

    Path root = new Path("bin/test/ispmrInstalledAt");
    Path lib = new Path(root, "lib");
    Path plugins = new Path(root, "plugins");
    Path bigDataPlugin = new Path(plugins, DistributedCacheUtilImpl.PENTAHO_BIG_DATA_PLUGIN_FOLDER_NAME);

    Path lockFile = ch.getLockFileAt(root);
    FSDataOutputStream lockFileOut = null;
    FSDataOutputStream bigDataPluginFileOut = null;
    try {
      // Create all directories (parent directories created automatically)
      fs.mkdirs(lib);
      fs.mkdirs(bigDataPlugin);
      assertTrue(ch.isKettleEnvironmentInstalledAt(fs, root));

      // If lock file is there pmr is not installed
      lockFileOut = fs.create(lockFile);
      assertFalse(ch.isKettleEnvironmentInstalledAt(fs, root));

      // Try to create a file instead of a directory for the
      // pentaho-big-data-plugin. This should be detected.
      fs.delete(bigDataPlugin, true);
      bigDataPluginFileOut = fs.create(bigDataPlugin);
      assertFalse(ch.isKettleEnvironmentInstalledAt(fs, root));
    } finally {
      // Null-check before closing: either stream may never have been opened if an earlier
      // call failed, and an NPE here would mask the original exception
      if (lockFileOut != null) {
        lockFileOut.close();
      }
      if (bigDataPluginFileOut != null) {
        bigDataPluginFileOut.close();
      }
      fs.delete(root, true);
    }
  }

  @Test
  public void configureWithPmr() throws Exception {
    DistributedCacheUtilImpl ch = new DistributedCacheUtilImpl(TEST_CONFIG);

    Configuration conf = new Configuration();
    FileSystem fs = DistributedCacheTestUtil.getLocalFileSystem(conf);

    // This "empty pmr" contains a lib/ folder and some empty kettle-*.jar files but no actual content
    FileObject pmrArchive = KettleVFS.getFileObject(getClass().getResource("/empty-pmr.zip").toURI().getPath());

    FileObject bigDataPluginDir = DistributedCacheTestUtil
        .createTestFolderWithContent(DistributedCacheUtilImpl.PENTAHO_BIG_DATA_PLUGIN_FOLDER_NAME);

    Path root = new Path("bin/test/installKettleEnvironment");
    try {
      ch.installKettleEnvironment(pmrArchive, fs, root, bigDataPluginDir, null);
      assertTrue(ch.isKettleEnvironmentInstalledAt(fs, root));

      ch.configureWithKettleEnvironment(conf, fs, root);

      // Make sure our libraries are on the classpath
      assertTrue(conf.get("mapred.cache.files").contains("lib/kettle-core.jar"));
      assertTrue(conf.get("mapred.cache.files").contains("lib/kettle-engine.jar"));
      assertTrue(conf.get("mapred.job.classpath.files").contains("lib/kettle-core.jar"));
      assertTrue(conf.get("mapred.job.classpath.files").contains("lib/kettle-engine.jar"));

      // Make sure the configuration specific jar made it!
      assertTrue(conf.get("mapred.cache.files").contains("lib/configuration-specific.jar"));

      // Make sure our plugins folder is registered
      assertTrue(conf.get("mapred.cache.files").contains("#plugins"));

      // Make sure our libraries aren't included twice
      assertFalse(conf.get("mapred.cache.files").contains("#lib"));

      // We should not have individual files registered
      assertFalse(conf.get("mapred.cache.files").contains("pentaho-big-data-plugin/jar1.jar"));
      assertFalse(conf.get("mapred.cache.files").contains("pentaho-big-data-plugin/jar2.jar"));
      assertFalse(conf.get("mapred.cache.files").contains("pentaho-big-data-plugin/folder/file.txt"));
    } finally {
      bigDataPluginDir.delete(new AllFileSelector());
      fs.delete(root, true);
    }
  }
}