/*******************************************************************************
 *
 * Pentaho Big Data
 *
 * Copyright (C) 2002-2017 by Hitachi Vantara : http://www.pentaho.com
 *
 *******************************************************************************
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 ******************************************************************************/

package org.pentaho.hadoop.shim.common;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import static org.mockito.Matchers.any;
import static org.mockito.Matchers.anyInt;
import static org.mockito.Matchers.anyShort;
import static org.mockito.Mockito.doNothing;
import static org.mockito.Mockito.doReturn;
import static org.mockito.Mockito.mock;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.URL;
import java.util.Arrays;
import java.util.List;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;

import org.apache.commons.vfs2.AllFileSelector;
import org.apache.commons.vfs2.FileObject;
import org.apache.commons.vfs2.FileSelector;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
import org.pentaho.di.core.Const;
import org.pentaho.di.core.exception.KettleFileException;
import org.pentaho.di.core.vfs.KettleVFS;
import org.pentaho.di.i18n.BaseMessages;
import org.pentaho.hadoop.shim.HadoopConfiguration;
import org.pentaho.hadoop.shim.common.fs.PathProxy;
import org.pentaho.hadoop.shim.spi.MockHadoopShim;

/**
 * Test the DistributedCacheUtil
 */
public class DistributedCacheUtilImplTest {

  private static HadoopConfiguration TEST_CONFIG;
  private static String PLUGIN_BASE = null;

  @BeforeClass
  public static void setup() throws Exception {
    // Create some Hadoop configuration specific pmr libraries
    TEST_CONFIG = new HadoopConfiguration(
      DistributedCacheTestUtil.createTestHadoopConfiguration(
        "bin/test/" + DistributedCacheUtilImplTest.class.getSimpleName()),
      "test-config", "name", new MockHadoopShim());

    PLUGIN_BASE = System.getProperty(Const.PLUGIN_BASE_FOLDERS_PROP);
    // Fake out the "plugins" directory for the project's root directory
    System.setProperty(Const.PLUGIN_BASE_FOLDERS_PROP,
      KettleVFS.getFileObject(".").getURL().toURI().getPath());
  }

  @AfterClass
  public static void teardown() {
    if (PLUGIN_BASE != null) {
      System.setProperty(Const.PLUGIN_BASE_FOLDERS_PROP, PLUGIN_BASE);
    }
  }

  @Test(expected = NullPointerException.class)
  public void instantiation() {
    new DistributedCacheUtilImpl(null);
  }

  @Test
  public void deleteDirectory() throws Exception {
    FileObject test = KettleVFS.getFileObject("bin/test/deleteDirectoryTest");
    test.createFolder();

    DistributedCacheUtilImpl ch = new DistributedCacheUtilImpl(TEST_CONFIG);
    ch.deleteDirectory(test);
    try {
      assertFalse(test.exists());
    } finally {
      // Delete the directory with java.io.File if it wasn't removed
      File f = new File("bin/test/deleteDirectoryTest");
      if (f.exists() && !f.delete()) {
        throw new IOException("unable to delete test directory: " + f.getAbsolutePath());
      }
    }
  }

  @Test
  public void extract_invalid_archive() throws Exception {
    DistributedCacheUtilImpl ch = new DistributedCacheUtilImpl(TEST_CONFIG);

    try {
      ch.extract(KettleVFS.getFileObject("bogus"), null);
      fail("expected exception");
    } catch (IllegalArgumentException ex) {
      assertTrue(ex.getMessage().startsWith("archive does not exist"));
    }
  }

  @Test
  public void extract_destination_exists() throws Exception {
    DistributedCacheUtilImpl ch = new DistributedCacheUtilImpl(TEST_CONFIG);

    FileObject archive = KettleVFS
      .getFileObject(getClass().getResource("/pentaho-mapreduce-sample.jar").toURI().getPath());

    try {
      ch.extract(archive, KettleVFS.getFileObject("."));
    } catch (IllegalArgumentException ex) {
      assertTrue(ex.getMessage(), "destination already exists".equals(ex.getMessage()));
    }
  }

  @Test
  public void extractToTemp() throws Exception {
    DistributedCacheUtilImpl ch = new DistributedCacheUtilImpl(TEST_CONFIG);

    FileObject archive = KettleVFS
      .getFileObject(getClass().getResource("/pentaho-mapreduce-sample.jar").toURI().getPath());
    FileObject extracted = ch.extractToTemp(archive);

    assertNotNull(extracted);
    assertTrue(extracted.exists());
    try {
      // There should be 3 files and 5 directories inside the root folder (which is the 9th entry)
      assertTrue(extracted.findFiles(new AllFileSelector()).length == 9);
    } finally {
      // clean up after ourselves
      ch.deleteDirectory(extracted);
    }
  }

  @Test
  public void extractToTempZipEntriesMixed() throws Exception {
    DistributedCacheUtilImpl ch = new DistributedCacheUtilImpl(TEST_CONFIG);

    // Build a small archive whose file entry precedes its parent directory entry
    File dest = File.createTempFile("entriesMixed", ".zip");
    ZipOutputStream outputStream = new ZipOutputStream(new FileOutputStream(dest));
    ZipEntry e = new ZipEntry("zipEntriesMixed" + "/" + "someFile.txt");
    outputStream.putNextEntry(e);
    byte[] data = "someOutString".getBytes();
    outputStream.write(data, 0, data.length);
    outputStream.closeEntry();
    e = new ZipEntry("zipEntriesMixed" + "/");
    outputStream.putNextEntry(e);
    outputStream.closeEntry();
    outputStream.close();

    FileObject archive = KettleVFS.getFileObject(dest.getAbsolutePath());
    FileObject extracted = null;
    try {
      extracted = ch.extractToTemp(archive);
    } catch (IOException | KettleFileException e1) {
      e1.printStackTrace();
      fail("Exception not expected in this case");
    }

    assertNotNull(extracted);
    assertTrue(extracted.exists());
    try {
      // Expect 3 entries: the extraction root folder, the zipEntriesMixed folder and someFile.txt
      assertTrue(extracted.findFiles(new AllFileSelector()).length == 3);
    } finally {
      // clean up after ourselves
      ch.deleteDirectory(extracted);
      dest.delete();
    }
  }

  @Test
  public void extractToTemp_missing_archive() throws Exception {
    DistributedCacheUtilImpl ch = new DistributedCacheUtilImpl(TEST_CONFIG);

    try {
      ch.extractToTemp(null);
      fail("Expected exception");
    } catch (NullPointerException ex) {
      assertEquals("archive is required", ex.getMessage());
    }
  }

  @Test
  public void findFiles_vfs() throws Exception {
    DistributedCacheUtilImpl ch = new DistributedCacheUtilImpl(TEST_CONFIG);

    FileObject testFolder = DistributedCacheTestUtil.createTestFolderWithContent();

    try {
      // Simply test we can find the jar files in our test folder
      List<String> jars = ch.findFiles(testFolder, "jar");
      assertEquals(4, jars.size());

      // Look for all files and folders
      List<String> all = ch.findFiles(testFolder, null);
      assertEquals(12, all.size());
    } finally {
      testFolder.delete(new AllFileSelector());
    }
  }

  @Test
  public void findFiles_vfs_hdfs() throws Exception {
    DistributedCacheUtilImpl ch = new DistributedCacheUtilImpl(TEST_CONFIG);

    URL url = new URL("http://localhost:8020/path/to/file");
    Configuration conf = mock(Configuration.class);
    FileSystem fs = mock(FileSystem.class);
    FileObject source = mock(FileObject.class);
    Path dest = mock(Path.class);
    FileObject hdfsDest = mock(FileObject.class);
    Path root = mock(Path.class);

    // A fake listing of 12 staged files
    FileObject[] fileObjects = new FileObject[12];
    for (int i = 0; i < fileObjects.length; i++) {
      URL fileUrl = new URL("http://localhost:8020/path/to/file/" + i);
      FileObject fileObject = mock(FileObject.class);
      fileObjects[i] = fileObject;
      doReturn(fileUrl).when(fileObject).getURL();
    }

    // Stub just enough of the FileSystem and VFS mocks for stageForCache to run
    doReturn(url).when(source).getURL();
    doReturn(conf).when(fs).getConf();
    doReturn(0).when(conf).getInt(any(String.class), anyInt());
    doReturn(true).when(source).exists();
    doReturn(fileObjects).when(hdfsDest).findFiles(any(FileSelector.class));
    doReturn(true).when(fs).delete(root, true);
    doReturn(fileObjects.length).when(source).delete(any(AllFileSelector.class));
    doNothing().when(fs).copyFromLocalFile(any(Path.class), any(Path.class));
    doNothing().when(fs).setPermission(any(Path.class), any(FsPermission.class));
    doReturn(true).when(fs).setReplication(any(Path.class), anyShort());

    try {
      try {
        ch.stageForCache(source, fs, dest, true);

        List<String> files = ch.findFiles(hdfsDest, null);
        assertEquals(12, files.size());
      } finally {
        fs.delete(root, true);
      }
    } finally {
      source.delete(new AllFileSelector());
    }
  }

  @Test
  public void stageForCache_missing_source() throws Exception {
    DistributedCacheUtilImpl ch = new DistributedCacheUtilImpl(TEST_CONFIG);

    Configuration conf = new Configuration();
    FileSystem fs = DistributedCacheTestUtil.getLocalFileSystem(conf);

    Path dest = new Path("bin/test/bogus-destination");
    FileObject bogusSource = KettleVFS.getFileObject("bogus");
    try {
      ch.stageForCache(bogusSource, fs, dest, true);
      fail("expected exception when source does not exist");
    } catch (KettleFileException ex) {
      assertEquals(BaseMessages.getString(DistributedCacheUtilImpl.class,
        "DistributedCacheUtil.SourceDoesNotExist", bogusSource), ex.getMessage().trim());
    }
  }

  @Test
  public void stageForCache_destination_no_overwrite() throws Exception {
    DistributedCacheUtilImpl ch = new DistributedCacheUtilImpl(TEST_CONFIG);

    Configuration conf = new Configuration();
    FileSystem fs = DistributedCacheTestUtil.getLocalFileSystem(conf);

    FileObject source = DistributedCacheTestUtil.createTestFolderWithContent();
    try {
      Path root = new Path("bin/test/stageForCache_destination_exists");
      Path dest = new Path(root, "dest");

      fs.mkdirs(dest);
      assertTrue(fs.exists(dest));
      assertTrue(fs.getFileStatus(dest).isDir());
      try {
        ch.stageForCache(source, fs, dest, false);
      } catch (KettleFileException ex) {
        assertTrue(ex.getMessage(), ex.getMessage().contains("Destination exists"));
      } finally {
        fs.delete(root, true);
      }
    } finally {
      source.delete(new AllFileSelector());
    }
  }

  @Test
  public void addCachedFilesToClasspath() throws IOException {
    DistributedCacheUtilImpl ch = new DistributedCacheUtilImpl(TEST_CONFIG);
    Configuration conf = new Configuration();

    List<Path> files = Arrays.asList(new Path("a"), new Path("b"), new Path("c"));

    ch.addCachedFilesToClasspath(files, conf);

    // this check is not needed for each and every shim
    if ("true".equals(System.getProperty("org.pentaho.hadoop.shims.check.symlink", "false"))) {
      assertEquals("yes", conf.get("mapred.create.symlink"));
    }

    for (Path file : files) {
      assertTrue(conf.get("mapred.cache.files").contains(file.toString()));
      assertTrue(conf.get("mapred.job.classpath.files").contains(file.toString()));
    }
  }

  @Test
  public void installKettleEnvironment_missing_arguments() throws Exception {
    DistributedCacheUtilImpl ch = new DistributedCacheUtilImpl(TEST_CONFIG);

    try {
      ch.installKettleEnvironment(null, (org.pentaho.hadoop.shim.api.fs.FileSystem) null, null, null, null);
      fail("Expected exception on missing archive");
    } catch (NullPointerException ex) {
      assertEquals("pmrArchive is required", ex.getMessage());
    }

    try {
      ch.installKettleEnvironment(KettleVFS.getFileObject("."),
        (org.pentaho.hadoop.shim.api.fs.FileSystem) null, null, null, null);
      fail("Expected exception on missing destination");
    } catch (NullPointerException ex) {
      assertEquals("destination is required", ex.getMessage());
    }

    try {
      ch.installKettleEnvironment(KettleVFS.getFileObject("."),
        (org.pentaho.hadoop.shim.api.fs.FileSystem) null, new PathProxy("."), null, null);
      fail("Expected exception on missing big data plugin");
    } catch (NullPointerException ex) {
      assertEquals("big data plugin required", ex.getMessage());
    }
  }

  @Test(expected = IllegalArgumentException.class)
  public void stagePluginsForCache_no_folders() throws Exception {
    DistributedCacheUtilImpl ch = new DistributedCacheUtilImpl(TEST_CONFIG);
    ch.stagePluginsForCache(DistributedCacheTestUtil.getLocalFileSystem(new Configuration()),
      new Path("bin/test/plugins-installation-dir"), null);
  }

  @Test(expected = KettleFileException.class)
  public void stagePluginsForCache_invalid_folder() throws Exception {
    DistributedCacheUtilImpl ch = new DistributedCacheUtilImpl(TEST_CONFIG);
    ch.stagePluginsForCache(DistributedCacheTestUtil.getLocalFileSystem(new Configuration()),
      new Path("bin/test/plugins-installation-dir"), "bin/bogus-plugin-name");
  }

  @Test
  public void findPluginFolder() throws Exception {
    DistributedCacheUtilImpl util = new DistributedCacheUtilImpl(TEST_CONFIG);

    // Fake out the "plugins" directory for the project's root directory
    String originalValue = System.getProperty(Const.PLUGIN_BASE_FOLDERS_PROP);
    System.setProperty(Const.PLUGIN_BASE_FOLDERS_PROP,
      KettleVFS.getFileObject(".").getURL().toURI().getPath());

    assertNotNull("Should have found plugin dir: bin/", util.findPluginFolder("bin"));
    assertNotNull("Should be able to find nested plugin dir: bin/test/", util.findPluginFolder("bin/test"));

    assertNull("Should not have found plugin dir: org/", util.findPluginFolder("org"));

    System.setProperty(Const.PLUGIN_BASE_FOLDERS_PROP, originalValue);
  }

  @Test
  public void addFilesToClassPath() throws IOException {
    DistributedCacheUtilImpl util = new DistributedCacheUtilImpl(TEST_CONFIG);
    Path p1 = new Path("/testing1");
    Path p2 = new Path("/testing2");
    Configuration conf = new Configuration();
    util.addFileToClassPath(p1, conf);
    util.addFileToClassPath(p2, conf);
    assertEquals("/testing1:/testing2", conf.get("mapred.job.classpath.files"));
  }

  @Test
  public void addFilesToClassPath_custom_path_separator() throws IOException {
    DistributedCacheUtilImpl util = new DistributedCacheUtilImpl(TEST_CONFIG);
    Path p1 = new Path("/testing1");
    Path p2 = new Path("/testing2");
    Configuration conf = new Configuration();

    // Use a custom classpath separator ("J") instead of the default ":"
    String originalValue = System.getProperty("hadoop.cluster.path.separator", ":");
    System.setProperty("hadoop.cluster.path.separator", "J");

    util.addFileToClassPath(p1, conf);
    util.addFileToClassPath(p2, conf);
    assertEquals("/testing1J/testing2", conf.get("mapred.job.classpath.files"));

    System.setProperty("hadoop.cluster.path.separator", originalValue);
  }
}