Java tutorial
/** * Copyright 2015 Cloudera Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.kitesdk.data.oozie; import org.kitesdk.data.PartitionView; import java.io.File; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; import java.net.URI; import java.net.URISyntaxException; import java.util.Arrays; import java.util.Collection; import java.util.HashSet; import java.util.Map; import java.util.Set; import org.apache.avro.Schema; import org.apache.avro.SchemaBuilder; import org.apache.avro.generic.GenericRecord; import org.apache.commons.io.FileUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.oozie.ErrorCode; import org.apache.oozie.XException; import org.apache.oozie.dependency.URIHandlerException; import org.apache.oozie.service.ServiceException; import org.apache.oozie.service.Services; import org.junit.After; import org.junit.Assert; import org.junit.Before; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; import org.kitesdk.data.Dataset; import org.kitesdk.data.DatasetDescriptor; import org.kitesdk.data.DatasetReader; import org.kitesdk.data.DatasetWriter; import org.kitesdk.data.Formats; import org.kitesdk.data.MiniDFSTest; import org.kitesdk.data.PartitionStrategy; import org.kitesdk.data.Signalable; import org.kitesdk.data.RefinableView; import org.kitesdk.data.View; import org.kitesdk.data.oozie.KiteURIHandler; import org.kitesdk.data.spi.DatasetRepository; import org.kitesdk.data.spi.DefaultConfiguration; import org.kitesdk.data.spi.OptionBuilder; import org.kitesdk.data.spi.Registration; import org.kitesdk.data.spi.URIPattern; import org.kitesdk.data.spi.filesystem.FileSystemDatasetRepository; import com.google.common.io.Files; @RunWith(Parameterized.class) public class TestKiteURIHandler extends MiniDFSTest { protected static final String NAMESPACE = "ns1"; protected static final String NAME = "provider_test1"; @Parameterized.Parameters public static Collection<Object[]> data() { Object[][] data = new Object[][] { { false }, // default to local FS { true } }; // default to distributed FS return Arrays.asList(data); } // whether this should use the DFS provided by MiniDFSTest protected boolean distributed; protected Configuration conf; protected DatasetDescriptor testDescriptor; protected FileSystem fs; private Configuration startingConf; private String startingOozieHome; private File serviceTempDir; public TestKiteURIHandler(boolean distributed) { this.distributed = distributed; } public DatasetRepository newRepo() { return new FileSystemDatasetRepository.Builder().configuration(conf) .rootDirectory(URI.create("target/data")).build(); } @After public void removeDataPath() throws IOException { fs.delete(new Path("target/data"), true); // restore configuration DefaultConfiguration.set(startingConf); if (serviceTempDir != null) { FileUtils.deleteDirectory(serviceTempDir); serviceTempDir = null; } if (Services.get() != null) { Services.get().destroy(); } if (startingOozieHome == null) { System.clearProperty("oozie.home.dir"); } else { System.setProperty("oozie.home.dir", startingOozieHome); startingOozieHome = null; } } @Before public void setUp() throws IOException, URISyntaxException { this.conf = (distributed ? MiniDFSTest.getConfiguration() : new Configuration()); this.fs = FileSystem.get(conf); this.testDescriptor = new DatasetDescriptor.Builder().format(Formats.AVRO) .schema(SchemaBuilder.record("Event").fields().requiredLong("timestamp").requiredString("message") .endRecord()) .partitionStrategy(new PartitionStrategy.Builder().year("timestamp").month("timestamp") .day("timestamp").build()) .build(); uriHandler = new KiteURIHandler(); startingConf = DefaultConfiguration.get(); startingOozieHome = System.getProperty("oozie.home.dir"); } private KiteURIHandler uriHandler; @Test public void uriToNonExistantDatasetsView() throws URIHandlerException, URISyntaxException { URI uri = new URI("view:file:target/data/data/nomailbox?message=hello"); Assert.assertFalse("URIs to datasets that don't exist should return false", uriHandler.exists(uri, null)); } @Test public void supportedSchemes() { uriHandler.init(new Configuration()); Set<String> scheme = new HashSet<String>(); scheme.add("view"); scheme.add("dataset"); Assert.assertEquals(scheme, uriHandler.getSupportedSchemes()); } @Test(expected = UnsupportedOperationException.class) public void registerNotifications() throws URISyntaxException, URIHandlerException { URI uri = new URI("view:hdfs://localhost:9083/default/person?version=201404240000"); uriHandler.registerForNotification(uri, new Configuration(), "user", "234"); } @Test(expected = UnsupportedOperationException.class) public void unregisterNotifications() throws URISyntaxException { URI uri = new URI("view:hdfs://localhost:9083/default/person?version=201404240000"); uriHandler.unregisterFromNotification(uri, "2324"); } @Test public void checkURIDoesNotExist() throws URIHandlerException, IOException { DatasetRepository repository = newRepo(); Dataset<GenericRecord> dataset = repository.create("data", "notreadymailbox", testDescriptor); RefinableView<GenericRecord> view = dataset.with("message", "hello"); Assert.assertFalse(uriHandler.exists(view.getUri(), null)); } @Test public void checkURIExistsView() throws URIHandlerException, IOException { DatasetRepository repository = newRepo(); Dataset<GenericRecord> dataset = repository.create("data", "readymailbox", testDescriptor); View<GenericRecord> view = dataset.with("message", "hello"); ((Signalable<GenericRecord>) view).signalReady(); Assert.assertTrue(uriHandler.exists(view.getUri(), null)); } @Test public void checkURIExistsDataset() throws URIHandlerException, IOException { DatasetRepository repository = newRepo(); Dataset<GenericRecord> dataset = repository.create("data", "readymailbox", testDescriptor); ((Signalable<GenericRecord>) ((View<GenericRecord>) dataset)).signalReady(); Assert.assertTrue(uriHandler.exists(dataset.getUri(), null)); } @Test public void validateInvalidScheme() throws URIHandlerException { String uri = "repo:hdfs:/default/cloudera/users?favoriteColor=pink"; try { uriHandler.validate(uri); Assert.fail("Validate with an invalid schema should have thrown an exception"); } catch (XException ex) { Assert.assertEquals(ErrorCode.E0904, ex.getErrorCode()); } } @Test public void validateNotAURI() throws URIHandlerException { String uri = "clearly not a uri"; try { uriHandler.validate(uri); Assert.fail("Validate with an invalid URI should have thrown an exception"); } catch (XException ex) { Assert.assertEquals(ErrorCode.E0906, ex.getErrorCode()); } } @Test public void existsForNonReadiableView() throws URIHandlerException, URISyntaxException { Registration.register(new URIPattern("unreadiable?absolute=true"), new URIPattern("unreadiable::namespace/:dataset?absolute=true"), new UnreadiableDatasetBuilder()); URI uri = new URI("view:unreadiable:default/person?version=201404240000"); Assert.assertFalse(uriHandler.exists(uri, null)); } @Test public void loadConfigFromHCatAccessor() throws URIHandlerException, URISyntaxException, ServiceException, IOException { setupKiteConfigurationService(true, true); URI uri = new URI("view:file:target/data/data/nomailbox?message=hello"); uriHandler.exists(uri, null); Configuration defaultConf = DefaultConfiguration.get(); Assert.assertEquals("test.value", defaultConf.get("test.property")); Services.get().get(KiteConfigurationService.class).getKiteConf().set("test.value", "something.else"); // doesn't modify default config on further exist calls uriHandler.exists(uri, null); defaultConf = DefaultConfiguration.get(); Assert.assertEquals("test.value", defaultConf.get("test.property")); Assert.assertEquals("something.else", Services.get().get(KiteConfigurationService.class).getKiteConf().get("test.value")); } @Test public void noConfigLoadedWhenNoServices() throws URIHandlerException, URISyntaxException { URI uri = new URI("view:file:target/data/data/nomailbox?message=hello"); uriHandler.exists(uri, null); Assert.assertNull(Services.get()); } @Test public void noConfigLoadedWhenNoKiteService() throws URIHandlerException, URISyntaxException, ServiceException, FileNotFoundException, IOException { setupKiteConfigurationService(false, false); URI uri = new URI("view:file:target/data/data/nomailbox?message=hello"); uriHandler.exists(uri, null); Assert.assertNotNull(Services.get()); Assert.assertNull(Services.get().get(KiteConfigurationService.class)); } @Test public void noConfigLoadedIfNoKiteServiceConfig() throws URIHandlerException, URISyntaxException, ServiceException, FileNotFoundException, IOException { setupKiteConfigurationService(true, false); URI uri = new URI("view:file:target/data/data/nomailbox?message=hello"); uriHandler.exists(uri, null); Assert.assertNotNull(Services.get()); Assert.assertNotNull(Services.get().get(KiteConfigurationService.class)); Assert.assertNull(Services.get().get(KiteConfigurationService.class).getKiteConf()); } private void setupKiteConfigurationService(boolean loadKiteService, boolean loadKiteConfig) throws ServiceException, FileNotFoundException, IOException { serviceTempDir = Files.createTempDir(); File confDir = new File(serviceTempDir, "conf"); File hadoopConfDir = new File(confDir, "hadoop-conf"); File hadoopConfTarget = new File(hadoopConfDir, "hadoop-site.xml"); File actionConfDir = new File(confDir, "action-conf"); File hiveConfDir = new File(confDir, "hive-conf"); File hiveConfTarget = new File(hiveConfDir, "hive-site.xml"); File oozieSiteTarget = new File(confDir, "oozie-site.xml"); confDir.mkdir(); hadoopConfDir.mkdir(); actionConfDir.mkdir(); hiveConfDir.mkdir(); Configuration oozieSiteConf = new Configuration(false); if (loadKiteConfig) { oozieSiteConf.set("oozie.service.KiteConfigurationService.kite.configuration", hiveConfTarget.getAbsolutePath()); } oozieSiteConf.set("oozie.services", "org.apache.oozie.service.HadoopAccessorService"); oozieSiteConf.writeXml(new FileOutputStream(oozieSiteTarget)); conf.writeXml(new FileOutputStream(hadoopConfTarget)); Configuration hiveConf = new Configuration(false); hiveConf.set("test.property", "test.value"); hiveConf.writeXml(new FileOutputStream(hiveConfTarget)); // set to the temp directory System.setProperty("oozie.home.dir", serviceTempDir.getAbsolutePath()); Services services = new Services(); services.init(); if (loadKiteService) { services.setService(KiteConfigurationService.class); } } //minimal implementation of the dataset stack to get an non-readiable view to load in the handler private static final class UnreadiableDatasetRepository implements DatasetRepository { @Override public <E> Dataset<E> load(String namespace, String name) { return null; } @Override public <E> Dataset<E> load(String namespace, String name, Class<E> type) { return new UnreadiableDataset<E>(); } @Override public <E> Dataset<E> create(String namespace, String name, DatasetDescriptor descriptor) { return null; } @Override public <E> Dataset<E> create(String namespace, String name, DatasetDescriptor descriptor, Class<E> type) { return null; } @Override public <E> Dataset<E> update(String namespace, String name, DatasetDescriptor descriptor) { return null; } @Override public <E> Dataset<E> update(String namespace, String name, DatasetDescriptor descriptor, Class<E> type) { return null; } @Override public boolean delete(String namespace, String name) { return false; } @Override public boolean exists(String namespace, String name) { return false; } @Override public Collection<String> namespaces() { return null; } @Override public Collection<String> datasets(String namespace) { return null; } @Override public URI getUri() { return null; } } @SuppressWarnings("rawtypes") private static final class UnreadiableDataset<E> implements Dataset<E> { @Override public RefinableView<E> with(String name, Object... values) { return null; } @Override public RefinableView<E> from(String name, Comparable value) { return null; } @Override public RefinableView<E> fromAfter(String name, Comparable value) { return null; } @Override public RefinableView<E> to(String name, Comparable value) { return null; } @Override public RefinableView<E> toBefore(String name, Comparable value) { return null; } @Override public Dataset<E> getDataset() { return null; } @Override public DatasetReader<E> newReader() { return null; } @Override public DatasetWriter<E> newWriter() { return null; } @Override public boolean includes(E entity) { return false; } @Override public boolean deleteAll() { return false; } @Override public Iterable<PartitionView<E>> getCoveringPartitions() { return null; } @Override public Class<E> getType() { return null; } @Override public boolean isEmpty() { return false; } @Override public String getName() { return null; } @Override public String getNamespace() { return null; } @Override public DatasetDescriptor getDescriptor() { return null; } @Override public URI getUri() { return null; } @Override public Schema getSchema() { return null; } @Override public View<GenericRecord> asSchema(Schema schema) { return null; } @Override public <T> View<T> asType(Class<T> type) { return null; } } private static final class UnreadiableDatasetBuilder implements OptionBuilder<DatasetRepository> { @Override public DatasetRepository getFromOptions(Map<String, String> options) { return new UnreadiableDatasetRepository(); } } }