Java tutorial: a walkthrough of Apache Oozie's TestHiveActionExecutor test class (package org.apache.oozie.action.hadoop).
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.oozie.action.hadoop;

import java.io.FileInputStream;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.StringReader;
import java.io.Writer;
import java.text.MessageFormat;
import java.util.Arrays;
import java.util.Map;
import java.util.Properties;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.JobID;
import org.apache.hadoop.mapred.RunningJob;
import org.apache.oozie.WorkflowActionBean;
import org.apache.oozie.WorkflowJobBean;
import org.apache.oozie.client.WorkflowAction;
import org.apache.oozie.service.ConfigurationService;
import org.apache.oozie.service.HadoopAccessorService;
import org.apache.oozie.service.Services;
import org.apache.oozie.service.WorkflowAppService;
import org.apache.oozie.util.ClassUtils;
import org.apache.oozie.util.IOUtils;
import org.apache.oozie.util.XConfiguration;
import org.apache.oozie.util.XmlUtils;
import org.jdom.Element;
import org.jdom.Namespace;

public class TestHiveActionExecutor extends ActionExecutorTestCase {

    private static final String NEW_LINE = System.getProperty("line.separator", "\n");
    private static final String SAMPLE_DATA_TEXT = "3\n4\n6\n1\n2\n7\n9\n0\n8\n";
    private static final String HIVE_SCRIPT_FILENAME = "script.q";
    private static final String INPUT_DIRNAME = "input";
    private static final String OUTPUT_DIRNAME = "output";
    private static final String DATA_FILENAME = "data.txt";

    @SuppressWarnings("unchecked")
    public void testSetupMethods() throws Exception {
        HiveActionExecutor ae = new HiveActionExecutor();
        assertEquals(Arrays.asList(HiveMain.class), ae.getLauncherClasses());
        assertEquals("hive", ae.getType());
    }

    private String getHiveScript(String inputPath, String outputPath) {
        StringBuilder buffer = new StringBuilder(NEW_LINE);
        buffer.append("set -v;").append(NEW_LINE);
        buffer.append("DROP TABLE IF EXISTS test;").append(NEW_LINE);
        buffer.append("CREATE EXTERNAL TABLE test (a INT) STORED AS");
        buffer.append(NEW_LINE).append("TEXTFILE LOCATION '");
        buffer.append(inputPath).append("';").append(NEW_LINE);
        buffer.append("INSERT OVERWRITE DIRECTORY '");
        buffer.append(outputPath).append("'").append(NEW_LINE);
        buffer.append("SELECT (a-1) FROM test;").append(NEW_LINE);
        return buffer.toString();
    }
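    /*
     * For illustration only: with hypothetical paths such as
     * getHiveScript("hdfs://localhost:9000/test/input", "hdfs://localhost:9000/test/output"),
     * the method above produces a script equivalent to:
     *
     *   set -v;
     *   DROP TABLE IF EXISTS test;
     *   CREATE EXTERNAL TABLE test (a INT) STORED AS
     *   TEXTFILE LOCATION 'hdfs://localhost:9000/test/input';
     *   INSERT OVERWRITE DIRECTORY 'hdfs://localhost:9000/test/output'
     *   SELECT (a-1) FROM test;
     */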
    // Action XML for the script-based form (hive-action 0.2 namespace). Note:
    // the doubled single quotes ('') are MessageFormat escapes for a literal
    // single quote; {0} and {1} are substituted by MessageFormat.format below.
    private String getActionScriptXml() {
        String script = "<hive xmlns=''uri:oozie:hive-action:0.2''>" +
                "<job-tracker>{0}</job-tracker>" +
                "<name-node>{1}</name-node>" +
                "<configuration>" +
                "<property>" +
                "<name>javax.jdo.option.ConnectionURL</name>" +
                "<value>jdbc:derby:" + getTestCaseDir() + "/db;create=true</value>" +
                "</property>" +
                "<property>" +
                "<name>javax.jdo.option.ConnectionDriverName</name>" +
                "<value>org.apache.derby.jdbc.EmbeddedDriver</value>" +
                "</property>" +
                "<property>" +
                "<name>javax.jdo.option.ConnectionUserName</name>" +
                "<value>sa</value>" +
                "</property>" +
                "<property>" +
                "<name>javax.jdo.option.ConnectionPassword</name>" +
                "<value> </value>" +
                "</property>" +
                "<property>" +
                "<name>oozie.hive.log.level</name>" +
                "<value>DEBUG</value>" +
                "</property>" +
                "</configuration>" +
                "<script>" + HIVE_SCRIPT_FILENAME + "</script>" +
                "</hive>";
        return MessageFormat.format(script, getJobTrackerUri(), getNameNodeUri());
    }

    // Action XML for the inline form, which the test submits with the
    // hive-action 0.6 namespace. The query text is concatenated after
    // formatting so it is never exposed to MessageFormat's quote and
    // brace handling.
    private String getActionQueryXml(String query) {
        String script = "<hive xmlns=''uri:oozie:hive-action:0.6''>" +
                "<job-tracker>{0}</job-tracker>" +
                "<name-node>{1}</name-node>" +
                "<configuration>" +
                "<property>" +
                "<name>javax.jdo.option.ConnectionURL</name>" +
                "<value>jdbc:derby:" + getTestCaseDir() + "/db;create=true</value>" +
                "</property>" +
                "<property>" +
                "<name>javax.jdo.option.ConnectionDriverName</name>" +
                "<value>org.apache.derby.jdbc.EmbeddedDriver</value>" +
                "</property>" +
                "<property>" +
                "<name>javax.jdo.option.ConnectionUserName</name>" +
                "<value>sa</value>" +
                "</property>" +
                "<property>" +
                "<name>javax.jdo.option.ConnectionPassword</name>" +
                "<value> </value>" +
                "</property>" +
                "<property>" +
                "<name>oozie.hive.log.level</name>" +
                "<value>DEBUG</value>" +
                "</property>" +
                "</configuration>";
        return MessageFormat.format(script, getJobTrackerUri(), getNameNodeUri())
                + "<query>" + query + "</query>"
                + "</hive>";
    }
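    /*
     * For illustration only (hypothetical URIs): after MessageFormat
     * substitution, getActionScriptXml() yields XML of this shape:
     *
     *   <hive xmlns='uri:oozie:hive-action:0.2'>
     *     <job-tracker>localhost:9001</job-tracker>
     *     <name-node>hdfs://localhost:9000</name-node>
     *     <configuration>
     *       ... Derby metastore properties and oozie.hive.log.level ...
     *     </configuration>
     *     <script>script.q</script>
     *   </hive>
     */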
    public void testHiveAction() throws Exception {
        Path inputDir = new Path(getFsTestCaseDir(), INPUT_DIRNAME);
        Path outputDir = new Path(getFsTestCaseDir(), OUTPUT_DIRNAME);
        String hiveScript = getHiveScript(inputDir.toString(), outputDir.toString());
        FileSystem fs = getFileSystem();

        // First pass: submit the action as a <script> (hive-action 0.2).
        {
            Path script = new Path(getAppPath(), HIVE_SCRIPT_FILENAME);
            Writer scriptWriter = new OutputStreamWriter(fs.create(script));
            scriptWriter.write(hiveScript);
            scriptWriter.close();

            Writer dataWriter = new OutputStreamWriter(fs.create(new Path(inputDir, DATA_FILENAME)));
            dataWriter.write(SAMPLE_DATA_TEXT);
            dataWriter.close();

            Context context = createContext(getActionScriptXml());
            Namespace ns = Namespace.getNamespace("uri:oozie:hive-action:0.2");
            final RunningJob launcherJob = submitAction(context, ns);
            String launcherId = context.getAction().getExternalId();
            waitFor(200 * 1000, new Predicate() {
                public boolean evaluate() throws Exception {
                    return launcherJob.isComplete();
                }
            });
            assertTrue(launcherJob.isSuccessful());

            Configuration conf = new XConfiguration();
            conf.set("user.name", getTestUser());
            Map<String, String> actionData = LauncherMapperHelper.getActionData(getFileSystem(),
                    context.getActionDir(), conf);
            assertFalse(LauncherMapperHelper.hasIdSwap(actionData));

            HiveActionExecutor ae = new HiveActionExecutor();
            ae.check(context, context.getAction());
            assertTrue(launcherId.equals(context.getAction().getExternalId()));
            assertEquals("SUCCEEDED", context.getAction().getExternalStatus());
            assertNotNull(context.getAction().getData());
            ae.end(context, context.getAction());
            assertEquals(WorkflowAction.Status.OK, context.getAction().getStatus());
            assertNotNull(context.getAction().getData());

            Properties outputData = new Properties();
            outputData.load(new StringReader(context.getAction().getData()));
            assertTrue(outputData.containsKey(LauncherMain.HADOOP_JOBS));
            assertEquals(outputData.get(LauncherMain.HADOOP_JOBS), context.getExternalChildIDs());
            // While this works in a real cluster, it does not with miniMR:
            //assertTrue(outputData.getProperty(LauncherMain.HADOOP_JOBS).trim().length() > 0);
            //assertTrue(!actionData.get(LauncherMapper.ACTION_DATA_EXTERNAL_CHILD_IDS).isEmpty());
            assertTrue(fs.exists(outputDir));
            assertTrue(fs.isDirectory(outputDir));
        }

        // Second pass: submit the same script inline via <query> (hive-action 0.6).
        {
            Context context = createContext(getActionQueryXml(hiveScript));
            Namespace ns = Namespace.getNamespace("uri:oozie:hive-action:0.6");
            final RunningJob launcherJob = submitAction(context, ns);
            String launcherId = context.getAction().getExternalId();
            waitFor(200 * 1000, new Predicate() {
                public boolean evaluate() throws Exception {
                    return launcherJob.isComplete();
                }
            });
            assertTrue(launcherJob.isSuccessful());

            Configuration conf = new XConfiguration();
            conf.set("user.name", getTestUser());
            Map<String, String> actionData = LauncherMapperHelper.getActionData(getFileSystem(),
                    context.getActionDir(), conf);
            assertFalse(LauncherMapperHelper.hasIdSwap(actionData));

            HiveActionExecutor ae = new HiveActionExecutor();
            ae.check(context, context.getAction());
            assertTrue(launcherId.equals(context.getAction().getExternalId()));
            assertEquals("SUCCEEDED", context.getAction().getExternalStatus());
            assertNotNull(context.getAction().getData());
            ae.end(context, context.getAction());
            assertEquals(WorkflowAction.Status.OK, context.getAction().getStatus());
            assertNotNull(context.getAction().getData());

            Properties outputData = new Properties();
            outputData.load(new StringReader(context.getAction().getData()));
            assertTrue(outputData.containsKey(LauncherMain.HADOOP_JOBS));
            assertEquals(outputData.get(LauncherMain.HADOOP_JOBS), context.getExternalChildIDs());
            // While this works in a real cluster, it does not with miniMR:
            //assertTrue(outputData.getProperty(LauncherMain.HADOOP_JOBS).trim().length() > 0);
            //assertTrue(!actionData.get(LauncherMapper.ACTION_DATA_EXTERNAL_CHILD_IDS).isEmpty());
            assertTrue(fs.exists(outputDir));
            assertTrue(fs.isDirectory(outputDir));
        }
    }

    private RunningJob submitAction(Context context, Namespace ns) throws Exception {
        HiveActionExecutor ae = new HiveActionExecutor();
        WorkflowAction action = context.getAction();
        ae.prepareActionDir(getFileSystem(), context);
        ae.submitLauncher(getFileSystem(), context, action);

        String jobId = action.getExternalId();
        String jobTracker = action.getTrackerUri();
        String consoleUrl = action.getConsoleUrl();
        assertNotNull(jobId);
        assertNotNull(jobTracker);
        assertNotNull(consoleUrl);

        // Rebuild a client-side JobConf from the action XML so the test can
        // look up the launcher job that submitLauncher() just started.
        Element e = XmlUtils.parseXml(action.getConf());
        XConfiguration conf = new XConfiguration(
                new StringReader(XmlUtils.prettyPrint(e.getChild("configuration", ns)).toString()));
        conf.set("mapred.job.tracker", e.getChildTextTrim("job-tracker", ns));
        conf.set("fs.default.name", e.getChildTextTrim("name-node", ns));
        conf.set("user.name", context.getProtoActionConf().get("user.name"));
        conf.set("group.name", getTestGroup());

        JobConf jobConf = Services.get().get(HadoopAccessorService.class).createJobConf(jobTracker);
        XConfiguration.copy(conf, jobConf);
        String user = jobConf.get("user.name");
        JobClient jobClient = Services.get().get(HadoopAccessorService.class).createJobClient(user, jobConf);
        final RunningJob runningJob = jobClient.getJob(JobID.forName(jobId));
        assertNotNull(runningJob);
        return runningJob;
    }
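    /*
     * Utility for tests that need a jar on HDFS: copies the local jar that
     * contains anyContainedClass into the workflow application directory and
     * returns the target path. Hypothetical usage (the jar name is made up;
     * HiveMain is just an example of a contained class):
     *
     *   String jarOnHdfs = copyJar("lib/hive-launcher.jar", HiveMain.class);
     */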
    private String copyJar(String targetFile, Class<?> anyContainedClass) throws Exception {
        String file = ClassUtils.findContainingJar(anyContainedClass);
        System.out.println("[copy-jar] class: " + anyContainedClass + ", local jar ==> " + file);
        Path targetPath = new Path(getAppPath(), targetFile);
        FileSystem fs = getFileSystem();
        InputStream is = new FileInputStream(file);
        // targetPath is already resolved against the app path, so create it
        // directly (the original wrapped it in new Path(getAppPath(), ...) a
        // second time, which was redundant).
        OutputStream os = fs.create(targetPath);
        IOUtils.copyStream(is, os);
        return targetPath.toString();
    }

    private Context createContext(String actionXml) throws Exception {
        HiveActionExecutor ae = new HiveActionExecutor();

        XConfiguration protoConf = new XConfiguration();
        protoConf.set(WorkflowAppService.HADOOP_USER, getTestUser());
        SharelibUtils.addToDistributedCache("hive", getFileSystem(), getFsTestCaseDir(), protoConf);

        WorkflowJobBean wf = createBaseWorkflow(protoConf, "hive-action");
        WorkflowActionBean action = (WorkflowActionBean) wf.getActions().get(0);
        action.setType(ae.getType());
        action.setConf(actionXml);

        return new Context(wf, action);
    }

    public void testActionConfLoadDefaultResources() throws Exception {
        ConfigurationService.setBoolean(
                "oozie.service.HadoopAccessorService.action.configurations.load.default.resources", false);

        Path inputDir = new Path(getFsTestCaseDir(), INPUT_DIRNAME);
        Path outputDir = new Path(getFsTestCaseDir(), OUTPUT_DIRNAME);

        FileSystem fs = getFileSystem();
        Path script = new Path(getAppPath(), HIVE_SCRIPT_FILENAME);
        Writer scriptWriter = new OutputStreamWriter(fs.create(script));
        scriptWriter.write(getHiveScript(inputDir.toString(), outputDir.toString()));
        scriptWriter.close();

        Writer dataWriter = new OutputStreamWriter(fs.create(new Path(inputDir, DATA_FILENAME)));
        dataWriter.write(SAMPLE_DATA_TEXT);
        dataWriter.close();

        Context context = createContext(getActionScriptXml());
        Namespace ns = Namespace.getNamespace("uri:oozie:hive-action:0.2");
        submitAction(context, ns);

        // With default-resource loading disabled, the HadoopAccessorService
        // marker property must not leak into the persisted action configuration.
        FSDataInputStream in = fs.open(new Path(context.getActionDir(), LauncherMapper.ACTION_CONF_XML));
        XConfiguration conf = new XConfiguration();
        conf.addResource(in);
        assertNull(conf.get("oozie.HadoopAccessorService.created"));
    }
}
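// A note on running this class (assumption: a standard Maven Surefire setup,
// as in the Oozie build): a single test class can usually be selected with
//
//   mvn test -Dtest=TestHiveActionExecutor
//
// The tests rely on the mini MR/HDFS environment provided by the
// ActionExecutorTestCase base class rather than a real cluster, which is why
// some assertions above are commented out as miniMR-incompatible.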