// Java tutorial
/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.oozie.action.hadoop; import java.io.BufferedReader; import java.io.File; import java.io.FileOutputStream; import java.io.FileReader; import java.io.IOException; import java.io.PrintStream; import java.util.ArrayList; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; import java.util.regex.Pattern; import org.apache.commons.io.FileUtils; import org.apache.commons.io.output.TeeOutputStream; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hive.beeline.BeeLine; public class Hive2Main extends LauncherMain { private static final Pattern[] HIVE2_JOB_IDS_PATTERNS = { Pattern.compile("Ended Job = (job_\\S*)") }; private static final Set<String> DISALLOWED_BEELINE_OPTIONS = new HashSet<String>(); static { DISALLOWED_BEELINE_OPTIONS.add("-u"); DISALLOWED_BEELINE_OPTIONS.add("-n"); DISALLOWED_BEELINE_OPTIONS.add("-p"); DISALLOWED_BEELINE_OPTIONS.add("-d"); DISALLOWED_BEELINE_OPTIONS.add("-e"); DISALLOWED_BEELINE_OPTIONS.add("-f"); DISALLOWED_BEELINE_OPTIONS.add("-a"); DISALLOWED_BEELINE_OPTIONS.add("--help"); } public static void main(String[] args) throws Exception { run(Hive2Main.class, args); } 
private static Configuration initActionConf() { // Loading action conf prepared by Oozie Configuration actionConf = new Configuration(false); String actionXml = System.getProperty("oozie.action.conf.xml"); if (actionXml == null) { throw new RuntimeException("Missing Java System Property [oozie.action.conf.xml]"); } if (!new File(actionXml).exists()) { throw new RuntimeException("Action Configuration XML file [" + actionXml + "] does not exist"); } else { System.out.println("Using action configuration file " + actionXml); } actionConf.addResource(new Path("file:///", actionXml)); setYarnTag(actionConf); // Propagate delegation related props from launcher job to Hive job String delegationToken = getFilePathFromEnv("HADOOP_TOKEN_FILE_LOCATION"); if (delegationToken != null) { actionConf.set("mapreduce.job.credentials.binary", delegationToken); actionConf.set("tez.credentials.path", delegationToken); System.out.println("------------------------"); System.out.println("Setting env property for mapreduce.job.credentials.binary to: " + delegationToken); System.out.println("------------------------"); System.setProperty("mapreduce.job.credentials.binary", delegationToken); } else { System.out.println("Non-Kerberos execution"); } // See https://issues.apache.org/jira/browse/HIVE-1411 actionConf.set("datanucleus.plugin.pluginRegistryBundleCheck", "LOG"); return actionConf; } @Override protected void run(String[] args) throws Exception { System.out.println(); System.out.println("Oozie Hive 2 action configuration"); System.out.println("================================================================="); System.out.println(); Configuration actionConf = initActionConf(); //Logfile to capture job IDs String hadoopJobId = System.getProperty("oozie.launcher.job.id"); if (hadoopJobId == null) { throw new RuntimeException("Launcher Hadoop Job ID system property not set"); } String logFile = new File("hive2-oozie-" + hadoopJobId + ".log").getAbsolutePath(); List<String> arguments = new 
ArrayList<String>(); String jdbcUrl = actionConf.get(Hive2ActionExecutor.HIVE2_JDBC_URL); if (jdbcUrl == null) { throw new RuntimeException( "Action Configuration does not have [" + Hive2ActionExecutor.HIVE2_JDBC_URL + "] property"); } arguments.add("-u"); arguments.add(jdbcUrl); // Use the user who is running the map task String username = actionConf.get("user.name"); arguments.add("-n"); arguments.add(username); String password = actionConf.get(Hive2ActionExecutor.HIVE2_PASSWORD); if (password == null) { // Have to pass something or Beeline might interactively prompt, which we don't want password = "DUMMY"; } arguments.add("-p"); arguments.add(password); // We always use the same driver arguments.add("-d"); arguments.add("org.apache.hive.jdbc.HiveDriver"); String scriptPath = actionConf.get(Hive2ActionExecutor.HIVE2_SCRIPT); String query = actionConf.get(Hive2ActionExecutor.HIVE2_QUERY); if (scriptPath != null) { if (!new File(scriptPath).exists()) { throw new RuntimeException("Hive 2 script file [" + scriptPath + "] does not exist"); } // print out current directory & its contents File localDir = new File("dummy").getAbsoluteFile().getParentFile(); System.out.println("Current (local) dir = " + localDir.getAbsolutePath()); System.out.println("------------------------"); for (String file : localDir.list()) { System.out.println(" " + file); } System.out.println("------------------------"); System.out.println(); // Prepare the Hive Script String script = readStringFromFile(scriptPath); System.out.println(); System.out.println("Script [" + scriptPath + "] content: "); System.out.println("------------------------"); System.out.println(script); System.out.println("------------------------"); System.out.println(); arguments.add("-f"); arguments.add(scriptPath); } else if (query != null) { System.out.println("Query: "); System.out.println("------------------------"); System.out.println(query); System.out.println("------------------------"); System.out.println(); String 
filename = createScriptFile(query); arguments.add("-f"); arguments.add(filename); } else { throw new RuntimeException("Action Configuration does not have [" + Hive2ActionExecutor.HIVE2_SCRIPT + "], or [" + Hive2ActionExecutor.HIVE2_QUERY + "] property"); } // Pass any parameters to Beeline via arguments String[] params = MapReduceMain.getStrings(actionConf, Hive2ActionExecutor.HIVE2_PARAMS); if (params.length > 0) { System.out.println("Parameters:"); System.out.println("------------------------"); for (String param : params) { System.out.println(" " + param); int idx = param.indexOf('='); if (idx == -1) { throw new RuntimeException("Parameter expression must contain an assignment: " + param); } else if (idx == 0) { throw new RuntimeException("Parameter value not specified: " + param); } arguments.add("--hivevar"); arguments.add(param); } System.out.println("------------------------"); System.out.println(); } // This tells BeeLine to look for a delegation token; otherwise it won't and will fail in secure mode because there are no // Kerberos credentials. In non-secure mode, this argument is ignored so we can simply always pass it. 
arguments.add("-a"); arguments.add("delegationToken"); String[] beelineArgs = MapReduceMain.getStrings(actionConf, Hive2ActionExecutor.HIVE2_ARGS); for (String beelineArg : beelineArgs) { if (DISALLOWED_BEELINE_OPTIONS.contains(beelineArg)) { throw new RuntimeException("Error: Beeline argument " + beelineArg + " is not supported"); } arguments.add(beelineArg); } // Propagate MR job tag if defined if (actionConf.get(LauncherMain.MAPREDUCE_JOB_TAGS) != null) { arguments.add("--hiveconf"); arguments.add("mapreduce.job.tags=" + actionConf.get(LauncherMain.MAPREDUCE_JOB_TAGS)); } // Propagate "oozie.*" configs (but not "oozie.launcher.*" nor "oozie.hive2.*") for (Map.Entry<String, String> oozieConfig : actionConf.getValByRegex("^oozie\\.(?!launcher|hive2).+") .entrySet()) { arguments.add("--hiveconf"); arguments.add(oozieConfig.getKey() + "=" + oozieConfig.getValue()); } System.out.println("Beeline command arguments :"); for (String arg : arguments) { System.out.println(" " + arg); } System.out.println(); LauncherMainHadoopUtils.killChildYarnJobs(actionConf); System.out.println("================================================================="); System.out.println(); System.out.println(">>> Invoking Beeline command line now >>>"); System.out.println(); System.out.flush(); try { runBeeline(arguments.toArray(new String[arguments.size()]), logFile); } catch (SecurityException ex) { if (LauncherSecurityManager.getExitInvoked()) { if (LauncherSecurityManager.getExitCode() != 0) { throw ex; } } } finally { System.out.println("\n<<< Invocation of Beeline command completed <<<\n"); writeExternalChildIDs(logFile, HIVE2_JOB_IDS_PATTERNS, "Beeline"); } } private String createScriptFile(String query) throws IOException { String filename = "oozie-hive2-query-" + System.currentTimeMillis() + ".hql"; File f = new File(filename); FileUtils.writeStringToFile(f, query, "UTF-8"); return filename; } private void runBeeline(String[] args, String logFile) throws Exception { // We do this 
instead of calling BeeLine.main so we can duplicate the error stream for harvesting Hadoop child job IDs BeeLine beeLine = new BeeLine(); beeLine.setErrorStream(new PrintStream(new TeeOutputStream(System.err, new FileOutputStream(logFile)))); int status = beeLine.begin(args, null); if (status != 0) { System.exit(status); } } private static String readStringFromFile(String filePath) throws IOException { String line; BufferedReader br = null; try { br = new BufferedReader(new FileReader(filePath)); StringBuilder sb = new StringBuilder(); String sep = System.getProperty("line.separator"); while ((line = br.readLine()) != null) { sb.append(line).append(sep); } return sb.toString(); } finally { if (br != null) { br.close(); } } } }