Java tutorial
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hive.spark.client;

import com.google.common.base.Joiner;
import com.google.common.base.Preconditions;
import com.google.common.base.Strings;
import com.google.common.collect.Lists;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.concurrent.Callable;
import java.util.concurrent.Future;
import java.util.concurrent.FutureTask;

import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.hive.common.log.LogRedirector;
import org.apache.hadoop.hive.conf.Constants;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hive.spark.client.rpc.RpcServer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Extends the {@link AbstractSparkClient} and launches a child process to run Spark's
 * {@code bin/spark-submit} script. Logs from the child process are redirected to this
 * client's logger.
 */
class SparkSubmitSparkClient extends AbstractSparkClient {

  private static final Logger LOG = LoggerFactory.getLogger(SparkSubmitSparkClient.class);

  private static final String SPARK_HOME_ENV = "SPARK_HOME";
  private static final String SPARK_HOME_KEY = "spark.home";

  private static final long serialVersionUID = -4272763023516238171L;

  private List<String> argv;

  SparkSubmitSparkClient(RpcServer rpcServer, Map<String, String> conf, HiveConf hiveConf,
      String sessionid) throws IOException {
    super(rpcServer, conf, hiveConf, sessionid);
  }

  @Override
  protected String getSparkHome() {
    // Resolve Spark Home from the Spark conf, the environment, or a system property, in that order.
    String sparkHome = Strings.emptyToNull(conf.get(SPARK_HOME_KEY));
    if (sparkHome == null) {
      sparkHome = Strings.emptyToNull(System.getenv(SPARK_HOME_ENV));
    }
    if (sparkHome == null) {
      sparkHome = Strings.emptyToNull(System.getProperty(SPARK_HOME_KEY));
    }
    Preconditions.checkNotNull(sparkHome, "Cannot use " + HiveConf.HIVE_SPARK_SUBMIT_CLIENT +
        " without setting Spark Home");

    String master = conf.get("spark.master");
    Preconditions.checkArgument(master != null, "spark.master is not defined.");

    argv = Lists.newLinkedList();
    argv.add(new File(sparkHome, "bin/spark-submit").getAbsolutePath());

    return sparkHome;
  }

  @Override
  protected void addAppArg(String arg) {
    argv.add(arg);
  }

  @Override
  protected void addExecutableJar(String jar) {
    argv.add(jar);
  }

  @Override
  protected void addPropertiesFile(String absolutePath) {
    argv.add("--properties-file");
    argv.add(absolutePath);
  }

  @Override
  protected void addClass(String name) {
    argv.add("--class");
    argv.add(RemoteDriver.class.getName());
  }

  @Override
  protected void addJars(String jars) {
    argv.add("--jars");
    argv.add(jars);
  }

  @Override
  protected void addProxyUser(String proxyUser) {
    argv.add("--proxy-user");
    argv.add(proxyUser);
  }

  @Override
  protected void addKeytabAndPrincipal(boolean isDoAsEnabled, String keyTabFile,
      String principal) {
    if (isDoAsEnabled) {
      // With doAs enabled, prefix the spark-submit command with a kinit so the child process
      // runs with the given principal's Kerberos credentials.
      List<String> kinitArgv = Lists.newLinkedList();
      kinitArgv.add("kinit");
      kinitArgv.add(principal);
      kinitArgv.add("-k");
      kinitArgv.add("-t");
      kinitArgv.add(keyTabFile + ";");
      kinitArgv.addAll(argv);
      argv = kinitArgv;
    } else {
      // If doAs is not enabled, we pass the principal/keytab to spark-submit in order to
      // support the possible delegation token renewal in Spark.
      argv.add("--principal");
      argv.add(principal);
      argv.add("--keytab");
      argv.add(keyTabFile);
    }
  }

  @Override
  protected void addNumExecutors(String numOfExecutors) {
    argv.add("--num-executors");
    argv.add(numOfExecutors);
  }

  @Override
  protected void addExecutorMemory(String executorMemory) {
    argv.add("--executor-memory");
    argv.add(executorMemory);
  }

  @Override
  protected void addExecutorCores(String executorCores) {
    argv.add("--executor-cores");
    argv.add(executorCores);
  }

  private String getSparkJobCredentialProviderPassword() {
    if (conf.containsKey("spark.yarn.appMasterEnv.HADOOP_CREDSTORE_PASSWORD")) {
      return conf.get("spark.yarn.appMasterEnv.HADOOP_CREDSTORE_PASSWORD");
    } else if (conf.containsKey("spark.executorEnv.HADOOP_CREDSTORE_PASSWORD")) {
      return conf.get("spark.executorEnv.HADOOP_CREDSTORE_PASSWORD");
    }
    return null;
  }

  @Override
  protected Future<Void> launchDriver(String isTesting, RpcServer rpcServer, String clientId)
      throws IOException {
    Callable<Void> runnable;

    String cmd = Joiner.on(" ").join(argv);
    LOG.info("Running client driver with argv: {}", cmd);
    ProcessBuilder pb = new ProcessBuilder("sh", "-c", cmd);

    // Prevent Hive configurations from being visible in Spark.
    pb.environment().remove("HIVE_HOME");
    pb.environment().remove("HIVE_CONF_DIR");
    // Add the credential provider password to the child process's environment. In case of Spark,
    // the credential provider location is provided in the jobConf when the job is submitted.
    String password = getSparkJobCredentialProviderPassword();
    if (password != null) {
      pb.environment().put(Constants.HADOOP_CREDENTIAL_PASSWORD_ENVVAR, password);
    }
    if (isTesting != null) {
      pb.environment().put("SPARK_TESTING", isTesting);
    }

    final Process child = pb.start();
    String threadName = Thread.currentThread().getName();
    final List<String> childErrorLog = Collections.synchronizedList(new ArrayList<String>());
    final LogRedirector.LogSourceCallback callback = () -> isAlive;

    LogRedirector.redirect("spark-submit-stdout-redir-" + threadName,
        new LogRedirector(child.getInputStream(), LOG, callback));
    LogRedirector.redirect("spark-submit-stderr-redir-" + threadName,
        new LogRedirector(child.getErrorStream(), LOG, childErrorLog, callback));

    runnable = () -> {
      try {
        int exitCode = child.waitFor();
        if (exitCode != 0) {
          // Collect lines from the child's stderr that look like errors and report them.
          List<String> errorMessages = new ArrayList<>();
          synchronized (childErrorLog) {
            for (String line : childErrorLog) {
              if (StringUtils.containsIgnoreCase(line, "Error")) {
                errorMessages.add("\"" + line + "\"");
              }
            }
          }

          String errStr = errorMessages.isEmpty() ? "?" : Joiner.on(',').join(errorMessages);

          rpcServer.cancelClient(clientId, new RuntimeException("spark-submit process failed " +
              "with exit code " + exitCode + " and error " + errStr));
        }
      } catch (InterruptedException ie) {
        LOG.warn("Thread waiting on the child process (spark-submit) is interrupted, " +
            "killing the child process.");
        rpcServer.cancelClient(clientId,
            "Thread waiting on the child process (spark-submit) is interrupted");
        Thread.interrupted();
        child.destroy();
      } catch (Exception e) {
        String errMsg = "Exception while waiting for child process (spark-submit)";
        LOG.warn(errMsg, e);
        rpcServer.cancelClient(clientId, errMsg);
      }
      return null;
    };

    FutureTask<Void> futureTask = new FutureTask<>(runnable);

    Thread driverThread = new Thread(futureTask);
    driverThread.setDaemon(true);
    driverThread.setName("SparkSubmitMonitor");
    driverThread.start();

    return futureTask;
  }
}
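
The class above does two things: the add* methods assemble a spark-submit command line (roughly "<sparkHome>/bin/spark-submit --properties-file ... --class org.apache.hive.spark.client.RemoteDriver --jars ..."), and launchDriver runs that command through "sh -c" on a daemon monitor thread, reporting a failure when the exit code is non-zero. The following is a minimal, self-contained sketch of that launch-and-monitor pattern using only standard JDK classes; the class name ChildProcessMonitor and the echo command are illustrative assumptions, not part of Hive, and it stands in for what the real client does with LogRedirector and rpcServer.cancelClient.

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.Future;
import java.util.concurrent.FutureTask;

public class ChildProcessMonitor {

  // Start a shell command and return a Future that completes when the child exits.
  public static Future<Void> launch(String cmd) throws Exception {
    ProcessBuilder pb = new ProcessBuilder("sh", "-c", cmd);
    pb.redirectErrorStream(true);  // merge stderr into stdout for this sketch
    final Process child = pb.start();

    Callable<Void> waiter = () -> {
      // Collect the child's output so a failure can be reported, similar to childErrorLog.
      List<String> output = new ArrayList<>();
      try (BufferedReader reader =
               new BufferedReader(new InputStreamReader(child.getInputStream()))) {
        String line;
        while ((line = reader.readLine()) != null) {
          output.add(line);
        }
      }
      int exitCode = child.waitFor();
      if (exitCode != 0) {
        // In the real client this becomes rpcServer.cancelClient(clientId, new RuntimeException(...)).
        throw new RuntimeException("child process failed with exit code " + exitCode
            + " and output " + output);
      }
      return null;
    };

    FutureTask<Void> futureTask = new FutureTask<>(waiter);
    Thread monitor = new Thread(futureTask);
    monitor.setDaemon(true);  // do not keep the JVM alive just for the monitor
    monitor.setName("ChildProcessMonitor");
    monitor.start();
    return futureTask;
  }

  public static void main(String[] args) throws Exception {
    // futureTask.get() rethrows any failure raised on the monitor thread.
    launch("echo hello from the child process").get();
  }
}

As in launchDriver, the caller gets back a Future it can block on or poll; making the monitor a daemon thread ensures that an abandoned child process never prevents the parent JVM from exiting.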