Java tutorial
/* * Copyright [2013-2014] PayPal Software Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package ml.shifu.guagua.yarn.util; import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; import java.util.ArrayList; import java.util.List; import java.util.Map; import ml.shifu.guagua.yarn.GuaguaYarnConstants; import org.apache.commons.lang.StringUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.mapreduce.MRJobConfig; import org.apache.hadoop.yarn.api.ApplicationConstants; import org.apache.hadoop.yarn.api.ApplicationConstants.Environment; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.LocalResource; import org.apache.hadoop.yarn.api.records.LocalResourceType; import org.apache.hadoop.yarn.api.records.LocalResourceVisibility; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.util.ConverterUtils; import org.apache.hadoop.yarn.util.Records; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.google.common.base.Splitter; import com.google.common.collect.Maps; /** * {@link YarnUtils} is yarn-related helper class. */ public final class YarnUtils { private static final Logger LOG = LoggerFactory.getLogger(YarnUtils.class); public static final String GUAGUA_YARN_TMP = "tmp"; /** Private constructor, this is a utility class only */ private YarnUtils() { } /** * Build local resources including main app jar, lib jars, log4j.properties and guagua-conf.xml. */ public static Map<String, LocalResource> getLocalResourceMap(Configuration conf, ApplicationId appId) throws IOException { Map<String, LocalResource> localResources = Maps.newHashMap(); FileSystem fs = FileSystem.get(conf); // yarn app jar addFileToResourceMap(localResources, fs, getPathForResource(fs, conf.get(GuaguaYarnConstants.GUAGUA_YARN_APP_JAR), appId)); try { addFileToResourceMap(localResources, fs, getPathForResource(fs, GuaguaYarnConstants.GUAGUA_LOG4J_PROPERTIES, appId)); } catch (FileNotFoundException e) { LOG.warn("log4j.properties does not exist!"); } addFileToResourceMap(localResources, fs, getPathForResource(fs, GuaguaYarnConstants.GUAGUA_CONF_FILE, appId)); // Libs String libs = conf.get(GuaguaYarnConstants.GUAGUA_YARN_APP_LIB_JAR); if (StringUtils.isNotEmpty(libs)) { for (String jar : Splitter.on(GuaguaYarnConstants.GUAGUA_APP_LIBS_SEPERATOR).split(libs)) { addFileToResourceMap(localResources, fs, getPathForResource(fs, jar.trim(), appId)); } } return localResources; } /** * Return java command according to main class, vm args, program args and memory settings. */ public static List<String> getCommand(String mainClass, String vmArgs, String programArgs, String memory) { List<String> commands = new ArrayList<String>(1); commands.add(getCommandBase(mainClass, vmArgs, programArgs, memory).toString()); LOG.info("commands:{}", commands); return commands; } private static StringBuilder getCommandBase(String mainClass, String vmArgs, String programArgs, String memory) { List<String> commands = new ArrayList<String>(8); commands.add("exec"); commands.add(Environment.JAVA_HOME.$() + File.separator + "bin" + File.separator + "java"); commands.add("-Xms" + memory + "m"); commands.add("-Xmx" + memory + "m"); if (vmArgs != null) { commands.add(vmArgs); } commands.add("-cp .:${CLASSPATH}"); commands.add(mainClass); if (programArgs != null) { commands.add(programArgs); } commands.add( "1> " + ApplicationConstants.LOG_DIR_EXPANSION_VAR + File.separator + ApplicationConstants.STDOUT); commands.add( "2> " + ApplicationConstants.LOG_DIR_EXPANSION_VAR + File.separator + ApplicationConstants.STDERR); StringBuilder sb = new StringBuilder(200); for (String cmd : commands) { sb.append(cmd).append(" "); } return sb; } /** * Populate the environment string map to be added to the environment vars in a remote execution container. * * @param env * the map of env var values. * @param conf * the Configuration to pull values from. */ public static void addLocalClasspathToEnv(final Map<String, String> env, final Configuration conf) { StringBuilder classPathEnv = new StringBuilder(Environment.CLASSPATH.$()); // add current folder classPathEnv.append(File.pathSeparatorChar).append("./*"); // add yarn app classpath for (String cpEntry : conf.getStrings(YarnConfiguration.YARN_APPLICATION_CLASSPATH, YarnConfiguration.DEFAULT_YARN_APPLICATION_CLASSPATH)) { classPathEnv.append(File.pathSeparatorChar).append(cpEntry.trim()); } for (String jar : conf.getStrings(MRJobConfig.MAPREDUCE_APPLICATION_CLASSPATH, org.apache.hadoop.util.StringUtils .getStrings(MRJobConfig.DEFAULT_MAPREDUCE_APPLICATION_CLASSPATH))) { classPathEnv.append(File.pathSeparatorChar).append(jar.trim()); } // add the runtime classpath needed for tests to work if (conf.getBoolean(YarnConfiguration.IS_MINI_YARN_CLUSTER, false)) { classPathEnv.append(File.pathSeparatorChar).append(Environment.CLASSPATH.$()); } // add guagua app jar file String path = getFileName(conf.get(GuaguaYarnConstants.GUAGUA_YARN_APP_JAR)); classPathEnv.append(GuaguaYarnConstants.CURRENT_DIR).append(path).append(File.pathSeparatorChar); // Any libraries we may have copied over? String libs = conf.get(GuaguaYarnConstants.GUAGUA_YARN_APP_LIB_JAR); if (StringUtils.isNotEmpty(libs)) { for (String jar : Splitter.on(GuaguaYarnConstants.GUAGUA_APP_LIBS_SEPERATOR).split(libs)) { classPathEnv.append(GuaguaYarnConstants.CURRENT_DIR).append(getFileName(jar.trim())) .append(File.pathSeparatorChar); } } // add log4j classPathEnv.append(GuaguaYarnConstants.CURRENT_DIR).append(GuaguaYarnConstants.GUAGUA_LOG4J_PROPERTIES) .append(File.pathSeparatorChar); // add guagua-conf.xml classPathEnv.append(GuaguaYarnConstants.CURRENT_DIR).append(GuaguaYarnConstants.GUAGUA_CONF_FILE); env.put(Environment.CLASSPATH.toString(), classPathEnv.toString()); } private static String getFileName(String string) { return new Path(string).getName(); } /** * Get path to store local resources on hdfs. */ public static Path getPathForResource(FileSystem fs, String loc, ApplicationId appId) { return new Path(getAppDirectory(fs, appId), getFileName(loc)); } /** * Working folder to store jars, files and other resources */ public static Path getAppDirectory(FileSystem fs, ApplicationId appId) { return fs.makeQualified(new Path( new Path(File.separator + GUAGUA_YARN_TMP, GuaguaYarnConstants.GUAGUA_HDFS_DIR), appId.toString())); } /** * Copy batch of resources to hdfs app directory for master and containers. */ public static void copyLocalResourcesToFs(Configuration conf, ApplicationId appId) throws IOException { FileSystem fs = FileSystem.get(conf); Path appDir = getAppDirectory(fs, appId); // copy yarn app jar String appJar = conf.get(GuaguaYarnConstants.GUAGUA_YARN_APP_JAR); copyToFs(conf, appJar, new Path(appDir, getFileName(appJar)).toString()); // copy guagua-conf.xml: done one export method // copyToFs(conf, appJar, new Path(tempDir, GuaguaYarnConstants.GUAGUA_CONF_FILE).toString()); // Copy all lib jars String libs = conf.get(GuaguaYarnConstants.GUAGUA_YARN_APP_LIB_JAR); if (StringUtils.isNotEmpty(libs)) { for (String jar : Splitter.on(GuaguaYarnConstants.GUAGUA_APP_LIBS_SEPERATOR).split(libs)) { copyToFs(conf, jar, new Path(appDir, getFileName(jar.trim())).toString()); } } } private static void copyToFs(Configuration conf, String local, String remote) throws IOException { FileSystem fs = FileSystem.get(conf); Path src = new Path(local); Path dst = fs.makeQualified(new Path(remote)); fs.copyFromLocalFile(false, true, src, dst); } /** * Boilerplate to add a file to the local resources.. * * @param localResources * the LocalResources map to populate. * @param fs * handle to the HDFS file system. * @param target * the file to send to the remote container. */ public static void addFileToResourceMap(Map<String, LocalResource> localResources, FileSystem fs, Path target) throws IOException { LocalResource resource = Records.newRecord(LocalResource.class); FileStatus destStatus = fs.getFileStatus(target); resource.setResource(ConverterUtils.getYarnUrlFromURI(target.toUri())); resource.setSize(destStatus.getLen()); resource.setTimestamp(destStatus.getModificationTime()); resource.setType(LocalResourceType.FILE); // use FILE, even for jars! resource.setVisibility(LocalResourceVisibility.APPLICATION); localResources.put(target.getName(), resource); LOG.info("Registered file in LocalResources :{} ", target); } /** * Copy local file to a named-file in remote FS */ public static void copyLocalResourceToFs(String src, String dst, Configuration conf, ApplicationId appId) throws IOException { FileSystem fs = FileSystem.get(conf); Path tempDir = getAppDirectory(fs, appId); copyToFs(conf, src, new Path(tempDir, dst).toString()); } /** * Export our populated Configuration as an XML file to be used by the ApplicationMaster's exec container, and * register it with LocalResources. * * @param conf * the current Configuration object to be published. * @param appId * the ApplicationId to stamp this app's base HDFS resources dir. */ public static void exportGuaguaConfiguration(Configuration conf, ApplicationId appId) throws IOException { FileSystem fs = FileSystem.get(conf); Path hdfsConfPath = new Path(getAppDirectory(fs, appId), GuaguaYarnConstants.GUAGUA_CONF_FILE); FSDataOutputStream fos = null; try { fos = FileSystem.get(conf).create(hdfsConfPath, true); conf.writeXml(fos); fos.flush(); } finally { if (null != fos) { fos.close(); } } } }