Java tutorial
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.cloudera.llama.am;

import com.cloudera.llama.am.api.LlamaAM;
import com.cloudera.llama.util.FastFormat;
import com.cloudera.llama.util.ParamChecker;
import com.cloudera.llama.am.yarn.YarnRMConnector;
import com.cloudera.llama.server.AbstractServer;
import com.cloudera.llama.server.NodeMapper;
import com.cloudera.llama.server.ServerConfiguration;
import com.cloudera.llama.util.CLIParser;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.cli.PatternOptionBuilder;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.DFSClient;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.authorize.ProxyUsers;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.hadoop.yarn.api.records.NodeReport;
import org.apache.hadoop.yarn.api.records.NodeState;
import org.apache.hadoop.yarn.client.api.YarnClient;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.server.MiniYARNCluster;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.slf4j.bridge.SLF4JBridgeHandler;

import java.io.File;
import java.io.FileOutputStream;
import java.net.URI;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class MiniLlama {

  static {
    System.setProperty("log4j.configuration", "llama-log4j.properties");
    SLF4JBridgeHandler.removeHandlersForRootLogger();
    SLF4JBridgeHandler.install();
  }

  private static final String HELP_CMD = "help";
  private static final String MINICLUSTER_CMD = "minicluster";
  private static final String CLUSTER_CMD = "cluster";

  private static final String NODES = "nodes";
  private static final String HDFS_NO_FORMAT = "hdfsnoformat";
  private static final String HDFS_WRITE_CONF = "hdfswriteconf";

  private static CLIParser createParser() {
    CLIParser parser = new CLIParser("minillama", new String[0]);

    Option nodes = new Option(NODES, true, "number of nodes (default: 1)");
    nodes.setRequired(false);
    nodes.setType(PatternOptionBuilder.NUMBER_VALUE);
    Option hdfsNoFormat = new Option(HDFS_NO_FORMAT, false,
        "don't format mini HDFS");
    hdfsNoFormat.setRequired(false);
    Option hdfsWriteConf = new Option(HDFS_WRITE_CONF, true,
        "file to write mini HDFS configuration");
    hdfsWriteConf.setRequired(false);

    //help
    Options options = new Options();
    parser.addCommand(HELP_CMD, "",
        "display usage for all commands or specified command", options, false);

    //minicluster
    options = new Options();
    options.addOption(nodes);
    options.addOption(hdfsNoFormat);
    options.addOption(hdfsWriteConf);
    parser.addCommand(MINICLUSTER_CMD, "",
        "start embedded mini HDFS/Yarn cluster", options, false);

    //cluster
    options = new Options();
    parser.addCommand(CLUSTER_CMD, "", "use external HDFS/Yarn cluster",
        options, false);

    return parser;
  }

  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration(false);
    conf.addResource("llama-site.xml");
    CLIParser parser = createParser();
    try {
      CLIParser.Command command = parser.parse(args);
      if (command.getName().equals(HELP_CMD)) {
        parser.showHelp(command.getCommandLine());
      } else {
        final MiniLlama llama;
        if (command.getName().equals(MINICLUSTER_CMD)) {
          CommandLine cl = command.getCommandLine();
          int nodes = Integer.parseInt(cl.getOptionValue(NODES, "1"));
          conf = createMiniLlamaConf(conf, nodes);
          llama = new MiniLlama(conf);
          llama.skipDfsFormat(cl.hasOption(HDFS_NO_FORMAT));
          if (cl.hasOption(HDFS_WRITE_CONF)) {
            llama.setWriteHadoopConfig(cl.getOptionValue(HDFS_WRITE_CONF));
          }
        } else {
          conf.setBoolean(MINI_USE_EXTERNAL_HADOOP_KEY, true);
          conf = createMiniLlamaConf(conf, 1); //nodes is ignored
          llama = new MiniLlama(conf);
        }
        llama.start();
        String clusterType = (command.getName().equals(MINICLUSTER_CMD))
            ? "embedded HDFS/Yarn mini-cluster" : "external HDFS/Yarn cluster";
        LOG.info("**************************************************************" +
            "*******************************************************");
        LOG.info("Mini Llama running with {} with {} nodes, " +
            "HDFS URI: {} Llama URI: {}", clusterType, llama.getNodes(),
            llama.getHadoopConf().get("fs.defaultFS"),
            llama.getAddressHost() + ":" + llama.getAddressPort());
        LOG.info("*************************************************************" +
            "********************************************************");
        Runtime.getRuntime().addShutdownHook(new Thread("minillama-shutdownhook") {
          @Override
          public void run() {
            llama.stop();
          }
        });
        synchronized (MiniLlama.class) {
          MiniLlama.class.wait();
        }
      }
    } catch (ParseException ex) {
      System.err.println("Invalid sub-command: " + ex.getMessage());
      System.err.println();
      System.err.println(parser.shortHelp());
      System.exit(1);
    } catch (Throwable ex) {
      System.err.println("Error: " + ex.getMessage());
      ex.printStackTrace(System.err);
      System.exit(2);
    }
  }

  private static ServerConfiguration S_CONF =
      new AMServerConfiguration(new Configuration(false));

  public static Configuration createMiniLlamaConf(Configuration conf, int nodes) {
    ParamChecker.notNull(conf, "conf");
    ParamChecker.greaterThan(nodes, 0, "nodes");
    conf.set(ServerConfiguration.CONFIG_DIR_KEY, "");
    conf.setIfUnset(LlamaAM.RM_CONNECTOR_CLASS_KEY,
        YarnRMConnector.class.getName());
    conf.setInt(MINI_CLUSTER_NODES_KEY, nodes);
    conf.setIfUnset(S_CONF.getPropertyName(
        ServerConfiguration.SERVER_ADDRESS_KEY), "localhost:0");
    conf.setIfUnset(S_CONF.getPropertyName(
        ServerConfiguration.SERVER_ADMIN_ADDRESS_KEY), "localhost:0");
    conf.setIfUnset(S_CONF.getPropertyName(
        ServerConfiguration.HTTP_ADDRESS_KEY), "localhost:0");
    conf.setBoolean(YarnConfiguration.RM_SCHEDULER_INCLUDE_PORT_IN_NODE_NAME,
        true);
    conf.setInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES, 0);
    conf.setInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, 0);
    conf.set(YarnRMConnector.HADOOP_USER_NAME_KEY,
        System.getProperty("user.name"));
    return conf;
  }
  public static Configuration createMiniClusterConf(int nodes) {
    return createMiniLlamaConf(new Configuration(false), nodes);
  }

  private static final Logger LOG = LoggerFactory.getLogger(MiniLlama.class);

  public static final String MINI_USE_EXTERNAL_HADOOP_KEY =
      "llama.am.server.mini.use.external.hadoop";

  public static final String MINI_SERVER_CLASS_KEY =
      "llama.am.server.mini.server.class";

  private static final String MINI_CLUSTER_NODES_KEY =
      "llama.am.server.mini.cluster.nodes";

  private final Configuration conf;
  private boolean skipDfsFormat = false;
  private String writeHdfsConfig = null;
  private final AbstractServer server;
  private List<String> dataNodes;
  private MiniDFSCluster miniHdfs;
  private Configuration hadoopConf;
  private MiniYARNCluster miniYarn;
  private boolean useExternalHadoop;
  private int nodes;

  public MiniLlama(Configuration conf) {
    ParamChecker.notNull(conf, "conf");
    Class<? extends AbstractServer> klass = conf.getClass(MINI_SERVER_CLASS_KEY,
        LlamaAMServer.class, AbstractServer.class);
    server = ReflectionUtils.newInstance(klass, conf);
    this.conf = server.getConf();
    useExternalHadoop = conf.getBoolean(MINI_USE_EXTERNAL_HADOOP_KEY, false);
  }

  public Configuration getConf() {
    return conf;
  }

  public Configuration getHadoopConf() {
    return hadoopConf;
  }

  public void skipDfsFormat(boolean skipDfsFormat) {
    this.skipDfsFormat = skipDfsFormat;
  }

  public void setWriteHadoopConfig(String writeHdfsConfig) {
    this.writeHdfsConfig = writeHdfsConfig;
  }

  // Pairs each HDFS DataNode transfer address with a YARN NodeManager
  // host:port; the counts must match so the mapping is one-to-one.
  private Map<String, String> getDataNodeNodeManagerMapping(Configuration conf)
      throws Exception {
    Map<String, String> map = new HashMap<String, String>();
    DFSClient dfsClient = new DFSClient(new URI(conf.get("fs.defaultFS")), conf);
    DatanodeInfo[] DNs = dfsClient.datanodeReport(
        HdfsConstants.DatanodeReportType.ALL);
    YarnClient yarnClient = YarnClient.createYarnClient();
    yarnClient.init(conf);
    yarnClient.start();
    List<NodeId> nodeIds = getYarnNodeIds(conf);
    if (nodeIds.size() != DNs.length) {
      throw new RuntimeException("Number of DNs and NMs differ, MiniLlama " +
          "node mapping requires them to be equal at startup");
    }
    LOG.info("HDFS/YARN mapping:");
    for (int i = 0; i < DNs.length; i++) {
      String key = DNs[i].getXferAddr();
      NodeId nodeId = nodeIds.get(i);
      String value = nodeId.getHost() + ":" + nodeId.getPort();
      map.put(key, value);
      LOG.info(" DN/NM: " + key + "/" + value);
    }
    yarnClient.stop();
    nodes = map.size();
    verifySingleHost(map.keySet(), "DataNode");
    verifySingleHost(map.values(), "NodeManager");
    return map;
  }

  // All addresses of a given node type must share one host.
  private void verifySingleHost(Collection<String> addresses, String nodeType) {
    String host = null;
    for (String address : addresses) {
      String h = address.substring(0, address.indexOf(":"));
      if (host == null) {
        host = h;
      } else if (!host.equals(h)) {
        throw new RuntimeException(FastFormat.format(
            "Cluster {}s are not running on the same host: {}", nodeType,
            addresses));
      }
    }
  }

  public void start() throws Exception {
    if (useExternalHadoop) {
      hadoopConf = new YarnConfiguration();
    } else {
      hadoopConf = startMiniHadoop();
      if (writeHdfsConfig != null) {
        FileOutputStream fos = new FileOutputStream(new File(writeHdfsConfig));
        hadoopConf.writeXml(fos);
        fos.close();
      }
    }
    server.getConf().setClass(S_CONF.getPropertyName(
        ServerConfiguration.NODE_NAME_MAPPING_CLASS_KEY),
        MiniClusterNodeMapper.class, NodeMapper.class);
    Map<String, String> mapping = getDataNodeNodeManagerMapping(hadoopConf);
    MiniClusterNodeMapper.addMapping(server.getConf(), mapping);
    for (Map.Entry entry : hadoopConf) {
      server.getConf().set((String) entry.getKey(), (String) entry.getValue());
    }
    dataNodes = new ArrayList<String>(mapping.keySet());
    dataNodes = Collections.unmodifiableList(dataNodes);
    server.start();
  }

  public void stop() {
    server.stop();
    // Only tear down HDFS/YARN when MiniLlama started the embedded mini-cluster.
    if (!useExternalHadoop) {
      stopMiniHadoop();
    }
  }

  public int getNodes() {
    return nodes;
  }

  public String getAddressHost() {
    return server.getAddressHost();
  }

  public int getAddressPort() {
    return server.getAddressPort();
  }

  public List<String> getDataNodes() {
    return dataNodes;
  }

  private Configuration startMiniHadoop() throws Exception {
    int clusterNodes = getConf().getInt(MINI_CLUSTER_NODES_KEY, 1);
    if (System.getProperty(MiniDFSCluster.PROP_TEST_BUILD_DATA) == null) {
      String testBuildData = new File("target").getAbsolutePath();
      System.setProperty(MiniDFSCluster.PROP_TEST_BUILD_DATA, testBuildData);
    }
    //to trigger hdfs-site.xml registration as default resource
    new HdfsConfiguration();
    Configuration conf = new YarnConfiguration();
    String llamaProxyUser = System.getProperty("user.name");
    conf.set("hadoop.security.authentication", "simple");
    conf.set("hadoop.proxyuser." + llamaProxyUser + ".hosts", "*");
    conf.set("hadoop.proxyuser." + llamaProxyUser + ".groups", "*");
    String[] userGroups = new String[] { "g" };
    UserGroupInformation.createUserForTesting(llamaProxyUser, userGroups);
    int hdfsPort = 0;
    String fsUri = conf.get("fs.defaultFS");
    if (fsUri != null && !fsUri.equals("file:///")) {
      int i = fsUri.lastIndexOf(":");
      if (i > -1) {
        try {
          hdfsPort = Integer.parseInt(fsUri.substring(i + 1));
        } catch (Exception ex) {
          throw new RuntimeException("Could not parse port from Hadoop's " +
              "'fs.defaultFS' property: " + fsUri);
        }
      }
    }
    miniHdfs = new MiniDFSCluster(hdfsPort, conf, clusterNodes, !skipDfsFormat,
        true, null, null);
    miniHdfs.waitActive();
    conf = miniHdfs.getConfiguration(0);
    miniYarn = new MiniYARNCluster("minillama", clusterNodes, 1, 1);
    conf.setBoolean(YarnConfiguration.RM_SCHEDULER_INCLUDE_PORT_IN_NODE_NAME,
        true);
    conf.setInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES, 0);
    conf.setInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, 0);
    miniYarn.init(conf);
    miniYarn.start();
    conf = miniYarn.getConfig();
    ProxyUsers.refreshSuperUserGroupsConfiguration(conf);
    return conf;
  }

  private List<NodeId> getYarnNodeIds(Configuration conf) throws Exception {
    List<NodeId> list = new ArrayList<NodeId>();
    if (miniYarn != null) {
      int clusterNodes = getConf().getInt(MINI_CLUSTER_NODES_KEY, 1);
      for (int i = 0; i < clusterNodes; i++) {
        list.add(miniYarn.getNodeManager(i).getNMContext().getNodeId());
      }
    } else {
      YarnClient yarnClient = YarnClient.createYarnClient();
      yarnClient.init(conf);
      yarnClient.start();
      List<NodeReport> nodes = yarnClient.getNodeReports(NodeState.RUNNING);
      for (int i = 0; i < nodes.size(); i++) {
        list.add(nodes.get(i).getNodeId());
      }
      yarnClient.stop();
    }
    return list;
  }

  private void stopMiniHadoop() {
    if (miniYarn != null) {
      miniYarn.stop();
      miniYarn = null;
    }
    if (miniHdfs != null) {
      miniHdfs.shutdown();
      miniHdfs = null;
    }
  }

}
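Besides the command-line entry point above (the help, minicluster and cluster sub-commands registered in createParser()), MiniLlama can be driven programmatically, for example from a test. The following is a minimal sketch using only the public methods shown above; the example class name, the two-node count, and the console output are illustrative assumptions, not part of MiniLlama itself.

package com.cloudera.llama.am;

import org.apache.hadoop.conf.Configuration;

// Hypothetical driver class; only MiniLlama and its public API are taken from the code above.
public class MiniLlamaExample {

  public static void main(String[] args) throws Exception {
    // Configuration for an embedded two-node HDFS/YARN mini-cluster
    // (server addresses default to "localhost:0", i.e. ephemeral ports).
    Configuration conf = MiniLlama.createMiniClusterConf(2);
    MiniLlama llama = new MiniLlama(conf);
    llama.start();
    try {
      System.out.println("Llama AM at " + llama.getAddressHost() + ":" +
          llama.getAddressPort());
      System.out.println("HDFS at " + llama.getHadoopConf().get("fs.defaultFS"));
      System.out.println("DataNodes: " + llama.getDataNodes());
      // ... run requests against the Llama AM server here ...
    } finally {
      // Stops the Llama server and, since the cluster is embedded, the
      // MiniDFSCluster and MiniYARNCluster it started.
      llama.stop();
    }
  }
}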