org.apache.hadoop.yarn.client.cli.RMAdminCLI.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.hadoop.yarn.client.cli.RMAdminCLI.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.yarn.client.cli;

import java.io.IOException;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.MissingArgumentException;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.hadoop.classification.InterfaceAudience.Private;
import org.apache.hadoop.classification.InterfaceStability.Unstable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.CommonConfigurationKeys;
import org.apache.hadoop.ha.HAAdmin;
import org.apache.hadoop.ha.HAServiceTarget;
import org.apache.hadoop.ipc.RemoteException;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.ToolRunner;
import org.apache.hadoop.yarn.api.records.DecommissionType;
import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.hadoop.yarn.api.records.NodeLabel;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.api.records.ResourceOption;
import org.apache.hadoop.yarn.client.ClientRMProxy;
import org.apache.hadoop.yarn.client.RMHAServiceTarget;
import org.apache.hadoop.yarn.conf.HAUtil;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
import org.apache.hadoop.yarn.factories.RecordFactory;
import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
import org.apache.hadoop.yarn.nodelabels.CommonNodeLabelsManager;
import org.apache.hadoop.yarn.server.api.ResourceManagerAdministrationProtocol;
import org.apache.hadoop.yarn.server.api.protocolrecords.AddToClusterNodeLabelsRequest;
import org.apache.hadoop.yarn.server.api.protocolrecords.CheckForDecommissioningNodesRequest;
import org.apache.hadoop.yarn.server.api.protocolrecords.CheckForDecommissioningNodesResponse;
import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshAdminAclsRequest;
import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshClusterMaxPriorityRequest;
import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshNodesRequest;
import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshNodesResourcesRequest;
import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshQueuesRequest;
import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshServiceAclsRequest;
import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshSuperUserGroupsConfigurationRequest;
import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshUserToGroupsMappingsRequest;
import org.apache.hadoop.yarn.server.api.protocolrecords.RemoveFromClusterNodeLabelsRequest;
import org.apache.hadoop.yarn.server.api.protocolrecords.ReplaceLabelsOnNodeRequest;
import org.apache.hadoop.yarn.server.api.protocolrecords.UpdateNodeResourceRequest;
import org.apache.hadoop.yarn.util.ConverterUtils;
import org.apache.hadoop.yarn.util.resource.Resources;

import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;

@Private
@Unstable
public class RMAdminCLI extends HAAdmin {

    // User-facing error messages for malformed command-line input.
    private static final String NO_LABEL_ERR_MSG = "No cluster node-labels are specified";
    private static final String NO_MAPPING_ERR_MSG = "No node-to-labels mappings are specified";
    private static final String INVALID_TIMEOUT_ERR_MSG = "Invalid timeout specified : ";
    private static final String ADD_LABEL_FORMAT_ERR_MSG =
            "Input format for adding node-labels is not correct, it should be labelName1[(exclusive=true/false)],LabelName2[] ..";

    // Shared node-labels manager; created on first use by getNodeLabelManagerInstance.
    // Package-private and non-final, so code outside this class can replace it.
    static CommonNodeLabelsManager localNodeLabelsManager;

    // Factory used to instantiate the protocol request records sent to the ResourceManager.
    private final RecordFactory recordFactory = RecordFactoryProvider.getRecordFactory(null);

    /**
     * Usage information for the rmadmin-specific commands, keyed by the
     * command-line option (including its leading dash). Each entry pairs the
     * option's argument synopsis with its help text. The help builders in this
     * class look a command up here first and fall back to {@code USAGE}.
     */
    protected final static Map<String, UsageInfo> ADMIN_USAGE = ImmutableMap.<String, UsageInfo>builder()
            .put("-refreshQueues", new UsageInfo("", "Reload the queues' acls, states and scheduler specific "
                    + "properties. \n\t\tResourceManager will reload the " + "mapred-queues configuration file."))
            .put("-refreshNodes",
                    new UsageInfo("[-g [timeout in seconds] -client|server]",
                            "Refresh the hosts information at the ResourceManager. Here "
                                    + "[-g [timeout in seconds] -client|server] is optional, if we "
                                    + "specify the timeout then ResourceManager will wait for "
                                    + "timeout before marking the NodeManager as decommissioned."
                                    + " The -client|server indicates if the timeout tracking should"
                                    + " be handled by the client or the ResourceManager. The client"
                                    + "-side tracking is blocking, while the server-side tracking"
                                    + " is not. Omitting the timeout, or a timeout of -1, indicates"
                                    + " an infinite timeout."))
            .put("-refreshNodesResources",
                    new UsageInfo("", "Refresh resources of NodeManagers at the ResourceManager."))
            .put("-refreshSuperUserGroupsConfiguration",
                    new UsageInfo("", "Refresh superuser proxy groups mappings"))
            .put("-refreshUserToGroupsMappings", new UsageInfo("", "Refresh user-to-groups mappings"))
            .put("-refreshAdminAcls", new UsageInfo("", "Refresh acls for administration of ResourceManager"))
            .put("-refreshServiceAcl",
                    new UsageInfo("",
                            "Reload the service-level authorization policy file. \n\t\t"
                                    + "ResourceManager will reload the authorization policy file."))
            .put("-getGroups", new UsageInfo("[username]", "Get the groups which given user belongs to."))
            .put("-addToClusterNodeLabels",
                    new UsageInfo("<\"label1(exclusive=true)," + "label2(exclusive=false),label3\">",
                            "add to cluster node labels. Default exclusivity is true"))
            .put("-removeFromClusterNodeLabels",
                    new UsageInfo("<label1,label2,label3> (label splitted by \",\")",
                            "remove from cluster node labels"))
            .put("-replaceLabelsOnNode", new UsageInfo(
                    "<\"node1[:port]=label1,label2 node2[:port]=label1,label2\"> " + "[-failOnUnknownNodes] ",
                    "replace labels on nodes" + " (please note that we do not support specifying multiple"
                            + " labels on a single host for now.)\n\t\t"
                            + "[-failOnUnknownNodes] is optional, when we set this"
                            + " option, it will fail if specified nodes are unknown."))
            .put("-directlyAccessNodeLabelStore", new UsageInfo("",
                    "This is DEPRECATED, will be removed in future releases. Directly access node label store, "
                            + "with this option, all node label related operations"
                            + " will not connect RM. Instead, they will"
                            + " access/modify stored node labels directly."
                            + " By default, it is false (access via RM)." + " AND PLEASE NOTE: if you configured"
                            + " yarn.node-labels.fs-store.root-dir to a local directory"
                            + " (instead of NFS or HDFS), this option will only work"
                            + " when the command run on the machine where RM is running."))
            .put("-refreshClusterMaxPriority", new UsageInfo("", "Refresh cluster max priority"))
            .put("-updateNodeResource", new UsageInfo("[NodeID] [MemSize] [vCores] ([OvercommitTimeout])",
                    "Update resource on specific node."))
            .build();

    /**
     * Creates an admin CLI via the superclass no-argument constructor,
     * without supplying a configuration.
     */
    public RMAdminCLI() {
        super();
    }

    /**
     * Creates an admin CLI, handing the given configuration to the superclass.
     *
     * @param conf configuration passed to the superclass constructor
     */
    public RMAdminCLI(Configuration conf) {
        super(conf);
    }

    /**
     * Replaces the {@code errOut} stream field of this instance.
     *
     * @param errOut the stream to store in {@code errOut}
     */
    protected void setErrOut(PrintStream errOut) {
        this.errOut = errOut;
    }

    /**
     * Appends a bracketed " [option args]" synopsis for every entry of
     * {@code USAGE}, leaving out {@code -help} and {@code -failover}.
     *
     * @param usageBuilder builder that receives the synopsis fragments
     */
    private static void appendHAUsage(final StringBuilder usageBuilder) {
        for (Map.Entry<String, UsageInfo> entry : USAGE.entrySet()) {
            final String option = entry.getKey();
            final boolean excluded = option.equals("-help") || option.equals("-failover");
            if (!excluded) {
                usageBuilder.append(" [").append(option).append(" ").append(entry.getValue().args).append("]");
            }
        }
    }

    /**
     * Appends a one-line help entry of the form
     * {@code "   <cmd>[ <args>]: <help>"} for the given command.
     * The command is looked up in {@code ADMIN_USAGE} first and then in
     * {@code USAGE}; nothing is appended when it is found in neither.
     *
     * @param cmd     command-line option to describe (e.g. {@code -refreshQueues})
     * @param builder builder that receives the help line
     */
    private static void buildHelpMsg(String cmd, StringBuilder builder) {
        UsageInfo usageInfo = ADMIN_USAGE.get(cmd);
        if (usageInfo == null) {
            usageInfo = USAGE.get(cmd);
        }
        if (usageInfo == null) {
            return;
        }
        // Compare by value: "==" only worked while args happened to be the
        // interned "" literal. The constant-first form also tolerates null.
        String space = "".equals(usageInfo.args) ? "" : " ";
        builder.append("   " + cmd + space + usageInfo.args + ": " + usageInfo.help);
    }

    /**
     * Appends the "Usage: yarn rmadmin [...]" line for a single command.
     * The command is looked up in {@code ADMIN_USAGE} first and then in
     * {@code USAGE}; nothing is appended when it is found in neither. For a
     * command found only in {@code USAGE}, a note is added that it requires
     * RM HA to be enabled.
     *
     * @param cmd     command-line option to describe
     * @param builder builder that receives the usage text
     */
    private static void buildIndividualUsageMsg(String cmd, StringBuilder builder) {
        boolean isHACommand = false;
        UsageInfo usageInfo = ADMIN_USAGE.get(cmd);
        if (usageInfo == null) {
            usageInfo = USAGE.get(cmd);
            if (usageInfo == null) {
                return;
            }
            isHACommand = true;
        }
        // Compare by value: "==" only worked while args happened to be the
        // interned "" literal. The constant-first form also tolerates null.
        String space = "".equals(usageInfo.args) ? "" : " ";
        builder.append("Usage: yarn rmadmin [" + cmd + space + usageInfo.args + "]\n");
        if (isHACommand) {
            builder.append(cmd + " can only be used when RM HA is enabled");
        }
    }

    /**
     * Appends the general usage listing: one "   option args" line per entry
     * of {@code ADMIN_USAGE}, followed (only when HA is enabled) by the
     * entries of {@code USAGE} other than {@code -help}, and finally the
     * {@code -help [cmd]} line.
     *
     * @param builder     builder that receives the listing
     * @param isHAEnabled whether the {@code USAGE} commands are included
     */
    private static void buildUsageMsg(StringBuilder builder, boolean isHAEnabled) {
        builder.append("Usage: yarn rmadmin\n");
        for (Map.Entry<String, UsageInfo> adminEntry : ADMIN_USAGE.entrySet()) {
            builder.append("   ").append(adminEntry.getKey()).append(" ")
                    .append(adminEntry.getValue().args).append("\n");
        }
        if (isHAEnabled) {
            for (Map.Entry<String, UsageInfo> haEntry : USAGE.entrySet()) {
                final String option = haEntry.getKey();
                if (option.equals("-help")) {
                    // -help is printed once, at the end of the listing.
                    continue;
                }
                builder.append("   ").append(option).append(" ")
                        .append(haEntry.getValue().args).append("\n");
            }
        }
        builder.append("   -help" + " [cmd]\n");
    }

    /**
     * Prints the complete rmadmin help to standard output: a one-line syntax
     * summary, then one help entry per command, then the generic command
     * usage from {@code ToolRunner}.
     *
     * @param cmd         not read by this method
     * @param isHAEnabled whether the commands from {@code USAGE} (other than
     *                    {@code -help} and {@code -failover}) are included
     */
    private static void printHelp(String cmd, boolean isHAEnabled) {
        // NOTE(review): the summary below spells the option "-getGroup" while
        // ADMIN_USAGE registers "-getGroups", and the "[-updateNodeResource ..."
        // fragment has no closing bracket. Both are kept as-is here; confirm
        // whether external tests pin this exact text before changing it.
        final StringBuilder syntax = new StringBuilder();
        syntax.append("rmadmin is the command to execute YARN administrative " + "commands.\n");
        syntax.append("The full syntax is: \n\n"
                + "yarn rmadmin"
                + " [-refreshQueues]"
                + " [-refreshNodes [-g [timeout in seconds] -client|server]]"
                + " [-refreshNodesResources]"
                + " [-refreshSuperUserGroupsConfiguration]"
                + " [-refreshUserToGroupsMappings]"
                + " [-refreshAdminAcls]"
                + " [-refreshServiceAcl]"
                + " [-getGroup [username]]"
                + " [-addToClusterNodeLabels <\"label1(exclusive=true)," + "label2(exclusive=false),label3\">]"
                + " [-removeFromClusterNodeLabels <label1,label2,label3>]"
                + " [-replaceLabelsOnNode " + "<\"node1[:port]=label1,label2 node2[:port]=label1\"> "
                + "[-failOnUnknownNodes]]"
                + " [-directlyAccessNodeLabelStore]"
                + " [-refreshClusterMaxPriority]"
                + " [-updateNodeResource [NodeID] [MemSize] [vCores] ([OvercommitTimeout])");
        if (isHAEnabled) {
            appendHAUsage(syntax);
        }
        syntax.append(" [-help [cmd]]").append("\n");
        System.out.println(syntax);

        final StringBuilder details = new StringBuilder();
        for (String option : ADMIN_USAGE.keySet()) {
            buildHelpMsg(option, details);
            details.append("\n");
        }
        if (isHAEnabled) {
            for (String option : USAGE.keySet()) {
                if (option.equals("-help") || option.equals("-failover")) {
                    continue;
                }
                buildHelpMsg(option, details);
                details.append("\n");
            }
        }
        details.append(
                "   -help [cmd]: Displays help for the given command or all commands" + " if none is specified.");
        System.out.println(details);
        System.out.println();
        ToolRunner.printGenericCommandUsage(System.out);
    }

    /**
     * Prints usage information to standard error, followed by the generic
     * command usage. A command present in {@code ADMIN_USAGE} or
     * {@code USAGE} gets its individual usage line; anything else gets the
     * full command listing.
     *
     * @param cmd         the command that is being executed
     * @param isHAEnabled whether the full listing includes the HA commands
     */
    private static void printUsage(String cmd, boolean isHAEnabled) {
        final StringBuilder usage = new StringBuilder();
        final boolean knownCommand = ADMIN_USAGE.containsKey(cmd) || USAGE.containsKey(cmd);
        if (!knownCommand) {
            buildUsageMsg(usage, isHAEnabled);
        } else {
            buildIndividualUsageMsg(cmd, usage);
        }
        System.err.println(usage);
        ToolRunner.printGenericCommandUsage(System.err);
    }

    /**
     * Builds an RM administration protocol proxy from a
     * {@link YarnConfiguration} wrapped around the current configuration.
     *
     * @return the proxy obtained from {@code ClientRMProxy.createRMProxy}
     * @throws IOException declared for failures while creating the proxy
     */
    protected ResourceManagerAdministrationProtocol createAdminProtocol() throws IOException {
        return ClientRMProxy.createRMProxy(new YarnConfiguration(getConf()),
                ResourceManagerAdministrationProtocol.class, true);
    }

    /**
     * Sends a refresh-queues request to the ResourceManager.
     *
     * @return always 0
     */
    private int refreshQueues() throws IOException, YarnException {
        final ResourceManagerAdministrationProtocol rmAdmin = createAdminProtocol();
        rmAdmin.refreshQueues(recordFactory.newRecordInstance(RefreshQueuesRequest.class));
        return 0;
    }

    /**
     * Sends a refresh-nodes request with {@code DecommissionType.NORMAL} to
     * the ResourceManager.
     *
     * @return always 0
     */
    private int refreshNodes() throws IOException, YarnException {
        final ResourceManagerAdministrationProtocol rmAdmin = createAdminProtocol();
        rmAdmin.refreshNodes(RefreshNodesRequest.newInstance(DecommissionType.NORMAL));
        return 0;
    }

    /**
     * Starts a graceful decommission and tracks it from the client side.
     * <p>
     * After sending a {@code GRACEFUL} refresh-nodes request, the RM is polled
     * once per second for nodes that are still decommissioning. Polling stops
     * as soon as none remain, or once {@code timeout} polls have been made;
     * a timeout of -1 polls without limit. If nodes are still decommissioning
     * when polling stops, a {@code FORCEFUL} refresh-nodes request is sent.
     * <p>
     * An interrupt received while sleeping does not stop the tracking, but it
     * is no longer lost: the thread's interrupt status is restored before this
     * method exits.
     *
     * @param timeout      number of one-second polls to wait, or -1 for no limit
     * @param trackingMode must be {@code "client"}
     * @return always 0
     * @throws UnsupportedOperationException if {@code trackingMode} is not
     *         {@code "client"}
     */
    private int refreshNodes(long timeout, String trackingMode) throws IOException, YarnException {
        if (!"client".equals(trackingMode)) {
            throw new UnsupportedOperationException("Only client tracking mode is currently supported.");
        }
        // Graceful decommissioning with timeout
        ResourceManagerAdministrationProtocol adminProtocol = createAdminProtocol();
        RefreshNodesRequest gracefulRequest = RefreshNodesRequest.newInstance(DecommissionType.GRACEFUL);
        adminProtocol.refreshNodes(gracefulRequest);
        CheckForDecommissioningNodesRequest checkForDecommissioningNodesRequest = recordFactory
                .newRecordInstance(CheckForDecommissioningNodesRequest.class);
        long waitingTime;
        boolean nodesDecommissioning = true;
        // Set when a sleep is interrupted; the status is restored in the
        // finally block so the remaining RPCs are not disturbed by it.
        boolean interrupted = false;
        try {
            // timeout=-1 means wait for all the nodes to be gracefully
            // decommissioned
            for (waitingTime = 0; waitingTime < timeout || timeout == -1; waitingTime++) {
                // wait for one second to check nodes decommissioning status
                try {
                    Thread.sleep(1000);
                } catch (InterruptedException e) {
                    // Keep tracking, but remember the interrupt for the caller.
                    interrupted = true;
                }
                CheckForDecommissioningNodesResponse checkForDecommissioningNodes = adminProtocol
                        .checkForDecommissioningNodes(checkForDecommissioningNodesRequest);
                Set<NodeId> decommissioningNodes = checkForDecommissioningNodes.getDecommissioningNodes();
                if (decommissioningNodes.isEmpty()) {
                    nodesDecommissioning = false;
                    break;
                } else {
                    StringBuilder nodes = new StringBuilder();
                    for (NodeId nodeId : decommissioningNodes) {
                        nodes.append(nodeId).append(",");
                    }
                    // Drop the trailing comma.
                    nodes.deleteCharAt(nodes.length() - 1);
                    System.out.println("Nodes '" + nodes + "' are still decommissioning.");
                }
            }
            if (nodesDecommissioning) {
                System.out.println("Graceful decommissioning not completed in " + timeout
                        + " seconds, issueing forceful decommissioning command.");
                RefreshNodesRequest forcefulRequest = RefreshNodesRequest.newInstance(DecommissionType.FORCEFUL);
                adminProtocol.refreshNodes(forcefulRequest);
            } else {
                System.out.println("Graceful decommissioning completed in " + waitingTime + " seconds.");
            }
        } finally {
            if (interrupted) {
                Thread.currentThread().interrupt();
            }
        }
        return 0;
    }

    /**
     * Sends a refresh-nodes-resources request to the ResourceManager.
     *
     * @return always 0
     */
    private int refreshNodesResources() throws IOException, YarnException {
        final ResourceManagerAdministrationProtocol rmAdmin = createAdminProtocol();
        rmAdmin.refreshNodesResources(recordFactory.newRecordInstance(RefreshNodesResourcesRequest.class));
        return 0;
    }

    /**
     * Sends a refresh-user-to-groups-mappings request to the ResourceManager.
     *
     * @return always 0
     */
    private int refreshUserToGroupsMappings() throws IOException, YarnException {
        final ResourceManagerAdministrationProtocol rmAdmin = createAdminProtocol();
        rmAdmin.refreshUserToGroupsMappings(
                recordFactory.newRecordInstance(RefreshUserToGroupsMappingsRequest.class));
        return 0;
    }

    /**
     * Sends a refresh-super-user-groups-configuration request to the
     * ResourceManager.
     *
     * @return always 0
     */
    private int refreshSuperUserGroupsConfiguration() throws IOException, YarnException {
        final ResourceManagerAdministrationProtocol rmAdmin = createAdminProtocol();
        rmAdmin.refreshSuperUserGroupsConfiguration(
                recordFactory.newRecordInstance(RefreshSuperUserGroupsConfigurationRequest.class));
        return 0;
    }

    /**
     * Sends a refresh-admin-acls request to the ResourceManager.
     *
     * @return always 0
     */
    private int refreshAdminAcls() throws IOException, YarnException {
        final ResourceManagerAdministrationProtocol rmAdmin = createAdminProtocol();
        rmAdmin.refreshAdminAcls(recordFactory.newRecordInstance(RefreshAdminAclsRequest.class));
        return 0;
    }

    /**
     * Sends a refresh-service-acls request to the ResourceManager.
     *
     * @return always 0
     */
    private int refreshServiceAcls() throws IOException, YarnException {
        final ResourceManagerAdministrationProtocol rmAdmin = createAdminProtocol();
        rmAdmin.refreshServiceAcls(recordFactory.newRecordInstance(RefreshServiceAclsRequest.class));
        return 0;
    }

    /**
     * Sends a refresh-cluster-max-priority request to the ResourceManager.
     *
     * @return always 0
     */
    private int refreshClusterMaxPriority() throws IOException, YarnException {
        final ResourceManagerAdministrationProtocol rmAdmin = createAdminProtocol();
        rmAdmin.refreshClusterMaxPriority(
                recordFactory.newRecordInstance(RefreshClusterMaxPriorityRequest.class));
        return 0;
    }

    /**
     * Asks the ResourceManager to update the resource of a single node.
     *
     * @param nodeIdStr         node id string, parsed with {@code NodeId.fromString}
     * @param memSize           memory value for the new resource; must not be negative
     * @param cores             core count for the new resource; must not be negative
     * @param overCommitTimeout value passed to {@code ResourceOption.newInstance}
     * @return always 0
     * @throws IllegalArgumentException if {@code memSize} or {@code cores} is negative
     */
    private int updateNodeResource(String nodeIdStr, int memSize, int cores, int overCommitTimeout)
            throws IOException, YarnException {
        // Reject negative values before contacting the ResourceManager.
        final boolean badValue = invalidResourceValue(memSize, cores);
        if (badValue) {
            throw new IllegalArgumentException(
                    "Invalid resource value: (" + memSize + "," + cores + ") for updateNodeResource.");
        }

        final ResourceManagerAdministrationProtocol rmAdmin = createAdminProtocol();
        final UpdateNodeResourceRequest updateRequest =
                recordFactory.newRecordInstance(UpdateNodeResourceRequest.class);
        final NodeId targetNode = NodeId.fromString(nodeIdStr);
        final ResourceOption option =
                ResourceOption.newInstance(Resources.createResource(memSize, cores), overCommitTimeout);

        // The request carries a single-entry map: target node -> new resource option.
        final Map<NodeId, ResourceOption> nodeResources = new HashMap<>();
        nodeResources.put(targetNode, option);
        updateRequest.setNodeResourceMap(nodeResources);
        rmAdmin.updateNodeResource(updateRequest);
        return 0;
    }

    /**
     * Tells whether a memory/core pair is unusable because either value is
     * negative.
     *
     * @param memValue  memory value to check
     * @param coreValue core count to check
     * @return {@code true} if at least one of the two values is below zero
     */
    private boolean invalidResourceValue(int memValue, int coreValue) {
        // The smaller of the two is negative exactly when either one is.
        return Math.min(memValue, coreValue) < 0;
    }

    /**
     * Prints, one line per user, the groups the ResourceManager reports for
     * each given user, in the form {@code "<user> : <group> <group> ..."}.
     * With an empty array, the current user is queried instead.
     *
     * @param usernames users to look up; may be empty
     * @return always 0
     */
    private int getGroups(String[] usernames) throws IOException {
        final ResourceManagerAdministrationProtocol rmAdmin = createAdminProtocol();

        final String[] targets = (usernames.length == 0)
                ? new String[] { UserGroupInformation.getCurrentUser().getUserName() }
                : usernames;

        for (String user : targets) {
            final StringBuilder line = new StringBuilder(user + " :");
            for (String group : rmAdmin.getGroupsForUser(user)) {
                line.append(" ").append(group);
            }
            System.out.println(line);
        }

        return 0;
    }

    /**
     * Returns the shared {@code localNodeLabelsManager}, creating,
     * initializing and starting it with the given configuration on first use.
     * Protected so that unit tests can change it.
     *
     * @param conf configuration used to initialize a newly created manager
     * @return the shared node-labels manager
     */
    protected static synchronized CommonNodeLabelsManager getNodeLabelManagerInstance(Configuration conf) {
        if (localNodeLabelsManager != null) {
            return localNodeLabelsManager;
        }
        // The field is assigned before init/start, as in the original flow.
        localNodeLabelsManager = new CommonNodeLabelsManager();
        localNodeLabelsManager.init(conf);
        localNodeLabelsManager.start();
        return localNodeLabelsManager;
    }

    /**
     * Parses a comma-separated list of node-label specifications of the form
     * {@code name}, {@code name()} or {@code name(exclusive=true|false)}.
     * Blank items are skipped. A label without an {@code exclusive} property
     * gets {@code NodeLabel.DEFAULT_NODE_LABEL_EXCLUSIVITY}.
     * <p>
     * The label name is trimmed in every case. Previously only names followed
     * by parentheses were trimmed, so {@code "a, b"} produced the label
     * {@code " b"}.
     *
     * @param args the raw comma-separated label specification
     * @return the parsed labels, never empty
     * @throws IllegalArgumentException if parentheses are unbalanced or out of
     *         order, if the parenthesised text is anything other than
     *         blank or {@code exclusive=true|false}, or if no label is found
     */
    private List<NodeLabel> buildNodeLabelsFromStr(String args) {
        List<NodeLabel> nodeLabels = new ArrayList<>();
        for (String p : args.split(",")) {
            if (p.trim().isEmpty()) {
                continue;
            }

            // Try to parse exclusive
            boolean exclusive = NodeLabel.DEFAULT_NODE_LABEL_EXCLUSIVITY;
            int leftParenthesisIdx = p.indexOf("(");
            int rightParenthesisIdx = p.indexOf(")");

            // Exactly one of the two parentheses present: they do not match.
            if ((leftParenthesisIdx == -1) != (rightParenthesisIdx == -1)) {
                throw new IllegalArgumentException(ADD_LABEL_FORMAT_ERR_MSG);
            }

            // NOTE(review): a "(" at index 0 (no name before it) skips this
            // validation and yields an empty label name; behavior kept as-is.
            if (leftParenthesisIdx > 0 && rightParenthesisIdx > 0) {
                if (leftParenthesisIdx > rightParenthesisIdx) {
                    // ")" appears before "(".
                    throw new IllegalArgumentException(ADD_LABEL_FORMAT_ERR_MSG);
                }

                String property = p.substring(leftParenthesisIdx + 1, rightParenthesisIdx);
                int equalsIdx = property.indexOf("=");
                if (equalsIdx != -1) {
                    String key = property.substring(0, equalsIdx).trim();
                    String value = property.substring(equalsIdx + 1).trim();

                    // Now we only support one property, which is exclusive, so check if
                    // key = exclusive and value = {true/false}
                    if (key.equals("exclusive") && ImmutableSet.of("true", "false").contains(value)) {
                        exclusive = Boolean.parseBoolean(value);
                    } else {
                        throw new IllegalArgumentException(ADD_LABEL_FORMAT_ERR_MSG);
                    }
                } else if (!property.trim().isEmpty()) {
                    throw new IllegalArgumentException(ADD_LABEL_FORMAT_ERR_MSG);
                }
            }

            // The name is everything before "(..)" when present, else the whole
            // item; trimmed either way so both forms give the same name.
            String labelName = (leftParenthesisIdx != -1 ? p.substring(0, leftParenthesisIdx) : p).trim();

            nodeLabels.add(NodeLabel.newInstance(labelName, exclusive));
        }

        if (nodeLabels.isEmpty()) {
            throw new IllegalArgumentException(NO_LABEL_ERR_MSG);
        }
        return nodeLabels;
    }

    /**
     * Splits a comma-separated string into a set of trimmed, non-empty label
     * names.
     *
     * @param args the raw comma-separated label names
     * @return the distinct label names, never empty
     * @throws IllegalArgumentException if no non-blank name is present
     */
    private Set<String> buildNodeLabelNamesFromStr(String args) {
        final Set<String> names = new HashSet<>();
        for (String token : args.split(",")) {
            final String name = token.trim();
            if (name.isEmpty()) {
                continue;
            }
            names.add(name);
        }

        if (!names.isEmpty()) {
            return names;
        }
        throw new IllegalArgumentException(NO_LABEL_ERR_MSG);
    }

    /**
     * Handles {@code -addToClusterNodeLabels}: parses the label list and adds
     * the labels either through the local node-labels manager (when
     * {@code -directlyAccessNodeLabelStore} is given) or through the
     * ResourceManager.
     *
     * @param args        the full command-line arguments
     * @param cmd         not read by this method
     * @param isHAEnabled forwarded to the usage printer
     * @return 0 on success, -1 if the option value is missing
     */
    private int handleAddToClusterNodeLabels(String[] args, String cmd, boolean isHAEnabled)
            throws IOException, YarnException, ParseException {
        final Options options = new Options();
        options.addOption("addToClusterNodeLabels", true, "Add to cluster node labels.");
        options.addOption("directlyAccessNodeLabelStore", false, "Directly access node label store.");

        CommandLine parsed;
        try {
            parsed = new GnuParser().parse(options, args);
        } catch (MissingArgumentException ex) {
            System.err.println(NO_LABEL_ERR_MSG);
            printUsage(args[0], isHAEnabled);
            return -1;
        }

        final List<NodeLabel> newLabels = buildNodeLabelsFromStr(parsed.getOptionValue("addToClusterNodeLabels"));
        if (!parsed.hasOption("directlyAccessNodeLabelStore")) {
            final ResourceManagerAdministrationProtocol rmAdmin = createAdminProtocol();
            rmAdmin.addToClusterNodeLabels(AddToClusterNodeLabelsRequest.newInstance(newLabels));
        } else {
            getNodeLabelManagerInstance(getConf()).addToCluserNodeLabels(newLabels);
        }
        return 0;
    }

    /**
     * Handles {@code -removeFromClusterNodeLabels}: parses the label names and
     * removes them either through the local node-labels manager (when
     * {@code -directlyAccessNodeLabelStore} is given) or through the
     * ResourceManager.
     *
     * @param args        the full command-line arguments
     * @param cmd         not read by this method
     * @param isHAEnabled forwarded to the usage printer
     * @return 0 on success, -1 if the option value is missing
     */
    private int handleRemoveFromClusterNodeLabels(String[] args, String cmd, boolean isHAEnabled)
            throws IOException, YarnException, ParseException {
        final Options options = new Options();
        options.addOption("removeFromClusterNodeLabels", true, "Remove From cluster node labels.");
        options.addOption("directlyAccessNodeLabelStore", false, "Directly access node label store.");

        CommandLine parsed;
        try {
            parsed = new GnuParser().parse(options, args);
        } catch (MissingArgumentException ex) {
            System.err.println(NO_LABEL_ERR_MSG);
            printUsage(args[0], isHAEnabled);
            return -1;
        }

        final Set<String> doomedLabels =
                buildNodeLabelNamesFromStr(parsed.getOptionValue("removeFromClusterNodeLabels"));
        if (!parsed.hasOption("directlyAccessNodeLabelStore")) {
            final ResourceManagerAdministrationProtocol rmAdmin = createAdminProtocol();
            rmAdmin.removeFromClusterNodeLabels(RemoveFromClusterNodeLabelsRequest.newInstance(doomedLabels));
        } else {
            getNodeLabelManagerInstance(getConf()).removeFromClusterNodeLabels(doomedLabels);
        }

        return 0;
    }

    /**
     * Parses node-to-labels mappings separated by spaces or newlines. Each
     * mapping is {@code node=label}, {@code node=} (no labels) or, for
     * compatibility, {@code node,label}. Blank items and items starting with
     * {@code #} are skipped. At most one label per node is accepted.
     *
     * @param args the raw mappings string
     * @return a map from node id to its (possibly empty) label set, never empty
     * @throws IllegalArgumentException if a node name is empty, if more than
     *         one label is given for a node, or if no mapping is found
     */
    private Map<NodeId, Set<String>> buildNodeLabelsMapFromStr(String args) {
        Map<NodeId, Set<String>> map = new HashMap<NodeId, Set<String>>();

        for (String nodeToLabels : args.split("[ \n]")) {
            nodeToLabels = nodeToLabels.trim();
            if (nodeToLabels.isEmpty() || nodeToLabels.startsWith("#")) {
                continue;
            }

            String[] splits = nodeToLabels.split("=");
            int labelsStartIndex = 0;
            String nodeIdStr = splits[0];

            if (splits.length == 2) {
                splits = splits[1].split(",");
            } else if (nodeToLabels.endsWith("=")) {
                //case where no labels are mapped to a node
                splits = new String[0];
            } else {
                // "," also supported for compatibility
                splits = nodeToLabels.split(",");
                nodeIdStr = splits[0];
                labelsStartIndex = 1;
            }

            Preconditions.checkArgument(!nodeIdStr.trim().isEmpty(), "node name cannot be empty");

            NodeId nodeId = ConverterUtils.toNodeIdWithDefaultPort(nodeIdStr);
            Set<String> labels = new HashSet<String>();
            // A repeated node replaces its earlier entry.
            map.put(nodeId, labels);

            for (int i = labelsStartIndex; i < splits.length; i++) {
                if (!splits[i].trim().isEmpty()) {
                    labels.add(splits[i].trim());
                }
            }

            int nLabels = labels.size();
            // Guava's Preconditions only substitutes "%s"; the former "%d"
            // was left in the message verbatim and the arguments were shifted.
            Preconditions.checkArgument(nLabels <= 1,
                    "%s labels specified on host=%s" + ", please note that we do not support specifying multiple"
                            + " labels on a single host for now.",
                    nLabels, nodeIdStr);
        }

        if (map.isEmpty()) {
            throw new IllegalArgumentException(NO_MAPPING_ERR_MSG);
        }
        return map;
    }

    /**
     * Handles {@code -replaceLabelsOnNode}: parses the node-to-labels mappings
     * and delegates to {@code replaceLabelsOnNodes}, passing along whether
     * {@code -failOnUnknownNodes} and {@code -directlyAccessNodeLabelStore}
     * were given.
     *
     * @param args        the full command-line arguments
     * @param cmd         not read by this method
     * @param isHAEnabled forwarded to the usage printer
     * @return the result of {@code replaceLabelsOnNodes}, or -1 if the option
     *         value is missing
     */
    private int handleReplaceLabelsOnNodes(String[] args, String cmd, boolean isHAEnabled)
            throws IOException, YarnException, ParseException {
        final Options options = new Options();
        options.addOption("replaceLabelsOnNode", true, "Replace label on node.");
        options.addOption("failOnUnknownNodes", false, "Fail on unknown nodes.");
        options.addOption("directlyAccessNodeLabelStore", false, "Directly access node label store.");

        CommandLine parsed;
        try {
            parsed = new GnuParser().parse(options, args);
        } catch (MissingArgumentException ex) {
            System.err.println(NO_MAPPING_ERR_MSG);
            printUsage(args[0], isHAEnabled);
            return -1;
        }

        final Map<NodeId, Set<String>> nodeToLabels =
                buildNodeLabelsMapFromStr(parsed.getOptionValue("replaceLabelsOnNode"));
        final boolean failOnUnknown = parsed.hasOption("failOnUnknownNodes");
        final boolean directAccess = parsed.hasOption("directlyAccessNodeLabelStore");
        return replaceLabelsOnNodes(nodeToLabels, failOnUnknown, directAccess);
    }

    /**
     * Replaces the labels on the given nodes, either through the local
     * node-labels manager or through a request to the ResourceManager.
     *
     * @param map                          node id to the labels it should carry
     * @param failOnUnknownNodes           set on the request sent to the
     *                                     ResourceManager; not used on the
     *                                     direct-access path
     * @param directlyAccessNodeLabelStore whether to use the local node-labels
     *                                     manager instead of the ResourceManager
     * @return always 0
     */
    private int replaceLabelsOnNodes(Map<NodeId, Set<String>> map, boolean failOnUnknownNodes,
            boolean directlyAccessNodeLabelStore) throws IOException, YarnException {
        if (directlyAccessNodeLabelStore) {
            getNodeLabelManagerInstance(getConf()).replaceLabelsOnNode(map);
            return 0;
        }

        final ResourceManagerAdministrationProtocol rmAdmin = createAdminProtocol();
        final ReplaceLabelsOnNodeRequest replaceRequest = ReplaceLabelsOnNodeRequest.newInstance(map);
        replaceRequest.setFailOnUnknownNodes(failOnUnknownNodes);
        rmAdmin.replaceLabelsOnNode(replaceRequest);
        return 0;
    }

    /**
     * Dispatches an rmadmin command line to the matching handler.
     *
     * <p>{@code -help} prints usage or help and returns {@code 0}. Commands
     * listed in {@code USAGE} are delegated to {@code super.run(args)} when
     * ResourceManager HA is enabled and rejected with {@code -1} otherwise.
     * Every other command is handled here; any failure is reported on
     * standard error and turned into a {@code -1} exit code.
     *
     * @param args the command line; {@code args[0]} is the command
     * @return {@code 0} for {@code -help}, the handler's result for a
     *         recognised command, and {@code -1} for a missing or unknown
     *         command, a wrong number of arguments, or a handler failure
     * @throws Exception only from the delegated {@code super.run(args)} call
     *         or from stopping the local node labels manager; handler
     *         exceptions are caught and reported
     */
    @Override
    public int run(String[] args) throws Exception {
        YarnConfiguration yarnConf = getConf() == null ? new YarnConfiguration() : new YarnConfiguration(getConf());
        boolean isHAEnabled = yarnConf.getBoolean(YarnConfiguration.RM_HA_ENABLED,
                YarnConfiguration.DEFAULT_RM_HA_ENABLED);

        if (args.length < 1) {
            printUsage("", isHAEnabled);
            return -1;
        }

        int i = 0;
        String cmd = args[i++];

        if ("-help".equals(cmd)) {
            if (i < args.length) {
                printUsage(args[i], isHAEnabled);
            } else {
                printHelp("", isHAEnabled);
            }
            return 0;
        }

        if (USAGE.containsKey(cmd)) {
            if (isHAEnabled) {
                return super.run(args);
            }
            System.out.println("Cannot run " + cmd + " when ResourceManager HA is not enabled");
            return -1;
        }

        //
        // verify that we have enough command line parameters
        //
        if ("-refreshAdminAcls".equals(cmd) || "-refreshQueues".equals(cmd) || "-refreshNodesResources".equals(cmd)
                || "-refreshServiceAcl".equals(cmd) || "-refreshUserToGroupsMappings".equals(cmd)
                || "-refreshSuperUserGroupsConfiguration".equals(cmd)) {
            if (args.length != 1) {
                // These commands take no arguments, so extra arguments are a
                // usage error and must not be reported as success.
                printUsage(cmd, isHAEnabled);
                return -1;
            }
        }

        // Command name without its leading character, used as the prefix of
        // error messages. Guarded so an empty command string cannot raise
        // StringIndexOutOfBoundsException while an error is being reported.
        final String cmdName = cmd.isEmpty() ? cmd : cmd.substring(1);

        // Pessimistic default: only a handler that returns normally replaces it.
        int exitCode = -1;
        try {
            if ("-refreshQueues".equals(cmd)) {
                exitCode = refreshQueues();
            } else if ("-refreshNodes".equals(cmd)) {
                exitCode = handleRefreshNodes(args, cmd, isHAEnabled);
            } else if ("-refreshNodesResources".equals(cmd)) {
                exitCode = refreshNodesResources();
            } else if ("-refreshUserToGroupsMappings".equals(cmd)) {
                exitCode = refreshUserToGroupsMappings();
            } else if ("-refreshSuperUserGroupsConfiguration".equals(cmd)) {
                exitCode = refreshSuperUserGroupsConfiguration();
            } else if ("-refreshAdminAcls".equals(cmd)) {
                exitCode = refreshAdminAcls();
            } else if ("-refreshServiceAcl".equals(cmd)) {
                exitCode = refreshServiceAcls();
            } else if ("-refreshClusterMaxPriority".equals(cmd)) {
                exitCode = refreshClusterMaxPriority();
            } else if ("-getGroups".equals(cmd)) {
                String[] usernames = Arrays.copyOfRange(args, i, args.length);
                exitCode = getGroups(usernames);
            } else if ("-updateNodeResource".equals(cmd)) {
                exitCode = handleUpdateNodeResource(args, cmd, isHAEnabled);
            } else if ("-addToClusterNodeLabels".equals(cmd)) {
                exitCode = handleAddToClusterNodeLabels(args, cmd, isHAEnabled);
            } else if ("-removeFromClusterNodeLabels".equals(cmd)) {
                exitCode = handleRemoveFromClusterNodeLabels(args, cmd, isHAEnabled);
            } else if ("-replaceLabelsOnNode".equals(cmd)) {
                exitCode = handleReplaceLabelsOnNodes(args, cmd, isHAEnabled);
            } else {
                System.err.println(cmdName + ": Unknown command");
                printUsage("", isHAEnabled);
            }
        } catch (IllegalArgumentException arge) {
            System.err.println(cmdName + ": " + arge.getLocalizedMessage());
            printUsage(cmd, isHAEnabled);
        } catch (RemoteException e) {
            //
            // This is an error returned by the hadoop server. Print
            // out the first line of the error message, ignore the stack trace.
            try {
                String[] content = e.getLocalizedMessage().split("\n");
                System.err.println(cmdName + ": " + content[0]);
            } catch (Exception ex) {
                System.err.println(cmdName + ": " + ex.getLocalizedMessage());
            }
        } catch (Exception e) {
            System.err.println(cmdName + ": " + e.getLocalizedMessage());
        } finally {
            // Stop the local manager on every exit path out of the dispatch,
            // including failures raised while an error is being reported.
            if (null != localNodeLabelsManager) {
                localNodeLabelsManager.stop();
            }
        }
        return exitCode;
    }

    /**
     * Handles the {@code -refreshNodes} command; split out of {@code run()}
     * to keep that method short.
     *
     * <p>Without {@code -g}/{@code --graceful} a plain refresh is requested.
     * With it, an optional timeout value is validated and one of
     * {@code -client} or {@code -server} must also be present.
     *
     * @param args the full command line, with the command itself at index 0
     * @param cmd the command being executed, used when printing usage
     * @param isHAEnabled forwarded to {@code printUsage}
     * @return {@code -1} on a missing option argument or a missing tracking
     *         mode, otherwise the result of the matching {@code refreshNodes} call
     * @throws IOException propagated from the refresh call
     * @throws YarnException propagated from the refresh call
     * @throws ParseException for parse failures other than a missing argument
     */
    private int handleRefreshNodes(String[] args, String cmd, boolean isHAEnabled)
            throws IOException, YarnException, ParseException {
        final Options options = new Options();
        options.addOption("refreshNodes", false, "Refresh the hosts information at the ResourceManager.");
        final Option graceful = new Option("g", "graceful", true,
                "Wait for timeout before marking the NodeManager as decommissioned.");
        graceful.setOptionalArg(true);
        options.addOption(graceful);
        options.addOption("client", false, "Indicates the timeout tracking should be handled by the client.");
        options.addOption("server", false, "Indicates the timeout tracking should be handled by the RM.");

        final CommandLine parsed;
        try {
            parsed = new GnuParser().parse(options, args);
        } catch (MissingArgumentException missingArg) {
            System.out.println("Missing argument for options");
            printUsage(args[0], isHAEnabled);
            return -1;
        }

        if (!parsed.hasOption("g")) {
            return refreshNodes();
        }

        // The timeout is validated before the tracking mode is checked, so an
        // invalid value is reported even when no mode was given.
        final String rawTimeout = parsed.getOptionValue("g");
        final long timeout = (rawTimeout == null) ? -1L : validateTimeout(rawTimeout);

        final String trackingMode;
        if (parsed.hasOption("client")) {
            trackingMode = "client";
        } else if (parsed.hasOption("server")) {
            trackingMode = "server";
        } else {
            printUsage(cmd, isHAEnabled);
            return -1;
        }
        return refreshNodes(timeout, trackingMode);
    }

    /**
     * Parses a timeout string and checks that it is not below {@code -1}.
     *
     * @param strTimeout the textual timeout value
     * @return the parsed timeout, which is {@code -1} or greater
     * @throws IllegalArgumentException if the text is not a valid
     *         {@code long} or the value is less than {@code -1}
     */
    private long validateTimeout(String strTimeout) {
        final long parsed;
        try {
            parsed = Long.parseLong(strTimeout);
        } catch (NumberFormatException notANumber) {
            throw new IllegalArgumentException(INVALID_TIMEOUT_ERR_MSG + strTimeout);
        }
        if (parsed >= -1) {
            return parsed;
        }
        throw new IllegalArgumentException(INVALID_TIMEOUT_ERR_MSG + parsed);
    }

    /**
     * Handles the {@code -updateNodeResource} command.
     *
     * <p>Expects the command followed by a node id, a memory size and a core
     * count, plus an optional over-commit timeout as a fifth element.
     *
     * @param args the full command line, with the command itself at index 0
     * @param cmd the command being executed, used when printing usage
     * @param isHAEnabled forwarded to {@code printUsage}
     * @return {@code -1} when the argument count is not 4 or 5, otherwise the
     *         result of {@code updateNodeResource}
     * @throws NumberFormatException if a numeric argument cannot be parsed
     * @throws IOException propagated from {@code updateNodeResource}
     * @throws YarnException propagated from {@code updateNodeResource}
     */
    private int handleUpdateNodeResource(String[] args, String cmd, boolean isHAEnabled)
            throws NumberFormatException, IOException, YarnException {
        final int argCount = args.length;
        if (argCount != 4 && argCount != 5) {
            System.err.println("Number of parameters specified for updateNodeResource is wrong.");
            printUsage(cmd, isHAEnabled);
            return -1;
        }

        // Parse order (timeout, memory, cores) decides which malformed
        // argument is reported first.
        int overCommitTimeout = ResourceOption.OVER_COMMIT_TIMEOUT_MILLIS_DEFAULT;
        if (argCount == 5) {
            overCommitTimeout = Integer.parseInt(args[4]);
        }
        final int memSize = Integer.parseInt(args[2]);
        final int cores = Integer.parseInt(args[3]);
        return updateNodeResource(args[1], memSize, cores, overCommitTimeout);
    }

    /**
     * Maps a tracking-mode flag to its bare name.
     *
     * @param mode the flag as typed, expected to be {@code -client} or
     *        {@code -server}
     * @return {@code "client"} or {@code "server"}
     * @throws IllegalArgumentException for any other value, including {@code null}
     */
    private String validateTrackingMode(String mode) {
        final boolean clientMode = "-client".equals(mode);
        if (!clientMode && !"-server".equals(mode)) {
            throw new IllegalArgumentException("Invalid mode specified: " + mode);
        }
        return clientMode ? "client" : "server";
    }

    /**
     * Stores the configuration. A non-null configuration is first replaced by
     * the copy that {@code addSecurityConfiguration} returns; {@code null} is
     * passed through unchanged.
     *
     * @param conf the configuration to use, may be {@code null}
     */
    @Override
    public void setConf(Configuration conf) {
        final Configuration effective = (conf == null) ? null : addSecurityConfiguration(conf);
        super.setConf(effective);
    }

    /**
     * Builds a copy of the given configuration with the service user name key
     * set to the configured ResourceManager principal.
     *
     * @param conf the original config, left untouched
     * @return a new {@code YarnConfiguration} carrying the security setting
     */
    private static Configuration addSecurityConfiguration(Configuration conf) {
        // Work on a copy so the caller's object is not mutated. A
        // YarnConfiguration is used to force loading of yarn-site.xml.
        final Configuration secured = new YarnConfiguration(conf);
        // Falls back to the empty string when no RM principal is configured.
        final String rmPrincipal = secured.get(YarnConfiguration.RM_PRINCIPAL, "");
        secured.set(CommonConfigurationKeys.HADOOP_SECURITY_SERVICE_USER_NAME_KEY, rmPrincipal);
        return secured;
    }

    /**
     * Resolves a ResourceManager id to the HA service target for it.
     *
     * @param rmId the id of the ResourceManager to address
     * @return a target built from a copy of the current configuration with
     *         {@code RM_HA_ID} set to {@code rmId}
     * @throws IllegalArgumentException if {@code rmId} is not among the ids
     *         returned by {@code HAUtil.getRMHAIds}
     * @throws YarnRuntimeException if the target cannot be constructed; the
     *         underlying exception is attached as the cause
     */
    @Override
    protected HAServiceTarget resolveTarget(String rmId) {
        Collection<String> rmIds = HAUtil.getRMHAIds(getConf());
        if (!rmIds.contains(rmId)) {
            StringBuilder msg = new StringBuilder();
            msg.append(rmId).append(" is not a valid serviceId. It should be one of ");
            for (String id : rmIds) {
                msg.append(id).append(' ');
            }
            throw new IllegalArgumentException(msg.toString());
        }
        try {
            YarnConfiguration conf = new YarnConfiguration(getConf());
            conf.set(YarnConfiguration.RM_HA_ID, rmId);
            return new RMHAServiceTarget(conf);
        } catch (IllegalArgumentException iae) {
            // Chain the cause so the root failure is not lost.
            throw new YarnRuntimeException(
                    "Could not connect to " + rmId + "; the configuration for it might be missing", iae);
        } catch (IOException ioe) {
            throw new YarnRuntimeException("Could not connect to RM HA Admin for node " + rmId, ioe);
        }
    }

    /**
     * Returns all ResourceManager ids found in the current configuration.
     * The argument is not consulted.
     *
     * @param targetNodeToActivate ignored by this implementation
     * @return the ids reported by {@code HAUtil.getRMHAIds}
     */
    @Override
    protected Collection<String> getTargetIds(String targetNodeToActivate) {
        final Collection<String> allRmIds = HAUtil.getRMHAIds(getConf());
        return allRmIds;
    }

    /**
     * Supplies the usage prefix for this tool.
     *
     * @return the fixed text {@code "Usage: rmadmin"}
     */
    @Override
    protected String getUsageString() {
        final String usagePrefix = "Usage: rmadmin";
        return usagePrefix;
    }

    /**
     * Command-line entry point: runs a new {@code RMAdminCLI} through
     * {@code ToolRunner} and exits the JVM with the returned code.
     *
     * @param args the command-line arguments
     * @throws Exception if {@code ToolRunner.run} throws
     */
    public static void main(String[] args) throws Exception {
        System.exit(ToolRunner.run(new RMAdminCLI(), args));
    }
}