com.twitter.heron.healthmgr.HealthManager.java Source code

Java tutorial

Introduction

Here is the source code for com.twitter.heron.healthmgr.HealthManager.java

Source

// Copyright 2016 Twitter. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.twitter.heron.healthmgr;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ScheduledFuture;
import java.util.logging.Level;
import java.util.logging.Logger;

import com.google.common.annotations.VisibleForTesting;
import com.google.inject.AbstractModule;
import com.google.inject.Guice;
import com.google.inject.Injector;
import com.google.inject.Singleton;
import com.google.inject.name.Names;
import com.microsoft.dhalion.api.IHealthPolicy;
import com.microsoft.dhalion.api.MetricsProvider;
import com.microsoft.dhalion.events.EventManager;
import com.microsoft.dhalion.policy.PoliciesExecutor;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.DefaultParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;

import com.twitter.heron.classification.InterfaceStability.Evolving;
import com.twitter.heron.classification.InterfaceStability.Unstable;
import com.twitter.heron.common.config.SystemConfig;
import com.twitter.heron.common.utils.logging.LoggingHelper;
import com.twitter.heron.healthmgr.HealthPolicyConfigReader.PolicyConfigKey;
import com.twitter.heron.healthmgr.common.PackingPlanProvider;
import com.twitter.heron.healthmgr.sensors.TrackerMetricsProvider;
import com.twitter.heron.scheduler.client.ISchedulerClient;
import com.twitter.heron.scheduler.client.SchedulerClientFactory;
import com.twitter.heron.spi.common.Config;
import com.twitter.heron.spi.common.ConfigLoader;
import com.twitter.heron.spi.common.Context;
import com.twitter.heron.spi.common.Key;
import com.twitter.heron.spi.statemgr.IStateManager;
import com.twitter.heron.spi.statemgr.SchedulerStateManagerAdaptor;
import com.twitter.heron.spi.utils.ReflectionUtils;

/**
 * {@link HealthManager} makes a topology dynamic and self-regulating. This is implemented using
 * Dhalion library. The {@link HealthManager} will perform the following functions to achieve its
 * goal:
 * <ul>
 * <li>loads heron configuration including health policy configuration from
 * <code>healthmgr.yaml</code>
 * <li>initializes the Guice injector with the metrics collection module from <code>tracker</code>
 * or <code>metrics cache</code>, the <code>scheduler client</code> and the <code>state client</code>
 * <li>initializes health policy instances and starts policy execution using
 * {@link PoliciesExecutor}
 * </ul>
 * The {@link HealthManager} is executed as a process. It is recommended that it is started on
 * container 0, colocated with the metrics provider and the scheduler service.
 * <p>
 * Required command line options for the {@link HealthManager} include
 * <ul>
 * <li>cluster name: <code>-c local</code>
 * <li>role: <code> -r dev</code>
 * <li>environment: <code> -e default</code>
 * <li>topology name: <code> -n AckingTopology</code>
 * </ul>
 * <p>
 * Optional command line options for the {@link HealthManager} include
 * <ul>
 * <li>health manager mode: <code> -m local</code>, default: <code>cluster</code>
 * <li>heron home directory: <code> -d ~/.heron</code>, required if mode is local
 * <li>config directory: <code> -p ~/.heron/conf</code>, required if mode is local
 * <li>metrics type: <code>-s f.q.class.name</code>,
 * default: <code>com.twitter.heron.healthmgr.sensors.TrackerMetricsProvider</code>
 * <li>metrics source: <code>-t http://host:port</code>, default: <code>http://127.0.0.1:8888</code>
 * <li>enable verbose mode: <code> -v</code>
 * </ul>
 */
@Unstable
@Evolving
public class HealthManager {
    /** Guice binding name under which the topology name string is bound. */
    public static final String CONF_TOPOLOGY_NAME = "TOPOLOGY_NAME";
    /** Guice binding name under which the metrics source URL string is bound. */
    public static final String CONF_METRICS_SOURCE_URL = "METRICS_SOURCE_URL";
    /** Guice binding name under which the metrics provider class name is bound. */
    private static final String CONF_METRICS_SOURCE_TYPE = "METRICS_SOURCE_TYPE";

    private static final Logger LOG = Logger.getLogger(HealthManager.class.getName());
    // Static Heron configuration supplied at construction time.
    private final Config config;
    // Module supplied at construction time; used to create the root injector in initialize().
    private final AbstractModule baseModule;

    // The fields below are populated by initialize().
    private Config runtime;
    private Injector injector;
    private SchedulerStateManagerAdaptor stateMgrAdaptor;
    private ISchedulerClient schedulerClient;

    // Policies instantiated by initializePolicies(); the list reference itself is never replaced.
    private final List<IHealthPolicy> healthPolicies = new ArrayList<>();
    private HealthPolicyConfigReader policyConfigReader;

    /**
     * Process modes accepted by the <code>mode</code> command line option. The constant names
     * are matched verbatim against the option value, so they are intentionally lower case.
     */
    public enum HealthManagerMode {
        cluster,
        local
    }

    /**
     * Long names of the command line options read by the health manager.
     */
    private enum CliArgs {
        CLUSTER("cluster"),
        ROLE("role"),
        ENVIRONMENT("environment"),
        TOPOLOGY_NAME("topology_name"),
        METRIC_SOURCE_URL("metric_source_url"),
        METRIC_SOURCE_TYPE("metric_source_type"),
        HERON_HOME("heron_home"),
        CONFIG_PATH("config_path"),
        MODE("mode"),
        VERBOSE("verbose"),
        METRICSMGR_PORT("metricsmgr_port");

        // Long option name used when registering and querying the option; never changes.
        private final String text;

        CliArgs(String name) {
            this.text = name;
        }
    }

    /**
     * Creates a health manager. Only the arguments are stored here; injectors, clients and
     * policies are created later by {@link #initialize()}.
     *
     * @param config static Heron configuration
     * @param baseModule module used to create the root Guice injector in {@link #initialize()}
     */
    public HealthManager(Config config, AbstractModule baseModule) {
        this.baseModule = baseModule;
        this.config = config;
    }

    /**
     * Command line entry point of the health manager process.
     * <p>
     * Parses the command line, loads the Heron configuration for the selected mode, sets up
     * logging, initializes a {@link HealthManager} and then blocks on the future returned by
     * {@link PoliciesExecutor#start()}. If the <code>metricsmgr_port</code> option is present, a
     * {@link HealthManagerMetrics} runnable is started on its own thread and closed on exit.
     *
     * @param args command line arguments
     * @throws IllegalArgumentException if the mode is not a {@link HealthManagerMode} name, or
     * if local mode is requested without <code>heron_home</code> and <code>config_path</code>
     * @throws RuntimeException if the command line options cannot be parsed
     * @throws Exception any other failure raised during setup or policy execution
     */
    public static void main(String[] args) throws Exception {
        CommandLineParser parser = new DefaultParser();
        Options slaManagerCliOptions = constructCliOptions();

        // parse the help options first.
        Options helpOptions = constructHelpOptions();
        CommandLine cmd = parser.parse(helpOptions, args, true);
        if (cmd.hasOption("h")) {
            usage(slaManagerCliOptions);
            return;
        }

        try {
            cmd = parser.parse(slaManagerCliOptions, args);
        } catch (ParseException e) {
            usage(slaManagerCliOptions);
            throw new RuntimeException("Error parsing command line options: ", e);
        }

        HealthManagerMode mode = HealthManagerMode.cluster;
        if (hasOption(cmd, CliArgs.MODE)) {
            String modeName = getOptionValue(cmd, CliArgs.MODE);
            try {
                mode = HealthManagerMode.valueOf(modeName);
            } catch (IllegalArgumentException e) {
                // valueOf only reports "No enum constant ..."; surface the offending value instead.
                throw new IllegalArgumentException("Invalid mode: " + modeName, e);
            }
        }

        Config config;
        switch (mode) {
        case cluster:
            config = Config.toClusterMode(Config.newBuilder().putAll(ConfigLoader.loadClusterConfig())
                    .putAll(commandLineConfigs(cmd)).build());
            break;

        case local:
            if (!hasOption(cmd, CliArgs.HERON_HOME) || !hasOption(cmd, CliArgs.CONFIG_PATH)) {
                throw new IllegalArgumentException("Missing heron_home or config_path argument");
            }
            String heronHome = getOptionValue(cmd, CliArgs.HERON_HOME);
            String configPath = getOptionValue(cmd, CliArgs.CONFIG_PATH);
            config = Config.toLocalMode(
                    Config.newBuilder().putAll(ConfigLoader.loadConfig(heronHome, configPath, null, null))
                            .putAll(commandLineConfigs(cmd)).build());
            break;

        default:
            // Kept so that config is definitely assigned; guards against future enum constants.
            throw new IllegalArgumentException("Invalid mode: " + getOptionValue(cmd, CliArgs.MODE));
        }

        setupLogging(cmd, config);

        LOG.info("Static Heron config loaded successfully ");
        LOG.fine(config.toString());

        // load the default config value and override with any command line values
        String metricSourceClassName = config.getStringValue(PolicyConfigKey.METRIC_SOURCE_TYPE.key());
        metricSourceClassName = getOptionValue(cmd, CliArgs.METRIC_SOURCE_TYPE, metricSourceClassName);

        String metricsUrl = config.getStringValue(PolicyConfigKey.METRIC_SOURCE_URL.key());
        metricsUrl = getOptionValue(cmd, CliArgs.METRIC_SOURCE_URL, metricsUrl);

        AbstractModule module = buildMetricsProviderModule(metricsUrl, metricSourceClassName);
        HealthManager healthManager = new HealthManager(config, module);

        LOG.info("Initializing health manager");
        healthManager.initialize();

        // Metrics publishing is optional and only enabled when a port is supplied.
        HealthManagerMetrics publishingMetricsRunnable = null;
        if (hasOption(cmd, CliArgs.METRICSMGR_PORT)) {
            publishingMetricsRunnable = new HealthManagerMetrics(
                    Integer.parseInt(getOptionValue(cmd, CliArgs.METRICSMGR_PORT)));
        }

        LOG.info("Starting Health Manager");
        PoliciesExecutor policyExecutor = new PoliciesExecutor(healthManager.healthPolicies);
        ScheduledFuture<?> future = policyExecutor.start();
        if (publishingMetricsRunnable != null) {
            LOG.info("Starting Health Manager metric posting thread");
            new Thread(publishingMetricsRunnable).start();
        }
        try {
            // Blocks the main thread on the future returned by the policy executor.
            future.get();
        } finally {
            policyExecutor.destroy();
            if (publishingMetricsRunnable != null) {
                publishingMetricsRunnable.close();
            }
        }
    }

    /**
     * Initializes logging for this process and attaches a file handler.
     * <p>
     * The level is {@link Level#FINE} when the verbose option is present and {@link Level#INFO}
     * otherwise. The log directory and the size and file-count limits passed to the file handler
     * are read from the system config file referenced by {@code config}.
     *
     * @param cmd parsed command line
     * @param config Heron configuration providing the system config file and the topology name
     * @throws IOException declared for the config loading and file handler calls made here
     */
    private static void setupLogging(CommandLine cmd, Config config) throws IOException {
        SystemConfig systemConfig = SystemConfig.newBuilder(true)
                .putAll(Context.systemConfigFile(config), true)
                .build();

        Level loggingLevel = hasOption(cmd, CliArgs.VERBOSE) ? Level.FINE : Level.INFO;

        String loggingDir = systemConfig.getHeronLoggingDirectory();
        LoggingHelper.loggerInit(loggingLevel, true);

        // Log file base name has the form heron-<topology name>-healthmgr.
        String logFileName = String.format("%s-%s-%s", "heron", Context.topologyName(config), "healthmgr");
        LoggingHelper.addLoggingHandler(
                LoggingHelper.getFileHandler(
                        logFileName,
                        loggingDir,
                        true,
                        systemConfig.getHeronLoggingMaximumSize(),
                        systemConfig.getHeronLoggingMaximumFiles()));

        LOG.info("Logging setup done.");
    }

    /**
     * Tells whether the option identified by {@code argName} is present on the command line.
     *
     * @param cmd parsed command line
     * @param argName option to look up by its long name
     * @return the result of {@link CommandLine#hasOption(String)} for that name
     */
    private static boolean hasOption(CommandLine cmd, CliArgs argName) {
        String optionName = argName.text;
        return cmd.hasOption(optionName);
    }

    /**
     * Returns the value of the option identified by {@code argName}, with {@code null} as the
     * default passed to the underlying lookup.
     *
     * @param cmd parsed command line
     * @param argName option to look up by its long name
     * @return the value returned by the lookup
     */
    private static String getOptionValue(CommandLine cmd, CliArgs argName) {
        // Same lookup as the three-argument overload, with null as the default.
        return getOptionValue(cmd, argName, null);
    }

    /**
     * Returns the value of the option identified by {@code argName}, passing
     * {@code defaultValue} to the underlying lookup as the fallback.
     *
     * @param cmd parsed command line
     * @param argName option to look up by its long name
     * @param defaultValue fallback handed to {@link CommandLine#getOptionValue(String, String)}
     * @return the value returned by the lookup
     */
    private static String getOptionValue(CommandLine cmd, CliArgs argName, String defaultValue) {
        String optionName = argName.text;
        return cmd.getOptionValue(optionName, defaultValue);
    }

    /**
     * Prepares this health manager for policy execution.
     * <p>
     * The steps run in a fixed order because each depends on the previous ones: the root
     * injector is created from the base module, then the state manager adaptor, the runtime
     * config, the scheduler client and the policy config reader. Finally a child injector with
     * the common bindings is created and the configured policies are instantiated.
     *
     * @throws ReflectiveOperationException if a class named in the configuration cannot be
     * loaded or instantiated
     * @throws FileNotFoundException declared by the policy config reader creation
     */
    public void initialize() throws ReflectiveOperationException, FileNotFoundException {
        this.injector = Guice.createInjector(this.baseModule);

        this.stateMgrAdaptor = createStateMgrAdaptor();

        // The runtime config carries what the scheduler client factory is given besides config.
        Config.Builder runtimeBuilder = Config.newBuilder()
                .put(Key.SCHEDULER_STATE_MANAGER_ADAPTOR, this.stateMgrAdaptor)
                .put(Key.TOPOLOGY_NAME, Context.topologyName(this.config));
        this.runtime = runtimeBuilder.build();

        this.schedulerClient = createSchedulerClient();

        this.policyConfigReader = createPolicyConfigReader();

        // The common module reads the root injector, so build it before replacing the field.
        AbstractModule sharedBindings = buildCommonConfigModule();
        this.injector = this.injector.createChildInjector(sharedBindings);

        initializePolicies();
    }

    /**
     * Instantiates every policy listed by the policy config reader and appends it to
     * {@code healthPolicies}.
     * <p>
     * Each policy gets its own child injector in which its {@link HealthPolicyConfig} is bound.
     * The policy class is checked with {@link Class#asSubclass(Class)} before instantiation, so
     * a misconfigured class name fails immediately instead of after Guice has constructed it.
     *
     * @throws ClassNotFoundException if a configured policy class cannot be loaded
     * @throws ClassCastException if a configured class does not implement {@link IHealthPolicy}
     */
    private void initializePolicies() throws ClassNotFoundException {
        ClassLoader classLoader = this.getClass().getClassLoader();
        List<String> policyIds = policyConfigReader.getPolicyIds();
        for (String policyId : policyIds) {
            Map<String, Object> policyConfigMap = policyConfigReader.getPolicyConfig(policyId);
            HealthPolicyConfig policyConfig = new HealthPolicyConfig(policyConfigMap);

            String policyClassName = policyConfig.getPolicyClass();
            LOG.info(String.format("Initializing %s with class %s", policyId, policyClassName));
            // Checked narrowing replaces the former unchecked cast to Class<IHealthPolicy>.
            Class<? extends IHealthPolicy> policyClass = classLoader.loadClass(policyClassName)
                    .asSubclass(IHealthPolicy.class);

            AbstractModule module = constructPolicySpecificModule(policyConfig);
            IHealthPolicy policy = injector.createChildInjector(module).getInstance(policyClass);

            healthPolicies.add(policy);
        }
    }

    /**
     * Creates a reader for the health policy config file and loads it.
     * <p>
     * The file path is the Heron conf directory from {@code config} joined with the file name
     * given by {@link PolicyConfigKey#CONF_FILE_NAME}.
     *
     * @return a reader on which {@code loadConfig()} has already been called
     * @throws FileNotFoundException declared by the reader's construction or loading
     */
    @VisibleForTesting
    HealthPolicyConfigReader createPolicyConfigReader() throws FileNotFoundException {
        String confDir = Context.heronConf(config);
        String confFileName = PolicyConfigKey.CONF_FILE_NAME.key();

        HealthPolicyConfigReader reader =
                new HealthPolicyConfigReader(Paths.get(confDir, confFileName).toString());
        reader.loadConfig();
        return reader;
    }

    /**
     * Builds the module that exposes the metrics source settings as named strings.
     *
     * @param sourceUrl value bound under the name {@link #CONF_METRICS_SOURCE_URL}
     * @param type value bound under the name {@code CONF_METRICS_SOURCE_TYPE}
     * @return a module containing exactly those two bindings
     */
    @VisibleForTesting
    static AbstractModule buildMetricsProviderModule(final String sourceUrl, final String type) {
        return new AbstractModule() {
            @Override
            protected void configure() {
                // Guice's Key is fully qualified because this file imports Heron's Key.
                bind(com.google.inject.Key.get(String.class, Names.named(CONF_METRICS_SOURCE_URL)))
                        .toInstance(sourceUrl);
                bind(com.google.inject.Key.get(String.class, Names.named(CONF_METRICS_SOURCE_TYPE)))
                        .toInstance(type);
            }
        };
    }

    /**
     * Builds the module holding the bindings shared by all health policies.
     * <p>
     * The metrics provider class name is read from the current injector under the name
     * {@code CONF_METRICS_SOURCE_TYPE} and must name a {@link MetricsProvider} subtype. The
     * returned module binds the topology name, cluster and environment as named strings, the
     * config, scheduler client and state manager adaptor as instances, and the event manager,
     * packing plan provider and metrics provider as singletons.
     *
     * @return the common configuration module
     * @throws ReflectiveOperationException if the metrics provider class cannot be loaded
     */
    private AbstractModule buildCommonConfigModule() throws ReflectiveOperationException {
        com.google.inject.Key<String> sourceTypeKey =
                com.google.inject.Key.get(String.class, Names.named(CONF_METRICS_SOURCE_TYPE));
        String providerClassName = injector.getInstance(sourceTypeKey);

        Class<?> loadedClass = Class.forName(providerClassName);
        final Class<? extends MetricsProvider> providerClass = loadedClass.asSubclass(MetricsProvider.class);

        return new AbstractModule() {
            @Override
            protected void configure() {
                bindNamedString(CONF_TOPOLOGY_NAME, Context.topologyName(config));
                bindNamedString(TrackerMetricsProvider.CONF_CLUSTER, Context.cluster(config));
                bindNamedString(TrackerMetricsProvider.CONF_ENVIRON, Context.environ(config));
                bind(Config.class).toInstance(config);
                bind(EventManager.class).in(Singleton.class);
                bind(ISchedulerClient.class).toInstance(schedulerClient);
                bind(SchedulerStateManagerAdaptor.class).toInstance(stateMgrAdaptor);
                bind(PackingPlanProvider.class).in(Singleton.class);
                bind(MetricsProvider.class).to(providerClass).in(Singleton.class);
            }

            // Binds a String instance under the given @Named annotation value.
            private void bindNamedString(String name, String value) {
                bind(String.class).annotatedWith(Names.named(name)).toInstance(value);
            }
        };
    }

    /**
     * Builds the module used for a single policy's child injector.
     *
     * @param policyConfig configuration bound as the {@link HealthPolicyConfig} instance
     * @return a module containing only that binding
     */
    private AbstractModule constructPolicySpecificModule(final HealthPolicyConfig policyConfig) {
        AbstractModule policyModule = new AbstractModule() {
            @Override
            protected void configure() {
                bind(HealthPolicyConfig.class).toInstance(policyConfig);
            }
        };
        return policyModule;
    }

    /**
     * Instantiates the state manager class named in {@code config}, initializes it with that
     * config and wraps it in a {@link SchedulerStateManagerAdaptor}.
     *
     * @return the adaptor around the initialized state manager
     * @throws ReflectiveOperationException if the state manager class cannot be instantiated
     */
    @VisibleForTesting
    SchedulerStateManagerAdaptor createStateMgrAdaptor() throws ReflectiveOperationException {
        // NOTE(review): second constructor argument looks like a timeout, presumably in
        // milliseconds; confirm against SchedulerStateManagerAdaptor.
        final int adaptorTimeout = 5000;

        IStateManager stateManager = ReflectionUtils.newInstance(Context.stateManagerClass(config));
        stateManager.initialize(config);
        return new SchedulerStateManagerAdaptor(stateManager, adaptorTimeout);
    }

    /**
     * Obtains a scheduler client from a {@link SchedulerClientFactory} built with the static
     * config and the runtime config.
     *
     * @return the scheduler client returned by the factory
     */
    private ISchedulerClient createSchedulerClient() {
        SchedulerClientFactory clientFactory = new SchedulerClientFactory(config, runtime);
        return clientFactory.getSchedulerClient();
    }

    /**
     * Builds a config from the cluster, role, environment, topology name and verbose command
     * line options.
     *
     * @param cmd parsed command line options
     * @return a config holding those five entries
     */
    private static Config commandLineConfigs(CommandLine cmd) {
        // Boxed explicitly so the value handed to put() has the same static type as before.
        Boolean verboseFlag = Boolean.valueOf(hasOption(cmd, CliArgs.VERBOSE));

        return Config.newBuilder()
                .put(Key.CLUSTER, getOptionValue(cmd, CliArgs.CLUSTER))
                .put(Key.ROLE, getOptionValue(cmd, CliArgs.ROLE))
                .put(Key.ENVIRON, getOptionValue(cmd, CliArgs.ENVIRONMENT))
                .put(Key.TOPOLOGY_NAME, getOptionValue(cmd, CliArgs.TOPOLOGY_NAME))
                .put(Key.VERBOSE, verboseFlag)
                .build();
    }

    /**
     * Prints the help text for the given options, using this class's simple name as the
     * command name.
     *
     * @param options options to describe
     */
    private static void usage(Options options) {
        String commandName = HealthManager.class.getSimpleName();
        new HelpFormatter().printHelp(commandName, options);
    }

    /**
     * Builds the option set containing only the help flag (<code>-h</code> /
     * <code>--help</code>).
     *
     * @return options holding the single help option
     */
    private static Options constructHelpOptions() {
        Option helpOption = Option.builder("h")
                .longOpt("help")
                .desc("List all options and their description")
                .build();

        return new Options().addOption(helpOption);
    }

    /**
     * Builds the full set of command line options accepted by the health manager.
     * <p>
     * Cluster, role and topology name are marked required; all other options are optional.
     * The <code>metricsmgr_port</code> option is registered here because <code>main</code>
     * queries it; without registration the parser rejects it as unrecognized.
     *
     * @return the options used to parse the command line
     */
    private static Options constructCliOptions() {
        Options options = new Options();

        Option cluster = Option.builder("c").desc("Cluster name in which the topology needs to run on")
                .longOpt(CliArgs.CLUSTER.text).hasArgs().argName(CliArgs.CLUSTER.text).required().build();

        Option role = Option.builder("r").desc("Role under which the topology needs to run")
                .longOpt(CliArgs.ROLE.text).hasArgs().argName(CliArgs.ROLE.text).required().build();

        Option environment = Option.builder("e").desc("Environment under which the topology needs to run")
                .longOpt(CliArgs.ENVIRONMENT.text).hasArgs().argName(CliArgs.ENVIRONMENT.text).build();

        Option heronHome = Option.builder("d").desc("Directory where heron is installed")
                .longOpt(CliArgs.HERON_HOME.text).hasArgs().argName("heron home dir").build();

        Option configFile = Option.builder("p").desc("Path of the config files").longOpt(CliArgs.CONFIG_PATH.text)
                .hasArgs().argName("config path").build();

        Option topologyName = Option.builder("n").desc("Name of the topology").longOpt(CliArgs.TOPOLOGY_NAME.text)
                .hasArgs().argName("topology name").required().build();

        Option metricsSourceURL = Option.builder("t").desc("metrics data source url with port number")
                .longOpt(CliArgs.METRIC_SOURCE_URL.text).hasArgs().argName("data source url").build();

        // candidate metrics sources are:
        // com.twitter.heron.healthmgr.sensors.TrackerMetricsProvider (default)
        // com.twitter.heron.healthmgr.sensors.MetricsCacheMetricsProvider
        Option metricsSourceType = Option.builder("s").desc("metrics data source type")
                .longOpt(CliArgs.METRIC_SOURCE_TYPE.text).hasArg().argName("data source type").build();

        // candidates:
        // local: Health manager is started manually
        // cluster: Health manager is started by executor on container 0 (default)
        Option mode = Option.builder("m").desc("Health manager process mode, cluster or local")
                .longOpt(CliArgs.MODE.text).hasArg().argName("process mode").build();

        Option verbose = Option.builder("v").desc("Enable debug logs").longOpt(CliArgs.VERBOSE.text).build();

        // Long-only option: main() reads it to decide whether to start metrics publishing.
        Option metricsMgrPort = Option.builder()
                .desc("Metrics manager port used for publishing health manager metrics")
                .longOpt(CliArgs.METRICSMGR_PORT.text).hasArg().argName("metrics manager port").build();

        options.addOption(cluster);
        options.addOption(role);
        options.addOption(environment);
        options.addOption(heronHome);
        options.addOption(configFile);
        options.addOption(topologyName);
        options.addOption(metricsSourceType);
        options.addOption(metricsSourceURL);
        options.addOption(mode);
        options.addOption(verbose);
        options.addOption(metricsMgrPort);

        return options;
    }

    /**
     * Exposes the policies created so far, for tests.
     *
     * @return the internal policy list itself, not a copy
     */
    @VisibleForTesting
    List<IHealthPolicy> getHealthPolicies() {
        return this.healthPolicies;
    }
}