Source listing: com.alibaba.jstorm.daemon.nimbus.metric.ClusterMetricsContext.java

Java tutorial

Introduction

Below is the complete source code for com.alibaba.jstorm.daemon.nimbus.metric.ClusterMetricsContext.java.

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.alibaba.jstorm.daemon.nimbus.metric;

import com.alibaba.jstorm.common.metric.MetricMeta;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicIntegerArray;

import org.apache.commons.lang.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.alibaba.jstorm.client.ConfigExtension;
import com.alibaba.jstorm.cluster.StormClusterState;
import com.alibaba.jstorm.common.metric.AsmGauge;
import com.alibaba.jstorm.daemon.nimbus.NimbusData;
import com.alibaba.jstorm.daemon.nimbus.metric.flush.FlushEvent;
import com.alibaba.jstorm.daemon.nimbus.metric.merge.MergeEvent;
import com.alibaba.jstorm.daemon.nimbus.metric.refresh.RefreshEvent;
import com.alibaba.jstorm.daemon.nimbus.metric.uploader.DefaultMetricUploader;
import com.alibaba.jstorm.daemon.nimbus.metric.uploader.MetricUploader;
import com.alibaba.jstorm.daemon.nimbus.metric.uploader.TopologyMetricDataInfo;
import com.alibaba.jstorm.daemon.nimbus.metric.uploader.UploadEvent;
import com.alibaba.jstorm.metric.DefaultMetricIDGenerator;
import com.alibaba.jstorm.metric.DefaultMetricQueryClient;
import com.alibaba.jstorm.metric.JStormMetricCache;
import com.alibaba.jstorm.metric.JStormMetrics;
import com.alibaba.jstorm.metric.MetaType;
import com.alibaba.jstorm.metric.MetricDef;
import com.alibaba.jstorm.metric.MetricIDGenerator;
import com.alibaba.jstorm.metric.MetricQueryClient;
import com.alibaba.jstorm.metric.MetricUtils;
import com.alibaba.jstorm.metric.SimpleJStormMetric;
import com.alibaba.jstorm.metric.TimeTicker;
import com.alibaba.jstorm.metric.TopologyMetricContext;
import com.alibaba.jstorm.utils.JStormUtils;
import com.alibaba.jstorm.utils.TimeUtils;
import com.codahale.metrics.Gauge;

import backtype.storm.generated.MetricInfo;
import backtype.storm.generated.TopologyMetric;
import backtype.storm.utils.Utils;

/**
 * Nimbus-side coordination point for cluster metrics.
 * <p>
 * Responsibilities visible in this class:
 * <ul>
 *   <li>keeps a per-topology {@link TopologyMetricContext} snapshot map and a
 *       {@link JStormMetricCache} of pending metric data,</li>
 *   <li>schedules the periodic refresh / flush / merge / upload / diagnosis events
 *       on the nimbus scheduler executor,</li>
 *   <li>manages a fixed-size array of "upload slots" (see {@link #metricStat}) whose
 *       entries move through the states UNSET → PRE_SET → SET → UPLOADING → UNSET,</li>
 *   <li>fans metric registration/upload calls out to all configured
 *       {@link MetricUploader} plugins via the inner {@link MetricUploaderDelegate}.</li>
 * </ul>
 * Thread-safety: shared state is held in concurrent structures
 * ({@link ConcurrentHashMap}, {@link AtomicIntegerArray}, {@link AtomicBoolean});
 * the scheduled events run on the executor returned by
 * {@code nimbusData.getScheduExec()}.
 */
public class ClusterMetricsContext {
    private static final Logger LOG = LoggerFactory.getLogger(ClusterMetricsContext.class);

    // shared metric cache obtained from NimbusData; backs the pending-upload slots
    // (the delegate javadoc below suggests the data ultimately lives in rocksdb)
    protected JStormMetricCache metricCache;

    /**
     * map<topologyId, TopologyMetricContext>>, local memory cache, keeps only one snapshot of metrics.
     */
    protected final ConcurrentMap<String, TopologyMetricContext> topologyMetricContexts = new ConcurrentHashMap<>();

    // cache-key prefixes for pending upload slots; the slot index is appended to form the full key
    public static final String PENDING_UPLOAD_METRIC_DATA = "__pending.upload.metrics__";
    public static final String PENDING_UPLOAD_METRIC_DATA_INFO = "__pending.upload.metrics.info__";

    // slot states for metricStat (see class javadoc for the transition order):
    // the slot is empty
    private static final int UNSET = 0;
    // the slot is ready for uploading
    private static final int SET = 1;
    // the slot is being uploaded
    private static final int UPLOADING = 2;
    // the slot will be set ready for uploading (claimed via CAS in getAndPresetFirstEmptyIndex)
    private static final int PRE_SET = 3;

    // one state cell per pending-upload slot; length == maxPendingUploadMetrics
    protected final AtomicIntegerArray metricStat;

    protected StormClusterState stormClusterState;

    // facade that fans calls out to all metricUploaders; created at the end of initPlugin()
    private MetricUploaderDelegate metricUploaderDelegate;
    protected final List<MetricUploader> metricUploaders = new ArrayList<>();
    // last uploader in the configured chain; only it may clear a slot (see markUploaded)
    private MetricUploader lastMetricUploader;
    // flips to true once initPlugin() has fully finished; checked via isReadyToUpload()
    private final AtomicBoolean readyToUpload = new AtomicBoolean(false);

    protected AtomicBoolean isShutdown;
    protected String clusterName;
    // number of upload slots; configured via ConfigExtension.getMaxPendingMetricNum
    protected int maxPendingUploadMetrics;

    private final NimbusData nimbusData;
    private MetricQueryClient metricQueryClient;

    /**
     * use default UUID generator
     */
    private final MetricIDGenerator metricIDGenerator = new DefaultMetricIDGenerator();

    /**
     * Builds the context from nimbus state. Fails fast (RuntimeException) when
     * {@code cluster.name} is missing from the configuration. Any slots that
     * already have data-info entries in the metric cache (i.e. left over from a
     * previous run) are restored to the SET state so they get re-uploaded.
     */
    public ClusterMetricsContext(final NimbusData nimbusData) {
        LOG.info("create cluster metrics context...");

        this.nimbusData = nimbusData;
        this.metricCache = nimbusData.getMetricCache();
        this.stormClusterState = nimbusData.getStormClusterState();
        this.isShutdown = nimbusData.getIsShutdown();
        clusterName = ConfigExtension.getClusterName(nimbusData.getConf());
        if (clusterName == null) {
            throw new RuntimeException("cluster.name property must be set in storm.yaml!");
        }

        this.maxPendingUploadMetrics = ConfigExtension.getMaxPendingMetricNum(nimbusData.getConf());
        this.metricStat = new AtomicIntegerArray(this.maxPendingUploadMetrics);

        // recover pending-upload slots that survived a nimbus restart
        int cnt = 0;
        for (int i = 0; i < maxPendingUploadMetrics; i++) {
            TopologyMetricDataInfo obj = getMetricDataInfoFromCache(i);
            if (obj != null) {
                this.metricStat.set(i, SET);
                cnt++;
            }
        }
        LOG.info("pending upload metrics: {}", cnt);

        // track nimbus JVM heap
        JStormMetrics.registerWorkerGauge(JStormMetrics.NIMBUS_METRIC_KEY, MetricDef.MEMORY_USED,
                new AsmGauge(new Gauge<Double>() {
                    @Override
                    public Double getValue() {
                        return JStormUtils.getJVMHeapMemory();
                    }
                }));
    }

    /**
     * init plugins and start event
     * <p>
     * Note: a plugin init failure is fatal — the whole nimbus process exits.
     */
    public void init() {
        try {
            initPlugin();
        } catch (RuntimeException e) {
            LOG.error("init metrics plugin error:", e);
            System.exit(-1);
        }

        pushRefreshEvent();
        pushFlushEvent();
        pushMergeEvent();
        pushUploadEvent();
        pushDiagnosisEvent();
        LOG.info("Finish");
    }

    /**
     * Initializes the rate controller, the metric uploader plugin chain
     * (comma-separated class names; falls back to {@link DefaultMetricUploader}),
     * and the metric query client, then marks the context ready to upload.
     * Wraps checked plugin-init exceptions in RuntimeException so init() can
     * treat any failure uniformly.
     */
    public void initPlugin() {
        // init rate controller
        MetricUploader.rateController.init(nimbusData.getConf());

        String metricUploadClass = ConfigExtension.getMetricUploaderClass(nimbusData.getConf());
        if (StringUtils.isBlank(metricUploadClass)) {
            metricUploadClass = DefaultMetricUploader.class.getName();
        }
        // init metric uploader
        LOG.info("metric uploader classes:{}", metricUploadClass);
        String[] classes = metricUploadClass.split(",");
        for (String klass : classes) {
            klass = klass.trim();
            if (StringUtils.isBlank(klass)) {
                continue;
            }

            Object instance = Utils.newInstance(klass);
            if (!(instance instanceof MetricUploader)) {
                throw new RuntimeException(klass + " isn't MetricUploader class ");
            }
            MetricUploader metricUploader = (MetricUploader) instance;
            try {
                metricUploader.init(nimbusData);
            } catch (Exception e) {
                throw new RuntimeException(e);
            }
            metricUploaders.add(metricUploader);
            LOG.info("Successfully init metric uploaders:{}", metricUploaders);
        }
        // NOTE(review): throws IndexOutOfBoundsException if the configured value
        // contained only blank segments (e.g. ",,"); presumably config is always sane.
        this.lastMetricUploader = metricUploaders.get(metricUploaders.size() - 1);

        // init metric query client
        String metricQueryClientClass = ConfigExtension.getMetricQueryClientClass(nimbusData.getConf());
        if (!StringUtils.isBlank(metricQueryClientClass)) {
            LOG.info("metric query client class:{}", metricQueryClientClass);
            this.metricQueryClient = (MetricQueryClient) Utils.newInstance(metricQueryClientClass);
        } else {
            LOG.warn("use default metric query client class.");
            this.metricQueryClient = new DefaultMetricQueryClient();
        }
        try {
            metricQueryClient.init(nimbusData.getConf());
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
        LOG.info("Successfully init MetricQureyClient ");

        this.metricUploaderDelegate = new MetricUploaderDelegate();
        this.readyToUpload.set(true);
    }

    /** Schedules the topology-refresh event: immediately, then every 60s. */
    public void pushRefreshEvent() {
        LOG.debug("Issue RefreshEvent.RefreshSys Event");

        RefreshEvent refreshTopologyEvent = new RefreshEvent();
        refreshTopologyEvent.setClusterMetricsContext(this);
        nimbusData.getScheduExec().scheduleAtFixedRate(refreshTopologyEvent, 0, 60, TimeUnit.SECONDS);
    }

    /** Schedules the flush event every 15s (first run after 15s). */
    public void pushFlushEvent() {
        FlushEvent event = new FlushEvent();
        event.setClusterMetricsContext(this);
        nimbusData.getScheduExec().scheduleAtFixedRate(event, 15, 15, TimeUnit.SECONDS);
    }

    /** Schedules the merge event every 60s (first run after 60s). */
    public void pushMergeEvent() {
        MergeEvent event = new MergeEvent();
        event.setClusterMetricsContext(this);

        nimbusData.getScheduExec().scheduleAtFixedRate(event, 60, 60, TimeUnit.SECONDS);
    }

    /** Schedules the diagnosis event (prints slot stats) every 60s. */
    public void pushDiagnosisEvent() {
        DiagnosisEvent event = new DiagnosisEvent();
        event.setClusterMetricsContext(this);

        nimbusData.getScheduExec().scheduleAtFixedRate(event, 60, 60, TimeUnit.SECONDS);
    }

    /** Starts the upload event once; it never re-fires from the scheduler. */
    public void pushUploadEvent() {
        UploadEvent event = new UploadEvent();
        event.setClusterMetricsContext(this);

        // special, upload thread is actually an inf-loop, so we use Long.MAX_VALUE
        nimbusData.getScheduExec().scheduleAtFixedRate(event, 0, Long.MAX_VALUE, TimeUnit.SECONDS);
    }

    /** Cleans up all uploader plugins via the delegate. Does not stop the scheduled events here. */
    public void shutdown() {
        LOG.info("Begin to shutdown");
        getMetricUploaderDelegate().cleanup();

        LOG.info("Successfully shutdown");
    }

    /**
     * get topology metrics, note that only topology & component & worker
     * metrics are returned
     * <p>
     * Task/stream/netty slots are filled with an empty placeholder MetricInfo.
     * Records its own latency in the "getTopologyMetric" nimbus histogram.
     */
    public TopologyMetric getTopologyMetric(String topologyId) {
        long start = System.nanoTime();
        try {
            TopologyMetric ret = new TopologyMetric();
            List<MetricInfo> topologyMetrics = metricCache.getMetricData(topologyId, MetaType.TOPOLOGY);
            List<MetricInfo> componentMetrics = metricCache.getMetricData(topologyId, MetaType.COMPONENT);
            List<MetricInfo> workerMetrics = metricCache.getMetricData(topologyId, MetaType.WORKER);

            MetricInfo dummy = MetricUtils.mkMetricInfo();
            if (topologyMetrics.size() > 0) {
                // get the last min topology metric
                ret.set_topologyMetric(topologyMetrics.get(topologyMetrics.size() - 1));
            } else {
                ret.set_topologyMetric(dummy);
            }
            // NOTE(review): component/worker take the FIRST list element while topology
            // takes the LAST — presumably intentional, but worth confirming upstream.
            if (componentMetrics.size() > 0) {
                ret.set_componentMetric(componentMetrics.get(0));
            } else {
                ret.set_componentMetric(dummy);
            }
            if (workerMetrics.size() > 0) {
                ret.set_workerMetric(workerMetrics.get(0));
            } else {
                ret.set_workerMetric(dummy);
            }
            ret.set_taskMetric(dummy);
            ret.set_streamMetric(dummy);
            ret.set_nettyMetric(dummy);

            return ret;
        } finally {
            long end = System.nanoTime();
            SimpleJStormMetric.updateNimbusHistogram("getTopologyMetric", (end - start) / TimeUtils.NS_PER_US);
        }
    }

    /**
     * Deletes metric meta entries (remote store + in-memory + cache) for the given
     * ids of an alive topology. Ids whose meta cannot be found remotely are logged
     * and skipped; an unknown topology is logged and the whole call is a no-op.
     */
    public void deleteMetric(String topologyId, int metaType, List<String> idList) {
        final TopologyMetricContext context = topologyMetricContexts.get(topologyId);
        if (context != null) {
            for (String id : idList) {
                MetricMeta meta = metricQueryClient.getMetricMeta(clusterName, topologyId, MetaType.parse(metaType),
                        id);
                if (meta != null) {
                    LOG.warn("deleting metric meta:{}", meta);
                    metricQueryClient.deleteMeta(meta);
                    context.getMemMeta().remove(meta.getFQN());

                    // re-persist the shrunken meta map after each removal
                    metricCache.put(topologyId, context.getMemMeta());
                } else {
                    LOG.warn("Failed to delete metric meta, topology:{}, metaType:{}, id:{}, meta not found",
                            topologyId, metaType, id);
                }
            }
        } else {
            LOG.warn("Failed to delete metric meta, topology:{} doesn't exist!", topologyId);
        }
    }

    /** @return the pseudo-topology context that holds cluster-level metrics, or null if absent */
    public TopologyMetricContext getClusterTopologyMetricContext() {
        return topologyMetricContexts.get(JStormMetrics.CLUSTER_METRIC_KEY);
    }

    /** @return canonical "host:port" worker slot name */
    public static String getWorkerSlotName(String hostname, Integer port) {
        return hostname + ":" + port;
    }

    /** A topology is considered alive iff it currently has a metric context. */
    public boolean isTopologyAlive(String topologyId) {
        return topologyMetricContexts.containsKey(topologyId);
    }

    /** @return pending metric data stored in slot {@code idx}, or null if the cache has none */
    public TopologyMetric getMetricDataFromCache(int idx) {
        return (TopologyMetric) metricCache.get(PENDING_UPLOAD_METRIC_DATA + idx);
    }

    /** @return pending metric data-info stored in slot {@code idx}, or null if the cache has none */
    public TopologyMetricDataInfo getMetricDataInfoFromCache(int idx) {
        return (TopologyMetricDataInfo) metricCache.get(PENDING_UPLOAD_METRIC_DATA_INFO + idx);
    }

    /**
     * Maps each metric name to a stable numeric id for the given topology.
     * Existing valid ids are reused; missing ones are generated via
     * {@link #metricIDGenerator} and inserted with putIfAbsent so concurrent
     * registrations of the same name converge on one id.
     *
     * @return name→id map; empty map when the topology has no metric context yet
     */
    public Map<String, Long> registerMetrics(String topologyId, Set<String> metricNames) {
        TimeTicker ticker = new TimeTicker(TimeUnit.MILLISECONDS, true);

        TopologyMetricContext topologyMetricContext = topologyMetricContexts.get(topologyId);
        if (topologyMetricContext == null) {
            LOG.warn("topology metrics context does not exist for topology:{}!!!", topologyId);
            return new HashMap<>();
        }

        //        if (!topologyMetricContext.finishSyncRemote()) {
        //            LOG.warn("waiting for topology {} to finish sync with remote.", topologyId);
        //            return new HashMap<>();
        //        }

        ConcurrentMap<String, Long> memMeta = topologyMetricContexts.get(topologyId).getMemMeta();
        Map<String, Long> ret = new HashMap<>();
        for (String metricName : metricNames) {
            Long id = memMeta.get(metricName);
            if (id != null && MetricUtils.isValidId(id)) {
                ret.put(metricName, id);
            } else {
                id = metricIDGenerator.genMetricId(metricName);
                Long old = memMeta.putIfAbsent(metricName, id);
                if (old == null) {
                    ret.put(metricName, id);
                } else {
                    // another thread won the race; use its id
                    ret.put(metricName, old);
                }
            }
        }
        long cost = ticker.stop();
        LOG.info("register metrics, topology:{}, size:{}, cost:{}", topologyId, metricNames.size(), cost);

        return ret;
    }

    /**
     * Logs the state of every non-empty upload slot, five "idx:state" pairs per
     * line, as a diagnosis aid (driven by DiagnosisEvent).
     */
    public void printDiagnosticStats() {
        StringBuilder sb = new StringBuilder(256);
        for (int i = 0, j = 0; i < maxPendingUploadMetrics; i++) {
            int v = metricStat.get(i);
            if (v != UNSET) {
                sb.append(i).append(":").append(v).append("\t");
                if (++j % 5 == 0) {
                    sb.append("\n");
                }
            }
        }
        LOG.info("metric stats\n--------------------------------------\n{}\n",
                sb.length() == 0 ? "ALL UNSET" : sb.toString());
    }

    /**
     * Atomically claims the first empty slot by CAS-ing it from UNSET to PRE_SET,
     * so concurrent claimers never get the same index.
     *
     * @return claimed slot index, or -1 when all slots are occupied
     */
    public int getAndPresetFirstEmptyIndex() {
        for (int i = 0; i < maxPendingUploadMetrics; i++) {
            if (metricStat.get(i) == UNSET) {
                if (metricStat.compareAndSet(i, UNSET, PRE_SET)) {
                    return i;
                }
            }
        }
        return -1;
    }

    /**
     * @return index of the first slot in SET state (ready to upload), or -1 if none.
     *         Read-only scan; the caller is expected to transition the slot itself.
     */
    public int getFirstPendingUploadIndex() {
        for (int i = 0; i < maxPendingUploadMetrics; i++) {
            if (metricStat.get(i) == SET) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Clears the slot only when the reporting uploader is the LAST one in the
     * chain — earlier uploaders must not free the data before later ones see it.
     * Note: compared by reference; uploader instances are created once in initPlugin().
     */
    public void markUploaded(MetricUploader curUploader, int idx) {
        if (curUploader == lastMetricUploader) {
            forceMarkUploaded(idx);
        }
    }

    /**
     * Unconditionally frees slot {@code idx}: drops its cached data and data-info,
     * resets the state to UNSET, and releases one rate-controller upload permit
     * when rate control is enabled.
     */
    public void forceMarkUploaded(int idx) {
        this.metricCache.remove(PENDING_UPLOAD_METRIC_DATA + idx);
        this.metricCache.remove(PENDING_UPLOAD_METRIC_DATA_INFO + idx);
        this.metricStat.set(idx, UNSET);
        if (MetricUploader.rateController.isEnableRateControl()) {
            MetricUploader.rateController.decrUploadingNum();
        }
    }

    /** Transitions slot {@code idx} to UPLOADING (upload in progress). */
    public void markUploading(int idx) {
        this.metricStat.set(idx, UPLOADING);
    }

    /** Transitions slot {@code idx} to SET (data staged, ready for upload). */
    public void markSet(int idx) {
        this.metricStat.set(idx, SET);
    }

    public JStormMetricCache getMetricCache() {
        return metricCache;
    }

    public ConcurrentMap<String, TopologyMetricContext> getTopologyMetricContexts() {
        return topologyMetricContexts;
    }

    public StormClusterState getStormClusterState() {
        return stormClusterState;
    }

    /** @return true once initPlugin() has completed successfully */
    public boolean isReadyToUpload() {
        return this.readyToUpload.get();
    }

    /** @return the fan-out delegate; null until initPlugin() has run */
    public MetricUploaderDelegate getMetricUploaderDelegate() {
        return metricUploaderDelegate;
    }

    public NimbusData getNimbusData() {
        return nimbusData;
    }

    public MetricQueryClient getMetricQueryClient() {
        return metricQueryClient;
    }

    public MetricIDGenerator getMetricIDGenerator() {
        return metricIDGenerator;
    }

    public String getClusterName() {
        return clusterName;
    }

    /**
     * Fans each call out to every configured {@link MetricUploader} and ANDs the
     * boolean results. The {@code &=} operator is non-short-circuiting on purpose:
     * every uploader is invoked even after an earlier one reports failure.
     */
    public class MetricUploaderDelegate {

        /** @return true when rate control is on and the controller currently forbids uploading */
        public boolean isUnderFlowControl() {
            return MetricUploader.rateController.isEnableRateControl()
                    && !MetricUploader.rateController.syncToUpload();
        }

        /** Cleans up every uploader in registration order. */
        public void cleanup() {
            for (MetricUploader metricUploader : metricUploaders) {
                metricUploader.cleanup();
            }
        }

        /**
         * register metrics to external metric plugin
         *
         * @return true only if every uploader succeeded
         */
        public boolean registerMetrics(String clusterName, String topologyId, Map<String, Long> metrics)
                throws Exception {
            boolean ret = true;
            for (MetricUploader metricUploader : metricUploaders) {
                ret &= metricUploader.registerMetrics(clusterName, topologyId, metrics);
            }
            return ret;
        }

        /**
         * upload topologyMetric to external metric plugin (such as database plugin)
         *
         * @return true means success, false means failure
         */
        public boolean upload(String clusterName, String topologyId, TopologyMetric tpMetric,
                Map<String, Object> metricContext) {
            boolean ret = true;
            for (MetricUploader metricUploader : metricUploaders) {
                ret &= metricUploader.upload(clusterName, topologyId, tpMetric, metricContext);
            }
            return ret;
        }

        /**
         * upload metrics with given key and metric context. the implementation can
         * retrieve metric data from rocks db
         * in the handler thread, which is kind of lazy-init, making it more
         * GC-friendly
         *
         * @return true only if every uploader succeeded
         */
        public boolean upload(String clusterName, String topologyId, Object key,
                Map<String, Object> metricContext) {
            boolean ret = true;
            for (MetricUploader metricUploader : metricUploaders) {
                ret &= metricUploader.upload(clusterName, topologyId, key, metricContext);
            }
            return ret;
        }

        /**
         * Send an event to underlying handler
         *
         * @return true only if every uploader accepted the event
         */
        public boolean sendEvent(String clusterName, MetricEvent event) {
            boolean ret = true;
            for (MetricUploader metricUploader : metricUploaders) {
                ret &= metricUploader.sendEvent(clusterName, event);
            }
            return ret;
        }

    }
}