com.alibaba.jstorm.schedule.FollowerRunnable.java Source code

Java tutorial

Introduction

Here is the source code for com.alibaba.jstorm.schedule.FollowerRunnable.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.alibaba.jstorm.schedule;

import java.util.*;

import com.alibaba.jstorm.blobstore.BlobStore;
import com.alibaba.jstorm.blobstore.BlobStoreUtils;
import com.alibaba.jstorm.blobstore.BlobSynchronizer;
import com.alibaba.jstorm.blobstore.LocalFsBlobStore;
import com.alibaba.jstorm.callback.Callback;
import com.google.common.collect.Sets;
import org.apache.commons.lang.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.alibaba.jstorm.callback.RunnableCallback;
import com.alibaba.jstorm.client.ConfigExtension;
import com.alibaba.jstorm.cluster.Cluster;
import com.alibaba.jstorm.cluster.StormClusterState;
import com.alibaba.jstorm.daemon.nimbus.NimbusData;
import com.alibaba.jstorm.utils.JStormUtils;
import com.alibaba.jstorm.utils.NetWorkUtils;

import backtype.storm.Config;
import backtype.storm.utils.Utils;

/**
 * Periodic task executed by a nimbus process to take part in leader election and,
 * while it is not the leader, to keep its local state in step with zookeeper.
 *
 * <p>On construction the nimbus registers itself as a slave in the cluster state and,
 * if no leader exists, tries to become the leader. {@link #run()} then loops until
 * {@link #clean()} is called: it retries the election when no leader exists, promotes
 * this process when zookeeper names it as leader, halts the process when leadership
 * has moved to another node, and otherwise refreshes the slave registration (syncing
 * blobs first when the blob store is a {@link LocalFsBlobStore}).
 */
public class FollowerRunnable implements Runnable {
    private static final Logger LOG = LoggerFactory.getLogger(FollowerRunnable.class);

    private final NimbusData data;

    /** Pause between two iterations of {@link #run()}, in milliseconds (passed to {@code Thread.sleep}). */
    private final int sleepTime;

    /** Loop flag of {@link #run()}; set to {@code false} by {@link #clean()}. */
    private volatile boolean state = true;

    /** Callback handed to the cluster state so that blob-store changes trigger {@link #blobSync()}. */
    private final RunnableCallback blobSyncCallback;

    /** Executed once when this nimbus is promoted from follower to leader. */
    private final Callback leaderCallback;

    /** {@code host:port} (or {@code ip:port}) under which this nimbus registers itself. */
    private final String hostPort;

    /** Key in the nimbus detail map holding the number of zookeeper keys missing from the local blob store. */
    public static final String NIMBUS_DIFFER_COUNT_ZK = "nimbus.differ.count.zk";

    /** Seconds a nimbus with missing local keys waits before comparing its priority with other followers. */
    public static final Integer SLAVE_NIMBUS_WAIT_TIME = 60;

    /**
     * Registers this nimbus as a slave and tries to become leader when none exists.
     *
     * @param data           shared nimbus state (configuration, cluster state, blob store)
     * @param sleepTime      pause between two iterations of {@link #run()}, in milliseconds
     * @param leaderCallback executed when this nimbus becomes leader
     * @throws RuntimeException if the local address resolves to localhost/127.0.0.1, or if
     *                          registration or the initial election attempt fails; the
     *                          underlying exception is attached as the cause
     */
    @SuppressWarnings("unchecked")
    public FollowerRunnable(final NimbusData data, int sleepTime, Callback leaderCallback) {
        this.data = data;
        this.sleepTime = sleepTime;
        this.leaderCallback = leaderCallback;

        boolean isLocalIp;
        if (!ConfigExtension.isNimbusUseIp(data.getConf())) {
            this.hostPort = NetWorkUtils.hostname() + ":"
                    + Utils.getInt(data.getConf().get(Config.NIMBUS_THRIFT_PORT));
            isLocalIp = NetWorkUtils.hostname().equals("localhost");
        } else {
            this.hostPort = NetWorkUtils.ip() + ":" + Utils.getInt(data.getConf().get(Config.NIMBUS_THRIFT_PORT));
            isLocalIp = NetWorkUtils.ip().equals("127.0.0.1");
        }
        // A loopback address would be registered under hostPort, so refuse to start with it.
        if (isLocalIp) {
            Exception cause = new Exception("the hostname which nimbus get is localhost");
            LOG.error("failed to get nimbus host!", cause);
            throw new RuntimeException(cause);
        }

        try {
            data.getStormClusterState().update_nimbus_slave(hostPort, data.uptime());
            data.getStormClusterState().update_nimbus_detail(hostPort, null);
        } catch (Exception e) {
            LOG.error("failed to register nimbus host!", e);
            // keep the cause so the caller can see why registration failed
            throw new RuntimeException(e);
        }

        StormClusterState zkClusterState = data.getStormClusterState();
        try {
            // tryToBeLeader also publishes this nimbus' detail entry via check_nimbus_priority
            if (!zkClusterState.leader_existed()) {
                this.tryToBeLeader(data.getConf());
            }
        } catch (Exception e) {
            LOG.error("failed to register nimbus details!", e);
            throw new RuntimeException(e);
        }

        // NOTE(review): this second attempt repeats the one above; it is kept to preserve the
        // existing start-up behaviour. Only this attempt removes the registration on failure.
        try {
            if (!zkClusterState.leader_existed()) {
                this.tryToBeLeader(data.getConf());
            }
        } catch (Exception e1) {
            LOG.error("try to be leader error.", e1);
            try {
                data.getStormClusterState().unregister_nimbus_host(hostPort);
                data.getStormClusterState().unregister_nimbus_detail(hostPort);
            } catch (Exception e2) {
                // best-effort cleanup: report the failure but still surface the original error
                LOG.warn("failed to remove registered nimbus information", e2);
            }
            throw new RuntimeException(e1);
        }

        blobSyncCallback = new RunnableCallback() {
            @Override
            public void run() {
                blobSync();
            }
        };
        if (data.getBlobStore() instanceof LocalFsBlobStore) {
            try {
                // register call back for blob-store
                data.getStormClusterState().blobstore(blobSyncCallback);
                setupBlobstore();
            } catch (Exception e) {
                // a failed blob-store setup is logged but does not abort construction
                LOG.error("setup blob store error", e);
            }
        }
    }

    /**
     * Sets up blobstore state for all current keys: deletes local blobs whose key is not
     * active on zookeeper and registers every locally available active key, together with
     * its version, for this nimbus.
     *
     * @throws Exception if the blob store or the cluster state reports an error
     */
    private void setupBlobstore() throws Exception {
        BlobStore blobStore = data.getBlobStore();
        StormClusterState clusterState = data.getStormClusterState();
        // copies, so deleting blobs below cannot disturb the iteration over the set views
        Set<String> localSetOfKeys = Sets.newHashSet(blobStore.listKeys());
        Set<String> allKeys = Sets.newHashSet(clusterState.active_keys());
        Set<String> localAvailableActiveKeys = Sets.intersection(localSetOfKeys, allKeys);
        // keys on local but not on zk, we will delete it
        Set<String> keysToDelete = Sets.difference(localSetOfKeys, allKeys);
        LOG.debug("deleting keys not on zookeeper {}", keysToDelete);
        for (String key : keysToDelete) {
            blobStore.deleteBlob(key);
        }
        LOG.debug("Creating list of key entries for blobstore inside zookeeper {} local {}", allKeys,
                localAvailableActiveKeys);
        for (String key : localAvailableActiveKeys) {
            int versionForKey = BlobStoreUtils.getVersionForKey(key, data.getNimbusHostPortInfo(), data.getConf());
            clusterState.setup_blobstore(key, data.getNimbusHostPortInfo(), versionForKey);
        }
    }

    /**
     * Tells whether the leader recorded in zookeeper is this nimbus.
     *
     * @param zkMaster leader address read from zookeeper, expected as {@code host:port}
     * @return {@code true} if {@code zkMaster} equals this nimbus' {@code host:port}
     *         (ignoring case) or its host part matches the local ip; {@code false} for a
     *         blank value or one that has no host part at all
     */
    public boolean isLeader(String zkMaster) {
        if (StringUtils.isBlank(zkMaster)) {
            return false;
        }

        if (hostPort.equalsIgnoreCase(zkMaster)) {
            return true;
        }

        // Two nimbus running on the same node isn't allowed
        // so just checks ip is enough here
        String[] part = zkMaster.split(":");
        if (part.length == 0) {
            // a value made only of ':' splits into an empty array; it cannot name this host
            return false;
        }
        return NetWorkUtils.equals(part[0], NetWorkUtils.ip());
    }

    /**
     * Follower loop; runs until {@link #clean()} is called or the process is halted.
     */
    @Override
    public void run() {
        LOG.info("Follower thread starts!");
        while (state) {
            StormClusterState zkClusterState = data.getStormClusterState();
            try {
                Thread.sleep(sleepTime);
                if (!zkClusterState.leader_existed()) {
                    this.tryToBeLeader(data.getConf());
                    continue;
                }

                String master = zkClusterState.get_leader_host();
                boolean isZkLeader = isLeader(master);
                if (isZkLeader) {
                    if (!data.isLeader()) {
                        // promotion: drop the slave entries, then switch to leader mode
                        zkClusterState.unregister_nimbus_host(hostPort);
                        zkClusterState.unregister_nimbus_detail(hostPort);
                        data.setLeader(true);
                        leaderCallback.execute();
                    }
                    continue;
                } else {
                    if (data.isLeader()) {
                        // this process believes it is leader but zookeeper names another node
                        LOG.info("New zk master is {}", master);
                        JStormUtils.halt_process(1, "Lost zk master node, halt process");
                        return;
                    }
                }

                // here the nimbus is not leader
                if (data.getBlobStore() instanceof LocalFsBlobStore) {
                    blobSync();
                }
                zkClusterState.update_nimbus_slave(hostPort, data.uptime());
                update_nimbus_detail();
            } catch (InterruptedException ignored) {
                // Deliberately not re-interrupting: the loop is controlled by 'state', and a
                // restored interrupt flag would make the next Thread.sleep throw immediately.
            } catch (Exception e) {
                if (state) {
                    LOG.error("Unknown exception ", e);
                }
            }
        }
        LOG.info("Follower thread has been closed!");
    }

    /**
     * Asks {@link #run()} to stop; the loop exits when it next checks the flag.
     */
    public void clean() {
        state = false;
    }

    /**
     * Synchronizes the local blob store with the keys listed on zookeeper. Does nothing when
     * this nimbus is the leader; any failure is logged and not propagated.
     */
    private synchronized void blobSync() {
        if (data.isLeader()) {
            return;
        }
        try {
            BlobStore blobStore = data.getBlobStore();
            StormClusterState clusterState = data.getStormClusterState();
            Set<String> localKeys = Sets.newHashSet(blobStore.listKeys());
            // passing the callback again while reading the zookeeper key list
            Set<String> zkKeys = Sets.newHashSet(clusterState.blobstore(blobSyncCallback));
            BlobSynchronizer blobSynchronizer = new BlobSynchronizer(blobStore, data.getConf());
            blobSynchronizer.setNimbusInfo(data.getNimbusHostPortInfo());
            blobSynchronizer.setBlobStoreKeySet(localKeys);
            blobSynchronizer.setZookeeperKeySet(zkKeys);
            blobSynchronizer.syncBlobs();
        } catch (Exception e) {
            LOG.error("blob sync error", e);
        }
    }

    /**
     * Attempts to register this nimbus as leader if {@link #check_nimbus_priority()} allows it.
     * The callback passed along re-runs this method and halts the process if that fails.
     *
     * @param conf nimbus configuration, forwarded to the retry callback
     * @throws Exception if the priority check or the cluster state call fails
     */
    private void tryToBeLeader(final Map conf) throws Exception {
        boolean allowed = check_nimbus_priority();

        if (allowed) {
            RunnableCallback masterCallback = new RunnableCallback() {
                @Override
                public void run() {
                    try {
                        tryToBeLeader(conf);
                    } catch (Exception e) {
                        LOG.error("tryToBeLeader error", e);
                        // NOTE(review): the meaning of status 30 is not documented here - confirm
                        JStormUtils.halt_process(30, "Cant't be master" + e.getMessage());
                    }
                }
            };
            LOG.info("This nimbus can be leader");
            data.getStormClusterState().try_to_be_leader(Cluster.MASTER_SUBTREE, hostPort, masterCallback);
        } else {
            LOG.info("This nimbus can't be leader");
        }
    }

    /**
     * Compared with other nimbus to get priority of this nimbus.
     *
     * <p>A nimbus that misses no zookeeper key may lead immediately. Otherwise it waits
     * {@link #SLAVE_NIMBUS_WAIT_TIME} seconds and may lead only if no other registered
     * follower reports a smaller {@link #NIMBUS_DIFFER_COUNT_ZK}.
     *
     * @return {@code true} if this nimbus may try to become leader
     * @throws Exception if the cluster state cannot be read or updated, or the wait is interrupted
     */
    private boolean check_nimbus_priority() throws Exception {
        int gap = update_nimbus_detail();
        if (gap == 0) {
            return true;
        }

        // give better-synchronized followers time to publish their own details
        int left = SLAVE_NIMBUS_WAIT_TIME;
        while (left > 0) {
            LOG.info("nimbus.differ.count.zk is {}, so after {} seconds, nimbus will try to be leader!", gap, left);
            Thread.sleep(10 * 1000);
            left -= 10;
        }

        StormClusterState zkClusterState = data.getStormClusterState();

        List<String> followers = zkClusterState.list_dirs(Cluster.NIMBUS_SLAVE_DETAIL_SUBTREE, false);
        if (followers == null || followers.isEmpty()) {
            return false;
        }

        for (String follower : followers) {
            if (follower == null || follower.equals(hostPort)) {
                continue;
            }
            Map<?, ?> detail = zkClusterState.get_nimbus_detail(follower, false);
            if (detail == null) {
                continue;
            }
            Object differCount = detail.get(NIMBUS_DIFFER_COUNT_ZK);
            if (differCount != null && (JStormUtils.parseInt(differCount)) < gap) {
                LOG.info("Current node can't be leader, due to {} has higher priority", follower);
                return false;
            }
        }

        return true;
    }

    /**
     * Recomputes how many zookeeper keys are missing locally and stores the number under
     * {@link #NIMBUS_DIFFER_COUNT_ZK} in this nimbus' detail entry.
     *
     * @return the number of keys that are on zookeeper but not in the local blob store;
     *         always {@code 0} when the blob store is not a {@link LocalFsBlobStore}
     * @throws Exception if the blob store or the cluster state reports an error
     */
    private int update_nimbus_detail() throws Exception {
        //update count = count of zk's binary files - count of nimbus's binary files
        StormClusterState zkClusterState = data.getStormClusterState();

        // if we use other blobstore, such as HDFS, all nimbus slave can be leader
        // but if we use local blobstore, we should count topologies files
        int diffCount = 0;
        if (data.getBlobStore() instanceof LocalFsBlobStore) {
            Set<String> keysOnZk = Sets.newHashSet(zkClusterState.active_keys());
            Set<String> keysOnLocal = Sets.newHashSet(data.getBlobStore().listKeys());
            // we count number of keys which is on zk but not on local
            diffCount = Sets.difference(keysOnZk, keysOnLocal).size();
        }

        // keep whatever other details are already stored and only replace the counter
        Map mtmp = zkClusterState.get_nimbus_detail(hostPort, false);
        if (mtmp == null) {
            mtmp = new HashMap();
        }
        mtmp.put(NIMBUS_DIFFER_COUNT_ZK, diffCount);
        zkClusterState.update_nimbus_detail(hostPort, mtmp);
        LOG.debug("update nimbus details {}", mtmp);

        return diffCount;
    }

    /**
     * Check whether current node is master.
     *
     * <p>Polls zookeeper up to ten times, sleeping {@code sleepTime} milliseconds after each
     * attempt, and returns as soon as the recorded leader equals this nimbus' {@code host:port}.
     * If that never happens the process is halted.
     *
     * @throws Exception if the cluster state cannot be read
     */
    private void checkOwnMaster() throws Exception {
        int retry_times = 10;

        StormClusterState zkClient = data.getStormClusterState();
        // the sleep sits in the update clause so it also runs after a 'continue'
        for (int i = 0; i < retry_times; i++, JStormUtils.sleepMs(sleepTime)) {

            if (!zkClient.leader_existed()) {
                continue;
            }

            String zkHost = zkClient.get_leader_host();
            if (hostPort.equals(zkHost)) {
                // current process own master
                return;
            }
            LOG.warn("Current nimbus has started thrift, but fail to set as leader in zk:{}", zkHost);
        }

        String err = "Current nimbus failed to set as leader in zk, halting process";
        LOG.error(err);
        // NOTE(review): halts with status 0 although this is a failure path - confirm intended
        JStormUtils.halt_process(0, err);

    }

}