org.apache.cassandra.mutex.ClusterMutex.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.cassandra.mutex.ClusterMutex.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.cassandra.mutex;

import java.util.List;
import java.io.IOException;

import org.apache.cassandra.config.DatabaseDescriptor;

import org.apache.log4j.Logger;

import org.apache.zookeeper.ZooKeeper;
import org.apache.zookeeper.Watcher;
import org.apache.zookeeper.WatchedEvent;
import org.apache.zookeeper.ZooDefs.Ids;
import org.apache.zookeeper.CreateMode;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.KeeperException.ConnectionLossException;
import org.apache.zookeeper.KeeperException.NodeExistsException;
import org.apache.zookeeper.KeeperException.NoNodeException;
import org.apache.zookeeper.Watcher.Event.KeeperState;

public class ClusterMutex implements Watcher {
    private static Logger logger = Logger.getLogger(ClusterMutex.class);

    private static ClusterMutex instance;

    // Lazy on purpose. People who do not want mutex, should not need to have to worry about ZK
    private static class LazyHolder {
        private static final ClusterMutex clusterMutex = new ClusterMutex();
    }

    public static ClusterMutex instance() {
        return LazyHolder.clusterMutex;
    }

    // this must include hyphen (-) as the last character. substring search relies on it
    private final String LockPrefix = "lock-";

    // if we're disconnected from ZooKeeper server, how many times shall we try the lock
    // operation before giving up
    private final int OperationRetries = 3;

    // how long to sleep between retries. Actual time slept is RetryInterval multiplied by how
    // many times have we already tried.
    private final long RetryInterval = 500L;

    // Session timeout to ZooKeeper
    private final int SessionTimeout = 3000;

    private long lastConnect = 0;

    private ZooKeeper zk = null;
    private String root = "";
    private Integer mutex = null;

    private String hostString = new String();

    private ClusterMutex() {
        String zooKeeperRoot = DatabaseDescriptor.getZooKeeperRoot();
        if (zooKeeperRoot != null && !zooKeeperRoot.isEmpty())
            root = "/" + zooKeeperRoot;
        mutex = new Integer(1);

        String zooKeeperPort = DatabaseDescriptor.getZooKeeperPort();

        for (String zooKeeper : DatabaseDescriptor.getZooKeepers()) {
            logger.warn(zooKeeper);
            hostString += (hostString.isEmpty()) ? "" : ",";
            hostString += zooKeeper + ":" + zooKeeperPort;
            logger.warn(hostString);
        }

        try {
            connectZooKeeper();
            if (!root.isEmpty() && zk.exists(root, false) == null) {
                logger.info("Mutex root " + root + " does not exists, creating");
                zk.create(root, new byte[0], Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT);
            }
        } catch (Exception e) {
            throw new RuntimeException("ClusterMutex initialization failed: " + e.getMessage());
        }
    }

    /**
     * Connect to zookeeper server
     */
    private synchronized void connectZooKeeper() throws IOException {
        if (zk != null && zk.getState() != ZooKeeper.States.CLOSED)
            return;
        logger.info("Connecting to ZooKeepers: " + hostString);
        zk = new ZooKeeper(hostString, SessionTimeout, this);
    }

    /**
     * close current session and try to connect to zookeeper server
     */
    private synchronized void reestablishZooKeeperSession() throws IOException {
        long now = System.currentTimeMillis();

        // let's not flood zookeeper with connection requests
        if ((now - lastConnect) < SessionTimeout) {
            if (logger.isTraceEnabled())
                logger.trace(
                        "Only " + (now - lastConnect) + "ms passed since last reconnect, not trying again yet");
            return;
        }

        lastConnect = now;

        try {
            zk.close();
        } catch (Exception e) {
            // ignore all exceptions. we're calling this just to make sure ephemeral nodes are
            // deleted. zk might be in an inconsistent state and cause exception.
        }

        connectZooKeeper();
    }

    /**
     * process any events from ZooKeeper. We simply wake up any clients that are waiting for
     * file deletion. Number of clients is usually very small (most likely just one), so no need
     * for any complex logic.
     */
    public void process(WatchedEvent event) {
        if (logger.isTraceEnabled())
            logger.trace("Got event " + event.getType() + ", keeper state " + event.getState() + ", path "
                    + event.getPath());

        synchronized (mutex) {
            mutex.notifyAll();
        }
    }

    private boolean isConnected() {
        return zk.getState() == ZooKeeper.States.CONNECTED;
    }

    /**
     * lock
     *
     * @param lockName lock to be locked for writing. name can be any string, but it must not
     * include slash (/) or any character disallowed by ZooKeeper (see
     * hadoop.apache.org/zookeeper/docs/current/zookeeperProgrammers.html#ch_zkDataModel).
     * @return name of the znode inside zookeeper holding this lock.
     */
    public String lock(String lockName) throws KeeperException, InterruptedException, IOException {
        for (int i = 1; i <= OperationRetries; i++) {
            try {
                return lockInternal(lockName);
            } catch (KeeperException.SessionExpiredException e) {
                logger.warn("ZooKeeper session expired, reconnecting");
                reestablishZooKeeperSession();
            } catch (KeeperException.ConnectionLossException e) {
                // ZooKeeper handles lost connection automatically, but in order to reset all
                // ephemeral nodes, we close the whole thing.
                logger.warn("ZooKeeper connection lost, reconnecting");
                reestablishZooKeeperSession();
            }

            try {
                Thread.sleep(RetryInterval * i);
            } catch (InterruptedException ignore) {
                // Just fall through to retry
            }
        }

        throw new KeeperException.ConnectionLossException();
    }

    /**
     * creates lock znode in zookeeper under lockPath. Lock name is
     * LockPrefix plus ephemeral sequence number given by zookeeper
     * 
     * @param lockPath name of the lock (directory in zookeeper)
     */
    private String createLockZNode(String lockPath) throws KeeperException, InterruptedException {
        String lockZNode = null;

        try {
            lockZNode = zk.create(lockPath + "/" + LockPrefix, new byte[0], Ids.OPEN_ACL_UNSAFE,
                    CreateMode.EPHEMERAL_SEQUENTIAL);
        } catch (NoNodeException e) {
            logger.info(lockPath + " does not exist, creating");
            zk.create(lockPath, new byte[0], Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT);
            lockZNode = zk.create(lockPath + "/" + LockPrefix, new byte[0], Ids.OPEN_ACL_UNSAFE,
                    CreateMode.EPHEMERAL_SEQUENTIAL);
        }

        return lockZNode;
    }

    /**
     * lockInteral does the actual locking.
     *
     * @param same as in lock
     */
    private String lockInternal(String lockName) throws KeeperException, InterruptedException {
        String lockZNode = null;
        String lockPath = root + "/" + lockName;

        lockZNode = createLockZNode(lockPath);

        if (logger.isTraceEnabled())
            logger.trace("lockZNode created " + lockZNode);

        while (true) {
            // check what is our ID (sequence number at the end of file name added by ZK)
            int mySeqNum = Integer.parseInt(lockZNode.substring(lockZNode.lastIndexOf('-') + 1));
            int previousSeqNum = -1;
            String predessor = null;

            // get all children of lock znode and find the one that is just before us, if
            // any. This must be inside loop, as children might get deleted out of order because
            // of client disconnects. We cannot assume that the file that is in front of us this
            // time, is there next time. It might have been deleted even though earlier files
            // are still there.
            List<String> children = zk.getChildren(lockPath, false);
            if (children.isEmpty()) {
                logger.warn("No children in " + lockPath + " although one was just created. Going to try again");
                lockZNode = createLockZNode(lockPath);
                continue;
            }
            for (String child : children) {
                if (logger.isTraceEnabled())
                    logger.trace("child: " + child);
                int otherSeqNum = Integer.parseInt(child.substring(child.lastIndexOf('-') + 1));
                if (otherSeqNum < mySeqNum && otherSeqNum > previousSeqNum) {
                    previousSeqNum = otherSeqNum;
                    predessor = child;
                }
            }

            // our sequence number is smallest, we have the lock
            if (previousSeqNum == -1) {
                if (logger.isTraceEnabled())
                    logger.trace("No smaller znode sequences, " + lockZNode + " acquired lock");
                return lockZNode;
            }

            // there is at least one znode before us. wait for it to be deleted.
            synchronized (mutex) {
                if (zk.exists(lockPath + "/" + predessor, true) == null) {
                    if (logger.isTraceEnabled())
                        logger.trace(predessor + " does not exists, " + lockZNode + " acquired lock");
                    break;
                } else if (logger.isTraceEnabled())
                    logger.trace(predessor + " is still here, " + lockZNode + " must wait");

                mutex.wait();

                if (isConnected() == false) {
                    logger.info("ZooKeeper disconnected while waiting for lock");
                    throw new KeeperException.ConnectionLossException();
                }
            }
        }

        return lockZNode;
    }

    /**
     * unlock
     *
     * @param lockZNode this MUST be the string returned by lock call. Otherwise there will be
     * chaos.
     */
    public void unlock(String lockZNode) {
        assert (lockZNode != null);

        if (logger.isTraceEnabled())
            logger.trace("deleting " + lockZNode);

        try {
            zk.delete(lockZNode, -1);
        } catch (Exception e) {
            // We do not do anything here. The idea is to check that everything goes OK when
            // locking and let unlock always succeed from client's point of view. Ephemeral
            // nodes should be taken care of by ZooKeeper, so ignoring any errors here should
            // not break anything.
        }
    }

}