org.eclipse.gyrex.cloud.internal.locking.ZooKeeperLock.java Source code

Java tutorial

Introduction

Here is the source code for org.eclipse.gyrex.cloud.internal.locking.ZooKeeperLock.java

Source

/*******************************************************************************
 * Copyright (c) 2011, 2013 AGETO Service GmbH and others.
 * All rights reserved.
 *
 * This program and the accompanying materials are made available under the
 * terms of the Eclipse Public License v1.0 which accompanies this distribution,
 * and is available at http://www.eclipse.org/legal/epl-v10.html.
 *
 * Contributors:
 *     Gunnar Wagenknecht - initial API and implementation
 *******************************************************************************/
package org.eclipse.gyrex.cloud.internal.locking;

import java.io.UnsupportedEncodingException;
import java.util.Arrays;
import java.util.Comparator;
import java.util.UUID;
import java.util.concurrent.Callable;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicBoolean;

import org.eclipse.gyrex.cloud.internal.CloudActivator;
import org.eclipse.gyrex.cloud.internal.CloudDebug;
import org.eclipse.gyrex.cloud.internal.CloudState;
import org.eclipse.gyrex.cloud.internal.NodeInfo;
import org.eclipse.gyrex.cloud.internal.zk.ZooKeeperGate;
import org.eclipse.gyrex.cloud.internal.zk.ZooKeeperGateCallable;
import org.eclipse.gyrex.cloud.internal.zk.ZooKeeperMonitor;
import org.eclipse.gyrex.cloud.services.locking.IDistributedLock;
import org.eclipse.gyrex.cloud.services.locking.ILockMonitor;
import org.eclipse.gyrex.cloud.services.zookeeper.ZooKeeperBasedService;
import org.eclipse.gyrex.common.identifiers.IdHelper;

import org.eclipse.core.runtime.IPath;
import org.eclipse.core.runtime.IStatus;
import org.eclipse.core.runtime.MultiStatus;
import org.eclipse.core.runtime.Status;

import org.apache.commons.codec.digest.DigestUtils;
import org.apache.commons.lang.CharEncoding;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang.exception.ExceptionUtils;
import org.apache.commons.lang.math.NumberUtils;
import org.apache.zookeeper.CreateMode;
import org.apache.zookeeper.KeeperException.NoNodeException;
import org.apache.zookeeper.KeeperException.NotEmptyException;
import org.apache.zookeeper.KeeperException.SessionExpiredException;
import org.apache.zookeeper.ZooKeeper;
import org.apache.zookeeper.data.Stat;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * ZooKeeper lock implementation.
 * <p>
 * This implementation is based on the ZooKeeper recipe for globally synchronous locks.
 * This recipe also ensures that only one thread can hold a lock at any point in
 * time. The protocol is as follows.
 * <ol>
 * <li>Call create( ) with a pathname of "_locknode_/lock-" and the sequence and
 * ephemeral flags set.</li>
 * <li>Call getChildren( ) on the lock node without setting the watch flag (this
 * is important to avoid the herd effect).</li>
 * <li>If the pathname created in step 1 has the lowest sequence number suffix,
 * the client has the lock and the client exits the protocol.</li>
 * <li>The client calls exists( ) with the watch flag set on the path in the
 * lock directory with the next lowest sequence number.</li>
 * <li>if exists( ) returns false, go to step 2. Otherwise, wait for a
 * notification for the pathname from the previous step before going to step 2.</li>
 * </ol>
 * <p>
 * The unlock protocol is very simple: clients wishing to release a lock simply
 * delete the node they created in step 1.
 * </p>
 * <p>
 * Here are a few things to notice:
 * <ul>
 * <li>The removal of a node will only cause one client to wake up since each
 * node is watched by exactly one client. In this way, you avoid the herd
 * effect.</li>
 * <li>Because of the way locking is implemented, it is easy to see the amount
 * of lock contention, break locks, debug locking problems, etc.</li>
 * <li>It's possible to kill locks directly in ZooKeeper for administrative
 * purposes.</li>
 * </ul>
 * </p>
 */
public abstract class ZooKeeperLock<T extends IDistributedLock> extends ZooKeeperBasedService
        implements IDistributedLock {

    /**
     * The loop that actually acquires the lock.
     * <p>
     * It implements steps 2 to 5 of the lock protocol: the children of the lock
     * node are read and sorted, and the lock is reported as acquired when the
     * node of this lock instance is the first one. Otherwise the loop polls
     * until the preceding node disappears and then starts over.
     * </p>
     */
    private final class AcquireLockLoop implements Callable<Boolean> {
        /** point in time which is compared against {@link System#currentTimeMillis()} in order to abort the loop */
        private final long abortTime;
        /** the timeout; a value less than or equal to zero disables the abort time check */
        private final long timeout;
        /** <code>true</code> for a recovery attempt (only used for selecting the error message) */
        private final boolean recover;

        /**
         * Creates a new instance.
         * 
         * @param abortTime
         *            the time (in milliseconds, see
         *            {@link System#currentTimeMillis()}) after which the loop
         *            gives up
         * @param timeout
         *            the timeout; if less than or equal to zero the loop never
         *            times out
         * @param recover
         *            <code>true</code> if the loop is executed as part of a
         *            lock recovery, <code>false</code> otherwise
         */
        private AcquireLockLoop(final long abortTime, final long timeout, final boolean recover) {
            this.abortTime = abortTime;
            this.timeout = timeout;
            this.recover = recover;
        }

        /**
         * Spins until the lock has been acquired or the abort time is reached.
         * 
         * @return <code>true</code> once the lock has been acquired
         * @throws TimeoutException
         *             if the loop is left because the abort time elapsed
         * @throws Exception
         *             if the preceding lock node could not be discovered, the
         *             thread was interrupted while sleeping, or a ZooKeeper
         *             operation failed
         */
        @Override
        public Boolean call() throws Exception {
            // get gate
            final ZooKeeperGate zk = ZooKeeperGate.get();

            // start acquire loop
            ACQUIRE_LOOP: do {
                if (CloudDebug.zooKeeperLockService) {
                    LOG.debug("Starting acquire lock loop for lock {}/{}", lockNodePath, myLockName);
                }

                // 2. Call getChildren( ) on the lock node without setting the watch flag (this is important to avoid the herd effect).
                final Object[] nodeNames = zk.readChildrenNames(lockNodePath, null).toArray();

                // sanity check
                if (nodeNames.length == 0) {
                    // this is bogus, we actually created a node above
                    LOG.warn(
                            "Unexpected child count for ZooKeeper node {}. We just created a sequential child but it wasn't there. This may indicate an instability in the system.",
                            lockNodePath);
                    // note, the continue jumps to the loop condition, i.e. the abort time is still honored
                    continue ACQUIRE_LOOP;
                }

                // sort based on sequence numbers
                sortLockNodeChildren(nodeNames);

                // the active lock name (first node in list)
                activeLockName = (String) nodeNames[0];
                if (CloudDebug.zooKeeperLockService) {
                    LOG.debug("Found active lock {} for lock {}", activeLockName, lockNodePath);
                }

                // 3. If the pathname created in step 1 has the lowest sequence number suffix, the client has the lock and the client exits the protocol.
                if (isActiveLock()) {
                    notifyLockAcquired();
                    return true;
                }

                // find our preceding node
                String precedingNodeName = null;
                for (int i = 0; i < nodeNames.length; i++) {
                    if (myLockName.equals(nodeNames[i])) {
                        // note, although not possible (we check equals i=0 above) we double check
                        precedingNodeName = i > 0 ? (String) nodeNames[i - 1] : null;
                    }
                }
                // no preceding node means that our own node is not in the list (or is the first one)
                if (precedingNodeName == null) {
                    if (recover)
                        throw new LockAcquirationFailedException(getId(),
                                "Impossible to recover lock. The preceding lock could not be discovered.");
                    else
                        throw new LockAcquirationFailedException(getId(),
                                "Impossible to acquire lock. The preceding lock could not be discovered.");
                }
                if (CloudDebug.zooKeeperLockService) {
                    LOG.debug("Found preceding lock {} for lock {}", precedingNodeName, lockNodePath);
                }

                // 4. The client calls exists( ) with the watch flag set on the path in the lock directory with the next lowest sequence number.
                // 5. if exists( ) returns false, go to step 2. Otherwise, wait for a notification for the pathname from the previous step before going to step 2.
                final IPath pathToPreceedingNode = lockNodePath.append(precedingNodeName);
                if (zk.exists(pathToPreceedingNode)) {
                    // the lock is still taken, wait for the predecessor go away
                    // TODO: we really need to fix ZooKeeper in order to allow removal of transient watches
                    // for now we just sleep a little and re-try again
                    // (https://bugs.eclipse.org/bugs/show_bug.cgi?id=350927)
                    // upper bound for the back-off: 5 seconds without a timeout, otherwise
                    // the remaining time minus 500ms (but never less than 50ms)
                    final long maxSleepTime = timeout <= 0 ? 5000L
                            : Math.max((abortTime - System.currentTimeMillis() - 500), 50L);
                    long sleepTime = 250L;
                    while ((timeout <= 0) || (abortTime > System.currentTimeMillis())) {
                        if (!zk.exists(pathToPreceedingNode)) {
                            // node has been removed
                            continue ACQUIRE_LOOP;
                        }

                        if (CloudDebug.zooKeeperLockService) {
                            LOG.debug("Sleeping {}ms lock {}/{}",
                                    new Object[] { sleepTime, lockNodePath, myLockName });
                        }

                        // sleep
                        Thread.sleep(sleepTime);

                        // update sleep time (doubles on every iteration up to the maximum)
                        sleepTime = Math.min(sleepTime * 2, maxSleepTime);
                    }
                }

                if (CloudDebug.zooKeeperLockService) {
                    LOG.debug("End acquire lock loop for lock {}/{}", lockNodePath, myLockName);
                }
            } while ((timeout <= 0) || (abortTime > System.currentTimeMillis()));

            if (CloudDebug.zooKeeperLockService) {
                LOG.debug("Timeout retrying to acquire lock {}/{}", lockNodePath, myLockName);
            }

            // when we reach this point the loop was left because the abort time elapsed
            throw new TimeoutException(String.format("Unable to acquire lock %s within the given timeout. (%s/%s)",
                    getId(), activeLockName, myLockName));
        }

    }

    /**
     * Callable which creates the sequential node representing this lock
     * instance below the lock node path.
     * <p>
     * Nothing is created when the lock is already closed or when a lock name
     * has been assigned before. The callable always returns <code>true</code>.
     * </p>
     */
    private final class CreateLockNode implements Callable<Boolean> {
        @Override
        public Boolean call() throws Exception {
            // locks are session-only locks and typically may not be re-acquired,
            // thus any previously given lock name is kept untouched
            if (isClosed() || (myLockName != null))
                return Boolean.TRUE;

            final ZooKeeperGate gate = ZooKeeperGate.get();

            // the sequential node is created using the lock name prefix
            final IPath prefixPath = lockNodePath.append(LOCK_NAME_PREFIX);
            final CreateMode mode;
            if (isEphemeral()) {
                mode = CreateMode.EPHEMERAL_SEQUENTIAL;
            } else {
                mode = CreateMode.PERSISTENT_SEQUENTIAL;
            }
            final IPath createdPath = gate.createPath(prefixPath, mode, lockNodeContent);

            // the last segment of the created path is our lock name
            myLockName = createdPath.lastSegment();
            if (CloudDebug.zooKeeperLockService) {
                LOG.debug("Created lock node {} for lock {}", myLockName, lockNodePath);
            }

            // remember the key which allows recovering this lock node
            myRecoveryKey = createRecoveryKey(myLockName, lockNodeContent);

            // register the kill monitor with the node (allows remote kill)
            gate.readRecord(createdPath, killMonitor, null);

            return Boolean.TRUE;
        }
    }

    /**
     * Callable which removes the node of this lock instance and afterwards
     * tries to remove the (then possibly empty) lock node path as well.
     * <p>
     * Returns <code>false</code> if no lock name is set, <code>true</code>
     * otherwise.
     * </p>
     */
    private final class DeleteLockNode extends ZooKeeperCallable<Boolean> {

        @Override
        protected Boolean call(final ZooKeeper keeper) throws Exception {
            // work on a snapshot of the (volatile) lock name
            final String nodeToDelete = myLockName;
            if (nodeToDelete == null)
                return false;

            deleteMyNode(keeper, nodeToDelete);

            // the node is gone now, forget about its name
            myLockName = null;

            // best effort clean-up of the parent
            deleteLockNodePath(keeper);

            return true;
        }

        /**
         * Deletes the lock node path itself. A missing node or a node which
         * still has children is not treated as a failure.
         */
        private void deleteLockNodePath(final ZooKeeper keeper) throws Exception {
            try {
                if (CloudDebug.zooKeeperLockService) {
                    LOG.debug("Deleting lock node in ZooKeeper {}", lockNodePath);
                }
                keeper.delete(lockNodePath.toString(), -1);
            } catch (final NoNodeException e) {
                // somebody else removed it already
                if (CloudDebug.zooKeeperLockService) {
                    LOG.debug("Lock node already gone {}", lockNodePath);
                }
            } catch (final NotEmptyException e) {
                // other locks are still waiting below the node
                if (CloudDebug.zooKeeperLockService) {
                    LOG.debug("Lock node not empty {}", lockNodePath);
                }
            }
        }

        /**
         * Deletes the node with the specified name below the lock node path. A
         * missing node is not treated as a failure.
         */
        private void deleteMyNode(final ZooKeeper keeper, final String nodeName) throws Exception {
            try {
                if (CloudDebug.zooKeeperLockService) {
                    LOG.debug("Deleting my lock node in ZooKeeper {}/{}", lockNodePath, myLockName);
                }
                keeper.delete(lockNodePath.append(nodeName).toString(), -1);
            } catch (final NoNodeException e) {
                // nothing left to delete
                if (CloudDebug.zooKeeperLockService) {
                    LOG.debug("My lock node already gone {}/{}", lockNodePath, myLockName);
                }
            }
        }
    }

    private final class GetLockStatus extends ZooKeeperGateCallable<IStatus> {

        @Override
        protected IStatus call(final ZooKeeperGate zk) throws Exception {
            // get nodes
            Object[] nodeNames;
            try {
                nodeNames = zk.readChildrenNames(lockNodePath, null).toArray();
            } catch (final NoNodeException e) {
                nodeNames = null;
            }

            // quick check
            if ((nodeNames == null) || (nodeNames.length == 0))
                return info("Lock '%s' is inactive! There are no clients waiting to aquire the lock.", lockId);

            // prepare status (lock is obviously active!)
            final MultiStatus status = new MultiStatus(CloudActivator.SYMBOLIC_NAME, 0,
                    String.format("Lock '%s' is active", lockId), null);

            // sort lock nodes
            sortLockNodeChildren(nodeNames);

            // print details
            for (int i = 0; i < nodeNames.length; i++) {
                final String lockName = (String) nodeNames[i];

                // build node path
                final IPath nodePath = lockNodePath.append(lockName);

                // get node content
                final Stat stat = new Stat();
                final String record = zk.readRecord(nodePath, StringUtils.EMPTY, stat);

                final String[] lockData = StringUtils.splitByWholeSeparator(record, SEPARATOR);
                if ((lockData == null) | (lockData.length < 3)) {
                    status.add(error("Invalid data for client '%s'", lockName));
                    continue;
                }

                status.add(info("%s: %s (%s, 0x%s, owner 0x%s)", i == 0 ? "OWNER" : "WAITING", lockData[0],
                        lockData[1], StringUtils.left(lockData[2], 6), Long.toHexString(stat.getEphemeralOwner())));
            }

            return status;
        }

        private Status error(final String format, final Object... args) {
            return new Status(IStatus.INFO, CloudActivator.SYMBOLIC_NAME, String.format(format, args));
        }

        private Status info(final String format, final Object... args) {
            return new Status(IStatus.INFO, CloudActivator.SYMBOLIC_NAME, String.format(format, args));
        }

    }

    /**
     * The reasons passed to <code>killLock</code>.
     */
    static enum KillReason {
        /** used when the service is disconnected */
        ZOOKEEPER_DISCONNECT,
        /** used when the lock node has been deleted on the lock server */
        LOCK_DELETED,
        /** used when the lock record has been changed remotely */
        LOCK_STOLEN,
        /** a regular release; failures to remove the node are reported to the caller in this case */
        REGULAR_RELEASE,
        /** used when acquiring the lock failed */
        ACQUIRE_FAILED,
        /** presumably used when resuming a suspended lock failed (TODO confirm against callers) */
        RESUME_FAILED
    }

    /**
     * Operation for recovering an existing lock node.
     * <p>
     * The recovery key identifies the lock node (by name) as well as the
     * content which is expected to be stored in that node. The recovery only
     * succeeds if the node still exists and its content matches. On success
     * the node content is replaced with the content of this lock instance and
     * a new recovery key is generated.
     * </p>
     */
    private final class RecoverLockNode implements Callable<Boolean> {

        /** name of the lock node to recover (extracted from the recovery key) */
        private final String lockName;
        /** content the lock node must have in order to be recovered (extracted from the recovery key) */
        private final String expectedNodeContent;

        /**
         * Creates a new instance.
         * 
         * @param recoveryKey
         *            the recovery key (must not be blank)
         * @throws IllegalArgumentException
         *             if the recovery key is blank or has an invalid format
         */
        public RecoverLockNode(final String recoveryKey) {
            if (StringUtils.isBlank(recoveryKey))
                throw new IllegalArgumentException("recovery key must not be empty");

            // extract lock name and node content from recovery key
            final String[] extractRecoveryKeyDetails = extractRecoveryKeyDetails(recoveryKey);
            lockName = extractRecoveryKeyDetails[0];
            expectedNodeContent = extractRecoveryKeyDetails[1];
        }

        /**
         * Attempts the recovery.
         * 
         * @return <code>false</code> if the lock node does not exist anymore
         *         (or has no content), <code>true</code> otherwise
         * @throws Exception
         *             if the node content does not match the recovery key or a
         *             ZooKeeper operation failed
         */
        @Override
        public Boolean call() throws Exception {
            // note, we rely on any previously given lock name as locks are session-only locks and
            // typically may not be re-acquired
            if (!isClosed() && (null == myLockName)) {
                final ZooKeeperGate zk = ZooKeeperGate.get();

                if (CloudDebug.zooKeeperLockService) {
                    LOG.debug("Recovery attempt for lock node {} for lock {}", lockName, lockNodePath);
                }

                // build node path
                final IPath nodePath = lockNodePath.append(lockName);

                // get node content
                final Stat stat = new Stat();
                final String record = zk.readRecord(nodePath, StringUtils.EMPTY, stat);

                // check that node exists
                if (StringUtils.isBlank(record)) {
                    // does not exist, so return false here which indicates the we cannot recover
                    if (CloudDebug.zooKeeperLockService) {
                        LOG.debug("Recovery attempt failed. Lock node {}/{} does not exists", lockNodePath,
                                lockName);
                    }
                    return false;
                }

                // check that content matches
                if (!StringUtils.equals(record, expectedNodeContent)) {
                    if (CloudDebug.zooKeeperLockService) {
                        LOG.debug("Recovery attempt failed. Recovery key does not match for lock node {}/{}",
                                lockNodePath, lockName);
                    }
                    throw new LockAcquirationFailedException(lockId,
                            "Unable to recover lock. The recovery key does not match.");
                }

                // reset lock name
                myLockName = nodePath.lastSegment();
                if (CloudDebug.zooKeeperLockService) {
                    LOG.debug("Recovered lock node {} for lock {}", myLockName, lockNodePath);
                }

                // generate new recovery key
                myRecoveryKey = createRecoveryKey(myLockName, lockNodeContent);

                // write the new node content
                // the node must contain the plain node content (NOT the recovery key); a recovery
                // compares the node record with the content part of the recovery key only, thus,
                // writing the whole key would make the new recovery key unusable
                // note, we must pass the expected version in order to discover concurrent recovery requests
                zk.writeRecord(nodePath, lockNodeContent, stat.getVersion());

                // allow remote kill
                zk.readRecord(nodePath, killMonitor, null);
            }
            return true;
        }
    }

    /**
     * Callable which tries to resume a previously acquired lock.
     * <p>
     * The children of the lock node are read and sorted. If the node of this
     * lock instance is (still) the first one the lock is resumed and
     * <code>true</code> is returned. If the node of this lock instance is gone
     * <code>false</code> is returned. If the node still exists but a different
     * node is the active one an exception is thrown.
     * </p>
     */
    private final class ResumeLockLoop implements Callable<Boolean> {
        @Override
        public Boolean call() throws Exception {
            // get gate
            final ZooKeeperGate zk = ZooKeeperGate.get();

            // start resume loop
            if (CloudDebug.zooKeeperLockService) {
                LOG.debug("Starting resume lock loop for lock {}/{}", lockNodePath, myLockName);
            }

            // 2. Call getChildren( ) on the lock node without setting the watch flag (this is important to avoid the herd effect).
            final Object[] nodeNames = zk.readChildrenNames(lockNodePath, null).toArray();

            // sanity check
            if (nodeNames.length == 0)
                // all children have been removed, the lock can't be resumed
                // (this also means that this lock node has been delete)
                return false;

            // sort based on sequence numbers
            // (names without a parsable sequence number are sorted to the end)
            Arrays.sort(nodeNames, new Comparator<Object>() {
                @Override
                public int compare(final Object o1, final Object o2) {
                    final String n1 = (String) o1;
                    final String n2 = (String) o2;
                    final int sequence1 = getSequenceNumber(n1);
                    final int sequence2 = getSequenceNumber(n2);
                    if (sequence1 == -1)
                        return sequence2 != -1 ? 1 : n1.compareTo(n2);
                    if (sequence2 == -1)
                        return -1;
                    // compare explicitly instead of subtracting; a subtraction may overflow
                    // for sequence numbers far apart which results in a wrong order
                    return sequence1 < sequence2 ? -1 : (sequence1 == sequence2 ? 0 : 1);
                }
            });

            // the active lock name
            activeLockName = (String) nodeNames[0];
            if (CloudDebug.zooKeeperLockService) {
                LOG.debug("Found active lock {} for lock {}", activeLockName, lockNodePath);
            }

            // if this is still the active after refresh then we are done
            if (isActiveLock()) {
                notifyLockAcquired();
                return true;
            }

            // check if our node is still there
            for (int i = 0; i < nodeNames.length; i++) {
                if (myLockName.equals(nodeNames[i]))
                    // this is strange, we aren't the active lock but our node is still there
                    throw new LockAcquirationFailedException(getId(),
                            "Impossible to resume lock. The active lock changed and conflicts with this lock.");
            }

            // we must assume that the lock has been deleted
            return false;
        }
    }

    /**
     * A monitor that allows to wait for deletion of a ZooKeeper node path.
     * <p>
     * Each monitor instance owns its own latch. The latch is released once
     * {@link #pathDeleted(String)} has been called on the instance and stays
     * released afterwards, i.e. an instance must not be re-used for waiting
     * for another deletion.
     * </p>
     */
    static class WaitForDeletionMonitor extends ZooKeeperMonitor {

        // must be an instance field; a shared (static) latch would be released
        // for ALL monitors as soon as the very first deletion has been seen
        private final CountDownLatch deletionHappend = new CountDownLatch(1);

        /**
         * Blocks until the deletion has been reported or the timeout elapsed.
         * 
         * @param timeout
         *            the maximum time to wait in milliseconds; if less than or
         *            equal to zero the method waits without a time limit
         * @return <code>true</code> if the deletion has been reported,
         *         <code>false</code> if the timeout elapsed before
         * @throws InterruptedException
         *             if the current thread is interrupted while waiting
         */
        public boolean await(final long timeout) throws InterruptedException {
            if (timeout > 0)
                return deletionHappend.await(timeout, TimeUnit.MILLISECONDS);
            else {
                deletionHappend.await();
                return true;
            }
        }

        @Override
        protected void pathDeleted(final String path) {
            // release any thread waiting in await
            deletionHappend.countDown();
        }

    }

    /** separator used between the segments of the lock node content as well as between lock name and node content in a recovery key */
    private static final String SEPARATOR = "__";

    /** name prefix of the sequential child nodes created below the lock node */
    private static final String LOCK_NAME_PREFIX = "lock-";
    /** the logger */
    private static final Logger LOG = LoggerFactory.getLogger(ZooKeeperLock.class);

    /**
     * Builds a recovery key by joining the lock name and the node content
     * using the separator.
     * <p>
     * This method is only public for testing purposes. It must not be called by
     * clients.
     * </p>
     * 
     * @param lockName
     *            the lock name (must not be <code>null</code>)
     * @param nodeContent
     *            the node content (must not be <code>null</code>)
     * @return the recovery key
     * @noreference This method is not intended to be referenced by clients.
     */
    public static String createRecoveryKey(final String lockName, final String nodeContent) {
        final String keyPrefix = lockName.concat(SEPARATOR);
        return keyPrefix.concat(nodeContent);
    }

    /**
     * Splits a recovery key into the lock name and the node content.
     * <p>
     * The key is split at the <em>first</em> occurrence of the separator. Thus,
     * the node content may itself contain the separator.
     * </p>
     * <p>
     * This method is only public for testing purposes. It must not be called by
     * clients.
     * </p>
     * 
     * @param recoveryKey
     *            the recovery key
     * @return an array of length two containing the lock name (index 0) and
     *         the node content (index 1)
     * @throws IllegalArgumentException
     *             if the key is <code>null</code>, contains no separator, or
     *             if the lock name or the node content is blank
     * @noreference This method is not intended to be referenced by clients.
     */
    public static String[] extractRecoveryKeyDetails(final String recoveryKey) {
        // fail with a well defined exception instead of a NullPointerException
        if (recoveryKey == null)
            throw new IllegalArgumentException("invalid recovery key format");

        // locate the first separator
        // (a key starting with the separator yields an empty lock name and is rejected below)
        final int separatorIndex = recoveryKey.indexOf(SEPARATOR);
        if (separatorIndex < 0)
            throw new IllegalArgumentException("invalid recovery key format");

        final String lockName = recoveryKey.substring(0, separatorIndex);
        final String nodeContent = recoveryKey.substring(separatorIndex + SEPARATOR.length());

        if (StringUtils.isBlank(lockName) || StringUtils.isBlank(nodeContent))
            throw new IllegalArgumentException("invalid recovery key format");
        return new String[] { lockName, nodeContent };
    }

    /**
     * Parses the sequence number of a lock node name.
     * 
     * @param nodeName
     *            the node name
     * @return the number following the lock name prefix, or <code>-1</code> if
     *         the remainder of the name cannot be converted to an int
     */
    private static int getSequenceNumber(final String nodeName) {
        final String sequencePart = StringUtils.removeStart(nodeName, LOCK_NAME_PREFIX);
        return NumberUtils.toInt(sequencePart, -1);
    }

    /**
     * Monitor registered with the lock node of this lock instance. It kills
     * the lock when the node is deleted or its record is modified while this
     * lock is the active lock and not closed.
     */
    final ZooKeeperMonitor killMonitor = new ZooKeeperMonitor() {
        @Override
        protected void pathDeleted(final String path) {
            // events are only of interest as long as we are still active (ZOOKEEPER-442)
            if (isActiveLock() && !isClosed()) {
                LOG.warn("Lock {} has been deleted on the lock server!", getId());
                killLock(KillReason.LOCK_DELETED);
            }
        }

        @Override
        protected void recordChanged(final String path) {
            // events are only of interest as long as we are still active (ZOOKEEPER-442)
            if (isActiveLock() && !isClosed()) {
                // a remote modification of the lock record means the lock was stolen
                LOG.warn("Lock {} has been stolen!", getId());
                killLock(KillReason.LOCK_STOLEN);
            }
        }
    };

    /** the lock id as passed to the constructor */
    final String lockId;

    /** path of the lock node (the parent path with the lock id appended) */
    final IPath lockNodePath;
    /** content written into the node created for this lock instance (node id, location and a random hash joined by the separator) */
    final String lockNodeContent;
    /** if <code>true</code> an ephemeral sequential node is created, otherwise a persistent sequential node */
    final boolean ephemeral;
    /** if <code>false</code> any attempt to acquire the lock in recovery mode is rejected */
    final boolean recoverable;
    /** the lock monitor as passed to the constructor */
    private final ILockMonitor<T> lockMonitor;

    /** the suspended flag as reported by {@link #isSuspended()} */
    private final AtomicBoolean suspended = new AtomicBoolean(false);
    /** name of the node created (or recovered) for this lock instance; <code>null</code> if there is none */
    volatile String myLockName;

    /** the recovery key generated when the node was created or recovered */
    volatile String myRecoveryKey;
    /** name of the first node found below the lock node path after sorting; reset to <code>null</code> on close */
    volatile String activeLockName;

    /**
     * Creates a new lock instance.
     * <p>
     * The constructor pre-generates the content for the lock node, verifies
     * that the implementation can be used as the generic lock type and
     * activates the service.
     * </p>
     * 
     * @param lockId
     *            the lock id
     * @param lockMonitor
     *            the lock monitor
     * @param lockNodeParentPath
     *            the lock node parent path
     * @param ephemeral
     *            <code>true</code> if an ephemeral node should be created,
     *            <code>false</code> otherwise
     * @param recovarable
     *            <code>true</code> if the lock is recoverable,
     *            <code>false</code> otherwise
     * @throws IllegalArgumentException
     *             if the lock id is invalid
     */
    public ZooKeeperLock(final String lockId, final ILockMonitor<T> lockMonitor, final IPath lockNodeParentPath,
            final boolean ephemeral, final boolean recovarable) {
        super(200L, 5);
        if (!IdHelper.isValidId(lockId))
            throw new IllegalArgumentException("invalid lock id; please see IdHelper#isValidId");
        this.lockId = lockId;
        lockNodePath = lockNodeParentPath.append(lockId);
        this.lockMonitor = lockMonitor;
        this.ephemeral = ephemeral;
        this.recoverable = recovarable;

        // pre-generate lock node content info
        NodeInfo nodeInfo = CloudState.getNodeInfo();
        if (null == nodeInfo) {
            nodeInfo = new NodeInfo();
        }
        try {
            // node id + location + hash of a random UUID (makes the content unique per lock instance)
            lockNodeContent = nodeInfo.getNodeId() + SEPARATOR + nodeInfo.getLocation() + SEPARATOR
                    + DigestUtils.shaHex(UUID.randomUUID().toString().getBytes(CharEncoding.US_ASCII));
        } catch (final UnsupportedEncodingException e1) {
            // the message must name the encoding which is actually requested above
            throw new IllegalStateException("Please use a JVM that supports US-ASCII.", e1);
        }

        // check implementation
        try {
            asLockType();
        } catch (final ClassCastException e) {
            final ClassCastException castException = new ClassCastException(String.format(
                    "Cannot cast the lock implementation %s to the generic lock type. Please make sure that the implementation implements the interface. %s",
                    getClass().getName(), e.getMessage()));
            // keep the original exception for diagnosis
            castException.initCause(e);
            throw castException;
        }

        // activate
        activate();
    }

    /**
     * Acquires (or recovers) the lock.
     * <p>
     * A lock node is created (or an existing one is recovered using the
     * recovery key) and afterwards the acquire loop is executed. If anything
     * fails the lock is killed before the failure is reported to the caller.
     * </p>
     * 
     * @param timeout
     *            the timeout in milliseconds (added to the current time in
     *            order to compute the abort time of the acquire loop)
     * @param recover
     *            <code>true</code> if an existing lock node should be
     *            recovered, <code>false</code> if a new one should be created
     * @param recoveryKey
     *            the recovery key (only used if <code>recover</code> is
     *            <code>true</code>)
     * @return the lock, or <code>null</code> if the recovery reported that the
     *         lock node could not be recovered
     * @throws InterruptedException
     *             if an {@link InterruptedException} occurred while acquiring
     * @throws TimeoutException
     *             if a {@link TimeoutException} occurred while acquiring
     * @throws IllegalStateException
     *             if a recovery is requested but the lock is not recoverable
     */
    protected final T acquire(final long timeout, final boolean recover, final String recoveryKey)
            throws InterruptedException, TimeoutException {
        // the abort time is computed up-front so that the time spent for
        // creating (or recovering) the lock node counts as well
        final long abortTime = System.currentTimeMillis() + timeout;

        if (recover && !isRecoverable())
            throw new IllegalStateException("lock implementation is not recoverable");

        try {
            // step 1: lock node with a pathname of "_locknode_/lock-" and the sequence flag set
            if (!recover) {
                execute(new CreateLockNode());
            } else if (!execute(new RecoverLockNode(recoveryKey))) {
                // the lock node is gone, nothing to recover
                return null;
            }

            // remaining steps: spin until we own the lock
            execute(new AcquireLockLoop(abortTime, timeout, recover));

            return asLockType();
        } catch (final Exception e) {
            // clean-up first (best effort)
            try {
                killLock(KillReason.ACQUIRE_FAILED);
            } catch (final Exception cleanUpException) {
                LOG.error(
                        "Error during cleanup of failed lock acquisition. Please check server logs and also check lock service server. The lock may now be stalled. {}",
                        ExceptionUtils.getRootCauseMessage(cleanUpException));
            }

            // interruptions and timeouts are passed through as is
            if (e instanceof InterruptedException)
                throw (InterruptedException) e;
            if (e instanceof TimeoutException)
                throw (TimeoutException) e;

            // everything else is wrapped
            throw new LockAcquirationFailedException(lockId, e);
        }
    }

    /**
     * Returns this lock instance as the generic lock type.
     * 
     * @return this instance (unchecked cast)
     */
    @SuppressWarnings("unchecked")
    T asLockType() {
        // not pretty but there is no way around the unchecked cast
        final Object self = this;
        return (T) self;
    }

    /**
     * Kills the lock using {@link KillReason#ZOOKEEPER_DISCONNECT} as the
     * reason.
     */
    @Override
    protected void disconnect() {
        final KillReason reason = KillReason.ZOOKEEPER_DISCONNECT;
        killLock(reason);
    }

    /**
     * Invalidates the lock by resetting the active lock name.
     */
    @Override
    protected void doClose() {
        final boolean debug = CloudDebug.zooKeeperLockService;
        if (debug) {
            LOG.debug("Closing lock {}/{}", lockNodePath, myLockName);
        }

        // without an active lock name this lock can never be the active lock
        this.activeLockName = null;
    }

    /**
     * Returns the lock id.
     * 
     * @return the lock id as passed to the constructor
     */
    @Override
    public String getId() {
        return this.lockId;
    }

    /**
     * Returns the name of the lock node of this lock instance (may be
     * <code>null</code>).
     * <p>
     * This method is only exposed for testing purposes. Please do not call it.
     * </p>
     * 
     * @return the current lock name
     * @noreference This method is not intended to be referenced by clients.
     */
    public final String getMyLockName() {
        return this.myLockName;
    }

    /**
     * Builds status information about the lock.
     * <p>
     * Implementation note: the lock object is closed right after the status
     * has been built (even if building the status failed).
     * </p>
     * 
     * @return the lock status, or an error status if the lock information
     *         could not be read
     */
    public IStatus getStatus() {
        IStatus result;
        try {
            result = execute(new GetLockStatus());
        } catch (final Exception e) {
            final String message = String.format("Unable to read lock information. %s", e.getMessage());
            result = new Status(IStatus.ERROR, CloudActivator.SYMBOLIC_NAME, message, e);
        } finally {
            // the lock object must not be used any further
            close();
        }
        return result;
    }

    /**
     * Builds the details string consisting of the lock id, the state flags
     * (acquired, suspended), the lock name and the active lock name.
     * 
     * @return the details string
     */
    @Override
    protected String getToStringDetails() {
        final StringBuilder text = new StringBuilder("id=");
        text.append(lockId);

        // state flags
        if (isActiveLock()) {
            text.append(", ACQUIRED");
        }
        if (isSuspended()) {
            text.append(", SUSPENDED");
        }

        // node names
        text.append(", lockName=");
        text.append(myLockName);
        text.append(", activeLockName=");
        text.append(activeLockName);
        return text.toString();
    }

    /**
     * Indicates if this lock instance is the active lock, i.e. it has a lock
     * name and that name equals the active lock name.
     * 
     * @return <code>true</code> if this is the active lock, <code>false</code>
     *         otherwise
     */
    boolean isActiveLock() {
        // work on snapshots of the volatile fields
        final String mine = myLockName;
        final String active = activeLockName;
        if ((mine == null) || (active == null))
            return false;
        return active.equals(mine);
    }

    /**
     * Indicates if an ephemeral node is used for the lock.
     * 
     * @return <code>true</code> if the lock node is created as an ephemeral
     *         node, <code>false</code> if it is created as a persistent node
     */
    protected final boolean isEphemeral() {
        return this.ephemeral;
    }

    /**
     * Indicates if the lock may be acquired in recovery mode.
     * 
     * @return <code>true</code> if the lock is recoverable,
     *         <code>false</code> otherwise
     */
    protected final boolean isRecoverable() {
        return this.recoverable;
    }

    /**
     * Returns the current value of the suspended flag.
     * 
     * @return <code>true</code> if the suspended flag is set,
     *         <code>false</code> otherwise
     */
    @Override
    public boolean isSuspended() {
        final boolean currentlySuspended = suspended.get();
        return currentlySuspended;
    }

    /**
     * Indicates if the lock is valid, i.e. if it is the active lock.
     * <p>
     * A suspended lock is given the chance to resume (or gets disconnected)
     * before the check is performed.
     * </p>
     */
    @Override
    public boolean isValid() {
        // a suspended lock must be resumed or killed first (bug 360813)
        resumeOrKill();

        // validity is defined as being the active lock
        final boolean active = isActiveLock();
        return active;
    }

    /**
     * Kills the lock: closes this lock object, deletes the lock node (when
     * {@link #shouldDeleteOnKill(KillReason)} asks for it) and notifies the
     * lock monitor.
     * <p>
     * The call is a no-op if no lock name is set or if the lock object is
     * already closed.
     * </p>
     * 
     * @param killReason
     *            the kill reason
     * @throws IllegalStateException
     *             if the kill reason is {@link KillReason#REGULAR_RELEASE} and
     *             deleting the lock node (or the subsequent notification)
     *             failed with anything but an expired session
     */
    void killLock(final KillReason killReason) {
        // in order to release a lock we must delete the node we created
        // however, this might not be possible if the connection is already gone
        if ((myLockName == null) || isClosed())
            return;

        // immediately close in order to prevent re-entry
        // (a second call will hit the isClosed() guard above)
        // NOTE(review): execute(..) below runs after this close(); confirm that
        // execute still works on a closed service
        close();

        if (CloudDebug.zooKeeperLockService) {
            LOG.debug("Killing lock {}/{}", lockNodePath, myLockName);
        }

        try {
            // attempt to delete the lock (if possible)
            if (shouldDeleteOnKill(killReason)) {
                execute(new DeleteLockNode());
            }

            // sent notification
            notifyLockReleased(killReason);
        } catch (final SessionExpiredException e) {
            // session expired so assume the node was removed by ZooKeeper
            if (CloudDebug.zooKeeperLockService) {
                LOG.debug("ZooKeeper session expired. Relying on ZooKeeper server to remove lock node {}/{}",
                        lockNodePath, myLockName);
            }
            // sent notification
            // (always reported as a disconnect, independent of the original kill reason)
            notifyLockReleased(KillReason.ZOOKEEPER_DISCONNECT);
        } catch (final Exception e) {
            // fail if this is a regular release
            if (killReason == KillReason.REGULAR_RELEASE)
                throw new IllegalStateException(String.format(
                        "Unable to remove lock node %s. Please check server logs and also ZooKeeper. If node still exists and the session is not closed it might never get released. %s",
                        lockNodePath.append(myLockName), ExceptionUtils.getRootCauseMessage(e)), e);

            // log error and continue
            // NOTE(review): the lock monitor is not notified on this path - confirm this is intended
            LOG.warn(
                    "Unable to remove lock node {}. Please check server logs and also ZooKeeper. If node still exists and the session is not closed it might never get released. However, it should get released automatically after the session times out on the ZooKeeper server. {}",
                    lockNodePath.append(myLockName), ExceptionUtils.getRootCauseMessage(e));
        } finally {
            // close the service
            close();
        }
    }

    /**
     * Logs the successful acquisition and informs the lock monitor (if any).
     * <p>
     * The monitor is only called while this lock object is not closed and is
     * the active lock.
     * </p>
     */
    void notifyLockAcquired() {
        LOG.info("Successfully acquired lock {}!", getId());

        // no monitor, already closed or not the active lock: nothing to report
        if ((lockMonitor == null) || isClosed() || !isActiveLock()) {
            return;
        }

        lockMonitor.lockAcquired(asLockType());
    }

    /**
     * Logs the end of the lock and informs the lock monitor (if any).
     * <p>
     * A regular release is reported as "released", every other reason as
     * "lost". A failed acquire is neither logged nor reported to the monitor.
     * </p>
     * 
     * @param reason
     *            the reason why the lock was killed
     */
    void notifyLockReleased(final KillReason reason) {
        final boolean regularRelease = reason == KillReason.REGULAR_RELEASE;
        final boolean acquireFailed = reason == KillReason.ACQUIRE_FAILED;

        // don't spam the logs when the lock was never acquired
        if (!acquireFailed) {
            final String message = regularRelease ? "Successfully released lock {}!" : "Lost lock {}!";
            LOG.info(message, getId());
        }

        if (lockMonitor == null) {
            return;
        }

        if (regularRelease) {
            lockMonitor.lockReleased(asLockType());
        } else if (!acquireFailed) {
            // a failed acquire is not a lost lock, thus no notification in that case
            lockMonitor.lockLost(asLockType());
        }
    }

    /**
     * Logs the suspension and informs the lock monitor (if any).
     * <p>
     * The monitor is only called while this lock object is not closed and is
     * the active lock.
     * </p>
     */
    void notifyLockSuspended() {
        LOG.info("Lock {} has been suspended!", getId());

        // no monitor, already closed or not the active lock: nothing to report
        if ((lockMonitor == null) || isClosed() || !isActiveLock()) {
            return;
        }

        lockMonitor.lockSuspended(asLockType());
    }

    /**
     * Attempts to resume a lock that was active when it got suspended.
     * <p>
     * Runs the resume loop; on success the suspended flag is cleared, otherwise
     * the lock is killed (either as deleted or, when the resume attempt threw,
     * as failed resume).
     * </p>
     */
    @Override
    protected void reconnect() {
        // only a suspended, active and still open lock is a candidate for resume
        final boolean resumable = isSuspended() && isActiveLock() && !isClosed();
        if (!resumable) {
            return;
        }

        // after the ZooKeeper connection is back we MUST verify that this is
        // still the active lock, which is what the resume loop takes care of
        try {
            final boolean stillOwned = execute(new ResumeLockLoop());
            if (stillOwned) {
                // resume the lock
                suspended.set(false);
            } else {
                // the lock has been deleted
                killLock(KillReason.LOCK_DELETED);
            }
        } catch (final Exception resumeFailure) {
            try {
                killLock(KillReason.RESUME_FAILED);
            } catch (final Exception cleanUpFailure) {
                LOG.error(
                        "Error during cleanup of failed lock resume. Please check server logs and also check lock service server. The lock may now be stalled. {}",
                        ExceptionUtils.getRootCauseMessage(cleanUpFailure));
            }
        }
    }

    /**
     * Releases the lock by killing it with the regular release reason.
     */
    @Override
    public void release() {
        final KillReason reason = KillReason.REGULAR_RELEASE;
        killLock(reason);
    }

    /**
     * This ensures that a suspended lock (due to ZooKeeper connection loss) is
     * either resumed or killed within a timeout.
     * <p>
     * Returns immediately if the lock is not suspended. Otherwise the calling
     * thread waits (up to 30 seconds) for the suspended flag to be cleared. If
     * the lock is still suspended when the timeout elapsed, the disconnect
     * procedure is started.
     * </p>
     */
    private void resumeOrKill() {
        if (!isSuspended())
            return;

        LOG.warn("Lock {} is suspended! Waiting for resume.", getId());
        // TODO: the timeout should be read from the ZooKeeperGateConfig
        final long abortTime = System.currentTimeMillis() + 30000;
        // keep waiting while the lock is still suspended AND the abort time has
        // not been reached yet (the comparison must not be the other way round,
        // otherwise the loop never runs and the lock is disconnected right away)
        while (isSuspended() && (System.currentTimeMillis() < abortTime)) {
            // NOTE(review): sleep(..) is declared outside this section - confirm its unit
            sleep(1);
        }

        // disconnect lock if still suspended
        // (when suspended for a long time the connection might not recover, thus we start the disconnect procedure)
        if (isSuspended()) {
            disconnect();
        }
    }

    /**
     * Decides if the lock node should be deleted when the lock is killed for
     * the specified reason.
     * 
     * @param killReason
     *            the kill reason
     * @return <code>true</code> if a delete should be attempted,
     *         <code>false</code> otherwise
     */
    private boolean shouldDeleteOnKill(final KillReason killReason) {
        final boolean delete;
        switch (killReason) {
        case REGULAR_RELEASE:
            // a regular release always removes the node
            delete = true;
            break;

        case RESUME_FAILED:
        case ACQUIRE_FAILED:
            // clean up if we can't acquire (or resume) cleanly
            delete = true;
            break;

        case LOCK_DELETED:
        case LOCK_STOLEN:
            // nothing to delete when the lock was stolen or is already gone
            delete = false;
            break;

        case ZOOKEEPER_DISCONNECT:
            // a recoverable node is kept, any other node gets a delete attempt
            delete = !isRecoverable();
            break;

        default:
            // unknown reason: complain and delete to be on the safe side
            LOG.warn(
                    "Unhandled lock kill reason {}. Please report this issue to the developers. They should sanity check the implementation.",
                    killReason);
            delete = true;
            break;
        }
        return delete;
    }

    /**
     * Sorts the specified node names in place based on their sequence numbers
     * (ascending).
     * <p>
     * Names for which <code>getSequenceNumber</code> returns <code>-1</code>
     * are placed after all names with a sequence number; among themselves they
     * are ordered by their natural string order.
     * </p>
     * 
     * @param nodeNames
     *            the node names to sort (every element must be a
     *            {@link String})
     */
    void sortLockNodeChildren(final Object[] nodeNames) {
        // sort based on sequence numbers
        Arrays.sort(nodeNames, new Comparator<Object>() {
            @Override
            public int compare(final Object o1, final Object o2) {
                final String n1 = (String) o1;
                final String n2 = (String) o2;
                final int sequence1 = getSequenceNumber(n1);
                final int sequence2 = getSequenceNumber(n2);
                if (sequence1 == -1) {
                    // no sequence number: after any sequenced name, otherwise by name
                    return sequence2 != -1 ? 1 : n1.compareTo(n2);
                }
                if (sequence2 == -1) {
                    return -1;
                }
                // compare explicitly instead of subtracting; a subtraction may
                // overflow and report the wrong order for far apart values
                return sequence1 < sequence2 ? -1 : (sequence1 > sequence2 ? 1 : 0);
            }
        });
    }

    /**
     * Marks the lock as suspended and fires the suspended notification, but
     * only on the transition from not suspended to suspended.
     */
    @Override
    protected void suspend() {
        // already suspended: the event was fired before
        if (!suspended.compareAndSet(false, true)) {
            return;
        }

        // fire lock suspended event
        notifyLockSuspended();
    }

}