com.twitter.common.zookeeper.Group.java Source code

Java tutorial

Introduction

Here is the source code for com.twitter.common.zookeeper.Group.java

Source

// =================================================================================================
// Copyright 2011 Twitter, Inc.
// -------------------------------------------------------------------------------------------------
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this work except in compliance with the License.
// You may obtain a copy of the License in the LICENSE file, or at:
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =================================================================================================

package com.twitter.common.zookeeper;

import java.util.List;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Pattern;

import javax.annotation.Nullable;

import com.google.common.base.Preconditions;
import com.google.common.base.Predicate;
import com.google.common.base.Supplier;
import com.google.common.base.Suppliers;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterables;

import org.apache.commons.lang.ArrayUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.zookeeper.CreateMode;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.KeeperException.NoNodeException;
import org.apache.zookeeper.WatchedEvent;
import org.apache.zookeeper.Watcher;
import org.apache.zookeeper.Watcher.Event.EventType;
import org.apache.zookeeper.data.ACL;

import com.twitter.common.base.Command;
import com.twitter.common.base.Commands;
import com.twitter.common.base.ExceptionalSupplier;
import com.twitter.common.base.MorePreconditions;
import com.twitter.common.util.BackoffHelper;
import com.twitter.common.zookeeper.ZooKeeperClient.ZooKeeperConnectionException;

/**
 * This class exposes methods for joining and monitoring distributed groups.  The groups this class
 * monitors are realized as persistent paths in ZooKeeper with ephemeral child nodes for
 * each member of a group.
 */
public class Group {
    // Class-wide logger, named after this class.
    private static final Logger LOG = Logger.getLogger(Group.class.getName());

    // Supplier of null member data, used by the join() overloads that take no member data.
    private static final Supplier<byte[]> NO_MEMBER_DATA = Suppliers.ofInstance(null);
    // Node name prefix used by the constructor that takes neither a NodeScheme nor a prefix.
    private static final String DEFAULT_NODE_NAME_PREFIX = "member_";

    // Client used for every ZooKeeper interaction made by this group.
    private final ZooKeeperClient zkClient;
    // ACL passed along when ensuring the group path and when creating member nodes.
    private final ImmutableList<ACL> acl;
    // Normalized path of the group node.
    private final String path;

    // Scheme that names member nodes and recognizes them among the group's children.
    private final NodeScheme nodeScheme;
    // Predicate form of nodeScheme.isMember, used to filter child node names.
    private final Predicate<String> nodeNameFilter;

    // Helper that drives the retried ZooKeeper operations in this class.
    private final BackoffHelper backoffHelper;

    /**
     * Builds a group whose root is the given {@code path}.  The path has to be absolute; trailing
     * and repeated slashes are normalized away, so each of the following yields a group rooted at
     * /my/distributed/group:
     * <ul>
     *   <li>/my/distributed/group
     *   <li>/my/distributed/group/
     *   <li>/my/distributed//group
     * </ul>
     *
     * @param zkClient the client used to talk to ZooKeeper
     * @param acl the ACL applied when the persistent group path has to be created
     * @param path the absolute persistent path representing this group
     * @param nodeScheme the scheme deciding how member nodes are named and recognized
     */
    public Group(ZooKeeperClient zkClient, Iterable<ACL> acl, String path, NodeScheme nodeScheme) {
        Preconditions.checkNotNull(zkClient);
        this.zkClient = zkClient;
        this.acl = ImmutableList.copyOf(acl);

        Preconditions.checkNotNull(path);
        this.path = ZooKeeperUtils.normalizePath(path);

        Preconditions.checkNotNull(nodeScheme);
        this.nodeScheme = nodeScheme;

        // Adapts the scheme's membership test to a Predicate so child names can be filtered.
        this.nodeNameFilter = new Predicate<String>() {
            @Override
            public boolean apply(String candidateName) {
                return Group.this.nodeScheme.isMember(candidateName);
            }
        };

        this.backoffHelper = new BackoffHelper();
    }

    /**
     * Builds a group at {@code path} whose member nodes follow a {@link DefaultScheme} with the
     * default name prefix, 'member_'.
     *
     * @param zkClient the client used to talk to ZooKeeper
     * @param acl the ACL applied when the persistent group path has to be created
     * @param path the absolute persistent path representing this group
     */
    public Group(ZooKeeperClient zkClient, Iterable<ACL> acl, String path) {
        this(zkClient, acl, path, new DefaultScheme(DEFAULT_NODE_NAME_PREFIX));
    }

    /**
     * Creates a group at {@code path} whose member nodes are named by a {@link DefaultScheme} built
     * from {@code namePrefix}.  Equivalent to
     * {@link #Group(ZooKeeperClient, Iterable, String, NodeScheme)} with that scheme.
     *
     * @param zkClient the client to use for interactions with ZooKeeper
     * @param acl the ACL to use for creating the persistent group path if it does not already exist
     * @param path the absolute persistent path that represents this group
     * @param namePrefix the prefix for the names of the member nodes
     */
    public Group(ZooKeeperClient zkClient, Iterable<ACL> acl, String path, String namePrefix) {
        this(zkClient, acl, path, new DefaultScheme(namePrefix));
    }

    /**
     * Builds the full node path for a member of this group.
     *
     * @param memberId the id (node name) of the member; validated with
     *     {@code MorePreconditions.checkNotBlank}
     * @return this group's path, a "/" separator and the member id
     */
    public String getMemberPath(String memberId) {
        String checkedId = MorePreconditions.checkNotBlank(memberId);
        return path + "/" + checkedId;
    }

    /**
     * Returns the normalized ZooKeeper path of this group.
     */
    public String getPath() {
        return path;
    }

    /**
     * Extracts the member id (the last path segment) from the path of a member node.
     *
     * @param nodePath the full path of a node under this group's path
     * @return the text after the last "/" of {@code nodePath}
     * @throws IllegalArgumentException if {@code nodePath} does not start with this group's path
     *     followed by "/", or if its last segment is not a member name under the node scheme
     */
    public String getMemberId(String nodePath) {
        MorePreconditions.checkNotBlank(nodePath);

        boolean underGroup = nodePath.startsWith(path + "/");
        Preconditions.checkArgument(underGroup, "Not a member of this group[%s]: %s", path, nodePath);

        String nodeName = StringUtils.substringAfterLast(nodePath, "/");
        boolean recognized = nodeScheme.isMember(nodeName);
        Preconditions.checkArgument(recognized, "Not a group member: %s", nodeName);
        return nodeName;
    }

    /**
     * Synchronously asks ZooKeeper for the children of the group path and keeps only those names
     * the node scheme recognizes as members.
     *
     * @return the ids of the members currently present in this group
     * @throws ZooKeeperConnectionException if there was a problem connecting to ZooKeeper
     * @throws KeeperException if there was a problem reading this group's member ids
     * @throws InterruptedException if this thread is interrupted listing the group members
     */
    public Iterable<String> getMemberIds()
            throws ZooKeeperConnectionException, KeeperException, InterruptedException {
        List<String> childNames = zkClient.get().getChildren(path, false);
        return Iterables.filter(childNames, nodeNameFilter);
    }

    /**
     * Synchronously reads the data stored in the node of one member of this group.  No watch is
     * set by this read.
     *
     * @param memberId the id of the member whose data should be fetched
     * @return the data stored for {@code memberId}
     * @throws ZooKeeperConnectionException if there was a problem connecting to ZooKeeper
     * @throws KeeperException if there was a problem reading this member's data
     * @throws InterruptedException if this thread is interrupted retrieving the member data
     */
    public byte[] getMemberData(String memberId)
            throws ZooKeeperConnectionException, KeeperException, InterruptedException {
        byte[] storedData = zkClient.get().getData(getMemberPath(memberId), false, null);
        return storedData;
    }

    /**
     * Represents membership in a distributed group.
     */
    public interface Membership {

        /**
         * Returns the persistent ZooKeeper path that represents this group.
         */
        String getGroupPath();

        /**
         * Returns the id (ZooKeeper node name) of this group member.  May change over time if the
         * ZooKeeper session expires.
         */
        String getMemberId();

        /**
         * Returns the full ZooKeeper path to this group member.  May change over time if the
         * ZooKeeper session expires.
         */
        String getMemberPath();

        /**
         * Updates the membership data synchronously using the {@code Supplier<byte[]>} passed to
         * {@link Group#join(Supplier, Command)}.
         *
         * @return the new membership data
         * @throws UpdateException if there was a problem updating the membership data
         */
        byte[] updateMemberData() throws UpdateException;

        /**
         * Cancels group membership by deleting the associated ZooKeeper member node.
         *
         * @throws JoinException if there is a problem deleting the node
         */
        void cancel() throws JoinException;
    }

    /**
     * Indicates an error joining a group.  Also declared by {@link Membership#cancel()} for
     * failures while cancelling a membership.
     */
    public static class JoinException extends Exception {
        /**
         * @param message a description of the failure
         * @param cause the underlying exception that caused the failure
         */
        public JoinException(String message, Throwable cause) {
            super(message, cause);
        }
    }

    /**
     * Indicates an error updating a group member's data.
     */
    public static class UpdateException extends Exception {
        /**
         * @param message a description of the failure
         * @param cause the underlying exception that caused the failure
         */
        public UpdateException(String message, Throwable cause) {
            super(message, cause);
        }
    }

    /**
     * Joins this group with no member data and no loss-of-membership callback.  Equivalent to
     * calling {@link #join(Supplier, Command)} with a supplier of {@code null} data and a
     * {@code null} callback.
     *
     * @return a Membership object with the member details
     * @throws JoinException if there was a problem joining the group
     * @throws InterruptedException if this thread is interrupted awaiting completion of the join
     */
    public final Membership join() throws JoinException, InterruptedException {
        return join(NO_MEMBER_DATA, null);
    }

    /**
     * Joins this group with the given member data and no loss-of-membership callback.  Equivalent
     * to calling {@code join(memberData, null)}.
     *
     * @param memberData a supplier of the data to store in the member node; must not be null
     * @return a Membership object with the member details
     * @throws JoinException if there was a problem joining the group
     * @throws InterruptedException if this thread is interrupted awaiting completion of the join
     */
    public final Membership join(Supplier<byte[]> memberData) throws JoinException, InterruptedException {

        return join(memberData, null);
    }

    /**
     * Joins this group with no member data.  Equivalent to calling
     * {@link #join(Supplier, Command)} with a supplier of {@code null} data and the given
     * {@code onLoseMembership} callback.
     *
     * @param onLoseMembership a callback to notify when membership is lost; may be null
     * @return a Membership object with the member details
     * @throws JoinException if there was a problem joining the group
     * @throws InterruptedException if this thread is interrupted awaiting completion of the join
     */
    public final Membership join(@Nullable final Command onLoseMembership)
            throws JoinException, InterruptedException {

        return join(NO_MEMBER_DATA, onLoseMembership);
    }

    /**
     * Joins this group, returning the resulting Membership once the join has succeeded.  The
     * membership ends automatically when the current jvm process dies, but the returned Membership
     * can be used to end it sooner.  Until
     * {@link com.twitter.common.zookeeper.Group.Membership#cancel()} is called the membership is
     * kept alive by silently re-establishing it in the background.
     *
     * <p>The data produced by {@code memberData} is stored in the member node in ZooKeeper.  When an
     * {@code onLoseMembership} callback is given, it is invoked every time this member loses its
     * membership in the group.
     *
     * @param memberData a supplier of the data to store in the member node; must not be null
     * @param onLoseMembership a callback to notify when membership is lost; may be null
     * @return a Membership object with the member details
     * @throws JoinException if there was a problem joining the group
     * @throws InterruptedException if this thread is interrupted awaiting completion of the join
     */
    public final Membership join(Supplier<byte[]> memberData, @Nullable Command onLoseMembership)
            throws JoinException, InterruptedException {

        Preconditions.checkNotNull(memberData);
        ensurePersistentGroupPath();

        final ActiveMembership membership = new ActiveMembership(memberData, onLoseMembership);

        // One join attempt: a null result marks a temporary failure, an exception a permanent one.
        ExceptionalSupplier<Membership, JoinException> joinAttempt =
                new ExceptionalSupplier<Membership, JoinException>() {
                    @Override
                    public Membership get() throws JoinException {
                        try {
                            return membership.join();
                        } catch (InterruptedException e) {
                            Thread.currentThread().interrupt();
                            throw new JoinException("Interrupted trying to join group at path: " + path, e);
                        } catch (ZooKeeperConnectionException e) {
                            LOG.log(Level.WARNING, "Temporary error trying to join group at path: " + path, e);
                            return null;
                        } catch (KeeperException e) {
                            if (!zkClient.shouldRetry(e)) {
                                throw new JoinException("Problem joining partition group at path: " + path, e);
                            }
                            LOG.log(Level.WARNING, "Temporary error trying to join group at path: " + path, e);
                            return null;
                        }
                    }
                };
        return backoffHelper.doUntilResult(joinAttempt);
    }

    /**
     * Ensures the group path exists by handing an attempt to {@code backoffHelper.doUntilSuccess}.
     * An attempt reports {@code false} for connection problems and for KeeperExceptions the client
     * considers retryable; any other KeeperException is rethrown as a JoinException.
     *
     * @throws JoinException if the path cannot be ensured or an attempt is interrupted
     * @throws InterruptedException declared by the backoff helper call
     */
    private void ensurePersistentGroupPath() throws JoinException, InterruptedException {
        ExceptionalSupplier<Boolean, JoinException> ensureAttempt =
                new ExceptionalSupplier<Boolean, JoinException>() {
                    @Override
                    public Boolean get() throws JoinException {
                        try {
                            ZooKeeperUtils.ensurePath(zkClient, acl, path);
                            return true;
                        } catch (InterruptedException e) {
                            Thread.currentThread().interrupt();
                            throw new JoinException("Interrupted trying to ensure group at path: " + path, e);
                        } catch (ZooKeeperConnectionException e) {
                            LOG.log(Level.WARNING, "Problem connecting to ZooKeeper, retrying", e);
                            return false;
                        } catch (KeeperException e) {
                            if (!zkClient.shouldRetry(e)) {
                                throw new JoinException("Problem ensuring group at path: " + path, e);
                            }
                            LOG.log(Level.WARNING, "Temporary error ensuring path: " + path, e);
                            return false;
                        }
                    }
                };
        backoffHelper.doUntilSuccess(ensureAttempt);
    }

    /**
     * The {@link Membership} implementation returned by {@link Group#join(Supplier, Command)}.  It
     * creates the ephemeral member node and re-creates it when the session-expiration handler or
     * the node-deletion watch fires, until {@link #cancel()} has succeeded.
     *
     * <p>The mutating methods of this class are {@code synchronized} on the instance.
     */
    private class ActiveMembership implements Membership {
        // Source of the data stored in the member node; consulted on every join and update.
        private final Supplier<byte[]> memberData;
        // Callback run before every re-join attempt; never null (defaults to a no-op).
        private final Command onLoseMembership;
        // Path of the most recently created member node; null until the first create succeeds.
        private String nodePath;
        // Last path segment of nodePath.
        private String memberId;
        // Set once cancel() has deleted the member node; stops any further (re-)join.
        private volatile boolean cancelled;
        // The data last written to the member node by join() or updateMemberData().
        private byte[] membershipData;
        // True once the session-expiration handler has been registered.  A dedicated flag is used
        // rather than testing nodePath for null, because nodePath stays null when the first create
        // attempt fails and a retried join would otherwise register the handler again.
        private boolean expirationHandlerRegistered;

        /**
         * @param memberData a supplier of the data to store in the member node
         * @param onLoseMembership a callback to run when membership is lost, or null for none
         */
        public ActiveMembership(Supplier<byte[]> memberData, @Nullable Command onLoseMembership) {
            this.memberData = memberData;
            this.onLoseMembership = (onLoseMembership == null) ? Commands.NOOP : onLoseMembership;
        }

        @Override
        public String getGroupPath() {
            return path;
        }

        @Override
        public synchronized String getMemberId() {
            return memberId;
        }

        @Override
        public synchronized String getMemberPath() {
            return nodePath;
        }

        /**
         * Fetches fresh data from the supplier and writes it to the member node if it differs from
         * the data last written.
         *
         * @return the data obtained from the supplier
         * @throws UpdateException if the write fails, the connection cannot be established or the
         *     thread is interrupted (the interrupt status is restored before throwing)
         */
        @Override
        public synchronized byte[] updateMemberData() throws UpdateException {
            byte[] membershipData = memberData.get();
            if (!ArrayUtils.isEquals(this.membershipData, membershipData)) {
                try {
                    zkClient.get().setData(nodePath, membershipData, ZooKeeperUtils.ANY_VERSION);
                    this.membershipData = membershipData;
                } catch (KeeperException e) {
                    throw new UpdateException("Problem updating membership data.", e);
                } catch (InterruptedException e) {
                    // UpdateException hides the interruption from callers, so restore the thread's
                    // interrupt status the same way the other handlers in this file do.
                    Thread.currentThread().interrupt();
                    throw new UpdateException("Interrupted attempting to update membership data.", e);
                } catch (ZooKeeperConnectionException e) {
                    throw new UpdateException(
                            "Could not connect to the ZooKeeper cluster to update membership data.", e);
                }
            }
            return membershipData;
        }

        /**
         * Deletes the member node, retrying temporary failures, and marks this membership as
         * cancelled so it is not re-joined.  Does nothing if already cancelled.
         *
         * @throws JoinException if the node cannot be deleted or the thread is interrupted
         */
        @Override
        public synchronized void cancel() throws JoinException {
            if (!cancelled) {
                try {
                    backoffHelper.doUntilSuccess(new ExceptionalSupplier<Boolean, JoinException>() {
                        @Override
                        public Boolean get() throws JoinException {
                            try {
                                zkClient.get().delete(nodePath, ZooKeeperUtils.ANY_VERSION);
                                return true;
                            } catch (InterruptedException e) {
                                Thread.currentThread().interrupt();
                                throw new JoinException("Interrupted trying to cancel membership: " + nodePath, e);
                            } catch (ZooKeeperConnectionException e) {
                                LOG.log(Level.WARNING, "Problem connecting to ZooKeeper, retrying", e);
                                return false;
                            } catch (NoNodeException e) {
                                // The node is already gone, which is the state cancel() wants.
                                LOG.info("Membership already cancelled, node at path: " + nodePath
                                        + " has been deleted");
                                return true;
                            } catch (KeeperException e) {
                                if (zkClient.shouldRetry(e)) {
                                    LOG.log(Level.WARNING, "Temporary error cancelling membership: " + nodePath, e);
                                    return false;
                                } else {
                                    throw new JoinException("Problem cancelling membership: " + nodePath, e);
                                }
                            }
                        }
                    });
                    cancelled = true; // Prevent auto-re-join logic from undoing this cancel.
                } catch (InterruptedException e) {
                    Thread.currentThread().interrupt();
                    throw new JoinException("Problem cancelling membership: " + nodePath, e);
                }
            }
        }

        // Thrown by join() when the membership has already been cancelled.
        private class CancelledException extends IllegalStateException {
            /* marker */ }

        /**
         * Creates the ephemeral member node, records its path, id and data, and sets a watch that
         * triggers a re-join when the node is deleted.  On the first call it also registers the
         * session-expiration handler.
         *
         * @return this membership
         * @throws CancelledException if {@link #cancel()} has already succeeded
         */
        synchronized Membership join() throws ZooKeeperConnectionException, InterruptedException, KeeperException {

            if (cancelled) {
                throw new CancelledException();
            }

            if (!expirationHandlerRegistered) {
                // Re-join if our ephemeral node goes away due to session expiry - only needs to be
                // registered once.
                zkClient.registerExpirationHandler(new Command() {
                    @Override
                    public void execute() {
                        tryJoin();
                    }
                });
                expirationHandlerRegistered = true;
            }

            byte[] membershipData = memberData.get();
            String nodeName = nodeScheme.createName(membershipData);
            CreateMode createMode = nodeScheme.isSequential() ? CreateMode.EPHEMERAL_SEQUENTIAL
                    : CreateMode.EPHEMERAL;
            nodePath = zkClient.get().create(path + "/" + nodeName, membershipData, acl, createMode);
            memberId = Group.this.getMemberId(nodePath);
            LOG.info("Set group member ID to " + memberId);
            this.membershipData = membershipData;

            // Re-join if our ephemeral node goes away due to maliciousness.
            zkClient.get().exists(nodePath, new Watcher() {
                @Override
                public void process(WatchedEvent event) {
                    if (event.getType() == EventType.NodeDeleted) {
                        tryJoin();
                    }
                }
            });

            return this;
        }

        // A single re-join attempt: true ends the retry loop (including when a cancel won the
        // race), false asks for another attempt, and a non-retryable error is thrown unchecked.
        private final ExceptionalSupplier<Boolean, InterruptedException> tryJoin = new ExceptionalSupplier<Boolean, InterruptedException>() {
            @Override
            public Boolean get() throws InterruptedException {
                try {
                    join();
                    return true;
                } catch (CancelledException e) {
                    // Lost a cancel race - that's ok.
                    return true;
                } catch (ZooKeeperConnectionException e) {
                    LOG.log(Level.WARNING, "Problem connecting to ZooKeeper, retrying", e);
                    return false;
                } catch (KeeperException e) {
                    if (zkClient.shouldRetry(e)) {
                        LOG.log(Level.WARNING, "Temporary error re-joining group: " + path, e);
                        return false;
                    } else {
                        throw new IllegalStateException("Permanent problem re-joining group: " + path, e);
                    }
                }
            }
        };

        /**
         * Runs the loss-of-membership callback and then keeps attempting to re-join through the
         * backoff helper.
         */
        private synchronized void tryJoin() {
            onLoseMembership.execute();
            try {
                backoffHelper.doUntilSuccess(tryJoin);
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
                throw new RuntimeException(
                        String.format("Interrupted while trying to re-join group: %s, giving up", path), e);
            }
        }
    }

    /**
     * An interface to an object that listens for changes to a group's membership.  Instances are
     * registered through {@link Group#watch(GroupChangeListener)}.
     */
    public interface GroupChangeListener {

        /**
         * Called whenever group membership changes with the new list of member ids.
         *
         * @param memberIds the current member ids
         */
        void onGroupChange(Iterable<String> memberIds);
    }

    /**
     * An interface that dictates the scheme to use for storing and filtering nodes that represent
     * members of a distributed group.
     */
    public interface NodeScheme {
        /**
         * Determines if a child node is a member of a group by examining the node's name.
         *
         * @param nodeName the name of a child node found in a group
         * @return {@code true} if {@code nodeName} identifies a group member in this scheme
         */
        boolean isMember(String nodeName);

        /**
         * Generates a node name for the node representing this process in the distributed group.
         * The group appends the returned name to its own path and a "/" separator when creating
         * the member node.
         *
         * @param membershipData the data that will be stored in this node
         * @return the name for the node that will represent this process in the group
         */
        String createName(byte[] membershipData);

        /**
         * Indicates whether this scheme needs ephemeral sequential nodes or just ephemeral nodes.
         *
         * @return {@code true} if this scheme requires sequential node names; {@code false} otherwise
         */
        boolean isSequential();
    }

    /**
     * Indicates an error watching a group.
     */
    public static class WatchException extends Exception {
        /**
         * @param message a description of the failure
         * @param cause the underlying exception that caused the failure
         */
        public WatchException(String message, Throwable cause) {
            super(message, cause);
        }
    }

    /**
     * Watches this group for the lifetime of this jvm process.  This method will block until the
     * current group members are available, notify the {@code groupChangeListener} and then return.
     * All further changes to the group membership will cause notifications on a background thread.
     *
     * @param groupChangeListener the listener to notify of group membership change events; must
     *     not be null
     * @return A command which, when executed, will stop watching the group.
     * @throws WatchException if there is a problem generating the 1st group membership list
     * @throws InterruptedException if interrupted waiting to gather the 1st group membership list
     */
    public final Command watch(final GroupChangeListener groupChangeListener)
            throws WatchException, InterruptedException {
        Preconditions.checkNotNull(groupChangeListener);

        try {
            ensurePersistentGroupPath();
        } catch (JoinException e) {
            throw new WatchException("Failed to create group path: " + path, e);
        }

        final GroupMonitor groupMonitor = new GroupMonitor(groupChangeListener);
        backoffHelper.doUntilSuccess(new ExceptionalSupplier<Boolean, WatchException>() {
            @Override
            public Boolean get() throws WatchException {
                try {
                    groupMonitor.watchGroup();
                    return true;
                } catch (InterruptedException e) {
                    Thread.currentThread().interrupt();
                    throw new WatchException("Interrupted trying to watch group at path: " + path, e);
                } catch (ZooKeeperConnectionException e) {
                    LOG.log(Level.WARNING, "Temporary error trying to watch group at path: " + path, e);
                    // Report "not yet successful" as false, like the other doUntilSuccess
                    // suppliers in this class, rather than a null Boolean.
                    return false;
                } catch (KeeperException e) {
                    if (zkClient.shouldRetry(e)) {
                        LOG.log(Level.WARNING, "Temporary error trying to watch group at path: " + path, e);
                        return false;
                    } else {
                        throw new WatchException("Problem trying to watch group at path: " + path, e);
                    }
                }
            }
        });
        return new Command() {
            @Override
            public void execute() {
                groupMonitor.stopWatching();
            }
        };
    }

    /**
     * Keeps a watch on the group's children and reports membership changes to a listener until
     * told to stop.
     */
    private class GroupMonitor {
        private final GroupChangeListener groupChangeListener;
        private volatile boolean stopped = false;
        // Last membership reported to the listener; null until the first report.
        private Set<String> members;

        GroupMonitor(GroupChangeListener groupChangeListener) {
            this.groupChangeListener = groupChangeListener;
        }

        // Re-reads the group (which also re-arms the watch) whenever its children change.
        private final Watcher groupWatcher = new Watcher() {
            @Override
            public final void process(WatchedEvent event) {
                if (event.getType() != EventType.NodeChildrenChanged) {
                    return;
                }
                tryWatchGroup();
            }
        };

        // A single re-watch attempt: true on success, false to ask for another attempt, and an
        // unchecked exception for errors the client does not consider retryable.
        private final ExceptionalSupplier<Boolean, InterruptedException> rewatchAttempt = new ExceptionalSupplier<Boolean, InterruptedException>() {
            @Override
            public Boolean get() throws InterruptedException {
                try {
                    watchGroup();
                    return true;
                } catch (ZooKeeperConnectionException e) {
                    LOG.log(Level.WARNING, "Problem connecting to ZooKeeper, retrying", e);
                    return false;
                } catch (KeeperException e) {
                    if (!zkClient.shouldRetry(e)) {
                        throw new IllegalStateException("Permanent problem re-watching group: " + path, e);
                    }
                    LOG.log(Level.WARNING, "Temporary error re-watching group: " + path, e);
                    return false;
                }
            }
        };

        /**
         * Re-establishes the watch through the backoff helper unless monitoring has been stopped.
         */
        private void tryWatchGroup() {
            if (!stopped) {
                try {
                    backoffHelper.doUntilSuccess(rewatchAttempt);
                } catch (InterruptedException e) {
                    Thread.currentThread().interrupt();
                    throw new RuntimeException(
                            String.format("Interrupted while trying to re-watch group: %s, giving up", path), e);
                }
            }
        }

        /**
         * Reads the group's children with the group watcher attached and publishes the names that
         * pass the node name filter.  Does nothing once monitoring has been stopped.
         */
        private void watchGroup() throws ZooKeeperConnectionException, InterruptedException, KeeperException {

            if (!stopped) {
                List<String> childNames = zkClient.get().getChildren(path, groupWatcher);
                setMembers(Iterables.filter(childNames, nodeNameFilter));
            }
        }

        /**
         * Flags this monitor as stopped; later events and updates are ignored.
         */
        private void stopWatching() {
            // TODO(William Farner): Cancel the watch when
            // https://issues.apache.org/jira/browse/ZOOKEEPER-442 is resolved.
            LOG.info("Stopping watch on " + this);
            stopped = true;
        }

        /**
         * Notifies the listener when the given members differ from the last reported set.
         *
         * @param members the member ids just read from ZooKeeper
         */
        synchronized void setMembers(Iterable<String> members) {
            if (stopped) {
                LOG.info("Suppressing membership update, no longer watching " + this);
                return;
            }

            boolean firstUpdate = (this.members == null);
            if (firstUpdate) {
                // Reset our watch on the group if session expires - only needs to be registered once.
                zkClient.registerExpirationHandler(new Command() {
                    @Override
                    public void execute() {
                        tryWatchGroup();
                    }
                });
            }

            Set<String> snapshot = ImmutableSet.copyOf(members);
            if (snapshot.equals(this.members)) {
                return;
            }
            groupChangeListener.onGroupChange(members);
            this.members = snapshot;
        }
    }

    /**
     * The default node naming scheme.  Member nodes live at [given path] + "/" + [given prefix] +
     * a member ID generated by ZooKeeper.  With a path of "/discovery/servicename" and a prefix of
     * "member_", a member node's full path looks something like
     * {@code /discovery/servicename/member_0000000007}.
     */
    public static class DefaultScheme implements NodeScheme {
        private final String namePrefix;
        private final Pattern namePattern;

        /**
         * Builds a sequential node scheme around the given node name prefix.
         *
         * @param namePrefix the prefix shared by the names of all member nodes
         */
        public DefaultScheme(String namePrefix) {
            String checkedPrefix = MorePreconditions.checkNotBlank(namePrefix);
            this.namePrefix = checkedPrefix;

            // A member name is the literal prefix, an optional '-' and one or more digits.
            String quotedPrefix = Pattern.quote(namePrefix);
            this.namePattern = Pattern.compile("^" + quotedPrefix + "-?[0-9]+$");
        }

        /** Reports whether {@code nodeName} is the prefix followed by an optionally negative number. */
        @Override
        public boolean isMember(String nodeName) {
            boolean matchesScheme = namePattern.matcher(nodeName).matches();
            return matchesScheme;
        }

        /** Returns the bare prefix; {@code membershipData} is not consulted. */
        @Override
        public String createName(byte[] membershipData) {
            return this.namePrefix;
        }

        /** Always {@code true}: this scheme asks for sequential node names. */
        @Override
        public boolean isSequential() {
            return true;
        }
    }

    /**
     * Returns the text "Group " followed by this group's path.
     */
    @Override
    public String toString() {
        return "Group " + path;
    }
}