org.apache.bookkeeper.replication.AuditorElector.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.bookkeeper.replication.AuditorElector.java

Source

/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 *
 */
package org.apache.bookkeeper.replication;

import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.io.Serializable;
import java.io.IOException;

import org.apache.bookkeeper.proto.DataFormats.AuditorVoteFormat;
import com.google.common.annotations.VisibleForTesting;
import java.util.concurrent.Executors;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.atomic.AtomicBoolean;

import org.apache.bookkeeper.net.BookieSocketAddress;
import org.apache.bookkeeper.conf.ServerConfiguration;
import org.apache.bookkeeper.replication.ReplicationException.UnavailableException;
import org.apache.bookkeeper.stats.Counter;
import org.apache.bookkeeper.stats.NullStatsLogger;
import org.apache.bookkeeper.stats.StatsLogger;
import org.apache.bookkeeper.util.BookKeeperConstants;
import org.apache.commons.lang.StringUtils;
import org.apache.zookeeper.CreateMode;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.WatchedEvent;
import org.apache.zookeeper.Watcher;
import org.apache.zookeeper.ZooKeeper;
import org.apache.zookeeper.Watcher.Event.EventType;
import org.apache.zookeeper.Watcher.Event.KeeperState;
import org.apache.zookeeper.ZooDefs.Ids;
import com.google.protobuf.TextFormat;
import static com.google.common.base.Charsets.UTF_8;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import static org.apache.bookkeeper.replication.ReplicationStats.ELECTION_ATTEMPTS;

/**
 * Performing auditor election using Apache ZooKeeper. Using ZooKeeper as a
 * coordination service, when a bookie bids for auditor, it creates an ephemeral
 * sequential file (znode) on ZooKeeper and considered as their vote. Vote
 * format is 'V_sequencenumber'. Election will be done by comparing the
 * ephemeral sequential numbers and the bookie which has created the least znode
 * will be elected as Auditor. All the other bookies will be watching on their
 * predecessor znode according to the ephemeral sequence numbers.
 */
public class AuditorElector {
    private static final Logger LOG = LoggerFactory.getLogger(AuditorElector.class);
    // Represents the index of the auditor node
    private static final int AUDITOR_INDEX = 0;
    // Represents vote prefix
    private static final String VOTE_PREFIX = "V_";
    // Represents path Separator
    private static final String PATH_SEPARATOR = "/";
    private static final String ELECTION_ZNODE = "auditorelection";
    // Represents urLedger path in zk
    private final String basePath;
    // Represents auditor election path in zk
    private final String electionPath;

    private final String bookieId;
    private final ServerConfiguration conf;
    private final ZooKeeper zkc;
    private final ExecutorService executor;

    private String myVote;
    Auditor auditor;
    private AtomicBoolean running = new AtomicBoolean(false);

    // Expose Stats
    private final Counter electionAttempts;
    private final StatsLogger statsLogger;

    /**
     * AuditorElector for performing the auditor election
     *
     * @param bookieId
     *            - bookie identifier, comprises HostAddress:Port
     * @param conf
     *            - configuration
     * @param zkc
     *            - ZK instance
     * @throws UnavailableException
     *             throws unavailable exception while initializing the elector
     */
    public AuditorElector(final String bookieId, ServerConfiguration conf, ZooKeeper zkc)
            throws UnavailableException {
        this(bookieId, conf, zkc, NullStatsLogger.INSTANCE);
    }

    /**
     * AuditorElector for performing the auditor election
     *
     * @param bookieId
     *            - bookie identifier, comprises HostAddress:Port
     * @param conf
     *            - configuration
     * @param zkc
     *            - ZK instance
     * @param statsLogger
     *            - stats logger
     * @throws UnavailableException
     *             throws unavailable exception while initializing the elector
     */
    public AuditorElector(final String bookieId, ServerConfiguration conf, ZooKeeper zkc, StatsLogger statsLogger)
            throws UnavailableException {
        this.bookieId = bookieId;
        this.conf = conf;
        this.zkc = zkc;
        this.statsLogger = statsLogger;
        this.electionAttempts = statsLogger.getCounter(ELECTION_ATTEMPTS);
        basePath = conf.getZkLedgersRootPath() + '/' + BookKeeperConstants.UNDER_REPLICATION_NODE;
        electionPath = basePath + '/' + ELECTION_ZNODE;
        createElectorPath();
        executor = Executors.newSingleThreadExecutor(new ThreadFactory() {
            @Override
            public Thread newThread(Runnable r) {
                return new Thread(r, "AuditorElector-" + bookieId);
            }
        });
    }

    private void createMyVote() throws KeeperException, InterruptedException {
        if (null == myVote || null == zkc.exists(myVote, false)) {
            AuditorVoteFormat.Builder builder = AuditorVoteFormat.newBuilder().setBookieId(bookieId);
            myVote = zkc.create(getVotePath(PATH_SEPARATOR + VOTE_PREFIX),
                    TextFormat.printToString(builder.build()).getBytes(UTF_8), Ids.OPEN_ACL_UNSAFE,
                    CreateMode.EPHEMERAL_SEQUENTIAL);
        }
    }

    private String getVotePath(String vote) {
        return electionPath + vote;
    }

    private void createElectorPath() throws UnavailableException {
        try {
            if (zkc.exists(basePath, false) == null) {
                try {
                    zkc.create(basePath, new byte[0], Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT);
                } catch (KeeperException.NodeExistsException nee) {
                    // do nothing, someone else could have created it
                }
            }
            if (zkc.exists(getVotePath(""), false) == null) {
                try {
                    zkc.create(getVotePath(""), new byte[0], Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT);
                } catch (KeeperException.NodeExistsException nee) {
                    // do nothing, someone else could have created it
                }
            }
        } catch (KeeperException ke) {
            throw new UnavailableException("Failed to initialize Auditor Elector", ke);
        } catch (InterruptedException ie) {
            Thread.currentThread().interrupt();
            throw new UnavailableException("Failed to initialize Auditor Elector", ie);
        }
    }

    /**
     * Watching the predecessor bookies and will do election on predecessor node
     * deletion or expiration.
     */
    private class ElectionWatcher implements Watcher {
        @Override
        public void process(WatchedEvent event) {
            if (event.getState() == KeeperState.Expired) {
                LOG.error("Lost ZK connection, shutting down");
                submitShutdownTask();
            } else if (event.getType() == EventType.NodeDeleted) {
                submitElectionTask();
            }
        }
    }

    public void start() {
        running.set(true);
        submitElectionTask();
    }

    /**
     * Run cleanup operations for the auditor elector.
     */
    private void submitShutdownTask() {
        executor.submit(new Runnable() {
            public void run() {
                if (!running.compareAndSet(true, false)) {
                    return;
                }
                LOG.info("Shutting down AuditorElector");
                if (myVote != null) {
                    try {
                        zkc.delete(myVote, -1);
                    } catch (InterruptedException ie) {
                        LOG.warn("InterruptedException while deleting myVote: " + myVote, ie);
                    } catch (KeeperException ke) {
                        LOG.error("Exception while deleting myVote:" + myVote, ke);
                    }
                }
            }
        });
    }

    /**
     * Performing the auditor election using the ZooKeeper ephemeral sequential
     * znode. The bookie which has created the least sequential will be elect as
     * Auditor.
     */
    @VisibleForTesting
    void submitElectionTask() {

        Runnable r = new Runnable() {
            public void run() {
                if (!running.get()) {
                    return;
                }
                try {
                    // creating my vote in zk. Vote format is 'V_numeric'
                    createMyVote();
                    List<String> children = zkc.getChildren(getVotePath(""), false);

                    if (0 >= children.size()) {
                        throw new IllegalArgumentException(
                                "Atleast one bookie server should present to elect the Auditor!");
                    }

                    // sorting in ascending order of sequential number
                    Collections.sort(children, new ElectionComparator());
                    String voteNode = StringUtils.substringAfterLast(myVote, PATH_SEPARATOR);

                    // starting Auditing service
                    if (children.get(AUDITOR_INDEX).equals(voteNode)) {
                        // update the auditor bookie id in the election path. This is
                        // done for debugging purpose
                        AuditorVoteFormat.Builder builder = AuditorVoteFormat.newBuilder().setBookieId(bookieId);

                        zkc.setData(getVotePath(""), TextFormat.printToString(builder.build()).getBytes(UTF_8), -1);
                        auditor = new Auditor(bookieId, conf, zkc, statsLogger);
                        auditor.start();
                    } else {
                        // If not an auditor, will be watching to my predecessor and
                        // looking the previous node deletion.
                        Watcher electionWatcher = new ElectionWatcher();
                        int myIndex = children.indexOf(voteNode);
                        int prevNodeIndex = myIndex - 1;
                        if (null == zkc.exists(getVotePath(PATH_SEPARATOR) + children.get(prevNodeIndex),
                                electionWatcher)) {
                            // While adding, the previous znode doesn't exists.
                            // Again going to election.
                            submitElectionTask();
                        }
                        electionAttempts.inc();
                    }
                } catch (KeeperException e) {
                    LOG.error("Exception while performing auditor election", e);
                    submitShutdownTask();
                } catch (InterruptedException e) {
                    LOG.error("Interrupted while performing auditor election", e);
                    Thread.currentThread().interrupt();
                    submitShutdownTask();
                } catch (UnavailableException e) {
                    LOG.error("Ledger underreplication manager unavailable during election", e);
                    submitShutdownTask();
                }
            }
        };
        executor.submit(r);
    }

    @VisibleForTesting
    Auditor getAuditor() {
        return auditor;
    }

    /**
     * Query zookeeper for the currently elected auditor
     * @return the bookie id of the current auditor
     */
    public static BookieSocketAddress getCurrentAuditor(ServerConfiguration conf, ZooKeeper zk)
            throws KeeperException, InterruptedException, IOException {
        String electionRoot = conf.getZkLedgersRootPath() + '/' + BookKeeperConstants.UNDER_REPLICATION_NODE + '/'
                + ELECTION_ZNODE;

        List<String> children = zk.getChildren(electionRoot, false);
        Collections.sort(children, new AuditorElector.ElectionComparator());
        if (children.size() < 1) {
            return null;
        }
        String ledger = electionRoot + "/" + children.get(AUDITOR_INDEX);
        byte[] data = zk.getData(ledger, false, null);

        AuditorVoteFormat.Builder builder = AuditorVoteFormat.newBuilder();
        TextFormat.merge(new String(data, UTF_8), builder);
        AuditorVoteFormat v = builder.build();
        String[] parts = v.getBookieId().split(":");
        return new BookieSocketAddress(parts[0], Integer.parseInt(parts[1]));
    }

    /**
     * Shutting down AuditorElector
     */
    public void shutdown() throws InterruptedException {
        synchronized (this) {
            if (executor.isShutdown()) {
                return;
            }
            submitShutdownTask();
            executor.shutdown();
        }

        if (auditor != null) {
            auditor.shutdown();
            auditor = null;
        }
    }

    /**
     * If current bookie is running as auditor, return the status of the
     * auditor. Otherwise return the status of elector.
     *
     * @return
     */
    public boolean isRunning() {
        if (auditor != null) {
            return auditor.isRunning();
        }
        return running.get();
    }

    /**
     * Compare the votes in the ascending order of the sequence number. Vote
     * format is 'V_sequencenumber', comparator will do sorting based on the
     * numeric sequence value.
     */
    private static class ElectionComparator implements Comparator<String>, Serializable {
        /**
         * Return -1 if the first vote is less than second. Return 1 if the
         * first vote is greater than second. Return 0 if the votes are equal.
         */
        public int compare(String vote1, String vote2) {
            long voteSeqId1 = getVoteSequenceId(vote1);
            long voteSeqId2 = getVoteSequenceId(vote2);
            int result = voteSeqId1 < voteSeqId2 ? -1 : (voteSeqId1 > voteSeqId2 ? 1 : 0);
            return result;
        }

        private long getVoteSequenceId(String vote) {
            String voteId = StringUtils.substringAfter(vote, VOTE_PREFIX);
            return Long.parseLong(voteId);
        }
    }
}