org.apache.hadoop.hbase.procedure.Procedure.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.hadoop.hbase.procedure.Procedure.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.procedure;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.hbase.errorhandling.ForeignException;
import org.apache.hadoop.hbase.errorhandling.ForeignExceptionDispatcher;
import org.apache.hadoop.hbase.errorhandling.ForeignExceptionListener;
import org.apache.hadoop.hbase.errorhandling.ForeignExceptionSnare;
import org.apache.hadoop.hbase.errorhandling.TimeoutExceptionInjector;

import com.google.common.collect.Lists;

/**
 * A globally-barriered distributed procedure.  This class encapsulates state and methods for
 * tracking and managing a distributed procedure, as well as aborting if any member encounters
 * a problem or if a cancellation is requested.
 * <p>
 * All procedures first attempt to reach a barrier point with the {@link #sendGlobalBarrierStart()}
 * method.  The procedure contacts all members and waits for all subprocedures to execute
 * {@link Subprocedure#acquireBarrier} to acquire its local piece of the global barrier and then
 * send acquisition info back to the coordinator.  If all acquisitions at subprocedures succeed,
 * the coordinator then will call {@link #sendGlobalBarrierReached()}.  This notifies members to
 * execute the {@link Subprocedure#insideBarrier()} method.  The procedure is blocked until all
 * {@link Subprocedure#insideBarrier} executions complete at the members.  When
 * {@link Subprocedure#insideBarrier} completes at each member, the member sends notification to
 * the coordinator.  Once all members complete, the coordinator calls
 * {@link #sendGlobalBarrierComplete()}.
 * <p>
 * If errors are encountered remotely, they are forwarded to the coordinator, and
 * {@link Subprocedure#cleanup(Exception)} is called.
 * <p>
 * Each Procedure and each Subprocedure enforces a time limit on the execution time. If the time
 * limit expires before the procedure completes the {@link TimeoutExceptionInjector} will trigger
 * an {@link ForeignException} to abort the procedure.  This is particularly useful for situations
 * when running a distributed {@link Subprocedure} so participants can avoid blocking for extreme
 * amounts of time if one of the participants fails or takes a really long time (e.g. GC pause).
 * <p>
 * Users should generally not directly create or subclass instances of this.  They are created
 * for them implicitly via {@link ProcedureCoordinator#startProcedure(ForeignExceptionDispatcher,
 * String, byte[], List)}}
 */
@InterfaceAudience.Private
public class Procedure implements Callable<Void>, ForeignExceptionListener {
    private static final Log LOG = LogFactory.getLog(Procedure.class);

    //
    // Arguments and naming
    //

    // Name of the procedure
    final private String procName;
    // Arguments for this procedure execution
    final private byte[] args;

    //
    // Execution State
    //
    /** latch for waiting until all members have acquire in barrier state */
    final CountDownLatch acquiredBarrierLatch;
    /** latch for waiting until all members have executed and released their in barrier state */
    final CountDownLatch releasedBarrierLatch;
    /** latch for waiting until a procedure has completed */
    final CountDownLatch completedLatch;
    /** monitor to check for errors */
    private final ForeignExceptionDispatcher monitor;

    //
    // Execution Timeout Handling.
    //

    /** frequency to check for errors (ms) */
    protected final long wakeFrequency;
    protected final TimeoutExceptionInjector timeoutInjector;

    //
    // Members' and Coordinator's state
    //

    /** lock to prevent nodes from acquiring and then releasing before we can track them */
    private Object joinBarrierLock = new Object();
    private final List<String> acquiringMembers;
    private final List<String> inBarrierMembers;
    private ProcedureCoordinator coord;

    /**
     * Creates a procedure. (FOR TESTING)
     *
     * {@link Procedure} state to be run by a {@link ProcedureCoordinator}.
     * @param coord coordinator to call back to for general errors (e.g.
     *          {@link ProcedureCoordinator#rpcConnectionFailure(String, IOException)}).
     * @param monitor error monitor to check for external errors
     * @param wakeFreq frequency to check for errors while waiting
     * @param timeout amount of time to allow the procedure to run before cancelling
     * @param procName name of the procedure instance
     * @param args argument data associated with the procedure instance
     * @param expectedMembers names of the expected members
     */
    public Procedure(ProcedureCoordinator coord, ForeignExceptionDispatcher monitor, long wakeFreq, long timeout,
            String procName, byte[] args, List<String> expectedMembers) {
        this.coord = coord;
        this.acquiringMembers = new ArrayList<String>(expectedMembers);
        this.inBarrierMembers = new ArrayList<String>(acquiringMembers.size());
        this.procName = procName;
        this.args = args;
        this.monitor = monitor;
        this.wakeFrequency = wakeFreq;

        int count = expectedMembers.size();
        this.acquiredBarrierLatch = new CountDownLatch(count);
        this.releasedBarrierLatch = new CountDownLatch(count);
        this.completedLatch = new CountDownLatch(1);
        this.timeoutInjector = new TimeoutExceptionInjector(monitor, timeout);
    }

    /**
     * Create a procedure.
     *
     * Users should generally not directly create instances of this.  They are created them
     * implicitly via {@link ProcedureCoordinator#createProcedure(ForeignExceptionDispatcher,
     * String, byte[], List)}}
     *
     * @param coord coordinator to call back to for general errors (e.g.
     *          {@link ProcedureCoordinator#rpcConnectionFailure(String, IOException)}).
     * @param wakeFreq frequency to check for errors while waiting
     * @param timeout amount of time to allow the procedure to run before cancelling
     * @param procName name of the procedure instance
     * @param args argument data associated with the procedure instance
     * @param expectedMembers names of the expected members
     */
    public Procedure(ProcedureCoordinator coord, long wakeFreq, long timeout, String procName, byte[] args,
            List<String> expectedMembers) {
        this(coord, new ForeignExceptionDispatcher(), wakeFreq, timeout, procName, args, expectedMembers);
    }

    public String getName() {
        return procName;
    }

    /**
     * @return String of the procedure members both trying to enter the barrier and already in barrier
     */
    public String getStatus() {
        String waiting, done;
        synchronized (joinBarrierLock) {
            waiting = acquiringMembers.toString();
            done = inBarrierMembers.toString();
        }
        return "Procedure " + procName + " { waiting=" + waiting + " done=" + done + " }";
    }

    /**
     * Get the ForeignExceptionDispatcher
     * @return the Procedure's monitor.
     */
    public ForeignExceptionDispatcher getErrorMonitor() {
        return monitor;
    }

    /**
     * This call is the main execution thread of the barriered procedure.  It sends messages and
     * essentially blocks until all procedure members acquire or later complete but periodically
     * checks for foreign exceptions.
     */
    @Override
    @SuppressWarnings("finally")
    final public Void call() {
        LOG.info("Starting procedure '" + procName + "'");
        // start the timer
        timeoutInjector.start();

        // run the procedure
        try {
            // start by checking for error first
            monitor.rethrowException();
            LOG.debug("Procedure '" + procName + "' starting 'acquire'");
            sendGlobalBarrierStart();

            // wait for all the members to report acquisition
            LOG.debug("Waiting for all members to 'acquire'");
            waitForLatch(acquiredBarrierLatch, monitor, wakeFrequency, "acquired");
            monitor.rethrowException();

            LOG.debug("Procedure '" + procName + "' starting 'in-barrier' execution.");
            sendGlobalBarrierReached();

            // wait for all members to report barrier release
            waitForLatch(releasedBarrierLatch, monitor, wakeFrequency, "released");

            // make sure we didn't get an error during in barrier execution and release
            monitor.rethrowException();
            LOG.info("Procedure '" + procName + "' execution completed");
        } catch (Exception e) {
            if (e instanceof InterruptedException) {
                Thread.currentThread().interrupt();
            }
            String msg = "Procedure '" + procName + "' execution failed!";
            LOG.error(msg, e);
            receive(new ForeignException(getName(), e));
        } finally {
            LOG.debug("Running finish phase.");
            sendGlobalBarrierComplete();
            completedLatch.countDown();

            // tell the timer we are done, if we get here successfully
            timeoutInjector.complete();
            return null;
        }
    }

    /**
     * Sends a message to Members to create a new {@link Subprocedure} for this Procedure and execute
     * the {@link Subprocedure#acquireBarrier} step.
     * @throws ForeignException
     */
    public void sendGlobalBarrierStart() throws ForeignException {
        // start the procedure
        LOG.debug("Starting procedure '" + procName + "', kicking off acquire phase on members.");
        try {
            // send procedure barrier start to specified list of members. cloning the list to avoid
            // concurrent modification from the controller setting the prepared nodes
            coord.getRpcs().sendGlobalBarrierAcquire(this, args, Lists.newArrayList(this.acquiringMembers));
        } catch (IOException e) {
            coord.rpcConnectionFailure("Can't reach controller.", e);
        } catch (IllegalArgumentException e) {
            throw new ForeignException(getName(), e);
        }
    }

    /**
     * Sends a message to all members that the global barrier condition has been satisfied.  This
     * should only be executed after all members have completed its
     * {@link Subprocedure#acquireBarrier()} call successfully.  This triggers the member
     * {@link Subprocedure#insideBarrier} method.
     * @throws ForeignException
     */
    public void sendGlobalBarrierReached() throws ForeignException {
        try {
            // trigger to have member run {@link Subprocedure#insideBarrier}
            coord.getRpcs().sendGlobalBarrierReached(this, Lists.newArrayList(inBarrierMembers));
        } catch (IOException e) {
            coord.rpcConnectionFailure("Can't reach controller.", e);
        }
    }

    /**
     * Sends a message to members that all {@link Subprocedure#insideBarrier} calls have completed.
     * After this executes, the coordinator can assume that any state resources about this barrier
     * procedure state has been released.
     */
    public void sendGlobalBarrierComplete() {
        LOG.debug("Finished coordinator procedure - removing self from list of running procedures");
        try {
            coord.getRpcs().resetMembers(this);
        } catch (IOException e) {
            coord.rpcConnectionFailure("Failed to reset procedure:" + procName, e);
        }
    }

    //
    // Call backs from other external processes.
    //

    /**
     * Call back triggered by an individual member upon successful local barrier acquisition
     * @param member
     */
    public void barrierAcquiredByMember(String member) {
        LOG.debug("member: '" + member + "' joining acquired barrier for procedure '" + procName
                + "' on coordinator");
        if (this.acquiringMembers.contains(member)) {
            synchronized (joinBarrierLock) {
                if (this.acquiringMembers.remove(member)) {
                    this.inBarrierMembers.add(member);
                    acquiredBarrierLatch.countDown();
                }
            }
            LOG.debug("Waiting on: " + acquiredBarrierLatch + " remaining members to acquire global barrier");
        } else {
            LOG.warn("Member " + member + " joined barrier, but we weren't waiting on it to join."
                    + " Continuing on.");
        }
    }

    /**
     * Call back triggered by a individual member upon successful local in-barrier execution and
     * release
     * @param member
     */
    public void barrierReleasedByMember(String member) {
        boolean removed = false;
        synchronized (joinBarrierLock) {
            removed = this.inBarrierMembers.remove(member);
            if (removed) {
                releasedBarrierLatch.countDown();
            }
        }
        if (removed) {
            LOG.debug("Member: '" + member + "' released barrier for procedure'" + procName
                    + "', counting down latch.  Waiting for " + releasedBarrierLatch.getCount() + " more");
        } else {
            LOG.warn("Member: '" + member + "' released barrier for procedure'" + procName
                    + "', but we weren't waiting on it to release!");
        }
    }

    /**
     * Waits until the entire procedure has globally completed, or has been aborted.  If an
     * exception is thrown the procedure may or not have run cleanup to trigger the completion latch
     * yet.
     * @throws ForeignException
     * @throws InterruptedException
     */
    public void waitForCompleted() throws ForeignException, InterruptedException {
        waitForLatch(completedLatch, monitor, wakeFrequency, procName + " completed");
    }

    /**
     * Check if the entire procedure has globally completed, or has been aborted.
     * @throws ForeignException
     */
    public boolean isCompleted() throws ForeignException {
        // Rethrow exception if any
        monitor.rethrowException();
        return (completedLatch.getCount() == 0);
    }

    /**
     * A callback that handles incoming ForeignExceptions.
     */
    @Override
    public void receive(ForeignException e) {
        monitor.receive(e);
    }

    /**
     * Wait for latch to count to zero, ignoring any spurious wake-ups, but waking periodically to
     * check for errors
     * @param latch latch to wait on
     * @param monitor monitor to check for errors while waiting
     * @param wakeFrequency frequency to wake up and check for errors (in
     *          {@link TimeUnit#MILLISECONDS})
     * @param latchDescription description of the latch, for logging
     * @throws ForeignException type of error the monitor can throw, if the task fails
     * @throws InterruptedException if we are interrupted while waiting on latch
     */
    public static void waitForLatch(CountDownLatch latch, ForeignExceptionSnare monitor, long wakeFrequency,
            String latchDescription) throws ForeignException, InterruptedException {
        boolean released = false;
        while (!released) {
            if (monitor != null) {
                monitor.rethrowException();
            }
            /*
            ForeignExceptionDispatcher.LOG.debug("Waiting for '" + latchDescription + "' latch. (sleep:"
                + wakeFrequency + " ms)"); */
            released = latch.await(wakeFrequency, TimeUnit.MILLISECONDS);
        }
        // check error again in case an error raised during last wait
        if (monitor != null) {
            monitor.rethrowException();
        }
    }
}