edu.umass.cs.gigapaxos.PaxosInstanceStateMachine.java Source code

Introduction

Here is the source code for edu.umass.cs.gigapaxos.PaxosInstanceStateMachine.java
Source

/* Copyright (c) 2015 University of Massachusetts
 * 
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 * 
 * http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 * 
 * Initial developer(s): V. Arun */
package edu.umass.cs.gigapaxos;

import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.InetSocketAddress;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;

import org.json.JSONException;
import org.json.JSONObject;

import edu.umass.cs.gigapaxos.PaxosConfig.PC;
import edu.umass.cs.gigapaxos.interfaces.ClientRequest;
import edu.umass.cs.gigapaxos.interfaces.Replicable;
import edu.umass.cs.gigapaxos.interfaces.Request;
import edu.umass.cs.gigapaxos.interfaces.SummarizableRequest;
import edu.umass.cs.gigapaxos.paxospackets.AcceptPacket;
import edu.umass.cs.gigapaxos.paxospackets.AcceptReplyPacket;
import edu.umass.cs.gigapaxos.paxospackets.BatchedAccept;
import edu.umass.cs.gigapaxos.paxospackets.BatchedAcceptReply;
import edu.umass.cs.gigapaxos.paxospackets.BatchedCommit;
import edu.umass.cs.gigapaxos.paxospackets.PValuePacket;
import edu.umass.cs.gigapaxos.paxospackets.PaxosPacket;
import edu.umass.cs.gigapaxos.paxospackets.PreparePacket;
import edu.umass.cs.gigapaxos.paxospackets.PrepareReplyPacket;
import edu.umass.cs.gigapaxos.paxospackets.ProposalPacket;
import edu.umass.cs.gigapaxos.paxospackets.RequestPacket;
import edu.umass.cs.gigapaxos.paxospackets.StatePacket;
import edu.umass.cs.gigapaxos.paxospackets.SyncDecisionsPacket;
import edu.umass.cs.gigapaxos.paxospackets.PaxosPacket.PaxosPacketType;
import edu.umass.cs.gigapaxos.paxosutil.Ballot;
import edu.umass.cs.gigapaxos.paxosutil.HotRestoreInfo;
import edu.umass.cs.gigapaxos.paxosutil.IntegerMap;
import edu.umass.cs.gigapaxos.paxosutil.LogMessagingTask;
import edu.umass.cs.gigapaxos.paxosutil.MessagingTask;
import edu.umass.cs.gigapaxos.paxosutil.PaxosInstanceCreationException;
import edu.umass.cs.gigapaxos.paxosutil.PrepareReplyAssembler;
import edu.umass.cs.gigapaxos.paxosutil.RequestInstrumenter;
import edu.umass.cs.gigapaxos.paxosutil.SlotBallotState;
import edu.umass.cs.gigapaxos.testing.TESTPaxosApp;
import edu.umass.cs.gigapaxos.testing.TESTPaxosConfig.TC;
import edu.umass.cs.nio.NIOTransport;
import edu.umass.cs.nio.nioutils.RTTEstimator;
import edu.umass.cs.reconfiguration.reconfigurationutils.RequestParseException;
import edu.umass.cs.utils.Config;
import edu.umass.cs.utils.Keyable;
import edu.umass.cs.utils.Pausable;
import edu.umass.cs.utils.Util;
import edu.umass.cs.utils.DelayProfiler;

/**
 * @author V. Arun
 * 
 *         This class is the top-level paxos class per instance or paxos group
 *         on a machine. This class is "protected" as the only way to use it
 *         will be through the corresponding PaxosManager even if there is just
 *         one paxos application running on the machine.
 *         <p>
 * 
 *         This class delegates much of the interesting paxos actions to
 *         PaxosAcceptorState and PaxosCoordinator. It delegates all messaging
 *         to PaxosManager's PaxosMessenger. It is "managed", i.e., its paxos
 *         group is created and its incoming packets are demultiplexed, by its
 *         PaxosManager. It's logging is handled by an implementation of
 *         AbstractPaxosLogger.
 *         <p>
 * 
 *         The high-level organization is best reflected in handlePaxosMessage,
 *         a method that delegates processing to the acceptor or coordinator and
 *         gets back a messaging task, e.g., receiving a prepare message will
 *         probably result in a prepare-reply messaging task, and so on.
 *         <p>
 * 
 *         Space: An inactive PaxosInstanceStateMachine, i.e., whose
 *         corresponding application is currently not processing any requests,
 *         uses ~225B *total*. Here is the breakdown: PaxosInstanceStateMachine
 *         final fields: ~80B PaxosAcceptor: ~90B PaxosCoordinatorState: ~60B
 *         Even in an inactive paxos instance, the total *total* space is much
 *         more because of PaxosManager (that internally uses FailureDetection)
 *         etc., but all that state is not incurred per paxos application, just
 *         per machine. Thus, if we have S=10 machines and N=10M applications
 *         each using paxos with K=10 replicas one each at each machine, each
 *         machine has 10M PaxosInstanceStateMachine instances that will use
 *         about 2.25GB (10M*225B). The amount of space used by PaxosManager and
 *         others is small and depends only on S, not N or K.
 *         <p>
 * 
 *         When actively processing requests, the total space per paxos instance
 *         can easily go up to thousands of bytes. But we are unlikely to be
 *         processing requests across even hundreds of thousands of different
 *         applications simultaneously if each request finishes executing in
 *         under a second. For example, if a single server's execution
 *         throughput is 10K requests/sec and each request takes 100ms to finish
 *         executing (including paxos coordination), then the number of active
 *         *requests* at a machine is on average ~100K. The number of active
 *         paxos instances at that machine is at most the number of active
 *         requests at that machine.
 * 
 */
public class PaxosInstanceStateMachine implements Keyable<String>, Pausable {
    /* If false, the paxosID is represented as a byte[], so we must invoke
     * getPaxosID() as infrequently as possible. */
    private static final boolean PAXOS_ID_AS_STRING = false;

    // must be >= 1, does not depend on anything else
    protected static final int INTER_CHECKPOINT_INTERVAL = Config.getGlobalInt(PaxosConfig.PC.CHECKPOINT_INTERVAL);// 100;

    // out-of-order-ness prompting synchronization, must be >=1
    protected static final int SYNC_THRESHOLD = 4 * INTER_CHECKPOINT_INTERVAL;

    // max decisions gap when reached will prompt checkpoint transfer
    protected static final int MAX_SYNC_DECISIONS_GAP = INTER_CHECKPOINT_INTERVAL;

    // minimum interval before another sync decisions request can be issued
    protected static final long MIN_RESYNC_DELAY = 1000;

    private static final boolean ENABLE_INSTRUMENTATION = Config.getGlobalBoolean(PC.ENABLE_INSTRUMENTATION);

    private static final boolean instrument() {
        return ENABLE_INSTRUMENTATION;
    }

    private static final boolean instrument(boolean flag) {
        return flag && ENABLE_INSTRUMENTATION;
    }

    private static final boolean instrument(int n) {
        return ENABLE_INSTRUMENTATION && Util.oneIn(n);
    }

    private static final void instrumentDelay(toLog field, long startTime) {
        if (field.log())
            DelayProfiler.updateDelay(field.toString(), startTime);
    }

    private static final void instrumentDelay(toLog field, long startTime, int n) {
        if (field.log())
            DelayProfiler.updateDelay(field.toString(), startTime, n);
    }

    private static enum SyncMode {
        DEFAULT_SYNC, FORCE_SYNC, SYNC_TO_PAUSE
    };

    /* Enabling this will slow down instance creation for null initialState as
     * an initial checkpoint will still be made. It will make no difference if
     * initialState is non-null as checkpointing non-null initial state is
     * necessary for safety.
     * 
     * The default setting must be true. Not allowing null checkpoints can cause
     * reconfiguration to stall as there is no way for the new epoch to
     * distinguish between no previous epoch final state and null previous epoch
     * final state. */
    protected static final boolean ENABLE_NULL_CHECKPOINT_STATE = true;

    /************ final Paxos state that is unchangeable after creation ***************/
    private final int[] groupMembers;
    // Object to allow easy testing across byte[] and String
    private final Object paxosID;
    private final int version;
    private final PaxosManager<?> paxosManager;
    // private final InterfaceReplicable clientRequestHandler;

    /************ Non-final paxos state that is changeable after creation *******************/
    // uses ~125B of empty space when not actively processing requests
    private PaxosAcceptor paxosState = null;
    // uses just a single pointer's worth of space unless I am a coordinator
    private PaxosCoordinator coordinator = null;
    /************ End of non-final paxos state ***********************************************/

    // static, so does not count towards space.
    private static Logger log = (PaxosManager.getLogger());

    PaxosInstanceStateMachine(String groupId, int version, int id, Set<Integer> gms, Replicable app,
            String initialState, PaxosManager<?> pm, final HotRestoreInfo hri, boolean missedBirthing) {

        /* Final assignments: A paxos instance is born with a paxosID, version
         * this instance's node ID, the application request handler, the paxos
         * manager, and the group members. */
        this.paxosID = PAXOS_ID_AS_STRING ? groupId : groupId.getBytes();
        this.version = version;
        // this.clientRequestHandler = app;
        this.paxosManager = pm;
        assert (gms != null && gms.size() > 0);
        Arrays.sort(this.groupMembers = Util.setToIntArray(gms));
        /**************** End of final assignments *******************/

        /* All non-final state is store in PaxosInstanceState (for acceptors) or
         * in PaxosCoordinatorState (for coordinators) that inherits from
         * PaxosInstanceState. */
        if (pm != null && hri == null)
            initiateRecovery(initialState, missedBirthing);
        else if ((hri != null) && hotRestore(hri)) {
            if (initialState != null) // batched creation
                // this.putInitialState(initialState);
                this.restore(initialState);
        } else if (pm == null)
            testingNoRecovery(); // used only for testing size
        assert (hri == null || initialState == null
                || hri.isCreateHRI()) : "Can not specify initial state for existing, paused paxos instance";
        incrInstanceCount(); // for instrumentation

        // log creation only if the number of instances is small
        log.log(((hri == null || initialState != null) && notManyInstances()) ? Level.INFO : Level.FINER,
                "Node{0} initialized paxos {1} {2} with members {3}; {4} {5} {6}",
                new Object[] { this.getNodeID(),
                        (this.paxosState.getBallotCoordLog() == this.getMyID() ? "coordinator" : "acceptor"),
                        this.getPaxosIDVersion(), Util.arrayOfIntToString(groupMembers), this.paxosState,
                        this.coordinator,
                        (initialState == null ? "{recovered_state=[" + Util.prefix(this.getCheckpointState(), 64)
                                : "{initial_state=[" + initialState) + "]}" });
    }

    /**
     * @return Version or epoch number corresponding to this reconfigurable
     *         paxos instance.
     */
    protected int getVersion() {
        return this.version;
    }

    // one of only two public methods
    public String getKey() {
        return this.getPaxosID();
    }

    public String toString() {
        return this.getNodeState();
    }

    protected String toStringLong() {
        return this.getNodeState() + this.paxosState + (this.coordinator != null ? this.coordinator : "");
    }

    /**
     * @return Paxos instance name concatenated with the version number.
     */
    protected String getPaxosIDVersion() {
        return this.getPaxosID() + ":" + this.getVersion();
    }

    protected String getPaxosID() {
        return (paxosID instanceof String ? (String) paxosID : new String((byte[]) paxosID));
    }

    protected int[] getMembers() {
        return this.groupMembers;
    }

    protected String getNodeID() {
        return this.paxosManager != null ? this.paxosManager.intToString(this.getMyID()) : "" + getMyID();
    }

    protected Replicable getApp() {
        return this.paxosManager.getApp(this.getPaxosID()); // this.clientRequestHandler;
    }

    protected PaxosManager<?> getPaxosManager() {
        return this.paxosManager;
    }

    protected int getMyID() {
        return (this.paxosManager != null ? this.paxosManager.getMyID() : -1);
    }

    /**
     * isStopped()==true means that this paxos instance is dead and completely
     * harmless (even if the underlying object has not been garbage collected by
     * the JVM. In particular, it can NOT make the app execute requests or send
     * out paxos messages to the external world.
     * 
     * @return Whether this paxos instance has been stopped.
     */
    protected boolean isStopped() {
        return this.paxosState.isStopped();
    }

    /**
     * Forces a synchronization wait. PaxosManager needs this to ensure that an
     * ongoing stop is fully executed.
     * 
     * @return True.
     */
    protected synchronized boolean synchronizedNoop() {
        return true;
    }

    // not synchronized as coordinator can die anytime anyway
    protected boolean forceStop() {
        if (!this.paxosState.isStopped())
            decrInstanceCount(); // for instrumentation
        PaxosCoordinator.forceStop(this.coordinator);
        this.coordinator = null;
        this.paxosState.forceStop(); //
        return true;
    }

    private boolean nullCheckpointStateEnabled() {
        return this.paxosManager.isNullCheckpointStateEnabled();
    }

    // removes all database and app state and can not be recovered anymore
    protected boolean kill(boolean clean) {
        // paxosState must be typically already stopped here
        this.forceStop();
        if (clean // clean kill implies reset app state
                && this.nullifyAppState(this.getPaxosID(), null)
                // and remove database state
                && AbstractPaxosLogger.kill(this.paxosManager.getPaxosLogger(), getPaxosID(), this.getVersion()))
            // paxos instance is "lost" now
            log.log(Level.FINE, "Paxos instance {0} cleanly terminated.", new Object[] { this });
        else
            // unclean "crash"
            log.severe(this + " crashing paxos instance " + getPaxosIDVersion()
                    + " likely because of an error while executing an application request. "
                    + "A paxos instance for " + getPaxosIDVersion()
                    + " or a higher version must either be explicitly (re-)created "
                    + "or this \"crashed\" instance will recover safely upon a reboot.");
        return true;
    }

    private boolean nullifyAppState(String paxosID, String state) {
        for (int i = 0; !this.restore(null); i++)
            if (waitRetry(RETRY_TIMEOUT) && i < RETRY_LIMIT)
                log.warning(this + " unable to delete application state; retrying");
            else
                throw new RuntimeException("Node" + getNodeID() + " unable to delete " + this.getPaxosIDVersion());
        return true;
    }

    private static final long RETRY_TIMEOUT = Config.getGlobalLong(PC.HANDLE_REQUEST_RETRY_INTERVAL);
    private static final int RETRY_LIMIT = Config.getGlobalInt(PC.HANDLE_REQUEST_RETRY_LIMIT);

    private static boolean waitRetry(long timeout) {
        try {
            Thread.sleep(timeout);
        } catch (InterruptedException e) {
            e.printStackTrace();
        }
        return true;
    }

    protected void setActive() {
        this.paxosState.setActive();
    }

    protected boolean isActive() {
        return this.paxosState.isActive();
    }

    private String getCheckpointState() {
        SlotBallotState sbs = this.paxosManager != null
                ? this.paxosManager.getPaxosLogger().getSlotBallotState(getPaxosID(), getVersion())
                : null;
        return sbs != null ? sbs.state : null;
    }

    /**
     * This is the main entry point into this class and is used by
     * {@link PaxosManager} to supply incoming packets.
     * 
     * @param obj
     *            JSONObject or RequestPacket.
     * @throws JSONException
     */
    protected void handlePaxosMessage(PaxosPacket obj) throws JSONException {
        this.handlePaxosMessage(obj, SyncMode.DEFAULT_SYNC);
    }

    /**
     * For legacy reasons, this method still accepts JSONObject in addition to
     * PaxosPacket as the first argument.
     * 
     * @param obj
     * @param mode
     * @throws JSONException
     */
    private void handlePaxosMessage(PaxosPacket pp, SyncMode mode) throws JSONException {
        long methodEntryTime = System.currentTimeMillis();
        assert (pp != null || !mode.equals(SyncMode.DEFAULT_SYNC));

        Level level = Level.FINEST;
        if (pp != null)
            log.log(level, "{0} received {1}", new Object[] { this, pp.getSummary(log.isLoggable(level)) });

        if (pp != null && pp.getVersion() != this.getVersion())
            return;

        /* Note: Because incoming messages may be handled concurrently, some
         * messages may continue to get processed for a little while after a
         * stop has been executed and even after isStopped() is true (because
         * isStopped() was false when those messages came in here). But that is
         * okay coz these messages can not spawn unsafe outgoing messages (as
         * messaging is turned off for all but DECISION or CHECKPOINT_STATE
         * packets) and can not change any disk state. */
        if (this.paxosState.isStopped()) {
            log.log(Level.INFO, "{0} stopped; dropping {1}", new Object[] { this, pp.getSummary() });
            return;
        }

        // recovery means we won't send any replies
        boolean recovery = pp != null ? PaxosPacket.isRecovery(pp) : false;
        /* The reason we should not process regular messages until this instance
         * has rolled forward is that it might respond to a prepare with a list
         * of accepts fetched from disk that may be inconsistent with its
         * acceptor state. */
        if (!this.paxosManager.hasRecovered(this) && !recovery)
            return; // only process recovery message during rollForward

        PaxosPacket.PaxosPacketType msgType = pp != null ? pp.getType() : PaxosPacket.PaxosPacketType.NO_TYPE;
        log.log(Level.FINEST, "{0} received {1}:{2}",
                new Object[] { this, msgType, pp != null ? pp.getSummary(log.isLoggable(Level.FINEST)) : pp });

        boolean isPoke = msgType.equals(PaxosPacketType.NO_TYPE);
        if (!isPoke)
            this.markActive();
        else
            log.log(Level.FINER, "{0} received NO_TYPE poke {1};", new Object[] { this, mode });

        MessagingTask[] mtasks = new MessagingTask[3];
        /* Check for coordinator'ing upon *every* message except poke messages.
         * Pokes are primarily for sync'ing decisions and could be also used to
         * resend accepts. There is little reason to send prepares proactively
         * if no new activity is happening. */
        mtasks[0] = (!recovery ?
        // check run for coordinator if not active
                (!PaxosCoordinator.isActive(this.coordinator)
                        // ignore pokes unless not caught up
                        && (!isPoke || !PaxosCoordinator.caughtUp(this.coordinator))) ? checkRunForCoordinator()
                                // else reissue long waiting accepts
                                : this.pokeLocalCoordinator()
                // neither during recovery
                : null);

        log.log(level, "{0} about to switch on packet type {1}",
                new Object[] { this, pp != null ? pp.getSummary(log.isLoggable(level)) : null });

        MessagingTask mtask = null;
        MessagingTask[] batchedTasks = null;

        switch (msgType) {
        case REQUEST:
            batchedTasks = handleRequest((RequestPacket) pp);
            // send RequestPacket to current coordinator
            break;
        // replica --> coordinator
        case PROPOSAL:
            batchedTasks = handleProposal((ProposalPacket) pp);
            // unicast ProposalPacket to coordinator or multicast AcceptPacket
            break;
        // coordinator --> replica
        case DECISION:
            mtask = handleCommittedRequest((PValuePacket) pp);
            // send nothing, but log decision
            break;
        case BATCHED_COMMIT:
            mtask = handleBatchedCommit((BatchedCommit) pp);
            // send nothing, but log decision
            break;
        // coordinator --> replica
        case PREPARE:
            mtask = handlePrepare((PreparePacket) pp);
            // send PreparePacket prepare reply to coordinator
            break;
        // replica --> coordinator
        case PREPARE_REPLY:
            mtask = handlePrepareReply((PrepareReplyPacket) pp);
            // send AcceptPacket[] to all
            break;
        // coordinator --> replica
        case ACCEPT:
            batchedTasks = handleAccept((AcceptPacket) pp);
            // send AcceptReplyPacket to coordinator
            break;
        // replica --> coordinator
        case ACCEPT_REPLY:
            mtask = handleAcceptReply((AcceptReplyPacket) pp);
            // send PValuePacket decision to all
            break;
        case BATCHED_ACCEPT_REPLY:
            batchedTasks = handleBatchedAcceptReply((BatchedAcceptReply) pp);
            // send PValuePacket decisions to all
            break;
        case BATCHED_ACCEPT:
            batchedTasks = handleBatchedAccept((BatchedAccept) pp);
            break;
        case SYNC_DECISIONS_REQUEST:
            mtask = handleSyncDecisionsPacket((SyncDecisionsPacket) pp);
            // send SynchronizeReplyPacket to sender
            break;
        case CHECKPOINT_STATE:
            mtask = handleCheckpoint((StatePacket) pp);
            break;
        case NO_TYPE: // not a real packet
            // sync if needed on poke
            mtasks[0] = (mtasks[0] != null) ? mtasks[0] : this.syncLongDecisionGaps(null, mode);
            break;
        default:
            assert (false) : "Paxos instance received an unrecognizable packet: " + (pp.getSummary());
        }
        mtasks[1] = mtask;

        // special case for method returning array of messaging tasks
        if (batchedTasks != null) {
            // mtasks[1] = batchedTasks[0];
            // mtasks[2] = batchedTasks[1];
            mtasks = MessagingTask.combine(mtasks, batchedTasks);
        }

        instrumentDelay(toLog.handlePaxosMessage, methodEntryTime);

        this.checkIfTrapped(pp, mtasks[1]); // just to print a warning
        if (!recovery) {
            this.sendMessagingTask(mtasks);
        }
    }

    /************** Start of private methods ****************/

    /* Invoked both when a paxos instance is first created and when it recovers
     * after a crash. It is all the same as far as the paxos instance is
     * concerned (provided we ensure that the app state after executing the
     * first request (slot 0) is checkpointed, which we do). */
    private boolean initiateRecovery(String initialState, boolean missedBirthing) {
        String pid = this.getPaxosID();
        // only place where version is checked
        SlotBallotState slotBallot = this.paxosManager.getPaxosLogger().getSlotBallotState(pid, this.getVersion());

        if (slotBallot != null) {
            log.log(Level.FINE, "{0} recovered state: {1}",
                    new Object[] { this, (slotBallot != null ? slotBallot.state : "NULL") });
            // check membership
            if (!slotBallot.members.equals(this.paxosManager.getStringNodesFromIntArray(groupMembers)))
                throw new PaxosInstanceCreationException(
                        "Paxos instance exists with a different replica group: " + (slotBallot.members));
            // update app state
            if (!this.restore(slotBallot.state))
                throw new PaxosInstanceCreationException("Unable to update app state with " + slotBallot.state);
        }

        this.coordinator = null;// new PaxosCoordinator(); // just a shell class
        // initial coordinator is assumed, not prepared
        if (slotBallot == null && roundRobinCoordinator(0) == this.getMyID())
            this.coordinator = PaxosCoordinator.createCoordinator(0, this.getMyID(), getMembers(),
                    (initialState != null || nullCheckpointStateEnabled() ? 1 : 0), true); // slotBallot==null
        /* Note: We don't have to create coordinator state here. It will get
         * created if needed when the first external (non-recovery) packet is
         * received. But we create the very first coordinator here as otherwise
         * it is possible that no coordinator gets elected as follows: the
         * lowest ID node wakes up and either upon an external or self-poke
         * message sends a prepare, but gets no responses because no other node
         * is up yet. In this case, the other nodes when they boot up will not
         * run for coordinator, and the lowest ID node will not resend its
         * prepare if no more requests come, so the first request could be stuck
         * in its pre-active queue for a long time. */

        // allow null state without null checkpoints just for memory testing
        if (slotBallot == null && initialState == null && !this.paxosManager.isNullCheckpointStateEnabled()
                && !Config.getGlobalBoolean(TC.MEMORY_TESTING))
            throw new PaxosInstanceCreationException("A paxos instance with null initial state can be"
                    + " created only if null checkpoints are enabled");

        /* If this is a "missed-birthing" instance creation, we still set the
         * acceptor nextSlot to 0 but don't checkpoint initialState. In fact,
         * initialState better be null here in that case as we can't possibly
         * have an initialState with missed birthing. */
        assert (!(missedBirthing && initialState != null));
        /* If it is possible for there to be no initial state checkpoint, under
         * missed birthing, an acceptor may incorrectly report its gcSlot as -1,
         * and if a majority do so (because that majority consists all of missed
         * birthers), a coordinator may propose a proposal for slot 0 even
         * though an initial state does exist, which would end up overwriting
         * the initial state. So we can not support ambiguity in whether there
         * is initial state or not. If we force initial state checkpoints (even
         * null state checkpoints) to always exist, missed birthers can always
         * set the initial gcSlot to 0. The exception and assert above imply the
         * assertion below. */
        assert (!missedBirthing || this.paxosManager.isNullCheckpointStateEnabled());

        this.paxosState = new PaxosAcceptor(slotBallot != null ? slotBallot.ballotnum : 0,
                slotBallot != null ? slotBallot.coordinator : this.roundRobinCoordinator(0),
                slotBallot != null ? (slotBallot.slot + 1) : 0, null);
        if (slotBallot == null && !missedBirthing)
            this.putInitialState(initialState); // will set nextSlot to 1
        if (missedBirthing)
            this.paxosState.setGCSlotAfterPuttingInitialSlot();

        if (slotBallot == null)
            // TESTPaxosConfig.setRecovered(this.getMyID(), pid, true)
            ;

        return true; // return value will be ignored
    }

    private boolean hotRestore(HotRestoreInfo hri) {
        // called from constructor only, hence assert
        assert (this.paxosState == null && this.coordinator == null);
        log.log(Level.FINE, "{0} hot restoring with {1}", new Object[] { this, hri });
        this.coordinator = hri.coordBallot != null && hri.coordBallot.coordinatorID == getMyID()
                ? PaxosCoordinator.hotRestore(this.coordinator, hri)
                : null;
        this.paxosState = new PaxosAcceptor(hri.accBallot.ballotNumber, hri.accBallot.coordinatorID, hri.accSlot,
                hri);
        this.paxosState.setActive(); // no recovery
        this.markActive(); // to prevent immediate re-pause
        return true;
    }

    private boolean putInitialState(String initialState) {
        if (this.getPaxosManager() == null || (initialState == null && !nullCheckpointStateEnabled()))
            return false;
        this.handleCheckpoint(new StatePacket(initialBallot(), 0, initialState));
        this.paxosState.setGCSlotAfterPuttingInitialSlot();
        return true;
    }

    private Ballot initialBallot() {
        return new Ballot(0, this.roundRobinCoordinator(0));
    }

    /* The one method for all message sending. Protected coz the logger also
     * calls this. */
    protected void sendMessagingTask(MessagingTask mtask) {
        if (mtask == null || mtask.isEmpty())
            return;
        if (this.paxosState != null && this.paxosState.isStopped()
                && !mtask.msgs[0].getType().equals(PaxosPacketType.DECISION)
                && !mtask.msgs[0].getType().equals(PaxosPacketType.CHECKPOINT_STATE))
            return;
        // if (TESTPaxosConfig.isCrashed(this.getMyID()))return;

        log.log(Level.FINEST, "{0} sending: {1}", new Object[] { this, mtask });
        mtask.putPaxosIDVersion(this.getPaxosID(), this.getVersion());
        try {
            // assert(this.paxosState.isActive());
            paxosManager.send(mtask);
        } catch (IOException ioe) {
            log.severe(this + " encountered IOException while sending " + mtask);
            ioe.printStackTrace();
            /* We can't throw this exception upward because it will get sent all
             * the way back up to PacketDemultiplexer whose incoming packet
             * initiated this whole chain of events. It seems silly for
             * PacketDemultiplexer to throw an IOException caused by the sends
             * resulting from processing that packet. So we should handle this
             * exception right here. But what should we do? We can ignore it as
             * the network does not need to be reliable anyway. Revisit as
             * needed. */
        } catch (JSONException je) {
            /* Same thing for other exceptions. Nothing useful to do here */
            log.severe(this + " encountered JSONException while sending " + mtask);
            je.printStackTrace();
        }
    }

    private void sendMessagingTask(MessagingTask[] mtasks) throws JSONException {
        for (MessagingTask mtask : mtasks)
            this.sendMessagingTask(mtask);
    }

    // will send a noop message to self to force event-driven actions
    protected void poke(boolean forceSync) {
        try {
            log.log(Level.FINE, "{0} being poked", new Object[] { this });
            this.handlePaxosMessage(null, forceSync ? SyncMode.FORCE_SYNC : SyncMode.SYNC_TO_PAUSE);
        } catch (JSONException je) {
            je.printStackTrace();
        }
    }

    private static final boolean BATCHING_ENABLED = Config.getGlobalBoolean(PC.BATCHING_ENABLED);

    /* "Phase0" Event: Received a request from a client.
     * 
     * Action: Call handleProposal which will send the corresponding proposal
     * to the current coordinator. */
    private MessagingTask[] handleRequest(RequestPacket request) {
        log.log(Level.FINE, "{0}{1}{2}",
                new Object[] { this, " Phase0/CLIENT_REQUEST: ", request.getSummary(log.isLoggable(Level.FINE)) });
        RequestInstrumenter.received(request, request.getClientID(), this.getMyID());

        if (!BATCHING_ENABLED || request.getEntryReplica() == IntegerMap.NULL_INT_NODE || request.isBroadcasted()) {
            this.paxosManager.incrOutstanding(request);
        }

        if (request.isBroadcasted()) {
            AcceptPacket accept = this.paxosManager.release(request);
            if (accept != null) {
                log.log(Level.FINE, "{0} released accept {1}",
                        new Object[] { this, accept.getSummary(log.isLoggable(Level.FINE)) });
                return this.handleAccept(accept);
            }
        }

        // multicast to others if digests enabled
        MessagingTask mtask = (this.paxosManager.shouldDigest() && request.getEntryReplica() == this.getMyID()
                && request.shouldBroadcast())
                        ? new MessagingTask(this.otherGroupMembers(),
                                request.setDigest(request.getDigest(this.paxosManager.getMessageDigest()))
                                        .setBroadcasted())
                        : null;

        return MessagingTask.combine(mtask, handleProposal(request));
    }

    private static final boolean DIGEST_REQUESTS = Config.getGlobalBoolean(PC.DIGEST_REQUESTS);
    private static final boolean BATCHED_ACCEPTS = Config.getGlobalBoolean(PC.DIGEST_REQUESTS)
            && !Config.getGlobalBoolean(PC.FLIP_BATCHED_ACCEPTS);

    /* "Phase0"->Phase2a: Event: Received a proposal [request, slot] from any
     * node.
     * 
     * Action: If a non-coordinator node receives a proposal, send to the
     * coordinator. Otherwise, propose it to acceptors with a good slot number
     * (thereby initiating phase2a for this request).
     * 
     * Return: A send either to a coordinator of the proposal or to all replicas
     * of the proposal with a good slot number. */
    private MessagingTask[] handleProposal(RequestPacket proposal) {
        assert (proposal.getEntryReplica() != IntegerMap.NULL_INT_NODE) : proposal;
        // could be multicast to all or unicast to coordinator
        MessagingTask[] mtasks = new MessagingTask[2];
        RequestInstrumenter.received(proposal, proposal.getForwarderID(), this.getMyID());
        if (PaxosCoordinator.exists(this.coordinator, this.paxosState.getBallot())) {
            // multicast ACCEPT to all
            AcceptPacket multicastAccept = null;
            proposal.addDebugInfoDeep("a");
            multicastAccept = PaxosCoordinator.propose(this.coordinator, this.groupMembers, proposal);
            if (multicastAccept != null) {
                assert (this.coordinator.getBallot().coordinatorID == getMyID()
                        && multicastAccept.sender == getMyID());
                if (proposal.isBroadcasted())
                    multicastAccept = this.paxosManager.digest(multicastAccept);
                mtasks[0] = multicastAccept != null ? new MessagingTask(this.groupMembers, multicastAccept) : null; // multicast
                RequestInstrumenter.sent(multicastAccept, this.getMyID(), -1);
                log.log(Level.FINER, "{0} issuing accept {1} ",
                        new Object[] { this, multicastAccept.getSummary(log.isLoggable(Level.FINER)) });
            }
        } else if (!proposal.isBroadcasted()) { // else unicast to current
            // coordinator
            log.log(Level.FINER, "{0} is not the coordinator; forwarding to {1}: {2}", new Object[] { this,
                    this.paxosState.getBallotCoordLog(), proposal.getSummary(log.isLoggable(Level.FINER)) });
            int coordinator = this.paxosState.getBallotCoord();

            mtasks[0] = new MessagingTask(this.paxosManager.isNodeUp(coordinator) ? coordinator
                    // send to next coordinator if current seems dead
                    : (coordinator = this.getNextCoordinator(this.paxosState.getBallot().ballotNumber + 1,
                            groupMembers)),
                    proposal.setForwarderID(this.getMyID())); // unicast
            if ((proposal.isPingPonging() || coordinator == this.getMyID())) {
                if (proposal.isPingPonging())
                    log.warning(this + " jugglinging ping-ponging proposal: " + proposal.getSummary()
                            + " forwarded by " + proposal.getForwarderID());
                Level level = Level.INFO;
                log.log(level,
                        "{0} force running for coordinator; forwardCount={1}; debugInfo = {2}; coordinator={3}",
                        new Object[] { this, proposal.getForwardCount(),
                                proposal.getDebugInfo(log.isLoggable(level)), coordinator });
                if (proposal.getForwarderID() != this.getMyID())
                    mtasks[1] = new MessagingTask(getMyID(), mtasks[0].msgs);
                mtasks[0] = this.checkRunForCoordinator(true);
            } else { // forwarding
                proposal.addDebugInfo("f", coordinator);
            }
        }
        return mtasks;
    }

    /* Phase1a Event: Received a prepare request for a ballot, i.e. that
     * ballot's coordinator is acquiring proposing rights for all slot numbers
     * (lowest uncommitted up to infinity)
     * 
     * Action: This node needs to check if it has accepted a higher numbered
     * ballot already and if not, it can accept this ballot, thereby promising
     * not to accept any lower ballots.
     * 
     * Return: Send prepare reply with proposal values previously accepted to
     * the sender (the received ballot's coordinator). */
    private MessagingTask handlePrepare(PreparePacket prepare) {
        paxosManager.heardFrom(prepare.ballot.coordinatorID); // FD optimization

        Ballot prevBallot = this.paxosState.getBallot();
        PrepareReplyPacket prepareReply = this.paxosState.handlePrepare(prepare, this.paxosManager.getMyID());
        if (prepareReply == null)
            return null; // can happen only if acceptor is stopped
        if (prepare.isRecovery())
            return null; // no need to get accepted pvalues from disk during
                         // recovery as networking is disabled anyway

        // may also need to look into disk if ACCEPTED_PROPOSALS_ON_DISK is true
        if (PaxosAcceptor.GET_ACCEPTED_PVALUES_FROM_DISK
                // no need to gather pvalues if NACKing anyway
                && prepareReply.ballot.compareTo(prepare.ballot) == 0)
            prepareReply.accepted.putAll(this.paxosManager.getPaxosLogger().getLoggedAccepts(this.getPaxosID(),
                    this.getVersion(), prepare.firstUndecidedSlot));

        for (PValuePacket pvalue : prepareReply.accepted.values())
            // if I accepted a pvalue, my acceptor ballot must reflect it
            assert (this.paxosState.getBallot().compareTo(pvalue.ballot) >= 0) : this + ":" + pvalue;

        log.log(Level.INFO, "{0} {1} {2} with {3}",
                new Object[] { this, prepareReply.ballot.compareTo(prepare.ballot) > 0 ? "preempting" : "acking",
                        prepare.ballot, prepareReply.getSummary(log.isLoggable(Level.INFO)) });

        MessagingTask mtask = prevBallot.compareTo(prepareReply.ballot) < 0 ?
        // log only if not already logged (if my ballot got upgraded)
                new LogMessagingTask(prepare.ballot.coordinatorID,
                        // ensures large prepare replies are fragmented
                        PrepareReplyAssembler.fragment(prepareReply), prepare)
                // else just send prepareReply
                : new MessagingTask(prepare.ballot.coordinatorID, PrepareReplyAssembler.fragment(prepareReply));
        for (PaxosPacket pp : mtask.msgs)
            assert (((PrepareReplyPacket) pp).getLengthEstimate() < NIOTransport.MAX_PAYLOAD_SIZE) : Util
                    .suicide(this + " trying to return unfragmented prepare reply of size "
                            + ((PrepareReplyPacket) pp).getLengthEstimate() + " : " + pp.getSummary()
                            + "; prevBallot = " + prevBallot);
        return mtask;
    }

    /* Phase1b Event: Received a reply to my ballot preparation request.
     * 
     * Action: If the reply contains a higher ballot, we must resign.
     * Otherwise, if we acquired a majority with the receipt of this reply, send
     * all previously accepted (but uncommitted) requests reported in the
     * prepare replies, each in its highest reported ballot, to all replicas.
     * These are the proposals that get carried over across a ballot change and
     * must be re-proposed.
     * 
     * Return: A list of messages each of which has to be multicast (proposed)
     * to all replicas. */
    private MessagingTask handlePrepareReply(PrepareReplyPacket prepareReply) {
        // necessary to defragment first for safety
        if ((prepareReply = PrepareReplyAssembler.processIncoming(prepareReply)) == null) {
            return null;
        }
        this.paxosManager.heardFrom(prepareReply.acceptor); // FD optimization,
        MessagingTask mtask = null;
        ArrayList<ProposalPacket> preActiveProposals = null;
        ArrayList<AcceptPacket> acceptList = null;

        if ((preActiveProposals = PaxosCoordinator.getPreActivesIfPreempted(this.coordinator, prepareReply,
                this.groupMembers)) != null) {
            log.log(Level.INFO, "{0} ({1}) election PREEMPTED by {2}",
                    new Object[] { this, PaxosCoordinator.getBallotStr(this.coordinator), prepareReply.ballot });
            this.coordinator = null;
            if (!preActiveProposals.isEmpty())
                mtask = new MessagingTask(prepareReply.ballot.coordinatorID,
                        (preActiveProposals.toArray(new PaxosPacket[0])));
        } else if ((acceptList = PaxosCoordinator.handlePrepareReply(this.coordinator, prepareReply,
                this.groupMembers)) != null && !acceptList.isEmpty()) {
            mtask = new MessagingTask(this.groupMembers, ((acceptList).toArray(new PaxosPacket[0])));
            log.log(Level.INFO, "{0} elected coordinator; sending {1}", new Object[] { this, mtask });
        } else
            log.log(Level.FINE, "{0} received prepare reply {1}",
                    new Object[] { this, prepareReply.getSummary(log.isLoggable(Level.INFO)) });

        return mtask; // Could be unicast or multicast
    }

    private static final boolean GC_MAJORITY_EXECUTED = Config.getGlobalBoolean(PC.GC_MAJORITY_EXECUTED);

    /* Phase2a Event: Received an accept message for a proposal with some
     * ballot.
     * 
     * Action: Send back current or updated ballot to the ballot's coordinator. */
    private static final boolean EXECUTE_UPON_ACCEPT = Config.getGlobalBoolean(PC.EXECUTE_UPON_ACCEPT);

    private MessagingTask[] handleAccept(AcceptPacket accept) {
        this.paxosManager.heardFrom(accept.ballot.coordinatorID); // FD
        RequestInstrumenter.received(accept, accept.sender, this.getMyID());

        // if(!accept.hasRequestValue())
        // DelayProfiler.updateCount("C_DIGESTED_ACCEPTS_RCVD",
        // accept.batchSize()+1);

        AcceptPacket copy = accept;
        if (DIGEST_REQUESTS && !accept.hasRequestValue() && (accept = this.paxosManager.match(accept)) == null) {
            log.log(Level.FINE, "{0} received unmatched accept ",
                    new Object[] { this, copy.getSummary(log.isLoggable(Level.FINE)) });
            // if(this.paxosState.getSlot() - copy.slot > 0)
            // DelayProfiler.updateCount("C_EXECD_ACCEPTS_RCVD",
            // copy.batchSize()+1);
            return new MessagingTask[0];
        } else
            log.log(Level.FINER, "{0} received matching accept ", new Object[] { this, accept.getSummary() });

        // DelayProfiler.updateCount("C_ACCEPTS_RCVD", accept.batchSize()+1);
        assert (accept.hasRequestValue());

        if (instrument(10))
            DelayProfiler.updateMovAvg("#batched", accept.batchSize() + 1);
        if ((this.paxosState.getAccept(accept.slot) == null) && (this.paxosState.getSlot() - accept.slot <= 0))
            this.paxosManager.incrOutstanding(accept.addDebugInfoDeep("a")); // stats

        if (EXECUTE_UPON_ACCEPT) { // only for testing
            PaxosInstanceStateMachine.execute(this, getPaxosManager(), this.getApp(), accept, false);
            if (Util.oneIn(10))
                log.info(DelayProfiler.getStats());
            // return null;
        }

        // have acceptor handle accept
        Ballot ballot = null;
        PValuePacket prev = this.paxosState.getAccept(accept.slot);
        try {
            ballot = !EXECUTE_UPON_ACCEPT ? this.paxosState.acceptAndUpdateBallot(accept, this.getMyID())
                    : this.paxosState.getBallot();
        } catch (Error e) {
            log.severe(this + " : " + e.getMessage());
            Util.suicide(e.getMessage());
        }
        if (ballot == null)
            return null; // can happen only if acceptor is stopped.

        this.garbageCollectAccepted(accept.getMedianCheckpointedSlot());
        if (accept.isRecovery())
            return null; // recovery ACCEPTS do not need any reply

        AcceptReplyPacket acceptReply = new AcceptReplyPacket(this.getMyID(), ballot, accept.slot,
                GC_MAJORITY_EXECUTED ? this.paxosState.getSlot() - 1
                        : lastCheckpointSlot(this.paxosState.getSlot() - 1, accept.getPaxosID()),
                accept.requestID);

        // no logging if NACking anyway
        AcceptPacket toLog = (accept.ballot.compareTo(ballot) >= 0
                // no logging if already garbage collected or previously accepted
                && accept.slot - this.paxosState.getGCSlot() > 0
                && (prev == null || prev.ballot.compareTo(accept.ballot) < 0)) ? accept : null;

        MessagingTask acceptReplyTask = accept.isRecovery() ? new LogMessagingTask(toLog)
                : toLog != null ? new LogMessagingTask(accept.sender, acceptReply, toLog)
                        : new MessagingTask(accept.sender, acceptReply);
        RequestInstrumenter.sent(acceptReply, this.getMyID(), accept.sender);

        // might release some meta-commits
        PValuePacket reconstructedDecision = this.paxosState.reconstructDecision(accept.slot);
        MessagingTask commitTask = reconstructedDecision != null
                ? this.handleCommittedRequest(reconstructedDecision)
                : null;

        MessagingTask[] mtasks = { acceptReplyTask, commitTask };

        return mtasks;
    }

    /* Batched version of handleAccept, which is meaningful only when request
     * digests are enabled. Enabling digests is particularly beneficial with one
     * or small number of active paxos groups that is less than the average size
     * of a paxos group as it helps balance the coordinator load. With many
     * paxos groups, digests actually increase the number of messages by n-1 per
     * paxos round but batching accepts helps reduce that added overhead.
     * 
     * With many groups, even with batched accepts, digests are still a net loss
     * for two reasons. The first is the increased message count. The second is
     * the added overhead of serializing reconstructed accepts while logging
     * them. Without digests, serialization for logging purposes comes for free
     * because we cache the stringified version of the received accept. */
    private static final boolean SHORT_CIRCUIT_LOCAL = Config.getGlobalBoolean(PC.SHORT_CIRCUIT_LOCAL);

    private MessagingTask[] handleBatchedAccept(BatchedAccept batchedAccept) {
        assert (BATCHED_ACCEPTS && DIGEST_REQUESTS);
        ArrayList<MessagingTask> mtasks = new ArrayList<MessagingTask>();
        for (Integer slot : batchedAccept.getAcceptSlots()) {
            assert (batchedAccept.getDigest(slot) != null);
            /* Need to put paxosID and version right here as opposed to relying
             * on the exit procedure because we need that in order to match it
             * with a request in pendingDigests. */
            AcceptPacket digestedAccept = new AcceptPacket(batchedAccept.ballot.coordinatorID,
                    new PValuePacket(batchedAccept.ballot, new ProposalPacket(slot,
                            (RequestPacket) (new RequestPacket(batchedAccept.getRequestID(slot), null, false)
                                    .setDigest(batchedAccept.getDigest(slot))
                                    .putPaxosID(getPaxosID(), getVersion())))),
                    batchedAccept.getMedianCheckpointedSlot());
            AcceptPacket accept = this.paxosManager.match(digestedAccept);
            if (accept != null) {
                MessagingTask[] mtasksHandleAccept = this.handleAccept(accept);
                if (mtasksHandleAccept != null)
                    for (MessagingTask mtask : mtasksHandleAccept)
                        if (mtask != null && !mtask.isEmpty())
                            mtasks.add(mtask);
            } else {
                assert (!SHORT_CIRCUIT_LOCAL || digestedAccept.sender != getMyID()) : digestedAccept;
                log.log(Level.FINE, "{0} received unmatched digested accept {1} within batched accept {2}",
                        new Object[] { this, digestedAccept.getSummary(log.isLoggable(Level.FINE)),
                                batchedAccept.getSummary(log.isLoggable(Level.FINE)) });
            }
        }
        return mtasks.toArray(new MessagingTask[0]);
    }

    /* We don't need to implement this. Accept logs are pruned while
     * checkpointing anyway, which is enough. Worse, it is probably inefficient
     * to touch the disk for GC upon every new gcSlot (potentially every accept
     * and decision). */
    private void garbageCollectAccepted(int gcSlot) {
    }

    /* Phase2b Event: Received a reply to an accept request, i.e. to a request
     * to accept a proposal from the coordinator.
     * 
     * Action: If this reply results in a majority for the corresponding
     * proposal, commit the request and notify all. If this preempts a proposal
     * being coordinated because it contains a higher ballot, forward to the
     * preempting coordinator in the higher ballot reported.
     * 
     * Return: The committed proposal if any to be multicast to all replicas, or
     * the preempted proposal if any to be unicast to the preempting
     * coordinator. Null if neither. */
    private MessagingTask handleAcceptReply(AcceptReplyPacket acceptReply) {
        this.paxosManager.heardFrom(acceptReply.acceptor); // FD optimization
        RequestInstrumenter.received(acceptReply, acceptReply.acceptor, this.getMyID());

        // handle if undigest request first
        if (acceptReply.isUndigestRequest()) {
            AcceptPacket accept = this.paxosState.getAccept(acceptReply.slotNumber);
            assert (accept == null || accept.hasRequestValue());
            log.log(Level.INFO, "{0} returning accept {1} for undigest request {2}",
                    new Object[] { this, accept != null ? accept.getSummary() : accept, acceptReply.getSummary() });
            return accept != null ? new MessagingTask(acceptReply.acceptor, accept) : null;
        }

        PValuePacket committedPValue = PaxosCoordinator.handleAcceptReply(this.coordinator, this.groupMembers,
                acceptReply);
        if (!PaxosCoordinator.exists(this.coordinator))
            this.coordinator = null;
        if (committedPValue == null)
            return null;

        MessagingTask multicastDecision = null;
        // separate variables only for code readability
        MessagingTask unicastPreempted = null;
        // could also call handleCommittedRequest below
        if (committedPValue.getType() == PaxosPacket.PaxosPacketType.DECISION) {
            committedPValue.addDebugInfo("d");
            // this.handleCommittedRequest(committedPValue);
            multicastDecision = new MessagingTask(this.groupMembers, committedPValue); // inform everyone of the decision
            log.log(Level.FINE, "{0} announcing decision {1}",
                    new Object[] { this, committedPValue.getSummary(log.isLoggable(Level.FINE)) });
            if (instrument(Integer.MAX_VALUE)) {
                DelayProfiler.updateCount("PAXOS_DECISIONS", 1);
                DelayProfiler.updateCount("CLIENT_COMMITS", committedPValue.batchSize() + 1);
            }
        } else if (committedPValue.getType() == PaxosPacket.PaxosPacketType.PREEMPTED) {
            /* Could drop the request, but we forward the preempted proposal as
             * a no-op to the new coordinator for testing purposes. The new(er)
             * coordinator information is within acceptReply. Note that our
             * coordinator status may still be active and it will be so until
             * all of its requests have been preempted. Note also that our local
             * acceptor might still think we are the coordinator. The only
             * evidence of a new coordinator is in acceptReply that must have
             * reported a higher ballot if we are here, hence the assert.
             * 
             * Warning: Can not forward the preempted request as-is to the new
             * coordinator as this can result in multiple executions of a
             * request. Although the multiple executions will have different
             * slot numbers and will not violate paxos safety, this is extremely
             * undesirable for most applications. */
            assert (committedPValue.ballot.compareTo(acceptReply.ballot) < 0) : (committedPValue + " >= "
                    + acceptReply);
            if (!committedPValue.isNoop() || shouldForwardNoops()) {
                // forward only if not already a no-op
                unicastPreempted = new MessagingTask(acceptReply.ballot.coordinatorID,
                        committedPValue.makeNoop().setForwarderID(this.getMyID()));
                committedPValue.addDebugInfo("f");
                log.log(Level.INFO, "{0} forwarding preempted request as no-op to node {1}:{2}",
                        new Object[] { this, acceptReply.ballot.coordinatorID, committedPValue.getSummary() });
            } else
                log.log(Level.WARNING, "{0} dropping no-op preempted by coordinator {1}: {2}",
                        new Object[] { this, acceptReply.ballot.coordinatorID, committedPValue.getSummary() });
        }

        if (EXECUTE_UPON_ACCEPT)
            return null;

        return committedPValue.getType() == PaxosPacket.PaxosPacketType.DECISION ? multicastDecision
                : unicastPreempted;
    }

    /* Each accept reply can generate a decision here, so we need to batch the
     * resulting decisions into a single messaging task. Some of these can be
     * preempted pvalues as well, so we need to sort them out too (sigh!).
     * Probably need a cleaner design here. */
    private MessagingTask[] handleBatchedAcceptReply(BatchedAcceptReply batchedAR) {
        this.paxosManager.heardFrom(batchedAR.acceptor);
        ArrayList<MessagingTask> preempts = new ArrayList<MessagingTask>();
        ArrayList<MessagingTask> decisions = new ArrayList<MessagingTask>();

        Integer[] acceptedSlots = batchedAR.getAcceptedSlots();
        // DelayProfiler.updateCount("BATCHED_ACCEPT_REPLIES", 1);

        // sort out decisions from preempted proposals
        for (Integer slot : acceptedSlots) {
            MessagingTask mtask = this.handleAcceptReply(new AcceptReplyPacket(batchedAR.acceptor, batchedAR.ballot,
                    slot, batchedAR.maxCheckpointedSlot, 0, batchedAR));
            assert (mtask == null || mtask.msgs.length == 1);

            if (mtask != null)
                if (((PValuePacket) mtask.msgs[0]).getType().equals(PaxosPacket.PaxosPacketType.PREEMPTED))
                    preempts.add(mtask);
                else if (((PValuePacket) mtask.msgs[0]).getType().equals(PaxosPacket.PaxosPacketType.DECISION))
                    decisions.add(mtask);
                else if (((PValuePacket) mtask.msgs[0]).getType().equals(PaxosPacket.PaxosPacketType.ACCEPT))
                    return mtask.toArray();
                else
                    assert (false);
        }

        // batch each of the two into single messaging task
        PaxosPacket[] decisionMsgs = new PaxosPacket[decisions.size()];
        PaxosPacket[] preemptsMsgs = new PaxosPacket[preempts.size()];
        for (int i = 0; i < decisions.size(); i++)
            decisionMsgs[i] = decisions.get(i).msgs[0];
        for (int i = 0; i < preempts.size(); i++)
            preemptsMsgs[i] = preempts.get(i).msgs[0];

        MessagingTask decisionsMTask = new MessagingTask(this.groupMembers, decisionMsgs);
        MessagingTask preemptsMTask = new MessagingTask(this.groupMembers, preemptsMsgs);
        assert (preempts.isEmpty());

        MessagingTask[] mtasks = { decisionsMTask, preemptsMTask };
        return mtasks;
    }

    // whether to "save" a noop, i.e., an already preempted request
    private static final boolean shouldForwardNoops() {
        return false;
    }

    private static boolean BATCHED_COMMITS = Config.getGlobalBoolean(PC.BATCHED_COMMITS);

    /* Phase3 Event: Received notification about a committed proposal.
     * 
     * Action: This method is responsible for executing a committed request.
     * For this, it needs to call a handler implementing the PaxosInterface
     * interface. */
    private static final boolean LOG_META_DECISIONS = Config.getGlobalBoolean(PC.LOG_META_DECISIONS);

    private MessagingTask handleCommittedRequest(PValuePacket committed) {
        assert (committed.getPaxosID() != null);
        RequestInstrumenter.received(committed, committed.ballot.coordinatorID, this.getMyID());
        if (instrument(!BATCHED_COMMITS) && committed.ballot.coordinatorID != this.getMyID())
            DelayProfiler.updateCount("COMMITS", 1);

        if (!committed.isCoalescable() && !committed.isRecovery() && committed.ballot.coordinatorID != getMyID())
            log.log(Level.INFO, "{0} received syncd decision {1}", new Object[] { this, committed.getSummary() });

        PValuePacket correspondingAccept = null;
        // log, extract from or add to acceptor, and execute the request at app
        if (!committed.isRecovery() && (committed.hasRequestValue() || LOG_META_DECISIONS))
            AbstractPaxosLogger.logDecision(this.paxosManager.getPaxosLogger(),
                    // only log meta decision if we have the accept
                    LOG_META_DECISIONS
                            // have corresponding accept
                            && (correspondingAccept = this.paxosState.getAccept(committed.slot)) != null
                            // and corresponding accept ballot dominates
                            && correspondingAccept.ballot.compareTo(committed.ballot) >= 0
                                    ? committed.getMetaDecision()
                                    :
                                    /* Could still be a placeholder meta decision as we
                                     * may have gotten this decision through a batched
                                     * commit without the corresponding accept. We log
                                     * meta decisions because they might be useful to
                                     * sync other replicas and reduce some logging
                                     * overhead for large requests. */
                                    committed);

        MessagingTask mtask = this.extractExecuteAndCheckpoint(committed);

        if (this.paxosState.getSlot() - committed.slot < 0)
            log.log(Level.FINE, "{0} expecting {1}; received out-of-order commit {2} {3}",
                    new Object[] { this, this.paxosState.getSlotLog(), committed.slot,
                            committed.getSummary(log.isLoggable(Level.FINE)) });

        return mtask;
    }

    private MessagingTask handleBatchedCommit(BatchedCommit batchedCommit) {
        assert (BATCHED_COMMITS);
        // batched commits can only come directly from the coordinator
        this.paxosManager.heardFrom(batchedCommit.ballot.coordinatorID); // FD
        MessagingTask mtask = null;

        // if (instrument()) DelayProfiler.updateCount("META_COMMITS", 1);

        for (Integer slot : batchedCommit.getCommittedSlots()) {
            // check if we have the corresponding accept
            PValuePacket accept = this.paxosState.getAccept(slot);
            MessagingTask curTask = null;
            if (accept != null && accept.ballot.equals(batchedCommit.ballot)) {
                log.log(Level.FINE, "{0} found decision for slot {1} upon receiving {2}",
                        new Object[] { this, slot, batchedCommit.getSummary(log.isLoggable(Level.FINE)) });
                // keep overwriting mtask with the most recent non-null mtask
                curTask = this.handleCommittedRequest(
                        new PValuePacket(accept).makeDecision(batchedCommit.getMedianCheckpointedSlot()));
            } else if (BATCHED_COMMITS) {
                log.log(Level.FINE, "{0} received slot {1} batched decision {2}, generating placeholder",
                        new Object[] { this, slot, batchedCommit.getSummary(log.isLoggable(Level.FINE)) });
                // make up a placeholder decision
                curTask = this.handleCommittedRequest((PValuePacket) new PValuePacket(batchedCommit.ballot,
                        new ProposalPacket(slot,
                                // null request value
                                new RequestPacket(0, null, false)))
                                        .makeDecision(batchedCommit.getMedianCheckpointedSlot())
                                        .putPaxosID(getPaxosID(), getVersion()));
            }
            if (curTask != null)
                mtask = curTask;
        }
        return mtask;
    }

    private static final boolean DISABLE_SYNC_DECISIONS = Config.getGlobalBoolean(PC.DISABLE_SYNC_DECISIONS);

    /* Typically invoked by handleCommittedRequest above. Also invoked at
     * instance creation time if outOfOrderLimit low to deal with the
     * progress-wise unsatisfying scenario where a paxos instance gets created
     * just after other replicas have committed the first few decisions; if so,
     * the newly starting replica will have no reason to suspect that anything
     * is missing and may never catch up if no other decisions get committed
     * (say, because the paxos instance gets stopped before any more decisions).
     * It is good to prevent such scenarios (even though they don't affect
     * safety), so we have shouldSync always return true at creation time i.e.,
     * expected slot is 0 or 1.
     * 
     * forceSync is used only in the beginning in the case of missedBirthing. */
    private MessagingTask syncLongDecisionGaps(PValuePacket committed, SyncMode syncMode) {

        MessagingTask fixGapsRequest = null;
        if (this.paxosState.canSync(this.paxosManager.getMinResyncDelay())
                && (this.shouldSync((committed != null ? committed.slot : this.paxosState.getMaxCommittedSlot()),
                        this.getPaxosManager().getOutOfOrderLimit(), syncMode))) {
            fixGapsRequest = this.requestMissingDecisions(
                    committed != null ? committed.ballot.coordinatorID : this.paxosState.getBallotCoord());
            if (fixGapsRequest != null) {
                log.log(Level.INFO, "{0} sending {1}; maxCommittedSlot = {2}; ",
                        new Object[] { this, fixGapsRequest, this.paxosState.getMaxCommittedSlot() });
                this.paxosState.justSyncd();
            }
        }
        return fixGapsRequest;
    }

    private MessagingTask syncLongDecisionGaps(PValuePacket committed) {
        return this.syncLongDecisionGaps(committed, SyncMode.DEFAULT_SYNC);
    }

    protected boolean isLongIdle() {
        return this.paxosState.isLongIdle();
    }

    private boolean checkIfTrapped(PaxosPacket incoming, MessagingTask mtask) {
        if (this.isStopped() && mtask != null) {
            log.log(Level.FINE, "{0} DROPPING message {1} trapped inside stopped instance",
                    new Object[] { this, incoming, mtask });
            return true;
        }
        return false;
    }

    private static enum toLog {
        EEC(false, 100), handlePaxosMessage(false, 100);
        final boolean log;
        final int sampleInt;

        toLog(boolean log, int sampleInt) {
            this.log = log;
            this.sampleInt = sampleInt;
        }

        boolean log() {
            return this.log && instrument(sampleInt);
        }
    };

    private static final int AGREEMENT_LATENCY_SAMPLING = 100;
    private static final int EXECUTION_LATENCY_SAMPLING = 100;

    /* The three actions--(1) extracting the next slot request from the
     * acceptor, (2) having the app execute the request, and (3) checkpoint if
     * needed--need to happen atomically. If the app throws an error while
     * executing the request, we need to retry until successful, otherwise, the
     * replicated state machine will be stuck. So, essentially, the app has to
     * support atomicity or the operations have to be idempotent for correctness
     * of the replicated state machine.
     * 
     * This method is protected, not private, because it needs to be called by
     * the logger after it is done logging the committed request. Having the
     * logger call this method is only space-efficient design alternative. */
    protected/* synchronized */MessagingTask extractExecuteAndCheckpoint(PValuePacket loggedDecision) {
        long methodEntryTime = System.currentTimeMillis();
        int execCount = 0;
        PValuePacket inorderDecision = null;
        synchronized (this) {
            if (this.paxosState.isStopped())
                return null;
            // extract next in-order decision
            while ((inorderDecision = this.paxosState.putAndRemoveNextExecutable(loggedDecision)) != null) {
                log.log(inorderDecision.isStopRequest() ? Level.FINE : Level.FINE,
                        "{0} received in-order commit {1} {2}",
                        new Object[] { this, inorderDecision.slot, inorderDecision.getSummary() });
                String pid = this.getPaxosID();

                if (inorderDecision.getEntryReplica() == this.getMyID() && instrument(AGREEMENT_LATENCY_SAMPLING))
                    DelayProfiler.updateDelay("agreement", inorderDecision.getEntryTime());
                updateRequestBatcher(inorderDecision, loggedDecision == null);

                long t = System.currentTimeMillis();
                /* Execute it until successful, we are *by design* stuck
                 * otherwise. Execution must be atomic with extraction and
                 * possible checkpointing below. */
                if (!EXECUTE_UPON_ACCEPT) // used for testing
                    if (execute(this, this.paxosManager, this.getApp(), inorderDecision,
                            inorderDecision.isRecovery()))
                        // +1 for each batch, not for each constituent
                        // requestValue
                        execCount++;
                    // unclean kill
                    else if (this.forceStop())
                        break;

                if (instrument(EXECUTION_LATENCY_SAMPLING))
                    DelayProfiler.updateDelay(AbstractPaxosLogger.appName + ".execute", t,
                            inorderDecision.batchSize() + 1);

                // getState must be atomic with the execution
                if (shouldCheckpoint(inorderDecision) && !inorderDecision.isRecovery())

                    consistentCheckpoint(this, inorderDecision.isStopRequest(), pid, this.version,
                            this.paxosManager.getStringNodesFromIntArray(this.groupMembers), inorderDecision.slot,
                            this.paxosState.getBallot(), this.paxosState.getGCSlot());

                /* If stop request, copy epoch final state and kill self. If
                 * copy is not successful, we could get stuck trying to create
                 * future versions for this paxosID. */
                if (inorderDecision.isStopRequest() && this.paxosManager.getPaxosLogger()
                        .copyEpochFinalCheckpointState(getPaxosID(), getVersion())
                        && (logStop(inorderDecision.getEntryTime())))
                    // this.paxosManager.kill(this, true);
                    break;
            }
            this.paxosState.assertSlotInvariant();
        }
        /* The kill has been moved out of the synchronized block above as the
         * synchronized(this) is unnecessary and creates a potential deadlock
         * with scenarios like pause where paxosManger is first locked and then
         * this instance's acceptor is locked if in the future we make the
         * PaxosAcceptor inherit from PaxosInstanceStateMachine. */
        if (inorderDecision != null && inorderDecision.isStopRequest() && this.isStopped())
            this.paxosManager.kill(this, true);

        if (loggedDecision != null && !loggedDecision.isRecovery())
            instrumentDelay(toLog.EEC, methodEntryTime, execCount);
        return loggedDecision != null && !loggedDecision.isRecovery() ? this.syncLongDecisionGaps(loggedDecision)
                : null;
    }

    /* This method synchronizes over paxosManager because otherwise we have no
     * way of ensuring that a stopped paxos instance does not go ahead and
     * overwrite a higher version checkpoint. An alternative to implement this
     * check in putCheckpointState in the logger that anyway does a read before
     * a write, but it is cleaner to have the following invariant here.
     * 
     * Invariant: A paxos instance can not checkpoint if a higher paxos instance
     * has been (or is being) created. */
    private static String consistentCheckpoint(PaxosInstanceStateMachine pism, boolean isStop, String paxosID,
            int version, Set<String> members, int slot, Ballot ballot, int gcSlot) {
        log.log(Level.FINE, "{0} checkpointing at slot {1}; isStop={2}", new Object[] { pism, slot, isStop });
        synchronized (pism.getPaxosManager()) {
            return pism.canCheckpoint()
                    ? AbstractPaxosLogger.checkpoint(pism.getPaxosManager().getPaxosLogger(), isStop, paxosID,
                            version, members, slot, ballot, pism.getApp().checkpoint(paxosID), gcSlot)
                    : null;
        }
    }

    // initial checkpoint or not de-mapped yet
    private boolean canCheckpoint() {
        return this.paxosState.isRecovering() || this.getPaxosManager().isCurrent(getPaxosID(), getVersion());
    }

    private void updateRequestBatcher(PValuePacket inorderDecision, boolean handledCP) {
        /* Use entry time only if I am entry replica so that we don't have to
         * worry about clock synchronization; only if not under recovery; and
         * only if the decision was received directly as opposed to via
         * handleCheckpoint.
         * 
         * FIXME: should probably exclude all sync decision responses, not just
         * immediately after handleCheckpoint. */
        if (inorderDecision.getEntryReplica() == getMyID() && !inorderDecision.isRecovery() && !handledCP) {
            assert (inorderDecision.getEntryTime() <= System.currentTimeMillis()) : inorderDecision.getEntryTime();
            RequestBatcher.updateSleepDuration(inorderDecision.getEntryTime());
        }
    }

    /**
     * Helper method used above in EEC as well as by PaxosManager for emulating
     * unreplicated execution for testing purposes.
     * 
     * protected only so that PaxosManager can call this directly to test
     * emulateUnreplicated mode.
     */
    protected static boolean execute(PaxosInstanceStateMachine pism, PaxosManager<?> paxosManager, Replicable app,
            RequestPacket decision, boolean recoveryMode) {

        boolean shouldLog = instrument(
                5 * getCPI(paxosManager.getInterCheckpointInterval(), decision.getPaxosID()));
        for (RequestPacket requestPacket : decision.getRequestPackets()) {
            boolean executed = false;
            int retries = 0;
            do {
                try {
                    /* Note: The conversion below is an important reason for
                     * paxos applications to use RequestPacket as opposed to
                     * propose(String requestValue,...). Otherwise, we have to
                     * unnecessarily encapsulate the string first in a
                     * RequestPacket in PaxosManager and then convert the string
                     * back to InterfaceRequest using the app's getRequest
                     * method. */
                    Request request =
                            // don't convert to and from string unnecessarily
                            !requestPacket.shouldReturnRequestValue() ? requestPacket
                                    // ask app to translate string to InterfaceRequest
                                    : getInterfaceRequest(app, requestPacket.getRequestValue());
                    Level level = Level.FINE;
                    log.log(level, "{0} executing (in-order) decision {1}",
                            new Object[] { pism,
                                    log.isLoggable(level)
                                            ? (request instanceof SummarizableRequest
                                                    ? ((SummarizableRequest) request).getSummary()
                                                    : requestPacket.getSummary())
                                            : null });

                    if (!((decision instanceof PValuePacket) && ((PValuePacket) decision).isRecovery())
                            && (shouldLog && !(shouldLog = false))) {
                        log.log(Level.INFO, "{0} {1}",
                                new Object[] { DelayProfiler.getStats(), RTTEstimator.print() });
                    }

                    // TESTPaxosApp tracks noops, so it needs to be feed them
                    executed = (requestPacket.requestValue.equals(Request.NO_OP) && !(app instanceof TESTPaxosApp))
                            || app.execute(request,
                                    // do not reply if recovery or not entry replica
                                    (recoveryMode || (requestPacket.getEntryReplica() != paxosManager.getMyID())));
                    paxosManager.executed(requestPacket, request,
                            // send response if entry replica and !recovery
                            requestPacket.getEntryReplica() == paxosManager.getMyID() && !recoveryMode);
                    assert (requestPacket.getEntryReplica() > 0) : requestPacket;

                    // don't try any more if stopped
                    if (pism != null && pism.isStopped())
                        return true;
                } catch (Exception | Error e) {
                    // must swallow any and all exceptions
                    e.printStackTrace();
                }
                if (!executed) {
                    String error = paxosManager.getApp(requestPacket.getPaxosID())
                            + " failed to execute request, retrying: " + decision.requestValue;
                    log.severe(error);
                    new RuntimeException(error).printStackTrace();
                }
                /* We have to keep trying to execute until executed to preserve
                 * safety. We have removed the decision from the acceptor and
                 * there is no going back on that by design (as we assume that
                 * invariant at many places). One option here is to kill this
                 * paxos instance after a limited number of retries. The benefit
                 * of doing that is that we can free up this thread. But it is
                 * better to not delete the state on disk just yet as kill()
                 * would do by default. */
                if (++retries > RETRY_LIMIT)
                    return false;
            } while (!executed && waitRetry(RETRY_TIMEOUT));
        }
        return true;
    }

    private boolean restore(String state) {
        long t = System.currentTimeMillis();
        boolean restored = this.getApp().restore(getPaxosID(), state);
        DelayProfiler.updateDelay(AbstractPaxosLogger.appName + ".restore", t);
        return restored;
    }

    // Like EEC but invoked upon checkpoint transfer
    private synchronized MessagingTask handleCheckpoint(StatePacket statePacket) {
        if (statePacket.slotNumber >= this.paxosState.getSlot()) {
            // put checkpoint in app (like execute)
            if (!this.restore(statePacket.state))
                return null;
            // update acceptor (like extract)
            this.paxosState.jumpSlot(statePacket.slotNumber + 1);
            // put checkpoint in logger (like checkpoint)
            consistentCheckpoint(this, statePacket.slotNumber == 0, this.getPaxosID(), this.version,
                    this.paxosManager.getStringNodesFromIntArray(groupMembers), statePacket.slotNumber,
                    statePacket.ballot,
                    // this.getApp().checkpoint(getPaxosID()),
                    this.paxosState.getGCSlot());
            /* A transferred checkpoint is almost definitely not a final
             * checkpoint as final checkpoints are ephemeral. Even if it is a
             * final checkpoint, safety is maintained. Just that this replica
             * may not know that this paxos instance is stopped. */
            log.log(statePacket.slotNumber > 0 ? Level.INFO : Level.FINE,
                    "{0} inserted {1} checkpoint through handleCheckpoint; next slot = {2}", new Object[] { this,
                            statePacket.slotNumber == 0 ? "initial state" : "", this.paxosState.getSlotLog() });
        }
        // coz otherwise we can get stuck as assertSlotInvariant() may not hold
        return extractExecuteAndCheckpoint(null);
    }

    /* This method is called by PaxosManager.hibernate that blocks on the
     * checkpoint operation to finish (unlike regular checkpoints that are
     * asynchronously handled by a helper thread). But hibernate is currently
     * not really used as pause suffices. And PaxosManager methods are likely
     * called by an executor task anyway, so blocking should be harmless. */
    protected synchronized boolean tryForcedCheckpointAndStop() {
        boolean checkpointed = false;
        // Ugly nesting, not sure how else to do this correctly
        synchronized (this.paxosState) {
            synchronized (this.coordinator != null ? this.coordinator : this.paxosState) {
                int cpSlot = this.paxosState.getSlot() - 1;
                if (this.paxosState.caughtUp() && PaxosCoordinator.caughtUp(this.coordinator)) {
                    String pid = this.getPaxosID();
                    consistentCheckpoint(this, true, pid, this.getVersion(),
                            this.paxosManager.getStringNodesFromIntArray(this.groupMembers), cpSlot,
                            this.paxosState.getBallot(), this.paxosState.getGCSlot());
                    checkpointed = true;
                    log.log(Level.INFO,
                            "{0} forcing checkpoint at slot {1}; garbage collected "
                                    + "accepts up to slot {2}; max_committed_slot = {3} {4}",
                            new Object[] { this, cpSlot, this.paxosState.getGCSlot(),
                                    this.paxosState.getMaxCommittedSlot(),
                                    (this.paxosState.getBallotCoordLog() == this.getMyID()
                                            ? "; maxCommittedFrontier="
                                                    + PaxosCoordinator.getMajorityCommittedSlot(this.coordinator)
                                            : "") });
                    this.forceStop();
                }
            }
        }
        return checkpointed;
    }

    /* Needs to be synchronized so that extractExecuteAndCheckpoint does not
     * happen concurrently. Likewise handleCheckpoint. */
    protected synchronized boolean forceCheckpoint() {
        String pid = this.getPaxosID();
        int cpSlot = this.paxosState.getSlot() - 1;
        String state = consistentCheckpoint(this, true, pid, this.getVersion(),
                this.paxosManager.getStringNodesFromIntArray(this.groupMembers), cpSlot,
                this.paxosState.getBallot(), this.paxosState.getGCSlot());
        // need to acquire these without locking
        int gcSlot = this.paxosState.getGCSlot();
        int maxCommittedSlot = this.paxosState.getMaxCommittedSlot();
        String maxCommittedFrontier = (this.paxosState.getBallotCoordLog() == this.getMyID()
                ? "; maxCommittedFrontier=" + PaxosCoordinator.getMajorityCommittedSlot(this.coordinator)
                : "");
        log.log(Level.INFO,
                "{0} forcing checkpoint at slot {1}; garbage collected accepts up to slot {2}; "
                        + "max_committed_slot = {3} {4}; state={5}",
                new Object[] { this, cpSlot, gcSlot, maxCommittedSlot, maxCommittedFrontier,
                        Util.truncate(state, 128, 128) });
        return true;
    }

    /* A note on locking: The PaxosManager lock is typically the first to get
     * acquired if it ever appears in a chain of locks with one exception as
     * noted in the invariants below.
     * 
     * Invariants: There must be no lock chain
     * 
     * !!! PaxosManager -> PaxosInstanceStateMachine
     * 
     * because there is *by design* a lock chain
     * 
     * --> PaxosInstanceStateMachine -> PaxosManager
     * 
     * when this instance is being stopped.
     * 
     * There must be no lock chains as follows (an invariant is easy to adhere
     * to or rather impossible to violate by design because acceptor and
     * coordinator are unaware of and have no references to PaxosManager):
     * 
     * !!! nothing -> PaxosAcceptor -> PaxosManager
     * 
     * !!! nothing -> PaxosCoordinator -> PaxosManager
     * 
     * because there are lock chains of the form
     * 
     * --> PaxosManager -> PaxosAcceptor or PaxosCoordinator */

    /* Same as tryForcedCheckpointAndStop but without the checkpoint.
     * 
     * Why this method is not synchronized: when this paxos instance is
     * executing a request that takes a long time, this method might
     * concurrently try to pause it and even succeed (!), say, because the
     * decision being executed has been extracted and the acceptor looks all
     * nicely caught up. Is this a problem? The forceStop in this method will
     * stop the acceptor, but the thread executing EEC will go ahead and
     * complete the execution and even checkpoint and kill if it is a stop
     * request. The fact that the acceptor is in a stopped state won't matter
     * for the current decision being executed. After that, the loop in EEC will
     * break and return, so no harm done. When this instance gets eventually
     * unpaused, it would seem exactly like just after having executed that last
     * decision, so no harm done.
     * 
     * Conversely, this method might lock paxosState first and then EEC might
     * get invoked. If so, the program counter could enter the synchronized EEC
     * method but will block on paxosState.isStopped until this tryPause method
     * finishes. If tryPuase is unsuccessful, nothing has changed, so no harm
     * done. Else if tryPause successfully pauses, isStopped will return true
     * and EEC will become a noop, so no harm done.
     * 
     * Note: If we make this method synchronized, the deactivator thread could
     * be blocked on this instance for a long time. */
    protected HotRestoreInfo tryPause() {
        // boolean paused = false;
        HotRestoreInfo hri = null;
        synchronized (this.paxosState) {
            // Ugly nesting, not sure how else to do this correctly
            synchronized (this.coordinator != null ? this.coordinator : this.paxosState) {
                if (this.paxosState.caughtUp() && PaxosCoordinator.caughtUp(this.coordinator)) {
                    hri = new HotRestoreInfo(this.getPaxosID(), this.getVersion(), this.groupMembers,
                            this.paxosState.getSlot(), this.paxosState.getBallot(), this.paxosState.getGCSlot(),
                            PaxosCoordinator.getBallot(this.coordinator),
                            PaxosCoordinator.getNextProposalSlot(this.coordinator),
                            PaxosCoordinator.getNodeSlots(this.coordinator));
                    log.log(Level.FINE, "{0} pausing [{1}]", new Object[] { this, hri });
                    // if (paused = this.paxosManager.getPaxosLogger().pause(
                    // getPaxosID(), hri.toString()))
                    this.forceStop();
                } else
                    log.log(Level.INFO, "{0} not pausing because it is not caught up: {1} {2}",
                            new Object[] { this, this.paxosState, this.coordinator });
            }
        }
        return hri;
    }

    private boolean shouldCheckpoint(PValuePacket decision) {
        return (decision.slot % getCPI(this.paxosManager.getInterCheckpointInterval(), decision.getPaxosID()) == 0
                || decision.isStopRequest());
    }

    private static Request getInterfaceRequest(Replicable app, String value) {
        try {
            return app.getRequest(value);
        } catch (RequestParseException e) {
            e.printStackTrace();
        }
        return null;
    }

    /*************************** End of phase 3 methods ********************************/

    /********************** Start of failure detection and recovery methods *****************/

    /* FIXED: If a majority miss a prepare, the coordinator may never get
     * elected as follows. The minority of acceptors who did receive the prepare
     * will assume the prepare's sender as the current coordinator. The rest
     * might still think the previous coordinator is the current coordinator.
     * All acceptors could be thinking that their up, so nobody will bother
     * running for coordinator. To break this impasse, we need to resend the
     * prepare. This has been now incorporated in the handleMessage that quickly
     * checks upon every message if we need to "(re)run for coordinator" (for
     * the same ballot) if we have been waiting for too long (having neither
     * received a prepare majority nor a preemption) for the ballot to complete. */

    /* Checks whether current ballot coordinator is alive. If not, it checks if
     * it should try to be the nest coordinator and if so, it becomes the next
     * coordinator. This method can be called any time safely by any thread. */
    private MessagingTask checkRunForCoordinator() {
        return this.checkRunForCoordinator(false);
    }

    private MessagingTask checkRunForCoordinator(boolean forceRun) {
        Ballot curBallot = this.paxosState.getBallot();
        MessagingTask multicastPrepare = null;
        boolean lastCoordinatorLongDead = this.paxosManager.lastCoordinatorLongDead(curBallot.coordinatorID);

        // if(Util.oneIn(20)) log.info(this + " node " + curBallot.coordinatorID
        // + " lastCoordinatorLongDead = " + lastCoordinatorLongDead);

        /* curBallot is my acceptor's ballot; "my acceptor's coordinator" is
         * that ballot's coordinator.
         * 
         * If I am not already a coordinator with a ballot at least as high as
         * my acceptor's ballot's coordinator
         * 
         * AND
         * 
         * I didn't run too recently
         * 
         * AND
         * 
         * (I am my acceptor's coordinator OR (my acceptor's coordinator is dead
         * AND (I am next in line OR the current coordinator has been dead for a
         * really long time)))
         * 
         * OR forceRun */
        if ((
        /* I am not already a coordinator with a ballot at least as high as my
         * acceptor's ballot's coordinator && I didn't run too recently */
        !PaxosCoordinator.exists(this.coordinator, curBallot) && !PaxosCoordinator.ranRecently(this.coordinator) &&
        // I am my acceptor's coordinator (can happen during recovery)
                (curBallot.coordinatorID == this.getMyID() ||
                // my acceptor's coordinator is dead
                        (!this.paxosManager.isNodeUp(curBallot.coordinatorID) &&
                        // I am next in line
                                (this.getMyID() == getNextCoordinator(curBallot.ballotNumber + 1, this.groupMembers)
                                        ||
                                        // current coordinator has been long dead
                                        lastCoordinatorLongDead))))
                || forceRun) {
            /* We normally round-robin across nodes for electing coordinators,
             * e.g., node 7 will try to become coordinator in ballotnum such
             * that ballotnum%7==0 if it suspects that the current coordinator
             * is dead. But it is more robust to check if it has been a long
             * time since we heard anything from the current coordinator and if
             * so, try to become a coordinator ourself even though it is not our
             * turn. Otherwise, weird partitions can result in loss of liveness,
             * e.g., the next-in-line coordinator thinks the current coordinator
             * is up but most everyone else thinks the current coordinator is
             * down. Or the next-in-line coordinator itself could be dead. The
             * downside of this lastCoordinatorLongDead check is that many nodes
             * might near simultaneously try to become coordinator with no one
             * succeeding for a while, but this is unlikely to be a problem if
             * we rely on the deterministic round-robin rule in the common case
             * and rely on the lasCoordinatorLongDead with a longer timeout
             * (much longer than the typical node failure detection timeout). */
            // to avoid invoking synchronized method inside log
            log.log(Level.INFO, "{0} running for coordinator as node {1} {2}", new Object[] { this,
                    curBallot.coordinatorID,
                    (curBallot.coordinatorID != this.getMyID() ? " seems dead (last pinged "
                            + (this.paxosManager.getDeadTime(curBallot.coordinatorID) / 1000) + " secs back)"
                            : " has not yet initialized its coordinator") });
            Ballot newBallot = new Ballot(curBallot.ballotNumber + 1, this.getMyID());
            if ((this.coordinator = PaxosCoordinator.makeCoordinator(this.coordinator, newBallot.ballotNumber,
                    newBallot.coordinatorID, this.groupMembers, this.paxosState.getSlot(), false)) != null) {
                multicastPrepare = new MessagingTask(this.groupMembers,
                        new PreparePacket(newBallot, this.paxosState.getSlot()));
            }
        } else if (PaxosCoordinator.waitingTooLong(this.coordinator)) {
            assert (!PaxosCoordinator.waitingTooLong(this.coordinator)) : this + " " + this.coordinator;
            log.log(Level.WARNING,
                    "{0} resending timed out PREPARE {1}; "
                            + "this is only needed under high congestion or reconfigurations",
                    new Object[] { this, PaxosCoordinator.getBallot(this.coordinator) });
            Ballot newBallot = PaxosCoordinator.remakeCoordinator(this.coordinator, groupMembers);
            if (newBallot != null) {
                multicastPrepare = new MessagingTask(this.groupMembers,
                        new PreparePacket(newBallot, this.paxosState.getSlot()));
            }
        } else if (!this.paxosManager.isNodeUp(curBallot.coordinatorID)
                && !PaxosCoordinator.exists(this.coordinator, curBallot)) // not
            // my
            // job
            log.log(Level.FINE, "{0} thinks current coordinator {1} is {2} dead, the next-in-line is {3}{4}",
                    new Object[] { this, curBallot.coordinatorID, (lastCoordinatorLongDead ? "*long*" : ""),
                            getNextCoordinator(curBallot.ballotNumber + 1, this.groupMembers),
                            (PaxosCoordinator.ranRecently(this.coordinator)
                                    ? ", and I ran too recently to try again"
                                    : "") });
        return multicastPrepare;
    }

    private String getBallots() {
        return "[C:(" + (this.coordinator != null ? this.coordinator.getBallotStr() : "null") + "), A:("
                + (this.paxosState != null ? this.paxosState.getBallotSlot() : "null") + ")]";
    }

    private String getNodeState() {
        return "Node" + this.getNodeID() + ":" + this.getPaxosIDVersion() + ":" + this.getBallots();
    }

    /* Computes the next coordinator as the node with the smallest ID that is
     * still up. We could plug in any deterministic policy here. But this policy
     * should somehow take into account whether nodes are up or down. Otherwise,
     * paxos will be stuck if both the current and the next-in-line coordinator
     * are both dead.
     * 
     * It is important to choose the coordinator in a deterministic way when
     * recovering, e.g., the lowest numbered node. Otherwise different nodes may
     * have different impressions of who the coordinator is with unreliable
     * failure detectors, but no one other than the current coordinator may
     * actually ever run for coordinator. E.g., with three nodes 100, 101, 102,
     * if 102 thinks 101 is the coordinator, and the other two start by assuming
     * 100 is the coordinator, then 102's accept replies will keep preempting
     * 100's accepts but 101 may never run for coordinator as it has no reason
     * to think there is any problem with 100. */
    private int getNextCoordinator(int ballotnum, int[] members, boolean recovery) {
        for (int i = 1; i < members.length; i++)
            assert (members[i - 1] < members[i]);
        assert (!recovery);
        return roundRobinCoordinator(ballotnum);
    }

    private int getNextCoordinator(int ballotnum, int[] members) {
        return this.getNextCoordinator(ballotnum, members, false);
    }

    private int roundRobinCoordinator(int ballotnum) {
        return roundRobinCoordinator(getPaxosID(), this.groupMembers, ballotnum);
    }

    protected static int roundRobinCoordinator(String paxosID, int[] members, int ballotnum) {
        // to load balance coordinatorship across groups
        int randomOffset = paxosID.hashCode();
        return members[(Math.abs(ballotnum + randomOffset)) % members.length];
    }

    /* FIXED: If a majority miss an accept, but any messages are still being
     * received at all, then the loss will eventually get fixed by a check
     * similar to checkRunForCoordinator that upon receipt of every message will
     * poke the local coordinator to recommander the next-in-line accept if the
     * accept has been waiting for too long (for a majority or preemption). Both
     * the prepare and accept waiting checks are quick O(1) operations. */

    private static final boolean POKE_COORDINATOR = Config.getGlobalBoolean(PC.POKE_COORDINATOR);

    private MessagingTask pokeLocalCoordinator() {
        if (!POKE_COORDINATOR)
            return null;
        AcceptPacket accept = PaxosCoordinator.reissueAcceptIfWaitingTooLong(this.coordinator,
                this.paxosState.getSlot());
        if (accept != null)
            log.log(Level.INFO, "{0} resending timed out ACCEPT {1}", new Object[] { this, accept.getSummary() });
        else
            log.log(Level.FINEST, "{0} coordinator {1} is good for now", new Object[] { this, this.coordinator });
        MessagingTask reAccept = (accept != null ? new MessagingTask(this.groupMembers, accept) : null);
        return reAccept;
    }

    private boolean logStop(long createTime) {
        if (instrument())
            DelayProfiler.updateDelay("stopcoordination", createTime);
        log.log(Level.INFO, "Paxos instance {0} >>>>STOPPED||||||||||", new Object[] { this });
        return true;
    }

    /* Event: Received or locally generated a sync request. Action: Send a sync
     * reply containing missing committed requests to the requester. If the
     * requester is myself, multicast to all. */
    private MessagingTask requestMissingDecisions(int coordinatorID) {
        ArrayList<Integer> missingSlotNumbers = this.paxosState
                .getMissingCommittedSlots(this.paxosManager.getMaxSyncDecisionsGap());
        // initially we might want to send an empty sync request
        if (missingSlotNumbers == null)
            return null; // if stopped
        else if (missingSlotNumbers.isEmpty())
            missingSlotNumbers.add(this.paxosState.getSlot());

        int maxDecision = this.paxosState.getMaxCommittedSlot();
        SyncDecisionsPacket srp = new SyncDecisionsPacket(this.getMyID(), maxDecision, missingSlotNumbers,
                this.isMissingTooMuch());

        int requestee = COORD_DONT_LOG_DECISIONS ? randomNonCoordOther(coordinatorID) : randomOther();
        // send sync request to coordinator or random other node
        MessagingTask mtask = requestee != this.getMyID() ? new MessagingTask(requestee, srp) : null;

        return mtask;
    }

    private static final boolean COORD_DONT_LOG_DECISIONS = Config.getGlobalBoolean(PC.COORD_DONT_LOG_DECISIONS);

    /* We normally sync decisions if the gap between the maximum decision slot
     * and the expected slot is at least as high as the threshold. But we also
     * sync in the beginning when the expected slot is 0 (if we disable null
     * checkpoints) or 1 and there is either a nonzero gap or simply if the
     * threshold is 1. The initial nonzero gap is an understandable
     * optimization. But we also sync in the special case when the threshold is
     * low and this paxos instance has just gotten created (even when there is
     * no gap) because it is possible that other replicas have committed
     * decisions that I don't even know have happened. This optimizaiton is not
     * necessary for safety, but it is useful for liveness especially in the
     * case when an epoch start (in reconfiguration) is not considered complete
     * until all replicas have committed the first decision (as in the special
     * case of reconfigurator node reconfigurations). */
    private static final int INITIAL_SYNC_THRESHOLD = 1;
    private static final int NONTRIVIAL_GAP_FACTOR = 100;

    private boolean shouldSync(int maxDecisionSlot, int threshold, SyncMode syncMode) {
        if (DISABLE_SYNC_DECISIONS)
            return false;
        int expectedSlot = this.paxosState.getSlot();
        boolean nontrivialInitialGap = maxDecisionSlot - expectedSlot > threshold / NONTRIVIAL_GAP_FACTOR;
        boolean smallGapThreshold = threshold <= INITIAL_SYNC_THRESHOLD;

        return
        // typical legitimate sync criterion
        (maxDecisionSlot - expectedSlot >= threshold)
                // sync decisions initially if nonzero gap or small threshold
                || ((expectedSlot == 0 || expectedSlot == 1) && (nontrivialInitialGap || smallGapThreshold))
                // non-zero gap and syncMode is SYNC_IF_NONZERO_GAP
                || (nontrivialInitialGap && SyncMode.SYNC_TO_PAUSE.equals(syncMode))
                // force sync
                || SyncMode.FORCE_SYNC.equals(syncMode);

    }

    private boolean shouldSync(int maxDecisionSlot, int threshold) {
        return shouldSync(maxDecisionSlot, threshold, SyncMode.DEFAULT_SYNC);
    }

    private boolean isMissingTooMuch() {
        return this.shouldSync(this.paxosState.getMaxCommittedSlot(), this.paxosManager.getMaxSyncDecisionsGap());
    }

    // point here is really to request initial state
    protected MessagingTask requestZerothMissingDecision() {
        ArrayList<Integer> missingSlotNumbers = new ArrayList<Integer>();
        missingSlotNumbers.add(0);

        SyncDecisionsPacket srp = new SyncDecisionsPacket(this.getMyID(), 1, missingSlotNumbers, true);

        log.log(Level.INFO, "{0} requesting missing zeroth checkpoint", new Object[] { this, });
        // send sync request to coordinator or multicast to others if I am
        // coordinator
        MessagingTask mtask = this.paxosState.getBallotCoord() != this.getMyID()
                ? new MessagingTask(this.paxosState.getBallotCoord(), srp)
                : new MessagingTask(otherGroupMembers(), srp);
        return mtask;
    }

    protected int[] otherGroupMembers() {
        return Util.filter(groupMembers, getMyID());
    }

    // random member other than coordinator and self
    private int randomNonCoordOther(int coordinator) {
        if (this.groupMembers.length == 1)
            return this.getMyID(); // no other
        if (this.groupMembers.length == 2 && getMyID() != coordinator)
            return coordinator; // no other option
        // else other exists
        int retval = coordinator;
        // need at least 3 members for this loop to make sense
        while ((retval == getMyID() || retval == coordinator))
            retval = this.groupMembers[(int) (Math.random() * this.groupMembers.length)];
        return retval;
    }

    private int randomOther() {
        if (this.groupMembers.length == 1)
            return this.getMyID(); // no other
        int retval = getMyID();
        // need at least 2 members for this loop to make sense
        while (retval == getMyID() && this.groupMembers.length > 1)
            retval = this.groupMembers[(int) (Math.random() * this.groupMembers.length)];
        return retval;
    }

    /* Event: Received a sync reply packet with a list of missing committed
     * requests Action: Send back all missing committed requests from the log to
     * the sender (replier).
     * 
     * We could try to send some from acceptor memory instead of the log, but in
     * general, it is not worth the effort. Furthermore, if the sync gap is too
     * much, do a checkpoint transfer. */
    private MessagingTask handleSyncDecisionsPacket(SyncDecisionsPacket syncReply) throws JSONException {
        int minMissingSlot = syncReply.missingSlotNumbers.get(0);
        log.log(Level.FINE, "{0} handling sync decisions request {1} when maxCommittedSlot = {2}",
                new Object[] { this, syncReply.getSummary(), this.paxosState.getMaxCommittedSlot() });

        if (this.paxosState.getMaxCommittedSlot() - minMissingSlot < 0)
            return null; // I am worse than you

        // get checkpoint if minMissingSlot > last checkpointed slot
        MessagingTask checkpoint = null;
        if (minMissingSlot - lastCheckpointSlot(this.paxosState.getSlot(), syncReply.getPaxosID()) <= 0) {
            checkpoint = handleCheckpointRequest(syncReply);
            if (checkpoint != null)
                // only get decisions beyond checkpoint
                minMissingSlot = ((StatePacket) (checkpoint.msgs[0])).slotNumber + 1;
        }

        // try to get decisions from memory first
        HashMap<Integer, PValuePacket> missingDecisionsMap = new HashMap<Integer, PValuePacket>();
        for (PValuePacket pvalue : this.paxosState.getCommitted(syncReply.missingSlotNumbers))
            missingDecisionsMap.put(pvalue.slot, pvalue.setNoCoalesce());

        // get decisions from database as unlikely to have all of them in memory
        ArrayList<PValuePacket> missingDecisions = this.paxosManager.getPaxosLogger().getLoggedDecisions(
                this.getPaxosID(), this.getVersion(), minMissingSlot,
                /* If maxDecision <= minMissingSlot, sender is probably
                 * doing a creation sync. But we need min < max for the
                 * database query to return nonzero results, so we
                 * adjust up the max if needed. Note that
                 * getMaxCommittedSlot() at this node may not be greater
                 * than minMissingDecision either. For example, the
                 * sender may be all caught up at slot 0 and request a
                 * creation sync for 1 and this node may have committed
                 * up to 1; if so, it should return decision 1. */
                syncReply.maxDecisionSlot > minMissingSlot ? syncReply.maxDecisionSlot
                        : Math.max(minMissingSlot + 1, this.paxosState.getMaxCommittedSlot() + 1));

        // filter non-missing from database decisions
        if (syncReply.maxDecisionSlot > minMissingSlot)
            for (Iterator<PValuePacket> pvalueIterator = missingDecisions.iterator(); pvalueIterator.hasNext();) {
                PValuePacket pvalue = pvalueIterator.next();
                if (!syncReply.missingSlotNumbers.contains(pvalue.slot))
                    pvalueIterator.remove(); // filter non-missing
                else
                    pvalue.setNoCoalesce(); // send as-is, no compacting
                // isRecovery() true only in rollForward
                assert (!pvalue.isRecovery());
            }

        // copy over database decisions not in memory
        for (PValuePacket pvalue : missingDecisions)
            if (!missingDecisionsMap.containsKey(pvalue.slot))
                missingDecisionsMap.put(pvalue.slot, pvalue);

        // replace meta decisions with actual decisions
        getActualDecisions(missingDecisionsMap);

        assert (missingDecisionsMap.isEmpty()
                || missingDecisionsMap.values().toArray(new PaxosPacket[0]).length > 0) : missingDecisions;
        for (PValuePacket pvalue : missingDecisionsMap.values()) {
            pvalue.setNoCoalesce();
            assert (pvalue.hasRequestValue());
        }
        // the list of missing decisions to be sent
        MessagingTask unicasts = missingDecisionsMap.isEmpty() ? null
                : new MessagingTask(syncReply.nodeID, (missingDecisionsMap.values().toArray(new PaxosPacket[0])));

        log.log(Level.INFO, "{0} sending {1} missing decision(s) to node {2} in response to {3}", new Object[] {
                this, unicasts == null ? 0 : unicasts.msgs.length, syncReply.nodeID, syncReply.getSummary() });
        if (checkpoint != null)
            log.log(Level.INFO, "{0} sending checkpoint for slot {1} to node {2} in response to {3}",
                    new Object[] { this, minMissingSlot - 1, syncReply.nodeID, syncReply.getSummary() });

        // combine checkpoint and missing decisions in unicasts
        MessagingTask mtask =
                // both nonempty => combine
                (checkpoint != null && unicasts != null && !checkpoint.isEmpty() && !unicasts.isEmpty())
                        ? mtask = new MessagingTask(syncReply.nodeID,
                                MessagingTask.toPaxosPacketArray(checkpoint.msgs, unicasts.msgs))
                        :
                        // nonempty checkpoint
                        (checkpoint != null && !checkpoint.isEmpty()) ? checkpoint :
                        // unicasts (possibly also empty or null)
                                unicasts;
        log.log(Level.FINE, "{0} sending mtask: ", new Object[] { this, mtask });
        return mtask;
    }

    /* We reconstruct decisions from logged accepts. This is safe because we
     * only log a decision with a meta request value when we already have
     * previously accepted the corresponding accept. */
    private void getActualDecisions(HashMap<Integer, PValuePacket> missing) {
        if (missing.isEmpty())
            return;
        Integer minSlot = null, maxSlot = null;
        // find meta value commits
        for (Integer slot : missing.keySet()) {
            if (missing.get(slot).isMetaValue()) {
                if (minSlot == null)
                    minSlot = (maxSlot = slot);
                if (slot - minSlot < 0)
                    minSlot = slot;
                if (slot - maxSlot > 0)
                    maxSlot = slot;
            }
        }

        if (!(minSlot == null || (minSlot - this.paxosState.getMaxAcceptedSlot() > 0))) {

            // get logged accepts for meta commit slots
            Map<Integer, PValuePacket> accepts = this.paxosManager.getPaxosLogger()
                    .getLoggedAccepts(this.getPaxosID(), this.getVersion(), minSlot, maxSlot + 1);

            // reconstruct decision from accept
            for (PValuePacket pvalue : accepts.values())
                if (missing.containsKey(pvalue.slot) && missing.get(pvalue.slot).isMetaValue())
                    missing.put(pvalue.slot, pvalue.makeDecision(pvalue.getMedianCheckpointedSlot()));
        }

        // remove remaining meta value decisions
        for (Iterator<PValuePacket> pvalueIter = missing.values().iterator(); pvalueIter.hasNext();) {
            PValuePacket decision = pvalueIter.next();
            if (decision.isMetaValue()) {
                if (this.paxosState.getSlot() - decision.slot > 0)
                    log.log(Level.FINE, "{0} has no body for executed meta-decision {1} "
                            + " likely because placeholder decision was " + "logged without corresponding accept)",
                            new Object[] { this, decision });
                pvalueIter.remove();
            }
        }
    }

    private int lastCheckpointSlot(int slot, String paxosID) {
        return lastCheckpointSlot(slot, getCPI(this.paxosManager.getInterCheckpointInterval(), this.getPaxosID()));
    }

    private static int lastCheckpointSlot(int slot, int checkpointInterval) {
        int lcp = slot - slot % checkpointInterval;
        if (lcp < 0 && ((lcp -= checkpointInterval) > 0)) // wraparound-arithmetic
            lcp = lastCheckpointSlot(Integer.MAX_VALUE, checkpointInterval);
        return lcp;
    }

    /* Event: Received a request for a recent checkpoint presumably from a
     * replica that has recovered after a long down time. Action: Send
     * checkpoint to requester. */
    private MessagingTask handleCheckpointRequest(SyncDecisionsPacket syncReply) {
        /* The assertion below does not mean that the state we actually get will
         * be at lastCheckpointSlot() or higher because, even though getSlot()
         * has gotten updated, the checkpoint to disk may not yet have finished.
         * We have no way of knowing other than reading the disk. So we first do
         * a read to check if the checkpointSlot is at least higher than the
         * minMissingSlot in syncReply. If the state is tiny, this will double
         * the state fetching overhead as we are doing two database reads. */
        assert (syncReply.missingSlotNumbers.get(0)
                - lastCheckpointSlot(this.paxosState.getSlot(), syncReply.getPaxosID()) <= 0);
        int checkpointSlot = this.paxosManager.getPaxosLogger().getCheckpointSlot(getPaxosID());
        StatePacket statePacket = (checkpointSlot >= syncReply.missingSlotNumbers.get(0) ? StatePacket
                .getStatePacket(this.paxosManager.getPaxosLogger().getSlotBallotState(this.getPaxosID())) : null);
        if (statePacket != null)
            log.log(Level.INFO, "{0} sending checkpoint to node {1}: {2}",
                    new Object[] { this, syncReply.nodeID, statePacket.getSummary() });
        else {
            String myStatus = (!PaxosCoordinator.exists(this.coordinator) ? "[acceptor]"
                    : PaxosCoordinator.isActive(this.coordinator) ? "[coordinator]" : "[preactive-coordinator]");
            log.log(Level.INFO, "{0}  has no state (yet) for ",
                    new Object[] { this, myStatus, syncReply.getSummary() });
        }

        return statePacket != null ? new MessagingTask(syncReply.nodeID, statePacket) : null;
    }

    /*************** End of failure detection and recovery methods ***************/

    /************************ Start of testing and instrumentation methods *****************/
    /* Used only to test paxos instance size. We really need a paxosManager to
     * do anything real with paxos. */
    private void testingNoRecovery() {
        int initSlot = 0;
        this.coordinator = null;// new PaxosCoordinator();
        if (this.groupMembers[0] == this.getMyID())
            this.coordinator = PaxosCoordinator.makeCoordinator(this.coordinator, 0, this.groupMembers[0],
                    groupMembers, initSlot, true);
        this.paxosState = new PaxosAcceptor(0, this.groupMembers[0], initSlot, null);
    }

    private static int CREATION_LOG_THRESHOLD = 100000;
    private static int creationCount = 0;

    private static void incrInstanceCount() {
        creationCount++;
    }

    protected static void decrInstanceCount() {
        creationCount--;
    }

    // only an approximate count for instrumentation purposes
    private static int getInstanceCount() {
        return creationCount;
    }

    private static boolean notManyInstances() {
        return getInstanceCount() < CREATION_LOG_THRESHOLD;
    }

    protected void testingInit(int load) {
        this.coordinator.testingInitCoord(load);
        this.paxosState.testingInitInstance(load);
    }

    protected void garbageCollectDecisions(int slot) {
        this.paxosState.garbageCollectDecisions(slot);
    }

    @Override
    public boolean isPausable() {
        return this.paxosState.isLongIdle();
    }

    protected PaxosInstanceStateMachine markActive() {
        this.paxosState.justActive();
        return this;
    }

    private static double CPI_NOISE = Config.getGlobalDouble(PC.CPI_NOISE);

    private static int getCPI(int cpi, String paxosID) {
        return (int) (cpi * (1 - CPI_NOISE) + (Math.abs(paxosID.hashCode()) % cpi) * 2 * CPI_NOISE);
    }
}