edu.umass.cs.reconfiguration.Reconfigurator.java Source code

Introduction

Here is the source code for edu.umass.cs.reconfiguration.Reconfigurator.java, the main reconfigurator module of the UMass gigapaxos reconfiguration package. It issues reconfiguration commands to active replicas and answers client requests to create or delete names or to look up the active replicas for a name.
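
The class accepts exactly three packet types on its client-facing port: CreateServiceName, DeleteServiceName, and RequestActiveReplicas (see clientRequestTypes in the source below). As a minimal orientation sketch, here is how a client might construct these packets; the two-argument CreateServiceName constructor and the transport (e.g., a gigapaxos client such as ReconfigurableAppClientAsync) are assumptions, as neither is shown in this file.

    import edu.umass.cs.reconfiguration.reconfigurationpackets.CreateServiceName;
    import edu.umass.cs.reconfiguration.reconfigurationpackets.DeleteServiceName;
    import edu.umass.cs.reconfiguration.reconfigurationpackets.RequestActiveReplicas;

    public class ClientPacketSketch {
        public static void main(String[] args) {
            // create a name with an initial state; the reconfigurator treats
            // this as a reconfiguration whose previous group is non-existent
            CreateServiceName create = new CreateServiceName("someName", "someInitialState");
            // look up the current set of active replicas for the name
            RequestActiveReplicas lookup = new RequestActiveReplicas("someName");
            // delete the name; the confirmation is sent from a callback after
            // the stop/drop phases complete
            DeleteServiceName delete = new DeleteServiceName("someName");
            System.out.println(create.getSummary() + ", " + lookup.getSummary() + ", " + delete.getSummary());
        }
    }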

Source

/* Copyright (c) 2015 University of Massachusetts
 * 
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 * 
 * http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 * 
 * Initial developer(s): V. Arun */
package edu.umass.cs.reconfiguration;

import java.io.IOException;
import java.net.InetAddress;
import java.net.InetSocketAddress;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.TimeUnit;
import java.util.logging.Level;
import java.util.logging.Logger;

import org.json.JSONException;
import org.json.JSONObject;

import edu.umass.cs.gigapaxos.PaxosConfig;
import edu.umass.cs.gigapaxos.interfaces.Request;
import edu.umass.cs.gigapaxos.paxosutil.LargeCheckpointer;
import edu.umass.cs.nio.AbstractPacketDemultiplexer;
import edu.umass.cs.nio.GenericMessagingTask;
import edu.umass.cs.nio.JSONMessenger;
import edu.umass.cs.nio.MessageNIOTransport;
import edu.umass.cs.nio.SSLDataProcessingWorker.SSL_MODES;
import edu.umass.cs.nio.interfaces.AddressMessenger;
import edu.umass.cs.nio.interfaces.Messenger;
import edu.umass.cs.nio.interfaces.PacketDemultiplexer;
import edu.umass.cs.nio.interfaces.SSLMessenger;
import edu.umass.cs.nio.interfaces.Stringifiable;
import edu.umass.cs.nio.nioutils.NIOInstrumenter;
import edu.umass.cs.nio.nioutils.RTTEstimator;
import edu.umass.cs.protocoltask.ProtocolExecutor;
import edu.umass.cs.protocoltask.ProtocolTask;
import edu.umass.cs.protocoltask.ProtocolTaskCreationException;
import edu.umass.cs.reconfiguration.ReconfigurationConfig.RC;
import edu.umass.cs.reconfiguration.interfaces.ReconfigurableNodeConfig;
import edu.umass.cs.reconfiguration.interfaces.ReconfiguratorCallback;
import edu.umass.cs.reconfiguration.reconfigurationpackets.BasicReconfigurationPacket;
import edu.umass.cs.reconfiguration.reconfigurationpackets.ClientReconfigurationPacket;
import edu.umass.cs.reconfiguration.reconfigurationpackets.CreateServiceName;
import edu.umass.cs.reconfiguration.reconfigurationpackets.DeleteServiceName;
import edu.umass.cs.reconfiguration.reconfigurationpackets.DemandReport;
import edu.umass.cs.reconfiguration.reconfigurationpackets.EchoRequest;
import edu.umass.cs.reconfiguration.reconfigurationpackets.RCRecordRequest;
import edu.umass.cs.reconfiguration.reconfigurationpackets.ReconfigurationPacket;
import edu.umass.cs.reconfiguration.reconfigurationpackets.ReconfigurationPacket.PacketType;
import edu.umass.cs.reconfiguration.reconfigurationpackets.ReconfigureActiveNodeConfig;
import edu.umass.cs.reconfiguration.reconfigurationpackets.ReconfigureRCNodeConfig;
import edu.umass.cs.reconfiguration.reconfigurationpackets.RequestActiveReplicas;
import edu.umass.cs.reconfiguration.reconfigurationpackets.ServerReconfigurationPacket;
import edu.umass.cs.reconfiguration.reconfigurationpackets.StartEpoch;
import edu.umass.cs.reconfiguration.reconfigurationpackets.RCRecordRequest.RequestTypes;
import edu.umass.cs.reconfiguration.reconfigurationprotocoltasks.ReconfiguratorProtocolTask;
import edu.umass.cs.reconfiguration.reconfigurationprotocoltasks.WaitAckDropEpoch;
import edu.umass.cs.reconfiguration.reconfigurationprotocoltasks.WaitAckStartEpoch;
import edu.umass.cs.reconfiguration.reconfigurationprotocoltasks.WaitAckStopEpoch;
import edu.umass.cs.reconfiguration.reconfigurationprotocoltasks.CommitWorker;
import edu.umass.cs.reconfiguration.reconfigurationprotocoltasks.WaitEpochFinalState;
import edu.umass.cs.reconfiguration.reconfigurationprotocoltasks.WaitPrimaryExecution;
import edu.umass.cs.reconfiguration.reconfigurationutils.AbstractDemandProfile;
import edu.umass.cs.reconfiguration.reconfigurationutils.AggregateDemandProfiler;
import edu.umass.cs.reconfiguration.reconfigurationutils.ConsistentReconfigurableNodeConfig;
import edu.umass.cs.reconfiguration.reconfigurationutils.ReconfigurationPacketDemultiplexer;
import edu.umass.cs.reconfiguration.reconfigurationutils.ReconfigurationRecord;
import edu.umass.cs.reconfiguration.reconfigurationutils.ReconfigurationRecord.RCStates;
import edu.umass.cs.utils.Config;
import edu.umass.cs.utils.DelayProfiler;
import edu.umass.cs.utils.MyLogger;
import edu.umass.cs.utils.Util;

/**
 * @author V. Arun
 * @param <NodeIDType>
 * 
 *            This class is the main Reconfigurator module. It issues
 *            reconfiguration commands to ActiveReplicas and also responds to
 *            client requests to create or delete names or request the list of
 *            active replicas for a name.
 * 
 *            It relies on the following helper protocol tasks:
 *            {@link WaitAckStopEpoch WaitAckStopEpoch},
 *            {@link WaitAckStartEpoch}, {@link WaitAckDropEpoch},
 *            {@link CommitWorker}, {@link WaitPrimaryExecution}. The last one
 *            enables exactly one primary Reconfigurator to conduct
 *            reconfigurations in the common case while ensuring that others
 *            safely complete the reconfiguration if the primary fails to do
 *            so. CommitWorker is
 *            a worker that is needed to ensure that a paxos-coordinated request
 *            eventually gets committed; we need this property to ensure that a
 *            reconfiguration operation terminates, but paxos itself provides us
 *            no such liveness guarantee.
 * 
 *            This class now supports add/remove operations for the set of
 *            Reconfigurator nodes. This is somewhat tricky, but works
 *            correctly. A detailed, formal description of the protocol is TBD.
 *            The documentation further below in this class explains the main
 *            ideas.
 * 
 */
public class Reconfigurator<NodeIDType> implements PacketDemultiplexer<JSONObject>, ReconfiguratorCallback {

    private final SSLMessenger<NodeIDType, JSONObject> messenger;
    private final ProtocolExecutor<NodeIDType, ReconfigurationPacket.PacketType, String> protocolExecutor;
    protected final ReconfiguratorProtocolTask<NodeIDType> protocolTask;
    private final RepliconfigurableReconfiguratorDB<NodeIDType> DB;
    private final ConsistentReconfigurableNodeConfig<NodeIDType> consistentNodeConfig;
    private final AggregateDemandProfiler demandProfiler = new AggregateDemandProfiler();
    private final CommitWorker<NodeIDType> commitWorker;
    private PendingBatchCreates pendingBatchCreations = new PendingBatchCreates();
    private boolean recovering = true;

    private static final Logger log = Logger.getLogger(Reconfigurator.class.getName());

    /**
     * @return Logger used by the entire reconfiguration package.
     */
    public static final Logger getLogger() {
        return log;
    }

    /**
     * For profiling statistics in {@link DelayProfiler}.
     */
    public static enum ProfilerKeys {
        /**
         * 
         */
        stop_epoch,
        /**
         * 
         */
        create,
        /**
         * 
         */
        delete,
    };
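
    /* A sketch of how these keys are presumably fed to DelayProfiler (the
     * actual call sites are further down in this class; updateDelay maintains
     * a moving average keyed by the string name):
     *
     *   long t = System.currentTimeMillis();
     *   // ... conduct the create reconfiguration ...
     *   DelayProfiler.updateDelay(ProfilerKeys.create.toString(), t);
     */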

    /* Any id-based communication requires NodeConfig and Messenger. In general,
     * the former may be a subset of the NodeConfig used by the latter, so they
     * are separate arguments. */
    protected Reconfigurator(ReconfigurableNodeConfig<NodeIDType> nc, SSLMessenger<NodeIDType, JSONObject> m,
            boolean startCleanSlate) throws IOException {
        this.messenger = m;
        this.consistentNodeConfig = new ConsistentReconfigurableNodeConfig<NodeIDType>(nc);
        this.DB = new RepliconfigurableReconfiguratorDB<NodeIDType>(
                new SQLReconfiguratorDB<NodeIDType>(this.messenger.getMyID(), this.consistentNodeConfig), getMyID(),
                this.consistentNodeConfig, this.messenger, startCleanSlate);
        // recovery complete at this point
        /* We need to set a callback explicitly in AbstractReplicaCoordinator
         * instead of just passing self with each coordinateRequest because the
         * AbstractReconfiguratorDB "app" sometimes returns false, which can be
         * detected and passed back here as-is, but paxos always expects execute
         * to return true by design and itself invokes the callback with
         * handled=true. */
        this.DB.setCallback(this); // no callbacks will happen during recovery

        // protocol executor not needed until recovery complete
        this.protocolExecutor = new ProtocolExecutor<NodeIDType, ReconfigurationPacket.PacketType, String>(
                messenger);
        this.protocolTask = new ReconfiguratorProtocolTask<NodeIDType>(getMyID(), this);
        // non default types will be registered by spawned tasks
        this.protocolExecutor.register(this.protocolTask.getDefaultTypes(), this.protocolTask);
        this.commitWorker = new CommitWorker<NodeIDType>(this.DB, null);
        this.initFinishPendingReconfigurations();
        this.initClientMessenger(false);
        if (ReconfigurationConfig.getClientSSLMode() != SSL_MODES.CLEAR)
            this.initClientMessenger(true);

        assert (this.getClientMessenger() != null || this.clientFacingPortIsMyPort());

        // if here, recovery must be complete
        this.DB.setRecovering(this.recovering = false);
        log.log(Level.FINE, "{0} finished recovery with NodeConfig = {1}",
                new Object[] { this, this.consistentNodeConfig.getReconfigurators() });
    }

    /**
     * This treats the reconfigurator itself as an "active replica" in order to
     * be able to reconfigure reconfigurator groups.
     */
    protected ActiveReplica<NodeIDType> getReconfigurableReconfiguratorAsActiveReplica() {
        return new ActiveReplica<NodeIDType>(this.DB, this.consistentNodeConfig.getUnderlyingNodeConfig(),
                this.messenger);
    }

    @Override
    public boolean handleMessage(JSONObject jsonObject) {
        try {
            ReconfigurationPacket.PacketType rcType = ReconfigurationPacket
                    .getReconfigurationPacketType(jsonObject);
            log.log(Level.FINE, "{0} received {1} {2}", new Object[] { this, rcType, jsonObject });
            /* This assertion is true only if TLS with mutual authentication is
             * enabled. If so, only authentic servers will be able to send
             * messages to a reconfigurator and they will never send any message
             * other than the subset of ReconfigurationPacket types meant to be
             * processed by reconfigurators. */
            assert (rcType != null || ReconfigurationConfig.isTLSEnabled()) : jsonObject;

            // try handling as reconfiguration packet through protocol task
            @SuppressWarnings("unchecked")
            // checked by assert above
            BasicReconfigurationPacket<NodeIDType> rcPacket = (BasicReconfigurationPacket<NodeIDType>) ReconfigurationPacket
                    .getReconfigurationPacket(jsonObject, this.getUnstringer());
            // all packets are handled through executor, nice and simple
            if (!this.protocolExecutor.handleEvent(rcPacket))
                // just log unhandled packets; no other action is needed
                log.log(Level.FINE, MyLogger.FORMAT[2],
                        new Object[] { this, "unable to handle packet", jsonObject });
        } catch (JSONException | ProtocolTaskCreationException e) {
            log.severe(this + " incurred exception " + e.getMessage() + " while trying to handle message "
                    + jsonObject);
            e.printStackTrace();
        }
        return false; // neither reconfiguration packet nor app request
    }

    /**
     * @return Packet types handled by Reconfigurator. Refer to
     *         {@link ReconfigurationPacket}.
     */
    public Set<ReconfigurationPacket.PacketType> getPacketTypes() {
        return this.protocolTask.getEventTypes();
    }

    @Override
    public String toString() {
        return "RC." + getMyID();
    }

    /**
     * Close gracefully.
     */
    public void close() {
        this.commitWorker.close();
        this.protocolExecutor.stop();
        this.messenger.stop();
        this.DB.close();
        log.log(Level.INFO, "{0} closing with nodeConfig = {1}", new Object[] { this, this.consistentNodeConfig });
    }

    /* *********** Start of protocol task handler methods ***************** */
    /**
     * Incorporates demand reports (possibly but not necessarily with replica
     * coordination), checks for reconfiguration triggers, and initiates
     * reconfiguration if needed.
     * 
     * @param report
     * @param ptasks
     * @return MessagingTask, always null here. No protocol tasks spawned.
     */
    public GenericMessagingTask<NodeIDType, ?>[] handleDemandReport(DemandReport<NodeIDType> report,
            ProtocolTask<NodeIDType, ReconfigurationPacket.PacketType, String>[] ptasks) {
        log.log(Level.FINEST, "{0} received {1} {2}", new Object[] { this, report.getType(), report });
        if (report.needsCoordination())
            this.DB.handleIncoming(report, null); // coordinated
        else
            this.updateDemandProfile(report); // no coordination
        ReconfigurationRecord<NodeIDType> record = this.DB.getReconfigurationRecord(report.getServiceName());
        if (record != null)
            // coordinate and commit reconfiguration intent
            this.initiateReconfiguration(report.getServiceName(), record,
                    shouldReconfigure(report.getServiceName()), null, null, null, null, null, null); // coordinated
        trimAggregateDemandProfile();
        return null; // never any messaging or ptasks
    }
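
    /* Flow sketch for the demand-driven path above: a DemandReport updates the
     * AggregateDemandProfiler (with or without coordination); if the policy in
     * shouldReconfigure (defined elsewhere in this class) suggests a new
     * placement, a RECONFIGURATION_INTENT is coordinated via this.DB, and the
     * executed() callback further below spawns the actual stop/start/drop
     * protocol tasks. */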

    private boolean isLegitimateCreateRequest(CreateServiceName create) {
        if (!create.isBatched())
            return true;
        return this.consistentNodeConfig.checkSameGroup(create.nameStates.keySet());
    }

    private CreateServiceName[] makeCreateServiceNameBatches(CreateServiceName batchedCreate) {
        CreateServiceName[] creates = ReconfigurationConfig.makeCreateNameRequest(batchedCreate.nameStates,
                Config.getGlobalInt(ReconfigurationConfig.RC.MAX_BATCH_SIZE),
                Util.setToStringSet(this.consistentNodeConfig.getReconfigurators()));
        for (int i = 0; i < creates.length; i++) {
            creates[i] = new CreateServiceName(creates[i].nameStates, batchedCreate);
            assert (this.isLegitimateCreateRequest(creates[i]));
        }
        return creates;
    }

    class BatchCreateJobs {
        final long creationTime;
        final String headName;
        final CreateServiceName original;
        final Set<CreateServiceName> parts = new HashSet<CreateServiceName>();
        final Set<String> remaining = new HashSet<String>();

        BatchCreateJobs(String headName, CreateServiceName[] creates, CreateServiceName original) {
            creationTime = System.currentTimeMillis();
            this.headName = headName;
            this.original = original;
            for (CreateServiceName create : creates) {
                parts.add(create);
                remaining.add(create.getServiceName());
            }
        }

        int totalNames() {
            int total = 0;
            for (CreateServiceName part : parts)
                total += part.size();
            return total;
        }
    }

    class PendingBatchCreates {
        /**
         * Splits a batch create job into smaller batch creates, each of which
         * is consistent, i.e., all names in a part hash to the same
         * reconfigurator group. The set {@link BatchCreateJobs#parts} holds
         * those parts.
         */
        private ConcurrentHashMap<String, BatchCreateJobs> consistentBatchCreateJobs = new ConcurrentHashMap<String, BatchCreateJobs>();
        /**
         * Maps the head name of a part to the overall head name of the original
         * client-issued batch create request.
         */
        private ConcurrentHashMap<String, String> headnameToOverallHeadname = new ConcurrentHashMap<String, String>();
    }

    // private ConcurrentHashMap<String, BatchCreateJobs> pendingBatchCreates =
    // new ConcurrentHashMap<String, BatchCreateJobs>();
    // private ConcurrentHashMap<String, String> pendingBatchCreateParents = new
    // ConcurrentHashMap<String, String>();

    private void failLongPendingBatchCreate(String name, long timeout) {
        for (Iterator<String> strIter = this.pendingBatchCreations.consistentBatchCreateJobs.keySet()
                .iterator(); strIter.hasNext();) {
            String headName = strIter.next();
            BatchCreateJobs jobs = this.pendingBatchCreations.consistentBatchCreateJobs.get(headName);
            if (System.currentTimeMillis() - jobs.creationTime > timeout) {
                Map<String, String> nameStates = jobs.original.nameStates;
                Set<String> failedCreates = new HashSet<String>();
                for (CreateServiceName part : jobs.parts) {
                    if (jobs.remaining.contains(part.getServiceName())) {
                        failedCreates.addAll(part.nameStates.keySet());
                        // retain only the successfully created names
                        nameStates.keySet().removeAll(part.nameStates.keySet());
                    }
                }
                this.sendClientReconfigurationPacket(
                        new CreateServiceName(nameStates, failedCreates, jobs.parts.iterator().next()).setFailed()
                                .setResponseMessage(
                                        "Batch create failed to create the names listed in the field "
                                                + CreateServiceName.Keys.FAILED_CREATES.toString()
                                                + (!nameStates.isEmpty()
                                                        ? " (but did successfully create the names in the field "
                                                                + CreateServiceName.Keys.NAME_STATE_ARRAY + ")"
                                                        : "")));
            }
        }
    }

    private static final long BATCH_CREATE_TIMEOUT = 30 * 1000;

    private CreateServiceName updateAndCheckComplete(String batchCreateHeadName) {
        String headName = this.pendingBatchCreations.headnameToOverallHeadname.remove(batchCreateHeadName);
        if (headName == null)
            return null;
        // else
        BatchCreateJobs jobs = this.pendingBatchCreations.consistentBatchCreateJobs.get(headName);
        jobs.remaining.remove(batchCreateHeadName);
        if (jobs.remaining.isEmpty()) {
            log.log(Level.INFO,
                    "{0} returning completed batch create with head name {1} with {2} parts and {3} total constituent names",
                    new Object[] { this, headName, jobs.parts.size(), jobs.totalNames() });
            return this.pendingBatchCreations.consistentBatchCreateJobs.remove(headName).original;
        }
        // else
        log.log(Level.INFO, "{0} completed batch create part job with head name {1}; jobs remaining = {2}",
                new Object[] { this, batchCreateHeadName, jobs.remaining });
        return null;
    }

    private String updateAndCheckComplete(StartEpoch<NodeIDType> startEpoch) {
        boolean hasBatchCreatePartJobs = this.pendingBatchCreations.headnameToOverallHeadname
                .containsKey(startEpoch.getServiceName());
        CreateServiceName original = this.updateAndCheckComplete(startEpoch.getServiceName());
        if (original != null)
            return original.getServiceName();
        // remaining batch job parts
        else if (hasBatchCreatePartJobs)
            return null;
        // no batch job parts to begin with
        else
            return startEpoch.getServiceName();
    }

    /**
     * Create service name is a special case of reconfiguration where the
     * previous group is non-existent.
     * 
     * @param create
     * @param ptasks
     * @return Messaging task, typically null. No protocol tasks spawned.
     */
    public GenericMessagingTask<NodeIDType, ?>[] handleCreateServiceName(CreateServiceName create,
            ProtocolTask<NodeIDType, ReconfigurationPacket.PacketType, String>[] ptasks) {
        log.log(Level.INFO, "{0} received {1} from client {2} {3}",
                new Object[] { this, create.getSummary(), create.getCreator(),
                        create.isForwarded() ? "from reconfigurator " + create.getSender() : "" });

        /* If create is batched but all names do not belong to the same
         * reconfigurator group, we automatically split the batch into smaller
         * batches, issue the constituent creates, wait for the corresponding
         * responses, and send a single success or failure response (upon a
         * timeout) back to the issuing client. If it is a legitimate batch
         * create, i.e., all names hash to the same reconfigurator group, then
         * we don't have to maintain any state and can try to commit it like a
         * usual unbatched create. */
        if (create.isBatched() && !this.isLegitimateCreateRequest(create)
                && !this.pendingBatchCreations.consistentBatchCreateJobs.containsKey(create.getServiceName())
                && !this.pendingBatchCreations.headnameToOverallHeadname.containsKey(create.getServiceName())) {
            BatchCreateJobs jobs = null;
            this.pendingBatchCreations.consistentBatchCreateJobs.put(create.getServiceName(),
                    jobs = new BatchCreateJobs(create.getServiceName(), this.makeCreateServiceNameBatches(create),
                            create));
            log.log(Level.INFO, "{0} inserted batch create with {1} parts ({2}) and {3} total constituent names",
                    new Object[] { this, jobs.parts.size(), jobs.remaining, jobs.totalNames() });
            for (CreateServiceName part : jobs.parts) {
                this.pendingBatchCreations.headnameToOverallHeadname.put(part.getServiceName(),
                        create.getServiceName());
                this.handleCreateServiceName(part, ptasks);
            }

            // fail if it doesn't complete within timeout
            this.protocolExecutor.scheduleSimple(new Runnable() {
                @Override
                public void run() {
                    Reconfigurator.this.failLongPendingBatchCreate(create.getServiceName(), BATCH_CREATE_TIMEOUT);
                }
            }, BATCH_CREATE_TIMEOUT, TimeUnit.MILLISECONDS);
            return null;
        }
        // responses for forwarded batch create jobs coming back
        else if (this.pendingBatchCreations.headnameToOverallHeadname.containsKey(create.getServiceName())
                && create.isForwarded()
                && create.getForwader().equals(this.consistentNodeConfig.getNodeSocketAddress(getMyID()))) {
            assert (!create.isRequest()); // response
            CreateServiceName original = this.updateAndCheckComplete(create.getServiceName());
            if (original != null)
                // all done, send success response to client
                this.sendClientReconfigurationPacket(
                        original.getHeadOnly().setResponseMessage("Successfully batch-created " + original.size()
                                + " names with head name " + original.getServiceName()));
            // else wait for more responses
            return null;
        }

        // quick reject for bad batched create
        if (!this.isLegitimateCreateRequest(create))
            this.sendClientReconfigurationPacket(create.setFailed().setResponseMessage(
                    "The names in this create request do not all map to the same reconfigurator group"));

        if (this.processRedirection(create))
            return null;
        // else I am responsible for handling this (possibly forwarded) request

        /* Commit initial "reconfiguration" intent. If the record can be created
         * at all default actives, this operation will succeed, and fail
         * otherwise; in either case, the reconfigurators will have an
         * eventually consistent view of whether the record got created or not.
         * 
         * Note that we need to maintain this consistency property even when
         * nodeConfig may be in flux, i.e., different reconfigurators may have
         * temporarily different views of what the current set of
         * reconfigurators is. But this is okay as app record creations (as well
         * as all app record reconfigurations) are done by an RC paxos group
         * that agrees on whether the creation completed or not; this claim is
         * true even if that RC group is itself undergoing reconfiguration. If
         * nodeConfig is outdated at some node, that only affects the choice of
         * active replicas below, not their consistency. */
        log.log(Level.FINE, "{0} processing {1} from creator {2} {3}", new Object[] { this, create.getSummary(),
                create.getCreator(), create.getForwader() != null ? " forwarded by " + create.getForwader() : "" });

        ReconfigurationRecord<NodeIDType> record = null;
        /* Check if record doesn't already exist. This check is meaningful only
         * for unbatched create requests. For batched creates, we optimistically
         * assume that none of the batched names already exist and let the
         * create fail later if that is not the case. */
        if ((record = this.DB.getReconfigurationRecord(create.getServiceName())) == null)
            this.initiateReconfiguration(create.getServiceName(), record,
                    this.consistentNodeConfig.getReplicatedActives(create.getServiceName()), create.getCreator(),
                    create.getMyReceiver(), create.getForwader(), create.getInitialState(), create.getNameStates(),
                    null);

        // record already exists, so return error message
        else
            this.sendClientReconfigurationPacket(
                    create.setFailed(ClientReconfigurationPacket.ResponseCodes.DUPLICATE_ERROR)
                            .setResponseMessage("Can not (re-)create an already " + (record.isDeletePending()
                                    ? "deleted name until " + ReconfigurationConfig.getMaxFinalStateAge() / 1000
                                            + " seconds have elapsed after the most recent deletion."
                                    : "existing name.")));
        return null;
    }
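
    /* Illustrative sketch only, not part of the original protocol logic (the
     * method name is hypothetical): shows how a batched create is split into
     * reconfigurator-group-consistent parts using the same helper that
     * makeCreateServiceNameBatches above uses. */
    private CreateServiceName[] exampleSplitBatchedCreate(Map<String, String> nameStates) {
        return ReconfigurationConfig.makeCreateNameRequest(nameStates,
                Config.getGlobalInt(ReconfigurationConfig.RC.MAX_BATCH_SIZE),
                Util.setToStringSet(this.consistentNodeConfig.getReconfigurators()));
    }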

    private static final boolean TWO_PAXOS_RC = Config.getGlobalBoolean(ReconfigurationConfig.RC.TWO_PAXOS_RC);

    /**
     * Simply hand over DB request to DB. The only type of RC record that can
     * come here is one announcing reconfiguration completion. Reconfiguration
     * initiation messages are derived locally and coordinated through paxos,
     * not received from outside.
     * 
     * @param rcRecReq
     * @param ptasks
     * @return Messaging task, typically null unless TWO_PAXOS_RC.
     */
    public GenericMessagingTask<NodeIDType, ?>[] handleRCRecordRequest(RCRecordRequest<NodeIDType> rcRecReq,
            ProtocolTask<NodeIDType, ReconfigurationPacket.PacketType, String>[] ptasks) {
        log.log(Level.FINE, "{0} receievd {1}", new Object[] { this, rcRecReq.getSummary() });

        GenericMessagingTask<NodeIDType, ?> mtask = null;
        // for NC changes, prev drop complete signifies everyone's on board
        if (rcRecReq.isReconfigurationPrevDropComplete()
                && rcRecReq.getServiceName().equals(AbstractReconfiguratorDB.RecordNames.RC_NODES.toString()))
            this.sendReconfigureRCNodeConfigConfirmationToInitiator(rcRecReq);
        else if (rcRecReq.isReconfigurationPrevDropComplete()
                && rcRecReq.getServiceName().equals(AbstractReconfiguratorDB.RecordNames.AR_NODES.toString()))
            this.sendReconfigureActiveNodeConfigConfirmationToInitiator(rcRecReq);

        // single paxos reconfiguration allowed only for non-RC-group names
        else if (!TWO_PAXOS_RC && !this.DB.isRCGroupName(rcRecReq.getServiceName())
                && !rcRecReq.isNodeConfigChange()) {
            if (rcRecReq.isReconfigurationComplete() || rcRecReq.isReconfigurationPrevDropComplete()) {
                if (rcRecReq.getInitiator().equals(getMyID()))
                    mtask = new GenericMessagingTask<NodeIDType, RCRecordRequest<NodeIDType>>(getOthers(
                            this.consistentNodeConfig.getReplicatedReconfigurators(rcRecReq.getServiceName())),
                            rcRecReq);
                // no coordination
                boolean handled = this.DB.execute(rcRecReq);
                if (handled)
                    this.garbageCollectPendingTasks(rcRecReq);
            }
        } else {
            // commit until committed by default
            this.repeatUntilObviated(rcRecReq);
        }

        return mtask != null ? mtask.toArray() : null;
    }

    Object[] getOthers(Set<NodeIDType> nodes) {
        Set<NodeIDType> others = new HashSet<NodeIDType>();
        for (NodeIDType node : nodes)
            if (!node.equals(getMyID()))
                others.add(node);
        return others.toArray();
    }

    /**
     * We need to ensure that both the stop/drop at actives happens atomically
     * with the removal of the record at reconfigurators. To accomplish this, we
     * first mark the record as stopped at reconfigurators, then wait for the
     * stop/drop tasks to finish, and finally coordinate the completion
     * notification so that reconfigurators can completely remove the record
     * from their DB.
     * 
     * @param delete
     * @param ptasks
     * @return Messaging task, typically null. No protocol tasks spawned.
     */
    public GenericMessagingTask<NodeIDType, ?>[] handleDeleteServiceName(DeleteServiceName delete,
            ProtocolTask<NodeIDType, ReconfigurationPacket.PacketType, String>[] ptasks) {
        log.log(Level.INFO, "{0} received {1} from creator {2}",
                new Object[] { this, delete.getSummary(), delete.getCreator() });

        if (this.processRedirection(delete))
            return null;
        log.log(Level.FINE, "{0} processing delete request {1} from creator {2} {3}",
                new Object[] { this, delete.getSummary(), delete.getCreator(),
                        delete.getForwader() != null ? " forwarded by " + delete.getForwader() : "" });
        ReconfigurationRecord<NodeIDType> record = this.DB.getReconfigurationRecord(delete.getServiceName());
        RCRecordRequest<NodeIDType> rcRecReq = null;
        // coordinate delete intent, response will be sent in callback
        if (record != null
                && this.isReadyForReconfiguration(rcRecReq = new RCRecordRequest<NodeIDType>(this.getMyID(),
                        this.formStartEpoch(delete.getServiceName(), record, null, delete.getCreator(),
                                delete.getMyReceiver(), delete.getForwader(), null, null, null),
                        RequestTypes.RECONFIGURATION_INTENT), record)) {
            this.DB.handleIncoming(rcRecReq, null);
            return null;
        }
        // WAIT_DELETE state also means success
        else if (this.isWaitingDelete(delete)) {
            // this.sendDeleteConfirmationToClient(rcRecReq);
            this.sendClientReconfigurationPacket(
                    delete.setResponseMessage(delete.getServiceName() + " already pending deletion"));
            return null;
        }
        // else failure
        this.sendClientReconfigurationPacket(
                delete.setFailed(ClientReconfigurationPacket.ResponseCodes.NONEXISTENT_NAME_ERROR)
                        .setResponseMessage(delete.getServiceName()
                                + (record != null ? " is being reconfigured and can not be deleted just yet."
                                        : " does not exist")));
        log.log(Level.FINE, "{0} discarded {1} because RC record is not reconfiguration ready.",
                new Object[] { this, delete.getSummary() });
        return null;
    }
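
    /* Summary sketch of the delete sequence implemented above and in the
     * executed() callback below:
     * 1) coordinate a RECONFIGURATION_INTENT whose new group is null
     *    (formStartEpoch with null newActives), putting the record in
     *    WAIT_DELETE;
     * 2) the stop/drop protocol tasks retire the final epoch at the actives;
     * 3) a coordinated completion lets all reconfigurators remove the record,
     *    and sendDeleteConfirmationToClient responds to the issuing client. */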

    private boolean isWaitingDelete(DeleteServiceName delete) {
        ReconfigurationRecord<NodeIDType> record = this.DB.getReconfigurationRecord(delete.getServiceName());
        return record != null && record.getState().equals(RCStates.WAIT_DELETE);
    }

    private static final String ANYCAST_NAME = Config.getGlobalString(RC.SPECIAL_NAME);
    private static final String BROADCAST_NAME = Config.getGlobalString(RC.BROADCAST_NAME);

    /**
     * This method simply looks up and returns the current set of active
     * replicas. Maintaining this state consistently is the primary and only
     * existential purpose of reconfigurators.
     * 
     * @param request
     * @param ptasks
     * @return Messaging task returning the set of active replicas to the
     *         requestor. No protocol tasks spawned.
     */
    public GenericMessagingTask<NodeIDType, ?>[] handleRequestActiveReplicas(RequestActiveReplicas request,
            ProtocolTask<NodeIDType, ReconfigurationPacket.PacketType, String>[] ptasks) {

        log.log(Level.INFO, "{0} received {1} {2} from client {3} {4}",
                new Object[] { this, request.isRequest() ? "" : "RESPONSE", request.getSummary(),
                        request.getCreator(),
                        request.isForwarded() ? "from reconfigurator " + request.getSender() : "" });
        if (request.getServiceName().equals(ANYCAST_NAME)) {
            this.sendClientReconfigurationPacket(request.setActives(modifyPortsForSSL(
                    this.consistentNodeConfig.getRandomActiveReplica(), receivedOnSSLPort(request))));
            return null;
        } else if (request.getServiceName().equals(BROADCAST_NAME)) {
            this.sendClientReconfigurationPacket(request.setActives(modifyPortsForSSL(
                    this.consistentNodeConfig.getActiveReplicaSocketAddresses(), receivedOnSSLPort(request))));
            return null;
        }

        if (this.processRedirection(request))
            return null;

        ReconfigurationRecord<NodeIDType> record = this.DB.getReconfigurationRecord(request.getServiceName());
        if (record == null || record.getActiveReplicas() == null || record.isDeletePending()) {
            log.log(Level.FINE, "{0} returning null active replicas for name {1}; record = {2}",
                    new Object[] { this, request.getServiceName(), record });
            // I am responsible but can't find actives for the name
            String responseMessage = "No state found for name " + request.getServiceName();
            request.setResponseMessage(
                    responseMessage + " probably because the name has not yet been created or is pending deletion");
            this.sendClientReconfigurationPacket(request
                    .setFailed(ClientReconfigurationPacket.ResponseCodes.NONEXISTENT_NAME_ERROR).makeResponse()
                    .setHashRCs(modifyPortsForSSL(
                            this.getSocketAddresses(this.consistentNodeConfig
                                    .getReplicatedReconfigurators(request.getServiceName())),
                            receivedOnSSLPort(request))));
            return null;
        }

        // else
        Set<InetSocketAddress> activeIPs = new HashSet<InetSocketAddress>();
        /* It is important to ensure that the mapping between active nodeIDs and
         * their socket addresses does not change or changes very infrequently.
         * Otherwise, in-flux copies of nodeConfig can produce wrong answers
         * here. This assumption is reasonable and will hold as long as active
         * nodeIDs are re-used with the same socket address or removed and
         * re-added after a long time if at all by which time all nodes have
         * forgotten about the old id-to-address mapping. */
        for (NodeIDType node : record.getActiveReplicas())
            activeIPs.add(this.consistentNodeConfig.getNodeSocketAddress(node));
        // to support different client facing ports
        request.setActives(modifyPortsForSSL(activeIPs, receivedOnSSLPort(request)));
        this.sendClientReconfigurationPacket(request.makeResponse());
        /* We message using sendClientReconfigurationPacket above as opposed to
         * returning a messaging task below because protocolExecutor's messenger
         * may not be usable for client facing requests. */
        return null;
    }
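
    /* Client-side counterpart of the two special names above, as a sketch
     * (assuming the single-argument RequestActiveReplicas constructor; the
     * Config keys are the same ones read into ANYCAST_NAME and BROADCAST_NAME):
     *
     *   // returns one random active replica
     *   new RequestActiveReplicas(Config.getGlobalString(RC.SPECIAL_NAME));
     *   // returns all active replica socket addresses
     *   new RequestActiveReplicas(Config.getGlobalString(RC.BROADCAST_NAME));
     */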

    /**
     * Handles a request to add or delete a reconfigurator from the set of all
     * reconfigurators in NodeConfig. The reconfiguration record corresponding
     * to NodeConfig is stored in the RC records table and the
     * "active replica state" or the NodeConfig info itself is stored in a
     * separate NodeConfig table in the DB.
     * 
     * @param changeRC
     * @param ptasks
     * @return Messaging task, typically null. No protocol tasks spawned.
     */
    public GenericMessagingTask<?, ?>[] handleReconfigureRCNodeConfig(ReconfigureRCNodeConfig<NodeIDType> changeRC,
            ProtocolTask<NodeIDType, ReconfigurationPacket.PacketType, String>[] ptasks) {
        assert (changeRC.getServiceName().equals(AbstractReconfiguratorDB.RecordNames.RC_NODES.toString()));
        log.log(Level.INFO, "\n\n{0}\n{1} received {2} request {3} from initiator {4}\n{5}", new Object[] {
                separator, this, changeRC.getType(), changeRC.getSummary(), changeRC.getIssuer(), separator });
        if (!this.isPermitted(changeRC)) {
            String errorMessage = " Impermissible node config change request";
            log.severe(this + errorMessage + ": " + changeRC);
            // this.sendRCReconfigurationErrorToInitiator(changeRC).setFailed().setResponseMessage(errorMessage);
            return (new GenericMessagingTask<InetSocketAddress, ServerReconfigurationPacket<NodeIDType>>(
                    changeRC.getIssuer(), changeRC.setFailed().setResponseMessage(errorMessage))).toArray();
        }
        // check first if NC is ready for reconfiguration
        ReconfigurationRecord<NodeIDType> ncRecord = this.DB.getReconfigurationRecord(changeRC.getServiceName());
        if (ncRecord == null)
            return null; // possible if startCleanSlate

        if (!ncRecord.isReady()) {
            String errorMessage = " Trying to conduct concurrent node config changes";
            log.warning(this + errorMessage + ": " + changeRC);
            return (new GenericMessagingTask<InetSocketAddress, ServerReconfigurationPacket<NodeIDType>>(
                    changeRC.getIssuer(), changeRC.setFailed().setResponseMessage(errorMessage))).toArray();
        }
        // else try to reconfigure even though it may still fail
        Set<NodeIDType> curRCs = ncRecord.getActiveReplicas();
        Set<NodeIDType> newRCs = new HashSet<NodeIDType>(curRCs);
        newRCs.addAll(changeRC.getAddedNodeIDs());
        newRCs.removeAll(changeRC.getDeletedNodeIDs());
        // will use the nodeConfig before the change below.
        if (changeRC.newlyAddedNodes != null || changeRC.deletedNodes != null)
            this.initiateReconfiguration(AbstractReconfiguratorDB.RecordNames.RC_NODES.toString(), ncRecord, newRCs, // this.consistentNodeConfig.getNodeSocketAddress
                    (changeRC.getIssuer()), changeRC.getMyReceiver(), null, null, null, changeRC.newlyAddedNodes);
        return null;
    }

    /**
     * @param changeActives
     * @param ptasks
     * @return Messaging task if any.
     */
    public GenericMessagingTask<?, ?>[] handleReconfigureActiveNodeConfig(
            ReconfigureActiveNodeConfig<NodeIDType> changeActives,
            ProtocolTask<NodeIDType, ReconfigurationPacket.PacketType, String>[] ptasks) {

        assert (changeActives.getServiceName().equals(AbstractReconfiguratorDB.RecordNames.AR_NODES.toString()));
        log.log(Level.INFO, "\n\n{0}\n{1} received {2} request {3} from initiator {4}\n{5}",
                new Object[] { separator, this, changeActives.getType(), changeActives.getSummary(),
                        changeActives.getIssuer(), separator });

        if (!this.isPermitted(changeActives)) {
            String errorMessage = " Impermissible node config change request";
            log.severe(this + errorMessage + ": " + changeActives);
            // this.sendRCReconfigurationErrorToInitiator(changeRC).setFailed().setResponseMessage(errorMessage);
            return (new GenericMessagingTask<InetSocketAddress, ServerReconfigurationPacket<NodeIDType>>(
                    changeActives.getIssuer(), changeActives.setFailed().setResponseMessage(errorMessage)))
                            .toArray();
        } else if (this.nothingToDo(changeActives)) {
            String errorMessage = " Requested node additions or deletions already in place";
            log.log(Level.INFO, "{0} {1} : {2}", new Object[] { this, errorMessage, changeActives });
            // do not setFailed() in this case
            return (new GenericMessagingTask<InetSocketAddress, ServerReconfigurationPacket<NodeIDType>>(
                    changeActives.getIssuer(), changeActives.setResponseMessage(errorMessage))).toArray();

        }
        // check first if NC is ready for reconfiguration
        ReconfigurationRecord<NodeIDType> activeNCRecord = this.DB
                .getReconfigurationRecord(changeActives.getServiceName());
        if (activeNCRecord == null)
            return null; // possible if startCleanSlate

        if (!activeNCRecord.isReady()) {
            String errorMessage = " Trying to conduct concurrent node config changes";
            log.warning(this + errorMessage + ": " + changeActives.getSummary() + "\n when activeNCRecord = "
                    + activeNCRecord.getSummary());
            return (new GenericMessagingTask<InetSocketAddress, ServerReconfigurationPacket<NodeIDType>>(
                    changeActives.getIssuer(), changeActives.setFailed().setResponseMessage(errorMessage)))
                            .toArray();
        }

        // else try to reconfigure even though it may still fail
        Set<NodeIDType> curActives = activeNCRecord.getActiveReplicas();
        Set<NodeIDType> newActives = new HashSet<NodeIDType>(curActives);
        newActives.addAll(changeActives.getAddedNodeIDs());
        newActives.removeAll(changeActives.getDeletedNodeIDs());
        // will use the nodeConfig before the change below.
        if (changeActives.newlyAddedNodes != null || changeActives.deletedNodes != null)
            this.initiateReconfiguration(AbstractReconfiguratorDB.RecordNames.AR_NODES.toString(), activeNCRecord,
                    newActives, // this.consistentNodeConfig.getNodeSocketAddress
                    (changeActives.getIssuer()), changeActives.getMyReceiver(), null, null, null,
                    changeActives.newlyAddedNodes);

        return null;
    }

    private boolean isPermitted(ReconfigureActiveNodeConfig<NodeIDType> changeActives) {
        return changeActives.hasDeletedNodes() ? changeActives.deletedNodes.size() == 1 : true;
    }

    private boolean nothingToDo(ReconfigureActiveNodeConfig<NodeIDType> changeActives) {
        boolean nothing = true;
        nothing = nothing && (changeActives.hasAddedNodes()
                ? this.consistentNodeConfig.getActiveReplicas().containsAll(changeActives.newlyAddedNodes.keySet())
                : true);
        if (changeActives.hasDeletedNodes())
            for (NodeIDType node : changeActives.deletedNodes)
                nothing = nothing && !this.consistentNodeConfig.getActiveReplicas().contains(node);
        return nothing;
    }

    /**
     * Reconfiguration is initiated using a callback because the intent to
     * conduct a reconfiguration must be persistently committed before
     * initiating the reconfiguration. Otherwise, the failure of say the
     * initiating reconfigurator can leave an active replica group stopped
     * indefinitely. Exactly one reconfigurator, the one that proposes the
     * request initiating reconfiguration registers the callback. This
     * initiating reconfigurator will spawn a WaitAckStopEpoch task when the
     * initiating request is locally executed. The other replicas only spawn a
     * WaitPrimaryExecution task as a double check that the initiating
     * reconfigurator does complete the reconfiguration; if it does not, they
     * will follow up with their own attempt after a timeout. This works because
     * all three steps: WaitAckStopEpoch, WaitAckStartEpoch, and
     * WaitAckDropEpoch are idempotent operations.
     * 
     * A reconfiguration attempt can still get stuck if all reconfigurators
     * crash or the only reconfigurators that committed the intent crash. So, a
     * replica recovery procedure should ensure that replicas eventually execute
     * committed but unexecuted requests. This naturally happens with paxos.
     */
    @Override
    public void executed(Request request, boolean handled) {
        if (this.isRecovering() && !((request instanceof RCRecordRequest<?>)
                && ((RCRecordRequest<?>) request).isReconfigurationMerge()))
            return; // no messaging during recovery
        BasicReconfigurationPacket<?> rcPacket = null;
        try {
            rcPacket = ReconfigurationPacket.getReconfigurationPacket(request, getUnstringer());
        } catch (JSONException e) {
            if (!request.toString().equals(Request.NO_OP))
                e.printStackTrace();
        }
        if (rcPacket == null || !rcPacket.getType().equals(ReconfigurationPacket.PacketType.RC_RECORD_REQUEST))
            return;
        @SuppressWarnings("unchecked")
        // checked right above
        RCRecordRequest<NodeIDType> rcRecReq = (RCRecordRequest<NodeIDType>) rcPacket;

        if (this.isCommitWorkerCoordinated(rcRecReq)) {
            log.log(Level.FINE, "{0} executing CommitWorker callback after {1} execution of {2}",
                    new Object[] { this, (handled ? "successful" : "failed"), rcRecReq.getSummary() });
            this.commitWorker.executedCallback(rcRecReq, handled);
        }

        // handled is true when reconfiguration intent causes state change
        if (handled && rcRecReq.isReconfigurationIntent() && !rcRecReq.isNodeConfigChange()
                && !rcRecReq.isActiveNodeConfigChange()) {
            // if I initiated this, spawn reconfiguration task
            if (rcRecReq.startEpoch.getInitiator().equals(getMyID())
                    // but spawn anyway for my RC group reconfigurations
                    || (this.DB.isRCGroupName(rcRecReq.getServiceName())
                            && rcRecReq.startEpoch.getCurEpochGroup().contains(getMyID())))
                this.spawnPrimaryReconfiguratorTask(rcRecReq);
            // else I am secondary, so wait for primary's execution
            else if (!this.DB.isRCGroupName(rcRecReq.getServiceName()))
                this.spawnSecondaryReconfiguratorTask(rcRecReq);

        } else if (handled
                && (rcRecReq.isReconfigurationComplete() || rcRecReq.isDeleteIntentOrPrevDropComplete())) {
            // send delete confirmation to deleting client
            if (rcRecReq.isDeleteIntent() && rcRecReq.startEpoch.isDeleteRequest())
                sendDeleteConfirmationToClient(rcRecReq);
            // send response back to creating client
            else if (rcRecReq.isReconfigurationComplete() && rcRecReq.startEpoch.isCreateRequest()) {
                this.sendCreateConfirmationToClient(rcRecReq, updateAndCheckComplete(rcRecReq.startEpoch));
            }
            // send response back to RCReconfigure initiator
            else if (rcRecReq.isReconfigurationComplete() && rcRecReq.isNodeConfigChange())
                // checkpoint and garbage collect
                this.postCompleteNodeConfigChange(rcRecReq);

            if (this.DB.outstandingContains(rcRecReq.getServiceName()))
                this.DB.notifyOutstanding(rcRecReq.getServiceName());

            /* If reconfiguration is complete, remove any previously spawned
             * secondary tasks for the same reconfiguration. We do not remove
             * WaitAckDropEpoch here because that might still be waiting for
             * drop ack messages. If they don't arrive in a reasonable period of
             * time, WaitAckDropEpoch is designed to self-destruct. But we do
             * remove all tasks corresponding to the previous epoch at this
             * point. */
            this.garbageCollectPendingTasks(rcRecReq);
        } else if (handled && rcRecReq.isNodeConfigChange()) {
            if (rcRecReq.isReconfigurationIntent()) {
                ncAssert(rcRecReq, handled);
                // initiate and complete reconfiguring RC groups here
                executeNodeConfigChange(rcRecReq);
            }
        } else if (handled && rcRecReq.isActiveNodeConfigChange()) {
            if (rcRecReq.isReconfigurationIntent()) {
                this.spawnExecuteActiveNodeConfigChange(rcRecReq);
            }
        } else if (rcRecReq.isReconfigurationMerge()) {
            if (!handled) {
                /* Merge was unsuccessful probably because the node that
                 * responded with the checkpoint handle did not deliver on the
                 * actual checkpoint, so we need to start from WaitAckStopEpoch
                 * all over again. Note that it is inconvenient to do something
                 * similar to WaitEpochFinalState and merge upon successfully
                 * getting the state because the merge needs a coordinated
                 * commit task that is asynchronous. The only way to know if the
                 * merge succeeded or failed is in this Reconfigurator
                 * executed() callback function but WaitEpochFinalState by
                 * design belongs to ActiveReplica. */
                log.log(Level.INFO, "{0} restarting failed merge {1}",
                        new Object[] { this, rcRecReq.getSummary() });
                this.protocolExecutor
                        .spawnIfNotRunning(new WaitAckStopEpoch<NodeIDType>(rcRecReq.startEpoch, this.DB));
            }

            else if (handled && rcRecReq.getInitiator().equals(getMyID()))
                /* We shouldn't explicitly drop the mergee's final epoch state
                 * as other nodes may not have completed the merge and the node
                 * that first supplied the final checkpoint handle may have
                 * crashed. If so, we need to resume WaitAckStopEpoch and for it
                 * to succeed, we need the final checkpoints to not be dropped.
                 * The mergee's final state can be left around hanging and will
                 * eventually become unusable after MAX_FINAL_STATE_AGE. The
                 * actual checkpoints via the file system will be deleted by the
                 * garbage collector eventually, but the final checkpoint
                 * handles in the DB will remain forever or at least until a
                 * node with this name is re-added to the system. */
                ;

        }
    }
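
    /* The common-case pipeline that the callback above drives for a single
     * reconfiguration, each step idempotent:
     *
     *   RECONFIGURATION_INTENT committed -> spawnPrimaryReconfiguratorTask ->
     *   WaitAckStopEpoch (stop old epoch) -> WaitAckStartEpoch (start new) ->
     *   WaitAckDropEpoch (drop old) -> RECONFIGURATION_COMPLETE committed.
     *
     * Secondaries instead schedule WaitPrimaryExecution and take over with the
     * same idempotent steps only if the primary fails to finish. */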

    private boolean isCommitWorkerCoordinated(RCRecordRequest<NodeIDType> rcRecReq) {
        return (TWO_PAXOS_RC
                && (rcRecReq.isReconfigurationComplete() || rcRecReq.isReconfigurationPrevDropComplete()))
                || rcRecReq.isReconfigurationMerge();
    }

    private void ncAssert(RCRecordRequest<NodeIDType> rcRecReq, boolean handled) {
        ReconfigurationRecord<NodeIDType> ncRecord = this.DB
                .getReconfigurationRecord(AbstractReconfiguratorDB.RecordNames.RC_NODES.toString());
        assert (!ncRecord.getActiveReplicas().equals(ncRecord.getNewActives())) : this + " : handled=" + handled
                + "; " + ncRecord + "\n  upon \n" + rcRecReq;
    }

    @Override
    public void preExecuted(Request request) {
        if (this.isRecovering())
            return;
        // type and null checks happen inside requestToRCRecordRequest
        RCRecordRequest<NodeIDType> rcRecReq = this.requestToRCRecordRequest(request);

        // this method is currently used for NC record completions
        if (rcRecReq == null || !this.DB.isNCRecord(rcRecReq.getServiceName())
                || !rcRecReq.isReconfigurationComplete())
            return;

        /* Only newly added nodes need to do this as they received a
         * reconfiguration complete out of the blue and may not even know the
         * socket addresses of other newly added nodes. */
        if (rcRecReq.startEpoch.getNewlyAddedNodes().contains(this.getMyID()))
            this.executeNodeConfigChange(rcRecReq);
    }

    /****************************** End of protocol task handler methods *********************/

    /*********************** Private methods below **************************/

    @SuppressWarnings("unchecked")
    private RCRecordRequest<NodeIDType> requestToRCRecordRequest(Request request) {
        if (request instanceof RCRecordRequest<?>)
            return (RCRecordRequest<NodeIDType>) request;
        BasicReconfigurationPacket<?> rcPacket = null;
        try {
            rcPacket = ReconfigurationPacket.getReconfigurationPacket(request, getUnstringer());
        } catch (JSONException e) {
            if (!request.toString().equals(Request.NO_OP))
                e.printStackTrace();
        }
        if (rcPacket == null || !rcPacket.getType().equals(ReconfigurationPacket.PacketType.RC_RECORD_REQUEST))
            return null;
        // checked right above
        RCRecordRequest<NodeIDType> rcRecReq = (RCRecordRequest<NodeIDType>) rcPacket;
        return rcRecReq;
    }

    private void spawnPrimaryReconfiguratorTask(RCRecordRequest<NodeIDType> rcRecReq) {
        /* This assert follows from the fact that the return value handled can
         * be true for a reconfiguration intent packet exactly once. */
        // MOB-504: Fix 2: This is more of a hack for now.
        // if(this.isTaskRunning(this.getTaskKey(WaitAckStopEpoch.class,
        // rcRecReq)) && !rcRecReq.isSplitIntent()) return;

        assert (!this.isTaskRunning(this.getTaskKey(WaitAckStopEpoch.class, rcRecReq)));

        log.log(Level.FINE, MyLogger.FORMAT[8],
                new Object[] { this, "spawning WaitAckStopEpoch for", rcRecReq.startEpoch.getPrevGroupName(), ":",
                        rcRecReq.getEpochNumber() - 1, "for starting", rcRecReq.getServiceName(), ":",
                        rcRecReq.getEpochNumber() });
        // the main stop/start/drop sequence begins here
        if (!rcRecReq.isSplitIntent())
            this.protocolExecutor.spawnIfNotRunning(new WaitAckStopEpoch<NodeIDType>(rcRecReq.startEpoch, this.DB));
        else
            // split reconfigurations should skip the stop phase
            this.protocolExecutor
                    .spawnIfNotRunning(new WaitAckStartEpoch<NodeIDType>(rcRecReq.startEpoch, this.DB));

    }

    /* Utility method used to determine how to offset ports in responses:
     * returns null for server-to-server requests (received on the server port,
     * so no client port offset applies), true if received on the client-facing
     * SSL port, and false otherwise (clear client port or unknown receiver). */
    private Boolean receivedOnSSLPort(ClientReconfigurationPacket request) {
        if (request.getMyReceiver() == null)
            return false;
        // server-to-server => return null
        else if (request.getMyReceiver().getPort() == this.consistentNodeConfig.getNodePort(getMyID()))
            return null;
        else
            return request.getMyReceiver().getPort() == ReconfigurationConfig
                    .getClientFacingSSLPort(this.consistentNodeConfig.getNodePort(getMyID()));
    }

    private void spawnSecondaryReconfiguratorTask(RCRecordRequest<NodeIDType> rcRecReq) {
        /* This assert follows from the fact that the return value handled can
         * be true for a reconfiguration intent packet exactly once. */
        if (this.isTaskRunning(this.getTaskKey(WaitPrimaryExecution.class, rcRecReq))) {
            log.log(Level.INFO, MyLogger.FORMAT[3],
                    new Object[] { this, " TASK IS ALREADY RUNNING: ", rcRecReq.getSummary() });
        }
        // the assert below can be disabled if needed
        assert (!this.isTaskRunning(this.getTaskKey(WaitPrimaryExecution.class, rcRecReq)));

        log.log(Level.FINE, MyLogger.FORMAT[3], new Object[] { this, " spawning WaitPrimaryExecution for ",
                rcRecReq.getServiceName(), rcRecReq.getEpochNumber() - 1 });
        /* If nodeConfig is under flux, we could be wrong on the set of peer
         * reconfigurators below, but this information is only used to get
         * confirmation from the primary, so in the worst case, the secondary
         * will not hear from any primary and will itself complete the
         * reconfiguration, which will be consistent thanks to paxos. */
        this.protocolExecutor.schedule(new WaitPrimaryExecution<NodeIDType>(getMyID(), rcRecReq.startEpoch, this.DB,
                this.consistentNodeConfig.getReplicatedReconfigurators(rcRecReq.getServiceName())));
    }

    /**
     * These are the only request types that {@link Reconfigurator} will accept
     * on the client-facing port.
     */
    private ReconfigurationPacket.PacketType[] clientRequestTypes = {
            ReconfigurationPacket.PacketType.CREATE_SERVICE_NAME,
            ReconfigurationPacket.PacketType.DELETE_SERVICE_NAME,
            ReconfigurationPacket.PacketType.REQUEST_ACTIVE_REPLICAS, };

    // put anything needing periodic instrumentation here
    private void instrument(Level level) {
        log.log(level, "{0} activeThreadCount = {1}; taskCount = {2}; completedTaskCount = {3}",
                new Object[] { this, this.protocolExecutor.getActiveCount(), this.protocolExecutor.getTaskCount(),
                        this.protocolExecutor.getCompletedTaskCount() });
    }

    class Instrumenter implements Runnable {
        public void run() {
            instrument(Level.FINE);
        }
    }

    private AddressMessenger<JSONObject> getClientMessenger() {
        return this.messenger.getClientMessenger();
    }

    private AddressMessenger<JSONObject> getClientMessenger(InetSocketAddress listenSockAddr) {
        AddressMessenger<JSONObject> cMsgr = this.messenger.getClientMessenger(listenSockAddr);
        cMsgr = cMsgr != null ? cMsgr : this.messenger;
        log.log(Level.FINEST, "{0} returning messenger {2} listening on address {1}",
                new Object[] { this, listenSockAddr, cMsgr });
        return cMsgr;
    }

    private boolean processRedirection(ClientReconfigurationPacket clientRCPacket) {
        /* Received response from responsible reconfigurator to which I
         * previously forwarded this client request. Need to check whether I
         * received a redirected response before checking whether I am
         * responsible, otherwise there will be an infinite forwarding loop. */
        if (clientRCPacket.isRedirectedResponse()) {
            log.log(Level.FINE, "{0} relaying RESPONSE for forwarded request {1} to {2}",
                    new Object[] { this, clientRCPacket.getSummary(), clientRCPacket.getCreator() });
            // just relay response to the client
            return this.sendClientReconfigurationPacket(clientRCPacket);
        }
        // forward if I am not responsible
        else
            return (this.redirectableRequest(clientRCPacket));
    }
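
    /* Worked trace (editor's sketch) of the redirection logic above: a client
     * sends CREATE_SERVICE_NAME to reconfigurator RC1 that is not responsible
     * for the name.
     *   1. At RC1, isRedirectedResponse() is false and redirectableRequest(..)
     *      forwards the request to a random responsible reconfigurator, RC2.
     *   2. RC2 executes the request and sends the response back to RC1, the
     *      forwarder recorded in the packet.
     *   3. At RC1, isRedirectedResponse() is now true, so RC1 only relays the
     *      response to the original client; checking this case first is what
     *      prevents an infinite forwarding loop.
     */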

    private static Set<InetSocketAddress> modifyPortsForSSL(Set<InetSocketAddress> replicas, Boolean ssl) {
        if (ssl == null)
            return replicas;
        else if ((ssl && ReconfigurationConfig.getClientPortSSLOffset() == 0)
                || (!ssl && ReconfigurationConfig.getClientPortOffset() == 0))
            return replicas;
        Set<InetSocketAddress> modified = new HashSet<InetSocketAddress>();
        for (InetSocketAddress sockAddr : replicas)
            modified.add(new InetSocketAddress(sockAddr.getAddress(),
                    ssl ? ReconfigurationConfig.getClientFacingSSLPort(sockAddr.getPort())
                            : ReconfigurationConfig.getClientFacingClearPort(sockAddr.getPort())));
        return modified;
    }
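
    /* Worked example (editor's note, with hypothetical offsets): suppose the
     * client clear-port offset is 100 and the SSL offset is 200. For replicas
     * listening on server port 3000:
     *   modifyPortsForSSL(replicas, null)  => ports unchanged (server-to-server)
     *   modifyPortsForSSL(replicas, false) => ports become 3000 + 100 = 3100
     *   modifyPortsForSSL(replicas, true)  => ports become 3000 + 200 = 3200
     * If the corresponding offset is 0, the original set is returned as-is.
     */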

    private boolean clientFacingPortIsMyPort() {
        return getClientFacingClearPort(
                this.consistentNodeConfig.getNodePort(getMyID())) == this.consistentNodeConfig
                        .getNodePort(getMyID());
    }

    /**
     * Refer to {@link ActiveReplica#getClientFacingSSLPort(int)}.
     * 
     * @param port
     * @return The client facing ssl port.
     */
    public static int getClientFacingSSLPort(int port) {
        return ActiveReplica.getClientFacingSSLPort(port);
    }

    /**
     * @param port
     * @return The client facing clear port.
     */
    public static int getClientFacingClearPort(int port) {
        return ActiveReplica.getClientFacingClearPort(port);
    }

    private boolean redirectableRequest(ClientReconfigurationPacket request) {
        // I am responsible
        if (this.consistentNodeConfig.getReplicatedReconfigurators(request.getServiceName()).contains(getMyID()))
            return false;

        // else if forwardable
        if (request.isForwardable())
            // forward to a random responsible reconfigurator
            this.forwardClientReconfigurationPacket(request);
        else
            // error with redirection hints
            this.sendClientReconfigurationPacket(request.setFailed()
                    .setHashRCs(modifyPortsForSSL(
                            this.getSocketAddresses(this.consistentNodeConfig
                                    .getReplicatedReconfigurators(request.getServiceName())),
                            receivedOnSSLPort(request)))
                    .setResponseMessage(" <Wrong number! I am not the reconfigurator responsible>"));
        return true;
    }

    private Set<InetSocketAddress> getSocketAddresses(Set<NodeIDType> nodes) {
        Set<InetSocketAddress> sockAddrs = new HashSet<InetSocketAddress>();
        for (NodeIDType node : nodes)
            sockAddrs.add(this.consistentNodeConfig.getNodeSocketAddress(node));
        return sockAddrs;
    }

    /* We may need to use a separate messenger for end clients if we use two-way
     * authentication between servers.
     * 
     * TODO: unused, remove */
    @SuppressWarnings("unused")
    @Deprecated
    private AddressMessenger<JSONObject> initClientMessenger() {
        AbstractPacketDemultiplexer<JSONObject> pd = null;
        Messenger<InetSocketAddress, JSONObject> cMsgr = null;
        try {
            int myPort = (this.consistentNodeConfig.getNodePort(getMyID()));
            if (ReconfigurationConfig.getClientFacingPort(myPort) != myPort) {
                log.log(Level.INFO, "{0} creating client messenger at {1}:{2}",
                        new Object[] { this, this.consistentNodeConfig.getBindAddress(getMyID()),
                                ReconfigurationConfig.getClientFacingPort(myPort) });
                MessageNIOTransport<InetSocketAddress, JSONObject> niot = null;
                InetSocketAddress isa = new InetSocketAddress(this.consistentNodeConfig.getBindAddress(getMyID()),
                        ReconfigurationConfig.getClientFacingPort(myPort));
                cMsgr = new JSONMessenger<InetSocketAddress>(
                        niot = new MessageNIOTransport<InetSocketAddress, JSONObject>(isa.getAddress(),
                                isa.getPort(), (pd = new ReconfigurationPacketDemultiplexer()),
                                ReconfigurationConfig.getClientSSLMode()));
                if (!niot.getListeningSocketAddress().equals(isa))
                    throw new IOException("Unable to listen on specified socket address at " + isa);
                pd.register(clientRequestTypes, this);
            }
        } catch (IOException e) {
            e.printStackTrace();
            log.severe(this + ": " + e.getMessage());
            System.exit(1);
        }
        return cMsgr != null ? cMsgr : (AddressMessenger<JSONObject>) this.messenger;
    }

    /**
     * Initializes the clear or SSL client messenger based on {@code ssl}.
     * 
     * @param ssl
     * @return The client messenger.
     */
    @SuppressWarnings("unchecked")
    private AddressMessenger<JSONObject> initClientMessenger(boolean ssl) {
        AbstractPacketDemultiplexer<JSONObject> pd = null;
        Messenger<InetSocketAddress, JSONObject> cMsgr = null;
        try {
            int myPort = (this.consistentNodeConfig.getNodePort(getMyID()));
            if ((ssl ? getClientFacingSSLPort(myPort) : getClientFacingClearPort(myPort)) != myPort) {
                log.log(Level.INFO, "{0} creating {1} client messenger at {2}:{3}",
                        new Object[] { this, ssl ? "SSL" : "", this.consistentNodeConfig.getBindAddress(getMyID()),
                                "" + (ssl ? getClientFacingSSLPort(myPort) : getClientFacingClearPort(myPort)) });
                AddressMessenger<?> existing = (ssl ? this.messenger.getSSLClientMessenger()
                        : this.messenger.getClientMessenger());

                if (existing == null || existing == this.messenger) {
                    MessageNIOTransport<InetSocketAddress, JSONObject> niot = null;
                    InetSocketAddress isa = new InetSocketAddress(
                            this.consistentNodeConfig.getBindAddress(getMyID()),
                            ssl ? getClientFacingSSLPort(myPort) : getClientFacingClearPort(myPort));
                    cMsgr = new JSONMessenger<InetSocketAddress>(
                            niot = new MessageNIOTransport<InetSocketAddress, JSONObject>(isa.getAddress(),
                                    isa.getPort(), (pd = new ReconfigurationPacketDemultiplexer()),
                                    ssl ? ReconfigurationConfig.getClientSSLMode() : SSL_MODES.CLEAR));
                    if (!niot.getListeningSocketAddress().equals(isa))
                        throw new IOException("Unable to listen on specified socket address at " + isa
                                + "; created messenger listening instead on " + niot.getListeningSocketAddress());
                } else if (!ssl) {
                    log.log(Level.INFO, "{0} adding self as demultiplexer to existing {1} client messenger",
                            new Object[] { this, ssl ? "SSL" : "" });
                    if (this.messenger.getClientMessenger() instanceof Messenger)
                        ((Messenger<NodeIDType, ?>) this.messenger.getClientMessenger())
                                .addPacketDemultiplexer(pd = new ReconfigurationPacketDemultiplexer());
                } else {
                    log.log(Level.INFO, "{0} adding self as demultiplexer to existing {1} client messenger",
                            new Object[] { this, ssl ? "SSL" : "" });
                    if (this.messenger.getSSLClientMessenger() instanceof Messenger)
                        ((Messenger<NodeIDType, ?>) this.messenger.getSSLClientMessenger())
                                .addPacketDemultiplexer(pd = new ReconfigurationPacketDemultiplexer());
                }
                assert (pd != null);
                pd.register(clientRequestTypes, this);
            }
        } catch (IOException e) {
            e.printStackTrace();
            log.severe(this + " failed to initialize client messenger: " + e.getMessage());
            System.exit(1);
        }

        if (cMsgr != null)
            if (ssl && this.messenger.getSSLClientMessenger() == null)
                this.messenger.setSSLClientMessenger(cMsgr);
            else if (!ssl && this.messenger.getClientMessenger() == null)
                this.messenger.setClientMessenger(cMsgr);

        return cMsgr != null ? cMsgr : (AddressMessenger<JSONObject>) this.messenger;
    }

    private boolean isTaskRunning(String key) {
        return this.protocolExecutor.isRunning(key);
    }

    /* Check for and invoke reconfiguration policy. The reconfiguration policy
     * is in AbstractDemandProfile and by design only deals with IP addresses,
     * not node IDs, so we have utility methods in ConsistentNodeConfig to go
     * back and forth between collections of NodeIDType and InetAddress taking
     * into account the many-to-one mapping from the former to the latter. A
     * good reconfiguration policy should try to return a set of IPs that only
     * minimally modifies the current set of IPs; if so, ConsistentNodeConfig
     * will ensure a similar property for the corresponding NodeIDType set.
     * 
     * If nodeConfig is under flux, this will affect the selection of actives,
     * but not correctness. */
    private Set<NodeIDType> shouldReconfigure(String name) {
        // return null if no current actives
        Set<NodeIDType> oldActives = this.DB.getActiveReplicas(name);
        if (oldActives == null || oldActives.isEmpty())
            return null;
        // get new IP addresses (via consistent hashing if no oldActives)
        ArrayList<InetAddress> newActiveIPs = this.demandProfiler.testAndSetReconfigured(name,
                this.consistentNodeConfig.getNodeIPs(oldActives));
        if (newActiveIPs == null)
            return null;
        // get new actives based on new IP addresses
        Set<NodeIDType> newActives = this.consistentNodeConfig.getIPToActiveReplicaIDs(newActiveIPs, oldActives);
        return (!newActives.equals(oldActives) || ReconfigurationConfig.shouldReconfigureInPlace()) ? newActives
                : null;
    }
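
    /* Editor's sketch, not part of the original class: what "minimally
     * modifies the current set of IPs" means for a reconfiguration policy. A
     * policy that swaps in at most one new address, as below, lets
     * ConsistentNodeConfig keep the corresponding NodeIDType set similarly
     * stable. The method name and the single-swap choice are illustrative
     * assumptions, not part of the AbstractDemandProfile API. */
    private static ArrayList<InetAddress> minimallyModified(ArrayList<InetAddress> curActives,
            InetAddress preferred) {
        if (curActives.isEmpty() || curActives.contains(preferred))
            return curActives; // nothing to change
        ArrayList<InetAddress> modified = new ArrayList<InetAddress>(curActives);
        // replace exactly one member; the rest of the set is untouched
        modified.set(0, preferred);
        return modified;
    }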

    // combine JSON stats from report into existing demand profile
    private void updateDemandProfile(DemandReport<NodeIDType> report) {
        // if no entry for name, try to read and refresh from DB
        if (!this.demandProfiler.contains(report.getServiceName())) {
            String statsStr = this.DB.getDemandStats(report.getServiceName());
            JSONObject statsJSON = null;
            try {
                if (statsStr != null)
                    statsJSON = new JSONObject(statsStr);
            } catch (JSONException e) {
                e.printStackTrace();
            }
            if (statsJSON != null)
                this.demandProfiler.putIfEmpty(AbstractDemandProfile.createDemandProfile(statsJSON));
        }
        this.demandProfiler.combine(AbstractDemandProfile.createDemandProfile(report.getStats()));
    }

    /* Stow away to disk if the size of the memory map becomes large. We will
     * refresh in the updateDemandProfile method if needed. */
    private void trimAggregateDemandProfile() {
        Set<AbstractDemandProfile> profiles = this.demandProfiler.trim();
        for (AbstractDemandProfile profile : profiles) {
            // initiator and epoch are irrelevant in this report
            DemandReport<NodeIDType> report = new DemandReport<NodeIDType>(this.getMyID(), profile.getName(), 0,
                    profile);
            // will update stats in DB
            this.DB.execute(report);
        }
    }

    // coordinate reconfiguration intent
    private boolean initiateReconfiguration(String name, ReconfigurationRecord<NodeIDType> record,
            Set<NodeIDType> newActives, InetSocketAddress sender, InetSocketAddress receiver,
            InetSocketAddress forwarder, String initialState, Map<String, String> nameStates,
            Map<NodeIDType, InetSocketAddress> newlyAddedNodes) {
        if (newActives == null)
            return false;
        // request to persistently log the intent to reconfigure
        RCRecordRequest<NodeIDType> rcRecReq = new RCRecordRequest<NodeIDType>(this.getMyID(), formStartEpoch(name,
                record, newActives, sender, receiver, forwarder, initialState, nameStates, newlyAddedNodes),
                RequestTypes.RECONFIGURATION_INTENT);

        // coordinate intent with replicas
        if (this.isReadyForReconfiguration(rcRecReq, record)) {
            return this.DB.handleIncoming(rcRecReq, null);
        } else
            return false;
    }

    /* We check for ongoing reconfigurations to avoid multiple paxos
     * coordinations by different nodes each trying to initiate a
     * reconfiguration. Although only one will succeed at the end, it is still
     * useful to limit needless paxos coordinated requests. Nevertheless, one
     * problem with the check in this method is that multiple nodes can still
     * try to initiate a reconfiguration as it only checks based on the DB
     * state. Ideally, some randomization should make the likelihood of
     * redundant concurrent reconfigurations low.
     * 
     * It is not important for this method to be atomic. Even if an RC group or
     * a service name reconfiguration is initiated concurrently with the ready
     * checks, paxos ensures that no more requests can be committed after the
     * group has been stopped. If the group becomes non-ready immediately after
     * this method returns true, the request for which this method is being
     * called will either not get committed or be rendered a no-op. */
    private boolean isReadyForReconfiguration(BasicReconfigurationPacket<NodeIDType> rcPacket,
            ReconfigurationRecord<NodeIDType> recordServiceName) {
        ReconfigurationRecord<NodeIDType> recordGroupName = this.DB
                .getReconfigurationRecord(this.DB.getRCGroupName(rcPacket.getServiceName()));
        /* We need to check both if the RC group record is ready and the service
         * name record is either also ready or null (possible during name
         * creation). */
        boolean ready = recordGroupName != null && recordGroupName.isReady()
                && (recordServiceName == null || recordServiceName.isReady());
        if (!ready)
            log.log(Level.FINE, "{0} not ready to reconfigure {1}; record={2} and rcGroupRecord={3}",
                    new Object[] { this, rcPacket.getServiceName(),
                            recordServiceName != null ? recordServiceName.getSummary() : "[null]",
                            recordGroupName != null ? recordGroupName.getSummary() : "[null]" });
        return ready;
    }
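
    /* Readiness cases for the check above (editor's summary):
     *   rcGroupRecord ready  + serviceRecord ready => ready
     *   rcGroupRecord ready  + serviceRecord null  => ready (name creation)
     *   rcGroupRecord ready  + serviceRecord busy  => not ready
     *   rcGroupRecord null or busy                 => not ready
     */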

    private NodeIDType getMyID() {
        return this.messenger.getMyID();
    }

    private Stringifiable<NodeIDType> getUnstringer() {
        return this.consistentNodeConfig;
    }

    private StartEpoch<NodeIDType> formStartEpoch(String name, ReconfigurationRecord<NodeIDType> record,
            Set<NodeIDType> newActives, InetSocketAddress sender, InetSocketAddress receiver,
            InetSocketAddress forwarder, String initialState, Map<String, String> nameStates,
            Map<NodeIDType, InetSocketAddress> newlyAddedNodes) {
        StartEpoch<NodeIDType> startEpoch = (record != null) ?
        // typical reconfiguration
                new StartEpoch<NodeIDType>(getMyID(), name, record.getEpoch() + 1, newActives,
                        record.getActiveReplicas(record.getName(), record.getEpoch()), sender, receiver, forwarder,
                        initialState, nameStates, newlyAddedNodes)
                // creation reconfiguration
                : new StartEpoch<NodeIDType>(getMyID(), name, 0, newActives, null, sender, receiver, forwarder,
                        initialState, nameStates, newlyAddedNodes);
        return startEpoch;
    }

    /************ Start of key construction utility methods *************/

    private String getTaskKey(Class<?> C, BasicReconfigurationPacket<?> rcPacket) {
        return getTaskKey(C, rcPacket, getMyID().toString());
    }

    /**
     * @param C
     * @param rcPacket
     * @param myID
     * @return The task key.
     */
    public static String getTaskKey(Class<?> C, BasicReconfigurationPacket<?> rcPacket, String myID) {
        return getTaskKey(C, myID, rcPacket.getServiceName(), rcPacket.getEpochNumber());
    }

    private static String getTaskKey(Class<?> C, String myID, String name, int epoch) {
        return C.getSimpleName() + myID + ":" + name + ":" + epoch;
    }
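
    /* Example (editor's note): for C=WaitAckStopEpoch.class, myID="RC0",
     * name="someName", and epoch=7, the method above yields the key
     * "WaitAckStopEpochRC0:someName:7"; getTaskKeyPrev with the default
     * prev=1 yields "WaitAckStopEpochRC0:someName:6". */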

    private String getTaskKeyPrev(Class<?> C, BasicReconfigurationPacket<?> rcPacket) {
        return getTaskKeyPrev(C, rcPacket, getMyID().toString());
    }

    private String getTaskKeyPrev(Class<?> C, BasicReconfigurationPacket<?> rcPacket, int prev) {
        return getTaskKeyPrev(C, rcPacket, getMyID().toString(), prev);
    }

    protected static String getTaskKeyPrev(Class<?> C, BasicReconfigurationPacket<?> rcPacket, String myID) {
        return getTaskKeyPrev(C, rcPacket, myID, 1);
    }

    private static String getTaskKeyPrev(Class<?> C, BasicReconfigurationPacket<?> rcPacket, String myID,
            int prev) {
        return getTaskKey(C, myID, rcPacket.getServiceName(), rcPacket.getEpochNumber() - prev);
    }

    /************ End of key construction utility methods *************/

    private void garbageCollectPendingTasks(RCRecordRequest<NodeIDType> rcRecReq) {
        this.garbageCollectStopAndStartTasks(rcRecReq);
        /* Remove secondary task, primary will take care of itself.
         * 
         * Invariant: The secondary task always terminates when a
         * reconfiguration completes. */
        this.protocolExecutor.remove(getTaskKey(WaitPrimaryExecution.class, rcRecReq));

        /* We don't need to garbage collect the just completed reconfiguration's
         * WaitAckDropEpoch as it should clean up after itself if and when
         * it finishes, but we should garbage collect any WaitAckDropEpoch from
         * the immediately preceding reconfiguration completion. So we remove
         * WaitAckDropEpoch[myID]:name:n-2 here, where 'n' is the epoch number
         * to which we just completed reconfiguring.
         * 
         * Invariant: There is at most one WaitAckDropEpoch task running for a
         * given name at any reconfigurator, the one for the most recently
         * completed reconfiguration. */
        this.protocolExecutor.remove(getTaskKeyPrev(WaitAckDropEpoch.class, rcRecReq, 2));
    }
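
    /* Example (editor's note): upon completing the reconfiguration of "name"
     * to epoch n, the removals above and in garbageCollectStopAndStartTasks
     * target keys of the form
     *   WaitPrimaryExecution<myID>:name:n
     *   WaitAckStopEpoch<myID>:name:n-1
     *   WaitAckStartEpoch<myID>:name:n and WaitAckStartEpoch<myID>:name:n-1
     *   WaitAckDropEpoch<myID>:name:n-2
     */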

    // just before coordinating reconfiguration complete/merge
    private void garbageCollectStopAndStartTasks(RCRecordRequest<NodeIDType> rcRecReq) {
        // stop task obviated just before reconfiguration complete proposed
        this.protocolExecutor.remove(this.getTaskKeyPrev(WaitAckStopEpoch.class, rcRecReq));

        // FIXME: need to also remove split stop tasks here

        // start task obviated just before reconfiguration complete proposed
        this.protocolExecutor.remove(this.getTaskKey(WaitAckStartEpoch.class, rcRecReq));

        // remove previous epoch's start task in case it exists here
        this.protocolExecutor.remove(this.getTaskKeyPrev(WaitAckStartEpoch.class, rcRecReq));
    }

    private void initFinishPendingReconfigurations() throws IOException {
        /* Invoked just once upon recovery, but we could also invoke this
         * periodically. */
        this.finishPendingReconfigurations();

        /* Periodic task to remove old file system based checkpoints after a
         * safe timeout of MAX_FINAL_STATE_AGE. The choice of the period below
         * of a tenth of that is somewhat arbitrary. */
        this.protocolExecutor.scheduleWithFixedDelay(new Runnable() {
            public void run() {
                DB.garbageCollectOldFileSystemBasedCheckpoints();
            }
        }, 0, ReconfigurationConfig.getMaxFinalStateAge() / 10, TimeUnit.MILLISECONDS);

        /* Periodic task to finish pending deletions after a safe timeout of
         * MAX_FINAL_STATE_AGE. The choice of the period below of a tenth of
         * that is somewhat arbitrary. */
        this.protocolExecutor.scheduleWithFixedDelay(new Runnable() {
            public void run() {
                DB.delayedDeleteComplete();
            }
        }, 0, ReconfigurationConfig.getMaxFinalStateAge() / 10, TimeUnit.MILLISECONDS);

        // for instrumentation, unrelated to pending reconfigurations
        this.protocolExecutor.scheduleWithFixedDelay(new Instrumenter(), 0, 60, TimeUnit.SECONDS);
    }

    private static final boolean SKIP_ACTIVE_DELETIONS_UPON_RECOVERY = true;

    /* Called initially upon recovery to finish pending reconfigurations. We
     * assume that the set of pending reconfigurations is not too big as this is
     * the set of reconfigurations that were ongoing at the time of the crash. */
    private void finishPendingReconfigurations() throws IOException {
        String[] pending = this.DB.getPendingReconfigurations();
        for (String name : pending) {
            ReconfigurationRecord<NodeIDType> record = this.DB.getReconfigurationRecord(name);

            if (record == null ||
            // ignore failed creations
                    record.getActiveReplicas() == null || record.getActiveReplicas().isEmpty() ||
                    // probably crashed before setPending(false)
                    record.isReady()) {
                this.DB.removePending(name);
                continue;
            }

            /* Skip active node deletions upon recovery. Either others completed
             * it or it did not get completed from the issuing client's
             * perspective, in which case it will be reissued if needed. */
            if (record.getName().equals(AbstractReconfiguratorDB.RecordNames.AR_NODES.toString())) {
                if (!SKIP_ACTIVE_DELETIONS_UPON_RECOVERY)
                    this.executeActiveNodeConfigChange(
                            new RCRecordRequest<NodeIDType>(this.getMyID(),
                                    this.formStartEpoch(name, record, record.getNewActives(), null, null, null,
                                            null, null, null),
                                    RCRecordRequest.RequestTypes.RECONFIGURATION_INTENT));
                this.DB.removePending(record.getName());
                continue;
            }

            /* Note: The fact that the RC record request is an intent is
             * immaterial. It is really only used to construct the corresponding
             * WaitAckStopEpoch task, i.e., the intent itself will not be
             * committed again (and indeed cannot be by design). */
            log.log(Level.FINE, "{0} initiating pending reconfiguration for {1}", new Object[] { this, name });
            RCRecordRequest<NodeIDType> rcRecReq = new RCRecordRequest<NodeIDType>(this.getMyID(),
                    this.formStartEpoch(name, record, record.getNewActives(), null, null, null, null, null, null),
                    RCRecordRequest.RequestTypes.RECONFIGURATION_INTENT);
            /* We spawn primary even though that may be unnecessary because we
             * don't know if or when any other reconfigurator might finish this
             * pending reconfiguration. Having multiple reconfigurators push a
             * reconfiguration is okay as stop, start, and drop are all
             * idempotent operations. */
            this.spawnPrimaryReconfiguratorTask(rcRecReq);
        }
    }

    private boolean forwardClientReconfigurationPacket(ClientReconfigurationPacket request) {
        try {
            Set<NodeIDType> responsibleRCs = this.DB.removeDead(new HashSet<NodeIDType>(
                    this.consistentNodeConfig.getReplicatedReconfigurators(request.getServiceName())));
            if (responsibleRCs.isEmpty())
                return false;

            @SuppressWarnings("unchecked")
            NodeIDType randomResponsibleRC = (NodeIDType) (responsibleRCs
                    .toArray()[(int) (Math.random() * responsibleRCs.size())]);

            request = request.setForwader(this.consistentNodeConfig.getBindSocketAddress(getMyID()));
            log.log(Level.INFO, "{0} forwarding client request {1} to reconfigurator {2}:{3}",
                    new Object[] { this, request.getSummary(), randomResponsibleRC,
                            this.consistentNodeConfig.getNodeSocketAddress(randomResponsibleRC) });

            this.messenger.sendToAddress(this.consistentNodeConfig.getNodeSocketAddress(randomResponsibleRC),
                    new JSONMessenger.JSONObjectByteableWrapper(
                            request.setForwader(this.consistentNodeConfig.getNodeSocketAddress(getMyID()))
                    // .toJSONObject()
                    ));
        } catch (IOException /* | JSONException */ e) {
            log.severe(this + " incurred " + e.getClass().getSimpleName() + e.getMessage());
            e.printStackTrace();
        }
        return true;
    }

    private boolean sendClientReconfigurationPacket(ClientReconfigurationPacket response) {
        try {
            InetSocketAddress querier = this.getQuerier(response);
            if (querier.equals(response.getCreator())) {
                // only response can go back to client
                log.log(Level.INFO, "{0} sending client RESPONSE {1}:{2} back to client",
                        new Object[] { this, response.getSummary(), response.getResponseMessage(), querier });
                (this.getClientMessenger(response.getMyReceiver())).sendToAddress(querier,
                        new JSONMessenger.JSONObjectByteableWrapper(response
                        // .toJSONObject()
                        ));
            } else {
                // may be a request or response
                log.log(Level.INFO, "{0} sending {1} {2} to reconfigurator {3}", new Object[] { this,
                        response.isRequest() ? "request" : "RESPONSE", response.getSummary(), querier });
                // send unconditionally and assert only on the return value, so
                // that disabling assertions does not also disable the send
                int sent = this.messenger.sendToAddress(querier,
                        new JSONMessenger.JSONObjectByteableWrapper(response
                        // .toJSONObject()
                        ));
                assert (sent > 0);
            }
        } catch (IOException /* | JSONException */ e) {
            log.severe(this + " incurred " + e.getClass().getSimpleName() + e.getMessage());
            e.printStackTrace();
        }
        return true;
    }

    /* If it is not my node config socket address, it must be one of the two
     * client messengers. */
    private AddressMessenger<JSONObject> getMessenger(InetSocketAddress receiver) {
        if (receiver.equals(this.consistentNodeConfig.getBindSocketAddress(this.getMyID()))) {
            log.log(Level.FINE, "{0} using messenger for {1}; bindAddress is {2}", new Object[] { this, receiver,
                    this.consistentNodeConfig.getBindSocketAddress(this.getMyID()) });
            return this.messenger;
        } else {
            log.log(Level.FINE, "{0} using clientMessenger for {1}; bindAddress is {2}", new Object[] { this,
                    receiver, this.consistentNodeConfig.getBindSocketAddress(this.getMyID()) });
            return this.getClientMessenger(receiver);
        }
    }

    /* Confirmation means necessarily a positive response. This method is
     * invoked from the creation execution callback. If the record already
     * exists or is in the process of being created, we return an error as
     * opposed to sending a confirmation via this method.
     * 
     * Note: this behavior is different from deletions where we return success
     * if the record is pending deletion (but do return failure if it has been
     * completely deleted). */
    private void sendCreateConfirmationToClient(RCRecordRequest<NodeIDType> rcRecReq, String headName) {
        if (rcRecReq.startEpoch.creator == null || !rcRecReq.getInitiator().equals(getMyID()) || headName == null) {
            return;
        }

        DelayProfiler.updateDelay(ProfilerKeys.create.toString(), rcRecReq.startEpoch.getInitTime());
        try {
            InetSocketAddress querier = this.getQuerier(rcRecReq);
            CreateServiceName response = (CreateServiceName) (new CreateServiceName(rcRecReq.startEpoch.creator,
                    headName, rcRecReq.getEpochNumber(), null, rcRecReq.startEpoch.getMyReceiver()))
                            .setForwader(rcRecReq.startEpoch.getForwarder()).makeResponse();
            // need to use different messengers for client and forwarder
            if (querier.equals(rcRecReq.startEpoch.creator)) {
                log.log(Level.INFO, "{0} sending creation confirmation {1} back to client",
                        new Object[] { this, response.getSummary(), querier });
                // this.getClientMessenger()
                (this.getMessenger(rcRecReq.startEpoch.getMyReceiver())).sendToAddress(querier,
                        new JSONMessenger.JSONObjectByteableWrapper(response
                        // .toJSONObject()
                        ));
            } else {
                log.log(Level.INFO, "{0} sending creation confirmation {1} to forwarding reconfigurator {2}",
                        new Object[] { this, response.getSummary(), querier });
                this.messenger.sendToAddress(querier, new JSONMessenger.JSONObjectByteableWrapper(
                        response.setForwardee(this.consistentNodeConfig.getNodeSocketAddress(getMyID()))
                // .toJSONObject()
                ));
            }
        } catch (IOException /* | JSONException */ e) {
            log.severe(this + " incurred " + e.getClass().getSimpleName() + e.getMessage());
            e.printStackTrace();
        }
    }

    /* Confirmation means necessarily a positive response. This method is
     * invoked either via the delete execution callback or immediately if the
     * record is already pending deletion. If the record is completely deleted,
     * we return an error as opposed to sending a confirmation via this method.
     * 
     * Note: Returning success for pending deletions is different from the
     * behavior for creations where we return success only after the record's
     * creation is complete, i.e., pending creations return a creation error but
     * pending deletions return a deletion success. This difference is because
     * once a record is marked as pending deletion (WAIT_DELETE), it is as good
     * as deleted and is only waiting final garbage collection. */
    private void sendDeleteConfirmationToClient(RCRecordRequest<NodeIDType> rcRecReq) {
        if (rcRecReq.startEpoch.creator == null || !rcRecReq.getInitiator().equals(getMyID()))
            return;
        try {
            InetSocketAddress querier = this.getQuerier(rcRecReq);
            // copy forwarder from startEpoch and mark as response
            DeleteServiceName response = (DeleteServiceName) new DeleteServiceName(rcRecReq.startEpoch.creator,
                    rcRecReq.getServiceName(), rcRecReq.getEpochNumber() - 1, rcRecReq.startEpoch.getMyReceiver())
                            .setForwader(rcRecReq.startEpoch.getForwarder()).makeResponse();

            if (querier.equals(rcRecReq.startEpoch.creator)) {
                log.log(Level.FINE, "{0} sending deletion confirmation {1} back to client",
                        new Object[] { this, response.getSummary(), querier });
                // this.getClientMessenger()
                (this.getMessenger(rcRecReq.startEpoch.getMyReceiver())).sendToAddress(querier,
                        new JSONMessenger.JSONObjectByteableWrapper(response
                        // .toJSONObject()
                        ));
            } else {
                log.log(Level.FINE, "{0} sending deletion confirmation {1} to forwarding reconfigurator {2}",
                        new Object[] { this, response.getSummary(), querier });
                this.messenger.sendToAddress(querier, new JSONMessenger.JSONObjectByteableWrapper(response
                // .toJSONObject()
                ));
            }
        } catch (IOException /* | JSONException */ e) {
            log.severe(this + " incurred " + e.getClass().getSimpleName() + e.getMessage());
            e.printStackTrace();
        }
    }

    private InetSocketAddress getQuerier(RCRecordRequest<NodeIDType> rcRecReq) {
        InetSocketAddress forwarder = rcRecReq.startEpoch.getForwarder();
        InetSocketAddress me = this.consistentNodeConfig.getBindSocketAddress(getMyID());
        // if there is a forwarder that is not me, relay back
        if (forwarder != null && !forwarder.equals(me))
            return forwarder;
        else
            // return directly to creator
            return rcRecReq.startEpoch.creator;
    }

    private InetSocketAddress getQuerier(ClientReconfigurationPacket response) {
        InetSocketAddress forwarder = response.getForwader();
        InetSocketAddress me = this.consistentNodeConfig.getBindSocketAddress(getMyID());
        // if there is a forwarder that is not me, relay back
        if (forwarder != null && !forwarder.equals(me)) {
            return forwarder;
        } else {
            // return directly to creator
            return response.getCreator();
        }
    }

    private static final String separator = "-------------------------------------------------------------------------";

    private void sendReconfigureRCNodeConfigConfirmationToInitiator(RCRecordRequest<NodeIDType> rcRecReq) {
        try {
            ReconfigureRCNodeConfig<NodeIDType> response = new ReconfigureRCNodeConfig<NodeIDType>(
                    this.DB.getMyID(), rcRecReq.startEpoch.newlyAddedNodes,
                    this.diff(rcRecReq.startEpoch.prevEpochGroup, rcRecReq.startEpoch.curEpochGroup));
            log.log(Level.INFO, "\n\n{0}\n{1} sending {2} confirmation to {3}: {4}\n{5}",
                    new Object[] { separator, this, ReconfigurationPacket.PacketType.RECONFIGURE_RC_NODE_CONFIG,
                            rcRecReq.startEpoch.creator, response.getSummary(), separator });
            (this.messenger).sendToAddress(rcRecReq.startEpoch.creator,
                    new JSONMessenger.JSONObjectByteableWrapper(response
                    // .toJSONObject()
                    ));
        } catch (IOException /* | JSONException */ e) {
            log.severe(this + " incurred " + e.getClass().getSimpleName() + e.getMessage());
            e.printStackTrace();
        }
    }

    private void sendReconfigureActiveNodeConfigConfirmationToInitiator(RCRecordRequest<NodeIDType> rcRecReq) {
        try {
            ReconfigureActiveNodeConfig<NodeIDType> response = new ReconfigureActiveNodeConfig<NodeIDType>(
                    this.DB.getMyID(), rcRecReq.startEpoch.newlyAddedNodes,
                    this.diff(rcRecReq.startEpoch.prevEpochGroup, rcRecReq.startEpoch.curEpochGroup));
            log.log(Level.INFO, "{0} has nodeConfig = {1} after processing {2}",
                    new Object[] { this, this.consistentNodeConfig, response.getSummary() });

            log.log(Level.INFO,
                    "\n\n{0}\n{1} finished required reconfigurations to change active replica(s) {2}; sending response to {3}\n{4}\n",
                    new Object[] { separator, this, response.getSummary(), rcRecReq.startEpoch.creator,
                            separator });
            (this.messenger).sendToAddress(rcRecReq.startEpoch.creator,
                    new JSONMessenger.JSONObjectByteableWrapper(response
                    // .toJSONObject()
                    ));
        } catch (IOException /* | JSONException */ e) {
            log.severe(this + " incurred " + e.getClass().getSimpleName() + e.getMessage());
            e.printStackTrace();
        }
    }

    /*************** Reconfigurator reconfiguration related methods ***************/
    // return s1 - s2
    private Set<NodeIDType> diff(Set<NodeIDType> s1, Set<NodeIDType> s2) {
        Set<NodeIDType> diff = new HashSet<NodeIDType>();
        for (NodeIDType node : s1)
            if (!s2.contains(node))
                diff.add(node);
        return diff;
    }
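
    /* Example (editor's note): diff({a,b,c}, {b,d}) == {a,c}. Used throughout
     * to compute deleted nodes as prevEpochGroup - curEpochGroup. */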

    // all nodes are primaries for NC change.
    private boolean reconfigureNodeConfigRecord(RCRecordRequest<NodeIDType> rcRecReq) {
        if (rcRecReq.getInitiator().equals(getMyID()))
            this.spawnPrimaryReconfiguratorTask(rcRecReq);
        else
            this.spawnSecondaryReconfiguratorTask(rcRecReq);
        return true;
    }

    /**
     * @param echo
     * @param ptasks
     * @return null
     */
    public GenericMessagingTask<NodeIDType, ?>[] handleEchoRequest(EchoRequest echo,
            ProtocolTask<NodeIDType, ReconfigurationPacket.PacketType, String>[] ptasks) {
        log.log(Level.FINE, "{0} received echo request {1}", new Object[] { this, echo.getSummary() });
        if (echo.isRequest()) {
            // ignore echo requests
        } else if (echo.hasClosest()) {
            RTTEstimator.closest(echo.getSender(), echo.getClosest());
            log.log(Level.INFO, "{0} received closest map {1} from {2}; RTTEstimator.closest={3}",
                    new Object[] { this, echo.getClosest(), echo.getSender(),
                            RTTEstimator.getClosest(echo.getSender().getAddress()) });
        }
        return null;
    }

    /* This method conducts the actual reconfiguration assuming that the
     * "intent" has already been committed in the NC record. It (1) spawns each
     * constituent reconfiguration for its new reconfigurator groups and (2)
     * reconfigures the NC record itself. Spawning each constituent
     * reconfiguration means executing the corresponding reconfiguration intent,
     * then spawning WaitAckStop, etc. It is not important to worry about
     * "completing" the NC change intent under failures as paxos will ensure
     * safety. We do need a trigger to indicate the completion of all
     * constituent reconfigurations so that the NC record change can be
     * considered and marked as complete. For this, upon every NC
     * reconfiguration complete commit, we could simply check if any of the new
     * RC groups are still pending and, if not, consider the NC change as
     * complete; the NC change remains incomplete until all constituent RC
     * groups are ready. That is what we do in AbstractReconfiguratorDB. */
    private boolean executeNodeConfigChange(RCRecordRequest<NodeIDType> rcRecReq) {
        boolean allDone = true;

        // change soft copy of node config
        boolean ncChanged = changeSoftNodeConfig(rcRecReq.startEpoch);
        // change persistent copy of node config
        ncChanged = ncChanged && this.DB.changeDBNodeConfig(rcRecReq.startEpoch.getEpochNumber());
        if (!ncChanged)
            throw new RuntimeException("Unable to change node config");
        assert (!rcRecReq.startEpoch.getNewlyAddedNodes().isEmpty()
                || !diff(rcRecReq.startEpoch.prevEpochGroup, rcRecReq.startEpoch.curEpochGroup).isEmpty());

        // to track epoch numbers of RC groups correctly
        Set<NodeIDType> affectedNodes = this.DB.setRCEpochs(rcRecReq.startEpoch.getNewlyAddedNodes(),
                diff(rcRecReq.startEpoch.prevEpochGroup, rcRecReq.startEpoch.curEpochGroup));

        allDone = this.changeSplitMergeGroups(affectedNodes, rcRecReq.startEpoch.getNewlyAddedNodes(),
                diff(rcRecReq.startEpoch.prevEpochGroup, rcRecReq.startEpoch.curEpochGroup));

        this.reconfigureNodeConfigRecord(rcRecReq);

        // finally all done
        return allDone;
    }

    /* Starts in a separate thread as it is a blocking operation. It does not
     * have to finish before returning even though it is technically a paxos
     * request (or the request's callback to be precise) because it is an
     * internal operation. The issuing client only gets a response when
     * executeActiveNodeConfig and the WaitAckDropEpoch task that it spawns both
     * eventually complete, which may take a while. */
    private void spawnExecuteActiveNodeConfigChange(final RCRecordRequest<NodeIDType> rcRecReq) {
        log.log(Level.INFO, "{0} spawning active node config change task for {1}",
                new Object[] { this,
                        new ReconfigureActiveNodeConfig<NodeIDType>(rcRecReq.getInitiator(),
                                rcRecReq.startEpoch.newlyAddedNodes, rcRecReq.startEpoch.getDeletedNodes())
                                        .getSummary() });
        this.protocolExecutor.submit(new Runnable() {
            @Override
            public void run() {
                try {
                    Reconfigurator.this.executeActiveNodeConfigChange(rcRecReq);
                } catch (Exception | Error e) {
                    e.printStackTrace();
                }
            }

        });
    }

    /* There is no actual reconfiguration to be done for the ACTIVE_NODE_CONFIG
     * record itself, just the app records placed on deleted active nodes. The
     * current and new actives in the ACTIVE_NODE_CONFIG do *not* mean the
     * current and new locations where the record is replicated. The record
     * itself is replicated at all reconfigurators and the current and new
     * actives are the universe of active replicas as maintained by the
     * reconfigurators. */
    private boolean executeActiveNodeConfigChange(RCRecordRequest<NodeIDType> rcRecReq) {

        ReconfigurationRecord<NodeIDType> record = this.DB
                .getReconfigurationRecord(AbstractReconfiguratorDB.RecordNames.AR_NODES.toString());

        // change soft copy
        if (rcRecReq.startEpoch.hasNewlyAddedNodes())
            for (NodeIDType node : rcRecReq.startEpoch.newlyAddedNodes.keySet())
                this.consistentNodeConfig.addActiveReplica(node, rcRecReq.startEpoch.newlyAddedNodes.get(node));

        assert (diff(record.getActiveReplicas(), record.getNewActives()).size() <= 1);

        // FIXME: why change again?
        for (NodeIDType active : this.diff(record.getNewActives(), record.getActiveReplicas()))
            this.consistentNodeConfig.addActiveReplica(active, rcRecReq.startEpoch.newlyAddedNodes.get(active));

        // change persistent copy of active node config
        boolean ancChanged = this.DB.changeActiveDBNodeConfig(rcRecReq.startEpoch.getEpochNumber());

        Set<NodeIDType> deletedNodes = this.diff(record.getActiveReplicas(), record.getNewActives());
        for (NodeIDType active : deletedNodes)
            this.deleteActiveReplica(active, rcRecReq.startEpoch.creator);

        try {
            this.DB.waitOutstanding(1);
        } catch (InterruptedException e) {
            e.printStackTrace();
            return false;
        }

        this.consistentNodeConfig.removeActivesSlatedForRemoval();
        for (NodeIDType active : this.diff(record.getActiveReplicas(), record.getNewActives())) {
            assert (!this.consistentNodeConfig.nodeExists(active));
        }

        // uncoordinated change
        boolean executed = this.DB.execute(new RCRecordRequest<NodeIDType>(rcRecReq.getInitiator(),
                rcRecReq.startEpoch, RCRecordRequest.RequestTypes.RECONFIGURATION_COMPLETE));

        this.DB.forceCheckpoint(AbstractReconfiguratorDB.RecordNames.AR_NODES.toString());

        // launch prev drop complete
        if (rcRecReq.getInitiator().equals(this.getMyID()))
            this.protocolExecutor.spawnIfNotRunning(new WaitAckDropEpoch<NodeIDType>(rcRecReq.startEpoch,
                    this.consistentNodeConfig.getReconfigurators(), this.DB));

        this.updatePropertiesFile(rcRecReq, PaxosConfig.DEFAULT_SERVER_PREFIX);
        return ancChanged && executed;
    }

    private void updatePropertiesFile(RCRecordRequest<NodeIDType> rcRecReq, String prefix) {
        // active node config change complete
        if (PaxosConfig.getPropertiesFile() != null)
            try {
                for (NodeIDType node : rcRecReq.startEpoch.getNewlyAddedNodes())
                    Util.writeProperty(prefix + node,
                            this.consistentNodeConfig.getNodeAddress(node).getHostAddress() + ":"
                                    + this.consistentNodeConfig.getNodePort(node),
                            PaxosConfig.getPropertiesFile(), prefix);
                for (NodeIDType node : rcRecReq.startEpoch.getDeletedNodes())
                    Util.writeProperty(prefix + node, null, PaxosConfig.getPropertiesFile(), prefix);
            } catch (IOException ioe) {
                log.severe(this + " incurred exception while modifying properties file"
                        + PaxosConfig.getPropertiesFile() + ioe);
            }
        else
            log.log(Level.INFO, "{0} not updating non-existent properties file upon adds={1}, deletes={2}",
                    new Object[] { this, rcRecReq.startEpoch.getNewlyAddedNodes(),
                            rcRecReq.startEpoch.getDeletedNodes() });
    }

    /* We need to checkpoint the NC record after every NC change. Unlike other
     * records for RC groups where we can roll forward quickly by simply
     * applying state changes specified in the logged decisions (instead of
     * actually re-conducting the corresponding reconfigurations), NC group
     * changes are more complex and have to be re-conducted at each node
     * redundantly; however, that may not even be possible, as deleted nodes or
     * even existing nodes may no longer have the final state corresponding to
     * older epochs. Checkpointing after every NC change ensures that, upon
     * recovery, each node has to try to re-conduct at most only the most recent
     * NC change.
     * 
     * What if this forceCheckpoint operation fails? If the next NC change
     * successfully completes at this node before the next crash, there is no
     * problem. Else, upon recovery, this node will try to re-conduct the NC
     * change corresponding to the failed forceCheckpoint and might be unable to
     * do so. This is equivalent to this node having missed long past NC
     * changes. At this point, this node must be deleted and re-added to NC. */
    private void postCompleteNodeConfigChange(RCRecordRequest<NodeIDType> rcRecReq) {
        log.log(Level.INFO, "{0} completed node config change for epoch {1}; (forcing checkpoint..)",
                new Object[] { this, rcRecReq.getEpochNumber() });
        this.DB.forceCheckpoint(rcRecReq.getServiceName());

        // active node config change complete
        this.updatePropertiesFile(rcRecReq, ReconfigurationConfig.DEFAULT_RECONFIGURATOR_PREFIX);

        // stop needless failure monitoring
        for (NodeIDType node : diff(rcRecReq.startEpoch.prevEpochGroup, rcRecReq.startEpoch.curEpochGroup))
            this.DB.garbageCollectDeletedNode(node);

    }

    // change soft copy of node config
    private boolean changeSoftNodeConfig(StartEpoch<NodeIDType> startEpoch) {
        /* Do adds immediately. This means that if we ever need the old
         * "world view" again, e.g., to know which group a name maps to, we have
         * to reconstruct the consistent hash ring on demand based on the old
         * set of nodes in the DB. We could optimize this slightly by just
         * storing also an in-memory copy of the old consistent hash ring, but
         * this is probably unnecessary given that nodeConfig changes are rare,
         * slow operations anyway. */
        if (startEpoch.hasNewlyAddedNodes())
            for (Map.Entry<NodeIDType, InetSocketAddress> entry : startEpoch.newlyAddedNodes.entrySet()) {
                this.consistentNodeConfig.addReconfigurator(entry.getKey(), entry.getValue());
                log.log(Level.FINE, "{0} added new reconfigurator {1}={2} to node config", new Object[] { this,
                        entry.getKey(), this.consistentNodeConfig.getNodeSocketAddress(entry.getKey()) });
            }
        /* Deletes, not so fast. If we delete entries from nodeConfig right
         * away, we don't have those nodes' socket addresses, so we can't
         * communicate with them any more, but we need to be able to communicate
         * with them in order to do the necessary reconfigurations to cleanly
         * eliminate them from the consistent hash ring. */
        for (NodeIDType node : this.diff(startEpoch.prevEpochGroup, startEpoch.curEpochGroup)) {
            this.consistentNodeConfig.slateForRemovalReconfigurator(node);
        }
        return true;
    }
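
    /* Editor's sketch of the add-now/delete-later pattern used above, with
     * hypothetical names:
     *
     *   // phase 1: mark for removal but keep the address book entry so the
     *   // node stays reachable for the cleanup reconfigurations
     *   config.slateForRemoval(node);
     *   // phase 2: once cleanup completes, actually forget the node
     *   config.removeAllSlatedForRemoval();
     */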

    private boolean isPermitted(ReconfigureRCNodeConfig<NodeIDType> changeRC) {

        // if node is pending deletion from previous incarnation
        if (changeRC.getAddedNodeIDs() != null)
            for (NodeIDType addNode : changeRC.getAddedNodeIDs()) {
                ReconfigurationRecord<NodeIDType> rcRecord = this.DB
                        .getReconfigurationRecord(this.DB.getRCGroupName(addNode));
                if (rcRecord != null && rcRecord.isDeletePending()) {
                    changeRC.setResponseMessage("Cannot add reconfigurator named " + addNode
                            + " as it is pending deletion from a previous add.");
                    return false;
                }
                // check if the name conflicts with an existing node's name
                else if (this.consistentNodeConfig.nodeExists(addNode)) {
                    changeRC.setResponseMessage("Cannot add reconfigurator named " + addNode
                            + " as another node with the same name already exists.");
                    return false;
                }
            }
        // if node is not in the current set of RC nodes
        if (changeRC.deletedNodes != null)
            for (NodeIDType deleteNode : changeRC.deletedNodes) {
                if (!this.consistentNodeConfig.getReconfigurators().contains(deleteNode)) {
                    changeRC.setResponseMessage("Can not delete reconfigurator " + deleteNode
                            + " as it is not part of the current set of reconfigurators");
                    return false;
                }
            }

        int permittedSize = this.consistentNodeConfig.getReplicatedReconfigurators("0").size();
        // allow at most one less than the reconfigurator group size to be
        // deleted simultaneously
        if (changeRC.getDeletedNodeIDs().size() >= permittedSize) {
            changeRC.setResponseMessage("Deleting more than " + (permittedSize - 1)
                    + " reconfigurators simultaneously is not permitted");
            return false;
        }
        return true;
    }

    private boolean amAffected(Set<NodeIDType> addNodes, Set<NodeIDType> deleteNodes) {
        for (NodeIDType node : addNodes)
            if (this.DB.amAffected(node))
                return true;
        for (NodeIDType node : deleteNodes)
            if (this.DB.amAffected(node))
                return true;
        return false;
    }

    private boolean changeSplitMergeGroups(Set<NodeIDType> affectedNodes, Set<NodeIDType> addNodes,
            Set<NodeIDType> deleteNodes) {
        if (!amAffected(addNodes, deleteNodes))
            return false;

        // get list of current RC groups from DB.
        Map<String, Set<NodeIDType>> curRCGroups = this.DB.getOldRCGroups();
        // get list of new RC groups from NODE_CONFIG record in DB
        Map<String, Set<NodeIDType>> newRCGroups = this.DB.getNewRCGroups();
        // get NC record from DB
        ReconfigurationRecord<NodeIDType> ncRecord = this.DB
                .getReconfigurationRecord(AbstractReconfiguratorDB.RecordNames.RC_NODES.toString());

        // check for null before the assert below dereferences ncRecord
        if (ncRecord == null)
            return false;

        assert (!ncRecord.getActiveReplicas().equals(ncRecord.getNewActives())) : ncRecord;

        // adjustCurWithNewRCGroups(curRCGroups, newRCGroups, ncRecord);
        String changedSplitMerged = this.changeExistingGroups(curRCGroups, newRCGroups, ncRecord, affectedNodes);
        if (!isAggregatedMergeSplit()) {
            // the two methods below are unused with aggregated merge/split
            changedSplitMerged += this.splitExistingGroups(curRCGroups, newRCGroups, ncRecord);
            changedSplitMerged += this.mergeExistingGroups(curRCGroups, newRCGroups, ncRecord);
        }

        log.log(Level.INFO, "\n                           " + "{0} changed/split/merged = \n{1}",
                new Object[] { this, changedSplitMerged });
        return !(changedSplitMerged).isEmpty();
    }

    private boolean isRecovering() {
        return this.recovering;
    }

    private boolean isPresent(String rcGroupName, Set<NodeIDType> affectedNodes) {
        for (NodeIDType node : affectedNodes) {
            if (this.DB.getRCGroupName(node).equals(rcGroupName))
                return true;
        }
        return false;
    }

    // NC request restarts should be slow and mostly unnecessary
    private static final long NODE_CONFIG_RESTART_PERIOD = 8 * WaitAckStopEpoch.RESTART_PERIOD;

    private void repeatUntilObviated(RCRecordRequest<NodeIDType> rcRecReq) {
        if (this.DB.isNCRecord(rcRecReq.getServiceName()))
            this.commitWorker.enqueueForExecution(rcRecReq, NODE_CONFIG_RESTART_PERIOD);
        else {
            if (rcRecReq.isReconfigurationMerge())
                log.log(Level.INFO, "{0} coordinating merge {1}", new Object[] { this, rcRecReq.getSummary() });
            this.commitWorker.enqueueForExecution(rcRecReq);
        }
    }

    /**
     * Now defaults to true for the improved merge/split implementation;
     * doing merges the old way can potentially violate RSM safety.
     */
    protected static boolean isAggregatedMergeSplit() {
        return true;
    }

    /**
     * This method reconfigures groups that exist locally both in the old and
     * new rings, i.e., this node just has to do a standard reconfiguration
     * operation because the membership of the paxos group is changing.
     * 
     * With {@link #isAggregatedMergeSplit()}, this method suffices for all
     * necessary reconfigurations; otherwise, we have to also invoke
     * splitExistingGroups and mergeExistingGroups.
     */
    private String changeExistingGroups(Map<String, Set<NodeIDType>> curRCGroups,
            Map<String, Set<NodeIDType>> newRCGroups, ReconfigurationRecord<NodeIDType> ncRecord,
            Set<NodeIDType> affectedNodes) {
        String debug = ""; // just for prettier clustered printing
        Map<String, Set<String>> mergeLists = this.DB.app.getMergeLists();
        // for each new group, initiate group change if and as needed
        for (String newRCGroup : newRCGroups.keySet()) {
            if (!isPresent(newRCGroup, affectedNodes))
                continue; // skip trivial reconfiguration of unaffected groups
            log.log(Level.FINE, "{0} finds {1} present in affected RC groups {2}",
                    new Object[] { this, newRCGroup, affectedNodes });
            final Map<String, Set<String>> mergees = this.isMergerGroup(newRCGroup, curRCGroups, newRCGroups,
                    ncRecord);
            Map<String, Set<NodeIDType>> splitParentGroup = this.isSplitGroup(newRCGroup, curRCGroups, newRCGroups,
                    ncRecord);
            int ncEpoch = ncRecord.getRCEpoch(newRCGroup);

            boolean invokeAggregatedMergeSplit = isAggregatedMergeSplit()
                    && (!mergees.isEmpty() || splitParentGroup != null);

            if (curRCGroups.keySet().contains(newRCGroup) && !invokeAggregatedMergeSplit) {

                // change current group
                debug += (this + " changing local group {" + newRCGroup + ":" + (ncEpoch - 1) + "="
                        + curRCGroups.get(newRCGroup) + "} to {" + newRCGroup + ":" + (ncEpoch) + "="
                        + newRCGroups.get(newRCGroup)) + "}";

                if (mergees.isEmpty() || !isAggregatedMergeSplit()) {
                    this.repeatUntilObviated(new RCRecordRequest<NodeIDType>(this.getMyID(),
                            new StartEpoch<NodeIDType>(this.getMyID(), newRCGroup, ncEpoch,
                                    newRCGroups.get(newRCGroup), curRCGroups.get(newRCGroup),
                                    mergeLists.get(newRCGroup)), // mergees
                            RequestTypes.RECONFIGURATION_INTENT));
                }
            } else if (invokeAggregatedMergeSplit) {
                // change current group
                debug += (this
                        + (curRCGroups.keySet().contains(newRCGroup) ? " changing local group " : " creating ")
                        + "{" + newRCGroup + ":"
                        + (curRCGroups.keySet().contains(newRCGroup) ? ncEpoch - 1 : ncEpoch) + "="
                        + this.DB.getOldGroup(newRCGroup).values().iterator().next() + "}"

                        + (mergees.isEmpty() ? "" : " merging groups {" + mergees + "}")

                        + (splitParentGroup == null ? "" : " splitting from {" + splitParentGroup + "}"));
                this.aggregatedSplitMerge(newRCGroup, curRCGroups, newRCGroups, ncRecord, mergees,
                        splitParentGroup);

            } else
                debug += "(" + this + " relying on others to create non-local group {" + newRCGroup + ":" + ncEpoch
                        + "=" + newRCGroups.get(newRCGroup) + "})";
            debug += "\n";
        }
        return debug;
    }

    private void aggregatedSplitMerge(String newRCGroup, Map<String, Set<NodeIDType>> curRCGroups,
            Map<String, Set<NodeIDType>> newRCGroups, ReconfigurationRecord<NodeIDType> ncRecord,
            Map<String, Set<String>> mergees, Map<String, Set<NodeIDType>> splitParentGroup) {
        // spawn a new task to avoid blocking here
        this.protocolExecutor.submit(new Runnable() {

            @Override
            public void run() {
                /* Aggregate mergee states, create replica group locally, and
                 * spawn WaitAckStartEpoch that will complete when all replicas
                 * have done the same. */
                Object monitor = new Object();
                String mergedStateFilename = Reconfigurator.this.spawnMergeeStopAndFetchStateTasks(
                        Reconfigurator.this.getStopTasks(newRCGroup, mergees.keySet(), splitParentGroup, monitor,
                                curRCGroups, newRCGroups, ncRecord),
                        monitor, newRCGroup, ncRecord.getRCEpoch(newRCGroup));

                boolean created = Reconfigurator.this.DB.createReplicaGroup(newRCGroup,
                        ncRecord.getRCEpoch(newRCGroup),
                        LargeCheckpointer.createCheckpointHandle(mergedStateFilename), newRCGroups.get(newRCGroup));
                assert (created);

                // will issue the complete upon majority
                Reconfigurator.this.protocolExecutor.spawnIfNotRunning(new WaitAckStartEpoch<NodeIDType>(
                        // passive start epoch
                        new StartEpoch<NodeIDType>(Reconfigurator.this.getMyID(), newRCGroup,
                                ncRecord.getRCEpoch(newRCGroup), newRCGroups.get(newRCGroup), true),
                        Reconfigurator.this.DB));
            }
        });
    }

    private Map<String, Set<NodeIDType>> isSplitGroup(String rcGroup, Map<String, Set<NodeIDType>> curRCGroups,
            Map<String, Set<NodeIDType>> newRCGroups, ReconfigurationRecord<NodeIDType> ncRecord) {
        if (newRCGroups.keySet().contains(rcGroup) && this.DB.isBeingAdded(rcGroup))
            return this.DB.getOldGroup(rcGroup);
        return null;
    }

    /**
     * @param rcGroup
     * @param curRCGroups
     * @param newRCGroups
     * @param ncRecord
     * @return A map from each mergee group name to its membership (as a
     *         string set) if {@code rcGroup} is a group into which other
     *         groups are being merged, and an empty map otherwise. A group is
     *         a mergee when it is being deleted and consistent-hashes onto
     *         {@code rcGroup} in the new ring; if any mergees exist, the map
     *         also includes {@code rcGroup}'s own old membership.
     */
    private Map<String, Set<String>> isMergerGroup(String rcGroup, Map<String, Set<NodeIDType>> curRCGroups,
            Map<String, Set<NodeIDType>> newRCGroups, ReconfigurationRecord<NodeIDType> ncRecord) {
        Map<String, Set<String>> mergees = new ConcurrentHashMap<String, Set<String>>();
        if (newRCGroups.keySet().contains(rcGroup)) {
            for (NodeIDType curRCNode : ncRecord.getActiveReplicas()) {
                String curRCGroup = this.DB.getRCGroupName(curRCNode);
                if (this.DB.isBeingDeleted(curRCGroup)) {
                    Map<String, Set<NodeIDType>> mergeGroup = this.DB.getNewGroup(curRCGroup);
                    String mergeGroupName = mergeGroup.keySet().iterator().next();
                    if (mergeGroupName.equals(rcGroup))
                        mergees.put(curRCGroup, Util.setToStringSet(mergeGroup.get(mergeGroupName)));
                }
            }
        }
        if (!mergees.isEmpty())
            mergees.put(rcGroup, Util.setToStringSet(this.DB.getOldGroup(rcGroup).values().iterator().next()));
        return mergees;
    }

    /**
     * 
     * @param newRCGroup
     * @param mergees
     * @param splitParentGroup
     * @param monitor
     * @param curRCGroups
     * @param newRCGroups
     * @param ncRecord
     * @return All of the {@link WaitAckStopEpoch} tasks needed to stop groups
     *         being merged or split in order to create {@code newRCGroup}
     */
    private Set<WaitAckStopEpoch<NodeIDType>> getStopTasks(String newRCGroup, Set<String> mergees,
            Map<String, Set<NodeIDType>> splitParentGroup, Object monitor, Map<String, Set<NodeIDType>> curRCGroups,
            Map<String, Set<NodeIDType>> newRCGroups, ReconfigurationRecord<NodeIDType> ncRecord) {
        Set<WaitAckStopEpoch<NodeIDType>> stopTasks = new HashSet<WaitAckStopEpoch<NodeIDType>>();
        for (String mergee : mergees) {
            Set<NodeIDType> mergeeGroup = this.DB.getOldGroup(newRCGroup).values().iterator().next();
            stopTasks.add(new WaitAckStopEpoch<NodeIDType>(new StartEpoch<NodeIDType>(this.getMyID(), newRCGroup,
                    ncRecord.getRCEpoch(newRCGroup), newRCGroups.get(newRCGroup), mergeeGroup, mergee, true,
                    newRCGroups.containsKey(mergee) ? ncRecord.getRCEpoch(mergee) - 1
                            : ncRecord.getRCEpoch(mergee)),
                    this.DB, monitor));
        }
        if (splitParentGroup != null) {
            String splitParent = splitParentGroup.keySet().iterator().next();
            stopTasks.add(new WaitAckStopEpoch<NodeIDType>(
                    new StartEpoch<NodeIDType>(this.getMyID(), newRCGroup, ncRecord.getRCEpoch(newRCGroup),
                            newRCGroups.get(newRCGroup), splitParentGroup.values().iterator().next(), splitParent,
                            false, ncRecord.getRCEpoch(splitParent) - 1),
                    this.DB, monitor));
        }
        return stopTasks;
    }

    private String spawnMergeeStopAndFetchStateTasks(Set<WaitAckStopEpoch<NodeIDType>> stopTasks, Object monitor,
            String mergerGroup, int mergerGroupEpoch) {

        Map<String, String> finalStates = new ConcurrentHashMap<String, String>();

        log.log(Level.INFO, "{0} starting wait on stop task monitors {1}", new Object[] { this, stopTasks });

        // FIXME: should start tasks inside synchronized?
        // spawn outside the assert so tasks start even with assertions disabled
        for (WaitAckStopEpoch<NodeIDType> stopTask : stopTasks) {
            boolean spawned = this.protocolExecutor.spawnIfNotRunning(stopTask);
            assert (spawned);
        }

        synchronized (monitor) {
            while (!stopTasks.isEmpty())
                try {
                    monitor.wait();
                    for (Iterator<WaitAckStopEpoch<NodeIDType>> iter = stopTasks.iterator(); iter.hasNext();) {
                        WaitAckStopEpoch<NodeIDType> stopTask = iter.next();
                        String finalState = stopTask.getFinalState();
                        if (finalState != null) {
                            iter.remove();
                            finalStates.put(stopTask.startEpoch.getPrevGroupName() + ":"
                                    + stopTask.startEpoch.getPrevEpochNumber(), finalState);
                        }
                    }
                } catch (InterruptedException e) {
                    Thread.currentThread().interrupt(); // restore interrupt status
                    log.log(Level.INFO, "{0} interrupted while waiting on tasks {1}",
                            new Object[] { this, stopTasks });
                    throw new RuntimeException("Task to spawn merge/split fetch tasks interrupted", e);
                }
        }
        log.log(Level.INFO, "{0} finished waiting on all stop task monitors {1}", new Object[] { this, stopTasks });
        // merge mergee checkpoints
        return this.DB.app.fetchAndAggregateMergeeStates(finalStates, mergerGroup, mergerGroupEpoch);
        // can create the mergeGroup now with all the fetched states
    }
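
    /* A minimal sketch of the monitor-based harvesting pattern used above,
     * with a hypothetical FinalStateTask type standing in for
     * WaitAckStopEpoch (neither is part of the original class): spawn tasks
     * that each eventually produce a final state, wait on a shared monitor,
     * and on each wakeup remove and record finished tasks until none remain.
     * Each task is assumed to call monitor.notify() under the monitor's lock
     * once its final state becomes available. */
    private interface FinalStateTask {
        String getKey(); // e.g., "group:epoch"

        String getFinalStateIfDone(); // null until the task has finished
    }

    @SuppressWarnings("unused")
    private static Map<String, String> harvestFinalStates(Set<FinalStateTask> tasks, Object monitor)
            throws InterruptedException {
        Map<String, String> finalStates = new ConcurrentHashMap<String, String>();
        synchronized (monitor) {
            while (!tasks.isEmpty()) {
                monitor.wait(); // woken whenever some task finishes
                for (Iterator<FinalStateTask> iter = tasks.iterator(); iter.hasNext();) {
                    FinalStateTask task = iter.next();
                    String finalState = task.getFinalStateIfDone();
                    if (finalState != null) {
                        iter.remove(); // harvest each task exactly once
                        finalStates.put(task.getKey(), finalState);
                    }
                }
            }
        }
        return finalStates;
    }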

    /**
     * This method "reconfigures" groups that will exist locally in the new ring
     * but do not currently exist in the old ring. This "reconfiguration" is
     * actually a group split operation, wherein an existing group is stopped
     * and two new groups are created by splitting the final state of the
     * stopped group, one with membership identical to the stopped group and the
     * other corresponding to the new but currently non-existent group. A
     * detailed example is described below.
     * 
     * Note: This method is unused with {@link #isAggregatedMergeSplit()}.
     * 
     * @param curRCGroups
     * @param newRCGroups
     * @param ncRecord
     * @return A debug message for pretty-printing.
     */
    @Deprecated
    private String splitExistingGroups(Map<String, Set<NodeIDType>> curRCGroups,
            Map<String, Set<NodeIDType>> newRCGroups, ReconfigurationRecord<NodeIDType> ncRecord) {
        String debug = ""; // just for prettier clustered printing
        // for each new group, initiate group change if and as needed
        for (String newRCGroup : newRCGroups.keySet()) {
            if (!curRCGroups.keySet().contains(newRCGroup)) {
                /* Create new group from scratch by splitting existing group.
                 * 
                 * Example: Suppose we have nodes Y, Z, A, C, D, E as
                 * consecutive RC nodes along the ring and we add B between A
                 * and C, and all groups are of size 3. Then, the group BCD is a
                 * new group getting added at nodes B, C, and D. This new group
                 * BCD must obtain state from the existing group CDE, i.e., the
                 * group CDE is getting split into two groups, BCD and CDE. One
                 * way to accomplish creation of the group BCD is to specify the
                 * previous group as CDE and just select the subset of state
                 * that gets remapped to BCD as the initial state. Below, we
                 * just acquire all of CDE's final state and simply choose what
                 * belongs to BCD while updating BCD's state at replica group
                 * creation time.
                 * 
                 * This operation will happen at C, and D, but not at B and E
                 * because E has no new group BCD that is not part of its
                 * existing groups, and B has nothing at all, not even a node
                 * config. */
                Map<String, Set<NodeIDType>> oldGroup = this.DB.getOldGroup(newRCGroup);
                assert (oldGroup != null && oldGroup.size() == 1);
                String oldGroupName = oldGroup.keySet().iterator().next();
                debug += this + " creating new group {" + newRCGroup + ":" + ncRecord.getRCEpoch(newRCGroup) + "="
                        + newRCGroups.get(newRCGroup) + "} by splitting {" + oldGroupName + ":"
                        + (ncRecord.getRCEpoch(oldGroupName) - 1) + "=" + oldGroup.get(oldGroupName) + "}\n";
                if (newRCGroup.equals(oldGroupName))
                    continue; // no trivial splits

                // uncoordinated execute
                this.DB.execute(new RCRecordRequest<NodeIDType>(this.getMyID(),
                        new StartEpoch<NodeIDType>(this.getMyID(), newRCGroup, ncRecord.getRCEpoch(newRCGroup),
                                newRCGroups.get(newRCGroup), oldGroup.get(oldGroupName), oldGroupName, false,
                                ncRecord.getRCEpoch(oldGroupName) - 1),
                        RequestTypes.RECONFIGURATION_INTENT));
            }
        }
        return debug;
    }
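
    /* An illustrative sketch (hypothetical helper, not part of the original
     * class) of the ring layout behind the split example above, assuming
     * groups consist of groupSize consecutive nodes along the ring starting
     * at an anchor node. On the ring [Y, Z, A, C, D, E], the group anchored
     * at C is {C, D, E}; after inserting B between A and C, the group
     * anchored at B is the new group {B, C, D}, whose old group is the one
     * anchored at B's old-ring successor C, i.e., {C, D, E}. */
    @SuppressWarnings("unused")
    private static <T> ArrayList<T> ringGroup(ArrayList<T> ring, int anchorIndex, int groupSize) {
        ArrayList<T> group = new ArrayList<T>();
        // take groupSize consecutive nodes, wrapping around the ring's end
        for (int i = 0; i < groupSize; i++)
            group.add(ring.get((anchorIndex + i) % ring.size()));
        return group;
    }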

    /**
     * 
     * This method "reconfigures" groups that will not exist locally in the new
     * ring but do currently exist locally in the old ring. This
     * "reconfiguration" is actually a group merge operation, wherein the old
     * "mergee" group is stopped, the group which with the old group is supposed
     * to merge (and will continue to exist locally in the new ring) is stopped,
     * and the mergee group's final state is merged into the latter group simply
     * through a paxos update operation. A detailed example and a discussion of
     * relevant concerns is described below.
     * 
     * Note: This method is unused with {@link #isAggregatedMergeSplit()}.
     * 
     * @param curRCGroups
     * @param newRCGroups
     * @param ncRecord
     * @return A debug message for pretty-printing.
     */
    @Deprecated
    private String mergeExistingGroups(Map<String, Set<NodeIDType>> curRCGroups,
            Map<String, Set<NodeIDType>> newRCGroups, ReconfigurationRecord<NodeIDType> ncRecord) {
        /* Delete groups that no longer should exist at this node.
         * 
         * Example: Suppose we have nodes Y, Z, A, B, C, D, E as consecutive RC
         * nodes along the ring and we are removing B between A and C, and all
         * groups are of size 3.
         * 
         * Basic idea: For each node being deleted, if I belong to the deleted
         * node's group, I need to reconfigure the deleted node's group by
         * merging it with the node in the new ring to which the deleted node
         * hashes.
         * 
         * In the example above, we need to remove group B at C by changing BCD
         * to CDE. Likewise, at nodes D and E, we need to change group BCD to
         * CDE.
         * 
         * C: BCD -> CDE (merge)
         * 
         * A merge is implemented as a reconfiguration that starts with
         * WaitAckStopEpoch for the old group, but instead of starting the new
         * group, it simply calls updateState on the new group to merge the
         * stopped mergee group's final state into the new group.
         * 
         * Furthermore, the group ZAC is a new group getting added at node C
         * because of the removal of B. There is no current group at C that
         * needs to be stopped, however, one does need to stop the old group ZAB
         * in order to reconfigure it to ZAC. One issue is that C doesn't even
         * know ZAB's epoch number as the group doesn't exist locally at C. So
         * we just let one of Z or A, not C, reconfigure ZAB in this case.
         * 
         * What if we are deleting B1, B2, and B3 from Y, Z, A, B1, B2, B3, C,
         * D, E? The group ZAC has to get created at C, which can still be done
         * by Z or A. Similarly, AB1B2 can be moved to ACD by A. However, B1B2B3
         * cannot be moved to CDE at C because CDE has to merge B1B2B3, B2B3C,
         * and B3CD. C can conduct the latter two merges but not the first. To
         * merge B1B2B3, at least one of B1, B2, or B3 must be up. The only
         * compelling reason to delete all three of B1, B2, and B3 together is
         * that they are all down, but in that case we cannot delete them
         * anyway until at least one of them comes back up. So we can delete at
         * most as many nodes as the size of the reconfigurator replica group.
         * 
         * Actually, the exact condition is weaker (something like we can delete
         * at most as many consecutive nodes as the size of the reconfigurator
         * replica group, but we need to formally prove the
         * necessity/sufficiency of this constraint). For now, simple and safe
         * is good enough. */

        String debug = "";
        for (String curRCGroup : curRCGroups.keySet()) {
            if (!newRCGroups.containsKey(curRCGroup) && this.DB.isBeingDeleted(curRCGroup)) {
                Map<String, Set<NodeIDType>> mergeGroup = this.DB.getNewGroup(curRCGroup);
                assert (mergeGroup != null && mergeGroup.size() == 1);
                String mergeGroupName = mergeGroup.keySet().iterator().next();

                /* mergeGroupName must be in my new groups and curRCGroup must
                 * exist locally. The latter is needed in order to know the
                 * epoch number of the group being merged. In the running
                 * example above, E does not satisfy both conditions because the
                 * mergeGroupName CDE exists at E but the mergee group BCD
                 * doesn't exist at E, so it is not in a position to conduct the
                 * reconfiguration (as it doesn't know which BCD epoch to stop
                 * and merge into CDE), so just one of C or D will conduct the
                 * merge in this case. */
                if (!newRCGroups.containsKey(mergeGroupName) || this.DB.getEpoch(curRCGroup) == null)
                    continue;

                // delete current group and merge into a new "mergeGroup"
                debug += (this + " merging current group {" + curRCGroup + ":"
                        + this.DB.getReplicaGroup(curRCGroup)) + "} with {" + mergeGroupName + ":"
                        + (ncRecord.getRCEpoch(mergeGroupName)) + "=" + mergeGroup.get(mergeGroupName) + "}\n";

                /* Register the mergee groups right here so that they can be
                 * available upon reconfiguration complete and can be executed
                 * sequentially in the new epoch. It is also easy to look at the
                 * RC record and determine if all the merges are done.
                 * 
                 * It is better to first start a task to stop all mergee groups
                 * (including the mergeGroup) and get a copy of their final
                 * state on the local host and concatenate them into an
                 * initialState meant for the mergeGroup. This will obviate the
                 * current design of having to coordinate merges individually in
                 * the mergeGroup with a global state handle; each such
                 * coordinated merge operation can fail at a subset of replicas
                 * prompting them to launch WaitAckStopEpoch tasks to try to
                 * re-coordinate the merge that would be redundant on the nodes
                 * on which the merge already succeeded. The current design is
                 * also more complex to debug and, most importantly, violates
                 * state machine semantics. Thus, it is possible that changes to
                 * a merged RC record don't result in the same state at all
                 * replicas because some replicas may not yet have completed the
                 * merge. Waiting until all replicas have completed all
                 * constituent merges is both more complicated to implement and
                 * is bad for liveness. The new design of assembling all merged
                 * state into an initialState before starting the paxos instance
                 * also has the benefit of cleanly dealing with crash recovery
                 * using paxos checkpoint transfers as usual.
                 * 
                 * Invariant: A state machine maintains state machine semantics,
                 * i.e., replicas start from the same initial state and any
                 * sequence of executed requests results in the same state at
                 * the end of the execution sequence at all replicas.
                 * 
                 * Note that the invariant above implicitly disallows "failed"
                 * request executions at a subset of replicas, i.e., a request R
                 * and a state S completely determine the resulting next state
                 * at any replica. Allowing R to fail at a strict subset of
                 * replicas while succeeding at others means that different
                 * replicas may go from the same state S to different possible
                 * next states, causing divergence.
                 * 
                 * The above design has now been implemented using the cleaner
                 * isAggregatedMergeSplit design. */
                this.protocolExecutor.spawnIfNotRunning(new WaitAckStopEpoch<NodeIDType>(
                        new StartEpoch<NodeIDType>(this.getMyID(), mergeGroupName,
                                ncRecord.getRCEpoch(mergeGroupName), mergeGroup.get(mergeGroupName),
                                curRCGroups.get(curRCGroup), curRCGroup, true, ncRecord.getRCEpoch(curRCGroup)),
                        this.DB));
            } else if (!newRCGroups.containsKey(curRCGroup) && !this.DB.isBeingDeleted(curRCGroup)) {
                // not this node's responsibility; other nodes will delete this group
                debug += (this + " expecting others to delete current group {" + curRCGroup + ":"
                        + (ncRecord.getRCEpoch(curRCGroup) - 1) + "=" + this.DB.getReplicaGroup(curRCGroup))
                        + "}\n";
            }
        }
        return debug;
    }
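
    /* A minimal sketch (hypothetical helper, not part of the original class)
     * of the merge-eligibility condition applied above: a node may conduct
     * the merge of a deleted mergee group only if the merge target is among
     * its own new groups and the mergee group currently exists locally,
     * since only then does it know which mergee epoch to stop. In the running
     * example, C and D satisfy both conditions for merging BCD into CDE,
     * while E does not because BCD does not exist at E. */
    @SuppressWarnings("unused")
    private static boolean canConductMerge(Set<String> myNewGroups, Set<String> myCurrentGroups,
            String mergeeGroup, String mergeTargetGroup) {
        return myNewGroups.contains(mergeTargetGroup) && myCurrentGroups.contains(mergeeGroup);
    }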

    private static final int MAX_OUTSTANDING_RECONFIGURATIONS = 100;

    /**
     * This method issues reconfigurations for all records replicated on the
     * active being deleted, limiting the number of outstanding
     * reconfigurations using the {@link #outstandingReconfigurations} queue.
     */
    @SuppressWarnings({ "unchecked" })
    private boolean deleteActiveReplica(NodeIDType active, InetSocketAddress creator) {
        boolean initiated = this.DB.app.initiateReadActiveRecords(active);
        if (!initiated) {
            log.log(Level.WARNING, "{0} deleteActiveReplica {1} unable to initiate read active records",
                    new Object[] { this, active });
            return false;
        }
        int rcCount = 0;
        // this.setOutstanding(active);
        this.consistentNodeConfig.slateForRemovalActive(active);
        ReconfigurationRecord<NodeIDType> record = null;
        while ((record = this.DB.app.readNextActiveRecord()) != null) {
            log.log(Level.FINEST, "{0} reconfiguring {1} in order to delete active {1}",
                    new Object[] { this, record.getName(), active });
            try {
                this.DB.waitOutstanding(MAX_OUTSTANDING_RECONFIGURATIONS);
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt(); // restore interrupt status
                return false;
            }
            // reconfigure name so as to exclude active
            Set<NodeIDType> newActives = new HashSet<NodeIDType>(record.getActiveReplicas());
            assert (newActives.contains(active));
            NodeIDType newActive = (NodeIDType) Util
                    .getRandomOtherThan(this.consistentNodeConfig.getActiveReplicas(), newActives);
            if (newActive != null)
                newActives.add(newActive);
            newActives.remove(active);
            if (this.initiateReconfiguration(record.getName(), record, newActives, creator, null, null, null, null,
                    null)) {
                rcCount++;
                this.DB.addToOutstanding(record.getName());
                record = this.DB.getReconfigurationRecord(record.getName());
                if (record != null && record.getActiveReplicas() != null
                        && !record.getActiveReplicas().contains(active))
                    // inelegant redundant check to handle concurrency
                    this.DB.notifyOutstanding(record.getName());
            }
        }
        log.log(Level.INFO,
                "{0} closing read active records cursor after initiating "
                        + "{1} reconfigurations in order to delete active {2}",
                new Object[] { this, rcCount, active });
        boolean closed = this.DB.app.closeReadActiveRecords();
        // this.setNoOutstanding();
        return initiated && closed;
    }
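
    /* A minimal sketch (hypothetical, not part of the original class) of the
     * bounded-outstanding throttle that DB.waitOutstanding,
     * DB.addToOutstanding, and DB.notifyOutstanding are used for above: the
     * initiating thread blocks while the number of in-flight
     * reconfigurations is at the limit, and completions wake it up. */
    private static class OutstandingThrottle {
        private final HashSet<String> outstanding = new HashSet<String>();

        synchronized void waitOutstanding(int max) throws InterruptedException {
            // block until there is room for another outstanding reconfiguration
            while (this.outstanding.size() >= max)
                this.wait();
        }

        synchronized void addToOutstanding(String name) {
            this.outstanding.add(name);
        }

        synchronized void notifyOutstanding(String name) {
            if (this.outstanding.remove(name))
                this.notifyAll(); // wake the throttled initiator
        }
    }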

    /**
     * If only a subset of reconfigurators get a node config change intent, they
     * could end up never executing the intent and therefore never doing the
     * split/change/merge sequence. If only a single node is in this situation,
     * there shouldn't be a problem as reconfiguration of reconfigurator groups
     * is done redundantly by all relevant reconfigurators. However, if multiple
     * nodes miss the NC change intent, say, only a single node executes it in
     * the worst case, it will go ahead and create the next NC epoch but the
     * other reconfigurators will not do anything for groups other than those at
     * that reconfigurator. To address this problem, there must be enough
     * information in the NC change complete for reconfigurators that missed the
     * corresponding intent to go ahead and initiate the change/split/merge
     * sequence anyway. There is of course enough information as all that is
     * really needed is the old and new set of reconfigurators.
     * 
     * This fix has now been implemented using the
     * {@link #executed(Request, boolean)} callback in AbstractReconfiguratorDB
     * that gets it via {@link RepliconfigurableReconfiguratorDB} that in turn
     * gets it from {@link AbstractReplicaCoordinator}.
     * 
     */
}