edu.umass.cs.reconfiguration.RepliconfigurableReconfiguratorDB.java Source code

Java tutorial

Introduction

Here is the source code for edu.umass.cs.reconfiguration.RepliconfigurableReconfiguratorDB.java

Source

/*
 * Copyright (c) 2015 University of Massachusetts
 * 
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 * 
 * http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 * 
 * Initial developer(s): V. Arun
 */
package edu.umass.cs.reconfiguration;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.logging.Level;

import org.json.JSONObject;

import edu.umass.cs.gigapaxos.interfaces.ExecutedCallback;
import edu.umass.cs.gigapaxos.interfaces.Request;
import edu.umass.cs.gigapaxos.paxosutil.PaxosInstanceCreationException;
import edu.umass.cs.nio.interfaces.Messenger;
import edu.umass.cs.reconfiguration.AbstractReconfiguratorDB.RecordNames;
import edu.umass.cs.reconfiguration.interfaces.ReconfigurableRequest;
import edu.umass.cs.reconfiguration.interfaces.ReconfiguratorCallback;
import edu.umass.cs.reconfiguration.reconfigurationpackets.ReconfigurationPacket;
import edu.umass.cs.reconfiguration.reconfigurationpackets.StartEpoch;
import edu.umass.cs.reconfiguration.reconfigurationprotocoltasks.WaitAckDropEpoch;
import edu.umass.cs.reconfiguration.reconfigurationutils.ConsistentHashing;
import edu.umass.cs.reconfiguration.reconfigurationutils.ConsistentReconfigurableNodeConfig;
import edu.umass.cs.reconfiguration.reconfigurationutils.ReconfigurationRecord;
import edu.umass.cs.reconfiguration.reconfigurationutils.RequestParseException;

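/**
 * @author V. Arun
 *         <p>
 *         This class needs the functionality of both PaxosReplicaCoordinator
 *         and AbstractReconfiguratorDB. Java has no multiple inheritance, so
 *         it extends the former and delegates to an AbstractReconfiguratorDB
 *         instance for the latter.
 * @param <NodeIDType>
 *            Type used to identify nodes.
 */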
public class RepliconfigurableReconfiguratorDB<NodeIDType> extends PaxosReplicaCoordinator<NodeIDType> {

    /*
     * Replica coordination schemes that can back the reconfigurator DB. Only
     * the PAXOS value is compared against in this class (in the constructor).
     */
    private static enum ReplicaCoordinator {
        PAXOS, DYNAMO
    };

    // coordination scheme in use; the constructor creates default groups only for PAXOS
    private static ReplicaCoordinator RC_REPLICA_COORDINATOR = ReplicaCoordinator.PAXOS;

    // underlying reconfigurator DB that most methods in this class delegate to
    protected final AbstractReconfiguratorDB<NodeIDType> app;
    // node config supplied at construction; used to look up reconfigurators, actives, and node addresses
    protected final ConsistentReconfigurableNodeConfig<NodeIDType> consistentNodeConfig;
    // removed reconfigurator -> node config epoch at which it was queued for checkpoint garbage collection
    private final HashMap<NodeIDType, Long> pendingReconfiguratorDeletions = new HashMap<NodeIDType, Long>();

    /**
     * Wires the given reconfigurator DB into a paxos-coordinated replica
     * coordinator, registers the single coordinated request type, optionally
     * creates the default reconfigurator groups, and installs a callback on
     * the DB that forwards execution notifications to this coordinator.
     * 
     * @param app
     *            The reconfigurator DB that this coordinator delegates to.
     * @param myID
     *            ID of this node.
     * @param consistentNodeConfig
     *            Node config used to determine reconfigurator groups.
     * @param niot
     *            Messenger passed to the superclass; must not be null.
     * @param startCleanSlate
     *            Default groups are created only when this is false (and the
     *            coordinator is PAXOS).
     */
    public RepliconfigurableReconfiguratorDB(AbstractReconfiguratorDB<NodeIDType> app, NodeIDType myID,
            ConsistentReconfigurableNodeConfig<NodeIDType> consistentNodeConfig,
            Messenger<NodeIDType, JSONObject> niot, boolean startCleanSlate) {
        // setting paxosManager out-of-order limit to 1
        super(app, myID, consistentNodeConfig, niot, 1);
        assert (niot != null);
        this.app = app;
        this.consistentNodeConfig = consistentNodeConfig;
        // only request that needs coordination;
        this.registerCoordination(ReconfigurationPacket.PacketType.RC_RECORD_REQUEST);
        // default groups need only be created for paxos, not dynamo
        if (RC_REPLICA_COORDINATOR.equals(ReplicaCoordinator.PAXOS) && !startCleanSlate)
            this.createDefaultGroups();
        this.setLargeCheckpoints();
        // relay the DB's execution notifications through this coordinator
        this.app.setCallback(new ReconfiguratorCallback() {

            @Override
            public void executed(Request request, boolean handled) {
                RepliconfigurableReconfiguratorDB.this.callCallback(request, handled, null);
            }

            @Override
            public void preExecuted(Request request) {
                RepliconfigurableReconfiguratorDB.this.getCallback().preExecuted(request);
            }
        });

        /*
         * The node config records are only available from the SQL-backed DB,
         * so guard the downcast; a purely informational log line must not
         * fail construction with a ClassCastException for other DB types.
         */
        if (this.app instanceof SQLReconfiguratorDB)
            log.log(Level.INFO, "{0} after recovery has node config records = {1}", new Object[] { this,
                    ((SQLReconfiguratorDB<NodeIDType>) this.app).getNodeConfigRecords(this.consistentNodeConfig) });
    }

    /*
     * Needed by Reconfigurator. Returns the active replicas recorded for name,
     * or null if there is no reconfiguration record for name.
     */
    protected Set<NodeIDType> getActiveReplicas(String name) {
        final ReconfigurationRecord<NodeIDType> rcRecord = getReconfigurationRecord(name);
        if (rcRecord == null)
            return null;
        return rcRecord.getActiveReplicas();
    }

    /**
     * Coordinates the request in the RC group that its service name maps to.
     * The one exception is a stop request whose service name differs from the
     * mapped RC group name: that request is coordinated in the group named by
     * the request itself, after logging the mismatch.
     */
    @Override
    public boolean coordinateRequest(Request request, ExecutedCallback callback)
            throws IOException, RequestParseException {
        final String serviceName = request.getServiceName();
        final String mappedGroup = this.getRCGroupName(serviceName);

        // can only send stop request to own RC group
        final boolean stopForUnmappedGroup = !mappedGroup.equals(serviceName)
                && (request instanceof ReconfigurableRequest) && ((ReconfigurableRequest) request).isStop();
        if (!stopForUnmappedGroup)
            return super.coordinateRequest(mappedGroup, request, callback);

        final ReconfigurableRequest stopRequest = (ReconfigurableRequest) request;
        log.log(Level.INFO,
                "{0} received stop request for RC group {1}:{2} that is not (yet) "
                        + " node config likely because this node has fallen behind.",
                new Object[] { this, stopRequest.getServiceName(), stopRequest.getEpochNumber() });
        return super.coordinateRequest(serviceName, request, callback);
    }

    /**
     * Same as {@link #coordinateRequest(Request,ExecutedCallback)} except that
     * {@link RequestParseException} and {@link IOException} are logged as
     * warnings (with the stack trace attached to the log record) instead of
     * being propagated.
     * 
     * @param request
     * @param callback
     * @return Returns the result of
     *         {@link #coordinateRequest(Request,ExecutedCallback)}, or false
     *         if it threw one of the suppressed exceptions.
     */
    public boolean coordinateRequestSuppressExceptions(Request request, ExecutedCallback callback) {
        try {
            return this.coordinateRequest(request, callback);
        } catch (RequestParseException | IOException e) {
            // route the stack trace through the logger rather than stderr
            log.log(Level.WARNING,
                    this + " incurred " + e.getClass().getSimpleName() + " while coordinating " + request, e);
            return false;
        }
    }

    /**
     * Uncoordinated read of DB state: the lookup goes straight to the
     * underlying DB without any coordination. With an in-memory "DB", callers
     * could modify DB state through the returned reference, which is
     * problematic unless returned values are deep copied.
     * 
     * @param name
     * @return ReconfigurationRecord for {@code name}.
     */
    public ReconfigurationRecord<NodeIDType> getReconfigurationRecord(String name) {
        return app.getReconfigurationRecord(name);
    }

    /*
     * Create one group for every set of contiguous nodes on the ring of which
     * this node is a part. The name of the group is the name of the first node
     * in the group.
     * 
     * Upon recovery, if any of the groups exists already, it will be recovered
     * from the most recent checkpoint.
     * 
     * The return value is always false and is not used.
     */
    private boolean createDefaultGroups() {
        // visit every reconfigurator; skip groups that do not include this node
        for (NodeIDType rcNode : consistentNodeConfig.getReconfigurators()) {
            final String groupName = app.getRCGroupName(rcNode);
            final Set<NodeIDType> members = consistentNodeConfig.getReplicatedReconfigurators(groupName);
            if (!members.contains(getMyID()))
                continue;

            log.log(Level.INFO, "{0} creating reconfigurator group {1} with members {2}",
                    new Object[] { this, groupName, members });
            try {
                createReplicaGroup(groupName, 0, getInitialRCGroupRecord(groupName, members).toString(),
                        members);
            } catch (PaxosInstanceCreationException pice) {
                // can happen during recovery
                log.info(this + " encountered paxos instance creation exception (not unusual during recovery): "
                        + pice.getMessage());
            }
        }

        /*
         * Create RC_NODES record, the master copy of the set of all
         * reconfigurators, replicated on all reconfigurators.
         */
        final String rcNodesName = RecordNames.RC_NODES.toString();
        final String rcNodesState = getInitialRCGroupRecord(rcNodesName, consistentNodeConfig.getReconfigurators())
                .toString();
        createReplicaGroup(rcNodesName, 0, rcNodesState, consistentNodeConfig.getReconfigurators());

        /*
         * Create AR_NODES record, whose initial record holds the set of all
         * active replicas; it is also replicated on all reconfigurators.
         */
        final String arNodesName = RecordNames.AR_NODES.toString();
        final String arNodesState = getInitialRCGroupRecord(arNodesName, consistentNodeConfig.getActiveReplicas())
                .toString();
        createReplicaGroup(arNodesName, 0, arNodesState, consistentNodeConfig.getReconfigurators());

        return false; // not used
    }

    /**
     * Pass-through override: group creation is handled entirely by the
     * superclass and its result is returned unchanged.
     */
    @Override
    public boolean createReplicaGroup(String groupName, int epoch, String state, Set<NodeIDType> nodes) {
        return super.createReplicaGroup(groupName, epoch, state, nodes);
    }

    /**
     * Builds the epoch-0 record for an RC group, passing the same node set as
     * both of the record's group arguments.
     * <p>
     * The initial RC group record is inserted directly into the
     * reconfiguration DB by paxos as the initial state and does not go through
     * {@link AbstractReconfiguratorDB#handleRCRecordRequest(edu.umass.cs.reconfiguration.reconfigurationpackets.RCRecordRequest)}
     * , so it does not have to pass the {@link StartEpoch#isInitEpoch()} check
     * (and would not pass it, because the previous group is not empty).
     */
    private ReconfigurationRecord<NodeIDType> getInitialRCGroupRecord(String groupName, Set<NodeIDType> group) {
        final int initialEpoch = 0;
        return new ReconfigurationRecord<NodeIDType>(groupName, initialEpoch, group, group);
    }

    /*
     * Needed by Reconfigurator. Returns whatever the underlying DB reports as
     * the demand stats for name.
     */
    protected String getDemandStats(String name) {
        return app.getDemandStats(name);
    }

    // Returns the pending reconfigurations as reported by the underlying DB.
    protected String[] getPendingReconfigurations() {
        return app.getPendingReconfigurations();
    }

    // Stops the coordinator (superclass) first, then closes the underlying DB.
    protected void close() {
        super.stop();
        app.close();
    }

    /**
     * Maps the service name to its RC group name and returns the replica
     * group that the superclass has for that RC group.
     */
    @Override
    public Set<NodeIDType> getReplicaGroup(String serviceName) {
        final String rcGroupName = getRCGroupName(serviceName);
        return super.getReplicaGroup(rcGroupName);
    }

    // Returns the RC group name that the underlying DB maps serviceName to.
    protected String getRCGroupName(String serviceName) {
        return app.getRCGroupName(serviceName);
    }

    /*
     * Sets the underlying DB's recovering flag to b. Previously the argument
     * was ignored and the flag was unconditionally cleared; callers passing
     * false see no change in behavior.
     */
    protected void setRecovering(boolean b) {
        this.app.recovering = b;
    }

    /******************* Reconfigurator reconfiguration methods ***************/

    // Returns the merge list that the underlying DB holds for newRCGroupName.
    protected Set<String> getMergeList(String newRCGroupName) {
        return app.getMergeList(newRCGroupName);
    }

    // Returns all merge lists held by the underlying DB.
    protected Map<String, Set<String>> getMergeLists() {
        return app.getMergeLists();
    }

    /*
     * Checks if I am affected because of the addition or deletion of the node
     * argument. A null node affects nobody.
     * 
     * I am affected if "node" consistent-hashes (on the old ring) to one of my
     * existing RC groups, or if the node it hashes to on either the old or the
     * new ring is a member of one of my RC groups. This check is the same for
     * both add and remove operations.
     */
    protected boolean amAffected(NodeIDType node) {
        if (node == null)
            return false;

        // first node that "node" hashes to on the old and the new ring
        final NodeIDType oldHead = getOldConsistentHashRing().getReplicatedServersArray(app.getRCGroupName(node))
                .get(0);
        final NodeIDType newHead = getNewConsistentHashRing().getReplicatedServersArray(app.getRCGroupName(node))
                .get(0);
        final String oldHeadGroup = app.getRCGroupName(oldHead);

        for (Map.Entry<String, Set<NodeIDType>> myGroup : getOldRCGroups().entrySet()) {
            final Set<NodeIDType> members = myGroup.getValue();
            if (oldHeadGroup.equals(myGroup.getKey()) || members.contains(oldHead) || members.contains(newHead))
                return true;
        }
        return false;
    }

    // Forwards the added and deleted node sets to the underlying DB and returns its result.
    protected Set<NodeIDType> setRCEpochs(Set<NodeIDType> addNodes, Set<NodeIDType> deleteNodes) {
        return app.setRCEpochs(addNodes, deleteNodes);
    }

    // Returns the old RC groups (group name -> members) as reported by the underlying DB.
    protected Map<String, Set<NodeIDType>> getOldRCGroups() {
        return app.getOldRCGroups();
    }

    // Returns the new RC groups (group name -> members) as reported by the underlying DB.
    protected Map<String, Set<NodeIDType>> getNewRCGroups() {
        return app.getNewRCGroups();
    }

    /**
     * Asks the underlying DB whether {@code name} is itself an RC group name.
     * Per the original note on this method, the check consults the soft copy
     * of node config; an alternative would have been to check whether the name
     * is node.toString() for some node in the current set of reconfigurators.
     * 
     * @param name
     * @return True if {@code name} represents a reconfigurator group name.
     */
    public boolean isRCGroupName(String name) {
        return app.isRCGroupName(name);
    }

    /**
     * Returns the RC group name of {@code node} as computed by the underlying
     * DB. The group name is just node.toString(); changing it to anything else
     * will break code.
     * 
     * @param node
     * @return String form of {@code node}.
     */
    public final String getRCGroupName(NodeIDType node) {
        return app.getRCGroupName(node);
    }

    /*
     * The methods below obtain the old and new consistent hash rings on
     * demand. We may want to cache them as a minor (unimplemented)
     * optimization. We need both rings in order to correctly conduct
     * reconfigurator add/delete operations. It is unwise to use the soft copy
     * of consistentNodeConfig. The only reliable information is in the
     * paxos-managed NODE_CONFIG record that has the current and new (possibly
     * identical) set of reconfigurators, so the consistent hash rings are
     * generated on demand using that information.
     */
    // Returns the RC group name of the first node that name maps to on the old ring.
    protected String getOldGroupName(String name) {
        final NodeIDType firstOldNode = getOldConsistentHashRing().getReplicatedServersArray(name).get(0);
        return app.getRCGroupName(firstOldNode);
    }

    // Returns the old consistent hash ring as provided by the underlying DB.
    protected ConsistentHashing<NodeIDType> getOldConsistentHashRing() {
        return app.getOldConsistentHashRing();
    }

    // Returns the new consistent hash ring as provided by the underlying DB.
    protected ConsistentHashing<NodeIDType> getNewConsistentHashRing() {
        return app.getNewConsistentHashRing();
    }

    /*
     * Needed because we have no copy of the old consistent hash ring. Returns
     * a single-entry map from the old group's name (the RC group name of the
     * first node newRCNode maps to on the old ring) to that group's members.
     */
    protected Map<String, Set<NodeIDType>> getOldGroup(String newRCNode) {
        final ArrayList<NodeIDType> oldMembers = getOldConsistentHashRing().getReplicatedServersArray(newRCNode);
        // the old group name differs from newRCNode in the case of RC node additions
        final String oldName = app.getRCGroupName(oldMembers.get(0));

        final Map<String, Set<NodeIDType>> singleton = new HashMap<String, Set<NodeIDType>>();
        singleton.put(oldName, new HashSet<NodeIDType>(oldMembers));
        return singleton;
    }

    // Converts a set of nodes into the set of their RC group names.
    protected Set<String> getNodeSetAsStringSet(Set<NodeIDType> nodeSet) {
        final Set<String> groupNames = new HashSet<String>();
        for (final NodeIDType member : nodeSet) {
            groupNames.add(getRCGroupName(member));
        }
        return groupNames;
    }

    /*
     * Needed because we may have no copy of the new consistent hash ring when
     * nodes are being deleted. Returns a single-entry map from the new group's
     * name (the RC group name of the first node oldRCNode maps to on the new
     * ring) to that group's members.
     */
    protected Map<String, Set<NodeIDType>> getNewGroup(String oldRCNode) {
        final ArrayList<NodeIDType> newMembers = getNewConsistentHashRing().getReplicatedServersArray(oldRCNode);
        final String newName = app.getRCGroupName(newMembers.get(0));

        final Map<String, Set<NodeIDType>> singleton = new HashMap<String, Set<NodeIDType>>();
        singleton.put(newName, new HashSet<NodeIDType>(newMembers));
        return singleton;
    }

    /*
     * Changes the node config copy in the DB to the given version. We need a
     * persistent copy there as we can not rely on the initial node config
     * supplied in the constructor as that may be out-of-date. Actually, we
     * need to store node config information in the DB primarily for the
     * nodeID -> socketAddress mapping. We always have the set of
     * reconfigurators available in the NODE_CONFIG RC record.
     */
    protected boolean changeDBNodeConfig(int version) {
        return app.updateDBNodeConfig(version);
    }

    // Asks the underlying DB to update its active node config copy to the given version.
    protected boolean changeActiveDBNodeConfig(int version) {
        return app.updateActiveDBNodeConfig(version);
    }

    /**
     * Special-cases the two node config records: for RC_NODES and AR_NODES no
     * final state is deleted here; the return value instead reports whether
     * the corresponding record is ready at {@code epoch + 1}. All other names
     * are handled by the superclass.
     */
    @Override
    public boolean deleteFinalState(String rcGroupName, int epoch) {
        final int nextEpoch = epoch + 1;

        // special case for node config changes; final state deletion unnecessary
        if (rcGroupName.equals(RecordNames.RC_NODES.toString())) {
            return isNCReady(nextEpoch);
        }
        if (rcGroupName.equals(RecordNames.AR_NODES.toString())) {
            return isActiveNCReady(nextEpoch);
        }

        // final state deletion strictly unnecessary
        return super.deleteFinalState(rcGroupName, epoch);
    }

    /*
     * Returns true iff the RC_NODES record exists, is at the given epoch, and
     * is ready. Otherwise logs the current state (INFO if the DB's debug
     * string is empty, i.e., only the node config record change remains; FINE
     * otherwise) and returns false. A missing record is logged as null rather
     * than causing a NullPointerException.
     */
    private boolean isNCReady(int epoch) {
        ReconfigurationRecord<NodeIDType> ncRecord = this
                .getReconfigurationRecord(AbstractReconfiguratorDB.RecordNames.RC_NODES.toString());
        if (ncRecord != null && ncRecord.getEpoch() == epoch && ncRecord.isReady()) {
            log.log(Level.INFO, "{0} {1} is all ready",
                    new Object[] { this, AbstractReconfiguratorDB.RecordNames.RC_NODES });
            return true;
        }

        String debug = this.app.areRCChangesCompleteDebug();
        // debug empty means only ncRecord change remains
        log.log(debug.isEmpty() ? Level.INFO : Level.FINE,
                "{0} has not completed node config record change to epoch {1}; nc_record = {2}; {3}",
                new Object[] { this, epoch, ncRecord != null ? ncRecord.getSummary() : null, debug });
        return false;
    }

    /*
     * Returns true iff the AR_NODES record exists, is at the given epoch, and
     * is ready. Otherwise logs the record's summary (or null if the record is
     * missing, instead of throwing a NullPointerException) and returns false.
     */
    private boolean isActiveNCReady(int epoch) {
        ReconfigurationRecord<NodeIDType> activeNCRecord = this
                .getReconfigurationRecord(AbstractReconfiguratorDB.RecordNames.AR_NODES.toString());
        if (activeNCRecord != null && activeNCRecord.getEpoch() == epoch && activeNCRecord.isReady())
            return true;

        log.log(Level.INFO, "{0} has *NOT* completed active node config change to epoch {1}; nc_state = {2}",
                new Object[] { this, epoch, activeNCRecord != null ? activeNCRecord.getSummary() : null });
        return false;
    }

    /*
     * Really checks for "not present in new": returns true iff no node in the
     * RC_NODES record's new actives has curRCGroup as its RC group name.
     */
    protected boolean isBeingDeleted(String curRCGroup) {
        final Set<NodeIDType> newRCs = getReconfigurationRecord(RecordNames.RC_NODES.toString()).getNewActives();
        for (final NodeIDType rc : newRCs) {
            if (getRCGroupName(rc).equals(curRCGroup)) {
                return false;
            }
        }
        return true;
    }

    /*
     * Really checks for "not present in old": returns true iff no node in the
     * RC_NODES record's current active replicas has curRCGroup as its RC group
     * name.
     */
    protected boolean isBeingAdded(String curRCGroup) {
        final Set<NodeIDType> oldRCs = getReconfigurationRecord(RecordNames.RC_NODES.toString())
                .getActiveReplicas();
        for (final NodeIDType rc : oldRCs) {
            if (getRCGroupName(rc).equals(curRCGroup)) {
                return false;
            }
        }
        return true;
    }

    // Forwards the delayed-delete-complete notification to the underlying DB.
    protected void delayedDeleteComplete() {
        app.delayedDeleteComplete();
    }

    // Returns the epoch of the RC_NODES record.
    protected int getCurNCEpoch() {
        final ReconfigurationRecord<NodeIDType> ncRecord = getReconfigurationRecord(
                RecordNames.RC_NODES.toString());
        return ncRecord.getEpoch();
    }

    /*
     * Stops failure monitoring of the deleted node and queues it, tagged with
     * the current node config epoch, for later garbage collection of its old
     * checkpoints.
     */
    protected void garbageCollectDeletedNode(NodeIDType node) {
        // to stop paxos failure detection
        stopFailureMonitoring(node);

        /*
         * To clean up old checkpoints lying around. Could also put time here
         * and wait for MAX_FINAL_STATE_AGE.
         */
        final long queuedAtEpoch = getCurNCEpoch();
        final int sizeAfterQueueing = pendingReconfiguratorDeletions.size() + 1;
        log.log(Level.INFO,
                "{0} queueing {1}:{2} for garbage collection of old checkpoints; pending queue size = {3}",
                new Object[] { this, node, queuedAtEpoch, sizeAfterQueueing });
        pendingReconfiguratorDeletions.put(node, queuedAtEpoch);
    }

    /*
     * A reconfigurator's file system based checkpoints can be dropped after it
     * is more than 2 epochs old. If other reconfigurators have not kept pace
     * with node config changes, they have to be deleted from the system first
     * anyway before joining back in. There is no way for a reconfigurator to
     * recover and "roll forward" node config changes if it has missed multiple
     * node config changes. Subsequent RC node config changes should not be
     * continued with if some nodes have not completed the current node config
     * change, as doing so essentially means that those RC nodes will be treated
     * as failed.
     */
    private static int RECONFIGURATOR_GC_WAIT_EPOCHS = 2;

    /*
     * Walks the pending-deletion queue and, for every removed reconfigurator
     * that was queued more than RECONFIGURATOR_GC_WAIT_EPOCHS node config
     * epochs ago, invokes garbage collection on the DB and dequeues it.
     */
    protected void garbageCollectOldFileSystemBasedCheckpoints() {
        final Iterator<Map.Entry<NodeIDType, Long>> pending = pendingReconfiguratorDeletions.entrySet().iterator();
        while (pending.hasNext()) {
            final Map.Entry<NodeIDType, Long> queued = pending.next();
            final NodeIDType removedRC = queued.getKey();
            final long queuedAtEpoch = queued.getValue();
            // not old enough yet; leave it queued
            if (getCurNCEpoch() - queuedAtEpoch <= RECONFIGURATOR_GC_WAIT_EPOCHS)
                continue;

            log.log(Level.INFO, "{0} invoking RC GC on {1}", new Object[] { this, removedRC });
            app.garbageCollectedDeletedNode(removedRC);
            pending.remove();
        }
    }

    // True iff name equals the name of the RC_NODES record.
    protected boolean isNCRecord(String name) {
        final String ncRecordName = RecordNames.RC_NODES.toString();
        return name.equals(ncRecordName);
    }

    /**
     * Forwards to the underlying DB's {@code removePending} for the given
     * name.
     * 
     * @param name
     */
    public void removePending(String name) {
        app.removePending(name);
    }

    // active node with an outstanding deletion, if any; consulted by filterDeletedActives
    private NodeIDType outstandingActiveDeletion = null;

    // Records the active node whose deletion is outstanding (null clears it).
    protected void setOutstanding(NodeIDType active) {
        outstandingActiveDeletion = active;
    }

    /**
     * Removes from {@code toFilter}, in place, the active node whose deletion
     * is outstanding (if any) and every node for which the node config has no
     * address.
     * 
     * @param toFilter
     * @return Will modify argument and return it by removing active nodes
     *         either currently being deleted or that have been deleted. Used by
     *         {@link WaitAckDropEpoch} to avoid unnecessary message send
     *         failures.
     */
    public Set<NodeIDType> filterDeletedActives(Set<NodeIDType> toFilter) {
        if (outstandingActiveDeletion != null) {
            toFilter.remove(outstandingActiveDeletion);
        }
        final Iterator<NodeIDType> candidates = toFilter.iterator();
        while (candidates.hasNext()) {
            final NodeIDType candidate = candidates.next();
            // no known address means the node is no longer in the node config
            if (consistentNodeConfig.getNodeAddress(candidate) == null) {
                candidates.remove();
            }
        }
        return toFilter;
    }

    /*
     * Names with outstanding reconfigurations. All access is guarded by this
     * set's own monitor, which is also used for wait/notify, so the reference
     * is final.
     */
    private final Set<String> outstandingReconfigurations = new HashSet<String>();

    /*
     * Returns true iff name currently has an outstanding reconfiguration.
     * Reads under the same lock that the mutating methods hold, as HashSet is
     * not safe for unsynchronized concurrent access.
     */
    protected boolean outstandingContains(String name) {
        synchronized (this.outstandingReconfigurations) {
            return this.outstandingReconfigurations.contains(name);
        }
    }

    /*
     * Adds name to the set of outstanding reconfigurations and logs the
     * resulting set at INFO.
     */
    protected void addToOutstanding(String name) {
        final Set<String> snapshot;
        synchronized (this.outstandingReconfigurations) {
            this.outstandingReconfigurations.add(name);
            /*
             * Copy under the lock so that log formatting never iterates the
             * live set while another thread mutates it; skip the copy when
             * INFO is disabled.
             */
            snapshot = log.isLoggable(Level.INFO) ? new HashSet<String>(this.outstandingReconfigurations) : null;
        }
        if (snapshot != null)
            log.log(Level.INFO, "{0} has +outstanding = {1}", new Object[] { this, snapshot });
    }

    /*
     * Blocks the calling thread while the number of outstanding
     * reconfigurations is at least max; the condition is re-checked after
     * every wakeup.
     */
    protected void waitOutstanding(int max) throws InterruptedException {
        final Set<String> outstanding = this.outstandingReconfigurations;
        synchronized (outstanding) {
            while (outstanding.size() >= max) {
                outstanding.wait();
            }
        }
    }

    /*
     * Removes name from the set of outstanding reconfigurations, wakes up
     * threads blocked in waitOutstanding, and logs the resulting set at INFO.
     */
    protected void notifyOutstanding(String name) {
        final Set<String> snapshot;
        synchronized (this.outstandingReconfigurations) {
            this.outstandingReconfigurations.remove(name);
            /*
             * Wake all waiters: each re-checks its own max threshold, so a
             * single notify() could wake one that must keep waiting while
             * another that could proceed stays blocked.
             */
            this.outstandingReconfigurations.notifyAll();
            // copy under the lock so that logging never iterates the live set
            snapshot = log.isLoggable(Level.INFO) ? new HashSet<String>(this.outstandingReconfigurations) : null;
        }
        if (snapshot != null)
            log.log(Level.INFO, "{0} has -outstanding = {1}", new Object[] { this, snapshot });
    }

}