Java tutorial
/* Copyright (c) 2015 University of Massachusetts * * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. You may obtain a copy of * the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations under * the License. * * Initial developer(s): V. Arun */ package edu.umass.cs.gigapaxos; import edu.umass.cs.gigapaxos.PaxosConfig.PC; import edu.umass.cs.gigapaxos.interfaces.ClientRequest; import edu.umass.cs.gigapaxos.interfaces.ExecutedCallback; import edu.umass.cs.gigapaxos.interfaces.Replicable; import edu.umass.cs.gigapaxos.interfaces.Request; import edu.umass.cs.gigapaxos.paxospackets.AcceptPacket; import edu.umass.cs.gigapaxos.paxospackets.AcceptReplyPacket; import edu.umass.cs.gigapaxos.paxospackets.BatchedPaxosPacket; import edu.umass.cs.gigapaxos.paxospackets.FailureDetectionPacket; import edu.umass.cs.gigapaxos.paxospackets.FindReplicaGroupPacket; import edu.umass.cs.gigapaxos.paxospackets.PaxosPacket; import edu.umass.cs.gigapaxos.paxospackets.RequestPacket; import edu.umass.cs.gigapaxos.paxospackets.PaxosPacket.PaxosPacketType; import edu.umass.cs.gigapaxos.paxosutil.Ballot; import edu.umass.cs.gigapaxos.paxosutil.HotRestoreInfo; import edu.umass.cs.gigapaxos.paxosutil.IntegerMap; import edu.umass.cs.gigapaxos.paxosutil.LargeCheckpointer; import edu.umass.cs.gigapaxos.paxosutil.LogMessagingTask; import edu.umass.cs.gigapaxos.paxosutil.MessagingTask; import edu.umass.cs.gigapaxos.paxosutil.PaxosInstanceCreationException; import edu.umass.cs.gigapaxos.paxosutil.PaxosInstanceDestructionException; import edu.umass.cs.gigapaxos.paxosutil.PaxosMessenger; import 
edu.umass.cs.gigapaxos.paxosutil.OverloadException; import edu.umass.cs.gigapaxos.paxosutil.PaxosPacketDemultiplexer; import edu.umass.cs.gigapaxos.paxosutil.PendingDigests; import edu.umass.cs.gigapaxos.paxosutil.RateLimiter; import edu.umass.cs.gigapaxos.paxosutil.RecoveryInfo; import edu.umass.cs.gigapaxos.paxosutil.RequestInstrumenter; import edu.umass.cs.gigapaxos.paxosutil.StringContainer; import edu.umass.cs.gigapaxos.testing.TESTPaxosConfig; import edu.umass.cs.gigapaxos.testing.TESTPaxosApp; import edu.umass.cs.nio.AbstractJSONPacketDemultiplexer; import edu.umass.cs.nio.JSONMessenger; import edu.umass.cs.nio.JSONNIOTransport; import edu.umass.cs.nio.JSONPacket; import edu.umass.cs.nio.MessageExtractor; import edu.umass.cs.nio.MessageNIOTransport; import edu.umass.cs.nio.SSLDataProcessingWorker; import edu.umass.cs.nio.SSLDataProcessingWorker.SSL_MODES; import edu.umass.cs.nio.interfaces.Byteable; import edu.umass.cs.nio.interfaces.Messenger; import edu.umass.cs.nio.interfaces.InterfaceNIOTransport; import edu.umass.cs.nio.interfaces.NodeConfig; import edu.umass.cs.nio.interfaces.SSLMessenger; import edu.umass.cs.nio.interfaces.Stringifiable; import edu.umass.cs.nio.nioutils.NIOHeader; import edu.umass.cs.nio.nioutils.PacketDemultiplexerDefault; import edu.umass.cs.nio.nioutils.SampleNodeConfig; import edu.umass.cs.utils.Config; import edu.umass.cs.utils.Diskable; import edu.umass.cs.utils.GCConcurrentHashMap; import edu.umass.cs.utils.GCConcurrentHashMapCallback; import edu.umass.cs.utils.StringLocker; import edu.umass.cs.utils.Util; import edu.umass.cs.utils.DelayProfiler; import edu.umass.cs.utils.MultiArrayMap; import org.json.JSONArray; import org.json.JSONException; import org.json.JSONObject; import java.io.IOException; import java.io.UnsupportedEncodingException; import java.net.InetAddress; import java.net.InetSocketAddress; import java.net.UnknownHostException; import java.nio.ByteBuffer; import java.security.MessageDigest; import 
java.security.NoSuchAlgorithmException; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.Map; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.Executors; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.ScheduledFuture; import java.util.concurrent.ThreadFactory; import java.util.concurrent.TimeUnit; import java.util.logging.Level; import java.util.logging.Logger; /** * @author V. Arun * @param <NodeIDType> * * <p> * PaxosManager is the primary interface to create and use paxos by * creating a paxos instance. * * PaxosManager manages all paxos instances at a node. There is * typically one paxos manager per machine. This class could be * static, but it is not so that we can test emulations involving * multiple "machines" within a JVM. * * PaxosManager has four functions at a machine that are useful * across paxos instances of all applications on the machine: (1) * logging, (2) failure detection, (3) messaging, and (4) paxos * instance mapping. The fourth is key to allowing the manager to * demultiplex incoming messages to the appropriate application paxos * instance. 
*/ public class PaxosManager<NodeIDType> { // final private final AbstractPaxosLogger paxosLogger; // logging private final FailureDetection<NodeIDType> FD; // failure detection private final PaxosMessenger<NodeIDType> messenger; // messaging private final int myID; private final Replicable myApp; // default app for all paxosIDs // background deactivation/cremation tasks, all else event-driven private final ScheduledExecutorService executor; // paxos instance mapping private final MultiArrayMap<String, PaxosInstanceStateMachine> pinstances; // stopped paxos instances about to be incinerated private final HashMap<String, PaxosInstanceStateMachine> corpses; private final IntegerMap<NodeIDType> integerMap = new IntegerMap<NodeIDType>(); private final Stringifiable<NodeIDType> unstringer; private final RequestBatcher requestBatcher; private final PaxosPacketBatcher ppBatcher; private int outOfOrderLimit = PaxosInstanceStateMachine.SYNC_THRESHOLD; private int interCheckpointInterval = PaxosInstanceStateMachine.INTER_CHECKPOINT_INTERVAL; private int checkpointTransferTrigger = PaxosInstanceStateMachine.MAX_SYNC_DECISIONS_GAP; private long minResyncDelay = PaxosInstanceStateMachine.MIN_RESYNC_DELAY; private final boolean nullCheckpointsEnabled; private final Outstanding outstanding = new Outstanding(); private final LargeCheckpointer largeCheckpointer; private PendingDigests pendingDigests; private static final boolean USE_GC_MAP = Config.getGlobalBoolean(PC.USE_GC_MAP); private class Outstanding { int totalRequestSize = 0; long lastIncremented = System.currentTimeMillis(); ConcurrentHashMap<Long, RequestAndCallback> requests = USE_GC_MAP ? 
new GCConcurrentHashMap<Long, RequestAndCallback>(new GCConcurrentHashMapCallback() { @Override public void callbackGC(Object key, Object value) { PaxosManager.this.callbackRequestTimeout(((RequestAndCallback) value).requestPacket); } }, REQUEST_TIMEOUT) : new ConcurrentHashMap<Long, RequestAndCallback>(); HashMap<RequestPacket, RequestAndCallback> conflictIDRequests = new HashMap<RequestPacket, RequestAndCallback>(); ConcurrentHashMap<RequestPacket, RequestResponseAndCallback> responses = USE_GC_MAP ? new GCConcurrentHashMap<RequestPacket, RequestResponseAndCallback>( new GCConcurrentHashMapCallback() { @Override public void callbackGC(Object key, Object value) { } }, REQUEST_TIMEOUT) : new ConcurrentHashMap<RequestPacket, RequestResponseAndCallback>(); private void enqueue(RequestAndCallback rc) { assert (rc.requestPacket.getType() != PaxosPacketType.ACCEPT || rc.requestPacket.hasRequestValue()); RequestAndCallback prev = null; synchronized (this.requests) { if ((prev = this.requests.putIfAbsent(rc.requestPacket.requestID, rc)) == null) totalRequestSize += rc.requestPacket.lengthEstimate(); else if (!rc.requestPacket.equals(prev.requestPacket)) { // just skip accept packets if (!(rc.requestPacket instanceof AcceptPacket)) // insert in overflow this.conflictIDRequests.put(rc.requestPacket, rc); } } this.lastIncremented = System.currentTimeMillis(); } // called by executed callback private RequestAndCallback dequeue(RequestPacket request) { RequestAndCallback queued = this.requests.get(request.requestID); if (queued != null && queued.requestPacket.equals(request)) return this.requests.remove(request.requestID); else return this.conflictIDRequests.remove(request); } private long generateUnusedID() { Long requestID = null; do { requestID = (long) (Math.random() * Long.MAX_VALUE); } while (this.requests.containsKey(requestID)); return requestID; } } static final long REQUEST_TIMEOUT = Config.getGlobalLong(PC.REQUEST_TIMEOUT) * 1000; private static final long 
FADE_OUTSTANDING_TIMEOUT = REQUEST_TIMEOUT; private void GC() { if (this.outstanding.requests instanceof GCConcurrentHashMap) ((GCConcurrentHashMap<Long, RequestAndCallback>) this.outstanding.requests).tryGC(REQUEST_TIMEOUT); else if (System.currentTimeMillis() - this.outstanding.lastIncremented > PaxosManager.FADE_OUTSTANDING_TIMEOUT) { if (this.outstanding.requests.size() > MAX_OUTSTANDING_REQUESTS) PaxosManager.log.severe(this + " clearing clogged outstanding queue"); this.outstanding.requests.clear(); } } /** * */ public static class RequestAndCallback { /** * */ public final RequestPacket requestPacket; /** * */ final ExecutedCallback callback; RequestAndCallback(RequestPacket request, ExecutedCallback callback) { this.requestPacket = request; this.callback = callback; } } static class RequestResponseAndCallback extends RequestAndCallback { /* this is actually the request whose getResponse contains the response. */ final Request clientRequest; RequestResponseAndCallback(RequestPacket request, Request response, ExecutedCallback callback) { super(request, callback); this.clientRequest = response; } } private static final boolean NO_RESPONSE = Config.getGlobalBoolean(PC.NO_RESPONSE); // default callback tries to send back response private void defaultCallback(Request response, InetSocketAddress clientAddress, InetSocketAddress listenAddress) { if (response == null || !(response instanceof ClientRequest)) return; // waiting for others to remove this method if (clientAddress != null && response != null && !NO_RESPONSE && !clientAddress.equals(RequestPacket.NULL_SOCKADDR)) { try { this.send(clientAddress, response, listenAddress); } catch (JSONException | IOException e) { e.printStackTrace(); } } } // default callback tries to send back response @SuppressWarnings("deprecation") private void defaultCallback(RequestPacket requestPacket, Request request) { if (request == null || !(request instanceof ClientRequest)) return; this.defaultCallback(((ClientRequest) 
request).getResponse(), ((ClientRequest) request).getClientAddress(), requestPacket.getListenAddress()); } // called by PaxosInstanceStateMachine as execute callback protected boolean executed(RequestPacket requestPacket, Request request, boolean sendResponse) { RequestAndCallback rc = this.outstanding.dequeue(requestPacket); if (rc != null) this.outstanding.totalRequestSize -= rc.requestPacket.lengthEstimate(); RequestInstrumenter.remove(requestPacket.requestID); // cache response if (ENABLE_RESPONSE_CACHING && request instanceof ClientRequest) this.outstanding.responses.put(requestPacket, new RequestResponseAndCallback(requestPacket, ((ClientRequest) request), rc != null ? rc.callback : null)); // only called if executed if (rc != null && rc.callback != null) rc.callback.executed(request, true); else if (sendResponse) this.defaultCallback(requestPacket, request); assert (requestPacket.batchSize() == 0); return rc != null; } protected boolean retransmittedRequest(RequestPacket requestPacket) { RequestResponseAndCallback rrc = null; if (ENABLE_RESPONSE_CACHING && (rrc = this.outstanding.responses.get((requestPacket))) != null) { RequestInstrumenter.remove(requestPacket.requestID); RequestAndCallback rc = this.outstanding.dequeue(requestPacket); if (rc.callback != null) rc.callback.executed(rrc.clientRequest, false); else if (rrc.callback != null) rrc.callback.executed(rrc.clientRequest, false); else this.defaultCallback(rrc.clientRequest, requestPacket.getClientAddress(), requestPacket.getListenAddress()); assert (requestPacket.batchSize() == 0); } return rrc != null; } // non-final private boolean hasRecovered = false; // need this to be static so DB can be closed gracefully private static boolean closed = false; // need this to be static so DB can be closed gracefully private static int processing = 0; /* Note: PaxosManager itself maintains no NIO transport instance as it * delegates all communication related activities to other objects. 
* PaxosManager is only responsible for managing state for and
     * demultiplexing incoming packets to a number of paxos instances at this
     * node. */
    // java.util.logging logger named after this class
    private static Logger log = Logger.getLogger(PaxosManager.class.getName());

    /**
     * Constructs a paxos manager for node {@code id}.
     *
     * @param id
     *            My node ID.
     * @param unstringer
     *            An instance of Stringifiable that can convert String to
     *            NodeIDType.
     * @param niot
     *            InterfaceNIOTransport or InterfaceMessenger object used for
     *            messaging.
     * @param pi
     *            InterfaceReplicable application controlled by gigapaxos.
     *            Currently, all paxos instances must correspond to a single
     *            umbrella application even though each createPaxosInstance
     *            method explicitly specifies the app and this information is
     *            stored explicitly inside a paxos instance. The reason for the
     *            single umbrella app restriction is that we won't have a
     *            pointer to the appropriate app upon recovery otherwise.
     * @param paxosLogFolder
     *            Paxos logging folder.
     * @param enableNullCheckpoints
     *            Whether null checkpoints are enabled. We need this flag to be
     *            enabled if we intend to reconfigure paxos groups managed by
     *            this PaxosManager. Otherwise, we can not distinguish between a
     *            null checkpoint and no checkpoint, so the next epoch members
     *            may be waiting forever for the previous epoch's final state
     *            (that happens to be null). This flag needs to be set at
     *            construction time and can not be changed thereafter.
*/ public PaxosManager(NodeIDType id, Stringifiable<NodeIDType> unstringer, InterfaceNIOTransport<NodeIDType, JSONObject> niot, Replicable pi, String paxosLogFolder, boolean enableNullCheckpoints) { this.myID = this.integerMap.put(id);// id.hashCode(); this.executor = Executors.newScheduledThreadPool(1, new ThreadFactory() { @Override public Thread newThread(Runnable r) { Thread thread = Executors.defaultThreadFactory().newThread(r); thread.setName(PaxosManager.class.getSimpleName() + myID); return thread; } }); this.unstringer = unstringer; this.largeCheckpointer = new LargeCheckpointer(paxosLogFolder, this.myID + ""); this.myApp = LargeCheckpointer.wrap(pi, largeCheckpointer); this.FD = new FailureDetection<NodeIDType>(id, niot, paxosLogFolder); this.pinstances = new MultiArrayMap<String, PaxosInstanceStateMachine>( Config.getGlobalInt(PC.PINSTANCES_CAPACITY)); this.corpses = new HashMap<String, PaxosInstanceStateMachine>(); // this.activePaxii = new HashMap<String, ActivePaxosState>(); this.messenger = (new PaxosMessenger<NodeIDType>(niot, this.integerMap)); this.paxosLogger = new SQLPaxosLogger(this.myID, id.toString(), paxosLogFolder, this.wrapMessenger(this.messenger)); this.nullCheckpointsEnabled = enableNullCheckpoints; // periodically remove active state for idle paxii executor.scheduleWithFixedDelay(new Deactivator(), 0, Config.getGlobalInt(PC.DEACTIVATION_PERIOD), TimeUnit.MILLISECONDS); this.pendingDigests = new PendingDigests(this.outstanding.requests, Config.getGlobalInt(PC.NUM_MESSAGE_DIGESTS), new PendingDigests.PendingDigestCallback() { public void callback(AcceptPacket accept) { PaxosManager.this.callbackDigestedAcceptTimeout(accept); } }); this.initOutstandingMonitor(); (this.requestBatcher = new RequestBatcher(this)).start(); (this.ppBatcher = new PaxosPacketBatcher(this)).start(); testingInitialization(); // needed to unclose when testing multiple runs of open and close open(); // so paxos packets will come to me before anyone else 
niot.precedePacketDemultiplexer( Config.getGlobalString(PC.JSON_LIBRARY).equals("org.json") ? new JSONDemultiplexer() : new FastDemultiplexer()); initiateRecovery(); if (!Config.getGlobalBoolean(PC.DELAY_PROFILER)) DelayProfiler.disable(); } private void initOutstandingMonitor() { final long monitorIterval = Config.getGlobalLong(PC.DEBUG_MONITOR); this.executor.scheduleWithFixedDelay(new Runnable() { @Override public void run() { try { if ((PaxosManager.this.outstanding.requests.size() > 0 || PaxosManager.this.pendingDigests.size() > 0) && (System.currentTimeMillis() - PaxosManager.this.outstanding.lastIncremented > REQUEST_TIMEOUT) || monitorIterval > 0) { HashMap<Long, String> instances = new HashMap<Long, String>(); for (RequestAndCallback rc : PaxosManager.this.outstanding.requests.values()) instances.put(rc.requestPacket.requestID, rc.requestPacket.getPaxosID() + ":" + rc.requestPacket.getSummary()); log.log(Level.INFO, "{0} |outstanding|={1}; {2}; |unpaused|={3}; \n|pending|={4}; {5}", new Object[] { PaxosManager.this, PaxosManager.this.outstanding.requests.size(), Util.truncatedLog(instances.entrySet(), 10), PaxosManager.this.pinstances.size(), PaxosManager.this.pendingDigests, DelayProfiler.getStats() }); if (!PaxosManager.this.outstanding.requests.isEmpty() && PaxosManager.this.outstanding.requests instanceof GCConcurrentHashMap) ((GCConcurrentHashMap<Long, RequestAndCallback>) PaxosManager.this.outstanding.requests) .tryGC(REQUEST_TIMEOUT); } } catch (Exception e) { e.printStackTrace(); } } }, 0, monitorIterval > 0 ? monitorIterval : REQUEST_TIMEOUT, TimeUnit.SECONDS); } /** * Refer * {@link #PaxosManager(Object, Stringifiable, InterfaceNIOTransport, Replicable, String, boolean)} * . 
*
     * @param id
     *            Forwarded as {@code id} to the six-argument constructor.
     * @param nc
     *            Forwarded as the {@code unstringer} argument.
     * @param niot
     *            Forwarded as {@code niot}.
     * @param app
     *            Forwarded as the application argument; the log folder is
     *            passed as null and null checkpoints follow
     *            PaxosInstanceStateMachine.ENABLE_NULL_CHECKPOINT_STATE.
     */
    public PaxosManager(NodeIDType id, Stringifiable<NodeIDType> nc,
            InterfaceNIOTransport<NodeIDType, JSONObject> niot, Replicable app) {
        this(id, nc, niot, (app), null, PaxosInstanceStateMachine.ENABLE_NULL_CHECKPOINT_STATE);
    }

    /* We need to be careful with pause/unpause and createPaxosInstance as there
     * are potential cyclic dependencies. The call chain is as below, where
     * "info" is the information needed to create a paxos instance. The
     * notation "->?" means the call may or may not happen, which is why the
     * recursion breaks after at most one step.
     *
     * On recovery: initiateRecovery() -> recover(info) ->
     * createPaxosInstance(info) -> getInstance(paxosID, tryHotRestore) ->?
     * unpause(paxosID) ->? createPaxosInstance(info, hri). An infinite
     * recursion is prevented because createPaxosInstance will not again call
     * getInstance with the tryHotRestore option if hri!=null.
     *
     * Upon createPaxosInstance any time after recovery, the same chain as above
     * is followed.
     *
     * On deactivation: deactivate(paxosID) ->? pause(paxosID) // may or may not
     * be successful
     *
     * On incoming packet: handleIncomingPacket() -> getInstance(paxosID) ->?
     * unpause(paxosID) ->? createPaxosInstance(info) */

    /**
     * Returns members of this paxos instance if it is active, and null
     * otherwise. Note that this means that we return null even if the paxos
     * instance is being created (but has not yet completed recovery). A
     * non-null answer to this method necessarily means that the paxos instance
     * was ready to take more requests at the time of the query.
     *
     * @param paxosID
     *            The name of the object or service being managed as a
     *            replicated state machine.
     * @param version
     *            The reconfiguration version.
     * @return Set of members in the paxos instance paxosID:version.
*/ public Set<NodeIDType> getReplicaGroup(String paxosID, int version) { PaxosInstanceStateMachine pism = this.getInstance(paxosID); if (pism != null && pism.getVersion() == version && pism.isActive()) return this.integerMap.getIntArrayAsNodeSet(pism.getMembers()); return null; } /** * @param paxosID * @return Set of members in the paxos instance named paxosID. There can * only be one version of a paxos instance at a node, so we don't * really have to specify a version unless we want to explicitly get * the members for a specific version, in which case, * {@link #getReplicaGroup(String, int)} should be used. */ public Set<NodeIDType> getReplicaGroup(String paxosID) { PaxosInstanceStateMachine pism = this.getInstance(paxosID); if (pism != null && pism.isActive()) return this.integerMap.getIntArrayAsNodeSet(pism.getMembers()); return null; } // is current and not stopped (and may not yet be active) protected boolean isCurrent(String paxosID, int version) { PaxosInstanceStateMachine pism = this.getInstance(paxosID); if (pism != null && pism.getVersion() == version) return true; return false; } private void synchronizedNoop(String paxosID, int version) { PaxosInstanceStateMachine pism = this.getInstance(paxosID); if (pism != null && pism.getVersion() == version) pism.synchronizedNoop(); } /** * A method that specifies the minimum number of necessary arguments to * create a paxos instance. The version is assumed to be 0 here. The use of * this method is encouraged only if reconfiguration is not desired. The * initialState argument can be {@code null} if * {@link #isNullCheckpointStateEnabled()} is true. * * @param paxosID * @param gms * @param initialState * @return Returns true if the paxos instance paxosID:version or one with a * higher version number was successfully created. 
*/
    public boolean createPaxosInstance(String paxosID, Set<NodeIDType> gms, String initialState) {
        return this.createPaxosInstance(paxosID, 0, gms, myApp, initialState, null, true) != null;
    }

    /**
     * Paxos instance creation with an initial state specified.
     *
     * @param paxosID
     *            Paxos group name.
     * @param version
     *            Paxos group version (or epoch number).
     * @param gms
     *            Group members.
     * @param app
     *            Application controlled by paxos.
     * @param initialState
     *            Initial application state.
     * @return Whether this paxos instance or higher got created.
     */
    public boolean createPaxosInstance(String paxosID, int version, Set<NodeIDType> gms,
            Replicable app, String initialState) {
        return this.createPaxosInstance(paxosID, version, gms, app, initialState, null,
                true) != null;
    }

    // same as the eight-argument variant with missedBirthing=false
    private PaxosInstanceStateMachine createPaxosInstance(String paxosID, int version,
            Set<NodeIDType> gms, Replicable app, String initialState, HotRestoreInfo hri,
            boolean tryRestore) {
        return this.createPaxosInstance(paxosID, version, gms, app, initialState, hri,
                tryRestore, false);
    }

    private static final boolean SNEAKY_BATCH_CREATION = true;

    /**
     * Blocks while the instance map is at (or over) capacity, waiting on the
     * map's monitor to be notified.
     *
     * The capacity check is made while holding the monitor so that a
     * notification issued between the check and the wait can not be missed.
     */
    private void waitPinstancesSize() {
        synchronized (this.pinstances) {
            while (this.pinstances.size() >= this.pinstances.capacity()) {
                try {
                    this.pinstances.wait();
                } catch (InterruptedException e) {
                    // as before: report and re-check the condition
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * @param nameStates
     * @param gms
     * @return True if all successfully created.
*/ public boolean createPaxosInstance(Map<String, String> nameStates, Set<NodeIDType> gms) { waitPinstancesSize(); synchronized (this) { int[] members = Util.setToIntArray(this.integerMap.put(gms)); log.log(Level.INFO, "{0} batch-inserting initial checkpoints for {1} names: {2}", new Object[] { this, nameStates.size(), Util.truncatedLog(nameStates.entrySet(), 8) }); if (SNEAKY_BATCH_CREATION) this.paxosLogger.insertInitialCheckpoints(nameStates, Util.setToStringSet(gms), members); boolean created = true; for (String name : nameStates.keySet()) { assert (nameStates.get(name) != null); created = created && (SNEAKY_BATCH_CREATION ? this.createPaxosInstance(name, 0, gms, this.myApp, nameStates.get(name), HotRestoreInfo.createHRI(name, members, PaxosInstanceStateMachine.roundRobinCoordinator(name, members, 0)), false) : this.createPaxosInstance(name, gms, nameStates.get(name))) != null; } return created; } } private PaxosInstanceStateMachine createPaxosInstance(String paxosID, int version, Set<NodeIDType> gms, Replicable app, String initialState, HotRestoreInfo hri, boolean tryRestore, boolean missedBirthing) { this.waitPinstancesSize(); return this.createPaxosInstanceFinal(paxosID, version, gms, app, initialState, hri, tryRestore, missedBirthing); } private long totalInstancesCreated = 0; private long totalInstancesKilled = 0; private boolean totalInstancesChanged = false; private synchronized long getNumInstances() { totalInstancesChanged = false; return totalInstancesCreated - totalInstancesKilled; } private synchronized long getNumInstancesAndResetChanged() { totalInstancesChanged = false; return this.getNumInstances(); } private synchronized long getNumCreated() { return totalInstancesCreated; } private synchronized long incrCreated() { totalInstancesChanged = true; return ++totalInstancesCreated; } private synchronized long incrKilled() { totalInstancesChanged = true; return ++totalInstancesKilled; } /* Synchronized in order to prevent duplicate instance creation 
under
     * concurrency. This is the only method that can actually create a paxos
     * instance. All other methods just call this method eventually.
     *
     * private because it ensures that initialState!=null and missedBirthing are
     * not both true.
     *
     * Returns the newly created instance, or null if this manager is closed,
     * if an instance with this name is already present (any version), or if an
     * equal or higher version was previously stopped. Throws
     * PaxosInstanceCreationException if this node is not in gms or if the
     * instance constructor throws. */
    private synchronized PaxosInstanceStateMachine createPaxosInstanceFinal(String paxosID,
            int version, Set<NodeIDType> gms, Replicable app, String initialState,
            HotRestoreInfo hri, boolean tryRestore, boolean missedBirthing) {
        if (this.isClosed())
            return null;

        if (!gms.contains(this.getNodeID()))
            throw new PaxosInstanceCreationException(this.getNodeID()
                    + " can not create a paxos instance for group " + gms
                    + " to which it does not belong");

        // hot restore is attempted only after recovery and when no hri was supplied
        boolean tryHotRestore = (hasRecovered() && hri == null);
        PaxosInstanceStateMachine pism = this.getInstance(paxosID, tryHotRestore, tryRestore);
        // if equal or higher version exists, return null
        if ((pism != null) && (pism.getVersion() - version >= 0)) {
            log.log(Level.FINE, "{0} paxos instance {1}:{2} or higher version currently exists",
                    new Object[] { this, paxosID, version });
            return null;
        }

        // if lower version exists, return null
        if (pism != null && (pism.getVersion() - version < 0)) {
            log.log(Level.INFO,
                    "{0} has pre-existing paxos instance {1} when asked to create version {2}",
                    new Object[] { this, pism.getPaxosIDVersion(), version });
            // pism must be explicitly stopped first
            return null; // initialState will also be ignored here
        }

        // if equal or higher version stopped on disk, return null
        if (pism == null && equalOrHigherVersionStopped(paxosID, version)) {
            log.log(Level.INFO,
                    "{0} paxos instance {1}:{2} can not be created as equal or higher "
                            + "version {3}:{4} was previously created and stopped",
                    new Object[] { this, paxosID, version, paxosID, this.getVersion(paxosID) });
            return null;
        }

        try {
            // else try to create (could still run into exception)
            pism = new PaxosInstanceStateMachine(paxosID, version, myID, this.integerMap.put(gms),
                    app != null ? app : this.myApp, initialState, this, hri, missedBirthing);
        } catch (Exception e) {
            // NOTE(review): only the message is propagated; the original cause is dropped
            throw new PaxosInstanceCreationException(e.getMessage());
        }

        // register the instance before anything that may need to look it up
        pinstances.put(paxosID, pism);
        incrCreated();
        this.notifyUponCreation();
        assert (this.getInstance(paxosID, false, false) != null);
        log.log(Level.FINE, "{0} successfully {1} paxos instance {2}",
                new Object[] { this, hri != null ? "unpaused" : "created", pism.getPaxosIDVersion() });
        /* Note: rollForward can not be done inside the instance as we first
         * need to update the instance map here so that networking--even
         * trivially sending message to self--works. */
        assert (hri == null || hasRecovered());
        if (hri == null) // not hot restore
            rollForward(paxosID, version);
        // to sync decisions initially if needed or if missed birthing
        this.syncPaxosInstance(pism, missedBirthing);

        // keepalives only if needed
        this.FD.sendKeepAlive(gms);
        this.integerMap.put(gms);
        this.addServers(gms);
        return pism;
    }

    // pokes the instance (if non-null), passing forceSync through
    private void syncPaxosInstance(PaxosInstanceStateMachine pism, boolean forceSync) {
        if (pism != null)
            pism.poke(forceSync);
    }

    /**
     * When a node is being permanently deleted.
     *
     * @param id
     * @return True if {@code id} was being monitored.
*/ public boolean stopFailureMonitoring(NodeIDType id) { this.removeServer(id); return this.FD.dontSendKeepAlive(id); } private boolean canCreateOrExistsOrHigher(String paxosID, int version) { PaxosInstanceStateMachine pism = this.getInstance(paxosID); return (pism == null || (pism.getVersion() - version >= 0)); } private Set<InetAddress> servers = new HashSet<InetAddress>(); private boolean isServer(InetAddress isa) { // play safe if we can't distinguish servers from clients if (!(this.unstringer instanceof NodeConfig)) return true; return servers.contains(isa); } protected void addServers(Set<NodeIDType> nodes) { if (this.unstringer instanceof NodeConfig) for (NodeIDType node : nodes) { this.servers.add(((NodeConfig<NodeIDType>) this.unstringer).getNodeAddress(node)); } } private void removeServer(NodeIDType node) { if (this.unstringer instanceof NodeConfig) servers.remove(((NodeConfig<NodeIDType>) this.unstringer).getNodeAddress(node)); } private static int MAX_OUTSTANDING_REQUESTS = Config.getGlobalInt(PC.MAX_OUTSTANDING_REQUESTS); private final boolean DISABLE_CC = Config.getGlobalBoolean(PC.DISABLE_CC); private final boolean ORDER_PRESERVING_REQUESTS = Config.getGlobalBoolean(PC.ORDER_PRESERVING_REQUESTS); // older demultiplexer based purely on JSON class JSONDemultiplexer extends AbstractJSONPacketDemultiplexer { private final boolean clientFacing; public JSONDemultiplexer() { this(Config.getGlobalInt(PC.PACKET_DEMULTIPLEXER_THREADS), false); } public JSONDemultiplexer(int numThreads, boolean clientFacing) { super(numThreads); this.register(PaxosPacket.PaxosPacketType.PAXOS_PACKET); this.setThreadName(myID + (clientFacing ? 
"-clientFacing" : "")); this.clientFacing = clientFacing; } public boolean handleMessage(JSONObject jsonMsg) { try { PaxosManager.log.log(Level.FINEST, "{0} packet json demultiplexer received {1}", new Object[] { PaxosManager.this, jsonMsg }); PaxosManager.this.handleIncomingPacket(edu.umass.cs.gigapaxos.paxosutil.PaxosPacketDemultiplexer .toPaxosPacket(fixNodeStringToInt(jsonMsg), PaxosManager.this.unstringer)); return true; } catch (JSONException e) { log.severe(this + " unable to parse JSON or unable fix node ID string to integer"); e.printStackTrace(); } catch (OverloadException oe) { if (this.clientFacing) PaxosPacketDemultiplexer.throttleExcessiveLoad(); } return true; } @Override protected boolean isCongested(NIOHeader header) { if (DISABLE_CC) return false; if (PaxosManager.this.isServer(header.sndr.getAddress())) return false; // if(!clientFacing) return false; boolean congested = PaxosManager.this.isCongested(); if (congested) log.log(Level.WARNING, "{0} congested; rate limiting requests from {1}; (outstanding={2} > {3})", new Object[] { this, header.sndr.getAddress(), PaxosManager.this.getNumOutstandingOrQueued(), MAX_OUTSTANDING_REQUESTS }); return congested; } @Override public boolean isOrderPreserving(JSONObject msg) { if (!ORDER_PRESERVING_REQUESTS) return false; try { // only preserve order for REQUEST or PROPOSAL packets PaxosPacketType type = PaxosPacket.getPaxosPacketType(msg); return (type.equals(PaxosPacket.PaxosPacketType.REQUEST) || type.equals(PaxosPacket.PaxosPacketType.PROPOSAL)); } catch (JSONException e) { log.severe(this + " incurred JSONException while parsing " + msg); e.printStackTrace(); } return false; } } protected boolean isCongested() { this.GC(); return PaxosManager.this.getNumOutstandingOrQueued() > MAX_OUTSTANDING_REQUESTS; } // faster demultiplexer based on byte[] serialization class FastDemultiplexer extends edu.umass.cs.gigapaxos.paxosutil.PaxosPacketDemultiplexerFast { public FastDemultiplexer(int numThreads, boolean 
clientFacing) { super(numThreads); this.setThreadName(PaxosManager.this.intToString(myID) + (clientFacing ? "-clientFacing" : "")); this.register(PaxosPacket.PaxosPacketType.PAXOS_PACKET); } public FastDemultiplexer() { this(Config.getGlobalInt(PC.PACKET_DEMULTIPLEXER_THREADS), false); } public boolean handleMessage(Object msg) { // long t = System.nanoTime(); assert (msg != null); if (msg instanceof net.minidev.json.JSONObject) try { PaxosPacketType type = null; net.minidev.json.JSONObject jsonMsg = (net.minidev.json.JSONObject) msg; assert ((type = PaxosPacket.getPaxosPacketType(jsonMsg)) != PaxosPacketType.ACCEPT || jsonMsg.containsKey(RequestPacket.Keys.STRINGIFIED.toString())); long t = System.nanoTime(); PaxosPacket pp = edu.umass.cs.gigapaxos.paxosutil.PaxosPacketDemultiplexerFast .toPaxosPacket(fixNodeStringToInt(jsonMsg), PaxosManager.this.unstringer); Level level = type == PaxosPacketType.REQUEST ? Level.FINE : Level.FINEST; PaxosManager.log.log(level, "{0} packet fast demultiplexer received {1}", new Object[] { PaxosManager.this, pp.getSummary(log.isLoggable(level)) }); if (PaxosMessenger.INSTRUMENT_SERIALIZATION && Util.oneIn(100)) if (pp.getType() == PaxosPacketType.REQUEST) DelayProfiler.updateDelayNano("requestPacketization", t); PaxosManager.this.handleIncomingPacket(pp); return true; } catch (JSONException e) { log.severe(this + " incurred JSONException while parsing " + msg); e.printStackTrace(); } try { // else assert (msg instanceof PaxosPacket); PaxosPacketType type = ((PaxosPacket) msg).getType(); Level level = (type == PaxosPacketType.REQUEST ? Level.FINE : Level.FINEST); PaxosManager.log.log(level, "{0} packet fast-byte demultiplexer received {1} {2}", new Object[] { PaxosManager.this, ((PaxosPacket) msg).getSummary( // log.isLoggable(level) ), msg instanceof RequestPacket && (((RequestPacket) msg).getEntryReplica() == PaxosManager.this.myID || ((RequestPacket) msg).getEntryReplica() == IntegerMap.NULL_INT_NODE) ? 
"from client" : "" }); /* FIXME: Need to fixNodeStringToInt. Unclear how to do the * reverse efficiently while sending out messages. So we * currently byteify paxos packets only when all node IDs are * integers. */ PaxosManager.this.handleIncomingPacket(((PaxosPacket) msg)); } catch (Exception | Error e) { e.printStackTrace(); } return true; } @Override protected boolean matchesType(Object message) { return message instanceof net.minidev.json.JSONObject; } @Override protected boolean isCongested(NIOHeader header) { if (DISABLE_CC) return false; if (PaxosManager.this.isServer(header.sndr.getAddress())) return false; // if(!clientFacing) return false; return PaxosManager.this.isCongested(); } // @Override public boolean isOrderPreservingFIXME_REMOVE(net.minidev.json.JSONObject msg) { if (!ORDER_PRESERVING_REQUESTS) return false; try { // only preserve order for REQUEST or PROPOSAL packets PaxosPacketType type = PaxosPacket.getPaxosPacketType(msg); return (type.equals(PaxosPacket.PaxosPacketType.REQUEST) || type.equals(PaxosPacket.PaxosPacketType.PROPOSAL)); } catch (JSONException e) { log.severe(this + " incurred JSONException while parsing " + msg); e.printStackTrace(); } return false; } } private static final boolean BATCHING_ENABLED = Config.getGlobalBoolean(PC.BATCHING_ENABLED); private static final boolean ENABLE_RESPONSE_CACHING = Config.getGlobalBoolean(PC.ENABLE_RESPONSE_CACHING); private void handleIncomingPacket(PaxosPacket pp) { if (ENABLE_RESPONSE_CACHING && pp.getType() == PaxosPacketType.REQUEST && this.retransmittedRequest(((RequestPacket) pp))) return; else if (pp.getType() == PaxosPacketType.BATCHED_PAXOS_PACKET) for (PaxosPacket packet : ((BatchedPaxosPacket) pp).getPaxosPackets()) this.handleIncomingPacket(packet); else if (BATCHING_ENABLED) this.enqueueRequest(pp); else this.handlePaxosPacket(pp); } /* If RequestPacket, hand over to batcher that will then call * handleIncomingPacketInternal on batched requests. 
*/ private void enqueueRequest(PaxosPacket pp) { PaxosPacketType type = pp.getType(); Level level = Level.FINEST; if ((type.equals(PaxosPacketType.REQUEST) || type.equals(PaxosPacketType.PROPOSAL)) && RequestBatcher.shouldEnqueue() && !((RequestPacket) pp).isBroadcasted()) { if (pp.getPaxosID() != null) { log.log(level, "{0} enqueueing request {1}", new Object[] { this, pp.getSummary(log.isLoggable(level)) }); this.requestBatcher.enqueue(((RequestPacket) pp)); } else error((RequestPacket) pp); } else { log.log(level, "{0} handling paxos packet {1} directly without enqueueuing", new Object[] { this, pp.getSummary(log.isLoggable(level)) }); this.handlePaxosPacket(pp); } } private void error(RequestPacket req) { log.warning(this + " received request with no paxosID: " + req.getSummary()); } @SuppressWarnings("unchecked") private void handlePaxosPacket(PaxosPacket request) { if (this.isClosed()) return; else if (emulateUnreplicated(request) || this.emulateLazyPropagation(request)) return; // testing else setProcessing(true); Level level = Level.FINEST; PaxosPacketType paxosPacketType; try { // will throw exception if no PAXOS_PACKET_TYPE paxosPacketType = request.getType(); switch (paxosPacketType) { case FAILURE_DETECT: processFailureDetection((FailureDetectionPacket<NodeIDType>) request); break; case FIND_REPLICA_GROUP: processFindReplicaGroup((FindReplicaGroupPacket) request); break; default: // paxos protocol messages assert (request.getPaxosID() != null) : request.toJSONSmart().toString(); if (request instanceof RequestPacket) // base and super types ((RequestPacket) request).addDebugInfo("i", myID); PaxosInstanceStateMachine pism = this.getInstance(request.getPaxosID()); log.log(level, "{0} received paxos message for {1} : {2}", new Object[] { this, pism != null ? 
pism : "non-existent instance", request.getSummary(log.isLoggable(level)) }); if ((pism != null) && (pism.getVersion() == request.getVersion()) && (!pism.isStopped())) pism.handlePaxosMessage(request); else // for recovering group created while crashed this.findPaxosInstance(request); break; } } catch (JSONException je) { log.severe("Node" + this.myID + " received bad JSON message: " + request); je.printStackTrace(); } finally { setProcessing(false); } } private void processFailureDetection(FailureDetectionPacket<NodeIDType> request) { if (request.getSender() != null) { this.servers.add(request.getSender().getAddress()); } FD.receive((FailureDetectionPacket<NodeIDType>) request); } private String propose(String paxosID, RequestPacket requestPacket, ExecutedCallback callback) { if (this.isClosed()) return null; boolean matched = false; PaxosInstanceStateMachine pism = this.getInstance(paxosID); if (pism != null) { matched = true; requestPacket.putPaxosID(paxosID, pism.getVersion()); log.log(Level.FINE, "{0} proposing to {1}: {2}", new Object[] { this, pism.getPaxosIDVersion(), requestPacket.getSummary() }); this.outstanding.enqueue(new RequestAndCallback(requestPacket, callback)); this.handleIncomingPacket(requestPacket); } else log.log(Level.INFO, "{0} could not find paxos instance {1} for request {2} with body {3}; " + " last known version was [{4}]", new Object[] { this, paxosID, requestPacket.getSummary(), Util.truncate(requestPacket.getRequestValues()[0], 64), this.getVersion(paxosID) }); return matched ? pism.getPaxosIDVersion() : null; } // used (only) by RequestBatcher for already batched RequestPackets protected void proposeBatched(RequestPacket requestPacket) { if (requestPacket != null) this.handlePaxosPacket(requestPacket); } /** * Propose a request to the paxos group with name paxosID. 
* * @param paxosID * @param requestPacket * @param callback * @return The paxosID:version represented as a String to which the request * got proposed; null if no paxos group named paxosID exists * locally. */ public String propose(String paxosID, String requestPacket, ExecutedCallback callback) { return propose(paxosID, (new RequestPacket(this.outstanding.generateUnusedID(), requestPacket, false)) .setReturnRequestValue(), callback); } /** * Avoids unnecessary conversion to string and back if request happens to be * RequestPacket. * * @param paxosID * @param request * @param callback * @return Refer {@link #propose(String, String,ExecutedCallback)}. */ public String propose(String paxosID, Request request, ExecutedCallback callback) { return this.propose(paxosID, this.getRequestPacket(request), callback); } /** * @param paxosID * @param request * @param callback * @return Refer {@link #proposeStop(String, String, ExecutedCallback)}. */ public String proposeStop(String paxosID, Request request, ExecutedCallback callback) { // if (request instanceof RequestPacket) // return this.propose(paxosID, (RequestPacket) request, callback); return this.propose(paxosID, this.getRequestPacket(request, true), callback); } @SuppressWarnings("deprecation") private RequestPacket getRequestPacket(Request request, boolean stop) { return request instanceof RequestPacket ? // return as-is (stop == ((RequestPacket) request).isStopRequest()) ? (RequestPacket) request : // fix stop new RequestPacket(((RequestPacket) request).requestID, ((RequestPacket) request).requestValue, stop, (RequestPacket) request) : // copy over ClientRequest info request instanceof ClientRequest ? 
(new RequestPacket(((ClientRequest) request).getRequestID(), ((ClientRequest) request).toString(), stop, ((ClientRequest) request).getClientAddress())).setReturnRequestValue() : // default new RequestPacket(this.outstanding.generateUnusedID(), request.toString(), stop) .setReturnRequestValue(); } private RequestPacket getRequestPacket(Request request) { return this.getRequestPacket(request, false); } /** * @param paxosID * @param version * @param request * @param callback * @return Refer {@link #proposeStop(String, int, Request,ExecutedCallback)} * . */ public String proposeStop(String paxosID, int version, Request request, ExecutedCallback callback) { PaxosInstanceStateMachine pism = this.getInstance(paxosID); if (pism != null && pism.getVersion() == version) { return this.propose(paxosID, (RequestPacket) (this.getRequestPacket(request, true).putPaxosID(paxosID, version)), callback); } else log.log(Level.INFO, "{0} failed to propose stop request for {1}:{2}; {3}", new Object[] { this, paxosID, version, (pism == null ? " no paxos instance found " : pism.getPaxosIDVersion() + " pre-exists") }); return null; } /** * Proposes a request to stop a specific version of paxosID. There is no way * to stop a paxos replicated state machine other than by issuing a stop * request. The only way to know for sure that the RSM is stopped is by the * application receiving the stop request. * * @param paxosID * @param value * @param version * @param callback * @return The paxosID:version represented as a String to which this request * was proposed. 
*/ public String proposeStop(String paxosID, int version, String value, ExecutedCallback callback) { PaxosInstanceStateMachine pism = this.getInstance(paxosID); if (pism != null && pism.getVersion() == version) { return this.propose(paxosID, (RequestPacket) ((new RequestPacket(this.outstanding.generateUnusedID(), value, true)) .setReturnRequestValue().putPaxosID(paxosID, version)), callback); } else log.log(Level.INFO, "{0} failed to propose stop request for {1}:{2}; {3}", new Object[] { this, paxosID, version, (pism == null ? " no paxos instance found " : pism.getPaxosIDVersion() + " pre-exists") }); return null; } /** * Stop the paxos instance named paxosID on this machine irrespective of the * version. Note that there can be at most one paxos version with a given * name on a machine. * * @param paxosID * @param value * @param callback * @return The paxosID:version string to which the request was proposed. */ public String proposeStop(String paxosID, String value, ExecutedCallback callback) { PaxosInstanceStateMachine pism = this.getInstance(paxosID); if (pism == null) return null; else return this.proposeStop(paxosID, pism.getVersion(), value, callback); } /** * @param paxosID * @param version * @return The final state wrapped in a StringContainer. We use * StringContainer to distinguish between null state and no state * when null checkpoint state is enabled. */ public StringContainer getFinalState(String paxosID, int version) { /* The wait below is a hack to force a little wait through * synchronization. If a stop request is being executed concurrently * while a getFinalState request arrives, it is better for the stop * request to finish and the final checkpoint to be created before we * issue getEpochFinalCheckpoint. Otherwise, we would return a null * response for getEpochFinalCheckpointState here and the requester of * the checkpoint is forced to time out and resend the request after a * coarse-grained timeout. 
This wait is just an optimization and is not * necessary for safety. */ this.synchronizedNoop(paxosID, version); return this.paxosLogger.getEpochFinalCheckpointState(paxosID, version); } /** * We are only allowed to delete a stopped paxos instance. There is no * public method to force-kill a paxos instance other than by successfully * creating a paxos instance with the same name and higher version. The only * way to stop a paxos instance is to get a stop request committed. * * @param paxosID * @param version * @return Returns true if the paxos instance {@code paxosID:version} exists * and is stopped. If so, the instance will be deleted. */ public boolean deleteStoppedPaxosInstance(String paxosID, int version) { PaxosInstanceStateMachine pism = this.getInstance(paxosID); if (pism != null && pism.getVersion() == version) // nothing to do here as stopped paxos instances are auto-killed if (pism.synchronizedNoop() && pism.isStopped()) return true; return false; } /** * @param paxosID * @param version * @return Returns true if the final state was successfully deleted or there * was nothing to delete. */ public boolean deleteFinalState(String paxosID, int version) { // might as well force-kill the instance at this point PaxosInstanceStateMachine pism = this.getInstance(paxosID); if (pism != null && pism.getVersion() == version) this.kill(pism); // needed for concurrent epoch final state creation to finish this.synchronizedNoop(paxosID, version); assert (pism == null || pism.getVersion() != version || pism.isStopped()); return this.paxosLogger.deleteEpochFinalCheckpointState(paxosID, version); } /** * The Integer return value as opposed to int is convenient to say that * there is no epoch. * * @param paxosID * @return Integer version of paxos instance named {@code paxosID}. 
*/ public Integer getVersion(String paxosID) { PaxosInstanceStateMachine pism = this.getInstance(paxosID); if (pism != null) return (int) pism.getVersion(); else { return this.paxosLogger.getEpochFinalCheckpointVersion(paxosID); } } /** * @param myAddress * @param niot * @return {@code this} */ public PaxosManager<NodeIDType> initClientMessenger(InetSocketAddress myAddress, InterfaceNIOTransport<NodeIDType, ?> niot) { PaxosManager<NodeIDType> pm = this.initClientMessenger(myAddress, false, niot); if (SSL_MODES.valueOf(Config.getGlobalString(PC.CLIENT_SSL_MODE)) != SSL_MODES.CLEAR) pm = pm.initClientMessenger(myAddress, true, niot); return pm; } /** * @param myAddress * @param ssl * @return {@code this} */ private PaxosManager<NodeIDType> initClientMessenger(InetSocketAddress myAddress, boolean ssl, InterfaceNIOTransport<NodeIDType, ?> nioTransport) { Messenger<InetSocketAddress, JSONObject> cMsgr = null; SSLMessenger<NodeIDType, ?> msgr = (nioTransport instanceof Messenger ? (SSLMessenger<NodeIDType, ?>) nioTransport : null); try { int clientPortOffset = ssl ? Config.getGlobalInt(PC.CLIENT_PORT_SSL_OFFSET) : Config.getGlobalInt(PC.CLIENT_PORT_OFFSET); if (clientPortOffset > 0) { InetSocketAddress myAddressOffsetted = new InetSocketAddress(myAddress.getAddress(), myAddress.getPort() + clientPortOffset); log.log(Level.INFO, "{0} creating client messenger at {1}; (offset={2}{3})", new Object[] { this, myAddressOffsetted, clientPortOffset, ssl ? "/SSL" : "" }); MessageNIOTransport<InetSocketAddress, JSONObject> createdNIOTransport = null; cMsgr = new JSONMessenger<InetSocketAddress>( createdNIOTransport = new MessageNIOTransport<InetSocketAddress, JSONObject>( myAddressOffsetted.getAddress(), myAddressOffsetted.getPort(), /* Client facing demultiplexer is single * threaded to keep clients from overwhelming * the system with request load. */ (Config.getGlobalString(PC.JSON_LIBRARY).equals("org.json") ? 
new JSONDemultiplexer(0, true) : new FastDemultiplexer( Config.getGlobalInt(PC.CLIENT_DEMULTIPLEXER_THREADS), true)), ssl ? SSLDataProcessingWorker.SSL_MODES .valueOf(Config.getGlobalString(PC.CLIENT_SSL_MODE)) : SSL_MODES.CLEAR)); if (Config.getGlobalBoolean(PC.STRICT_ADDRESS_CHECKS) && !createdNIOTransport.getListeningSocketAddress().equals(myAddressOffsetted)) // Note: will throw false positive exception on EC2 throw new IOException("Unable to listen on specified socket address at " + myAddressOffsetted + " != " + createdNIOTransport.getListeningSocketAddress()); assert (msgr != null); if (ssl) msgr.setSSLClientMessenger(cMsgr); else msgr.setClientMessenger(cMsgr); } } catch (IOException e) { e.printStackTrace(); log.severe(e.getMessage()); System.exit(1); } return this; } /** * Forces a checkpoint, but not guaranteed to happen immediately. * * @param paxosID */ public void forceCheckpoint(String paxosID) { PaxosInstanceStateMachine pism = this.getInstance(paxosID); if (pism != null) pism.forceCheckpoint(); } /** * Specifies the level of reordering of decisions that prompts a * sync-decisions request. 
* * @param limit */ public void setOutOfOrderLimit(int limit) { this.outOfOrderLimit = Math.max(limit, 1); } protected int getOutOfOrderLimit() { return this.outOfOrderLimit; } private int getNumOutstandingOrQueued() { return (this.outstanding.requests.size()) + this.requestBatcher.getQueueSize(); } // queue of outstanding requests protected void incrOutstanding(RequestPacket request) { request.setEntryReplicaAndReturnCount(this.myID); // if (request.getEntryReplica() == getMyID()) this.outstanding.enqueue(new RequestAndCallback(request, null)); if (request.batchSize() > 0) for (RequestPacket req : request.getBatched()) // if (request.getEntryReplica() == getMyID()) this.outstanding.enqueue(new RequestAndCallback(req, null)); if (Util.oneIn(10)) DelayProfiler.updateMovAvg("outstanding", this.outstanding.requests.size()); } /** * @param interval */ public void setInterCheckpointInterval(int interval) { this.interCheckpointInterval = interval; } /** * @return Inter-checkpoint interval. */ public int getInterCheckpointInterval() { return this.interCheckpointInterval; } /** * @param maxGap */ public void setMaxSyncDecisionsGap(int maxGap) { this.checkpointTransferTrigger = maxGap; } /** * @return Minimum delay between successive sync decisions for the same * paxos instance. This rate limit prevents unnecessary sync'ing * under high load when a nontrivial extent of out-of-order-ness is * expected. */ public long getMinResyncDelay() { return this.minResyncDelay; } /** * @param minDelay */ public void setMinResyncDelay(long minDelay) { this.minResyncDelay = minDelay; } /** * @return Maximum reordering among received decisions that when exceeded * prompts a checkpoint transfer as opposed to a sync-decisions * operation. */ public int getMaxSyncDecisionsGap() { return this.checkpointTransferTrigger; } /** * @return Returns true if null checkpoints are enabled. 
*/
    public boolean isNullCheckpointStateEnabled() {
        return this.nullCheckpointsEnabled;
    }

    /**
     * Clears the instance map and the corpses map and asks the logger to
     * remove everything. Just avoid using this deprecated method.
     */
    @Deprecated
    protected synchronized void resetAll() {
        this.pinstances.clear();
        this.corpses.clear();
        this.paxosLogger.removeAll();
    }

    // The closed flag is static, i.e., shared by all PaxosManager instances.
    private static synchronized void open() {
        closed = false;
    }

    private static synchronized void closeAll() {
        closed = true;
    }

    private static synchronized boolean allClosed() {
        return closed;
    }

    private boolean isClosed() {
        return allClosed();
    }

    /**
     * Gracefully closes PaxosManager.
     */
    public void close() {
        /* closeAll sets the static closed flag, which handlePaxosPacket
         * checks before processing, so no new packet processing starts in
         * any PaxosManager instance after this point. */
        closeAll();

        /* NOTE(review): waitToFinishAll is not visible here; presumably it
         * blocks while some PaxosManager has started processing a packet
         * (the processing flag is set in handlePaxosPacket) but has not yet
         * finished. Once closeAll and then waitToFinishAll have returned,
         * there should be no ongoing or future packet processing by any
         * instance of PaxosManager in this JVM. */
        waitToFinishAll();

        /* Close logger, FD, messenger, request batcher, executor */
        this.paxosLogger.close();
        this.FD.close();
        this.messenger.stop();
        this.requestBatcher.stop();
        this.ppBatcher.stop();
        this.largeCheckpointer.close();
        this.executor.shutdownNow();

        // log the final state of every instance still in the map
        Iterator<PaxosInstanceStateMachine> pismIter = this.pinstances.concurrentIterator();
        while (pismIter.hasNext()) {
            log.log(Level.FINE, "{0} terminating with paxos instance state {1}",
                    new Object[] { this, pismIter.next().toStringLong() });
        }
    }

    /**
     * @return Idle period after which paxos instances are paused.
*/ protected static long getDeactivationPeriod() { return Config.getGlobalInt(PC.DEACTIVATION_PERIOD); } /********************* End of public methods ***********************/ int totalRcvd = 0; synchronized void incrTotalRcvd(int n) { totalRcvd += n; } synchronized int getTotalRcvd(int n) { return totalRcvd; } private static final boolean EMULATE_UNREPLICATED = Config.getGlobalBoolean(PC.EMULATE_UNREPLICATED); private final boolean emulateUnreplicated(PaxosPacket request) { if (!EMULATE_UNREPLICATED || !(request instanceof RequestPacket)) return false; // else will finally return true // pretend-execute new requests PaxosInstanceStateMachine.execute(null, this, myApp, ((RequestPacket) request).setEntryReplica(getMyID()), false); return true; } private static final boolean LAZY_PROPAGATION = Config.getGlobalBoolean(PC.LAZY_PROPAGATION); private final boolean emulateLazyPropagation(PaxosPacket request) { if (!LAZY_PROPAGATION || !(request instanceof RequestPacket)) return false; // else will finally return true try { // extract newly received requests request = ((RequestPacket) request).getEntryReplicaRequestsAsBatch(getMyID())[1]; if (request != null) { ((RequestPacket) request).setEntryReplica(getMyID()); // broadcast newly received requests to others PaxosInstanceStateMachine pism = this.getInstance(request.getPaxosID()); MessagingTask mtask = pism != null ? 
new MessagingTask(pism.otherGroupMembers(), request) : null; this.send(mtask); // pretend-execute newly received requests PaxosInstanceStateMachine.execute(null, this, myApp, ((RequestPacket) request).setEntryReplica(getMyID()), false); } } catch (JSONException e) { log.severe(this + " unable to parse " + request.getSummary()); e.printStackTrace(); } catch (IOException e) { log.severe(this + " IOException while lazy-propagating " + request.getSummary()); e.printStackTrace(); } return true; } protected Set<NodeIDType> getNodesFromStringSet(Set<String> strNodes) { Set<NodeIDType> nodes = new HashSet<NodeIDType>(); for (String strNode : strNodes) { nodes.add(this.unstringer.valueOf(strNode)); } return nodes; } protected Set<String> getStringNodesFromIntArray(int[] members) { Set<String> nodes = new HashSet<String>(); for (int member : members) { nodes.add(this.integerMap.get(member).toString()); } return nodes; } private/* synchronized */PaxosInstanceStateMachine getInstance(String paxosID, boolean tryHotRestore, boolean tryRestore) { // long methodEntryTime = System.currentTimeMillis(); PaxosInstanceStateMachine pism = null; synchronized (this) { // atomic get and mark active to prevent concurrent pause if ((pism = pinstances.get(paxosID)) != null) pism.markActive(); } if (pism == null && ((tryHotRestore && (pism = this.unpause(paxosID)) != null) || (tryRestore && (pism = this.restore(paxosID)) != null))) // nothing here ; // DelayProfiler.updateDelay("getInstance", methodEntryTime); return pism != null ? pism : pinstances.get(paxosID); } private PaxosInstanceStateMachine getInstance(String paxosID) { return this.getInstance(paxosID, true, true); } private boolean isPauseEnabled() { return Config.getGlobalBoolean(PC.PAUSE_OPTION); } private boolean isHibernateable() { return Config.getGlobalBoolean(PC.HIBERNATE_OPTION); } /* For each paxosID in the logs, this method creates the corresponding paxos * instance and rolls it forward from the last checkpointed state. 
*
     * Synchronized because this method invokes an incremental read on the
     * database that currently does not support parallelism. But the
     * "synchronized" qualifier here is not necessary for correctness.
     *
     * The method makes three passes: (1) read all checkpoints and recover an
     * instance for each, (2) read all logged messages and replay them via
     * handlePaxosPacket, (3) read all checkpoints again and mark each
     * recovered instance active. Finally it sets hasRecovered and calls
     * notifyRecovered. */
    private synchronized void initiateRecovery() {
        boolean found = false;
        int groupCount = 0, freq = 1;
        log.log(Level.INFO, "{0} beginning to recover checkpoints", new Object[] { this });
        // NOTE(review): spins for as long as the call returns true; the
        // logger's return-value contract is not visible here.
        while (this.paxosLogger.initiateReadCheckpoints(true))
            ; // acquires lock
        RecoveryInfo pri = null;
        while ((pri = this.paxosLogger.readNextCheckpoint(true)) != null) {
            found = true;
            assert (pri.getPaxosID() != null);
            // start paxos instance, restore app state from checkpoint if any
            // and roll forward
            try {
                this.recover(pri.getPaxosID(), pri.getVersion(), this.myID,
                        getNodesFromStringSet(pri.getMembers()), myApp, pri.getState());
            } catch (PaxosInstanceCreationException pice) {
                // should we remove this checkpoint?
                pice.printStackTrace();
                log.severe(this + " unable to create paxos instance " + pri.getPaxosID());
            }
            // freq doubles each time groupCount reaches a multiple of it;
            // nothing else in this method reads freq.
            if ((++groupCount) % freq == 0) {
                freq *= 2;
            }
        }
        this.paxosLogger.closeReadAll(); // releases lock
        log.log(Level.INFO, "{0} has recovered checkpoints for {1} paxos groups",
                new Object[] { this, groupCount });
        if (!found) {
            log.warning("No checkpoint state found for node " + this.myID + ". This can only happen if\n"
                    + "(1) the node is newly joining the system, or\n(2) the node previously crashed before "
                    + "completing even a single checkpoint, or\n(3) the node's checkpoint was manually deleted.");
        }

        int logCount = 0;
        freq = 1;
        // roll forward all logged messages in a single pass
        log.log(Level.INFO, "{0} beginning to roll forward logged messages", new Object[] { this });
        while (this.paxosLogger.initiateReadMessages())
            ; // acquires lock
        // paxosPacketString is never assigned below, so the suicide message
        // in the catch block always prints "null" for it.
        String paxosPacketString = null;
        PaxosPacket paxosPacket = null;
        /**
         * Set packetizer for logger. We need this in order to have the benefits
         * of caching the original string form of received accepts to reduce
         * serialization overhead. Without a packetizer, the logger doesn't know
         * how to convert original string node IDs (that came over the network)
         * in the logged messages to integer IDs. The alternative would be to
         * just store integer IDs in logged messages (trusting that we will
         * always be able to have IntegerMap be able to convert back to the
         * original node IDs because checkpoint recovery happens before rolling
         * forward logs), but that means we wouldn't have the stringified
         * caching optimization.
         */
        this.paxosLogger.setPacketizer(new AbstractPaxosLogger.PaxosPacketizer() {
            // Parses the string as JSON, maps string node IDs to integers and
            // builds the packet; returns null (after logging) on a
            // JSONException.
            @Override
            protected PaxosPacket stringToPaxosPacket(String str) {
                try {
                    PaxosPacket pp = PaxosPacket
                            .getPaxosPacket(PaxosManager.this.fixNodeStringToInt(new JSONObject(str)));
                    assert (pp != null) : str;
                    return pp;
                } catch (JSONException e) {
                    e.printStackTrace();
                    try {
                        log.severe(PaxosManager.this + " unable to decode string of byte length "
                                + str.getBytes("ISO-8859-1").length);
                    } catch (UnsupportedEncodingException e1) {
                        e1.printStackTrace();
                    }
                }
                return null;
            }

            // ACCEPT packets are constructed directly from bytes; all other
            // types are decoded to a string and parsed by the method above.
            @Override
            protected PaxosPacket stringToPaxosPacket(byte[] bytes) {
                PaxosPacketType type = PaxosPacket.getType(bytes);
                try {
                    if (type == PaxosPacketType.ACCEPT) {
                        return new AcceptPacket(bytes);
                    }
                    // else
                    return this.stringToPaxosPacket(MessageExtractor.decode(bytes));
                } catch (UnsupportedEncodingException | UnknownHostException e) {
                    // likely an undecodeable accept
                    e.printStackTrace();
                    return null;
                }
            }
        });

        /**
         * We need this for {@link SQLPaxosLogger} for logging messages with int
         * IDs converted back to the original string IDs. The logger will invoke
         * stringToPaxosPacket(String) above while reading logged messages from
         * disk. Disk I/O is no different from network I/O in that integer IDs
         * have meaning outside of PaxosInstanceStateMachine soft state.
         *
         * We don't always have to convert int to string IDs because if we have
         * {@code stringified} already stored, it is already in a
         * network-friendly form.
         */
        this.paxosLogger.setPaxosPacketStringifier(new AbstractPaxosLogger.PaxosPacketStringifier() {
            // Returns null for any type other than ACCEPT, PREPARE and
            // DECISION.
            @Override
            protected String paxosPacketToString(PaxosPacket paxosPacket) {
                PaxosPacket.PaxosPacketType type = paxosPacket.getType();
                String stringified = null;
                // three loggable types
                if (type == PaxosPacket.PaxosPacketType.ACCEPT || type == PaxosPacket.PaxosPacketType.PREPARE
                        || type == PaxosPacket.PaxosPacketType.DECISION) {
                    // prefer the cached stringified form if there is one
                    if (paxosPacket instanceof RequestPacket
                            && (stringified = ((RequestPacket) paxosPacket).getStringifiedSelf()) != null)
                        return stringified;
                    else
                        try {
                            net.minidev.json.JSONObject jsonSmart = paxosPacket.toJSONSmart();
                            stringified = jsonSmart != null
                                    ? PaxosManager.this.messenger.fixNodeIntToString(jsonSmart).toString()
                                    : // prepares don't have toJSONSmartImpl()
                                    paxosPacket.toString();
                        } catch (JSONException je) {
                            // exception will never be thrown
                            assert (false);
                            // at least use default toString in any case
                            stringified = paxosPacket.toString();
                        }
                }
                return stringified;
            }
        });

        try {
            while ((paxosPacket = this.paxosLogger.readNextMessage()) != null) {
                paxosPacket = PaxosPacket.markRecovered(paxosPacket);
                Level level = Level.FINEST;
                log.log(level, "{0} rolling forward logged message {1}",
                        new Object[] { this, paxosPacket.getSummary(log.isLoggable(level)) });
                this.handlePaxosPacket((paxosPacket));
                if ((++logCount) % freq == 0) {
                    freq *= 2;
                }
            }
        } catch (NumberFormatException e) {
            // NOTE(review): the stack trace is printed only after
            // Util.suicide; if suicide does not return, it is never printed.
            Util.suicide(log, this + " recovery interrupted while parsing " + paxosPacketString
                    + ";\n Exiting because it is unsafe to continue recovery.");
            e.printStackTrace();
        }
        this.paxosLogger.closeReadAll(); // releases lock
        log.log(Level.INFO, "{0} rolled forward {1} messages total across {2} paxos groups",
                new Object[] { this, logCount, groupCount });

        // need to make another pass to mark all instances as active
        while (this.paxosLogger.initiateReadCheckpoints(true))
            ; // acquires lock
        while ((pri = this.paxosLogger.readNextCheckpoint(true)) != null) {
            found = true;
            assert (pri.getPaxosID() != null);
            PaxosInstanceStateMachine pism = getInstance(pri.getPaxosID());
            if (pism != null)
                pism.setActive();
            // Boolean (not boolean) so that a missing instance logs as null
            Boolean isActive = pism != null ? pism.isActive() : null;
            log.log(Level.INFO, "{0} recovered paxos instance {1}; isActive = {2} ",
                    new Object[] { this, pism != null ? pism.toStringLong() : null, isActive });
        }
        this.paxosLogger.closeReadAll(); // releases lock

        this.hasRecovered = true;
        this.notifyRecovered();
        log.log(Level.INFO, "------------------{0} recovery complete-------------------",
                new Object[] { this });
    }

    // True once initiateRecovery has run to completion.
    protected boolean hasRecovered() {
        return this.hasRecovered;
    }

    // True only if the manager has recovered and the given instance is
    // non-null and active.
    protected boolean hasRecovered(PaxosInstanceStateMachine pism) {
        // if (ONE_PASS_RECOVERY)
        return this.hasRecovered()
        // else // !ONE_PASS_RECOVERY
        // return
                && (pism != null && pism.isActive());
    }

    private static final boolean BATCHED_ACCEPT_REPLIES = Config.getGlobalBoolean(PC.BATCHED_ACCEPT_REPLIES);

    /* Returns a PaxosMessenger built around this manager's own messenger. A
     * single task is routed through PaxosManager.send without logging and is
     * coalesced only if BATCHED_ACCEPT_REPLIES is set; an array of tasks
     * first triggers notifyLoggedDecisions and is then sent by the
     * superclass. The msgr parameter is not used. */
    private PaxosMessenger<NodeIDType> wrapMessenger(PaxosMessenger<NodeIDType> msgr) {
        return new PaxosMessenger<NodeIDType>(PaxosManager.this.messenger) {
            public void send(MessagingTask mtask) throws JSONException, IOException {
                PaxosManager.this.send(mtask, BATCHED_ACCEPT_REPLIES, false);
            }

            public void send(MessagingTask[] mtasks) throws JSONException, IOException {
                PaxosManager.this.notifyLoggedDecisions(mtasks);
                super.send(mtasks);
            }
        };
    }

    /* For each (paxosID, value) entry that PaxosPacketBatcher computes from
     * the tasks, asks the corresponding instance, if found, to garbage
     * collect decisions with that value as argument. */
    private void notifyLoggedDecisions(MessagingTask[] mtasks) {
        PaxosInstanceStateMachine pism = null;
        for (Map.Entry<String, Integer> entry : PaxosPacketBatcher.getMaxLoggedDecisionMap(mtasks).entrySet())
            if ((pism = PaxosManager.this.getInstance(entry.getKey())) != null)
                pism.garbageCollectDecisions(entry.getValue());
    }

    /* All messaging is done using PaxosMessenger and MessagingTask.
This method */ protected void send(MessagingTask mtask, boolean coalesce, boolean logMsg) throws JSONException, IOException { if (mtask == null) return; if (logMsg && mtask instanceof LogMessagingTask) { AbstractPaxosLogger.logAndMessage(this.paxosLogger, (LogMessagingTask) mtask);// , this.messenger); } else { this.sendOrLoopback(coalesce ? PaxosManager.this.ppBatcher.coalesce(mtask) : mtask); } } protected void send(MessagingTask mtask) throws JSONException, IOException { this.send(mtask, true, true); } private void sendOrLoopback(MessagingTask mtask) throws JSONException, IOException { MessagingTask local = MessagingTask.getLoopback(mtask, myID); if (local != null && !local.isEmptyMessaging()) for (PaxosPacket pp : local.msgs) if (pp.getType() == PaxosPacketType.BATCHED_PAXOS_PACKET) for (PaxosPacket packet : ((BatchedPaxosPacket) pp).getPaxosPackets()) this.handlePaxosPacket((packet)); else this.handlePaxosPacket((pp)); this.messenger.send(MessagingTask.getNonLoopback(mtask, myID)); } protected void send(InetSocketAddress sockAddr, Request request, InetSocketAddress listenSockAddr) throws JSONException, IOException { this.messenger.sendClient(sockAddr, request instanceof RequestPacket ? (((RequestPacket) request).toBytes()) : request, listenSockAddr); } /* A clean kill completely removes all trace of the paxos instance (unlike * pause or hibernate or an unclean kill). The copy of the instance kept in * corpses will not be revived; in fact, is there temporarily only to make * sure that the instance does not get re-created as a "missed birthing" * instance creation. * * synchronized because any changes to pinstances must be synchronized as * createPaxosInstance and other methods also use it. Also, we want to move * the paxos instance atomically from pinstances to corpses. If not atomic, * it can result in the corpse (to-be) getting resurrected if a packet for * the instance arrives in between. 
* * Note: pause or hibernate or crashAndRecover like options just invoke * softCrash and not kill that is more fatal. None of them do any of the * following: (1) wipe out checkpoint state, (2) nullify app state, or (3) * move pism to corpses, all of which kill does. * * Invariant: Only a paxos instance currently inserted in the map can be * killed. */ protected synchronized void kill(PaxosInstanceStateMachine pism, boolean clean) { assert (pism != null); if (this.isClosed()) throw new PaxosInstanceDestructionException(this + " has already been closed"); /* Do nothing if existing version not same as kill target. Note that if * existing is null, we still kill pism. It is unlikely but possible for * existing to be null, say, because the instance pism just got paused, * but for pism or this paxos manager to issue a kill immediately after. */ PaxosInstanceStateMachine existing = this.getInstance(pism.getPaxosID()); if (existing != null && (pism.getVersion() - existing.getVersion() < 0)) { log.log(Level.INFO, "{0} unable to kill {1} because {2} already exists in the map", new Object[] { this, pism, existing }); return; } // else got murder work to do (even if existing==null) while (!pism.kill(clean)) log.severe("Problem stopping paxos instance " + pism.getPaxosID() + ":" + pism.getVersion()); incrKilled(); this.softCrash(pism); this.corpses.put(pism.getPaxosID(), pism); executor.schedule(new Cremator(pism.getPaxosID(), this.corpses), Config.getGlobalInt(PC.MORGUE_DELAY), TimeUnit.MILLISECONDS); this.notifyUponKill(); } /* Default kill is clean. Unclean kill is used by PaxosInstanceStateMachine * when it is unable to execute an app request successfully. An unclean kill * does not wipe out checkpoint state or app state, but does move the paxos * instance to corpses because it is as good as dead at this point. * * Why not just do a softCrash instead of corpsing out in the case of an app * execution error? 
The instance might get re-created as a "missed birthing" * case, but that would still be safe and, maybe, the roll forward this time * around might succeed. There isnt' a strong reason one way or the other. */ private void kill(PaxosInstanceStateMachine pism) { this.kill(pism, true); } // soft crash means stopping and "forgetting" the instance private synchronized void softCrash(PaxosInstanceStateMachine pism) { assert (pism != null); pism.forceStop(); this.pinstances.remove(pism.getPaxosID()); } /* For testing. Similar to hibernate but without forcing a checkpoint and * followed immediately by a restore from the most recent (but possibly * still quite old) checkpoint. */ protected void crashAndRecover(PaxosInstanceStateMachine pism) { if (pism == null || this.isClosed() || !this.pinstances.containsValue(pism)) return; log.warning(this + " crash-and-recovering paxos instance " + pism); this.softCrash(pism); this.recover(pism.getPaxosID(), pism.getVersion(), pism.getMyID(), this.integerMap.get(Util.arrayToIntSet(pism.getMembers())), pism.getApp()); // state will be auto-recovered } // Checkpoint and go to sleep on disk. Currently not used. protected synchronized boolean hibernate(PaxosInstanceStateMachine pism) { if (pism == null || this.isClosed() || !this.pinstances.containsValue(pism) || !this.isHibernateable()) return false; // else hibernate boolean hibernated = false; log.log(Level.INFO, "{0} trying to hibernate {1}", new Object[] { this, pism }); boolean stopped = pism.tryForcedCheckpointAndStop(); if (stopped) { this.softCrash(pism); hibernated = true; log.log(Level.INFO, "{0} sucessfully hibernated {1}", new Object[] { this, pism }); } return hibernated; } // Undo hibernate. Will rollback, so not very efficient. 
private synchronized PaxosInstanceStateMachine restore(String paxosID) { if (this.isClosed() || !this.isHibernateable()) return null; PaxosInstanceStateMachine pism = null; if ((pism = this.pinstances.get(paxosID)) != null) return pism; log.log(Level.INFO, "{0} trying to restore instance {1}", new Object[] { this, paxosID }); RecoveryInfo pri = this.paxosLogger.getRecoveryInfo(paxosID); if (pri != null) pism = this.recover(paxosID, pri.getVersion(), this.myID, this.getNodesFromStringSet(pri.getMembers()), this.myApp, pri.getState()); if (pism != null) log.log(Level.INFO, "{0} successfully restored hibernated instance {1}", new Object[] { this, pism }); else log.log(Level.WARNING, "{0} unable to restore paxos instance {1}", new Object[] { this, paxosID }); return pism; } /* Pausing is like hibernate but without checkpointing or subsequent * recovery overhead. * * We need batched pausing for efficiency, otherwise a deluge of pauses can * take a long time and slow down regular request processing. The method * below pauses one instance at time, which will hold up regular request * processing for that much time because of the synchronization below, but * if we pause in modest-sized batches, we will hold up regular request * processing for slightly longer but get done with pausing all pausable * instances much more quickly. */ protected HotRestoreInfo pause(PaxosInstanceStateMachine pism) { if (pism == null || this.isClosed() || !this.pinstances.containsValue(pism) || !isPauseEnabled()) return null; log.log(Level.FINE, "{0} trying to pause {1}", new Object[] { this, pism.getPaxosIDVersion() }); // else try to pause long pauseInitTime = System.currentTimeMillis(); HotRestoreInfo hri = pism.tryPause(); if (hri != null) { /* crash means the same as removing from pinstances as well as * activePaxii for an already stopped paxos instance. 
*/ assert (pism.isStopped()); this.softCrash(pism); if (Util.oneIn(Integer.MAX_VALUE)) DelayProfiler.updateDelay("pause", pauseInitTime); log.log(Level.FINE, "{0} successfully paused {1}", new Object[] { this, pism.getPaxosIDVersion() }); } else log.log(Level.FINE, "{0} failed to pause {1}", new Object[] { this, pism }); return hri; } // FIXME: unused but may be needed if we shift to DiskMap protected final Diskable<String, PaxosInstanceStateMachine> disk = new Diskable<String, PaxosInstanceStateMachine>() { @Override public Set<String> commit(Map<String, PaxosInstanceStateMachine> toCommit) throws IOException { return PaxosManager.this.pause(toCommit, false); } @Override public PaxosInstanceStateMachine restore(String key) throws IOException { return PaxosManager.this.unpause(key); } }; /* Batched pausing can speed up pause throughput by leveraging batching in a * database. It doesn't help unpausing though. */ protected synchronized Set<String> pause(Map<String, PaxosInstanceStateMachine> pauseBatch, boolean dequeue) { if (this.isClosed() || !isPauseEnabled()) return null; long t = System.currentTimeMillis(); // pause paxos instances Map<String, HotRestoreInfo> hriMap = new HashMap<String, HotRestoreInfo>(); for (PaxosInstanceStateMachine pism : pauseBatch.values()) { String paxosID = pism.getPaxosID(); HotRestoreInfo hri = dequeue ? 
this.pause(pism) : pism.tryPause(); // unpause here can return null unless synchronized if (hri != null) hriMap.put(paxosID, hri); } // write paused state to disk Map<String, HotRestoreInfo> pausedHRIMap = this.paxosLogger.pause(hriMap); for (HotRestoreInfo pausedHRI : pausedHRIMap.values()) hriMap.remove(pausedHRI.paxosID); // roll back failed pause attempts for (HotRestoreInfo hri : hriMap.values()) { boolean rolledBack = this.createPaxosInstance(hri.paxosID, hri.version, this.integerMap.get(Util.arrayToIntSet(hri.members)), this.myApp, null, hri, false) != null; if (rolledBack) log.log(Level.INFO, "{0} rolled back pausing of {1}", new Object[] { this, hri.paxosID }); else log.log(Level.SEVERE, "{0} unable to roll back failed pausing of {1}", new Object[] { this, hri.paxosID }); } DelayProfiler.updateDelay("pause", t, pausedHRIMap.size()); return pausedHRIMap.keySet(); } private StringLocker stringLocker = new StringLocker(); // Hot restores from disk, i.e., restores quickly without need for rollback private/* synchronized */PaxosInstanceStateMachine unpause(String paxosID) { if (this.isClosed() || !this.hasRecovered() || !this.isPauseEnabled()) return null; PaxosInstanceStateMachine restored = null; if ((restored = this.pinstances.get(paxosID)) != null) return restored; long unpauseInitTime = System.currentTimeMillis(); /* stringLocker allows concurrent unpause of different paxosIDs while * serializing unpause attempts of the same paxosID. We need to * serialize unpause attempts to the same paxosID as otherwise, even if * paused state exists, only the first unpause attempt will retrieve it * but one of the latter attempts that found no paused state could still * race ahead causing getInstance to return null resulting in a packet * drop even though the instance exists locally. Such packet drops will * not affect safety but it is good to avoid them. 
*/ synchronized (this.stringLocker.get(paxosID)) { log.log(Level.FINE, "{0} about to try to unpause instance {1}", new Object[] { this, paxosID }); HotRestoreInfo hri = this.paxosLogger.unpause(paxosID); if (hri != null) { log.log(Level.FINE, "{0} successfully unpaused paused instance {1}", new Object[] { this, paxosID }); restored = this.createPaxosInstance(hri.paxosID, hri.version, this.integerMap.get(Util.arrayToIntSet(hri.members)), this.myApp, null, hri, false); // if (restored != null) restored.markActive(); } else log.log(Level.FINE, "{0} unable to unpause instance {1}", new Object[] { this, paxosID }); } this.stringLocker.remove(paxosID); if (restored != null) assert (restored.isActive()); if (restored != null) DelayProfiler.updateDelay("unpause", unpauseInitTime); return restored; } /* Create paxos instance restoring app state from checkpoint if any and roll * forward. */ private PaxosInstanceStateMachine recover(String paxosID, int version, int id, Set<NodeIDType> members, Replicable app, String state) { log.log(Level.FINE, "{0} {1}:{2} {3} recovering", new Object[] { this, paxosID, version, members }); PaxosInstanceStateMachine pism = this.createPaxosInstance(paxosID, version, (members), app, null, null, false); return pism; } private PaxosInstanceStateMachine recover(String paxosID, int version, int id, Set<NodeIDType> members, Replicable app) { // state will be auto-recovered if it exists return this.recover(paxosID, version, id, members, app, null); } /* After rollForward, recovery is complete. In particular, we don't have to * wait for any more processing of messages, e.g., out of order decisions to * "settle", because the only important thing is to replay and process * ACCEPTs and PREPAREs so as to bring the acceptor state up to speed, which * is a purely local and non-blocking sequence of operations. 
Coordinator * state in general is not recoverable; the easiest way to recover it is to * simply call checkRunForCoordinator, which will happen automatically upon * the receipt of any external packet. */ private void rollForward(String paxosID, int version) { if (/* !ONE_PASS_RECOVERY || */this.hasRecovered()) { log.log(Level.FINE, "{0} about to roll forward {1}:{2}", new Object[] { this, paxosID, version }); AbstractPaxosLogger.rollForward(paxosLogger, paxosID, messenger); PaxosInstanceStateMachine pism = (this.getInstance(paxosID, true, false)); pism.setActive(); assert (this.getInstance(paxosID, false, false) != null); // if (pism != null) pism.poke(); } // TESTPaxosConfig.setRecovered(this.myID, paxosID, true); // testing } private void findPaxosInstance(PaxosPacket pp) throws JSONException { if (!this.hasRecovered()) { this.logPacketDrop(pp); return; } assert (pp.getPaxosID() != null); /* If it is possible for there to be no initial state checkpoint, under * missed birthing, an acceptor may incorrectly report its gcSlot as -1, * and if a majority do so (because that majority consists all of missed * birthers), a coordinator may propose a proposal for slot 0 even * though an initial state does exist, which would end up overwriting * the initial state. So we can not support ambiguity in whether there * is initial state or not. If we force initial state checkpoints (even * null state checkpoints) to always exist, so that missed birthers can * always set the initial gcSlot to 0. */ if (!this.isNullCheckpointStateEnabled()) { this.logPacketDrop(pp); return; } PaxosInstanceStateMachine zombie = this.corpses.get(pp.getPaxosID()); if (zombie == null || (zombie.getVersion() - pp.getVersion()) < 0) findReplicaGroup(pp); else this.logPacketDrop(pp); } private void logPacketDrop(PaxosPacket pp) { log.log(PaxosPacket.isRecovery(pp) ? 
Level.FINE : Level.INFO, "{0} dropping packet {1} as unable to find active paxos instance", new Object[] { this, pp.getSummary() }); } /* The two methods, heardFrom and isNodeUp, below are the only ones that * invoke nodeMap.get(int). They are only invoked after the corresponding * NodeIDType is already inserted in the map. */ protected void heardFrom(int id) { try { this.FD.heardFrom(this.integerMap.get(id)); } catch (RuntimeException re) { // do nothing, can happen during recovery log.log(Level.INFO, "{0} has no NodeIDType entry for integer {1}", new Object[] { this, id }); } } protected boolean isNodeUp(int id) { return (FD != null ? FD.isNodeUp(this.integerMap.get(id)) : false); } protected boolean lastCoordinatorLongDead(int id) { return (FD != null ? FD.lastCoordinatorLongDead(this.integerMap.get(id)) : true); } protected long getDeadTime(int id) { return (FD != null ? FD.getDeadTime(this.integerMap.get(id)) : System.currentTimeMillis()); } protected AbstractPaxosLogger getPaxosLogger() { return paxosLogger; } protected PaxosMessenger<NodeIDType> getMessenger() { return this.messenger; } /****************** Start of methods to gracefully finish processing **************/ private static synchronized void setProcessing(boolean b) { if (b) processing++; else processing--; if (processing == 0) PaxosManager.class.notify(); } private static synchronized boolean getProcessing() { return processing > 0; } protected static synchronized void waitToFinishAll() { try { while (getProcessing()) { PaxosManager.class.wait(); } } catch (InterruptedException ie) { ie.printStackTrace(); } } private synchronized void timedWaitCanCreateOrExistsOrHigher(String paxosID, int version, long timeout) { try { if (!this.canCreateOrExistsOrHigher(paxosID, version)) wait(timeout); } catch (InterruptedException ie) { ie.printStackTrace(); } } // wait until timeout for creation private synchronized void timedWaitForExistence(String paxosID, int version, long timeout) { try { long 
waitStartTime = System.currentTimeMillis(); while (!this.equalOrHigherVersionExists(paxosID, version) && System.currentTimeMillis() - waitStartTime < timeout) wait(timeout); } catch (InterruptedException ie) { ie.printStackTrace(); } } protected synchronized void waitCanCreateOrExistsOrHigher(String paxosID, int version) { try { while (!this.canCreateOrExistsOrHigher(paxosID, version)) wait(); } catch (InterruptedException ie) { ie.printStackTrace(); } } protected synchronized void notifyUponKill() { notifyAll(); } protected synchronized void notifyUponCreation() { notifyAll(); } /** * This is a common default create paxos instance creation method, i.e., we * wait for lower versions to be killed if necessary but, after a timeout, * create the new instance forcibly anyway. This method won't create the * instance if the same or higher version already exists. One concern with * this method is that force killing lower versions in order to start higher * versions can cause the epoch final state to be unavailable at *any* node * in immediately lower version. It is okay to kill even an immediately * preceding instance only if we already have the necessary state * (presumably obtained from a stopped immediately preceding instance at * some other group member). In the reconfiguration protocol, we must ensure * this by starting a new epoch only after receiving a successful * acknowledgment for stopping the previous epoch. However, if say only one * node has executed the previous epoch's stop and then starts the new epoch * and causes other new epoch replicas to force-create the new epoch, and * then crashes, the new epoch is stuck. * * @param paxosID * @param version * @param gms * @param app * @param state * @param timeout * @return Returns true if this paxos instance or one with a higher version * number was successfully created. 
*/ public boolean createPaxosInstanceForcibly(String paxosID, int version, Set<NodeIDType> gms, Replicable app, String state, long timeout) { if (timeout < Config.getGlobalInt(PC.CAN_CREATE_TIMEOUT)) timeout = Config.getGlobalInt(PC.CAN_CREATE_TIMEOUT); // still do a timed wait this.timedWaitCanCreateOrExistsOrHigher(paxosID, version, timeout); return this.createPaxosInstanceForcibly(paxosID, version, gms, app, state); } private synchronized boolean createPaxosInstanceForcibly(String paxosID, int version, Set<NodeIDType> gms, Replicable app, String state) { PaxosInstanceStateMachine pism = this.getInstance(paxosID); // if equal or higher instance exists if (pism != null && (pism.getVersion() - version >= 0)) return true; if (pism != null && pism.getVersion() - version < 0) { log.log(Level.INFO, "{0} forcibly killing {1} in order to create {2}:{3}", new Object[] { this, pism.getPaxosIDVersion(), paxosID, version }); this.kill(pism); // will succeed or throw exception } boolean created = this.createPaxosInstance(paxosID, version, gms, app, state); return created; } /** * @param paxosID * @param version * @return Returns true if the paxos instance {@code paxosID:version} exists * or a higher version exists. */ public boolean equalOrHigherVersionExists(String paxosID, int version) { PaxosInstanceStateMachine pism = this.getInstance(paxosID); if (pism != null && (pism.getVersion() - version >= 0)) return true; return false; } /** * Unlike the method {@link #equalOrHigherVersionExists(String, int)}, this * method checks for whether an equal or higher version was previously * stopped. We need this check to prevent going back in time, i.e., starting * an epoch after it has been stopped. 
* * @param paxosID * @param version * @return */ private boolean equalOrHigherVersionStopped(String paxosID, int version) { // lastVersion may be active or recently stopped Integer lastStoppedVersion = this.paxosLogger.getEpochFinalCheckpointVersion(paxosID); if (lastStoppedVersion != null && lastStoppedVersion - version >= 0) return true; return false; } /****************** End of methods to gracefully finish processing **************/ private String printLog(String paxosID) { return ("State for " + paxosID + ": Checkpoint: " + this.paxosLogger.getStatePacket(paxosID)); } // send a request asking for your group private void findReplicaGroup(PaxosPacket pp) throws JSONException { FindReplicaGroupPacket findGroup = new FindReplicaGroupPacket(this.myID, pp); // paxosID and version should be within int nodeID = FindReplicaGroupPacket.getNodeID(pp); if (nodeID >= 0) { try { log.log(Level.INFO, "{0} received paxos {1} for non-existent instance {2}; contacting {3} for help", new Object[] { this, pp.getSummary(), pp.getPaxosID(), this.integerMap.get(nodeID) }); this.send(new MessagingTask(nodeID, findGroup)); } catch (IOException ioe) { ioe.printStackTrace(); } } else log.log(Level.FINE, "{0} cant find group member in {1}:{2}: {3}", new Object[] { this, pp.getPaxosID(), pp.getVersion(), pp.getSummary() }); } // process a request or send an answer private void processFindReplicaGroup(FindReplicaGroupPacket findGroup) throws JSONException { MessagingTask mtask = null; if (findGroup.group == null && findGroup.nodeID != this.myID) { // process a request PaxosInstanceStateMachine pism = this.getInstance(findGroup.getPaxosID()); if (pism != null && pism.getVersion() == findGroup.getVersion()) { FindReplicaGroupPacket frgReply = new FindReplicaGroupPacket(pism.getMembers(), findGroup); mtask = new MessagingTask(findGroup.nodeID, frgReply); } } else if (findGroup.group != null && findGroup.nodeID == this.myID) { // process an answer PaxosInstanceStateMachine pism = 
this.getInstance(findGroup.getPaxosID()); if (pism == null || (pism.getVersion() - findGroup.getVersion()) < 0) { // wait to see if it gets created anyway; this.timedWaitForExistence(findGroup.getPaxosID(), findGroup.getVersion(), Config.getGlobalInt(PC.WAIT_TO_GET_CREATED_TIMEOUT)); // wait and kill lower versions if any if (pism != null && (pism.getVersion() - findGroup.getVersion() < 0)) { // wait to see if lower versions go away anyway this.timedWaitCanCreateOrExistsOrHigher(findGroup.getPaxosID(), findGroup.getVersion(), Config.getGlobalInt(PC.CAN_CREATE_TIMEOUT)); this.kill(pism); } // create out of "nothing" boolean created = this.createPaxosInstance(findGroup.getPaxosID(), findGroup.getVersion(), this.integerMap.get(Util.arrayToIntSet(findGroup.group)), myApp, null, null, false, true) != null; if (created) log.log(Level.INFO, "{0} created paxos instance {1}:{2} from nothing because it apparently missed its birthing", new Object[] { this, findGroup.getPaxosID(), findGroup.getVersion() }); } } try { if (mtask != null) this.send(mtask); } catch (IOException ioe) { ioe.printStackTrace(); } } /*************************** Start of activePaxii related methods **********************/ private static final double PAUSE_SIZE_THRESHOLD = Config.getGlobalDouble(PC.PAUSE_SIZE_THRESHOLD); private long lastDeactivationAttempt = 0; private boolean shouldTryDeactivation() { synchronized (this) { if ((System.currentTimeMillis() - lastDeactivationAttempt < Config .getGlobalLong(PC.DEACTIVATION_PERIOD)) || (this.pinstances.size() < this.pinstances.capacity() * PAUSE_SIZE_THRESHOLD)) return false; lastDeactivationAttempt = System.currentTimeMillis(); return true; } } private static final int FORCE_PAUSE_FACTOR = 10; private static final double PAUSE_RATE_LIMIT = Config.getGlobalDouble(PC.PAUSE_RATE_LIMIT); /* We currently make a periodic pass over all active instances to sync and * check for deactivation. 
However, deactivation can be delegated to a * general-purpose DiskMap like structure and can be done more efficiently * in *expectation* than a full active sweep (at a ~2x higher memory cost) * every iteration. The sync also does not need a sweep if we maintain a * separate set of paxosIDs (with a commensurate memory cost) that are not * caught up and we sync just those. A sweep thread (like below or in * DiskMap) is needed in order to have the benefit of reducing the memory * footprint down to as small as necessary in steady-state without slowing * down the critical path. The bad case for a sweep is when most of the * mapped instances are active, so the sweeps end up being redundant; it is * unclear if this actually affects performance much given the low priority * hint for the thread and the explict rate limit. */ private void syncAndDeactivate() { if (isClosed() || this.pinstances.size() == 0) return; if (!this.shouldTryDeactivation()) return; long t0 = System.currentTimeMillis(); RateLimiter rateLimiter = new RateLimiter(PAUSE_RATE_LIMIT); log.log(Level.FINE, "{0} initiating deactivation attempt, |activePaxii| = {1}", new Object[] { this, this.pinstances.size() }); int numPaused = 0; Map<String, PaxosInstanceStateMachine> batch = new HashMap<String, PaxosInstanceStateMachine>(); // cuckoo hashmap now supports an efficient iterator for (Iterator<PaxosInstanceStateMachine> pismIter = this.pinstances.concurrentIterator(); pismIter .hasNext();) { PaxosInstanceStateMachine pism = pismIter.next(); String paxosID = pism.getPaxosID(); if (pism.isLongIdle() // if size > capacity/2, pause 1/FORCE_PAUSE_FACTOR fraction || (this.pinstances.size() > this.pinstances.capacity() / 2 && numPaused < this.pinstances.capacity() / FORCE_PAUSE_FACTOR)) { log.log(Level.FINER, "{0} trying to pause {1} [{2}]", new Object[] { this, paxosID, pism }); /* The sync below ensures that, at least once every deactivation * period, we sync decisions for an active paxos instance. 
This * is handy when a paxos instance is not caught up but doesn't * get any new messages either, say, because nothing new is * happening, then it has no reason to do anything but will * remain unnecessarily active; the sync here allows it to * potentially catch up and possibly be paused in the next * deactivation round if there is still no action by then. The * sync is useful irrespective of whether or not the instance is * caught up for pausability * * Overhead: This sync imposes a message overhead of up to A * messages A is the number of active paxos instances. For * example, with 10K active instances, this method could send * 10K messages, which is high. However, for each instance, the * sync message will get sent only if it has not recently sent a * sync message *and* it is out of sync or it has just started * up and has a very low outOfOrder limit. Consequently, we * should avoid having a large number of paxos instances with a * very low outOfOrderLimit, especially if all they plan to do * is to start up and do nothing, otherwise, they will cause a * one-time deluge of sync messages before being paused. * * If active instances are generally busy but out of sync, we * could impose a bandwidth overhead of A/D where D is the * deactivation thread's period, e.g., A=10K, D=30secs => an * overhead of 333 messages/sec, which although seems high is * possible only if none of those paxos instances sent a sync * reauest in the last S seconds, where S is the minimum * inter-sync interval for each instance (default 1 second). In * expectation, a high overhead relative to the inevitable paxos * commit overhead (of 3 messages per replica per decision) is * unlikely unless the workload and network behave * adversarially, i.e., in every D period, each active paxos * instance executes just enough decisions for it to be possible * for its outOfOrder threshold to be triggered and the network * reorders some of those decisions. 
If the instance commits * many more decisions than the outOfOrder threshold, then the * sync message adds only a small relative overhead, e.g., if * the outOfOrder threshold is 10, roughly 33 messages (accept, * acceptReply, decision) would be required to commit at least * 11 decisions that would then trigger just one sync message. * If the outOfOrder threshold is 1, then the sync message could * add one message to every 6 expected messages (for 2 paxos * commits) at this replica, a ~15% overhead. But with such a * low outOfOrder threshold, we should not be having a large * number of paxos instances in the first place. */ this.syncPaxosInstance(pism, false); // rate limit if well under capacity if (this.pinstances.size() < this.pinstances.capacity() / FORCE_PAUSE_FACTOR) rateLimiter.record(); batch.put(pism.getPaxosID(), pism); if (batch.size() >= PAUSE_BATCH_SIZE) { Set<String> batchPaused = pause(batch, true); if (batchPaused != null) numPaused += batchPaused.size(); log.log(Level.FINE, "{0} paused {1}", new Object[] { this, batchPaused }); this.printPauseLog(batchPaused); batch.clear(); } } } if (!batch.isEmpty()) { this.printPauseLog(this.pause(batch, true)); } DelayProfiler.updateDelay("deactivation", t0); } private void printPauseLog(Collection<String> paused) { // can not call synchronized methods inside log statements long totalCreated = this.getNumCreated(); long totalCurrent = this.getNumInstancesAndResetChanged(); log.log(paused.size() > 0 || this.totalInstancesChanged ? Level.INFO : Level.FINE, "{0} deactivated {1} idle instances {2}; has {3} active instances{4}; avg_pause_delay = {5};" + " avg_deactivation_loop_delay = {6}; total_instances_created = {7}; total_current_instances = {8}", new Object[] { this, paused.size(), Util.truncatedLog(paused, PRINT_LOG_SIZE), this.pinstances.size(), this.pinstances.size() < 10 ? 
Arrays.asList(this.pinstances.keySet().toArray()) : "", Util.nmu(DelayProfiler.get("pause")), Util.ms(DelayProfiler.get("deactivation")), totalCreated, totalCurrent }); } private static final int PRINT_LOG_SIZE = 16; private static final int PAUSE_BATCH_SIZE = Config.getGlobalInt(PC.PAUSE_BATCH_SIZE); /*************************** End of activePaxii related methods **********************/ private class Cremator implements Runnable { String id = null; HashMap<String, PaxosInstanceStateMachine> map = null; Cremator(String paxosID, HashMap<String, PaxosInstanceStateMachine> zombies) { this.id = paxosID; this.map = zombies; } public void run() { synchronized (map) { map.remove(id); } } } /* Both deactivates, i.e., removes temporary active paxos state, and pauses, * i.e., swaps safety-critical paxos state to disk. */ private class Deactivator implements Runnable { public void run() { /* There is a good reason not to slow down this thread as that will * essentially slow down the rate of group creation. Groups can at * most be created as fast as they can be paused, otherwise we will * run out of memory. */ // Thread.currentThread().setPriority(Thread.MIN_PRIORITY); try { syncAndDeactivate(); } catch (Exception e) { // must continue running despite any exceptions e.printStackTrace(); } } } protected int getMyID() { return this.myID; } /** * @return NodeIDType of this. 
*/
public NodeIDType getNodeID() {
    return this.messenger.getMyID();
}

/* Blocks on this manager's monitor until it is notified (or the thread is
 * interrupted). There is no condition loop here, so any notifyAll on this
 * monitor ends the wait.
 * NOTE(review): presumably callers re-check hasRecovered() themselves;
 * confirm against the call sites. */
private synchronized void waitToRecover() {
    try {
        this.wait();
    } catch (InterruptedException interrupted) {
        interrupted.printStackTrace();
    }
}

// wakes every thread waiting on this manager's monitor
private synchronized void notifyRecovered() {
    this.notifyAll();
}

/* Convert string -> NodeIDType -> int (can *NOT* convert string directly to
 * int).
 *
 * Rewrites, in place, the node ID fields of an incoming packet. Exactly one
 * of three cases is handled: the ballot field if present, else the group
 * field if present, else every other NodeIDKeys field that is present. The
 * packet is returned untouched if conversion is disabled or if it is a
 * failure detection packet. */
private JSONObject fixNodeStringToInt(JSONObject json) throws JSONException {
    if (!PaxosMessenger.ENABLE_INT_STRING_CONVERSION)
        return json;
    // FailureDetectionPacket already has generic NodeIDType
    if (PaxosPacket.getPaxosPacketType(json) == PaxosPacket.PaxosPacketType.FAILURE_DETECT)
        return json;

    final String ballotKey = PaxosPacket.NodeIDKeys.B.toString();
    if (json.has(ballotKey)) {
        // fix ballot string; left as is if the coordinator does not resolve
        final String ballot = json.getString(ballotKey);
        final NodeIDType coordinator = this.unstringer.valueOf(Ballot
                .getBallotCoordString(ballot));
        if (coordinator != null) {
            final Ballot fixed = new Ballot(Ballot.getBallotNumString(ballot),
                    this.integerMap.put(coordinator));
            json.put(ballotKey, fixed.toString());
        }
        return json;
    }

    final String groupKey = PaxosPacket.NodeIDKeys.GROUP.toString();
    if (json.has(groupKey)) {
        // fix group string (JSONArray), one member at a time
        final JSONArray members = json.getJSONArray(groupKey);
        for (int i = 0; i < members.length(); i++) {
            final String memberString = members.getString(i);
            final int memberInt = this.integerMap.put(this.unstringer.valueOf(memberString));
            members.put(i, memberInt);
        }
        json.put(groupKey, members);
        return json;
    }

    for (PaxosPacket.NodeIDKeys key : PaxosPacket.NodeIDKeys.values()) {
        final String field = key.toString();
        if (!json.has(field))
            continue;
        // fix default node string
        final String nodeString = json.getString(field);
        if (nodeString.equals(IntegerMap.NULL_STR_NODE))
            continue;
        final int nodeInt = this.integerMap.put(this.unstringer.valueOf(nodeString));
        json.put(field, nodeInt);
    }
    return json;
}
private net.minidev.json.JSONObject fixNodeStringToInt(net.minidev.json.JSONObject json) { // long t = System.nanoTime(); // FailureDetectionPacket already has generic NodeIDType if ((Integer) json.get(PaxosPacket.Keys.PT.toString()) == PaxosPacket.PaxosPacketType.FAILURE_DETECT .getInt()) return json; if (json.containsKey(PaxosPacket.NodeIDKeys.B.toString())) { // fix ballot string String ballotString = (String) json.get(PaxosPacket.NodeIDKeys.B.toString()); json.put(PaxosPacket.NodeIDKeys.B.toString(), new Ballot(Ballot.getBallotNumString(ballotString), this.integerMap.put(this.unstringer.valueOf(Ballot.getBallotCoordString(ballotString)))) .toString()); } else if (json.containsKey(PaxosPacket.NodeIDKeys.GROUP.toString())) { // fix group string (JSONArray) Collection<?> jsonArray = (Collection<?>) json.get(PaxosPacket.NodeIDKeys.GROUP.toString()); Set<Integer> group = new HashSet<Integer>(); for (Object element : jsonArray) { String memberString = element.toString(); int memberInt = this.integerMap.put(this.unstringer.valueOf(memberString)); group.add(memberInt); } json.put(PaxosPacket.NodeIDKeys.GROUP.toString(), group); } else for (PaxosPacket.NodeIDKeys key : PaxosPacket.NodeIDKeys.values()) { if (json.containsKey(key.toString())) { // fix default node string String nodeString = json.get(key.toString()).toString(); if (!nodeString.equals(IntegerMap.NULL_STR_NODE)) { int nodeInt = this.integerMap.put(this.unstringer.valueOf(nodeString)); json.put(key.toString(), nodeInt); } } } // if(Util.oneIn(100)) // DelayProfiler.updateDelayNano("fixNodeStringToIntJSONSmart", t); return json; } public String toString() { return this.getClass().getSimpleName() + ":" + this.integerMap.get(myID); } protected String intToString(int id) { return this.integerMap.get(id).toString(); } /* ********************** Testing methods below ********************* */ /** * @return Logger used by PaxosManager. 
*/
public static Logger getLogger() {
    return log
    // Logger.getLogger(PaxosManager.class.getName().replace("PaxosManager",
    // ""))
    ;
}

/**
 * If the static cleanDB flag is set, keeps calling
 * paxosLogger.removeAll() until it returns true. Does nothing otherwise.
 */
private void testingInitialization() {
    if (cleanDB)
        // retry in a tight loop until removeAll() reports success
        while (!this.paxosLogger.removeAll())
            ;
}

// set via startWithCleanDB(boolean); read by testingInitialization()
private static boolean cleanDB = false;

/**
 * If set to true, {@link PaxosManager} will clear the DB upon creation.
 * This static flag applies only to PaxosManager instances created after
 * this flag has been set to true.
 *
 * @param clean
 */
public static void startWithCleanDB(boolean clean) {
    cleanDB = clean;
}

/**
 * This test method is deprecated and will either be removed or
 * significantly revamped. Use TESTPaxosMain instead to run a single machine
 * test with multiple virtual nodes.
 *
 * Outline of what the method does: creates one PaxosManager and test app
 * per member of the default test group (with the first member marked
 * crashed), creates the paxos groups at every node, waits for recovery,
 * schedules 1000 requests against the manager at index 1, waits for them
 * to be committed, checks that every pair of uncrashed nodes agrees on
 * the committed slot and hash for the first group, prints a summary, and
 * closes everything.
 *
 * @param args
 * @throws InterruptedException
 * @throws IOException
 * @throws JSONException
 */
@Deprecated
static void test(String[] args) throws InterruptedException, IOException, JSONException {
    int[] members = TESTPaxosConfig.getDefaultGroup();
    int numNodes = members.length;

    SampleNodeConfig<Integer> snc = new SampleNodeConfig<Integer>(2000);
    snc.localSetup(Util.arrayToIntSet(members));

    @SuppressWarnings("unchecked")
    PaxosManager<Integer>[] pms = new PaxosManager[numNodes];
    TESTPaxosApp[] apps = new TESTPaxosApp[numNodes];

    /* We always test with the first member crashed. This also ensures that
     * the system is fault-tolerant to the failure of the default
     * coordinator, which in our policy is the first (or lowest numbered)
     * node. */
    TESTPaxosConfig.crash(members[0]);
    /* We disable sending replies to client in PaxosManager's unit-test. To
     * test with clients, we rely on other tests in TESTPaxosMain
     * (single-machine) or on TESTPaxosNode and TESTPaxosClient for
     * distributed testing. */
    TESTPaxosConfig.setSendReplyToClient(false);

    /* This setting is "guilty until proven innocent", i.e., each node will
     * start out assuming that all other nodes are dead. This is probably
     * too pessimistic as it will cause every node to run for coordinator
     * when it starts up but is good for testing. */
    FailureDetection.setParanoid();

    // Set up paxos managers and apps with nio
    for (int i = 0; i < numNodes; i++) {
        System.out.println("Initiating PaxosManager at node " + members[i]);
        JSONNIOTransport<Integer> niot = new JSONNIOTransport<Integer>(members[i], snc,
                new PacketDemultiplexerDefault(), true);
        apps[i] = new TESTPaxosApp(niot); // app, PM reuse nio
        pms[i] = new PaxosManager<Integer>(members[i], snc, niot, apps[i]);
    }

    System.out.println("Initiated all " + numNodes + " paxos managers with failure detectors..\n");

    /* We don't rigorously test with multiple groups as they are
     * independent, but this is useful for memory testing. */
    int numPaxosGroups = 2;
    String[] names = new String[numPaxosGroups];
    for (int i = 0; i < names.length; i++)
        names[i] = "paxos" + i;

    System.out.println("Creating " + numPaxosGroups + " paxos groups each with " + numNodes
            + " members each, one each at each of the " + numNodes + " nodes");
    for (int node = 0; node < numNodes; node++) {
        int k = 1;
        for (int group = 0; group < numPaxosGroups; group++) {
            // creating a paxos instance may induce recovery from disk
            pms[node].createPaxosInstance(names[group], 0, Util.arrayToIntSet(members), apps[node], null,
                    null, false);
            // NOTE(review): with numPaxosGroups fixed at 2 above, this
            // progress print can never fire; it only matters if the
            // group count is raised above 1000.
            if (numPaxosGroups > 1000
                    && ((group % k == 0 && ((k *= 2) > 0)) || group % 100000 == 0)) {
                System.out.print(group + " ");
            }
        }
        System.out.println("..node" + members[node] + " done");
    }
    Thread.sleep(1000);

    /* Wait for all paxos managers to finish recovery. Recovery is finished
     * when initiateRecovery() is complete. At this point, all the paxos
     * groups at that node would have also rolled forward. */
    int maxRecoverySlot = -1;
    int maxRecoveredNode = -1;
    for (int i = 0; i < numNodes; i++) {
        // crashed members are skipped; others block until marked recovered
        while (!TESTPaxosConfig.isCrashed(members[i])
                && !TESTPaxosConfig.getRecovered(members[i], names[0])) {
            log.info("Waiting for node " + members[i] + " to recover ");
            pms[i].waitToRecover();
        }
        log.info("Node" + members[i] + " finished recovery including rollback;\n" + names[0]
                + " recovered at slot " + apps[i].getNumCommitted(names[0]));
        // need max recovery slot for names[0] below
        maxRecoverySlot = Math.max(maxRecoverySlot, apps[i].getNumCommitted(names[0]));
        // NOTE(review): assigned unconditionally, so after the loop this is
        // always the last index, not necessarily the node that reported
        // maxRecoverySlot.
        maxRecoveredNode = i;
    }

    System.out.println("all nodes done creating groups.");

    /*********** Finished creating paxos instances for testing *****************/

    /************* Begin ClientRequestTask **************************/
    ScheduledExecutorService execpool = Executors.newScheduledThreadPool(5);
    // Proposes one request to one paxos manager when run.
    class ClientRequestTask implements Runnable {
        private final RequestPacket request;
        private final PaxosManager<Integer> paxosManager;

        ClientRequestTask(RequestPacket req, PaxosManager<Integer> pm) {
            request = req;
            paxosManager = pm;
        }

        public void run() {
            try {
                // NOTE(review): reqJson is built and tagged but never used;
                // the RequestPacket itself is what gets proposed.
                JSONObject reqJson = request.toJSONObject();
                JSONPacket.putPacketType(reqJson, PaxosPacketType.PAXOS_PACKET.getInt());
                paxosManager.propose(request.getPaxosID(), request, null);
            } catch (JSONException e) {
                e.printStackTrace();
            }
        }
    }
    /************* End ClientRequestTask **************************/

    /* Create and schedule requests. All requests are scheduled immediately
     * to test concurrency */
    int numRequests = 1000;
    RequestPacket[] reqs = new RequestPacket[numRequests];
    ScheduledFuture<?>[] futures = new ScheduledFuture[numRequests];
    int numExceptions = 0;
    double scheduledDelay = 0;
    for (int i = 0; i < numRequests; i++) {
        reqs[i] = new RequestPacket(i, "[ Sample write request numbered " + i + " ]", false);
        reqs[i].putPaxosID(names[0], 0);
        // NOTE(review): as in ClientRequestTask.run(), this JSON object is
        // never used after being tagged.
        JSONObject reqJson = reqs[i].toJSONObject();
        JSONPacket.putPacketType(reqJson, PaxosPacketType.PAXOS_PACKET.getInt());
        try {
            // every request goes to the manager at index 1
            ClientRequestTask crtask = new ClientRequestTask(reqs[i], pms[1]);
            futures[i] = (ScheduledFuture<?>) execpool.schedule(crtask, (long) scheduledDelay,
                    TimeUnit.MILLISECONDS);
            // adds zero, so the delay stays 0 for every request
            scheduledDelay += 0;
        } catch (Exception e) {
            // futures[i] stays null in this case
            e.printStackTrace();
            continue;
        }
    }
    /* Any exceptions below could occur because of exceptions inside paxos.
     * Scheduling a request will invoke PaxosManager.propose() that will
     * cause it to send the request to the corresponding
     * PaxosInstanceStateMachine. */
    log.info("Waiting for request scheduling to complete.");
    for (int i = 0; i < numRequests; i++) {
        try {
            // a null future (scheduling failed above) surfaces here as an
            // exception and is counted like any other
            futures[i].get();
        } catch (Exception e) {
            e.printStackTrace();
            numExceptions++;
        }
    }
    log.info("Request scheduling complete; numExceptions=" + numExceptions);
    Thread.sleep(1000);

    /* Wait for scheduled requests to finish being processed by paxos. We
     * check for this by checking that at least one node has executed up to
     * the slot number maxRecoverySlot + numRequests. */
    while (apps[maxRecoveredNode].getNumCommitted(names[0]) < maxRecoverySlot + numRequests) {
        apps[maxRecoveredNode].waitToFinish();
        ;
    }
    log.info("Node" + maxRecoveredNode + " has executed up to slot "
            + (maxRecoverySlot + numRequests));

    /* The code below waits for all uncrashed replicas to finish executing
     * up to the same slot and will then assert the SMR invariant, i.e.,
     * they all made the same state transitions up to that slot. */
    int numCommitted = 0;
    for (int i = 0; i < numNodes; i++) {
        for (int j = i + 1; j < numNodes; j++) {
            if (TESTPaxosConfig.isCrashed(members[i]) || TESTPaxosConfig.isCrashed(members[j]))
                continue; // ignore crashed nodes

            int committed1 = apps[i].getNumCommitted(names[0]);
            int committed2 = apps[j].getNumCommitted(names[0]);
            // Wait for the other node to catch up
            while (committed1 != committed2) {
                if (committed1 > committed2)
                    apps[j].waitToFinish(names[0], committed1);
                else if (committed1 < committed2)
                    apps[i].waitToFinish(names[0], committed2);
                log.info("Waiting : (slot1,hash1)=(" + committed1 + "," + apps[i].getHash(names[0])
                        + "(; (slot2,hash2=" + committed2 + "," + apps[j].getHash(names[0]) + ")");
                Thread.sleep(1000);

                committed1 = apps[i].getNumCommitted(names[0]);
                committed2 = apps[j].getNumCommitted(names[0]);
            }
            // Both nodes caught up to the same slot
            assert (committed1 == committed2) : "numCommitted@" + i + "=" + committed1
                    + ", numCommitted@" + j + "=" + committed2;
            // Assert state machine replication invariant
            numCommitted = apps[i].getNumCommitted(names[0]);
            // NOTE(review): getHash's return type is not visible here; if it
            // is an object type (e.g. String), == compares references rather
            // than contents. Confirm against TESTPaxosApp.
            assert (apps[i].getHash(names[0]) == apps[j].getHash(names[0])) : ("Waiting : (slot1,hash1)=("
                    + committed1 + "," + apps[i].getHash(names[0]) + "(; (slot2,hash2=" + committed2
                    + "," + apps[j].getHash(names[0]) + ")");
            ; // end of SMR invariant
        }
    }

    /* Print preempted requests if any. These could happen during
     * coordinator changes. Preempted requests are converted to no-ops and
     * forwarded to the current presumed coordinator by paxos. */
    String preemptedReqs = "[ ";
    int numPreempted = 0;
    for (int i = 0; i < numRequests; i++) {
        if (!TESTPaxosConfig.isCommitted(reqs[i].requestID)) {
            preemptedReqs += (i + " ");
            numPreempted++;
        }
    }
    preemptedReqs += "]";

    System.out.println("\n\nTest completed. Executed " + numCommitted
            + " requests consistently including " + (numRequests - numPreempted) + " of " + numRequests
            + " received requests;\nPreempted requests = " + preemptedReqs + "; numExceptions="
            + numExceptions + "; average message log time=" + Util.df(DelayProfiler.get("logDelay"))
            + "ms.\n"
            + "\nNote that it is possible for the test to be successful even if the number of consistently\n"
            + "executed requests is less than the number of received requests as paxos only guarantees\n"
            + "consistency, i.e., that all replicas executed requests in the same order, not that all requests\n"
            + "issued will get executed. The latter property can be achieved by clients reissuing requests\n"
            + "until successfully executed. With reissuals, clients do need to worry about double execution,\n"
            + "so they should be careful. A client is not guaranteed to get a failure message if the request fails,\n"
            + "e.g., if the replica receiving a request dies immediately. If the client uses a timeout to detect\n"
            + "failure and thereupon reissue its request, it is possible that both the original and re-issued\n"
            + "requests are executed. Clients can get around this problem by using sequence numbers within\n"
            + "their app, reading the current sequence number, and then trying to commit their write provided the\n"
            + "sequence number has not changed in the meantime. There are other alternatives, but all of these\n"
            + "are application-specific; they are not paxos's problem\n");
    for (int i = 0; i < numNodes; i++) {
        System.out.println(pms[i].printLog(names[0]));
    }
    // stop the request scheduler, then close every manager
    execpool.shutdownNow();
    for (PaxosManager<Integer> pm : pms)
        pm.close();
}

/**
 * @param paxosID
 *            not used by this implementation
 * @return The single app stored in the myApp field, regardless of paxosID.
 */
protected Replicable getApp(String paxosID) {
    return this.myApp;
}

/**
 * @return Whatever PendingDigests.getMessageDigest() returns.
 */
protected MessageDigest getMessageDigest() {
    return PendingDigests.getMessageDigest();
}

// Both values are read once from the global configuration.
private static final int DIGEST_THRESHOLD = Config.getGlobalInt(PC.DIGEST_THRESHOLD);
private static final boolean DIGEST_REQUESTS = Config.getGlobalBoolean(PC.DIGEST_REQUESTS);

/**
 * @return True only if DIGEST_REQUESTS is enabled and the current number
 *         of entries in pinstances is at most DIGEST_THRESHOLD.
 */
protected boolean shouldDigest() {
    return DIGEST_REQUESTS && this.pinstances.size() <= DIGEST_THRESHOLD;
}

/**
 * Delegates to pendingDigests.match(accept).
 *
 * @param accept
 * @return Result of the delegate call.
 */
protected AcceptPacket match(AcceptPacket accept) {
    return this.pendingDigests.match(accept);
}

/**
 * Delegates to pendingDigests.release(request, remove).
 *
 * @param request
 * @param remove
 *            passed through unchanged to the delegate
 * @return Result of the delegate call.
 */
protected AcceptPacket release(RequestPacket request, boolean remove) {
    return this.pendingDigests.release(request, remove);
}

/**
 * Same as {@link #release(RequestPacket, boolean)} with remove set to true.
 *
 * @param request
 * @return Result of the delegate call.
 */
protected AcceptPacket release(RequestPacket request) {
    return this.pendingDigests.release(request, true);
}

/**
 * @param multicastAccept
 * @return multicastAccept.digest(...) using this manager's message digest
 *         if {@link #shouldDigest()} is true; otherwise the argument
 *         itself, unchanged.
 */
protected AcceptPacket digest(AcceptPacket multicastAccept) {
    return this.shouldDigest() ? multicastAccept.digest(this.getMessageDigest()) : multicastAccept;
}

/* Callback for timed out digested accept. Will send a nack accept reply in
 * order to try to get the sender to send the undigested accept. The
 * digested accept will be discarded in any case after this callback.
*/
private void callbackDigestedAcceptTimeout(AcceptPacket accept) {
    try {
        log.log(Level.INFO, "{0} trying to release digested accept {1}",
                new Object[] { this, accept.getSummary(log.isLoggable(Level.INFO)) });

        // release without removing; a non-null result is handled locally
        AcceptPacket released = this.release(accept, false);
        if (released != null) {
            this.handlePaxosPacket(released);
            return;
        }

        // nothing was released: re-insert the accept and nack the coordinator
        try {
            // non-loopback send to re-insert accept
            this.messenger.send(new MessagingTask(this.getMyID(), accept));
            this.send(new MessagingTask(accept.ballot.coordinatorID,
                    new AcceptReplyPacket(this.getMyID(), accept.ballot, accept.slot,
                            /* FIXME: wraparound means that
                             * maxCheckpointedSlot of 0 can be unsafe.
                             * We need to get the correct value from the
                             * paxos instance. But for now digests are
                             * not disabled by default, so this is okay. */
                            0, accept.requestID).setDigestRequest().putPaxosID(accept.getPaxosID(),
                                    accept.getVersion())));
        } catch (JSONException | IOException e) {
            e.printStackTrace();
        }
    } catch (Exception e) {
        // last-resort guard: log, print, and return normally
        log.severe(this + " incurred exception on digested accept timeout");
        e.printStackTrace();
    }
}

/**
 * Callback for a request that timed out. Releases the accept pending on
 * this request, if any, and sends it to this node; then subtracts the
 * request's length estimate from the outstanding request size and logs
 * the outcome.
 *
 * @param request
 *            the request that timed out
 */
private void callbackRequestTimeout(RequestPacket request) {
    PaxosPacket accept = this.release(request);
    final boolean releasedAccept = accept != null;

    if (releasedAccept) {
        try {
            this.send(new MessagingTask(this.getMyID(), accept), false, false);
        } catch (JSONException | IOException e) {
            log.severe(this + " incurred exception while expiring request and releasing accept "
                    + accept.getSummary());
            e.printStackTrace();
        }
    }

    this.outstanding.totalRequestSize -= request.lengthEstimate();

    // INFO when an accept was released, FINE otherwise
    Level level = releasedAccept ? Level.INFO : Level.FINE;
    String format = "{0} garbage collected enqueued request {1}"
            + (releasedAccept ? "; released accept {2}" : "");
    log.log(level, format,
            new Object[] { this, request.getSummary(log.isLoggable(level)),
                    releasedAccept ? accept.getSummary(log.isLoggable(level)) : "" });
}

/**
 * @param nodes
 * @return Modified argument after filtering out dead nodes using
 *         information from its failure detector.
 */
public Set<NodeIDType> removeDead(Set<NodeIDType> nodes) {
    Iterator<NodeIDType> cursor = nodes.iterator();
    while (cursor.hasNext()) {
        NodeIDType candidate = cursor.next();
        if (!this.FD.isNodeUp(candidate)) {
            cursor.remove();
        }
    }
    return nodes;
}
}