Java tutorial
/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.hbase.procedure2; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.HashSet; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.locks.ReentrantLock; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.CopyOnWriteArrayList; import java.util.concurrent.TimeUnit; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.hbase.classification.InterfaceStability; import org.apache.hadoop.hbase.procedure2.store.ProcedureStore; import org.apache.hadoop.hbase.procedure2.store.ProcedureStore.ProcedureIterator; import org.apache.hadoop.hbase.procedure2.util.StringUtils; import 
org.apache.hadoop.hbase.procedure2.util.TimeoutBlockingQueue; import org.apache.hadoop.hbase.procedure2.util.TimeoutBlockingQueue.TimeoutRetriever; import org.apache.hadoop.hbase.protobuf.generated.ProcedureProtos.ProcedureState; import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; import org.apache.hadoop.hbase.util.NonceKey; import org.apache.hadoop.hbase.util.Pair; import com.google.common.base.Preconditions; /** * Thread Pool that executes the submitted procedures. * The executor has a ProcedureStore associated. * Each operation is logged and on restart the pending procedures are resumed. * * Unless the Procedure code throws an error (e.g. invalid user input) * the procedure will complete (at some point in time), On restart the pending * procedures are resumed and the once failed will be rolledback. * * The user can add procedures to the executor via submitProcedure(proc) * check for the finished state via isFinished(procId) * and get the result via getResult(procId) */ @InterfaceAudience.Private @InterfaceStability.Evolving public class ProcedureExecutor<TEnvironment> { private static final Log LOG = LogFactory.getLog(ProcedureExecutor.class); Testing testing = null; public static class Testing { protected boolean killBeforeStoreUpdate = false; protected boolean toggleKillBeforeStoreUpdate = false; protected boolean shouldKillBeforeStoreUpdate() { final boolean kill = this.killBeforeStoreUpdate; if (this.toggleKillBeforeStoreUpdate) { this.killBeforeStoreUpdate = !kill; LOG.warn("Toggle Kill before store update to: " + this.killBeforeStoreUpdate); } return kill; } } public interface ProcedureExecutorListener { void procedureLoaded(long procId); void procedureAdded(long procId); void procedureFinished(long procId); } /** * Used by the TimeoutBlockingQueue to get the timeout interval of the procedure */ private static class ProcedureTimeoutRetriever implements TimeoutRetriever<Procedure> { @Override public long getTimeout(Procedure proc) { return 
proc.getTimeRemaining(); } @Override public TimeUnit getTimeUnit(Procedure proc) { return TimeUnit.MILLISECONDS; } } /** * Internal cleaner that removes the completed procedure results after a TTL. * NOTE: This is a special case handled in timeoutLoop(). * * Since the client code looks more or less like: * procId = master.doOperation() * while (master.getProcResult(procId) == ProcInProgress); * The master should not throw away the proc result as soon as the procedure is done * but should wait a result request from the client (see executor.removeResult(procId)) * The client will call something like master.isProcDone() or master.getProcResult() * which will return the result/state to the client, and it will mark the completed * proc as ready to delete. note that the client may not receive the response from * the master (e.g. master failover) so, if we delay a bit the real deletion of * the proc result the client will be able to get the result the next try. */ private static class CompletedProcedureCleaner<TEnvironment> extends Procedure<TEnvironment> { private static final Log LOG = LogFactory.getLog(CompletedProcedureCleaner.class); private static final String CLEANER_INTERVAL_CONF_KEY = "hbase.procedure.cleaner.interval"; private static final int DEFAULT_CLEANER_INTERVAL = 30 * 1000; // 30sec private static final String EVICT_TTL_CONF_KEY = "hbase.procedure.cleaner.evict.ttl"; private static final int DEFAULT_EVICT_TTL = 15 * 60000; // 15min private static final String EVICT_ACKED_TTL_CONF_KEY = "hbase.procedure.cleaner.acked.evict.ttl"; private static final int DEFAULT_ACKED_EVICT_TTL = 5 * 60000; // 5min private final Map<Long, ProcedureResult> completed; private final Map<NonceKey, Long> nonceKeysToProcIdsMap; private final ProcedureStore store; private final Configuration conf; public CompletedProcedureCleaner(final Configuration conf, final ProcedureStore store, final Map<Long, ProcedureResult> completedMap, final Map<NonceKey, Long> nonceKeysToProcIdsMap) { 
// set the timeout interval that triggers the periodic-procedure setTimeout(conf.getInt(CLEANER_INTERVAL_CONF_KEY, DEFAULT_CLEANER_INTERVAL)); this.completed = completedMap; this.nonceKeysToProcIdsMap = nonceKeysToProcIdsMap; this.store = store; this.conf = conf; } public void periodicExecute(final TEnvironment env) { if (completed.isEmpty()) { if (LOG.isTraceEnabled()) { LOG.trace("No completed procedures to cleanup."); } return; } final long evictTtl = conf.getInt(EVICT_TTL_CONF_KEY, DEFAULT_EVICT_TTL); final long evictAckTtl = conf.getInt(EVICT_ACKED_TTL_CONF_KEY, DEFAULT_ACKED_EVICT_TTL); long now = EnvironmentEdgeManager.currentTime(); Iterator<Map.Entry<Long, ProcedureResult>> it = completed.entrySet().iterator(); while (it.hasNext() && store.isRunning()) { Map.Entry<Long, ProcedureResult> entry = it.next(); ProcedureResult result = entry.getValue(); // TODO: Select TTL based on Procedure type if ((result.hasClientAckTime() && (now - result.getClientAckTime()) >= evictAckTtl) || (now - result.getLastUpdate()) >= evictTtl) { if (LOG.isDebugEnabled()) { LOG.debug("Evict completed procedure " + entry.getKey()); } store.delete(entry.getKey()); it.remove(); NonceKey nonceKey = result.getNonceKey(); if (nonceKey != null) { nonceKeysToProcIdsMap.remove(nonceKey); } } } } @Override protected Procedure[] execute(final TEnvironment env) { throw new UnsupportedOperationException(); } @Override protected void rollback(final TEnvironment env) { throw new UnsupportedOperationException(); } @Override protected boolean abort(final TEnvironment env) { throw new UnsupportedOperationException(); } @Override public void serializeStateData(final OutputStream stream) { throw new UnsupportedOperationException(); } @Override public void deserializeStateData(final InputStream stream) { throw new UnsupportedOperationException(); } } /** * Map the the procId returned by submitProcedure(), the Root-ProcID, to the ProcedureResult. 
* Once a Root-Procedure completes (success or failure), the result will be added to this map.
   * The user of ProcedureExecutor should call getResult(procId) to get the result.
   */
  private final ConcurrentHashMap<Long, ProcedureResult> completed =
    new ConcurrentHashMap<Long, ProcedureResult>();

  /**
   * Map the procId returned by submitProcedure(), the Root-ProcID, to the RootProcedureState.
   * The RootProcedureState contains the execution stack of the Root-Procedure.
   * It is added to the map by submitProcedure() and removed on procedure completion.
   */
  private final ConcurrentHashMap<Long, RootProcedureState> rollbackStack =
    new ConcurrentHashMap<Long, RootProcedureState>();

  /**
   * Helper map to look up the live procedures by ID.
   * This map contains every procedure: root-procedures and subprocedures.
   */
  private final ConcurrentHashMap<Long, Procedure> procedures =
    new ConcurrentHashMap<Long, Procedure>();

  /**
   * Helper map to look up whether a procedure was already issued by the same client.
   * Keyed by the request nonce, the value is the procId assigned to that request.
   * This map contains every root procedure.
   */
  private ConcurrentHashMap<NonceKey, Long> nonceKeysToProcIdsMap =
      new ConcurrentHashMap<NonceKey, Long>();

  /**
   * Timeout Queue that contains Procedures in a WAITING_TIMEOUT state
   * or periodic procedures.
   */
  private final TimeoutBlockingQueue<Procedure> waitingTimeout =
    new TimeoutBlockingQueue<Procedure>(new ProcedureTimeoutRetriever());

  /**
   * Queue that contains runnable procedures.
*/ private final ProcedureRunnableSet runnables; // TODO private final ReentrantLock submitLock = new ReentrantLock(); private final AtomicLong lastProcId = new AtomicLong(-1); private final CopyOnWriteArrayList<ProcedureExecutorListener> listeners = new CopyOnWriteArrayList<ProcedureExecutorListener>(); private final AtomicInteger activeExecutorCount = new AtomicInteger(0); private final AtomicBoolean running = new AtomicBoolean(false); private final TEnvironment environment; private final ProcedureStore store; private final Configuration conf; private Thread[] threads; public ProcedureExecutor(final Configuration conf, final TEnvironment environment, final ProcedureStore store) { this(conf, environment, store, new ProcedureSimpleRunQueue()); } public ProcedureExecutor(final Configuration conf, final TEnvironment environment, final ProcedureStore store, final ProcedureRunnableSet runqueue) { this.environment = environment; this.runnables = runqueue; this.store = store; this.conf = conf; } private void load(final boolean abortOnCorruption) throws IOException { Preconditions.checkArgument(completed.isEmpty()); Preconditions.checkArgument(rollbackStack.isEmpty()); Preconditions.checkArgument(procedures.isEmpty()); Preconditions.checkArgument(waitingTimeout.isEmpty()); Preconditions.checkArgument(runnables.size() == 0); store.load(new ProcedureStore.ProcedureLoader() { @Override public void setMaxProcId(long maxProcId) { assert lastProcId.get() < 0 : "expected only one call to setMaxProcId()"; LOG.debug("load procedures maxProcId=" + maxProcId); lastProcId.set(maxProcId); } @Override public void load(ProcedureIterator procIter) throws IOException { loadProcedures(procIter, abortOnCorruption); } @Override public void handleCorrupted(ProcedureIterator procIter) throws IOException { int corruptedCount = 0; while (procIter.hasNext()) { Procedure proc = procIter.next(); LOG.error("corrupted procedure: " + proc); corruptedCount++; } if (abortOnCorruption && corruptedCount > 
0) { throw new IOException("found " + corruptedCount + " procedures on replay"); } } }); } private void loadProcedures(final ProcedureIterator procIter, final boolean abortOnCorruption) throws IOException { // 1. Build the rollback stack int runnablesCount = 0; while (procIter.hasNext()) { Procedure proc = procIter.next(); if (!proc.hasParent() && !proc.isFinished()) { rollbackStack.put(proc.getProcId(), new RootProcedureState()); } // add the procedure to the map proc.beforeReplay(getEnvironment()); procedures.put(proc.getProcId(), proc); // add the nonce to the map if (proc.getNonceKey() != null) { nonceKeysToProcIdsMap.put(proc.getNonceKey(), proc.getProcId()); } if (proc.getState() == ProcedureState.RUNNABLE) { runnablesCount++; } } // 2. Initialize the stacks ArrayList<Procedure> runnableList = new ArrayList(runnablesCount); HashSet<Procedure> waitingSet = null; procIter.reset(); while (procIter.hasNext()) { Procedure proc = procIter.next(); if (LOG.isDebugEnabled()) { LOG.debug(String.format("Loading procedure state=%s isFailed=%s: %s", proc.getState(), proc.hasException(), proc)); } Long rootProcId = getRootProcedureId(proc); if (rootProcId == null) { // The 'proc' was ready to run but the root procedure was rolledback? 
runnables.addBack(proc); continue; } if (!proc.hasParent() && proc.isFinished()) { if (LOG.isDebugEnabled()) { LOG.debug(String.format("The procedure is completed state=%s isFailed=%s", proc.getState(), proc.hasException())); } assert !rollbackStack.containsKey(proc.getProcId()); procedures.remove(proc.getProcId()); completed.put(proc.getProcId(), newResultFromProcedure(proc)); continue; } if (proc.hasParent() && !proc.isFinished()) { Procedure parent = procedures.get(proc.getParentProcId()); // corrupted procedures are handled later at step 3 if (parent != null) { parent.incChildrenLatch(); } } RootProcedureState procStack = rollbackStack.get(rootProcId); procStack.loadStack(proc); switch (proc.getState()) { case RUNNABLE: runnableList.add(proc); break; case WAITING_TIMEOUT: if (waitingSet == null) { waitingSet = new HashSet<Procedure>(); } waitingSet.add(proc); break; case FINISHED: if (proc.hasException()) { // add the proc to the runnables to perform the rollback runnables.addBack(proc); break; } case ROLLEDBACK: case INITIALIZING: String msg = "Unexpected " + proc.getState() + " state for " + proc; LOG.error(msg); throw new UnsupportedOperationException(msg); default: break; } } // 3. Validate the stacks int corruptedCount = 0; Iterator<Map.Entry<Long, RootProcedureState>> itStack = rollbackStack.entrySet().iterator(); while (itStack.hasNext()) { Map.Entry<Long, RootProcedureState> entry = itStack.next(); RootProcedureState procStack = entry.getValue(); if (procStack.isValid()) continue; for (Procedure proc : procStack.getSubprocedures()) { LOG.error("corrupted procedure: " + proc); procedures.remove(proc.getProcId()); runnableList.remove(proc); if (waitingSet != null) waitingSet.remove(proc); corruptedCount++; } itStack.remove(); } if (abortOnCorruption && corruptedCount > 0) { throw new IOException("found " + corruptedCount + " procedures on replay"); } // 4. 
Push the runnables if (!runnableList.isEmpty()) { // TODO: See ProcedureWALFormatReader#hasFastStartSupport // some procedure may be started way before this stuff. for (int i = runnableList.size() - 1; i >= 0; --i) { Procedure proc = runnableList.get(i); if (!proc.hasParent()) { sendProcedureLoadedNotification(proc.getProcId()); } if (proc.wasExecuted()) { runnables.addFront(proc); } else { // if it was not in execution, it can wait. runnables.addBack(proc); } } } } /** * Start the procedure executor. * It calls ProcedureStore.recoverLease() and ProcedureStore.load() to * recover the lease, and ensure a single executor, and start the procedure * replay to resume and recover the previous pending and in-progress perocedures. * * @param numThreads number of threads available for procedure execution. * @param abortOnCorruption true if you want to abort your service in case * a corrupted procedure is found on replay. otherwise false. */ public void start(int numThreads, boolean abortOnCorruption) throws IOException { if (running.getAndSet(true)) { LOG.warn("Already running"); return; } // We have numThreads executor + one timer thread used for timing out // procedures and triggering periodic procedures. threads = new Thread[numThreads + 1]; LOG.info("Starting procedure executor threads=" + threads.length); // Initialize procedures executor for (int i = 0; i < numThreads; ++i) { threads[i] = new Thread("ProcedureExecutor-" + i) { @Override public void run() { execLoop(); } }; } // Initialize procedures timeout handler (this is the +1 thread) threads[numThreads] = new Thread("ProcedureExecutorTimeoutThread") { @Override public void run() { timeoutLoop(); } }; // Acquire the store lease. store.recoverLease(); // TODO: Split in two steps. // TODO: Handle corrupted procedures (currently just a warn) // The first one will make sure that we have the latest id, // so we can start the threads and accept new procedures. 
// The second step will do the actual load of old procedures. load(abortOnCorruption); // Start the executors. Here we must have the lastProcId set. for (int i = 0; i < threads.length; ++i) { threads[i].start(); } // Add completed cleaner waitingTimeout.add(new CompletedProcedureCleaner(conf, store, completed, nonceKeysToProcIdsMap)); } public void stop() { if (!running.getAndSet(false)) { return; } LOG.info("Stopping the procedure executor"); runnables.signalAll(); waitingTimeout.signalAll(); } public void join() { boolean interrupted = false; for (int i = 0; i < threads.length; ++i) { try { threads[i].join(); } catch (InterruptedException ex) { interrupted = true; } } if (interrupted) { Thread.currentThread().interrupt(); } completed.clear(); rollbackStack.clear(); procedures.clear(); nonceKeysToProcIdsMap.clear(); waitingTimeout.clear(); runnables.clear(); lastProcId.set(-1); } public boolean isRunning() { return running.get(); } /** * @return the number of execution threads. */ public int getNumThreads() { return threads == null ? 0 : (threads.length - 1); } public int getActiveExecutorCount() { return activeExecutorCount.get(); } public TEnvironment getEnvironment() { return this.environment; } public ProcedureStore getStore() { return this.store; } public void registerListener(ProcedureExecutorListener listener) { this.listeners.add(listener); } public boolean unregisterListener(ProcedureExecutorListener listener) { return this.listeners.remove(listener); } /** * Add a new root-procedure to the executor. * @param proc the new procedure to execute. * @return the procedure id, that can be used to monitor the operation */ public long submitProcedure(final Procedure proc) { return submitProcedure(proc, HConstants.NO_NONCE, HConstants.NO_NONCE); } /** * Add a new root-procedure to the executor. * @param proc the new procedure to execute. 
* @param nonceGroup * @param nonce * @return the procedure id, that can be used to monitor the operation */ public long submitProcedure(final Procedure proc, final long nonceGroup, final long nonce) { Preconditions.checkArgument(proc.getState() == ProcedureState.INITIALIZING); Preconditions.checkArgument(isRunning()); Preconditions.checkArgument(lastProcId.get() >= 0); Preconditions.checkArgument(!proc.hasParent()); Long currentProcId; // The following part of the code has to be synchronized to prevent multiple request // with the same nonce to execute at the same time. synchronized (this) { // Check whether the proc exists. If exist, just return the proc id. // This is to prevent the same proc to submit multiple times (it could happen // when client could not talk to server and resubmit the same request). NonceKey noncekey = null; if (nonce != HConstants.NO_NONCE) { noncekey = new NonceKey(nonceGroup, nonce); currentProcId = nonceKeysToProcIdsMap.get(noncekey); if (currentProcId != null) { // Found the proc return currentProcId; } } // Initialize the Procedure ID currentProcId = nextProcId(); proc.setProcId(currentProcId); // This is new procedure. Set the noncekey and insert into the map. if (noncekey != null) { proc.setNonceKey(noncekey); nonceKeysToProcIdsMap.put(noncekey, currentProcId); } } // end of synchronized (this) // Commit the transaction store.insert(proc, null); if (LOG.isDebugEnabled()) { LOG.debug("Procedure " + proc + " added to the store."); } // Create the rollback stack for the procedure RootProcedureState stack = new RootProcedureState(); rollbackStack.put(currentProcId, stack); // Submit the new subprocedures assert !procedures.containsKey(currentProcId); procedures.put(currentProcId, proc); sendProcedureAddedNotification(currentProcId); runnables.addBack(proc); return currentProcId; } public ProcedureResult getResult(final long procId) { return completed.get(procId); } /** * Return true if the procedure is finished. 
* The state may be "completed successfully" or "failed and rolledback". * Use getResult() to check the state or get the result data. * @param procId the ID of the procedure to check * @return true if the procedure execution is finished, otherwise false. */ public boolean isFinished(final long procId) { return completed.containsKey(procId); } /** * Return true if the procedure is started. * @param procId the ID of the procedure to check * @return true if the procedure execution is started, otherwise false. */ public boolean isStarted(final long procId) { Procedure proc = procedures.get(procId); if (proc == null) { return completed.get(procId) != null; } return proc.wasExecuted(); } /** * Mark the specified completed procedure, as ready to remove. * @param procId the ID of the procedure to remove */ public void removeResult(final long procId) { ProcedureResult result = completed.get(procId); if (result == null) { assert !procedures.containsKey(procId) : "procId=" + procId + " is still running"; if (LOG.isDebugEnabled()) { LOG.debug("Procedure procId=" + procId + " already removed by the cleaner."); } return; } // The CompletedProcedureCleaner will take care of deletion, once the TTL is expired. result.setClientAckTime(EnvironmentEdgeManager.currentTime()); } /** * Send an abort notification the specified procedure. * Depending on the procedure implementation the abort can be considered or ignored. * @param procId the procedure to abort * @return true if the procedure exist and has received the abort, otherwise false. */ public boolean abort(final long procId) { return abort(procId, true); } /** * Send an abort notification the specified procedure. * Depending on the procedure implementation the abort can be considered or ignored. * @param procId the procedure to abort * @param mayInterruptIfRunning if the proc completed at least one step, should it be aborted? * @return true if the procedure exist and has received the abort, otherwise false. 
*/ public boolean abort(final long procId, final boolean mayInterruptIfRunning) { Procedure proc = procedures.get(procId); if (proc != null) { if (!mayInterruptIfRunning && proc.wasExecuted()) { return false; } else { return proc.abort(getEnvironment()); } } return false; } public Map<Long, ProcedureResult> getResults() { return Collections.unmodifiableMap(completed); } public Procedure getProcedure(final long procId) { return procedures.get(procId); } protected ProcedureRunnableSet getRunnableSet() { return runnables; } /** * Execution loop (N threads) * while the executor is in a running state, * fetch a procedure from the runnables queue and start the execution. */ private void execLoop() { while (isRunning()) { Long procId = runnables.poll(); Procedure proc = procId != null ? procedures.get(procId) : null; if (proc == null) continue; try { activeExecutorCount.incrementAndGet(); execLoop(proc); } finally { activeExecutorCount.decrementAndGet(); } } } private void execLoop(Procedure proc) { if (LOG.isTraceEnabled()) { LOG.trace("Trying to start the execution of " + proc); } Long rootProcId = getRootProcedureId(proc); if (rootProcId == null) { // The 'proc' was ready to run but the root procedure was rolledback executeRollback(proc); return; } RootProcedureState procStack = rollbackStack.get(rootProcId); if (procStack == null) return; do { // Try to acquire the execution if (!procStack.acquire(proc)) { if (procStack.setRollback()) { // we have the 'rollback-lock' we can start rollingback if (!executeRollback(rootProcId, procStack)) { procStack.unsetRollback(); runnables.yield(proc); } } else { // if we can't rollback means that some child is still running. // the rollback will be executed after all the children are done. // If the procedure was never executed, remove and mark it as rolledback. 
if (!proc.wasExecuted()) { if (!executeRollback(proc)) { runnables.yield(proc); } } } break; } // Execute the procedure assert proc.getState() == ProcedureState.RUNNABLE; if (proc.acquireLock(getEnvironment())) { execProcedure(procStack, proc); proc.releaseLock(getEnvironment()); } else { runnables.yield(proc); } procStack.release(proc); // allows to kill the executor before something is stored to the wal. // useful to test the procedure recovery. if (testing != null && !isRunning()) { break; } if (proc.getProcId() == rootProcId && proc.isSuccess()) { // Finalize the procedure state if (LOG.isDebugEnabled()) { LOG.debug("Procedure completed in " + StringUtils.humanTimeDiff(proc.elapsedTime()) + ": " + proc); } procedureFinished(proc); break; } // if the procedure is kind enough to pass the slot to someone else, yield if (proc.isYieldAfterExecutionStep(getEnvironment())) { runnables.yield(proc); break; } } while (procStack.isFailed()); } private void timeoutLoop() { while (isRunning()) { Procedure proc = waitingTimeout.poll(); if (proc == null) continue; if (proc.getTimeRemaining() > 100) { // got an early wake, maybe a stop? // re-enqueue the task in case was not a stop or just a signal waitingTimeout.add(proc); continue; } // ---------------------------------------------------------------------------- // TODO-MAYBE: Should we provide a notification to the store with the // full set of procedures pending and completed to write a compacted // version of the log (in case is a log)? // In theory no, procedures are have a short life, so at some point the store // will have the tracker saying everything is in the last log. // ---------------------------------------------------------------------------- // The CompletedProcedureCleaner is a special case, and it acts as a chore. // instead of bringing the Chore class in, we reuse this timeout thread for // this special case. 
if (proc instanceof CompletedProcedureCleaner) { try { ((CompletedProcedureCleaner) proc).periodicExecute(getEnvironment()); } catch (Throwable e) { LOG.error("Ignoring CompletedProcedureCleaner exception: " + e.getMessage(), e); } proc.setStartTime(EnvironmentEdgeManager.currentTime()); waitingTimeout.add(proc); continue; } // The procedure received an "abort-timeout", call abort() and // add the procedure back in the queue for rollback. if (proc.setTimeoutFailure()) { long rootProcId = Procedure.getRootProcedureId(procedures, proc); RootProcedureState procStack = rollbackStack.get(rootProcId); procStack.abort(); store.update(proc); runnables.addFront(proc); continue; } } } /** * Execute the rollback of the full procedure stack. * Once the procedure is rolledback, the root-procedure will be visible as * finished to user, and the result will be the fatal exception. */ private boolean executeRollback(final long rootProcId, final RootProcedureState procStack) { Procedure rootProc = procedures.get(rootProcId); RemoteProcedureException exception = rootProc.getException(); if (exception == null) { exception = procStack.getException(); rootProc.setFailure(exception); store.update(rootProc); } List<Procedure> subprocStack = procStack.getSubprocedures(); assert subprocStack != null : "Called rollback with no steps executed rootProc=" + rootProc; int stackTail = subprocStack.size(); boolean reuseLock = false; while (stackTail-- > 0) { final Procedure proc = subprocStack.get(stackTail); if (!reuseLock && !proc.acquireLock(getEnvironment())) { // can't take a lock on the procedure, add the root-proc back on the // queue waiting for the lock availability return false; } boolean abortRollback = !executeRollback(proc); abortRollback |= !isRunning() || !store.isRunning(); // If the next procedure is the same to this one // (e.g. 
StateMachineProcedure reuse the same instance) // we can avoid to lock/unlock each step reuseLock = stackTail > 0 && (subprocStack.get(stackTail - 1) == proc) && !abortRollback; if (!reuseLock) { proc.releaseLock(getEnvironment()); } // allows to kill the executor before something is stored to the wal. // useful to test the procedure recovery. if (abortRollback) { return false; } subprocStack.remove(stackTail); // if the procedure is kind enough to pass the slot to someone else, yield if (proc.isYieldAfterExecutionStep(getEnvironment())) { return false; } } // Finalize the procedure state LOG.info("Rolledback procedure " + rootProc + " exec-time=" + StringUtils.humanTimeDiff(rootProc.elapsedTime()) + " exception=" + exception.getMessage()); procedureFinished(rootProc); return true; } /** * Execute the rollback of the procedure step. * It updates the store with the new state (stack index) * or will remove completly the procedure in case it is a child. */ private boolean executeRollback(final Procedure proc) { try { proc.doRollback(getEnvironment()); } catch (IOException e) { if (LOG.isDebugEnabled()) { LOG.debug("rollback attempt failed for " + proc, e); } return false; } catch (InterruptedException e) { handleInterruptedException(proc, e); return false; } catch (Throwable e) { // Catch NullPointerExceptions or similar errors... LOG.fatal("CODE-BUG: Uncatched runtime exception for procedure: " + proc, e); } // allows to kill the executor before something is stored to the wal. // useful to test the procedure recovery. 
if (testing != null && testing.shouldKillBeforeStoreUpdate()) { LOG.debug("TESTING: Kill before store update"); stop(); return false; } if (proc.removeStackIndex()) { proc.setState(ProcedureState.ROLLEDBACK); if (proc.hasParent()) { store.delete(proc.getProcId()); procedures.remove(proc.getProcId()); } else { store.update(proc); } } else { store.update(proc); } return true; } /** * Executes the specified procedure * - calls the doExecute() of the procedure * - if the procedure execution didn't fail (e.g. invalid user input) * - ...and returned subprocedures * - the subprocedures are initialized. * - the subprocedures are added to the store * - the subprocedures are added to the runnable queue * - the procedure is now in a WAITING state, waiting for the subprocedures to complete * - ...if there are no subprocedure * - the procedure completed successfully * - if there is a parent (WAITING) * - the parent state will be set to RUNNABLE * - in case of failure * - the store is updated with the new state * - the executor (caller of this method) will start the rollback of the procedure */ private void execProcedure(final RootProcedureState procStack, final Procedure procedure) { Preconditions.checkArgument(procedure.getState() == ProcedureState.RUNNABLE); // Execute the procedure boolean reExecute = false; Procedure[] subprocs = null; do { reExecute = false; try { subprocs = procedure.doExecute(getEnvironment()); if (subprocs != null && subprocs.length == 0) { subprocs = null; } } catch (ProcedureYieldException e) { if (LOG.isTraceEnabled()) { LOG.trace("Yield procedure: " + procedure + ": " + e.getMessage()); } runnables.yield(procedure); return; } catch (InterruptedException e) { handleInterruptedException(procedure, e); runnables.yield(procedure); return; } catch (Throwable e) { // Catch NullPointerExceptions or similar errors... 
String msg = "CODE-BUG: Uncatched runtime exception for procedure: " + procedure; LOG.error(msg, e); procedure.setFailure(new RemoteProcedureException(msg, e)); } if (!procedure.isFailed()) { if (subprocs != null) { if (subprocs.length == 1 && subprocs[0] == procedure) { // quick-shortcut for a state machine like procedure subprocs = null; reExecute = true; } else { // yield the current procedure, and make the subprocedure runnable for (int i = 0; i < subprocs.length; ++i) { Procedure subproc = subprocs[i]; if (subproc == null) { String msg = "subproc[" + i + "] is null, aborting the procedure"; procedure.setFailure( new RemoteProcedureException(msg, new IllegalArgumentException(msg))); subprocs = null; break; } assert subproc.getState() == ProcedureState.INITIALIZING; subproc.setParentProcId(procedure.getProcId()); subproc.setProcId(nextProcId()); } if (!procedure.isFailed()) { procedure.setChildrenLatch(subprocs.length); switch (procedure.getState()) { case RUNNABLE: procedure.setState(ProcedureState.WAITING); break; case WAITING_TIMEOUT: waitingTimeout.add(procedure); break; default: break; } } } } else if (procedure.getState() == ProcedureState.WAITING_TIMEOUT) { waitingTimeout.add(procedure); } else { // No subtask, so we are done procedure.setState(ProcedureState.FINISHED); } } // Add the procedure to the stack procStack.addRollbackStep(procedure); // allows to kill the executor before something is stored to the wal. // useful to test the procedure recovery. 
if (testing != null && testing.shouldKillBeforeStoreUpdate()) { LOG.debug("TESTING: Kill before store update"); stop(); return; } // Commit the transaction if (subprocs != null && !procedure.isFailed()) { if (LOG.isTraceEnabled()) { LOG.trace("Store add " + procedure + " children " + Arrays.toString(subprocs)); } store.insert(procedure, subprocs); } else { if (LOG.isTraceEnabled()) { LOG.trace("Store update " + procedure); } store.update(procedure); } // if the store is not running we are aborting if (!store.isRunning()) { return; } // if the procedure is kind enough to pass the slot to someone else, yield if (reExecute && procedure.isYieldAfterExecutionStep(getEnvironment())) { return; } assert (reExecute && subprocs == null) || !reExecute; } while (reExecute); // Submit the new subprocedures if (subprocs != null && !procedure.isFailed()) { for (int i = 0; i < subprocs.length; ++i) { Procedure subproc = subprocs[i]; assert !procedures.containsKey(subproc.getProcId()); procedures.put(subproc.getProcId(), subproc); runnables.addFront(subproc); } } if (procedure.isFinished() && procedure.hasParent()) { Procedure parent = procedures.get(procedure.getParentProcId()); if (parent == null) { assert procStack.isRollingback(); return; } // If this procedure is the last child awake the parent procedure if (LOG.isTraceEnabled()) { LOG.trace(parent + " child is done: " + procedure); } if (parent.childrenCountDown() && parent.getState() == ProcedureState.WAITING) { parent.setState(ProcedureState.RUNNABLE); store.update(parent); runnables.addFront(parent); if (LOG.isTraceEnabled()) { LOG.trace(parent + " all the children finished their work, resume."); } return; } } } private void handleInterruptedException(final Procedure proc, final InterruptedException e) { if (LOG.isTraceEnabled()) { LOG.trace("got an interrupt during " + proc + ". suspend and retry it later.", e); } // NOTE: We don't call Thread.currentThread().interrupt() // because otherwise all the subsequent calls e.g. 
Thread.sleep() will throw // the InterruptedException. If the master is going down, we will be notified // and the executor/store will be stopped. // (The interrupted procedure will be retried on the next run) } private void sendProcedureLoadedNotification(final long procId) { if (!this.listeners.isEmpty()) { for (ProcedureExecutorListener listener : this.listeners) { try { listener.procedureLoaded(procId); } catch (Throwable e) { LOG.error("The listener " + listener + " had an error: " + e.getMessage(), e); } } } } private void sendProcedureAddedNotification(final long procId) { if (!this.listeners.isEmpty()) { for (ProcedureExecutorListener listener : this.listeners) { try { listener.procedureAdded(procId); } catch (Throwable e) { LOG.error("The listener " + listener + " had an error: " + e.getMessage(), e); } } } } private void sendProcedureFinishedNotification(final long procId) { if (!this.listeners.isEmpty()) { for (ProcedureExecutorListener listener : this.listeners) { try { listener.procedureFinished(procId); } catch (Throwable e) { LOG.error("The listener " + listener + " had an error: " + e.getMessage(), e); } } } } private long nextProcId() { long procId = lastProcId.incrementAndGet(); if (procId < 0) { while (!lastProcId.compareAndSet(procId, 0)) { procId = lastProcId.get(); if (procId >= 0) break; } while (procedures.containsKey(procId)) { procId = lastProcId.incrementAndGet(); } } return procId; } private Long getRootProcedureId(Procedure proc) { return Procedure.getRootProcedureId(procedures, proc); } private void procedureFinished(final Procedure proc) { // call the procedure completion cleanup handler try { proc.completionCleanup(getEnvironment()); } catch (Throwable e) { // Catch NullPointerExceptions or similar errors... 
LOG.error("CODE-BUG: uncatched runtime exception for procedure: " + proc, e); } // update the executor internal state maps completed.put(proc.getProcId(), newResultFromProcedure(proc)); rollbackStack.remove(proc.getProcId()); procedures.remove(proc.getProcId()); // call the runnableSet completion cleanup handler try { runnables.completionCleanup(proc); } catch (Throwable e) { // Catch NullPointerExceptions or similar errors... LOG.error("CODE-BUG: uncatched runtime exception for runnableSet: " + runnables, e); } // Notify the listeners sendProcedureFinishedNotification(proc.getProcId()); } public Pair<ProcedureResult, Procedure> getResultOrProcedure(final long procId) { ProcedureResult result = completed.get(procId); Procedure proc = null; if (result == null) { proc = procedures.get(procId); if (proc == null) { result = completed.get(procId); } } return new Pair(result, proc); } private static ProcedureResult newResultFromProcedure(final Procedure proc) { if (proc.isFailed()) { return new ProcedureResult(proc.getNonceKey(), proc.getStartTime(), proc.getLastUpdate(), proc.getException()); } return new ProcedureResult(proc.getNonceKey(), proc.getStartTime(), proc.getLastUpdate(), proc.getResult()); } }