edu.brown.hstore.PartitionExecutor.java Source code

Introduction

Here is the source code for edu.brown.hstore.PartitionExecutor.java

Source

/***************************************************************************
 *   Copyright (C) 2012 by H-Store Project                                 *
 *   Brown University                                                      *
 *   Massachusetts Institute of Technology                                 *
 *   Yale University                                                       *
 *                                                                         *
 *   Permission is hereby granted, free of charge, to any person obtaining *
 *   a copy of this software and associated documentation files (the       *
 *   "Software"), to deal in the Software without restriction, including   *
 *   without limitation the rights to use, copy, modify, merge, publish,   *
 *   distribute, sublicense, and/or sell copies of the Software, and to    *
 *   permit persons to whom the Software is furnished to do so, subject to *
 *   the following conditions:                                             *
 *                                                                         *
 *   The above copyright notice and this permission notice shall be        *
 *   included in all copies or substantial portions of the Software.       *
 *                                                                         *
 *   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       *
 *   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    *
 *   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*
 *   IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR     *
 *   OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, *
 *   ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR *
 *   OTHER DEALINGS IN THE SOFTWARE.                                       *
 ***************************************************************************/
/* This file is part of VoltDB.
 * Copyright (C) 2008-2010 VoltDB L.L.C.
 *
 * VoltDB is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * VoltDB is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with VoltDB.  If not, see <http://www.gnu.org/licenses/>.
 */
package edu.brown.hstore;

import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.BitSet;
import java.util.Collection;
import java.util.Deque;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.Map.Entry;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.LinkedBlockingDeque;
import java.util.concurrent.Semaphore;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.locks.ReentrantLock;

import org.apache.commons.collections15.map.ListOrderedMap;
import org.apache.log4j.Logger;
import org.voltdb.BackendTarget;
import org.voltdb.ClientResponseImpl;
import org.voltdb.DependencySet;
import org.voltdb.HsqlBackend;
import org.voltdb.ParameterSet;
import org.voltdb.SQLStmt;
import org.voltdb.SnapshotSiteProcessor;
import org.voltdb.SnapshotSiteProcessor.SnapshotTableTask;
import org.voltdb.VoltProcedure;
import org.voltdb.VoltProcedure.VoltAbortException;
import org.voltdb.VoltSystemProcedure;
import org.voltdb.VoltTable;
import org.voltdb.catalog.Catalog;
import org.voltdb.catalog.Cluster;
import org.voltdb.catalog.Database;
import org.voltdb.catalog.Host;
import org.voltdb.catalog.Partition;
import org.voltdb.catalog.PlanFragment;
import org.voltdb.catalog.Procedure;
import org.voltdb.catalog.Site;
import org.voltdb.catalog.Statement;
import org.voltdb.catalog.Table;
import org.voltdb.exceptions.ConstraintFailureException;
import org.voltdb.exceptions.EEException;
import org.voltdb.exceptions.MispredictionException;
import org.voltdb.exceptions.SQLException;
import org.voltdb.exceptions.SerializableException;
import org.voltdb.exceptions.ServerFaultException;
import org.voltdb.jni.ExecutionEngine;
import org.voltdb.jni.ExecutionEngineIPC;
import org.voltdb.jni.ExecutionEngineJNI;
import org.voltdb.jni.MockExecutionEngine;
import org.voltdb.messaging.FastDeserializer;
import org.voltdb.messaging.FastSerializer;
import org.voltdb.messaging.FinishTaskMessage;
import org.voltdb.messaging.FragmentTaskMessage;
import org.voltdb.messaging.InitiateTaskMessage;
import org.voltdb.messaging.PotentialSnapshotWorkMessage;
import org.voltdb.messaging.TransactionInfoBaseMessage;
import org.voltdb.messaging.VoltMessage;
import org.voltdb.utils.DBBPool;
import org.voltdb.utils.DBBPool.BBContainer;
import org.voltdb.utils.Encoder;
import org.voltdb.utils.EstTime;
import org.voltdb.utils.Pair;

import com.google.protobuf.ByteString;
import com.google.protobuf.RpcCallback;

import edu.brown.catalog.CatalogUtil;
import edu.brown.catalog.PlanFragmentIdGenerator;
import edu.brown.hstore.Hstoreservice.Status;
import edu.brown.hstore.Hstoreservice.TransactionPrefetchResult;
import edu.brown.hstore.Hstoreservice.TransactionWorkRequest;
import edu.brown.hstore.Hstoreservice.TransactionWorkResponse;
import edu.brown.hstore.Hstoreservice.WorkFragment;
import edu.brown.hstore.Hstoreservice.WorkResult;
import edu.brown.hstore.callbacks.TransactionFinishCallback;
import edu.brown.hstore.callbacks.TransactionPrepareCallback;
import edu.brown.hstore.conf.HStoreConf;
import edu.brown.hstore.dtxn.AbstractTransaction;
import edu.brown.hstore.dtxn.ExecutionState;
import edu.brown.hstore.dtxn.LocalTransaction;
import edu.brown.hstore.dtxn.MapReduceTransaction;
import edu.brown.hstore.dtxn.RemoteTransaction;
import edu.brown.hstore.executors.AggregateExecutor;
import edu.brown.hstore.executors.CombineExecutor;
import edu.brown.hstore.interfaces.Loggable;
import edu.brown.hstore.interfaces.Shutdownable;
import edu.brown.hstore.util.ArrayCache.IntArrayCache;
import edu.brown.hstore.util.ArrayCache.LongArrayCache;
import edu.brown.hstore.util.ParameterSetArrayCache;
import edu.brown.hstore.util.QueryCache;
import edu.brown.hstore.util.ThrottlingQueue;
import edu.brown.hstore.util.TransactionWorkRequestBuilder;
import edu.brown.logging.LoggerUtil;
import edu.brown.logging.LoggerUtil.LoggerBoolean;
import edu.brown.markov.EstimationThresholds;
import edu.brown.markov.MarkovEstimate;
import edu.brown.markov.MarkovGraph;
import edu.brown.markov.TransactionEstimator;
import edu.brown.utils.CollectionUtil;
import edu.brown.utils.EventObservable;
import edu.brown.utils.PartitionEstimator;
import edu.brown.utils.ProfileMeasurement;
import edu.brown.utils.StringUtil;
import edu.brown.utils.TypedPoolableObjectFactory;

/**
 * The main executor of transactional work in the system. Controls running
 * stored procedures and manages the execution engine's running of plan
 * fragments. Interacts with the DTXN system to get work to do. The thread might
 * do other things, but this is where the good stuff happens.
 * 
 * @author mimosally
 */
public class PartitionExecutor implements Runnable, Shutdownable, Loggable {
    private static final Logger LOG = Logger.getLogger(PartitionExecutor.class);
    private static final LoggerBoolean debug = new LoggerBoolean(LOG.isDebugEnabled());
    private static final LoggerBoolean trace = new LoggerBoolean(LOG.isTraceEnabled());
    private static boolean d;
    private static boolean t;
    static {
        LoggerUtil.attachObserver(LOG, debug, trace);
        d = debug.get();
        t = trace.get();
    }

    // ----------------------------------------------------------------------------
    // INTERNAL EXECUTION STATE
    // ----------------------------------------------------------------------------

    /**
     * The current execution mode for this PartitionExecutor. This defines what
     * level of speculative execution we have enabled.
     */
    protected enum ExecutionMode {
        /** Disable processing all transactions until... **/
        DISABLED,
        /** No speculative execution. All transactions are committed immediately **/
        COMMIT_ALL,
        /** Allow read-only txns to return results. **/
        COMMIT_READONLY,
        /**
         * All txn responses must wait until the current distributed txn is
         * committed
         **/
        COMMIT_NONE,
    };
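
    // A minimal sketch (not part of the original file) of how this mode is
    // typically consulted: a speculatively executed txn's ClientResponse may
    // only be released immediately under COMMIT_ALL, or under COMMIT_READONLY
    // when the txn has been read-only at this partition; otherwise it waits
    // in queued_responses until the current dtxn finishes. For illustration
    // (sendClientResponse is a hypothetical helper):
    //
    //   if (mode == ExecutionMode.COMMIT_ALL ||
    //       (mode == ExecutionMode.COMMIT_READONLY && ts.isExecReadOnly(partitionId))) {
    //       sendClientResponse(ts, cresponse);
    //   } else {
    //       queued_responses.add(new Pair<LocalTransaction, ClientResponseImpl>(ts, cresponse));
    //   }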

    // ----------------------------------------------------------------------------
    // GLOBAL CONSTANTS
    // ----------------------------------------------------------------------------

    /**
     * Create a new instance of the corresponding VoltProcedure for the given
     * Procedure catalog object
     */
    public class VoltProcedureFactory extends TypedPoolableObjectFactory<VoltProcedure> {
        private final Procedure catalog_proc;
        private final boolean has_java;
        private final Class<? extends VoltProcedure> proc_class;

        @SuppressWarnings("unchecked")
        public VoltProcedureFactory(Procedure catalog_proc) {
            super(hstore_conf.site.pool_profiling);
            this.catalog_proc = catalog_proc;
            this.has_java = this.catalog_proc.getHasjava();

            // Only try to load the Java class file for the SP if it has one
            Class<? extends VoltProcedure> p_class = null;
            if (catalog_proc.getHasjava()) {
                final String className = catalog_proc.getClassname();
                try {
                    p_class = (Class<? extends VoltProcedure>) Class.forName(className);
                } catch (final ClassNotFoundException e) {
                    LOG.fatal("Failed to load procedure class '" + className + "'", e);
                    System.exit(1);
                }
            }
            this.proc_class = p_class;

        }

        @Override
        public VoltProcedure makeObjectImpl() throws Exception {
            VoltProcedure volt_proc = null;
            try {
                if (this.has_java) {
                    volt_proc = (VoltProcedure) this.proc_class.newInstance();
                } else {
                    volt_proc = new VoltProcedure.StmtProcedure();
                }
                volt_proc.globalInit(PartitionExecutor.this, this.catalog_proc,
                        PartitionExecutor.this.backend_target, PartitionExecutor.this.hsql,
                        PartitionExecutor.this.p_estimator);
            } catch (Exception e) {
                if (d)
                    LOG.warn("Failed to created VoltProcedure instance for " + catalog_proc.getName(), e);
                throw e;
            }
            return (volt_proc);
        }
    };
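
    // Hypothetical usage sketch (not in the original file): the factory is a
    // TypedPoolableObjectFactory, so instances are normally checked out of an
    // object pool rather than created directly, e.g.
    //
    //   VoltProcedureFactory factory = new VoltProcedureFactory(catalog_proc);
    //   VoltProcedure volt_proc = factory.makeObjectImpl(); // the pool calls this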

    /**
     * Procedure Name -> VoltProcedure
     */
    private final Map<String, VoltProcedure> procedures = new HashMap<String, VoltProcedure>(16, (float) .1);

    // ----------------------------------------------------------------------------
    // DATA MEMBERS
    // ----------------------------------------------------------------------------

    private Thread self;

    /**
     * If this flag is enabled, then we need to shut ourselves down and stop
     * running txns
     */
    private Shutdownable.ShutdownState shutdown_state = Shutdownable.ShutdownState.INITIALIZED;
    private Semaphore shutdown_latch;

    /**
     * Catalog objects
     */
    protected Catalog catalog;
    protected Cluster cluster;
    protected Database database;
    protected Site site;
    protected int siteId;
    private Partition partition;
    private int partitionId;
    private Integer partitionIdObj;

    private final BackendTarget backend_target;
    private final ExecutionEngine ee;
    private final HsqlBackend hsql;
    private final DBBPool buffer_pool = new DBBPool(false, false);
    private final FastSerializer fs = new FastSerializer(this.buffer_pool);

    /**
     * Runtime Estimators
     */
    private final PartitionEstimator p_estimator;
    private final TransactionEstimator t_estimator;
    private EstimationThresholds thresholds;

    // Each execution site manages snapshot using a SnapshotSiteProcessor
    private final SnapshotSiteProcessor m_snapshotter;

    // ----------------------------------------------------------------------------
    // H-Store Transaction Stuff
    // ----------------------------------------------------------------------------

    protected HStoreSite hstore_site;
    protected HStoreCoordinator hstore_coordinator;
    protected HStoreConf hstore_conf;

    // ----------------------------------------------------------------------------
    // Shared VoltProcedure Data Members
    // ----------------------------------------------------------------------------

    /**
     * This is the execution state for the current transaction. There is only
     * one of these per partition, so it must be cleared out for each new txn
     */
    private final ExecutionState execState;

    /**
     * Mapping from SQLStmt batch hash codes (computed by
     * VoltProcedure.getBatchHashCode()) to BatchPlanners. The idea is that we
     * can quickly derive the partitions for each unique list of SQLStmts.
     */
    public final Map<Integer, BatchPlanner> batchPlanners = new HashMap<Integer, BatchPlanner>(100);

    /**
     * Reusable cache of ParameterSet arrays
     */
    private final ParameterSetArrayCache procParameterSets;

    // ----------------------------------------------------------------------------
    // Internal Execution State
    // ----------------------------------------------------------------------------
    private AggregateExecutor aggexecutor; // fast aggregate in Java
    private CombineExecutor combexecutor; // fast combine in Java
    /**
     * The transaction id of the current transaction. This is mostly used for
     * testing and should not be relied on from the outside.
     */
    private Long currentTxnId = null;

    /**
     * We can only have one active distributed transaction at a time. This is
     * the multi-partition TransactionState that is currently executing at this
     * partition. When we get the response for this txn, we know we can
     * commit/abort the speculatively executed transactions.
     */
    private AbstractTransaction currentDtxn = null;

    /**
     * List of InitiateTaskMessages that are blocked waiting for the outstanding
     * dtxn to commit
     */
    private List<VoltMessage> currentBlockedTxns = new ArrayList<VoltMessage>();

    /**
     * The current ExecutionMode. This defines when transactions are allowed to
     * execute and whether they can return their results to the client
     * immediately or whether they must wait until the current_dtxn commits.
     */
    private ExecutionMode currentExecMode = ExecutionMode.COMMIT_ALL;

    /**
     * The main lock used for critical areas in this PartitionExecutor. This
     * should only be used sparingly. Note that you do not need this lock in
     * order to execute something in this partition's ExecutionEngine.
     */
    private final ReentrantLock exec_lock = new ReentrantLock();

    /**
     * ClientResponses from speculatively executed transactions that are waiting
     * to be committed
     */
    private final LinkedBlockingDeque<Pair<LocalTransaction, ClientResponseImpl>> queued_responses = new LinkedBlockingDeque<Pair<LocalTransaction, ClientResponseImpl>>();

    /**
     * The time in ms since epoch of the last call to ExecutionEngine.tick(...)
     */
    private long lastTickTime = 0;

    /**
     * The last txn id that we executed (either local or remote)
     */
    private volatile Long lastExecutedTxnId = null;

    /**
     * The last txn id that we committed
     */
    private volatile long lastCommittedTxnId = -1;

    /**
     * The last undoToken that we handed out
     */
    private long lastUndoToken = 0l;

    /**
     * This is the queue of work that we need to execute. The entries may be
     * either InitiateTaskMessages (i.e., start a stored procedure) or
     * FragmentTaskMessages (i.e., execute some fragments on behalf of another
     * transaction).
     */
    private final PartitionExecutorQueue work_queue = new PartitionExecutorQueue();

    /**
     * This is the queue for deferred work.
     */
    private final PartitionExecutorDeferredQueue deferred_queue = new PartitionExecutorDeferredQueue();

    /**
     * Special wrapper around the PartitionExecutorQueue that can determine
     * whether this partition is overloaded and therefore new requests should be
     * throttled
     */
    private final ThrottlingQueue<VoltMessage> work_throttler;

    /**
     * Cache of recent query results for this partition.
     */
    private final QueryCache queryCache = new QueryCache(10, 10); // FIXME

    // ----------------------------------------------------------------------------
    // TEMPORARY DATA COLLECTIONS
    // ----------------------------------------------------------------------------

    /**
     * Temporary list of WorkFragments for the current transaction.
     */
    private final List<WorkFragment> partitionFragments = new ArrayList<WorkFragment>();

    /**
     * WorkFragments that we need to send to a remote HStoreSite for execution
     */
    private final List<WorkFragment> tmp_remoteFragmentList = new ArrayList<WorkFragment>();
    /**
     * WorkFragments that we need to send to our own PartitionExecutor
     */
    private final List<WorkFragment> tmp_localWorkFragmentList = new ArrayList<WorkFragment>();
    /**
     * WorkFragments that we need to send to a different PartitionExecutor that
     * is on this same HStoreSite
     */
    private final List<WorkFragment> tmp_localSiteFragmentList = new ArrayList<WorkFragment>();
    /**
     * Temporary space used when calling removeInternalDependencies()
     */
    private final HashMap<Integer, List<VoltTable>> tmp_removeDependenciesMap = new HashMap<Integer, List<VoltTable>>();
    /**
     * Remote SiteId -> TransactionWorkRequest.Builder
     */
    private final TransactionWorkRequestBuilder tmp_transactionRequestBuilders[];
    /**
     * PartitionId -> List<VoltTable>
     */
    private final Map<Integer, List<VoltTable>> tmp_EEdependencies = new HashMap<Integer, List<VoltTable>>();
    /**
     * List of serialized ParameterSets
     */
    private final List<ByteString> tmp_serializedParams = new ArrayList<ByteString>();
    /**
     * List of PartitionIds that need to be notified that the transaction is
     * preparing to commit
     */
    private final List<Integer> tmp_preparePartitions = new ArrayList<Integer>();
    /**
     * Reusable ParameterSet array cache for WorkFragments
     */
    private final ParameterSetArrayCache tmp_fragmentParams;

    /**
     * Reusable long array for fragment ids
     */
    private final LongArrayCache tmp_fragmentIds = new LongArrayCache(10); // FIXME
    /**
     * Reusable int array for output dependency ids
     */
    private final IntArrayCache tmp_outputDepIds = new IntArrayCache(10); // FIXME
    /**
     * Reusable int array for input dependency ids
     */
    private final IntArrayCache tmp_inputDepIds = new IntArrayCache(10);

    // ----------------------------------------------------------------------------
    // PROFILING OBJECTS
    // ----------------------------------------------------------------------------

    /**
     * How much time the PartitionExecutor was idle waiting for work to do in
     * its queue
     */
    private final ProfileMeasurement work_idle_time = new ProfileMeasurement("EE_IDLE");
    /**
     * How much time it takes for this PartitionExecutor to execute a
     * transaction
     */
    private final ProfileMeasurement work_exec_time = new ProfileMeasurement("EE_EXEC");

    // ----------------------------------------------------------------------------
    // CALLBACKS
    // ----------------------------------------------------------------------------

    /**
     * This will be invoked for each TransactionWorkResponse that comes back
     * from the remote HStoreSites. Note that we don't need to do any counting
     * as to whether a transaction has gotten back all of the responses that it
     * expected. That logic is down below in waitForResponses()
     */
    private final RpcCallback<TransactionWorkResponse> request_work_callback = new RpcCallback<TransactionWorkResponse>() {
        @Override
        public void run(TransactionWorkResponse msg) {
            Long txn_id = msg.getTransactionId();
            AbstractTransaction ts = hstore_site.getTransaction(txn_id);

            // We can ignore anything that comes in for a transaction that we
            // don't know about
            if (ts == null) {
                if (d)
                    LOG.debug("No transaction state exists for txn #" + txn_id);
                return;
            }

            if (d)
                LOG.debug(String.format("Processing TransactionWorkResponse for %s with %d results", ts,
                        msg.getResultsCount()));
            for (int i = 0, cnt = msg.getResultsCount(); i < cnt; i++) {
                WorkResult result = msg.getResults(i);
                if (t)
                    LOG.trace(String.format("Got %s from partition %d for %s", result.getClass().getSimpleName(),
                            result.getPartitionId(), ts));
                PartitionExecutor.this.processWorkResult((LocalTransaction) ts, result);
            } // FOR
        }
    }; // END CLASS

    // ----------------------------------------------------------------------------
    // SYSPROC STUFF
    // ----------------------------------------------------------------------------

    // Associate the system procedure planfragment ids to wrappers.
    // Planfragments are registered when the procedure wrapper is init()'d.
    private final HashMap<Long, VoltSystemProcedure> m_registeredSysProcPlanFragments = new HashMap<Long, VoltSystemProcedure>();

    public void registerPlanFragment(final long pfId, final VoltSystemProcedure proc) {
        synchronized (m_registeredSysProcPlanFragments) {
            if (!m_registeredSysProcPlanFragments.containsKey(pfId)) {
                m_registeredSysProcPlanFragments.put(pfId, proc);
                LOG.trace("Registered @" + proc.getClass().getSimpleName() + " sysproc handle for FragmentId #"
                        + pfId);
            }
        } // SYNCH
    }
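
    // Assumed call pattern (based on the comment above): each sysproc wrapper
    // registers its plan fragment ids from its init() method, e.g.
    //
    //   executor.registerPlanFragment(SysProcFragmentId.PF_someFragment, this);
    //
    // where SysProcFragmentId.PF_someFragment stands in for a real fragment id
    // constant.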

    /**
     * SystemProcedures are "friends" with PartitionExecutors and granted access
     * to internal state via m_systemProcedureContext.
     */
    public interface SystemProcedureExecutionContext {
        public Catalog getCatalog();

        public Database getDatabase();

        public Cluster getCluster();

        public Site getSite();

        public Host getHost();

        public ExecutionEngine getExecutionEngine();

        public long getLastCommittedTxnId();

        public long getNextUndo();

        public PartitionExecutor getExecutionSite();

        public Long getCurrentTxnId();
    }

    protected class SystemProcedureContext implements SystemProcedureExecutionContext {
        public Catalog getCatalog() {
            return catalog;
        }

        public Database getDatabase() {
            return cluster.getDatabases().get("database");
        }

        public Cluster getCluster() {
            return cluster;
        }

        public Site getSite() {
            return site;
        }

        public Host getHost() {
            return PartitionExecutor.this.getHost();
        }

        public ExecutionEngine getExecutionEngine() {
            return ee;
        }

        public long getLastCommittedTxnId() {
            return PartitionExecutor.this.getLastCommittedTxnId();
        }

        public long getNextUndo() {
            return getNextUndoToken();
        }

        public PartitionExecutor getExecutionSite() {
            return PartitionExecutor.this;
        }

        public Long getCurrentTxnId() {
            return PartitionExecutor.this.currentTxnId;
        }
    }

    private final SystemProcedureContext m_systemProcedureContext = new SystemProcedureContext();

    // ----------------------------------------------------------------------------
    // INITIALIZATION
    // ----------------------------------------------------------------------------

    /**
     * Dummy constructor...
     */
    protected PartitionExecutor() {
        this.work_throttler = null;
        this.ee = null;
        this.hsql = null;
        this.p_estimator = null;
        this.t_estimator = null;
        this.m_snapshotter = null;
        this.thresholds = null;
        this.catalog = null;
        this.cluster = null;
        this.site = null;
        this.database = null;
        this.backend_target = BackendTarget.HSQLDB_BACKEND;
        this.siteId = 0;
        this.partitionId = 0;
        this.execState = null;
        this.procParameterSets = null;
        this.tmp_fragmentParams = null;
        this.tmp_transactionRequestBuilders = null;
        this.aggexecutor = null;
        this.combexecutor = null;
    }

    /**
     * Initialize the StoredProcedure runner and EE for this Site.
     * 
     * @param partitionId
     * @param catalog
     * @param target
     * @param p_estimator
     * @param t_estimator
     */
    public PartitionExecutor(final int partitionId, final Catalog catalog, final BackendTarget target,
            PartitionEstimator p_estimator, TransactionEstimator t_estimator) {
        this.hstore_conf = HStoreConf.singleton();

        this.work_throttler = new ThrottlingQueue<VoltMessage>(this.work_queue,
                hstore_conf.site.queue_incoming_max_per_partition, hstore_conf.site.queue_incoming_release_factor,
                hstore_conf.site.queue_incoming_increase, hstore_conf.site.queue_incoming_increase_max);

        this.catalog = catalog;
        this.partition = CatalogUtil.getPartitionById(this.catalog, partitionId);
        assert (this.partition != null) : "Invalid Partition #" + partitionId;
        this.partitionId = this.partition.getId();
        this.partitionIdObj = Integer.valueOf(this.partitionId);
        this.site = this.partition.getParent();
        assert (site != null) : "Unable to get Site for Partition #" + partitionId;
        this.siteId = this.site.getId();
        this.aggexecutor = new AggregateExecutor(this);
        this.combexecutor = new CombineExecutor(this);
        this.execState = new ExecutionState(this);

        this.backend_target = target;
        this.cluster = CatalogUtil.getCluster(catalog);
        this.database = CatalogUtil.getDatabase(cluster);

        // The PartitionEstimator is what we use to figure out where our
        // transactions are going to go
        this.p_estimator = p_estimator; // t_estimator.getPartitionEstimator();

        // The TransactionEstimator is the runtime piece that we use to keep
        // track of where the transaction is in its execution workflow. This
        // allows us to make predictions about what kind of things we expect
        // the xact to do in the future
        if (t_estimator == null) { // HACK
            this.t_estimator = new TransactionEstimator(partitionId, p_estimator);
        } else {
            this.t_estimator = t_estimator;
        }

        // An execution site can be backed by HSQLDB, by volt's EE accessed
        // via JNI or by volt's EE accessed via IPC. When backed by HSQLDB,
        // the VoltProcedure interface invokes HSQLDB directly through its
        // hsql Backend member variable. The real volt backend is encapsulated
        // by the ExecutionEngine class. This class has implementations for both
        // JNI and IPC - and selects the desired implementation based on the
        // value of this.eeBackend.
        HsqlBackend hsqlTemp = null;
        ExecutionEngine eeTemp = null;
        SnapshotSiteProcessor snapshotter = null;
        try {
            if (d)
                LOG.debug("Creating EE wrapper with target type '" + target + "'");
            if (this.backend_target == BackendTarget.HSQLDB_BACKEND) {
                hsqlTemp = new HsqlBackend(partitionId);
                final String hexDDL = database.getSchema();
                final String ddl = Encoder.hexDecodeToString(hexDDL);
                final String[] commands = ddl.split(";");
                for (String command : commands) {
                    if (command.length() == 0) {
                        continue;
                    }
                    hsqlTemp.runDDL(command);
                }
                eeTemp = new MockExecutionEngine();

            } else if (target == BackendTarget.NATIVE_EE_JNI) {
                org.voltdb.EELibraryLoader.loadExecutionEngineLibrary(true);
                // set up the EE
                eeTemp = new ExecutionEngineJNI(this, cluster.getRelativeIndex(), this.getSiteId(),
                        this.getPartitionId(), this.getHostId(), "localhost");
                eeTemp.loadCatalog(catalog.serialize());
                lastTickTime = System.currentTimeMillis();
                eeTemp.tick(lastTickTime, 0);

                snapshotter = new SnapshotSiteProcessor(new Runnable() {
                    final PotentialSnapshotWorkMessage msg = new PotentialSnapshotWorkMessage();

                    @Override
                    public void run() {
                        PartitionExecutor.this.work_queue.add(msg);
                    }
                });
            } else {
                // set up the EE over IPC
                eeTemp = new ExecutionEngineIPC(this, cluster.getRelativeIndex(), this.getSiteId(),
                        this.getPartitionId(), this.getHostId(), "localhost", target);
                eeTemp.loadCatalog(catalog.serialize());
                lastTickTime = System.currentTimeMillis();
                eeTemp.tick(lastTickTime, 0);
            }
        }
        // just print error info and bail if we run into an error here
        catch (final Exception ex) {
            throw new ServerFaultException("Failed to initialize PartitionExecutor", ex);
        }
        this.ee = eeTemp;
        this.hsql = hsqlTemp;
        m_snapshotter = snapshotter;
        assert (this.ee != null);
        assert (!(this.ee == null
                && this.hsql == null)) : "Both execution engine objects are empty. This should never happen";

        // ParameterSet Array Caches
        this.procParameterSets = new ParameterSetArrayCache(10);
        this.tmp_fragmentParams = new ParameterSetArrayCache(5);

        // Initialize temporary data structures
        int num_sites = CatalogUtil.getNumberOfSites(this.catalog);
        this.tmp_transactionRequestBuilders = new TransactionWorkRequestBuilder[num_sites];

    }
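
    // Construction sketch (assumed wiring, for illustration only): a site
    // creates one executor per partition, links it to its HStoreSite, and
    // runs it on a dedicated thread.
    //
    //   PartitionExecutor executor = new PartitionExecutor(
    //           0,                            // partitionId
    //           catalog,
    //           BackendTarget.NATIVE_EE_JNI,
    //           p_estimator,
    //           null);                        // t_estimator built internally (HACK path above)
    //   executor.initHStoreSite(hstore_site);
    //   new Thread(executor).start();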

    @SuppressWarnings("unchecked")
    protected void initializeVoltProcedures() {
        // load up all the stored procedures
        for (final Procedure catalog_proc : database.getProcedures()) {
            VoltProcedure volt_proc = null;

            if (catalog_proc.getHasjava()) {
                // Only try to load the Java class file for the SP if it has one
                Class<? extends VoltProcedure> p_class = null;
                final String className = catalog_proc.getClassname();
                try {
                    p_class = (Class<? extends VoltProcedure>) Class.forName(className);
                    volt_proc = (VoltProcedure) p_class.newInstance();
                } catch (Exception e) {
                    throw new ServerFaultException(
                            "Failed to create VoltProcedure instance for " + catalog_proc.getName(), e);
                }

            } else {
                volt_proc = new VoltProcedure.StmtProcedure();
            }
            volt_proc.globalInit(PartitionExecutor.this, catalog_proc, this.backend_target, this.hsql,
                    this.p_estimator);
            this.procedures.put(catalog_proc.getName(), volt_proc);
        } // FOR
    }

    /**
     * Link this PartitionExecutor with its parent HStoreSite. This will
     * initialize the references to the various components shared among the
     * PartitionExecutors.
     * 
     * @param hstore_site
     */
    public void initHStoreSite(HStoreSite hstore_site) {
        if (t)
            LOG.trace(String.format("Initializing HStoreSite components at partition %d", this.partitionId));
        assert (this.hstore_site == null);
        this.hstore_site = hstore_site;
        this.hstore_coordinator = hstore_site.getHStoreCoordinator();
        this.thresholds = (hstore_site != null ? hstore_site.getThresholds() : null);

        if (hstore_conf.site.exec_profiling) {
            EventObservable<AbstractTransaction> eo = this.hstore_site.getStartWorkloadObservable();
            this.work_idle_time.resetOnEvent(eo);
            this.work_exec_time.resetOnEvent(eo);
        }

        this.initializeVoltProcedures();
    }

    // ----------------------------------------------------------------------------
    // MAIN EXECUTION LOOP
    // ----------------------------------------------------------------------------

    /**
     * Primary run method that is invoked a single time when the thread is
     * started. Has the opportunity to do startup config.
     */
    @Override
    public void run() {
        assert (this.hstore_site != null);
        assert (this.hstore_coordinator != null);
        assert (this.self == null);
        this.self = Thread.currentThread();
        this.self.setName(HStoreThreadManager.getThreadName(this.hstore_site, this.partitionId));

        if (hstore_conf.site.cpu_affinity) {
            this.hstore_site.getThreadManager().registerEEThread(partition);
        }

        // *********************************** DEBUG ***********************************
        if (hstore_conf.site.exec_validate_work) {
            LOG.warn("Enabled Distributed Transaction Checking");
        }
        // *********************************** DEBUG ***********************************

        // Things that we will need in the loop below
        AbstractTransaction current_txn = null;
        VoltMessage work = null;
        boolean stop = false;

        try {
            // Setup shutdown lock
            this.shutdown_latch = new Semaphore(0);

            if (d)
                LOG.debug("Starting PartitionExecutor run loop...");
            while (stop == false && this.isShuttingDown() == false) {
                this.currentTxnId = null;
                work = null;

                // -------------------------------
                // Poll Work Queue
                // -------------------------------
                try {
                    work = this.work_queue.poll();
                    if (work == null) {
                        // See if there is anything that we can do while we wait
                        // XXX this.utilityWork(null);

                        if (t)
                            LOG.trace("Partition " + this.partitionId + " queue is empty. Waiting...");
                        if (hstore_conf.site.exec_profiling)
                            this.work_idle_time.start();
                        work = this.work_queue.take();
                        if (hstore_conf.site.exec_profiling)
                            this.work_idle_time.stop();
                    }
                } catch (InterruptedException ex) {
                    if (d && this.isShuttingDown() == false)
                        LOG.debug("Unexpected interuption while polling work queue. Halting PartitionExecutor...",
                                ex);
                    stop = true;
                    break;
                }

                // -------------------------------
                // Transactional Work
                // -------------------------------
                if (work instanceof TransactionInfoBaseMessage) {
                    this.currentTxnId = ((TransactionInfoBaseMessage) work).getTxnId();
                    current_txn = hstore_site.getTransaction(this.currentTxnId);
                    if (current_txn == null) {
                        String msg = String.format("No transaction state for txn #%d [%s]", this.currentTxnId,
                                work.getClass().getSimpleName());
                        LOG.error(msg + "\n" + work.toString());
                        throw new ServerFaultException(msg, this.currentTxnId);
                    }
                    // If this transaction has already been aborted and they are
                    // trying to give us
                    // something that isn't a FinishTaskMessage, then we won't
                    // bother processing it
                    else if (current_txn.isAborted() && (work instanceof FinishTaskMessage) == false) {
                        if (d)
                            LOG.debug(
                                    String.format("%s - Was marked as aborted. Will not process %s on partition %d",
                                            current_txn, work.getClass().getSimpleName(), this.partitionId));
                        continue;
                    }

                    // -------------------------------
                    // Execute Query Plan Fragments
                    // -------------------------------
                    if (work instanceof FragmentTaskMessage) {
                        FragmentTaskMessage ftask = (FragmentTaskMessage) work;
                        WorkFragment fragment = ftask.getWorkFragment();
                        assert (fragment != null);

                        // Get the ParameterSet array for this WorkFragment. It
                        // can either be attached to the AbstractTransaction
                        // handle if it came over the wire directly from the
                        // txn's base partition, or it can be attached
                        // separately for prefetch WorkFragments
                        ParameterSet parameters[] = null;
                        if (fragment.getPrefetch()) {
                            parameters = current_txn.getPrefetchParameterSets();
                            current_txn.markExecPrefetchQuery(this.partitionId);
                        } else {
                            parameters = current_txn.getAttachedParameterSets();
                        }
                        parameters = this.getFragmentParameters(current_txn, fragment, parameters);
                        assert (parameters != null);

                        // At this point we know that we are either the current
                        // dtxn or the current dtxn is null
                        // We will allow any read-only transaction to commit if
                        // (1) The WorkFragment for the remote txn is read-only
                        // (2) This txn has always been read-only up to this
                        // point at this partition
                        ExecutionMode newMode = null;
                        if (hstore_conf.site.exec_speculative_execution) {
                            newMode = (fragment.getReadOnly() && current_txn.isExecReadOnly(this.partitionId)
                                    ? ExecutionMode.COMMIT_READONLY
                                    : ExecutionMode.COMMIT_NONE);
                        } else {
                            newMode = ExecutionMode.DISABLED;
                        }
                        exec_lock.lock();
                        try {
                            // There is no current DTXN, so that means its us!
                            if (this.currentDtxn == null) {
                                this.setCurrentDtxn(current_txn);
                                if (d)
                                    LOG.debug(String.format(
                                            "Marking %s as current DTXN on partition %d [nextMode=%s]", current_txn,
                                            this.partitionId, newMode));
                            }
                            // There is a current DTXN but it's not us!
                            // That means we need to block ourselves until it
                            // finishes
                            else if (this.currentDtxn != current_txn) {
                                if (d)
                                    LOG.warn(String.format(
                                            "%s - Blocking on partition %d until current Dtxn %s finishes",
                                            current_txn, this.partitionId, this.currentDtxn));
                                this.currentBlockedTxns.add(ftask);
                                continue;
                            }
                            assert (this.currentDtxn == current_txn) : String.format(
                                    "Trying to execute a second Dtxn %s before the current one has finished [current=%s]",
                                    current_txn, this.currentDtxn);
                            this.setExecutionMode(current_txn, newMode);
                        } finally {
                            exec_lock.unlock();
                        } // SYNCH

                        this.processWorkFragment(current_txn, fragment, parameters);

                        // -------------------------------
                        // Invoke Stored Procedure
                        // -------------------------------
                    } else if (work instanceof InitiateTaskMessage) {
                        if (hstore_conf.site.exec_profiling)
                            this.work_exec_time.start();
                        InitiateTaskMessage itask = (InitiateTaskMessage) work;

                        // If this is a MapReduceTransaction handle, we actually
                        // want to get the
                        // inner LocalTransaction handle for this partition. The
                        // MapReduceTransaction
                        // is just a placeholder
                        if (current_txn instanceof MapReduceTransaction) {
                            MapReduceTransaction orig_ts = (MapReduceTransaction) current_txn;
                            current_txn = orig_ts.getLocalTransaction(this.partitionId);
                            assert (current_txn != null) : "Unexpected null LocalTransaction handle from "
                                    + orig_ts;
                        }

                        try {
                            this.processInitiateTaskMessage((LocalTransaction) current_txn, itask);
                        } catch (Throwable ex) {
                            LOG.error(String.format("Unexpected error when executing %s\n%s", current_txn,
                                    current_txn.debug()));
                            throw ex;
                        } finally {
                            if (hstore_conf.site.exec_profiling)
                                this.work_exec_time.stop();
                        }

                        // -------------------------------
                        // Finish Transaction
                        // -------------------------------
                    } else if (work instanceof FinishTaskMessage) {
                        FinishTaskMessage ftask = (FinishTaskMessage) work;
                        this.finishTransaction(current_txn, (ftask.getStatus() == Status.OK));
                    }

                    // -------------------------------
                    // PotentialSnapshotWorkMessage
                    // -------------------------------
                } else if (work instanceof PotentialSnapshotWorkMessage) {
                    m_snapshotter.doSnapshotWork(ee);

                    // -------------------------------
                    // BAD MOJO!
                    // -------------------------------
                } else if (work != null) {
                    throw new ServerFaultException("Unexpected work message in queue: " + work, this.currentTxnId);
                }

                // Is there a better way to do this?
                this.work_throttler.checkThrottling(false);

                if (hstore_conf.site.exec_profiling && this.currentTxnId != null) {
                    this.lastExecutedTxnId = this.currentTxnId;
                    this.currentTxnId = null;
                }
            } // WHILE
        } catch (final Throwable ex) {
            if (this.isShuttingDown() == false) {
                ex.printStackTrace();
                LOG.fatal(String.format("Unexpected error for PartitionExecutor partition #%d [%s]%s",
                        this.partitionId, (current_txn != null ? " - " + current_txn : ""), ex), ex);
                if (current_txn != null)
                    LOG.fatal("TransactionState Dump:\n" + current_txn.debug());

            }
            this.hstore_coordinator.shutdownCluster(ex);
        } finally {
            if (d) {
                String txnDebug = "";
                if (current_txn != null && current_txn.getBasePartition() == this.partitionId) {
                    txnDebug = "\n" + current_txn.debug();
                }
                LOG.warn(String.format("PartitionExecutor %d is stopping.%s%s", this.partitionId,
                        (this.currentTxnId != null ? " In-Flight Txn: #" + this.currentTxnId : ""), txnDebug));
            }

            // Release the shutdown latch in case anybody waiting for us
            this.shutdown_latch.release();

            // Stop HStoreMessenger (because we're nice)
            if (this.isShuttingDown() == false) {
                if (this.hstore_coordinator != null)
                    this.hstore_coordinator.shutdown();
            }
        }
    }

    /**
     * Special function that allows us to do some utility work while we are
     * waiting for a response or something real to do.
     */
    protected void utilityWork(CountDownLatch dtxnLatch) {
        // TODO: Set the txnId in our handle to be what the original txn was
        // that deferred this query.

        /*
         * We need to start popping from the deferred_queue here. There is no
         * need for a while loop if we're going to requeue each popped txn in
         * the work_queue, because we know this.work_queue.isEmpty() will be
         * false as soon as we pop one local txn off of deferred_queue. We will
         * arrive back in utilityWork() when that txn finishes if no new txns
         * have entered.
         */
        do {
            LocalTransaction ts = deferred_queue.poll();
            if (ts == null)
                break;
            this.queueNewTransaction(ts);
        } while ((dtxnLatch != null && dtxnLatch.getCount() > 0)
                || (dtxnLatch == null && this.work_queue.isEmpty()));
        // while (this.work_queue.isEmpty()) {
        // }
        // Try to free some memory
        // this.tmp_fragmentParams.reset();
        // this.tmp_serializedParams.clear();
        // this.tmp_EEdependencies.clear();
    }

    public void tick() {
        // invoke native ee tick if at least one second has passed
        final long time = EstTime.currentTimeMillis();
        if ((time - lastTickTime) >= 1000) {
            if ((lastTickTime != 0) && (ee != null)) {
                ee.tick(time, lastCommittedTxnId);
            }
            lastTickTime = time;
        }

        // do other periodic work
        m_snapshotter.doSnapshotWork(ee);
    }
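
    /*
     * Note (assumed usage): tick() is meant to be invoked periodically by the
     * executor's owner; the (time - lastTickTime) >= 1000 guard throttles the
     * native EE tick to roughly once per second, e.g.
     *
     *   while (running) {
     *       executor.tick(); // cheap no-op until a second has elapsed
     *       Thread.sleep(100);
     *   }
     */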

    @Override
    public void updateLogging() {
        d = debug.get();
        t = trace.get();
    }

    // ----------------------------------------------------------------------------
    // UTILITY METHODS
    // ----------------------------------------------------------------------------

    public ExecutionEngine getExecutionEngine() {
        return (this.ee);
    }

    public Thread getExecutionThread() {
        return (this.self);
    }

    public PartitionEstimator getPartitionEstimator() {
        return (this.p_estimator);
    }

    public TransactionEstimator getTransactionEstimator() {
        return (this.t_estimator);
    }

    public ThrottlingQueue<VoltMessage> getThrottlingQueue() {
        return (this.work_throttler);
    }

    public HStoreSite getHStoreSite() {
        return (this.hstore_site);
    }

    public HStoreConf getHStoreConf() {
        return (this.hstore_conf);
    }

    public HStoreCoordinator getHStoreCoordinator() {
        return (this.hstore_coordinator);
    }

    public Site getCatalogSite() {
        return (this.site);
    }

    public int getHostId() {
        return (this.site.getHost().getRelativeIndex());
    }

    public Host getHost() {
        return (this.site.getHost());
    }

    public int getSiteId() {
        return (this.siteId);
    }

    public Partition getPartition() {
        return (this.partition);
    }

    public int getPartitionId() {
        return (this.partitionId);
    }

    public Long getLastExecutedTxnId() {
        return (this.lastExecutedTxnId);
    }

    public Long getLastCommittedTxnId() {
        return (this.lastCommittedTxnId);
    }

    public ParameterSetArrayCache getProcedureParameterSetArrayCache() {
        return (this.procParameterSets);
    }

    /**
     * Returns the next undo token to use when hitting up the EE with work.
     * MAX_VALUE = no undo.
     * 
     * @return
     */
    public long getNextUndoToken() {
        return (++this.lastUndoToken);
    }
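
    // Illustrative (assumed) usage: every unit of EE work is stamped with a
    // monotonically increasing undo token, so an abort can roll the engine
    // back to the last committed token. A sketch, with elided arguments:
    //
    //   long undoToken = this.getNextUndoToken();
    //   ee.executeQueryPlanFragmentsAndGetResults(..., undoToken);
    //   // on abort: roll back everything newer than the last committed token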

    /**
     * Set the current ExecutionMode for this executor
     * 
     * @param ts
     * @param newMode
     */
    private void setExecutionMode(AbstractTransaction ts, ExecutionMode newMode) {
        if (d && this.currentExecMode != newMode) {
            LOG.debug(String.format(
                    "Setting ExecutionMode for partition %d to %s because of %s [currentDtxn=%s, origMode=%s]",
                    this.partitionId, newMode, ts, this.currentDtxn, this.currentExecMode));
        }
        assert (newMode != ExecutionMode.COMMIT_READONLY
                || (newMode == ExecutionMode.COMMIT_READONLY && this.currentDtxn != null)) : String.format(
                        "%s is trying to set partition %d to %s when the current DTXN is null?", ts,
                        this.partitionId, newMode);
        this.currentExecMode = newMode;
    }

    public ExecutionMode getExecutionMode() {
        return (this.currentExecMode);
    }

    /**
     * Get the current distributed transaction at this partition
     * <B>FOR TESTING ONLY</B>
     */
    public AbstractTransaction getCurrentDtxn() {
        return (this.currentDtxn);
    }

    /**
     * Get the txnId of the current distributed transaction at this partition
     * <B>FOR TESTING ONLY</B>
     */
    public Long getCurrentDtxnId() {
        Long ret = null;
        // This is a race condition, so we'll just ignore any errors
        if (this.currentDtxn != null) {
            try {
                ret = this.currentDtxn.getTransactionId();
            } catch (NullPointerException ex) {
                // IGNORE
            }
        }
        return (ret);
    }

    public Long getCurrentTxnId() {
        return (this.currentTxnId);
    }

    public int getBlockedQueueSize() {
        return (this.currentBlockedTxns.size());
    }

    public int getWaitingQueueSize() {
        return (this.queued_responses.size());
    }

    public int getWorkQueueSize() {
        return (this.work_queue.size());
    }

    public ProfileMeasurement getWorkIdleTime() {
        return (this.work_idle_time);
    }

    public ProfileMeasurement getWorkExecTime() {
        return (this.work_exec_time);
    }

    /**
     * Returns the number of txns that have been invoked on this partition
     * 
     * @return
     */
    public int getTransactionCounter() {
        return (this.work_exec_time.getInvocations());
    }

    /**
     * Returns the VoltProcedure instance for a given stored procedure name.
     * This is slow and should not be used at run time.
     * 
     * @param proc_name
     * @return
     */
    public VoltProcedure getVoltProcedure(String proc_name) {
        return (this.procedures.get(proc_name));
    }
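
    /*
     * Illustrative lookup (the procedure name here is hypothetical):
     *
     *   VoltProcedure volt_proc = executor.getVoltProcedure("neworder");
     *
     * As the Javadoc warns, resolve and cache this handle once at startup
     * rather than calling it on the hot path.
     */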

    private ParameterSet[] getFragmentParameters(AbstractTransaction ts, WorkFragment fragment,
            ParameterSet allParams[]) {
        int num_fragments = fragment.getFragmentIdCount();
        ParameterSet fragmentParams[] = tmp_fragmentParams.getParameterSet(num_fragments);
        assert (fragmentParams != null);
        assert (fragmentParams.length == num_fragments);

        for (int i = 0; i < num_fragments; i++) {
            int param_index = fragment.getParamIndex(i);
            assert (param_index < allParams.length) : String.format(
                    "StatementIndex is %d but there are only %d ParameterSets for %s", param_index,
                    allParams.length, ts);
            fragmentParams[i].setParameters(allParams[param_index]);
        } // FOR
        return (fragmentParams);
    }

    private Map<Integer, List<VoltTable>> getFragmentInputs(AbstractTransaction ts, WorkFragment fragment,
            Map<Integer, List<VoltTable>> inputs) {
        Map<Integer, List<VoltTable>> attachedInputs = ts.getAttachedInputDependencies();
        assert (attachedInputs != null);
        boolean is_local = (ts instanceof LocalTransaction);

        if (d)
            LOG.debug(String.format("%s - Attempting to retrieve input dependencies for WorkFragment [isLocal=%s]",
                    ts, is_local));
        for (int i = 0, cnt = fragment.getFragmentIdCount(); i < cnt; i++) {
            WorkFragment.InputDependency input_dep_ids = fragment.getInputDepId(i);
            for (int input_dep_id : input_dep_ids.getIdsList()) {
                if (input_dep_id == HStoreConstants.NULL_DEPENDENCY_ID)
                    continue;

                // If the Transaction is on the same HStoreSite, then all the
                // input dependencies will be internal and can be retrieved
                // locally
                if (is_local) {
                    List<VoltTable> deps = ((LocalTransaction) ts).getInternalDependency(input_dep_id);
                    assert (deps != null);
                    assert (inputs.containsKey(input_dep_id) == false);
                    inputs.put(input_dep_id, deps);
                    if (d)
                        LOG.debug(String.format("%s - Retrieved %d INTERNAL VoltTables for DependencyId #%d\n",
                                ts, deps.size(), input_dep_id) + deps);
                }
                // Otherwise they will be "attached" inputs to the
                // RemoteTransaction handle
                // We should really try to merge these two concepts into a
                // single function call
                else if (attachedInputs.containsKey(input_dep_id)) {
                    List<VoltTable> deps = attachedInputs.get(input_dep_id);
                    List<VoltTable> pDeps = null;
                    // XXX: Do we actually need to copy these???
                    // XXX: I think we only need to copy if we're debugging the
                    // tables!
                    if (d) { // this.firstPartition == false) {
                        pDeps = new ArrayList<VoltTable>();
                        for (VoltTable vt : deps) {
                            // TODO: Move into VoltTableUtil
                            ByteBuffer buffer = vt.getTableDataReference();
                            byte arr[] = new byte[vt.getUnderlyingBufferSize()]; // FIXME
                            buffer.get(arr, 0, arr.length);
                            pDeps.add(new VoltTable(ByteBuffer.wrap(arr), true));
                        }
                    } else {
                        pDeps = deps;
                    }
                    inputs.put(input_dep_id, pDeps);
                    if (d)
                        LOG.debug(String.format("%s - Retrieved %d ATTACHED VoltTables for DependencyId #%d",
                                ts, deps.size(), input_dep_id));
                }

            } // FOR (inputs)
        } // FOR (fragments)
        if (d) {
            if (inputs.isEmpty() == false) {

                LOG.debug(String.format("%s - Retrieved %d InputDependencies for %s on partition %d", ts,
                        inputs.size(), fragment.getFragmentIdList(), fragment.getPartitionId()));
            } else if (fragment.getNeedsInput()) {
                LOG.warn(String.format("%s - No InputDependencies retrieved for %s on partition %d", ts,
                        fragment.getFragmentIdList(), fragment.getPartitionId()));
            }
        }
        return (inputs);
    }

    /**
     * Set the given transaction as the current distributed transaction (dtxn)
     * running at this partition.
     * 
     * @param ts
     */
    private void setCurrentDtxn(AbstractTransaction ts) {
        // There can never be another current dtxn still unfinished at this
        // partition!
        assert (this.currentBlockedTxns.isEmpty()) : String.format(
                "Concurrent multi-partition transactions at partition %d: Orig[%s] <=> New[%s] / BlockedQueue:%d",
                this.partitionId, this.currentDtxn, ts, this.currentBlockedTxns.size());
        assert (this.currentDtxn == null) : String.format(
                "Concurrent multi-partition transactions at partition %d: Orig[%s] <=> New[%s] / BlockedQueue:%d",
                this.partitionId, this.currentDtxn, ts, this.currentBlockedTxns.size());
        if (d)
            LOG.debug(String.format("Setting %s as the current DTXN for partition #%d [previous=%s]", ts,
                    this.partitionId, this.currentDtxn));
        this.currentDtxn = ts;
    }

    private void resetCurrentDtxn() {
        assert (this.currentDtxn != null) : "Trying to reset the currentDtxn when it is already null";
        if (d)
            LOG.debug(String.format("Resetting current DTXN for partition #%d to null [previous=%s]",
                    this.partitionId, this.currentDtxn));
        this.currentDtxn = null;
    }

    // ---------------------------------------------------------------
    // PartitionExecutor API
    // ---------------------------------------------------------------

    /**
     * New work from the coordinator that this local site needs to execute
     * (non-blocking). This method simply adds the task to the work queue;
     * we should never be sent an InitiateTaskMessage here.
     * 
     * @param ts
     * @param task
     */
    public void queueWork(AbstractTransaction ts, FragmentTaskMessage task) {
        assert (ts.isInitialized());
        this.work_queue.add(task);
        if (d)
            LOG.debug(String.format("%s - Added distributed txn %s to partition %d work queue [size=%d]",
                    ts, task.getClass().getSimpleName(), this.partitionId, this.work_queue.size()));
    }

    /**
     * Put the finish request for the transaction into the queue
     * 
     * @param task
     * @param status
     *            The final status of the transaction
     */
    public void queueFinish(AbstractTransaction ts, Status status) {
        assert (ts.isInitialized());
        FinishTaskMessage task = ts.getFinishTaskMessage(status);
        this.work_queue.add(task);
        if (d)
            LOG.debug(String.format("%s - Added distributed %s to partition %d work queue [size=%d]", ts,
                    task.getClass().getSimpleName(), this.partitionId, this.work_queue.size()));
    }

    /**
     * Queue a new transaction initiation request for a local transaction.
     * Returns false if the request was rejected by admission control.
     * 
     * @param ts
     */
    public boolean queueNewTransaction(LocalTransaction ts) {
        assert (ts != null) : "Unexpected null transaction handle!";
        final InitiateTaskMessage task = ts.getInitiateTaskMessage();
        final boolean singlePartitioned = ts.isPredictSinglePartition();
        boolean success = true;

        if (d)
            LOG.debug(String.format(
                    "%s - Queuing new transaction execution request on partition %d [currentDtxn=%s, mode=%s, taskHash=%d]",
                    ts, this.partitionId, this.currentDtxn, this.currentExecMode, task.hashCode()));

        // If this is a single-partition txn and speculative execution is
        // enabled, then we can always set it up now
        if (hstore_conf.site.exec_speculative_execution && singlePartitioned
                && this.currentExecMode != ExecutionMode.DISABLED) {
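            // Fast path: speculatively executable single-partition txns go
            // straight through the admission-control throttler without taking
            // exec_lock; offer() returns false if the txn must be rejected.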
            if (d)
                LOG.debug(String.format("%s - Adding to work queue at partition %d [size=%d]", ts, this.partitionId,
                        this.work_queue.size()));
            success = this.work_throttler.offer(task, false);
            // Otherwise figure out whether this txn needs to be blocked or not
        } else {
            if (d)
                LOG.debug(String.format("%s - Attempting to add %s to partition %d queue [currentTxn=%s]", ts,
                        task.getClass().getSimpleName(), this.partitionId, this.currentTxnId));

            exec_lock.lock();
            try {
                // No outstanding DTXN
                if (this.currentDtxn == null && this.currentExecMode != ExecutionMode.DISABLED) {
                    if (d)
                        LOG.debug(String.format("%s - Adding %s to work queue [size=%d]", ts,
                                task.getClass().getSimpleName(), this.work_queue.size()));
                    // Only use the throttler for single-partition txns
                    if (singlePartitioned) {
                        success = this.work_throttler.offer(task, false);
                    } else {
                        // this.work_queue.addFirst(task);
                        this.work_queue.add(task);
                    }
                }
                // Add the transaction request to the blocked queue
                else {
                    // TODO: This is where we can check whether this new
                    // transaction request is commutative with the current dtxn.
                    // If it is, then we know that we don't need to block it or
                    // worry about whether it will conflict with the current dtxn.
                    if (d)
                        LOG.debug(String.format("%s - Blocking until dtxn %s finishes", ts, this.currentDtxn));
                    this.currentBlockedTxns.add(task);
                }
            } finally {
                exec_lock.unlock();
            } // SYNCH
        }

        if (success == false) {
            // Depending on the type of txn, we will send either an
            // ABORT_THROTTLED or an ABORT_REJECT in our response.
            // An ABORT_THROTTLED means that the client will back off a bit
            // before sending another txn request, whereas an ABORT_REJECT
            // means that it may retry immediately.
            Status status = ((singlePartitioned ? hstore_conf.site.queue_incoming_throttle
                    : hstore_conf.site.queue_dtxn_throttle) ? Status.ABORT_THROTTLED : Status.ABORT_REJECT);

            if (d)
                LOG.debug(String.format(
                        "%s - Hit with a %s response from partition %d [currentTxn=%s, throttled=%s, queueSize=%d]",
                        ts, status, this.partitionId, this.currentTxnId, this.work_throttler.isThrottled(),
                        this.work_throttler.size()));
            if (singlePartitioned == false) {
                TransactionFinishCallback finish_callback = ts.initTransactionFinishCallback(status);
                hstore_coordinator.transactionFinish(ts, status, finish_callback);
            }
            // We will want to delete this transaction after we reject it if it
            // is a single-partition txn. Otherwise we will let the normal
            // distributed transaction process clean things up.
            hstore_site.transactionReject(ts, status);
        }
        return (success);
    }

    // ---------------------------------------------------------------
    // WORK QUEUE PROCESSING METHODS
    // ---------------------------------------------------------------

    /**
     * Enable speculative execution mode for this partition. The given
     * transaction is the one that we need to wait to finish before we can
     * release the speculatively executed transactions. Returns true if
     * speculative execution was enabled at this partition.
     * 
     * @param ts
     * @param force
     * @return
     */
    public boolean enableSpeculativeExecution(AbstractTransaction ts, boolean force) {
        assert (ts != null) : "Null transaction handle???";
        // assert(this.speculative_execution == SpeculateType.DISABLED) :
        // "Trying to enable spec exec twice because of txn #" + txn_id;

        if (d)
            LOG.debug(String.format("%s - Checking whether txn is read-only at partition %d [readOnly=%s]", ts,
                    this.partitionId, ts.isExecReadOnly(this.partitionId)));

        // Check whether the txn that we're waiting for is read-only.
        // If it is, then that means all read-only transactions can commit right
        // away
        if (ts.isExecReadOnly(this.partitionId)) {
            ExecutionMode newMode = ExecutionMode.COMMIT_READONLY;
            if (d)
                LOG.debug(String.format(
                        "%s - Attempting to enable %s speculative execution at partition %d [currentMode=%s]", ts,
                        newMode, partitionId, this.currentExecMode));
            exec_lock.lock();
            try {
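                // Re-check under exec_lock: the current dtxn (and the execution
                // mode) may have changed since the read-only check above.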
                if (this.currentDtxn == ts && this.currentExecMode != ExecutionMode.DISABLED) {
                    this.setExecutionMode(ts, newMode);
                    this.releaseBlockedTransactions(ts, true);
                    if (d)
                        LOG.debug(String.format("%s - Enabled %s speculative execution at partition %d", ts,
                                this.currentExecMode, partitionId));
                    return (true);
                }
            } finally {
                exec_lock.unlock();
            } // SYNCH
        }
        return (false);
    }

    /**
     * Process a WorkResult and update the TransactionState accordingly.
     * 
     * @param ts
     * @param result
     */
    private void processWorkResult(LocalTransaction ts, WorkResult result) {
        if (d)
            LOG.debug(String.format(
                    "Processing FragmentResponseMessage for %s on partition %d [srcPartition=%d, deps=%d]", ts,
                    this.partitionId, result.getPartitionId(), result.getDepDataCount()));

        // If the fragment failed to execute, then we need to abort the
        // transaction. Note that we have to do this before we add the responses
        // to the TransactionState, so that we can be sure that the VoltProcedure
        // knows about the problem when it wakes the stored procedure back up.
        if (result.getStatus() != Status.OK) {
            if (t)
                LOG.trace(String.format("Received non-success response %s from partition %d for %s",
                        result.getStatus(), result.getPartitionId(), ts));

            SerializableException error = null;
            if (hstore_conf.site.txn_profiling)
                ts.profiler.startDeserialization();
            try {
                ByteBuffer buffer = result.getError().asReadOnlyByteBuffer();
                error = SerializableException.deserializeFromBuffer(buffer);
            } catch (Exception ex) {
                throw new ServerFaultException(String.format(
                        "Failed to deserialize SerializableException from partition %d for %s [bytes=%d]",
                        result.getPartitionId(), ts, result.getError().size()), ex);
            } finally {
                if (hstore_conf.site.txn_profiling)
                    ts.profiler.stopDeserialization();
            }
            // At this point there is no need to even deserialize the rest of
            // the message, because we know that we're going to have to abort
            // the transaction.
            if (error == null) {
                LOG.warn(ts + " - Unexpected null SerializableException\n" + result);
            } else {
                if (d)
                    LOG.error(ts + " - Got error from partition " + result.getPartitionId(), error);
                ts.setPendingError(error, true);
            }
            return;
        }

        if (hstore_conf.site.txn_profiling)
            ts.profiler.startDeserialization();
        for (int i = 0, cnt = result.getDepDataCount(); i < cnt; i++) {
            if (t)
                LOG.trace(String.format("Storing intermediate results from partition %d for %s",
                        result.getPartitionId(), ts));
            int depId = result.getDepId(i);
            ByteString bs = result.getDepData(i);
            VoltTable vt = null;
            if (bs.isEmpty() == false) {
                FastDeserializer fd = new FastDeserializer(bs.asReadOnlyByteBuffer());
                try {
                    vt = fd.readObject(VoltTable.class);
                } catch (Exception ex) {
                    throw new ServerFaultException("Failed to deserialize VoltTable from partition "
                            + result.getPartitionId() + " for " + ts, ex);
                }
            }
            ts.addResult(result.getPartitionId(), depId, vt);
        } // FOR (dependencies)
        if (hstore_conf.site.txn_profiling)
            ts.profiler.stopDeserialization();
    }

    /**
     * Execute a new transaction based on an InitiateTaskMessage
     * 
     * @param itask
     */
    private void processInitiateTaskMessage(LocalTransaction ts, InitiateTaskMessage itask)
            throws InterruptedException {
        if (hstore_conf.site.txn_profiling)
            ts.profiler.startExec();

        ExecutionMode before_mode = ExecutionMode.COMMIT_ALL;
        boolean predict_singlePartition = ts.isPredictSinglePartition();
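        // before_mode snapshots the ExecutionMode in effect when this txn
        // begins; canProcessClientResponseNow() later compares against it to
        // decide whether the ClientResponse can be returned immediately or
        // must be queued.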

        if (t)
            LOG.trace(String.format("%s - Attempting to begin processing %s on partition %d [taskHash=%d]", ts,
                    itask.getClass().getSimpleName(), this.partitionId, itask.hashCode()));
        // If this is going to be a multi-partition transaction, then we will
        // mark it as the current dtxn
        // for this PartitionExecutor.
        if (predict_singlePartition == false) {
            this.exec_lock.lock();
            try {
                if (this.currentDtxn != null) {
                    this.currentBlockedTxns.add(itask);
                    return;
                }
                this.setCurrentDtxn(ts);
                // 2011-11-14: We don't want to set the execution mode here,
                // because we can check whether we were read-only after the
                // txn finishes.
                if (d)
                    LOG.debug(String.format("Marking %s as current DTXN on Partition %d [isLocal=%s, execMode=%s]",
                            ts, this.partitionId, true, this.currentExecMode));
                before_mode = this.currentExecMode;
            } finally {
                exec_lock.unlock();
            } // SYNCH
        } else {
            exec_lock.lock();
            try {
                // If this is a single-partition transaction, then we need to
                // check whether we are being executed
                // under speculative execution mode. We have to check this here
                // because it may be the case that we queued a
                // bunch of transactions when speculative execution was enabled,
                // but now the transaction that was ahead of this
                // one is finished, so now we're just executing them regularly
                if (this.currentExecMode != ExecutionMode.COMMIT_ALL) {
                    assert (this.currentDtxn != null) : String.format(
                            "Invalid execution mode %s without a dtxn at partition %d", this.currentExecMode,
                            this.partitionId);

                    // HACK: If we are currently under DISABLED mode when we get
                    // this, then we just need to block the transaction
                    // and return back to the queue. This is easier than having
                    // to set all sorts of crazy locks
                    if (this.currentExecMode == ExecutionMode.DISABLED) {
                        if (d)
                            LOG.debug(String.format("Blocking single-partition %s until dtxn %s finishes [mode=%s]",
                                    ts, this.currentDtxn, this.currentExecMode));
                        this.currentBlockedTxns.add(itask);
                        return;
                    }

                    before_mode = this.currentExecMode;
                    if (hstore_conf.site.exec_speculative_execution) {
                        ts.setSpeculative(true);
                        if (d)
                            LOG.debug(String.format(
                                    "Marking %s as speculatively executed on partition %d [txnMode=%s, dtxn=%s]",
                                    ts, this.partitionId, before_mode, this.currentDtxn));
                    }
                }
            } finally {
                exec_lock.unlock();
            } // SYNCH
        }

        // Always clear+set the ExecutionState
        this.execState.clear();
        ts.setExecutionState(this.execState);

        VoltProcedure volt_proc = this.procedures.get(itask.getStoredProcedureName());
        assert (volt_proc != null) : "No VoltProcedure for " + ts;

        if (d) {
            LOG.debug(String.format("%s - Starting execution of txn [txnMode=%s, mode=%s]", ts, before_mode,
                    this.currentExecMode));
            if (t)
                LOG.trace("Current Transaction at partition #" + this.partitionId + "\n" + ts.debug());
        }

        ClientResponseImpl cresponse = null;
        try {
            cresponse = (ClientResponseImpl) volt_proc.call(ts, itask.getParameters()); // Blocking...
            // VoltProcedure.call() should handle any exceptions thrown by the
            // transaction
            // If we get anything out here then that's bad news
        } catch (Throwable ex) {
            if (this.isShuttingDown() == false) {
                SQLStmt last[] = volt_proc.voltLastQueriesExecuted();
                LOG.fatal("Unexpected error while executing " + ts, ex);
                if (last.length > 0) {
                    LOG.fatal(String.format("Last Queries Executed [%d]: %s", last.length, Arrays.toString(last)));
                }
                LOG.fatal("LocalTransactionState Dump:\n" + ts.debug());
                this.crash(ex);
            }
        } finally {
            ts.resetExecutionState();
        }
        // If this is a MapReduce job, then we can just ignore the
        // ClientResponse and return immediately.
        if (ts.isMapReduce()) {
            return;
        } else if (cresponse == null) {
            assert (this.isShuttingDown()) : String.format("No ClientResponse for %s???", ts);
            return;
        }

        Status status = cresponse.getStatus();
        if (d)
            LOG.debug(String.format("Finished execution of %s [status=%s, beforeMode=%s, currentMode=%s]", ts,
                    status, before_mode, this.currentExecMode));

        // We assume that most transactions are not speculatively executed and
        // are successful. Therefore we don't want to grab the exec_mode lock
        // here.
        if (predict_singlePartition == false || this.canProcessClientResponseNow(ts, status, before_mode)) {
            this.processClientResponse(ts, cresponse);
        }
        // Otherwise acquire the lock and then figure out what we can do with
        // this guy
        else {
            exec_lock.lock();
            try {
                if (this.canProcessClientResponseNow(ts, status, before_mode)) {
                    this.processClientResponse(ts, cresponse);
                    // Otherwise always queue our response, since we know that
                    // whatever thread is out there
                    // is waiting for us to finish before it drains the queued
                    // responses
                } else {
                    // If the transaction aborted, then we can't execute any
                    // transactions that touch the tables that this guy touches.
                    // But since we can't just undo this transaction without
                    // undoing everything that came before it, we'll just
                    // disable executing all transactions until the
                    // multi-partition transaction commits.
                    // NOTE: We don't need to acquire the 'exec_mode' lock here,
                    // because we know that we either executed in non-spec mode,
                    // or that there already was a multi-partition transaction
                    // hanging around.
                    if (status != Status.OK && ts.isExecReadOnlyAllPartitions() == false) {
                        this.setExecutionMode(ts, ExecutionMode.DISABLED);
                        int blocked = this.work_queue.drainTo(this.currentBlockedTxns);
                        if (t && blocked > 0)
                            LOG.trace(String.format(
                                    "Blocking %d transactions at partition %d because ExecutionMode is now %s",
                                    blocked, this.partitionId, this.currentExecMode));
                        if (d)
                            LOG.debug(String.format(
                                    "Disabling execution on partition %d because speculative %s aborted",
                                    this.partitionId, ts));
                    }
                    if (t)
                        LOG.trace(String.format(
                                "%s - Queuing ClientResponse [status=%s, origMode=%s, newMode=%s, dtxn=%s]", ts,
                                cresponse.getStatus(), before_mode, this.currentExecMode, this.currentDtxn));
                    this.queueClientResponse(ts, cresponse);
                }
            } finally {
                exec_lock.unlock();
            } // SYNCH
        }
        volt_proc.finish();
    }

    /**
     * Determines whether a finished transaction that executed locally can have
     * its ClientResponse processed immediately or must wait for the outstanding
     * multi-partition transaction at this partition to finish first. The
     * response can be processed now if any of the following holds:
     * (1) this is the multi-partition transaction that everyone is waiting for,
     * (2) the transaction was not executed under speculative execution mode, or
     * (3) the transaction does not need to wait for the multi-partition
     * transaction to finish first.
     * 
     * @param ts
     * @param status
     * @param before_mode
     * @return
     */
    private boolean canProcessClientResponseNow(LocalTransaction ts, Status status, ExecutionMode before_mode) {
        if (d)
            LOG.debug(String.format(
                    "%s - Checking whether to process response now [status=%s, singlePartition=%s, readOnly=%s, beforeMode=%s, currentMode=%s]",
                    ts, status, ts.isExecSinglePartition(), ts.isExecReadOnly(this.partitionId), before_mode,
                    this.currentExecMode));
        // Commit All
        if (this.currentExecMode == ExecutionMode.COMMIT_ALL) {
            return (true);
        }
        // Process successful txns based on the mode they were executed under
        else if (status == Status.OK) {
            switch (before_mode) {
            case COMMIT_ALL:
                return (true);
            case COMMIT_READONLY:
                return (ts.isExecReadOnly(this.partitionId));
            case COMMIT_NONE: {
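                // Speculative results executed under COMMIT_NONE are always
                // held back until the current dtxn finishes.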
                return (false);
            }
            default:
                throw new ServerFaultException("Unexpected execution mode: " + before_mode, ts.getTransactionId());
            } // SWITCH
        }
        // Anything mispredicted should be processed right away
        else if (status == Status.ABORT_MISPREDICT) {
            return (true);
        }
        // If the transaction aborted and it was read-only thus far, then we
        // want to process it immediately
        else if (status != Status.OK && ts.isExecReadOnly(this.partitionId)) {
            return (true);
        }
        // If this txn threw a user abort, and the current outstanding dtxn is
        // read-only
        // then it's safe for us to rollback
        else if (status == Status.ABORT_USER
                && (this.currentDtxn != null && this.currentDtxn.isExecReadOnly(this.partitionId))) {
            return (true);
        }

        assert (this.currentExecMode != ExecutionMode.COMMIT_ALL) : String.format(
                "Queuing ClientResponse for %s when in non-specutative mode [mode=%s, status=%s]", ts,
                this.currentExecMode, status);
        return (false);
    }

    /**
     * Execute a WorkFragment for a distributed transaction
     * 
     * @param fragment
     * @throws Exception
     */
    private void processWorkFragment(AbstractTransaction ts, WorkFragment fragment, ParameterSet parameters[]) {
        assert (this.partitionId == fragment.getPartitionId()) : String.format(
                "Tried to execute WorkFragment %s for %s on partition %d but it was suppose to be executed on partition %d",
                fragment.getFragmentIdList(), ts, this.partitionId, fragment.getPartitionId());

        // A txn is "local" if its Java control code is executing at the same
        // partition as this one.
        boolean is_local = ts.isExecLocal(this.partitionId);
        boolean is_dtxn = (ts instanceof LocalTransaction == false);
        boolean is_prefetch = fragment.getPrefetch();
        if (d)
            LOG.debug(String.format("%s - Executing %s [isLocal=%s, isDtxn=%s, isPrefetch=%s, fragments=%s]", ts,
                    fragment.getClass().getSimpleName(), is_local, is_dtxn, is_prefetch,
                    fragment.getFragmentIdCount()));

        // If this txn isn't local, then we have to update our undoToken
        if (is_local == false) {
            ts.initRound(this.partitionId, this.getNextUndoToken());
            ts.startRound(this.partitionId);
        }

        DependencySet result = null;
        Status status = Status.OK;
        SerializableException error = null;
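        // Every failure mode below is normalized into a (status, error) pair so
        // that a WorkResult can still be built and sent back to the caller.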

        try {
            result = this.executeWorkFragment(ts, fragment, parameters);
        } catch (ConstraintFailureException ex) {
            if (d)
                LOG.warn(String.format("%s - Unexpected ConstraintFailureException error on partition %d", ts,
                        this.partitionId), ex);
            status = Status.ABORT_UNEXPECTED;
            error = ex;
        } catch (EEException ex) {
            LOG.error(String.format("%s - Unexpected ExecutionEngine error on partition %d", ts, this.partitionId),
                    ex);
            this.crash(ex);
            status = Status.ABORT_UNEXPECTED;
            error = ex;
        } catch (SQLException ex) {
            LOG.error(String.format("%s - Unexpected SQL error on partition %d", ts, this.partitionId), ex);
            status = Status.ABORT_UNEXPECTED;
            error = ex;
        } catch (Throwable ex) {
            LOG.error(String.format("%s - Unexpected error on partition %d", ts, this.partitionId), ex);
            status = Status.ABORT_UNEXPECTED;
            if (ex instanceof SerializableException) {
                error = (SerializableException) ex;
            } else {
                error = new SerializableException(ex);
            }
        } finally {
            // Success, but without any results???
            if (result == null && status == Status.OK) {
                Exception ex = new Exception(String.format(
                        "The WorkFragment %s executed successfully on Partition %d but result is null for %s",
                        fragment.getFragmentIdList(), this.partitionId, ts));
                if (d)
                    LOG.warn(ex);
                status = Status.ABORT_UNEXPECTED;
                error = new SerializableException(ex);
            }
        }

        // For single-partition INSERT/UPDATE/DELETE queries, we don't directly
        // execute the SendPlanNode in order to get back the number of tuples
        // that were modified. So we have to rely on the output dependency ids
        // set in the task.
        assert (status != Status.OK || result.size() == fragment.getFragmentIdCount()) : "Got back "
                + result.size() + " results but was expecting " + fragment.getFragmentIdCount();

        // Make sure that we mark the round as finished before we start sending
        // results
        if (is_local == false) {
            ts.finishRound(this.partitionId);
        }

        // -------------------------------
        // PREFETCH QUERIES
        // -------------------------------
        if (is_prefetch) {
            // Regardless of whether this txn is running at the same HStoreSite
            // as this PartitionExecutor, we always need to put the result inside
            // of the AbstractTransaction. This is so that we can identify if we
            // get a request for a query that we have already executed. We'll
            // only do this if it succeeded; if it failed, then we won't do
            // anything and will just wait until they come back to execute the
            // query again before we tell them that something went wrong. It's
            // crude, but it's just easier this way...
            if (status == Status.OK) {
                if (d)
                    LOG.debug(String.format("%s - Storing %d prefetch query results in partition %d query cache",
                            ts, result.size(), ts.getBasePartition()));
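                // Lazily resolve the base partition's executor so that we only
                // look it up once even when storing multiple query results.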
                PartitionExecutor other = null;
                for (int i = 0, cnt = result.size(); i < cnt; i++) {
                    // We're going to store the result in the base partition
                    // cache if they're
                    // on the same HStoreSite as us
                    if (hstore_site.isLocalPartition(ts.getBasePartition())) {
                        if (other == null)
                            other = this.hstore_site.getPartitionExecutor(ts.getBasePartition());
                        other.queryCache.addTransactionQueryResult(ts.getTransactionId(), fragment.getFragmentId(i),
                                fragment.getPartitionId(), parameters[i], result.dependencies[i]);
                    }
                    // We also need to store it in our own cache in case we need
                    // to retrieve it
                    // if they come at us with the same query request
                    this.queryCache.addTransactionQueryResult(ts.getTransactionId(), fragment.getFragmentId(i),
                            fragment.getPartitionId(), parameters[i], result.dependencies[i]);

                } // FOR
            }

            // Now if it's a remote transaction, we need to use the coordinator
            // to send them our result. Note that we want to send a single
            // message per partition. Unlike with the TransactionWorkRequests,
            // we don't need to wait for all of the partitions that are
            // prefetching for this txn at our local HStoreSite to finish.
            if (is_dtxn) {
                WorkResult wr = this.buildWorkResult(ts, result, status, error);
                TransactionPrefetchResult prefetchResult = TransactionPrefetchResult.newBuilder()
                        .setTransactionId(ts.getTransactionId().longValue()).setSourcePartition(this.partitionId)
                        .setResult(wr).setStatus(status).build();
                hstore_coordinator.transactionPrefetchResult((RemoteTransaction) ts, prefetchResult);
            }
        }
        // -------------------------------
        // LOCAL TRANSACTION
        // -------------------------------
        else if (is_dtxn == false) {
            LocalTransaction local_ts = (LocalTransaction) ts;

            // If the transaction is local, store the result directly in the
            // local TransactionState
            if (status == Status.OK) {
                if (t)
                    LOG.trace("Storing " + result.size()
                            + " dependency results locally for successful FragmentTaskMessage");
                assert (result.size() == fragment.getOutputDepIdCount());
                for (int i = 0, cnt = result.size(); i < cnt; i++) {
                    int dep_id = fragment.getOutputDepId(i);
                    if (t)
                        LOG.trace("Storing DependencyId #" + dep_id + " for " + ts);
                    try {
                        local_ts.addResult(this.partitionId, dep_id, result.dependencies[i]);
                    } catch (Throwable ex) {
                        ex.printStackTrace();
                        String msg = String.format("Failed to store Dependency #%d for %s [idx=%d, fragmentId=%d]",
                                dep_id, ts, i, fragment.getFragmentId(i));
                        LOG.error(msg + "\n" + fragment.toString());
                        throw new ServerFaultException(msg, ex);
                    }
                } // FOR
            } else {
                local_ts.setPendingError(error, true);
            }
        }
        // -------------------------------
        // REMOTE TRANSACTION
        // -------------------------------
        else {
            if (d)
                LOG.debug(String.format(
                        "Constructing WorkResult %s with %d results from partition %d to send back to initial partition %d [status=%s]",
                        ts, (result != null ? result.size() : null), this.partitionId, ts.getBasePartition(),
                        status));

            RpcCallback<WorkResult> callback = ((RemoteTransaction) ts).getFragmentTaskCallback();
            if (callback == null) {
                LOG.fatal("Unable to send FragmentResponseMessage for " + ts);
                LOG.fatal("Orignal FragmentTaskMessage:\n" + fragment);
                LOG.fatal(ts.toString());
                throw new ServerFaultException("No RPC callback to HStoreSite for " + ts, ts.getTransactionId());
            }
            WorkResult response = this.buildWorkResult((RemoteTransaction) ts, result, status, error);
            assert (response != null);
            callback.run(response);

        }
    }

    /**
     * Executes a FragmentTaskMessage on behalf of some remote site and returns
     * the resulting DependencySet
     * 
     * @param fragment
     * @return
     * @throws Exception
     */
    private DependencySet executeWorkFragment(AbstractTransaction ts, WorkFragment fragment,
            ParameterSet parameters[]) throws Exception {
        DependencySet result = null;
        final long undoToken = ts.getLastUndoToken(this.partitionId);
        int fragmentCount = fragment.getFragmentIdCount();
        boolean fastaggre = false; // fast aggregate flag
        boolean fastcombi = false; // fast combine flag

        if (fragmentCount == 0) {
            LOG.warn(String.format("Got a FragmentTask for %s that does not have any fragments?!?", ts));
            return (result);
        }

        // Construct arrays given to the EE
        long fragmentIds[] = tmp_fragmentIds.getArray(fragmentCount);
        int outputDepIds[] = tmp_outputDepIds.getArray(fragmentCount);
        int inputDepIds[] = tmp_inputDepIds.getArray(fragmentCount); // Is this ok?
        for (int i = 0; i < fragmentCount; i++) {
            fragmentIds[i] = fragment.getFragmentId(i);
            outputDepIds[i] = fragment.getOutputDepId(i);
            for (int input_depId : fragment.getInputDepId(i).getIdsList()) {
                // FIXME: This overwrites the slot on every iteration, so only
                // the last input dependency id for each fragment is kept.
                inputDepIds[i] = input_depId;
            } // FOR
        } // FOR

        // Input Dependencies
        this.tmp_EEdependencies.clear();
        this.getFragmentInputs(ts, fragment, this.tmp_EEdependencies);

        // *********************************** DEBUG ***********************************
        if (d) {
            LOG.debug(String.format("%s - Getting ready to kick %d fragments to EE", ts, fragmentCount));
            // if (t) {
            // LOG.trace("FragmentTaskIds: " + Arrays.toString(fragmentIds));
            // Map<String, Object> m = new ListOrderedMap<String, Object>();
            // for (int i = 0; i < parameters.length; i++) {
            // m.put("Parameter[" + i + "]", parameters[i]);
            // } // FOR
            // LOG.trace("Parameters:\n" + StringUtil.formatMaps(m));
            // }
        }
        // *********************************** DEBUG ***********************************

        // -------------------------------
        // SYSPROC FRAGMENTS
        // -------------------------------
        if (ts.isSysProc()) {
            assert (fragmentCount == 1);
            long fragment_id = fragmentIds[0];
            assert (fragmentCount == parameters.length) : String.format("%s - Fragments:%d / Parameters:%d", ts,
                    fragmentCount, parameters.length);
            ParameterSet fragmentParams = parameters[0];

            VoltSystemProcedure volt_proc = this.m_registeredSysProcPlanFragments.get(fragment_id);
            if (volt_proc == null) {
                String msg = "No sysproc handle exists for FragmentID #" + fragment_id + " :: "
                        + this.m_registeredSysProcPlanFragments;
                throw new ServerFaultException(msg, ts.getTransactionId());
            }

            // HACK: We have to set the TransactionState for sysprocs manually
            volt_proc.setTransactionState(ts);
            ts.markExecNotReadOnly(this.partitionId);
            try {
                result = volt_proc.executePlanFragment(ts.getTransactionId(), this.tmp_EEdependencies,
                        (int) fragment_id, fragmentParams, this.m_systemProcedureContext);
            } catch (Throwable ex) {
                String msg = "Unexpected error when executing system procedure";
                throw new ServerFaultException(msg, ex, ts.getTransactionId());
            }
            if (d)
                LOG.debug(
                        String.format("%s - Finished executing sysproc fragment %d\n%s", ts, fragment_id, result));
            // -------------------------------
            // REGULAR FRAGMENTS
            // -------------------------------
        } else {

            for (int i = 0; i < fragmentCount; i++) {
                fastaggre = PlanFragmentIdGenerator.isPlanFragmentFastAggregate(fragmentIds[i]);
                fastcombi = PlanFragmentIdGenerator.isPlanFragmentFastCombine(fragmentIds[i]);
                // Determine whether to use the fast-executor feature
                if (d)
                    LOG.debug("Fast execution in Java enabled: " + hstore_conf.site.exec_fast_executors);
                if (hstore_conf.site.exec_fast_executors) {

                    if (fastaggre) {
                        // Go into Java to execute a fast aggregate: make sure
                        // each VoltTable has just one column, and do the
                        // simple summation.
                        result = aggexecutor.execute(outputDepIds, inputDepIds, this.tmp_EEdependencies);

                    } else if (fastcombi) {
                        // do fast combine in Java
                        result = combexecutor.execute(outputDepIds, inputDepIds, this.tmp_EEdependencies);

                    } else {
                        // send to ExecutionEngine
                        result = this.executePlanFragments(ts, undoToken, fragmentCount, fragmentIds, parameters,
                                outputDepIds, inputDepIds, this.tmp_EEdependencies);
                    }
                } else {
                    // Do not use the fast feature
                    result = this.executePlanFragments(ts, undoToken, fragmentCount, fragmentIds, parameters,
                            outputDepIds, inputDepIds, this.tmp_EEdependencies);
                }
            } // for

            if (result == null) {
                if (d)
                    LOG.warn(String.format("Output DependencySet for %s in %s is null?",
                            Arrays.toString(fragmentIds), ts));
            }
        }
        return (result);
    }

    /**
     * Execute a BatchPlan directly on this PartitionExecutor without having to
     * convert it to FragmentTaskMessages first. This is a big speed improvement
     * over having to queue things up.
     * 
     * @param ts
     * @param plan
     * @return
     */
    public VoltTable[] executeLocalPlan(LocalTransaction ts, BatchPlanner.BatchPlan plan,
            ParameterSet parameterSets[]) {
        long undoToken = HStoreConstants.DISABLE_UNDO_LOGGING_TOKEN;
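        // Start from the "disable undo logging" sentinel; the checks below swap
        // in a real undo token whenever this txn might abort or write data.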

        // If we originally executed this transaction with undo buffers and we
        // have a MarkovEstimate, then we can go back and check whether we want
        // to disable undo logging for the rest of the transaction.
        // We can do this regardless of whether the transaction has written
        // anything. <-- NOT TRUE!
        if (ts.getEstimatorState() != null && ts.isPredictSinglePartition() && ts.isSpeculative() == false
                && hstore_conf.site.exec_no_undo_logging) {
            MarkovEstimate est = ts.getEstimatorState().getLastEstimate();
            assert (est != null) : "Got back null MarkovEstimate for " + ts;
            if (est.isAbortable(this.thresholds)
                    || est.isReadOnlyPartition(this.thresholds, this.partitionId) == false) {
                undoToken = this.getNextUndoToken();
            } else if (d) {
                LOG.debug(String.format("Bold! Disabling undo buffers for inflight %s [prob=%f]\n%s\n%s", ts,
                        est.getAbortProbability(), est, plan.toString()));
            }
        }
        // If the transaction is predicted to be read-only, then we won't bother
        // with an undo buffer
        else if ((ts.isPredictReadOnly() == false && hstore_conf.site.exec_no_undo_logging_all == false)
                || hstore_conf.site.exec_force_undo_logging_all) {
            undoToken = this.getNextUndoToken();
        }
        ts.fastInitRound(this.partitionId, undoToken);
        ts.setBatchSize(plan.getBatchSize());

        int fragmentCount = plan.getFragmentCount();
        long fragmentIds[] = plan.getFragmentIds();
        int output_depIds[] = plan.getOutputDependencyIds();
        int input_depIds[] = plan.getInputDependencyIds();

        // Mark that we touched the local partition once for each query in the
        // batch
        // ts.getTouchedPartitions().put(this.partitionId, plan.getBatchSize());

        // Only notify other partitions that we're done with them if we're not a
        // single-partition transaction
        if (hstore_conf.site.exec_speculative_execution && ts.isPredictSinglePartition() == false) {
            // TODO: We need to notify the remote HStoreSites that we are done
            // with their partitions
            ts.calculateDonePartitions(this.thresholds);
        }

        if (t) {
            // StringBuilder sb = new StringBuilder();
            // sb.append("Parameters:");
            // for (int i = 0; i < parameterSets.length; i++) {
            // sb.append(String.format("\n [%02d] %s", i,
            // parameterSets[i].toString()));
            // }
            // LOG.trace(sb.toString());
            LOG.trace(String.format(
                    "Txn #%d - BATCHPLAN:\n" + "  fragmentIds:   %s\n" + "  fragmentCount: %s\n"
                            + "  output_depIds: %s\n" + "  input_depIds:  %s",
                    ts.getTransactionId(), Arrays.toString(plan.getFragmentIds()), plan.getFragmentCount(),
                    Arrays.toString(plan.getOutputDependencyIds()), Arrays.toString(plan.getInputDependencyIds())));
        }

        // NOTE: There are no dependencies that we need to pass in because the
        // entire batch is single-partitioned
        DependencySet result = this.executePlanFragments(ts, undoToken, fragmentCount, fragmentIds, parameterSets,
                output_depIds, input_depIds, null);
        // assert(result != null) : "Unexpected null DependencySet result for "
        // + ts;
        if (t)
            LOG.trace("Output:\n" + result);

        ts.fastFinishRound(this.partitionId);
        return (result != null ? result.dependencies : null);
    }

    /**
     * Execute the given fragment tasks on this site's underlying EE
     * 
     * @param ts
     * @param undoToken
     * @param batchSize
     * @param fragmentIds
     * @param parameterSets
     * @param output_depIds
     * @param input_depIds
     * @return
     */
    private DependencySet executePlanFragments(AbstractTransaction ts, long undoToken, int batchSize,
            long fragmentIds[], ParameterSet parameterSets[], int output_depIds[], int input_depIds[],
            Map<Integer, List<VoltTable>> input_deps) {
        assert (this.ee != null) : "The EE object is null. This is bad!";
        Long txn_id = ts.getTransactionId();

        // *********************************** DEBUG ***********************************
        if (d) {
            StringBuilder sb = new StringBuilder();
            sb.append(String.format("%s - Executing %d fragments [lastTxnId=%d, undoToken=%d]", ts, batchSize,
                    this.lastCommittedTxnId, undoToken));
            if (t) {
                Map<String, Object> m = new ListOrderedMap<String, Object>();
                m.put("Fragments", Arrays.toString(fragmentIds));

                Map<Integer, Object> inner = new ListOrderedMap<Integer, Object>();
                for (int i = 0; i < batchSize; i++)
                    inner.put(i, parameterSets[i].toString());
                m.put("Parameters", inner);

                if (batchSize > 0 && input_depIds[0] != HStoreConstants.NULL_DEPENDENCY_ID) {
                    inner = new ListOrderedMap<Integer, Object>();
                    for (int i = 0; i < batchSize; i++) {
                        List<VoltTable> deps = input_deps.get(input_depIds[i]);
                        inner.put(input_depIds[i], (deps != null ? StringUtil.join("\n", deps) : "???"));
                    } // FOR
                    m.put("Input Dependencies", inner);
                }
                m.put("Output Dependencies", Arrays.toString(output_depIds));
                sb.append("\n" + StringUtil.formatMaps(m));
            }
            LOG.debug(sb.toString());
        }
        // *********************************** DEBUG ***********************************

        // pass attached dependencies to the EE (for non-sysproc work).
        if (input_deps != null && input_deps.isEmpty() == false) {
            if (d)
                LOG.debug(String.format("%s - Stashing %d InputDependencies at partition %d", ts, input_deps.size(),
                        this.partitionId));
            ee.stashWorkUnitDependencies(input_deps);
        }

        // Check whether these fragments are read-only
        if (ts.isExecReadOnly(this.partitionId)) {

            boolean readonly = PlanFragmentIdGenerator.areFragmentsReadOnly(this.database, fragmentIds, batchSize);

            if (readonly == false) {
                if (d)
                    LOG.debug(String.format("%s - Marking txn as not read-only %s", ts,
                            Arrays.toString(fragmentIds)));
                ts.markExecNotReadOnly(this.partitionId);
            }

            // We can do this here because the only way that we're not read-only
            // is if we actually modify data at this partition.
            ts.setSubmittedEE(this.partitionId);
        }

        DependencySet result = null;
        boolean needs_profiling = (hstore_conf.site.txn_profiling && ts.isExecLocal(this.partitionId));
        if (needs_profiling)
            ((LocalTransaction) ts).profiler.startExecEE();
        Throwable error = null;
        try {
            if (d)
                LOG.debug(String.format("%s - Executing fragments %s at partition %d", ts,
                        Arrays.toString(fragmentIds), this.partitionId));

            result = this.ee.executeQueryPlanFragmentsAndGetDependencySet(fragmentIds, batchSize, input_depIds,
                    output_depIds, parameterSets, batchSize, txn_id.longValue(), this.lastCommittedTxnId,
                    undoToken);

        } catch (SerializableException ex) {
            if (d)
                LOG.error(String.format("%s - Unexpected error in the ExecutionEngine on partition %d", ts,
                        this.partitionId), ex);
            error = ex;
            throw ex;
        } catch (Throwable ex) {
            error = ex;
            throw new ServerFaultException(
                    String.format("%s - Failed to execute PlanFragments: %s", ts, Arrays.toString(fragmentIds)),
                    ex);
        } finally {
            if (needs_profiling)
                ((LocalTransaction) ts).profiler.stopExecEE();
            if (error == null && result == null) {
                LOG.warn(String.format(
                        "%s - Finished executing fragments but got back null results [fragmentIds=%s]", ts,
                        Arrays.toString(fragmentIds)));
            }
        }

        // *********************************** DEBUG ***********************************
        if (d) {
            if (result != null) {
                LOG.debug(String.format("%s - Finished executing fragments and got back %d results", ts,
                        result.depIds.length));
            } else {
                LOG.warn(String.format(
                        "%s - Finished executing fragments but got back null results? That seems bad...", ts));
            }
        }
        // *********************************** DEBUG ***********************************
        return (result);
    }

    /**
     * @param txn_id
     * @param clusterName
     * @param databaseName
     * @param tableName
     * @param data
     * @param allowELT
     * @throws VoltAbortException
     */
    public void loadTable(AbstractTransaction ts, String clusterName, String databaseName, String tableName,
            VoltTable data, int allowELT) throws VoltAbortException {
        if (cluster == null) {
            throw new VoltProcedure.VoltAbortException("cluster '" + clusterName + "' does not exist");
        }
        if (this.database.getName().equalsIgnoreCase(databaseName) == false) {
            throw new VoltAbortException(
                    "database '" + databaseName + "' does not exist in cluster " + clusterName);
        }
        Table table = this.database.getTables().getIgnoreCase(tableName);
        if (table == null) {
            throw new VoltAbortException(
                    "table '" + tableName + "' does not exist in database " + clusterName + "." + databaseName);
        }

        ts.setSubmittedEE(this.partitionId);
        ee.loadTable(table.getRelativeIndex(), data, ts.getTransactionId(), lastCommittedTxnId, getNextUndoToken(),
                allowELT != 0);
    }

    /**
     * Execute a SQLStmt batch at this partition.
     * 
     * @param ts
     *            The txn handle that is executing this query batch
     * @param batchSize
     *            The number of SQLStmts that the txn queued up using
     *            voltQueueSQL()
     * @param batchStmts
     *            The SQLStmts that the txn is trying to execute
     * @param batchParams
     *            The input parameters for the SQLStmts
     * @param finalTask
     *            Whether the txn has marked this as the last batch that they
     *            will ever execute
     * @param forceSinglePartition
     *            Whether to force the BatchPlanner to only generate a
     *            single-partition plan
     * @return
     */
    public VoltTable[] executeSQLStmtBatch(LocalTransaction ts, int batchSize, SQLStmt batchStmts[],
            ParameterSet batchParams[], boolean finalTask, boolean forceSinglePartition) {
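        // Illustrative usage sketch (hypothetical names, not from the original
        // source): a txn's voltQueueSQL()/voltExecuteSQL() path is assumed to
        // funnel into this method, roughly as:
        //
        //   SQLStmt stmts[] = { new SQLStmt("SELECT W_NAME FROM WAREHOUSE WHERE W_ID = ?") };
        //   ParameterSet params[] = { /* one ParameterSet per queued SQLStmt */ };
        //   VoltTable results[] = this.executeSQLStmtBatch(ts, 1, stmts, params, false, false);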

        if (hstore_conf.site.exec_deferrable_queries) {
            // TODO: Loop through batchStmts and check whether their
            // corresponding Statement is marked as deferrable. If so, then
            // remove them from batchStmts and batchParams (sliding everyone
            // over by one in the arrays) and queue up the deferred query.
            // Be sure to decrement batchSize after you finish processing this.
            // EXAMPLE: batchStmts[0].getStatement().getDeferrable()
        }

        // Calculate the hash code for this batch to see whether we already have
        // a planner
        final Integer batchHashCode = VoltProcedure.getBatchHashCode(batchStmts, batchSize);
        BatchPlanner planner = this.batchPlanners.get(batchHashCode);
        if (planner == null) { // Assume fast case
            planner = new BatchPlanner(batchStmts, batchSize, ts.getProcedure(), this.p_estimator,
                    forceSinglePartition);
            this.batchPlanners.put(batchHashCode, planner);
        }
        assert (planner != null);

        // At this point we have to calculate exactly what we need to do on each
        // partition for this batch. So somehow right now we need to fire this
        // off to either our local executor or to Evan's magical distributed
        // transaction manager.
        BatchPlanner.BatchPlan plan = planner.plan(ts.getTransactionId(), ts.getClientHandle(), this.partitionIdObj,
                ts.getPredictTouchedPartitions(), ts.isPredictSinglePartition(), ts.getTouchedPartitions(),
                batchParams);

        assert (plan != null);
        if (d)
            LOG.debug("BatchPlan for " + ts + ":\n" + plan.toString());
        if (hstore_conf.site.txn_profiling)
            ts.profiler.stopExecPlanning();

        // Tell the TransactionEstimator that we're about to execute these mofos
        TransactionEstimator.State t_state = ts.getEstimatorState();
        if (t_state != null) {
            if (hstore_conf.site.txn_profiling)
                ts.profiler.startExecEstimation();
            this.t_estimator.executeQueries(t_state, planner.getStatements(), plan.getStatementPartitions(), true);
            if (hstore_conf.site.txn_profiling)
                ts.profiler.stopExecEstimation();
        }

        // Check whether our plan caused a misprediction. Doing it this way
        // allows us to update the TransactionEstimator before we abort the txn.
        if (plan.getMisprediction() != null) {
            MispredictionException ex = plan.getMisprediction();
            ts.setPendingError(ex, false);

            MarkovGraph markov = (t_state != null ? t_state.getMarkovGraph() : null);
            if (hstore_conf.site.markov_mispredict_recompute && markov != null) {
                if (d)
                    LOG.debug("Recomputing MarkovGraph probabilities because " + ts + " mispredicted");
                // FIXME this.executor.helper.queueMarkovToRecompute(markov);
            }

            // Print Misprediction Debug
            if (d || hstore_conf.site.exec_mispredict_crash) {
                // FIXME LOG.warn("\n" + mispredictDebug(batchStmts,
                // batchParams, markov, t_state, ex, batchSize));
            }

            // Crash on Misprediction!
            if (hstore_conf.site.exec_mispredict_crash) {
                LOG.fatal(String.format("Crashing because site.exec_mispredict_crash is true [txn=%s]", ts));
                this.crash(ex);
            } else if (d) {
                LOG.debug(ts + " mispredicted! Aborting and restarting!");
            }
            throw ex;
        }

        VoltTable results[] = null;
        if (plan.isReadOnly() == false)
            ts.markExecNotReadOnlyAllPartitions();

        // If the BatchPlan only has WorkFragments that are for this
        // partition, then we can use the fast-path executeLocalPlan() method
        if (plan.isSingledPartitionedAndLocal()) {
            if (d)
                LOG.debug("Executing BatchPlan directly with ExecutionSite");
            results = this.executeLocalPlan(ts, plan, batchParams);
        }
        // Otherwise, we need to generate WorkFragments and then send the
        // messages out to our remote partitions using the HStoreCoordinator
        else {
            this.partitionFragments.clear();
            plan.getWorkFragments(ts.getTransactionId(), this.partitionFragments);
            if (t)
                LOG.trace(
                        "Got back a set of tasks for " + this.partitionFragments.size() + " partitions for " + ts);

            // Block until we get all of our responses.
            results = this.dispatchWorkFragments(ts, batchSize, this.partitionFragments, batchParams);
        }
        if (d && results == null)
            LOG.warn("Got back a null results array for " + ts + "\n" + plan.toString());

        if (hstore_conf.site.txn_profiling)
            ts.profiler.startExecJava();

        return (results);
    }
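
    // Illustrative only (not part of the original source): roughly how a
    // stored procedure's batch would be driven through executeSQLStmtBatch().
    // The variable names here are hypothetical.
    //
    //   SQLStmt[] stmts = { getWarehouse, updateStock };
    //   ParameterSet[] params = { warehouseParams, stockParams };
    //   // finalTask=false: more batches may follow;
    //   // forceSinglePartition=false: let the BatchPlanner decide
    //   VoltTable[] results = executor.executeSQLStmtBatch(ts, 2, stmts, params, false, false);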

    /**
     * Build a WorkResult for the given transaction that contains this
     * partition's status, any serialized error, and the output dependencies
     * that need to be pushed back to the requesting partition.
     */
    protected WorkResult buildWorkResult(AbstractTransaction ts, DependencySet result, Status status,
            SerializableException error) {
        WorkResult.Builder builder = WorkResult.newBuilder();

        // Partition Id
        builder.setPartitionId(this.partitionId);

        // Status
        builder.setStatus(status);

        // SerializableException
        if (error != null) {
            int size = error.getSerializedSize();
            BBContainer bc = this.buffer_pool.acquire(size);
            error.serializeToBuffer(bc.b);
            bc.b.rewind();
            builder.setError(ByteString.copyFrom(bc.b));
            bc.discard();
        }

        // Push dependencies back to the remote partition that needs them
        if (status == Status.OK) {
            for (int i = 0, cnt = result.size(); i < cnt; i++) {
                builder.addDepId(result.depIds[i]);
                this.fs.clear();
                try {
                    result.dependencies[i].writeExternal(this.fs);
                    ByteString bs = ByteString.copyFrom(this.fs.getBBContainer().b);
                    builder.addDepData(bs);
                } catch (Exception ex) {
                    throw new ServerFaultException(
                            String.format("Failed to serialize output dependency %d for %s", result.depIds[i], ts),
                            ex);
                }
                if (t)
                    LOG.trace(String.format("%s - Serialized Output Dependency %d\n%s", ts, result.depIds[i],
                            result.dependencies[i]));
            } // FOR
            this.fs.getBBContainer().discard();
        }

        return (builder.build());
    }

    /**
     * This site is requesting that the coordinator execute work on its behalf
     * at remote sites in the cluster
     * 
     * @param ts
     * @param tasks
     * @param parameterSets
     */
    private void requestWork(LocalTransaction ts, Collection<WorkFragment> tasks, List<ByteString> parameterSets) {
        assert (!tasks.isEmpty());
        assert (ts != null);
        Long txn_id = ts.getTransactionId();

        if (t)
            LOG.trace(String.format("Wrapping %d WorkFragments into a TransactionWorkRequest for %s", tasks.size(),
                    ts));

        // If our transaction was originally designated as single-partitioned,
        // then we need to make sure that we don't touch any partition other
        // than our local one. If we do, then we need to abort it and restart
        // it as multi-partitioned
        boolean need_restart = false;
        boolean predict_singlepartition = ts.isPredictSinglePartition();
        BitSet done_partitions = ts.getDonePartitions();

        boolean new_done = false;
        if (hstore_conf.site.exec_speculative_execution) {
            new_done = ts.calculateDonePartitions(this.thresholds);
        }

        // Now we can go back through and start running all of the
        // FragmentTaskMessages that were not blocked waiting for an input
        // dependency. Note that we pack all the fragments into a single
        // CoordinatorFragment rather than sending each FragmentTaskMessage
        // in its own message
        for (WorkFragment ftask : tasks) {
            assert (!ts.isBlocked(ftask));

            int target_partition = ftask.getPartitionId();
            int target_site = hstore_site.getSiteIdForPartitionId(target_partition);

            // Make sure that this isn't a single-partition txn trying to access
            // a remote partition
            if (predict_singlepartition && target_partition != this.partitionId) {
                if (d)
                    LOG.debug(String.format(
                            "%s on partition %d is suppose to be single-partitioned, but it wants to execute a fragment on partition %d",
                            ts, this.partitionId, target_partition));
                need_restart = true;
                break;
            }
            // Make sure that this txn isn't trying to access a partition
            // that we said we were done with earlier
            else if (done_partitions.get(target_partition)) {
                if (d)
                    LOG.debug(String.format(
                            "%s on partition %d was marked as done on partition %d but now it wants to go back for more!",
                            ts, this.partitionId, target_partition));
                need_restart = true;
                break;
            }
            // Make sure we at least have something to do!
            else if (ftask.getFragmentIdCount() == 0) {
                LOG.warn(String.format("%s - Trying to send a WorkFragment request with 0 fragments", ts));
                continue;
            }

            // Get the TransactionWorkRequest.Builder for the remote HStoreSite.
            // We will use this to store our serialized input dependencies
            TransactionWorkRequestBuilder requestBuilder = tmp_transactionRequestBuilders[target_site];
            if (requestBuilder == null) {
                requestBuilder = tmp_transactionRequestBuilders[target_site] = new TransactionWorkRequestBuilder();
            }
            TransactionWorkRequest.Builder builder = requestBuilder.getBuilder(ts);

            // Also keep track of what Statements they are executing so that
            // we know what we need to send over the wire to them.
            requestBuilder.addParamIndexes(ftask.getParamIndexList());

            // Input Dependencies
            if (ftask.getNeedsInput()) {
                if (d)
                    LOG.debug("Retrieving input dependencies for " + ts);

                tmp_removeDependenciesMap.clear();
                this.getFragmentInputs(ts, ftask, tmp_removeDependenciesMap);

                for (Entry<Integer, List<VoltTable>> e : tmp_removeDependenciesMap.entrySet()) {
                    if (requestBuilder.hasInputDependencyId(e.getKey()))
                        continue;

                    if (d)
                        LOG.debug(String.format("%s - Attaching %d input dependencies to be sent to %s", ts,
                                e.getValue().size(), HStoreThreadManager.formatSiteName(target_site)));
                    for (VoltTable vt : e.getValue()) {
                        this.fs.clear();
                        try {
                            this.fs.writeObject(vt);
                            builder.addAttachedDepId(e.getKey().intValue());
                            builder.addAttachedData(ByteString.copyFrom(this.fs.getBBContainer().b));
                        } catch (Exception ex) {
                            String msg = String.format("Failed to serialize input dependency %d for %s", e.getKey(),
                                    ts);
                            throw new ServerFaultException(msg, ts.getTransactionId());
                        }
                        if (d)
                            LOG.debug(String.format(
                                    "%s - Storing %d rows for InputDependency %d to send to partition %d [bytes=%d]",
                                    ts, vt.getRowCount(), e.getKey(), ftask.getPartitionId(),
                                    CollectionUtil.last(builder.getAttachedDataList()).size()));
                    } // FOR
                    requestBuilder.addInputDependencyId(e.getKey());
                } // FOR
                this.fs.getBBContainer().discard();
            }
            builder.addFragments(ftask);
        } // FOR (tasks)

        // Bad mojo! We need to throw a MispredictionException so that the
        // VoltProcedure will catch it and we can propagate the error message
        // all the way back to the HStoreSite
        if (need_restart) {
            if (t)
                LOG.trace(String.format("Aborting %s because it was mispredicted", ts));
            // This is kind of screwy because we don't actually want to send
            // the touched partitions histogram because VoltProcedure will
            // just do it for us...
            throw new MispredictionException(txn_id, null);
        }

        // Stick the ParameterSets that each site needs into its
        // TransactionWorkRequest
        for (int target_site = 0; target_site < tmp_transactionRequestBuilders.length; target_site++) {
            TransactionWorkRequestBuilder builder = tmp_transactionRequestBuilders[target_site];
            if (builder == null || builder.isDirty() == false) {
                continue;
            }
            assert (builder != null);
            builder.addParameterSets(parameterSets);

            // Bombs away!
            this.hstore_coordinator.transactionWork(ts, target_site, builder.build(), this.request_work_callback);
            if (d)
                LOG.debug(String.format("%s - Sent Work request to remote HStoreSites for %s", ts, target_site));

        } // FOR

        // TODO: We need to check whether we need to notify the HStoreSites
        // that we didn't send a new FragmentTaskMessage to, telling them
        // that we are done with their partitions
        if (new_done) {
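            // Sketch only (not part of the original source): one plausible
            // approach is to notify each HStoreSite whose partitions we just
            // marked as done, e.g. by reusing the coordinator's prepare path:
            //
            //   // newDonePartitions: the partitions that
            //   // calculateDonePartitions() just added (hypothetical name)
            //   this.hstore_coordinator.transactionPrepare(ts, callback, newDonePartitions);
            //
            // Whether transactionPrepare is the right vehicle for this
            // notification is an assumption.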

        }
    }

    /**
     * Execute the given tasks and then block the current thread waiting for
     * the list of dependency_ids to come back from whatever it was we were
     * supposed to do... This is the slowest way to execute a bunch of
     * WorkFragments and therefore should only be invoked for batches that
     * need to access non-local partitions
     * 
     * @param ts
     * @param fragments
     * @param parameters
     * @return
     */
    public VoltTable[] dispatchWorkFragments(final LocalTransaction ts, final int batchSize,
            Collection<WorkFragment> fragments, final ParameterSet parameters[]) {
        assert (fragments.isEmpty() == false) : "Unexpected empty WorkFragment list for " + ts;

        // *********************************** DEBUG ***********************************
        if (d) {
            LOG.debug(String.format("%s - Preparing to dispatch %d messages and wait for the results", ts,
                    fragments.size()));
            if (t) {
                StringBuilder sb = new StringBuilder();
                sb.append(ts + " - WorkFragments:\n");
                for (WorkFragment fragment : fragments) {
                    sb.append(StringUtil.box(fragment.toString()) + "\n");
                } // FOR
                sb.append(ts + " - ParameterSets:\n");
                for (ParameterSet ps : parameters) {
                    sb.append(ps + "\n");
                } // FOR
                LOG.trace(sb);
            }
        }
        // *********************************** DEBUG ***********************************

        // OPTIONAL: Check to make sure that this request is valid
        //  (1) At least one of the WorkFragments needs to be executed on a
        //      remote partition
        //  (2) All of the PlanFragment ids in the WorkFragments match this
        //      txn's Procedure
        if (hstore_conf.site.exec_validate_work && ts.isSysProc() == false) {
            LOG.warn(String.format("%s - Checking whether all of the WorkFragments are valid", ts));
            boolean has_remote = false;
            for (WorkFragment frag : fragments) {
                if (frag.getPartitionId() != this.partitionId) {
                    has_remote = true;
                }
                for (int frag_id : frag.getFragmentIdList()) {
                    PlanFragment catalog_frag = CatalogUtil.getPlanFragment(database, frag_id);
                    Statement catalog_stmt = catalog_frag.getParent();
                    assert (catalog_stmt != null);
                    Procedure catalog_proc = catalog_stmt.getParent();
                    if (catalog_proc.equals(ts.getProcedure()) == false) {
                        LOG.warn(ts.debug() + "\n" + fragments + "\n---- INVALID ----\n" + frag);
                        String msg = String.format("%s - Unexpected %s", ts, catalog_frag.fullName());
                        throw new ServerFaultException(msg, ts.getTransactionId());
                    }
                }
            } // FOR
            if (has_remote == false) {
                LOG.warn(ts.debug() + "\n" + fragments);
                String msg = String.format(
                        "%s - Trying to execute all local single-partition queries using the slow-path!", ts);
                throw new ServerFaultException(msg, ts.getTransactionId());
            }
        }

        // We have to store all of the tasks in the TransactionState before we
        // start executing, otherwise there is a race condition where a task
        // with input dependencies will start running as soon as we get one
        // response back from another executor
        ts.initRound(this.partitionId, this.getNextUndoToken());
        ts.setBatchSize(batchSize);

        final boolean prefetch = ts.hasPrefetchQueries();
        final boolean predict_singlePartition = ts.isPredictSinglePartition();

        // Attach the ParameterSets to our transaction handle so that anybody
        // on this HStoreSite can access them directly without needing to
        // deserialize them from the WorkFragments
        ts.attachParameterSets(parameters);

        // Now if we have some work sent out to other partitions, we need to
        // wait until they come back. In the first part, we wait until all of
        // our blocked FragmentTaskMessages become unblocked
        LinkedBlockingDeque<Collection<WorkFragment>> queue = ts.getUnblockedWorkFragmentsQueue();

        boolean first = true;
        boolean serializedParams = false;
        CountDownLatch latch = null;
        boolean all_local = true;
        boolean is_localSite;
        boolean is_localPartition;
        int num_localPartition = 0;
        int num_localSite = 0;
        int num_remote = 0;
        int num_skipped = 0;
        int total = 0;

        // Run through this loop if:
        //  (1) We have no pending errors
        //  (2) This is our first time in the loop (first == true)
        //  (3) We know that there are still messages being blocked
        //  (4) We know that there are still unblocked messages that we need
        //      to process
        //  (5) The latch for this round is still greater than zero
        while (ts.hasPendingError() == false
                && (first == true || ts.stillHasWorkFragments() || (latch != null && latch.getCount() > 0))) {
            if (t)
                LOG.trace(String.format("%s - [first=%s, stillHasWorkFragments=%s, latch=%s]", ts, first,
                        ts.stillHasWorkFragments(), queue.size(), latch));

            // If this is not the first time through the loop, then poll the
            // queue to get our list of fragments
            if (first == false) {
                all_local = true;
                is_localSite = false;
                is_localPartition = false;
                num_localPartition = 0;
                num_localSite = 0;
                num_remote = 0;
                num_skipped = 0;
                total = 0;

                if (t)
                    LOG.trace(String.format("%s - Waiting for unblocked tasks on partition %d", ts,
                            this.partitionId));
                if (hstore_conf.site.txn_profiling)
                    ts.profiler.startExecDtxnWork();
                try {
                    fragments = queue.takeFirst(); // BLOCKING
                } catch (InterruptedException ex) {
                    if (this.hstore_site.isShuttingDown() == false) {
                        LOG.error(String.format("%s - We were interrupted while waiting for blocked tasks", ts),
                                ex);
                    }
                    return (null);
                } finally {
                    if (hstore_conf.site.txn_profiling)
                        ts.profiler.stopExecDtxnWork();
                }
            }
            assert (fragments != null);

            // If the list of fragments to unblock is empty, then we know that
            // we have dispatched all of the WorkFragments for the
            // transaction's current SQLStmt batch. That means we can just wait
            // until all the results return to us.
            if (fragments.isEmpty()) {
                if (t)
                    LOG.trace(ts + " - Got an empty list of WorkFragments. Blocking until dependencies arrive");
                break;
            }

            this.tmp_localWorkFragmentList.clear();
            if (predict_singlePartition == false) {
                this.tmp_remoteFragmentList.clear();
                this.tmp_localSiteFragmentList.clear();
            }

            // -------------------------------
            // FAST PATH: Assume everything is local
            // -------------------------------
            if (predict_singlePartition) {
                for (WorkFragment ftask : fragments) {
                    if (first == false || ts.addWorkFragment(ftask) == false) {
                        this.tmp_localWorkFragmentList.add(ftask);
                        total++;
                        num_localPartition++;
                    }
                } // FOR

                // We have to tell the TransactionState to start the round
                // before we send off the FragmentTasks for execution, since
                // they might start executing locally!
                if (first) {
                    ts.startRound(this.partitionId);
                    latch = ts.getDependencyLatch();
                }

                // Execute all of our WorkFragments quickly at our local
                // ExecutionEngine
                for (WorkFragment fragment : this.tmp_localWorkFragmentList) {
                    if (d)
                        LOG.debug(String.format("Got unblocked FragmentTaskMessage for %s. Executing locally...",
                                ts));
                    assert (fragment.getPartitionId() == this.partitionId) : String.format(
                            "Trying to process FragmentTaskMessage for %s on partition %d but it should have been sent to partition %d [singlePartition=%s]\n%s",
                            ts, this.partitionId, fragment.getPartitionId(), predict_singlePartition, fragment);
                    ParameterSet fragmentParams[] = this.getFragmentParameters(ts, fragment, parameters);
                    this.processWorkFragment(ts, fragment, fragmentParams);
                } // FOR
            }
            // -------------------------------
            // SLOW PATH: Mixed local and remote messages
            // -------------------------------
            else {
                // Look at each task and figure out whether it needs to be
                // executed at a remote
                // HStoreSite or whether we can execute it at one of our local
                // PartitionExecutors.
                for (WorkFragment fragment : fragments) {
                    int partition = fragment.getPartitionId();
                    is_localSite = hstore_site.isLocalPartition(partition);
                    is_localPartition = (partition == this.partitionId);
                    all_local = all_local && is_localPartition;
                    if (first == false || ts.addWorkFragment(fragment) == false) {
                        total++;

                        // At this point we know that all of the WorkFragments
                        // have been registered in the LocalTransaction, so
                        // it's safe for us to look to see whether we already
                        // have a prefetched result that we need
                        if (prefetch && is_localPartition == false) {
                            boolean skip_queue = true;
                            for (int i = 0, cnt = fragment.getFragmentIdCount(); i < cnt; i++) {
                                int fragId = fragment.getFragmentId(i);
                                int paramIdx = fragment.getParamIndex(i);

                                VoltTable vt = this.queryCache.getTransactionCachedResult(ts.getTransactionId(),
                                        fragId, partition, parameters[paramIdx]);
                                if (vt != null) {
                                    ts.addResult(partition, fragment.getOutputDepId(i), vt);
                                } else {
                                    skip_queue = false;
                                }
                            } // FOR
                            // If we were able to get cached results for all
                            // of the fragmentIds in this WorkFragment, then
                            // there is no need for us to send the message.
                            // So we'll just skip queuing it up! How nice!
                            if (skip_queue) {
                                if (d)
                                    LOG.debug(String.format(
                                            "%s - Using prefetch result for all fragments from partition %d", ts,
                                            partition));
                                num_skipped++;
                                continue;
                            }
                        }

                        // Otherwise add it to our list of WorkFragments that
                        // we want to queue up right now
                        if (is_localPartition) {
                            this.tmp_localWorkFragmentList.add(fragment);
                            num_localPartition++;
                        } else if (is_localSite) {
                            this.tmp_localSiteFragmentList.add(fragment);
                            num_localSite++;
                        } else {
                            this.tmp_remoteFragmentList.add(fragment);
                            num_remote++;
                        }
                    }
                } // FOR
                assert (total == (num_remote + num_localSite + num_localPartition + num_skipped)) : String.format(
                        "Total:%d / Remote:%d / LocalSite:%d / LocalPartition:%d / Skipped:%d", total, num_remote,
                        num_localSite, num_localPartition, num_skipped);
                if (num_localPartition == 0 && num_localSite == 0 && num_remote == 0 && num_skipped == 0) {
                    String msg = String.format("Deadlock! All tasks for %s are blocked waiting on input!", ts);
                    throw new ServerFaultException(msg, ts.getTransactionId());
                }

                // We have to tell the TransactionState to start the round
                // before we send off the FragmentTasks for execution, since
                // they might start executing locally!
                if (first) {
                    ts.startRound(this.partitionId);
                    latch = ts.getDependencyLatch();
                }

                // Now request the fragments that aren't local
                // We want to push these out as soon as possible
                if (num_remote > 0) {
                    // We only need to serialize the ParameterSets once
                    if (serializedParams == false) {
                        if (hstore_conf.site.txn_profiling)
                            ts.profiler.startSerialization();
                        tmp_serializedParams.clear();
                        for (int i = 0; i < parameters.length; i++) {
                            if (parameters[i] == null) {
                                tmp_serializedParams.add(ByteString.EMPTY);
                            } else {
                                this.fs.clear();
                                try {
                                    parameters[i].writeExternal(this.fs);
                                    ByteString bs = ByteString.copyFrom(this.fs.getBBContainer().b);
                                    tmp_serializedParams.add(bs);
                                } catch (Exception ex) {
                                    throw new ServerFaultException(
                                            "Failed to serialize ParameterSet " + i + " for " + ts, ex);
                                }
                            }
                        } // FOR
                        if (hstore_conf.site.txn_profiling)
                            ts.profiler.stopSerialization();
                    }
                    if (d)
                        LOG.debug(String.format(
                                "%s - Requesting %d FragmentTaskMessages to be executed on remote partitions", ts,
                                num_remote));
                    this.requestWork(ts, tmp_remoteFragmentList, tmp_serializedParams);
                }

                // Then dispatch the tasks that are needed at the same
                // HStoreSite but at a different partition than this one
                if (num_localSite > 0) {
                    if (d)
                        LOG.debug(String.format("%s - Executing %d FragmentTaskMessages on local site's partitions",
                                ts, num_localSite));
                    for (WorkFragment fragment : this.tmp_localSiteFragmentList) {
                        FragmentTaskMessage ftask = ts.getFragmentTaskMessage(fragment);
                        hstore_site.getPartitionExecutor(fragment.getPartitionId()).queueWork(ts, ftask);
                    } // FOR
                }

                // Then execute all of the tasks that need to access the
                // partitions at this HStoreSite. We dispatched the
                // remote-partition-local-site fragments first because they
                // need to get queued up at the other PartitionExecutors
                if (num_localPartition > 0) {
                    if (d)
                        LOG.debug(String.format("%s - Executing %d FragmentTaskMessages on local partition", ts,
                                num_localPartition));
                    for (WorkFragment fragment : this.tmp_localWorkFragmentList) {
                        ParameterSet fragmentParams[] = this.getFragmentParameters(ts, fragment, parameters);
                        this.processWorkFragment(ts, fragment, fragmentParams);
                    } // FOR
                }
            }
            if (t)
                LOG.trace(String.format(
                        "%s - Dispatched %d WorkFragments [remoteSite=%d, localSite=%d, localPartition=%d]", ts,
                        total, num_remote, num_localSite, num_localPartition));
            first = false;
        } // WHILE
        this.fs.getBBContainer().discard();

        if (t)
            LOG.trace(String.format("%s - BREAK OUT [first=%s, stillHasWorkFragments=%s, latch=%s]", ts, first,
                    ts.stillHasWorkFragments(), latch));
        // assert(ts.stillHasWorkFragments() == false) :
        // String.format("Trying to block %s before all of its WorkFragments have been dispatched!\n%s\n%s",
        // ts,
        // StringUtil.join("** ", "\n", tempDebug),
        // this.getVoltProcedure(ts.getProcedureName()).getLastBatchPlan());

        // Now that we know all of our FragmentTaskMessages have been
        // dispatched, we can then wait for all of the results to come back in.
        if (latch == null)
            latch = ts.getDependencyLatch();
        if (latch.getCount() > 0) {
            if (d) {
                LOG.debug(String.format("%s - All blocked messages dispatched. Waiting for %d dependencies", ts,
                        latch.getCount()));
                if (t)
                    LOG.trace(ts.toString());
            }
            if (hstore_conf.site.txn_profiling)
                ts.profiler.startExecDtxnWork();
            boolean done = false;
            // XXX this.utilityWork(latch);
            try {
                done = latch.await(hstore_conf.site.exec_response_timeout, TimeUnit.MILLISECONDS);
            } catch (InterruptedException ex) {
                if (this.hstore_site.isShuttingDown() == false) {
                    LOG.error(String.format("%s - We were interrupted while waiting for results", ts), ex);
                }
                done = true;
            } catch (Throwable ex) {
                new ServerFaultException(String.format("Fatal error for %s while waiting for results", ts), ex);
            } finally {
                if (hstore_conf.site.txn_profiling)
                    ts.profiler.stopExecDtxnWork();
            }
            if (done == false && this.isShuttingDown() == false) {
                LOG.warn(String.format("Still waiting for responses for %s after %d ms [latch=%d]\n%s", ts,
                        hstore_conf.site.exec_response_timeout, latch.getCount(), ts.debug()));
                LOG.warn("Procedure Parameters:\n" + ts.getInvocation().getParams());
                hstore_conf.site.exec_profiling = true;
                LOG.warn(hstore_site.statusSnapshot());

                String msg = "PartitionResponses for " + ts + " never arrived!";
                throw new ServerFaultException(msg, ts.getTransactionId());
            }
        }

        // IMPORTANT: Check whether the fragments failed somewhere and we got
        // a response with an error. We will rethrow this so that it pops the
        // stack all the way back to VoltProcedure.call(), where we can
        // generate a message to the client
        if (ts.hasPendingError()) {
            if (d)
                LOG.warn(
                        String.format("%s was hit with a %s", ts, ts.getPendingError().getClass().getSimpleName()));
            throw ts.getPendingError();
        }

        // IMPORTANT: Don't try to check whether we got back the right number
        // of tables because the batch may have hit an error and we didn't
        // execute all of them.
        VoltTable results[] = ts.getResults();
        ts.finishRound(this.partitionId);
        if (d) {
            if (t)
                LOG.trace(ts + " is now running and looking for love in all the wrong places...");
            LOG.debug(ts + " is returning back " + results.length + " tables to VoltProcedure");
        }
        return (results);
    }

    // ---------------------------------------------------------------
    // COMMIT + ABORT METHODS
    // ---------------------------------------------------------------

    /**
     * Queue a speculatively executed transaction to send its ClientResponseImpl
     * message
     */
    private void queueClientResponse(LocalTransaction ts, ClientResponseImpl cresponse) {
        if (d)
            LOG.debug(String.format("Queuing ClientResponse for %s [handle=%s, status=%s]", ts,
                    ts.getClientHandle(), cresponse.getStatus()));
        assert (ts.isPredictSinglePartition() == true) : String.format(
                "Specutatively executed multi-partition %s [mode=%s, status=%s]", ts, this.currentExecMode,
                cresponse.getStatus());
        assert (ts.isSpeculative() == true) : String.format(
                "Queuing ClientResponse for non-specutative %s [mode=%s, status=%s]", ts, this.currentExecMode,
                cresponse.getStatus());
        assert (cresponse.getStatus() != Status.ABORT_MISPREDICT) : String.format(
                "Trying to queue ClientResponse for mispredicted %s [mode=%s, status=%s]", ts, this.currentExecMode,
                cresponse.getStatus());
        assert (this.currentExecMode != ExecutionMode.COMMIT_ALL) : String.format(
                "Queuing ClientResponse for %s when in non-specutative mode [mode=%s, status=%s]", ts,
                this.currentExecMode, cresponse.getStatus());

        // The ClientResponse is already going to be in the LocalTransaction handle
        // ts.setClientResponse(cresponse);
        this.queued_responses.add(Pair.of(ts, cresponse));

        if (d)
            LOG.debug("Total # of Queued Responses: " + this.queued_responses.size());
    }

    /**
     * For the given transaction's ClientResponse, figure out whether we can
     * send it back to the client right now or whether we need to initiate
     * two-phase commit.
     * 
     * @param ts
     * @param cresponse
     */
    public void processClientResponse(LocalTransaction ts, ClientResponseImpl cresponse) {
        // IMPORTANT: If we executed this locally and only touched our
        // partition, then we need to commit/abort right here
        // 2010-11-14: The reason why we can do this is because we will just
        // ignore the commit message when it shows up from the
        // Dtxn.Coordinator. We should probably double check with Evan on
        // this...
        boolean is_singlepartitioned = ts.isPredictSinglePartition();
        Status status = cresponse.getStatus();

        if (d) {
            LOG.debug(String.format(
                    "%s - Processing ClientResponse at partition %d [handle=%d, status=%s, singlePartition=%s, local=%s]",
                    ts, this.partitionId, cresponse.getClientHandle(), status, ts.isPredictSinglePartition(),
                    ts.isExecLocal(this.partitionId)));
            if (t) {
                LOG.trace(ts + " Touched Partitions: " + ts.getTouchedPartitions().values());
                LOG.trace(ts + " Done Partitions: " + ts.getDonePartitions());
            }
        }

        // -------------------------------
        // ALL: Single-Partition Transactions
        // -------------------------------
        if (is_singlepartitioned) {
            // Commit or abort the transaction
            this.finishWork(ts, (status == Status.OK));

            // If the txn was mispredicted, then we will pass the information
            // over to the HStoreSite so that it can re-execute the
            // transaction. We want to do this first so that the txn gets
            // re-executed as soon as possible...
            if (status == Status.ABORT_MISPREDICT) {
                if (d)
                    LOG.debug(String.format("%s - Restarting because transaction is mispredicted", ts));
                // We don't want to delete the transaction here because
                // whoever is going to requeue it for us will need to know
                // what partitions the transaction touched when it executed
                // before
                this.hstore_site.transactionRequeue(ts, status);
            }
            // Use the separate post-processor thread to send back the result
            else if (hstore_conf.site.exec_postprocessing_thread) {
                if (t)
                    LOG.trace(String.format("%s - Sending ClientResponse to post-processing thread [status=%s]", ts,
                            cresponse.getStatus()));
                this.hstore_site.queueClientResponse(ts, cresponse);
            }
            // Send back the result right now!
            else {
                if (hstore_conf.site.exec_command_logging)
                    ts.markLogEnabled();
                this.hstore_site.sendClientResponse(ts, cresponse);
                ts.markAsDeletable();
                this.hstore_site.deleteTransaction(ts.getTransactionId(), status);
            }
        }
        // -------------------------------
        // COMMIT: Distributed Transaction
        // -------------------------------
        else if (status == Status.OK) {
            // We have to send a prepare message to all of our remote
            // HStoreSites. We want to make sure that we don't go back to the
            // ones that we've already told we are done with
            BitSet donePartitions = ts.getDonePartitions();
            tmp_preparePartitions.clear();
            for (Integer p : ts.getPredictTouchedPartitions()) {
                if (donePartitions.get(p.intValue()) == false) {
                    tmp_preparePartitions.add(p);
                }
            } // FOR

            // We need to set the new ExecutionMode before we invoke
            // transactionPrepare because the LocalTransaction handle might
            // get cleaned up immediately
            ExecutionMode newMode = null;
            if (hstore_conf.site.exec_speculative_execution) {
                newMode = (ts.isExecReadOnly(this.partitionId) ? ExecutionMode.COMMIT_READONLY
                        : ExecutionMode.COMMIT_NONE);
            } else {
                newMode = ExecutionMode.DISABLED;
            }
            this.setExecutionMode(ts, newMode);

            if (hstore_conf.site.txn_profiling)
                ts.profiler.startPostPrepare();
            TransactionPrepareCallback callback = ts.initTransactionPrepareCallback(cresponse);
            assert (callback != null) : "Missing TransactionPrepareCallback for " + ts + " [initialized="
                    + ts.isInitialized() + "]";
            this.hstore_coordinator.transactionPrepare(ts, callback, tmp_preparePartitions);
        }
        // -------------------------------
        // ABORT: Distributed Transaction
        // -------------------------------
        else {
            // Send back the result to the client right now, since there's no
            // way that we're magically going to be able to recover this and
            // get them a result. This has to come before the network messages
            // below because they will clean up the LocalTransaction state
            // information
            this.hstore_site.sendClientResponse(ts, cresponse);

            // Then send a message to all the partitions involved that the
            // party is over and that they need to abort the transaction. We
            // don't actually care when we get the results back because we'll
            // start working on new txns right away.
            if (hstore_conf.site.txn_profiling)
                ts.profiler.startPostFinish();
            TransactionFinishCallback finish_callback = ts.initTransactionFinishCallback(status);
            this.hstore_coordinator.transactionFinish(ts, status, finish_callback);
        }
    }

    /**
     * Internal call to abort/commit the transaction
     * 
     * @param ts
     * @param commit
     */
    private void finishWork(AbstractTransaction ts, boolean commit) {
        assert (ts.isFinishedEE(this.partitionId) == false) : String
                .format("Trying to commit %s twice at partition %d", ts, this.partitionId);

        // This will be the null undo token if the txn never submitted any work
        long undoToken = ts.getLastUndoToken(this.partitionId);

        // Only commit/abort this transaction if:
        // (1) We have an ExecutionEngine handle
        // (2) We have the last undo token used by this transaction
        // (3) The transaction was executed with undo buffers
        // (4) The transaction actually submitted work to the EE
        // (5) The transaction modified data at this partition
        if (this.ee != null && ts.hasSubmittedEE(this.partitionId)
                && undoToken != HStoreConstants.NULL_UNDO_LOGGING_TOKEN) {
            if (ts.isExecReadOnly(this.partitionId) == false
                    && undoToken == HStoreConstants.DISABLE_UNDO_LOGGING_TOKEN) {
                if (commit == false) {
                    LOG.fatal(ts.debug());
                    String msg = "TRYING TO ABORT TRANSACTION WITHOUT UNDO LOGGING";
                    this.crash(new ServerFaultException(msg, ts.getTransactionId()));
                }
                if (d)
                    LOG.debug(String.format("%s - undoToken == DISABLE_UNDO_LOGGING_TOKEN", ts));
            } else {
                boolean needs_profiling = (hstore_conf.site.txn_profiling && ts.isExecLocal(this.partitionId)
                        && ts.isPredictSinglePartition());
                if (needs_profiling)
                    ((LocalTransaction) ts).profiler.startPostEE();
                if (commit) {
                    if (d)
                        LOG.debug(String.format(
                                "%s - Committing on partition=%d [lastTxnId=%d, undoToken=%d, submittedEE=%s]", ts,
                                this.partitionId, this.lastCommittedTxnId, undoToken,
                                ts.hasSubmittedEE(this.partitionId)));
                    this.ee.releaseUndoToken(undoToken);

                    // Evan says that txns will be aborted LIFO. This means
                    // the first txn that we get in abortWork() will have the
                    // greatest undoToken, which means that it will
                    // automagically rollback all other outstanding txns.
                    // I'm lazy/tired, so for now I'll just rollback everything
                    // I get, but in theory we should be able to check whether
                    // our undoToken has already been rolled back
                } else {
                    if (d)
                        LOG.debug(String.format(
                                "%s - Aborting on partition=%d [lastTxnId=%d, undoToken=%d, submittedEE=%s]", ts,
                                this.partitionId, this.lastCommittedTxnId, undoToken,
                                ts.hasSubmittedEE(this.partitionId)));
                    this.ee.undoUndoToken(undoToken);
                }
                if (needs_profiling)
                    ((LocalTransaction) ts).profiler.stopPostEE();
            }
        }

        // We always need to do the following things regardless of whether we
        // hit up the EE or not
        if (commit)
            this.lastCommittedTxnId = ts.getTransactionId();
        ts.setFinishedEE(this.partitionId);
    }
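
    // Explanatory note (not part of the original source): the undo-token
    // protocol used in finishWork() pairs each batch with a monotonically
    // increasing token from getNextUndoToken(). On commit we call
    // ee.releaseUndoToken(undoToken) to discard the undo data, and on abort
    // we call ee.undoUndoToken(undoToken) to roll the changes back. Combined
    // with the LIFO ordering described in the comment above, rolling back
    // the first (greatest) token we see also covers every earlier
    // outstanding txn.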

    /**
     * Somebody told us that our partition needs to abort/commit the given
     * transaction id. This method should only be used for distributed
     * transactions, because it will do some extra work for speculative
     * execution
     * 
     * @param ts
     * @param commit
     *            If true, the work performed by this txn will be committed.
     *            Otherwise it will be aborted
     */
    private void finishTransaction(AbstractTransaction ts, boolean commit) {
        if (this.currentDtxn != ts) {
            if (d)
                LOG.debug(String.format(
                        "%s - Skipping finishWork request at partition %d because it is not the current Dtxn [%s/undoToken=%d]",
                        ts, this.partitionId, this.currentDtxn, ts.getLastUndoToken(partitionId)));
            return;
        }
        if (d)
            LOG.debug(String.format("%s - Processing finishWork request at partition %d", ts, this.partitionId));

        assert (this.currentDtxn == ts) : "Expected current DTXN to be " + ts + " but it was " + this.currentDtxn;

        this.finishWork(ts, commit);

        // Clear our cached query results that are specific for this transaction
        this.queryCache.purgeTransaction(ts.getTransactionId());

        // Check whether this is the response that the speculatively executed
        // txns have been waiting for. We could have turned off speculative
        // execution mode beforehand
        if (d)
            LOG.debug(String.format(
                    "Attempting to unmark %s as the current DTXN at partition %d and setting execution mode to %s",
                    this.currentDtxn, this.partitionId, ExecutionMode.COMMIT_ALL));
        exec_lock.lock();
        try {
            // Resetting the current_dtxn variable has to come *before* we
            // change the execution mode
            this.resetCurrentDtxn();
            this.setExecutionMode(ts, ExecutionMode.COMMIT_ALL);

            // We can always commit our boys no matter what if we know that
            // this multi-partition txn was read-only at the given partition
            if (hstore_conf.site.exec_speculative_execution) {
                if (d)
                    LOG.debug(String.format(
                            "Turning off speculative execution mode at partition %d because %s is finished",
                            this.partitionId, ts));
                this.releaseQueuedResponses(ts.isExecReadOnly(this.partitionId) ? true : commit);
            }
            // Release blocked transactions
            this.releaseBlockedTransactions(ts, false);
        } catch (Throwable ex) {
            throw new ServerFaultException(
                    String.format("Failed to finish %s at partition %d", ts, this.partitionId), ex);
        } finally {
            exec_lock.unlock();
        } // SYNCH

        // If we have a cleanup callback, then invoke that
        if (ts.getCleanupCallback() != null) {
            if (t)
                LOG.trace(String.format("%s - Notifying %s that the txn is finished at partition %d", ts,
                        ts.getCleanupCallback().getClass().getSimpleName(), this.partitionId));
            ts.getCleanupCallback().run(this.partitionId);
        }
        // If it's a LocalTransaction, then we'll want to invoke their
        // TransactionFinishCallback
        else if (ts instanceof LocalTransaction) {
            TransactionFinishCallback callback = ((LocalTransaction) ts).getTransactionFinishCallback();
            if (t)
                LOG.trace(String.format("%s - Notifying %s that the txn is finished at partition %d", ts,
                        callback.getClass().getSimpleName(), this.partitionId));
            callback.decrementCounter(1);
        }

    }

    /**
     * @param ts
     * @param speculative
     */
    private void releaseBlockedTransactions(AbstractTransaction ts, boolean speculative) {
        if (this.currentBlockedTxns.isEmpty() == false) {
            if (d)
                LOG.debug(
                        String.format("Attempting to release %d blocked transactions at partition %d because of %s",
                                this.currentBlockedTxns.size(), this.partitionId, ts));
            int released = 0;
            for (VoltMessage msg : this.currentBlockedTxns) {
                this.work_queue.add(msg);
                released++;
            } // FOR
            this.currentBlockedTxns.clear();
            if (d)
                LOG.debug(String.format("Released %d blocked transactions at partition %d because of %s", released,
                        this.partitionId, ts));
        }
        assert (this.currentBlockedTxns.isEmpty());
    }

    /**
     * Commit/abort all of the queued transactions that were speculatively
     * executed and are waiting for their responses to be sent back to the client
     * 
     * @param commit
     */
    private void releaseQueuedResponses(boolean commit) {
        // First thing we need to do is get the latch that will be set by any
        // transaction
        // that was in the middle of being executed when we were called
        if (d)
            LOG.debug(String.format("Checking waiting/blocked transactions at partition %d [currentMode=%s]",
                    this.partitionId, this.currentExecMode));

        if (this.queued_responses.isEmpty()) {
            if (d)
                LOG.debug(String.format("No speculative transactions to commit at partition %d. Ignoring...",
                        this.partitionId));
            return;
        }

        // Ok now at this point we can access our queue and send back all of
        // our responses
        if (d)
            LOG.debug(String.format("%s %d speculatively executed transactions on partition %d",
                    (commit ? "Commiting" : "Aborting"), this.queued_responses.size(), this.partitionId));

        // Loop backwards through our queued responses and find the latest txn
        // that we need to tell the EE to commit. All of the ones that
        // completed before it won't have to hit up the EE.
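        //
        // Worked example (illustrative, not in the original source): with
        // exec_queued_response_ee_bypass enabled and queued responses
        // [t1:OK, t2:OK, t3:OK], we poll from the back. t3 is the first OK
        // response we see, so it commits through the EE; t1 and t2 then have
        // their submittedEE flag unset and bypass the EE entirely, since
        // committing t3's undo token already covered theirs.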
        Pair<LocalTransaction, ClientResponseImpl> pair = null;
        LocalTransaction ts = null;
        ClientResponseImpl cr = null;
        boolean ee_commit = true;
        int skip_commit = 0;
        int aborted = 0;
        while ((pair = (hstore_conf.site.exec_queued_response_ee_bypass ? this.queued_responses.pollLast()
                : this.queued_responses.pollFirst())) != null) {
            ts = pair.getFirst();
            cr = pair.getSecond();

            // 2011-07-02: I have no idea how this could not be stopped here,
            // but for some reason I am getting a random error.
            // FIXME if (hstore_conf.site.txn_profiling &&
            // ts.profiler.finish_time.isStopped())
            // ts.profiler.finish_time.start();

            // If the multi-p txn aborted, then we need to abort everything in
            // our queue. Change the status to be a MISPREDICT so that they
            // get executed again
            if (commit == false) {
                // We're going to assume that any transaction that didn't
                // mispredict was single-partitioned. We'll use their
                // TouchedPartitions histogram
                if (cr.getStatus() != Status.ABORT_MISPREDICT) {
                    ts.setPendingError(new MispredictionException(ts.getTransactionId(), ts.getTouchedPartitions()),
                            false);
                    cr.setStatus(Status.ABORT_MISPREDICT);
                }
                aborted++;

                // Optimization: Check whether the last element in the list is
                // a commit. If it is, then we know that we don't need to tell
                // the EE about all the ones that executed before it
            } else if (hstore_conf.site.exec_queued_response_ee_bypass) {
                // Don't tell the EE that we committed
                if (ee_commit == false) {
                    if (t)
                        LOG.trace(String.format("Bypassing EE commit for %s [undoToken=%d]", ts,
                                ts.getLastUndoToken(this.partitionId)));
                    ts.unsetSubmittedEE(this.partitionId);
                    skip_commit++;

                } else if (ee_commit && cr.getStatus() == Status.OK) {
                    if (t)
                        LOG.trace(String.format(
                                "Committing %s but will bypass all other successful transactions [undoToken=%d]",
                                ts, ts.getLastUndoToken(this.partitionId)));
                    ee_commit = false;
                }
            }

            try {
                if (hstore_conf.site.exec_postprocessing_thread) {
                    if (t)
                        LOG.trace(String.format(
                                "Passing queued ClientResponse for %s to post-processing thread [status=%s]", ts,
                                cr.getStatus()));
                    hstore_site.queueClientResponse(ts, cr);
                } else {
                    if (t)
                        LOG.trace(String.format("Sending queued ClientResponse for %s back directly [status=%s]",
                                ts, cr.getStatus()));
                    this.processClientResponse(ts, cr);
                }
            } catch (Throwable ex) {
                throw new ServerFaultException("Failed to complete queued " + ts, ex);
            }
        } // WHILE
        if (d && skip_commit > 0 && hstore_conf.site.exec_queued_response_ee_bypass) {
            LOG.debug(String.format("Fast Commit EE Bypass Optimization [skipped=%d, aborted=%d]", skip_commit,
                    aborted));
        }
        return;
    }

    // ---------------------------------------------------------------
    // SNAPSHOT METHODS
    // ---------------------------------------------------------------

    /**
     * Do snapshot work exclusively until there is no more. Also blocks until
     * the syncing and closing of snapshot data targets has completed.
     */
    public void initiateSnapshots(Deque<SnapshotTableTask> tasks) {
        m_snapshotter.initiateSnapshots(ee, tasks);
    }

    public Collection<Exception> completeSnapshotWork() throws InterruptedException {
        return m_snapshotter.completeSnapshotWork(ee);
    }

    // ---------------------------------------------------------------
    // SHUTDOWN METHODS
    // ---------------------------------------------------------------

    /**
     * Cause this PartitionExecutor to make the entire HStore cluster shut down.
     * This won't return!
     */
    public synchronized void crash(Throwable ex) {
        LOG.warn(String.format("PartitionExecutor for Partition #%d is crashing", this.partitionId), ex);
        assert (this.hstore_coordinator != null);
        this.hstore_coordinator.shutdownClusterBlocking(ex);
    }

    @Override
    public boolean isShuttingDown() {
        // Previously: (shutdown_state == State.PREPARE_SHUTDOWN ||
        //              this.shutdown_state == State.SHUTDOWN)
        return (this.hstore_site.isShuttingDown());
    }

    @Override
    public void prepareShutdown(boolean error) {
        this.shutdown_state = Shutdownable.ShutdownState.PREPARE_SHUTDOWN;
    }

    /**
     * Somebody from the outside wants us to shutdown
     */
    public synchronized void shutdown() {
        if (this.shutdown_state == ShutdownState.SHUTDOWN) {
            if (d)
                LOG.debug(String.format("Partition #%d told to shutdown again. Ignoring...", this.partitionId));
            return;
        }
        this.shutdown_state = ShutdownState.SHUTDOWN;

        if (d)
            LOG.debug(String.format("Shutting down PartitionExecutor for Partition #%d", this.partitionId));

        // Clear the queue
        this.work_queue.clear();

        // Shut down the snapshotter
        if (this.m_snapshotter != null)
            this.m_snapshotter.shutdown();

        // Make sure we shutdown our threadpool
        // this.thread_pool.shutdownNow();
        if (this.self != null)
            this.self.interrupt();

        if (this.shutdown_latch != null) {
            try {
                this.shutdown_latch.acquire();
            } catch (InterruptedException ex) {
                // Ignore
            } catch (Exception ex) {
                LOG.fatal("Unexpected error while shutting down", ex);
            }
        }
    }
}