Source code

Java tutorial


Here is the source code for com.splout.db.qnode.Deployer.java


package com.splout.db.qnode;

/*
 * #%L
 * Splout SQL Server
 * %%
 * Copyright (C) 2012 Datasalt Systems S.L.
 * %%
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 * #L%
 */

import com.hazelcast.core.ICountDownLatch;
import com.hazelcast.core.IMap;
import com.splout.db.common.JSONSerDe;
import com.splout.db.common.PartitionEntry;
import com.splout.db.common.ReplicationEntry;
import com.splout.db.common.Tablespace;
import com.splout.db.hazelcast.CoordinationStructures;
import com.splout.db.hazelcast.TablespaceVersion;
import com.splout.db.qnode.beans.*;
import com.splout.db.thrift.DNodeService;
import com.splout.db.thrift.DeployAction;
import com.splout.db.thrift.PartitionMetadata;
import org.apache.commons.collections.MapUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.thrift.transport.TTransportException;

import java.text.SimpleDateFormat;
import java.util.*;
import java.util.Map.Entry;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;

/**
 * The Deployer is a specialized module ({@link com.splout.db.qnode.QNodeHandlerModule}) of the
 * {@link com.splout.db.qnode.QNode} that performs the business logic associated with a distributed deployment. It is
 * used by the {@link com.splout.db.qnode.QNodeHandler}.
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
public class Deployer extends QNodeHandlerModule {

    // Shared commons-logging logger for this class.
    private final static Log log = LogFactory.getLog(Deployer.class);
    // Executor on which deploy() submits the asynchronous ManageDeploy tasks.
    private ListeningExecutorService deployExecutor;
    // Registry of the deployments currently running, keyed by version number. Useful for cancelling them.
    private ConcurrentHashMap<Long, Future<?>> runningDeployments = new ConcurrentHashMap<Long, Future<?>>();

    public static class UnexistingVersion extends Exception {

        public UnexistingVersion() {

        public UnexistingVersion(String message) {

    /**
     * Runnable that deals with the asynchronous part of the deployment. Particularly, it waits until DNodes finish their
     * work, and then performs the version switch.
     */
    public class ManageDeploy implements Runnable {

        // Number of seconds to wait until another
        // check to see if timeout was reached or
        // if a DNode failed. The initializer is only a placeholder:
        // the constructor always overwrites it with its argument.
        private long secondsToCheckFailureOrTimeout = 15l;

        // Version number of the deployment managed by this task.
        private long version;
        // DNodes taking part in the deployment; these are the ones notified when the deploy is aborted.
        private List<String> dnodes;
        // Overall deploy timeout in seconds; a negative value means "wait forever" (see the first loop in run()).
        private long timeoutSeconds;
        // The deploy requests that make up this deployment.
        private List<DeployRequest> deployRequests;
        // Maximum number of seconds to wait for the new tablespace versions to become visible to this QNode;
        // the constructor clamps it to at least 1.
        private long dnodesSpreadMetadataTimeout;
        // When true, checkForFailure() only fails the deploy if every node of some replication entry has failed.
        private boolean isReplicaBalancingEnabled;

        /**
         * Creates the task that supervises one deployment.
         *
         * @param dnodes                         DNodes involved in the deployment
         * @param deployRequests                 the deploy requests being deployed under {@code version}
         * @param version                        version number assigned to this deployment
         * @param timeoutSeconds                 overall deploy timeout in seconds; negative means wait forever
         * @param secondsToCheckFailureOrTimeout seconds between two consecutive failure/timeout checks
         * @param dnodesSpreadMetadataTimeout    seconds to wait for the new versions' metadata to become visible;
         *                                       values below 1 are raised to 1
         * @param isReplicaBalancingEnabled      whether failed DNodes may be tolerated when replicas survive
         */
        public ManageDeploy(List<String> dnodes, List<DeployRequest> deployRequests, long version,
                long timeoutSeconds, long secondsToCheckFailureOrTimeout, long dnodesSpreadMetadataTimeout,
                boolean isReplicaBalancingEnabled) {
            this.dnodes = dnodes;
            this.deployRequests = deployRequests;
            this.version = version;
            this.timeoutSeconds = timeoutSeconds;
            this.secondsToCheckFailureOrTimeout = secondsToCheckFailureOrTimeout;
            // Never allow a zero/negative metadata timeout: the second wait loop in run() must be bounded.
            this.dnodesSpreadMetadataTimeout = Math.max(dnodesSpreadMetadataTimeout, 1);
            this.isReplicaBalancingEnabled = isReplicaBalancingEnabled;
        }

        public void run() {
            // Asynchronous part of a deployment: (1) wait on the deploy latch, checking periodically for DNode
            // failures and for the overall timeout; (2) wait until the new tablespace versions are visible to this
            // QNode; (3) publish the new versions. Failures and timeouts end in abortDeploy(...).
            // NOTE(review): this copy of the method has lost its closing braces and several statements; the
            // notes below mark the places where text is visibly missing. Restore them from the upstream source.
            // NOTE(review): the beginning of the next statement (presumably a logging call) is missing.
   + " Executing deploy for version ["
                    + version + "]");

            try {
                // Seconds waited so far, accumulated in steps of secondsToCheckFailureOrTimeout.
                long waitSeconds = 0;
                // NOTE(review): the call chained onto getCoordinationStructures() is missing here; presumably it
                // fetches the latch for this version — confirm.
                ICountDownLatch countDownLatchForDeploy = context.getCoordinationStructures()
                boolean finished;
                do {
                    // Returns true once the latch reaches zero, false if the wait interval elapsed first.
                    finished = countDownLatchForDeploy.await(secondsToCheckFailureOrTimeout, TimeUnit.SECONDS);
                    // We have to do this check as the await method seems to ignore the interrupt signal.
                    // We use interrupted() as we want the interrupt flag to be cleared. Otherwise cancelling code
                    // could throw another InterruptedException further on.
                    if (Thread.interrupted()) {
                        throw new InterruptedException("Deployment for version [" + version
                                + "] received an interrupt. Probably somebody is cancelling this deployment.");
                    waitSeconds += secondsToCheckFailureOrTimeout;
                    if (!finished) {
                        // If any of the DNodes failed, then we cancel the deployment.
                        // NOTE(review): nothing visible stops the method after abortDeploy(); presumably a
                        // 'return' followed in the original — confirm.
                        if (checkForFailure()) {
                            abortDeploy(dnodes, "One or more DNodes failed", version);
                        // Let's see if we reached the timeout.
                        // Negative timeoutSeconds => waits forever
                        if (waitSeconds > timeoutSeconds && timeoutSeconds >= 0) {
                            log.warn("Deploy of version [" + version + "] timed out. Reached [" + waitSeconds
                                    + "] seconds.");
                            abortDeploy(dnodes, "Timeout reached", version);
                } while (!finished);

                // It's still possible that the deploy failed so let's check it again
                if (checkForFailure()) {
                    abortDeploy(dnodes, "One or more DNodes failed.", version);

                // Check after the wait that the complete tablespaces are available to this QNode. If that is the
                // case for this QNode it will probably be the case for the rest of the QNodes.
                long millisToWait = 50;
                double acumulatedMillis = 0.;
                // Versions still pending verification; the loop below runs until this list is empty.
                List<SwitchVersionRequest> versionsToCheck = switchActions();
                do {
                    acumulatedMillis += millisToWait;

                    // Let's see if we reached the metadata-spread timeout. Unlike the deploy timeout above, this
                    // one is always enforced: the constructor clamps dnodesSpreadMetadataTimeout to at least 1.
                    if ((acumulatedMillis / 1000) > dnodesSpreadMetadataTimeout) {
                        log.warn("Deploy of version [" + version
                                + "] timed out when waiting DNodes to spread the metadata. Reached ["
                                + (acumulatedMillis / 1000) + "] seconds.");
                        abortDeploy(dnodes, "Timeout reached", version);

                    Iterator<SwitchVersionRequest> it = versionsToCheck.iterator();
                    while (it.hasNext()) {
                        // NOTE(review): the right-hand side of this assignment is missing (presumably the
                        // iterator's next element).
                        SwitchVersionRequest req =;
                        Tablespace t = context.getTablespaceVersionsMap()
                                .get(new TablespaceVersion(req.getTablespace(), req.getVersion()));
                        // Check that this TablespaceVersion has been reported by some node through Hazelcast
                        if (t != null && t.getReplicationMap() != null && t.getPartitionMap() != null
                                && t.getPartitionMap().getPartitionEntries() != null
                                && t.getReplicationMap().getReplicationEntries() != null
                                && t.getReplicationMap().getReplicationEntries().size() > 0) {
                            // The version counts as complete when there is one replication entry per
                            // partition entry.
                            if (t.getPartitionMap().getPartitionEntries().size() == t.getReplicationMap()
                                    .getReplicationEntries().size()) {
                                // NOTE(review): the start of the next statement (presumably a logging call) is
                                // missing, and so is whatever removes 'req' from versionsToCheck and pauses
                                // between polls.
                      "Ok, TablespaceVersion [" + req.getTablespace() + ", " + req.getVersion()
                                        + "] being handled by enough DNodes as reported by Hazelcast.");
                } while (versionsToCheck.size() > 0);

                // NOTE(review): the start of the next statement (presumably a logging call) is missing.
      "All DNodes performed the deploy of version [" + version + "]. Publishing tablespaces...");

                // We finish by publishing the versions table with the new versions.
                // NOTE(review): the body of this try block is missing; presumably it calls switchVersions(...)
                // since that is what declares UnexistingVersion — confirm.
                try {
                } catch (UnexistingVersion e) {
                    throw new RuntimeException(
                            "Unexisting version after deploying this version. Sounds like a bug.", e);

                // If some replicas are under-replicated, start a balancing process
                // NOTE(review): the code this comment announces is not present in this copy.

                // NOTE(review): the two lines below are the tails of statements whose beginnings are missing.
      "Deploy of version [" + version + "] Finished PROPERLY. :-)");
                        "Deploy of version [" + version + "] finished properly.");
                // Record the final status of this version in the shared deployments status panel.
                context.getCoordinationStructures().getDeploymentsStatusPanel().put(version, DeployStatus.FINISHED);
            } catch (InterruptedException e) {
                // Case when a deployment is cancelled.
                // NOTE(review): the start of the next statement (presumably a logging call) is missing.
      "Deployment of [" + version + "] interrupted.");
                abortDeploy(dnodes, e.getMessage(), version);
            } catch (Throwable t) {
                // Anything else is rethrown unchecked, keeping the original throwable as the cause.
                throw new RuntimeException(t);
                // NOTE(review): the body of the finally block is missing in this copy.
            } finally {

         * Compose the list of switch actions to switch
         * @return
        private List<SwitchVersionRequest> switchActions() {
            ArrayList<SwitchVersionRequest> actions = new ArrayList<SwitchVersionRequest>();
            for (DeployRequest req : deployRequests) {
                actions.add(new SwitchVersionRequest(req.getTablespace(), version));
            return actions;

        /**
         * Logs the errors reported by the DNodes during a deployment. We log both to the QNode logger and to Hazelcast
         * so the info is persisted in the session.
         */
        private void explainErrors() {
            // NOTE(review): the call chained onto getCoordinationStructures() (and the terminating ';') is missing
            // in this copy. From the loop below, the map holds one entry per failed DNode: key = DNode id,
            // value = error text. Confirm the accessor name upstream.
            IMap<String, String> deployErrorPanel = context.getCoordinationStructures()
            String msg = "Deployment of version [" + version + "] failed in DNode[";
            for (Entry<String, String> entry : deployErrorPanel.entrySet()) {
                String fMsg = msg + entry.getKey() + "] - it failed with the error [" + entry.getValue() + "]";
                // Record the message through the coordination structures for this version.
                // NOTE(review): no write to the local logger is visible here although the method is meant to
                // log in both places; the closing braces are missing as well.
                context.getCoordinationStructures().logDeployMessage(version, fMsg);

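        /**
         * Returns true if the errors reported by the DNodes must make the deploy fail: any reported error when replica
         * balancing is disabled; otherwise only when all the nodes of some replication entry have failed.
         */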
        private boolean checkForFailure() {
            // NOTE(review): the call chained onto getCoordinationStructures() (and the terminating ';') is missing
            // in this copy; the keys of the resulting map are used below as the ids of the failed DNodes.
            IMap<String, String> deployErrorPanel = context.getCoordinationStructures()
            if (!isReplicaBalancingEnabled) {
                // Without replica balancing, any reported error fails the deploy.
                return !deployErrorPanel.isEmpty();
            // If replica balancing is enabled we check whether we could survive after the failed DNodes
            // Snapshot of the failed DNode ids, so the checks below work on a stable set.
            Set<String> failedDNodes = new HashSet<String>(deployErrorPanel.keySet());
            // Check if deploy needs to be canceled or if the system could auto-rebalance itself afterwards
            for (DeployRequest deployRequest : deployRequests) {
                for (ReplicationEntry repEntry : deployRequest.getReplicationMap()) {
                    // True when every node of this replication entry is among the failed ones.
                    if (failedDNodes.containsAll(repEntry.getNodes())) {
                        // There is AT LEAST one partition that depends on the failed DNodes so the deploy must fail!
                        return true;
            // Every replication entry still has at least one node that did not report an error.
            return false;
    } /* End ManageDeploy */

    /**
     * The Deployer deals with deploy and switch version requests.
     */
    public Deployer(QNodeHandlerContext context) {
        // NOTE(review): 'context' is not used in the visible body, yet the rest of the class reads a 'context'
        // member; presumably a call handing it to the superclass is missing here, together with the closing
        // brace — confirm against the upstream source.
        // Cached thread pool whose threads are named "deploy-<n>", wrapped so submit() returns ListenableFutures.
        deployExecutor = MoreExecutors.listeningDecorator(
                Executors.newCachedThreadPool(new ThreadFactoryBuilder().setNameFormat("deploy-%d").build()));

    /**
     * Call this method for starting an asynchronous deployment given a proper deploy request - proxy method for
     * {@link QNodeHandler}. Returns a {@link DeployInfo} describing the deployment that was started.
     *
     * @throws InterruptedException
     */
    public DeployInfo deploy(List<DeployRequest> deployRequests) throws InterruptedException {
        // Synchronous part of a deployment: allocate a version, send the deploy actions to every DNode involved,
        // then hand the waiting/switching over to a ManageDeploy task on deployExecutor.
        // NOTE(review): this copy has lost closing braces and parts of several statements; see the notes below.
        // A new unique version number is generated.
        final long version = context.getCoordinationStructures().uniqueVersionId();

        DeployInfo deployInfo = fillDeployInfo(deployRequests, version, context.getQNodeAddress());

        // NOTE(review): the start of the next statement (presumably a logging call) is missing.
                "Deploy [" + version + "] for tablespaces" + deployInfo.getTablespacesDeployed() + " started.");
        // Mark the version as ongoing in the shared deployments status panel.
        context.getCoordinationStructures().getDeploymentsStatusPanel().put(version, DeployStatus.ONGOING);

        // Generate the list of actions per DNode
        Map<String, List<DeployAction>> actionsPerDNode = generateDeployActionsPerDNode(deployRequests, version);

        // Starting the countdown latch.
        // NOTE(review): the call chained onto getCoordinationStructures() is missing, and nothing visible
        // initialises the latch or uses 'dnodesInvolved' — presumably the latch count is set from it; confirm.
        ICountDownLatch countDownLatchForDeploy = context.getCoordinationStructures()
        Set<String> dnodesInvolved = actionsPerDNode.keySet();

        // Sending deploy signals to each DNode
        for (Map.Entry<String, List<DeployAction>> actionPerDNode : actionsPerDNode.entrySet()) {
            DNodeService.Client client = null;
            // Set when a transport error occurred, so the pool is told to renew the client on return.
            boolean renew = false;
            try {
                client = context.getDNodeClientFromPool(actionPerDNode.getKey());
                client.deploy(actionPerDNode.getValue(), version);
            } catch (TTransportException e) {
                // NOTE(review): as visible here, a transport error only flags the client for renewal; the
                // deploy is neither retried nor aborted for this DNode — confirm this is intended.
                renew = true;
            } catch (Exception e) {
                // Any other error aborts the whole deployment on all DNodes and reports it to the caller.
                String errorMsg = "Error sending deploy actions to DNode [" + actionPerDNode.getKey() + "]";
                log.error(errorMsg, e);
                abortDeploy(new ArrayList<String>(actionsPerDNode.keySet()), errorMsg, version);
                deployInfo.setError("Error connecting to DNode " + actionPerDNode.getKey());
                context.getCoordinationStructures().getDeployInfoPanel().put(version, deployInfo);
                return deployInfo;
            } finally {
                // Always hand the client back to the pool, even on failure.
                if (client != null) {
                    context.returnDNodeClientToPool(actionPerDNode.getKey(), client, renew);

        // Initiating an asynchronous process to manage the deployment
        // DEPLOY_TIMEOUT defaults to -1 (wait forever) and the metadata-spread timeout to 180 seconds.
        // NOTE(review): the remaining constructor arguments and the end of this statement are missing.
        ListenableFuture<?> future = deployExecutor.submit(new ManageDeploy(new ArrayList(actionsPerDNode.keySet()),
                deployRequests, version, context.getConfig().getLong(QNodeProperties.DEPLOY_TIMEOUT, -1),
                context.getConfig().getLong(QNodeProperties.DEPLOY_DNODES_SPREAD_METADATA_TIMEOUT, 180),

        // Keep track of the task under its version number.
        registerAsRunning(version, future);

        // Publish the deploy info for this version and return it to the caller.
        context.getCoordinationStructures().getDeployInfoPanel().put(version, deployInfo);
        return deployInfo;

     * Registers a future as being running. Also registers an automatic callback
     * that will unregister the future once finished.
    protected void registerAsRunning(final long version, ListenableFuture<?> future) {
        runningDeployments.put(version, future);
        Futures.addCallback(future, new FutureCallback<Object>() {
            public void onSuccess(Object result) {

            public void onFailure(Throwable t) {

            public void unregister() {

    protected DeployInfo fillDeployInfo(List<DeployRequest> deployRequests, long version, String qNodeAddress) {
        // Builds the DeployInfo bean describing a deployment that is about to start.
        // NOTE(review): most of the body is missing in this copy: 'version', 'qNodeAddress', 'tablespaces' and
        // 'dataURIs' are never used in the visible lines and the loop below is empty. Presumably the lists are
        // filled from each request and stored in deployInfo — confirm against the upstream source.
        DeployInfo deployInfo = new DeployInfo();


        List<String> tablespaces = new ArrayList<String>();
        List<String> dataURIs = new ArrayList<String>();

        for (DeployRequest request : deployRequests) {


        // The start time is stored as local-time text in the "yyyy-MM-dd HH:mm:ss" format.
        Date startTime = new Date();
        deployInfo.setStartedAt(new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(startTime));

        return deployInfo;

    /**
     * DNodes are informed to stop the deployment, as something failed.
     *
     * @throws InterruptedException
     */
    public void abortDeploy(List<String> dnodes, String deployerErrorMessage, long version) {
        // Best-effort notification: a failure to reach one DNode is logged and the loop moves on to the next.
        for (String dnode : dnodes) {
            DNodeService.Client client = null;
            // Set when a transport error occurred, so the pool is told to renew the client on return.
            boolean renew = false;
            try {
                // NOTE(review): only the client lookup is visible; the call that actually sends the abort
                // signal to the DNode appears to be missing in this copy.
                client = context.getDNodeClientFromPool(dnode);
            } catch (TTransportException e) {
                renew = true;
            } catch (Exception e) {
                log.error("Error sending abort deploy flag to DNode [" + dnode + "]", e);
            } finally {
                // Always hand the client back to the pool.
                if (client != null) {
                    context.returnDNodeClientToPool(dnode, client, renew);
                // NOTE(review): the start of the next statement is missing, as are the closing braces.
                "Deploy failed due to: " + deployerErrorMessage);
        // Record the failure of this version in the shared deployments status panel.
        context.getCoordinationStructures().getDeploymentsStatusPanel().put(version, DeployStatus.FAILED);

    /**
     * Switches current versions being served for some tablespaces, in an atomic way.
     */
    public void switchVersions(List<SwitchVersionRequest> switchRequest) throws UnexistingVersion {
        // We compute the new versions table, and then try to update it
        // We use optimistic locking: we read the original
        // map and try to update it. If the original has changed during
        // this process, we retry: reload the original map, ...
        Map<String, Long> versionsTable;
        Map<String, Long> newVersionsTable;
        do {
            // Snapshot of the versions currently being served; it may be null.
            versionsTable = context.getCoordinationStructures().getCopyVersionsBeingServed();
            newVersionsTable = new HashMap<String, Long>();
            // NOTE(review): the body of this 'if' is missing in this copy; presumably it seeds newVersionsTable
            // with the current entries so that untouched tablespaces keep their version — confirm.
            if (versionsTable != null) {

            for (SwitchVersionRequest req : switchRequest) {
                TablespaceVersion tsv = new TablespaceVersion(req.getTablespace(), req.getVersion());
                // Refuse to switch to a tablespace version that this QNode does not know about.
                if (context.getTablespaceVersionsMap().get(tsv) == null) {
                    throw new UnexistingVersion("Trying to switch to unexisting version[" + req.getVersion()
                            + "] for tablespace[" + req.getTablespace() + "]");
                newVersionsTable.put(tsv.getTablespace(), tsv.getVersion());

        // Retry the whole computation while the update reports that it did not succeed.
        } while (!context.getCoordinationStructures().updateVersionsBeingServed(versionsTable, newVersionsTable));

    /**
     * Generates the list of individual deploy actions that have to be sent to each DNode.
     */
    private static Map<String, List<DeployAction>> generateDeployActionsPerDNode(List<DeployRequest> deployRequests,
            long version) {
        // Result: DNode id -> deploy actions that DNode has to perform.
        // NOTE(review): this copy has lost closing braces and the tail of the innermost loop; see notes below.
        HashMap<String, List<DeployAction>> actions = new HashMap<String, List<DeployAction>>();

        long deployDate = System.currentTimeMillis(); // Here is where we decide the date of the deployment for all deployed
        // tablespaces
        // NOTE(review): neither 'deployDate' nor 'version' is used in the visible lines; presumably both are
        // written into the action or its metadata in the part that is missing — confirm.

        for (DeployRequest req : deployRequests) {
            for (Object obj : req.getReplicationMap()) {
                ReplicationEntry rEntry = (ReplicationEntry) obj;
                // Find the partition entry that has the same shard as this replication entry.
                PartitionEntry pEntry = null;
                for (PartitionEntry partEntry : req.getPartitionMap()) {
                    if (partEntry.getShard().equals(rEntry.getShard())) {
                        pEntry = partEntry;
                if (pEntry == null) {
                    // A replication entry without a matching partition entry is treated as a fatal inconsistency:
                    // both maps are dumped to the log (as serialised by JSONSerDe) before failing.
                    // NOTE(review): 'msg' is only used for the exception below; it is not logged in the visible
                    // lines.
                    String msg = "No Partition metadata for shard: " + rEntry.getShard()
                            + " this is very likely to be a software bug.";
                    try {
                        log.error("Partition map: " + JSONSerDe.ser(req.getPartitionMap()));
                        log.error("Replication map: " + JSONSerDe.ser(req.getReplicationMap()));
                    } catch (JSONSerDe.JSONSerDeException e) {
                        log.error("JSON error", e);
                    throw new RuntimeException(msg);
                // Normalize DNode ids -> The convention is that DNodes are identified by host:port . So we need to strip the
                // protocol, if any
                // Note that the ids are rewritten in place, i.e. in the node list of the caller's request.
                for (int i = 0; i < rEntry.getNodes().size(); i++) {
                    String dnodeId = rEntry.getNodes().get(i);
                    if (dnodeId.startsWith("tcp://")) {
                        dnodeId = dnodeId.substring("tcp://".length(), dnodeId.length());
                    rEntry.getNodes().set(i, dnodeId);
                for (String dNode : rEntry.getNodes()) {
                    // Get the list already stored for this DNode, or a fresh one, and (re-)store it in the map.
                    List<DeployAction> actionsSoFar = (List<DeployAction>) MapUtils.getObject(actions, dNode,
                            new ArrayList<DeployAction>());
                    actions.put(dNode, actionsSoFar);
                    DeployAction deployAction = new DeployAction();
                    // The data of a shard lives at "<data_uri>/<shard>.db".
                    deployAction.setDataURI(req.getData_uri() + "/" + rEntry.getShard() + ".db");

                    // Add partition metadata to the deploy action for DNodes to save it
                    // NOTE(review): the code that fills 'metadata', attaches it to the action and adds the
                    // action to 'actionsSoFar' is missing in this copy.
                    PartitionMetadata metadata = new PartitionMetadata();

        return actions;

    public StatusMessage cancelDeployment(long version) {
        Future<?> future = runningDeployments.get(version);
        if (future == null) {
            return new StatusMessage(StatusMessage.Status.ERROR,
                    "No deployment running for " + version + " found.");
        return new StatusMessage(StatusMessage.Status.OK,
                "Deployment for version " + version + " being cancelled.");