com.splout.db.qnode.QNodeHandlerContext.java Source code

Introduction

Here is the source code for com.splout.db.qnode.QNodeHandlerContext.java

Source

package com.splout.db.qnode;

/*
 * #%L
 * Splout SQL Server
 * %%
 * Copyright (C) 2012 Datasalt Systems S.L.
 * %%
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 * #L%
 */

import java.util.ArrayList;
import java.util.Comparator;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.SortedSet;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.LinkedBlockingDeque;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.locks.ReentrantLock;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.thrift.transport.TTransportException;

import com.google.common.base.Joiner;
import com.google.common.collect.Ordering;
import com.google.common.collect.TreeMultimap;
import com.splout.db.common.SploutConfiguration;
import com.splout.db.common.Tablespace;
import com.splout.db.dnode.DNodeClient;
import com.splout.db.hazelcast.CoordinationStructures;
import com.splout.db.hazelcast.DNodeInfo;
import com.splout.db.hazelcast.TablespaceVersion;
import com.splout.db.qnode.ReplicaBalancer.BalanceAction;
import com.splout.db.thrift.DNodeService;
import com.yammer.metrics.Metrics;
import com.yammer.metrics.core.Gauge;

/**
 * This class contains the basic context of {@link QNodeHandler}. This context holds in-memory information about the
 * system such as the list of alive DNodes, the list of tablespaces, the versions and so forth. In addition, this class
 * maintains a pool of connections to the DNodes. It is shared among all the different {@link QNodeHandlerModule}s
 * such as {@link Deployer} so that each specialized module has access to the context.
 */
public class QNodeHandlerContext {

    protected final static Log log = LogFactory.getLog(QNodeHandlerContext.class);

    // This map indicates which version is currently being served for each tablespace. It has to be updated atomically.
    private final Map<String, Long> currentVersionsMap = new ConcurrentHashMap<String, Long>();
    private String qNodeAddress;
    // The SploutConfiguration
    private SploutConfiguration config;
    // The coordination structures that use Hazelcast underneath
    private CoordinationStructures coordinationStructures;
    private TablespaceMemoryState tablespaceState = new TablespaceMemoryState();
    private ReplicaBalancer replicaBalancer;

    // This flag is set to "false" after WARMING_TIME seconds (qnode.warming.time).
    // Some actions will only be taken after the warming time, just in case some nodes haven't joined the cluster yet.
    private final AtomicBoolean isWarming = new AtomicBoolean(true);

    // The per-DNode Thrift client pools
    private ConcurrentMap<String, BlockingQueue<DNodeService.Client>> thriftClientCache = new ConcurrentHashMap<String, BlockingQueue<DNodeService.Client>>();
    private ReentrantLock thriftClientCacheLock = new ReentrantLock();

    private final int thriftClientPoolSize;
    private final long dnodePoolTimeoutMillis;

    public QNodeHandlerContext(SploutConfiguration config, CoordinationStructures coordinationStructures) {
        this.config = config;
        this.coordinationStructures = coordinationStructures;
        this.thriftClientPoolSize = config.getInt(QNodeProperties.DNODE_POOL_SIZE);
        this.dnodePoolTimeoutMillis = config.getLong(QNodeProperties.QNODE_DNODE_POOL_TAKE_TIMEOUT);
        this.replicaBalancer = new ReplicaBalancer(this);
        initMetrics();
    }

    public static enum DNodeEvent {
        LEAVE, ENTRY, UPDATE
    }

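    /**
     * Registers Yammer Metrics gauges that report the state of the per-DNode Thrift client pools: idle connections,
     * connections being used, a textual summary per pool, the number of pools and the total configured connections.
     */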
    private void initMetrics() {
        Metrics.newGauge(QNodeHandlerContext.class, "thrift-total-connections-iddle", new Gauge<Integer>() {
            @Override
            public Integer value() {
                int count = 0;
                for (Entry<String, BlockingQueue<DNodeService.Client>> queue : thriftClientCache.entrySet()) {
                    count += queue.getValue().size();
                }
                return count;
            }
        });
        Metrics.newGauge(QNodeHandlerContext.class, "thrift-total-connections-being-used", new Gauge<Integer>() {
            @Override
            public Integer value() {
                int queues = 0;
                int count = 0;
                for (Entry<String, BlockingQueue<DNodeService.Client>> queue : thriftClientCache.entrySet()) {
                    queues++;
                    count += queue.getValue().size();
                }
                return (QNodeHandlerContext.this.thriftClientPoolSize * queues) - count;
            }
        });
        Metrics.newGauge(QNodeHandlerContext.class, "thrift-pools", new Gauge<String>() {
            @Override
            public String value() {
                ArrayList<String> fullPools = new ArrayList<String>();
                for (Entry<String, BlockingQueue<DNodeService.Client>> queue : thriftClientCache.entrySet()) {
                    int idle = queue.getValue().size();
                    int size = QNodeHandlerContext.this.thriftClientPoolSize;
                    fullPools
                            .add("Pool: " + queue.getKey() + " (" + (size - idle) + " of " + size + ") being used");
                }
                return Joiner.on(", ").join(fullPools);
            }
        });
        Metrics.newGauge(QNodeHandlerContext.class, "thrift-pools-count", new Gauge<Integer>() {
            @Override
            public Integer value() {
                return thriftClientCache.size();
            }
        });
        Metrics.newGauge(QNodeHandlerContext.class, "thrift-total-configured-connections", new Gauge<Integer>() {
            @Override
            public Integer value() {
                return (QNodeHandlerContext.this.thriftClientPoolSize * thriftClientCache.size());
            }
        });
    }

    @SuppressWarnings("serial")
    public final static class TablespaceVersionInfoException extends Exception {

        public TablespaceVersionInfoException(String msg) {
            super(msg);
        }
    }

    /**
     * Get the list of possible actions to take for balancing the cluster in case of under-replicated partitions.
     */
    public List<BalanceAction> getBalanceActions() {
        // we have this in this class to be able to use this lock (the same one that recreates the in-memory TablespaceVersion map)
        synchronized (tablespaceState) {
            return replicaBalancer.scanPartitions();
        }
    }

    /**
     * Get the list of DNodes
     */
    public List<String> getDNodeList() {
        List<String> dNodeList = new ArrayList<String>();
        for (DNodeInfo dnode : getCoordinationStructures().getDNodes().values()) {
            dNodeList.add(dnode.getAddress());
        }
        return dNodeList;
    }

    /**
     * This method can be called to initialize a pool of connections to a DNode. It may be called from multiple
     * threads, so it is safe to call it concurrently.
     */
    public void initializeThriftClientCacheFor(String dnode) throws TTransportException, InterruptedException {
        // this lock is on the whole cache but we would actually be interested in a per-DNode lock...
        // there's only one lock for simplicity.
        thriftClientCacheLock.lock();
        try {
            // initialize queue for this DNode
            BlockingQueue<DNodeService.Client> dnodeQueue = thriftClientCache.get(dnode);
            if (dnodeQueue == null) {
                // this ensures that the per-DNode queue is only created once and then reused.
                dnodeQueue = new LinkedBlockingDeque<DNodeService.Client>(thriftClientPoolSize);
            }
            if (dnodeQueue.isEmpty()) {
                try {
                    for (int i = dnodeQueue.size(); i < thriftClientPoolSize; i++) {
                        dnodeQueue.put(DNodeClient.get(dnode));
                    }
                    // we only put the queue in the cache once all connections have been populated
                    thriftClientCache.put(dnode, dnodeQueue);
                } catch (TTransportException e) {
                    log.error("Error while trying to populate queue for " + dnode
                            + ", will discard created connections.", e);
                    while (!dnodeQueue.isEmpty()) {
                        dnodeQueue.poll().getOutputProtocol().getTransport().close();
                    }
                    throw e;
                }
            } else {
                // it must be safe to call this method from different places concurrently,
                // so we handle the case where another thread already populated the queue
                // and only populate it if it is really empty.
                log.warn(Thread.currentThread().getName() + " : queue for [" + dnode
                        + "] is not empty - it was populated before.");
            }
        } finally {
            thriftClientCacheLock.unlock();
        }
    }

    /**
     * This method can be called by {@link QNodeHandler} to discard the Thrift client cache for a DNode when it
     * disconnects. Usually this happens when Hazelcast notifies the QNode.
     */
    public void discardThriftClientCacheFor(String dnode) throws InterruptedException {
        thriftClientCacheLock.lock();
        try {
            // discarding all connections to a DNode that has left
            log.info(Thread.currentThread().getName() + " : trashing queue for [" + dnode + "] as it leaved.");
            BlockingQueue<DNodeService.Client> dnodeQueue = thriftClientCache.get(dnode);
            // release connections until empty
            while (!dnodeQueue.isEmpty()) {
                dnodeQueue.take().getOutputProtocol().getTransport().close();
            }
            thriftClientCache.remove(dnode); // to indicate that the DNode is not present
        } finally {
            thriftClientCacheLock.unlock();
        }
    }

    /**
     * Get the Thrift client for this DNode.
     * <p/>
     * A {@link PoolCreationException} (wrapping a TTransportException) can be thrown in the rare case when
     * a new pool has to be initialized here. In this case, you shouldn't call
     * the method {@link #returnDNodeClientToPool(String, com.splout.db.thrift.DNodeService.Client, boolean)}
     * to return the connection.
     * <p/>
     * This method never returns null.
     *
     * @throws java.lang.InterruptedException             if somebody interrupts the thread while the method is waiting on the pool
     * @throws com.splout.db.qnode.PoolCreationException  if there is a failure while creating a new pool.
     * @throws com.splout.db.qnode.DNodePoolFullException if the pool for the given dnode is empty and the timeout
     *                                                    for waiting for a connection is reached.
     */
    public DNodeService.Client getDNodeClientFromPool(String dnode)
            throws InterruptedException, PoolCreationException, DNodePoolFullException {
        BlockingQueue<DNodeService.Client> dnodeQueue = thriftClientCache.get(dnode);
        if (dnodeQueue == null) {
            // This shouldn't happen in real life because the pool is initialized by the QNode, but it is useful for
            // unit testing.
            // Under some rare race conditions the pool may be required before the QNode creates it, but this method
            // ensures that the queue will only be created once and, if it's not possible to create it, an exception
            // will be thrown and nothing bad will happen.
            try {
                initializeThriftClientCacheFor(dnode);
                dnodeQueue = thriftClientCache.get(dnode);
            } catch (TTransportException e) {
                throw new PoolCreationException(e);
            }
        }

        DNodeService.Client client = dnodeQueue.poll(dnodePoolTimeoutMillis, TimeUnit.MILLISECONDS);
        // Timeout waiting for poll
        if (client == null) {
            throw new DNodePoolFullException("Pool for dnode[" + dnode + "] is full and timeout of ["
                    + dnodePoolTimeoutMillis + "] reached when waiting for a connection.");
        }
        return client;
    }

    /**
     * Return a Thrift client to the pool. This method is a bit tricky since we may want to return a connection when a
     * DNode has already disconnected. Also, if the QNode is closing, we don't want to leave open sockets around. To do
     * it safely, we check whether 1) we are closing / cleaning the QNode or 2) the DNode has disconnected.
     * <p/>
     * The given client must never be null.
     */
    public void returnDNodeClientToPool(String dnode, DNodeService.Client client, boolean renew) {
        if (closing.get()) { // don't return to the pool if the system is already closing! we must close everything!
            if (client != null) {
                client.getOutputProtocol().getTransport().close();
            }
            return;
        }
        BlockingQueue<DNodeService.Client> dnodeQueue = thriftClientCache.get(dnode);
        if (dnodeQueue == null) {
            // dnode is not connected, so we exit.
            if (client != null) {
                client.getOutputProtocol().getTransport().close();
            }
            return;
        }
        if (renew) { // we have to try to renew the connection
            try {
                DNodeService.Client newClient = DNodeClient.get(dnode);
                if (client != null) {
                    client.getOutputProtocol().getTransport().close();
                    client = newClient;
                }
            } catch (TTransportException e) {
                // It was not possible to renew the connection, so we keep the broken one.
                log.warn("TTransportException while renewing client to dnode[" + dnode
                        + "]. Broken client is returned to the pool as is to continue.");
            }
        }
        try {
            dnodeQueue.add(client);
        } catch (IllegalStateException e) {
            client.getOutputProtocol().getTransport().close();
            log.error("Trying to return a connection for dnode [" + dnode
                    + "] but the pool already has the maximum number of connections. This is likely a software bug!.");
        }

        // one last check to make sure we close every socket:
        // here we avoid leaking a socket in case a close happened in parallel or a DNode disconnected right in the
        // middle
        if (closing.get() || thriftClientCache.get(dnode) == null) {
            if (client != null) {
                client.getOutputProtocol().getTransport().close();
            }
        }
    }
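
    /*
     * A typical borrow / return sequence for the pool (illustrative sketch only; the dnode address and the Thrift
     * call are placeholders - see synchronizeTablespaceVersions() below for a real caller):
     *
     *   DNodeService.Client client = null;
     *   boolean renew = false;
     *   try {
     *     client = getDNodeClientFromPool(dnodeAddress);
     *     // ... issue Thrift calls against the DNode ...
     *   } catch(TTransportException e) {
     *     renew = true; // the connection is probably broken, so ask for it to be renewed
     *   } finally {
     *     if(client != null) {
     *       returnDNodeClientToPool(dnodeAddress, client, renew);
     *     }
     *   }
     */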

    /**
     * Rotates the versions (deletes versions that are old or useless). To be executed at startup and after a deployment.
     */
    public List<com.splout.db.thrift.TablespaceVersion> synchronizeTablespaceVersions()
            throws InterruptedException {
        log.info("Starting to look for old tablespace versions to remove...");

        int maxVersionsPerTablespace = config.getInt(QNodeProperties.VERSIONS_PER_TABLESPACE);

        // Will contain the list of versions per tablespace, sorted by creation date, descending
        TreeMultimap<String, Tablespace> tablespaces = TreeMultimap.create(Ordering.natural(),
                new Comparator<Tablespace>() {
                    @Override
                    public int compare(Tablespace tb1, Tablespace tb2) {
                        // reverse ordering: older dates appear LAST. If dates are equal, the version is compared.
                        int comp = -((Long) tb1.getCreationDate()).compareTo(tb2.getCreationDate());
                        if (comp == 0) {
                            return -((Long) tb1.getVersion()).compareTo(tb2.getVersion());
                        } else {
                            return comp;
                        }
                    }
                });

        Map<TablespaceVersion, Tablespace> myTablespaces = getTablespaceVersionsMap();

        // We build an in-memory view of the tablespaces to analyze it
        // and prune old versions.
        for (Entry<TablespaceVersion, Tablespace> entry : myTablespaces.entrySet()) {
            tablespaces.put(entry.getKey().getTablespace(), entry.getValue());
        }
        log.info("Analyzing " + tablespaces.keySet().size() + " tablespaces with a total of " + tablespaces.size()
                + " versions...");

        // We will remove only versions older than the one being served
        Map<String, Long> hzVersionsBeingServed = coordinationStructures.getCopyVersionsBeingServed();
        if (hzVersionsBeingServed == null) {
            log.info("... No versions yet being served.");
            return null; // nothing to do yet
        }
        log.info("Number of versions being served: " + hzVersionsBeingServed.size());

        List<com.splout.db.thrift.TablespaceVersion> tablespacesToRemove = new ArrayList<com.splout.db.thrift.TablespaceVersion>();

        for (Entry<String, Long> entry : hzVersionsBeingServed.entrySet()) {
            String tablespace = entry.getKey();
            Long versionBeingServed = entry.getValue();
            // Tablespaces are sorted by creation date desc.
            SortedSet<Tablespace> allVersions = tablespaces.get(tablespace);
            Iterator<Tablespace> it = allVersions.iterator();
            boolean foundVersionBeingServed = false;
            int countVersionsAfter = 0;
            while (it.hasNext()) {
                Tablespace tb = it.next();
                if (versionBeingServed.equals(tb.getVersion())) {
                    foundVersionBeingServed = true;
                } else {
                    if (foundVersionBeingServed) {
                        countVersionsAfter++;
                        if (countVersionsAfter >= maxVersionsPerTablespace) {
                            // This is the case where we remove the version
                            // 1 - This tablespace has a version being served
                            // 2 - This version is older than the current tablespace being served
                            // 3 - We are already keeping maxVersionsPerTablespace versions
                            tablespacesToRemove
                                    .add(new com.splout.db.thrift.TablespaceVersion(tablespace, tb.getVersion()));
                            log.info("Tablespace [" + tablespace + "] Version [" + tb.getVersion() + "] "
                                    + "created at [" + new Date(tb.getCreationDate())
                                    + "] REMOVED. We already keep younger versions.");
                        } else {
                            log.info("Tablespace [" + tablespace + "] Version [" + tb.getVersion() + "] "
                                    + "created at [" + new Date(tb.getCreationDate()) + "] KEPT.");
                        }
                    } else {
                        log.info("Tablespace [" + tablespace + "] Version [" + tb.getVersion() + "] "
                                + "created at [" + new Date(tb.getCreationDate())
                                + "] either younger than served one or without version being served. Keeping.");
                    }
                }
            }

            if (!foundVersionBeingServed) {
                log.info("Tablespace [" + tablespace
                        + "] without any version being served. Please, have a look, and remove them if not used");
            }

            if (tablespacesToRemove.size() > 0) {
                log.info("Sending [" + tablespacesToRemove + "] to all alive DNodes.");
                for (DNodeInfo dnode : coordinationStructures.getDNodes().values()) {
                    DNodeService.Client client = null;
                    boolean renew = false;
                    try {
                        client = getDNodeClientFromPool(dnode.getAddress());
                        client.deleteOldVersions(tablespacesToRemove);
                    } catch (TTransportException e) {
                        renew = true;
                        log.warn("Failed sending delete TablespaceVersions order to (" + dnode
                                + "). Not critical as they will be removed after other deployments.", e);
                    } catch (Exception e) {
                        log.warn("Failed sending delete TablespaceVersions order to (" + dnode
                                + "). Not critical as they will be removed after other deployments.", e);
                    } finally {
                        if (client != null) {
                            returnDNodeClientToPool(dnode.getAddress(), client, renew);
                        }
                    }
                }
            }
            log.info("... done looking for old tablespace versions to remove...");
        }

        return tablespacesToRemove; // Return for unit test
    }

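    // Set to true by close(); checked by returnDNodeClientToPool() to avoid leaking sockets while shutting down.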
    private AtomicBoolean closing = new AtomicBoolean(false);

    public void close() {
        closing.set(true); // signals to other parts of this code that things have to be closed!
        for (Map.Entry<String, BlockingQueue<DNodeService.Client>> entry : thriftClientCache.entrySet()) {
            while (entry.getValue().size() > 0) {
                try {
                    entry.getValue().take().getOutputProtocol().getTransport().close();
                } catch (InterruptedException e) {
                    log.error("Interrupted!", e);
                }
            }
        }
    }

    public void maybeBalance() {
        // do this only after warming
        if (!isWarming.get() && config.getBoolean(QNodeProperties.REPLICA_BALANCE_ENABLE)) {
            // check if we could balance some partitions
            List<ReplicaBalancer.BalanceAction> balanceActions = getBalanceActions();
            // we will only re-balance versions being served,
            // otherwise strange things may happen, such as re-balancing a version in the middle of its deployment...
            Map<String, Long> versionsBeingServed = coordinationStructures.getCopyVersionsBeingServed();
            for (ReplicaBalancer.BalanceAction action : balanceActions) {
                if (versionsBeingServed != null && versionsBeingServed.get(action.getTablespace()) != null
                        && versionsBeingServed.get(action.getTablespace()) == action.getVersion()) {
                    // put if absent + TTL
                    coordinationStructures.getDNodeReplicaBalanceActionsSet().putIfAbsent(action, "",
                            config.getLong(QNodeProperties.BALANCE_ACTIONS_TTL), TimeUnit.SECONDS);
                }
            }
        }
    }

    // ---- Getters ---- //

    public Map<String, Long> getCurrentVersionsMap() {
        return currentVersionsMap;
    }

    public Map<TablespaceVersion, Tablespace> getTablespaceVersionsMap() {
        return tablespaceState.getTablespaceVersionsMap();
    }

    public CoordinationStructures getCoordinationStructures() {
        return coordinationStructures;
    }

    public TablespaceMemoryState getTablespaceState() {
        return tablespaceState;
    }

    public SploutConfiguration getConfig() {
        return config;
    }

    public ConcurrentMap<String, BlockingQueue<DNodeService.Client>> getThriftClientCache() {
        return thriftClientCache;
    }

    public AtomicBoolean getIsWarming() {
        return isWarming;
    }

    public String getQNodeAddress() {
        return qNodeAddress;
    }

    public void setQNodeAddress(String qNodeAddress) {
        this.qNodeAddress = qNodeAddress;
    }
}
}
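
Example

The following sketch is not part of the original file; it illustrates how a caller could use QNodeHandlerContext to borrow a Thrift client from the per-DNode pool and return it afterwards. The SploutConfiguration and CoordinationStructures instances, as well as the class and method names, are assumptions for illustration, and the actual Thrift call is left as a placeholder.

package com.splout.db.qnode;

import com.splout.db.common.SploutConfiguration;
import com.splout.db.hazelcast.CoordinationStructures;
import com.splout.db.thrift.DNodeService;

public class QNodeHandlerContextUsageSketch {

    /**
     * Illustrative only: borrows a client for every known DNode, uses it and returns it to the pool,
     * following the same pattern as QNodeHandlerContext.synchronizeTablespaceVersions().
     */
    public static void contactAllDNodes(SploutConfiguration config, CoordinationStructures coordination)
            throws Exception {
        QNodeHandlerContext context = new QNodeHandlerContext(config, coordination);
        try {
            for (String dnode : context.getDNodeList()) {
                DNodeService.Client client = null;
                boolean renew = false;
                try {
                    client = context.getDNodeClientFromPool(dnode);
                    // ... issue a Thrift call against the DNode here ...
                } catch (Exception e) {
                    renew = true; // the connection may be broken, so ask for it to be renewed on return
                } finally {
                    if (client != null) {
                        context.returnDNodeClientToPool(dnode, client, renew);
                    }
                }
            }
        } finally {
            context.close(); // closes every pooled connection
        }
    }
}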