org.rhq.cassandra.ClusterInitService.java Source code

Java tutorial

Introduction

Here is the source code for org.rhq.cassandra.ClusterInitService.java

Source

/*
 * RHQ Management Platform
 * Copyright (C) 2005-2013 Red Hat, Inc.
 * All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation version 2 of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
 */

package org.rhq.cassandra;

import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.Map;
import java.util.Queue;
import java.util.Set;

import javax.management.MBeanServerConnection;
import javax.management.ObjectName;
import javax.management.remote.JMXConnector;
import javax.management.remote.JMXConnectorFactory;
import javax.management.remote.JMXServiceURL;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

/**
 * This class provides operations to ensure a cluster is initialized and in a consistent
 * state. It does not offer functionality for initializing a cluster but rather to make
 * sure that nodes have started up and are accepting client connections for example.
 * <br/><br/>
 * All checks are performed over JMX by reading attributes of the Cassandra
 * <code>StorageService</code> MBean of each node.
 *
 * @author John Sanda
 * @author Jirka Kremser
 */
public final class ClusterInitService {

    private static final Log log = LogFactory.getLog(ClusterInitService.class);

    private static final String JMX_CONNECTION_STRING = "service:jmx:rmi:///jndi/rmi://%s:%s/jmxrmi";

    /** Object name of the MBean that exposes the attributes read by this class. */
    private static final String STORAGE_SERVICE_MBEAN = "org.apache.cassandra.db:type=StorageService";

    /** Pause between two consecutive node checks in {@link #ping} and between schema polls. */
    private static final long POLL_INTERVAL_MILLIS = 100L;

    /** Pause after the required nodes are up, see the six argument waitForClusterToStart. */
    private static final long SUPERUSER_SETUP_WAIT_MILLIS = 10 * 1000L;

    /**
     * Sleep; if interrupted, restore the interrupt flag and throw a RuntimeException.
     *
     * @param time the time to sleep in milliseconds
     */
    private static void sleep(long time) {
        try {
            Thread.sleep(time);
        } catch (InterruptedException ex) {
            Thread.currentThread().interrupt();
            throw new RuntimeException(ex);
        }
    }

    /**
     * Pings the storage nodes to verify if they are available and native transport
     * is running. Nodes are checked in order and the method returns as soon as
     * <code>numHosts</code> nodes reported a running native transport. A node that
     * cannot be reached over JMX is logged at debug level and counted as unavailable.
     *
     * @param storageNodes storage node addresses
     * @param jmxPorts JMX ports; the entry at index i belongs to the storage node at index i
     * @param numHosts minimum number of active hosts
     *
     * @return [true] cluster available with at least minimum number of hosts available, [false] otherwise
     */
    public boolean ping(String[] storageNodes, int[] jmxPorts, int numHosts) {
        // Only address/port pairs can be checked, so never index past the shorter array.
        int nodeCount = Math.min(storageNodes.length, jmxPorts.length);
        int connections = 0;

        for (int index = 0; index < nodeCount; index++) {
            try {
                if (isNativeTransportRunning(storageNodes[index], jmxPorts[index])) {
                    ++connections;
                }
            } catch (Exception e) {
                // An unreachable node must not hide the fact that enough other nodes are up.
                if (log.isDebugEnabled()) {
                    log.debug("Unable to open JMX connection on port [" + jmxPorts[index] + "] to cassandra node ["
                            + storageNodes[index] + "]", e);
                }
            }
            if (connections >= numHosts) {
                return true;
            }
            if (index < nodeCount - 1) {
                sleep(POLL_INTERVAL_MILLIS);
            }
        }
        // Every node was visited; report success only if the minimum was really reached.
        return connections >= numHosts;
    }

    /**
     * This method checks over JMX that native transport is running on every given node.
     * If the check of a node fails, the node is retried after going through the other,
     * remaining nodes. A runtime exception will be thrown after 10 failed JMX connections.
     * <br/><br/>
     * After all nodes are up, this method will then sleep for a fixed delay.
     * See {@link #waitForClusterToStart(String[], int[], int, long, int, int)} for details.
     *
     * @param storageNodes The cluster nodes to which a connection should be made
     * @param jmxPorts JMX port for each cluster node address
     */
    public void waitForClusterToStart(String[] storageNodes, int jmxPorts[]) {
        waitForClusterToStart(storageNodes, jmxPorts, storageNodes.length, 10);
    }

    /**
     * Same as {@link #waitForClusterToStart(String[], int[], int, long, int, int)} with a
     * delay of 250 milliseconds between attempts and an initial wait of 1 second.
     *
     * @param storageNodes The cluster nodes to which a connection should be made
     * @param jmxPorts JMX port for each cluster node address
     * @param numHosts The number of hosts to which a successful connection has to be made
     *                 before returning.
     * @param retries The number of times to retry connecting. A runtime exception will be
     *                thrown when the number of failed connections reaches this value.
     */
    public void waitForClusterToStart(String[] storageNodes, int jmxPorts[], int numHosts, int retries) {
        waitForClusterToStart(storageNodes, jmxPorts, numHosts, 250, retries, 1);
    }

    /**
     * This method checks over JMX that native transport is running on the number of hosts
     * specified. In other words, if there are four hosts and <code>numHosts</code>
     * is 2, this method will block only until two of the hosts report a running native
     * transport. If the check of a host fails, the host is retried after going through
     * the other, remaining hosts.
     * <br/><br/>
     * After the required number of nodes is up, this method will sleep for 10 seconds
     * before returning. This is to give the cluster a chance to create the system auth
     * schema and to create the cassandra super user. Cassandra has a hard-coded delay of
     * 10 seconds before it creates the super user, which means the rhq schema cannot be
     * created before that.
     * <br/><br/>
     * Only failed JMX connections count against <code>retries</code>; a node that is
     * reachable but has not started native transport yet is retried without limit.
     *
     * @param storageNodes The cluster nodes to which a connection should be made
     * @param jmxPorts JMX port for each cluster node address
     * @param numHosts The number of hosts to which a successful connection has to be made
     *                 before returning.
     * @param delay The amount of time in milliseconds to wait between attempts to make a connection
     * @param retries The number of times to retry connecting. A runtime exception will be
     *                thrown when the number of failed connections reaches this value.
     * @param initialWait The amount of seconds before first try.
     */
    public void waitForClusterToStart(String[] storageNodes, int jmxPorts[], int numHosts, long delay, int retries,
            int initialWait) {
        if (initialWait > 0) {
            if (log.isDebugEnabled()) {
                log.debug("Waiting before JMX calls to the storage nodes for " + initialWait + " seconds...");
            }
            // Multiply as long so that a large initialWait cannot overflow int.
            sleep(initialWait * 1000L);
        }

        int connections = 0;
        int failedConnections = 0;
        // Indexes of the nodes that still have to be verified, visited round-robin.
        Queue<Integer> queue = new LinkedList<Integer>();
        for (int index = 0; index < storageNodes.length; index++) {
            queue.add(index);
        }

        Integer storageNodeIndex = queue.poll();

        while (storageNodeIndex != null) {
            if (failedConnections >= retries) {
                throw new RuntimeException("Unable to verify that cluster nodes have started after "
                        + failedConnections + " failed attempts");
            }
            try {
                boolean isNativeTransportRunning = isNativeTransportRunning(storageNodes[storageNodeIndex],
                        jmxPorts[storageNodeIndex]);
                if (isNativeTransportRunning) {
                    if (log.isDebugEnabled()) {
                        log.debug("Successfully connected to cassandra node [" + storageNodes[storageNodeIndex]
                                + "]");
                    }
                    ++connections;
                } else {
                    // Reachable but not ready yet: check it again after the remaining nodes.
                    queue.offer(storageNodeIndex);
                }
                if (connections >= numHosts) {
                    if (log.isDebugEnabled()) {
                        log.debug("Successdully connected to all nodes. Sleeping for 10 seconds to allow for the "
                                + "cassandra superuser set up to complete.");
                    }
                    sleep(SUPERUSER_SETUP_WAIT_MILLIS);
                    return;
                }
            } catch (Exception e) {
                ++failedConnections;
                queue.offer(storageNodeIndex);
                if (log.isDebugEnabled()) {
                    log.debug("Unable to open JMX connection on port [" + jmxPorts[storageNodeIndex]
                            + "] to cassandra node [" + storageNodes[storageNodeIndex] + "].", e);
                } else if (log.isInfoEnabled()) {
                    // Must be logged at info level, a debug call is dropped in this branch.
                    log.info("Unable to open connection to cassandra node.");
                }
            }
            sleep(delay);
            storageNodeIndex = queue.poll();
        }

        // The queue only drains when every node is up, so this means numHosts exceeds the node count.
        if (connections < numHosts) {
            log.warn("Connected to " + connections + " cassandra node(s) but " + numHosts
                    + " were requested; there are no more nodes left to check.");
        }
    }

    /**
     * Waits for the cluster to reach schema agreement. During cluster initialization
     * before and while schema changes propagate throughout the cluster, there could be
     * multiple schema versions found among nodes. Schema agreement is considered reached
     * when exactly one schema version is reported by the nodes that returned a version.
     * <br/><br/>
     * This method polls without a time limit and returns immediately when
     * <code>storageNodes</code> is null or empty.
     *
     * @param storageNodes The cluster nodes
     * @param jmxPorts JMX port for each cluster node address
     * @throws Exception if a JMX connection to one of the nodes cannot be established
     */
    public void waitForSchemaAgreement(String[] storageNodes, int[] jmxPorts) throws Exception {
        if (storageNodes == null || storageNodes.length == 0) {
            return;
        }

        while (true) {
            Set<String> schemaVersions = new HashSet<String>();
            for (int index = 0; index < storageNodes.length; index++) {
                String nodeSchemaVersion = getSchemaVersionForNode(storageNodes[index], jmxPorts[index]);
                if (nodeSchemaVersion != null) {
                    schemaVersions.add(nodeSchemaVersion);
                }
            }

            if (schemaVersions.size() == 1) {
                return;
            }

            if (schemaVersions.isEmpty()) {
                // No node answered with a version yet; keep polling instead of failing.
                if (log.isInfoEnabled()) {
                    log.info(
                            "Schema agreement has not been reached. Unable to get the schema version from cassandra nodes "
                                    + Arrays.toString(storageNodes));
                }
            } else {
                if (log.isInfoEnabled()) {
                    log.info("Schema agreement has not been reached. Found " + schemaVersions.size()
                            + " schema versions");
                }
                if (log.isDebugEnabled()) {
                    log.debug("Found the following schema versions: " + schemaVersions);
                }
            }
            sleep(POLL_INTERVAL_MILLIS);
        }
    }

    /**
     * Checks over JMX whether the native transport server of the given node is running.
     *
     * @param storageNode the node address
     * @param jmxPort the JMX port of the node
     * @return true if the node reports a running native transport; false if it reports
     *         otherwise or if the attribute could not be read
     * @throws Exception if the JMX connection cannot be established
     */
    public boolean isNativeTransportRunning(String storageNode, int jmxPort) throws Exception {
        // Boolean.TRUE.equals treats a missing (null) or unexpected value as "not running".
        return Boolean.TRUE.equals(readStorageServiceAttribute(storageNode, jmxPort, "NativeTransportRunning"));
    }

    /**
     * Reads the schema version the given node is currently on.
     *
     * @param storageNode the node address
     * @param jmxPort the JMX port of the node
     * @return the schema version or null if it could not be read
     * @throws Exception if the JMX connection cannot be established
     */
    private String getSchemaVersionForNode(String storageNode, int jmxPort) throws Exception {
        Object schemaVersion = readStorageServiceAttribute(storageNode, jmxPort, "SchemaVersion");
        return (schemaVersion instanceof String) ? (String) schemaVersion : null;
    }

    /**
     * Opens a JMX connection to the node, reads one attribute of the StorageService MBean
     * and closes the connection again.
     *
     * @param storageNode the node address
     * @param jmxPort the JMX port of the node
     * @param attribute the name of the attribute to read
     * @return the attribute value or null if reading the attribute failed
     * @throws Exception if the JMX connection cannot be established
     */
    private Object readStorageServiceAttribute(String storageNode, int jmxPort, String attribute)
            throws Exception {
        JMXServiceURL serviceURL = new JMXServiceURL(getJMXConnectionURL(storageNode, jmxPort));
        Map<String, String> env = new HashMap<String, String>();
        JMXConnector connector = null;

        try {
            connector = JMXConnectorFactory.connect(serviceURL, env);
            MBeanServerConnection serverConnection = connector.getMBeanServerConnection();
            ObjectName storageService = new ObjectName(STORAGE_SERVICE_MBEAN);
            try {
                return serverConnection.getAttribute(storageService, attribute);
            } catch (Exception e) {
                // It is ok to just catch and log exceptions here particularly in an integration
                // test environment where we could potentially try to do the JMX query before
                // Cassandra is fully initialized. We can query StorageService before the native
                // transport server is initialized which will result in Cassandra throwing a NPE.
                // We do not want propagate that exception because it is just a matter of waiting
                // for Cassandra to finish initializing.
                if (log.isDebugEnabled()) {
                    log.debug("Failed to read attribute [" + attribute + "] from " + storageService, e);
                } else {
                    log.info("Faied to read attribute [" + attribute + "] from " + storageService + ": "
                            + e.getMessage());
                }
                return null;
            }
        } finally {
            if (connector != null) {
                try {
                    connector.close();
                } catch (Exception closeFailure) {
                    // A failure to close must neither mask the original exception nor
                    // discard a value that was read successfully.
                    if (log.isDebugEnabled()) {
                        log.debug("Failed to close JMX connection to cassandra node [" + storageNode + "]",
                                closeFailure);
                    }
                }
            }
        }
    }

    /**
     * Constructs the JMX connection URL based on the node address and
     * JMX port
     *
     * @param address the node address
     * @param jmxPort the JMX port of the node
     * @return the JMX service URL in its string form
     */
    private String getJMXConnectionURL(String address, int jmxPort) {
        return String.format(JMX_CONNECTION_STRING, address, jmxPort);
    }
}