Java tutorial — introduction and source listing.

Below is the source code for andromache.config.CassandraConfigHelper.java.

/*
 * Copyright 2013 Illarion Kovalchuk
 * <p/>
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * <p/>
 * http://www.apache.org/licenses/LICENSE-2.0
 * <p/>
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package andromache.config;

import org.apache.cassandra.config.ConfigurationException;
import org.apache.cassandra.dht.IPartitioner;
import org.apache.cassandra.hadoop.ConfigHelper;
import org.apache.cassandra.io.compress.CompressionParameters;
import org.apache.cassandra.thrift.*;
import org.apache.cassandra.utils.FBUtilities;
import org.apache.cassandra.utils.Hex;
import org.apache.hadoop.conf.Configuration;
import org.apache.thrift.TBase;
import org.apache.thrift.TDeserializer;
import org.apache.thrift.TException;
import org.apache.thrift.TSerializer;
import org.apache.thrift.protocol.TBinaryProtocol;
import org.apache.thrift.transport.TSocket;
import org.apache.thrift.transport.TTransport;
import org.apache.thrift.transport.TTransportException;

import javax.security.auth.login.LoginException;
import java.io.IOException;
import java.util.*;

/**
 * Static helpers for reading and writing Cassandra-related settings on a Hadoop
 * {@link Configuration}. Mirrors Cassandra's own {@code ConfigHelper}, extended
 * with multi-column-family input and per-keyspace output credentials.
 *
 * <p>All state lives in the supplied {@code Configuration}; this class holds no
 * mutable state of its own and is therefore thread-safe.
 */
public class CassandraConfigHelper {

    /* ---------------- configuration property names ---------------- */

    private static final String CASSANDRA_CONSISTENCYLEVEL_READ = "cassandra.consistencylevel.read";
    private static final String CASSANDRA_CONSISTENCYLEVEL_WRITE = "cassandra.consistencylevel.write";
    private static final String INPUT_COLUMNFAMILIES_CONFIG = "cassandra.input.columnfamilies";

    private static final String OUTPUT_KEYSPACE_USER_NAME_KEY = "cassandra.output.keyspace.username.key";
    private static final String OUTPUT_KEYSPACE_USER_PASSWORD_KEY = "cassandra.output.keyspace.userpassword.key";

    private static final String INPUT_PARTITIONER_CONFIG = "cassandra.input.partitioner.class";
    private static final String OUTPUT_PARTITIONER_CONFIG = "cassandra.output.partitioner.class";
    private static final String INPUT_KEYSPACE_CONFIG = "cassandra.input.keyspace";
    private static final String INPUT_KEYSPACE_USERNAME_CONFIG = "cassandra.input.keyspace.username";
    private static final String INPUT_KEYSPACE_PASSWD_CONFIG = "cassandra.input.keyspace.passwd";
    private static final String OUTPUT_KEYSPACE_PASSWD_CONFIG = "cassandra.output.keyspace.passwd";
    private static final String OUTPUT_COLUMNFAMILY_CONFIG = "cassandra.output.columnfamily";
    private static final String INPUT_PREDICATE_CONFIG = "cassandra.input.predicate";
    private static final String INPUT_KEYRANGE_CONFIG = "cassandra.input.keyRange";
    private static final String INPUT_SPLIT_SIZE_CONFIG = "cassandra.input.split.size";
    private static final String INPUT_WIDEROWS_CONFIG = "cassandra.input.widerows";
    private static final String RANGE_BATCH_SIZE_CONFIG = "cassandra.range.batch.size";
    private static final String INPUT_THRIFT_PORT = "cassandra.input.thrift.port";
    private static final String OUTPUT_THRIFT_PORT = "cassandra.output.thrift.port";
    private static final String INPUT_INITIAL_THRIFT_ADDRESS = "cassandra.input.thrift.address";
    private static final String OUTPUT_INITIAL_THRIFT_ADDRESS = "cassandra.output.thrift.address";
    private static final String OUTPUT_COMPRESSION_CLASS = "cassandra.output.compression.class";
    private static final String OUTPUT_COMPRESSION_CHUNK_LENGTH = "cassandra.output.compression.length";
    private static final String INPUT_TRANSPORT_FACTORY_CLASS = "cassandra.input.transport.factory.class";
    private static final String OUTPUT_TRANSPORT_FACTORY_CLASS = "cassandra.output.transport.factory.class";

    // CONSOLIDATION: the original file also declared READ_CONSISTENCY_LEVEL and
    // WRITE_CONSISTENCY_LEVEL with values identical to the two CASSANDRA_* constants
    // above. Aliasing them here guarantees the two names can never drift apart.
    private static final String READ_CONSISTENCY_LEVEL = CASSANDRA_CONSISTENCYLEVEL_READ;
    private static final String WRITE_CONSISTENCY_LEVEL = CASSANDRA_CONSISTENCYLEVEL_WRITE;

    /* ---------------- defaults ---------------- */

    private static final int DEFAULT_SPLIT_SIZE = 64 * 1024;
    private static final int DEFAULT_THRIFT_PORT = 9160;
    private static final int DEFAULT_RANGE_BATCH_SIZE = 4096;
    private static final String DEFAULT_CONSISTENCY_LEVEL = ConsistencyLevel.QUORUM.name();

    /** Utility class: static members only, never instantiated. */
    private CassandraConfigHelper() {
    }

    /* ---------------- input: keyspace / column families ---------------- */

    /**
     * Set the keyspace and the column families the job reads from.
     *
     * @param conf                job configuration you are about to run
     * @param inputKeyspace       keyspace to read from; must not be null
     * @param inputColumnFamilies column families to read; must not be null
     */
    public static void setInputColumnFamilies(Configuration conf, String inputKeyspace,
            List<String> inputColumnFamilies) {
        if (inputKeyspace == null) {
            throw new UnsupportedOperationException("keyspace may not be null");
        }
        // BUG FIX: the original checked a freshly-created empty string ("") for
        // null — a condition that can never hold. The intended validation is on
        // the column family list, which was otherwise dereferenced unchecked.
        if (inputColumnFamilies == null) {
            throw new UnsupportedOperationException("columnfamily may not be null");
        }

        conf.set(INPUT_KEYSPACE_CONFIG, inputKeyspace);
        conf.setStrings(INPUT_COLUMNFAMILIES_CONFIG,
                inputColumnFamilies.toArray(new String[inputColumnFamilies.size()]));
    }

    /** @return the configured input column families (empty collection if unset). */
    public static Collection<String> getInputColumnFamilies(Configuration conf) {
        return conf.getStringCollection(INPUT_COLUMNFAMILIES_CONFIG);
    }

    /** @return the configured input keyspace, or null if unset. */
    public static String getInputKeyspace(Configuration conf) {
        return conf.get(INPUT_KEYSPACE_CONFIG);
    }

    /** @return the input keyspace user name, or null if unset. */
    public static String getInputKeyspaceUserName(Configuration conf) {
        return conf.get(INPUT_KEYSPACE_USERNAME_CONFIG);
    }

    /** @return the input keyspace password, or null if unset. */
    public static String getInputKeyspacePassword(Configuration conf) {
        return conf.get(INPUT_KEYSPACE_PASSWD_CONFIG);
    }

    /* ---------------- input: key range / predicate ---------------- */

    /**
     * Set the KeyRange to limit the rows.
     * @param conf Job configuration you are about to run
     */
    public static void setInputRange(Configuration conf, String startToken, String endToken) {
        KeyRange range = new KeyRange().setStart_token(startToken).setEnd_token(endToken);
        conf.set(INPUT_KEYRANGE_CONFIG, thriftToString(range));
    }

    /**
     * Set the KeyRange to limit the rows, with an index-expression row filter.
     * @param conf Job configuration you are about to run
     */
    public static void setInputRange(Configuration conf, String startToken, String endToken,
            List<IndexExpression> filter) {
        KeyRange range = new KeyRange().setStart_token(startToken).setEnd_token(endToken).setRow_filter(filter);
        conf.set(INPUT_KEYRANGE_CONFIG, thriftToString(range));
    }

    /**
     * Set a row filter without restricting the token range.
     * @param conf Job configuration you are about to run
     */
    public static void setInputRange(Configuration conf, List<IndexExpression> filter) {
        KeyRange range = new KeyRange().setRow_filter(filter);
        conf.set(INPUT_KEYRANGE_CONFIG, thriftToString(range));
    }

    /** may be null if unset */
    public static KeyRange getInputKeyRange(Configuration conf) {
        String str = conf.get(INPUT_KEYRANGE_CONFIG);
        return null != str ? keyRangeFromString(str) : null;
    }

    /** Deserialize a hex-encoded, Thrift-binary-serialized {@link KeyRange}. */
    private static KeyRange keyRangeFromString(String st) {
        assert st != null;
        TDeserializer deserializer = new TDeserializer(new TBinaryProtocol.Factory());
        KeyRange keyRange = new KeyRange();
        try {
            deserializer.deserialize(keyRange, Hex.hexToBytes(st));
        } catch (TException e) {
            throw new RuntimeException(e);
        }
        return keyRange;
    }

    /**
     * Set the predicate that determines what columns will be selected from each row.
     *
     * @param conf      Job configuration you are about to run
     * @param predicate columns/slice to select from each row
     */
    public static void setInputSlicePredicate(Configuration conf, SlicePredicate predicate) {
        conf.set(INPUT_PREDICATE_CONFIG, thriftToString(predicate));
    }

    /** @return the configured slice predicate, or null if unset. */
    public static SlicePredicate getInputSlicePredicate(Configuration conf) {
        String s = conf.get(INPUT_PREDICATE_CONFIG);
        return s == null ? null : predicateFromString(s);
    }

    /** Deserialize a hex-encoded, Thrift-binary-serialized {@link SlicePredicate}. */
    private static SlicePredicate predicateFromString(String st) {
        assert st != null;
        TDeserializer deserializer = new TDeserializer(new TBinaryProtocol.Factory());
        SlicePredicate predicate = new SlicePredicate();
        try {
            deserializer.deserialize(predicate, Hex.hexToBytes(st));
        } catch (TException e) {
            throw new RuntimeException(e);
        }
        return predicate;
    }

    /* ---------------- input: split / batch / wide rows ---------------- */

    /**
     * Set the size of the input split.
     * This affects the number of maps created, if the number is too small
     * the overhead of each map will take up the bulk of the job time.
     *
     * @param conf      Job configuration you are about to run
     * @param splitsize Size of the input split
     */
    public static void setInputSplitSize(Configuration conf, int splitsize) {
        conf.setInt(INPUT_SPLIT_SIZE_CONFIG, splitsize);
    }

    /** @return input split size, defaulting to {@value #DEFAULT_SPLIT_SIZE}. */
    public static int getInputSplitSize(Configuration conf) {
        return conf.getInt(INPUT_SPLIT_SIZE_CONFIG, DEFAULT_SPLIT_SIZE);
    }

    /**
     * The number of rows to request with each get range slices request.
     * Too big and you can either get timeouts when it takes Cassandra too
     * long to fetch all the data. Too small and the performance
     * will be eaten up by the overhead of each request.
     *
     * @param conf      Job configuration you are about to run
     * @param batchsize Number of rows to request each time
     */
    public static void setRangeBatchSize(Configuration conf, int batchsize) {
        conf.setInt(RANGE_BATCH_SIZE_CONFIG, batchsize);
    }

    /**
     * The number of rows to request with each get range slices request.
     *
     * @param conf Job configuration you are about to run
     * @return Number of rows to request each time, defaulting to
     *         {@value #DEFAULT_RANGE_BATCH_SIZE}
     */
    public static int getRangeBatchSize(Configuration conf) {
        return conf.getInt(RANGE_BATCH_SIZE_CONFIG, DEFAULT_RANGE_BATCH_SIZE);
    }

    /** Enable/disable wide-row input (one column per map input value). */
    public static void setUseWideRows(Configuration conf, boolean wideRows) {
        conf.set(INPUT_WIDEROWS_CONFIG, String.valueOf(wideRows));
    }

    /** @return true iff wide-row input was enabled; false when unset. */
    public static boolean getInputIsWide(Configuration conf) {
        return Boolean.valueOf(conf.get(INPUT_WIDEROWS_CONFIG));
    }

    /* ---------------- partitioners ---------------- */

    public static void setInputPartitioner(Configuration conf, String classname) {
        conf.set(INPUT_PARTITIONER_CONFIG, classname);
    }

    /** @throws RuntimeException if the configured partitioner class is invalid. */
    public static IPartitioner getInputPartitioner(Configuration conf) {
        try {
            return FBUtilities.newPartitioner(conf.get(INPUT_PARTITIONER_CONFIG));
        } catch (ConfigurationException e) {
            throw new RuntimeException(e);
        }
    }

    public static void setOutputPartitioner(Configuration conf, String classname) {
        conf.set(OUTPUT_PARTITIONER_CONFIG, classname);
    }

    /** @throws RuntimeException if the configured partitioner class is invalid. */
    public static IPartitioner getOutputPartitioner(Configuration conf) {
        try {
            return FBUtilities.newPartitioner(conf.get(OUTPUT_PARTITIONER_CONFIG));
        } catch (ConfigurationException e) {
            throw new RuntimeException(e);
        }
    }

    /* ---------------- thrift endpoints & connections ---------------- */

    public static void setInputRpcPort(Configuration conf, String port) {
        conf.set(INPUT_THRIFT_PORT, port);
    }

    /**
     * @return input thrift port; {@value #DEFAULT_THRIFT_PORT} when unset or empty.
     * @throws NumberFormatException if the configured value is not a valid integer
     */
    public static int getInputRpcPort(Configuration conf) {
        String port = conf.get(INPUT_THRIFT_PORT, "");
        return port.isEmpty() ? DEFAULT_THRIFT_PORT : Integer.parseInt(port);
    }

    public static void setOutputRpcPort(Configuration conf, String port) {
        conf.set(OUTPUT_THRIFT_PORT, port);
    }

    /**
     * @return output thrift port; {@value #DEFAULT_THRIFT_PORT} when unset or empty.
     * @throws NumberFormatException if the configured value is not a valid integer
     */
    public static int getOutputRpcPort(Configuration conf) {
        String port = conf.get(OUTPUT_THRIFT_PORT, "");
        return port.isEmpty() ? DEFAULT_THRIFT_PORT : Integer.parseInt(port);
    }

    public static void setInputInitialAddress(Configuration conf, String address) {
        conf.set(INPUT_INITIAL_THRIFT_ADDRESS, address);
    }

    /** @return comma-separated list of input contact addresses, or null if unset. */
    public static String getInputInitialAddress(Configuration conf) {
        return conf.get(INPUT_INITIAL_THRIFT_ADDRESS);
    }

    public static void setOutputInitialAddress(Configuration conf, String address) {
        conf.set(OUTPUT_INITIAL_THRIFT_ADDRESS, address);
    }

    /** @return comma-separated list of output contact addresses, or null if unset. */
    public static String getOutputInitialAddress(Configuration conf) {
        return conf.get(OUTPUT_INITIAL_THRIFT_ADDRESS);
    }

    /**
     * Open a thrift connection to a single Cassandra node.
     *
     * @throws IOException wrapping login or transport failures
     */
    public static Cassandra.Client createConnection(Configuration conf, String host, Integer port)
            throws IOException {
        try {
            TSocket socket = new TSocket(host, port);
            TTransport transport = ConfigHelper.getInputTransportFactory(conf).openTransport(socket);
            return new Cassandra.Client(new TBinaryProtocol(transport));
        } catch (LoginException e) {
            throw new IOException("Unable to login to server " + host + ":" + port, e);
        } catch (TTransportException e) {
            throw new IOException("Unable to connect to server " + host + ":" + port, e);
        }
    }

    /**
     * Connect to the first reachable node from the configured input address list.
     * NOTE(review): intentionally reads the address/port via Cassandra's own
     * {@code ConfigHelper} (as the original did) — confirm the keys match this
     * class's before unifying.
     */
    public static Cassandra.Client getClientFromInputAddressList(Configuration conf) throws IOException {
        return getClientFromAddressList(conf, ConfigHelper.getInputInitialAddress(conf).split(","),
                ConfigHelper.getInputRpcPort(conf));
    }

    /** Connect to the first reachable node from the configured output address list. */
    public static Cassandra.Client getClientFromOutputAddressList(Configuration conf) throws IOException {
        return getClientFromAddressList(conf, getOutputInitialAddress(conf).split(","), getOutputRpcPort(conf));
    }

    /**
     * Try each address in order, returning the first successful connection.
     *
     * @throws IOException the last connection failure, or a descriptive error
     *                     when no addresses were supplied
     */
    private static Cassandra.Client getClientFromAddressList(Configuration conf, String[] addresses, int port)
            throws IOException {
        // BUG FIX: the original threw IndexOutOfBoundsException (exceptions.get(-1))
        // when the address array was empty; fail with a meaningful IOException.
        if (addresses.length == 0) {
            throw new IOException("No Cassandra thrift addresses configured");
        }
        Cassandra.Client client = null;
        List<IOException> exceptions = new ArrayList<IOException>();
        for (String address : addresses) {
            try {
                client = createConnection(conf, address, port);
                break;
            } catch (IOException ioe) {
                exceptions.add(ioe);
            }
        }
        if (client == null) {
            // Surface the most recent failure; earlier ones are superseded.
            throw exceptions.get(exceptions.size() - 1);
        }
        return client;
    }

    /* ---------------- transport factories ---------------- */

    public static void setInputTransportFactoryClass(Configuration conf, String classname) {
        conf.set(INPUT_TRANSPORT_FACTORY_CLASS, classname);
    }

    /** @return input transport factory; framed transport when unset. */
    public static ITransportFactory getInputTransportFactory(Configuration conf) {
        return getTransportFactory(
                conf.get(INPUT_TRANSPORT_FACTORY_CLASS, TFramedTransportFactory.class.getName()));
    }

    public static void setOutputTransportFactoryClass(Configuration conf, String classname) {
        conf.set(OUTPUT_TRANSPORT_FACTORY_CLASS, classname);
    }

    /** @return output transport factory; framed transport when unset. */
    public static ITransportFactory getOutputTransportFactory(Configuration conf) {
        return getTransportFactory(
                conf.get(OUTPUT_TRANSPORT_FACTORY_CLASS, TFramedTransportFactory.class.getName()));
    }

    /** Reflectively instantiate a transport factory by class name. */
    private static ITransportFactory getTransportFactory(String factoryClassName) {
        try {
            return (ITransportFactory) Class.forName(factoryClassName).newInstance();
        } catch (Exception e) {
            throw new RuntimeException("Failed to instantiate transport factory:" + factoryClassName, e);
        }
    }

    /* ---------------- consistency levels ---------------- */

    /** Set both the read and the write consistency level to {@code cl}. */
    public static void setConsistencyLevel(Configuration conf, ConsistencyLevel cl) {
        conf.set(CASSANDRA_CONSISTENCYLEVEL_READ, cl.name());
        conf.set(CASSANDRA_CONSISTENCYLEVEL_WRITE, cl.name());
    }

    /** @return read consistency level, defaulting to QUORUM. */
    public static ConsistencyLevel getReadConsistencyLevel(Configuration conf) {
        return ConsistencyLevel.valueOf(conf.get(READ_CONSISTENCY_LEVEL, DEFAULT_CONSISTENCY_LEVEL));
    }

    /** @return write consistency level, defaulting to QUORUM. */
    public static ConsistencyLevel getWriteConsistencyLevel(Configuration conf) {
        return ConsistencyLevel.valueOf(conf.get(WRITE_CONSISTENCY_LEVEL, DEFAULT_CONSISTENCY_LEVEL));
    }

    /** Apply the default read consistency level only if none is configured. */
    public static void setDefaultReadConsistencyLevel(Configuration configuration) {
        if (configuration.get(CASSANDRA_CONSISTENCYLEVEL_READ) == null) {
            configuration.set(CASSANDRA_CONSISTENCYLEVEL_READ, DEFAULT_CONSISTENCY_LEVEL);
        }
    }

    /** Apply the default write consistency level only if none is configured. */
    public static void setDefaultWriteConsistencyLevel(Configuration configuration) {
        if (configuration.get(CASSANDRA_CONSISTENCYLEVEL_WRITE) == null) {
            configuration.set(CASSANDRA_CONSISTENCYLEVEL_WRITE, DEFAULT_CONSISTENCY_LEVEL);
        }
    }

    /* ---------------- output: column family / compression ---------------- */

    /** @return the configured output column family, or null if unset. */
    public static String getOutputColumnFamily(Configuration conf) {
        return conf.get(OUTPUT_COLUMNFAMILY_CONFIG);
    }

    public static void setOutputCompressionClass(Configuration conf, String classname) {
        conf.set(OUTPUT_COMPRESSION_CLASS, classname);
    }

    /** @return the configured output compression class, or null if unset. */
    public static String getOutputCompressionClass(Configuration conf) {
        return conf.get(OUTPUT_COMPRESSION_CLASS);
    }

    public static void setOutputCompressionChunkLength(Configuration conf, String length) {
        conf.set(OUTPUT_COMPRESSION_CHUNK_LENGTH, length);
    }

    /** @return output compression chunk length, defaulting to Cassandra's default. */
    public static String getOutputCompressionChunkLength(Configuration conf) {
        return conf.get(OUTPUT_COMPRESSION_CHUNK_LENGTH,
                String.valueOf(CompressionParameters.DEFAULT_CHUNK_LENGTH));
    }

    /**
     * Build the {@link CompressionParameters} for output SSTables.
     * NOTE(review): reads the class/chunk-length via Cassandra's own
     * {@code ConfigHelper} (as the original did), not this class's setters.
     * (Method name spelling preserved for binary/source compatibility.)
     */
    public static CompressionParameters getOutputCompressionParamaters(Configuration conf) {
        if (ConfigHelper.getOutputCompressionClass(conf) == null)
            return new CompressionParameters(null);

        Map<String, String> options = new HashMap<String, String>();
        options.put(CompressionParameters.SSTABLE_COMPRESSION, ConfigHelper.getOutputCompressionClass(conf));
        options.put(CompressionParameters.CHUNK_LENGTH_KB, ConfigHelper.getOutputCompressionChunkLength(conf));

        try {
            return CompressionParameters.create(options);
        } catch (ConfigurationException e) {
            throw new RuntimeException(e);
        }
    }

    /** Correctly spelled alias for {@link #getOutputCompressionParamaters(Configuration)}. */
    public static CompressionParameters getOutputCompressionParameters(Configuration conf) {
        return getOutputCompressionParamaters(conf);
    }

    /* ---------------- output: credentials ---------------- */

    /** Set the global output keyspace password. */
    public static void setOutputKeyspacePassword(Configuration conf, String password) {
        conf.set(OUTPUT_KEYSPACE_PASSWD_CONFIG, password);
    }

    /** Set the user name used when writing to {@code keyspace}. */
    public static void setOutputKeyspaceUserName(Configuration configuration, String keyspace, String userName) {
        configuration.set(OUTPUT_KEYSPACE_USER_NAME_KEY + ":" + keyspace, userName);
    }

    /** @return user name for writing to {@code keyspace}, or null if unset. */
    public static String getOutputKeyspaceUserName(Configuration configuration, String keyspace) {
        return configuration.get(OUTPUT_KEYSPACE_USER_NAME_KEY + ":" + keyspace);
    }

    /** Set the password used when writing to {@code keyspace}. */
    public static void setOutputKeyspacePassword(Configuration configuration, String keyspace, String password) {
        configuration.set(OUTPUT_KEYSPACE_USER_PASSWORD_KEY + ":" + keyspace, password);
    }

    /** @return password for writing to {@code keyspace}, or null if unset. */
    public static String getOutputKeyspacePassword(Configuration configuration, String keyspace) {
        return configuration.get(OUTPUT_KEYSPACE_USER_PASSWORD_KEY + ":" + keyspace);
    }

    /* ---------------- serialization helper ---------------- */

    /** Serialize a Thrift struct to a hex string for storage in a Configuration. */
    private static String thriftToString(TBase<?, ?> object) {
        assert object != null;
        // this is so awful it's kind of cool!
        TSerializer serializer = new TSerializer(new TBinaryProtocol.Factory());
        try {
            return Hex.bytesToHex(serializer.serialize(object));
        } catch (TException e) {
            throw new RuntimeException(e);
        }
    }
}