// Java tutorial
/*
 * Copyright 2013 Illarion Kovalchuk
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package andromache.config;

import org.apache.cassandra.config.ConfigurationException;
import org.apache.cassandra.dht.IPartitioner;
import org.apache.cassandra.hadoop.ConfigHelper;
import org.apache.cassandra.io.compress.CompressionParameters;
import org.apache.cassandra.thrift.*;
import org.apache.cassandra.utils.FBUtilities;
import org.apache.cassandra.utils.Hex;
import org.apache.hadoop.conf.Configuration;
import org.apache.thrift.TBase;
import org.apache.thrift.TDeserializer;
import org.apache.thrift.TException;
import org.apache.thrift.TSerializer;
import org.apache.thrift.transport.TSocket;
import org.apache.thrift.transport.TTransport;
import org.apache.thrift.transport.TTransportException;

import javax.security.auth.login.LoginException;
import java.io.IOException;
import java.util.*;

/**
 * Static helpers for reading and writing Cassandra-related job settings on a
 * Hadoop {@link Configuration}, plus small utilities for opening Thrift
 * connections to Cassandra nodes.
 *
 * <p>Keys are stored as plain configuration strings; Thrift structures
 * ({@link KeyRange}, {@link SlicePredicate}) are serialized to hex-encoded
 * binary Thrift so they can round-trip through the string-only configuration.
 *
 * <p>All methods are static and stateless; thread-safety is that of the
 * {@link Configuration} instances passed in.
 */
public class CassandraConfigHelper {
    // Per-job consistency-level keys (shared by the read/write getters below).
    private static final String CASSANDRA_CONSISTENCYLEVEL_READ = "cassandra.consistencylevel.read";
    private static final String CASSANDRA_CONSISTENCYLEVEL_WRITE = "cassandra.consistencylevel.write";

    private static final String INPUT_COLUMNFAMILIES_CONFIG = "cassandra.input.columnfamilies";

    // Per-keyspace credentials are stored under "<key>:<keyspace>" (see the
    // getOutputKeyspaceUserName / getOutputKeyspacePassword accessors).
    private static final String OUTPUT_KEYSPACE_USER_NAME_KEY = "cassandra.output.keyspace.username.key";
    private static final String OUTPUT_KEYSPACE_USER_PASSWORD_KEY = "cassandra.output.keyspace.userpassword.key";

    private static final String INPUT_PARTITIONER_CONFIG = "cassandra.input.partitioner.class";
    private static final String OUTPUT_PARTITIONER_CONFIG = "cassandra.output.partitioner.class";
    private static final String INPUT_KEYSPACE_CONFIG = "cassandra.input.keyspace";
    private static final String INPUT_KEYSPACE_USERNAME_CONFIG = "cassandra.input.keyspace.username";
    private static final String INPUT_KEYSPACE_PASSWD_CONFIG = "cassandra.input.keyspace.passwd";
    private static final String OUTPUT_KEYSPACE_PASSWD_CONFIG = "cassandra.output.keyspace.passwd";
    private static final String OUTPUT_COLUMNFAMILY_CONFIG = "cassandra.output.columnfamily";
    private static final String INPUT_PREDICATE_CONFIG = "cassandra.input.predicate";
    private static final String INPUT_KEYRANGE_CONFIG = "cassandra.input.keyRange";
    private static final String INPUT_SPLIT_SIZE_CONFIG = "cassandra.input.split.size";
    private static final String INPUT_WIDEROWS_CONFIG = "cassandra.input.widerows";
    private static final String RANGE_BATCH_SIZE_CONFIG = "cassandra.range.batch.size";
    private static final String INPUT_THRIFT_PORT = "cassandra.input.thrift.port";
    private static final String OUTPUT_THRIFT_PORT = "cassandra.output.thrift.port";
    private static final String INPUT_INITIAL_THRIFT_ADDRESS = "cassandra.input.thrift.address";
    private static final String OUTPUT_INITIAL_THRIFT_ADDRESS = "cassandra.output.thrift.address";
    private static final String OUTPUT_COMPRESSION_CLASS = "cassandra.output.compression.class";
    private static final String OUTPUT_COMPRESSION_CHUNK_LENGTH = "cassandra.output.compression.length";
    private static final String INPUT_TRANSPORT_FACTORY_CLASS = "cassandra.input.transport.factory.class";
    private static final String OUTPUT_TRANSPORT_FACTORY_CLASS = "cassandra.output.transport.factory.class";

    private static final int DEFAULT_SPLIT_SIZE = 64 * 1024;
    private static final int DEFAULT_THRIFT_PORT = 9160;
    private static final int DEFAULT_RANGE_BATCH_SIZE = 4096;
    private static final String DEFAULT_CONSISTENCY_LEVEL = ConsistencyLevel.QUORUM.name();

    // ------------------------------------------------------------------
    // Keyspace / column family selection
    // ------------------------------------------------------------------

    /**
     * Sets the input keyspace and the list of column families to read from it.
     *
     * @param conf                job configuration you are about to run
     * @param inputKeyspace       keyspace to read from; must not be null
     * @param inputColumnFamilies column families to read; must not be null
     * @throws UnsupportedOperationException if either argument is null
     */
    public static void setInputColumnFamilies(Configuration conf,
                                              String inputKeyspace,
                                              List<String> inputColumnFamilies) {
        if (inputKeyspace == null) {
            throw new UnsupportedOperationException("keyspace may not be null");
        }
        // BUGFIX: the original checked a freshly-assigned local (String cf = "")
        // for null, which could never fire; the intent was clearly to validate
        // the column-family list itself.
        if (inputColumnFamilies == null) {
            throw new UnsupportedOperationException("columnfamily may not be null");
        }
        conf.set(INPUT_KEYSPACE_CONFIG, inputKeyspace);
        conf.setStrings(INPUT_COLUMNFAMILIES_CONFIG,
                inputColumnFamilies.toArray(new String[inputColumnFamilies.size()]));
    }

    /** Returns the configured input column families (empty collection if unset). */
    public static Collection<String> getInputColumnFamilies(Configuration conf) {
        return conf.getStringCollection(INPUT_COLUMNFAMILIES_CONFIG);
    }

    /** Returns the configured input keyspace, or null if unset. */
    public static String getInputKeyspace(Configuration conf) {
        return conf.get(INPUT_KEYSPACE_CONFIG);
    }

    /** Returns the configured output column family, or null if unset. */
    public static String getOutputColumnFamily(Configuration conf) {
        return conf.get(OUTPUT_COLUMNFAMILY_CONFIG);
    }

    // ------------------------------------------------------------------
    // Credentials
    // ------------------------------------------------------------------

    /** Returns the input keyspace user name, or null if unset. */
    public static String getInputKeyspaceUserName(Configuration conf) {
        return conf.get(INPUT_KEYSPACE_USERNAME_CONFIG);
    }

    /** Returns the input keyspace password, or null if unset. */
    public static String getInputKeyspacePassword(Configuration conf) {
        return conf.get(INPUT_KEYSPACE_PASSWD_CONFIG);
    }

    /** Sets the (global) output keyspace password. */
    public static void setOutputKeyspacePassword(Configuration conf, String password) {
        conf.set(OUTPUT_KEYSPACE_PASSWD_CONFIG, password);
    }

    /** Returns the user name configured for the given output keyspace, or null. */
    public static String getOutputKeyspaceUserName(Configuration configuration, String keyspace) {
        return configuration.get(OUTPUT_KEYSPACE_USER_NAME_KEY + ":" + keyspace);
    }

    /** Sets the user name for the given output keyspace. */
    public static void setOutputKeyspaceUserName(Configuration configuration, String keyspace, String userName) {
        configuration.set(OUTPUT_KEYSPACE_USER_NAME_KEY + ":" + keyspace, userName);
    }

    /** Returns the password configured for the given output keyspace, or null. */
    public static String getOutputKeyspacePassword(Configuration configuration, String keyspace) {
        return configuration.get(OUTPUT_KEYSPACE_USER_PASSWORD_KEY + ":" + keyspace);
    }

    /** Sets the password for the given output keyspace. */
    public static void setOutputKeyspacePassword(Configuration configuration, String keyspace, String password) {
        configuration.set(OUTPUT_KEYSPACE_USER_PASSWORD_KEY + ":" + keyspace, password);
    }

    // ------------------------------------------------------------------
    // Key ranges and slice predicates (hex-encoded Thrift in the config)
    // ------------------------------------------------------------------

    /**
     * Set the KeyRange to limit the rows.
     *
     * @param conf Job configuration you are about to run
     */
    public static void setInputRange(Configuration conf, String startToken, String endToken) {
        KeyRange range = new KeyRange()
                .setStart_token(startToken)
                .setEnd_token(endToken);
        conf.set(INPUT_KEYRANGE_CONFIG, thriftToString(range));
    }

    /**
     * Set the KeyRange to limit the rows, with an additional row filter.
     *
     * @param conf Job configuration you are about to run
     */
    public static void setInputRange(Configuration conf, String startToken, String endToken,
                                     List<IndexExpression> filter) {
        KeyRange range = new KeyRange()
                .setStart_token(startToken)
                .setEnd_token(endToken)
                .setRow_filter(filter);
        conf.set(INPUT_KEYRANGE_CONFIG, thriftToString(range));
    }

    /**
     * Set a row filter without restricting the token range.
     *
     * @param conf Job configuration you are about to run
     */
    public static void setInputRange(Configuration conf, List<IndexExpression> filter) {
        KeyRange range = new KeyRange().setRow_filter(filter);
        conf.set(INPUT_KEYRANGE_CONFIG, thriftToString(range));
    }

    /** may be null if unset */
    public static KeyRange getInputKeyRange(Configuration conf) {
        String str = conf.get(INPUT_KEYRANGE_CONFIG);
        return null != str ? keyRangeFromString(str) : null;
    }

    // Deserializes a hex-encoded binary-Thrift KeyRange written by setInputRange.
    private static KeyRange keyRangeFromString(String st) {
        assert st != null;
        TDeserializer deserializer = new TDeserializer(new TBinaryProtocol.Factory());
        KeyRange keyRange = new KeyRange();
        try {
            deserializer.deserialize(keyRange, Hex.hexToBytes(st));
        } catch (TException e) {
            throw new RuntimeException(e);
        }
        return keyRange;
    }

    /**
     * Set the predicate that determines what columns will be selected from each row.
     *
     * @param conf      Job configuration you are about to run
     * @param predicate columns-selection predicate to serialize into the config
     */
    public static void setInputSlicePredicate(Configuration conf, SlicePredicate predicate) {
        conf.set(INPUT_PREDICATE_CONFIG, thriftToString(predicate));
    }

    /** Returns the configured slice predicate, or null if unset. */
    public static SlicePredicate getInputSlicePredicate(Configuration conf) {
        String s = conf.get(INPUT_PREDICATE_CONFIG);
        return s == null ? null : predicateFromString(s);
    }

    // Deserializes a hex-encoded binary-Thrift SlicePredicate written by
    // setInputSlicePredicate.
    private static SlicePredicate predicateFromString(String st) {
        assert st != null;
        TDeserializer deserializer = new TDeserializer(new TBinaryProtocol.Factory());
        SlicePredicate predicate = new SlicePredicate();
        try {
            deserializer.deserialize(predicate, Hex.hexToBytes(st));
        } catch (TException e) {
            throw new RuntimeException(e);
        }
        return predicate;
    }

    // ------------------------------------------------------------------
    // Batch / split sizing and wide-row mode
    // ------------------------------------------------------------------

    /**
     * The number of rows to request with each get range slices request.
     * Too big and you can either get timeouts when it takes Cassandra too
     * long to fetch all the data. Too small and the performance
     * will be eaten up by the overhead of each request.
     *
     * @param conf      Job configuration you are about to run
     * @param batchsize Number of rows to request each time
     */
    public static void setRangeBatchSize(Configuration conf, int batchsize) {
        conf.setInt(RANGE_BATCH_SIZE_CONFIG, batchsize);
    }

    /**
     * The number of rows to request with each get range slices request.
     *
     * @param conf Job configuration you are about to run
     * @return Number of rows to request each time (default {@value #DEFAULT_RANGE_BATCH_SIZE})
     */
    public static int getRangeBatchSize(Configuration conf) {
        return conf.getInt(RANGE_BATCH_SIZE_CONFIG, DEFAULT_RANGE_BATCH_SIZE);
    }

    /**
     * Set the size of the input split.
     * This affects the number of maps created, if the number is too small
     * the overhead of each map will take up the bulk of the job time.
     *
     * @param conf      Job configuration you are about to run
     * @param splitsize Size of the input split
     */
    public static void setInputSplitSize(Configuration conf, int splitsize) {
        conf.setInt(INPUT_SPLIT_SIZE_CONFIG, splitsize);
    }

    /** Returns the input split size (default {@value #DEFAULT_SPLIT_SIZE}). */
    public static int getInputSplitSize(Configuration conf) {
        return conf.getInt(INPUT_SPLIT_SIZE_CONFIG, DEFAULT_SPLIT_SIZE);
    }

    /** Enables or disables wide-row input mode. */
    public static void setUseWideRows(Configuration conf, boolean wideRows) {
        conf.set(INPUT_WIDEROWS_CONFIG, String.valueOf(wideRows));
    }

    /** Returns whether wide-row input mode is enabled (false when unset). */
    public static boolean getInputIsWide(Configuration conf) {
        return Boolean.valueOf(conf.get(INPUT_WIDEROWS_CONFIG));
    }

    // ------------------------------------------------------------------
    // Partitioners
    // ------------------------------------------------------------------

    /** Sets the fully-qualified class name of the input partitioner. */
    public static void setInputPartitioner(Configuration conf, String classname) {
        conf.set(INPUT_PARTITIONER_CONFIG, classname);
    }

    /**
     * Instantiates the configured input partitioner.
     *
     * @throws RuntimeException wrapping the ConfigurationException when the
     *                          class is missing or cannot be instantiated
     */
    public static IPartitioner getInputPartitioner(Configuration conf) {
        try {
            return FBUtilities.newPartitioner(conf.get(INPUT_PARTITIONER_CONFIG));
        } catch (ConfigurationException e) {
            throw new RuntimeException(e);
        }
    }

    /** Sets the fully-qualified class name of the output partitioner. */
    public static void setOutputPartitioner(Configuration conf, String classname) {
        conf.set(OUTPUT_PARTITIONER_CONFIG, classname);
    }

    /**
     * Instantiates the configured output partitioner.
     *
     * @throws RuntimeException wrapping the ConfigurationException when the
     *                          class is missing or cannot be instantiated
     */
    public static IPartitioner getOutputPartitioner(Configuration conf) {
        try {
            return FBUtilities.newPartitioner(conf.get(OUTPUT_PARTITIONER_CONFIG));
        } catch (ConfigurationException e) {
            throw new RuntimeException(e);
        }
    }

    // ------------------------------------------------------------------
    // Thrift addresses, ports, and connections
    // ------------------------------------------------------------------

    /** Sets the comma-separated list of initial input Thrift addresses. */
    public static void setInputInitialAddress(Configuration conf, String address) {
        conf.set(INPUT_INITIAL_THRIFT_ADDRESS, address);
    }

    /** Returns the initial input Thrift address list, or null if unset. */
    public static String getInputInitialAddress(Configuration conf) {
        return conf.get(INPUT_INITIAL_THRIFT_ADDRESS);
    }

    /** Sets the comma-separated list of initial output Thrift addresses. */
    public static void setOutputInitialAddress(Configuration conf, String address) {
        conf.set(OUTPUT_INITIAL_THRIFT_ADDRESS, address);
    }

    /** Returns the initial output Thrift address list, or null if unset. */
    public static String getOutputInitialAddress(Configuration conf) {
        return conf.get(OUTPUT_INITIAL_THRIFT_ADDRESS);
    }

    /** Sets the input Thrift port (stored as a string, parsed on read). */
    public static void setInputRpcPort(Configuration conf, String port) {
        conf.set(INPUT_THRIFT_PORT, port);
    }

    /** Returns the input Thrift port, defaulting to {@value #DEFAULT_THRIFT_PORT}. */
    public static int getInputRpcPort(Configuration conf) {
        // Unset or empty means "use the standard Thrift port".
        if (conf.get(INPUT_THRIFT_PORT, "").equals("")) {
            return DEFAULT_THRIFT_PORT;
        }
        return Integer.parseInt(conf.get(INPUT_THRIFT_PORT));
    }

    /** Sets the output Thrift port (stored as a string, parsed on read). */
    public static void setOutputRpcPort(Configuration conf, String port) {
        conf.set(OUTPUT_THRIFT_PORT, port);
    }

    /** Returns the output Thrift port, defaulting to {@value #DEFAULT_THRIFT_PORT}. */
    public static int getOutputRpcPort(Configuration conf) {
        if (conf.get(OUTPUT_THRIFT_PORT, "").equals("")) {
            return DEFAULT_THRIFT_PORT;
        }
        return Integer.parseInt(conf.get(OUTPUT_THRIFT_PORT));
    }

    /**
     * Opens a Thrift connection to a single Cassandra node.
     *
     * @throws IOException if login or transport setup fails (cause preserved)
     */
    public static Cassandra.Client createConnection(Configuration conf, String host, Integer port)
            throws IOException {
        try {
            TSocket socket = new TSocket(host, port);
            // NOTE(review): uses ConfigHelper's transport factory rather than
            // this class's getInputTransportFactory — the two may read
            // different config keys; confirm which is intended.
            TTransport transport = ConfigHelper.getInputTransportFactory(conf).openTransport(socket);
            return new Cassandra.Client(new TBinaryProtocol(transport));
        } catch (LoginException e) {
            throw new IOException("Unable to login to server " + host + ":" + port, e);
        } catch (TTransportException e) {
            throw new IOException("Unable to connect to server " + host + ":" + port, e);
        }
    }

    /** Connects to the first reachable node from the configured input address list. */
    public static Cassandra.Client getClientFromInputAddressList(Configuration conf) throws IOException {
        // NOTE(review): reads the address/port via ConfigHelper instead of this
        // class's own getters; the keys appear identical but defaults may
        // differ — confirm before unifying.
        return getClientFromAddressList(conf,
                ConfigHelper.getInputInitialAddress(conf).split(","),
                ConfigHelper.getInputRpcPort(conf));
    }

    /** Connects to the first reachable node from the configured output address list. */
    public static Cassandra.Client getClientFromOutputAddressList(Configuration conf) throws IOException {
        return getClientFromAddressList(conf,
                getOutputInitialAddress(conf).split(","),
                getOutputRpcPort(conf));
    }

    /**
     * Tries each address in order and returns the first successful connection.
     *
     * @throws IOException the last connection failure if every address fails,
     *                     or a descriptive IOException if the list is empty
     */
    private static Cassandra.Client getClientFromAddressList(Configuration conf,
                                                             String[] addresses,
                                                             int port) throws IOException {
        Cassandra.Client client = null;
        List<IOException> exceptions = new ArrayList<IOException>();
        for (String address : addresses) {
            try {
                client = createConnection(conf, address, port);
                break;
            } catch (IOException ioe) {
                exceptions.add(ioe);
            }
        }
        if (client == null) {
            // BUGFIX: guard against an empty address array, which previously
            // caused an IndexOutOfBoundsException on exceptions.get(-1).
            if (exceptions.isEmpty()) {
                throw new IOException("No Cassandra addresses supplied to connect to");
            }
            throw exceptions.get(exceptions.size() - 1);
        }
        return client;
    }

    // ------------------------------------------------------------------
    // Transport factories
    // ------------------------------------------------------------------

    /** Sets the input Thrift transport-factory class name. */
    public static void setInputTransportFactoryClass(Configuration conf, String classname) {
        conf.set(INPUT_TRANSPORT_FACTORY_CLASS, classname);
    }

    /** Instantiates the input transport factory (framed transport by default). */
    public static ITransportFactory getInputTransportFactory(Configuration conf) {
        return getTransportFactory(
                conf.get(INPUT_TRANSPORT_FACTORY_CLASS, TFramedTransportFactory.class.getName()));
    }

    /** Sets the output Thrift transport-factory class name. */
    public static void setOutputTransportFactoryClass(Configuration conf, String classname) {
        conf.set(OUTPUT_TRANSPORT_FACTORY_CLASS, classname);
    }

    /** Instantiates the output transport factory (framed transport by default). */
    public static ITransportFactory getOutputTransportFactory(Configuration conf) {
        return getTransportFactory(
                conf.get(OUTPUT_TRANSPORT_FACTORY_CLASS, TFramedTransportFactory.class.getName()));
    }

    // Reflectively instantiates a transport factory by class name.
    private static ITransportFactory getTransportFactory(String factoryClassName) {
        try {
            return (ITransportFactory) Class.forName(factoryClassName).newInstance();
        } catch (Exception e) {
            throw new RuntimeException("Failed to instantiate transport factory:" + factoryClassName, e);
        }
    }

    // ------------------------------------------------------------------
    // Output compression
    // ------------------------------------------------------------------

    /** Sets the output SSTable compression class name. */
    public static void setOutputCompressionClass(Configuration conf, String classname) {
        conf.set(OUTPUT_COMPRESSION_CLASS, classname);
    }

    /** Returns the output compression class name, or null if unset. */
    public static String getOutputCompressionClass(Configuration conf) {
        return conf.get(OUTPUT_COMPRESSION_CLASS);
    }

    /** Sets the output compression chunk length (stored as a string). */
    public static void setOutputCompressionChunkLength(Configuration conf, String length) {
        conf.set(OUTPUT_COMPRESSION_CHUNK_LENGTH, length);
    }

    /** Returns the output compression chunk length, defaulting to Cassandra's default. */
    public static String getOutputCompressionChunkLength(Configuration conf) {
        return conf.get(OUTPUT_COMPRESSION_CHUNK_LENGTH,
                String.valueOf(CompressionParameters.DEFAULT_CHUNK_LENGTH));
    }

    /**
     * Builds {@link CompressionParameters} from the configured class and chunk
     * length. (Method name typo — "Paramaters" — kept for caller compatibility.)
     *
     * @throws RuntimeException wrapping a ConfigurationException on invalid options
     */
    public static CompressionParameters getOutputCompressionParamaters(Configuration conf) {
        // NOTE(review): reads the class/length via ConfigHelper rather than this
        // class's own getters; keys look equivalent — confirm before unifying.
        if (ConfigHelper.getOutputCompressionClass(conf) == null) {
            return new CompressionParameters(null);
        }
        Map<String, String> options = new HashMap<String, String>();
        options.put(CompressionParameters.SSTABLE_COMPRESSION, ConfigHelper.getOutputCompressionClass(conf));
        options.put(CompressionParameters.CHUNK_LENGTH_KB, ConfigHelper.getOutputCompressionChunkLength(conf));
        try {
            return CompressionParameters.create(options);
        } catch (ConfigurationException e) {
            throw new RuntimeException(e);
        }
    }

    // ------------------------------------------------------------------
    // Consistency levels
    // ------------------------------------------------------------------

    /** Sets both the read and write consistency levels to the given value. */
    public static void setConsistencyLevel(Configuration conf, ConsistencyLevel cl) {
        conf.set(CASSANDRA_CONSISTENCYLEVEL_READ, cl.name());
        conf.set(CASSANDRA_CONSISTENCYLEVEL_WRITE, cl.name());
    }

    /** Returns the read consistency level (QUORUM when unset). */
    public static ConsistencyLevel getReadConsistencyLevel(Configuration conf) {
        // Same key as setConsistencyLevel / setDefaultReadConsistencyLevel; the
        // original had a duplicate constant for it, consolidated here.
        return ConsistencyLevel.valueOf(conf.get(CASSANDRA_CONSISTENCYLEVEL_READ, DEFAULT_CONSISTENCY_LEVEL));
    }

    /** Returns the write consistency level (QUORUM when unset). */
    public static ConsistencyLevel getWriteConsistencyLevel(Configuration conf) {
        return ConsistencyLevel.valueOf(conf.get(CASSANDRA_CONSISTENCYLEVEL_WRITE, DEFAULT_CONSISTENCY_LEVEL));
    }

    /** Sets the write consistency level to QUORUM only if not already configured. */
    public static void setDefaultWriteConsistencyLevel(Configuration configuration) {
        if (configuration.get(CASSANDRA_CONSISTENCYLEVEL_WRITE) == null) {
            configuration.set(CASSANDRA_CONSISTENCYLEVEL_WRITE, DEFAULT_CONSISTENCY_LEVEL);
        }
    }

    /** Sets the read consistency level to QUORUM only if not already configured. */
    public static void setDefaultReadConsistencyLevel(Configuration configuration) {
        if (configuration.get(CASSANDRA_CONSISTENCYLEVEL_READ) == null) {
            configuration.set(CASSANDRA_CONSISTENCYLEVEL_READ, DEFAULT_CONSISTENCY_LEVEL);
        }
    }

    // ------------------------------------------------------------------
    // Serialization helper
    // ------------------------------------------------------------------

    // Serializes a Thrift struct to a hex string so it fits in a string-only
    // Configuration entry.
    private static String thriftToString(TBase object) {
        assert object != null;
        // this is so awful it's kind of cool!
        TSerializer serializer = new TSerializer(new TBinaryProtocol.Factory());
        try {
            return Hex.bytesToHex(serializer.serialize(object));
        } catch (TException e) {
            throw new RuntimeException(e);
        }
    }
}