Java tutorial
/* * Copyright 2014, Stratio. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.stratio.deep.cassandra.util; import static com.stratio.deep.cassandra.util.AnnotationUtils.MAP_JAVA_TYPE_TO_ABSTRACT_TYPE; import static com.stratio.deep.commons.utils.AnnotationUtils.deepFieldName; import static com.stratio.deep.commons.utils.AnnotationUtils.getBeanFieldValue; import static com.stratio.deep.commons.utils.Utils.quote; import static com.stratio.deep.commons.utils.Utils.singleQuote; import java.io.Serializable; import java.lang.reflect.Field; import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.Map; import java.util.Set; import java.util.UUID; import java.util.regex.Pattern; import org.apache.cassandra.db.marshal.AbstractType; import org.apache.cassandra.db.marshal.CompositeType; import org.apache.cassandra.db.marshal.ListType; import org.apache.cassandra.db.marshal.MapType; import org.apache.cassandra.db.marshal.SetType; import org.apache.cassandra.db.marshal.TimeUUIDType; import org.apache.cassandra.db.marshal.UUIDType; import org.apache.cassandra.dht.Token; import org.apache.commons.collections.MapUtils; import org.apache.commons.lang.StringUtils; import org.apache.spark.TaskContext; import org.apache.spark.rdd.RDD; import com.datastax.driver.core.DataType; import com.datastax.driver.core.querybuilder.Batch; import com.datastax.driver.core.querybuilder.Insert; import 
com.datastax.driver.core.querybuilder.QueryBuilder; import com.stratio.deep.cassandra.config.CassandraDeepJobConfig; import com.stratio.deep.cassandra.config.ICassandraDeepJobConfig; import com.stratio.deep.cassandra.config.OperatorCassandra; import com.stratio.deep.cassandra.cql.DeepCqlRecordWriter; import com.stratio.deep.cassandra.querybuilder.DefaultQueryBuilder; import com.stratio.deep.commons.annotations.DeepField; import com.stratio.deep.commons.entity.Cell; import com.stratio.deep.commons.entity.Cells; import com.stratio.deep.commons.entity.IDeepType; import com.stratio.deep.commons.exception.DeepGenericException; import com.stratio.deep.commons.filter.Filter; import com.stratio.deep.commons.filter.FilterType; import com.stratio.deep.commons.functions.AbstractSerializableFunction2; import com.stratio.deep.commons.rdd.DeepTokenRange; import com.stratio.deep.commons.utils.AnnotationUtils; import com.stratio.deep.commons.utils.Pair; import com.stratio.deep.commons.utils.Utils; import scala.Function1; import scala.Tuple2; import scala.collection.Iterator; import scala.reflect.ClassTag; import scala.reflect.ClassTag$; /** * Created by luca on 16/04/14. 
*/
public class CassandraUtils {

    /**
     * Not meant to be instantiated: the class only exposes static helpers.
     * NOTE(review): earlier documentation called this constructor private, but it is declared without a modifier
     * and is therefore package-private. The visibility is left as it was so same-package code keeps compiling.
     */
    CassandraUtils() {
    }

    /**
     * Persists the provided RDD to Cassandra using batched CQL3 INSERT statements.
     * <p>
     * The RDD is mapped with <i>transformer</i>, the output table is created if needed (the first mapped element
     * is handed to {@code createOutputTableIfNeeded}), and the mapped RDD is then collected with
     * {@code RDD.collect()} and written in batches of at most {@code writeConfig.getBatchSize()} rows.
     * No empty batch is ever sent.
     *
     * @param rdd         the RDD to persist.
     * @param writeConfig the write configuration providing keyspace, table, session and batch size.
     * @param transformer the function mapping every RDD element to a {@code Tuple2<Cells, Cells>}.
     * @param <W>         the type of the RDD elements.
     * @throws IllegalArgumentException if <i>writeConfig</i> is not a write configuration or if its batch size
     *                                  is not strictly positive.
     */
    public static <W> void doCql3SaveToCassandra(RDD<W> rdd, ICassandraDeepJobConfig<W> writeConfig,
            Function1<W, Tuple2<Cells, Cells>> transformer) {
        if (!writeConfig.getIsWriteConfig()) {
            throw new IllegalArgumentException("Provided configuration object is not suitable for writing");
        }

        // A non-positive batch size used to make the loop send one empty batch and silently write nothing.
        final int pageSize = writeConfig.getBatchSize();
        if (pageSize <= 0) {
            throw new IllegalArgumentException("Batch size must be positive, got: " + pageSize);
        }

        // The tuple instance is only needed to obtain a runtime class for the ClassTag.
        Tuple2<Map<String, ByteBuffer>, Map<String, ByteBuffer>> tuple = new Tuple2<>(null, null);

        RDD<Tuple2<Cells, Cells>> mappedRDD = rdd.map(transformer,
                ClassTag$.MODULE$.<Tuple2<Cells, Cells>>apply(tuple.getClass()));

        ((CassandraDeepJobConfig) writeConfig).createOutputTableIfNeeded(mappedRDD.first());

        List<Tuple2<Cells, Cells>> elements = Arrays.asList((Tuple2<Cells, Cells>[]) mappedRDD.collect());
        final int total = elements.size();

        int from = 0;
        while (from < total) {
            // Written as from + min(...) so the upper bound cannot overflow for very large batch sizes.
            int to = from + Math.min(pageSize, total - from);

            Batch batch = QueryBuilder.batch();
            for (Tuple2<Cells, Cells> t : elements.subList(from, to)) {
                Tuple2<String[], Object[]> bindVars = Utils.prepareTuple4CqlDriver(t);

                Insert insert = QueryBuilder
                        .insertInto(quote(writeConfig.getKeyspace()), quote(writeConfig.getTable()))
                        .values(bindVars._1(), bindVars._2());

                batch.add(insert);
            }
            writeConfig.getSession().execute(batch);

            from = to;
        }
    }

    /**
     * Provided the mapping function <i>transformer</i> that transforms a generic RDD to an RDD<Tuple2<Cells, Cells>>,
     * this generic method persists the RDD to underlying Cassandra datastore.
* * @param rdd * @param writeConfig * @param transformer */ public static <W> void doSaveToCassandra(RDD<W> rdd, final ICassandraDeepJobConfig<W> writeConfig, Function1<W, Tuple2<Cells, Cells>> transformer) { if (!writeConfig.getIsWriteConfig()) { throw new IllegalArgumentException("Provided configuration object is not suitable for writing"); } Tuple2<Map<String, ByteBuffer>, Map<String, ByteBuffer>> tuple = new Tuple2<>(null, null); final RDD<Tuple2<Cells, Cells>> mappedRDD = rdd.map(transformer, ClassTag$.MODULE$.<Tuple2<Cells, Cells>>apply(tuple.getClass())); ((CassandraDeepJobConfig) writeConfig).createOutputTableIfNeeded(mappedRDD.first()); ClassTag<Integer> uClassTag = ClassTag$.MODULE$.apply(Integer.class); mappedRDD.context().runJob(mappedRDD, new AbstractSerializableFunction2<TaskContext, Iterator<Tuple2<Cells, Cells>>, Integer>() { @Override public Integer apply(TaskContext context, Iterator<Tuple2<Cells, Cells>> rows) { try (DeepCqlRecordWriter writer = new DeepCqlRecordWriter(writeConfig, new DefaultQueryBuilder())) { while (rows.hasNext()) { Tuple2<Cells, Cells> row = rows.next(); writer.write(row._1(), row._2()); } } return null; } }, uClassTag); } /** * Returns an instance of the Cassandra validator that matches the provided object. * * @param obj the object to use to resolve the cassandra marshaller. * @param <T> the generic object type. * @return an instance of the Cassandra validator that matches the provided object. * @throws com.stratio.deep.commons.exception.DeepGenericException if no validator can be found for the specified object. 
*/ public static <T> AbstractType<?> marshallerInstance(T obj) { AbstractType<?> abstractType = null; if (obj != null) { abstractType = MAP_JAVA_TYPE_TO_ABSTRACT_TYPE.get(obj.getClass()); if (obj instanceof UUID) { UUID uuid = (UUID) obj; if (uuid.version() == 1) { abstractType = TimeUUIDType.instance; } else { abstractType = UUIDType.instance; } } if (abstractType == null) { //LIST Case if (List.class.isAssignableFrom(obj.getClass())) { List list = (List) obj; if (!list.isEmpty()) { abstractType = ListType.getInstance(marshallerInstance(list.get(0))); } } // SET Case else if (Set.class.isAssignableFrom(obj.getClass())) { Set set = (Set) obj; if (!set.isEmpty()) { java.util.Iterator i = set.iterator(); Object o = i.next(); abstractType = SetType.getInstance(marshallerInstance(o)); } } // MAP Case else if (Map.class.isAssignableFrom(obj.getClass())) { Set set = ((Map) obj).keySet(); if (!set.isEmpty()) { java.util.Iterator i = set.iterator(); Object o = i.next(); abstractType = MapType.getInstance(marshallerInstance(o), marshallerInstance(((Map) obj).get(o))); } } } } if (abstractType == null) { throw new DeepGenericException("parameter class " + obj.getClass().getCanonicalName() + " does not have a" + " Cassandra marshaller"); } return abstractType; } /** * Generates the update query for the provided IDeepType. The UPDATE query takes into account all the columns of the * testentity, even those containing the null value. We do not generate the key part of the update query. The * provided query will be concatenated with the key part by CqlRecordWriter. * * @param keys the row keys wrapped inside a Cells object. * @param values all the other row columns wrapped inside a Cells object. * @param outputKeyspace the output keyspace. * @param outputColumnFamily the output column family. * @return the update query statement. 
*/
    public static String updateQueryGenerator(Cells keys, Cells values, String outputKeyspace,
            String outputColumnFamily) {

        // WHERE part: one "name = ?" term for every cell flagged as partition key or cluster key.
        StringBuilder whereClause = new StringBuilder(" WHERE ");
        boolean firstKey = true;
        for (Cell keyCell : keys.getCells()) {
            if (!keyCell.isKey() && !keyCell.isClusterKey()) {
                continue;
            }
            if (!firstKey) {
                whereClause.append(" AND ");
            }
            whereClause.append(quote(keyCell.getCellName())).append(" = ?");
            firstKey = false;
        }

        // SET part: one "name = ?" assignment for every value cell, in iteration order.
        StringBuilder assignments = new StringBuilder();
        for (Cell valueCell : values.getCells()) {
            if (assignments.length() > 0) {
                assignments.append(", ");
            }
            assignments.append(quote(valueCell.getCellName())).append(" = ?");
        }

        return "UPDATE " + outputKeyspace + "." + outputColumnFamily + " SET " + assignments + whereClause + ";";
    }

    /**
     * Generates a create table cql statement from the given Cells description.
     *
     * @param keys               the row keys wrapped inside a Cells object.
     * @param values             all the other row columns wrapped inside a Cells object.
     * @param outputKeyspace     the output keyspace.
     * @param outputColumnFamily the output column family.
     * @return the create table statement.
*/ public static String createTableQueryGenerator(Cells keys, Cells values, String outputKeyspace, String outputColumnFamily) { if (keys == null || StringUtils.isEmpty(outputKeyspace) || StringUtils.isEmpty(outputColumnFamily)) { throw new DeepGenericException("keys, outputKeyspace and outputColumnFamily cannot be null"); } StringBuilder sb = new StringBuilder("CREATE TABLE ").append(outputKeyspace).append(".") .append(outputColumnFamily).append(" ("); List<String> partitionKey = new ArrayList<>(); List<String> clusterKey = new ArrayList<>(); boolean isFirstField = true; for (Cell key : keys) { String cellName = quote(key.getCellName()); if (!isFirstField) { sb.append(", "); } // CellValidator cellValidator = CellValidator.cellValidator(key.getCellValue()); sb.append(cellName).append(" ") .append(CassandraUtils.marshallerInstance(key.getValue()).asCQL3Type().toString()); if (((Cell) key).isKey()) { partitionKey.add(cellName); } else if (((Cell) key).isClusterKey()) { clusterKey.add(cellName); } isFirstField = false; } if (values != null) { for (Cell cell : values) { sb.append(", "); if (cell.getValue() != null) { sb.append(quote(cell.getCellName())).append(" ") .append(CassandraUtils.marshallerInstance(cell.getValue()).asCQL3Type().toString()); } } } StringBuilder partitionKeyToken = new StringBuilder("("); isFirstField = true; for (String s : partitionKey) { if (!isFirstField) { partitionKeyToken.append(", "); } partitionKeyToken.append(s); isFirstField = false; } partitionKeyToken.append(")"); StringBuilder clusterKeyToken = new StringBuilder(""); isFirstField = true; for (String s : clusterKey) { if (!isFirstField) { clusterKeyToken.append(", "); } clusterKeyToken.append(s); isFirstField = false; } StringBuilder keyPart = new StringBuilder(", PRIMARY KEY "); if (!clusterKey.isEmpty()) { keyPart.append("("); } keyPart.append(partitionKeyToken); if (!clusterKey.isEmpty()) { keyPart.append(", "); keyPart.append(clusterKeyToken); keyPart.append(")"); } 
sb.append(keyPart).append(");"); return sb.toString(); } /** * Convers an instance of type <T> to a tuple of ( Map<String, ByteBuffer>, List<ByteBuffer> ). The first map * contains the key column names and the corresponding values. The ByteBuffer list contains the value of the columns * that will be bounded to CQL query parameters. * * @param e the entity object to process. * @param <T> the entity object generic type. * @return a pair whose first element is a Cells object containing key Cell(s) and whose second element contains all * of the other Cell(s). */ public static <T extends IDeepType> Tuple2<Cells, Cells> deepType2tuple(T e) { Pair<Field[], Field[]> fields = AnnotationUtils.filterKeyFields(e.getClass()); Field[] keyFields = fields.left; Field[] otherFields = fields.right; Cells keys = new Cells(e.getClass().getName()); Cells values = new Cells(e.getClass().getName()); for (Field keyField : keyFields) { keys.add(createFromEntity(e, keyField)); } for (Field valueField : otherFields) { values.add(createFromEntity(e, valueField)); } return new Tuple2<>(keys, values); } /** * Generates the part of the query where clause that will hit the Cassandra's secondary indexes. * * @param additionalFilters the map of filters names and values. * @return the query subpart corresponding to the provided additional filters. 
*/ public static String additionalFilterGenerator(Map<String, Serializable> additionalFilters, Filter[] filters, String luceneIndex) { StringBuilder sb = new StringBuilder(""); if (!MapUtils.isEmpty(additionalFilters)) { for (Map.Entry<String, Serializable> entry : additionalFilters.entrySet()) { if (entry.getValue() == null) { continue; } String value = entry.getValue().toString(); if (entry.getValue() instanceof String) { value = singleQuote(value.trim()); } sb.append(" AND ").append(quote(entry.getKey())).append(" = ").append(value); } } if (filters != null) { for (int i = 0; i < filters.length; i++) { FilterType filterType = filters[i].getFilterType(); String value = filters[i].getValue().toString(); if (filters[i].getValue() instanceof String) { value = singleQuote(value.trim()); } switch (filterType) { case IN: List<String> inValues = (List<String>) filters[i].getValue(); sb.append(" AND ").append(quote(filters[i].getField())).append(" IN ").append("("); if (!inValues.isEmpty()) { if (inValues.get(0) instanceof String) { sb.append("'").append(StringUtils.join(((List<String>) filters[i].getValue()), "','")) .append("'"); } else { sb.append(StringUtils.join(((List<String>) filters[i].getValue()), ",")); } } sb.append(")"); break; case BETWEEN: break; case MATCH: sb.append(" AND ").append(luceneIndex).append(" = '"); sb.append(getLuceneWhereClause(filters[i])); sb.append("'"); break; case NEQ: sb.append(" AND ").append(quote(filters[i].getField())).append(" ").append(" < ").append(" ") .append(value).append(" AND ").append(quote(filters[i].getField())).append(" ") .append(" > ").append(" ").append(value); break; default: sb.append(" AND ").append(quote(filters[i].getField())).append(" ").append( OperatorCassandra.getOperatorCassandra(filters[i].getFilterType()).getOperator()) .append(" ").append(value); break; } } } return sb.toString(); } /** * Generates the part of the query where clause that will hit the Cassandra's secondary indexes. 
* * @param additionalFilters the map of filters names and values. * @return the query subpart corresponding to the provided additional filters. */ public static String additionalFilterGenerator(Map<String, Serializable> additionalFilters) { if (MapUtils.isEmpty(additionalFilters)) { return ""; } StringBuilder sb = new StringBuilder(""); for (Map.Entry<String, Serializable> entry : additionalFilters.entrySet()) { if (entry.getValue() == null) { continue; } String value = entry.getValue().toString(); if (entry.getValue() instanceof String) { value = singleQuote(value.trim()); } sb.append(" AND ").append(quote(entry.getKey())).append(" = ").append(value); } return sb.toString(); } private static String getLuceneWhereClause(Filter filter) { String result; StringBuilder sb = new StringBuilder("{filter:{type:\"boolean\",must:["); String column = filter.getField(); String value = (String) filter.getValue(); // Generate query for column String[] processedQuery = processLuceneQueryType(value); sb.append("{type:\""); sb.append(processedQuery[0]); sb.append("\",field:\""); sb.append(column); sb.append("\",value:\""); sb.append(processedQuery[1]); sb.append("\"},"); sb.replace(sb.length() - 1, sb.length(), ""); sb.append("]}}"); result = sb.toString(); return result; } /** * Process a query pattern to determine the type of Lucene query. The supported types of queries are: <li> * <ul> * Wildcard: The query contains * or ?. * </ul> * <ul> * Fuzzy: The query ends with ~ and a number. * </ul> * <ul> * Regex: The query contains [ or ]. * </ul> * <ul> * Match: Default query, supporting escaped symbols: *, ?, [, ], etc. * </ul> * </li> * * @param query The user query. * @return An array with the type of query and the processed query. 
*/ private static String[] processLuceneQueryType(String query) { String[] result = { "", "" }; Pattern escaped = Pattern.compile(".*\\\\\\*.*|.*\\\\\\?.*|.*\\\\\\[.*|.*\\\\\\].*"); Pattern wildcard = Pattern.compile(".*\\*.*|.*\\?.*"); Pattern regex = Pattern.compile(".*\\].*|.*\\[.*"); Pattern fuzzy = Pattern.compile(".*~\\d+"); if (escaped.matcher(query).matches()) { result[0] = "match"; result[1] = query.replace("\\*", "*").replace("\\?", "?").replace("\\]", "]").replace("\\[", "["); } else if (regex.matcher(query).matches()) { result[0] = "regex"; result[1] = query; } else if (fuzzy.matcher(query).matches()) { result[0] = "fuzzy"; result[1] = query; } else if (wildcard.matcher(query).matches()) { result[0] = "wildcard"; result[1] = query; } else { result[0] = "match"; result[1] = query; } // C* Query builder doubles the ' character. result[1] = result[1].replaceAll("^'", "").replaceAll("'$", ""); return result; } /** * Returns the partition key related to a given {@link Cells}. * * @param cells {@link Cells} from Cassandra to extract the partition key. * @param keyValidator Cassandra key type. * @param numberOfKeys Number of keys. * @return Partition key. 
*/
    public static ByteBuffer getPartitionKey(Cells cells, AbstractType<?> keyValidator, int numberOfKeys) {
        if (keyValidator instanceof CompositeType) {
            ByteBuffer[] keys = new ByteBuffer[numberOfKeys];

            // Key components are stored at consecutive positions. They used to be stored at the index of the
            // cell itself, which left null holes or overflowed the array whenever a non-key cell preceded a key cell.
            int keyIdx = 0;
            for (int i = 0; i < cells.size(); i++) {
                Cell c = cells.getCellByIdx(i);

                if (c.isKey()) {
                    keys[keyIdx++] = DataType.serializeValue(c.getValue(), CassandraDeepJobConfig.PROTOCOL_VERSION);
                }
            }

            return CompositeType.build(keys);
        }

        // Non-composite key: the first cell is serialized as the partition key.
        Cell cell = cells.getCellByIdx(0);
        return DataType.serializeValue(cell.getValue(), CassandraDeepJobConfig.PROTOCOL_VERSION);
    }

    /**
     * Creates a Cell carrying the name and key flags of <i>metadata</i> and the deserialized <i>cellValue</i>.
     *
     * @param metadata  the cell describing the column; its value is cast to {@link DataType} and used to
     *                  deserialize <i>cellValue</i>.
     * @param cellValue the serialized value; when null the created cell holds a null value.
     * @return the newly created Cell.
     */
    public static Cell createFromByteBuffer(Cell metadata, ByteBuffer cellValue) {
        String cellName = metadata.getCellName();
        boolean isClusterKey = metadata.isClusterKey();
        boolean isKey = metadata.isKey();

        Object o = null;
        if (cellValue != null) {
            o = ((DataType) metadata.getValue()).deserialize(cellValue, CassandraDeepJobConfig.PROTOCOL_VERSION);
        }

        return Cell.create(cellName, o, isKey, isClusterKey);
    }

    /**
     * Creates a Cell from a field of an entity: the name comes from {@code deepFieldName}, the value from
     * {@code getBeanFieldValue} and the key flags from the field's {@link DeepField} annotation.
     * NOTE(review): the annotation is dereferenced without a null check, so the field is presumably expected to
     * carry {@link DeepField}; confirm against callers.
     *
     * @param e     the entity holding the value.
     * @param field the entity field to convert.
     * @return the newly created Cell.
     */
    public static Cell createFromEntity(IDeepType e, Field field) {
        DeepField annotation = field.getAnnotation(DeepField.class);
        String cellName = deepFieldName(field);
        Object cellValue = getBeanFieldValue(e, field);
        boolean isClusterKey = annotation.isPartOfClusterKey();
        boolean isKey = annotation.isPartOfPartitionKey();

        return Cell.create(cellName, cellValue, isKey, isClusterKey);
    }

    /**
     * Checks if a token is included in the current split.
     *
     * @param deepTokenRange the token range delimiting the split.
     * @param token          {@link Token} to be checked.
     * @return true, if the token is included in the interval; false, otherwise.
*/
    public static boolean isTokenIncludedInRange(DeepTokenRange deepTokenRange, Token<Comparable> token) {
        Comparable rangeStart = (Comparable) deepTokenRange.getStartTokenAsComparable();
        Object rangeEnd = deepTokenRange.getEndTokenAsComparable();

        // A start greater than the end is handled as two half-open intervals joined with OR below.
        boolean startAfterEnd = rangeStart.compareTo(rangeEnd) > 0;
        boolean afterStart = token.token.compareTo(rangeStart) > 0;

        if (startAfterEnd) {
            // Included when the token lies after the start OR up to (and including) the end.
            return afterStart || token.token.compareTo(rangeEnd) <= 0;
        }

        // Regular range: the token must lie after the start AND up to (and including) the end.
        return afterStart && token.token.compareTo(rangeEnd) <= 0;
    }

    /**
     * Tells whether any of the filters targets the given partition key. Field names are compared after quoting,
     * ignoring case.
     *
     * @param filters            the filters to inspect; may be null.
     * @param partitionKeyString the partition key name.
     * @return true if a filter field matches the partition key name; false otherwise.
     */
    public static boolean isFilterdByKey(Filter[] filters, String partitionKeyString) {
        if (filters == null) {
            return false;
        }

        for (Filter filter : filters) {
            if (quote(filter.getField()).equalsIgnoreCase(quote(partitionKeyString))) {
                return true;
            }
        }

        return false;
    }
}