com.stratio.deep.cassandra.util.CassandraUtils.java Source code


Introduction

Here is the source code for com.stratio.deep.cassandra.util.CassandraUtils.java

Source

/*
 * Copyright 2014, Stratio.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.stratio.deep.cassandra.util;

import static com.stratio.deep.cassandra.util.AnnotationUtils.MAP_JAVA_TYPE_TO_ABSTRACT_TYPE;
import static com.stratio.deep.commons.utils.AnnotationUtils.deepFieldName;
import static com.stratio.deep.commons.utils.AnnotationUtils.getBeanFieldValue;
import static com.stratio.deep.commons.utils.Utils.quote;
import static com.stratio.deep.commons.utils.Utils.singleQuote;

import java.io.Serializable;
import java.lang.reflect.Field;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.UUID;
import java.util.regex.Pattern;

import org.apache.cassandra.db.marshal.AbstractType;
import org.apache.cassandra.db.marshal.CompositeType;
import org.apache.cassandra.db.marshal.ListType;
import org.apache.cassandra.db.marshal.MapType;
import org.apache.cassandra.db.marshal.SetType;
import org.apache.cassandra.db.marshal.TimeUUIDType;
import org.apache.cassandra.db.marshal.UUIDType;
import org.apache.cassandra.dht.Token;
import org.apache.commons.collections.MapUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.spark.TaskContext;
import org.apache.spark.rdd.RDD;

import com.datastax.driver.core.DataType;
import com.datastax.driver.core.querybuilder.Batch;
import com.datastax.driver.core.querybuilder.Insert;
import com.datastax.driver.core.querybuilder.QueryBuilder;
import com.stratio.deep.cassandra.config.CassandraDeepJobConfig;
import com.stratio.deep.cassandra.config.ICassandraDeepJobConfig;
import com.stratio.deep.cassandra.config.OperatorCassandra;
import com.stratio.deep.cassandra.cql.DeepCqlRecordWriter;
import com.stratio.deep.cassandra.querybuilder.DefaultQueryBuilder;
import com.stratio.deep.commons.annotations.DeepField;
import com.stratio.deep.commons.entity.Cell;
import com.stratio.deep.commons.entity.Cells;
import com.stratio.deep.commons.entity.IDeepType;
import com.stratio.deep.commons.exception.DeepGenericException;
import com.stratio.deep.commons.filter.Filter;
import com.stratio.deep.commons.filter.FilterType;
import com.stratio.deep.commons.functions.AbstractSerializableFunction2;
import com.stratio.deep.commons.rdd.DeepTokenRange;
import com.stratio.deep.commons.utils.AnnotationUtils;
import com.stratio.deep.commons.utils.Pair;
import com.stratio.deep.commons.utils.Utils;

import scala.Function1;
import scala.Tuple2;
import scala.collection.Iterator;
import scala.reflect.ClassTag;
import scala.reflect.ClassTag$;

/**
 * Utility methods used by the Stratio Deep Cassandra connector: CQL statement generation,
 * marshaller resolution, entity-to-Cells conversion and partition key handling.
 * <p>
 * Created by luca on 16/04/14.
 */
public class CassandraUtils {

    /**
     * Private constructor to prevent instantiation of this utility class.
     */
    private CassandraUtils() {
    }

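    /**
     * Writes the provided RDD to Cassandra using batched CQL3 INSERT statements built with the
     * DataStax query builder. The RDD is first mapped to {@code Tuple2<Cells, Cells>} pairs through
     * <i>transformer</i>, then collected to the driver and written in pages of
     * {@code writeConfig.getBatchSize()} rows.
     *
     * @param rdd         the RDD to persist.
     * @param writeConfig the write configuration object.
     * @param transformer the transformation to apply to each RDD element.
     */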
    public static <W> void doCql3SaveToCassandra(RDD<W> rdd, ICassandraDeepJobConfig<W> writeConfig,
            Function1<W, Tuple2<Cells, Cells>> transformer) {
        if (!writeConfig.getIsWriteConfig()) {
            throw new IllegalArgumentException("Provided configuration object is not suitable for writing");
        }
        Tuple2<Map<String, ByteBuffer>, Map<String, ByteBuffer>> tuple = new Tuple2<>(null, null);

        RDD<Tuple2<Cells, Cells>> mappedRDD = rdd.map(transformer,
                ClassTag$.MODULE$.<Tuple2<Cells, Cells>>apply(tuple.getClass()));

        ((CassandraDeepJobConfig) writeConfig).createOutputTableIfNeeded(mappedRDD.first());

        final int pageSize = writeConfig.getBatchSize();
        int offset = 0;

        List<Tuple2<Cells, Cells>> elements = Arrays.asList((Tuple2<Cells, Cells>[]) mappedRDD.collect());
        List<Tuple2<Cells, Cells>> split;
        do {
            split = elements.subList(pageSize * (offset++), Math.min(pageSize * offset, elements.size()));

            Batch batch = QueryBuilder.batch();

            for (Tuple2<Cells, Cells> t : split) {
                Tuple2<String[], Object[]> bindVars = Utils.prepareTuple4CqlDriver(t);

                Insert insert = QueryBuilder
                        .insertInto(quote(writeConfig.getKeyspace()), quote(writeConfig.getTable()))
                        .values(bindVars._1(), bindVars._2());

                batch.add(insert);
            }
            // Avoid sending an empty batch when the element count is an exact multiple of the page size.
            if (!split.isEmpty()) {
                writeConfig.getSession().execute(batch);
            }

        } while (!split.isEmpty() && split.size() == pageSize);
    }

    /**
     * Provided the mapping function <i>transformer</i> that transforms a generic RDD to an
     * {@code RDD<Tuple2<Cells, Cells>>}, this generic method persists the RDD to the underlying
     * Cassandra datastore.
     *
     * @param rdd         the RDD to persist.
     * @param writeConfig the write configuration object.
     * @param transformer the transformation to apply to each RDD element.
     */
    public static <W> void doSaveToCassandra(RDD<W> rdd, final ICassandraDeepJobConfig<W> writeConfig,
            Function1<W, Tuple2<Cells, Cells>> transformer) {

        if (!writeConfig.getIsWriteConfig()) {
            throw new IllegalArgumentException("Provided configuration object is not suitable for writing");
        }

        Tuple2<Map<String, ByteBuffer>, Map<String, ByteBuffer>> tuple = new Tuple2<>(null, null);

        final RDD<Tuple2<Cells, Cells>> mappedRDD = rdd.map(transformer,
                ClassTag$.MODULE$.<Tuple2<Cells, Cells>>apply(tuple.getClass()));

        ((CassandraDeepJobConfig) writeConfig).createOutputTableIfNeeded(mappedRDD.first());

        ClassTag<Integer> uClassTag = ClassTag$.MODULE$.apply(Integer.class);

        mappedRDD.context().runJob(mappedRDD,
                new AbstractSerializableFunction2<TaskContext, Iterator<Tuple2<Cells, Cells>>, Integer>() {

                    @Override
                    public Integer apply(TaskContext context, Iterator<Tuple2<Cells, Cells>> rows) {

                        try (DeepCqlRecordWriter writer = new DeepCqlRecordWriter(writeConfig,
                                new DefaultQueryBuilder())) {
                            while (rows.hasNext()) {
                                Tuple2<Cells, Cells> row = rows.next();
                                writer.write(row._1(), row._2());
                            }
                        }

                        return null;
                    }
                }, uClassTag);

    }

    /**
     * Returns an instance of the Cassandra validator that matches the provided object.
     *
     * @param obj the object to use to resolve the cassandra marshaller.
     * @param <T> the generic object type.
     * @return an instance of the Cassandra validator that matches the provided object.
     * @throws com.stratio.deep.commons.exception.DeepGenericException if no validator can be found for the specified object.
     */
    public static <T> AbstractType<?> marshallerInstance(T obj) {

        AbstractType<?> abstractType = null;

        if (obj != null) {
            abstractType = MAP_JAVA_TYPE_TO_ABSTRACT_TYPE.get(obj.getClass());

            if (obj instanceof UUID) {
                UUID uuid = (UUID) obj;

                if (uuid.version() == 1) {
                    abstractType = TimeUUIDType.instance;

                } else {
                    abstractType = UUIDType.instance;
                }
            }

            if (abstractType == null) {
                //LIST Case
                if (List.class.isAssignableFrom(obj.getClass())) {

                    List list = (List) obj;
                    if (!list.isEmpty()) {
                        abstractType = ListType.getInstance(marshallerInstance(list.get(0)));
                    }

                }
                // SET Case
                else if (Set.class.isAssignableFrom(obj.getClass())) {
                    Set set = (Set) obj;
                    if (!set.isEmpty()) {
                        java.util.Iterator i = set.iterator();
                        Object o = i.next();
                        abstractType = SetType.getInstance(marshallerInstance(o));
                    }
                }
                // MAP Case
                else if (Map.class.isAssignableFrom(obj.getClass())) {
                    Set set = ((Map) obj).keySet();
                    if (!set.isEmpty()) {
                        java.util.Iterator i = set.iterator();
                        Object o = i.next();
                        abstractType = MapType.getInstance(marshallerInstance(o),
                                marshallerInstance(((Map) obj).get(o)));

                    }

                }
            }

        }

        if (abstractType == null) {
            // Guard against a NullPointerException when obj itself is null.
            String parameterClass = (obj == null) ? "null" : obj.getClass().getCanonicalName();
            throw new DeepGenericException("parameter class " + parameterClass
                    + " does not have a Cassandra marshaller");
        }

        return abstractType;
    }
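
    /*
     * Illustrative sketch (editor's addition, not part of the original class): how a few common
     * values are resolved by marshallerInstance(). The two UUID results follow directly from the
     * version check above; the list result wraps the marshaller resolved for its first element,
     * assuming Integer is registered in MAP_JAVA_TYPE_TO_ABSTRACT_TYPE.
     */
    @SuppressWarnings("unused")
    private static void marshallerInstanceExamples() {
        AbstractType<?> randomUuid = marshallerInstance(UUID.randomUUID());           // UUIDType.instance (version 4)
        AbstractType<?> timeBased = marshallerInstance(
                UUID.fromString("6ba7b810-9dad-11d1-80b4-00c04fd430c8"));             // TimeUUIDType.instance (version 1)
        AbstractType<?> listOfInts = marshallerInstance(Arrays.asList(1, 2, 3));      // ListType of the Integer marshaller
    }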

    /**
     * Generates the UPDATE query for the provided cells. The SET clause includes every column in
     * <i>values</i>, even those holding a null value; the WHERE clause is built from the partition
     * and clustering key cells in <i>keys</i>. Both clauses use bind markers, so the actual values
     * are bound later by the CQL record writer.
     *
     * @param keys               the row keys wrapped inside a Cells object.
     * @param values             all the other row columns wrapped inside a Cells object.
     * @param outputKeyspace     the output keyspace.
     * @param outputColumnFamily the output column family.
     * @return the update query statement.
     */
    public static String updateQueryGenerator(Cells keys, Cells values, String outputKeyspace,
            String outputColumnFamily) {

        StringBuilder sb = new StringBuilder("UPDATE ").append(outputKeyspace).append(".")
                .append(outputColumnFamily).append(" SET ");

        int k = 0;

        StringBuilder keyClause = new StringBuilder(" WHERE ");
        for (Cell cell : keys.getCells()) {
            if (cell.isKey() || cell.isClusterKey()) {
                if (k > 0) {
                    keyClause.append(" AND ");
                }

                keyClause.append(String.format("%s = ?", quote(cell.getCellName())));

                ++k;
            }

        }

        k = 0;
        for (Cell cell : values.getCells()) {
            if (k > 0) {
                sb.append(", ");
            }

            sb.append(String.format("%s = ?", quote(cell.getCellName())));
            ++k;
        }

        sb.append(keyClause).append(";");

        return sb.toString();
    }
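
    /*
     * Minimal usage sketch (editor's addition, not part of the original class): the Cell factory
     * calls mirror the ones used in createFromEntity(). For a hypothetical "users" table this
     * produces an "UPDATE ks.users SET <email> = ?, <age> = ? WHERE <id> = ?;" statement, with
     * every column name passed through quote().
     */
    @SuppressWarnings("unused")
    private static String updateQueryExample() {
        Cells keys = new Cells("example");
        keys.add(Cell.create("id", UUID.randomUUID(), true, false));

        Cells values = new Cells("example");
        values.add(Cell.create("email", "user@example.com", false, false));
        values.add(Cell.create("age", 30, false, false));

        return updateQueryGenerator(keys, values, "ks", "users");
    }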

    /**
     * Generates a CREATE TABLE CQL statement from the given Cells description.
     *
     * @param keys               the row keys wrapped inside a Cells object.
     * @param values             all the other row columns wrapped inside a Cells object.
     * @param outputKeyspace     the output keyspace.
     * @param outputColumnFamily the output column family.
     * @return the create table statement.
     */
    public static String createTableQueryGenerator(Cells keys, Cells values, String outputKeyspace,
            String outputColumnFamily) {

        if (keys == null || StringUtils.isEmpty(outputKeyspace) || StringUtils.isEmpty(outputColumnFamily)) {
            throw new DeepGenericException(
                    "keys cannot be null, and outputKeyspace and outputColumnFamily cannot be null or empty");
        }

        StringBuilder sb = new StringBuilder("CREATE TABLE ").append(outputKeyspace).append(".")
                .append(outputColumnFamily).append(" (");

        List<String> partitionKey = new ArrayList<>();
        List<String> clusterKey = new ArrayList<>();

        boolean isFirstField = true;

        for (Cell key : keys) {

            String cellName = quote(key.getCellName());

            if (!isFirstField) {
                sb.append(", ");
            }

            // CellValidator cellValidator = CellValidator.cellValidator(key.getCellValue());
            sb.append(cellName).append(" ")
                    .append(CassandraUtils.marshallerInstance(key.getValue()).asCQL3Type().toString());

            if (key.isKey()) {
                partitionKey.add(cellName);
            } else if (key.isClusterKey()) {
                clusterKey.add(cellName);
            }

            isFirstField = false;
        }

        if (values != null) {
            for (Cell cell : values) {
                // Skip null-valued cells: their CQL type cannot be inferred and a bare ", " would corrupt the statement.
                if (cell.getValue() != null) {
                    sb.append(", ").append(quote(cell.getCellName())).append(" ")
                            .append(CassandraUtils.marshallerInstance(cell.getValue()).asCQL3Type().toString());
                }
            }
        }

        StringBuilder partitionKeyToken = new StringBuilder("(");

        isFirstField = true;
        for (String s : partitionKey) {
            if (!isFirstField) {
                partitionKeyToken.append(", ");
            }
            partitionKeyToken.append(s);
            isFirstField = false;
        }

        partitionKeyToken.append(")");

        StringBuilder clusterKeyToken = new StringBuilder("");

        isFirstField = true;
        for (String s : clusterKey) {
            if (!isFirstField) {
                clusterKeyToken.append(", ");
            }
            clusterKeyToken.append(s);
            isFirstField = false;
        }

        StringBuilder keyPart = new StringBuilder(", PRIMARY KEY ");

        if (!clusterKey.isEmpty()) {
            keyPart.append("(");
        }

        keyPart.append(partitionKeyToken);

        if (!clusterKey.isEmpty()) {
            keyPart.append(", ");
            keyPart.append(clusterKeyToken);
            keyPart.append(")");
        }

        sb.append(keyPart).append(");");

        return sb.toString();
    }
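
    /*
     * Minimal usage sketch (editor's addition, not part of the original class): with one partition
     * key, one clustering key and one regular column the generated statement has roughly the shape
     * "CREATE TABLE ks.users (<id> uuid, <day> int, <email> text, PRIMARY KEY ((<id>), <day>));",
     * with the CQL types resolved by marshallerInstance() and the column names passed through quote().
     */
    @SuppressWarnings("unused")
    private static String createTableQueryExample() {
        Cells keys = new Cells("example");
        keys.add(Cell.create("id", UUID.randomUUID(), true, false));   // partition key
        keys.add(Cell.create("day", 20140416, false, true));           // clustering key

        Cells values = new Cells("example");
        values.add(Cell.create("email", "user@example.com", false, false));

        return createTableQueryGenerator(keys, values, "ks", "users");
    }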

    /**
     * Converts an instance of type {@code T} to a {@code Tuple2<Cells, Cells>}. The first Cells
     * object contains the key column cells; the second contains all the other cells, whose values
     * will be bound to CQL query parameters.
     *
     * @param e   the entity object to process.
     * @param <T> the entity object generic type.
     * @return a pair whose first element is a Cells object containing key Cell(s) and whose second element contains all
     * of the other Cell(s).
     */
    public static <T extends IDeepType> Tuple2<Cells, Cells> deepType2tuple(T e) {

        Pair<Field[], Field[]> fields = AnnotationUtils.filterKeyFields(e.getClass());

        Field[] keyFields = fields.left;
        Field[] otherFields = fields.right;

        Cells keys = new Cells(e.getClass().getName());
        Cells values = new Cells(e.getClass().getName());

        for (Field keyField : keyFields) {
            keys.add(createFromEntity(e, keyField));
        }
        for (Field valueField : otherFields) {
            values.add(createFromEntity(e, valueField));
        }

        return new Tuple2<>(keys, values);
    }
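
    /*
     * Illustrative sketch (editor's addition, not part of the original class): for a hypothetical
     * entity such as
     *
     *   public class UserEntity implements IDeepType {
     *       @DeepField(isPartOfPartitionKey = true)
     *       private UUID id;
     *
     *       @DeepField
     *       private String email;
     *       // ... plus whatever IDeepType requires ...
     *   }
     *
     * deepType2tuple(user) returns a Tuple2 whose _1() holds the "id" key cell and whose _2() holds
     * the remaining cells, both built through createFromEntity() below.
     */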

    /**
     * Generates the part of the query WHERE clause that will hit Cassandra's secondary indexes.
     *
     * @param additionalFilters the map of filter names and values.
     * @param filters           additional {@link Filter}s to translate into CQL fragments.
     * @param luceneIndex       the name of the Lucene index column used for MATCH filters.
     * @return the query subpart corresponding to the provided additional filters.
     */
    public static String additionalFilterGenerator(Map<String, Serializable> additionalFilters, Filter[] filters,
            String luceneIndex) {

        StringBuilder sb = new StringBuilder("");

        if (!MapUtils.isEmpty(additionalFilters)) {
            for (Map.Entry<String, Serializable> entry : additionalFilters.entrySet()) {
                if (entry.getValue() == null) {
                    continue;
                }

                String value = entry.getValue().toString();

                if (entry.getValue() instanceof String) {
                    value = singleQuote(value.trim());
                }

                sb.append(" AND ").append(quote(entry.getKey())).append(" = ").append(value);
            }
        }

        if (filters != null) {
            for (int i = 0; i < filters.length; i++) {

                FilterType filterType = filters[i].getFilterType();

                String value = filters[i].getValue().toString();

                if (filters[i].getValue() instanceof String) {
                    value = singleQuote(value.trim());
                }

                switch (filterType) {

                case IN:
                    List<?> inValues = (List<?>) filters[i].getValue();

                    sb.append(" AND ").append(quote(filters[i].getField())).append(" IN ").append("(");

                    if (!inValues.isEmpty()) {
                        if (inValues.get(0) instanceof String) {
                            sb.append("'").append(StringUtils.join(inValues, "','")).append("'");
                        } else {
                            sb.append(StringUtils.join(inValues, ","));
                        }
                    }
                    sb.append(")");
                    break;
                case BETWEEN:
                    break;
                case MATCH:
                    sb.append(" AND ").append(luceneIndex).append(" = '");
                    sb.append(getLuceneWhereClause(filters[i]));
                    sb.append("'");
                    break;
                case NEQ:
                    sb.append(" AND ").append(quote(filters[i].getField())).append(" < ").append(value)
                            .append(" AND ").append(quote(filters[i].getField())).append(" > ").append(value);
                    break;
                default:
                    sb.append(" AND ").append(quote(filters[i].getField())).append(" ").append(
                            OperatorCassandra.getOperatorCassandra(filters[i].getFilterType()).getOperator())
                            .append(" ").append(value);
                    break;
                }
            }
        }

        return sb.toString();
    }
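
    /*
     * Output shape sketch (editor's addition, not part of the original class): each map entry and
     * each filter contributes one " AND ..." fragment appended to the caller's WHERE clause. A
     * plain comparison filter yields " AND <field> <operator> <value>", an IN filter on strings
     * yields " AND <field> IN ('a','b')", and a MATCH filter is rewritten into a Lucene JSON
     * clause on the provided luceneIndex column via getLuceneWhereClause().
     */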

    /**
     * Generates the part of the query WHERE clause that will hit Cassandra's secondary indexes.
     * String values are trimmed and single-quoted.
     *
     * @param additionalFilters the map of filter names and values.
     * @return the query subpart corresponding to the provided additional filters.
     */
    public static String additionalFilterGenerator(Map<String, Serializable> additionalFilters) {
        if (MapUtils.isEmpty(additionalFilters)) {
            return "";
        }

        StringBuilder sb = new StringBuilder("");

        for (Map.Entry<String, Serializable> entry : additionalFilters.entrySet()) {
            if (entry.getValue() == null) {
                continue;
            }

            String value = entry.getValue().toString();

            if (entry.getValue() instanceof String) {
                value = singleQuote(value.trim());
            }

            sb.append(" AND ").append(quote(entry.getKey())).append(" = ").append(value);
        }

        return sb.toString();
    }
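
    /*
     * Minimal usage sketch (editor's addition, not part of the original class): string values are
     * trimmed and single-quoted, so a single "email" entry yields something like
     * " AND <email> = 'user@example.com'" (the column name goes through quote()).
     */
    @SuppressWarnings("unused")
    private static String additionalFilterExample() {
        Map<String, Serializable> extraFilters = new java.util.HashMap<>();
        extraFilters.put("email", "user@example.com");
        return additionalFilterGenerator(extraFilters);
    }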

    private static String getLuceneWhereClause(Filter filter) {
        String result;

        StringBuilder sb = new StringBuilder("{filter:{type:\"boolean\",must:[");

        String column = filter.getField();
        String value = (String) filter.getValue();

        // Generate query for column
        String[] processedQuery = processLuceneQueryType(value);
        sb.append("{type:\"");
        sb.append(processedQuery[0]);
        sb.append("\",field:\"");
        sb.append(column);
        sb.append("\",value:\"");
        sb.append(processedQuery[1]);
        sb.append("\"},");

        // Drop the trailing comma before closing the "must" array.
        sb.replace(sb.length() - 1, sb.length(), "");
        sb.append("]}}");

        result = sb.toString();

        return result;

    }

    /**
     * Process a query pattern to determine the type of Lucene query. The supported types of queries are:
     * <ul>
     * <li>Wildcard: the query contains * or ?.</li>
     * <li>Fuzzy: the query ends with ~ and a number.</li>
     * <li>Regex: the query contains [ or ].</li>
     * <li>Match: the default query type, supporting escaped symbols: *, ?, [, ], etc.</li>
     * </ul>
     *
     * @param query The user query.
     * @return An array with the type of query and the processed query.
     */
    private static String[] processLuceneQueryType(String query) {
        String[] result = { "", "" };
        Pattern escaped = Pattern.compile(".*\\\\\\*.*|.*\\\\\\?.*|.*\\\\\\[.*|.*\\\\\\].*");
        Pattern wildcard = Pattern.compile(".*\\*.*|.*\\?.*");
        Pattern regex = Pattern.compile(".*\\].*|.*\\[.*");
        Pattern fuzzy = Pattern.compile(".*~\\d+");
        if (escaped.matcher(query).matches()) {
            result[0] = "match";
            result[1] = query.replace("\\*", "*").replace("\\?", "?").replace("\\]", "]").replace("\\[", "[");
        } else if (regex.matcher(query).matches()) {
            result[0] = "regex";
            result[1] = query;
        } else if (fuzzy.matcher(query).matches()) {
            result[0] = "fuzzy";
            result[1] = query;
        } else if (wildcard.matcher(query).matches()) {
            result[0] = "wildcard";
            result[1] = query;
        } else {
            result[0] = "match";
            result[1] = query;
        }
        // C* Query builder doubles the ' character.
        result[1] = result[1].replaceAll("^'", "").replaceAll("'$", "");
        return result;
    }
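
    /*
     * Classification sketch (editor's addition, not part of the original class): a few inputs and
     * the branch of processLuceneQueryType() they hit. The escaped form is checked first, so
     * escaped wildcards fall back to a plain match query with the escapes removed.
     */
    @SuppressWarnings("unused")
    private static void luceneQueryTypeExamples() {
        String[] wildcard = processLuceneQueryType("jo?n");     // { "wildcard", "jo?n" }
        String[] fuzzy = processLuceneQueryType("john~2");      // { "fuzzy", "john~2" }
        String[] regex = processLuceneQueryType("j[aeiou]hn");  // { "regex", "j[aeiou]hn" }
        String[] escaped = processLuceneQueryType("jo\\?n");    // { "match", "jo?n" } after unescaping
        String[] plain = processLuceneQueryType("john");        // { "match", "john" }
    }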

    /**
     * Returns the partition key related to a given {@link Cells}.
     *
     * @param cells        {@link Cells} from Cassandra to extract the partition key.
     * @param keyValidator Cassandra key type.
     * @param numberOfKeys Number of keys.
     * @return Partition key.
     */
    public static ByteBuffer getPartitionKey(Cells cells, AbstractType<?> keyValidator, int numberOfKeys) {
        ByteBuffer partitionKey;
        if (keyValidator instanceof CompositeType) {
            ByteBuffer[] keys = new ByteBuffer[numberOfKeys];

            // Collect the key components with their own index, so the key cells are not required
            // to occupy the first positions of the Cells object.
            int keyIdx = 0;
            for (int i = 0; i < cells.size() && keyIdx < numberOfKeys; i++) {
                Cell c = cells.getCellByIdx(i);

                if (c.isKey()) {
                    keys[keyIdx++] = DataType.serializeValue(c.getValue(), CassandraDeepJobConfig.PROTOCOL_VERSION);
                }
            }

            partitionKey = CompositeType.build(keys);
        } else {
            Cell cell = cells.getCellByIdx(0);
            partitionKey = DataType.serializeValue(cell.getValue(), CassandraDeepJobConfig.PROTOCOL_VERSION);
        }
        return partitionKey;
    }
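
    /*
     * Minimal usage sketch (editor's addition, not part of the original class): for a single-column
     * partition key the first cell's value is serialized directly with the driver's DataType codec;
     * composite keys go through CompositeType.build() instead.
     */
    @SuppressWarnings("unused")
    private static ByteBuffer partitionKeyExample() {
        Cells cells = new Cells("example");
        cells.add(Cell.create("id", UUID.randomUUID(), true, false));
        return getPartitionKey(cells, UUIDType.instance, 1);
    }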

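    /**
     * Builds a {@link Cell} from a metadata cell and a serialized value, deserializing the
     * ByteBuffer with the {@link DataType} carried as the metadata cell's value.
     *
     * @param metadata  the cell carrying the column name, key flags and {@link DataType}.
     * @param cellValue the serialized column value (may be null).
     * @return the deserialized {@link Cell}.
     */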
    public static Cell createFromByteBuffer(Cell metadata, ByteBuffer cellValue) {
        String cellName = metadata.getCellName();
        boolean isClusterKey = metadata.isClusterKey();
        boolean isKey = metadata.isKey();

        Object o = null;
        if (cellValue != null) {
            o = ((DataType) metadata.getValue()).deserialize(cellValue, CassandraDeepJobConfig.PROTOCOL_VERSION);

        }

        return Cell.create(cellName, o, isKey, isClusterKey);
    }

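    /**
     * Builds a {@link Cell} from a {@link DeepField}-annotated entity field, using the annotation
     * to decide whether the cell belongs to the partition key or to the clustering key.
     *
     * @param e     the entity instance holding the value.
     * @param field the annotated field to read.
     * @return the resulting {@link Cell}.
     */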
    public static Cell createFromEntity(IDeepType e, Field field) {
        DeepField annotation = field.getAnnotation(DeepField.class);
        String cellName = deepFieldName(field);
        Object cellValue = getBeanFieldValue(e, field);
        boolean isClusterKey = annotation.isPartOfClusterKey();
        boolean isKey = annotation.isPartOfPartitionKey();
        return Cell.create(cellName, cellValue, isKey, isClusterKey);

    }

    /**
     * Checks if a token is included in the current split. The range is treated as the half-open
     * interval (startToken, endToken], with wrap-around ranges (start greater than end) handled as well.
     *
     * @param deepTokenRange the split's {@link DeepTokenRange}.
     * @param token          {@link Token} to be checked.
     * @return true, if the token is included in the interval; false, otherwise.
     */
    public static boolean isTokenIncludedInRange(DeepTokenRange deepTokenRange, Token<Comparable> token) {

        boolean isIncluded = false;

        if (((Comparable) deepTokenRange.getStartTokenAsComparable())
                .compareTo(deepTokenRange.getEndTokenAsComparable()) <= 0) {
            isIncluded = token.token.compareTo(deepTokenRange.getStartTokenAsComparable()) > 0;

            if (isIncluded) {
                isIncluded = token.token.compareTo(deepTokenRange.getEndTokenAsComparable()) <= 0;
            }
        } else {
            isIncluded = token.token.compareTo(deepTokenRange.getStartTokenAsComparable()) > 0;

            if (!isIncluded) {
                isIncluded = token.token.compareTo(deepTokenRange.getEndTokenAsComparable()) <= 0;
            }
        }

        return isIncluded;
    }

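    /**
     * Checks whether any of the provided filters targets the given partition key column.
     *
     * @param filters            the filters to inspect (may be null).
     * @param partitionKeyString the partition key column name.
     * @return true if some filter's field matches the partition key column (case-insensitively); false otherwise.
     */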
    public static boolean isFilterdByKey(Filter[] filters, String partitionKeyString) {

        if (filters != null) {
            for (int i = 0; i < filters.length; i++) {
                if (quote(filters[i].getField()).equalsIgnoreCase(quote(partitionKeyString))) {
                    return true;
                }
            }
        }
        return false;
    }

}