org.apache.cassandra.db.index.sasi.plan.Operation.java Source code

Introduction

Here is the source code for org.apache.cassandra.db.index.sasi.plan.Operation.java
Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.cassandra.db.index.sasi.plan;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.*;

import org.apache.cassandra.config.ColumnDefinition;
import org.apache.cassandra.cql3.ColumnNameBuilder;
import org.apache.cassandra.db.Column;
import org.apache.cassandra.db.Row;
import org.apache.cassandra.db.index.sasi.conf.ColumnIndex;
import org.apache.cassandra.db.index.sasi.analyzer.AbstractAnalyzer;
import org.apache.cassandra.db.index.sasi.disk.Token;
import org.apache.cassandra.db.index.sasi.plan.Expression.Op;
import org.apache.cassandra.db.index.sasi.utils.RangeIntersectionIterator;
import org.apache.cassandra.db.index.sasi.utils.RangeIterator;
import org.apache.cassandra.db.index.sasi.utils.RangeUnionIterator;
import org.apache.cassandra.db.marshal.AbstractType;
import org.apache.cassandra.thrift.IndexExpression;
import org.apache.cassandra.thrift.IndexOperator;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.*;

public class Operation extends RangeIterator<Long, Token> {
    /**
     * Logical operator used to combine the boolean results of two sides of an operation.
     */
    public enum OperationType {
        AND, OR;

        /**
         * Combines two already computed boolean results using this operator.
         *
         * @param a the first operand
         * @param b the second operand
         * @return {@code a AND b} for {@link #AND}, {@code a OR b} for {@link #OR}
         */
        public boolean apply(boolean a, boolean b) {
            if (this == AND)
                return a && b;

            if (this == OR)
                return a || b;

            // only reachable if a new constant is added without being handled above
            throw new AssertionError();
        }
    }

    private final QueryController controller;

    protected final OperationType op;
    protected final AbstractType<?> comparator;

    protected final ListMultimap<ByteBuffer, Expression> expressions;
    protected final RangeIterator<Long, Token> range;

    protected Operation left, right;

    /**
     * Creates a single node of the operation tree; nodes are produced by {@link Builder#complete()}.
     *
     * @param operation   logical operator this node applies to its sides
     * @param comparator  column name comparator, used to render column names in error messages
     * @param controller  query controller used to resolve column definitions and to release indexes on close
     * @param expressions analyzed expressions grouped by column name; {@code null} for nodes
     *                    which only join the results of their sub-trees
     * @param range       iterator this node delegates its iteration to
     * @param left        left sub-tree, may be {@code null}
     * @param right       right sub-tree, may be {@code null}
     */
    private Operation(OperationType operation, AbstractType<?> comparator, QueryController controller,
            ListMultimap<ByteBuffer, Expression> expressions, RangeIterator<Long, Token> range, Operation left,
            Operation right) {
        super(range);

        // tree structure
        this.left = left;
        this.right = right;

        // evaluation state of this node
        this.op = operation;
        this.expressions = expressions;
        this.range = range;

        // collaborators
        this.comparator = comparator;
        this.controller = controller;
    }

    /**
     * Recursive "satisfies" checks based on operation
     * and data from the lower level members using depth-first search
     * and bubbling the results back to the top level caller.
     *
     * Most of the work here is done by {@link #localSatisfiedBy(Row, ColumnNameBuilder, boolean)}
     * (see its comment for details); if there are no local expressions
     * assigned to the Operation, it will call satisfiedBy on its children.
     *
     * Query: first_name = X AND (last_name = Y OR address = XYZ AND street = IL AND city = C) OR (state = 'CA' AND country = 'US')
     * Row: key1: (first_name: X, last_name: Z, address: XYZ, street: IL, city: C, state: NY, country:US)
     *
     * #1                       OR
     *                        /    \
     * #2       (first_name) AND   AND (state, country)
     *                          \
     * #3            (last_name) OR
     *                             \
     * #4                          AND (address, street, city)
     *
     *
     * Evaluation of the key1 is top-down depth-first search:
     *
     * --- going down ---
     * Level #1 is evaluated, OR expression has to pull results from its children which are at level #2 and OR them together,
     * Level #2 AND (state, country) could be evaluated right away, AND (first_name) refers to its "right" child from level #3
     * Level #3 OR (last_name) requests results from level #4
     * Level #4 AND (address, street, city) does logical AND between its 3 fields, returns result back to level #3.
     * --- bubbling up ---
     * Level #3 computes OR between AND (address, street, city) result and its "last_name" expression
     * Level #2 computes AND between "first_name" and result of level #3, AND (state, country) which is already computed
     * Level #1 does OR between results of AND (first_name) and AND (state, country) and returns final result.
     *
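     * @param row The row to check.
     * @param builder Column name builder, passed through unchanged to the local check and to child operations.
     * @param allowMissingColumns When false, a column which is referenced by a local expression but is
     *                            missing (or no longer live) in the row results in an IllegalStateException.
     * @return true if the given Row satisfies the expression tree,
     *         false otherwise.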
     */
    public boolean satisfiedBy(Row row, ColumnNameBuilder builder, boolean allowMissingColumns) {
        final boolean hasLocalExpressions = expressions != null && !expressions.isEmpty();

        if (hasLocalExpressions) {
            // local expressions play the role of the left side of this node
            final boolean local = localSatisfiedBy(row, builder, allowMissingColumns);

            // nothing is chained to the right, which means this node is the last
            // in the sequence and the local verdict is the final one
            if (right == null)
                return local;

            return op.apply(local, right.satisfiedBy(row, builder, allowMissingColumns));
        }

        // pure join node: the left sub-tree was skipped because it had
        // no indexes attached, so the right side alone decides the outcome
        if (left == null)
            return right != null && right.satisfiedBy(row, builder, allowMissingColumns);

        // both sides are always evaluated (no short-circuit), left before right
        final boolean fromLeft = left.satisfiedBy(row, builder, allowMissingColumns);

        // NOTE(review): a missing right sub-tree counts as "false" here, so an AND node
        // with only a left child never matches - confirm that builders never produce such a node
        final boolean fromRight = right != null && right.satisfiedBy(row, builder, allowMissingColumns);

        return op.apply(fromLeft, fromRight);
    }

    /**
     * Check every expression in the analyzed list to figure out if the
     * columns in the given row match them, combining the per-column results
     * with the logical operation set on the current operation node.
     *
     * The algorithm is as follows: for every given expression from analyzed
     * list get corresponding column from the Row:
     *   - apply {@link Expression#contains(ByteBuffer)}
     *     method to figure out if it's satisfied;
     *   - apply logical operation between boolean accumulator and current boolean result;
     *   - if result == false and node's operation is AND return right away;
     *
     * After all of the expressions have been evaluated return resulting accumulator variable.
     *
     * Example:
     *
     * Operation = (op: AND, columns: [first_name = p, 5 < age < 7, last_name: y])
     * Row = (first_name: pavel, last_name: y, age: 6, timestamp: 15)
     *
     * #1 get "first_name" = p (expressions)
     *      - row-get "first_name"                      => "pavel"
     *      - compare "pavel" against "p"               => true (current)
     *      - set accumulator current                   => true (because this is expression #1)
     *
     * #2 get "last_name" = y (expressions)
     *      - row-get "last_name"                       => "y"
     *      - compare "y" against "y"                   => true (current)
     *      - set accumulator to accumulator & current  => true
     *
     * #3 get 5 < "age" < 7 (expressions)
     *      - row-get "age"                             => "6"
     *      - compare 5 < 6 < 7                         => true (current)
     *      - set accumulator to accumulator & current  => true
     *
     * #4 return accumulator => true (row satisfied all of the conditions)
     *
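     * @param row The row to check.
     * @param builder Currently unused.
     * @param allowMissingColumns When false, a missing (or no longer live) column referenced by
     *                            an expression results in an IllegalStateException.
     * @return true if the given Row satisfies the analyzed expressions of this node
     *         (or if no column was evaluated at all), false otherwise.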
     */
    private boolean localSatisfiedBy(Row row, @SuppressWarnings("unused") ColumnNameBuilder builder,
            boolean allowMissingColumns) {
        if (row == null || row.cf == null)
            return false;

        final long now = System.currentTimeMillis();

        boolean verdict = false;
        boolean anyEvaluated = false;

        for (ByteBuffer columnName : expressions.keySet()) {
            ColumnDefinition definition = controller.getColumn(columnName);

            // unknown columns and the key alias are never validated here
            if (definition == null || definition.type == ColumnDefinition.Type.PARTITION_KEY)
                continue;

            Column column = row.cf.getColumn(columnName);
            boolean missing = column == null || !column.isLive(now);

            if (missing && !allowMissingColumns) {
                throw new IllegalStateException(
                        "All indexed columns should be included into the column slice, missing: "
                                + comparator.getString(columnName));
            }

            boolean matched = columnMatches(expressions.get(columnName), column, missing);

            // the very first evaluated column only seeds the accumulator
            if (!anyEvaluated) {
                anyEvaluated = true;
                verdict = matched;
                continue;
            }

            verdict = op.apply(verdict, matched);

            // a single false is enough to fail the whole AND group
            if (!verdict && op == OperationType.AND)
                return false;
        }

        // when nothing was evaluated the row is considered a match
        return !anyEvaluated || verdict;
    }

    /**
     * Evaluates all of the expressions attached to a single column against the column value.
     *
     * Multiple expressions on the same column effectively mean an OR, e.g. comment = 'x y z' could be
     * split by the analyzer into 'comment' EQ 'x', 'comment' EQ 'y', 'comment' EQ 'z'; it is enough
     * for one of them to match, provided none of the NOT_EQ expressions (which are always at the end
     * of the list) is hit. The list is walked from the end, so the walk can stop at the first
     * satisfied EQ/RANGE expression once all of the NOT_EQ expressions have been checked.
     *
     * @param filters expressions of the column, NOT_EQ ones last
     * @param column  column to check, only dereferenced when {@code missing} is false
     * @param missing true if the column is absent from the row or no longer live
     * @return true if the column satisfies its expressions, false otherwise
     */
    private static boolean columnMatches(List<Expression> filters, Column column, boolean missing) {
        boolean matched = false;

        ListIterator<Expression> cursor = filters.listIterator(filters.size());
        while (cursor.hasPrevious()) {
            Expression expression = cursor.previous();

            matched = !missing && expression.contains(column.value());

            if (expression.getOp() == Op.NOT_EQ) {
                // NOT_EQ inverts the match flag; a negative result after the inversion
                // is a positive hit on the not-equals clause, which rules the column out
                matched = !matched;
                if (!matched)
                    return false;
            } else if (matched || missing) {
                // EQ/RANGE is satisfied, or there is no value to compare against
                return matched;
            }
        }

        return matched;
    }

    /**
     * Converts the raw index expressions of a single operation group into analyzed
     * {@link Expression}s grouped by column name.
     *
     * The given list is sorted in place (by column name, then by descending operator priority),
     * expressions which have a logical operator set are skipped, and the value of every remaining
     * expression is run through the analyzer of its column.
     *
     * @param controller  query controller used to look up column indexes and definitions
     * @param comparator  comparator used to order expressions by column name
     * @param op          logical operator of the group, decides whether range bounds get merged
     * @param expressions expressions of the group; note that this list is re-ordered by the call
     * @return analyzed expressions keyed by column name
     */
    @VisibleForTesting
    protected static ListMultimap<ByteBuffer, Expression> analyzeGroup(QueryController controller,
            final AbstractType<?> comparator, OperationType op, List<IndexExpression> expressions) {
        ListMultimap<ByteBuffer, Expression> analyzed = ArrayListMultimap.create();

        // sort all of the expressions in the operation by name and priority of the logical operator
        // this gives us an efficient way to handle inequality and combining into ranges without extra processing
        // and converting expressions from one type to another.
        Collections.sort(expressions, new Comparator<IndexExpression>() {
            @Override
            public int compare(IndexExpression a, IndexExpression b) {
                int cmp = comparator.compare(ByteBuffer.wrap(a.getColumn_name()),
                        ByteBuffer.wrap(b.getColumn_name()));
                return cmp == 0 ? -Integer.compare(getPriority(a.getOp()), getPriority(b.getOp())) : cmp;
            }
        });

        for (IndexExpression e : expressions) {
            if (e.isSetLogicalOp())
                continue;

            ByteBuffer name = ByteBuffer.wrap(e.getColumn_name());
            ColumnIndex columnIndex = controller.getIndex(name);

            // view of the expressions collected so far for this column, additions go into "analyzed"
            List<Expression> perColumn = analyzed.get(name);

            if (columnIndex == null) {
                // the column has no index, wrap its definition so it can still be analyzed
                ColumnDefinition nonIndexedColumn = controller.getColumn(name);
                columnIndex = new ColumnIndex(controller.getKeyValidator(), nonIndexedColumn,
                        controller.getComparator(nonIndexedColumn));
            }

            AbstractAnalyzer analyzer = columnIndex.getAnalyzer();
            analyzer.reset(ByteBuffer.wrap(e.getValue()));

            IndexOperator operator = e.getOp();

            // EQ/NOT_EQ can have multiple expressions e.g. text = "Hello World",
            // becomes text = "Hello" OR text = "World" because "space" is always interpreted as a split point (by analyzer),
            // NOT_EQ is made an independent expression only in case of pre-existing multiple EQ expressions, or
            // if there is no EQ operations and NOT_EQ is met or a single NOT_EQ expression present,
            // in such case we know exactly that there would be no more EQ/RANGE expressions for given column
            // since NOT_EQ has the lowest priority.
            boolean independent = operator == IndexOperator.EQ
                    || (operator == IndexOperator.NOT_EQ
                            && (perColumn.isEmpty() || perColumn.size() > 1
                                    || perColumn.get(0).getOp() == Op.NOT_EQ));

            if (independent) {
                while (analyzer.hasNext()) {
                    // plain instance instead of an anonymous subclass per token
                    Expression single = new Expression(controller, columnIndex);
                    single.add(operator, analyzer.next());
                    perColumn.add(single);
                }
            } else {
                // "range" or not-equals operator, combines both bounds together into the single expression,
                // iff operation of the group is AND, otherwise we are forced to create separate expressions,
                // not-equals is combined with the range iff operator is AND.
                Expression range;
                if (perColumn.isEmpty() || op != OperationType.AND) {
                    range = new Expression(controller, columnIndex);
                    perColumn.add(range);
                } else {
                    range = Iterables.getLast(perColumn);
                }

                while (analyzer.hasNext())
                    range.add(operator, analyzer.next());
            }
        }

        return analyzed;
    }

    /**
     * Maps an index operator to its sorting priority; for expressions on the same column
     * a higher priority is ordered earlier by {@code analyzeGroup}.
     *
     * @param op operator to rank
     * @return 4 for EQ, 3 for GT/GTE, 2 for LT/LTE, 1 for NOT_EQ and 0 for anything else
     */
    private static int getPriority(IndexOperator op) {
        final int priority;

        switch (op) {
        case EQ:
            priority = 4;
            break;

        case GT:
        case GTE:
            priority = 3;
            break;

        case LT:
        case LTE:
            priority = 2;
            break;

        case NOT_EQ:
            priority = 1;
            break;

        default:
            priority = 0;
            break;
        }

        return priority;
    }

    /**
     * Produces the next token by delegating to the wrapped range.
     *
     * @return next token of the range, or the end-of-data marker if there is
     *         no range or the range is exhausted
     */
    @Override
    protected Token computeNext() {
        if (range == null || !range.hasNext())
            return endOfData();

        return range.next();
    }

    /**
     * Forwards the skip request to the wrapped range, does nothing if there is no range.
     *
     * @param nextToken token to skip to
     */
    @Override
    protected void performSkipTo(Long nextToken) {
        if (range == null)
            return;

        range.skipTo(nextToken);
    }

    /**
     * Closes this operation by asking the query controller to release
     * the indexes associated with it.
     *
     * @throws IOException declared by the overridden method
     */
    @Override
    public void close() throws IOException {
        controller.releaseIndexes(this);
    }

    /**
     * Mutable builder of the {@link Operation} tree.
     *
     * A builder either carries its own expressions (optionally chained with a right
     * sub-tree) or, when it has no expressions, joins its left and right sub-trees.
     */
    public static class Builder {
        private final QueryController controller;

        protected final OperationType op;
        protected final AbstractType<?> comparator;
        protected final List<IndexExpression> expressions;

        protected Builder left, right;

        /**
         * @param operation  logical operator of the node being built
         * @param comparator column name comparator
         * @param controller query controller used to analyze expressions and obtain indexes
         * @param columns    initial expressions of the node, may be empty
         */
        public Builder(OperationType operation, AbstractType<?> comparator, QueryController controller,
                IndexExpression... columns) {
            this.controller = controller;
            this.comparator = comparator;
            this.op = operation;
            this.expressions = new ArrayList<>(Arrays.asList(columns));
        }

        /**
         * Sets the right sub-tree.
         *
         * @return this builder
         */
        public Builder setRight(Builder operation) {
            right = operation;
            return this;
        }

        /**
         * Sets the left sub-tree.
         *
         * @return this builder
         */
        public Builder setLeft(Builder operation) {
            left = operation;
            return this;
        }

        /** Appends a single expression to this node. */
        public void add(IndexExpression e) {
            expressions.add(e);
        }

        /** Appends all of the given expressions to this node. */
        public void add(Collection<IndexExpression> newExpressions) {
            expressions.addAll(newExpressions);
        }

        /**
         * Builds the operation for this node, recursively completing the sub-trees it uses.
         *
         * @return the completed operation
         * @throws AssertionError if this node has no expressions and neither of
         *                        its sub-trees produced an indexed range
         */
        public Operation complete() {
            return expressions.isEmpty() ? completeFromChildren() : completeFromExpressions();
        }

        /**
         * Builds a node out of the local expressions; only the right
         * sub-tree (if any) is attached, the left one is not consulted.
         */
        private Operation completeFromExpressions() {
            ListMultimap<ByteBuffer, Expression> analyzed = analyzeGroup(controller, comparator, op, expressions);
            RangeIterator.Builder<Long, Token> ranges = controller.getIndexes(op, analyzed.values());

            Operation rightOp = null;
            if (right != null) {
                rightOp = right.complete();
                ranges.add(rightOp);
            }

            return new Operation(op, comparator, controller, analyzed, ranges.build(), null, rightOp);
        }

        /**
         * Builds a node which has no expressions of its own and joins the ranges of its sub-trees.
         */
        private Operation completeFromChildren() {
            Operation leftOp = left == null ? null : left.complete();
            Operation rightOp = right == null ? null : right.complete();

            boolean leftIndexes = leftOp != null && leftOp.range != null;
            boolean rightIndexes = rightOp != null && rightOp.range != null;

            /*
             * Operation should allow one of its sub-trees to wrap no indexes, that is related to the fact that we
             * have to accept defined-but-not-indexed columns as well as key range as IndexExpressions.
             *
             * Two cases are possible:
             *
             * only left child produced indexed iterators, that could happen when there are two columns
             * or key range on the right:
             *
             *                AND
             *              /     \
             *            OR       \
             *           /   \     AND
             *          a     b   /   \
             *                  key   key
             *
             * only right child produced indexed iterators:
             *
             *               AND
             *              /    \
             *            AND     a
             *           /   \
             *         key  key
             */
            if (!leftIndexes && !rightIndexes)
                throw new AssertionError("both sub-trees have 0 indexes.");

            RangeIterator<Long, Token> join;
            if (leftIndexes && rightIndexes) {
                RangeIterator.Builder<Long, Token> builder;
                if (op == OperationType.OR)
                    builder = RangeUnionIterator.<Long, Token>builder();
                else
                    builder = RangeIntersectionIterator.<Long, Token>builder();

                join = builder.add(leftOp).add(rightOp).build();
            } else {
                // exactly one side is indexed, iterate over it directly
                join = leftIndexes ? leftOp : rightOp;
            }

            return new Operation(op, comparator, controller, null, join, leftOp, rightOp);
        }
    }
}