// Java tutorial
/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.cassandra.db.index.sasi.plan; import java.io.IOException; import java.nio.ByteBuffer; import java.util.*; import org.apache.cassandra.config.ColumnDefinition; import org.apache.cassandra.cql3.ColumnNameBuilder; import org.apache.cassandra.db.Column; import org.apache.cassandra.db.Row; import org.apache.cassandra.db.index.sasi.conf.ColumnIndex; import org.apache.cassandra.db.index.sasi.analyzer.AbstractAnalyzer; import org.apache.cassandra.db.index.sasi.disk.Token; import org.apache.cassandra.db.index.sasi.plan.Expression.Op; import org.apache.cassandra.db.index.sasi.utils.RangeIntersectionIterator; import org.apache.cassandra.db.index.sasi.utils.RangeIterator; import org.apache.cassandra.db.index.sasi.utils.RangeUnionIterator; import org.apache.cassandra.db.marshal.AbstractType; import org.apache.cassandra.thrift.IndexExpression; import org.apache.cassandra.thrift.IndexOperator; import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.*; public class Operation extends RangeIterator<Long, Token> { public enum OperationType { AND, OR; public boolean apply(boolean a, boolean b) { switch (this) { case OR: return a | b; case AND: return a & b; default: 
throw new AssertionError(); } } } private final QueryController controller; protected final OperationType op; protected final AbstractType<?> comparator; protected final ListMultimap<ByteBuffer, Expression> expressions; protected final RangeIterator<Long, Token> range; protected Operation left, right; private Operation(OperationType operation, AbstractType<?> comparator, QueryController controller, ListMultimap<ByteBuffer, Expression> expressions, RangeIterator<Long, Token> range, Operation left, Operation right) { super(range); this.op = operation; this.comparator = comparator; this.controller = controller; this.expressions = expressions; this.range = range; this.left = left; this.right = right; } /** * Recursive "satisfies" checks based on operation * and data from the lower level members using depth-first search * and bubbling the results back to the top level caller. * * Most of the work here is done by {@link #localSatisfiedBy(Row, ColumnNameBuilder, boolean)} * see it's comment for details, if there are no local expressions * assigned to Operation it will call satisfiedBy(Row) on it's children. * * Query: first_name = X AND (last_name = Y OR address = XYZ AND street = IL AND city = C) OR (state = 'CA' AND country = 'US') * Row: key1: (first_name: X, last_name: Z, address: XYZ, street: IL, city: C, state: NY, country:US) * * #1 OR * / \ * #2 (first_name) AND AND (state, country) * \ * #3 (last_name) OR * \ * #4 AND (address, street, city) * * * Evaluation of the key1 is top-down depth-first search: * * --- going down --- * Level #1 is evaluated, OR expression has to pull results from it's children which are at level #2 and OR them together, * Level #2 AND (state, country) could be be evaluated right away, AND (first_name) refers to it's "right" child from level #3 * Level #3 OR (last_name) requests results from level #4 * Level #4 AND (address, street, city) does logical AND between it's 3 fields, returns result back to level #3. 
* --- bubbling up --- * Level #3 computes OR between AND (address, street, city) result and it's "last_name" expression * Level #2 computes AND between "first_name" and result of level #3, AND (state, country) which is already computed * Level #1 does OR between results of AND (first_name) and AND (state, country) and returns final result. * * @param row The row to check. * @return true if give Row satisfied all of the expressions in the tree, * false otherwise. */ public boolean satisfiedBy(Row row, ColumnNameBuilder builder, boolean allowMissingColumns) { boolean sideL, sideR; if (expressions == null || expressions.isEmpty()) { sideL = left != null && left.satisfiedBy(row, builder, allowMissingColumns); sideR = right != null && right.satisfiedBy(row, builder, allowMissingColumns); // one of the expressions was skipped // because it had no indexes attached if (left == null) return sideR; } else { sideL = localSatisfiedBy(row, builder, allowMissingColumns); // if there is no right it means that this expression // is last in the sequence, we can just return result from local expressions if (right == null) return sideL; sideR = right.satisfiedBy(row, builder, allowMissingColumns); } return op.apply(sideL, sideR); } /** * Check every expression in the analyzed list to figure out if the * columns in the give row match all of the based on the operation * set to the current operation node. * * The algorithm is as follows: for every given expression from analyzed * list get corresponding column from the Row: * - apply {@link Expression#contains(ByteBuffer)} * method to figure out if it's satisfied; * - apply logical operation between boolean accumulator and current boolean result; * - if result == false and node's operation is AND return right away; * * After all of the expressions have been evaluated return resulting accumulator variable. 
* * Example: * * Operation = (op: AND, columns: [first_name = p, 5 < age < 7, last_name: y]) * Row = (first_name: pavel, last_name: y, age: 6, timestamp: 15) * * #1 get "first_name" = p (expressions) * - row-get "first_name" => "pavel" * - compare "pavel" against "p" => true (current) * - set accumulator current => true (because this is expression #1) * * #2 get "last_name" = y (expressions) * - row-get "last_name" => "y" * - compare "y" against "y" => true (current) * - set accumulator to accumulator & current => true * * #3 get 5 < "age" < 7 (expressions) * - row-get "age" => "6" * - compare 5 < 6 < 7 => true (current) * - set accumulator to accumulator & current => true * * #4 return accumulator => true (row satisfied all of the conditions) * * @param row The row to check. * @return true if give Row satisfied all of the analyzed expressions, * false otherwise. */ private boolean localSatisfiedBy(Row row, @SuppressWarnings("unused") ColumnNameBuilder builder, boolean allowMissingColumns) { if (row == null || row.cf == null) return false; final long now = System.currentTimeMillis(); boolean result = false; int idx = 0; for (ByteBuffer columnName : expressions.keySet()) { // don't ever try to validate key alias ColumnDefinition columnDefinition = controller.getColumn(columnName); if (columnDefinition == null || columnDefinition.type == ColumnDefinition.Type.PARTITION_KEY) continue; Column column = row.cf.getColumn(columnName); boolean isMissingColumn = column == null || !column.isLive(now); if (!allowMissingColumns && isMissingColumn) { throw new IllegalStateException( "All indexed columns should be included into the column slice, missing: " + comparator.getString(columnName)); } boolean isMatch = false; // If there is a column with multiple expressions that effectively means an OR // e.g. 
comment = 'x y z' could be split into 'comment' EQ 'x', 'comment' EQ 'y', 'comment' EQ 'z' // by analyzer, in situation like that we only need to check if at least one of expressions matches, // and there is no hit on the NOT_EQ (if any) which are always at the end of the filter list. // Loop always starts from the end of the list, which makes it possible to break after the last // NOT_EQ condition on first EQ/RANGE condition satisfied, instead of checking every // single expression in the column filter list. List<Expression> filters = expressions.get(columnName); for (int i = filters.size() - 1; i >= 0; i--) { Expression expression = filters.get(i); isMatch = !isMissingColumn && expression.contains(column.value()); if (expression.getOp() == Op.NOT_EQ) { // since this is NOT_EQ operation we have to // inverse match flag (to check against other expressions), // and break in case of negative inverse because that means // that it's a positive hit on the not-eq clause. isMatch = !isMatch; if (!isMatch) break; } // if it was a match on EQ/RANGE or column is missing else if (isMatch || isMissingColumn) break; } if (idx++ == 0) { result = isMatch; continue; } result = op.apply(result, isMatch); // exit early because we already got a single false if (op == OperationType.AND && !result) return false; } return idx == 0 || result; } @VisibleForTesting protected static ListMultimap<ByteBuffer, Expression> analyzeGroup(QueryController controller, final AbstractType<?> comparator, OperationType op, List<IndexExpression> expressions) { ListMultimap<ByteBuffer, Expression> analyzed = ArrayListMultimap.create(); // sort all of the expressions in the operation by name and priority of the logical operator // this gives us an efficient way to handle inequality and combining into ranges without extra processing // and converting expressions from one type to another. 
Collections.sort(expressions, new Comparator<IndexExpression>() { @Override public int compare(IndexExpression a, IndexExpression b) { int cmp = comparator.compare(ByteBuffer.wrap(a.getColumn_name()), ByteBuffer.wrap(b.getColumn_name())); return cmp == 0 ? -Integer.compare(getPriority(a.getOp()), getPriority(b.getOp())) : cmp; } }); for (final IndexExpression e : expressions) { if (e.isSetLogicalOp()) continue; ByteBuffer name = ByteBuffer.wrap(e.getColumn_name()); ColumnIndex columnIndex = controller.getIndex(name); List<Expression> perColumn = analyzed.get(name); if (columnIndex == null) { ColumnDefinition nonIndexedColumn = controller.getColumn(name); columnIndex = new ColumnIndex(controller.getKeyValidator(), nonIndexedColumn, controller.getComparator(nonIndexedColumn)); } AbstractAnalyzer analyzer = columnIndex.getAnalyzer(); analyzer.reset(ByteBuffer.wrap(e.getValue())); // EQ/NOT_EQ can have multiple expressions e.g. text = "Hello World", // becomes text = "Hello" OR text = "World" because "space" is always interpreted as a split point (by analyzer), // NOT_EQ is made an independent expression only in case of pre-existing multiple EQ expressions, or // if there is no EQ operations and NOT_EQ is met or a single NOT_EQ expression present, // in such case we know exactly that there would be no more EQ/RANGE expressions for given column // since NOT_EQ has the lowest priority. 
if (e.getOp() == IndexOperator.EQ || (e.getOp() == IndexOperator.NOT_EQ && (perColumn.size() == 0 || perColumn.size() > 1 || (perColumn.size() == 1 && perColumn.get(0).getOp() == Op.NOT_EQ)))) { while (analyzer.hasNext()) { final ByteBuffer token = analyzer.next(); perColumn.add(new Expression(controller, columnIndex) { { add(e.op, token); } }); } } else // "range" or not-equals operator, combines both bounds together into the single expression, // iff operation of the group is AND, otherwise we are forced to create separate expressions, // not-equals is combined with the range iff operator is AND. { Expression range; if (perColumn.size() == 0 || op != OperationType.AND) perColumn.add((range = new Expression(controller, columnIndex))); else range = Iterables.getLast(perColumn); while (analyzer.hasNext()) range.add(e.op, analyzer.next()); } } return analyzed; } private static int getPriority(IndexOperator op) { switch (op) { case EQ: return 4; case GTE: case GT: return 3; case LTE: case LT: return 2; case NOT_EQ: return 1; default: return 0; } } @Override protected Token computeNext() { return range != null && range.hasNext() ? range.next() : endOfData(); } @Override protected void performSkipTo(Long nextToken) { if (range != null) range.skipTo(nextToken); } @Override public void close() throws IOException { controller.releaseIndexes(this); } public static class Builder { private final QueryController controller; protected final OperationType op; protected final AbstractType<?> comparator; protected final List<IndexExpression> expressions; protected Builder left, right; public Builder(OperationType operation, AbstractType<?> comparator, QueryController controller, IndexExpression... 
columns) { this.op = operation; this.comparator = comparator; this.controller = controller; this.expressions = new ArrayList<>(); Collections.addAll(expressions, columns); } public Builder setRight(Builder operation) { this.right = operation; return this; } public Builder setLeft(Builder operation) { this.left = operation; return this; } public void add(IndexExpression e) { expressions.add(e); } public void add(Collection<IndexExpression> newExpressions) { if (expressions != null) expressions.addAll(newExpressions); } public Operation complete() { if (!expressions.isEmpty()) { ListMultimap<ByteBuffer, Expression> analyzedExpressions = analyzeGroup(controller, comparator, op, expressions); RangeIterator.Builder<Long, Token> range = controller.getIndexes(op, analyzedExpressions.values()); Operation rightOp = null; if (right != null) { rightOp = right.complete(); range.add(rightOp); } return new Operation(op, comparator, controller, analyzedExpressions, range.build(), null, rightOp); } else { Operation leftOp = null, rightOp = null; boolean leftIndexes = false, rightIndexes = false; if (left != null) { leftOp = left.complete(); leftIndexes = leftOp != null && leftOp.range != null; } if (right != null) { rightOp = right.complete(); rightIndexes = rightOp != null && rightOp.range != null; } RangeIterator<Long, Token> join; /** * Operation should allow one of it's sub-trees to wrap no indexes, that is related to the fact that we * have to accept defined-but-not-indexed columns as well as key range as IndexExpressions. 
* * Two cases are possible: * * only left child produced indexed iterators, that could happen when there are two columns * or key range on the right: * * AND * / \ * OR \ * / \ AND * a b / \ * key key * * only right child produced indexed iterators: * * AND * / \ * AND a * / \ * key key */ if (leftIndexes && !rightIndexes) join = leftOp; else if (!leftIndexes && rightIndexes) join = rightOp; else if (leftIndexes) { RangeIterator.Builder<Long, Token> builder = op == OperationType.OR ? RangeUnionIterator.<Long, Token>builder() : RangeIntersectionIterator.<Long, Token>builder(); join = builder.add(leftOp).add(rightOp).build(); } else throw new AssertionError("both sub-trees have 0 indexes."); return new Operation(op, comparator, controller, null, join, leftOp, rightOp); } } } }