Java tutorial
// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. package org.apache.impala.analysis; import java.util.HashSet; import java.util.List; import org.apache.impala.catalog.Db; import org.apache.impala.catalog.Function.CompareMode; import org.apache.impala.catalog.PrimitiveType; import org.apache.impala.catalog.ScalarFunction; import org.apache.impala.catalog.ScalarType; import org.apache.impala.catalog.Type; import org.apache.impala.common.AnalysisException; import org.apache.impala.thrift.TCaseExpr; import org.apache.impala.thrift.TExprNode; import org.apache.impala.thrift.TExprNodeType; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; import com.google.common.base.Predicates; import com.google.common.collect.Lists; import com.google.common.collect.Sets; /** * CASE and DECODE are represented using this class. The backend implementation is * always the "case" function. CASE always returns the THEN corresponding to the leftmost * WHEN that is TRUE, or the ELSE (or NULL if no ELSE is provided) if no WHEN is TRUE. * * The internal representation of * CASE [expr] WHEN expr THEN expr [WHEN expr THEN expr ...] [ELSE expr] END * Each When/Then is stored as two consecutive children (whenExpr, thenExpr). If a case * expr is given then it is the first child. If an else expr is given then it is the * last child. * * The internal representation of * DECODE(expr, key_expr, val_expr [, key_expr, val_expr ...] [, default_val_expr]) * has a pair of children for each pair of key/val_expr and an additional child if the * default_val_expr was given. The first child represents the comparison of expr to * key_expr. Decode has three forms: * 1) DECODE(expr, null_literal, val_expr) - * child[0] = IsNull(expr) * 2) DECODE(expr, non_null_literal, val_expr) - * child[0] = Eq(expr, literal) * 3) DECODE(expr1, expr2, val_expr) - * child[0] = Or(And(IsNull(expr1), IsNull(expr2)), Eq(expr1, expr2)) * The children representing val_expr (child[1]) and default_val_expr (child[2]) are * simply the exprs themselves. * * Example of equivalent CASE for DECODE(foo, 'bar', 1, col, 2, NULL, 3, 4): * CASE * WHEN foo = 'bar' THEN 1 -- no need for IS NULL check * WHEN foo IS NULL AND col IS NULL OR foo = col THEN 2 * WHEN foo IS NULL THEN 3 -- no need for equality check * ELSE 4 * END */ public class CaseExpr extends Expr { // Set if constructed from a DECODE, null otherwise. private FunctionCallExpr decodeExpr_; private boolean hasCaseExpr_; private boolean hasElseExpr_; public CaseExpr(Expr caseExpr, List<CaseWhenClause> whenClauses, Expr elseExpr) { super(); if (caseExpr != null) { children_.add(caseExpr); hasCaseExpr_ = true; } for (CaseWhenClause whenClause : whenClauses) { Preconditions.checkNotNull(whenClause.getWhenExpr()); children_.add(whenClause.getWhenExpr()); Preconditions.checkNotNull(whenClause.getThenExpr()); children_.add(whenClause.getThenExpr()); } if (elseExpr != null) { children_.add(elseExpr); hasElseExpr_ = true; } } /** * Constructs an equivalent CaseExpr representation. * * The DECODE behavior is basically the same as the hasCaseExpr_ version of CASE. * Though there is one difference. NULLs are considered equal when comparing the * argument to be decoded with the candidates. This differences is for compatibility * with Oracle. http://docs.oracle.com/cd/B19306_01/server.102/b14200/functions040.htm. * To account for the difference, the CASE representation will use the non-hasCaseExpr_ * version. * * The return type of DECODE differs from that of Oracle when the third argument is * the NULL literal. In Oracle the return type is STRING. In Impala the return type is * determined by the implicit casting rules (i.e. it's not necessarily a STRING). This * is done so seemingly normal usages such as DECODE(int_col, tinyint_col, NULL, * bigint_col) will avoid type check errors (STRING incompatible with BIGINT). */ public CaseExpr(FunctionCallExpr decodeExpr) { super(); decodeExpr_ = decodeExpr; hasCaseExpr_ = false; int childIdx = 0; Expr encoded = null; Expr encodedIsNull = null; if (!decodeExpr.getChildren().isEmpty()) { encoded = decodeExpr.getChild(childIdx++); encodedIsNull = new IsNullPredicate(encoded, false); } // Add the key_expr/val_expr pairs while (childIdx + 2 <= decodeExpr.getChildren().size()) { Expr candidate = decodeExpr.getChild(childIdx++); if (candidate.isLiteral()) { if (candidate.isNullLiteral()) { // An example case is DECODE(foo, NULL, bar), since NULLs are considered // equal, this becomes CASE WHEN foo IS NULL THEN bar END. children_.add(encodedIsNull.clone()); } else { children_.add(new BinaryPredicate(BinaryPredicate.Operator.EQ, encoded.clone(), candidate)); } } else { children_.add(new CompoundPredicate(CompoundPredicate.Operator.OR, new CompoundPredicate(CompoundPredicate.Operator.AND, encodedIsNull.clone(), new IsNullPredicate(candidate, false)), new BinaryPredicate(BinaryPredicate.Operator.EQ, encoded.clone(), candidate))); } // Add the value children_.add(decodeExpr.getChild(childIdx++)); } // Add the default value if (childIdx < decodeExpr.getChildren().size()) { hasElseExpr_ = true; children_.add(decodeExpr.getChild(childIdx)); } // Check that these exprs were cloned above, as reusing the same Expr object in // different places can lead to bugs, eg. if the Expr has multiple parents, they may // try to cast it to different types. Preconditions.checkState(!contains(encoded) && !contains(encodedIsNull)); } /** * Copy c'tor used in clone(). */ protected CaseExpr(CaseExpr other) { super(other); decodeExpr_ = other.decodeExpr_; hasCaseExpr_ = other.hasCaseExpr_; hasElseExpr_ = other.hasElseExpr_; } public static void initBuiltins(Db db) { for (Type t : Type.getSupportedTypes()) { if (t.isNull()) continue; if (t.isScalarType(PrimitiveType.CHAR)) continue; // TODO: case is special and the signature cannot be represented. // It is alternating varargs // e.g. case(bool, type, bool type, bool type, etc). // Instead we just add a version for each of the return types // e.g. case(BOOLEAN), case(INT), etc db.addBuiltin(ScalarFunction.createBuiltinOperator("case", "", Lists.newArrayList(t), t)); // Same for DECODE db.addBuiltin(ScalarFunction.createBuiltinOperator("decode", "", Lists.newArrayList(t), t)); } } @Override public boolean equals(Object obj) { if (!super.equals(obj)) return false; CaseExpr expr = (CaseExpr) obj; return hasCaseExpr_ == expr.hasCaseExpr_ && hasElseExpr_ == expr.hasElseExpr_ && isDecode() == expr.isDecode(); } @Override public String toSqlImpl() { return (decodeExpr_ == null) ? toCaseSql() : decodeExpr_.toSqlImpl(); } @VisibleForTesting String toCaseSql() { StringBuilder output = new StringBuilder("CASE"); int childIdx = 0; if (hasCaseExpr_) { output.append(" " + children_.get(childIdx++).toSql()); } while (childIdx + 2 <= children_.size()) { output.append(" WHEN " + children_.get(childIdx++).toSql()); output.append(" THEN " + children_.get(childIdx++).toSql()); } if (hasElseExpr_) { output.append(" ELSE " + children_.get(children_.size() - 1).toSql()); } output.append(" END"); return output.toString(); } @Override protected void toThrift(TExprNode msg) { msg.node_type = TExprNodeType.CASE_EXPR; msg.case_expr = new TCaseExpr(hasCaseExpr_, hasElseExpr_); } private void castCharToString(int childIndex) throws AnalysisException { if (children_.get(childIndex).getType().isScalarType(PrimitiveType.CHAR)) { children_.set(childIndex, children_.get(childIndex).castTo(ScalarType.STRING)); } } @Override protected void analyzeImpl(Analyzer analyzer) throws AnalysisException { if (isDecode()) { Preconditions.checkState(!hasCaseExpr_); // decodeExpr_.analyze() would fail validating function existence. The complex // vararg signature is currently unsupported. FunctionCallExpr.validateScalarFnParams(decodeExpr_.getParams()); if (decodeExpr_.getChildren().size() < 3) { throw new AnalysisException("DECODE in '" + toSql() + "' requires at least 3 " + "arguments."); } } // Since we have no BE implementation of a CaseExpr with CHAR types, // we cast the CHAR-typed whenExprs and caseExprs to STRING, // TODO: This casting is not always correct and needs to be fixed, see IMPALA-1652. // Keep track of maximum compatible type of case expr and all when exprs. Type whenType = null; // Keep track of maximum compatible type of else expr and all then exprs. Type returnType = null; // Remember last of these exprs for error reporting. Expr lastCompatibleThenExpr = null; Expr lastCompatibleWhenExpr = null; int loopEnd = children_.size(); if (hasElseExpr_) { --loopEnd; } int loopStart; Expr caseExpr = null; // Set loop start, and initialize returnType as type of castExpr. if (hasCaseExpr_) { loopStart = 1; castCharToString(0); caseExpr = children_.get(0); caseExpr.analyze(analyzer); whenType = caseExpr.getType(); lastCompatibleWhenExpr = children_.get(0); } else { whenType = Type.BOOLEAN; loopStart = 0; } // Go through when/then exprs and determine compatible types. for (int i = loopStart; i < loopEnd; i += 2) { castCharToString(i); Expr whenExpr = children_.get(i); if (hasCaseExpr_) { // Determine maximum compatible type of the case expr, // and all when exprs seen so far. We will add casts to them at the very end. whenType = analyzer.getCompatibleType(whenType, lastCompatibleWhenExpr, whenExpr); lastCompatibleWhenExpr = whenExpr; } else { // If no case expr was given, then the when exprs should always return // boolean or be castable to boolean. if (!Type.isImplicitlyCastable(whenExpr.getType(), Type.BOOLEAN, false)) { Preconditions.checkState(isCase()); throw new AnalysisException("When expr '" + whenExpr.toSql() + "'" + " is not of type boolean and not castable to type boolean."); } // Add a cast if necessary. if (!whenExpr.getType().isBoolean()) castChild(Type.BOOLEAN, i); } // Determine maximum compatible type of the then exprs seen so far. // We will add casts to them at the very end. Expr thenExpr = children_.get(i + 1); returnType = analyzer.getCompatibleType(returnType, lastCompatibleThenExpr, thenExpr); lastCompatibleThenExpr = thenExpr; } if (hasElseExpr_) { Expr elseExpr = children_.get(children_.size() - 1); returnType = analyzer.getCompatibleType(returnType, lastCompatibleThenExpr, elseExpr); } // Make sure BE doesn't see TYPE_NULL by picking an arbitrary type if (whenType.isNull()) whenType = ScalarType.BOOLEAN; if (returnType.isNull()) returnType = ScalarType.BOOLEAN; // Add casts to case expr to compatible type. if (hasCaseExpr_) { // Cast case expr. if (!children_.get(0).type_.equals(whenType)) { castChild(whenType, 0); } // Add casts to when exprs to compatible type. for (int i = loopStart; i < loopEnd; i += 2) { if (!children_.get(i).type_.equals(whenType)) { castChild(whenType, i); } } } // Cast then exprs to compatible type. for (int i = loopStart + 1; i < children_.size(); i += 2) { if (!children_.get(i).type_.equals(returnType)) { castChild(returnType, i); } } // Cast else expr to compatible type. if (hasElseExpr_) { if (!children_.get(children_.size() - 1).type_.equals(returnType)) { castChild(returnType, children_.size() - 1); } } // Do the function lookup just based on the whenType. Type[] args = new Type[1]; args[0] = whenType; fn_ = getBuiltinFunction(analyzer, "case", args, CompareMode.IS_NONSTRICT_SUPERTYPE_OF); Preconditions.checkNotNull(fn_); type_ = returnType; // Compute cost as the sum of evaluating all of the WHEN exprs, plus // the max of the THEN/ELSE exprs. float maxThenCost = 0; float whenCosts = 0; boolean hasChildCosts = true; for (int i = 0; i < children_.size(); ++i) { if (!getChild(i).hasCost()) { hasChildCosts = false; break; } if (hasCaseExpr_ && i % 2 == 1) { // This child is a WHEN expr. BINARY_PREDICATE_COST accounts for the cost of // comparing the CASE expr to the WHEN expr. whenCosts += getChild(0).getCost() + getChild(i).getCost() + BINARY_PREDICATE_COST; } else if (!hasCaseExpr_ && i % 2 == 0) { // This child is a WHEN expr. whenCosts += getChild(i).getCost(); } else if (i != 0) { // This child is a THEN or ELSE expr. float thenCost = getChild(i).getCost(); if (thenCost > maxThenCost) maxThenCost = thenCost; } } if (hasChildCosts) { evalCost_ = whenCosts + maxThenCost; } } @Override protected void computeNumDistinctValues() { // Skip the first child if case expression int loopStart = (hasCaseExpr_ ? 1 : 0); // If all the outputs have a known number of distinct values (i.e. not -1), then // sum the number of distinct constants with the maximum NDV for the non-constants. // // Otherwise, the number of distinct values is undetermined. The input cardinality // (i.e. the when's) are not used. boolean allOutputsKnown = true; int numOutputConstants = 0; long maxOutputNonConstNdv = -1; HashSet<LiteralExpr> constLiteralSet = Sets.newHashSetWithExpectedSize(children_.size()); for (int i = loopStart; i < children_.size(); ++i) { // The children follow this ordering: // [optional first child] when1 then1 when2 then2 ... else // After skipping optional first child, even indices are when expressions, except // for the last child, which can be an else expression if ((i - loopStart) % 2 == 0 && !(i == children_.size() - 1 && hasElseExpr_)) { // This is a when expression continue; } // This is an output expression (either then or else) Expr outputExpr = children_.get(i); if (outputExpr.isConstant()) { if (outputExpr.isLiteral()) { LiteralExpr outputLiteral = (LiteralExpr) outputExpr; if (constLiteralSet.add(outputLiteral)) ++numOutputConstants; } else { ++numOutputConstants; } } else { long outputNdv = outputExpr.getNumDistinctValues(); if (outputNdv == -1) allOutputsKnown = false; maxOutputNonConstNdv = Math.max(maxOutputNonConstNdv, outputNdv); } } // Else unspecified => NULL constant, which is not caught above if (!hasElseExpr_) ++numOutputConstants; if (allOutputsKnown) { if (maxOutputNonConstNdv == -1) { // All must be constant, because if we hit any SlotRef, this would be set numDistinctValues_ = numOutputConstants; } else { numDistinctValues_ = numOutputConstants + maxOutputNonConstNdv; } } else { // There is no correct answer when statistics are missing. Neither the // known outputs nor the inputs provide information numDistinctValues_ = -1; } } private boolean isCase() { return !isDecode(); } private boolean isDecode() { return decodeExpr_ != null; } public boolean hasCaseExpr() { return hasCaseExpr_; } public boolean hasElseExpr() { return hasElseExpr_; } @Override public Expr clone() { return new CaseExpr(this); } }