org.apache.hadoop.hive.ql.io.sarg.ConvertAstToSearchArg.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.hadoop.hive.ql.io.sarg.ConvertAstToSearchArg.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.ql.io.sarg;

import java.sql.Date;
import java.sql.Timestamp;
import java.util.List;

import org.apache.commons.codec.binary.Base64;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.common.type.HiveChar;
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.ql.exec.SerializationUtilities;
import org.apache.hadoop.hive.ql.io.sarg.LiteralDelegate;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDynamicValueDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.plan.TableScanDesc;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBetween;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualNS;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNot;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotEqual;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotNull;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNull;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr;
import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.esotericsoftware.kryo.Kryo;
import com.esotericsoftware.kryo.io.Input;

/**
 * Translates a Hive predicate expression tree ({@link ExprNodeDesc}) into a
 * {@link SearchArgument}.
 *
 * <p>Sub-expressions that cannot be translated are replaced with
 * {@code TruthValue.YES_NO_NULL} ("maybe"), so the resulting search argument
 * never claims more than the original predicate does.
 */
public class ConvertAstToSearchArg {
    private static final Logger LOG = LoggerFactory.getLogger(ConvertAstToSearchArg.class);
    private final SearchArgument.Builder builder;
    private final Configuration conf;

    /**
     * Builds the expression and leaf list from the original predicate.
     * @param conf the configuration used to create the builder; it is also
     *   handed to dynamic-value literals found in the expression
     * @param expression the expression to translate.
     */
    ConvertAstToSearchArg(Configuration conf, ExprNodeGenericFuncDesc expression) {
        this.conf = conf;
        builder = SearchArgumentFactory.newBuilder(conf);
        parse(expression);
    }

    /**
     * Build the search argument from the expression.
     * @return the search argument
     */
    public SearchArgument buildSearchArgument() {
        return builder.build();
    }

    /**
     * Get the predicate leaf type of the given expression node.
     * @param expr the expression to get the type of
     * @return the matching {@link PredicateLeaf.Type}, or null if the
     *   expression is not of a primitive type handled here
     */
    private static PredicateLeaf.Type getType(ExprNodeDesc expr) {
        TypeInfo type = expr.getTypeInfo();
        if (type.getCategory() == ObjectInspector.Category.PRIMITIVE) {
            switch (((PrimitiveTypeInfo) type).getPrimitiveCategory()) {
            case BYTE:
            case SHORT:
            case INT:
            case LONG:
                return PredicateLeaf.Type.LONG;
            case CHAR:
            case VARCHAR:
            case STRING:
                return PredicateLeaf.Type.STRING;
            case FLOAT:
            case DOUBLE:
                return PredicateLeaf.Type.FLOAT;
            case DATE:
                return PredicateLeaf.Type.DATE;
            case TIMESTAMP:
                return PredicateLeaf.Type.TIMESTAMP;
            case DECIMAL:
                return PredicateLeaf.Type.DECIMAL;
            case BOOLEAN:
                return PredicateLeaf.Type.BOOLEAN;
            default:
                // any other primitive category is not supported
                return null;
            }
        }
        return null;
    }

    /**
     * Get the column name referenced in the expression. It must be at the top
     * level of this expression and there must be exactly one column.
     * @param expr the expression to look in
     * @param variable the slot the variable is expected in
     * @return the column name, or null if the slot is out of range or does
     *   not hold a column reference
     */
    private static String getColumnName(ExprNodeGenericFuncDesc expr, int variable) {
        List<ExprNodeDesc> children = expr.getChildren();
        if (variable < 0 || variable >= children.size()) {
            return null;
        }
        ExprNodeDesc child = children.get(variable);
        if (child instanceof ExprNodeColumnDesc) {
            return ((ExprNodeColumnDesc) child).getColumn();
        }
        return null;
    }

    /**
     * Convert a constant into the Java representation used for literals of
     * the given predicate leaf type.
     * @param constantDesc the constant to convert
     * @param type the type of the column the constant is compared against
     * @return the converted literal, or null if the constant's value is null
     * @throws ArithmeticException if a decimal constant does not fit in a long
     * @throws IllegalArgumentException if the type is not handled here
     */
    private static Object boxLiteral(ExprNodeConstantDesc constantDesc, PredicateLeaf.Type type) {
        Object lit = constantDesc.getValue();
        if (lit == null) {
            return null;
        }
        switch (type) {
        case LONG:
            if (lit instanceof HiveDecimal) {
                HiveDecimal dec = (HiveDecimal) lit;
                if (!dec.isLong()) {
                    throw new ArithmeticException("Overflow");
                }
                return dec.longValue();
            }
            return ((Number) lit).longValue();
        case STRING:
            if (lit instanceof HiveChar) {
                return ((HiveChar) lit).getPaddedValue();
            } else if (lit instanceof String) {
                return lit;
            } else {
                return lit.toString();
            }
        case FLOAT:
            if (lit instanceof HiveDecimal) {
                // HiveDecimal -> Float -> Number -> Double
                return ((Number) ((HiveDecimal) lit).floatValue()).doubleValue();
            } else {
                return ((Number) lit).doubleValue();
            }
        case TIMESTAMP:
            return Timestamp.valueOf(lit.toString());
        case DATE:
            return Date.valueOf(lit.toString());
        case DECIMAL:
            return new HiveDecimalWritable(lit.toString());
        case BOOLEAN:
            return lit;
        default:
            throw new IllegalArgumentException("Unknown literal " + type);
        }
    }

    /**
     * Find the child that is the literal.
     * @param conf the configuration handed to dynamic-value literals
     * @param expr the parent node to check; it must have exactly two children
     * @param type the type of the expression
     * @return the literal boxed if exactly one is found, otherwise null
     */
    private static Object findLiteral(Configuration conf, ExprNodeGenericFuncDesc expr, PredicateLeaf.Type type) {
        List<ExprNodeDesc> children = expr.getChildren();
        if (children.size() != 2) {
            return null;
        }
        Object result = null;
        for (ExprNodeDesc child : children) {
            Object currentResult = getLiteral(conf, child, type);
            if (currentResult != null) {
                // Both children in the expression should not be literal
                if (result != null) {
                    return null;
                }
                result = currentResult;
            }
        }
        return result;
    }

    /**
     * Extract the literal held by a single child node.
     * @param conf the configuration set on a dynamic-value literal
     * @param child the node to inspect
     * @param type the type of the expression
     * @return the boxed constant, the dynamic value delegate, or null if the
     *   child is neither (or is a constant whose value is null)
     */
    private static Object getLiteral(Configuration conf, ExprNodeDesc child, PredicateLeaf.Type type) {
        if (child instanceof ExprNodeConstantDesc) {
            return boxLiteral((ExprNodeConstantDesc) child, type);
        } else if (child instanceof ExprNodeDynamicValueDesc) {
            LiteralDelegate value = ((ExprNodeDynamicValueDesc) child).getDynamicValue();
            value.setConf(conf);
            return value;
        }
        return null;
    }

    /**
     * Return the boxed literal at the given position
     * @param conf the configuration handed to dynamic-value literals
     * @param expr the parent node
     * @param type the type of the expression
     * @param position the child position to check
     * @return the boxed literal if found otherwise null
     */
    private static Object getLiteral(Configuration conf, ExprNodeGenericFuncDesc expr, PredicateLeaf.Type type,
            int position) {
        List<ExprNodeDesc> children = expr.getChildren();
        ExprNodeDesc child = children.get(position);
        return getLiteral(conf, child, type);
    }

    /**
     * Box every child from the given position onwards as a literal.
     * @param expr the parent node
     * @param type the type of the expression
     * @param start the index of the first child to treat as a literal
     * @return the boxed literals, or null if any of those children is not a
     *   constant
     */
    private static Object[] getLiteralList(ExprNodeGenericFuncDesc expr, PredicateLeaf.Type type, int start) {
        List<ExprNodeDesc> children = expr.getChildren();
        Object[] result = new Object[children.size() - start];

        // skip the children before 'start'; they hold the variable
        int posn = 0;
        for (ExprNodeDesc child : children.subList(start, children.size())) {
            if (child instanceof ExprNodeConstantDesc) {
                result[posn++] = boxLiteral((ExprNodeConstantDesc) child, type);
            } else {
                // if we get some non-literals, we need to punt
                return null;
            }
        }
        return result;
    }

    /**
     * Add a single leaf for the given operator to the builder. If the column
     * or its type cannot be determined, or building the leaf throws, a
     * YES_NO_NULL literal is added instead.
     * @param operator the operator type that was found
     * @param expression the expression holding the variable and literals
     * @param variable the child index of the column reference
     */
    private void createLeaf(PredicateLeaf.Operator operator, ExprNodeGenericFuncDesc expression, int variable) {
        String columnName = getColumnName(expression, variable);
        if (columnName == null) {
            builder.literal(SearchArgument.TruthValue.YES_NO_NULL);
            return;
        }
        PredicateLeaf.Type type = getType(expression.getChildren().get(variable));
        if (type == null) {
            builder.literal(SearchArgument.TruthValue.YES_NO_NULL);
            return;
        }

        // if the variable was on the right, we need to swap things around:
        // (lit < col) is emitted as not(col <= lit) and
        // (lit <= col) is emitted as not(col < lit)
        boolean needSwap = false;
        if (variable != 0) {
            if (operator == PredicateLeaf.Operator.LESS_THAN) {
                needSwap = true;
                operator = PredicateLeaf.Operator.LESS_THAN_EQUALS;
            } else if (operator == PredicateLeaf.Operator.LESS_THAN_EQUALS) {
                needSwap = true;
                operator = PredicateLeaf.Operator.LESS_THAN;
            }
        }
        if (needSwap) {
            builder.startNot();
        }

        try {
            switch (operator) {
            case IS_NULL:
                builder.isNull(columnName, type);
                break;
            case EQUALS:
                builder.equals(columnName, type, findLiteral(conf, expression, type));
                break;
            case NULL_SAFE_EQUALS:
                builder.nullSafeEquals(columnName, type, findLiteral(conf, expression, type));
                break;
            case LESS_THAN:
                builder.lessThan(columnName, type, findLiteral(conf, expression, type));
                break;
            case LESS_THAN_EQUALS:
                builder.lessThanEquals(columnName, type, findLiteral(conf, expression, type));
                break;
            case IN:
                builder.in(columnName, type, getLiteralList(expression, type, variable + 1));
                break;
            case BETWEEN:
                builder.between(columnName, type, getLiteral(conf, expression, type, variable + 1),
                        getLiteral(conf, expression, type, variable + 2));
                break;
            default:
                // Every other path through this method emits exactly one node;
                // do the same for an operator without a mapping so that an
                // enclosing startNot()/end() pair is never left without a child.
                builder.literal(SearchArgument.TruthValue.YES_NO_NULL);
                break;
            }
        } catch (Exception e) {
            LOG.warn("Exception thrown during SARG creation. Returning YES_NO_NULL." + " Exception: "
                    + e.getMessage());
            builder.literal(SearchArgument.TruthValue.YES_NO_NULL);
        }

        if (needSwap) {
            builder.end();
        }
    }

    /**
     * Find the variable in the expression.
     * @param expr the expression to look in
     * @return the index of the variable or -1 if there is not exactly one
     *   variable.
     */
    private int findVariable(ExprNodeDesc expr) {
        int result = -1;
        List<ExprNodeDesc> children = expr.getChildren();
        for (int i = 0; i < children.size(); ++i) {
            ExprNodeDesc child = children.get(i);
            if (child instanceof ExprNodeColumnDesc) {
                // if we already found a variable, this isn't a sarg
                if (result != -1) {
                    return -1;
                } else {
                    result = i;
                }
            }
        }
        return result;
    }

    /**
     * Create a leaf expression when we aren't sure where the variable is
     * located.
     * @param operator the operator type that was found
     * @param expression the expression to check
     */
    private void createLeaf(PredicateLeaf.Operator operator, ExprNodeGenericFuncDesc expression) {
        createLeaf(operator, expression, findVariable(expression));
    }

    /**
     * Parse every child of the given node in order.
     * @param node the node whose children are added to the builder
     */
    private void addChildren(ExprNodeGenericFuncDesc node) {
        for (ExprNodeDesc child : node.getChildren()) {
            parse(child);
        }
    }

    /**
     * Translate a BETWEEN expression, honouring its invert flag.
     *
     * <p>GenericUDFBetween's first child is the invert flag (TRUE for
     * {@code NOT BETWEEN}); the operand and the two bounds follow it. Ignoring
     * the flag would turn {@code x NOT BETWEEN a AND b} into
     * {@code x BETWEEN a AND b}, the opposite of the original predicate.
     * @param expr the BETWEEN expression
     */
    private void createBetweenLeaf(ExprNodeGenericFuncDesc expr) {
        List<ExprNodeDesc> children = expr.getChildren();
        Object invert = null;
        if (children != null && !children.isEmpty() && children.get(0) instanceof ExprNodeConstantDesc) {
            invert = ((ExprNodeConstantDesc) children.get(0)).getValue();
        }

        if (Boolean.TRUE.equals(invert)) {
            builder.startNot();
            createLeaf(PredicateLeaf.Operator.BETWEEN, expr, 1);
            builder.end();
        } else if (Boolean.FALSE.equals(invert)) {
            createLeaf(PredicateLeaf.Operator.BETWEEN, expr, 1);
        } else {
            // we can't tell whether the predicate is negated, so make it a maybe
            builder.literal(SearchArgument.TruthValue.YES_NO_NULL);
        }
    }

    /**
     * Do the recursive parse of the Hive ExprNodeDesc into our ExpressionTree.
     * @param expression the Hive ExprNodeDesc
     */
    private void parse(ExprNodeDesc expression) {
        // Most of the stuff we can handle are generic function descriptions, so
        // handle the special cases.
        if (expression.getClass() != ExprNodeGenericFuncDesc.class) {

            // if it is a reference to a boolean column, convert it to a truth test.
            if (expression instanceof ExprNodeColumnDesc) {
                ExprNodeColumnDesc columnDesc = (ExprNodeColumnDesc) expression;
                if (columnDesc.getTypeString().equals("boolean")) {
                    builder.equals(columnDesc.getColumn(), PredicateLeaf.Type.BOOLEAN, true);
                    return;
                }
            }

            // otherwise, we don't know what to do so make it a maybe
            builder.literal(SearchArgument.TruthValue.YES_NO_NULL);
            return;
        }

        // get the kind of expression
        ExprNodeGenericFuncDesc expr = (ExprNodeGenericFuncDesc) expression;
        Class<?> op = expr.getGenericUDF().getClass();

        // handle the logical operators
        if (op == GenericUDFOPOr.class) {
            builder.startOr();
            addChildren(expr);
            builder.end();
        } else if (op == GenericUDFOPAnd.class) {
            builder.startAnd();
            addChildren(expr);
            builder.end();
        } else if (op == GenericUDFOPNot.class) {
            builder.startNot();
            addChildren(expr);
            builder.end();
        } else if (op == GenericUDFOPEqual.class) {
            createLeaf(PredicateLeaf.Operator.EQUALS, expr);
        } else if (op == GenericUDFOPNotEqual.class) {
            builder.startNot();
            createLeaf(PredicateLeaf.Operator.EQUALS, expr);
            builder.end();
        } else if (op == GenericUDFOPEqualNS.class) {
            createLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, expr);
        } else if (op == GenericUDFOPGreaterThan.class) {
            // (a > b) is expressed as not(a <= b)
            builder.startNot();
            createLeaf(PredicateLeaf.Operator.LESS_THAN_EQUALS, expr);
            builder.end();
        } else if (op == GenericUDFOPEqualOrGreaterThan.class) {
            // (a >= b) is expressed as not(a < b)
            builder.startNot();
            createLeaf(PredicateLeaf.Operator.LESS_THAN, expr);
            builder.end();
        } else if (op == GenericUDFOPLessThan.class) {
            createLeaf(PredicateLeaf.Operator.LESS_THAN, expr);
        } else if (op == GenericUDFOPEqualOrLessThan.class) {
            createLeaf(PredicateLeaf.Operator.LESS_THAN_EQUALS, expr);
        } else if (op == GenericUDFIn.class) {
            createLeaf(PredicateLeaf.Operator.IN, expr, 0);
        } else if (op == GenericUDFBetween.class) {
            createBetweenLeaf(expr);
        } else if (op == GenericUDFOPNull.class) {
            createLeaf(PredicateLeaf.Operator.IS_NULL, expr, 0);
        } else if (op == GenericUDFOPNotNull.class) {
            builder.startNot();
            createLeaf(PredicateLeaf.Operator.IS_NULL, expr, 0);
            builder.end();
        } else {
            // otherwise, we didn't understand it, so mark it maybe
            builder.literal(SearchArgument.TruthValue.YES_NO_NULL);
        }
    }

    /** Configuration key read by {@link #createFromConf(Configuration)} for a Base64-encoded search argument. */
    public static final String SARG_PUSHDOWN = "sarg.pushdown";

    /**
     * Translate the given predicate into a search argument.
     * @param conf the configuration used while building
     * @param expression the predicate to translate
     * @return the search argument
     */
    public static SearchArgument create(Configuration conf, ExprNodeGenericFuncDesc expression) {
        return new ConvertAstToSearchArg(conf, expression).buildSearchArgument();
    }

    // one Kryo instance per thread, created on first use
    private static final ThreadLocal<Kryo> kryo = new ThreadLocal<Kryo>() {
        @Override
        protected Kryo initialValue() {
            return new Kryo();
        }
    };

    /**
     * Deserialize a search argument from its Base64-encoded Kryo form.
     * @param kryo the Base64 text (note: this parameter shadows the
     *   thread-local field of the same name, which is not used here)
     * @return the search argument
     */
    public static SearchArgument create(String kryo) {
        return create(Base64.decodeBase64(kryo));
    }

    /**
     * Deserialize a search argument from its Kryo-serialized bytes.
     * @param kryoBytes the serialized SearchArgumentImpl
     * @return the search argument
     */
    public static SearchArgument create(byte[] kryoBytes) {
        return kryo.get().readObject(new Input(kryoBytes), SearchArgumentImpl.class);
    }

    /**
     * Create a search argument from the configuration. The serialized filter
     * expression under TableScanDesc.FILTER_EXPR_CONF_STR is used if present;
     * otherwise the value under {@link #SARG_PUSHDOWN} is used.
     * @param conf the configuration to read
     * @return the search argument, or null if neither key is set
     */
    public static SearchArgument createFromConf(Configuration conf) {
        String sargString;
        if ((sargString = conf.get(TableScanDesc.FILTER_EXPR_CONF_STR)) != null) {
            return create(conf, SerializationUtilities.deserializeExpression(sargString));
        } else if ((sargString = conf.get(SARG_PUSHDOWN)) != null) {
            return create(sargString);
        }
        return null;
    }

    /**
     * Check whether {@link #createFromConf(Configuration)} has a key to read.
     * @param conf the configuration to check
     * @return true if either of the two keys read by createFromConf is set
     */
    public static boolean canCreateFromConf(Configuration conf) {
        return conf.get(TableScanDesc.FILTER_EXPR_CONF_STR) != null || conf.get(SARG_PUSHDOWN) != null;
    }

}