org.apache.hadoop.hive.ql.parse.PartitionPruner.java Source code

Introduction

Here is the source code for org.apache.hadoop.hive.ql.parse.PartitionPruner.java. During query compilation, this class evaluates the pruning predicates taken from a query's WHERE clause against each partition of a table and discards the partitions that cannot match.

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.ql.parse;
/*
 * PartitionPruner.java
 *
 * Created on April 9, 2008, 3:48 PM
 */

import java.util.*;

import org.antlr.runtime.tree.*;

import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator;
import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory;
import org.apache.hadoop.hive.ql.metadata.*;
import org.apache.hadoop.hive.ql.plan.exprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.exprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.exprNodeDesc;
import org.apache.hadoop.hive.ql.plan.exprNodeFieldDesc;
import org.apache.hadoop.hive.ql.plan.exprNodeFuncDesc;
import org.apache.hadoop.hive.ql.plan.exprNodeIndexDesc;
import org.apache.hadoop.hive.ql.plan.exprNodeNullDesc;
import org.apache.hadoop.hive.ql.typeinfo.TypeInfo;
import org.apache.hadoop.hive.ql.typeinfo.TypeInfoUtils;
import org.apache.hadoop.hive.ql.udf.UDFOPAnd;
import org.apache.hadoop.hive.ql.udf.UDFOPNot;
import org.apache.hadoop.hive.ql.udf.UDFOPOr;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.objectinspector.InspectableObject;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

public class PartitionPruner {

    // The log
    @SuppressWarnings("nls")
    private static final Log LOG = LogFactory.getLog("hive.ql.parse.PartitionPruner");

    private String tableAlias;

    private QBMetaData metaData;

    private Table tab;

    private exprNodeDesc prunerExpr;

    /** Creates a new instance of PartitionPruner */
    public PartitionPruner(String tableAlias, QBMetaData metaData) {
        this.tableAlias = tableAlias;
        this.metaData = metaData;
        this.tab = metaData.getTableForAlias(tableAlias);
        this.prunerExpr = new exprNodeConstantDesc(Boolean.TRUE);
    }

    /**
     * We use exprNodeConstantDesc(class, null) to represent unknown values.
     * Except for UDFOPAnd, UDFOPOr, and UDFOPNot, all UDFs are assumed to return
     * an unknown value if any of their arguments is unknown.
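     * For example, if a table is partitioned on ds, then for the predicate
     * (ds = '2008-04-08' AND key = 10) the non-partition column key folds to
     * unknown here, so the conjunction evaluates to unknown rather than false
     * and the partition is conservatively retained. (Column names illustrative.)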
     *  
     * @param expr the predicate subtree to translate
     * @return the expression descriptor; never null
     * @throws SemanticException
     */
    @SuppressWarnings("nls")
    private exprNodeDesc genExprNodeDesc(CommonTree expr) throws SemanticException {
        //  We recursively create the exprNodeDesc.  Base cases:  when we encounter 
        //  a column ref, we convert that into an exprNodeColumnDesc;  when we encounter 
        //  a constant, we convert that into an exprNodeConstantDesc.  For others we just 
        //  build the exprNodeFuncDesc with recursively built children.

        exprNodeDesc desc = null;

        //  Is this a simple expr node (not a TOK_COLREF or a TOK_FUNCTION or an operator)?
        desc = SemanticAnalyzer.genSimpleExprNodeDesc(expr);
        if (desc != null) {
            return desc;
        }

        int tokType = expr.getType();
        switch (tokType) {
        case HiveParser.TOK_COLREF: {

            assert (expr.getChildCount() == 2);
            String tabAlias = SemanticAnalyzer.getTableName(expr);
            String colName = SemanticAnalyzer.getSerDeFieldExpression(expr);
            if (tabAlias == null || colName == null) {
                throw new SemanticException(ErrorMsg.INVALID_XPATH.getMsg(expr));
            }
            // Partition columns of this table are referenced directly; any other column is folded to an unknown (null) constant below
            if (tabAlias.equals(tableAlias) && tab.isPartitionKey(colName)) {
                desc = new exprNodeColumnDesc(String.class, colName);
            } else {
                // might be a column from another table
                try {
                    TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromObjectInspector(
                            this.metaData.getTableForAlias(tabAlias).getDeserializer().getObjectInspector());
                    desc = new exprNodeConstantDesc(typeInfo.getStructFieldTypeInfo(colName), null);
                } catch (SerDeException e) {
                    throw new RuntimeException(e);
                }
            }
            break;
        }

        default: {
            boolean isFunction = (expr.getType() == HiveParser.TOK_FUNCTION);

            // Create all children
            int childrenBegin = (isFunction ? 1 : 0);
            ArrayList<exprNodeDesc> children = new ArrayList<exprNodeDesc>(expr.getChildCount() - childrenBegin);
            for (int ci = childrenBegin; ci < expr.getChildCount(); ci++) {
                exprNodeDesc child = genExprNodeDesc((CommonTree) expr.getChild(ci));
                assert (child.getTypeInfo() != null);
                children.add(child);
            }

            // Create function desc
            desc = SemanticAnalyzer.getXpathOrFuncExprNodeDesc(expr, isFunction, children);

            Class<?> udfClass = (desc instanceof exprNodeFuncDesc)
                    ? ((exprNodeFuncDesc) desc).getUDFMethod().getDeclaringClass() : null;
            if (UDFOPAnd.class.equals(udfClass) || UDFOPOr.class.equals(udfClass)
                    || UDFOPNot.class.equals(udfClass)) {
                // do nothing, because "And", "Or", and "Not" support evaluation over null (unknown) values
                // NOTE: In the future, all UDFs that treat null values as UNKNOWN (both in parameters and return
                // values) should derive from a common base class UDFNullAsUnknown; instead of listing the classes
                // here we would then test whether a class derives from that base class.
            } else {
                // If any child might be unknown, fold this node into an unknown (null) constant
                if (mightBeUnknown(desc)) {
                    LOG.trace("Pruner function might be unknown: " + expr.toStringTree());
                    desc = new exprNodeConstantDesc(desc.getTypeInfo(), null);
                }
            }
            break;
        }
        }
        return desc;
    }

    public static boolean mightBeUnknown(exprNodeDesc desc) {
        if (desc instanceof exprNodeConstantDesc) {
            exprNodeConstantDesc d = (exprNodeConstantDesc) desc;
            return d.getValue() == null;
        } else if (desc instanceof exprNodeNullDesc) {
            return false;
        } else if (desc instanceof exprNodeIndexDesc) {
            exprNodeIndexDesc d = (exprNodeIndexDesc) desc;
            return mightBeUnknown(d.getDesc()) || mightBeUnknown(d.getIndex());
        } else if (desc instanceof exprNodeFieldDesc) {
            exprNodeFieldDesc d = (exprNodeFieldDesc) desc;
            return mightBeUnknown(d.getDesc());
        } else if (desc instanceof exprNodeFuncDesc) {
            exprNodeFuncDesc d = (exprNodeFuncDesc) desc;
            for (exprNodeDesc child : d.getChildren()) {
                if (mightBeUnknown(child)) {
                    return true;
                }
            }
            return false;
        } else if (desc instanceof exprNodeColumnDesc) {
            return false;
        }
        return false;
    }

    /** Adds a pruning predicate by ANDing it into the accumulated pruner expression. */
    @SuppressWarnings("nls")
    public void addExpression(CommonTree expr) throws SemanticException {
        LOG.trace("adding pruning Tree = " + expr.toStringTree());
        exprNodeDesc desc = genExprNodeDesc(expr);
        // Ignore null constant expressions
        if (!(desc instanceof exprNodeConstantDesc) || ((exprNodeConstantDesc) desc).getValue() != null) {
            LOG.trace("adding pruning expr = " + desc);
            this.prunerExpr = SemanticAnalyzer.getFuncExprNodeDesc("AND", this.prunerExpr, desc);
        }
    }

    /** Prune the table's partitions using the accumulated pruner expression and return those that may match. */
    @SuppressWarnings("nls")
    public Set<Partition> prune() throws HiveException {
        LOG.trace("Started pruning partiton");
        LOG.trace("tabname = " + this.tab.getName());
        LOG.trace("prune Expression = " + this.prunerExpr);

        HashSet<Partition> ret_parts = new HashSet<Partition>();
        try {
            StructObjectInspector rowObjectInspector = (StructObjectInspector) this.tab.getDeserializer()
                    .getObjectInspector();
            Object[] rowWithPart = new Object[2];
            InspectableObject inspectableObject = new InspectableObject();

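            // Compile the pruner expression once; the same evaluator is then reused
            // for every partition in the loop below.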
            ExprNodeEvaluator evaluator = ExprNodeEvaluatorFactory.get(this.prunerExpr);
            for (Partition part : Hive.get().getPartitions(this.tab)) {
                // Set all the variables here
                LinkedHashMap<String, String> partSpec = part.getSpec();

                // Create the row object
                ArrayList<String> partNames = new ArrayList<String>();
                ArrayList<String> partValues = new ArrayList<String>();
                ArrayList<ObjectInspector> partObjectInspectors = new ArrayList<ObjectInspector>();
                for (Map.Entry<String, String> entry : partSpec.entrySet()) {
                    partNames.add(entry.getKey());
                    partValues.add(entry.getValue());
                    partObjectInspectors
                            .add(ObjectInspectorFactory.getStandardPrimitiveObjectInspector(String.class));
                }
                StructObjectInspector partObjectInspector = ObjectInspectorFactory
                        .getStandardStructObjectInspector(partNames, partObjectInspectors);

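                // rowWithPart[0] (the non-partition columns) is intentionally left null:
                // genExprNodeDesc folds every non-partition column into an unknown constant,
                // so the evaluator only ever reads the partition values in rowWithPart[1].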
                rowWithPart[1] = partValues;
                ArrayList<StructObjectInspector> ois = new ArrayList<StructObjectInspector>(2);
                ois.add(rowObjectInspector);
                ois.add(partObjectInspector);
                StructObjectInspector rowWithPartObjectInspector = ObjectInspectorFactory
                        .getUnionStructObjectInspector(ois);

                // evaluate the expression tree
                evaluator.evaluate(rowWithPart, rowWithPartObjectInspector, inspectableObject);
                LOG.trace("prune result for partition " + partSpec + ": " + inspectableObject.o);
                if (!Boolean.FALSE.equals(inspectableObject.o)) {
                    LOG.debug("retained partition: " + partSpec);
                    ret_parts.add(part);
                } else {
                    LOG.trace("pruned partition: " + partSpec);
                }
            }
        } catch (Exception e) {
            throw new HiveException(e);
        }

        // Now return the set of partitions
        return ret_parts;
    }

    public Table getTable() {
        return this.tab;
    }
}
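
For context, here is a minimal sketch of how this class would be driven during query compilation. It is illustrative only: the alias "src", the qbMetaData object, and the whereClauseTree variable are hypothetical stand-ins for values that the real SemanticAnalyzer produces while walking the parsed query.

// Hypothetical driver, for illustration only; not part of the source above.
// qbMetaData (QBMetaData) and whereClauseTree (CommonTree) are assumed to come
// from semantic analysis of the parsed query; "src" is a made-up table alias.
PartitionPruner pruner = new PartitionPruner("src", qbMetaData);

// Each WHERE-clause predicate is added as a parsed ANTLR tree. Predicates over
// non-partition columns fold to unknown, so they never eliminate a partition.
pruner.addExpression(whereClauseTree);

// Evaluate the accumulated expression against every partition of the table,
// keeping those for which it is TRUE or unknown.
Set<Partition> candidates = pruner.prune();
for (Partition p : candidates) {
    System.out.println("candidate partition: " + p.getSpec());
}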