org.apache.hadoop.hive.ql.exec.AnalysisOperator.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.hadoop.hive.ql.exec.AnalysisOperator.java

Source

/**
* Tencent is pleased to support the open source community by making TDW available.
* Copyright (C) 2014 THL A29 Limited, a Tencent company. All rights reserved.
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use 
* this file except in compliance with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed 
* under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS 
* OF ANY KIND, either express or implied. See the License for the specific language governing
* permissions and limitations under the License.
*/
package org.apache.hadoop.hive.ql.exec;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Properties;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.ql.exec.persistence.AnalysisBuffer;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.plan.analysisDesc;
import org.apache.hadoop.hive.ql.plan.analysisEvaluatorDesc;
import org.apache.hadoop.hive.ql.plan.exprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.exprNodeDesc;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDWFEvaluator;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDWFEvaluator.BooleanTrans;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDWFEvaluator.AnalysisEvaluatorBuffer;
import org.apache.hadoop.hive.serde2.SerDe;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption;

public class AnalysisOperator extends Operator<analysisDesc> implements Serializable {

    private static final long serialVersionUID = -8313560832289804735L;
    // Per-instance logger named after the runtime class of the operator.
    transient protected Log LOG = LogFactory.getLog(this.getClass().getName());

    // Partition-by keys: one evaluator per key, the inspector returned when that
    // evaluator is initialized, and the value evaluated for the row currently
    // being handled by process().
    transient protected ExprNodeEvaluator[] pkeyFields;
    transient protected ObjectInspector[] pkeyObjectInspectors;
    // Declared but never assigned or read in this file.
    transient protected ObjectInspector[] pkeyStandardObjectInspectors;
    transient protected Object[] pkeyObjects;

    // Order-by keys, laid out the same way as the partition-by keys above.
    transient protected ExprNodeEvaluator[] okeyFields;
    transient protected ObjectInspector[] okeyObjectInspectors;
    // Declared but never assigned or read in this file.
    transient protected ObjectInspector[] okeyStandardObjectInspectors;
    transient protected Object[] okeyObjects;

    // Per analysis function (first index) and per parameter (second index):
    // parameter evaluators and their inspectors. analysisParameterObjects is only
    // allocated in initializeOp(); analysisIsDistinct holds each function's
    // getDistinct() flag.
    transient protected ExprNodeEvaluator[][] analysisParameterFields;
    transient protected ObjectInspector[][] analysisParameterObjectInspectors;
    transient protected Object[][] analysisParameterObjects;
    transient protected boolean[] analysisIsDistinct;

    // Pass-through columns that are neither keys nor analysis results.
    transient protected ExprNodeEvaluator[] otherColumns;
    transient protected ObjectInspector[] otherColumnsObjectInspectors;
    transient protected Object[] otherColumnsObjects;

    // One evaluator and one running buffer per analysis function.
    // analysisParametersLastInvoke remembers the last parameter values passed to a
    // DISTINCT function so that consecutive duplicates can be skipped.
    transient protected GenericUDWFEvaluator[] analysisEvaluators;
    transient protected AnalysisEvaluatorBuffer[] aggregations;
    transient protected Object[][] analysisParametersLastInvoke;

    // Inspectors of the output columns, in output order. fieldNames is declared
    // but never assigned or read in this file.
    transient protected ArrayList<ObjectInspector> objectInspectors;
    transient protected ArrayList<String> fieldNames;

    // Partition key values of the incoming row (pnewKeys) and a deep copy of the
    // key values of the partition currently being accumulated (pcurrentKeys, null
    // until the first row arrives).
    transient protected ArrayList<Object> pnewKeys;
    transient protected ArrayList<Object> pcurrentKeys;

    // Struct inspectors used to compare pnewKeys against pcurrentKeys.
    transient StructObjectInspector pnewKeyObjectInspector;
    transient StructObjectInspector pcurrentKeyObjectInspector;

    // Bookkeeping for analysis functions whose descriptor reports
    // hasAggregateOrderBy():
    //   hasAggregateOrderBy[i]       - flag for analysis function i
    //   hasAggregateOrderByNumber    - how many functions have the flag set
    //   hasAggregateOrderByIdx[k]    - analysis index of the k-th flagged function
    //   hasAggregateOrderByRevIdx[i] - position k of function i, or -1 if not flagged
    // aggregateOrderByObjectInspectorANAStore describes the struct stored in the
    // analysis buffer: the row followed by one value per flagged function.
    transient protected boolean[] hasAggregateOrderBy;
    transient protected int hasAggregateOrderByNumber = 0;
    transient protected int[] hasAggregateOrderByIdx;
    transient protected int[] hasAggregateOrderByRevIdx;
    transient ObjectInspector aggregateOrderByObjectInspectorANAStore;

    // Copied from conf.getDistinct() in initializeOp(); not read elsewhere in this file.
    boolean isDistinct = false;

    // Buffer holding the rows of the current partition.
    AnalysisBuffer<Object> anabuffer;

    // Largest lag / lead offsets requested by any lag()/lead() function.
    int windowlag = 0;
    int windowlead = 0;
    // Number of rows of the current partition already passed to process().
    int currentrowid = 0;
    // Row id (within the current partition) of the next row to forward.
    int currentforwardrow = 0;

    // IMMEDIATE: process() tries to forward rows as they arrive.
    // WHOLEPARTITION: rows are only forwarded by forwardPartition().
    enum ForwardMode {
        WHOLEPARTITION, IMMEDIATE
    }

    // Inspector of the incoming rows and its standard-object counterpart.
    ObjectInspector rowInspector;
    ObjectInspector standardRowInspector;

    ForwardMode forwardMode = ForwardMode.IMMEDIATE;
    Configuration hconf;
    // SerDe handed to the analysis buffer; created in initializeOp().
    SerDe anaserde = null;

    /**
     * Prepares the operator for processing rows.
     *
     * <p>Creates and initializes the evaluators for the partition-by keys, the
     * order-by keys, the parameters of every analysis function and the remaining
     * pass-through columns; derives the output object inspector; decides whether
     * rows can be forwarded immediately or only once a partition is complete; and
     * creates the SerDe-backed buffer that holds the rows of the current partition.
     *
     * @param hconf configuration kept in {@link #hconf} and handed to the SerDe,
     *              the analysis buffer and the child operators
     * @throws HiveException if an evaluator cannot be initialized or the SerDe of
     *                       the analysis buffer cannot be created or initialized
     */
    protected void initializeOp(Configuration hconf) throws HiveException {
        this.hconf = hconf;
        rowInspector = inputObjInspectors[0];
        standardRowInspector = ObjectInspectorUtils.getStandardObjectInspector(rowInspector);

        isDistinct = conf.getDistinct();

        // Partition-by keys.
        pkeyFields = new ExprNodeEvaluator[conf.getPartitionByKeys().size()];
        pkeyObjectInspectors = new ObjectInspector[pkeyFields.length];
        pkeyObjects = new Object[pkeyFields.length];
        for (int i = 0; i < pkeyFields.length; i++) {
            pkeyFields[i] = ExprNodeEvaluatorFactory.get(conf.getPartitionByKeys().get(i));
            pkeyObjectInspectors[i] = pkeyFields[i].initialize(standardRowInspector);
        }

        // Order-by keys.
        okeyFields = new ExprNodeEvaluator[conf.getOrderByKeys().size()];
        okeyObjectInspectors = new ObjectInspector[okeyFields.length];
        okeyObjects = new Object[okeyFields.length];
        for (int i = 0; i < okeyFields.length; i++) {
            okeyFields[i] = ExprNodeEvaluatorFactory.get(conf.getOrderByKeys().get(i));
            okeyObjectInspectors[i] = okeyFields[i].initialize(standardRowInspector);
        }

        // Analysis functions and their parameters.
        final int analysisCount = conf.getAnalysises().size();
        // Reset the counter so that a repeated initialization does not keep
        // counting on top of the previous run and mis-size hasAggregateOrderByIdx.
        hasAggregateOrderByNumber = 0;
        hasAggregateOrderBy = new boolean[analysisCount];
        hasAggregateOrderByRevIdx = new int[analysisCount];
        analysisParameterFields = new ExprNodeEvaluator[analysisCount][];
        analysisParameterObjectInspectors = new ObjectInspector[analysisCount][];
        analysisParameterObjects = new Object[analysisCount][];
        for (int i = 0; i < analysisCount; i++) {
            analysisEvaluatorDesc aed = conf.getAnalysises().get(i);
            // Use a fixed locale: with the default locale "FIRST_VALUE" would not
            // lower-case to "first_value" under e.g. a Turkish default locale.
            String udwfname = aed.getGenericUDWFName().toLowerCase(java.util.Locale.ENGLISH);
            ArrayList<exprNodeDesc> parameters = aed.getParameters();

            hasAggregateOrderBy[i] = aed.hasAggregateOrderBy();
            hasAggregateOrderByRevIdx[i] = hasAggregateOrderBy[i] ? hasAggregateOrderByNumber++ : -1;

            boolean rowLocalFunction = udwfname.contains("row_number") || udwfname.contains("rank")
                    || udwfname.contains("first_value");
            if (udwfname.contains("lag")) {
                windowlag = Math.max(windowlag, windowOffset(parameters));
            } else if (udwfname.contains("lead")) {
                windowlead = Math.max(windowlead, windowOffset(parameters));
            } else if (!rowLocalFunction && !hasAggregateOrderBy[i]) {
                // Any other function forces rows to be held back until the
                // partition is complete.
                this.forwardMode = ForwardMode.WHOLEPARTITION;
            }

            if (udwfname.contains("rank")) {
                // Rank-style functions are fed the order-by keys instead of their
                // declared parameters.
                parameters = new ArrayList<exprNodeDesc>();
                parameters.addAll(conf.getOrderByKeys());
            }

            analysisParameterFields[i] = new ExprNodeEvaluator[parameters.size()];
            analysisParameterObjectInspectors[i] = new ObjectInspector[parameters.size()];
            analysisParameterObjects[i] = new Object[parameters.size()];
            for (int j = 0; j < parameters.size(); j++) {
                analysisParameterFields[i][j] = ExprNodeEvaluatorFactory.get(parameters.get(j));
                analysisParameterObjectInspectors[i][j] = analysisParameterFields[i][j]
                        .initialize(standardRowInspector);
            }
        }

        // Compact list of the analysis indices that have an aggregate order-by.
        hasAggregateOrderByIdx = new int[this.hasAggregateOrderByNumber];
        int flagged = 0;
        for (int j = 0; j < this.hasAggregateOrderBy.length; j++) {
            if (this.hasAggregateOrderBy[j]) {
                this.hasAggregateOrderByIdx[flagged++] = j;
            }
        }

        // Pass-through columns.
        otherColumns = new ExprNodeEvaluator[conf.getOtherColumns().size()];
        otherColumnsObjectInspectors = new ObjectInspector[otherColumns.length];
        otherColumnsObjects = new Object[otherColumns.length];
        for (int i = 0; i < otherColumns.length; i++) {
            otherColumns[i] = ExprNodeEvaluatorFactory.get(conf.getOtherColumns().get(i));
            otherColumnsObjectInspectors[i] = otherColumns[i].initialize(standardRowInspector);
        }

        analysisIsDistinct = new boolean[analysisCount];
        analysisEvaluators = new GenericUDWFEvaluator[analysisCount];
        for (int i = 0; i < analysisCount; i++) {
            analysisEvaluatorDesc agg = conf.getAnalysises().get(i);
            analysisIsDistinct[i] = agg.getDistinct();
            analysisEvaluators[i] = agg.getGenericUDWFEvaluator();
        }

        // Output layout: partition keys, order keys, analysis results, other columns.
        int totalFields = pkeyFields.length + okeyFields.length + analysisEvaluators.length + otherColumns.length;
        objectInspectors = new ArrayList<ObjectInspector>(totalFields);
        objectInspectors.addAll(Arrays.asList(pkeyObjectInspectors));
        objectInspectors.addAll(Arrays.asList(okeyObjectInspectors));

        // The buffer stores the row itself ("rowobj") followed by the current
        // result of every analysis function that has an aggregate order-by.
        ArrayList<ObjectInspector> aggregateOrderByObjectInspectors = new ArrayList<ObjectInspector>();
        ArrayList<String> anaStoredName = new ArrayList<String>();
        anaStoredName.add("rowobj");
        aggregateOrderByObjectInspectors.add(standardRowInspector);
        for (int i = 0; i < analysisEvaluators.length; i++) {
            ObjectInspector roi = analysisEvaluators[i].init(analysisParameterObjectInspectors[i]);
            objectInspectors.add(roi);
            if (hasAggregateOrderBy[i]) {
                anaStoredName.add("aggr" + i);
                aggregateOrderByObjectInspectors.add(roi);
            }
        }
        objectInspectors.addAll(Arrays.asList(otherColumnsObjectInspectors));

        outputObjInspector = ObjectInspectorFactory.getStandardStructObjectInspector(conf.getOutputColumnNames(),
                objectInspectors);

        aggregateOrderByObjectInspectorANAStore = ObjectInspectorFactory
                .getStandardStructObjectInspector(anaStoredName, aggregateOrderByObjectInspectors);

        // The partition keys are the leading output columns, so their names are
        // the first pkeyFields.length output column names.
        ArrayList<String> keyNames = new ArrayList<String>(pkeyFields.length);
        for (int i = 0; i < pkeyFields.length; i++) {
            keyNames.add(conf.getOutputColumnNames().get(i));
        }

        pcurrentKeyObjectInspector = ObjectInspectorFactory.getStandardStructObjectInspector(keyNames,
                Arrays.asList(pkeyObjectInspectors));

        pnewKeyObjectInspector = ObjectInspectorFactory.getStandardStructObjectInspector(keyNames,
                Arrays.asList(pkeyObjectInspectors));

        analysisParametersLastInvoke = new Object[analysisCount][];

        aggregations = newAggregations();

        pnewKeys = new ArrayList<Object>();

        anaserde = createAnalysisStoreSerDe(hconf);
        anabuffer = new AnalysisBuffer<Object>(anaserde, this.aggregateOrderByObjectInspectorANAStore, hconf);

        if (LOG.isDebugEnabled()) {
            LOG.debug("hasAggregateOrderByNumber=" + hasAggregateOrderByNumber
                    + ", hasAggregateOrderBy=" + Arrays.toString(hasAggregateOrderBy)
                    + ", hasAggregateOrderByIdx=" + Arrays.toString(hasAggregateOrderByIdx)
                    + ", hasAggregateOrderByRevIdx=" + Arrays.toString(hasAggregateOrderByRevIdx));
        }

        initializeChildren(hconf);
    }

    /**
     * Returns the offset of a lag()/lead() call: the constant second parameter
     * when one is present, otherwise 1.
     */
    private static int windowOffset(ArrayList<exprNodeDesc> parameters) {
        if (parameters.size() > 1) {
            return (Integer) ((exprNodeConstantDesc) parameters.get(1)).getValue();
        }
        return 1;
    }

    /**
     * Creates and initializes the SerDe used by the analysis buffer, with one
     * column named {@code _VALUE_k} per field of
     * {@link #aggregateOrderByObjectInspectorANAStore}.
     *
     * @throws HiveException if the SerDe cannot be instantiated or initialized;
     *                       the underlying exception is kept as the cause
     */
    private SerDe createAnalysisStoreSerDe(Configuration hconf) throws HiveException {
        StructObjectInspector soi = (StructObjectInspector) aggregateOrderByObjectInspectorANAStore;
        List<? extends StructField> fields = soi.getAllStructFieldRefs();

        StringBuilder colNames = new StringBuilder();
        StringBuilder colTypes = new StringBuilder();
        for (int k = 0; k < fields.size(); k++) {
            if (k > 0) {
                colNames.append(',');
                colTypes.append(',');
            }
            colNames.append("_VALUE_").append(k);
            colTypes.append(fields.get(k).getFieldObjectInspector().getTypeName());
        }

        Properties properties = Utilities.makeProperties(
                org.apache.hadoop.hive.serde.Constants.SERIALIZATION_FORMAT, "" + Utilities.ctrlaCode,
                org.apache.hadoop.hive.serde.Constants.LIST_COLUMNS, colNames.toString(),
                org.apache.hadoop.hive.serde.Constants.LIST_COLUMN_TYPES, colTypes.toString());

        // Fail fast instead of printing the stack trace and continuing with a
        // null or half-initialized SerDe.
        try {
            SerDe serde = LazyBinarySerDe.class.newInstance();
            serde.initialize(hconf, properties);
            return serde;
        } catch (InstantiationException e) {
            throw new HiveException("Unable to instantiate the SerDe of the analysis buffer", e);
        } catch (IllegalAccessException e) {
            throw new HiveException("Unable to instantiate the SerDe of the analysis buffer", e);
        } catch (SerDeException e) {
            throw new HiveException("Unable to initialize the SerDe of the analysis buffer", e);
        }
    }

    /**
     * Allocates a fresh buffer for every analysis evaluator.
     *
     * @return an array parallel to {@link #analysisEvaluators} with one new
     *         buffer per evaluator
     * @throws HiveException if an evaluator fails to create its buffer
     */
    protected AnalysisEvaluatorBuffer[] newAggregations() throws HiveException {
        final AnalysisEvaluatorBuffer[] buffers = new AnalysisEvaluatorBuffer[analysisEvaluators.length];
        int slot = 0;
        for (GenericUDWFEvaluator evaluator : analysisEvaluators) {
            buffers[slot++] = evaluator.getNewAnalysisEvaBuffer();
        }
        return buffers;
    }

    /**
     * Consumes one input row.
     *
     * <p>The row is copied to a standard object, its partition and order keys are
     * evaluated, and, when the partition key differs from the current one, the
     * buffered partition is flushed and all per-partition state is reset. The row
     * is then fed to the analysis functions, appended to the analysis buffer
     * together with the current result of every function that has an aggregate
     * order-by, and forwarded right away when the operator runs in
     * {@link ForwardMode#IMMEDIATE}.
     *
     * @param row the incoming row, described by {@link #rowInspector}
     * @param tag not used by this operator
     * @throws HiveException if evaluation, buffering or forwarding fails
     */
    @Override
    public void process(Object row, int tag) throws HiveException {

        final Object standardRow = ObjectInspectorUtils.copyToStandardObject(row, rowInspector,
                ObjectInspectorCopyOption.DEFAULT);

        // Evaluate the partition keys of the incoming row.
        pnewKeys.clear();
        for (int k = 0; k < pkeyFields.length; k++) {
            if (pkeyObjectInspectors[k] == null) {
                pkeyObjectInspectors[k] = pkeyFields[k].initialize(standardRowInspector);
            }
            final Object keyValue = pkeyFields[k].evaluate(standardRow);
            pkeyObjects[k] = keyValue;
            pnewKeys.add(keyValue);
        }

        // Evaluate the order keys of the incoming row.
        for (int k = 0; k < okeyFields.length; k++) {
            if (okeyObjectInspectors[k] == null) {
                okeyObjectInspectors[k] = okeyFields[k].initialize(standardRowInspector);
            }
            okeyObjects[k] = okeyFields[k].evaluate(standardRow);
        }

        final boolean samePartition = ObjectInspectorUtils.compare(pnewKeys, pnewKeyObjectInspector,
                pcurrentKeys, pcurrentKeyObjectInspector) == 0;

        // A new partition starts on the very first row or when the key changes;
        // in the latter case the finished partition is flushed first.
        final boolean startPartition;
        if (pcurrentKeys == null) {
            pcurrentKeys = new ArrayList<Object>(pkeyFields.length);
            startPartition = true;
        } else {
            startPartition = !samePartition;
            if (startPartition) {
                forwardPartition();
            }
        }

        if (startPartition) {
            deepCopyElements(pkeyObjects, pkeyObjectInspectors, pcurrentKeys, ObjectInspectorCopyOption.WRITABLE);

            resetAggregations(aggregations);

            Arrays.fill(analysisParametersLastInvoke, null);
            anabuffer.reset();

            currentrowid = 0;
            currentforwardrow = 0;
        }

        updateAggregations(standardRow);

        // Stored entry: the row followed by the running result of every analysis
        // function that has an aggregate order-by.
        final ArrayList<Object> stored = new ArrayList<Object>();
        stored.add(standardRow);
        for (int slot = 0; slot < hasAggregateOrderByNumber; slot++) {
            final int analysisIdx = this.hasAggregateOrderByIdx[slot];
            final Object firstParameter = analysisParameterFields[analysisIdx][0].evaluate(standardRow);
            stored.add(this.analysisEvaluators[analysisIdx].terminateCurrent(this.aggregations[analysisIdx],
                    firstParameter));
        }

        anabuffer.add(stored);

        if (ForwardMode.IMMEDIATE == forwardMode) {
            forwardimmediate();
        }
        currentrowid++;
    }

    /**
     * Tries to forward the next pending row while the partition is still being
     * read.
     *
     * <p>While fewer than {@link #windowlag} rows have been forwarded, a row is
     * forwarded in absolute mode as soon as {@link #windowlead} further rows have
     * arrived. Afterwards rows are forwarded in relative mode and, on success,
     * dropped from the head of the buffer.
     *
     * @throws HiveException if forwarding fails
     */
    private void forwardimmediate() throws HiveException {
        if (currentforwardrow >= windowlag) {
            final Object pending = anabuffer.getByRowid(currentforwardrow);
            if (forward(anabuffer, pending, currentforwardrow, false)) {
                currentforwardrow++;
                anabuffer.removeFirst(false);
            }
        } else if (currentrowid >= currentforwardrow + windowlead) {
            final int rowid = currentforwardrow;
            final Object pending = anabuffer.getByRowid(rowid);
            // The counter advances before the row is forwarded, whatever the outcome.
            currentforwardrow = rowid + 1;
            forward(anabuffer, pending, rowid, true);
        }
    }

    /**
     * Forwards every row of the current partition that has not been forwarded
     * yet, starting at {@link #currentforwardrow}. Does nothing when the buffer
     * cannot be positioned on that row.
     *
     * @throws HiveException if forwarding fails
     */
    private void forwardPartition() throws HiveException {
        if (anabuffer.seek(currentforwardrow)) {
            for (Object buffered = anabuffer.next(); buffered != null; buffered = anabuffer.next()) {
                final int rowid = currentforwardrow++;
                forward(anabuffer, buffered, rowid, true);
            }
        }
    }

    // Output row, allocated on the first forward and reused afterwards.
    transient Object[] forwardCache;

    /**
     * Assembles the output row for one buffered entry and forwards it.
     *
     * <p>The output consists of the current partition keys, the order keys of the
     * row, one value per analysis function and the pass-through columns. For a
     * function with an aggregate order-by the value stored alongside the row is
     * used; every other function is asked to terminate for this row.
     *
     * @param analysisBuffer buffer handed to the evaluators' terminate call
     * @param row            buffered entry: a list whose first element is the row
     * @param rowid          id of the row within the partition
     * @param absolute       passed through to the evaluators' terminate call
     * @return {@code true} if the row was forwarded; {@code false} if the entry or
     *         its row is {@code null}, or an evaluator reported that it cannot
     *         terminate yet
     * @throws HiveException if evaluation or forwarding fails
     */
    protected boolean forward(AnalysisBuffer<Object> analysisBuffer, Object row, int rowid, boolean absolute)
            throws HiveException {
        if (row == null) {
            return false;
        }
        final ArrayList<Object> storedEntry = (ArrayList<Object>) row;
        final Object rowObject = storedEntry.get(0);
        if (rowObject == null) {
            return false;
        }

        final BooleanTrans terminated = new BooleanTrans();
        if (forwardCache == null) {
            forwardCache = new Object[this.pcurrentKeys.size() + this.okeyFields.length + aggregations.length
                    + this.otherColumns.length];
        }

        int pos = 0;
        for (Object partitionKey : pcurrentKeys) {
            forwardCache[pos++] = partitionKey;
        }

        for (ExprNodeEvaluator orderKey : okeyFields) {
            forwardCache[pos++] = orderKey.evaluate(rowObject);
        }

        for (int a = 0; a < aggregations.length; a++, pos++) {
            if (!this.hasAggregateOrderBy[a]) {
                forwardCache[pos] = analysisEvaluators[a].terminate(aggregations[a], analysisParameterFields[a],
                        analysisBuffer, rowid, absolute, terminated);
            } else {
                // Stored values follow the row, hence the +1.
                forwardCache[pos] = storedEntry.get(this.hasAggregateOrderByRevIdx[a] + 1);
                terminated.set(true);
            }
            if (!terminated.get()) {
                return false;
            }
        }

        for (ExprNodeEvaluator column : otherColumns) {
            forwardCache[pos++] = column.evaluate(rowObject);
        }

        forward(forwardCache, outputObjInspector);
        return true;
    }

    /**
     * Replaces the contents of {@code result} with standard-object copies of
     * {@code keys}.
     *
     * @param keys                values to copy
     * @param keyObjectInspectors inspector for each value, parallel to {@code keys}
     * @param result              list that is cleared and then filled with the copies
     * @param copyOption          copy option passed to the object inspector utilities
     */
    private static void deepCopyElements(Object[] keys, ObjectInspector[] keyObjectInspectors,
            ArrayList<Object> result, ObjectInspectorCopyOption copyOption) {
        result.clear();
        int idx = 0;
        for (Object key : keys) {
            result.add(ObjectInspectorUtils.copyToStandardObject(key, keyObjectInspectors[idx++], copyOption));
        }
    }

    /**
     * Resets each buffer through the evaluator at the same index.
     *
     * @param aggs buffers to reset, parallel to {@link #analysisEvaluators}
     * @throws HiveException if an evaluator fails to reset its buffer
     */
    protected void resetAggregations(AnalysisEvaluatorBuffer[] aggs) throws HiveException {
        int idx = 0;
        for (AnalysisEvaluatorBuffer buffer : aggs) {
            analysisEvaluators[idx++].reset(buffer);
        }
    }

    /**
     * Feeds one row to every analysis function.
     *
     * <p>For a DISTINCT function the row is only passed on when its parameter
     * values differ from the ones of the previous invocation; the new values are
     * then remembered for the next comparison.
     *
     * @param obj the row, as a standard object
     * @throws HiveException if parameter evaluation or an evaluator fails
     */
    protected void updateAggregations(Object obj) throws HiveException {
        for (int aggIdx = 0; aggIdx < aggregations.length; aggIdx++) {
            final ExprNodeEvaluator[] parameterEvaluators = analysisParameterFields[aggIdx];
            final Object[] arguments = new Object[parameterEvaluators.length];
            for (int p = 0; p < parameterEvaluators.length; p++) {
                arguments[p] = parameterEvaluators[p].evaluate(obj);
            }

            if (!analysisIsDistinct[aggIdx]) {
                analysisEvaluators[aggIdx].analysis(aggregations[aggIdx], arguments);
                continue;
            }

            if (analysisParametersLastInvoke[aggIdx] == null) {
                analysisParametersLastInvoke[aggIdx] = new Object[arguments.length];
            }
            final Object[] previous = analysisParametersLastInvoke[aggIdx];
            final ObjectInspector[] inspectors = analysisParameterObjectInspectors[aggIdx];

            final boolean unchanged = ObjectInspectorUtils.compare(arguments, inspectors, previous,
                    inspectors) == 0;
            if (unchanged) {
                continue;
            }

            analysisEvaluators[aggIdx].analysis(aggregations[aggIdx], arguments);
            for (int p = 0; p < arguments.length; p++) {
                previous[p] = ObjectInspectorUtils.copyToStandardObject(arguments[p], inspectors[p],
                        ObjectInspectorCopyOption.WRITABLE);
            }
        }
    }

    /**
     * Flushes the rows still buffered for the last partition and releases the
     * analysis buffer.
     *
     * @param abort not consulted here; buffered rows are forwarded either way
     * @throws HiveException if forwarding the remaining rows or closing the
     *                       buffer fails
     */
    protected void closeOp(boolean abort) throws HiveException {
        if (anabuffer == null) {
            // The buffer was never created, so there is nothing to flush or release.
            return;
        }
        try {
            forwardPartition();
        } finally {
            // Always release the buffer, even when forwarding fails.
            anabuffer.close();
        }
    }

    /**
     * Returns the short name of this operator.
     *
     * @return the string {@code "ANA"}
     */
    public String getName() {
        // The literal is enough; wrapping it in new String(...) only allocates a copy.
        return "ANA";
    }

}