org.apache.hadoop.hive.ql.exec.vector.VectorizedBatchUtil.java Source code

Introduction

Here is the source code for org.apache.hadoop.hive.ql.exec.vector.VectorizedBatchUtil.java, a utility class from Apache Hive for creating column vectors and loading rows into VectorizedRowBatch objects.

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.ql.exec.vector;

import java.io.IOException;
import java.sql.Date;
import java.sql.Timestamp;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.commons.lang.ArrayUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.hive.common.ObjectPair;
import org.apache.hadoop.hive.common.type.HiveChar;
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.common.type.HiveIntervalDayTime;
import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth;
import org.apache.hadoop.hive.common.type.HiveVarchar;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.serde2.io.ByteWritable;
import org.apache.hadoop.hive.serde2.io.DateWritable;
import org.apache.hadoop.hive.serde2.io.DoubleWritable;
import org.apache.hadoop.hive.serde2.io.HiveCharWritable;
import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
import org.apache.hadoop.hive.serde2.io.HiveIntervalDayTimeWritable;
import org.apache.hadoop.hive.serde2.io.HiveIntervalYearMonthWritable;
import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable;
import org.apache.hadoop.hive.serde2.io.ShortWritable;
import org.apache.hadoop.hive.serde2.io.TimestampWritable;
import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
import org.apache.hadoop.io.BooleanWritable;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hive.common.util.DateUtils;

public class VectorizedBatchUtil {
    private static final Logger LOG = LoggerFactory.getLogger(VectorizedBatchUtil.class);

    /**
     * Marks the row at the given index as null in the column vector and clears
     * the vector's noNulls flag.
     * @param cv the column vector to update
     * @param rowIndex the row to mark as null
     */
    public static void setNullColIsNullValue(ColumnVector cv, int rowIndex) {
        cv.isNull[rowIndex] = true;
        if (cv.noNulls) {
            cv.noNulls = false;
        }
    }
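
    // Usage sketch (illustrative; the batch and column index are assumed to be
    // the caller's): marking row 5 of a long column as null. Callers typically
    // also write a dummy value into the slot, per the NOTE in setVector below.
    //
    //   LongColumnVector lcv = (LongColumnVector) batch.cols[0];
    //   lcv.vector[5] = 1;                 // dummy value for the null slot
    //   setNullColIsNullValue(lcv, 5);     // isNull[5] = true, noNulls = false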

    /**
     * Iterates through all the column vectors in the batch and sets noNulls
     * to true.
     *
     * @param batch
     *          Batch on which noNulls is set
     */
    public static void setNoNullFields(VectorizedRowBatch batch) {
        for (int i = 0; i < batch.numCols; i++) {
            batch.cols[i].noNulls = true;
        }
    }

    /**
     * Marks the specified column of the batch as repeating, meaning only the
     * value at index 0 is meaningful.
     *
     * @param batch Batch containing the column
     * @param column Index of the column to mark as repeating
     */
    public static void setRepeatingColumn(VectorizedRowBatch batch, int column) {
        ColumnVector cv = batch.cols[column];
        cv.isRepeating = true;
    }

    /**
     * Reduces the logical size of a vectorized row batch; the new size must
     * not exceed the batch's maximum capacity.
     */
    public static void setBatchSize(VectorizedRowBatch batch, int size) {
        assert (size <= batch.getMaxSize());
        batch.size = size;
    }

    public static ColumnVector createColumnVector(String typeName) {
        typeName = typeName.toLowerCase();

        // Allow undecorated CHAR and VARCHAR to support scratch column type names.
        if (typeName.equals("char") || typeName.equals("varchar")) {
            return new BytesColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
        }

        TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeName);
        return createColumnVector(typeInfo);
    }
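
    // Sketch of how the String overload resolves type names (the decimal
    // precision and scale here are arbitrary examples):
    //
    //   ColumnVector a = createColumnVector("bigint");        // LongColumnVector
    //   ColumnVector b = createColumnVector("decimal(10,2)"); // DecimalColumnVector
    //   ColumnVector c = createColumnVector("varchar");       // undecorated scratch
    //                                                         //   name -> BytesColumnVector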

    public static ColumnVector createColumnVector(TypeInfo typeInfo) {
        switch (typeInfo.getCategory()) {
        case PRIMITIVE: {
            PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) typeInfo;
            switch (primitiveTypeInfo.getPrimitiveCategory()) {
            case BOOLEAN:
            case BYTE:
            case SHORT:
            case INT:
            case LONG:
            case DATE:
            case INTERVAL_YEAR_MONTH:
                return new LongColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
            case TIMESTAMP:
                return new TimestampColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
            case INTERVAL_DAY_TIME:
                return new IntervalDayTimeColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
            case FLOAT:
            case DOUBLE:
                return new DoubleColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
            case BINARY:
            case STRING:
            case CHAR:
            case VARCHAR:
                return new BytesColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
            case DECIMAL:
                DecimalTypeInfo tInfo = (DecimalTypeInfo) primitiveTypeInfo;
                return new DecimalColumnVector(VectorizedRowBatch.DEFAULT_SIZE, tInfo.precision(), tInfo.scale());
            default:
                throw new RuntimeException(
                        "Vectorizaton is not supported for datatype:" + primitiveTypeInfo.getPrimitiveCategory());
            }
        }
        case STRUCT: {
            StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
            List<TypeInfo> typeInfoList = structTypeInfo.getAllStructFieldTypeInfos();
            ColumnVector[] children = new ColumnVector[typeInfoList.size()];
            for (int i = 0; i < children.length; ++i) {
                children[i] = createColumnVector(typeInfoList.get(i));
            }
            return new StructColumnVector(VectorizedRowBatch.DEFAULT_SIZE, children);
        }
        case UNION: {
            UnionTypeInfo unionTypeInfo = (UnionTypeInfo) typeInfo;
            List<TypeInfo> typeInfoList = unionTypeInfo.getAllUnionObjectTypeInfos();
            ColumnVector[] children = new ColumnVector[typeInfoList.size()];
            for (int i = 0; i < children.length; ++i) {
                children[i] = createColumnVector(typeInfoList.get(i));
            }
            return new UnionColumnVector(VectorizedRowBatch.DEFAULT_SIZE, children);
        }
        case LIST: {
            ListTypeInfo listTypeInfo = (ListTypeInfo) typeInfo;
            return new ListColumnVector(VectorizedRowBatch.DEFAULT_SIZE,
                    createColumnVector(listTypeInfo.getListElementTypeInfo()));
        }
        case MAP: {
            MapTypeInfo mapTypeInfo = (MapTypeInfo) typeInfo;
            return new MapColumnVector(VectorizedRowBatch.DEFAULT_SIZE,
                    createColumnVector(mapTypeInfo.getMapKeyTypeInfo()),
                    createColumnVector(mapTypeInfo.getMapValueTypeInfo()));
        }
        default:
            throw new RuntimeException("Vectorization is not supported for datatype:" + typeInfo.getCategory());
        }
    }
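
    // Sketch: complex types recurse into their children, so a map<string,int>
    // (type name chosen purely for illustration) yields a MapColumnVector whose
    // keys vector is a BytesColumnVector and whose values vector is a
    // LongColumnVector.
    //
    //   TypeInfo mapType = TypeInfoUtils.getTypeInfoFromTypeString("map<string,int>");
    //   MapColumnVector mcv = (MapColumnVector) createColumnVector(mapType);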

    /**
     * Iterates through all the columns in a given row and populates the batch
     * starting from a given column offset.
     *
     * @param row Deserialized row object
     * @param oi Object inspector for that row
     * @param rowIndex Index at which the row is added to the batch
     * @param colOffset Offset in the batch at which the row's columns begin
     * @param batch Vectorized batch to which the row is added at rowIndex
     * @param buffer Buffer that backs byte references for string-like columns
     * @throws HiveException
     */
    public static void addRowToBatchFrom(Object row, StructObjectInspector oi, int rowIndex, int colOffset,
            VectorizedRowBatch batch, DataOutputBuffer buffer) throws HiveException {
        List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs();
        final int off = colOffset;
        // Iterate through the columns and load the batch
        for (int i = 0; i < fieldRefs.size(); i++) {
            setVector(row, oi, fieldRefs.get(i), batch, buffer, rowIndex, i, off);
        }
    }
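
    // Sketch of the expected call pattern; rows, rowInspector, and batch are
    // assumed to come from the caller's deserializer and batch context.
    //
    //   DataOutputBuffer buffer = new DataOutputBuffer();
    //   for (Object row : rows) {
    //       addRowToBatchFrom(row, rowInspector, batch.size, 0, batch, buffer);
    //       batch.size++;
    //   }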

    /**
     * Adds only the projected columns of a regular row to the specified vectorized row batch.
     * @param row the regular row
     * @param oi object inspector for the row
     * @param rowIndex the index at which to add the row in the batch
     * @param batch vectorized row batch
     * @param buffer data output buffer backing byte references for string-like columns
     * @throws HiveException
     */
    public static void addProjectedRowToBatchFrom(Object row, StructObjectInspector oi, int rowIndex,
            VectorizedRowBatch batch, DataOutputBuffer buffer) throws HiveException {
        List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs();
        for (int i = 0; i < fieldRefs.size(); i++) {
            int projectedOutputCol = batch.projectedColumns[i];
            if (batch.cols[projectedOutputCol] == null) {
                continue;
            }
            setVector(row, oi, fieldRefs.get(i), batch, buffer, rowIndex, projectedOutputCol, 0);
        }
    }

    /**
     * Iterates through all the columns in a given row and populates the batch,
     * skipping columns that were not projected and partition columns whose
     * values are already set.
     *
     * @param row Deserialized row object
     * @param oi Object inspector for that row
     * @param rowIndex Index at which the row is added to the batch
     * @param batch Vectorized batch to which the row is added at rowIndex
     * @param context Context object for this vectorized batch
     * @param buffer Buffer that backs byte references for string-like columns
     * @throws HiveException
     */
    public static void acidAddRowToBatch(Object row, StructObjectInspector oi, int rowIndex,
            VectorizedRowBatch batch, VectorizedRowBatchCtx context, DataOutputBuffer buffer) throws HiveException {
        List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs();
        // Iterate through the columns and load the batch
        for (int i = 0; i < fieldRefs.size(); i++) {
            if (batch.cols[i] == null) {
                // This means the column was not included in the projection from the underlying read
                continue;
            }
            if (context.isPartitionCol(i)) {
                // The value will have already been set before we're called, so don't overwrite it
                continue;
            }
            setVector(row, oi, fieldRefs.get(i), batch, buffer, rowIndex, i, 0);
        }
    }

    private static void setVector(Object row, StructObjectInspector oi, StructField field, VectorizedRowBatch batch,
            DataOutputBuffer buffer, int rowIndex, int colIndex, int offset) throws HiveException {

        Object fieldData = oi.getStructFieldData(row, field);
        ObjectInspector foi = field.getFieldObjectInspector();

        // Vectorization only supports PRIMITIVE data types; assert that here.
        assert (foi.getCategory() == Category.PRIMITIVE);

        // Get writable object
        PrimitiveObjectInspector poi = (PrimitiveObjectInspector) foi;
        Object writableCol = poi.getPrimitiveWritableObject(fieldData);

        // NOTE: Null slots still receive a dummy, well-defined value: 1 for integer
        // types, NaN for float/double. String types receive no dummy value; only the
        // null flag is set.
        switch (poi.getPrimitiveCategory()) {
        case BOOLEAN: {
            LongColumnVector lcv = (LongColumnVector) batch.cols[offset + colIndex];
            if (writableCol != null) {
                lcv.vector[rowIndex] = ((BooleanWritable) writableCol).get() ? 1 : 0;
                lcv.isNull[rowIndex] = false;
            } else {
                lcv.vector[rowIndex] = 1;
                setNullColIsNullValue(lcv, rowIndex);
            }
        }
            break;
        case BYTE: {
            LongColumnVector lcv = (LongColumnVector) batch.cols[offset + colIndex];
            if (writableCol != null) {
                lcv.vector[rowIndex] = ((ByteWritable) writableCol).get();
                lcv.isNull[rowIndex] = false;
            } else {
                lcv.vector[rowIndex] = 1;
                setNullColIsNullValue(lcv, rowIndex);
            }
        }
            break;
        case SHORT: {
            LongColumnVector lcv = (LongColumnVector) batch.cols[offset + colIndex];
            if (writableCol != null) {
                lcv.vector[rowIndex] = ((ShortWritable) writableCol).get();
                lcv.isNull[rowIndex] = false;
            } else {
                lcv.vector[rowIndex] = 1;
                setNullColIsNullValue(lcv, rowIndex);
            }
        }
            break;
        case INT: {
            LongColumnVector lcv = (LongColumnVector) batch.cols[offset + colIndex];
            if (writableCol != null) {
                lcv.vector[rowIndex] = ((IntWritable) writableCol).get();
                lcv.isNull[rowIndex] = false;
            } else {
                lcv.vector[rowIndex] = 1;
                setNullColIsNullValue(lcv, rowIndex);
            }
        }
            break;
        case LONG: {
            LongColumnVector lcv = (LongColumnVector) batch.cols[offset + colIndex];
            if (writableCol != null) {
                lcv.vector[rowIndex] = ((LongWritable) writableCol).get();
                lcv.isNull[rowIndex] = false;
            } else {
                lcv.vector[rowIndex] = 1;
                setNullColIsNullValue(lcv, rowIndex);
            }
        }
            break;
        case DATE: {
            LongColumnVector lcv = (LongColumnVector) batch.cols[offset + colIndex];
            if (writableCol != null) {
                lcv.vector[rowIndex] = ((DateWritable) writableCol).getDays();
                lcv.isNull[rowIndex] = false;
            } else {
                lcv.vector[rowIndex] = 1;
                setNullColIsNullValue(lcv, rowIndex);
            }
        }
            break;
        case FLOAT: {
            DoubleColumnVector dcv = (DoubleColumnVector) batch.cols[offset + colIndex];
            if (writableCol != null) {
                dcv.vector[rowIndex] = ((FloatWritable) writableCol).get();
                dcv.isNull[rowIndex] = false;
            } else {
                dcv.vector[rowIndex] = Double.NaN;
                setNullColIsNullValue(dcv, rowIndex);
            }
        }
            break;
        case DOUBLE: {
            DoubleColumnVector dcv = (DoubleColumnVector) batch.cols[offset + colIndex];
            if (writableCol != null) {
                dcv.vector[rowIndex] = ((DoubleWritable) writableCol).get();
                dcv.isNull[rowIndex] = false;
            } else {
                dcv.vector[rowIndex] = Double.NaN;
                setNullColIsNullValue(dcv, rowIndex);
            }
        }
            break;
        case TIMESTAMP: {
            TimestampColumnVector lcv = (TimestampColumnVector) batch.cols[offset + colIndex];
            if (writableCol != null) {
                lcv.set(rowIndex, ((TimestampWritable) writableCol).getTimestamp());
                lcv.isNull[rowIndex] = false;
            } else {
                lcv.setNullValue(rowIndex);
                setNullColIsNullValue(lcv, rowIndex);
            }
        }
            break;
        case INTERVAL_YEAR_MONTH: {
            LongColumnVector lcv = (LongColumnVector) batch.cols[offset + colIndex];
            if (writableCol != null) {
                HiveIntervalYearMonth i = ((HiveIntervalYearMonthWritable) writableCol).getHiveIntervalYearMonth();
                lcv.vector[rowIndex] = i.getTotalMonths();
                lcv.isNull[rowIndex] = false;
            } else {
                lcv.vector[rowIndex] = 1;
                setNullColIsNullValue(lcv, rowIndex);
            }
        }
            break;
        case INTERVAL_DAY_TIME: {
            IntervalDayTimeColumnVector icv = (IntervalDayTimeColumnVector) batch.cols[offset + colIndex];
            if (writableCol != null) {
                HiveIntervalDayTime idt = ((HiveIntervalDayTimeWritable) writableCol).getHiveIntervalDayTime();
                icv.set(rowIndex, idt);
                icv.isNull[rowIndex] = false;
            } else {
                icv.setNullValue(rowIndex);
                setNullColIsNullValue(icv, rowIndex);
            }
        }
            break;
        case BINARY: {
            BytesColumnVector bcv = (BytesColumnVector) batch.cols[offset + colIndex];
            if (writableCol != null) {
                bcv.isNull[rowIndex] = false;
                BytesWritable bw = (BytesWritable) writableCol;
                byte[] bytes = bw.getBytes();
                int start = buffer.getLength();
                int length = bw.getLength();
                try {
                    buffer.write(bytes, 0, length);
                } catch (IOException ioe) {
                    throw new IllegalStateException("bad write", ioe);
                }
                bcv.setRef(rowIndex, buffer.getData(), start, length);
            } else {
                setNullColIsNullValue(bcv, rowIndex);
            }
        }
            break;
        case STRING: {
            BytesColumnVector bcv = (BytesColumnVector) batch.cols[offset + colIndex];
            if (writableCol != null) {
                bcv.isNull[rowIndex] = false;
                Text colText = (Text) writableCol;
                int start = buffer.getLength();
                int length = colText.getLength();
                try {
                    buffer.write(colText.getBytes(), 0, length);
                } catch (IOException ioe) {
                    throw new IllegalStateException("bad write", ioe);
                }
                bcv.setRef(rowIndex, buffer.getData(), start, length);
            } else {
                setNullColIsNullValue(bcv, rowIndex);
            }
        }
            break;
        case CHAR: {
            BytesColumnVector bcv = (BytesColumnVector) batch.cols[offset + colIndex];
            if (writableCol != null) {
                bcv.isNull[rowIndex] = false;
                HiveChar colHiveChar = ((HiveCharWritable) writableCol).getHiveChar();
                byte[] bytes = colHiveChar.getStrippedValue().getBytes();

                // We assume the CHAR maximum length was enforced when the object was created.
                int length = bytes.length;

                int start = buffer.getLength();
                try {
                    // In vector mode, we store CHAR as unpadded.
                    buffer.write(bytes, 0, length);
                } catch (IOException ioe) {
                    throw new IllegalStateException("bad write", ioe);
                }
                bcv.setRef(rowIndex, buffer.getData(), start, length);
            } else {
                setNullColIsNullValue(bcv, rowIndex);
            }
        }
            break;
        case VARCHAR: {
            BytesColumnVector bcv = (BytesColumnVector) batch.cols[offset + colIndex];
            if (writableCol != null) {
                bcv.isNull[rowIndex] = false;
                HiveVarchar colHiveVarchar = ((HiveVarcharWritable) writableCol).getHiveVarchar();
                byte[] bytes = colHiveVarchar.getValue().getBytes();

                // We assume the VARCHAR maximum length was enforced when the object was created.
                int length = bytes.length;

                int start = buffer.getLength();
                try {
                    buffer.write(bytes, 0, length);
                } catch (IOException ioe) {
                    throw new IllegalStateException("bad write", ioe);
                }
                bcv.setRef(rowIndex, buffer.getData(), start, length);
            } else {
                setNullColIsNullValue(bcv, rowIndex);
            }
        }
            break;
        case DECIMAL:
            DecimalColumnVector dcv = (DecimalColumnVector) batch.cols[offset + colIndex];
            if (writableCol != null) {
                dcv.isNull[rowIndex] = false;
                HiveDecimalWritable wobj = (HiveDecimalWritable) writableCol;
                dcv.set(rowIndex, wobj);
            } else {
                setNullColIsNullValue(dcv, rowIndex);
            }
            break;
        default:
            throw new HiveException("Vectorizaton is not supported for datatype:" + poi.getPrimitiveCategory());
        }
    }
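
    // Sketch of reading back a value written by setVector: because null slots
    // hold dummy values (see the NOTE above), consumers must check isNull (or
    // noNulls) before trusting the vector contents.
    //
    //   LongColumnVector lcv = (LongColumnVector) batch.cols[colIndex];
    //   if (lcv.noNulls || !lcv.isNull[rowIndex]) {
    //       long value = lcv.vector[rowIndex];   // safe to use
    //   }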

    public static StandardStructObjectInspector convertToStandardStructObjectInspector(
            StructObjectInspector structObjectInspector) throws HiveException {

        List<? extends StructField> fields = structObjectInspector.getAllStructFieldRefs();
        List<ObjectInspector> oids = new ArrayList<ObjectInspector>();
        ArrayList<String> columnNames = new ArrayList<String>();

        for (StructField field : fields) {
            TypeInfo typeInfo = TypeInfoUtils
                    .getTypeInfoFromTypeString(field.getFieldObjectInspector().getTypeName());
            ObjectInspector standardWritableObjectInspector = TypeInfoUtils
                    .getStandardWritableObjectInspectorFromTypeInfo(typeInfo);
            oids.add(standardWritableObjectInspector);
            columnNames.add(field.getFieldName());
        }
        return ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, oids);
    }

    public static String[] columnNamesFromStructObjectInspector(StructObjectInspector structObjectInspector)
            throws HiveException {

        List<? extends StructField> fields = structObjectInspector.getAllStructFieldRefs();
        String[] result = new String[fields.size()];

        int i = 0;
        for (StructField field : fields) {
            result[i++] = field.getFieldName();
        }
        return result;
    }

    public static TypeInfo[] typeInfosFromTypeNames(String[] typeNames) throws HiveException {
        ArrayList<TypeInfo> typeInfoList = TypeInfoUtils.typeInfosFromTypeNames(Arrays.asList(typeNames));
        return typeInfoList.toArray(new TypeInfo[0]);
    }

    public static TypeInfo[] typeInfosFromStructObjectInspector(StructObjectInspector structObjectInspector) {
        ArrayList<TypeInfo> typeInfoList = TypeInfoUtils.typeInfosFromStructObjectInspector(structObjectInspector);
        return typeInfoList.toArray(new TypeInfo[0]);
    }

    public static ColumnVector makeLikeColumnVector(ColumnVector source) throws HiveException {
        if (source instanceof LongColumnVector) {
            return new LongColumnVector(((LongColumnVector) source).vector.length);
        } else if (source instanceof DoubleColumnVector) {
            return new DoubleColumnVector(((DoubleColumnVector) source).vector.length);
        } else if (source instanceof BytesColumnVector) {
            return new BytesColumnVector(((BytesColumnVector) source).vector.length);
        } else if (source instanceof DecimalColumnVector) {
            DecimalColumnVector decColVector = (DecimalColumnVector) source;
            return new DecimalColumnVector(decColVector.vector.length, decColVector.precision, decColVector.scale);
        } else if (source instanceof TimestampColumnVector) {
            return new TimestampColumnVector(((TimestampColumnVector) source).getLength());
        } else if (source instanceof IntervalDayTimeColumnVector) {
            return new IntervalDayTimeColumnVector(((IntervalDayTimeColumnVector) source).getLength());
        } else if (source instanceof ListColumnVector) {
            ListColumnVector src = (ListColumnVector) source;
            ColumnVector child = makeLikeColumnVector(src.child);
            return new ListColumnVector(src.offsets.length, child);
        } else if (source instanceof MapColumnVector) {
            MapColumnVector src = (MapColumnVector) source;
            ColumnVector keys = makeLikeColumnVector(src.keys);
            ColumnVector values = makeLikeColumnVector(src.values);
            return new MapColumnVector(src.offsets.length, keys, values);
        } else if (source instanceof StructColumnVector) {
            StructColumnVector src = (StructColumnVector) source;
            ColumnVector[] copy = new ColumnVector[src.fields.length];
            for (int i = 0; i < copy.length; ++i) {
                copy[i] = makeLikeColumnVector(src.fields[i]);
            }
            return new StructColumnVector(VectorizedRowBatch.DEFAULT_SIZE, copy);
        } else if (source instanceof UnionColumnVector) {
            UnionColumnVector src = (UnionColumnVector) source;
            ColumnVector[] copy = new ColumnVector[src.fields.length];
            for (int i = 0; i < copy.length; ++i) {
                copy[i] = makeLikeColumnVector(src.fields[i]);
            }
            return new UnionColumnVector(src.tags.length, copy);
        } else {
            throw new HiveException("Column vector class " + source.getClass().getName() + " is not supported!");
        }
    }

    public static void swapColumnVector(VectorizedRowBatch batch1, int batch1ColumnNum, VectorizedRowBatch batch2,
            int batch2ColumnNum) {
        ColumnVector colVector1 = batch1.cols[batch1ColumnNum];
        batch1.cols[batch1ColumnNum] = batch2.cols[batch2ColumnNum];
        batch2.cols[batch2ColumnNum] = colVector1;
    }

    public static void copyRepeatingColumn(VectorizedRowBatch sourceBatch, int sourceColumnNum,
            VectorizedRowBatch targetBatch, int targetColumnNum, boolean setByValue) {
        ColumnVector sourceColVector = sourceBatch.cols[sourceColumnNum];
        ColumnVector targetColVector = targetBatch.cols[targetColumnNum];

        targetColVector.isRepeating = true;

        if (!sourceColVector.noNulls) {
            targetColVector.noNulls = false;
            targetColVector.isNull[0] = true;
            return;
        }

        if (sourceColVector instanceof LongColumnVector) {
            ((LongColumnVector) targetColVector).vector[0] = ((LongColumnVector) sourceColVector).vector[0];
        } else if (sourceColVector instanceof DoubleColumnVector) {
            ((DoubleColumnVector) targetColVector).vector[0] = ((DoubleColumnVector) sourceColVector).vector[0];
        } else if (sourceColVector instanceof BytesColumnVector) {
            BytesColumnVector bytesColVector = (BytesColumnVector) sourceColVector;
            byte[] bytes = bytesColVector.vector[0];
            final int start = bytesColVector.start[0];
            final int length = bytesColVector.length[0];
            if (setByValue) {
                ((BytesColumnVector) targetColVector).setVal(0, bytes, start, length);
            } else {
                ((BytesColumnVector) targetColVector).setRef(0, bytes, start, length);
            }
        } else if (sourceColVector instanceof DecimalColumnVector) {
            ((DecimalColumnVector) targetColVector).set(0, ((DecimalColumnVector) sourceColVector).vector[0]);
        } else if (sourceColVector instanceof TimestampColumnVector) {
            ((TimestampColumnVector) targetColVector).set(0,
                    ((TimestampColumnVector) sourceColVector).asScratchTimestamp(0));
        } else if (sourceColVector instanceof IntervalDayTimeColumnVector) {
            ((IntervalDayTimeColumnVector) targetColVector).set(0,
                    ((IntervalDayTimeColumnVector) sourceColVector).asScratchIntervalDayTime(0));
        } else {
            throw new RuntimeException(
                    "Column vector class " + sourceColVector.getClass().getName() + " is not supported!");
        }
    }
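
    // Sketch: setByValue controls whether string data is copied into the target
    // vector's own buffer (setVal) or shared by reference (setRef). Copying by
    // value is the safe choice when the source batch may be reused before the
    // target batch is consumed.
    //
    //   copyRepeatingColumn(srcBatch, 0, dstBatch, 0, true);   // deep copy
    //   copyRepeatingColumn(srcBatch, 1, dstBatch, 1, false);  // shared reference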

    /**
     * Make a new (scratch) batch, which is exactly "like" the batch provided, except that it's empty
     * @param batch the batch to imitate
     * @return the new batch
     * @throws HiveException
     */
    public static VectorizedRowBatch makeLike(VectorizedRowBatch batch) throws HiveException {
        VectorizedRowBatch newBatch = new VectorizedRowBatch(batch.numCols);
        for (int i = 0; i < batch.numCols; i++) {
            if (batch.cols[i] != null) {
                newBatch.cols[i] = makeLikeColumnVector(batch.cols[i]);
                newBatch.cols[i].init();
            }
        }
        newBatch.projectedColumns = Arrays.copyOf(batch.projectedColumns, batch.projectedColumns.length);
        newBatch.projectionSize = batch.projectionSize;
        newBatch.reset();
        return newBatch;
    }
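
    // Sketch (the incoming batch is assumed to come from the caller's reader):
    // the scratch batch mirrors the original's column types and projection but
    // starts out empty.
    //
    //   VectorizedRowBatch scratch = makeLike(incoming);
    //   assert scratch.size == 0 && scratch.numCols == incoming.numCols;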

    public static Writable getPrimitiveWritable(PrimitiveCategory primitiveCategory) {
        switch (primitiveCategory) {
        case VOID:
            return null;
        case BOOLEAN:
            return new BooleanWritable(false);
        case BYTE:
            return new ByteWritable((byte) 0);
        case SHORT:
            return new ShortWritable((short) 0);
        case INT:
            return new IntWritable(0);
        case LONG:
            return new LongWritable(0);
        case TIMESTAMP:
            return new TimestampWritable(new Timestamp(0));
        case DATE:
            return new DateWritable(new Date(0));
        case FLOAT:
            return new FloatWritable(0);
        case DOUBLE:
            return new DoubleWritable(0);
        case BINARY:
            return new BytesWritable(ArrayUtils.EMPTY_BYTE_ARRAY);
        case STRING:
            return new Text(ArrayUtils.EMPTY_BYTE_ARRAY);
        case VARCHAR:
            return new HiveVarcharWritable(new HiveVarchar(StringUtils.EMPTY, -1));
        case CHAR:
            return new HiveCharWritable(new HiveChar(StringUtils.EMPTY, -1));
        case DECIMAL:
            return new HiveDecimalWritable();
        case INTERVAL_YEAR_MONTH:
            return new HiveIntervalYearMonthWritable();
        case INTERVAL_DAY_TIME:
            return new HiveIntervalDayTimeWritable();
        default:
            throw new RuntimeException("Primitive category " + primitiveCategory.name() + " not supported");
        }
    }
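
    // Sketch: the returned writables are zero-valued placeholders intended to
    // be overwritten. Note that VOID maps to null, and CHAR/VARCHAR are built
    // with length -1, so no maximum length is enforced on the empty value.
    //
    //   Writable w = getPrimitiveWritable(PrimitiveCategory.INT);
    //   ((IntWritable) w).set(42);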

    public static String displayBytes(byte[] bytes, int start, int length) {
        StringBuilder sb = new StringBuilder();
        for (int i = start; i < start + length; i++) {
            char ch = (char) bytes[i];
            if (ch < ' ' || ch > '~') {
                sb.append(String.format("\\%03d", bytes[i] & 0xff));
            } else {
                sb.append(ch);
            }
        }
        return sb.toString();
    }
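
    // Sketch: printable ASCII passes through unchanged; every other byte is
    // rendered as a backslash followed by its unsigned value in three decimal
    // digits.
    //
    //   displayBytes(new byte[] { 'h', 'i', 0, (byte) 0xFF }, 0, 4)
    //   // -> "hi\000\255"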

    public static void debugDisplayOneRow(VectorizedRowBatch batch, int index, String prefix) {
        StringBuilder sb = new StringBuilder();
        LOG.info(debugFormatOneRow(batch, index, prefix, sb).toString());
    }

    public static StringBuilder debugFormatOneRow(VectorizedRowBatch batch, int index, String prefix,
            StringBuilder sb) {
        sb.append(prefix + " row " + index + " ");
        for (int p = 0; p < batch.projectionSize; p++) {
            int column = batch.projectedColumns[p];
            sb.append("(" + p + "," + column + ") ");
            ColumnVector colVector = batch.cols[column];
            if (colVector == null) {
                sb.append("(null ColumnVector)");
            } else {
                boolean isRepeating = colVector.isRepeating;
                if (isRepeating) {
                    sb.append("(repeating)");
                }
                index = (isRepeating ? 0 : index);
                if (colVector.noNulls || !colVector.isNull[index]) {
                    if (colVector instanceof LongColumnVector) {
                        sb.append(((LongColumnVector) colVector).vector[index]);
                    } else if (colVector instanceof DoubleColumnVector) {
                        sb.append(((DoubleColumnVector) colVector).vector[index]);
                    } else if (colVector instanceof BytesColumnVector) {
                        BytesColumnVector bytesColumnVector = (BytesColumnVector) colVector;
                        byte[] bytes = bytesColumnVector.vector[index];
                        int start = bytesColumnVector.start[index];
                        int length = bytesColumnVector.length[index];
                        if (bytes == null) {
                            sb.append("(Unexpected null bytes with start " + start + " length " + length + ")");
                        } else {
                            sb.append("bytes: '" + displayBytes(bytes, start, length) + "'");
                        }
                    } else if (colVector instanceof DecimalColumnVector) {
                        sb.append(((DecimalColumnVector) colVector).vector[index].toString());
                    } else if (colVector instanceof TimestampColumnVector) {
                        Timestamp timestamp = new Timestamp(0);
                        ((TimestampColumnVector) colVector).timestampUpdate(timestamp, index);
                        sb.append(timestamp.toString());
                    } else if (colVector instanceof IntervalDayTimeColumnVector) {
                        HiveIntervalDayTime intervalDayTime = ((IntervalDayTimeColumnVector) colVector)
                                .asScratchIntervalDayTime(index);
                        sb.append(intervalDayTime.toString());
                    } else {
                        sb.append("Unknown");
                    }
                } else {
                    sb.append("NULL");
                }
            }
            sb.append(" ");
        }
        return sb;
    }

    public static void debugDisplayBatch(VectorizedRowBatch batch, String prefix) {
        for (int i = 0; i < batch.size; i++) {
            int index = (batch.selectedInUse ? batch.selected[i] : i);
            debugDisplayOneRow(batch, index, prefix);
        }
    }
}