org.apache.spark.sql.execution.vectorized.ColumnVector.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.spark.sql.execution.vectorized.ColumnVector.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.spark.sql.execution.vectorized;

import java.math.BigDecimal;
import java.math.BigInteger;

import org.apache.commons.lang.NotImplementedException;
import org.apache.parquet.column.Dictionary;
import org.apache.parquet.io.api.Binary;

import org.apache.spark.memory.MemoryMode;
import org.apache.spark.sql.catalyst.InternalRow;
import org.apache.spark.sql.catalyst.util.ArrayData;
import org.apache.spark.sql.catalyst.util.MapData;
import org.apache.spark.sql.types.*;
import org.apache.spark.unsafe.types.CalendarInterval;
import org.apache.spark.unsafe.types.UTF8String;

/**
 * This class represents a column of values and provides the main APIs to access the data
 * values. It supports all the types and contains get/put APIs as well as their batched versions.
 * The batched versions are preferable whenever possible.
 *
 * To handle nested schemas, ColumnVector has two types: Arrays and Structs. In both cases these
 * columns have child columns. All of the data is stored in the child columns and the parent column
 * contains nullability, and in the case of Arrays, the lengths and offsets into the child column.
 * Lengths and offsets are encoded identically to INTs.
 * Maps are just a special case of a two field struct.
 * Strings are handled as an Array of ByteType.
 *
 * Capacity: The data stored is dense but the arrays are not fixed capacity. It is the
 * responsibility of the caller to call reserve() to ensure there is enough room before adding
 * elements. This means that the put() APIs do not check as in common cases (i.e. flat schemas),
 * the lengths are known up front.
 *
 * Most of the APIs take the rowId as a parameter. This is the batch local 0-based row id for values
 * in the current RowBatch.
 *
 * A ColumnVector should be considered immutable once originally created. In other words, it is not
 * valid to call put APIs after reads until reset() is called.
 *
 * ColumnVectors are intended to be reused.
 */
public abstract class ColumnVector implements AutoCloseable {
    /**
     * Creates a column vector for elements of {@code type}, backed by on-heap or off-heap memory.
     *
     * @param capacity initial capacity in number of elements (not bytes); the vector grows as
     *                 necessary
     * @param type     data type of the elements
     * @param mode     {@link MemoryMode#OFF_HEAP} selects the off-heap implementation, any other
     *                 value the on-heap one
     * @return the newly created vector
     */
    public static ColumnVector allocate(int capacity, DataType type, MemoryMode mode) {
        final boolean offHeap = (mode == MemoryMode.OFF_HEAP);
        if (!offHeap) {
            return new OnHeapColumnVector(capacity, type);
        }
        return new OffHeapColumnVector(capacity, type);
    }

    /**
     * Holder object to return an array. This object is intended to be reused. Callers should
     * copy the data out if it needs to be stored.
     *
     * The holder is a window over {@code data}: element {@code ordinal} of the array is read from
     * row {@code offset + ordinal} of the backing column.
     */
    public static final class Array extends ArrayData {
        // The data for this array. This array contains elements from
        // data[offset] to data[offset + length).
        public final ColumnVector data;
        public int length;
        public int offset;

        // Populate if binary data is required for the Array. This is stored here as an optimization
        // for string data.
        public byte[] byteArray;
        public int byteArrayOffset;

        // Reused staging buffer, used for loading from offheap.
        protected byte[] tmpByteArray = new byte[1];

        protected Array(ColumnVector data) {
            this.data = data;
        }

        /** Returns the number of elements currently exposed by this holder. */
        @Override
        public int numElements() {
            return length;
        }

        /** Not supported; always throws {@link NotImplementedException}. */
        @Override
        public ArrayData copy() {
            throw new NotImplementedException();
        }

        /**
         * Materializes the elements as a boxed {@code Object[]} of size {@code length}. Slots
         * whose backing row is null are left as {@code null}. String elements are converted to
         * {@code java.lang.String}.
         *
         * @throws NotImplementedException if the element type is not one of the handled types
         */
        // TODO: this is extremely expensive.
        @Override
        public Object[] array() {
            DataType dt = data.dataType();
            Object[] list = new Object[length];

            if (dt instanceof BooleanType) {
                for (int i = 0; i < length; i++) {
                    if (!data.isNullAt(offset + i)) {
                        list[i] = data.getBoolean(offset + i);
                    }
                }
            } else if (dt instanceof ByteType) {
                for (int i = 0; i < length; i++) {
                    if (!data.isNullAt(offset + i)) {
                        list[i] = data.getByte(offset + i);
                    }
                }
            } else if (dt instanceof ShortType) {
                for (int i = 0; i < length; i++) {
                    if (!data.isNullAt(offset + i)) {
                        list[i] = data.getShort(offset + i);
                    }
                }
            } else if (dt instanceof IntegerType) {
                for (int i = 0; i < length; i++) {
                    if (!data.isNullAt(offset + i)) {
                        list[i] = data.getInt(offset + i);
                    }
                }
            } else if (dt instanceof FloatType) {
                for (int i = 0; i < length; i++) {
                    if (!data.isNullAt(offset + i)) {
                        list[i] = data.getFloat(offset + i);
                    }
                }
            } else if (dt instanceof DoubleType) {
                for (int i = 0; i < length; i++) {
                    if (!data.isNullAt(offset + i)) {
                        list[i] = data.getDouble(offset + i);
                    }
                }
            } else if (dt instanceof LongType) {
                for (int i = 0; i < length; i++) {
                    if (!data.isNullAt(offset + i)) {
                        list[i] = data.getLong(offset + i);
                    }
                }
            } else if (dt instanceof DecimalType) {
                DecimalType decType = (DecimalType) dt;
                for (int i = 0; i < length; i++) {
                    if (!data.isNullAt(offset + i)) {
                        // The holder's own getter adds `offset`, so pass the array-local index.
                        list[i] = getDecimal(i, decType.precision(), decType.scale());
                    }
                }
            } else if (dt instanceof StringType) {
                for (int i = 0; i < length; i++) {
                    if (!data.isNullAt(offset + i)) {
                        list[i] = getUTF8String(i).toString();
                    }
                }
            } else if (dt instanceof CalendarIntervalType) {
                for (int i = 0; i < length; i++) {
                    if (!data.isNullAt(offset + i)) {
                        list[i] = getInterval(i);
                    }
                }
            } else {
                throw new NotImplementedException("Type " + dt);
            }
            return list;
        }

        @Override
        public boolean isNullAt(int ordinal) {
            return data.isNullAt(offset + ordinal);
        }

        // The primitive getters below all read row (offset + ordinal) of the backing column.

        @Override
        public boolean getBoolean(int ordinal) {
            return data.getBoolean(offset + ordinal);
        }

        @Override
        public byte getByte(int ordinal) {
            return data.getByte(offset + ordinal);
        }

        @Override
        public short getShort(int ordinal) {
            return data.getShort(offset + ordinal);
        }

        @Override
        public int getInt(int ordinal) {
            return data.getInt(offset + ordinal);
        }

        @Override
        public long getLong(int ordinal) {
            return data.getLong(offset + ordinal);
        }

        @Override
        public float getFloat(int ordinal) {
            return data.getFloat(offset + ordinal);
        }

        @Override
        public double getDouble(int ordinal) {
            return data.getDouble(offset + ordinal);
        }

        @Override
        public Decimal getDecimal(int ordinal, int precision, int scale) {
            return data.getDecimal(offset + ordinal, precision, scale);
        }

        @Override
        public UTF8String getUTF8String(int ordinal) {
            return data.getUTF8String(offset + ordinal);
        }

        @Override
        public byte[] getBinary(int ordinal) {
            return data.getBinary(offset + ordinal);
        }

        /**
         * Builds an interval from the two child columns of the backing column: child 0 is read
         * as the int month count and child 1 as the long microsecond count.
         */
        @Override
        public CalendarInterval getInterval(int ordinal) {
            int month = data.getChildColumn(0).getInt(offset + ordinal);
            long microseconds = data.getChildColumn(1).getLong(offset + ordinal);
            return new CalendarInterval(month, microseconds);
        }

        /** Returns the backing column's struct holder for the element; {@code numFields} is unused. */
        @Override
        public InternalRow getStruct(int ordinal, int numFields) {
            return data.getStruct(offset + ordinal);
        }

        @Override
        public ArrayData getArray(int ordinal) {
            return data.getArray(offset + ordinal);
        }

        /** Not supported; always throws {@link NotImplementedException}. */
        @Override
        public MapData getMap(int ordinal) {
            throw new NotImplementedException();
        }

        /** Not supported; always throws {@link NotImplementedException}. */
        @Override
        public Object get(int ordinal, DataType dataType) {
            throw new NotImplementedException();
        }
    }

    /**
     * Gives the data type this column was created with.
     *
     * @return the column's data type
     */
    public final DataType dataType() {
        return this.type;
    }

    /**
     * Prepares this column to be written again from the start; values stored before the call are
     * no longer accessible. Child columns are reset first. Columns marked constant are left
     * untouched.
     */
    public void reset() {
        if (!isConstant) {
            if (childColumns != null) {
                for (int i = 0; i < childColumns.length; i++) {
                    childColumns[i].reset();
                }
            }
            numNulls = 0;
            elementsAppended = 0;
            // Null indicators only need clearing when at least one was set.
            if (anyNullsSet) {
                putNotNulls(0, capacity);
                anyNullsSet = false;
            }
        }
    }

    /**
     * Cleans up memory for this column. The column is not usable after this.
     * Declared without a checked exception, so callers using this class as an
     * {@link AutoCloseable} do not have to handle one.
     * TODO: this should probably have ref-counted semantics.
     */
    public abstract void close();

    /**
     * Ensures that there is enough storage to store capacity elements. That is, the put() APIs
     * must work for all rowIds < capacity.
     *
     * @param capacity number of elements the column must be able to hold after the call
     */
    public abstract void reserve(int capacity);

    /**
     * Reports how many nulls this column holds.
     *
     * @return the null count tracked for this column
     */
    public final int numNulls() {
        return this.numNulls;
    }

    /**
     * Tells whether any null indicator is set for this column. Writers can use this as an
     * optimization to avoid clearing null indicators that were never set.
     *
     * @return true if at least one null indicator is set
     */
    public final boolean anyNullsSet() {
        return this.anyNullsSet;
    }

    /**
     * Returns the off heap ptr for the array backing the NULLs buffer. Only valid
     * to call for off heap columns.
     */
    public abstract long nullsNativeAddress();

    /**
     * Returns the off heap ptr for the array backing the values buffer. Only valid
     * to call for off heap columns.
     */
    public abstract long valuesNativeAddress();

    /**
     * Sets the value at rowId to not null.
     */
    public abstract void putNotNull(int rowId);

    /**
     * Sets the value at rowId to null.
     */
    public abstract void putNull(int rowId);

    /**
     * Sets the values from [rowId, rowId + count) to null.
     */
    public abstract void putNulls(int rowId, int count);

    /**
     * Sets the values from [rowId, rowId + count) to not null.
     */
    public abstract void putNotNulls(int rowId, int count);

    /**
     * Returns whether the value at rowId is NULL.
     */
    public abstract boolean isNullAt(int rowId);

    /**
     * Sets the value at rowId to `value`.
     * Like all put() APIs this does not grow the column; callers reserve() room beforehand.
     */
    public abstract void putBoolean(int rowId, boolean value);

    /**
     * Sets values from [rowId, rowId + count) to value.
     */
    public abstract void putBooleans(int rowId, int count, boolean value);

    /**
     * Returns the boolean value for rowId.
     */
    public abstract boolean getBoolean(int rowId);

    /**
     * Sets the value at rowId to `value`.
     * Like all put() APIs this does not grow the column; callers reserve() room beforehand.
     */
    public abstract void putByte(int rowId, byte value);

    /**
     * Sets values from [rowId, rowId + count) to value.
     */
    public abstract void putBytes(int rowId, int count, byte value);

    /**
     * Sets values from [rowId, rowId + count) to [src + srcIndex, src + srcIndex + count)
     * i.e. copies `count` bytes of `src`, starting at index `srcIndex`.
     */
    public abstract void putBytes(int rowId, int count, byte[] src, int srcIndex);

    /**
     * Returns the byte value for rowId.
     */
    public abstract byte getByte(int rowId);

    /**
     * Sets the value at rowId to `value`.
     * Like all put() APIs this does not grow the column; callers reserve() room beforehand.
     */
    public abstract void putShort(int rowId, short value);

    /**
     * Sets values from [rowId, rowId + count) to value.
     */
    public abstract void putShorts(int rowId, int count, short value);

    /**
     * Sets values from [rowId, rowId + count) to [src + srcIndex, src + srcIndex + count)
     * i.e. copies `count` shorts of `src`, starting at index `srcIndex`.
     */
    public abstract void putShorts(int rowId, int count, short[] src, int srcIndex);

    /**
     * Returns the short value for rowId.
     */
    public abstract short getShort(int rowId);

    /**
     * Sets the value at rowId to `value`.
     * Like all put() APIs this does not grow the column; callers reserve() room beforehand.
     */
    public abstract void putInt(int rowId, int value);

    /**
     * Sets values from [rowId, rowId + count) to value.
     */
    public abstract void putInts(int rowId, int count, int value);

    /**
     * Sets values from [rowId, rowId + count) to [src + srcIndex, src + srcIndex + count)
     * i.e. copies `count` ints of `src`, starting at index `srcIndex`.
     */
    public abstract void putInts(int rowId, int count, int[] src, int srcIndex);

    /**
     * Sets values from [rowId, rowId + count) to `count` ints decoded from src, starting at
     * src[srcIndex].
     * The data in src must be 4-byte little endian ints.
     * NOTE(review): presumably srcIndex is a byte offset and count * 4 bytes are consumed;
     * confirm against the implementations.
     */
    public abstract void putIntsLittleEndian(int rowId, int count, byte[] src, int srcIndex);

    /**
     * Returns the int value for rowId.
     */
    public abstract int getInt(int rowId);

    /**
     * Sets the value at rowId to `value`.
     * Like all put() APIs this does not grow the column; callers reserve() room beforehand.
     */
    public abstract void putLong(int rowId, long value);

    /**
     * Sets values from [rowId, rowId + count) to value.
     */
    public abstract void putLongs(int rowId, int count, long value);

    /**
     * Sets values from [rowId, rowId + count) to [src + srcIndex, src + srcIndex + count)
     * i.e. copies `count` longs of `src`, starting at index `srcIndex`.
     */
    public abstract void putLongs(int rowId, int count, long[] src, int srcIndex);

    /**
     * Sets values from [rowId, rowId + count) to `count` longs decoded from src, starting at
     * src[srcIndex].
     * The data in src must be 8-byte little endian longs.
     * NOTE(review): presumably srcIndex is a byte offset and count * 8 bytes are consumed;
     * confirm against the implementations.
     */
    public abstract void putLongsLittleEndian(int rowId, int count, byte[] src, int srcIndex);

    /**
     * Returns the long value for rowId.
     */
    public abstract long getLong(int rowId);

    /**
     * Sets the value at rowId to `value`.
     * Like all put() APIs this does not grow the column; callers reserve() room beforehand.
     */
    public abstract void putFloat(int rowId, float value);

    /**
     * Sets values from [rowId, rowId + count) to value.
     */
    public abstract void putFloats(int rowId, int count, float value);

    /**
     * Sets values from [rowId, rowId + count) to [src + srcIndex, src + srcIndex + count)
     * i.e. copies `count` floats of `src`, starting at index `srcIndex`.
     */
    public abstract void putFloats(int rowId, int count, float[] src, int srcIndex);

    /**
     * Sets values from [rowId, rowId + count) to `count` floats decoded from src, starting at
     * src[srcIndex].
     * The data in src must be ieee formatted floats.
     * NOTE(review): presumably srcIndex is a byte offset into src; confirm against the
     * implementations.
     */
    public abstract void putFloats(int rowId, int count, byte[] src, int srcIndex);

    /**
     * Returns the float value for rowId.
     */
    public abstract float getFloat(int rowId);

    /**
     * Sets the value at rowId to `value`.
     * Like all put() APIs this does not grow the column; callers reserve() room beforehand.
     */
    public abstract void putDouble(int rowId, double value);

    /**
     * Sets values from [rowId, rowId + count) to value.
     */
    public abstract void putDoubles(int rowId, int count, double value);

    /**
     * Sets values from [rowId, rowId + count) to [src + srcIndex, src + srcIndex + count)
     * i.e. copies `count` doubles of `src`, starting at index `srcIndex`.
     */
    public abstract void putDoubles(int rowId, int count, double[] src, int srcIndex);

    /**
     * Sets values from [rowId, rowId + count) to `count` doubles decoded from src, starting at
     * src[srcIndex].
     * The data in src must be ieee formatted doubles.
     * NOTE(review): presumably srcIndex is a byte offset into src; confirm against the
     * implementations.
     */
    public abstract void putDoubles(int rowId, int count, byte[] src, int srcIndex);

    /**
     * Returns the double value for rowId.
     */
    public abstract double getDouble(int rowId);

    /**
     * Records, for rowId, an array whose elements already exist in this column's child data
     * column (this is how byte arrays are stored as well).
     *
     * @param rowId  row of this (parent) column to describe
     * @param offset position of the array's first element in the child column
     * @param length number of elements in the array
     */
    public abstract void putArray(int rowId, int offset, int length);

    /**
     * Returns the length of the array at rowid.
     */
    public abstract int getArrayLength(int rowId);

    /**
     * Returns the offset of the array at rowid, i.e. where its elements start in the child
     * column.
     */
    public abstract int getArrayOffset(int rowId);

    /**
     * Points the reusable struct holder at {@code rowId} and returns it. The same holder
     * instance is handed out on every call.
     */
    public ColumnarBatch.Row getStruct(int rowId) {
        final ColumnarBatch.Row row = resultStruct;
        row.rowId = rowId;
        return row;
    }

    /**
     * Same as {@link #getStruct(int)}; the {@code size} argument is ignored. Provided to keep
     * API compatibility with InternalRow for code generation.
     */
    public ColumnarBatch.Row getStruct(int rowId, int size) {
        final ColumnarBatch.Row row = resultStruct;
        row.rowId = rowId;
        return row;
    }

    /**
     * Fills the reusable array holder with the length and offset recorded for {@code rowId} and
     * returns it. The same holder instance is handed out on every call.
     */
    public final Array getArray(int rowId) {
        final Array holder = resultArray;
        holder.length = getArrayLength(rowId);
        holder.offset = getArrayOffset(rowId);
        return holder;
    }

    /**
     * Loads the data into array.byteArray.
     * Callers in this class read the result through array.byteArray, array.byteArrayOffset and
     * array.length.
     */
    public abstract void loadBytes(Array array);

    /**
     * Sets the value at rowId to the `count` bytes of `value` starting at `offset`.
     * NOTE(review): the meaning of the returned int is not visible here; presumably it is the
     * position at which the bytes were stored in the child column -- confirm against the
     * implementations.
     */
    public abstract int putByteArray(int rowId, byte[] value, int offset, int count);

    /**
     * Stores the whole of {@code value} at {@code rowId}; shorthand for the four-argument
     * overload with offset 0 and the full array length.
     */
    public final int putByteArray(int rowId, byte[] value) {
        final int count = value.length;
        return putByteArray(rowId, value, 0, count);
    }

    /**
     * Returns the reusable array holder for {@code rowId} after asking its backing column to
     * load the bytes into the holder.
     */
    private Array getByteArray(int rowId) {
        final Array holder = getArray(rowId);
        final ColumnVector bytes = holder.data;
        bytes.loadBytes(holder);
        return holder;
    }

    /**
     * Would return the map value for the given row, but maps are not supported here: this
     * implementation always throws {@link NotImplementedException}.
     */
    public MapData getMap(int ordinal) {
        throw new NotImplementedException();
    }

    /**
     * Returns the decimal for rowId. The stored representation is chosen by {@code precision}:
     * an int up to {@code Decimal.MAX_INT_DIGITS()}, a long up to
     * {@code Decimal.MAX_LONG_DIGITS()}, and otherwise a byte array parsed as the unscaled
     * {@link BigInteger}.
     */
    public final Decimal getDecimal(int rowId, int precision, int scale) {
        if (precision <= Decimal.MAX_INT_DIGITS()) {
            return Decimal.createUnsafe(getInt(rowId), precision, scale);
        }
        if (precision <= Decimal.MAX_LONG_DIGITS()) {
            return Decimal.createUnsafe(getLong(rowId), precision, scale);
        }
        // TODO: best perf?
        final BigInteger unscaled = new BigInteger(getBinary(rowId));
        return Decimal.apply(new BigDecimal(unscaled, scale), precision, scale);
    }

    /**
     * Stores {@code value} at {@code rowId} using the representation selected by
     * {@code precision}: the unscaled value as an int, as a long, or as the byte array of the
     * unscaled {@link BigInteger}.
     */
    public final void putDecimal(int rowId, Decimal value, int precision) {
        if (precision <= Decimal.MAX_INT_DIGITS()) {
            putInt(rowId, (int) value.toUnscaledLong());
            return;
        }
        if (precision <= Decimal.MAX_LONG_DIGITS()) {
            putLong(rowId, value.toUnscaledLong());
            return;
        }
        final BigInteger unscaled = value.toJavaBigDecimal().unscaledValue();
        putByteArray(rowId, unscaled.toByteArray());
    }

    /**
     * Returns the UTF8String for rowId. When a dictionary is set, the id stored in the
     * dictionary-id column is decoded through it; otherwise the bytes come from this column's
     * byte array data.
     */
    public final UTF8String getUTF8String(int rowId) {
        if (dictionary != null) {
            final Binary decoded = dictionary.decodeToBinary(dictionaryIds.getInt(rowId));
            return UTF8String.fromBytes(decoded.getBytes());
        }
        final ColumnVector.Array bytes = getByteArray(rowId);
        return UTF8String.fromBytes(bytes.byteArray, bytes.byteArrayOffset, bytes.length);
    }

    /**
     * Returns the byte array for rowId. When a dictionary is set, the id stored in the
     * dictionary-id column is decoded through it; otherwise the bytes are copied out of the
     * reusable array holder into a fresh array.
     */
    public final byte[] getBinary(int rowId) {
        if (dictionary != null) {
            final Binary decoded = dictionary.decodeToBinary(dictionaryIds.getInt(rowId));
            return decoded.getBytes();
        }
        final ColumnVector.Array holder = getByteArray(rowId);
        final byte[] copy = new byte[holder.length];
        System.arraycopy(holder.byteArray, holder.byteArrayOffset, copy, 0, copy.length);
        return copy;
    }

    /**
     * Append APIs. They all behave alike: room is reserved, the data is written after the last
     * appended row, and the write cursor advances. Mixing the put and append APIs is not valid.
     * The append APIs are slower and should only be used when sizes are not known up front.
     * In all these cases, the return value is the rowId for the first appended element.
     */
    public final int appendNull() {
        assert (!(dataType() instanceof StructType)); // Use appendStruct()
        reserve(elementsAppended + 1);
        putNull(elementsAppended);
        final int rowId = elementsAppended;
        elementsAppended = rowId + 1;
        return rowId;
    }

    /** Appends one row marked as not null and returns its rowId. */
    public final int appendNotNull() {
        reserve(elementsAppended + 1);
        putNotNull(elementsAppended);
        final int rowId = elementsAppended;
        elementsAppended = rowId + 1;
        return rowId;
    }

    /** Appends {@code count} null rows and returns the rowId of the first one. */
    public final int appendNulls(int count) {
        assert (!(dataType() instanceof StructType));
        reserve(elementsAppended + count);
        final int firstRowId = elementsAppended;
        putNulls(firstRowId, count);
        elementsAppended = elementsAppended + count;
        return firstRowId;
    }

    /** Appends {@code count} not-null rows and returns the rowId of the first one. */
    public final int appendNotNulls(int count) {
        assert (!(dataType() instanceof StructType));
        reserve(elementsAppended + count);
        final int firstRowId = elementsAppended;
        putNotNulls(firstRowId, count);
        elementsAppended = elementsAppended + count;
        return firstRowId;
    }

    /** Appends one boolean after reserving room for it and returns its rowId. */
    public final int appendBoolean(boolean v) {
        reserve(elementsAppended + 1);
        putBoolean(elementsAppended, v);
        final int rowId = elementsAppended;
        elementsAppended = rowId + 1;
        return rowId;
    }

    /** Appends {@code count} copies of {@code v} and returns the rowId of the first one. */
    public final int appendBooleans(int count, boolean v) {
        reserve(elementsAppended + count);
        final int firstRowId = elementsAppended;
        putBooleans(firstRowId, count, v);
        elementsAppended = elementsAppended + count;
        return firstRowId;
    }

    /** Appends one byte after reserving room for it and returns its rowId. */
    public final int appendByte(byte v) {
        reserve(elementsAppended + 1);
        putByte(elementsAppended, v);
        final int rowId = elementsAppended;
        elementsAppended = rowId + 1;
        return rowId;
    }

    /** Appends {@code count} copies of {@code v} and returns the rowId of the first one. */
    public final int appendBytes(int count, byte v) {
        reserve(elementsAppended + count);
        final int firstRowId = elementsAppended;
        putBytes(firstRowId, count, v);
        elementsAppended = elementsAppended + count;
        return firstRowId;
    }

    /**
     * Appends {@code length} bytes of {@code src}, starting at index {@code offset}, and returns
     * the rowId of the first one.
     */
    public final int appendBytes(int length, byte[] src, int offset) {
        reserve(elementsAppended + length);
        final int firstRowId = elementsAppended;
        putBytes(firstRowId, length, src, offset);
        elementsAppended = elementsAppended + length;
        return firstRowId;
    }

    /** Appends one short after reserving room for it and returns its rowId. */
    public final int appendShort(short v) {
        reserve(elementsAppended + 1);
        putShort(elementsAppended, v);
        final int rowId = elementsAppended;
        elementsAppended = rowId + 1;
        return rowId;
    }

    /** Appends {@code count} copies of {@code v} and returns the rowId of the first one. */
    public final int appendShorts(int count, short v) {
        reserve(elementsAppended + count);
        final int firstRowId = elementsAppended;
        putShorts(firstRowId, count, v);
        elementsAppended = elementsAppended + count;
        return firstRowId;
    }

    /**
     * Appends {@code length} shorts of {@code src}, starting at index {@code offset}, and returns
     * the rowId of the first one.
     */
    public final int appendShorts(int length, short[] src, int offset) {
        reserve(elementsAppended + length);
        final int firstRowId = elementsAppended;
        putShorts(firstRowId, length, src, offset);
        elementsAppended = elementsAppended + length;
        return firstRowId;
    }

    /** Appends one int after reserving room for it and returns its rowId. */
    public final int appendInt(int v) {
        reserve(elementsAppended + 1);
        putInt(elementsAppended, v);
        final int rowId = elementsAppended;
        elementsAppended = rowId + 1;
        return rowId;
    }

    /** Appends {@code count} copies of {@code v} and returns the rowId of the first one. */
    public final int appendInts(int count, int v) {
        reserve(elementsAppended + count);
        final int firstRowId = elementsAppended;
        putInts(firstRowId, count, v);
        elementsAppended = elementsAppended + count;
        return firstRowId;
    }

    /**
     * Appends {@code length} ints of {@code src}, starting at index {@code offset}, and returns
     * the rowId of the first one.
     */
    public final int appendInts(int length, int[] src, int offset) {
        reserve(elementsAppended + length);
        final int firstRowId = elementsAppended;
        putInts(firstRowId, length, src, offset);
        elementsAppended = elementsAppended + length;
        return firstRowId;
    }

    /** Appends one long after reserving room for it and returns its rowId. */
    public final int appendLong(long v) {
        reserve(elementsAppended + 1);
        putLong(elementsAppended, v);
        final int rowId = elementsAppended;
        elementsAppended = rowId + 1;
        return rowId;
    }

    /** Appends {@code count} copies of {@code v} and returns the rowId of the first one. */
    public final int appendLongs(int count, long v) {
        reserve(elementsAppended + count);
        final int firstRowId = elementsAppended;
        putLongs(firstRowId, count, v);
        elementsAppended = elementsAppended + count;
        return firstRowId;
    }

    /**
     * Appends {@code length} longs of {@code src}, starting at index {@code offset}, and returns
     * the rowId of the first one.
     */
    public final int appendLongs(int length, long[] src, int offset) {
        reserve(elementsAppended + length);
        final int firstRowId = elementsAppended;
        putLongs(firstRowId, length, src, offset);
        elementsAppended = elementsAppended + length;
        return firstRowId;
    }

    /** Appends one float after reserving room for it and returns its rowId. */
    public final int appendFloat(float v) {
        reserve(elementsAppended + 1);
        putFloat(elementsAppended, v);
        return elementsAppended++;
    }

    /** Appends {@code count} copies of {@code v} and returns the rowId of the first one. */
    public final int appendFloats(int count, float v) {
        reserve(elementsAppended + count);
        int result = elementsAppended;
        putFloats(elementsAppended, count, v);
        elementsAppended += count;
        return result;
    }

    /**
     * Appends {@code length} floats of {@code src}, starting at index {@code offset}, and returns
     * the rowId of the first one. Mirrors the array overloads available for the other
     * primitive types.
     */
    public final int appendFloats(int length, float[] src, int offset) {
        reserve(elementsAppended + length);
        int result = elementsAppended;
        putFloats(elementsAppended, length, src, offset);
        elementsAppended += length;
        return result;
    }

    /** Appends one double after reserving room for it and returns its rowId. */
    public final int appendDouble(double v) {
        reserve(elementsAppended + 1);
        putDouble(elementsAppended, v);
        final int rowId = elementsAppended;
        elementsAppended = rowId + 1;
        return rowId;
    }

    /** Appends {@code count} copies of {@code v} and returns the rowId of the first one. */
    public final int appendDoubles(int count, double v) {
        reserve(elementsAppended + count);
        final int firstRowId = elementsAppended;
        putDoubles(firstRowId, count, v);
        elementsAppended = elementsAppended + count;
        return firstRowId;
    }

    /**
     * Appends {@code length} doubles of {@code src}, starting at index {@code offset}, and
     * returns the rowId of the first one.
     */
    public final int appendDoubles(int length, double[] src, int offset) {
        reserve(elementsAppended + length);
        final int firstRowId = elementsAppended;
        putDoubles(firstRowId, length, src, offset);
        elementsAppended = elementsAppended + length;
        return firstRowId;
    }

    /**
     * Appends a byte array value: the bytes are first appended to the child data column, then a
     * new row of this column is pointed at them.
     *
     * @return the rowId of the appended value in this column
     */
    public final int appendByteArray(byte[] value, int offset, int length) {
        final ColumnVector bytes = arrayData();
        final int copiedOffset = bytes.appendBytes(length, value, offset);
        reserve(elementsAppended + 1);
        putArray(elementsAppended, copiedOffset, length);
        final int rowId = elementsAppended;
        elementsAppended = rowId + 1;
        return rowId;
    }

    /**
     * Appends an array of {@code length} elements whose data starts at the child column's
     * current append position.
     *
     * @return the rowId of the appended array in this column
     */
    public final int appendArray(int length) {
        reserve(elementsAppended + 1);
        final int childOffset = arrayData().elementsAppended;
        putArray(elementsAppended, childOffset, length);
        final int rowId = elementsAppended;
        elementsAppended = rowId + 1;
        return rowId;
    }

    /**
     * Appends a struct row. For a NULL struct this *has* to be used instead of appendNull(), as
     * it recursively appends a NULL to every child column as well.
     * We don't have this logic as the general appendNull implementation to optimize the more
     * common non-struct case.
     *
     * @param isNull whether the appended struct is NULL
     * @return the value of {@code elementsAppended} after the append, i.e. one past the rowId
     *         that was just written (unlike the other append APIs, which return the rowId)
     */
    public final int appendStruct(boolean isNull) {
        if (isNull) {
            // Same steps as appendNull(), inlined: appendNull() asserts that the column is not a
            // struct, so calling it here would fail whenever assertions are enabled.
            reserve(elementsAppended + 1);
            putNull(elementsAppended);
            elementsAppended++;
            for (ColumnVector c : childColumns) {
                if (c.type instanceof StructType) {
                    c.appendStruct(true);
                } else {
                    c.appendNull();
                }
            }
        } else {
            appendNotNull();
        }
        return elementsAppended;
    }

    /**
     * Gives the child column holding the elements of the underlying array, i.e. child 0.
     */
    public final ColumnVector arrayData() {
        return this.childColumns[0];
    }

    /**
     * Gives the child data column at position {@code ordinal}.
     */
    public final ColumnVector getChildColumn(int ordinal) {
        return this.childColumns[ordinal];
    }

    /**
     * Gives the current append cursor, i.e. how many elements have been appended.
     */
    public final int getElementsAppended() {
        return this.elementsAppended;
    }

    /**
     * Tells whether this column is an array, which is the case exactly when it owns an array
     * holder.
     */
    public final boolean isArray() {
        return this.resultArray != null;
    }

    /**
     * Flags this column as constant.
     */
    public final void setIsConstant() {
        this.isConstant = true;
    }

    /**
     * Maximum number of rows that can be stored in this column.
     */
    protected int capacity;

    /**
     * Data type for this column.
     */
    protected final DataType type;

    /**
     * Number of nulls in this column. This is an optimization for the reader, to skip NULL checks.
     * Cleared by reset().
     */
    protected int numNulls;

    /**
     * True if there is at least one NULL byte set. This is an optimization for the writer, to skip
     * having to clear NULL bits: reset() only clears them when this flag is set.
     */
    protected boolean anyNullsSet;

    /**
     * True if this column's values are fixed. This means the column values never change, even
     * across resets (reset() returns immediately for constant columns).
     */
    protected boolean isConstant;

    /**
     * Default size of each array length value. This grows as necessary.
     * The constructor multiplies the column capacity by this value to size the byte child column
     * of binary, string and byte-array decimal columns.
     */
    protected static final int DEFAULT_ARRAY_LENGTH = 4;

    /**
     * Current write cursor (row index) when appending data.
     */
    protected int elementsAppended;

    /**
     * If this is a nested type (array or struct), the column for the child data.
     * Null for columns without children.
     */
    protected final ColumnVector[] childColumns;

    /**
     * Reusable Array holder for getArray().
     * Null unless the column stores arrays, binary, strings or byte-array decimals.
     */
    protected final Array resultArray;

    /**
     * Reusable Struct holder for getStruct().
     * Null unless the column is a struct or a calendar interval.
     */
    protected final ColumnarBatch.Row resultStruct;

    /**
     * The Dictionary for this column.
     *
     * If it's not null, will be used to decode the value in getXXX().
     */
    protected Dictionary dictionary;

    /**
     * Reusable column for ids of dictionary.
     * Created lazily by reserveDictionaryIds().
     */
    protected ColumnVector dictionaryIds;

    /**
     * Replaces the dictionary used by this column.
     *
     * @param dictionary the new dictionary; stored as given
     */
    public void setDictionary(Dictionary dictionary) {
        this.dictionary = dictionary;
    }

    /**
     * Tells whether a dictionary is currently set on this column.
     *
     * @return true if the dictionary is not null
     */
    public boolean hasDictionary() {
        return dictionary != null;
    }

    /**
     * Provides an integer column able to hold {@code capacity} dictionary ids. The column is
     * allocated on first use, in the same memory mode as this column, and afterwards reset and
     * re-reserved on each call.
     *
     * @return the dictionary-id column
     */
    public ColumnVector reserveDictionaryIds(int capacity) {
        if (dictionaryIds != null) {
            dictionaryIds.reset();
            dictionaryIds.reserve(capacity);
            return dictionaryIds;
        }
        final MemoryMode mode;
        if (this instanceof OnHeapColumnVector) {
            mode = MemoryMode.ON_HEAP;
        } else {
            mode = MemoryMode.OFF_HEAP;
        }
        dictionaryIds = allocate(capacity, DataTypes.IntegerType, mode);
        return dictionaryIds;
    }

    /**
     * Gives the integer column that stores the dictionary ids.
     *
     * @return the dictionary-id column, or null if none has been reserved yet
     */
    public ColumnVector getDictionaryIds() {
        return this.dictionaryIds;
    }

    /**
     * Initializes the state shared by all implementations and, for nested types, allocates the
     * child columns together with the matching reusable holder:
     * arrays get one child of the element type; binary, string and byte-array decimal columns
     * get one byte child sized capacity * DEFAULT_ARRAY_LENGTH; structs get one child per field;
     * calendar intervals get an int child (months) and a long child (microseconds).
     */
    protected ColumnVector(int capacity, DataType type, MemoryMode memMode) {
        this.capacity = capacity;
        this.type = type;

        final ColumnVector[] children;
        final Array arrayHolder;
        final ColumnarBatch.Row structHolder;

        if (type instanceof ArrayType) {
            DataType elementType = ((ArrayType) type).elementType();
            children = new ColumnVector[] {ColumnVector.allocate(capacity, elementType, memMode)};
            arrayHolder = new Array(children[0]);
            structHolder = null;
        } else if (type instanceof BinaryType || type instanceof StringType
                || DecimalType.isByteArrayDecimalType(type)) {
            // Variable-length byte data lives in a byte child column.
            int byteCapacity = capacity * DEFAULT_ARRAY_LENGTH;
            children = new ColumnVector[] {
                ColumnVector.allocate(byteCapacity, DataTypes.ByteType, memMode)};
            arrayHolder = new Array(children[0]);
            structHolder = null;
        } else if (type instanceof StructType) {
            StructType st = (StructType) type;
            children = new ColumnVector[st.fields().length];
            for (int i = 0; i < children.length; ++i) {
                children[i] = ColumnVector.allocate(capacity, st.fields()[i].dataType(), memMode);
            }
            arrayHolder = null;
            structHolder = new ColumnarBatch.Row(children);
        } else if (type instanceof CalendarIntervalType) {
            // Two columns. Months as int. Microseconds as Long.
            children = new ColumnVector[2];
            children[0] = ColumnVector.allocate(capacity, DataTypes.IntegerType, memMode);
            children[1] = ColumnVector.allocate(capacity, DataTypes.LongType, memMode);
            arrayHolder = null;
            structHolder = new ColumnarBatch.Row(children);
        } else {
            children = null;
            arrayHolder = null;
            structHolder = null;
        }

        this.childColumns = children;
        this.resultArray = arrayHolder;
        this.resultStruct = structHolder;
    }
}