com.linkedin.pinot.core.common.datatable.DataTableImplV2.java Source code

Java tutorial

Introduction

Here is the source code for com.linkedin.pinot.core.common.datatable.DataTableImplV2.java

Source

/**
 * Copyright (C) 2014-2016 LinkedIn Corp. (pinot-core@linkedin.com)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.linkedin.pinot.core.common.datatable;

import com.linkedin.pinot.common.response.ProcessingException;
import com.linkedin.pinot.common.utils.DataSchema;
import com.linkedin.pinot.common.utils.DataTable;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import org.apache.commons.lang3.StringUtils;

public class DataTableImplV2 implements DataTable {
    private static final int VERSION = 2;
    private static final Charset UTF_8 = Charset.forName("UTF-8");

    // VERSION
    // NUM_ROWS
    // NUM_COLUMNS
    // DICTIONARY_MAP (START|SIZE)
    // METADATA (START|SIZE)
    // DATA_SCHEMA (START|SIZE)
    // FIXED_SIZE_DATA (START|SIZE)
    // VARIABLE_SIZE_DATA (START|SIZE)
    private static final int HEADER_SIZE = (Integer.SIZE / Byte.SIZE) * 13;

    private final int _numRows;
    private final int _numColumns;
    private final DataSchema _dataSchema;
    private final int[] _columnOffsets;
    private final int _rowSizeInBytes;
    private final Map<String, Map<Integer, String>> _dictionaryMap;
    private final byte[] _fixedSizeDataBytes;
    private final ByteBuffer _fixedSizeData;
    private final byte[] _variableSizeDataBytes;
    private final ByteBuffer _variableSizeData;
    private final Map<String, String> _metadata;

    /**
     * Construct data table with results. (Server side)
     */
    public DataTableImplV2(int numRows, @Nonnull DataSchema dataSchema,
            @Nonnull Map<String, Map<Integer, String>> dictionaryMap, @Nonnull byte[] fixedSizeDataBytes,
            @Nonnull byte[] variableSizeDataBytes) {
        _numRows = numRows;
        _numColumns = dataSchema.size();
        _dataSchema = dataSchema;
        _columnOffsets = new int[_numColumns];
        _rowSizeInBytes = DataTableUtils.computeColumnOffsets(dataSchema, _columnOffsets);
        _dictionaryMap = dictionaryMap;
        _fixedSizeDataBytes = fixedSizeDataBytes;
        _fixedSizeData = ByteBuffer.wrap(fixedSizeDataBytes);
        _variableSizeDataBytes = variableSizeDataBytes;
        _variableSizeData = ByteBuffer.wrap(variableSizeDataBytes);
        _metadata = new HashMap<>();
    }

    /**
     * Construct empty data table. (Server side)
     */
    public DataTableImplV2() {
        _numRows = 0;
        _numColumns = 0;
        _dataSchema = null;
        _columnOffsets = null;
        _rowSizeInBytes = 0;
        _dictionaryMap = null;
        _fixedSizeDataBytes = null;
        _fixedSizeData = null;
        _variableSizeDataBytes = null;
        _variableSizeData = null;
        _metadata = new HashMap<>();
    }

    /**
     * Construct data table from byte array. (broker side)
     */
    public DataTableImplV2(@Nonnull ByteBuffer byteBuffer) throws IOException {
        // Read header.
        _numRows = byteBuffer.getInt();
        _numColumns = byteBuffer.getInt();
        int dictionaryMapStart = byteBuffer.getInt();
        int dictionaryMapLength = byteBuffer.getInt();
        int metadataStart = byteBuffer.getInt();
        int metadataLength = byteBuffer.getInt();
        int dataSchemaStart = byteBuffer.getInt();
        int dataSchemaLength = byteBuffer.getInt();
        int fixedSizeDataStart = byteBuffer.getInt();
        int fixedSizeDataLength = byteBuffer.getInt();
        int variableSizeDataStart = byteBuffer.getInt();
        int variableSizeDataLength = byteBuffer.getInt();

        // Read dictionary.
        if (dictionaryMapLength != 0) {
            byte[] dictionaryMapBytes = new byte[dictionaryMapLength];
            byteBuffer.position(dictionaryMapStart);
            byteBuffer.get(dictionaryMapBytes);
            _dictionaryMap = deserializeDictionaryMap(dictionaryMapBytes);
        } else {
            _dictionaryMap = null;
        }

        // Read metadata.
        byte[] metadataBytes = new byte[metadataLength];
        byteBuffer.position(metadataStart);
        byteBuffer.get(metadataBytes);
        _metadata = deserializeMetadata(metadataBytes);

        // Read data schema.
        if (dataSchemaLength != 0) {
            byte[] schemaBytes = new byte[dataSchemaLength];
            byteBuffer.position(dataSchemaStart);
            byteBuffer.get(schemaBytes);
            _dataSchema = DataSchema.fromBytes(schemaBytes);
            _columnOffsets = new int[_dataSchema.size()];
            _rowSizeInBytes = DataTableUtils.computeColumnOffsets(_dataSchema, _columnOffsets);
        } else {
            _dataSchema = null;
            _columnOffsets = null;
            _rowSizeInBytes = 0;
        }

        // Read fixed size data.
        if (fixedSizeDataLength != 0) {
            _fixedSizeDataBytes = new byte[fixedSizeDataLength];
            byteBuffer.position(fixedSizeDataStart);
            byteBuffer.get(_fixedSizeDataBytes);
            _fixedSizeData = ByteBuffer.wrap(_fixedSizeDataBytes);
        } else {
            _fixedSizeDataBytes = null;
            _fixedSizeData = null;
        }

        // Read variable size data.
        if (variableSizeDataLength != 0) {
            _variableSizeDataBytes = new byte[variableSizeDataLength];
            byteBuffer.position(variableSizeDataStart);
            byteBuffer.get(_variableSizeDataBytes);
            _variableSizeData = ByteBuffer.wrap(_variableSizeDataBytes);
        } else {
            _variableSizeDataBytes = null;
            _variableSizeData = null;
        }
    }

    private Map<String, Map<Integer, String>> deserializeDictionaryMap(byte[] bytes) throws IOException {
        try (ByteArrayInputStream byteArrayInputStream = new ByteArrayInputStream(bytes);
                DataInputStream dataInputStream = new DataInputStream(byteArrayInputStream)) {
            int numDictionaries = dataInputStream.readInt();
            Map<String, Map<Integer, String>> dictionaryMap = new HashMap<>(numDictionaries);

            for (int i = 0; i < numDictionaries; i++) {
                String column = decodeString(dataInputStream);
                int dictionarySize = dataInputStream.readInt();
                Map<Integer, String> dictionary = new HashMap<>(dictionarySize);
                for (int j = 0; j < dictionarySize; j++) {
                    int key = dataInputStream.readInt();
                    String value = decodeString(dataInputStream);
                    dictionary.put(key, value);
                }
                dictionaryMap.put(column, dictionary);
            }

            return dictionaryMap;
        }
    }

    private Map<String, String> deserializeMetadata(byte[] bytes) throws IOException {
        try (ByteArrayInputStream byteArrayInputStream = new ByteArrayInputStream(bytes);
                DataInputStream dataInputStream = new DataInputStream(byteArrayInputStream)) {
            int numEntries = dataInputStream.readInt();
            Map<String, String> metadata = new HashMap<>(numEntries);

            for (int i = 0; i < numEntries; i++) {
                String key = decodeString(dataInputStream);
                String value = decodeString(dataInputStream);
                metadata.put(key, value);
            }

            return metadata;
        }
    }

    private static String decodeString(DataInputStream dataInputStream) throws IOException {
        int length = dataInputStream.readInt();
        if (length == 0) {
            return StringUtils.EMPTY;
        } else {
            byte[] buffer = new byte[length];
            int numBytesRead = dataInputStream.read(buffer);
            assert numBytesRead == length;
            return new String(buffer, UTF_8);
        }
    }

    @Override
    public void addException(@Nonnull ProcessingException processingException) {
        _metadata.put(EXCEPTION_METADATA_KEY + processingException.getErrorCode(),
                processingException.getMessage());
    }

    @Nonnull
    @Override
    public byte[] toBytes() throws IOException {
        ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
        DataOutputStream dataOutputStream = new DataOutputStream(byteArrayOutputStream);
        dataOutputStream.writeInt(VERSION);
        dataOutputStream.writeInt(_numRows);
        dataOutputStream.writeInt(_numColumns);
        int dataOffset = HEADER_SIZE;

        // Write dictionary.
        dataOutputStream.writeInt(dataOffset);
        byte[] dictionaryMapBytes = null;
        if (_dictionaryMap != null) {
            dictionaryMapBytes = serializeDictionaryMap();
            dataOutputStream.writeInt(dictionaryMapBytes.length);
            dataOffset += dictionaryMapBytes.length;
        } else {
            dataOutputStream.writeInt(0);
        }

        // Write metadata.
        dataOutputStream.writeInt(dataOffset);
        byte[] metadataBytes = serializeMetadata();
        dataOutputStream.writeInt(metadataBytes.length);
        dataOffset += metadataBytes.length;

        // Write data schema.
        dataOutputStream.writeInt(dataOffset);
        byte[] dataSchemaBytes = null;
        if (_dataSchema != null) {
            dataSchemaBytes = _dataSchema.toBytes();
            dataOutputStream.writeInt(dataSchemaBytes.length);
            dataOffset += dataSchemaBytes.length;
        } else {
            dataOutputStream.writeInt(0);
        }

        // Write fixed size data.
        dataOutputStream.writeInt(dataOffset);
        if (_fixedSizeDataBytes != null) {
            dataOutputStream.writeInt(_fixedSizeDataBytes.length);
            dataOffset += _fixedSizeDataBytes.length;
        } else {
            dataOutputStream.writeInt(0);
        }

        // Write variable size data.
        dataOutputStream.writeInt(dataOffset);
        if (_variableSizeDataBytes != null) {
            dataOutputStream.writeInt(_variableSizeDataBytes.length);
        } else {
            dataOutputStream.writeInt(0);
        }

        // Write actual data.
        if (dictionaryMapBytes != null) {
            dataOutputStream.write(dictionaryMapBytes);
        }
        dataOutputStream.write(metadataBytes);
        if (dataSchemaBytes != null) {
            dataOutputStream.write(dataSchemaBytes);
        }
        if (_fixedSizeDataBytes != null) {
            dataOutputStream.write(_fixedSizeDataBytes);
        }
        if (_variableSizeDataBytes != null) {
            dataOutputStream.write(_variableSizeDataBytes);
        }

        return byteArrayOutputStream.toByteArray();
    }

    private byte[] serializeDictionaryMap() throws IOException {
        ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
        DataOutputStream dataOutputStream = new DataOutputStream(byteArrayOutputStream);

        dataOutputStream.writeInt(_dictionaryMap.size());
        for (Entry<String, Map<Integer, String>> dictionaryMapEntry : _dictionaryMap.entrySet()) {
            String columnName = dictionaryMapEntry.getKey();
            Map<Integer, String> dictionary = dictionaryMapEntry.getValue();
            byte[] bytes = columnName.getBytes(UTF_8);
            dataOutputStream.writeInt(bytes.length);
            dataOutputStream.write(bytes);
            dataOutputStream.writeInt(dictionary.size());

            for (Entry<Integer, String> dictionaryEntry : dictionary.entrySet()) {
                dataOutputStream.writeInt(dictionaryEntry.getKey());
                byte[] valueBytes = dictionaryEntry.getValue().getBytes(UTF_8);
                dataOutputStream.writeInt(valueBytes.length);
                dataOutputStream.write(valueBytes);
            }
        }

        return byteArrayOutputStream.toByteArray();
    }

    private byte[] serializeMetadata() throws IOException {
        ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
        DataOutputStream dataOutputStream = new DataOutputStream(byteArrayOutputStream);

        dataOutputStream.writeInt(_metadata.size());
        for (Entry<String, String> entry : _metadata.entrySet()) {
            byte[] keyBytes = entry.getKey().getBytes(UTF_8);
            dataOutputStream.writeInt(keyBytes.length);
            dataOutputStream.write(keyBytes);

            byte[] valueBytes = entry.getValue().getBytes(UTF_8);
            dataOutputStream.writeInt(valueBytes.length);
            dataOutputStream.write(valueBytes);
        }

        return byteArrayOutputStream.toByteArray();
    }

    @Nonnull
    @Override
    public Map<String, String> getMetadata() {
        return _metadata;
    }

    @Nullable
    @Override
    public DataSchema getDataSchema() {
        return _dataSchema;
    }

    @Override
    public int getNumberOfRows() {
        return _numRows;
    }

    @Override
    public boolean getBoolean(int rowId, int colId) {
        _fixedSizeData.position(rowId * _rowSizeInBytes + _columnOffsets[colId]);
        return _fixedSizeData.get() == 1;
    }

    @Override
    public char getChar(int rowId, int colId) {
        _fixedSizeData.position(rowId * _rowSizeInBytes + _columnOffsets[colId]);
        return _fixedSizeData.getChar();
    }

    @Override
    public byte getByte(int rowId, int colId) {
        _fixedSizeData.position(rowId * _rowSizeInBytes + _columnOffsets[colId]);
        return _fixedSizeData.get();
    }

    @Override
    public short getShort(int rowId, int colId) {
        _fixedSizeData.position(rowId * _rowSizeInBytes + _columnOffsets[colId]);
        return _fixedSizeData.getShort();
    }

    @Override
    public int getInt(int rowId, int colId) {
        _fixedSizeData.position(rowId * _rowSizeInBytes + _columnOffsets[colId]);
        return _fixedSizeData.getInt();
    }

    @Override
    public long getLong(int rowId, int colId) {
        _fixedSizeData.position(rowId * _rowSizeInBytes + _columnOffsets[colId]);
        return _fixedSizeData.getLong();
    }

    @Override
    public float getFloat(int rowId, int colId) {
        _fixedSizeData.position(rowId * _rowSizeInBytes + _columnOffsets[colId]);
        return _fixedSizeData.getFloat();
    }

    @Override
    public double getDouble(int rowId, int colId) {
        _fixedSizeData.position(rowId * _rowSizeInBytes + _columnOffsets[colId]);
        return _fixedSizeData.getDouble();
    }

    @Nonnull
    @Override
    public String getString(int rowId, int colId) {
        _fixedSizeData.position(rowId * _rowSizeInBytes + _columnOffsets[colId]);
        int dictId = _fixedSizeData.getInt();
        return _dictionaryMap.get(_dataSchema.getColumnName(colId)).get(dictId);
    }

    @Nonnull
    @Override
    public <T> T getObject(int rowId, int colId) {
        int size = positionCursorInVariableBuffer(rowId, colId);
        ObjectType objectType = ObjectType.getObjectType(_variableSizeData.getInt());
        ByteBuffer byteBuffer = _variableSizeData.slice();
        byteBuffer.limit(size);
        try {
            return ObjectCustomSerDe.deserialize(byteBuffer, objectType);
        } catch (IOException e) {
            throw new RuntimeException("Caught exception while de-serializing object.", e);
        }
    }

    @Nonnull
    @Override
    public byte[] getByteArray(int rowId, int colId) {
        int length = positionCursorInVariableBuffer(rowId, colId);
        byte[] bytes = new byte[length];
        for (int i = 0; i < length; i++) {
            bytes[i] = _variableSizeData.get();
        }
        return bytes;
    }

    @Nonnull
    @Override
    public char[] getCharArray(int rowId, int colId) {
        int length = positionCursorInVariableBuffer(rowId, colId);
        char[] chars = new char[length];
        for (int i = 0; i < length; i++) {
            chars[i] = _variableSizeData.getChar();
        }
        return chars;
    }

    @Nonnull
    @Override
    public short[] getShortArray(int rowId, int colId) {
        int length = positionCursorInVariableBuffer(rowId, colId);
        short[] shorts = new short[length];
        for (int i = 0; i < length; i++) {
            shorts[i] = _variableSizeData.getShort();
        }
        return shorts;
    }

    @Nonnull
    @Override
    public int[] getIntArray(int rowId, int colId) {
        int length = positionCursorInVariableBuffer(rowId, colId);
        int[] ints = new int[length];
        for (int i = 0; i < length; i++) {
            ints[i] = _variableSizeData.getInt();
        }
        return ints;
    }

    @Nonnull
    @Override
    public long[] getLongArray(int rowId, int colId) {
        int length = positionCursorInVariableBuffer(rowId, colId);
        long[] longs = new long[length];
        for (int i = 0; i < length; i++) {
            longs[i] = _variableSizeData.getLong();
        }
        return longs;
    }

    @Nonnull
    @Override
    public float[] getFloatArray(int rowId, int colId) {
        int length = positionCursorInVariableBuffer(rowId, colId);
        float[] floats = new float[length];
        for (int i = 0; i < length; i++) {
            floats[i] = _variableSizeData.getFloat();
        }
        return floats;
    }

    @Nonnull
    @Override
    public double[] getDoubleArray(int rowId, int colId) {
        int length = positionCursorInVariableBuffer(rowId, colId);
        double[] doubles = new double[length];
        for (int i = 0; i < length; i++) {
            doubles[i] = _variableSizeData.getDouble();
        }
        return doubles;
    }

    @Nonnull
    @Override
    public String[] getStringArray(int rowId, int colId) {
        int length = positionCursorInVariableBuffer(rowId, colId);
        String[] strings = new String[length];
        Map<Integer, String> dictionary = _dictionaryMap.get(_dataSchema.getColumnName(colId));
        for (int i = 0; i < length; i++) {
            strings[i] = dictionary.get(_variableSizeData.getInt());
        }
        return strings;
    }

    private int positionCursorInVariableBuffer(int rowId, int colId) {
        _fixedSizeData.position(rowId * _rowSizeInBytes + _columnOffsets[colId]);
        _variableSizeData.position(_fixedSizeData.getInt());
        return _fixedSizeData.getInt();
    }

    @Override
    public String toString() {
        if (_dataSchema == null) {
            return _metadata.toString();
        }

        StringBuilder stringBuilder = new StringBuilder();
        stringBuilder.append(_dataSchema.toString()).append('\n');
        stringBuilder.append("numRows: ").append(_numRows).append('\n');

        _fixedSizeData.position(0);
        for (int rowId = 0; rowId < _numRows; rowId++) {
            for (int colId = 0; colId < _numColumns; colId++) {
                switch (_dataSchema.getColumnType(colId)) {
                case BOOLEAN:
                    stringBuilder.append(_fixedSizeData.get());
                    break;
                case BYTE:
                    stringBuilder.append(_fixedSizeData.get());
                    break;
                case CHAR:
                    stringBuilder.append(_fixedSizeData.getChar());
                    break;
                case SHORT:
                    stringBuilder.append(_fixedSizeData.getShort());
                    break;
                case INT:
                    stringBuilder.append(_fixedSizeData.getInt());
                    break;
                case LONG:
                    stringBuilder.append(_fixedSizeData.getLong());
                    break;
                case FLOAT:
                    stringBuilder.append(_fixedSizeData.getFloat());
                    break;
                case DOUBLE:
                    stringBuilder.append(_fixedSizeData.getDouble());
                    break;
                case STRING:
                    stringBuilder.append(_fixedSizeData.getInt());
                    break;
                // Object and array.
                default:
                    stringBuilder
                            .append(String.format("(%s:%s)", _fixedSizeData.getInt(), _fixedSizeData.getInt()));
                    break;
                }
                stringBuilder.append("\t");
            }
            stringBuilder.append("\n");
        }
        return stringBuilder.toString();
    }
}