org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleSerializeWrite.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleSerializeWrite.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.serde2.lazy.fast;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.sql.Date;
import java.sql.Timestamp;
import java.util.ArrayDeque;
import java.util.Deque;
import java.util.List;
import java.util.Map;

import org.apache.commons.codec.binary.Base64;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.hive.common.type.HiveChar;
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.common.type.HiveIntervalDayTime;
import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth;
import org.apache.hadoop.hive.common.type.HiveVarchar;
import org.apache.hadoop.hive.serde2.ByteStream.Output;
import org.apache.hadoop.hive.serde2.io.DateWritable;
import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
import org.apache.hadoop.hive.serde2.io.HiveIntervalDayTimeWritable;
import org.apache.hadoop.hive.serde2.io.HiveIntervalYearMonthWritable;
import org.apache.hadoop.hive.serde2.io.TimestampWritable;
import org.apache.hadoop.hive.serde2.lazy.LazyDate;
import org.apache.hadoop.hive.serde2.lazy.LazyHiveDecimal;
import org.apache.hadoop.hive.serde2.lazy.LazyHiveIntervalDayTime;
import org.apache.hadoop.hive.serde2.lazy.LazyHiveIntervalYearMonth;
import org.apache.hadoop.hive.serde2.lazy.LazyInteger;
import org.apache.hadoop.hive.serde2.lazy.LazyLong;
import org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters;
import org.apache.hadoop.hive.serde2.lazy.LazyTimestamp;
import org.apache.hadoop.hive.serde2.lazy.LazyUtils;
import org.apache.hadoop.hive.serde2.fast.SerializeWrite;
import org.apache.hadoop.io.Text;

/*
 * Directly serialize, field-by-field, the LazyBinary format.
*
 * This is an alternative way to serialize than what is provided by LazyBinarySerDe.
  */
public final class LazySimpleSerializeWrite implements SerializeWrite {
    public static final Logger LOG = LoggerFactory.getLogger(LazySimpleSerializeWrite.class.getName());

    private LazySerDeParameters lazyParams;

    private byte[] separators;
    private boolean[] needsEscape;
    private boolean isEscaped;
    private byte escapeChar;
    private byte[] nullSequenceBytes;

    private Output output;

    private int fieldCount;
    private int index;
    private int currentLevel;
    private Deque<Integer> indexStack = new ArrayDeque<Integer>();

    // For thread safety, we allocate private writable objects for our use only.
    private DateWritable dateWritable;
    private TimestampWritable timestampWritable;
    private HiveIntervalYearMonthWritable hiveIntervalYearMonthWritable;
    private HiveIntervalDayTimeWritable hiveIntervalDayTimeWritable;
    private HiveIntervalDayTime hiveIntervalDayTime;
    private byte[] decimalScratchBuffer;

    public LazySimpleSerializeWrite(int fieldCount, LazySerDeParameters lazyParams) {

        this();
        this.fieldCount = fieldCount;

        this.lazyParams = lazyParams;

        separators = lazyParams.getSeparators();
        isEscaped = lazyParams.isEscaped();
        escapeChar = lazyParams.getEscapeChar();
        needsEscape = lazyParams.getNeedsEscape();
        nullSequenceBytes = lazyParams.getNullSequence().getBytes();
    }

    // Not public since we must have the field count and other information.
    private LazySimpleSerializeWrite() {
    }

    /*
     * Set the buffer that will receive the serialized data.  The output buffer will be reset.
     */
    @Override
    public void set(Output output) {
        this.output = output;
        output.reset();
        index = 0;
        currentLevel = 0;
    }

    /*
     * Set the buffer that will receive the serialized data.  The output buffer will NOT be reset.
     */
    @Override
    public void setAppend(Output output) {
        this.output = output;
        index = 0;
        currentLevel = 0;
    }

    /*
     * Reset the previously supplied buffer that will receive the serialized data.
     */
    @Override
    public void reset() {
        output.reset();
        index = 0;
        currentLevel = 0;
    }

    /*
     * Write a NULL field.
     */
    @Override
    public void writeNull() throws IOException {
        beginPrimitive();

        output.write(nullSequenceBytes);

        finishPrimitive();
    }

    /*
     * BOOLEAN.
     */
    @Override
    public void writeBoolean(boolean v) throws IOException {
        beginPrimitive();
        if (v) {
            output.write(LazyUtils.trueBytes, 0, LazyUtils.trueBytes.length);
        } else {
            output.write(LazyUtils.falseBytes, 0, LazyUtils.falseBytes.length);
        }
        finishPrimitive();
    }

    /*
     * BYTE.
     */
    @Override
    public void writeByte(byte v) throws IOException {
        beginPrimitive();
        LazyInteger.writeUTF8(output, v);
        finishPrimitive();
    }

    /*
     * SHORT.
     */
    @Override
    public void writeShort(short v) throws IOException {
        beginPrimitive();
        LazyInteger.writeUTF8(output, v);
        finishPrimitive();
    }

    /*
     * INT.
     */
    @Override
    public void writeInt(int v) throws IOException {
        beginPrimitive();
        LazyInteger.writeUTF8(output, v);
        finishPrimitive();
    }

    /*
     * LONG.
     */
    @Override
    public void writeLong(long v) throws IOException {
        beginPrimitive();
        LazyLong.writeUTF8(output, v);
        finishPrimitive();
    }

    /*
     * FLOAT.
     */
    @Override
    public void writeFloat(float vf) throws IOException {
        beginPrimitive();
        ByteBuffer b = Text.encode(String.valueOf(vf));
        output.write(b.array(), 0, b.limit());
        finishPrimitive();
    }

    /*
     * DOUBLE.
     */
    @Override
    public void writeDouble(double v) throws IOException {
        beginPrimitive();
        ByteBuffer b = Text.encode(String.valueOf(v));
        output.write(b.array(), 0, b.limit());
        finishPrimitive();
    }

    /*
     * STRING.
     * 
     * Can be used to write CHAR and VARCHAR when the caller takes responsibility for
     * truncation/padding issues.
     */
    @Override
    public void writeString(byte[] v) throws IOException {
        beginPrimitive();
        if (v.equals(nullSequenceBytes)) {
        }
        LazyUtils.writeEscaped(output, v, 0, v.length, isEscaped, escapeChar, needsEscape);
        finishPrimitive();
    }

    @Override
    public void writeString(byte[] v, int start, int length) throws IOException {
        beginPrimitive();
        LazyUtils.writeEscaped(output, v, start, length, isEscaped, escapeChar, needsEscape);
        finishPrimitive();
    }

    /*
     * CHAR.
     */
    @Override
    public void writeHiveChar(HiveChar hiveChar) throws IOException {
        beginPrimitive();
        ByteBuffer b = Text.encode(hiveChar.getPaddedValue());
        LazyUtils.writeEscaped(output, b.array(), 0, b.limit(), isEscaped, escapeChar, needsEscape);
        finishPrimitive();
    }

    /*
     * VARCHAR.
     */
    @Override
    public void writeHiveVarchar(HiveVarchar hiveVarchar) throws IOException {
        beginPrimitive();
        ByteBuffer b = Text.encode(hiveVarchar.getValue());
        LazyUtils.writeEscaped(output, b.array(), 0, b.limit(), isEscaped, escapeChar, needsEscape);
        finishPrimitive();
    }

    /*
     * BINARY.
     */
    @Override
    public void writeBinary(byte[] v) throws IOException {
        beginPrimitive();
        byte[] toEncode = new byte[v.length];
        System.arraycopy(v, 0, toEncode, 0, v.length);
        byte[] toWrite = Base64.encodeBase64(toEncode);
        output.write(toWrite, 0, toWrite.length);
        finishPrimitive();
    }

    @Override
    public void writeBinary(byte[] v, int start, int length) throws IOException {
        beginPrimitive();
        byte[] toEncode = new byte[length];
        System.arraycopy(v, start, toEncode, 0, length);
        byte[] toWrite = Base64.encodeBase64(toEncode);
        output.write(toWrite, 0, toWrite.length);
        finishPrimitive();
    }

    /*
     * DATE.
     */
    @Override
    public void writeDate(Date date) throws IOException {
        beginPrimitive();
        if (dateWritable == null) {
            dateWritable = new DateWritable();
        }
        dateWritable.set(date);
        LazyDate.writeUTF8(output, dateWritable);
        finishPrimitive();
    }

    // We provide a faster way to write a date without a Date object.
    @Override
    public void writeDate(int dateAsDays) throws IOException {
        beginPrimitive();
        if (dateWritable == null) {
            dateWritable = new DateWritable();
        }
        dateWritable.set(dateAsDays);
        LazyDate.writeUTF8(output, dateWritable);
        finishPrimitive();
    }

    /*
     * TIMESTAMP.
     */
    @Override
    public void writeTimestamp(Timestamp v) throws IOException {
        beginPrimitive();
        if (timestampWritable == null) {
            timestampWritable = new TimestampWritable();
        }
        timestampWritable.set(v);
        LazyTimestamp.writeUTF8(output, timestampWritable);
        finishPrimitive();
    }

    /*
     * INTERVAL_YEAR_MONTH.
     */
    @Override
    public void writeHiveIntervalYearMonth(HiveIntervalYearMonth viyt) throws IOException {
        beginPrimitive();
        if (hiveIntervalYearMonthWritable == null) {
            hiveIntervalYearMonthWritable = new HiveIntervalYearMonthWritable();
        }
        hiveIntervalYearMonthWritable.set(viyt);
        LazyHiveIntervalYearMonth.writeUTF8(output, hiveIntervalYearMonthWritable);
        finishPrimitive();
    }

    @Override
    public void writeHiveIntervalYearMonth(int totalMonths) throws IOException {
        beginPrimitive();
        if (hiveIntervalYearMonthWritable == null) {
            hiveIntervalYearMonthWritable = new HiveIntervalYearMonthWritable();
        }
        hiveIntervalYearMonthWritable.set(totalMonths);
        LazyHiveIntervalYearMonth.writeUTF8(output, hiveIntervalYearMonthWritable);
        finishPrimitive();
    }

    /*
     * INTERVAL_DAY_TIME.
     */
    @Override
    public void writeHiveIntervalDayTime(HiveIntervalDayTime vidt) throws IOException {
        beginPrimitive();
        if (hiveIntervalDayTimeWritable == null) {
            hiveIntervalDayTimeWritable = new HiveIntervalDayTimeWritable();
        }
        hiveIntervalDayTimeWritable.set(vidt);
        LazyHiveIntervalDayTime.writeUTF8(output, hiveIntervalDayTimeWritable);
        finishPrimitive();
    }

    /*
     * DECIMAL.
     *
     * NOTE: The scale parameter is for text serialization (e.g. HiveDecimal.toFormatString) that
     * creates trailing zeroes output decimals.
     */
    @Override
    public void writeHiveDecimal(HiveDecimal dec, int scale) throws IOException {
        beginPrimitive();
        if (decimalScratchBuffer == null) {
            decimalScratchBuffer = new byte[HiveDecimal.SCRATCH_BUFFER_LEN_TO_BYTES];
        }
        LazyHiveDecimal.writeUTF8(output, dec, scale, decimalScratchBuffer);
        finishPrimitive();
    }

    @Override
    public void writeHiveDecimal(HiveDecimalWritable decWritable, int scale) throws IOException {
        beginPrimitive();
        if (decimalScratchBuffer == null) {
            decimalScratchBuffer = new byte[HiveDecimal.SCRATCH_BUFFER_LEN_TO_BYTES];
        }
        LazyHiveDecimal.writeUTF8(output, decWritable, scale, decimalScratchBuffer);
        finishPrimitive();
    }

    private void beginComplex() {
        if (index > 0) {
            output.write(separators[currentLevel]);
        }
        indexStack.push(index);

        // Always use index 0 so the write methods don't write a separator.
        index = 0;

        // Set "global" separator member to next level.
        currentLevel++;
    }

    private void finishComplex() {
        currentLevel--;
        index = indexStack.pop();
        index++;
    }

    @Override
    public void beginList(List list) {
        beginComplex();
    }

    @Override
    public void separateList() {
    }

    @Override
    public void finishList() {
        finishComplex();
    }

    @Override
    public void beginMap(Map<?, ?> map) {
        beginComplex();

        // MAP requires 2 levels: key separator and key-pair separator.
        currentLevel++;
    }

    @Override
    public void separateKey() {
        index = 0;
        output.write(separators[currentLevel]);
    }

    @Override
    public void separateKeyValuePair() {
        index = 0;
        output.write(separators[currentLevel - 1]);
    }

    @Override
    public void finishMap() {
        // Remove MAP extra level.
        currentLevel--;

        finishComplex();
    }

    @Override
    public void beginStruct(List fieldValues) {
        beginComplex();
    }

    @Override
    public void separateStruct() {
    }

    @Override
    public void finishStruct() {
        finishComplex();
    }

    @Override
    public void beginUnion(int tag) throws IOException {
        beginComplex();
        writeInt(tag);
        output.write(separators[currentLevel]);
        index = 0;
    }

    @Override
    public void finishUnion() {
        finishComplex();
    }

    private void beginPrimitive() {
        if (index > 0) {
            output.write(separators[currentLevel]);
        }
    }

    private void finishPrimitive() {
        index++;
    }
}