Java tutorial
/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.hive.serde2.lazybinary.fast; import java.io.IOException; import java.sql.Date; import java.sql.Timestamp; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.common.type.HiveChar; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; import org.apache.hadoop.hive.common.type.HiveVarchar; import org.apache.hadoop.hive.serde2.ByteStream.Output; import org.apache.hadoop.hive.serde2.io.DateWritable; import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; import org.apache.hadoop.hive.serde2.io.HiveIntervalDayTimeWritable; import org.apache.hadoop.hive.serde2.io.HiveIntervalYearMonthWritable; import org.apache.hadoop.hive.serde2.io.TimestampWritable; import org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe; import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils; import org.apache.hadoop.hive.serde2.fast.SerializeWrite; import org.apache.hive.common.util.DateUtils; /* * Directly serialize, field-by-field, the LazyBinary format. * * This is an alternative way to serialize than what is provided by LazyBinarySerDe. */ public class LazyBinarySerializeWrite implements SerializeWrite { public static final Log LOG = LogFactory.getLog(LazyBinarySerializeWrite.class.getName()); private Output output; private int fieldCount; private int fieldIndex; private byte nullByte; private long nullOffset; // For thread safety, we allocate private writable objects for our use only. private HiveDecimalWritable hiveDecimalWritable; private TimestampWritable timestampWritable; private HiveIntervalYearMonthWritable hiveIntervalYearMonthWritable; private HiveIntervalDayTimeWritable hiveIntervalDayTimeWritable; private HiveIntervalDayTime hiveIntervalDayTime; public LazyBinarySerializeWrite(int fieldCount) { this(); this.fieldCount = fieldCount; } // Not public since we must have the field count and other information. private LazyBinarySerializeWrite() { } /* * Set the buffer that will receive the serialized data. The output buffer will be reset. */ @Override public void set(Output output) { this.output = output; output.reset(); fieldIndex = 0; nullByte = 0; nullOffset = 0; } /* * Set the buffer that will receive the serialized data. The output buffer will NOT be reset. */ @Override public void setAppend(Output output) { this.output = output; fieldIndex = 0; nullByte = 0; nullOffset = output.getLength(); } /* * Reset the previously supplied buffer that will receive the serialized data. */ @Override public void reset() { output.reset(); fieldIndex = 0; nullByte = 0; nullOffset = 0; } /* * General Pattern: * * // Every 8 fields we write a NULL byte. * IF ((fieldIndex % 8) == 0), then * IF (fieldIndex > 0), then * Write back previous NullByte * NullByte = 0 * Remember write position * Allocate room for next NULL byte. * * WHEN NOT NULL: Set bit in NULL byte; Write value. * OTHERWISE NULL: We do not set a bit in the nullByte when we are writing a null. * * Increment fieldIndex * * IF (fieldIndex == fieldCount), then * Write back final NullByte * */ /* * Write a NULL field. */ @Override public void writeNull() throws IOException { // Every 8 fields we write a NULL byte. if ((fieldIndex % 8) == 0) { if (fieldIndex > 0) { // Write back previous 8 field's NULL byte. output.writeByte(nullOffset, nullByte); nullByte = 0; nullOffset = output.getLength(); } // Allocate next NULL byte. output.reserve(1); } // We DO NOT set a bit in the NULL byte when we are writing a NULL. fieldIndex++; if (fieldIndex == fieldCount) { // Write back the final NULL byte before the last fields. output.writeByte(nullOffset, nullByte); } } /* * BOOLEAN. */ @Override public void writeBoolean(boolean v) throws IOException { // Every 8 fields we write a NULL byte. if ((fieldIndex % 8) == 0) { if (fieldIndex > 0) { // Write back previous 8 field's NULL byte. output.writeByte(nullOffset, nullByte); nullByte = 0; nullOffset = output.getLength(); } // Allocate next NULL byte. output.reserve(1); } // Set bit in NULL byte when a field is NOT NULL. nullByte |= 1 << (fieldIndex % 8); output.write((byte) (v ? 1 : 0)); fieldIndex++; if (fieldIndex == fieldCount) { // Write back the final NULL byte before the last fields. output.writeByte(nullOffset, nullByte); } } /* * BYTE. */ @Override public void writeByte(byte v) throws IOException { // Every 8 fields we write a NULL byte. if ((fieldIndex % 8) == 0) { if (fieldIndex > 0) { // Write back previous 8 field's NULL byte. output.writeByte(nullOffset, nullByte); nullByte = 0; nullOffset = output.getLength(); } // Allocate next NULL byte. output.reserve(1); } // Set bit in NULL byte when a field is NOT NULL. nullByte |= 1 << (fieldIndex % 8); output.write(v); fieldIndex++; if (fieldIndex == fieldCount) { // Write back the final NULL byte before the last fields. output.writeByte(nullOffset, nullByte); } } /* * SHORT. */ @Override public void writeShort(short v) throws IOException { // Every 8 fields we write a NULL byte. if ((fieldIndex % 8) == 0) { if (fieldIndex > 0) { // Write back previous 8 field's NULL byte. output.writeByte(nullOffset, nullByte); nullByte = 0; nullOffset = output.getLength(); } // Allocate next NULL byte. output.reserve(1); } // Set bit in NULL byte when a field is NOT NULL. nullByte |= 1 << (fieldIndex % 8); output.write((byte) (v >> 8)); output.write((byte) (v)); fieldIndex++; if (fieldIndex == fieldCount) { // Write back the final NULL byte before the last fields. output.writeByte(nullOffset, nullByte); } } /* * INT. */ @Override public void writeInt(int v) throws IOException { // Every 8 fields we write a NULL byte. if ((fieldIndex % 8) == 0) { if (fieldIndex > 0) { // Write back previous 8 field's NULL byte. output.writeByte(nullOffset, nullByte); nullByte = 0; nullOffset = output.getLength(); } // Allocate next NULL byte. output.reserve(1); } // Set bit in NULL byte when a field is NOT NULL. nullByte |= 1 << (fieldIndex % 8); LazyBinaryUtils.writeVInt(output, v); fieldIndex++; if (fieldIndex == fieldCount) { // Write back the final NULL byte before the last fields. output.writeByte(nullOffset, nullByte); } } /* * LONG. */ @Override public void writeLong(long v) throws IOException { // Every 8 fields we write a NULL byte. if ((fieldIndex % 8) == 0) { if (fieldIndex > 0) { // Write back previous 8 field's NULL byte. output.writeByte(nullOffset, nullByte); nullByte = 0; nullOffset = output.getLength(); } // Allocate next NULL byte. output.reserve(1); } // Set bit in NULL byte when a field is NOT NULL. nullByte |= 1 << (fieldIndex % 8); LazyBinaryUtils.writeVLong(output, v); fieldIndex++; if (fieldIndex == fieldCount) { // Write back the final NULL byte before the last fields. output.writeByte(nullOffset, nullByte); } } /* * FLOAT. */ @Override public void writeFloat(float vf) throws IOException { // Every 8 fields we write a NULL byte. if ((fieldIndex % 8) == 0) { if (fieldIndex > 0) { // Write back previous 8 field's NULL byte. output.writeByte(nullOffset, nullByte); nullByte = 0; nullOffset = output.getLength(); } // Allocate next NULL byte. output.reserve(1); } // Set bit in NULL byte when a field is NOT NULL. nullByte |= 1 << (fieldIndex % 8); int v = Float.floatToIntBits(vf); output.write((byte) (v >> 24)); output.write((byte) (v >> 16)); output.write((byte) (v >> 8)); output.write((byte) (v)); fieldIndex++; if (fieldIndex == fieldCount) { // Write back the final NULL byte before the last fields. output.writeByte(nullOffset, nullByte); } } /* * DOUBLE. */ @Override public void writeDouble(double v) throws IOException { // Every 8 fields we write a NULL byte. if ((fieldIndex % 8) == 0) { if (fieldIndex > 0) { // Write back previous 8 field's NULL byte. output.writeByte(nullOffset, nullByte); nullByte = 0; nullOffset = output.getLength(); } // Allocate next NULL byte. output.reserve(1); } // Set bit in NULL byte when a field is NOT NULL. nullByte |= 1 << (fieldIndex % 8); LazyBinaryUtils.writeDouble(output, v); fieldIndex++; if (fieldIndex == fieldCount) { // Write back the final NULL byte before the last fields. output.writeByte(nullOffset, nullByte); } } /* * STRING. * * Can be used to write CHAR and VARCHAR when the caller takes responsibility for * truncation/padding issues. */ @Override public void writeString(byte[] v) throws IOException { // Every 8 fields we write a NULL byte. if ((fieldIndex % 8) == 0) { if (fieldIndex > 0) { // Write back previous 8 field's NULL byte. output.writeByte(nullOffset, nullByte); nullByte = 0; nullOffset = output.getLength(); } // Allocate next NULL byte. output.reserve(1); } // Set bit in NULL byte when a field is NOT NULL. nullByte |= 1 << (fieldIndex % 8); int length = v.length; LazyBinaryUtils.writeVInt(output, length); output.write(v, 0, length); fieldIndex++; if (fieldIndex == fieldCount) { // Write back the final NULL byte before the last fields. output.writeByte(nullOffset, nullByte); } } @Override public void writeString(byte[] v, int start, int length) throws IOException { // Every 8 fields we write a NULL byte. if ((fieldIndex % 8) == 0) { if (fieldIndex > 0) { // Write back previous 8 field's NULL byte. output.writeByte(nullOffset, nullByte); nullByte = 0; nullOffset = output.getLength(); } // Allocate next NULL byte. output.reserve(1); } // Set bit in NULL byte when a field is NOT NULL. nullByte |= 1 << (fieldIndex % 8); LazyBinaryUtils.writeVInt(output, length); output.write(v, start, length); fieldIndex++; if (fieldIndex == fieldCount) { // Write back the final NULL byte before the last fields. output.writeByte(nullOffset, nullByte); } } /* * CHAR. */ @Override public void writeHiveChar(HiveChar hiveChar) throws IOException { String string = hiveChar.getStrippedValue(); byte[] bytes = string.getBytes(); writeString(bytes); } /* * VARCHAR. */ @Override public void writeHiveVarchar(HiveVarchar hiveVarchar) throws IOException { String string = hiveVarchar.getValue(); byte[] bytes = string.getBytes(); writeString(bytes); } /* * BINARY. */ @Override public void writeBinary(byte[] v) throws IOException { writeString(v); } @Override public void writeBinary(byte[] v, int start, int length) throws IOException { writeString(v, start, length); } /* * DATE. */ @Override public void writeDate(Date date) throws IOException { // Every 8 fields we write a NULL byte. if ((fieldIndex % 8) == 0) { if (fieldIndex > 0) { // Write back previous 8 field's NULL byte. output.writeByte(nullOffset, nullByte); nullByte = 0; nullOffset = output.getLength(); } // Allocate next NULL byte. output.reserve(1); } // Set bit in NULL byte when a field is NOT NULL. nullByte |= 1 << (fieldIndex % 8); LazyBinaryUtils.writeVInt(output, DateWritable.dateToDays(date)); fieldIndex++; if (fieldIndex == fieldCount) { // Write back the final NULL byte before the last fields. output.writeByte(nullOffset, nullByte); } } // We provide a faster way to write a date without a Date object. @Override public void writeDate(int dateAsDays) throws IOException { // Every 8 fields we write a NULL byte. if ((fieldIndex % 8) == 0) { if (fieldIndex > 0) { // Write back previous 8 field's NULL byte. output.writeByte(nullOffset, nullByte); nullByte = 0; nullOffset = output.getLength(); } // Allocate next NULL byte. output.reserve(1); } // Set bit in NULL byte when a field is NOT NULL. nullByte |= 1 << (fieldIndex % 8); LazyBinaryUtils.writeVInt(output, dateAsDays); fieldIndex++; if (fieldIndex == fieldCount) { // Write back the final NULL byte before the last fields. output.writeByte(nullOffset, nullByte); } } /* * TIMESTAMP. */ @Override public void writeTimestamp(Timestamp v) throws IOException { // Every 8 fields we write a NULL byte. if ((fieldIndex % 8) == 0) { if (fieldIndex > 0) { // Write back previous 8 field's NULL byte. output.writeByte(nullOffset, nullByte); nullByte = 0; nullOffset = output.getLength(); } // Allocate next NULL byte. output.reserve(1); } // Set bit in NULL byte when a field is NOT NULL. nullByte |= 1 << (fieldIndex % 8); if (timestampWritable == null) { timestampWritable = new TimestampWritable(); } timestampWritable.set(v); timestampWritable.writeToByteStream(output); fieldIndex++; if (fieldIndex == fieldCount) { // Write back the final NULL byte before the last fields. output.writeByte(nullOffset, nullByte); } } /* * INTERVAL_YEAR_MONTH. */ @Override public void writeHiveIntervalYearMonth(HiveIntervalYearMonth viyt) throws IOException { // Every 8 fields we write a NULL byte. if ((fieldIndex % 8) == 0) { if (fieldIndex > 0) { // Write back previous 8 field's NULL byte. output.writeByte(nullOffset, nullByte); nullByte = 0; nullOffset = output.getLength(); } // Allocate next NULL byte. output.reserve(1); } // Set bit in NULL byte when a field is NOT NULL. nullByte |= 1 << (fieldIndex % 8); if (hiveIntervalYearMonthWritable == null) { hiveIntervalYearMonthWritable = new HiveIntervalYearMonthWritable(); } hiveIntervalYearMonthWritable.set(viyt); hiveIntervalYearMonthWritable.writeToByteStream(output); fieldIndex++; if (fieldIndex == fieldCount) { // Write back the final NULL byte before the last fields. output.writeByte(nullOffset, nullByte); } } @Override public void writeHiveIntervalYearMonth(int totalMonths) throws IOException { // Every 8 fields we write a NULL byte. if ((fieldIndex % 8) == 0) { if (fieldIndex > 0) { // Write back previous 8 field's NULL byte. output.writeByte(nullOffset, nullByte); nullByte = 0; nullOffset = output.getLength(); } // Allocate next NULL byte. output.reserve(1); } // Set bit in NULL byte when a field is NOT NULL. nullByte |= 1 << (fieldIndex % 8); if (hiveIntervalYearMonthWritable == null) { hiveIntervalYearMonthWritable = new HiveIntervalYearMonthWritable(); } hiveIntervalYearMonthWritable.set(totalMonths); hiveIntervalYearMonthWritable.writeToByteStream(output); fieldIndex++; if (fieldIndex == fieldCount) { // Write back the final NULL byte before the last fields. output.writeByte(nullOffset, nullByte); } } /* * INTERVAL_DAY_TIME. */ @Override public void writeHiveIntervalDayTime(HiveIntervalDayTime vidt) throws IOException { // Every 8 fields we write a NULL byte. if ((fieldIndex % 8) == 0) { if (fieldIndex > 0) { // Write back previous 8 field's NULL byte. output.writeByte(nullOffset, nullByte); nullByte = 0; nullOffset = output.getLength(); } // Allocate next NULL byte. output.reserve(1); } // Set bit in NULL byte when a field is NOT NULL. nullByte |= 1 << (fieldIndex % 8); if (hiveIntervalDayTimeWritable == null) { hiveIntervalDayTimeWritable = new HiveIntervalDayTimeWritable(); } hiveIntervalDayTimeWritable.set(vidt); hiveIntervalDayTimeWritable.writeToByteStream(output); fieldIndex++; if (fieldIndex == fieldCount) { // Write back the final NULL byte before the last fields. output.writeByte(nullOffset, nullByte); } } @Override public void writeHiveIntervalDayTime(long totalNanos) throws IOException { // Every 8 fields we write a NULL byte. if ((fieldIndex % 8) == 0) { if (fieldIndex > 0) { // Write back previous 8 field's NULL byte. output.writeByte(nullOffset, nullByte); nullByte = 0; nullOffset = output.getLength(); } // Allocate next NULL byte. output.reserve(1); } // Set bit in NULL byte when a field is NOT NULL. nullByte |= 1 << (fieldIndex % 8); if (hiveIntervalDayTime == null) { hiveIntervalDayTime = new HiveIntervalDayTime(); } if (hiveIntervalDayTimeWritable == null) { hiveIntervalDayTimeWritable = new HiveIntervalDayTimeWritable(); } DateUtils.setIntervalDayTimeTotalNanos(hiveIntervalDayTime, totalNanos); hiveIntervalDayTimeWritable.set(hiveIntervalDayTime); hiveIntervalDayTimeWritable.writeToByteStream(output); fieldIndex++; if (fieldIndex == fieldCount) { // Write back the final NULL byte before the last fields. output.writeByte(nullOffset, nullByte); } } /* * DECIMAL. */ @Override public void writeHiveDecimal(HiveDecimal v) throws IOException { // Every 8 fields we write a NULL byte. if ((fieldIndex % 8) == 0) { if (fieldIndex > 0) { // Write back previous 8 field's NULL byte. output.writeByte(nullOffset, nullByte); nullByte = 0; nullOffset = output.getLength(); } // Allocate next NULL byte. output.reserve(1); } // Set bit in NULL byte when a field is NOT NULL. nullByte |= 1 << (fieldIndex % 8); if (hiveDecimalWritable == null) { hiveDecimalWritable = new HiveDecimalWritable(); } hiveDecimalWritable.set(v); LazyBinarySerDe.writeToByteStream(output, hiveDecimalWritable); fieldIndex++; if (fieldIndex == fieldCount) { // Write back the final NULL byte before the last fields. output.writeByte(nullOffset, nullByte); } } }