org.apache.hadoop.hive.serde2.io.TimestampWritable.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.hadoop.hive.serde2.io.TimestampWritable.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hive.serde2.io;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.io.OutputStream;
import java.math.BigDecimal;
import java.sql.Timestamp;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.Arrays;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.serde2.ByteStream.Output;
import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils;
import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.VInt;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableUtils;

/**
 * TimestampWritable
 * Writable equivalent of java.sql.Timestamp
 *
 * Timestamps are of the format
 *    YYYY-MM-DD HH:MM:SS.[fff...]
 *
 * We encode the Unix timestamp in seconds in 4 bytes, using the MSB to signify
 * whether the timestamp has a fractional portion. Only the remaining 31 bits
 * hold the seconds value.
 *
 * The fractional portion is reversed, and encoded as a VInt
 * so timestamps with less precision use fewer bytes.
 *
 *      0.1    -> 1
 *      0.01   -> 10
 *      0.001  -> 100
 *
 * The value is held either as serialized bytes or as a Timestamp object and
 * is converted lazily between the two. Instances use no synchronization.
 */
public class TimestampWritable implements WritableComparable<TimestampWritable> {
    static final private Log LOG = LogFactory.getLog(TimestampWritable.class);

    /** Four zero bytes. */
    static final public byte[] nullBytes = { 0x0, 0x0, 0x0, 0x0 };

    /** Clears the high bit of the first serialized int, leaving the seconds. */
    private static final int NO_DECIMAL_MASK = 0x7FFFFFFF;
    /** High bit of the first serialized int; set when a fraction follows. */
    private static final int HAS_DECIMAL_MASK = 0x80000000;

    /** Number of decimal digits in a nanosecond fraction. */
    private static final int NANOS_DIGITS = 9;

    private static final ThreadLocal<DateFormat> threadLocalDateFormat = new ThreadLocal<DateFormat>() {
        @Override
        protected DateFormat initialValue() {
            // SimpleDateFormat is not thread-safe, hence one instance per thread.
            return new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
        }
    };

    /**
     * Privately owned Timestamp holding the decoded value. It is never
     * replaced by a caller-supplied object, so updating it in place cannot
     * modify a Timestamp that belongs to a caller of set(Timestamp).
     */
    private final Timestamp timestamp = new Timestamp(0);

    /**
     * true if data is stored in timestamp field rather than byte arrays.
     *      allows for lazy conversion to bytes when necessary
     * false otherwise
     */
    private boolean bytesEmpty;

    /**
     * true if the timestamp field does not reflect the current bytes and has
     * to be populated from them before it is used.
     */
    private boolean timestampEmpty;

    /* Allow use of external byte[] for efficiency */
    private byte[] currentBytes;
    private final byte[] internalBytes = new byte[9];
    private byte[] externalBytes;
    private int offset;

    /* Constructors */

    /**
     * Creates a bytes-backed instance whose internal buffer is all zeros.
     */
    public TimestampWritable() {
        Arrays.fill(internalBytes, (byte) 0x0);
        bytesEmpty = false;
        currentBytes = internalBytes;
        offset = 0;

        clearTimestamp();
    }

    /**
     * Creates an instance backed by the given serialized bytes (not copied).
     * @param bytes array containing a serialized timestamp
     * @param offset index of the first byte of the serialized timestamp
     */
    public TimestampWritable(byte[] bytes, int offset) {
        set(bytes, offset);
    }

    /**
     * Creates an instance backed by a fresh copy of <code>t</code>'s bytes.
     * @param t the instance to copy
     */
    public TimestampWritable(TimestampWritable t) {
        this(t.getBytes(), 0);
    }

    /**
     * Creates an instance holding the value of <code>t</code>.
     * @param t the value to hold; null is treated as the epoch
     */
    public TimestampWritable(Timestamp t) {
        set(t);
    }

    /**
     * Points this instance at externally owned serialized bytes. The array is
     * referenced, not copied.
     * @param bytes array containing a serialized timestamp
     * @param offset index of the first byte of the serialized timestamp
     */
    public void set(byte[] bytes, int offset) {
        externalBytes = bytes;
        this.offset = offset;
        bytesEmpty = false;
        currentBytes = externalBytes;

        clearTimestamp();
    }

    /**
     * Sets the value from a Timestamp. The value is copied into a privately
     * owned Timestamp, so later changes to <code>t</code> do not affect this
     * instance and this instance never modifies <code>t</code>.
     * @param t the new value; null resets this instance to the epoch
     */
    public void set(Timestamp t) {
        // Read both parts before writing: t may be our own timestamp object
        // (e.g. set(getTimestamp())), and setTime() overwrites the nanos.
        long millis = 0;
        int nanos = 0;
        if (t != null) {
            millis = t.getTime();
            nanos = t.getNanos();
        }
        timestamp.setTime(millis);
        timestamp.setNanos(nanos);

        // The flags are updated in the null case as well; otherwise stale
        // (or missing) bytes would still be treated as the current value.
        bytesEmpty = true;
        timestampEmpty = false;
    }

    /**
     * Sets the value from another TimestampWritable. If <code>t</code> is
     * backed by an external byte array, that array is shared; if it is backed
     * by its own internal buffer, the bytes are copied so that later changes
     * to <code>t</code> do not leak into this instance.
     * @param t the instance to take the value from
     */
    public void set(TimestampWritable t) {
        if (t.bytesEmpty) {
            set(t.getTimestamp());
            return;
        }
        if (t.currentBytes == t.externalBytes) {
            set(t.currentBytes, t.offset);
        } else {
            System.arraycopy(t.currentBytes, 0, internalBytes, 0, internalBytes.length);
            currentBytes = internalBytes;
            offset = 0;
            bytesEmpty = false;
            clearTimestamp();
        }
    }

    /** Marks the timestamp field as out of date with respect to the bytes. */
    private void clearTimestamp() {
        timestampEmpty = true;
    }

    /**
     * Appends the serialized form of this timestamp to the given stream.
     * @param byteStream the destination
     */
    public void writeToByteStream(Output byteStream) {
        checkBytes();
        byteStream.write(currentBytes, offset, getTotalLength());
    }

    /**
     *
     * @return seconds corresponding to this TimestampWritable
     */
    public int getSeconds() {
        if (bytesEmpty) {
            return (int) (timestamp.getTime() / 1000);
        }
        return TimestampWritable.getSeconds(currentBytes, offset);
    }

    /**
     *
     * @return nanoseconds in this TimestampWritable
     */
    public int getNanos() {
        if (!timestampEmpty) {
            return timestamp.getNanos();
        }

        return hasDecimal() ? TimestampWritable.getNanos(currentBytes, offset + 4) : 0;
    }

    /**
     *
     * @return length of serialized TimestampWritable data
     */
    private int getTotalLength() {
        return 4 + getDecimalLength();
    }

    /**
     *
     * @return number of bytes the variable length decimal takes up
     */
    private int getDecimalLength() {
        checkBytes();
        return hasDecimal() ? WritableUtils.decodeVIntSize(currentBytes[offset + 4]) : 0;
    }

    /**
     * Returns the value as a Timestamp, decoding it from the bytes first if
     * necessary. The returned object is the one held internally by this
     * instance, not a copy.
     * @return the Timestamp held by this instance
     */
    public Timestamp getTimestamp() {
        if (timestampEmpty) {
            populateTimestamp();
        }
        return timestamp;
    }

    /**
     * Used to create copies of objects
     * @return a copy of the internal TimestampWritable byte[]
     */
    public byte[] getBytes() {
        checkBytes();

        int len = getTotalLength();
        byte[] b = new byte[len];

        System.arraycopy(currentBytes, offset, b, 0, len);
        return b;
    }

    /**
     * @return byte[] representation of TimestampWritable that is binary
     * sortable (4 byte seconds, 4 bytes for nanoseconds)
     */
    public byte[] getBinarySortable() {
        byte[] b = new byte[8];
        int nanos = getNanos();
        int seconds = HAS_DECIMAL_MASK | getSeconds();
        intToBytes(seconds, b, 0);
        intToBytes(nanos, b, 4);
        return b;
    }

    /**
     * Given a byte[] that has binary sortable data, initialize the internal
     * structures to hold that data
     * @param bytes array holding 4 bytes of seconds followed by 4 bytes of nanoseconds
     * @param offset index of the first of those 8 bytes
     */
    public void setBinarySortable(byte[] bytes, int offset) {
        int seconds = bytesToInt(bytes, offset);
        int nanos = bytesToInt(bytes, offset + 4);
        if (nanos == 0) {
            seconds &= NO_DECIMAL_MASK;
        } else {
            seconds |= HAS_DECIMAL_MASK;
        }
        intToBytes(seconds, internalBytes, 0);
        setNanosBytes(nanos, internalBytes, 4);
        currentBytes = internalBytes;
        this.offset = 0;

        // The bytes are now authoritative; drop any previously cached
        // Timestamp so it is not returned or serialized over the new value.
        bytesEmpty = false;
        clearTimestamp();
    }

    /**
     * The data of TimestampWritable can be stored either in a byte[]
     * or in a Timestamp object. Calling this method ensures that the byte[]
     * is populated from the Timestamp object if previously empty.
     */
    private void checkBytes() {
        if (bytesEmpty) {
            // Populate byte[] from Timestamp
            convertTimestampToBytes(timestamp, internalBytes, 0);
            offset = 0;
            currentBytes = internalBytes;
            bytesEmpty = false;
        }
    }

    /**
     *
     * @return double representation of the timestamp: whole seconds plus
     *         nanoseconds / 10^9
     */
    public double getDouble() {
        double seconds, nanos;
        if (bytesEmpty) {
            // Integer division on purpose: only whole seconds are wanted
            // here, the fraction is supplied by the nanos below.
            seconds = timestamp.getTime() / 1000;
            nanos = timestamp.getNanos();
        } else {
            seconds = getSeconds();
            nanos = getNanos();
        }
        return seconds + ((double) nanos) / 1000000000;
    }

    /**
     * Reads a serialized timestamp into the internal buffer and makes it the
     * current value of this instance.
     * @param in the source to read from
     * @throws IOException if reading fails or the encoded fraction length
     *         does not fit the internal buffer
     */
    public void readFields(DataInput in) throws IOException {
        in.readFully(internalBytes, 0, 4);
        if (TimestampWritable.hasDecimal(internalBytes[0])) {
            in.readFully(internalBytes, 4, 1);
            int len = WritableUtils.decodeVIntSize(internalBytes[4]);
            if (4 + len > internalBytes.length) {
                throw new IOException("Invalid fraction length " + len + " in serialized timestamp");
            }
            in.readFully(internalBytes, 5, len - 1);
        }
        currentBytes = internalBytes;
        this.offset = 0;

        // Required when an instance is reused: without this a Timestamp
        // cached from the previous value would still be reported, or would
        // overwrite the bytes just read on the next checkBytes().
        bytesEmpty = false;
        clearTimestamp();
    }

    /**
     * Writes the serialized form of this timestamp to a stream.
     * @param out the destination
     * @throws IOException if writing fails
     */
    public void write(OutputStream out) throws IOException {
        checkBytes();
        out.write(currentBytes, offset, getTotalLength());
    }

    /**
     * Writes the serialized form of this timestamp to a DataOutput.
     * @param out the destination
     * @throws IOException if writing fails
     */
    public void write(DataOutput out) throws IOException {
        // Write through the DataOutput interface itself; not every
        // DataOutput is an OutputStream, so a cast is not safe.
        checkBytes();
        out.write(currentBytes, offset, getTotalLength());
    }

    /**
     * Orders by seconds, then by nanoseconds.
     * @param t the timestamp to compare with
     * @return a negative value, zero or a positive value if this timestamp is
     *         less than, equal to or greater than <code>t</code>
     */
    public int compareTo(TimestampWritable t) {
        checkBytes();
        int s1 = this.getSeconds();
        int s2 = t.getSeconds();
        // Compare explicitly instead of subtracting: a difference of two
        // ints can overflow and yield the wrong sign.
        if (s1 != s2) {
            return s1 < s2 ? -1 : 1;
        }
        int n1 = this.getNanos();
        int n2 = t.getNanos();
        if (n1 == n2) {
            return 0;
        }
        return n1 < n2 ? -1 : 1;
    }

    /**
     * @param o the object to compare with
     * @return true if <code>o</code> is a TimestampWritable that compares
     *         equal to this one; false otherwise, including for null
     */
    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (!(o instanceof TimestampWritable)) {
            return false;
        }
        return compareTo((TimestampWritable) o) == 0;
    }

    /**
     * @return the timestamp formatted as yyyy-MM-dd HH:mm:ss, followed by the
     *         fractional part of Timestamp.toString() unless that is ".0"
     */
    @Override
    public String toString() {
        if (timestampEmpty) {
            populateTimestamp();
        }

        String formatted = threadLocalDateFormat.get().format(timestamp);

        // Everything after the first 19 characters ("yyyy-mm-dd hh:mm:ss")
        // of Timestamp.toString() is the fractional part.
        String timestampString = timestamp.toString();
        String fraction = timestampString.length() > 19 ? timestampString.substring(19) : "";
        if (fraction.equals(".0")) {
            return formatted;
        }
        return formatted + fraction;
    }

    @Override
    public int hashCode() {
        long seconds = getSeconds();
        seconds <<= 32;
        seconds |= getNanos();
        return (int) ((seconds >>> 32) ^ seconds);
    }

    /** Decodes the current bytes into the internal Timestamp object. */
    private void populateTimestamp() {
        long seconds = getSeconds();
        int nanos = getNanos();
        timestamp.setTime(seconds * 1000);
        timestamp.setNanos(nanos);
    }

    /** Static methods **/

    /**
     * Gets seconds stored as integer at bytes[offset]
     * @param bytes array containing a serialized timestamp
     * @param offset index of the first byte of the serialized timestamp
     * @return the number of seconds
     */
    public static int getSeconds(byte[] bytes, int offset) {
        return NO_DECIMAL_MASK & bytesToInt(bytes, offset);
    }

    /**
     * Decodes the reversed-fraction VInt stored at bytes[offset].
     * @param bytes array containing the encoded fraction
     * @param offset index of the first byte of the VInt
     * @return the fraction in nanoseconds
     */
    public static int getNanos(byte[] bytes, int offset) {
        // A local VInt rather than a shared static one: this is a public
        // static method and may be called from several threads at once.
        VInt vInt = new VInt();
        LazyBinaryUtils.readVInt(bytes, offset, vInt);

        // Reverse the decimal digits, counting them on the way.
        int remaining = vInt.value;
        int digits = 0;
        int nanos = 0;
        while (remaining != 0) {
            nanos = nanos * 10 + remaining % 10;
            remaining /= 10;
            digits++;
        }

        // Scale up to a full nine-digit nanosecond value.
        for (int i = digits; i < NANOS_DIGITS; i++) {
            nanos *= 10;
        }
        return nanos;
    }

    /**
     * Writes a Timestamp's serialized value to byte array b at offset.
     * Up to 9 bytes are written. If fewer are available an error is logged
     * and the write is still attempted.
     * @param t the timestamp to serialize
     * @param b the destination array
     * @param offset index in <code>b</code> at which to start writing
     */
    public static void convertTimestampToBytes(Timestamp t, byte[] b, int offset) {
        if (b.length - offset < 9) {
            LOG.error("byte array too short");
        }
        long millis = t.getTime();
        int nanos = t.getNanos();

        boolean hasDecimal = nanos != 0 && setNanosBytes(nanos, b, offset + 4);
        setSecondsBytes(millis, b, offset, hasDecimal);
    }

    /**
     * Given a time in milliseconds, write its whole seconds in serialized
     * form to the byte array b at offset
     * @param millis time in milliseconds
     * @param b the destination array
     * @param offset index in <code>b</code> at which to start writing
     * @param hasDecimal whether to set the bit marking a fractional part
     */
    private static void setSecondsBytes(long millis, byte[] b, int offset, boolean hasDecimal) {
        int seconds = (int) (millis / 1000);

        if (!hasDecimal) {
            seconds &= NO_DECIMAL_MASK;
        } else {
            seconds |= HAS_DECIMAL_MASK;
        }

        intToBytes(seconds, b, offset);
    }

    /**
     * Given an integer representing nanoseconds, write its serialized
     * value to the byte array b at offset
     *
     * @param nanos the nanoseconds to encode
     * @param b the destination array
     * @param offset index in <code>b</code> at which to start writing
     * @return true if the encoded (reversed) value is non-zero
     */
    private static boolean setNanosBytes(int nanos, byte[] b, int offset) {
        int decimal = 0;
        if (nanos != 0) {
            // Reverse all nine digits so that trailing zeros of the fraction
            // become leading zeros and drop out of the encoded value.
            int counter = 0;
            while (counter < NANOS_DIGITS) {
                decimal *= 10;
                decimal += nanos % 10;
                nanos /= 10;
                counter++;
            }
        }

        LazyBinaryUtils.writeVLongToByteArray(b, offset, decimal);
        return decimal != 0;
    }

    /**
     * Interprets a float as a unix timestamp and returns a Timestamp object
     * @param f seconds since the epoch
     * @return the equivalent Timestamp object
     */
    public static Timestamp floatToTimestamp(float f) {
        return doubleToTimestamp((double) f);
    }

    /**
     * Interprets a decimal as a unix timestamp and returns a Timestamp object
     * @param d seconds since the epoch
     * @return the equivalent Timestamp object
     */
    public static Timestamp decimalToTimestamp(BigDecimal d) {
        BigDecimal seconds = new BigDecimal(d.longValue());
        long millis = d.multiply(new BigDecimal(1000)).longValue();
        int nanos = d.subtract(seconds).multiply(new BigDecimal(1000000000)).intValue();

        Timestamp t = new Timestamp(millis);
        t.setNanos(nanos);

        return t;
    }

    /**
     * Interprets a double as a unix timestamp and returns a Timestamp object
     * @param f seconds since the epoch
     * @return the equivalent Timestamp object
     */
    public static Timestamp doubleToTimestamp(double f) {
        long seconds = (long) f;

        // We must ensure the exactness of the double's fractional portion.
        // 0.6 as the fraction part will be converted to 0.59999... and
        // significantly reduce the savings from binary serialization
        BigDecimal bd = new BigDecimal(String.valueOf(f));
        bd = bd.subtract(new BigDecimal(seconds)).multiply(new BigDecimal(1000000000));
        int nanos = bd.intValue();

        // Convert to millis
        long millis = seconds * 1000;
        Timestamp t = new Timestamp(millis);

        // Set remaining fractional portion to nanos
        t.setNanos(nanos);
        return t;
    }

    /**
     * Decodes a serialized timestamp into an existing Timestamp object.
     * @param t the object to update
     * @param bytes array containing a serialized timestamp
     * @param offset index of the first byte of the serialized timestamp
     */
    public static void setTimestamp(Timestamp t, byte[] bytes, int offset) {
        boolean hasDecimal = hasDecimal(bytes[offset]);
        t.setTime(((long) TimestampWritable.getSeconds(bytes, offset)) * 1000);
        if (hasDecimal) {
            t.setNanos(TimestampWritable.getNanos(bytes, offset + 4));
        }
    }

    /**
     * Decodes a serialized timestamp into a new Timestamp object.
     * @param bytes array containing a serialized timestamp
     * @param offset index of the first byte of the serialized timestamp
     * @return the decoded Timestamp
     */
    public static Timestamp createTimestamp(byte[] bytes, int offset) {
        Timestamp t = new Timestamp(0);
        TimestampWritable.setTimestamp(t, bytes, offset);
        return t;
    }

    /**
     * @return true if the serialized form of this timestamp has a decimal
     *         portion, false otherwise
     */
    public boolean hasDecimal() {
        // Make sure the bytes reflect the current value before inspecting
        // them; they are stale (or absent) while the value lives only in
        // the Timestamp object.
        checkBytes();
        return hasDecimal(currentBytes[offset]);
    }

    /**
     *
     * @param b first byte in an encoded TimestampWritable
     * @return true if it has a decimal portion, false otherwise
     */
    public static boolean hasDecimal(byte b) {
        return (b >> 7) != 0;
    }

    /**
     * Writes <code>value</code> into <code>dest</code> at <code>offset</code>,
     * most significant byte first
     * @param value the int to write
     * @param dest the destination array
     * @param offset index in <code>dest</code> of the first byte written
     */
    private static void intToBytes(int value, byte[] dest, int offset) {
        dest[offset] = (byte) ((value >> 24) & 0xFF);
        dest[offset + 1] = (byte) ((value >> 16) & 0xFF);
        dest[offset + 2] = (byte) ((value >> 8) & 0xFF);
        dest[offset + 3] = (byte) (value & 0xFF);
    }

    /**
     *
     * @param bytes the source array
     * @param offset index of the first (most significant) byte
     * @return integer represented by the four bytes in <code>bytes</code>
     *  beginning at <code>offset</code>
     */
    private static int bytesToInt(byte[] bytes, int offset) {
        return ((0xFF & bytes[offset]) << 24) | ((0xFF & bytes[offset + 1]) << 16)
                | ((0xFF & bytes[offset + 2]) << 8) | (0xFF & bytes[offset + 3]);
    }
}