com.aliyun.odps.io.TupleReaderWriter.java Source code

Java tutorial

Introduction

Here is the source code for com.aliyun.odps.io.TupleReaderWriter.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.aliyun.odps.io;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import com.aliyun.odps.utils.ReflectionUtils;

/**
 * Static helpers for comparing {@link Writable} values and for reading and
 * writing a {@link Tuple} in a type-tagged binary format.
 *
 * <p>
 * Wire format, as produced by {@link #writeTuple(DataOutput, Tuple)}: the
 * {@code TUPLE} tag byte, an {@code int} field count, then for every field a
 * one-byte type tag followed by that field's own serialized form. A
 * {@code null} field is written as the {@code NULL} tag alone. A field whose
 * class is not one of the built-in types is written as the {@code UNKNOWN}
 * tag, its class name (via {@code writeUTF}), and then its own serialized
 * form.
 */
public class TupleReaderWriter {

    // IMPORTANT! This list can be used to record values of data on disk,
    // so do not change the values. You may strand user data.
    // IMPORTANT! Order matters here, as compare() below uses the order to
    // order unlike datatypes. Don't change this ordering.
    // Spaced unevenly to leave room for new entries without changing
    // values or creating order issues.
    private static final byte UNKNOWN = 0;
    private static final byte NULL = 1;
    private static final byte NULLWRITABLE = 2;
    private static final byte BOOLEANWRITABLE = 3;
    private static final byte BYTESWRITABLE = 4;
    private static final byte INTWRITABLE = 5;
    private static final byte LONGWRITABLE = 6;
    private static final byte DATETIMEWRITABLE = 7;
    private static final byte DOUBLEWRITABLE = 8;
    private static final byte TEXT = 9;

    private static final byte TUPLE = 100;

    private static final Log LOG = LogFactory.getLog(TupleReaderWriter.class);

    /**
     * Maps a value to its one-byte type tag.
     *
     * @param o
     *     value to classify, may be {@code null}
     * @return {@code NULL} for a {@code null} reference, the matching tag for
     *     a built-in type, or {@code UNKNOWN} for any other class
     */
    private static byte findType(Writable o) {
        if (o == null) {
            return NULL;
        }

        // Try to put the most common first
        if (o instanceof LongWritable) {
            return LONGWRITABLE;
        } else if (o instanceof IntWritable) {
            return INTWRITABLE;
        } else if (o instanceof Text) {
            return TEXT;
        } else if (o instanceof DoubleWritable) {
            return DOUBLEWRITABLE;
        } else if (o instanceof BooleanWritable) {
            return BOOLEANWRITABLE;
        } else if (o instanceof DatetimeWritable) {
            return DATETIMEWRITABLE;
        } else if (o instanceof BytesWritable) {
            return BYTESWRITABLE;
        } else if (o instanceof NullWritable) {
            return NULLWRITABLE;
        } else if (o instanceof Tuple) {
            return TUPLE;
        }

        return UNKNOWN;
    }

    /**
     * Compares two values to each other. This function is necessary because
     * there is no super class that implements compareTo. Values of different
     * types are ordered (arbitrarily) by their type tag: UNKNOWN &lt; NULL &lt;
     * NULLWRITABLE &lt; BOOLEANWRITABLE &lt; BYTESWRITABLE &lt; INTWRITABLE &lt;
     * LONGWRITABLE &lt; DATETIMEWRITABLE &lt; DOUBLEWRITABLE &lt; TEXT &lt;
     * TUPLE. No other functions should implement this cross object logic. They
     * should call this function for it instead.
     *
     * @param o1
     *     First object
     * @param o2
     *     Second object
     * @return a negative value if o1 is less, 0 if they are equal, a positive
     *     value if o2 is less
     */
    public static int compare(Writable o1, Writable o2) {
        return compare(o1, o2, findType(o1), findType(o2));
    }

    /**
     * Same as {@link #compare(Writable, Writable)}, but does not inspect the
     * passed in objects to determine their type, relying instead on the caller
     * to provide the appropriate type tags.
     *
     * Use this version in cases where multiple objects of the same type have to
     * be repeatedly compared.
     *
     * @param o1
     *     first object
     * @param o2
     *     second object
     * @param dt1
     *     type, as byte value, of o1
     * @param dt2
     *     type, as byte value, of o2
     * @return for unlike tags, -1 if dt1 &lt; dt2 and 1 otherwise; for equal
     *     tags, 0 for the two null kinds, otherwise the result of the type's
     *     own {@code compareTo}
     * @throws RuntimeException
     *     if both values are of an unknown class that is not a
     *     {@link WritableComparable}, or the tag is not one this class defines
     */
    @SuppressWarnings({ "unchecked", "rawtypes" })
    public static int compare(Writable o1, Writable o2, byte dt1, byte dt2) {
        if (dt1 == dt2) {
            switch (dt1) {
            case NULL:
            case NULLWRITABLE:
                // Both kinds carry no payload, so two of the same kind are equal.
                return 0;

            case BOOLEANWRITABLE:
                return ((BooleanWritable) o1).compareTo((BooleanWritable) o2);

            case BYTESWRITABLE:
                return ((BytesWritable) o1).compareTo((BytesWritable) o2);

            case INTWRITABLE:
                return ((IntWritable) o1).compareTo((IntWritable) o2);

            case LONGWRITABLE:
                return ((LongWritable) o1).compareTo((LongWritable) o2);

            case DATETIMEWRITABLE:
                return ((DatetimeWritable) o1).compareTo((DatetimeWritable) o2);

            case DOUBLEWRITABLE:
                return ((DoubleWritable) o1).compareTo((DoubleWritable) o2);

            case TEXT:
                return ((Text) o1).compareTo((Text) o2);

            case TUPLE:
                return ((Tuple) o1).compareTo((Tuple) o2);

            case UNKNOWN:
                // User-defined types can only be compared when they opt in
                // by implementing WritableComparable.
                if (o1 instanceof WritableComparable && o2 instanceof WritableComparable) {
                    return ((WritableComparable) o1).compareTo((WritableComparable) o2);
                }
                throw new RuntimeException("ODPS-0730001: Class " + o1.getClass().getName() + " is not comparable");

            default:
                throw new RuntimeException("Not support type " + dt1 + " in compare");
            }
        } else if (dt1 < dt2) {
            return -1;
        } else {
            return 1;
        }
    }

    /**
     * {@link WritableComparator} for {@link Tuple}.
     */
    public static class TupleRawComparator extends WritableComparator {

        /**
         * Registers this comparator for {@link Tuple}; the {@code true} flag is
         * passed through to the {@link WritableComparator} constructor.
         */
        public TupleRawComparator() {
            super(Tuple.class, true);
        }

    }

    /**
     * Reads a serialized tuple from the input and appends its fields to the
     * given tuple. Only {@code append} is called on the tuple; any fields it
     * already holds are not removed by this method.
     *
     * @param in
     *     input to read the tuple fields from
     * @param t
     *     tuple that receives the fields
     * @throws IOException
     *     if the input does not start with the tuple tag, the field count is
     *     negative, or reading a field fails
     */
    public static void readTuple(DataInput in, Tuple t) throws IOException {
        // Make sure it's a tuple.
        byte b = in.readByte();
        if (b != TUPLE) {
            String msg = "Unexpected data while reading tuple from binary file.";
            throw new IOException(msg);
        }

        // Read the number of fields. A negative count can only come from
        // corrupt data; reject it the same way nested tuples are rejected.
        int sz = in.readInt();
        if (sz < 0) {
            throw new IOException("Invalid size " + sz + " for a tuple");
        }
        for (int i = 0; i < sz; i++) {
            byte type = in.readByte();
            t.append(readDatum(in, type));
        }
    }

    /**
     * Reads one field whose type tag has already been consumed.
     *
     * @param in
     *     input positioned just after the field's type tag
     * @param type
     *     the type tag that was read for this field
     * @return the value read, or {@code null} for the {@code NULL} tag
     * @throws IOException
     *     if reading fails, a nested tuple has a negative size, or a
     *     user-defined class cannot be loaded, instantiated or read
     * @throws RuntimeException
     *     if the type tag is not one this class defines
     */
    private static Writable readDatum(DataInput in, byte type) throws IOException {
        switch (type) {
        case TUPLE:
            int sz = in.readInt();
            // if sz == 0, we construct an "empty" tuple -
            // presumably the writer wrote an empty tuple!
            if (sz < 0) {
                throw new IOException("Invalid size " + sz + " for a tuple");
            }
            Tuple tp = new Tuple(sz);
            for (int i = 0; i < sz; i++) {
                byte b = in.readByte();
                tp.set(i, readDatum(in, b));
            }

            return tp;

        case NULL:
            return null;

        case INTWRITABLE:
            IntWritable iw = new IntWritable();
            iw.readFields(in);
            return iw;

        case LONGWRITABLE:
            LongWritable lw = new LongWritable();
            lw.readFields(in);
            return lw;

        case DATETIMEWRITABLE:
            DatetimeWritable dtw = new DatetimeWritable();
            dtw.readFields(in);
            return dtw;

        case DOUBLEWRITABLE:
            DoubleWritable dw = new DoubleWritable();
            dw.readFields(in);
            return dw;

        case BOOLEANWRITABLE:
            BooleanWritable bw = new BooleanWritable();
            bw.readFields(in);
            return bw;

        case BYTESWRITABLE:
            BytesWritable bsw = new BytesWritable();
            bsw.readFields(in);
            return bsw;

        case TEXT:
            Text t = new Text();
            t.readFields(in);
            return t;

        case NULLWRITABLE:
            NullWritable nw = NullWritable.get();
            nw.readFields(in);
            return nw;

        case UNKNOWN:
            // NOTE(review): the class name comes from the stream, so this loads
            // whatever class the data names. Only read data from trusted sources.
            String clsName = in.readUTF();
            try {
                // asSubclass rejects a non-Writable class with a
                // ClassCastException before any instance of it is created.
                Class<? extends Writable> cls = Class.forName(clsName).asSubclass(Writable.class);
                Writable w = (Writable) ReflectionUtils.newInstance(cls, null);
                w.readFields(in);
                return w;
            } catch (RuntimeException re) {
                // Log with the class name and the throwable so the cause is
                // not lost when the exception has no message.
                LOG.info("Failed to read value of class " + clsName, re);
                throw new IOException(re);
            } catch (ClassNotFoundException cnfe) {
                throw new IOException(cnfe);
            }

        default:
            throw new RuntimeException("Unexpected data type " + type + " found in stream.");
        }
    }

    /**
     * Writes a tuple to the output: the tuple tag, the field count, then every
     * field preceded by its type tag.
     *
     * @param out
     *     output to write the tuple to
     * @param t
     *     tuple to write
     * @throws IOException
     *     if writing the tuple or one of its fields fails
     */
    public static void writeTuple(DataOutput out, Tuple t) throws IOException {
        out.writeByte(TUPLE);
        int sz = t.size();
        out.writeInt(sz);
        for (int i = 0; i < sz; i++) {
            writeDatum(out, t.get(i));
        }
    }

    /**
     * Writes one field: its type tag followed by its serialized form.
     *
     * @param out
     *     output to write to
     * @param val
     *     field value, may be {@code null}
     * @throws IOException
     *     if writing fails
     */
    private static void writeDatum(DataOutput out, Writable val) throws IOException {
        // Determine the type tag that prefixes the value
        byte type = findType(val);
        switch (type) {
        case TUPLE:
            // A nested tuple has exactly the same layout as a top-level one.
            writeTuple(out, (Tuple) val);
            break;

        case NULL:
            out.writeByte(NULL);
            break;

        case INTWRITABLE:
            out.writeByte(INTWRITABLE);
            ((IntWritable) val).write(out);
            break;

        case LONGWRITABLE:
            out.writeByte(LONGWRITABLE);
            ((LongWritable) val).write(out);
            break;

        case DATETIMEWRITABLE:
            out.writeByte(DATETIMEWRITABLE);
            ((DatetimeWritable) val).write(out);
            break;

        case DOUBLEWRITABLE:
            out.writeByte(DOUBLEWRITABLE);
            ((DoubleWritable) val).write(out);
            break;

        case BOOLEANWRITABLE:
            out.writeByte(BOOLEANWRITABLE);
            ((BooleanWritable) val).write(out);
            break;

        case BYTESWRITABLE:
            out.writeByte(BYTESWRITABLE);
            ((BytesWritable) val).write(out);
            break;

        case TEXT:
            out.writeByte(TEXT);
            ((Text) val).write(out);
            break;

        case NULLWRITABLE:
            out.writeByte(NULLWRITABLE);
            ((NullWritable) val).write(out);
            break;

        case UNKNOWN:
            // The class name is stored so the reader can instantiate the
            // same class before calling readFields on it.
            out.writeByte(UNKNOWN);
            out.writeUTF(val.getClass().getName());
            val.write(out);
            break;

        default:
            throw new RuntimeException("Unexpected data type " + type + " found in stream.");
        }
    }
}