org.apache.pig.data.Tuple.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.pig.data.Tuple.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.pig.data;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.io.WritableComparable;

/**
 * An ordered list of {@link Datum}s.
 * <p>
 * Individual slots may hold {@code null} (for example right after
 * {@link #Tuple(int)}). {@link #toString()}, {@link #compareTo(Tuple)},
 * {@link #hashCode()} and {@link #getMemorySize()} tolerate null slots;
 * {@link #write(DataOutput)} rejects them.
 */
public class Tuple extends Datum implements WritableComparable {

    private static final Log log = LogFactory.getLog(Tuple.class);

    /**
     * Initial-capacity hint for tuples parsed from text. It is overwritten with
     * the field count of the most recently parsed line and is only ever used to
     * presize the backing list.
     */
    private static int numFields = 5;

    /** The fields of this tuple, in order. */
    protected ArrayList<Datum> fields;

    /**
     * Delimiter used by {@link #Tuple(String)}.
     * NOTE(review): the parsing constructor matches delimiters literally via
     * {@link String#indexOf(String, int)}, so this value is NOT interpreted as a
     * regex character class; a line is split only where these four characters
     * appear verbatim. Confirm whether that is intended.
     */
    static String defaultDelimiter = "[,\t]";

    /** Text emitted by {@link #toString()} in place of a null field. */
    static String NULL = "__PIG_NULL__";

    /** Creates an empty tuple. */
    public Tuple() {
        this(0);
    }

    /**
     * Creates a tuple with the given number of fields, all initially null.
     *
     * @param numFields
     *            number of null slots to create
     */
    public Tuple(int numFields) {
        fields = new ArrayList<Datum>(numFields);
        for (int i = 0; i < numFields; i++) {
            fields.add(null);
        }
    }

    /**
     * Creates a tuple holding the elements of the given list, in order. The list
     * itself is copied; the elements are not.
     *
     * @param fieldsIn
     *            the fields of the new tuple
     */
    public Tuple(List<Datum> fieldsIn) {
        fields = new ArrayList<Datum>(fieldsIn);
    }

    /**
     * Shortcut for a tuple that has exactly one field.
     *
     * @param fieldIn
     *            the single field
     */
    public Tuple(Datum fieldIn) {
        fields = new ArrayList<Datum>(1);
        fields.add(fieldIn);
    }

    /**
     * Creates a tuple from a delimited line of text. Every token becomes a
     * {@link DataAtom}; a line without any delimiter yields a one-field tuple.
     *
     * @param textLine
     *            the line containing fields of data
     * @param delimiter
     *            the delimiter (normal string, NO REGEX!!); {@code null} selects
     *            {@link #defaultDelimiter}
     * @throws IllegalArgumentException
     *             if the delimiter is empty
     */
    public Tuple(String textLine, String delimiter) {
        if (delimiter == null) {
            delimiter = defaultDelimiter;
        }
        // An empty delimiter matches at every position without consuming any
        // input, which would make the scan below loop forever.
        if (delimiter.length() == 0) {
            throw new IllegalArgumentException("Delimiter must not be empty");
        }

        fields = new ArrayList<Datum>(numFields);
        int delimSize = delimiter.length();

        int start = 0;
        int end = textLine.indexOf(delimiter, start);
        while (end != -1) {
            fields.add(new DataAtom(textLine.substring(start, end)));
            start = end + delimSize;
            end = textLine.indexOf(delimiter, start);
        }
        // Whatever follows the last delimiter (possibly the empty string) is the final field.
        fields.add(new DataAtom(textLine.substring(start)));

        // Remember the width as the capacity hint for the next parsed line.
        numFields = fields.size();
    }

    /**
     * Creates a tuple from a delimited line of text, using
     * {@link #defaultDelimiter} as the delimiter.
     *
     * @param textLine
     *            the line containing fields of data
     */
    public Tuple(String textLine) {
        this(textLine, defaultDelimiter);
    }

    /**
     * Creates a tuple that is the concatenation of the fields of the given
     * tuples, in array order.
     *
     * @param otherTs
     *            the tuples whose fields are appended
     */
    public Tuple(Tuple[] otherTs) {
        fields = new ArrayList<Datum>(otherTs.length);
        for (int i = 0; i < otherTs.length; i++) {
            appendTuple(otherTs[i]);
        }
    }

    /**
     * Makes this tuple use the field list of {@code otherT}.
     * NOTE(review): the list is shared, not copied, so later structural changes
     * through either tuple are visible through both. Confirm callers rely on this.
     *
     * @param otherT
     *            the tuple whose field list is adopted
     */
    public void copyFrom(Tuple otherT) {
        this.fields = otherT.fields;
    }

    /**
     * @return the number of fields in this tuple
     */
    public int arity() {
        return fields.size();
    }

    /**
     * Renders the tuple as {@code (f0, f1, ...)}; null fields are rendered as
     * {@link #NULL}.
     */
    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder();
        sb.append('(');
        boolean first = true;
        for (Datum d : fields) {
            if (!first) {
                sb.append(", ");
            }
            first = false;
            if (d != null) {
                sb.append(d.toString());
            } else {
                sb.append(NULL);
            }
        }
        sb.append(')');
        return sb.toString();
    }

    /**
     * Replaces field {@code i}.
     *
     * @param i
     *            index of an existing field
     * @param val
     *            the new value
     * @throws IndexOutOfBoundsException
     *             if field {@code i} does not exist
     */
    public void setField(int i, Datum val) {
        getField(i); // throws exception if field doesn't exist

        fields.set(i, val);
    }

    /** Replaces field {@code i} with a {@link DataAtom} built from {@code val}. */
    public void setField(int i, int val) {
        setField(i, new DataAtom(val));
    }

    /** Replaces field {@code i} with a {@link DataAtom} built from {@code val}. */
    public void setField(int i, double val) {
        setField(i, new DataAtom(val));
    }

    /** Replaces field {@code i} with a {@link DataAtom} built from {@code val}. */
    public void setField(int i, String val) {
        setField(i, new DataAtom(val));
    }

    /**
     * Returns field {@code i}, which may be null.
     *
     * @param i
     *            zero-based field index
     * @return the field stored at that index
     * @throws IndexOutOfBoundsException
     *             if {@code i} is negative or not smaller than {@link #arity()}
     */
    public Datum getField(int i) {
        // Explicit two-sided check: avoids the overflow of "i + 1" and makes
        // negative indexes report the same descriptive message.
        if (i >= 0 && i < fields.size()) {
            return fields.get(i);
        }

        StringBuilder sb = new StringBuilder();
        sb.append("Requested index ");
        sb.append(i);
        sb.append(" from tuple ");
        sb.append(toString());
        throw new IndexOutOfBoundsException(sb.toString());
    }

    /**
     * Gets field {@code i} if it is an atom or can be coerced into one: a
     * one-field tuple is unwrapped (with a warning), and so is a bag of size one
     * whose tuple has exactly one field.
     *
     * @throws IndexOutOfBoundsException
     *             if field {@code i} does not exist
     * @throws IllegalArgumentException
     *             if the field cannot be coerced into an atom
     */
    public DataAtom getAtomField(int i) {
        Datum field = getField(i); // throws exception if field doesn't exist

        if (field instanceof DataAtom) {
            return (DataAtom) field;
        } else if (field instanceof Tuple) {
            Tuple t = (Tuple) field;
            if (t.arity() == 1) {
                log.warn("Requested for an atom field but found a tuple with one field.");
                return t.getAtomField(0);
            }
        } else if (field instanceof DataBag) {
            DataBag b = (DataBag) field;
            if (b.size() == 1) {
                Tuple t = b.iterator().next();
                if (t.arity() == 1) {
                    return t.getAtomField(0);
                }
            }
        }

        throw newTupleAccessException(field, "atom", i);
    }

    /**
     * Builds the exception thrown when a field does not have the requested kind.
     * A null field is reported with the type name {@code null} instead of
     * failing with a NullPointerException while building the message.
     */
    private RuntimeException newTupleAccessException(Datum field, String requestedFieldType, int index) {
        String actualType = (field == null) ? "null" : field.getClass().getName();
        return new IllegalArgumentException("Requested " + requestedFieldType + " field at index " + index
                + " but was '" + actualType + "' in tuple: " + toString());
    }

    /**
     * Gets field {@code i} if it is a tuple or can be coerced into one: a bag of
     * size one yields its only tuple.
     *
     * @throws IndexOutOfBoundsException
     *             if field {@code i} does not exist
     * @throws IllegalArgumentException
     *             if the field cannot be coerced into a tuple
     */
    public Tuple getTupleField(int i) {
        Datum field = getField(i); // throws exception if field doesn't exist

        if (field instanceof Tuple) {
            return (Tuple) field;
        } else if (field instanceof DataBag) {
            DataBag b = (DataBag) field;
            if (b.size() == 1) {
                return b.iterator().next();
            }
        }

        throw newTupleAccessException(field, "tuple", i);
    }

    /**
     * Gets field {@code i} if it is a bag.
     *
     * @throws IndexOutOfBoundsException
     *             if field {@code i} does not exist
     * @throws IllegalArgumentException
     *             if the field is not a bag
     */
    public DataBag getBagField(int i) {
        Datum field = getField(i); // throws exception if field doesn't exist

        if (field instanceof DataBag) {
            return (DataBag) field;
        }

        throw newTupleAccessException(field, "bag", i);
    }

    /**
     * Appends all fields of {@code other} to the end of this tuple.
     *
     * @param other
     *            the tuple whose fields are appended
     */
    public void appendTuple(Tuple other) {
        this.fields.addAll(other.fields);
    }

    /**
     * Appends a single field to the end of this tuple.
     *
     * @param newField
     *            the field to append
     */
    public void appendField(Datum newField) {
        this.fields.add(newField);
    }

    /**
     * Joins the string forms of all fields with {@code delim}.
     *
     * @param delim
     *            text placed between consecutive fields
     * @return the delimited text
     * @throws IOException
     *             if any field is not a {@link DataAtom} (this includes null fields)
     */
    public String toDelimitedString(String delim) throws IOException {
        StringBuilder buf = new StringBuilder();
        for (Iterator<Datum> it = fields.iterator(); it.hasNext();) {
            Datum field = it.next();
            if (!(field instanceof DataAtom)) {
                throw new IOException("Unable to convert non-flat tuple to string.");
            }

            buf.append((DataAtom) field);
            if (it.hasNext()) {
                buf.append(delim);
            }
        }
        return buf.toString();
    }

    /** @return true if this tuple orders before {@code other} */
    public boolean lessThan(Tuple other) {
        return (this.compareTo(other) < 0);
    }

    /** @return true if this tuple orders after {@code other} */
    public boolean greaterThan(Tuple other) {
        return (this.compareTo(other) > 0);
    }

    /**
     * Two tuples are equal when {@link #compareTo(Object)} returns 0; anything
     * that is not a tuple (including null) is unequal.
     */
    @Override
    public boolean equals(Object other) {
        return compareTo(other) == 0;
    }

    /**
     * Orders tuples first by arity (shorter first), then field by field. A null
     * field equals another null field and orders before any non-null field.
     *
     * @param other
     *            the tuple to compare with
     * @return negative, zero or positive as this tuple is less than, equal to or
     *         greater than {@code other}
     */
    public int compareTo(Tuple other) {
        int mySize = this.fields.size();
        int otherSize = other.fields.size();
        if (otherSize != mySize) {
            return otherSize < mySize ? 1 : -1;
        }

        for (int i = 0; i < mySize; i++) {
            Datum mine = this.fields.get(i);
            Datum theirs = other.fields.get(i);
            if (mine == null || theirs == null) {
                if (mine == theirs) {
                    continue; // both null: equal at this position
                }
                return (mine == null) ? -1 : 1;
            }
            int c = mine.compareTo(theirs);
            if (c != 0) {
                return c;
            }
        }
        return 0;
    }

    /**
     * Compares against an arbitrary object: a tuple orders after atoms and bags,
     * is compared field-wise with other tuples, and orders before anything else
     * (including null).
     */
    public int compareTo(Object other) {
        if (other instanceof DataAtom) {
            return +1;
        } else if (other instanceof DataBag) {
            return +1;
        } else if (other instanceof Tuple) {
            return compareTo((Tuple) other);
        } else {
            return -1;
        }
    }

    /**
     * Combines the hash codes of all fields in order; a null field contributes 0
     * so that the result stays consistent with {@link #equals(Object)}.
     */
    @Override
    public int hashCode() {
        int hash = 1;
        for (Datum d : fields) {
            hash = 31 * hash + (d == null ? 0 : d.hashCode());
        }
        return hash;
    }

    // WritableComparable methods:

    /**
     * Writes the TUPLE marker, the encoded arity and then every field.
     *
     * @throws RuntimeException
     *             if a field is null
     */
    @Override
    public void write(DataOutput out) throws IOException {
        out.write(TUPLE);
        int n = arity();
        encodeInt(out, n);
        for (int i = 0; i < n; i++) {
            Datum d = getField(i);
            if (d != null) {
                d.write(out);
            } else {
                throw new RuntimeException("Null field in tuple");
            }
        }
    }

    /**
     * Replaces the contents of this tuple with a tuple read from {@code in}.
     * This method is invoked when the beginning 'TUPLE' marker is still on the
     * stream.
     *
     * @throws IOException
     *             if the next byte is not the TUPLE marker or reading fails
     */
    public void readFields(DataInput in) throws IOException {
        byte marker = in.readByte();
        if (marker != TUPLE) {
            throw new IOException("Unexpected data while reading tuple from binary file");
        }
        Tuple t = read(in);
        fields = t.fields;
    }

    /**
     * Reads a tuple body (encoded arity followed by that many datums). This
     * method is invoked when the beginning 'TUPLE' marker has already been read
     * off the stream.
     *
     * @return a new tuple holding the fields that were read
     */
    public static Tuple read(DataInput in) throws IOException {
        Tuple ret = new Tuple();

        int size = decodeInt(in);

        for (int i = 0; i < size; i++) {
            ret.appendField(readDatum(in));
        }

        return ret;
    }

    /**
     * Reads one type-marker byte and dispatches to the reader of the matching
     * datum type.
     *
     * @throws IOException
     *             if the marker is not one of TUPLE, BAG, MAP or ATOM
     */
    public static Datum readDatum(DataInput in) throws IOException {
        byte marker = in.readByte();
        switch (marker) {
        case TUPLE:
            return Tuple.read(in);
        case BAG:
            return DataBag.read(in);
        case MAP:
            return DataMap.read(in);
        case ATOM:
            return DataAtom.read(in);
        default:
            throw new IOException("Invalid data while reading Datum from binary file");
        }
    }

    /**
     * Writes {@code i} as one to five bytes carrying 7 bits each, most
     * significant group first. A leading group is emitted only when {@code i}
     * has a non-zero bit at or above that group's position. The high bit is set
     * on the last byte only, which is how {@link #decodeInt(DataInput)} detects
     * the end.
     */
    static void encodeInt(DataOutput os, int i) throws IOException {
        if (i >> 28 != 0) {
            os.write((i >> 28) & 0x7f);
        }
        if (i >> 21 != 0) {
            os.write((i >> 21) & 0x7f);
        }
        if (i >> 14 != 0) {
            os.write((i >> 14) & 0x7f);
        }
        if (i >> 7 != 0) {
            os.write((i >> 7) & 0x7f);
        }
        os.write((i & 0x7f) | (1 << 7));
    }

    /**
     * Reads an integer written by {@link #encodeInt(DataOutput, int)}:
     * accumulates 7 bits per byte until a byte with the high bit set has been
     * consumed. End of input surfaces as the exception thrown by
     * {@link DataInput#readUnsignedByte()}.
     */
    static int decodeInt(DataInput is) throws IOException {
        int value = 0;
        int c;
        do {
            c = is.readUnsignedByte();
            value <<= 7;
            value += c & 0x7f;
        } while ((c & 0x80) == 0);
        return value;
    }

    /**
     * Sums the memory size reported by every non-null field and adds
     * {@code 2 * OBJECT_SIZE + REF_SIZE} for the tuple itself.
     */
    @Override
    public long getMemorySize() {
        long used = 0;
        for (Datum d : fields) {
            if (d != null) {
                used += d.getMemorySize();
            }
        }
        used += 2 * OBJECT_SIZE + REF_SIZE;
        return used;
    }
}