org.apache.hadoop.hive.ql.exec.persistence.LazyFlatRowContainer.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.hadoop.hive.ql.exec.persistence.LazyFlatRowContainer.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.ql.exec.persistence;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.ObjectOutputStream;
import java.util.AbstractCollection;
import java.util.AbstractList;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.ListIterator;
import java.util.NoSuchElementException;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.serde2.SerDe;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.io.ShortWritable;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption;
import org.apache.hadoop.io.BinaryComparable;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Writable;

@SuppressWarnings("deprecation")
public class LazyFlatRowContainer extends AbstractCollection<Object>
        implements MapJoinRowContainer, AbstractRowContainer.RowIterator<List<Object>>, List<Object> {
    /** Shared zero-length backing array used whenever the container holds no values. */
    private static final Object[] EMPTY_OBJECT_ARRAY = new Object[0];
    /** Sentinel value of {@link #rowLength} meaning the container is still in lazy mode. */
    private static final int UNKNOWN = Integer.MAX_VALUE;

    private static final Log LOG = LogFactory.getLog(LazyFlatRowContainer.class);

    /**
     * In lazy mode, element 0 contains the context for deserialization and all the other
     * elements contain byte arrays to be deserialized. After deserialization, the array
     * contains (row count * row size) elements - a flattened matrix of the rows stored.
     */
    private Object[] array;
    /**
     * This is kind of tricky. UNKNOWN means lazy mode. Other positive numbers represent
     * the row length (see array javadoc). Non-positive numbers mean the row length is zero
     * (thus, array is empty); they represent the (negated) number of rows (for joins w/o
     * projections).
     */
    private int rowLength = UNKNOWN;
    /** Starts with all bits set; the last column of each row is AND-ed into it when filter tags are present. */
    private byte aliasFilter = (byte) 0xff;

    /** Creates an empty container; it starts in lazy mode with no backing storage. */
    public LazyFlatRowContainer() {
        array = EMPTY_OBJECT_ARRAY;
    }

    /**
     * Adds one serialized row to this container. Called when loading the hashtable.
     *
     * @param context   serde context used to deserialize {@code value}
     * @param value     the serialized row
     * @param allowLazy if true, only the raw bytes are stored and deserialization is deferred;
     *                  otherwise the row is deserialized into the backing array right away
     * @throws HiveException if the row structure cannot be inspected or the row cannot be
     *                       deserialized
     */
    public void add(MapJoinObjectSerDeContext context, BytesWritable value, boolean allowLazy)
            throws HiveException {
        if (allowLazy) {
            addLazy(context, value);
            return;
        }
        SerDe serde = context.getSerDe();
        // On the first eager add the row length is still unknown and must be discovered.
        boolean hasValues = isLazy() ? setRowLength(serde, 0) : (rowLength > 0);
        int rowCount = rowCount();
        if (hasValues) {
            listRealloc(array.length + rowLength);
            read(serde, value, rowCount);
            // The lazy path computes the alias filter in ensureEager(); the eager path has to
            // fold in the tag of the row just appended (its last column), otherwise
            // getAliasFilter() would keep returning the initial all-ones value.
            if (context.hasFilterTag()) {
                aliasFilter &= ((ShortWritable) array[array.length - 1]).get();
            }
        } else {
            --rowLength; // see rowLength javadoc
        }
    }

    /**
     * Stores the raw bytes of a row for later deserialization. Slot 0 of the backing array
     * holds the serde context; every following slot holds one row's bytes.
     *
     * @param valueContext serde context; must be the same instance for every row added
     * @param currentValue serialized row whose valid bytes are copied
     * @throws AssertionError if the container is no longer lazy, or a different context
     *                        instance is supplied than the one already stored
     */
    private void addLazy(MapJoinObjectSerDeContext valueContext, BytesWritable currentValue) {
        if (!isLazy()) {
            throw new AssertionError("Not in lazy mode");
        }
        int slot = this.array.length;
        if (slot == 0) {
            // TODO: we store valueContext needlessly in each RowContainer because the
            //       accessor cannot pass it to us for lazy deserialization.
            listRealloc(2);
            this.array[0] = valueContext;
            slot = 1;
        } else {
            if (this.array[0] != valueContext) {
                throw new AssertionError("Different valueContext for the same table");
            }
            listRealloc(slot + 1);
        }
        // getLength() is the non-deprecated replacement for getSize(); the buffer returned by
        // getBytes() may be longer than the valid data, so only getLength() bytes are copied.
        byte[] rawData = new byte[currentValue.getLength()];
        System.arraycopy(currentValue.getBytes(), 0, rawData, 0, rawData.length);
        this.array[slot] = rawData;
    }

    // Implementation of AbstractRowContainer and assorted methods

    /**
     * Appends an already-deserialized row given as a list.
     *
     * @param t the row values; its size must match the row length of this container
     * @throws HiveException if the row size differs from the rows already stored
     */
    @Override
    public void addRow(List<Object> t) throws HiveException {
        LOG.debug("Add is called with " + t.size() + " objects");
        // This is not called when building HashTable; we don't expect it to be called ever.
        final int base = prepareForAdd(t.size());
        if (base >= 0) {
            // A negative base means a zero-width row was counted and there is nothing to copy.
            for (int col = 0; col < t.size(); ++col) {
                this.array[base + col] = t.get(col);
            }
        }
    }

    /**
     * Appends an already-deserialized row given as an array.
     *
     * @param value the row values; its length must match the row length of this container
     * @throws HiveException if the row size differs from the rows already stored
     */
    @Override
    public void addRow(Object[] value) throws HiveException {
        LOG.debug("Add is called with " + value.length + " objects");
        // This is not called when building HashTable; we don't expect it to be called ever.
        final int base = prepareForAdd(value.length);
        if (base >= 0) {
            // A negative base means a zero-width row was counted and there is nothing to copy.
            System.arraycopy(value, 0, this.array, base, value.length);
        }
    }

    /**
     * Makes room for one more row of {@code len} values.
     *
     * @param len number of values in the row about to be added
     * @return offset in the backing array at which the new row starts, or -1 if rows have zero
     *         width (the row was only counted and there is nothing to copy)
     * @throws HiveException  if {@code len} differs from the row length of this container
     * @throws AssertionError if the container is still in lazy mode
     */
    private int prepareForAdd(int len) throws HiveException {
        if (isLazy()) {
            throw new AssertionError("Cannot add in lazy mode");
        }
        // Non-positive rowLength encodes "zero-width rows, -rowLength of them". Zero itself
        // (no rows yet) must take this branch too; otherwise an added empty row would fall
        // through to the generic path and never be counted.
        if (rowLength <= 0) {
            if (len != 0) {
                throw new HiveException("Different size rows: 0 and " + len);
            }
            --rowLength; // see rowLength javadoc
            return -1;
        }
        if (rowLength != len) {
            throw new HiveException("Different size rows: " + rowLength + " and " + len);
        }
        int oldLen = this.array.length;
        listRealloc(oldLen + len);
        return oldLen;
    }

    /**
     * Serialization is not supported by this container.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public void write(MapJoinObjectSerDeContext valueContext, ObjectOutputStream out) {
        final String className = this.getClass().getName();
        throw new UnsupportedOperationException(className + " cannot be serialized");
    }

    /**
     * Returns an iterator over the rows, deserializing them first if the container is lazy.
     *
     * @return this container itself when it holds exactly one non-empty row, otherwise a
     *         dedicated iterator for multi-row or zero-width content
     * @throws HiveException if lazy rows cannot be deserialized
     */
    @Override
    public AbstractRowContainer.RowIterator<List<Object>> rowIter() throws HiveException {
        ensureEager(); // if someone wants an iterator they are probably going to use it
        // The rowLength > 0 guard matters: with zero rows of zero width both array.length and
        // rowLength are 0, and returning "this" would make first() hand out a bogus empty row.
        if (rowLength > 0 && array.length == rowLength) {
            // optimize for common case - just one row for a key, container acts as iterator
            return this;
        }
        return rowLength > 0 ? new RowIterator() : new EmptyRowIterator(-rowLength);
    }

    /**
     * Single-row iterator entry point: the container itself serves as the row.
     *
     * @return this container viewed as a list of the row's values
     * @throws AssertionError if the container is still lazy, or the backing array does not
     *                        hold exactly one row
     */
    @Override
    public List<Object> first() throws HiveException {
        if (isLazy()) {
            throw new AssertionError("In lazy mode");
        }
        if (rowLength == array.length) {
            // optimize for common case - just one row for a key, container acts as row
            return this;
        }
        throw new AssertionError("Incorrect iterator usage, not single-row");
    }

    /**
     * Single-row iterator continuation.
     *
     * @return always {@code null}: in the single-row case there is no row after the first
     */
    @Override
    public List<Object> next() {
        // single-row case, there's no next
        return null;
    }

    /**
     * Iterator for row length 0: hands out a shared empty row once per stored row, then
     * {@code null}.
     */
    private static class EmptyRowIterator implements AbstractRowContainer.RowIterator<List<Object>> {
        private static final List<Object> EMPTY_ROW = new ArrayList<Object>();
        /** Total number of (empty) rows this iterator represents. */
        private final int rowCount;
        /** Rows not yet handed out in the current pass. */
        private int remaining;

        public EmptyRowIterator(int rowCount) {
            this.rowCount = rowCount;
            this.remaining = rowCount;
        }

        /**
         * Restarts the iteration and returns the first row, consistent with the non-empty
         * row iterator whose {@code first()} rewinds to the beginning.
         *
         * @return the shared empty row, or {@code null} if there are no rows
         */
        @Override
        public List<Object> first() throws HiveException {
            remaining = rowCount;
            return next();
        }

        /**
         * @return the shared empty row while rows remain, {@code null} afterwards
         */
        @Override
        public List<Object> next() throws HiveException {
            if (remaining <= 0) {
                return null;
            }
            --remaining;
            return EMPTY_ROW;
        }
    }

    /**
     * Row iterator for non-zero-length rows. Each row is exposed as a read-only view over
     * {@code rowLength} consecutive slots of the backing array.
     */
    private class RowIterator implements AbstractRowContainer.RowIterator<List<Object>> {
        /** Offset in the backing array of the row most recently returned. */
        private int index = 0;

        /** Rewinds to the first row; returns {@code null} if the backing array is empty. */
        @Override
        public List<Object> first() throws HiveException {
            assert !isLazy();
            index = 0;
            return (array.length > 0) ? new ReadOnlySubList(0, rowLength) : null;
        }

        /** Advances by one row; returns {@code null} once past the end of the backing array. */
        @Override
        public List<Object> next() {
            assert !isLazy();
            index += rowLength;
            return (index < array.length) ? new ReadOnlySubList(index, rowLength) : null;
        }
    }

    /**
     * Switches the container out of lazy mode, deserializing any stored raw rows into the
     * flat backing array. Does nothing if the container is already eager.
     *
     * @throws HiveException if the row structure cannot be inspected or a row fails to
     *                       deserialize
     */
    private void ensureEager() throws HiveException {
        if (!isLazy()) {
            return;
        }
        final Object[] pending = this.array;
        if (pending.length == 0) {
            // Nothing was ever added: zero rows of zero width.
            rowLength = 0;
            return;
        }
        assert pending.length > 1;
        // Slot 0 carries the serde context; slots 1..n carry the raw bytes of each row.
        MapJoinObjectSerDeContext context = (MapJoinObjectSerDeContext) pending[0];
        SerDe serde = context.getSerDe();
        int rowCount = pending.length - 1;
        if (setRowLength(serde, rowCount)) {
            this.array = new Object[rowLength * rowCount];
            ByteBufferWritable writable = new ByteBufferWritable();
            for (int slot = 1; slot <= rowCount; ++slot) {
                writable.setBytes((byte[]) pending[slot]);
                read(serde, writable, slot - 1);
            }
            setAliasFilter(context);
        }
        // Otherwise rows have zero width; setRowLength already recorded the row count.
    }

    /**
     * Determines the row length from the serde's object inspector and records it.
     *
     * @param serde    serde whose object inspector describes the row struct
     * @param rowCount number of rows to record if rows turn out to have zero width
     * @return true if rows carry values; false if the struct size is zero, in which case the
     *         backing array is reset to empty and {@code rowLength} is set to
     *         {@code -rowCount}
     * @throws HiveException if the object inspector cannot be obtained
     */
    private boolean setRowLength(SerDe serde, int rowCount) throws HiveException {
        try {
            rowLength = ObjectInspectorUtils.getStructSize(serde.getObjectInspector());
        } catch (SerDeException ex) {
            throw new HiveException("Get structure size error", ex);
        }
        final boolean hasColumns = (rowLength != 0);
        if (!hasColumns) {
            array = EMPTY_OBJECT_ARRAY;
            rowLength = -rowCount; // see javadoc for rowLength
        }
        return hasColumns;
    }

    /**
     * Deserializes one row and copies its fields into the backing array.
     *
     * @param serde     serde used to deserialize and inspect the row
     * @param writable  serialized row
     * @param rowOffset index of the row; its fields land at {@code rowOffset * rowLength}
     * @throws HiveException if deserialization fails
     */
    private void read(SerDe serde, Writable writable, int rowOffset) throws HiveException {
        final int destination = rowOffset * rowLength;
        try {
            Object row = serde.deserialize(writable);
            ObjectInspectorUtils.copyStructToArray(row, serde.getObjectInspector(),
                    ObjectInspectorCopyOption.WRITABLE, this.array, destination);
        } catch (SerDeException ex) {
            throw new HiveException("Lazy deserialize error", ex);
        }
    }

    /** Returns true while the row length is still the UNKNOWN sentinel, i.e. lazy mode. */
    private boolean isLazy() {
        return UNKNOWN == rowLength;
    }

    /**
     * Returns the number of rows stored, deserializing lazy rows first.
     *
     * @throws HiveException if lazy rows cannot be deserialized
     */
    @Override
    public int rowCount() throws HiveException {
        ensureEager();
        if (rowLength > 0) {
            return array.length / rowLength;
        }
        // Non-positive rowLength is the negated row count; see rowLength javadoc.
        return -rowLength;
    }

    /** Drops all rows: the backing array becomes empty and the row length is reset to zero. */
    @Override
    public void clearRows() {
        assert !isLazy();
        rowLength = 0;
        array = EMPTY_OBJECT_ARRAY;
    }

    /**
     * Returns the accumulated alias filter, deserializing lazy rows first.
     *
     * @throws HiveException if lazy rows cannot be deserialized
     */
    @Override
    public byte getAliasFilter() throws HiveException {
        ensureEager();
        return aliasFilter;
    }

    /**
     * AND-s the last column of every stored row into {@code aliasFilter}, provided rows
     * carry values and the context reports a filter tag.
     *
     * @param context serde context consulted for the presence of a filter tag
     * @throws AssertionError if the container is still in lazy mode
     */
    private void setAliasFilter(MapJoinObjectSerDeContext context) throws HiveException {
        if (isLazy()) {
            throw new AssertionError("In lazy mode");
        }
        if (rowLength > 0 && context.hasFilterTag()) {
            int tagPos = rowLength - 1;
            while (tagPos < array.length) {
                aliasFilter &= ((ShortWritable) array[tagPos]).get();
                tagPos += rowLength;
            }
        }
    }

    /**
     * Creates a new container with its own backing array holding the same element
     * references, row length and alias filter. Lazy rows are deserialized first.
     *
     * @throws HiveException if lazy rows cannot be deserialized
     */
    @Override
    public MapJoinRowContainer copy() throws HiveException {
        ensureEager(); // If someone wants a copy they are probably going to use it.
        final LazyFlatRowContainer duplicate = new LazyFlatRowContainer();
        duplicate.rowLength = this.rowLength;
        duplicate.aliasFilter = this.aliasFilter;
        duplicate.array = this.array.clone();
        return duplicate;
    }

    // Implementation of List<Object> and assorted methods

    /**
     * Replaces the backing array with a new one of the given length, carrying over all
     * existing elements.
     *
     * @param length new array length; the copy assumes it is not smaller than the current one
     */
    private void listRealloc(int length) {
        final Object[] previous = this.array;
        final Object[] resized = new Object[length];
        if (previous.length != 0) {
            System.arraycopy(previous, 0, resized, 0, previous.length);
        }
        this.array = resized;
    }

    /**
     * Returns the number of values in the single row this container represents.
     *
     * @throws AssertionError if the container does not hold exactly one row
     */
    @Override
    public int size() {
        checkSingleRow();
        final int width = array.length;
        return width;
    }

    /**
     * Returns the element at the given position of the backing array. No single-row check is
     * made here.
     */
    @Override
    public Object get(int index) {
        final Object element = array[index];
        return element;
    }

    /**
     * View over a contiguous slice of the enclosing container's backing array. It defines no
     * mutators of its own. Indices are not range-checked against the slice bounds.
     */
    private class ReadOnlySubList extends AbstractList<Object> {
        /** Position in the backing array of this view's first element. */
        private final int start;
        /** Number of elements exposed by this view. */
        private final int length;

        ReadOnlySubList(int from, int size) {
            this.start = from;
            this.length = size;
        }

        @Override
        public Object get(int index) {
            return array[start + index];
        }

        @Override
        public int size() {
            return length;
        }

        @Override
        public Iterator<Object> iterator() {
            return listIterator();
        }

        @Override
        public ListIterator<Object> listIterator(int index) {
            final int end = start + length;
            return listIteratorInternal(start + index, start, end);
        }

        @Override
        public List<Object> subList(int fromIndex, int toIndex) {
            final int span = toIndex - fromIndex;
            return new ReadOnlySubList(start + fromIndex, span);
        }

        @Override
        public Object[] toArray() {
            final Object[] snapshot = new Object[length];
            System.arraycopy(array, start, snapshot, 0, length);
            return snapshot;
        }
    } // end ReadOnlySubList

    /**
     * Returns the values of the single row this container represents.
     *
     * NOTE: the backing array itself is returned, not a copy, so changes made to the
     * returned array are visible in this container.
     *
     * @throws AssertionError if the container does not hold exactly one row
     */
    @Override
    public Object[] toArray() {
        checkSingleRow();
        final Object[] backing = array;
        return backing;
    }

    /** Returns an iterator over the single row's values; delegates to {@link #listIterator()}. */
    @Override
    public Iterator<Object> iterator() {
        final ListIterator<Object> it = listIterator();
        return it;
    }

    /** Returns a list iterator positioned at the start of the single row. */
    @Override
    public ListIterator<Object> listIterator() {
        final int start = 0;
        return listIterator(start);
    }

    /**
     * Returns a list iterator over the single row, positioned at {@code index}.
     *
     * @throws AssertionError if the container does not hold exactly one row
     */
    @Override
    public ListIterator<Object> listIterator(final int index) {
        checkSingleRow();
        final int end = array.length;
        return listIteratorInternal(index, 0, end);
    }

    /**
     * Builds a read-only list iterator over the backing-array range
     * {@code [iterMinPos, iterMaxPos)}.
     *
     * @param index      absolute backing-array position of the element the first call to
     *                   {@code next()} returns
     * @param iterMinPos absolute position of the first element of the range (inclusive)
     * @param iterMaxPos absolute position just past the last element of the range (exclusive)
     * @return an iterator whose {@code nextIndex()}/{@code previousIndex()} are relative to
     *         {@code iterMinPos}, and whose {@code remove}, {@code set} and {@code add} throw
     *         {@link UnsupportedOperationException}
     */
    private ListIterator<Object> listIteratorInternal(final int index, final int iterMinPos, final int iterMaxPos) {
        return new ListIterator<Object>() {
            // Absolute backing-array position of the element next() would return. The cursor
            // sits between elements, as the ListIterator contract describes.
            private int cursor = index;

            public int nextIndex() {
                // Report list-relative indices, so views starting at a non-zero offset
                // behave like ordinary lists.
                return cursor - iterMinPos;
            }

            public int previousIndex() {
                return cursor - iterMinPos - 1;
            }

            public boolean hasNext() {
                return cursor < iterMaxPos;
            }

            public boolean hasPrevious() {
                return cursor > iterMinPos;
            }

            public Object next() {
                if (!hasNext()) {
                    throw new NoSuchElementException();
                }
                // get() resolves to the enclosing container's get(int), which indexes the
                // backing array directly.
                return get(cursor++);
            }

            public Object previous() {
                if (!hasPrevious()) {
                    throw new NoSuchElementException();
                }
                // Step back over the element immediately before the cursor and return it.
                return get(--cursor);
            }

            public void remove() {
                throw new UnsupportedOperationException();
            }

            public void set(Object e) {
                throw new UnsupportedOperationException();
            }

            public void add(Object e) {
                throw new UnsupportedOperationException();
            }
        }; // end ListIterator
    }

    /**
     * Fake writable that can be reset with different bytes. It only exposes a byte array
     * through {@code getBytes()}/{@code getLength()}; actual Writable I/O is unsupported.
     */
    private static class ByteBufferWritable extends BinaryComparable implements Writable {
        /** Current payload; null until {@link #setBytes(byte[])} is called. */
        private byte[] bytes = null;

        public void setBytes(byte[] bytes) {
            this.bytes = bytes;
        }

        @Override
        public byte[] getBytes() {
            return this.bytes;
        }

        /** Length of the current payload; the whole array is considered valid data. */
        @Override
        public int getLength() {
            return this.bytes.length;
        }

        @Override
        public void readFields(DataInput arg0) {
            throw new UnsupportedOperationException();
        }

        @Override
        public void write(DataOutput arg0) {
            throw new UnsupportedOperationException();
        }
    } // end ByteBufferWritable

    /**
     * Returns the position of the first value equal to {@code o} in the single row, or -1 if
     * there is none. A {@code null} argument matches a {@code null} value.
     *
     * @throws AssertionError if the container does not hold exactly one row
     */
    @Override
    public int indexOf(Object o) {
        checkSingleRow();
        if (o == null) {
            for (int pos = 0; pos < array.length; ++pos) {
                if (array[pos] == null) {
                    return pos;
                }
            }
        } else {
            for (int pos = 0; pos < array.length; ++pos) {
                if (o.equals(array[pos])) {
                    return pos;
                }
            }
        }
        return -1;
    }

    /**
     * Guards the List view of this container, which is only valid while the backing array
     * holds exactly one row.
     *
     * @throws AssertionError if the backing array length differs from the row length
     */
    private void checkSingleRow() throws AssertionError {
        final boolean singleRow = (array.length == rowLength);
        if (!singleRow) {
            throw new AssertionError("Incorrect list usage, not single-row");
        }
    }

    /**
     * Returns the position of the last value equal to {@code o} in the single row, or -1 if
     * there is none. A {@code null} argument matches a {@code null} value.
     *
     * @throws AssertionError if the container does not hold exactly one row
     */
    @Override
    public int lastIndexOf(Object o) {
        checkSingleRow();
        if (o == null) {
            for (int pos = array.length - 1; pos >= 0; --pos) {
                if (array[pos] == null) {
                    return pos;
                }
            }
        } else {
            for (int pos = array.length - 1; pos >= 0; --pos) {
                if (o.equals(array[pos])) {
                    return pos;
                }
            }
        }
        return -1;
    }

    /**
     * Returns a view of the single row's values in {@code [fromIndex, toIndex)}. The bounds
     * are not validated here.
     *
     * @throws AssertionError if the container does not hold exactly one row
     */
    @Override
    public List<Object> subList(int fromIndex, int toIndex) {
        checkSingleRow();
        final int span = toIndex - fromIndex;
        return new ReadOnlySubList(fromIndex, span);
    }

    /**
     * Positional bulk insertion is not supported by the List view.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public boolean addAll(int index, Collection<? extends Object> c) {
        throw new UnsupportedOperationException();
    }

    /**
     * Element replacement is not supported by the List view.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public Object set(int index, Object element) {
        throw new UnsupportedOperationException();
    }

    /**
     * Positional insertion is not supported by the List view.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public void add(int index, Object element) {
        throw new UnsupportedOperationException();
    }

    /**
     * Positional removal is not supported by the List view.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public Object remove(int index) {
        throw new UnsupportedOperationException();
    }
}