org.apache.hadoop.hive.ql.io.orc.InStream.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.hadoop.hive.ql.io.orc.InStream.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hive.ql.io.orc;

import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.List;
import java.util.ListIterator;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.common.DiskRange;
import org.apache.hadoop.hive.ql.io.orc.RecordReaderImpl.BufferChunk;

import com.google.common.annotations.VisibleForTesting;

public abstract class InStream extends InputStream {

    private static final Log LOG = LogFactory.getLog(InStream.class);

    protected final String name;
    protected final long length;

    public InStream(String name, long length) {
        this.name = name;
        this.length = length;
    }

    public String getStreamName() {
        return name;
    }

    public long getStreamLength() {
        return length;
    }

    private static class UncompressedStream extends InStream {
        private final List<DiskRange> bytes;
        private final long length;
        private long currentOffset;
        private ByteBuffer range;
        private int currentRange;

        public UncompressedStream(String name, List<DiskRange> input, long length) {
            super(name, length);
            this.bytes = input;
            this.length = length;
            currentRange = 0;
            currentOffset = 0;
        }

        @Override
        public int read() {
            if (range == null || range.remaining() == 0) {
                if (currentOffset == length) {
                    return -1;
                }
                seek(currentOffset);
            }
            currentOffset += 1;
            return 0xff & range.get();
        }

        @Override
        public int read(byte[] data, int offset, int length) {
            if (range == null || range.remaining() == 0) {
                if (currentOffset == this.length) {
                    return -1;
                }
                seek(currentOffset);
            }
            int actualLength = Math.min(length, range.remaining());
            range.get(data, offset, actualLength);
            currentOffset += actualLength;
            return actualLength;
        }

        @Override
        public int available() {
            if (range != null && range.remaining() > 0) {
                return range.remaining();
            }
            return (int) (length - currentOffset);
        }

        @Override
        public void close() {
            currentRange = bytes.size();
            currentOffset = length;
            // explicit de-ref of bytes[]
            bytes.clear();
        }

        @Override
        public void seek(PositionProvider index) throws IOException {
            seek(index.getNext());
        }

        public void seek(long desired) {
            if (desired == 0 && bytes.isEmpty()) {
                logEmptySeek(name);
                return;
            }
            int i = 0;
            for (DiskRange curRange : bytes) {
                if (desired == 0 && curRange.getData().remaining() == 0) {
                    logEmptySeek(name);
                    return;
                }
                if (curRange.getOffset() <= desired && (desired - curRange.getOffset()) < curRange.getLength()) {
                    currentOffset = desired;
                    currentRange = i;
                    this.range = curRange.getData().duplicate();
                    int pos = range.position();
                    pos += (int) (desired - curRange.getOffset()); // this is why we duplicate
                    this.range.position(pos);
                    return;
                }
                ++i;
            }
            throw new IllegalArgumentException("Seek in " + name + " to " + desired + " is outside of the data");
        }

        @Override
        public String toString() {
            return "uncompressed stream " + name + " position: " + currentOffset + " length: " + length + " range: "
                    + currentRange + " offset: " + (range == null ? 0 : range.position()) + " limit: "
                    + (range == null ? 0 : range.limit());
        }
    }

    private static ByteBuffer allocateBuffer(int size, boolean isDirect) {
        // TODO: use the same pool as the ORC readers
        if (isDirect) {
            return ByteBuffer.allocateDirect(size);
        } else {
            return ByteBuffer.allocate(size);
        }
    }

    private static class CompressedStream extends InStream {
        private final List<DiskRange> bytes;
        private final int bufferSize;
        private ByteBuffer uncompressed;
        private final CompressionCodec codec;
        private ByteBuffer compressed;
        private long currentOffset;
        private int currentRange;
        private boolean isUncompressedOriginal;

        public CompressedStream(String name, List<DiskRange> input, long length, CompressionCodec codec,
                int bufferSize) {
            super(name, length);
            this.bytes = input;
            this.codec = codec;
            this.bufferSize = bufferSize;
            currentOffset = 0;
            currentRange = 0;
        }

        private void readHeader() throws IOException {
            if (compressed == null || compressed.remaining() <= 0) {
                seek(currentOffset);
            }
            long originalOffset = currentOffset;
            if (compressed.remaining() > OutStream.HEADER_SIZE) {
                int b0 = compressed.get() & 0xff;
                int b1 = compressed.get() & 0xff;
                int b2 = compressed.get() & 0xff;
                boolean isOriginal = (b0 & 0x01) == 1;
                int chunkLength = (b2 << 15) | (b1 << 7) | (b0 >> 1);

                if (chunkLength > bufferSize) {
                    throw new IllegalArgumentException(
                            "Buffer size too small. size = " + bufferSize + " needed = " + chunkLength);
                }
                // read 3 bytes, which should be equal to OutStream.HEADER_SIZE always
                assert OutStream.HEADER_SIZE == 3 : "The Orc HEADER_SIZE must be the same in OutStream and InStream";
                currentOffset += OutStream.HEADER_SIZE;

                ByteBuffer slice = this.slice(chunkLength);

                if (isOriginal) {
                    uncompressed = slice;
                    isUncompressedOriginal = true;
                } else {
                    if (isUncompressedOriginal) {
                        uncompressed = allocateBuffer(bufferSize, slice.isDirect());
                        isUncompressedOriginal = false;
                    } else if (uncompressed == null) {
                        uncompressed = allocateBuffer(bufferSize, slice.isDirect());
                    } else {
                        uncompressed.clear();
                    }
                    codec.decompress(slice, uncompressed);
                }
            } else {
                throw new IllegalStateException("Can't read header at " + this);
            }
        }

        @Override
        public int read() throws IOException {
            if (uncompressed == null || uncompressed.remaining() == 0) {
                if (currentOffset == length) {
                    return -1;
                }
                readHeader();
            }
            return 0xff & uncompressed.get();
        }

        @Override
        public int read(byte[] data, int offset, int length) throws IOException {
            if (uncompressed == null || uncompressed.remaining() == 0) {
                if (currentOffset == this.length) {
                    return -1;
                }
                readHeader();
            }
            int actualLength = Math.min(length, uncompressed.remaining());
            uncompressed.get(data, offset, actualLength);
            return actualLength;
        }

        @Override
        public int available() throws IOException {
            if (uncompressed == null || uncompressed.remaining() == 0) {
                if (currentOffset == length) {
                    return 0;
                }
                readHeader();
            }
            return uncompressed.remaining();
        }

        @Override
        public void close() {
            uncompressed = null;
            compressed = null;
            currentRange = bytes.size();
            currentOffset = length;
            bytes.clear();
        }

        @Override
        public void seek(PositionProvider index) throws IOException {
            seek(index.getNext());
            long uncompressedBytes = index.getNext();
            if (uncompressedBytes != 0) {
                readHeader();
                uncompressed.position(uncompressed.position() + (int) uncompressedBytes);
            } else if (uncompressed != null) {
                // mark the uncompressed buffer as done
                uncompressed.position(uncompressed.limit());
            }
        }

        /* slices a read only contiguous buffer of chunkLength */
        private ByteBuffer slice(int chunkLength) throws IOException {
            int len = chunkLength;
            final long oldOffset = currentOffset;
            ByteBuffer slice;
            if (compressed.remaining() >= len) {
                slice = compressed.slice();
                // simple case
                slice.limit(len);
                currentOffset += len;
                compressed.position(compressed.position() + len);
                return slice;
            } else if (currentRange >= (bytes.size() - 1)) {
                // nothing has been modified yet
                throw new IOException("EOF in " + this + " while trying to read " + chunkLength + " bytes");
            }

            if (LOG.isDebugEnabled()) {
                LOG.debug(String.format(
                        "Crossing into next BufferChunk because compressed only has %d bytes (needs %d)",
                        compressed.remaining(), len));
            }

            // we need to consolidate 2 or more buffers into 1
            // first copy out compressed buffers
            ByteBuffer copy = allocateBuffer(chunkLength, compressed.isDirect());
            currentOffset += compressed.remaining();
            len -= compressed.remaining();
            copy.put(compressed);
            ListIterator<DiskRange> iter = bytes.listIterator(currentRange);

            while (len > 0 && iter.hasNext()) {
                ++currentRange;
                if (LOG.isDebugEnabled()) {
                    LOG.debug(String.format("Read slow-path, >1 cross block reads with %s", this.toString()));
                }
                DiskRange range = iter.next();
                compressed = range.getData().duplicate();
                if (compressed.remaining() >= len) {
                    slice = compressed.slice();
                    slice.limit(len);
                    copy.put(slice);
                    currentOffset += len;
                    compressed.position(compressed.position() + len);
                    return copy;
                }
                currentOffset += compressed.remaining();
                len -= compressed.remaining();
                copy.put(compressed);
            }

            // restore offsets for exception clarity
            seek(oldOffset);
            throw new IOException("EOF in " + this + " while trying to read " + chunkLength + " bytes");
        }

        private void seek(long desired) throws IOException {
            if (desired == 0 && bytes.isEmpty()) {
                logEmptySeek(name);
                return;
            }
            int i = 0;
            for (DiskRange range : bytes) {
                if (range.getOffset() <= desired && desired < range.getEnd()) {
                    currentRange = i;
                    compressed = range.getData().duplicate();
                    int pos = compressed.position();
                    pos += (int) (desired - range.getOffset());
                    compressed.position(pos);
                    currentOffset = desired;
                    return;
                }
                ++i;
            }
            // if they are seeking to the precise end, go ahead and let them go there
            int segments = bytes.size();
            if (segments != 0 && desired == bytes.get(segments - 1).getEnd()) {
                DiskRange range = bytes.get(segments - 1);
                currentRange = segments - 1;
                compressed = range.getData().duplicate();
                compressed.position(compressed.limit());
                currentOffset = desired;
                return;
            }
            throw new IOException("Seek outside of data in " + this + " to " + desired);
        }

        private String rangeString() {
            StringBuilder builder = new StringBuilder();
            int i = 0;
            for (DiskRange range : bytes) {
                if (i != 0) {
                    builder.append("; ");
                }
                builder.append(
                        " range " + i + " = " + range.getOffset() + " to " + (range.getEnd() - range.getOffset()));
                ++i;
            }
            return builder.toString();
        }

        @Override
        public String toString() {
            return "compressed stream " + name + " position: " + currentOffset + " length: " + length + " range: "
                    + currentRange + " offset: " + (compressed == null ? 0 : compressed.position()) + " limit: "
                    + (compressed == null ? 0 : compressed.limit()) + rangeString() + (uncompressed == null ? ""
                            : " uncompressed: " + uncompressed.position() + " to " + uncompressed.limit());
        }
    }

    public abstract void seek(PositionProvider index) throws IOException;

    private static void logEmptySeek(String name) {
        if (LOG.isWarnEnabled()) {
            LOG.warn("Attempting seek into empty stream (" + name + ") Skipping stream.");
        }
    }

    /**
     * Create an input stream from a list of buffers.
     * @param streamName the name of the stream
     * @param buffers the list of ranges of bytes for the stream
     * @param offsets a list of offsets (the same length as input) that must
     *                contain the first offset of the each set of bytes in input
     * @param length the length in bytes of the stream
     * @param codec the compression codec
     * @param bufferSize the compression buffer size
     * @return an input stream
     * @throws IOException
     */
    @VisibleForTesting
    @Deprecated
    public static InStream create(String streamName, ByteBuffer[] buffers, long[] offsets, long length,
            CompressionCodec codec, int bufferSize) throws IOException {
        List<DiskRange> input = new ArrayList<DiskRange>(buffers.length);
        for (int i = 0; i < buffers.length; ++i) {
            input.add(new BufferChunk(buffers[i], offsets[i]));
        }
        return create(streamName, input, length, codec, bufferSize);
    }

    /**
     * Create an input stream from a list of disk ranges with data.
     * @param name the name of the stream
     * @param input the list of ranges of bytes for the stream; from disk or cache
     * @param length the length in bytes of the stream
     * @param codec the compression codec
     * @param bufferSize the compression buffer size
     * @return an input stream
     * @throws IOException
     */
    public static InStream create(String name, List<DiskRange> input, long length, CompressionCodec codec,
            int bufferSize) throws IOException {
        if (codec == null) {
            return new UncompressedStream(name, input, length);
        } else {
            return new CompressedStream(name, input, length, codec, bufferSize);
        }
    }
}