org.commoncrawl.util.shared.BoundedRangeFileInputStream.java Source code

Java tutorial

Introduction

Here is the source code for org.commoncrawl.util.shared.BoundedRangeFileInputStream.java

Source

package org.commoncrawl.util.shared;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with this
 * work for additional information regarding copyright ownership. The ASF
 * licenses this file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

import java.io.IOException;
import java.io.InputStream;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.PositionedReadable;
import org.apache.hadoop.fs.Seekable;

/**
 * BoundedRangeFileInputStream abstracts a contiguous region of a Hadoop
 * FSDataInputStream as a regular input stream. One can create multiple
 * BoundedRangeFileInputStream on top of the same FSDataInputStream and they
 * would not interfere with each other: each instance tracks its own position,
 * and reads either use positional reads (pread mode) or a seek+read pair that
 * is synchronized on the shared underlying stream.
 * Copied from hadoop-335 tfile.
 *
 * <p>All positions handled by this class ({@link #getPos()}, {@link #seek(long)},
 * mark/reset) are absolute offsets into the underlying stream, not offsets
 * relative to the start of the region.
 */
public class BoundedRangeFileInputStream extends InputStream implements Seekable, PositionedReadable {

    static final Log LOG = LogFactory.getLog(BoundedRangeFileInputStream.class);

    /** Shared underlying stream; set to {@code null} by {@link #close()}. */
    private FSDataInputStream in;
    /** Absolute offset in {@code in} of the next byte to read. */
    private long pos;
    /** Absolute offset in {@code in} one past the last readable byte of the region. */
    private long end;
    /** Position remembered by {@link #mark(int)}, or -1 when no mark is set. */
    private long mark;
    /** Scratch buffer backing the single-byte {@link #read()}. */
    private final byte[] oneByte = new byte[1];
    /** If true, use positional read on {@code in} rather than seek+read. */
    private final boolean pread;

    /**
     * Creates a bounded stream that uses positional reads (pread mode).
     *
     * @param in
     *          The FSDataInputStream we connect to.
     * @param offset
     *          Beginning offset of the region.
     * @param length
     *          Length of the region.
     *
     *          The actual length of the region may be smaller if (offset +
     *          length) goes beyond the end of FS input stream.
     * @throws IndexOutOfBoundsException
     *           if offset or length is negative.
     */
    public BoundedRangeFileInputStream(FSDataInputStream in, long offset, long length) {
        this(in, offset, length, true);
    }

    /**
     * Creates a bounded stream, choosing how bytes are fetched from the
     * underlying stream.
     *
     * @param in
     *          The FSDataInputStream we connect to.
     * @param offset
     *          Beginning offset of the region.
     * @param length
     *          Length of the region.
     * @param pread
     *          If true, use Filesystem positional read rather than seek+read.
     *
     *          The actual length of the region may be smaller if (offset +
     *          length) goes beyond the end of FS input stream.
     * @throws IndexOutOfBoundsException
     *           if offset or length is negative.
     */
    public BoundedRangeFileInputStream(FSDataInputStream in, long offset, long length, boolean pread) {
        if (offset < 0 || length < 0) {
            throw new IndexOutOfBoundsException("Invalid offset/length: " + offset + "/" + length);
        }

        this.in = in;
        this.pos = offset;
        // Both operands are non-negative, so a negative sum can only mean the
        // addition overflowed; clamp instead of ending up with an empty or
        // inverted range.
        long rangeEnd = offset + length;
        this.end = rangeEnd < 0 ? Long.MAX_VALUE : rangeEnd;
        this.mark = -1;
        this.pread = pread;
    }

    /**
     * Returns the underlying stream's availability estimate, capped at the
     * number of bytes left in the region. Returns 0 once the position has
     * reached or passed the end of the region (which is also the state after
     * {@link #close()}).
     *
     * NOTE(review): the estimate comes from the shared stream and may not be
     * relative to this stream's own position - treat it as a hint only.
     */
    @Override
    public int available() throws IOException {
        long remaining = end - pos;
        if (remaining <= 0) {
            // Nothing left (or seek() moved past the end); never report a
            // negative count and do not touch 'in', which is null after close().
            return 0;
        }
        return (int) Math.min(in.available(), remaining);
    }

    /**
     * Reads a single byte.
     *
     * @return the byte as an unsigned value in 0..255, or -1 at end of region.
     */
    @Override
    public int read() throws IOException {
        int ret = read(oneByte);
        if (ret == 1) {
            return oneByte[0] & 0xff;
        }
        return -1;
    }

    /**
     * Reads up to {@code b.length} bytes; equivalent to
     * {@code read(b, 0, b.length)}.
     */
    @Override
    public int read(byte[] b) throws IOException {
        return read(b, 0, b.length);
    }

    /**
     * Reads up to {@code len} bytes from the current position, never going
     * past the end of the region, and advances the position by the number of
     * bytes actually read.
     *
     * @param b
     *          destination buffer.
     * @param off
     *          start offset in {@code b}.
     * @param len
     *          maximum number of bytes to read.
     * @return the number of bytes read, 0 if {@code len} is 0, or -1 if the
     *         end of the region (or of the underlying stream) was reached.
     * @throws IndexOutOfBoundsException
     *           if off/len are negative or do not fit in {@code b}.
     */
    @Override
    public int read(byte[] b, int off, int len) throws IOException {
        // Any negative term (including an overflowed off + len) sets the sign bit.
        if ((off | len | (off + len) | (b.length - (off + len))) < 0) {
            throw new IndexOutOfBoundsException();
        }
        if (len == 0) {
            // InputStream contract: a zero-length request reads nothing and
            // is not an end-of-stream indication.
            return 0;
        }

        long remaining = end - pos;
        if (remaining <= 0) {
            // Covers both "exactly at end" and "seek() moved past end"; the
            // latter must not turn into a negative read length.
            return -1;
        }

        int n = (int) Math.min(len, remaining);
        int ret = 0;
        if (this.pread) {
            if (LOG.isDebugEnabled()) {
                LOG.debug("PREAD Reading at Pos:" + pos + " Bytes:" + n);
            }
            ret = in.read(pos, b, off, n);
        } else {
            // seek + read must be atomic with respect to other users of the
            // shared stream.
            synchronized (in) {
                if (LOG.isDebugEnabled()) {
                    LOG.debug("NONPREAD Reading at Pos:" + pos + " Bytes:" + n);
                }
                in.seek(pos);
                ret = in.read(b, off, n);
            }
        }
        if (ret < 0) {
            // Underlying stream ended before our nominal end: shrink the
            // region so later calls report end-of-stream immediately.
            end = pos;
            return -1;
        }
        pos += ret;
        return ret;
    }

    /**
     * Advances the position by up to {@code n} bytes without reading, bounded
     * by the end of the region. We may skip beyond the end of the file, since
     * the underlying stream is not consulted.
     *
     * @param n
     *          number of bytes to skip; non-positive values skip nothing.
     * @return the number of bytes actually skipped (never negative).
     */
    @Override
    public long skip(long n) throws IOException {
        long remaining = end - pos;
        if (n <= 0 || remaining <= 0) {
            return 0;
        }
        long len = Math.min(n, remaining);
        pos += len;
        return len;
    }

    /**
     * Remembers the current position for a later {@link #reset()}.
     *
     * @param readlimit
     *          ignored; the mark never expires.
     */
    @Override
    public void mark(int readlimit) {
        mark = pos;
    }

    /**
     * Moves the position back to the last mark.
     *
     * @throws IOException
     *           if no mark has been set (or the stream was closed).
     */
    @Override
    public void reset() throws IOException {
        if (mark < 0) {
            throw new IOException("Resetting to invalid mark");
        }
        pos = mark;
    }

    /** @return always true; mark/reset only manipulate the local position. */
    @Override
    public boolean markSupported() {
        return true;
    }

    /**
     * Invalidates this stream's state. The underlying FSDataInputStream is
     * only dereferenced here, not closed.
     */
    @Override
    public void close() {
        // Invalidate the state of the stream.
        in = null;
        pos = end;
        mark = -1;
    }

    /** @return the absolute offset of the next byte to be read. */
    @Override
    public long getPos() throws IOException {
        return pos;
    }

    /**
     * Sets the absolute offset of the next byte to be read. No bounds check is
     * performed here; a position at or past the end of the region simply makes
     * subsequent reads report end-of-stream.
     */
    @Override
    public void seek(long pos) throws IOException {
        this.pos = pos;
    }

    /** Delegates directly to the underlying stream. */
    @Override
    public boolean seekToNewSource(long targetPos) throws IOException {
        return in.seekToNewSource(targetPos);
    }

    /**
     * Positional read delegated directly to the underlying stream. It is not
     * restricted to the bounded region and does not change this stream's
     * position.
     */
    @Override
    public int read(long position, byte[] buffer, int offset, int length) throws IOException {
        if (LOG.isDebugEnabled()) {
            LOG.debug("Reading at Pos:" + position + " Bytes:" + length);
        }
        return in.read(position, buffer, offset, length);
    }

    /**
     * Positional readFully delegated directly to the underlying stream. It is
     * not restricted to the bounded region and does not change this stream's
     * position.
     */
    @Override
    public void readFully(long position, byte[] buffer) throws IOException {
        if (LOG.isDebugEnabled()) {
            LOG.debug("Reading at Pos:" + position + " Bytes:" + buffer.length);
        }
        in.readFully(position, buffer);
    }

    /**
     * Positional readFully delegated directly to the underlying stream. It is
     * not restricted to the bounded region and does not change this stream's
     * position.
     */
    @Override
    public void readFully(long position, byte[] buffer, int offset, int length) throws IOException {
        if (LOG.isDebugEnabled()) {
            LOG.debug("Reading at Pos:" + position + " Bytes:" + length);
        }
        in.readFully(position, buffer, offset, length);
    }
}