org.apache.hadoop.fs.nfs.stream.NFSBufferedInputStream.java Source code

Introduction

Here is the source code for org.apache.hadoop.fs.nfs.stream.NFSBufferedInputStream.java
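
NFSBufferedInputStream is the read side of the Hadoop NFS connector. It extends FSInputStream and serves reads out of fixed-size blocks (2^readBlockSizeBits bytes each) that it caches in memory; when the fs.nfs.prefetch option is enabled, a pool of background Read tasks fetches blocks ahead of the current position.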

Source

/**
 * Copyright 2014 NetApp Inc. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
package org.apache.hadoop.fs.nfs.stream;

import java.io.EOFException;
import java.io.IOException;
import java.util.Iterator;
import java.util.Map;
import java.util.Map.Entry;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;
import java.util.concurrent.LinkedBlockingDeque;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.nfs.NFSv3FileSystemStore;
import org.apache.hadoop.fs.nfs.StreamStatistics;
import org.apache.hadoop.nfs.nfs3.FileHandle;
import org.apache.hadoop.nfs.nfs3.Nfs3FileAttributes;
import org.apache.hadoop.oncrpc.security.Credentials;

public class NFSBufferedInputStream extends FSInputStream {
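
    // Read-side stream state and machinery: the file is fetched in fixed-size
    // blocks (2^readBlockSizeBits bytes) that are cached in memory, and when
    // prefetch is enabled a pool of background Read tasks reads ahead of the
    // current position.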

    long fileLength;
    long fileOffset;
    long prefetchBlockLimit;
    StreamBlock currentBlock;

    final NFSv3FileSystemStore store;
    final FileHandle handle;
    final Credentials credentials;

    final String pathString;
    final int readBlockSizeBits;
    final long splitSize;
    final StreamStatistics statistics;
    final boolean doPrefetch;
    final AtomicBoolean closed;
    final ExecutorService executors;
    final Map<Long, Future<Read>> ongoing;
    final Map<Long, StreamBlock> cache;

    static final AtomicInteger streamId;

    public static final int DEFAULT_CACHE_SIZE_IN_BLOCKS = 1024;

    public static final int MAX_PREFETCH_POOL_SIZE = 256;
    public static final int DEFAULT_PREFETCH_POOL_SIZE = 4;
    public static final boolean DEFAULT_PREFETCH_ENABLED = true;

    public static final int DEFAULT_READAHEAD_SIZE = 256;

    public final static Log LOG = LogFactory.getLog(NFSBufferedInputStream.class);

    static {
        streamId = new AtomicInteger(1);
    }

    public NFSBufferedInputStream(NFSv3FileSystemStore store, FileHandle handle, Path f, Configuration conf,
            long splitSize, Credentials credentials, FileSystem.Statistics fsStat) throws IOException {

        this.store = store;
        this.handle = handle;
        this.credentials = credentials;
        this.pathString = f.toUri().getPath();

        doPrefetch = conf.getBoolean("fs.nfs.prefetch", DEFAULT_PREFETCH_ENABLED);

        this.fileOffset = 0L;
        this.readBlockSizeBits = store.getReadSizeBits();
        this.splitSize = splitSize;
        this.closed = new AtomicBoolean(false);
        this.ongoing = new ConcurrentHashMap<>(DEFAULT_PREFETCH_POOL_SIZE);
        this.cache = new ConcurrentHashMap<>(DEFAULT_CACHE_SIZE_IN_BLOCKS);
        this.statistics = new StreamStatistics(NFSBufferedInputStream.class + pathString,
                streamId.getAndIncrement(), true);
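        // CallerRunsPolicy means that when the prefetch queue is full, the
        // submitting thread runs the Read task itself, so reads degrade to
        // synchronous fetches instead of being rejected.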
        this.executors = new ThreadPoolExecutor(DEFAULT_PREFETCH_POOL_SIZE, MAX_PREFETCH_POOL_SIZE, 5,
                TimeUnit.SECONDS, new LinkedBlockingDeque<Runnable>(1024),
                new ThreadPoolExecutor.CallerRunsPolicy());

        // Keep track of the file length at file open
        // NOTE: The file does not get modified while this stream is open
        Nfs3FileAttributes attributes = store.getFileAttributes(handle, credentials);
        if (attributes != null) {
            this.fileLength = attributes.getSize();
            if (this.fileLength < 0) {
                throw new IOException("File length is invalid: " + this.fileLength);
            }
            this.prefetchBlockLimit = Math.min(fileLength, splitSize) >> readBlockSizeBits;
        } else {
            throw new IOException("Could not get file length from NFS server");
        }

    }

    @Override
    public synchronized void seek(long pos) throws IOException {
        if (pos < 0) {
            throw new EOFException("Cannot seek to a negative position: pos=" + pos);
        }
        if (pos > fileLength) {
            throw new EOFException("Cannot seek after EOF: pos=" + pos + ", fileLength=" + fileLength);
        }
        fileOffset = pos;
        prefetchBlockLimit = Math.min(fileLength, pos + this.splitSize) >> readBlockSizeBits;
    }

    @Override
    public synchronized long getPos() throws IOException {
        return fileOffset;
    }

    @Override
    public synchronized boolean seekToNewSource(long targetPos) throws IOException {
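        // There is only a single NFS source for the data, so there is no
        // alternate replica to switch to.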
        return false;
    }

    @Override
    public synchronized int read() throws IOException {
        byte[] data = new byte[1];
        int bytesRead = read(data, 0, 1);
        if (bytesRead <= 0) {
            return -1;
        }
        // Mask off sign extension so a byte of 0xFF comes back as 255, not -1
        return data[0] & 0xff;
    }

    @Override
    public synchronized int read(byte data[]) throws IOException {
        return read(data, 0, data.length);
    }

    @Override
    public synchronized int read(byte data[], int offset, int length) throws IOException {
        long enterTime = System.currentTimeMillis();
        int bytesRead = -1;

        try {
            bytesRead = _read(data, offset, length);
        } finally {
            if (bytesRead >= 0) {
                statistics.incrementBytesRead(bytesRead);
                statistics.incrementReadOps(1);
                statistics.incrementTimeRead(System.currentTimeMillis() - enterTime);
            }
        }
        return bytesRead;
    }

    private synchronized int _read(byte data[], int offset, int length) throws IOException {

        // Clamp the request to the space left in the caller's buffer, then map
        // the byte range onto block ids; each block covers 2^readBlockSizeBits bytes
        int lengthToRead = Math.min(data.length - offset, length);
        int blockSize = 1 << readBlockSizeBits;
        long loBlockId = fileOffset >> readBlockSizeBits;
        long hiBlockId = (fileOffset + lengthToRead - 1) >> readBlockSizeBits;
        int loOffset = (int) (fileOffset - (loBlockId << readBlockSizeBits));
        int hiOffset = (int) ((fileOffset + lengthToRead - 1) - (hiBlockId << readBlockSizeBits));

        if (closed.get()) {
            LOG.warn("Reading from an already closed InputStream. Check your code");
        }

        if (loBlockId == hiBlockId) {
            StreamBlock block = getBlock(loBlockId);
            if (block == null) {
                return -1;
            } else {
                int bytesRead = block.readFromBlock(data, offset, loOffset, lengthToRead);
                if (bytesRead != -1) {
                    fileOffset += bytesRead;
                }
                return bytesRead;
            }
        } else {
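            // Multi-block read: totalBytesRead doubles as the write cursor into
            // the caller's buffer, starting at the caller-supplied offset.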
            int totalBytesRead = offset;
            for (long blk = loBlockId; blk <= hiBlockId; blk++) {
                StreamBlock block = getBlock(blk);
                if (block == null) {
                    if (blk == loBlockId) {
                        return -1;
                    } else {
                        return (totalBytesRead - offset);
                    }
                }

                if (blk == loBlockId) {
                    int bytesRead = block.readFromBlock(data, totalBytesRead, loOffset, blockSize - loOffset);
                    if (bytesRead == -1) {
                        return -1;
                    }
                    totalBytesRead += bytesRead;
                    fileOffset += bytesRead;
                } else if (blk == hiBlockId) {
                    int bytesRead = block.readFromBlock(data, totalBytesRead, 0, hiOffset + 1);
                    if (bytesRead != -1) {
                        totalBytesRead += bytesRead;
                        fileOffset += bytesRead;
                    }
                } else {
                    int bytesRead = block.readFromBlock(data, totalBytesRead, 0, blockSize);
                    if (bytesRead != -1) {
                        totalBytesRead += bytesRead;
                        fileOffset += bytesRead;
                    } else {
                        break;
                    }
                }
            }
            return (totalBytesRead - offset);
        }
    }

    private StreamBlock getBlock(long blockId) throws IOException {

        // Block is current
        if (currentBlock != null && currentBlock.getBlockId() == blockId) {
            LOG.debug("Returning 1 blockId=" + blockId);
            return currentBlock;
        }

        // Issue prefetch for upcoming blocks
        if (doPrefetch) {
            if (blockId >= prefetchBlockLimit) {
                prefetchBlockLimit += Math.min(fileLength, this.splitSize) >> readBlockSizeBits;
                LOG.info("Changing prefetchBlockLimit to " + prefetchBlockLimit);
            }
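            // Schedule up to DEFAULT_READAHEAD_SIZE blocks past the requested
            // one, but never beyond the prefetch limit, skipping blocks that
            // are already cached or in flight.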
            for (long bid = blockId + 1; bid < blockId + DEFAULT_READAHEAD_SIZE
                    && bid < prefetchBlockLimit; ++bid) {
                if (!ongoing.containsKey(bid) && !cache.containsKey(bid)) {
                    StreamBlock block = new StreamBlock(readBlockSizeBits);
                    block.setBlockId(bid);
                    block.setReady(false);
                    cache.put(bid, block);

                    Read task = new Read(store, handle, credentials, statistics, bid, block);
                    Future<Read> future = executors.submit(task);
                    ongoing.put(bid, future);
                }
            }
        }

        // Block is being fetched, so wait for it
        if (ongoing.containsKey(blockId)) {
            Future<Read> future = ongoing.get(blockId);
            while (true) {
                try {
                    LOG.debug("Waiting for read task to complete ongoing reading block id=" + blockId);
                    future.get();
                    break;
                } catch (InterruptedException interrupted) {
                    continue;
                } catch (Exception error) {
                    throw new IOException("Read resulted in an error", error);
                }
            }
        }

        // Some prefetches are done, check for them
        for (Iterator<Entry<Long, Future<Read>>> iter = ongoing.entrySet().iterator(); iter.hasNext();) {
            Future<Read> future = iter.next().getValue();
            if (future.isDone()) {
                try {
                    future.get();
                    iter.remove();
                } catch (InterruptedException interrupted) {
                    // Ignore
                } catch (Exception error) {
                    throw new IOException("Prefetched resulted in error", error);
                }
            }
        }

        // Keep trying until the block is found
        while (true) {

            if (cache.containsKey(blockId)) {
                StreamBlock block = cache.remove(blockId);
                assert block != null;
                assert block.getBlockId() == blockId;
                assert block.isReady();
                currentBlock = block;
                return currentBlock;
            }

            if (cache.size() >= 100 * DEFAULT_CACHE_SIZE_IN_BLOCKS) {
                LOG.error("Cache is wayyy bigger than planned, size=" + cache.size() + " limit="
                        + DEFAULT_CACHE_SIZE_IN_BLOCKS);
            }

            // Issue the read and wait
            StreamBlock block = new StreamBlock(readBlockSizeBits);
            block.setBlockId(blockId);
            block.setReady(false);
            cache.put(blockId, block);

            Read task = new Read(store, handle, credentials, statistics, blockId, block);
            Future<Read> future = executors.submit(task);
            while (true) {
                try {
                    future.get();
                    break;
                } catch (InterruptedException interrupted) {
                    continue;
                } catch (Exception error) {
                    throw new IOException("Read resulted in an error", error);
                }
            }
        }

    }

    @Override
    public void close() throws IOException {

        // getAndSet makes the first-close check atomic, so the thread pool is
        // shut down exactly once even if close() is called concurrently
        boolean first = !closed.getAndSet(true);
        if (!first) {
            LOG.warn("Closing an already closed InputStream. Check your code");
        }

        // Shutdown the thread pool
        if (first) {
            executors.shutdown();
            try {
                executors.awaitTermination(1, TimeUnit.SECONDS);
            } catch (InterruptedException exception) {
                // Ignore
            }
        }

        LOG.info(statistics);
        super.close();
    }

}
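
Usage

For context, here is a minimal sketch of how a stream like this is typically consumed. The connector's FileSystem implementation constructs NFSBufferedInputStream internally, so application code only touches the standard Hadoop API; the nfs:// scheme, server address, and export path below are placeholder assumptions about the deployment, not details taken from the source above.

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class NFSReadExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Same key NFSBufferedInputStream reads; true is also its default
        conf.setBoolean("fs.nfs.prefetch", true);

        // Placeholder URI; adjust the scheme, host, and port to the deployment
        FileSystem fs = FileSystem.get(URI.create("nfs://nfsserver:2049/"), conf);
        try (FSDataInputStream in = fs.open(new Path("/export/data.bin"))) {
            byte[] buffer = new byte[1 << 20];
            int n;
            while ((n = in.read(buffer)) > 0) {
                // consume buffer[0..n)
            }
        }
    }
}

Sequential whole-file reads like this are the access pattern the prefetcher is built for: each getBlock call schedules readahead for the blocks that follow, so the loop above mostly hits the in-memory cache.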