/*
 * Copyright (C) GridGain Systems. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*  _________        _____ __________________        _____
 *  __  ____/___________(_)______  /__  ____/______ ____(_)_______
 *  _  / __  __  ___/__  / _  __  / _  / __  _  __ `/__  / __  __ \
 *  / /_/ /  _  /    _  /  / /_/ /  / /_/ /  / /_/ / _  /  _  / / /
 *  \____/   /_/     /_/   \_,__/   \____/   \__,_/  /_/   /_/ /_/
 */

package org.gridgain.grid.kernal.ggfs.hadoop;

import org.apache.commons.logging.*;
import org.apache.hadoop.fs.*;
import org.gridgain.grid.*;
import org.gridgain.grid.kernal.ggfs.common.*;
import org.gridgain.grid.util.lang.*;
import org.gridgain.grid.util.typedef.internal.*;
import org.jetbrains.annotations.*;

import java.io.*;

/**
 * GGFS input stream wrapper for Hadoop interfaces.
 */
@SuppressWarnings("FieldAccessedSynchronizedAndUnsynchronized")
public final class GridGgfsHadoopInputStream extends InputStream implements Seekable, PositionedReadable,
    GridGgfsHadoopStreamEventListener {
    /** Minimum buffer size. */
    private static final int MIN_BUF_SIZE = 4 * 1024;

    /** Server stream delegate. */
    private GridGgfsHadoopStreamDelegate delegate;

    /** Stream ID used by logger. */
    private long logStreamId;

    /** Stream position. */
    private long pos;

    /** Stream read limit. */
    private long limit;

    /** Mark position. */
    private long markPos = -1;

    /** Prefetch buffer. */
    private DoubleFetchBuffer buf = new DoubleFetchBuffer();

    /** Buffer half size for double-buffering. */
    private int bufHalfSize;

    /** Closed flag. */
    private volatile boolean closed;

    /** Flag set if stream was closed due to connection breakage. */
    private boolean connBroken;

    /** Logger. */
    private Log log;

    /** Client logger. */
    private GridGgfsLogger clientLog;

    /** Read time. */
    private long readTime;

    /** User time. */
    private long userTime;

    /** Last timestamp. */
    private long lastTs;

    /** Amount of read bytes. */
    private long total;

    /**
     * Creates input stream.
     *
     * @param delegate Server stream delegate.
     * @param limit Read limit.
     * @param bufSize Buffer size.
     * @param log Log.
     * @param clientLog Client logger.
     * @param logStreamId Stream ID used by logger.
     */
    public GridGgfsHadoopInputStream(GridGgfsHadoopStreamDelegate delegate, long limit, int bufSize, Log log,
        GridGgfsLogger clientLog, long logStreamId) {
        assert limit >= 0;

        this.delegate = delegate;
        this.limit = limit;
        this.log = log;
        this.clientLog = clientLog;
        this.logStreamId = logStreamId;

        // Each of the two prefetch chunks holds half of the requested buffer size,
        // but never less than MIN_BUF_SIZE.
        bufHalfSize = Math.max(bufSize / 2, MIN_BUF_SIZE);

        lastTs = System.nanoTime();

        delegate.hadoop().addEventListener(delegate, this);
    }

    /**
     * Read start.
     */
    private void readStart() {
        long now = System.nanoTime();

        userTime += now - lastTs;

        lastTs = now;
    }
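
    // Note on time accounting (added commentary): the stream alternates between
    // "user" intervals (time the caller spends between stream calls) and "read"
    // intervals (time spent inside stream calls). readStart() charges the time
    // elapsed since the last timestamp to userTime, readEnd() charges it to
    // readTime, and the accumulated totals are reported to the client logger
    // when the stream is closed.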

    /**
     * Read end.
     */
    private void readEnd() {
        long now = System.nanoTime();

        readTime += now - lastTs;

        lastTs = now;
    }

    /** {@inheritDoc} */
    @Override public synchronized int read() throws IOException {
        checkClosed();

        readStart();

        try {
            if (eof())
                return -1;

            buf.refreshAhead(pos);

            int res = buf.atPosition(pos);

            pos++;
            total++;

            buf.refreshAhead(pos);

            return res;
        }
        catch (GridException e) {
            throw GridGgfsHadoopUtils.cast(e);
        }
        finally {
            readEnd();
        }
    }

    /** {@inheritDoc} */
    @Override public synchronized int read(@NotNull byte[] b, int off, int len) throws IOException {
        checkClosed();

        if (eof())
            return -1;

        readStart();

        try {
            long remaining = limit - pos;

            int read = buf.flatten(b, pos, off, len);

            pos += read;
            total += read;
            remaining -= read;

            if (remaining > 0 && read != len) {
                int readAmt = (int)Math.min(remaining, len - read);

                delegate.hadoop().readData(delegate, pos, readAmt, b, off + read, len - read).get();

                read += readAmt;
                pos += readAmt;
                total += readAmt;
            }

            buf.refreshAhead(pos);

            return read;
        }
        catch (GridException e) {
            throw GridGgfsHadoopUtils.cast(e);
        }
        finally {
            readEnd();
        }
    }

    /** {@inheritDoc} */
    @Override public synchronized long skip(long n) throws IOException {
        checkClosed();

        if (clientLog.isLogEnabled())
            clientLog.logSkip(logStreamId, n);

        long oldPos = pos;

        if (pos + n <= limit)
            pos += n;
        else
            pos = limit;

        buf.refreshAhead(pos);

        return pos - oldPos;
    }

    /** {@inheritDoc} */
    @Override public synchronized int available() throws IOException {
        checkClosed();

        int available = buf.available(pos);

        assert available >= 0;

        return available;
    }

    /** {@inheritDoc} */
    @Override public synchronized void close() throws IOException {
        if (!closed) {
            readStart();

            if (log.isDebugEnabled())
                log.debug("Closing input stream: " + delegate);

            delegate.hadoop().closeStream(delegate);

            readEnd();

            if (clientLog.isLogEnabled())
                clientLog.logCloseIn(logStreamId, userTime, readTime, total);

            markClosed(false);

            if (log.isDebugEnabled())
                log.debug("Closed stream [delegate=" + delegate + ", readTime=" + readTime +
                    ", userTime=" + userTime + ']');
        }
    }

    /** {@inheritDoc} */
    @Override public synchronized void mark(int readLimit) {
        markPos = pos;

        if (clientLog.isLogEnabled())
            clientLog.logMark(logStreamId, readLimit);
    }

    /** {@inheritDoc} */
    @Override public synchronized void reset() throws IOException {
        checkClosed();

        if (clientLog.isLogEnabled())
            clientLog.logReset(logStreamId);

        if (markPos == -1)
            throw new IOException("Stream was not marked.");

        pos = markPos;

        buf.refreshAhead(pos);
    }

    /** {@inheritDoc} */
    @Override public boolean markSupported() {
        return true;
    }
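
    // The positioned reads below follow Hadoop's PositionedReadable contract:
    // they read at an absolute file offset and do not advance the sequential
    // stream position 'pos' maintained by the read()/skip() methods above.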

    /** {@inheritDoc} */
    @Override public synchronized int read(long position, byte[] buf, int off, int len) throws IOException {
        long remaining = limit - position;

        int read = (int)Math.min(len, remaining);

        // Return -1 at EOF.
        if (read == 0)
            return -1;

        readFully(position, buf, off, read);

        return read;
    }

    /** {@inheritDoc} */
    @Override public synchronized void readFully(long position, byte[] buf, int off, int len) throws IOException {
        long remaining = limit - position;

        checkClosed();

        if (len > remaining)
            throw new EOFException("End of stream reached before data was fully read.");

        readStart();

        try {
            int read = this.buf.flatten(buf, position, off, len);

            total += read;

            if (read != len) {
                int readAmt = len - read;

                delegate.hadoop().readData(delegate, position + read, readAmt, buf, off + read, readAmt).get();

                total += readAmt;
            }

            if (clientLog.isLogEnabled())
                clientLog.logRandomRead(logStreamId, position, len);
        }
        catch (GridException e) {
            throw GridGgfsHadoopUtils.cast(e);
        }
        finally {
            readEnd();
        }
    }

    /** {@inheritDoc} */
    @Override public void readFully(long position, byte[] buf) throws IOException {
        readFully(position, buf, 0, buf.length);
    }

    /** {@inheritDoc} */
    @Override public synchronized void seek(long pos) throws IOException {
        A.ensure(pos >= 0, "position must be non-negative");

        checkClosed();

        if (clientLog.isLogEnabled())
            clientLog.logSeek(logStreamId, pos);

        if (pos > limit)
            pos = limit;

        if (log.isDebugEnabled())
            log.debug("Seek to position [delegate=" + delegate + ", pos=" + pos + ", oldPos=" + this.pos + ']');

        this.pos = pos;

        buf.refreshAhead(pos);
    }

    /** {@inheritDoc} */
    @Override public synchronized long getPos() {
        return pos;
    }

    /** {@inheritDoc} */
    @Override public synchronized boolean seekToNewSource(long targetPos) {
        return false;
    }

    /** {@inheritDoc} */
    @Override public void onClose() {
        markClosed(true);
    }

    /** {@inheritDoc} */
    @Override public void onError(String errMsg) {
        // No-op.
    }

    /**
     * Marks stream as closed.
     *
     * @param connBroken {@code True} if connection with server was lost.
     */
    private void markClosed(boolean connBroken) {
        // It is ok to have race here.
        if (!closed) {
            closed = true;

            this.connBroken = connBroken;

            delegate.hadoop().removeEventListener(delegate);
        }
    }

    /**
     * @throws IOException If check failed.
     */
    private void checkClosed() throws IOException {
        if (closed) {
            if (connBroken)
                throw new IOException("Server connection was lost.");
            else
                throw new IOException("Stream is closed.");
        }
    }

    /**
     * @return {@code True} if end of stream reached.
     */
    private boolean eof() {
        return limit == pos;
    }
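
    // Prefetching scheme (added commentary): DoubleFetchBuffer keeps two chunks
    // of bufHalfSize bytes each, fetched asynchronously ahead of the read
    // position. Sequential reads are served from 'first'; once the position
    // crosses into 'second', the chunks flip and the next region is prefetched,
    // so server round-trips overlap with the caller's processing.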

    /**
     * Asynchronous prefetch buffer.
     */
    private static class FetchBufferPart {
        /** Read future. */
        private GridPlainFuture<byte[]> readFut;

        /** Position of cached chunk in file. */
        private long pos;

        /** Prefetch length. Need to store as read future result might be not available yet. */
        private int len;

        /**
         * Creates fetch buffer part.
         *
         * @param readFut Read future for this buffer.
         * @param pos Read position.
         * @param len Chunk length.
         */
        private FetchBufferPart(GridPlainFuture<byte[]> readFut, long pos, int len) {
            this.readFut = readFut;
            this.pos = pos;
            this.len = len;
        }

        /**
         * Copies cached data if specified position matches cached region.
         *
         * @param dst Destination buffer.
         * @param pos Read position in file.
         * @param dstOff Offset in destination buffer from which start writing.
         * @param len Maximum number of bytes to copy.
         * @return Number of bytes copied.
         * @throws GridException If read future failed.
         */
        public int flatten(byte[] dst, long pos, int dstOff, int len) throws GridException {
            // If read start position is within cached boundaries.
            if (contains(pos)) {
                byte[] data = readFut.get();

                int srcPos = (int)(pos - this.pos);
                int cpLen = Math.min(len, data.length - srcPos);

                U.arrayCopy(data, srcPos, dst, dstOff, cpLen);

                return cpLen;
            }

            return 0;
        }

        /**
         * @return {@code True} if data is ready to be read.
         */
        public boolean ready() {
            return readFut.isDone();
        }

        /**
         * Checks if current buffer part contains given position.
         *
         * @param pos Position to check.
         * @return {@code True} if position matches buffer region.
         */
        public boolean contains(long pos) {
            return this.pos <= pos && this.pos + len > pos;
        }
    }

    /**
     * Double-buffered asynchronous prefetch.
     */
    private class DoubleFetchBuffer {
        /** First (currently read) prefetched chunk. */
        private FetchBufferPart first;

        /** Second (read-ahead) prefetched chunk. */
        private FetchBufferPart second;

        /**
         * Copies fetched data from both buffers to destination array if cached region matched read position.
         *
         * @param dst Destination buffer.
         * @param pos Read position in file.
         * @param dstOff Destination buffer offset.
         * @param len Maximum number of bytes to copy.
         * @return Number of bytes copied.
         * @throws GridException If any read operation failed.
         */
        public int flatten(byte[] dst, long pos, int dstOff, int len) throws GridException {
            assert dstOff >= 0;
            assert dstOff + len <= dst.length : "Invalid indices [dst.length=" + dst.length + ", dstOff=" + dstOff +
                ", len=" + len + ']';

            int bytesCopied = 0;

            if (first != null) {
                bytesCopied += first.flatten(dst, pos, dstOff, len);

                if (bytesCopied != len && second != null) {
                    assert second.pos == first.pos + first.len;

                    bytesCopied += second.flatten(dst, pos + bytesCopied, dstOff + bytesCopied, len - bytesCopied);
                }
            }

            return bytesCopied;
        }

        /**
         * Gets byte at specified position in buffer.
         *
         * @param pos Stream position.
         * @return Read byte.
         * @throws GridException If read failed.
         */
        public int atPosition(long pos) throws GridException {
            // Should not reach here if stream contains no data.
            assert first != null;

            if (first.contains(pos)) {
                byte[] bytes = first.readFut.get();

                return bytes[(int)(pos - first.pos)] & 0xFF;
            }
            else {
                assert second != null;
                assert second.contains(pos);

                byte[] bytes = second.readFut.get();

                return bytes[(int)(pos - second.pos)] & 0xFF;
            }
        }

        /**
         * Starts asynchronous buffer refresh if needed, depending on current position.
         *
         * @param pos Current stream position.
         */
        public void refreshAhead(long pos) {
            if (fullPrefetch(pos)) {
                first = fetch(pos, bufHalfSize);
                second = fetch(pos + bufHalfSize, bufHalfSize);
            }
            else if (needFlip(pos)) {
                first = second;

                second = fetch(first.pos + first.len, bufHalfSize);
            }
        }

        /**
         * @param pos Position from which read is expected.
         * @return Number of bytes available to be read without blocking.
         */
        public int available(long pos) {
            int available = 0;

            if (first != null) {
                if (first.contains(pos)) {
                    if (first.ready()) {
                        // Bytes remaining in the first chunk past the read position.
                        available += (int)(first.len - (pos - first.pos));

                        if (second != null && second.ready())
                            available += second.len;
                    }
                }
                else {
                    if (second != null && second.contains(pos) && second.ready())
                        // Bytes remaining in the second chunk past the read position.
                        available += (int)(second.len - (pos - second.pos));
                }
            }

            return available;
        }

        /**
         * Checks if position shifted enough to forget previous buffer.
         *
         * @param pos Current position.
         * @return {@code True} if need flip buffers.
         */
        private boolean needFlip(long pos) {
            // Flip once the read position enters the second (read-ahead) buffer.
            return second != null && second.contains(pos);
        }
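
        // Worked example (assuming bufHalfSize = 4096): after refreshAhead(0),
        // 'first' covers [0, 4096) and 'second' covers [4096, 8192). While pos
        // stays within 'first' nothing happens; when pos enters [4096, 8192),
        // needFlip() triggers a flip and [8192, 12288) is prefetched. A seek
        // outside both regions makes fullPrefetch() (below) discard both chunks.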

        /**
         * Determines if all cached bytes should be discarded and new region should be
         * prefetched.
         *
         * @param curPos Current stream position.
         * @return {@code True} if need to refresh both blocks.
         */
        private boolean fullPrefetch(long curPos) {
            // Refresh both buffers if no data was prefetched yet, or if position
            // fell outside the cached region entirely.
            return first == null || curPos < first.pos || (second != null && curPos >= second.pos + second.len);
        }

        /**
         * Starts asynchronous fetch for given region.
         *
         * @param pos Position to read from.
         * @param size Number of bytes to read.
         * @return Fetch buffer part.
         */
        private FetchBufferPart fetch(long pos, int size) {
            long remaining = limit - pos;

            size = (int)Math.min(size, remaining);

            return size <= 0 ? null :
                new FetchBufferPart(delegate.hadoop().readData(delegate, pos, size, null, 0, 0), pos, size);
        }
    }
}
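
// Usage sketch (illustrative only, not part of the original file): instances of
// this stream are normally created by the GGFS Hadoop file system integration
// rather than by user code. Assuming a stream delegate has been obtained for an
// open file of length 'len' (the 'openDelegate' call below is hypothetical),
// usage is ordinary java.io/Hadoop stream usage:
//
//     GridGgfsHadoopStreamDelegate delegate = openDelegate(path);
//
//     GridGgfsHadoopInputStream in = new GridGgfsHadoopInputStream(delegate, len,
//         bufSize, log, clientLog, logStreamId);
//
//     int b = in.read();          // Sequential read, served from prefetch buffer.
//     in.seek(0);                 // Seekable: reposition the stream.
//     in.readFully(100, dst);     // PositionedReadable: absolute-offset read.
//
//     in.close();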