Java tutorial
/** * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the * License. You may obtain a copy of the License at * <p> * http://www.apache.org/licenses/LICENSE-2.0 * <p> * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the * specific language governing permissions and limitations under the License. */ package com.buaa.cfs.fs; import com.buaa.cfs.exception.ChecksumException; import com.buaa.cfs.utils.StringUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import java.io.EOFException; import java.io.IOException; import java.io.InputStream; import java.nio.ByteBuffer; import java.nio.IntBuffer; import java.util.zip.Checksum; /** * This is a generic input stream for verifying checksums for data before it is read by a user. */ abstract public class FSInputChecker extends FSInputStream { public static final Log LOG = LogFactory.getLog(FSInputChecker.class); /** The file name from which data is read from */ protected Path file; private Checksum sum; private boolean verifyChecksum = true; private int maxChunkSize; // data bytes for checksum (eg 512) private byte[] buf; // buffer for non-chunk-aligned reading private byte[] checksum; private IntBuffer checksumInts; // wrapper on checksum buffer private int pos; // the position of the reader inside buf private int count; // the number of bytes currently in buf private int numOfRetries; // cached file position // this should always be a multiple of maxChunkSize private long chunkPos = 0; // Number of checksum chunks that can be read at once into a user // buffer. Chosen by benchmarks - higher values do not reduce // CPU usage. The size of the data reads made to the underlying stream // will be CHUNKS_PER_READ * maxChunkSize. private static final int CHUNKS_PER_READ = 32; protected static final int CHECKSUM_SIZE = 4; // 32-bit checksum /** * Constructor * * @param file The name of the file to be read * @param numOfRetries Number of read retries when ChecksumError occurs */ protected FSInputChecker(Path file, int numOfRetries) { this.file = file; this.numOfRetries = numOfRetries; } /** * Constructor * * @param file The name of the file to be read * @param numOfRetries Number of read retries when ChecksumError occurs * @param sum the type of Checksum engine * @param chunkSize maximun chunk size * @param checksumSize the number byte of each checksum */ protected FSInputChecker(Path file, int numOfRetries, boolean verifyChecksum, Checksum sum, int chunkSize, int checksumSize) { this(file, numOfRetries); set(verifyChecksum, sum, chunkSize, checksumSize); } /** * Reads in checksum chunks into <code>buf</code> at <code>offset</code> and checksum into <code>checksum</code>. * Since checksums can be disabled, there are two cases implementors need to worry about: * <p> * (a) needChecksum() will return false: - len can be any positive value - checksum will be null Implementors should * simply pass through to the underlying data stream. or (b) needChecksum() will return true: - len >= maxChunkSize * - checksum.length is a multiple of CHECKSUM_SIZE Implementors should read an integer number of data chunks into * buf. The amount read should be bounded by len or by checksum.length / CHECKSUM_SIZE * maxChunkSize. Note that len * may be a value that is not a multiple of maxChunkSize, in which case the implementation may return less than * len. * <p> * The method is used for implementing read, therefore, it should be optimized for sequential reading. * * @param pos chunkPos * @param buf desitination buffer * @param offset offset in buf at which to store data * @param len maximum number of bytes to read * @param checksum the data buffer into which to write checksums * * @return number of bytes read */ abstract protected int readChunk(long pos, byte[] buf, int offset, int len, byte[] checksum) throws IOException; /** * Return position of beginning of chunk containing pos. * * @param pos a postion in the file * * @return the starting position of the chunk which contains the byte */ abstract protected long getChunkPosition(long pos); /** Return true if there is a need for checksum verification */ protected synchronized boolean needChecksum() { return verifyChecksum && sum != null; } /** * Read one checksum-verified byte * * @return the next byte of data, or <code>-1</code> if the end of the stream is reached. * * @throws IOException if an I/O error occurs. */ @Override public synchronized int read() throws IOException { if (pos >= count) { fill(); if (pos >= count) { return -1; } } return buf[pos++] & 0xff; } /** * Read checksum verified bytes from this byte-input stream into the specified byte array, starting at the given * offset. * <p> * <p> This method implements the general contract of the corresponding <code>{@link InputStream#read(byte[], int, * int) read}</code> method of the <code>{@link InputStream}</code> class. As an additional convenience, it * attempts to read as many bytes as possible by repeatedly invoking the <code>read</code> method of the underlying * stream. This iterated <code>read</code> continues until one of the following conditions becomes true: <ul> * <p> * <li> The specified number of bytes have been read, * <p> * <li> The <code>read</code> method of the underlying stream returns <code>-1</code>, indicating end-of-file. * <p> * </ul> If the first <code>read</code> on the underlying stream returns <code>-1</code> to indicate end-of-file * then this method returns <code>-1</code>. Otherwise this method returns the number of bytes actually read. * * @param b destination buffer. * @param off offset at which to start storing bytes. * @param len maximum number of bytes to read. * * @return the number of bytes read, or <code>-1</code> if the end of the stream has been reached. * * @throws IOException if an I/O error occurs. ChecksumException if any checksum error occurs */ @Override public synchronized int read(byte[] b, int off, int len) throws IOException { // parameter check if ((off | len | (off + len) | (b.length - (off + len))) < 0) { throw new IndexOutOfBoundsException(); } else if (len == 0) { return 0; } int n = 0; for (;;) { int nread = read1(b, off + n, len - n); if (nread <= 0) return (n == 0) ? nread : n; n += nread; if (n >= len) return n; } } /** * Fills the buffer with a chunk data. No mark is supported. This method assumes that all data in the buffer has * already been read in, hence pos > count. */ private void fill() throws IOException { assert (pos >= count); // fill internal buffer count = readChecksumChunk(buf, 0, maxChunkSize); if (count < 0) count = 0; } /* * Read characters into a portion of an array, reading from the underlying * stream at most once if necessary. */ private int read1(byte b[], int off, int len) throws IOException { int avail = count - pos; if (avail <= 0) { if (len >= maxChunkSize) { // read a chunk to user buffer directly; avoid one copy int nread = readChecksumChunk(b, off, len); return nread; } else { // read a chunk into the local buffer fill(); if (count <= 0) { return -1; } else { avail = count; } } } // copy content of the local buffer to the user buffer int cnt = (avail < len) ? avail : len; System.arraycopy(buf, pos, b, off, cnt); pos += cnt; return cnt; } /* Read up one or more checksum chunk to array <i>b</i> at pos <i>off</i> * It requires at least one checksum chunk boundary * in between <cur_pos, cur_pos+len> * and it stops reading at the last boundary or at the end of the stream; * Otherwise an IllegalArgumentException is thrown. * This makes sure that all data read are checksum verified. * * @param b the buffer into which the data is read. * @param off the start offset in array <code>b</code> * at which the data is written. * @param len the maximum number of bytes to read. * @return the total number of bytes read into the buffer, or * <code>-1</code> if there is no more data because the end of * the stream has been reached. * @throws IOException if an I/O error occurs. */ private int readChecksumChunk(byte b[], final int off, final int len) throws IOException { // invalidate buffer count = pos = 0; int read = 0; boolean retry = true; int retriesLeft = numOfRetries; do { retriesLeft--; try { read = readChunk(chunkPos, b, off, len, checksum); if (read > 0) { if (needChecksum()) { verifySums(b, off, read); } chunkPos += read; } retry = false; } catch (ChecksumException ce) { LOG.info("Found checksum error: b[" + off + ", " + (off + read) + "]=" + StringUtils.byteToHexString(b, off, off + read), ce); if (retriesLeft == 0) { throw ce; } // try a new replica if (seekToNewSource(chunkPos)) { // Since at least one of the sources is different, // the read might succeed, so we'll retry. seek(chunkPos); } else { // Neither the data stream nor the checksum stream are being read // from different sources, meaning we'll still get a checksum error // if we try to do the read again. We throw an exception instead. throw ce; } } } while (retry); return read; } private void verifySums(final byte b[], final int off, int read) throws ChecksumException { int leftToVerify = read; int verifyOff = 0; checksumInts.rewind(); checksumInts.limit((read - 1) / maxChunkSize + 1); while (leftToVerify > 0) { sum.update(b, off + verifyOff, Math.min(leftToVerify, maxChunkSize)); int expected = checksumInts.get(); int calculated = (int) sum.getValue(); sum.reset(); if (expected != calculated) { long errPos = chunkPos + verifyOff; throw new ChecksumException( "Checksum error: " + file + " at " + errPos + " exp: " + expected + " got: " + calculated, errPos); } leftToVerify -= maxChunkSize; verifyOff += maxChunkSize; } } /** * Convert a checksum byte array to a long This is deprecated since 0.22 since it is no longer in use by this * class. */ @Deprecated static public long checksum2long(byte[] checksum) { long crc = 0L; for (int i = 0; i < checksum.length; i++) { crc |= (0xffL & (long) checksum[i]) << ((checksum.length - i - 1) * 8); } return crc; } @Override public synchronized long getPos() throws IOException { return chunkPos - Math.max(0L, count - pos); } @Override public synchronized int available() throws IOException { return Math.max(0, count - pos); } /** * Skips over and discards <code>n</code> bytes of data from the input stream. * <p> * <p>This method may skip more bytes than are remaining in the backing file. This produces no exception and the * number of bytes skipped may include some number of bytes that were beyond the EOF of the backing file. Attempting * to read from the stream after skipping past the end will result in -1 indicating the end of the file. * <p> * <p>If <code>n</code> is negative, no bytes are skipped. * * @param n the number of bytes to be skipped. * * @return the actual number of bytes skipped. * * @throws IOException if an I/O error occurs. ChecksumException if the chunk to skip to is corrupted */ @Override public synchronized long skip(long n) throws IOException { if (n <= 0) { return 0; } seek(getPos() + n); return n; } /** * Seek to the given position in the stream. The next read() will be from that position. * <p> * <p>This method may seek past the end of the file. This produces no exception and an attempt to read from the * stream will result in -1 indicating the end of the file. * * @param pos the postion to seek to. * * @throws IOException if an I/O error occurs. ChecksumException if the chunk to seek to is corrupted */ @Override public synchronized void seek(long pos) throws IOException { if (pos < 0) { throw new EOFException(FSExceptionMessages.NEGATIVE_SEEK); } // optimize: check if the pos is in the buffer long start = chunkPos - this.count; if (pos >= start && pos < chunkPos) { this.pos = (int) (pos - start); return; } // reset the current state resetState(); // seek to a checksum boundary chunkPos = getChunkPosition(pos); // scan to the desired position int delta = (int) (pos - chunkPos); if (delta > 0) { readFully(this, new byte[delta], 0, delta); } } /** * A utility function that tries to read up to <code>len</code> bytes from <code>stm</code> * * @param stm an input stream * @param buf destiniation buffer * @param offset offset at which to store data * @param len number of bytes to read * * @return actual number of bytes read * * @throws IOException if there is any IO error */ protected static int readFully(InputStream stm, byte[] buf, int offset, int len) throws IOException { int n = 0; for (;;) { int nread = stm.read(buf, offset + n, len - n); if (nread <= 0) return (n == 0) ? nread : n; n += nread; if (n >= len) return n; } } /** * Set the checksum related parameters * * @param verifyChecksum whether to verify checksum * @param sum which type of checksum to use * @param maxChunkSize maximun chunk size * @param checksumSize checksum size */ final protected synchronized void set(boolean verifyChecksum, Checksum sum, int maxChunkSize, int checksumSize) { // The code makes assumptions that checksums are always 32-bit. assert !verifyChecksum || sum == null || checksumSize == CHECKSUM_SIZE; this.maxChunkSize = maxChunkSize; this.verifyChecksum = verifyChecksum; this.sum = sum; this.buf = new byte[maxChunkSize]; // The size of the checksum array here determines how much we can // read in a single call to readChunk this.checksum = new byte[CHUNKS_PER_READ * checksumSize]; this.checksumInts = ByteBuffer.wrap(checksum).asIntBuffer(); this.count = 0; this.pos = 0; } @Override final public boolean markSupported() { return false; } @Override final public void mark(int readlimit) { } @Override final public void reset() throws IOException { throw new IOException("mark/reset not supported"); } /* reset this FSInputChecker's state */ private void resetState() { // invalidate buffer count = 0; pos = 0; // reset Checksum if (sum != null) { sum.reset(); } } }