org.apache.hadoop.hbase.io.hfile.HFileReaderV1.java Source code

Introduction

Here is the source code for org.apache.hadoop.hbase.io.hfile.HFileReaderV1.java, Apache HBase's reader for version-1 HFiles, followed by a short usage sketch and the full listing.
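
Before the listing, here is a minimal usage sketch (not part of the original source) showing how this reader is typically driven: open the stream, read the fixed trailer, construct the reader, call loadFileInfo(), then scan. It assumes an HBase 0.92/0.94-era classpath; FixedFileTrailer.readFromStream and the CacheConfig constructor are assumed to match the version this class ships with, and the input path is hypothetical.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.FixedFileTrailer;
import org.apache.hadoop.hbase.io.hfile.HFileReaderV1;
import org.apache.hadoop.hbase.io.hfile.HFileScanner;

public class HFileV1ReadSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path(args[0]); // hypothetical path to a version-1 HFile
        long size = fs.getFileStatus(path).getLen();
        FSDataInputStream fsdis = fs.open(path);

        // Read the fixed trailer from the end of the file, then hand the
        // stream to the reader; closeIStream = true lets close() close it.
        FixedFileTrailer trailer = FixedFileTrailer.readFromStream(fsdis, size);
        HFileReaderV1 reader = new HFileReaderV1(path, trailer, fsdis, size,
                true, new CacheConfig(conf));
        reader.loadFileInfo(); // must precede any seek or scan

        // No block caching, positional reads, not a compaction.
        HFileScanner scanner = reader.getScanner(false, true, false);
        if (scanner.seekTo()) { // position at the first key/value in the file
            do {
                System.out.println(scanner.getKeyString());
            } while (scanner.next());
        }
        reader.close();
    }
}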

Source

/*
 * Copyright 2011 The Apache Software Foundation
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.io.hfile;

import java.io.ByteArrayInputStream;
import java.io.DataInput;
import java.io.DataInputStream;
import java.io.IOException;
import java.nio.ByteBuffer;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.hfile.BlockType.BlockCategory;
import org.apache.hadoop.hbase.io.hfile.HFile.FileInfo;
import org.apache.hadoop.hbase.io.hfile.HFile.Writer;
import org.apache.hadoop.hbase.regionserver.metrics.SchemaMetrics;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.RawComparator;

import com.google.common.base.Preconditions;

/**
 * {@link HFile} reader for version 1. Does not support data block encoding,
 * even cache-only: HFile v1 blocks are always brought into the cache
 * unencoded.
 */
public class HFileReaderV1 extends AbstractHFileReader {
    private static final Log LOG = LogFactory.getLog(HFileReaderV1.class);

    private volatile boolean fileInfoLoaded = false;

    /**
     * Opens an HFile. You must load the index by calling
     * {@link #loadFileInfo()} before you can use the reader.
     *
     * @param path path of the file on the filesystem
     * @param trailer the fixed trailer already read from the end of the file
     * @param fsdis input stream; the caller is responsible for closing it
     * unless closeIStream is true
     * @param size length of the stream
     * @param closeIStream whether {@link #close()} should close the stream
     * @param cacheConf cache references and configuration
     */
    public HFileReaderV1(Path path, FixedFileTrailer trailer, final FSDataInputStream fsdis, final long size,
            final boolean closeIStream, final CacheConfig cacheConf) throws IOException {
        super(path, trailer, fsdis, size, closeIStream, cacheConf);

        trailer.expectMajorVersion(1);
        fsBlockReader = new HFileBlock.FSReaderV1(fsdis, compressAlgo, fileSize);
    }

    private byte[] readAllIndex(final FSDataInputStream in, final long indexOffset, final int indexSize)
            throws IOException {
        byte[] allIndex = new byte[indexSize];
        in.seek(indexOffset);
        IOUtils.readFully(in, allIndex, 0, allIndex.length);

        return allIndex;
    }

    /**
     * Read in the index and file info.
     *
     * @return A map of fileinfo data.
     * @see Writer#appendFileInfo(byte[], byte[])
     * @throws IOException
     */
    @Override
    public FileInfo loadFileInfo() throws IOException {
        if (fileInfoLoaded)
            return fileInfo;

        // Read in the fileinfo and get what we need from it.
        istream.seek(trailer.getFileInfoOffset());
        fileInfo = new FileInfo();
        fileInfo.readFields(istream);
        lastKey = fileInfo.get(FileInfo.LASTKEY);
        avgKeyLen = Bytes.toInt(fileInfo.get(FileInfo.AVG_KEY_LEN));
        avgValueLen = Bytes.toInt(fileInfo.get(FileInfo.AVG_VALUE_LEN));

        // Comparator is stored in the file info in version 1.
        String clazzName = Bytes.toString(fileInfo.get(FileInfo.COMPARATOR));
        comparator = getComparator(clazzName);

        dataBlockIndexReader = new HFileBlockIndex.BlockIndexReader(comparator, 1);
        metaBlockIndexReader = new HFileBlockIndex.BlockIndexReader(Bytes.BYTES_RAWCOMPARATOR, 1);

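        // The load-on-open section (data index, then meta index) sits between
        // its recorded start offset and the fixed trailer; read it in one shot.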
        int sizeToLoadOnOpen = (int) (fileSize - trailer.getLoadOnOpenDataOffset() - trailer.getTrailerSize());
        byte[] dataAndMetaIndex = readAllIndex(istream, trailer.getLoadOnOpenDataOffset(), sizeToLoadOnOpen);

        ByteArrayInputStream bis = new ByteArrayInputStream(dataAndMetaIndex);
        DataInputStream dis = new DataInputStream(bis);

        // Read in the data index.
        if (trailer.getDataIndexCount() > 0)
            BlockType.INDEX_V1.readAndCheck(dis);
        dataBlockIndexReader.readRootIndex(dis, trailer.getDataIndexCount());

        // Read in the metadata index.
        if (trailer.getMetaIndexCount() > 0)
            BlockType.INDEX_V1.readAndCheck(dis);
        metaBlockIndexReader.readRootIndex(dis, trailer.getMetaIndexCount());

        fileInfoLoaded = true;
        return fileInfo;
    }

    /**
     * Creates comparator from the given class name.
     *
     * @param clazzName the comparator class name read from the trailer
     * @return an instance of the comparator to use
     * @throws IOException in case comparator class name is invalid
     */
    @SuppressWarnings("unchecked")
    private RawComparator<byte[]> getComparator(final String clazzName) throws IOException {
        if (clazzName == null || clazzName.length() == 0) {
            return null;
        }
        try {
            return (RawComparator<byte[]>) Class.forName(clazzName).newInstance();
        } catch (InstantiationException e) {
            throw new IOException(e);
        } catch (IllegalAccessException e) {
            throw new IOException(e);
        } catch (ClassNotFoundException e) {
            throw new IOException(e);
        }
    }

    /**
     * Create a Scanner on this file. No seeks or reads are done on creation. Call
     * {@link HFileScanner#seekTo(byte[])} to position and start the read. There is
     * nothing to clean up in a Scanner. Letting go of your references to the
     * scanner is sufficient.
     *
     * @param cacheBlocks True if we should cache blocks read in by this scanner.
     * @param pread Use positional read rather than seek+read if true (pread is
     *          better for random reads, seek+read is better for scanning).
     * @param isCompaction is scanner being used for a compaction?
     * @return Scanner on this file.
     */
    @Override
    public HFileScanner getScanner(boolean cacheBlocks, final boolean pread, final boolean isCompaction) {
        return new ScannerV1(this, cacheBlocks, pread, isCompaction);
    }

    /**
     * @param key Key to search.
     * @return Block number of the block containing the key or -1 if not in this
     * file.
     */
    protected int blockContainingKey(final byte[] key, int offset, int length) {
        Preconditions.checkState(!dataBlockIndexReader.isEmpty(), "Block index not loaded");
        return dataBlockIndexReader.rootBlockContainingKey(key, offset, length);
    }

    /**
     * @param metaBlockName name of the meta block to read
     * @param cacheBlock Add block to cache, if found
     * @return Block wrapped in a ByteBuffer
     * @throws IOException
     */
    @Override
    public ByteBuffer getMetaBlock(String metaBlockName, boolean cacheBlock) throws IOException {
        if (trailer.getMetaIndexCount() == 0) {
            return null; // there are no meta blocks
        }
        if (metaBlockIndexReader == null) {
            throw new IOException("Meta index not loaded");
        }

        byte[] nameBytes = Bytes.toBytes(metaBlockName);
        int block = metaBlockIndexReader.rootBlockContainingKey(nameBytes, 0, nameBytes.length);
        if (block == -1)
            return null;
        long offset = metaBlockIndexReader.getRootBlockOffset(block);
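        // Version 1 indexes record only block offsets, so a block's on-disk
        // size is inferred from the offset of whatever follows it: the next
        // meta block, or the fileinfo section if this is the last one.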
        long nextOffset;
        if (block == metaBlockIndexReader.getRootBlockCount() - 1) {
            nextOffset = trailer.getFileInfoOffset();
        } else {
            nextOffset = metaBlockIndexReader.getRootBlockOffset(block + 1);
        }

        long startTimeNs = System.nanoTime();

        BlockCacheKey cacheKey = new BlockCacheKey(name, offset, DataBlockEncoding.NONE, BlockType.META);

        BlockCategory effectiveCategory = BlockCategory.META;
        if (metaBlockName.equals(HFileWriterV1.BLOOM_FILTER_META_KEY)
                || metaBlockName.equals(HFileWriterV1.BLOOM_FILTER_DATA_KEY)) {
            effectiveCategory = BlockCategory.BLOOM;
        }

        // Per meta key from any given file, synchronize reads for said block
        synchronized (metaBlockIndexReader.getRootBlockKey(block)) {
            // Check cache for block.  If found return.
            if (cacheConf.isBlockCacheEnabled()) {
                HFileBlock cachedBlock = (HFileBlock) cacheConf.getBlockCache().getBlock(cacheKey,
                        cacheConf.shouldCacheBlockOnRead(effectiveCategory), false);
                if (cachedBlock != null) {
                    getSchemaMetrics().updateOnCacheHit(effectiveCategory, SchemaMetrics.NO_COMPACTION);
                    return cachedBlock.getBufferWithoutHeader();
                }
                // Cache Miss, please load.
            }

            HFileBlock hfileBlock = fsBlockReader.readBlockData(offset, nextOffset - offset,
                    metaBlockIndexReader.getRootBlockDataSize(block), true);
            passSchemaMetricsTo(hfileBlock);
            hfileBlock.expectType(BlockType.META);

            final long delta = System.nanoTime() - startTimeNs;
            HFile.offerReadLatency(delta, true);
            getSchemaMetrics().updateOnCacheMiss(effectiveCategory, SchemaMetrics.NO_COMPACTION, delta);

            // Cache the block
            if (cacheBlock && cacheConf.shouldCacheBlockOnRead(effectiveCategory)) {
                cacheConf.getBlockCache().cacheBlock(cacheKey, hfileBlock, cacheConf.isInMemory());
            }

            return hfileBlock.getBufferWithoutHeader();
        }
    }

    /**
     * Read in a file block.
     * @param block Index of block to read.
     * @param pread Use positional read instead of seek+read (positional is
     * better for random reads, whereas seek+read is better for scanning).
     * @param isCompaction is this block being read as part of a compaction
     * @return Block wrapped in a ByteBuffer.
     * @throws IOException
     */
    ByteBuffer readBlockBuffer(int block, boolean cacheBlock, final boolean pread, final boolean isCompaction)
            throws IOException {
        if (dataBlockIndexReader == null) {
            throw new IOException("Block index not loaded");
        }
        if (block < 0 || block >= dataBlockIndexReader.getRootBlockCount()) {
            throw new IOException("Requested block is out of range: " + block + ", max: "
                    + dataBlockIndexReader.getRootBlockCount());
        }

        long offset = dataBlockIndexReader.getRootBlockOffset(block);
        BlockCacheKey cacheKey = new BlockCacheKey(name, offset);

        // For any given block from any given file, synchronize reads for said
        // block.
        // Without a cache, this synchronizing is needless overhead, but really
        // the other choice is to duplicate work (which the cache would prevent you
        // from doing).
        synchronized (dataBlockIndexReader.getRootBlockKey(block)) {
            // Check cache for block.  If found return.
            if (cacheConf.isBlockCacheEnabled()) {
                HFileBlock cachedBlock = (HFileBlock) cacheConf.getBlockCache().getBlock(cacheKey,
                        cacheConf.shouldCacheDataOnRead(), false);
                if (cachedBlock != null) {
                    getSchemaMetrics().updateOnCacheHit(cachedBlock.getBlockType().getCategory(), isCompaction);
                    return cachedBlock.getBufferWithoutHeader();
                }
                // Carry on, please load.
            }

            // Load block from filesystem.
            long startTimeNs = System.nanoTime();
            long nextOffset;

            if (block == dataBlockIndexReader.getRootBlockCount() - 1) {
                // Last block: the data blocks end at the first meta block if
                // there is one, or at the fileinfo offset if there isn't.
                nextOffset = (metaBlockIndexReader.getRootBlockCount() == 0) ? this.trailer.getFileInfoOffset()
                        : metaBlockIndexReader.getRootBlockOffset(0);
            } else {
                nextOffset = dataBlockIndexReader.getRootBlockOffset(block + 1);
            }

            HFileBlock hfileBlock = fsBlockReader.readBlockData(offset, nextOffset - offset,
                    dataBlockIndexReader.getRootBlockDataSize(block), pread);
            passSchemaMetricsTo(hfileBlock);
            hfileBlock.expectType(BlockType.DATA);

            final long delta = System.nanoTime() - startTimeNs;
            HFile.offerReadLatency(delta, pread);
            getSchemaMetrics().updateOnCacheMiss(BlockCategory.DATA, isCompaction, delta);

            // Cache the block
            if (cacheBlock && cacheConf.shouldCacheBlockOnRead(hfileBlock.getBlockType().getCategory())) {
                cacheConf.getBlockCache().cacheBlock(cacheKey, hfileBlock, cacheConf.isInMemory());
            }
            return hfileBlock.getBufferWithoutHeader();
        }
    }

    /**
     * @return Last key in the file.  May be null if file has no entries.
     * Note that this is not the last rowkey, but rather the byte form of
     * the last KeyValue.
     */
    public byte[] getLastKey() {
        if (!fileInfoLoaded) {
            throw new RuntimeException("Load file info first");
        }
        return dataBlockIndexReader.isEmpty() ? null : lastKey;
    }

    /**
     * @return Midkey for this file. We work with block boundaries only so
     *         returned midkey is an approximation only.
     *
     * @throws IOException
     */
    @Override
    public byte[] midkey() throws IOException {
        Preconditions.checkState(isFileInfoLoaded(), "File info is not loaded");
        Preconditions.checkState(!dataBlockIndexReader.isEmpty(), "Data block index is not loaded or is empty");
        return dataBlockIndexReader.midkey();
    }

    @Override
    public void close() throws IOException {
        close(cacheConf.shouldEvictOnClose());
    }

    @Override
    public void close(boolean evictOnClose) throws IOException {
        if (evictOnClose && cacheConf.isBlockCacheEnabled()) {
            int numEvicted = 0;
            for (int i = 0; i < dataBlockIndexReader.getRootBlockCount(); i++) {
                if (cacheConf.getBlockCache().evictBlock(new BlockCacheKey(name,
                        dataBlockIndexReader.getRootBlockOffset(i), DataBlockEncoding.NONE, BlockType.DATA))) {
                    numEvicted++;
                }
            }
            LOG.debug("On close of file " + name + " evicted " + numEvicted + " block(s) of "
                    + dataBlockIndexReader.getRootBlockCount() + " total blocks");
        }
        if (this.closeIStream && this.istream != null) {
            this.istream.close();
            this.istream = null;
        }

        if (isSchemaConfigured())
            getSchemaMetrics().flushMetrics();
    }

    protected abstract static class AbstractScannerV1 extends AbstractHFileReader.Scanner {
        protected int currBlock;

        /**
         * This masks a field with the same name in the superclass and saves us the
         * runtime overhead of casting from abstract reader to reader V1.
         */
        protected HFileReaderV1 reader;

        public AbstractScannerV1(HFileReaderV1 reader, boolean cacheBlocks, final boolean pread,
                final boolean isCompaction) {
            super(reader, cacheBlocks, pread, isCompaction);
            this.reader = reader;
        }

        /**
         * Within a loaded block, seek to the greatest key that is smaller
         * than or equal to the key we are interested in.
         *
         * A note on seekBefore: if seekBefore is true AND the first key in
         * the block equals the sought key, an exception is thrown, because
         * the scanner cannot step back past the start of the block.
         * @param key to find
         * @param seekBefore find the key before the exact match
         * @return 0 on an exact match, 1 otherwise
         */
        protected abstract int blockSeek(byte[] key, int offset, int length, boolean seekBefore);

        protected abstract void loadBlock(int block, boolean rewind) throws IOException;

        @Override
        public int seekTo(byte[] key, int offset, int length) throws IOException {
            int b = reader.blockContainingKey(key, offset, length);
            if (b < 0)
                return -1; // the key falls before the beginning of the file
            // loadBlock() avoids re-reading a block we are already in;
            // rewind = true restarts the scan from the start of the block.
            loadBlock(b, true);
            return blockSeek(key, offset, length, false);
        }

        @Override
        public int reseekTo(byte[] key, int offset, int length) throws IOException {
            if (blockBuffer != null && currKeyLen != 0) {
                ByteBuffer bb = getKey();
                int compared = reader.getComparator().compare(key, offset, length, bb.array(), bb.arrayOffset(),
                        bb.limit());
                if (compared < 1) {
                    // If the required key is less than or equal to current key, then
                    // don't do anything.
                    return compared;
                }
            }

            int b = reader.blockContainingKey(key, offset, length);
            if (b < 0) {
                return -1;
            }
            loadBlock(b, false);
            return blockSeek(key, offset, length, false);
        }

        @Override
        public boolean seekBefore(byte[] key, int offset, int length) throws IOException {
            int b = reader.blockContainingKey(key, offset, length);
            if (b < 0)
                return false; // key is before the start of the file.

            // Question: does this block begin with 'key'?
            byte[] firstKey = reader.getDataBlockIndexReader().getRootBlockKey(b);
            if (reader.getComparator().compare(firstKey, 0, firstKey.length, key, offset, length) == 0) {
                // Ok the key we're interested in is the first of the block, so go back
                // by one.
                if (b == 0) {
                    // we have a 'problem', the key we want is the first of the file.
                    return false;
                }
                b--;
                // TODO shortcut: seek forward in this block to the last key of the
                // block.
            }
            loadBlock(b, true);
            blockSeek(key, offset, length, true);
            return true;
        }
    }

    /**
     * Implementation of {@link HFileScanner} interface.
     */
    protected static class ScannerV1 extends AbstractScannerV1 {
        private HFileReaderV1 reader;

        public ScannerV1(HFileReaderV1 reader, boolean cacheBlocks, final boolean pread,
                final boolean isCompaction) {
            super(reader, cacheBlocks, pread, isCompaction);
            this.reader = reader;
        }

        @Override
        public KeyValue getKeyValue() {
            if (blockBuffer == null) {
                return null;
            }
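            // position() is just past the 4-byte key-length and 4-byte
            // value-length ints already consumed, and a serialized KeyValue
            // begins with exactly those two ints, so back up 8 bytes.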
            return new KeyValue(blockBuffer.array(), blockBuffer.arrayOffset() + blockBuffer.position() - 8);
        }

        @Override
        public ByteBuffer getKey() {
            Preconditions.checkState(blockBuffer != null && currKeyLen > 0,
                    "you need to seekTo() before calling getKey()");

            ByteBuffer keyBuff = blockBuffer.slice();
            keyBuff.limit(currKeyLen);
            keyBuff.rewind();
            // TODO: return keyBuff.asReadOnlyBuffer()?
            return keyBuff;
        }

        @Override
        public ByteBuffer getValue() {
            if (blockBuffer == null || currKeyLen == 0) {
                throw new RuntimeException("you need to seekTo() before calling getValue()");
            }

            // TODO: Could this be done with one ByteBuffer rather than create two?
            ByteBuffer valueBuff = blockBuffer.slice();
            valueBuff.position(currKeyLen);
            valueBuff = valueBuff.slice();
            valueBuff.limit(currValueLen);
            valueBuff.rewind();
            return valueBuff;
        }

        @Override
        public boolean next() throws IOException {
            if (blockBuffer == null) {
                throw new IOException("Next called on non-seeked scanner");
            }

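            // Advance past the current key/value; if that exhausts the block,
            // load the next data block and read its first entry's lengths.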
            try {
                blockBuffer.position(blockBuffer.position() + currKeyLen + currValueLen);
            } catch (IllegalArgumentException e) {
                LOG.error("Current pos = " + blockBuffer.position() + "; currKeyLen = " + currKeyLen
                        + "; currValLen = " + currValueLen + "; block limit = " + blockBuffer.limit()
                        + "; HFile name = " + reader.getName() + "; currBlock id = " + currBlock, e);
                throw e;
            }
            if (blockBuffer.remaining() <= 0) {
                currBlock++;
                if (currBlock >= reader.getDataBlockIndexReader().getRootBlockCount()) {
                    // we have reached the end of the file
                    currBlock = 0;
                    blockBuffer = null;
                    return false;
                }
                blockBuffer = reader.readBlockBuffer(currBlock, cacheBlocks, pread, isCompaction);
                currKeyLen = blockBuffer.getInt();
                currValueLen = blockBuffer.getInt();
                blockFetches++;
                return true;
            }

            currKeyLen = blockBuffer.getInt();
            currValueLen = blockBuffer.getInt();
            return true;
        }

        @Override
        protected int blockSeek(byte[] key, int offset, int length, boolean seekBefore) {
            int klen, vlen;
            int lastLen = 0;
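            // Linear scan through the block: each entry is a 4-byte key
            // length, a 4-byte value length, then the key and value bytes.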
            do {
                klen = blockBuffer.getInt();
                vlen = blockBuffer.getInt();
                int comp = reader.getComparator().compare(key, offset, length, blockBuffer.array(),
                        blockBuffer.arrayOffset() + blockBuffer.position(), klen);
                if (comp == 0) {
                    if (seekBefore) {
                        blockBuffer.position(blockBuffer.position() - lastLen - 16);
                        currKeyLen = blockBuffer.getInt();
                        currValueLen = blockBuffer.getInt();
                        return 1; // non exact match.
                    }
                    currKeyLen = klen;
                    currValueLen = vlen;
                    return 0; // indicate exact match
                }
                if (comp < 0) {
                    // The sought key falls before the current entry: go back
                    // over the 8 bytes of lengths just read, plus the previous
                    // entry's data (lastLen) and its 8-byte length header.
                    blockBuffer.position(blockBuffer.position() - lastLen - 16);
                    currKeyLen = blockBuffer.getInt();
                    currValueLen = blockBuffer.getInt();
                    return 1;
                }
                blockBuffer.position(blockBuffer.position() + klen + vlen);
                lastLen = klen + vlen;
            } while (blockBuffer.remaining() > 0);

            // We ran off the end of the block, so step back to the last entry.
            // The 8 below is intentionally different from the 16s above: no
            // extra length header has just been read here, so we only back
            // over the last entry's data (lastLen) plus its 8-byte header.
            blockBuffer.position(blockBuffer.position() - lastLen - 8);
            currKeyLen = blockBuffer.getInt();
            currValueLen = blockBuffer.getInt();
            return 1; // didn't exactly find it.
        }

        @Override
        public String getKeyString() {
            return Bytes.toStringBinary(blockBuffer.array(), blockBuffer.arrayOffset() + blockBuffer.position(),
                    currKeyLen);
        }

        @Override
        public String getValueString() {
            return Bytes.toString(blockBuffer.array(),
                    blockBuffer.arrayOffset() + blockBuffer.position() + currKeyLen, currValueLen);
        }

        @Override
        public boolean seekTo() throws IOException {
            if (reader.getDataBlockIndexReader().isEmpty()) {
                return false;
            }
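            // Already positioned somewhere in the first block: just rewind it
            // instead of fetching it again from cache or disk.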
            if (blockBuffer != null && currBlock == 0) {
                blockBuffer.rewind();
                currKeyLen = blockBuffer.getInt();
                currValueLen = blockBuffer.getInt();
                return true;
            }
            currBlock = 0;
            blockBuffer = reader.readBlockBuffer(currBlock, cacheBlocks, pread, isCompaction);
            currKeyLen = blockBuffer.getInt();
            currValueLen = blockBuffer.getInt();
            blockFetches++;
            return true;
        }

        @Override
        protected void loadBlock(int block, boolean rewind) throws IOException {
            if (blockBuffer == null) {
                blockBuffer = reader.readBlockBuffer(block, cacheBlocks, pread, isCompaction);
                currBlock = block;
                blockFetches++;
            } else {
                if (block != currBlock) {
                    blockBuffer = reader.readBlockBuffer(block, cacheBlocks, pread, isCompaction);
                    currBlock = block;
                    blockFetches++;
                } else {
                    // We are already in the right block; just reposition to seek again.
                    if (rewind) {
                        blockBuffer.rewind();
                    } else {
                        // Go back over the 4-byte key length + 4-byte value
                        // length just read = 8 bytes.
                        blockBuffer.position(blockBuffer.position() - 8);
                    }
                }
            }
        }

    }

    @Override
    public HFileBlock readBlock(long offset, long onDiskBlockSize, boolean cacheBlock, boolean pread,
            boolean isCompaction, BlockType expectedBlockType) {
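        // Addressing blocks by file offset is a version-2 reader API; the v1
        // reader addresses blocks by index through readBlockBuffer() instead.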
        throw new UnsupportedOperationException();
    }

    @Override
    public DataInput getGeneralBloomFilterMetadata() throws IOException {
        // We shouldn't cache Bloom filter blocks; otherwise the server would
        // abort when splitting. See HBASE-6479.
        ByteBuffer buf = getMetaBlock(HFileWriterV1.BLOOM_FILTER_META_KEY, false);
        if (buf == null)
            return null;
        ByteArrayInputStream bais = new ByteArrayInputStream(buf.array(), buf.arrayOffset(), buf.limit());
        return new DataInputStream(bais);
    }

    @Override
    public DataInput getDeleteBloomFilterMetadata() throws IOException {
        return null;
    }

    @Override
    public boolean isFileInfoLoaded() {
        return fileInfoLoaded;
    }

}
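
As a companion to the scanner methods above, here is a hedged point-lookup sketch (the row name and the scanner variable are hypothetical; KeyValue.createFirstOnRow and Bytes are assumed from the same HBase version). It interprets seekTo()'s return value as documented in blockSeek():

// Build a search key that sorts before any real cell of the row.
byte[] searchKey = KeyValue.createFirstOnRow(Bytes.toBytes("row-42")).getKey();
int result = scanner.seekTo(searchKey);
if (result == -1) {
    // searchKey sorts before the first key in the file
} else if (result == 0) {
    // exact match: scanner.getKeyValue() is the cell we asked for
} else {
    // result == 1: positioned at the greatest key smaller than searchKey
}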