List of usage examples for java.nio ByteBuffer arrayOffset
public final int arrayOffset()
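arrayOffset() returns the offset within the buffer's backing array of the buffer's first element, so array-based access must add arrayOffset() to the buffer index, which is what every example below does via arrayOffset() + position() or arrayOffset() + startPos. A minimal, self-contained sketch of that pattern follows; the class name and sample bytes are illustrative only, not taken from any of the projects listed here.

import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;

// Minimal sketch: translating a ByteBuffer index into a backing-array index
// before handing the array to an API that expects (byte[], offset, length).
public class ArrayOffsetExample {
  public static void main(String[] args) {
    byte[] backing = "HEADERpayload".getBytes(StandardCharsets.US_ASCII);

    // Wrap only the "payload" part and slice it; the resulting view starts
    // at index 6 of the backing array, so its arrayOffset() is 6.
    ByteBuffer buf = ByteBuffer.wrap(backing, 6, 7).slice();

    if (buf.hasArray()) {
      // Index 0 of the view is not index 0 of the array: translate with
      // arrayOffset() + position().
      int start = buf.arrayOffset() + buf.position();
      String payload = new String(buf.array(), start, buf.remaining(), StandardCharsets.US_ASCII);
      System.out.println(payload); // prints "payload"
    }
    // Direct (and read-only) buffers have no accessible array; array() and
    // arrayOffset() throw, so guard with hasArray() first.
  }
}

The hasArray() guard mirrors the check used in the Bytes.toStringBinary example further below: only buffers backed by an accessible array may be read through array()/arrayOffset().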
From source file:org.apache.hadoop.hive.serde2.compression.SnappyCompDe.java
/**
 * Decompress a set of columns from a ByteBuffer and update the position of the buffer.
 *
 * @param input A ByteBuffer with `position` indicating the starting point of the compressed chunk.
 * @param chunkSize The length of the compressed chunk to be decompressed from the input buffer.
 *
 * @return The set of columns.
 */
@Override
public ColumnBuffer[] decompress(ByteBuffer input, int chunkSize) {
  int startPos = input.position();
  try {
    // Read the footer.
    int footerSize = input.getInt(startPos + chunkSize - 4);
    Iterator<Integer> compressedSize = Arrays
        .asList(ArrayUtils.toObject(Snappy.uncompressIntArray(input.array(),
            input.arrayOffset() + startPos + chunkSize - Integer.SIZE / Byte.SIZE - footerSize,
            footerSize)))
        .iterator();

    // Read the header.
    int[] dataType = readIntegers(compressedSize.next(), input);
    int numOfCols = dataType.length;

    // Read the columns.
    ColumnBuffer[] outputCols = new ColumnBuffer[numOfCols];
    for (int colNum = 0; colNum < numOfCols; colNum++) {
      byte[] nulls = readBytes(compressedSize.next(), input);

      switch (TTypeId.findByValue(dataType[colNum])) {
      case BOOLEAN_TYPE: {
        int numRows = input.getInt();
        byte[] vals = readBytes(compressedSize.next(), input);
        BitSet bsBools = BitSet.valueOf(vals);

        boolean[] bools = new boolean[numRows];
        for (int rowNum = 0; rowNum < numRows; rowNum++) {
          bools[rowNum] = bsBools.get(rowNum);
        }

        TBoolColumn column = new TBoolColumn(Arrays.asList(ArrayUtils.toObject(bools)), ByteBuffer.wrap(nulls));
        outputCols[colNum] = new ColumnBuffer(TColumn.boolVal(column));
        break;
      }
      case TINYINT_TYPE: {
        byte[] vals = readBytes(compressedSize.next(), input);
        TByteColumn column = new TByteColumn(Arrays.asList(ArrayUtils.toObject(vals)), ByteBuffer.wrap(nulls));
        outputCols[colNum] = new ColumnBuffer(TColumn.byteVal(column));
        break;
      }
      case SMALLINT_TYPE: {
        short[] vals = readShorts(compressedSize.next(), input);
        TI16Column column = new TI16Column(Arrays.asList(ArrayUtils.toObject(vals)), ByteBuffer.wrap(nulls));
        outputCols[colNum] = new ColumnBuffer(TColumn.i16Val(column));
        break;
      }
      case INT_TYPE: {
        int[] vals = readIntegers(compressedSize.next(), input);
        TI32Column column = new TI32Column(Arrays.asList(ArrayUtils.toObject(vals)), ByteBuffer.wrap(nulls));
        outputCols[colNum] = new ColumnBuffer(TColumn.i32Val(column));
        break;
      }
      case BIGINT_TYPE: {
        long[] vals = readLongs(compressedSize.next(), input);
        TI64Column column = new TI64Column(Arrays.asList(ArrayUtils.toObject(vals)), ByteBuffer.wrap(nulls));
        outputCols[colNum] = new ColumnBuffer(TColumn.i64Val(column));
        break;
      }
      case DOUBLE_TYPE: {
        double[] vals = readDoubles(compressedSize.next(), input);
        TDoubleColumn column = new TDoubleColumn(Arrays.asList(ArrayUtils.toObject(vals)), ByteBuffer.wrap(nulls));
        outputCols[colNum] = new ColumnBuffer(TColumn.doubleVal(column));
        break;
      }
      case BINARY_TYPE: {
        int[] rowSize = readIntegers(compressedSize.next(), input);

        ByteBuffer flattenedData = ByteBuffer.wrap(readBytes(compressedSize.next(), input));

        ByteBuffer[] vals = new ByteBuffer[rowSize.length];
        for (int rowNum = 0; rowNum < rowSize.length; rowNum++) {
          vals[rowNum] = ByteBuffer.wrap(flattenedData.array(), flattenedData.position(), rowSize[rowNum]);
          flattenedData.position(flattenedData.position() + rowSize[rowNum]);
        }

        TBinaryColumn column = new TBinaryColumn(Arrays.asList(vals), ByteBuffer.wrap(nulls));
        outputCols[colNum] = new ColumnBuffer(TColumn.binaryVal(column));
        break;
      }
      case STRING_TYPE: {
        int[] rowSize = readIntegers(compressedSize.next(), input);

        ByteBuffer flattenedData = ByteBuffer.wrap(readBytes(compressedSize.next(), input));

        String[] vals = new String[rowSize.length];
        for (int rowNum = 0; rowNum < rowSize.length; rowNum++) {
          vals[rowNum] = new String(flattenedData.array(), flattenedData.position(), rowSize[rowNum],
              StandardCharsets.UTF_8);
          flattenedData.position(flattenedData.position() + rowSize[rowNum]);
        }

        TStringColumn column = new TStringColumn(Arrays.asList(vals), ByteBuffer.wrap(nulls));
        outputCols[colNum] = new ColumnBuffer(TColumn.stringVal(column));
        break;
      }
      default:
        throw new IllegalStateException(
            "Unrecognized column type: " + TTypeId.findByValue(dataType[colNum]));
      }
    }
    input.position(startPos + chunkSize);
    return outputCols;
  } catch (IOException e) {
    e.printStackTrace();
    return (ColumnBuffer[]) null;
  }
}
From source file:org.apache.hadoop.hbase.io.hfile.TestHFileBlock.java
protected void testPreviousOffsetInternals() throws IOException {
  for (Compression.Algorithm algo : COMPRESSION_ALGORITHMS) {
    for (boolean pread : BOOLEAN_VALUES) {
      for (boolean cacheOnWrite : BOOLEAN_VALUES) {
        Random rand = defaultRandom();
        LOG.info("testPreviousOffset:Compression algorithm: " + algo + ", pread=" + pread
            + ", cacheOnWrite=" + cacheOnWrite);
        Path path = new Path(TEST_UTIL.getDataTestDir(), "prev_offset");
        List<Long> expectedOffsets = new ArrayList<Long>();
        List<Long> expectedPrevOffsets = new ArrayList<Long>();
        List<BlockType> expectedTypes = new ArrayList<BlockType>();
        List<ByteBuffer> expectedContents = cacheOnWrite ? new ArrayList<ByteBuffer>() : null;
        long totalSize = writeBlocks(rand, algo, path, expectedOffsets, expectedPrevOffsets,
            expectedTypes, expectedContents);

        FSDataInputStream is = fs.open(path);
        HFileContext meta = new HFileContextBuilder().withHBaseCheckSum(true)
            .withIncludesMvcc(includesMemstoreTS).withIncludesTags(includesTag)
            .withCompression(algo).build();
        HFileBlock.FSReader hbr = new HFileBlock.FSReaderV2(is, totalSize, meta);
        long curOffset = 0;
        for (int i = 0; i < NUM_TEST_BLOCKS; ++i) {
          if (!pread) {
            assertEquals(is.getPos(), curOffset + (i == 0 ? 0 : HConstants.HFILEBLOCK_HEADER_SIZE));
          }

          assertEquals(expectedOffsets.get(i).longValue(), curOffset);
          if (detailedLogging) {
            LOG.info("Reading block #" + i + " at offset " + curOffset);
          }
          HFileBlock b = hbr.readBlockData(curOffset, -1, -1, pread);
          if (detailedLogging) {
            LOG.info("Block #" + i + ": " + b);
          }
          assertEquals("Invalid block #" + i + "'s type:", expectedTypes.get(i), b.getBlockType());
          assertEquals("Invalid previous block offset for block " + i + " of " + "type "
              + b.getBlockType() + ":", (long) expectedPrevOffsets.get(i), b.getPrevBlockOffset());
          b.sanityCheck();
          assertEquals(curOffset, b.getOffset());

          // Now re-load this block knowing the on-disk size. This tests a
          // different branch in the loader.
          HFileBlock b2 = hbr.readBlockData(curOffset, b.getOnDiskSizeWithHeader(), -1, pread);
          b2.sanityCheck();

          assertEquals(b.getBlockType(), b2.getBlockType());
          assertEquals(b.getOnDiskSizeWithoutHeader(), b2.getOnDiskSizeWithoutHeader());
          assertEquals(b.getOnDiskSizeWithHeader(), b2.getOnDiskSizeWithHeader());
          assertEquals(b.getUncompressedSizeWithoutHeader(), b2.getUncompressedSizeWithoutHeader());
          assertEquals(b.getPrevBlockOffset(), b2.getPrevBlockOffset());
          assertEquals(curOffset, b2.getOffset());
          assertEquals(b.getBytesPerChecksum(), b2.getBytesPerChecksum());
          assertEquals(b.getOnDiskDataSizeWithHeader(), b2.getOnDiskDataSizeWithHeader());
          assertEquals(0, HFile.getChecksumFailuresCount());

          curOffset += b.getOnDiskSizeWithHeader();

          if (cacheOnWrite) {
            // In the cache-on-write mode we store uncompressed bytes so we
            // can compare them to what was read by the block reader.
            // b's buffer has header + data + checksum while
            // expectedContents have header + data only
            ByteBuffer bufRead = b.getBufferWithHeader();
            ByteBuffer bufExpected = expectedContents.get(i);
            boolean bytesAreCorrect = Bytes.compareTo(bufRead.array(), bufRead.arrayOffset(),
                bufRead.limit() - b.totalChecksumBytes(), bufExpected.array(),
                bufExpected.arrayOffset(), bufExpected.limit()) == 0;
            String wrongBytesMsg = "";

            if (!bytesAreCorrect) {
              // Optimization: only construct an error message in case we
              // will need it.
              wrongBytesMsg = "Expected bytes in block #" + i + " (algo=" + algo + ", pread=" + pread
                  + ", cacheOnWrite=" + cacheOnWrite + "):\n";
              wrongBytesMsg += Bytes.toStringBinary(bufExpected.array(), bufExpected.arrayOffset(),
                  Math.min(32, bufExpected.limit()))
                  + ", actual:\n"
                  + Bytes.toStringBinary(bufRead.array(), bufRead.arrayOffset(),
                      Math.min(32, bufRead.limit()));
              if (detailedLogging) {
                LOG.warn("expected header" + HFileBlock.toStringHeader(bufExpected)
                    + "\nfound header" + HFileBlock.toStringHeader(bufRead));
                LOG.warn("bufread offset " + bufRead.arrayOffset() + " limit " + bufRead.limit()
                    + " expected offset " + bufExpected.arrayOffset() + " limit "
                    + bufExpected.limit());
                LOG.warn(wrongBytesMsg);
              }
            }
            assertTrue(wrongBytesMsg, bytesAreCorrect);
          }
        }

        assertEquals(curOffset, fs.getFileStatus(path).getLen());
        is.close();
      }
    }
  }
}
From source file:com.ery.ertc.estorm.util.Bytes.java
/**
 * Converts the given byte buffer to a printable representation, from the
 * index 0 (inclusive) to the limit (exclusive), regardless of the current
 * position. The position and the other index parameters are not changed.
 *
 * @param buf
 *          a byte buffer
 * @return a string representation of the buffer's binary contents
 * @see #toBytes(ByteBuffer)
 * @see #getBytes(ByteBuffer)
 */
public static String toStringBinary(ByteBuffer buf) {
  if (buf == null)
    return "null";
  if (buf.hasArray()) {
    return toStringBinary(buf.array(), buf.arrayOffset(), buf.limit());
  }
  return toStringBinary(toBytes(buf));
}
From source file:org.apache.hadoop.hbase.io.hfile.LruBlockCache.java
private int compare(Cacheable left, Cacheable right) {
  ByteBuffer l = ByteBuffer.allocate(left.getSerializedLength());
  left.serialize(l);
  ByteBuffer r = ByteBuffer.allocate(right.getSerializedLength());
  right.serialize(r);
  return Bytes.compareTo(l.array(), l.arrayOffset(), l.limit(),
      r.array(), r.arrayOffset(), r.limit());
}
From source file:org.apache.hadoop.hbase.io.hfile.TestHFileWriterV3.java
private void writeDataAndReadFromHFile(Path hfilePath, Algorithm compressAlgo, int entryCount,
    boolean findMidKey, boolean useTags) throws IOException {
  HFileContext context = new HFileContextBuilder().withBlockSize(4096).withIncludesTags(useTags)
      .withCompression(compressAlgo).build();
  HFileWriterV3 writer = (HFileWriterV3) new HFileWriterV3.WriterFactoryV3(conf, new CacheConfig(conf))
      .withPath(fs, hfilePath).withFileContext(context).withComparator(KeyValue.COMPARATOR).create();

  Random rand = new Random(9713312); // Just a fixed seed.
  List<KeyValue> keyValues = new ArrayList<KeyValue>(entryCount);

  for (int i = 0; i < entryCount; ++i) {
    byte[] keyBytes = TestHFileWriterV2.randomOrderedKey(rand, i);

    // A random-length random value.
    byte[] valueBytes = TestHFileWriterV2.randomValue(rand);
    KeyValue keyValue = null;
    if (useTags) {
      ArrayList<Tag> tags = new ArrayList<Tag>();
      for (int j = 0; j < 1 + rand.nextInt(4); j++) {
        byte[] tagBytes = new byte[16];
        rand.nextBytes(tagBytes);
        tags.add(new Tag((byte) 1, tagBytes));
      }
      keyValue = new KeyValue(keyBytes, null, null, HConstants.LATEST_TIMESTAMP, valueBytes, tags);
    } else {
      keyValue = new KeyValue(keyBytes, null, null, HConstants.LATEST_TIMESTAMP, valueBytes);
    }
    writer.append(keyValue);
    keyValues.add(keyValue);
  }

  // Add in an arbitrary order. They will be sorted lexicographically by
  // the key.
  writer.appendMetaBlock("CAPITAL_OF_USA", new Text("Washington, D.C."));
  writer.appendMetaBlock("CAPITAL_OF_RUSSIA", new Text("Moscow"));
  writer.appendMetaBlock("CAPITAL_OF_FRANCE", new Text("Paris"));

  writer.close();

  FSDataInputStream fsdis = fs.open(hfilePath);

  long fileSize = fs.getFileStatus(hfilePath).getLen();
  FixedFileTrailer trailer = FixedFileTrailer.readFromStream(fsdis, fileSize);

  assertEquals(3, trailer.getMajorVersion());
  assertEquals(entryCount, trailer.getEntryCount());
  HFileContext meta = new HFileContextBuilder().withCompression(compressAlgo).withIncludesMvcc(false)
      .withIncludesTags(useTags).withHBaseCheckSum(true).build();
  HFileBlock.FSReader blockReader = new HFileBlock.FSReaderV2(fsdis, fileSize, meta);
  // Comparator class name is stored in the trailer in version 2.
  KVComparator comparator = trailer.createComparator();
  HFileBlockIndex.BlockIndexReader dataBlockIndexReader = new HFileBlockIndex.BlockIndexReader(comparator,
      trailer.getNumDataIndexLevels());
  HFileBlockIndex.BlockIndexReader metaBlockIndexReader = new HFileBlockIndex.BlockIndexReader(
      KeyValue.RAW_COMPARATOR, 1);

  HFileBlock.BlockIterator blockIter = blockReader.blockRange(trailer.getLoadOnOpenDataOffset(),
      fileSize - trailer.getTrailerSize());
  // Data index. We also read statistics about the block index written after
  // the root level.
  dataBlockIndexReader.readMultiLevelIndexRoot(blockIter.nextBlockWithBlockType(BlockType.ROOT_INDEX),
      trailer.getDataIndexCount());

  if (findMidKey) {
    byte[] midkey = dataBlockIndexReader.midkey();
    assertNotNull("Midkey should not be null", midkey);
  }

  // Meta index.
  metaBlockIndexReader.readRootIndex(blockIter.nextBlockWithBlockType(BlockType.ROOT_INDEX).getByteStream(),
      trailer.getMetaIndexCount());
  // File info
  FileInfo fileInfo = new FileInfo();
  fileInfo.read(blockIter.nextBlockWithBlockType(BlockType.FILE_INFO).getByteStream());
  byte[] keyValueFormatVersion = fileInfo.get(HFileWriterV3.KEY_VALUE_VERSION);
  boolean includeMemstoreTS = keyValueFormatVersion != null && Bytes.toInt(keyValueFormatVersion) > 0;

  // Counters for the number of key/value pairs and the number of blocks
  int entriesRead = 0;
  int blocksRead = 0;
  long memstoreTS = 0;

  // Scan blocks the way the reader would scan them
  fsdis.seek(0);
  long curBlockPos = 0;
  while (curBlockPos <= trailer.getLastDataBlockOffset()) {
    HFileBlock block = blockReader.readBlockData(curBlockPos, -1, -1, false);
    assertEquals(BlockType.DATA, block.getBlockType());
    ByteBuffer buf = block.getBufferWithoutHeader();
    int keyLen = -1;
    while (buf.hasRemaining()) {
      keyLen = buf.getInt();

      int valueLen = buf.getInt();

      byte[] key = new byte[keyLen];
      buf.get(key);

      byte[] value = new byte[valueLen];
      buf.get(value);
      byte[] tagValue = null;
      if (useTags) {
        int tagLen = buf.getShort();
        tagValue = new byte[tagLen];
        buf.get(tagValue);
      }

      if (includeMemstoreTS) {
        ByteArrayInputStream byte_input = new ByteArrayInputStream(buf.array(),
            buf.arrayOffset() + buf.position(), buf.remaining());
        DataInputStream data_input = new DataInputStream(byte_input);

        memstoreTS = WritableUtils.readVLong(data_input);
        buf.position(buf.position() + WritableUtils.getVIntSize(memstoreTS));
      }

      // A brute-force check to see that all keys and values are correct.
      assertTrue(Bytes.compareTo(key, keyValues.get(entriesRead).getKey()) == 0);
      assertTrue(Bytes.compareTo(value, keyValues.get(entriesRead).getValue()) == 0);
      if (useTags) {
        assertNotNull(tagValue);
        KeyValue tkv = keyValues.get(entriesRead);
        assertEquals(tagValue.length, tkv.getTagsLength());
        assertTrue(Bytes.compareTo(tagValue, 0, tagValue.length, tkv.getTagsArray(),
            tkv.getTagsOffset(), tkv.getTagsLength()) == 0);
      }

      ++entriesRead;
    }
    ++blocksRead;
    curBlockPos += block.getOnDiskSizeWithHeader();
  }
  LOG.info("Finished reading: entries=" + entriesRead + ", blocksRead=" + blocksRead);
  assertEquals(entryCount, entriesRead);

  // Meta blocks. We can scan until the load-on-open data offset (which is
  // the root block index offset in version 2) because we are not testing
  // intermediate-level index blocks here.
  int metaCounter = 0;
  while (fsdis.getPos() < trailer.getLoadOnOpenDataOffset()) {
    LOG.info("Current offset: " + fsdis.getPos() + ", scanning until "
        + trailer.getLoadOnOpenDataOffset());
    HFileBlock block = blockReader.readBlockData(curBlockPos, -1, -1, false);
    assertEquals(BlockType.META, block.getBlockType());
    Text t = new Text();
    ByteBuffer buf = block.getBufferWithoutHeader();
    if (Writables.getWritable(buf.array(), buf.arrayOffset(), buf.limit(), t) == null) {
      throw new IOException(
          "Failed to deserialize block " + this + " into a " + t.getClass().getSimpleName());
    }
    Text expectedText = (metaCounter == 0 ? new Text("Paris")
        : metaCounter == 1 ? new Text("Moscow") : new Text("Washington, D.C."));
    assertEquals(expectedText, t);
    LOG.info("Read meta block data: " + t);
    ++metaCounter;
    curBlockPos += block.getOnDiskSizeWithHeader();
  }

  fsdis.close();
}
From source file:org.apache.spark.sql.execution.datasources.parquet.UnsafeRowParquetRecordReader.java
private void decodeBinaryBatch(int col, int num) throws IOException {
  for (int n = 0; n < num; ++n) {
    if (columnReaders[col].next()) {
      ByteBuffer bytes = columnReaders[col].nextBinary().toByteBuffer();
      int len = bytes.remaining();
      if (originalTypes[col] == OriginalType.UTF8) {
        UTF8String str = UTF8String.fromBytes(bytes.array(), bytes.arrayOffset() + bytes.position(), len);
        rowWriters[n].write(col, str);
      } else {
        rowWriters[n].write(col, bytes.array(), bytes.arrayOffset() + bytes.position(), len);
      }
      rows[n].setNotNullAt(col);
    } else {
      rows[n].setNullAt(col);
    }
  }
}
From source file:io.warp10.continuum.gts.GTSDecoder.java
/**
 * Return an encoder with all data from the last value retrieved (post call to next())
 * onwards
 *
 * @param safeMetadata Is it safe to reuse the Metadata?
 */
public GTSEncoder getEncoder(boolean safeMetadata) throws IOException {
  if (!nextCalled) {
    throw new IOException(
        "Can only get an encoder for a decoder on which 'next' was called at least once.");
  }

  //
  // Copy the remaining data into a new ByteBuffer
  //

  ByteBuffer bb = this.buffer.duplicate();
  bb.position(this.position);

  int offset = 0;
  int len = bb.remaining();
  byte[] bytes = null;

  if (bb.hasArray()) {
    bytes = bb.array();
    offset = bb.arrayOffset() + bb.position();
  } else {
    bytes = new byte[bb.remaining()];
    bb.get(bytes);
  }

  //
  // Create an encoder with the same base timestamp and wrapping key, providing a sizing hint
  //

  GTSEncoder encoder = new GTSEncoder(this.baseTimestamp, this.wrappingKey, bb.remaining());

  if (safeMetadata) {
    encoder.safeSetMetadata(this.getMetadata());
  } else {
    encoder.setMetadata(this.getMetadata());
  }

  //
  // Set initial values
  //

  encoder.initialize(this.previousLastTimestamp, this.previousLastGeoXPPoint, this.previousLastElevation,
      this.previousLastLongValue, this.previousLastDoubleValue, this.previousLastBDValue,
      this.previousLastStringValue);

  //
  // Copy the encoded data
  //

  encoder.stream.write(bytes, offset, len);

  //
  // Put the encoder into 'safe delta' mode, because we don't know what the last
  // value/ts/elevation/location were, we can't use delta encoding for now
  //

  encoder.safeDelta();

  encoder.setCount(this.count);

  return encoder;
}
From source file:com.rapleaf.hank.storage.curly.CurlyReader.java
@Override
// Note: the buffer in result must be at least readBufferSize long
public void readRecord(ByteBuffer location, ReaderResult result) throws IOException {
  // Attempt to load value from the cache
  if (cache != null && loadValueFromCache(location, result)) {
    return;
  }
  // Deep copy the location if caching is activated
  ByteBuffer locationDeepCopy = cache != null ? Bytes.byteBufferDeepCopy(location) : null;
  if (blockCompressionCodec == null) {
    // When not using block compression, location just contains an offset. Decode it.
    long recordFileOffset = EncodingHelper.decodeLittleEndianFixedWidthLong(location);
    // Directly read record into result
    readRecordAtOffset(recordFileOffset, result);
  } else {
    // When using block compression, location contains the block's offset and an offset in the block. Decode them.
    long recordFileOffset = EncodingHelper.decodeLittleEndianFixedWidthLong(location.array(),
        location.arrayOffset() + location.position(), offsetNumBytes);
    long offsetInBlock = EncodingHelper.decodeLittleEndianFixedWidthLong(location.array(),
        location.arrayOffset() + location.position() + offsetNumBytes, offsetInBlockNumBytes);
    // Read in the compressed block into the result
    readRecordAtOffset(recordFileOffset, result);

    // Decompress the block
    InputStream blockInputStream = new ByteArrayInputStream(result.getBuffer().array(),
        result.getBuffer().arrayOffset() + result.getBuffer().position(), result.getBuffer().remaining());
    // Build an InputStream corresponding to the compression codec
    InputStream decompressedBlockInputStream;
    switch (blockCompressionCodec) {
    case GZIP:
      decompressedBlockInputStream = new GZIPInputStream(blockInputStream);
      break;
    case SLOW_IDENTITY:
      decompressedBlockInputStream = new BufferedInputStream(blockInputStream);
      break;
    default:
      throw new RuntimeException("Unknown block compression codec: " + blockCompressionCodec);
    }
    // Decompress into the specialized result buffer
    IOUtils.copy(decompressedBlockInputStream, result.getDecompressionOutputStream());
    ByteBuffer decompressedBlockByteBuffer = result.getDecompressionOutputStream().getByteBuffer();
    // Position ourselves at the beginning of the actual value
    decompressedBlockByteBuffer.position((int) offsetInBlock);
    // Determine result value size
    int valueSize = EncodingHelper.decodeLittleEndianVarInt(decompressedBlockByteBuffer);
    // We can exactly wrap our value
    ByteBuffer value = ByteBuffer.wrap(decompressedBlockByteBuffer.array(),
        decompressedBlockByteBuffer.arrayOffset() + decompressedBlockByteBuffer.position(), valueSize);
    // Copy decompressed result into final result buffer
    result.requiresBufferSize(valueSize);
    result.getBuffer().clear();
    result.getBuffer().put(value);
    result.getBuffer().flip();
  }
  // Store result in cache if needed
  if (cache != null) {
    addValueToCache(locationDeepCopy, result.getBuffer());
  }
}
From source file:org.apache.hadoop.hbase.io.hfile.TestHFileBlockIndex.java
/**
 * Testing block index through the HFile writer/reader APIs. Allows to test
 * setting index block size through configuration, intermediate-level index
 * blocks, and caching index blocks on write.
 *
 * @throws IOException
 */
//@Test
public void testHFileWriterAndReader() throws IOException {
  Path hfilePath = new Path(TEST_UTIL.getDataTestDir(), "hfile_for_block_index");
  CacheConfig cacheConf = new CacheConfig(conf);
  BlockCache blockCache = cacheConf.getBlockCache();

  for (int testI = 0; testI < INDEX_CHUNK_SIZES.length; ++testI) {
    int indexBlockSize = INDEX_CHUNK_SIZES[testI];
    int expectedNumLevels = EXPECTED_NUM_LEVELS[testI];
    LOG.info("Index block size: " + indexBlockSize + ", compression: " + compr);
    // Evict all blocks that were cached-on-write by the previous invocation.
    blockCache.evictBlocksByHfileName(hfilePath.getName());

    conf.setInt(HFileBlockIndex.MAX_CHUNK_SIZE_KEY, indexBlockSize);
    Set<String> keyStrSet = new HashSet<String>();
    byte[][] keys = new byte[NUM_KV][];
    byte[][] values = new byte[NUM_KV][];

    // Write the HFile
    {
      HFileContext meta = new HFileContextBuilder().withBlockSize(SMALL_BLOCK_SIZE)
          .withCompression(compr).build();
      HFile.Writer writer = HFile.getWriterFactory(conf, cacheConf).withPath(fs, hfilePath)
          .withFileContext(meta).create();
      Random rand = new Random(19231737);

      for (int i = 0; i < NUM_KV; ++i) {
        byte[] row = TestHFileWriterV2.randomOrderedKey(rand, i);

        // Key will be interpreted by KeyValue.KEY_COMPARATOR
        byte[] k = KeyValueUtil.createFirstOnRow(row, 0, row.length, row, 0, 0, row, 0, 0).getKey();

        byte[] v = TestHFileWriterV2.randomValue(rand);
        writer.append(k, v, HConstants.EMPTY_BYTE_ARRAY);
        keys[i] = k;
        values[i] = v;
        keyStrSet.add(Bytes.toStringBinary(k));
        if (i > 0) {
          assertTrue(KeyValue.COMPARATOR.compareFlatKey(keys[i - 1], keys[i]) < 0);
        }
      }

      writer.close();
    }

    // Read the HFile
    HFile.Reader reader = HFile.createReader(fs, hfilePath, cacheConf, conf);
    assertEquals(expectedNumLevels, reader.getTrailer().getNumDataIndexLevels());

    assertTrue(Bytes.equals(keys[0], reader.getFirstKey()));
    assertTrue(Bytes.equals(keys[NUM_KV - 1], reader.getLastKey()));
    LOG.info("Last key: " + Bytes.toStringBinary(keys[NUM_KV - 1]));

    for (boolean pread : new boolean[] { false, true }) {
      HFileScanner scanner = reader.getScanner(true, pread);
      for (int i = 0; i < NUM_KV; ++i) {
        checkSeekTo(keys, scanner, i);
        checkKeyValue("i=" + i, keys[i], values[i], scanner.getKey(), scanner.getValue());
      }
      assertTrue(scanner.seekTo());
      for (int i = NUM_KV - 1; i >= 0; --i) {
        checkSeekTo(keys, scanner, i);
        checkKeyValue("i=" + i, keys[i], values[i], scanner.getKey(), scanner.getValue());
      }
    }

    // Manually compute the mid-key and validate it.
    HFileReaderV2 reader2 = (HFileReaderV2) reader;
    HFileBlock.FSReader fsReader = reader2.getUncachedBlockReader();

    HFileBlock.BlockIterator iter = fsReader.blockRange(0, reader.getTrailer().getLoadOnOpenDataOffset());

    HFileBlock block;
    List<byte[]> blockKeys = new ArrayList<byte[]>();
    while ((block = iter.nextBlock()) != null) {
      if (block.getBlockType() != BlockType.LEAF_INDEX)
        return;
      ByteBuffer b = block.getBufferReadOnly();
      int n = b.getInt();
      // One int for the number of items, and n + 1 for the secondary index.
      int entriesOffset = Bytes.SIZEOF_INT * (n + 2);

      // Get all the keys from the leaf index block.
      for (int i = 0; i < n; ++i) {
        int keyRelOffset = b.getInt(Bytes.SIZEOF_INT * (i + 1));
        int nextKeyRelOffset = b.getInt(Bytes.SIZEOF_INT * (i + 2));
        int keyLen = nextKeyRelOffset - keyRelOffset;
        int keyOffset = b.arrayOffset() + entriesOffset + keyRelOffset
            + HFileBlockIndex.SECONDARY_INDEX_ENTRY_OVERHEAD;
        byte[] blockKey = Arrays.copyOfRange(b.array(), keyOffset, keyOffset + keyLen);
        String blockKeyStr = Bytes.toString(blockKey);
        blockKeys.add(blockKey);

        // If the first key of the block is not among the keys written, we
        // are not parsing the non-root index block format correctly.
        assertTrue("Invalid block key from leaf-level block: " + blockKeyStr,
            keyStrSet.contains(blockKeyStr));
      }
    }

    // Validate the mid-key.
    assertEquals(Bytes.toStringBinary(blockKeys.get((blockKeys.size() - 1) / 2)),
        Bytes.toStringBinary(reader.midkey()));

    assertEquals(UNCOMPRESSED_INDEX_SIZES[testI], reader.getTrailer().getUncompressedDataIndexSize());

    reader.close();
    reader2.close();
  }
}
From source file:org.apache.hadoop.hbase.regionserver.TestStoreFile.java
private void checkHalfHFile(final HRegionFileSystem regionFs, final StoreFile f) throws IOException {
  byte[] midkey = f.createReader().midkey();
  KeyValue midKV = KeyValue.createKeyValueFromKey(midkey);
  byte[] midRow = midKV.getRow();
  // Create top split.
  HRegionInfo topHri = new HRegionInfo(regionFs.getRegionInfo().getTable(), null, midRow);
  Path topPath = splitStoreFile(regionFs, topHri, TEST_FAMILY, f, midRow, true);
  // Create bottom split.
  HRegionInfo bottomHri = new HRegionInfo(regionFs.getRegionInfo().getTable(), midRow, null);
  Path bottomPath = splitStoreFile(regionFs, bottomHri, TEST_FAMILY, f, midRow, false);
  // Make readers on top and bottom.
  StoreFile.Reader top = new StoreFile(this.fs, topPath, conf, cacheConf, BloomType.NONE).createReader();
  StoreFile.Reader bottom = new StoreFile(this.fs, bottomPath, conf, cacheConf, BloomType.NONE)
      .createReader();
  ByteBuffer previous = null;
  LOG.info("Midkey: " + midKV.toString());
  ByteBuffer bbMidkeyBytes = ByteBuffer.wrap(midkey);
  try {
    // Now make two HalfMapFiles and assert they can read the full backing
    // file, one from the top and the other from the bottom.
    // Test bottom half first.
    // Now test reading from the top.
    boolean first = true;
    ByteBuffer key = null;
    HFileScanner topScanner = top.getScanner(false, false);
    while ((!topScanner.isSeeked() && topScanner.seekTo())
        || (topScanner.isSeeked() && topScanner.next())) {
      key = topScanner.getKey();

      if (topScanner.getReader().getComparator().compareFlatKey(key.array(), key.arrayOffset(),
          key.limit(), midkey, 0, midkey.length) < 0) {
        fail("key=" + Bytes.toStringBinary(key) + " < midkey=" + Bytes.toStringBinary(midkey));
      }
      if (first) {
        first = false;
        LOG.info("First in top: " + Bytes.toString(Bytes.toBytes(key)));
      }
    }
    LOG.info("Last in top: " + Bytes.toString(Bytes.toBytes(key)));

    first = true;
    HFileScanner bottomScanner = bottom.getScanner(false, false);
    while ((!bottomScanner.isSeeked() && bottomScanner.seekTo()) || bottomScanner.next()) {
      previous = bottomScanner.getKey();
      key = bottomScanner.getKey();
      if (first) {
        first = false;
        LOG.info("First in bottom: " + Bytes.toString(Bytes.toBytes(previous)));
      }
      assertTrue(key.compareTo(bbMidkeyBytes) < 0);
    }
    if (previous != null) {
      LOG.info("Last in bottom: " + Bytes.toString(Bytes.toBytes(previous)));
    }
    // Remove references.
    regionFs.cleanupDaughterRegion(topHri);
    regionFs.cleanupDaughterRegion(bottomHri);

    // Next test using a midkey that does not exist in the file.
    // First, do a key that is < than first key. Ensure splits behave
    // properly.
    byte[] badmidkey = Bytes.toBytes(" .");
    assertTrue(fs.exists(f.getPath()));
    topPath = splitStoreFile(regionFs, topHri, TEST_FAMILY, f, badmidkey, true);
    bottomPath = splitStoreFile(regionFs, bottomHri, TEST_FAMILY, f, badmidkey, false);

    assertNull(bottomPath);

    top = new StoreFile(this.fs, topPath, conf, cacheConf, BloomType.NONE).createReader();
    // Now read from the top.
    first = true;
    topScanner = top.getScanner(false, false);
    while ((!topScanner.isSeeked() && topScanner.seekTo()) || topScanner.next()) {
      key = topScanner.getKey();
      assertTrue(topScanner.getReader().getComparator().compareFlatKey(key.array(), key.arrayOffset(),
          key.limit(), badmidkey, 0, badmidkey.length) >= 0);
      if (first) {
        first = false;
        KeyValue keyKV = KeyValue.createKeyValueFromKey(key);
        LOG.info("First top when key < bottom: " + keyKV);
        String tmp = Bytes.toString(keyKV.getRow());
        for (int i = 0; i < tmp.length(); i++) {
          assertTrue(tmp.charAt(i) == 'a');
        }
      }
    }
    KeyValue keyKV = KeyValue.createKeyValueFromKey(key);
    LOG.info("Last top when key < bottom: " + keyKV);
    String tmp = Bytes.toString(keyKV.getRow());
    for (int i = 0; i < tmp.length(); i++) {
      assertTrue(tmp.charAt(i) == 'z');
    }
    // Remove references.
    regionFs.cleanupDaughterRegion(topHri);
    regionFs.cleanupDaughterRegion(bottomHri);

    // Test when badkey is > than last key in file ('||' > 'zz').
    badmidkey = Bytes.toBytes("|||");
    topPath = splitStoreFile(regionFs, topHri, TEST_FAMILY, f, badmidkey, true);
    bottomPath = splitStoreFile(regionFs, bottomHri, TEST_FAMILY, f, badmidkey, false);
    assertNull(topPath);

    bottom = new StoreFile(this.fs, bottomPath, conf, cacheConf, BloomType.NONE).createReader();
    first = true;
    bottomScanner = bottom.getScanner(false, false);
    while ((!bottomScanner.isSeeked() && bottomScanner.seekTo()) || bottomScanner.next()) {
      key = bottomScanner.getKey();
      if (first) {
        first = false;
        keyKV = KeyValue.createKeyValueFromKey(key);
        LOG.info("First bottom when key > top: " + keyKV);
        tmp = Bytes.toString(keyKV.getRow());
        for (int i = 0; i < tmp.length(); i++) {
          assertTrue(tmp.charAt(i) == 'a');
        }
      }
    }
    keyKV = KeyValue.createKeyValueFromKey(key);
    LOG.info("Last bottom when key > top: " + keyKV);
    for (int i = 0; i < tmp.length(); i++) {
      assertTrue(Bytes.toString(keyKV.getRow()).charAt(i) == 'z');
    }
  } finally {
    if (top != null) {
      top.close(true); // evict since we are about to delete the file
    }
    if (bottom != null) {
      bottom.close(true); // evict since we are about to delete the file
    }
    fs.delete(f.getPath(), true);
  }
}