Java tutorial: Apache HBase TestHFileOutputFormat (MapReduce HFile bulk load)
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapreduce;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNotSame;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

import java.io.IOException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Random;
import java.util.Set;
import java.util.concurrent.Callable;

import junit.framework.Assert;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.CompatibilitySingletonFactory;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.HadoopShims;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.LargeTests;
import org.apache.hadoop.hbase.PerformanceEvaluation;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFile.Reader;
import org.apache.hadoop.hbase.regionserver.BloomType;
import org.apache.hadoop.hbase.regionserver.HStore;
import org.apache.hadoop.hbase.regionserver.StoreFile;
import org.apache.hadoop.hbase.regionserver.TimeRangeTracker;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.util.Threads;
import org.apache.hadoop.hbase.util.Writables;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.junit.Ignore;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.mockito.Mockito;

/**
 * Simple test for {@link KeyValueSortReducer} and {@link HFileOutputFormat}.
 * Sets up and runs a mapreduce job that writes hfile output.
 * Creates a few inner classes to implement splits and an inputformat that
 * emits keys and values like those of {@link PerformanceEvaluation}.
 */
@Category(LargeTests.class)
public class TestHFileOutputFormat {

  private final static int ROWSPERSPLIT = 1024;

  private static final byte[][] FAMILIES = {
      Bytes.add(PerformanceEvaluation.FAMILY_NAME, Bytes.toBytes("-A")),
      Bytes.add(PerformanceEvaluation.FAMILY_NAME, Bytes.toBytes("-B")) };
  private static final TableName TABLE_NAME = TableName.valueOf("TestTable");

  private HBaseTestingUtility util = new HBaseTestingUtility();
  private static Log LOG = LogFactory.getLog(TestHFileOutputFormat.class);

  /**
   * Simple mapper that makes KeyValue output.
   */
  static class RandomKVGeneratingMapper
      extends Mapper<NullWritable, NullWritable, ImmutableBytesWritable, KeyValue> {

    private int keyLength;
    private static final int KEYLEN_DEFAULT = 10;
    private static final String KEYLEN_CONF = "randomkv.key.length";

    private int valLength;
    private static final int VALLEN_DEFAULT = 10;
    private static final String VALLEN_CONF = "randomkv.val.length";

    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
      super.setup(context);

      Configuration conf = context.getConfiguration();
      keyLength = conf.getInt(KEYLEN_CONF, KEYLEN_DEFAULT);
      valLength = conf.getInt(VALLEN_CONF, VALLEN_DEFAULT);
    }

    protected void map(NullWritable n1, NullWritable n2,
        Mapper<NullWritable, NullWritable, ImmutableBytesWritable, KeyValue>.Context context)
        throws java.io.IOException, InterruptedException {

      byte keyBytes[] = new byte[keyLength];
      byte valBytes[] = new byte[valLength];

      int taskId = context.getTaskAttemptID().getTaskID().getId();
      assert taskId < Byte.MAX_VALUE : "Unit tests don't support > 127 tasks!";

      Random random = new Random();
      for (int i = 0; i < ROWSPERSPLIT; i++) {
        random.nextBytes(keyBytes);
        // Ensure that unique tasks generate unique keys
        keyBytes[keyLength - 1] = (byte) (taskId & 0xFF);
        random.nextBytes(valBytes);
        ImmutableBytesWritable key = new ImmutableBytesWritable(keyBytes);

        for (byte[] family : TestHFileOutputFormat.FAMILIES) {
          KeyValue kv = new KeyValue(keyBytes, family,
              PerformanceEvaluation.QUALIFIER_NAME, valBytes);
          context.write(key, kv);
        }
      }
    }
  }

  private void setupRandomGeneratorMapper(Job job) {
    job.setInputFormatClass(NMapInputFormat.class);
    job.setMapperClass(RandomKVGeneratingMapper.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(KeyValue.class);
  }

  /**
   * Test that {@link HFileOutputFormat} RecordWriter amends timestamps if
   * passed a keyvalue whose timestamp is {@link HConstants#LATEST_TIMESTAMP}.
   * @see <a href="https://issues.apache.org/jira/browse/HBASE-2615">HBASE-2615</a>
   */
  @Test
  public void test_LATEST_TIMESTAMP_isReplaced() throws Exception {
    Configuration conf = new Configuration(this.util.getConfiguration());
    RecordWriter<ImmutableBytesWritable, KeyValue> writer = null;
    TaskAttemptContext context = null;
    Path dir = util.getDataTestDir("test_LATEST_TIMESTAMP_isReplaced");
    try {
      Job job = new Job(conf);
      FileOutputFormat.setOutputPath(job, dir);
      context = createTestTaskAttemptContext(job);
      HFileOutputFormat hof = new HFileOutputFormat();
      writer = hof.getRecordWriter(context);
      final byte[] b = Bytes.toBytes("b");

      // Test 1. Pass a KV that has a ts of LATEST_TIMESTAMP. It should be
      // changed by call to write. Check all in kv is same but ts.
      KeyValue kv = new KeyValue(b, b, b);
      KeyValue original = kv.clone();
      writer.write(new ImmutableBytesWritable(), kv);
      assertFalse(original.equals(kv));
      assertTrue(Bytes.equals(original.getRow(), kv.getRow()));
      assertTrue(CellUtil.matchingColumn(original, kv.getFamily(), kv.getQualifier()));
      assertNotSame(original.getTimestamp(), kv.getTimestamp());
      assertNotSame(HConstants.LATEST_TIMESTAMP, kv.getTimestamp());

      // Test 2. Now test passing a kv that has an explicit ts. It should not be
      // changed by the call to write.
      kv = new KeyValue(b, b, b, kv.getTimestamp() - 1, b);
      original = kv.clone();
      writer.write(new ImmutableBytesWritable(), kv);
      assertTrue(original.equals(kv));
    } finally {
      if (writer != null && context != null) writer.close(context);
      dir.getFileSystem(conf).delete(dir, true);
    }
  }

  private TaskAttemptContext createTestTaskAttemptContext(final Job job)
      throws IOException, Exception {
    HadoopShims hadoop = CompatibilitySingletonFactory.getInstance(HadoopShims.class);
    TaskAttemptContext context = hadoop.createTestTaskAttemptContext(
        job, "attempt_200707121733_0001_m_000000_0");
    return context;
  }

  /*
   * Test that {@link HFileOutputFormat} creates an HFile with TIMERANGE
   * metadata used by time-restricted scans.
   */
  @Test
  public void test_TIMERANGE() throws Exception {
    Configuration conf = new Configuration(this.util.getConfiguration());
    RecordWriter<ImmutableBytesWritable, KeyValue> writer = null;
    TaskAttemptContext context = null;
    Path dir = util.getDataTestDir("test_TIMERANGE_present");
    LOG.info("Timerange dir writing to dir: " + dir);
    try {
      // build a record writer using HFileOutputFormat
      Job job = new Job(conf);
      FileOutputFormat.setOutputPath(job, dir);
      context = createTestTaskAttemptContext(job);
      HFileOutputFormat hof = new HFileOutputFormat();
      writer = hof.getRecordWriter(context);

      // Pass two key values with explicit timestamps
      final byte[] b = Bytes.toBytes("b");

      // value 1 with timestamp 2000
      KeyValue kv = new KeyValue(b, b, b, 2000, b);
      KeyValue original = kv.clone();
      writer.write(new ImmutableBytesWritable(), kv);
      assertEquals(original, kv);

      // value 2 with timestamp 1000
      kv = new KeyValue(b, b, b, 1000, b);
      original = kv.clone();
      writer.write(new ImmutableBytesWritable(), kv);
      assertEquals(original, kv);

      // verify that the file has the proper FileInfo.
      writer.close(context);

      // the generated file lives 1 directory down from the attempt directory
      // and is the only file, e.g.
      // _attempt__0000_r_000000_0/b/1979617994050536795
      FileSystem fs = FileSystem.get(conf);
      Path attemptDirectory = hof.getDefaultWorkFile(context, "").getParent();
      FileStatus[] sub1 = fs.listStatus(attemptDirectory);
      FileStatus[] file = fs.listStatus(sub1[0].getPath());

      // open as HFile Reader and pull out TIMERANGE FileInfo.
      HFile.Reader rd = HFile.createReader(fs, file[0].getPath(), new CacheConfig(conf), conf);
      Map<byte[], byte[]> finfo = rd.loadFileInfo();
      byte[] range = finfo.get("TIMERANGE".getBytes());
      assertNotNull(range);

      // unmarshall and check values.
      TimeRangeTracker timeRangeTracker = new TimeRangeTracker();
      Writables.copyWritable(range, timeRangeTracker);
      LOG.info(timeRangeTracker.getMinimumTimestamp() + "...."
          + timeRangeTracker.getMaximumTimestamp());
      assertEquals(1000, timeRangeTracker.getMinimumTimestamp());
      assertEquals(2000, timeRangeTracker.getMaximumTimestamp());
      rd.close();
    } finally {
      if (writer != null && context != null) writer.close(context);
      dir.getFileSystem(conf).delete(dir, true);
    }
  }

  /**
   * Run small MR job.
   */
  @Test
  public void testWritingPEData() throws Exception {
    Configuration conf = util.getConfiguration();
    Path testDir = util.getDataTestDirOnTestFS("testWritingPEData");
    FileSystem fs = testDir.getFileSystem(conf);

    // Set down this value or we OOME in eclipse.
    conf.setInt("mapreduce.task.io.sort.mb", 20);
    // Write a few files.
    conf.setLong(HConstants.HREGION_MAX_FILESIZE, 64 * 1024);

    Job job = new Job(conf, "testWritingPEData");
    setupRandomGeneratorMapper(job);
    // This partitioner doesn't work well for number keys but using it anyway
    // just to demonstrate how to configure it.
    byte[] startKey = new byte[RandomKVGeneratingMapper.KEYLEN_DEFAULT];
    byte[] endKey = new byte[RandomKVGeneratingMapper.KEYLEN_DEFAULT];

    Arrays.fill(startKey, (byte) 0);
    Arrays.fill(endKey, (byte) 0xff);

    job.setPartitionerClass(SimpleTotalOrderPartitioner.class);
    // Set start and end rows for partitioner.
    SimpleTotalOrderPartitioner.setStartKey(job.getConfiguration(), startKey);
    SimpleTotalOrderPartitioner.setEndKey(job.getConfiguration(), endKey);
    job.setReducerClass(KeyValueSortReducer.class);
    job.setOutputFormatClass(HFileOutputFormat.class);
    job.setNumReduceTasks(4);
    job.getConfiguration().setStrings("io.serializations", conf.get("io.serializations"),
        MutationSerialization.class.getName(), ResultSerialization.class.getName(),
        KeyValueSerialization.class.getName());

    FileOutputFormat.setOutputPath(job, testDir);
    assertTrue(job.waitForCompletion(false));
    FileStatus[] files = fs.listStatus(testDir);
    assertTrue(files.length > 0);
  }

  @Test
  public void testJobConfiguration() throws Exception {
    Job job = new Job(util.getConfiguration());
    job.setWorkingDirectory(util.getDataTestDir("testJobConfiguration"));
    HTable table = Mockito.mock(HTable.class);
    setupMockStartKeys(table);
    HFileOutputFormat.configureIncrementalLoad(job, table);
    assertEquals(job.getNumReduceTasks(), 4);
  }

  private byte[][] generateRandomStartKeys(int numKeys) {
    Random random = new Random();
    byte[][] ret = new byte[numKeys][];
    // first region start key is always empty
    ret[0] = HConstants.EMPTY_BYTE_ARRAY;
    for (int i = 1; i < numKeys; i++) {
      ret[i] = PerformanceEvaluation.generateData(random, PerformanceEvaluation.VALUE_LENGTH);
    }
    return ret;
  }

  @Test
  public void testMRIncrementalLoad() throws Exception {
    LOG.info("\nStarting test testMRIncrementalLoad\n");
    doIncrementalLoadTest(false);
  }

  @Test
  public void testMRIncrementalLoadWithSplit() throws Exception {
    LOG.info("\nStarting test testMRIncrementalLoadWithSplit\n");
    doIncrementalLoadTest(true);
  }

  private void doIncrementalLoadTest(boolean shouldChangeRegions) throws Exception {
    util = new HBaseTestingUtility();
    Configuration conf = util.getConfiguration();
    byte[][] startKeys = generateRandomStartKeys(5);
    HBaseAdmin admin = null;
    try {
      util.startMiniCluster();
      Path testDir =
          util.getDataTestDirOnTestFS("testLocalMRIncrementalLoad");
      admin = new HBaseAdmin(conf);
      HTable table = util.createTable(TABLE_NAME, FAMILIES);
      assertEquals("Should start with empty table", 0, util.countRows(table));
      int numRegions =
          util.createMultiRegions(util.getConfiguration(), table, FAMILIES[0], startKeys);
      assertEquals("Should make 5 regions", numRegions, 5);

      // Generate the bulk load files
      util.startMiniMapReduceCluster();
      runIncrementalPELoad(conf, table, testDir);
      // This doesn't write into the table, just makes files
      assertEquals("HFOF should not touch actual table", 0, util.countRows(table));

      // Make sure that a directory was created for every CF
      int dir = 0;
      for (FileStatus f : testDir.getFileSystem(conf).listStatus(testDir)) {
        for (byte[] family : FAMILIES) {
          if (Bytes.toString(family).equals(f.getPath().getName())) {
            ++dir;
          }
        }
      }
      assertEquals("Column family not found in FS.", FAMILIES.length, dir);

      // handle the split case
      if (shouldChangeRegions) {
        LOG.info("Changing regions in table");
        admin.disableTable(table.getTableName());
        while (util.getMiniHBaseCluster().getMaster().getAssignmentManager().getRegionStates()
            .isRegionsInTransition()) {
          Threads.sleep(200);
          LOG.info("Waiting on table to finish disabling");
        }
        byte[][] newStartKeys = generateRandomStartKeys(15);
        util.createMultiRegions(util.getConfiguration(), table, FAMILIES[0], newStartKeys);
        admin.enableTable(table.getTableName());
        while (table.getRegionLocations().size() != 15
            || !admin.isTableAvailable(table.getTableName())) {
          Thread.sleep(200);
          LOG.info("Waiting for new region assignment to happen");
        }
      }

      // Perform the actual load
      new LoadIncrementalHFiles(conf).doBulkLoad(testDir, table);

      // Ensure data shows up
      int expectedRows = NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
      assertEquals("LoadIncrementalHFiles should put expected data in table",
          expectedRows, util.countRows(table));
      Scan scan = new Scan();
      ResultScanner results = table.getScanner(scan);
      for (Result res : results) {
        assertEquals(FAMILIES.length, res.rawCells().length);
        Cell first = res.rawCells()[0];
        for (Cell kv : res.rawCells()) {
          assertTrue(CellUtil.matchingRow(first, kv));
          assertTrue(Bytes.equals(CellUtil.cloneValue(first), CellUtil.cloneValue(kv)));
        }
      }
      results.close();
      String tableDigestBefore = util.checksumRows(table);

      // Cause regions to reopen
      admin.disableTable(TABLE_NAME);
      while (!admin.isTableDisabled(TABLE_NAME)) {
        Thread.sleep(200);
        LOG.info("Waiting for table to disable");
      }
      admin.enableTable(TABLE_NAME);
      util.waitTableAvailable(TABLE_NAME.getName());
      assertEquals("Data should remain after reopening of regions",
          tableDigestBefore, util.checksumRows(table));
    } finally {
      if (admin != null) admin.close();
      util.shutdownMiniMapReduceCluster();
      util.shutdownMiniCluster();
    }
  }

  private void runIncrementalPELoad(Configuration conf, HTable table, Path outDir)
      throws Exception {
    Job job = new Job(conf, "testLocalMRIncrementalLoad");
    job.setWorkingDirectory(util.getDataTestDirOnTestFS("runIncrementalPELoad"));
    job.getConfiguration().setStrings("io.serializations", conf.get("io.serializations"),
        MutationSerialization.class.getName(), ResultSerialization.class.getName(),
        KeyValueSerialization.class.getName());
    setupRandomGeneratorMapper(job);
    HFileOutputFormat.configureIncrementalLoad(job, table);
    FileOutputFormat.setOutputPath(job, outDir);

    Assert.assertFalse(util.getTestFileSystem().exists(outDir));

    assertEquals(table.getRegionLocations().size(), job.getNumReduceTasks());

    assertTrue(job.waitForCompletion(true));
  }

  /**
   * Test for {@link
   * HFileOutputFormat#configureCompression(HTable, Configuration)} and
   * {@link HFileOutputFormat#createFamilyCompressionMap(Configuration)}.
   * Tests that the compression map is correctly serialized into
   * and deserialized from configuration
   *
   * @throws IOException
   */
  @Test
  public void testSerializeDeserializeFamilyCompressionMap() throws IOException {
    for (int numCfs = 0; numCfs <= 3; numCfs++) {
      Configuration conf = new Configuration(this.util.getConfiguration());
      Map<String, Compression.Algorithm> familyToCompression =
          getMockColumnFamiliesForCompression(numCfs);
      HTable table = Mockito.mock(HTable.class);
      setupMockColumnFamiliesForCompression(table, familyToCompression);
      HFileOutputFormat.configureCompression(table, conf);

      // read back family specific compression setting from the configuration
      Map<byte[], Algorithm> retrievedFamilyToCompressionMap =
          HFileOutputFormat.createFamilyCompressionMap(conf);

      // test that we have a value for all column families that matches with the
      // used mock values
      for (Entry<String, Algorithm> entry : familyToCompression.entrySet()) {
        assertEquals("Compression configuration incorrect for column family:" + entry.getKey(),
            entry.getValue(), retrievedFamilyToCompressionMap.get(entry.getKey().getBytes()));
      }
    }
  }

  private void setupMockColumnFamiliesForCompression(HTable table,
      Map<String, Compression.Algorithm> familyToCompression) throws IOException {
    HTableDescriptor mockTableDescriptor = new HTableDescriptor(TABLE_NAME);
    for (Entry<String, Compression.Algorithm> entry : familyToCompression.entrySet()) {
      mockTableDescriptor.addFamily(new HColumnDescriptor(entry.getKey())
          .setMaxVersions(1)
          .setCompressionType(entry.getValue())
          .setBlockCacheEnabled(false)
          .setTimeToLive(0));
    }
    Mockito.doReturn(mockTableDescriptor).when(table).getTableDescriptor();
  }

  /**
   * @return a map from column family names to compression algorithms for
   *         testing column family compression. Column family names have special characters
   */
  private Map<String, Compression.Algorithm> getMockColumnFamiliesForCompression(int numCfs) {
    Map<String, Compression.Algorithm> familyToCompression =
        new HashMap<String, Compression.Algorithm>();
    // use column family names having special characters
    if (numCfs-- > 0) {
      familyToCompression.put("Family1!@#!@#&", Compression.Algorithm.LZO);
    }
    if (numCfs-- > 0) {
      familyToCompression.put("Family2=asdads&!AASD", Compression.Algorithm.SNAPPY);
    }
    if (numCfs-- > 0) {
      familyToCompression.put("Family2=asdads&!AASD", Compression.Algorithm.GZ);
    }
    if (numCfs-- > 0) {
      familyToCompression.put("Family3", Compression.Algorithm.NONE);
    }
    return familyToCompression;
  }

  /**
   * Test for {@link HFileOutputFormat#configureBloomType(HTable, Configuration)} and
   * {@link HFileOutputFormat#createFamilyBloomTypeMap(Configuration)}.
   * Tests that the bloom type map is correctly serialized into
   * and deserialized from configuration
   *
   * @throws IOException
   */
  @Test
  public void testSerializeDeserializeFamilyBloomTypeMap() throws IOException {
    for (int numCfs = 0; numCfs <= 2; numCfs++) {
      Configuration conf = new Configuration(this.util.getConfiguration());
      Map<String, BloomType> familyToBloomType = getMockColumnFamiliesForBloomType(numCfs);
      HTable table = Mockito.mock(HTable.class);
      setupMockColumnFamiliesForBloomType(table, familyToBloomType);
      HFileOutputFormat.configureBloomType(table, conf);

      // read back family specific bloom type settings from the configuration
      Map<byte[], BloomType> retrievedFamilyToBloomTypeMap =
          HFileOutputFormat.createFamilyBloomTypeMap(conf);

      // test that we have a value for all column families that matches with the
      // used mock values
      for (Entry<String, BloomType> entry : familyToBloomType.entrySet()) {
        assertEquals("BloomType configuration incorrect for column family:" + entry.getKey(),
            entry.getValue(), retrievedFamilyToBloomTypeMap.get(entry.getKey().getBytes()));
      }
    }
  }

  private void setupMockColumnFamiliesForBloomType(HTable table,
      Map<String, BloomType> familyToBloomType) throws IOException {
    HTableDescriptor mockTableDescriptor = new HTableDescriptor(TABLE_NAME);
    for (Entry<String, BloomType> entry : familyToBloomType.entrySet()) {
      mockTableDescriptor.addFamily(new HColumnDescriptor(entry.getKey())
          .setMaxVersions(1)
          .setBloomFilterType(entry.getValue())
          .setBlockCacheEnabled(false)
          .setTimeToLive(0));
    }
    Mockito.doReturn(mockTableDescriptor).when(table).getTableDescriptor();
  }

  /**
   * @return a map from column family names to bloom filter types for
   *         testing column family bloom filter settings. Column family names have special characters
   */
  private Map<String, BloomType> getMockColumnFamiliesForBloomType(int numCfs) {
    Map<String, BloomType> familyToBloomType = new HashMap<String, BloomType>();
    // use column family names having special characters
    if (numCfs-- > 0) {
      familyToBloomType.put("Family1!@#!@#&", BloomType.ROW);
    }
    if (numCfs-- > 0) {
      familyToBloomType.put("Family2=asdads&!AASD", BloomType.ROWCOL);
    }
    if (numCfs-- > 0) {
      familyToBloomType.put("Family3", BloomType.NONE);
    }
    return familyToBloomType;
  }

  /**
   * Test for {@link HFileOutputFormat#configureBlockSize(HTable, Configuration)} and
   * {@link HFileOutputFormat#createFamilyBlockSizeMap(Configuration)}.
   * Tests that the block size map is correctly serialized into
   * and deserialized from configuration
   *
   * @throws IOException
   */
  @Test
  public void testSerializeDeserializeFamilyBlockSizeMap() throws IOException {
    for (int numCfs = 0; numCfs <= 3; numCfs++) {
      Configuration conf = new Configuration(this.util.getConfiguration());
      Map<String, Integer> familyToBlockSize = getMockColumnFamiliesForBlockSize(numCfs);
      HTable table = Mockito.mock(HTable.class);
      setupMockColumnFamiliesForBlockSize(table, familyToBlockSize);
      HFileOutputFormat.configureBlockSize(table, conf);

      // read back family specific block size settings from the configuration
      Map<byte[], Integer> retrievedFamilyToBlockSizeMap =
          HFileOutputFormat.createFamilyBlockSizeMap(conf);

      // test that we have a value for all column families that matches with the
      // used mock values
      for (Entry<String, Integer> entry : familyToBlockSize.entrySet()) {
        assertEquals("BlockSize configuration incorrect for column family:" + entry.getKey(),
            entry.getValue(), retrievedFamilyToBlockSizeMap.get(entry.getKey().getBytes()));
      }
    }
  }

  private void setupMockColumnFamiliesForBlockSize(HTable table,
      Map<String, Integer> familyToBlockSize) throws IOException {
    HTableDescriptor mockTableDescriptor = new HTableDescriptor(TABLE_NAME);
    for (Entry<String, Integer> entry : familyToBlockSize.entrySet()) {
      mockTableDescriptor.addFamily(new HColumnDescriptor(entry.getKey())
          .setMaxVersions(1)
          .setBlocksize(entry.getValue())
          .setBlockCacheEnabled(false)
          .setTimeToLive(0));
    }
    Mockito.doReturn(mockTableDescriptor).when(table).getTableDescriptor();
  }

  /**
   * @return a map from column family names to block sizes for
   *         testing column family block size settings. Column family names have special characters
   */
  private Map<String, Integer> getMockColumnFamiliesForBlockSize(int numCfs) {
    Map<String, Integer> familyToBlockSize = new HashMap<String, Integer>();
    // use column family names having special characters
    if (numCfs-- > 0) {
      familyToBlockSize.put("Family1!@#!@#&", 1234);
    }
    if (numCfs-- > 0) {
      familyToBlockSize.put("Family2=asdads&!AASD", Integer.MAX_VALUE);
    }
    if (numCfs-- > 0) {
      familyToBlockSize.put("Family2=asdads&!AASD", Integer.MAX_VALUE);
    }
    if (numCfs-- > 0) {
      familyToBlockSize.put("Family3", 0);
    }
    return familyToBlockSize;
  }

  /**
   * Test for {@link HFileOutputFormat#configureDataBlockEncoding(HTable, Configuration)} and
   * {@link HFileOutputFormat#createFamilyDataBlockEncodingMap(Configuration)}.
   * Tests that the data block encoding map is correctly serialized into
   * and deserialized from configuration
   *
   * @throws IOException
   */
  @Test
  public void testSerializeDeserializeFamilyDataBlockEncodingMap() throws IOException {
    for (int numCfs = 0; numCfs <= 3; numCfs++) {
      Configuration conf = new Configuration(this.util.getConfiguration());
      Map<String, DataBlockEncoding> familyToDataBlockEncoding =
          getMockColumnFamiliesForDataBlockEncoding(numCfs);
      HTable table = Mockito.mock(HTable.class);
      setupMockColumnFamiliesForDataBlockEncoding(table, familyToDataBlockEncoding);
      HFileOutputFormat.configureDataBlockEncoding(table, conf);

      // read back family specific data block encoding settings from the
      // configuration
      Map<byte[], DataBlockEncoding> retrievedFamilyToDataBlockEncodingMap =
          HFileOutputFormat.createFamilyDataBlockEncodingMap(conf);

      // test that we have a value for all column families that matches with the
      // used mock values
      for (Entry<String, DataBlockEncoding> entry : familyToDataBlockEncoding.entrySet()) {
        assertEquals("DataBlockEncoding configuration incorrect for column family:"
            + entry.getKey(), entry.getValue(),
            retrievedFamilyToDataBlockEncodingMap.get(entry.getKey().getBytes()));
      }
    }
  }

  private void setupMockColumnFamiliesForDataBlockEncoding(HTable table,
      Map<String, DataBlockEncoding> familyToDataBlockEncoding) throws IOException {
    HTableDescriptor mockTableDescriptor = new HTableDescriptor(TABLE_NAME);
    for (Entry<String, DataBlockEncoding> entry : familyToDataBlockEncoding.entrySet()) {
      mockTableDescriptor.addFamily(new HColumnDescriptor(entry.getKey())
          .setMaxVersions(1)
          .setDataBlockEncoding(entry.getValue())
          .setBlockCacheEnabled(false)
          .setTimeToLive(0));
    }
    Mockito.doReturn(mockTableDescriptor).when(table).getTableDescriptor();
  }

  /**
   * @return a map from column family names to data block encodings for
   *         testing column family data block encoding settings.
   *         Column family names have special characters
   */
  private Map<String, DataBlockEncoding> getMockColumnFamiliesForDataBlockEncoding(int numCfs) {
    Map<String, DataBlockEncoding> familyToDataBlockEncoding =
        new HashMap<String, DataBlockEncoding>();
    // use column family names having special characters
    if (numCfs-- > 0) {
      familyToDataBlockEncoding.put("Family1!@#!@#&", DataBlockEncoding.DIFF);
    }
    if (numCfs-- > 0) {
      familyToDataBlockEncoding.put("Family2=asdads&!AASD", DataBlockEncoding.FAST_DIFF);
    }
    if (numCfs-- > 0) {
      familyToDataBlockEncoding.put("Family2=asdads&!AASD", DataBlockEncoding.PREFIX);
    }
    if (numCfs-- > 0) {
      familyToDataBlockEncoding.put("Family3", DataBlockEncoding.NONE);
    }
    return familyToDataBlockEncoding;
  }

  private void setupMockStartKeys(HTable table) throws IOException {
    byte[][] mockKeys = new byte[][] { HConstants.EMPTY_BYTE_ARRAY, Bytes.toBytes("aaa"),
        Bytes.toBytes("ggg"), Bytes.toBytes("zzz") };
    Mockito.doReturn(mockKeys).when(table).getStartKeys();
  }

  /**
   * Test that {@link HFileOutputFormat} RecordWriter uses compression and
   * bloom filter settings from the column family descriptor
   */
  @Test
  public void testColumnFamilySettings() throws Exception {
    Configuration conf = new Configuration(this.util.getConfiguration());
    RecordWriter<ImmutableBytesWritable, KeyValue> writer = null;
    TaskAttemptContext context = null;
    Path dir = util.getDataTestDir("testColumnFamilySettings");

    // Setup table descriptor
    HTable table = Mockito.mock(HTable.class);
    HTableDescriptor htd = new HTableDescriptor(TABLE_NAME);
    Mockito.doReturn(htd).when(table).getTableDescriptor();
    for (HColumnDescriptor hcd : this.util.generateColumnDescriptors()) {
      htd.addFamily(hcd);
    }

    // set up the table to return some mock keys
    setupMockStartKeys(table);

    try {
      // partial map red setup to get an operational writer for testing
      // We turn off the sequence file compression, because DefaultCodec
      // pollutes the GZip codec pool with an incompatible compressor.
conf.set("io.seqfile.compression.type", "NONE"); Job job = new Job(conf, "testLocalMRIncrementalLoad"); job.setWorkingDirectory(util.getDataTestDirOnTestFS("testColumnFamilySettings")); setupRandomGeneratorMapper(job); HFileOutputFormat.configureIncrementalLoad(job, table); FileOutputFormat.setOutputPath(job, dir); context = createTestTaskAttemptContext(job); HFileOutputFormat hof = new HFileOutputFormat(); writer = hof.getRecordWriter(context); // write out random rows writeRandomKeyValues(writer, context, htd.getFamiliesKeys(), ROWSPERSPLIT); writer.close(context); // Make sure that a directory was created for every CF FileSystem fs = dir.getFileSystem(conf); // commit so that the filesystem has one directory per column family hof.getOutputCommitter(context).commitTask(context); hof.getOutputCommitter(context).commitJob(context); FileStatus[] families = FSUtils.listStatus(fs, dir, new FSUtils.FamilyDirFilter(fs)); assertEquals(htd.getFamilies().size(), families.length); for (FileStatus f : families) { String familyStr = f.getPath().getName(); HColumnDescriptor hcd = htd.getFamily(Bytes.toBytes(familyStr)); // verify that the compression on this file matches the configured // compression Path dataFilePath = fs.listStatus(f.getPath())[0].getPath(); Reader reader = HFile.createReader(fs, dataFilePath, new CacheConfig(conf), conf); Map<byte[], byte[]> fileInfo = reader.loadFileInfo(); byte[] bloomFilter = fileInfo.get(StoreFile.BLOOM_FILTER_TYPE_KEY); if (bloomFilter == null) bloomFilter = Bytes.toBytes("NONE"); assertEquals( "Incorrect bloom filter used for column family " + familyStr + "(reader: " + reader + ")", hcd.getBloomFilterType(), BloomType.valueOf(Bytes.toString(bloomFilter))); assertEquals( "Incorrect compression used for column family " + familyStr + "(reader: " + reader + ")", hcd.getCompression(), reader.getFileContext().getCompression()); } } finally { dir.getFileSystem(conf).delete(dir, true); } } /** * Write random values to the writer assuming a table created using * {@link #FAMILIES} as column family descriptors */ private void writeRandomKeyValues(RecordWriter<ImmutableBytesWritable, KeyValue> writer, TaskAttemptContext context, Set<byte[]> families, int numRows) throws IOException, InterruptedException { byte keyBytes[] = new byte[Bytes.SIZEOF_INT]; int valLength = 10; byte valBytes[] = new byte[valLength]; int taskId = context.getTaskAttemptID().getTaskID().getId(); assert taskId < Byte.MAX_VALUE : "Unit tests dont support > 127 tasks!"; Random random = new Random(); for (int i = 0; i < numRows; i++) { Bytes.putInt(keyBytes, 0, i); random.nextBytes(valBytes); ImmutableBytesWritable key = new ImmutableBytesWritable(keyBytes); for (byte[] family : families) { KeyValue kv = new KeyValue(keyBytes, family, PerformanceEvaluation.QUALIFIER_NAME, valBytes); writer.write(key, kv); } } } /** * This test is to test the scenario happened in HBASE-6901. * All files are bulk loaded and excluded from minor compaction. * Without the fix of HBASE-6901, an ArrayIndexOutOfBoundsException * will be thrown. 
*/ @Ignore("Flakey: See HBASE-9051") @Test public void testExcludeAllFromMinorCompaction() throws Exception { Configuration conf = util.getConfiguration(); conf.setInt("hbase.hstore.compaction.min", 2); generateRandomStartKeys(5); try { util.startMiniCluster(); final FileSystem fs = util.getDFSCluster().getFileSystem(); HBaseAdmin admin = new HBaseAdmin(conf); HTable table = util.createTable(TABLE_NAME, FAMILIES); assertEquals("Should start with empty table", 0, util.countRows(table)); // deep inspection: get the StoreFile dir final Path storePath = HStore.getStoreHomedir(FSUtils.getTableDir(FSUtils.getRootDir(conf), TABLE_NAME), admin.getTableRegions(TABLE_NAME).get(0), FAMILIES[0]); assertEquals(0, fs.listStatus(storePath).length); // Generate two bulk load files conf.setBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude", true); util.startMiniMapReduceCluster(); for (int i = 0; i < 2; i++) { Path testDir = util.getDataTestDirOnTestFS("testExcludeAllFromMinorCompaction_" + i); runIncrementalPELoad(conf, table, testDir); // Perform the actual load new LoadIncrementalHFiles(conf).doBulkLoad(testDir, table); } // Ensure data shows up int expectedRows = 2 * NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT; assertEquals("LoadIncrementalHFiles should put expected data in table", expectedRows, util.countRows(table)); // should have a second StoreFile now assertEquals(2, fs.listStatus(storePath).length); // minor compactions shouldn't get rid of the file admin.compact(TABLE_NAME.getName()); try { quickPoll(new Callable<Boolean>() { public Boolean call() throws Exception { return fs.listStatus(storePath).length == 1; } }, 5000); throw new IOException("SF# = " + fs.listStatus(storePath).length); } catch (AssertionError ae) { // this is expected behavior } // a major compaction should work though admin.majorCompact(TABLE_NAME.getName()); quickPoll(new Callable<Boolean>() { public Boolean call() throws Exception { return fs.listStatus(storePath).length == 1; } }, 5000); } finally { util.shutdownMiniMapReduceCluster(); util.shutdownMiniCluster(); } } @Test public void testExcludeMinorCompaction() throws Exception { Configuration conf = util.getConfiguration(); conf.setInt("hbase.hstore.compaction.min", 2); generateRandomStartKeys(5); try { util.startMiniCluster(); Path testDir = util.getDataTestDirOnTestFS("testExcludeMinorCompaction"); final FileSystem fs = util.getDFSCluster().getFileSystem(); HBaseAdmin admin = new HBaseAdmin(conf); HTable table = util.createTable(TABLE_NAME, FAMILIES); assertEquals("Should start with empty table", 0, util.countRows(table)); // deep inspection: get the StoreFile dir final Path storePath = HStore.getStoreHomedir(FSUtils.getTableDir(FSUtils.getRootDir(conf), TABLE_NAME), admin.getTableRegions(TABLE_NAME).get(0), FAMILIES[0]); assertEquals(0, fs.listStatus(storePath).length); // put some data in it and flush to create a storefile Put p = new Put(Bytes.toBytes("test")); p.add(FAMILIES[0], Bytes.toBytes("1"), Bytes.toBytes("1")); table.put(p); admin.flush(TABLE_NAME.getName()); assertEquals(1, util.countRows(table)); quickPoll(new Callable<Boolean>() { public Boolean call() throws Exception { return fs.listStatus(storePath).length == 1; } }, 5000); // Generate a bulk load file with more rows conf.setBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude", true); util.startMiniMapReduceCluster(); runIncrementalPELoad(conf, table, testDir); // Perform the actual load new LoadIncrementalHFiles(conf).doBulkLoad(testDir, table); // Ensure data shows up 
      int expectedRows = NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
      assertEquals("LoadIncrementalHFiles should put expected data in table",
          expectedRows + 1, util.countRows(table));

      // should have a second StoreFile now
      assertEquals(2, fs.listStatus(storePath).length);

      // minor compactions shouldn't get rid of the file
      admin.compact(TABLE_NAME.getName());
      try {
        quickPoll(new Callable<Boolean>() {
          public Boolean call() throws Exception {
            return fs.listStatus(storePath).length == 1;
          }
        }, 5000);
        throw new IOException("SF# = " + fs.listStatus(storePath).length);
      } catch (AssertionError ae) {
        // this is expected behavior
      }

      // a major compaction should work though
      admin.majorCompact(TABLE_NAME.getName());
      quickPoll(new Callable<Boolean>() {
        public Boolean call() throws Exception {
          return fs.listStatus(storePath).length == 1;
        }
      }, 5000);
    } finally {
      util.shutdownMiniMapReduceCluster();
      util.shutdownMiniCluster();
    }
  }

  private void quickPoll(Callable<Boolean> c, int waitMs) throws Exception {
    int sleepMs = 10;
    int retries = (int) Math.ceil(((double) waitMs) / sleepMs);
    while (retries-- > 0) {
      if (c.call().booleanValue()) {
        return;
      }
      Thread.sleep(sleepMs);
    }
    fail();
  }

  public static void main(String args[]) throws Exception {
    new TestHFileOutputFormat().manualTest(args);
  }

  public void manualTest(String args[]) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    util = new HBaseTestingUtility(conf);
    if ("newtable".equals(args[0])) {
      byte[] tname = args[1].getBytes();
      HTable table = util.createTable(tname, FAMILIES);
      HBaseAdmin admin = new HBaseAdmin(conf);
      admin.disableTable(tname);
      byte[][] startKeys = generateRandomStartKeys(5);
      util.createMultiRegions(conf, table, FAMILIES[0], startKeys);
      admin.enableTable(tname);
    } else if ("incremental".equals(args[0])) {
      byte[] tname = args[1].getBytes();
      HTable table = new HTable(conf, tname);
      Path outDir = new Path("incremental-out");
      runIncrementalPELoad(conf, table, outDir);
    } else {
      throw new RuntimeException("usage: TestHFileOutputFormat newtable | incremental");
    }
  }
}
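
Outside the test class, the flow that testMRIncrementalLoad and runIncrementalPELoad exercise boils down to three calls: configureIncrementalLoad against an existing table, a job run that writes HFiles to a staging directory, and LoadIncrementalHFiles to move those files into the regions. The sketch below is a minimal, illustrative standalone driver against the same HBase APIs used above; the class name BulkLoadExample, the table name "my_table", and the output path "/tmp/bulkload-out" are assumptions for the example, and a real job would also register its own mapper and input format emitting ImmutableBytesWritable/KeyValue pairs, as RandomKVGeneratingMapper does in the test.

// Minimal sketch (not part of TestHFileOutputFormat): the incremental bulk-load
// flow the tests above exercise. Fully qualified names are used so the snippet
// stands alone; table name and output path are illustrative assumptions.
public class BulkLoadExample {
  public static void main(String[] args) throws Exception {
    org.apache.hadoop.conf.Configuration conf =
        org.apache.hadoop.hbase.HBaseConfiguration.create();
    // Assumed existing table; its region boundaries drive the partitioner.
    org.apache.hadoop.hbase.client.HTable table =
        new org.apache.hadoop.hbase.client.HTable(conf, "my_table");

    org.apache.hadoop.mapreduce.Job job =
        new org.apache.hadoop.mapreduce.Job(conf, "bulk-load-example");
    job.setJarByClass(BulkLoadExample.class);
    // A real job sets its own input format and a mapper that emits
    // ImmutableBytesWritable keys and KeyValue values here.

    // Wires the reducer, total-order partitioner, and per-family compression,
    // bloom filter, and block size settings from the table descriptor,
    // with one reduce task per region.
    org.apache.hadoop.hbase.mapreduce.HFileOutputFormat.configureIncrementalLoad(job, table);

    // Staging directory for the generated HFiles (assumed path).
    org.apache.hadoop.fs.Path out = new org.apache.hadoop.fs.Path("/tmp/bulkload-out");
    org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.setOutputPath(job, out);

    if (job.waitForCompletion(true)) {
      // Move the generated HFiles into the table's regions.
      new org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles(conf).doBulkLoad(out, table);
    }
    table.close();
  }
}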