Java tutorial
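The file below is HBase's TestHBaseFsck test class, which exercises HBaseFsck ("hbck") against deliberately broken tables. Most of its tests follow the same check / corrupt / repair / re-check pattern. As a rough orientation, the short snippet below isolates that pattern; it is a minimal sketch only, assuming a running mini cluster and the doFsck/assertErrors/assertNoErrors helpers from HbckTestingUtil that the class imports.

    HBaseFsck hbck = doFsck(conf, false);     // read-only consistency check
    assertNoErrors(hbck);                     // the cluster starts out clean
    // ... deliberately corrupt meta, HDFS, or assignments here ...
    assertErrors(doFsck(conf, false), new ERROR_CODE[] { /* expected error codes */ });
    doFsck(conf, true);                       // run again with repairs enabled
    assertNoErrors(doFsck(conf, false));      // confirm the repair worked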
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.util;

import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.assertErrors;
import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.assertNoErrors;
import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.doFsck;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.ScheduledThreadPoolExecutor;
import java.util.concurrent.SynchronousQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;

import org.apache.commons.io.IOUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.ClusterStatus;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HRegionLocation;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.LargeTests;
import org.apache.hadoop.hbase.MiniHBaseCluster;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.catalog.MetaEditor;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Durability;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HConnection;
import org.apache.hadoop.hbase.client.HConnectionManager;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.MetaScanner;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.hfile.TestHFile;
import org.apache.hadoop.hbase.master.AssignmentManager;
import org.apache.hadoop.hbase.master.HMaster;
import org.apache.hadoop.hbase.master.RegionStates;
import org.apache.hadoop.hbase.master.TableLockManager;
import org.apache.hadoop.hbase.master.TableLockManager.TableLock;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.protobuf.generated.AdminProtos;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
import org.apache.hadoop.hbase.regionserver.HRegionServer;
import org.apache.hadoop.hbase.regionserver.TestEndToEndSplitTransaction;
import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter;
import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter.ERROR_CODE;
import org.apache.hadoop.hbase.util.HBaseFsck.HbckInfo;
import org.apache.hadoop.hbase.util.HBaseFsck.PrintingErrorReporter;
import org.apache.hadoop.hbase.util.HBaseFsck.TableInfo;
import org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker;
import org.apache.hadoop.hbase.util.hbck.HbckTestingUtil;
import org.apache.hadoop.hbase.zookeeper.MetaRegionTracker;
import org.apache.zookeeper.KeeperException;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Ignore;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.rules.TestName;

import com.google.common.collect.Multimap;

/**
 * This tests HBaseFsck's ability to detect reasons for inconsistent tables.
 */
@Category(LargeTests.class)
public class TestHBaseFsck {
  final static Log LOG = LogFactory.getLog(TestHBaseFsck.class);
  private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
  private final static Configuration conf = TEST_UTIL.getConfiguration();
  private final static String FAM_STR = "fam";
  private final static byte[] FAM = Bytes.toBytes(FAM_STR);
  private final static int REGION_ONLINE_TIMEOUT = 800;
  private static RegionStates regionStates;
  private static ExecutorService executorService;

  // for the instance, reset every test run
  private HTable tbl;
  private final static byte[][] SPLITS =
      new byte[][] { Bytes.toBytes("A"), Bytes.toBytes("B"), Bytes.toBytes("C") };
  // one row per region.
  private final static byte[][] ROWKEYS = new byte[][] {
      Bytes.toBytes("00"), Bytes.toBytes("50"), Bytes.toBytes("A0"), Bytes.toBytes("A5"),
      Bytes.toBytes("B0"), Bytes.toBytes("B5"), Bytes.toBytes("C0"), Bytes.toBytes("C5") };

  @BeforeClass
  public static void setUpBeforeClass() throws Exception {
    TEST_UTIL.getConfiguration().setInt("hbase.regionserver.handler.count", 2);
    TEST_UTIL.getConfiguration().setInt("hbase.regionserver.metahandler.count", 2);
    TEST_UTIL.startMiniCluster(3);
    executorService = new ThreadPoolExecutor(1, Integer.MAX_VALUE, 60, TimeUnit.SECONDS,
        new SynchronousQueue<Runnable>(), Threads.newDaemonThreadFactory("testhbck"));
    AssignmentManager assignmentManager =
        TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager();
    regionStates = assignmentManager.getRegionStates();
    TEST_UTIL.getHBaseAdmin().setBalancerRunning(false, true);
  }

  @AfterClass
  public static void tearDownAfterClass() throws Exception {
    TEST_UTIL.shutdownMiniCluster();
  }

  @Test
  public void testHBaseFsck() throws Exception {
    assertNoErrors(doFsck(conf, false));
    String table = "tableBadMetaAssign";
    TEST_UTIL.createTable(Bytes.toBytes(table), FAM);

    // We created 1 table, should be fine
    assertNoErrors(doFsck(conf, false));

    // Now let's mess it up and change the assignment in hbase:meta to
    // point to a different region server
    HTable meta = new HTable(conf, HTableDescriptor.META_TABLEDESC.getTableName(),
        executorService);
    Scan scan = new Scan();
    scan.setStartRow(Bytes.toBytes(table + ",,"));
    ResultScanner scanner = meta.getScanner(scan);
    HRegionInfo hri = null;

    Result res = scanner.next();
    ServerName currServer = ServerName.parseFrom(
        res.getValue(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER));
    long startCode = Bytes.toLong(
        res.getValue(HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER));

    for (JVMClusterUtil.RegionServerThread rs :
        TEST_UTIL.getHBaseCluster().getRegionServerThreads()) {
      ServerName sn = rs.getRegionServer().getServerName();

      // When we find a diff RS, change the assignment and break
      if (!currServer.getHostAndPort().equals(sn.getHostAndPort())
          || startCode != sn.getStartcode()) {
        Put put = new Put(res.getRow());
        put.setDurability(Durability.SKIP_WAL);
        put.add(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER,
            Bytes.toBytes(sn.getHostAndPort()));
        put.add(HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER,
            Bytes.toBytes(sn.getStartcode()));
        meta.put(put);
        hri = HRegionInfo.getHRegionInfo(res);
        break;
      }
    }

    // Try to fix the data
    assertErrors(doFsck(conf, true),
        new ERROR_CODE[] { ERROR_CODE.SERVER_DOES_NOT_MATCH_META });

    TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager().waitForAssignment(hri);

    // Should be fixed now
    assertNoErrors(doFsck(conf, false));

    // Open and scan the table to confirm the reassigned region is reachable again
    HTable t = new HTable(conf, Bytes.toBytes(table), executorService);
    ResultScanner s = t.getScanner(new Scan());
    s.close();
    t.close();
    scanner.close();
    meta.close();
  }

  @Test(timeout = 180000)
  public void testFixAssignmentsWhenMETAinTransition() throws Exception {
    MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
    HBaseAdmin admin = null;
    try {
      admin = new HBaseAdmin(TEST_UTIL.getConfiguration());
      admin.closeRegion(cluster.getServerHoldingMeta(), HRegionInfo.FIRST_META_REGIONINFO);
    } finally {
      if (admin != null) {
        admin.close();
      }
    }
    regionStates.regionOffline(HRegionInfo.FIRST_META_REGIONINFO);
    MetaRegionTracker.deleteMetaLocation(cluster.getMaster().getZooKeeper());
    assertFalse(regionStates.isRegionOnline(HRegionInfo.FIRST_META_REGIONINFO));
    HBaseFsck hbck =
doFsck(conf, true); assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.UNKNOWN, ERROR_CODE.NO_META_REGION, ERROR_CODE.NULL_META_REGION }); assertNoErrors(doFsck(conf, false)); } /** * Create a new region in META. */ private HRegionInfo createRegion(Configuration conf, final HTableDescriptor htd, byte[] startKey, byte[] endKey) throws IOException { HTable meta = new HTable(conf, TableName.META_TABLE_NAME, executorService); HRegionInfo hri = new HRegionInfo(htd.getTableName(), startKey, endKey); MetaEditor.addRegionToMeta(meta, hri); meta.close(); return hri; } /** * Debugging method to dump the contents of meta. */ private void dumpMeta(TableName tableName) throws IOException { List<byte[]> metaRows = TEST_UTIL.getMetaTableRows(tableName); for (byte[] row : metaRows) { LOG.info(Bytes.toString(row)); } } /** * This method is used to undeploy a region -- close it and attempt to * remove its state from the Master. */ private void undeployRegion(HBaseAdmin admin, ServerName sn, HRegionInfo hri) throws IOException, InterruptedException { try { HBaseFsckRepair.closeRegionSilentlyAndWait(admin, sn, hri); if (!hri.isMetaTable()) { admin.offline(hri.getRegionName()); } } catch (IOException ioe) { LOG.warn("Got exception when attempting to offline region " + Bytes.toString(hri.getRegionName()), ioe); } } /** * Delete a region from assignments, meta, or completely from hdfs. * @param unassign if true unassign region if assigned * @param metaRow if true remove region's row from META * @param hdfs if true remove region's dir in HDFS */ private void deleteRegion(Configuration conf, final HTableDescriptor htd, byte[] startKey, byte[] endKey, boolean unassign, boolean metaRow, boolean hdfs) throws IOException, InterruptedException { deleteRegion(conf, htd, startKey, endKey, unassign, metaRow, hdfs, false); } /** * Delete a region from assignments, meta, or completely from hdfs. 
   * @param unassign if true unassign region if assigned
   * @param metaRow if true remove region's row from META
   * @param hdfs if true remove region's dir in HDFS
   * @param regionInfoOnly if true remove a region dir's .regioninfo file
   */
  private void deleteRegion(Configuration conf, final HTableDescriptor htd, byte[] startKey,
      byte[] endKey, boolean unassign, boolean metaRow, boolean hdfs, boolean regionInfoOnly)
      throws IOException, InterruptedException {
    LOG.info("** Before delete:");
    dumpMeta(htd.getTableName());

    Map<HRegionInfo, ServerName> hris = tbl.getRegionLocations();
    for (Entry<HRegionInfo, ServerName> e : hris.entrySet()) {
      HRegionInfo hri = e.getKey();
      ServerName hsa = e.getValue();
      if (Bytes.compareTo(hri.getStartKey(), startKey) == 0
          && Bytes.compareTo(hri.getEndKey(), endKey) == 0) {

        LOG.info("RegionName: " + hri.getRegionNameAsString());
        byte[] deleteRow = hri.getRegionName();

        if (unassign) {
          LOG.info("Undeploying region " + hri + " from server " + hsa);
          undeployRegion(new HBaseAdmin(conf), hsa, hri);
        }

        if (regionInfoOnly) {
          LOG.info("deleting hdfs .regioninfo data: " + hri.toString() + hsa.toString());
          Path rootDir = FSUtils.getRootDir(conf);
          FileSystem fs = rootDir.getFileSystem(conf);
          Path p = new Path(FSUtils.getTableDir(rootDir, htd.getTableName()),
              hri.getEncodedName());
          Path hriPath = new Path(p, HRegionFileSystem.REGION_INFO_FILE);
          fs.delete(hriPath, true);
        }

        if (hdfs) {
          LOG.info("deleting hdfs data: " + hri.toString() + hsa.toString());
          Path rootDir = FSUtils.getRootDir(conf);
          FileSystem fs = rootDir.getFileSystem(conf);
          Path p = new Path(FSUtils.getTableDir(rootDir, htd.getTableName()),
              hri.getEncodedName());
          HBaseFsck.debugLsr(conf, p);
          boolean success = fs.delete(p, true);
          LOG.info("Deleted " + p + " successfully? " + success);
          HBaseFsck.debugLsr(conf, p);
        }

        if (metaRow) {
          HTable meta = new HTable(conf, TableName.META_TABLE_NAME, executorService);
          Delete delete = new Delete(deleteRow);
          meta.delete(delete);
        }
      }
      LOG.info(hri.toString() + hsa.toString());
    }

    TEST_UTIL.getMetaTableRows(htd.getTableName());
    LOG.info("*** After delete:");
    dumpMeta(htd.getTableName());
  }

  /**
   * Set up a clean table before we start mucking with it.
   *
   * @throws IOException
   * @throws InterruptedException
   * @throws KeeperException
   */
  HTable setupTable(TableName tablename) throws Exception {
    HTableDescriptor desc = new HTableDescriptor(tablename);
    HColumnDescriptor hcd = new HColumnDescriptor(Bytes.toString(FAM));
    desc.addFamily(hcd); // If a table has no CF's it doesn't get checked
    TEST_UTIL.getHBaseAdmin().createTable(desc, SPLITS);
    tbl = new HTable(TEST_UTIL.getConfiguration(), tablename, executorService);

    List<Put> puts = new ArrayList<Put>();
    for (byte[] row : ROWKEYS) {
      Put p = new Put(row);
      p.add(FAM, Bytes.toBytes("val"), row);
      puts.add(p);
    }
    tbl.put(puts);
    tbl.flushCommits();
    return tbl;
  }

  /**
   * Counts the number of rows to verify data loss or non-dataloss.
*/ int countRows() throws IOException { Scan s = new Scan(); ResultScanner rs = tbl.getScanner(s); int i = 0; while (rs.next() != null) { i++; } return i; } /** * delete table in preparation for next test * * @param tablename * @throws IOException */ void deleteTable(TableName tablename) throws IOException { HBaseAdmin admin = new HBaseAdmin(conf); admin.getConnection().clearRegionCache(); if (admin.isTableEnabled(tablename)) { admin.disableTableAsync(tablename); } long totalWait = 0; long maxWait = 30 * 1000; long sleepTime = 250; while (!admin.isTableDisabled(tablename)) { try { Thread.sleep(sleepTime); totalWait += sleepTime; if (totalWait >= maxWait) { fail("Waited too long for table to be disabled + " + tablename); } } catch (InterruptedException e) { e.printStackTrace(); fail("Interrupted when trying to disable table " + tablename); } } admin.deleteTable(tablename); } /** * This creates a clean table and confirms that the table is clean. */ @Test public void testHBaseFsckClean() throws Exception { assertNoErrors(doFsck(conf, false)); TableName table = TableName.valueOf("tableClean"); try { HBaseFsck hbck = doFsck(conf, false); assertNoErrors(hbck); setupTable(table); assertEquals(ROWKEYS.length, countRows()); // We created 1 table, should be fine hbck = doFsck(conf, false); assertNoErrors(hbck); assertEquals(0, hbck.getOverlapGroups(table).size()); assertEquals(ROWKEYS.length, countRows()); } finally { deleteTable(table); } } /** * Test thread pooling in the case where there are more regions than threads */ @Test public void testHbckThreadpooling() throws Exception { TableName table = TableName.valueOf("tableDupeStartKey"); try { // Create table with 4 regions setupTable(table); // limit number of threads to 1. Configuration newconf = new Configuration(conf); newconf.setInt("hbasefsck.numthreads", 1); assertNoErrors(doFsck(newconf, false)); // We should pass without triggering a RejectedExecutionException } finally { deleteTable(table); } } @Test public void testHbckFixOrphanTable() throws Exception { TableName table = TableName.valueOf("tableInfo"); FileSystem fs = null; Path tableinfo = null; try { setupTable(table); HBaseAdmin admin = TEST_UTIL.getHBaseAdmin(); Path hbaseTableDir = FSUtils.getTableDir(FSUtils.getRootDir(conf), table); fs = hbaseTableDir.getFileSystem(conf); FileStatus status = FSTableDescriptors.getTableInfoPath(fs, hbaseTableDir); tableinfo = status.getPath(); fs.rename(tableinfo, new Path("/.tableinfo")); //to report error if .tableinfo is missing. 
HBaseFsck hbck = doFsck(conf, false); assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NO_TABLEINFO_FILE }); // fix OrphanTable with default .tableinfo (htd not yet cached on master) hbck = doFsck(conf, true); assertNoErrors(hbck); status = null; status = FSTableDescriptors.getTableInfoPath(fs, hbaseTableDir); assertNotNull(status); HTableDescriptor htd = admin.getTableDescriptor(table); htd.setValue("NOT_DEFAULT", "true"); admin.disableTable(table); admin.modifyTable(table, htd); admin.enableTable(table); fs.delete(status.getPath(), true); // fix OrphanTable with cache htd = admin.getTableDescriptor(table); // warms up cached htd on master hbck = doFsck(conf, true); assertNoErrors(hbck); status = null; status = FSTableDescriptors.getTableInfoPath(fs, hbaseTableDir); assertNotNull(status); htd = admin.getTableDescriptor(table); assertEquals(htd.getValue("NOT_DEFAULT"), "true"); } finally { fs.rename(new Path("/.tableinfo"), tableinfo); deleteTable(table); } } /** * This create and fixes a bad table with regions that have a duplicate * start key */ @Test public void testDupeStartKey() throws Exception { TableName table = TableName.valueOf("tableDupeStartKey"); try { setupTable(table); assertNoErrors(doFsck(conf, false)); assertEquals(ROWKEYS.length, countRows()); // Now let's mess it up, by adding a region with a duplicate startkey HRegionInfo hriDupe = createRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"), Bytes.toBytes("A2")); TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriDupe); TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager().waitForAssignment(hriDupe); ServerName server = regionStates.getRegionServerOfRegion(hriDupe); TEST_UTIL.assertRegionOnServer(hriDupe, server, REGION_ONLINE_TIMEOUT); HBaseFsck hbck = doFsck(conf, false); assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.DUPE_STARTKEYS, ERROR_CODE.DUPE_STARTKEYS }); assertEquals(2, hbck.getOverlapGroups(table).size()); assertEquals(ROWKEYS.length, countRows()); // seems like the "bigger" region won. // fix the degenerate region. doFsck(conf, true); // check that the degenerate region is gone and no data loss HBaseFsck hbck2 = doFsck(conf, false); assertNoErrors(hbck2); assertEquals(0, hbck2.getOverlapGroups(table).size()); assertEquals(ROWKEYS.length, countRows()); } finally { deleteTable(table); } } /** * Get region info from local cluster. */ Map<ServerName, List<String>> getDeployedHRIs(final HBaseAdmin admin) throws IOException { ClusterStatus status = admin.getClusterStatus(); Collection<ServerName> regionServers = status.getServers(); Map<ServerName, List<String>> mm = new HashMap<ServerName, List<String>>(); HConnection connection = admin.getConnection(); for (ServerName hsi : regionServers) { AdminProtos.AdminService.BlockingInterface server = connection.getAdmin(hsi); // list all online regions from this region server List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(server); List<String> regionNames = new ArrayList<String>(); for (HRegionInfo hri : regions) { regionNames.add(hri.getRegionNameAsString()); } mm.put(hsi, regionNames); } return mm; } /** * Returns the HSI a region info is on. 
*/ ServerName findDeployedHSI(Map<ServerName, List<String>> mm, HRegionInfo hri) { for (Map.Entry<ServerName, List<String>> e : mm.entrySet()) { if (e.getValue().contains(hri.getRegionNameAsString())) { return e.getKey(); } } return null; } /** * This create and fixes a bad table with regions that have a duplicate * start key */ @Test public void testDupeRegion() throws Exception { TableName table = TableName.valueOf("tableDupeRegion"); try { setupTable(table); assertNoErrors(doFsck(conf, false)); assertEquals(ROWKEYS.length, countRows()); // Now let's mess it up, by adding a region with a duplicate startkey HRegionInfo hriDupe = createRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"), Bytes.toBytes("B")); TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriDupe); TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager().waitForAssignment(hriDupe); ServerName server = regionStates.getRegionServerOfRegion(hriDupe); TEST_UTIL.assertRegionOnServer(hriDupe, server, REGION_ONLINE_TIMEOUT); // Yikes! The assignment manager can't tell between diff between two // different regions with the same start/endkeys since it doesn't // differentiate on ts/regionId! We actually need to recheck // deployments! HBaseAdmin admin = TEST_UTIL.getHBaseAdmin(); while (findDeployedHSI(getDeployedHRIs(admin), hriDupe) == null) { Thread.sleep(250); } LOG.debug("Finished assignment of dupe region"); // TODO why is dupe region different from dupe start keys? HBaseFsck hbck = doFsck(conf, false); assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.DUPE_STARTKEYS, ERROR_CODE.DUPE_STARTKEYS }); assertEquals(2, hbck.getOverlapGroups(table).size()); assertEquals(ROWKEYS.length, countRows()); // seems like the "bigger" region won. // fix the degenerate region. doFsck(conf, true); // check that the degenerate region is gone and no data loss HBaseFsck hbck2 = doFsck(conf, false); assertNoErrors(hbck2); assertEquals(0, hbck2.getOverlapGroups(table).size()); assertEquals(ROWKEYS.length, countRows()); } finally { deleteTable(table); } } /** * This creates and fixes a bad table with regions that has startkey == endkey */ @Test public void testDegenerateRegions() throws Exception { TableName table = TableName.valueOf("tableDegenerateRegions"); try { setupTable(table); assertNoErrors(doFsck(conf, false)); assertEquals(ROWKEYS.length, countRows()); // Now let's mess it up, by adding a region with a duplicate startkey HRegionInfo hriDupe = createRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"), Bytes.toBytes("B")); TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriDupe); TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager().waitForAssignment(hriDupe); ServerName server = regionStates.getRegionServerOfRegion(hriDupe); TEST_UTIL.assertRegionOnServer(hriDupe, server, REGION_ONLINE_TIMEOUT); HBaseFsck hbck = doFsck(conf, false); assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.DEGENERATE_REGION, ERROR_CODE.DUPE_STARTKEYS, ERROR_CODE.DUPE_STARTKEYS }); assertEquals(2, hbck.getOverlapGroups(table).size()); assertEquals(ROWKEYS.length, countRows()); // fix the degenerate region. doFsck(conf, true); // check that the degenerate region is gone and no data loss HBaseFsck hbck2 = doFsck(conf, false); assertNoErrors(hbck2); assertEquals(0, hbck2.getOverlapGroups(table).size()); assertEquals(ROWKEYS.length, countRows()); } finally { deleteTable(table); } } /** * This creates and fixes a bad table where a region is completely contained * by another region. 
*/ @Test public void testContainedRegionOverlap() throws Exception { TableName table = TableName.valueOf("tableContainedRegionOverlap"); try { setupTable(table); assertEquals(ROWKEYS.length, countRows()); // Mess it up by creating an overlap in the metadata HRegionInfo hriOverlap = createRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A2"), Bytes.toBytes("B")); TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriOverlap); TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager().waitForAssignment(hriOverlap); ServerName server = regionStates.getRegionServerOfRegion(hriOverlap); TEST_UTIL.assertRegionOnServer(hriOverlap, server, REGION_ONLINE_TIMEOUT); HBaseFsck hbck = doFsck(conf, false); assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.OVERLAP_IN_REGION_CHAIN }); assertEquals(2, hbck.getOverlapGroups(table).size()); assertEquals(ROWKEYS.length, countRows()); // fix the problem. doFsck(conf, true); // verify that overlaps are fixed HBaseFsck hbck2 = doFsck(conf, false); assertNoErrors(hbck2); assertEquals(0, hbck2.getOverlapGroups(table).size()); assertEquals(ROWKEYS.length, countRows()); } finally { deleteTable(table); } } /** * This creates and fixes a bad table where an overlap group of * 3 regions. Set HBaseFsck.maxMerge to 2 to trigger sideline overlapped * region. Mess around the meta data so that closeRegion/offlineRegion * throws exceptions. */ @Test public void testSidelineOverlapRegion() throws Exception { TableName table = TableName.valueOf("testSidelineOverlapRegion"); try { setupTable(table); assertEquals(ROWKEYS.length, countRows()); // Mess it up by creating an overlap MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster(); HMaster master = cluster.getMaster(); HRegionInfo hriOverlap1 = createRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"), Bytes.toBytes("AB")); master.assignRegion(hriOverlap1); master.getAssignmentManager().waitForAssignment(hriOverlap1); HRegionInfo hriOverlap2 = createRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("AB"), Bytes.toBytes("B")); master.assignRegion(hriOverlap2); master.getAssignmentManager().waitForAssignment(hriOverlap2); HBaseFsck hbck = doFsck(conf, false); assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.DUPE_STARTKEYS, ERROR_CODE.DUPE_STARTKEYS, ERROR_CODE.OVERLAP_IN_REGION_CHAIN }); assertEquals(3, hbck.getOverlapGroups(table).size()); assertEquals(ROWKEYS.length, countRows()); // mess around the overlapped regions, to trigger NotServingRegionException Multimap<byte[], HbckInfo> overlapGroups = hbck.getOverlapGroups(table); ServerName serverName = null; byte[] regionName = null; for (HbckInfo hbi : overlapGroups.values()) { if ("A".equals(Bytes.toString(hbi.getStartKey())) && "B".equals(Bytes.toString(hbi.getEndKey()))) { regionName = hbi.getRegionName(); // get an RS not serving the region to force bad assignment info in to META. int k = cluster.getServerWith(regionName); for (int i = 0; i < 3; i++) { if (i != k) { HRegionServer rs = cluster.getRegionServer(i); serverName = rs.getServerName(); break; } } HBaseAdmin admin = TEST_UTIL.getHBaseAdmin(); HBaseFsckRepair.closeRegionSilentlyAndWait(admin, cluster.getRegionServer(k).getServerName(), hbi.getHdfsHRI()); admin.offline(regionName); break; } } assertNotNull(regionName); assertNotNull(serverName); HTable meta = new HTable(conf, TableName.META_TABLE_NAME, executorService); Put put = new Put(regionName); put.add(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER, Bytes.toBytes(serverName.getHostAndPort())); meta.put(put); // fix the problem. 
HBaseFsck fsck = new HBaseFsck(conf); fsck.connect(); fsck.setDisplayFullReport(); // i.e. -details fsck.setTimeLag(0); fsck.setFixAssignments(true); fsck.setFixMeta(true); fsck.setFixHdfsHoles(true); fsck.setFixHdfsOverlaps(true); fsck.setFixHdfsOrphans(true); fsck.setFixVersionFile(true); fsck.setSidelineBigOverlaps(true); fsck.setMaxMerge(2); fsck.onlineHbck(); // verify that overlaps are fixed, and there are less rows // since one region is sidelined. HBaseFsck hbck2 = doFsck(conf, false); assertNoErrors(hbck2); assertEquals(0, hbck2.getOverlapGroups(table).size()); assertTrue(ROWKEYS.length > countRows()); } finally { deleteTable(table); } } /** * This creates and fixes a bad table where a region is completely contained * by another region, and there is a hole (sort of like a bad split) */ @Test public void testOverlapAndOrphan() throws Exception { TableName table = TableName.valueOf("tableOverlapAndOrphan"); try { setupTable(table); assertEquals(ROWKEYS.length, countRows()); // Mess it up by creating an overlap in the metadata TEST_UTIL.getHBaseAdmin().disableTable(table); deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"), Bytes.toBytes("B"), true, true, false, true); TEST_UTIL.getHBaseAdmin().enableTable(table); HRegionInfo hriOverlap = createRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A2"), Bytes.toBytes("B")); TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriOverlap); TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager().waitForAssignment(hriOverlap); ServerName server = regionStates.getRegionServerOfRegion(hriOverlap); TEST_UTIL.assertRegionOnServer(hriOverlap, server, REGION_ONLINE_TIMEOUT); HBaseFsck hbck = doFsck(conf, false); assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.ORPHAN_HDFS_REGION, ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN }); // fix the problem. doFsck(conf, true); // verify that overlaps are fixed HBaseFsck hbck2 = doFsck(conf, false); assertNoErrors(hbck2); assertEquals(0, hbck2.getOverlapGroups(table).size()); assertEquals(ROWKEYS.length, countRows()); } finally { deleteTable(table); } } /** * This creates and fixes a bad table where a region overlaps two regions -- * a start key contained in another region and its end key is contained in * yet another region. */ @Test public void testCoveredStartKey() throws Exception { TableName table = TableName.valueOf("tableCoveredStartKey"); try { setupTable(table); assertEquals(ROWKEYS.length, countRows()); // Mess it up by creating an overlap in the metadata HRegionInfo hriOverlap = createRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A2"), Bytes.toBytes("B2")); TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriOverlap); TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager().waitForAssignment(hriOverlap); ServerName server = regionStates.getRegionServerOfRegion(hriOverlap); TEST_UTIL.assertRegionOnServer(hriOverlap, server, REGION_ONLINE_TIMEOUT); HBaseFsck hbck = doFsck(conf, false); assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.OVERLAP_IN_REGION_CHAIN, ERROR_CODE.OVERLAP_IN_REGION_CHAIN }); assertEquals(3, hbck.getOverlapGroups(table).size()); assertEquals(ROWKEYS.length, countRows()); // fix the problem. 
      doFsck(conf, true);

      // verify that overlaps are fixed
      HBaseFsck hbck2 = doFsck(conf, false);
      assertErrors(hbck2, new ERROR_CODE[0]);
      assertEquals(0, hbck2.getOverlapGroups(table).size());
      assertEquals(ROWKEYS.length, countRows());
    } finally {
      deleteTable(table);
    }
  }

  /**
   * This creates and fixes a bad table with a missing region -- hole in meta
   * and data missing in the fs.
   */
  @Test
  public void testRegionHole() throws Exception {
    TableName table = TableName.valueOf("tableRegionHole");
    try {
      setupTable(table);
      assertEquals(ROWKEYS.length, countRows());

      // Mess it up by leaving a hole in the assignment, meta, and hdfs data
      TEST_UTIL.getHBaseAdmin().disableTable(table);
      deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"), Bytes.toBytes("C"),
          true, true, true);
      TEST_UTIL.getHBaseAdmin().enableTable(table);

      HBaseFsck hbck = doFsck(conf, false);
      assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.HOLE_IN_REGION_CHAIN });
      // holes are separate from overlap groups
      assertEquals(0, hbck.getOverlapGroups(table).size());

      // fix hole
      doFsck(conf, true);

      // check that hole fixed
      assertNoErrors(doFsck(conf, false));
      assertEquals(ROWKEYS.length - 2, countRows()); // lost a region so lost its rows
    } finally {
      deleteTable(table);
    }
  }

  /**
   * This creates and fixes a bad table with a missing region -- hole in meta
   * and data present but .regioninfo missing (an orphan hdfs region) in the fs.
   */
  @Test
  public void testHDFSRegioninfoMissing() throws Exception {
    TableName table = TableName.valueOf("tableHDFSRegioininfoMissing");
    try {
      setupTable(table);
      assertEquals(ROWKEYS.length, countRows());

      // Mess it up by leaving a hole in the meta data
      TEST_UTIL.getHBaseAdmin().disableTable(table);
      deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"), Bytes.toBytes("C"),
          true, true, false, true);
      TEST_UTIL.getHBaseAdmin().enableTable(table);

      HBaseFsck hbck = doFsck(conf, false);
      assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.ORPHAN_HDFS_REGION,
          ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN });
      // holes are separate from overlap groups
      assertEquals(0, hbck.getOverlapGroups(table).size());

      // fix hole
      doFsck(conf, true);

      // check that hole fixed
      assertNoErrors(doFsck(conf, false));
      assertEquals(ROWKEYS.length, countRows());
    } finally {
      deleteTable(table);
    }
  }

  /**
   * This creates and fixes a bad table with a region that is missing meta and
   * not assigned to a region server.
   */
  @Test
  public void testNotInMetaOrDeployedHole() throws Exception {
    TableName table = TableName.valueOf("tableNotInMetaOrDeployedHole");
    try {
      setupTable(table);
      assertEquals(ROWKEYS.length, countRows());

      // Mess it up by leaving a hole in the meta data
      TEST_UTIL.getHBaseAdmin().disableTable(table);
      deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"), Bytes.toBytes("C"),
          true, true, false); // don't rm from fs
      TEST_UTIL.getHBaseAdmin().enableTable(table);

      HBaseFsck hbck = doFsck(conf, false);
      assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
          ERROR_CODE.HOLE_IN_REGION_CHAIN });
      // holes are separate from overlap groups
      assertEquals(0, hbck.getOverlapGroups(table).size());

      // fix hole
      assertErrors(doFsck(conf, true), new ERROR_CODE[] { ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
          ERROR_CODE.HOLE_IN_REGION_CHAIN });

      // check that hole fixed
      assertNoErrors(doFsck(conf, false));
      assertEquals(ROWKEYS.length, countRows());
    } finally {
      deleteTable(table);
    }
  }

  /**
   * This creates and fixes a bad table with a hole in meta.
*/ @Test public void testNotInMetaHole() throws Exception { TableName table = TableName.valueOf("tableNotInMetaHole"); try { setupTable(table); assertEquals(ROWKEYS.length, countRows()); // Mess it up by leaving a hole in the meta data TEST_UTIL.getHBaseAdmin().disableTable(table); deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"), Bytes.toBytes("C"), false, true, false); // don't rm from fs TEST_UTIL.getHBaseAdmin().enableTable(table); HBaseFsck hbck = doFsck(conf, false); assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN }); // holes are separate from overlap groups assertEquals(0, hbck.getOverlapGroups(table).size()); // fix hole assertErrors(doFsck(conf, true), new ERROR_CODE[] { ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN }); // check that hole fixed assertNoErrors(doFsck(conf, false)); assertEquals(ROWKEYS.length, countRows()); } finally { deleteTable(table); } } /** * This creates and fixes a bad table with a region that is in meta but has * no deployment or data hdfs */ @Test public void testNotInHdfs() throws Exception { TableName table = TableName.valueOf("tableNotInHdfs"); try { setupTable(table); assertEquals(ROWKEYS.length, countRows()); // make sure data in regions, if in hlog only there is no data loss TEST_UTIL.getHBaseAdmin().flush(table.getName()); // Mess it up by leaving a hole in the hdfs data deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"), Bytes.toBytes("C"), false, false, true); // don't rm meta HBaseFsck hbck = doFsck(conf, false); assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NOT_IN_HDFS }); // holes are separate from overlap groups assertEquals(0, hbck.getOverlapGroups(table).size()); // fix hole doFsck(conf, true); // check that hole fixed assertNoErrors(doFsck(conf, false)); assertEquals(ROWKEYS.length - 2, countRows()); } finally { deleteTable(table); } } /** * This creates entries in hbase:meta with no hdfs data. This should cleanly * remove the table. 
   */
  @Test
  public void testNoHdfsTable() throws Exception {
    TableName table = TableName.valueOf("NoHdfsTable");
    setupTable(table);
    assertEquals(ROWKEYS.length, countRows());

    // make sure data in regions, if in hlog only there is no data loss
    TEST_UTIL.getHBaseAdmin().flush(table.getName());

    // Mess it up by deleting hdfs dirs
    deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes(""), Bytes.toBytes("A"),
        false, false, true); // don't rm meta
    deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"), Bytes.toBytes("B"),
        false, false, true); // don't rm meta
    deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"), Bytes.toBytes("C"),
        false, false, true); // don't rm meta
    deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("C"), Bytes.toBytes(""),
        false, false, true); // don't rm meta

    // also remove the table directory in hdfs
    deleteTableDir(table);

    HBaseFsck hbck = doFsck(conf, false);
    assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NOT_IN_HDFS, ERROR_CODE.NOT_IN_HDFS,
        ERROR_CODE.NOT_IN_HDFS, ERROR_CODE.NOT_IN_HDFS });
    // holes are separate from overlap groups
    assertEquals(0, hbck.getOverlapGroups(table).size());

    // fix hole
    doFsck(conf, true); // detect dangling regions and remove those

    // check that hole fixed
    assertNoErrors(doFsck(conf, false));
    assertFalse("Table " + table + " should have been deleted",
        TEST_UTIL.getHBaseAdmin().tableExists(table));
  }

  public void deleteTableDir(TableName table) throws IOException {
    Path rootDir = FSUtils.getRootDir(conf);
    FileSystem fs = rootDir.getFileSystem(conf);
    Path p = FSUtils.getTableDir(rootDir, table);
    HBaseFsck.debugLsr(conf, p);
    boolean success = fs.delete(p, true);
    LOG.info("Deleted " + p + " successfully? " + success);
  }

  /**
   * When the hbase.version file is missing, hbck should detect the fault and fix it.
   */
  @Test
  public void testNoVersionFile() throws Exception {
    // delete the hbase.version file
    Path rootDir = FSUtils.getRootDir(conf);
    FileSystem fs = rootDir.getFileSystem(conf);
    Path versionFile = new Path(rootDir, HConstants.VERSION_FILE_NAME);
    fs.delete(versionFile, true);

    // test
    HBaseFsck hbck = doFsck(conf, false);
    assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NO_VERSION_FILE });

    // fix hbase.version missing
    doFsck(conf, true);

    // no version file fixed
    assertNoErrors(doFsck(conf, false));
  }

  /**
   * The region is not deployed when the table is disabled.
   */
  @Test
  public void testRegionShouldNotBeDeployed() throws Exception {
    TableName table = TableName.valueOf("tableRegionShouldNotBeDeployed");
    try {
      LOG.info("Starting testRegionShouldNotBeDeployed.");
      MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
      assertTrue(cluster.waitForActiveAndReadyMaster());

      byte[][] SPLIT_KEYS = new byte[][] { new byte[0], Bytes.toBytes("aaa"),
          Bytes.toBytes("bbb"), Bytes.toBytes("ccc"), Bytes.toBytes("ddd") };
      HTableDescriptor htdDisabled = new HTableDescriptor(table);
      htdDisabled.addFamily(new HColumnDescriptor(FAM));

      // Write the .tableinfo
      FSTableDescriptors fstd = new FSTableDescriptors(conf);
      fstd.createTableDescriptor(htdDisabled);
      List<HRegionInfo> disabledRegions = TEST_UTIL.createMultiRegionsInMeta(
          TEST_UTIL.getConfiguration(), htdDisabled, SPLIT_KEYS);

      // Let's just assign everything to first RS
      HRegionServer hrs = cluster.getRegionServer(0);

      // Create region files.
TEST_UTIL.getHBaseAdmin().disableTable(table); TEST_UTIL.getHBaseAdmin().enableTable(table); // Disable the table and close its regions TEST_UTIL.getHBaseAdmin().disableTable(table); HRegionInfo region = disabledRegions.remove(0); byte[] regionName = region.getRegionName(); // The region should not be assigned currently assertTrue(cluster.getServerWith(regionName) == -1); // Directly open a region on a region server. // If going through AM/ZK, the region won't be open. // Even it is opened, AM will close it which causes // flakiness of this test. HRegion r = HRegion.openHRegion(region, htdDisabled, hrs.getWAL(region), conf); hrs.addToOnlineRegions(r); HBaseFsck hbck = doFsck(conf, false); assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.SHOULD_NOT_BE_DEPLOYED }); // fix this fault doFsck(conf, true); // check result assertNoErrors(doFsck(conf, false)); } finally { TEST_UTIL.getHBaseAdmin().enableTable(table); deleteTable(table); } } /** * This creates two tables and mess both of them and fix them one by one */ @Test public void testFixByTable() throws Exception { TableName table1 = TableName.valueOf("testFixByTable1"); TableName table2 = TableName.valueOf("testFixByTable2"); try { setupTable(table1); // make sure data in regions, if in hlog only there is no data loss TEST_UTIL.getHBaseAdmin().flush(table1.getName()); // Mess them up by leaving a hole in the hdfs data deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"), Bytes.toBytes("C"), false, false, true); // don't rm meta setupTable(table2); // make sure data in regions, if in hlog only there is no data loss TEST_UTIL.getHBaseAdmin().flush(table2.getName()); // Mess them up by leaving a hole in the hdfs data deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"), Bytes.toBytes("C"), false, false, true); // don't rm meta HBaseFsck hbck = doFsck(conf, false); assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NOT_IN_HDFS, ERROR_CODE.NOT_IN_HDFS }); // fix hole in table 1 doFsck(conf, true, table1); // check that hole in table 1 fixed assertNoErrors(doFsck(conf, false, table1)); // check that hole in table 2 still there assertErrors(doFsck(conf, false, table2), new ERROR_CODE[] { ERROR_CODE.NOT_IN_HDFS }); // fix hole in table 2 doFsck(conf, true, table2); // check that hole in both tables fixed assertNoErrors(doFsck(conf, false)); assertEquals(ROWKEYS.length - 2, countRows()); } finally { deleteTable(table1); deleteTable(table2); } } /** * A split parent in meta, in hdfs, and not deployed */ @Test public void testLingeringSplitParent() throws Exception { TableName table = TableName.valueOf("testLingeringSplitParent"); HTable meta = null; try { setupTable(table); assertEquals(ROWKEYS.length, countRows()); // make sure data in regions, if in hlog only there is no data loss TEST_UTIL.getHBaseAdmin().flush(table.getName()); HRegionLocation location = tbl.getRegionLocation("B"); // Delete one region from meta, but not hdfs, unassign it. deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"), Bytes.toBytes("C"), true, true, false); // Create a new meta entry to fake it as a split parent. 
meta = new HTable(conf, HTableDescriptor.META_TABLEDESC.getTableName(), executorService); HRegionInfo hri = location.getRegionInfo(); HRegionInfo a = new HRegionInfo(tbl.getName(), Bytes.toBytes("B"), Bytes.toBytes("BM")); HRegionInfo b = new HRegionInfo(tbl.getName(), Bytes.toBytes("BM"), Bytes.toBytes("C")); hri.setOffline(true); hri.setSplit(true); MetaEditor.addRegionToMeta(meta, hri, a, b); meta.flushCommits(); TEST_UTIL.getHBaseAdmin().flush(TableName.META_TABLE_NAME.getName()); HBaseFsck hbck = doFsck(conf, false); assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.LINGERING_SPLIT_PARENT, ERROR_CODE.HOLE_IN_REGION_CHAIN }); // regular repair cannot fix lingering split parent hbck = doFsck(conf, true); assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.LINGERING_SPLIT_PARENT, ERROR_CODE.HOLE_IN_REGION_CHAIN }); assertFalse(hbck.shouldRerun()); hbck = doFsck(conf, false); assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.LINGERING_SPLIT_PARENT, ERROR_CODE.HOLE_IN_REGION_CHAIN }); // fix lingering split parent hbck = new HBaseFsck(conf); hbck.connect(); hbck.setDisplayFullReport(); // i.e. -details hbck.setTimeLag(0); hbck.setFixSplitParents(true); hbck.onlineHbck(); assertTrue(hbck.shouldRerun()); Get get = new Get(hri.getRegionName()); Result result = meta.get(get); assertTrue(result.getColumnCells(HConstants.CATALOG_FAMILY, HConstants.SPLITA_QUALIFIER).isEmpty()); assertTrue(result.getColumnCells(HConstants.CATALOG_FAMILY, HConstants.SPLITB_QUALIFIER).isEmpty()); TEST_UTIL.getHBaseAdmin().flush(TableName.META_TABLE_NAME.getName()); // fix other issues doFsck(conf, true); // check that all are fixed assertNoErrors(doFsck(conf, false)); assertEquals(ROWKEYS.length, countRows()); } finally { deleteTable(table); IOUtils.closeQuietly(meta); } } /** * Tests that LINGERING_SPLIT_PARENT is not erroneously reported for * valid cases where the daughters are there. */ @Test public void testValidLingeringSplitParent() throws Exception { TableName table = TableName.valueOf("testLingeringSplitParent"); HTable meta = null; try { setupTable(table); assertEquals(ROWKEYS.length, countRows()); // make sure data in regions, if in hlog only there is no data loss TEST_UTIL.getHBaseAdmin().flush(table.getName()); HRegionLocation location = tbl.getRegionLocation("B"); meta = new HTable(conf, HTableDescriptor.META_TABLEDESC.getTableName()); HRegionInfo hri = location.getRegionInfo(); // do a regular split HBaseAdmin admin = TEST_UTIL.getHBaseAdmin(); byte[] regionName = location.getRegionInfo().getRegionName(); admin.split(location.getRegionInfo().getRegionName(), Bytes.toBytes("BM")); TestEndToEndSplitTransaction.blockUntilRegionSplit(TEST_UTIL.getConfiguration(), 60000, regionName, true); // TODO: fixHdfsHoles does not work against splits, since the parent dir lingers on // for some time until children references are deleted. HBCK erroneously sees this as // overlapping regions HBaseFsck hbck = doFsck(conf, true, true, false, false, false, true, true, true, false, false, null); assertErrors(hbck, new ERROR_CODE[] {}); //no LINGERING_SPLIT_PARENT reported // assert that the split hbase:meta entry is still there. Get get = new Get(hri.getRegionName()); Result result = meta.get(get); assertNotNull(result); assertNotNull(HRegionInfo.getHRegionInfo(result)); assertEquals(ROWKEYS.length, countRows()); // assert that we still have the split regions assertEquals(tbl.getStartKeys().length, SPLITS.length + 1 + 1); //SPLITS + 1 is # regions pre-split. 
assertNoErrors(doFsck(conf, false)); } finally { deleteTable(table); IOUtils.closeQuietly(meta); } } /** * Split crashed after write to hbase:meta finished for the parent region, but * failed to write daughters (pre HBASE-7721 codebase) */ @Test(timeout = 75000) public void testSplitDaughtersNotInMeta() throws Exception { TableName table = TableName.valueOf("testSplitdaughtersNotInMeta"); HTable meta = null; try { setupTable(table); assertEquals(ROWKEYS.length, countRows()); // make sure data in regions, if in hlog only there is no data loss TEST_UTIL.getHBaseAdmin().flush(table.getName()); HRegionLocation location = tbl.getRegionLocation("B"); meta = new HTable(conf, HTableDescriptor.META_TABLEDESC.getTableName()); HRegionInfo hri = location.getRegionInfo(); // do a regular split HBaseAdmin admin = TEST_UTIL.getHBaseAdmin(); byte[] regionName = location.getRegionInfo().getRegionName(); admin.split(location.getRegionInfo().getRegionName(), Bytes.toBytes("BM")); TestEndToEndSplitTransaction.blockUntilRegionSplit(TEST_UTIL.getConfiguration(), 60000, regionName, true); PairOfSameType<HRegionInfo> daughters = HRegionInfo.getDaughterRegions(meta.get(new Get(regionName))); // Delete daughter regions from meta, but not hdfs, unassign it. Map<HRegionInfo, ServerName> hris = tbl.getRegionLocations(); undeployRegion(admin, hris.get(daughters.getFirst()), daughters.getFirst()); undeployRegion(admin, hris.get(daughters.getSecond()), daughters.getSecond()); meta.delete(new Delete(daughters.getFirst().getRegionName())); meta.delete(new Delete(daughters.getSecond().getRegionName())); meta.flushCommits(); HBaseFsck hbck = doFsck(conf, false); assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN }); //no LINGERING_SPLIT_PARENT // now fix it. The fix should not revert the region split, but add daughters to META hbck = doFsck(conf, true, true, false, false, false, false, false, false, false, false, null); assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN }); // assert that the split hbase:meta entry is still there. Get get = new Get(hri.getRegionName()); Result result = meta.get(get); assertNotNull(result); assertNotNull(HRegionInfo.getHRegionInfo(result)); assertEquals(ROWKEYS.length, countRows()); // assert that we still have the split regions assertEquals(tbl.getStartKeys().length, SPLITS.length + 1 + 1); //SPLITS + 1 is # regions pre-split. assertNoErrors(doFsck(conf, false)); //should be fixed by now } finally { deleteTable(table); IOUtils.closeQuietly(meta); } } /** * This creates and fixes a bad table with a missing region which is the 1st region -- hole in * meta and data missing in the fs. 
*/ @Test(timeout = 120000) public void testMissingFirstRegion() throws Exception { TableName table = TableName.valueOf("testMissingFirstRegion"); try { setupTable(table); assertEquals(ROWKEYS.length, countRows()); // Mess it up by leaving a hole in the assignment, meta, and hdfs data TEST_UTIL.getHBaseAdmin().disableTable(table); deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes(""), Bytes.toBytes("A"), true, true, true); TEST_UTIL.getHBaseAdmin().enableTable(table); HBaseFsck hbck = doFsck(conf, false); assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.FIRST_REGION_STARTKEY_NOT_EMPTY }); // fix hole doFsck(conf, true); // check that hole fixed assertNoErrors(doFsck(conf, false)); } finally { deleteTable(table); } } /** * This creates and fixes a bad table with a missing region which is the 1st region -- hole in * meta and data missing in the fs. */ @Test(timeout = 120000) public void testRegionDeployedNotInHdfs() throws Exception { TableName table = TableName.valueOf("testSingleRegionDeployedNotInHdfs"); try { setupTable(table); TEST_UTIL.getHBaseAdmin().flush(table.getName()); // Mess it up by deleting region dir deleteRegion(conf, tbl.getTableDescriptor(), HConstants.EMPTY_START_ROW, Bytes.toBytes("A"), false, false, true); HBaseFsck hbck = doFsck(conf, false); assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NOT_IN_HDFS }); // fix hole doFsck(conf, true); // check that hole fixed assertNoErrors(doFsck(conf, false)); } finally { deleteTable(table); } } /** * This creates and fixes a bad table with missing last region -- hole in meta and data missing in * the fs. */ @Test(timeout = 120000) public void testMissingLastRegion() throws Exception { TableName table = TableName.valueOf("testMissingLastRegion"); try { setupTable(table); assertEquals(ROWKEYS.length, countRows()); // Mess it up by leaving a hole in the assignment, meta, and hdfs data TEST_UTIL.getHBaseAdmin().disableTable(table); deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("C"), Bytes.toBytes(""), true, true, true); TEST_UTIL.getHBaseAdmin().enableTable(table); HBaseFsck hbck = doFsck(conf, false); assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.LAST_REGION_ENDKEY_NOT_EMPTY }); // fix hole doFsck(conf, true); // check that hole fixed assertNoErrors(doFsck(conf, false)); } finally { deleteTable(table); } } /** * Test -noHdfsChecking option can detect and fix assignments issue. */ @Test public void testFixAssignmentsAndNoHdfsChecking() throws Exception { TableName table = TableName.valueOf("testFixAssignmentsAndNoHdfsChecking"); try { setupTable(table); assertEquals(ROWKEYS.length, countRows()); // Mess it up by closing a region deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"), Bytes.toBytes("B"), true, false, false, false); // verify there is no other errors HBaseFsck hbck = doFsck(conf, false); assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NOT_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN }); // verify that noHdfsChecking report the same errors HBaseFsck fsck = new HBaseFsck(conf); fsck.connect(); fsck.setDisplayFullReport(); // i.e. -details fsck.setTimeLag(0); fsck.setCheckHdfs(false); fsck.onlineHbck(); assertErrors(fsck, new ERROR_CODE[] { ERROR_CODE.NOT_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN }); // verify that fixAssignments works fine with noHdfsChecking fsck = new HBaseFsck(conf); fsck.connect(); fsck.setDisplayFullReport(); // i.e. 
-details fsck.setTimeLag(0); fsck.setCheckHdfs(false); fsck.setFixAssignments(true); fsck.onlineHbck(); assertTrue(fsck.shouldRerun()); fsck.onlineHbck(); assertNoErrors(fsck); assertEquals(ROWKEYS.length, countRows()); } finally { deleteTable(table); } } /** * Test -noHdfsChecking option can detect region is not in meta but deployed. * However, it can not fix it without checking Hdfs because we need to get * the region info from Hdfs in this case, then to patch the meta. */ @Test public void testFixMetaNotWorkingWithNoHdfsChecking() throws Exception { TableName table = TableName.valueOf("testFixMetaNotWorkingWithNoHdfsChecking"); try { setupTable(table); assertEquals(ROWKEYS.length, countRows()); // Mess it up by deleting a region from the metadata deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"), Bytes.toBytes("B"), false, true, false, false); // verify there is no other errors HBaseFsck hbck = doFsck(conf, false); assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NOT_IN_META, ERROR_CODE.HOLE_IN_REGION_CHAIN }); // verify that noHdfsChecking report the same errors HBaseFsck fsck = new HBaseFsck(conf); fsck.connect(); fsck.setDisplayFullReport(); // i.e. -details fsck.setTimeLag(0); fsck.setCheckHdfs(false); fsck.onlineHbck(); assertErrors(fsck, new ERROR_CODE[] { ERROR_CODE.NOT_IN_META, ERROR_CODE.HOLE_IN_REGION_CHAIN }); // verify that fixMeta doesn't work with noHdfsChecking fsck = new HBaseFsck(conf); fsck.connect(); fsck.setDisplayFullReport(); // i.e. -details fsck.setTimeLag(0); fsck.setCheckHdfs(false); fsck.setFixAssignments(true); fsck.setFixMeta(true); fsck.onlineHbck(); assertFalse(fsck.shouldRerun()); assertErrors(fsck, new ERROR_CODE[] { ERROR_CODE.NOT_IN_META, ERROR_CODE.HOLE_IN_REGION_CHAIN }); } finally { deleteTable(table); } } /** * Test -fixHdfsHoles doesn't work with -noHdfsChecking option, * and -noHdfsChecking can't detect orphan Hdfs region. */ @Test public void testFixHdfsHolesNotWorkingWithNoHdfsChecking() throws Exception { TableName table = TableName.valueOf("testFixHdfsHolesNotWorkingWithNoHdfsChecking"); try { setupTable(table); assertEquals(ROWKEYS.length, countRows()); // Mess it up by creating an overlap in the metadata TEST_UTIL.getHBaseAdmin().disableTable(table); deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"), Bytes.toBytes("B"), true, true, false, true); TEST_UTIL.getHBaseAdmin().enableTable(table); HRegionInfo hriOverlap = createRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A2"), Bytes.toBytes("B")); TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriOverlap); TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager().waitForAssignment(hriOverlap); ServerName server = regionStates.getRegionServerOfRegion(hriOverlap); TEST_UTIL.assertRegionOnServer(hriOverlap, server, REGION_ONLINE_TIMEOUT); HBaseFsck hbck = doFsck(conf, false); assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.ORPHAN_HDFS_REGION, ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN }); // verify that noHdfsChecking can't detect ORPHAN_HDFS_REGION HBaseFsck fsck = new HBaseFsck(conf); fsck.connect(); fsck.setDisplayFullReport(); // i.e. -details fsck.setTimeLag(0); fsck.setCheckHdfs(false); fsck.onlineHbck(); assertErrors(fsck, new ERROR_CODE[] { ERROR_CODE.HOLE_IN_REGION_CHAIN }); // verify that fixHdfsHoles doesn't work with noHdfsChecking fsck = new HBaseFsck(conf); fsck.connect(); fsck.setDisplayFullReport(); // i.e. 
-details fsck.setTimeLag(0); fsck.setCheckHdfs(false); fsck.setFixHdfsHoles(true); fsck.setFixHdfsOverlaps(true); fsck.setFixHdfsOrphans(true); fsck.onlineHbck(); assertFalse(fsck.shouldRerun()); assertErrors(fsck, new ERROR_CODE[] { ERROR_CODE.HOLE_IN_REGION_CHAIN }); } finally { if (TEST_UTIL.getHBaseAdmin().isTableDisabled(table)) { TEST_UTIL.getHBaseAdmin().enableTable(table); } deleteTable(table); } } /** * We don't have an easy way to verify that a flush completed, so we loop until we find a * legitimate hfile and return it. * @param fs * @param table * @return Path of a flushed hfile. * @throws IOException */ Path getFlushedHFile(FileSystem fs, TableName table) throws IOException { Path tableDir = FSUtils.getTableDir(FSUtils.getRootDir(conf), table); Path regionDir = FSUtils.getRegionDirs(fs, tableDir).get(0); Path famDir = new Path(regionDir, FAM_STR); // keep doing this until we get a legit hfile while (true) { FileStatus[] hfFss = fs.listStatus(famDir); if (hfFss.length == 0) { continue; } for (FileStatus hfs : hfFss) { if (!hfs.isDirectory()) { return hfs.getPath(); } } } } /** * This creates a table and then corrupts an hfile. Hbck should quarantine the file. */ @Test(timeout = 180000) public void testQuarantineCorruptHFile() throws Exception { TableName table = TableName.valueOf(name.getMethodName()); try { setupTable(table); assertEquals(ROWKEYS.length, countRows()); TEST_UTIL.getHBaseAdmin().flush(table.getName()); // flush is async. FileSystem fs = FileSystem.get(conf); Path hfile = getFlushedHFile(fs, table); // Mess it up by leaving a hole in the assignment, meta, and hdfs data TEST_UTIL.getHBaseAdmin().disableTable(table); // create new corrupt file called deadbeef (valid hfile name) Path corrupt = new Path(hfile.getParent(), "deadbeef"); TestHFile.truncateFile(fs, hfile, corrupt); LOG.info("Created corrupted file " + corrupt); HBaseFsck.debugLsr(conf, FSUtils.getRootDir(conf)); // we cannot enable here because enable never finished due to the corrupt region. HBaseFsck res = HbckTestingUtil.doHFileQuarantine(conf, table); assertEquals(res.getRetCode(), 0); HFileCorruptionChecker hfcc = res.getHFilecorruptionChecker(); assertEquals(hfcc.getHFilesChecked(), 5); assertEquals(hfcc.getCorrupted().size(), 1); assertEquals(hfcc.getFailures().size(), 0); assertEquals(hfcc.getQuarantined().size(), 1); assertEquals(hfcc.getMissing().size(), 0); // Its been fixed, verify that we can enable. TEST_UTIL.getHBaseAdmin().enableTable(table); } finally { deleteTable(table); } } /** * Test that use this should have a timeout, because this method could potentially wait forever. */ private void doQuarantineTest(TableName table, HBaseFsck hbck, int check, int corrupt, int fail, int quar, int missing) throws Exception { try { setupTable(table); assertEquals(ROWKEYS.length, countRows()); TEST_UTIL.getHBaseAdmin().flush(table.getName()); // flush is async. 
/**
 * Tests that use this helper should have a timeout, because this method could potentially wait
 * forever.
 */
private void doQuarantineTest(TableName table, HBaseFsck hbck, int check, int corrupt,
    int fail, int quar, int missing) throws Exception {
  try {
    setupTable(table);
    assertEquals(ROWKEYS.length, countRows());
    TEST_UTIL.getHBaseAdmin().flush(table.getName()); // flush is async.

    // Mess it up by leaving a hole in the assignment, meta, and hdfs data
    TEST_UTIL.getHBaseAdmin().disableTable(table);

    String[] args = { "-sidelineCorruptHFiles", "-repairHoles", "-ignorePreCheckPermission",
      table.getNameAsString() };
    ExecutorService exec = new ScheduledThreadPoolExecutor(10);
    HBaseFsck res = hbck.exec(exec, args);

    HFileCorruptionChecker hfcc = res.getHFilecorruptionChecker();
    assertEquals(hfcc.getHFilesChecked(), check);
    assertEquals(hfcc.getCorrupted().size(), corrupt);
    assertEquals(hfcc.getFailures().size(), fail);
    assertEquals(hfcc.getQuarantined().size(), quar);
    assertEquals(hfcc.getMissing().size(), missing);

    // it's been fixed, verify that we can enable
    HBaseAdmin admin = TEST_UTIL.getHBaseAdmin();
    admin.enableTableAsync(table);
    while (!admin.isTableEnabled(table)) {
      try {
        Thread.sleep(250);
      } catch (InterruptedException e) {
        e.printStackTrace();
        fail("Interrupted when trying to enable table " + table);
      }
    }
  } finally {
    deleteTable(table);
  }
}

/**
 * This creates a table and simulates the race situation where a concurrent compaction or split
 * has removed an hfile after the corruption checker learned about it.
 */
@Test(timeout = 180000)
public void testQuarantineMissingHFile() throws Exception {
  TableName table = TableName.valueOf(name.getMethodName());
  ExecutorService exec = new ScheduledThreadPoolExecutor(10);

  // inject a fault in the hfcc created.
  final FileSystem fs = FileSystem.get(conf);
  HBaseFsck hbck = new HBaseFsck(conf, exec) {
    @Override
    public HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles)
        throws IOException {
      return new HFileCorruptionChecker(conf, executor, sidelineCorruptHFiles) {
        boolean attemptedFirstHFile = false;

        @Override
        protected void checkHFile(Path p) throws IOException {
          if (!attemptedFirstHFile) {
            attemptedFirstHFile = true;
            assertTrue(fs.delete(p, true)); // make sure delete happened.
          }
          super.checkHFile(p);
        }
      };
    }
  };
  doQuarantineTest(table, hbck, 4, 0, 0, 0, 1); // 4 attempted, but 1 missing.
}

/**
 * This creates a table and simulates the race situation where a concurrent compaction or split
 * has removed a colfam dir before the corruption checker got to it.
 */
// Disabled because it fails sporadically. Is this test right? Timing-wise, there could be no
// files in a column family on initial creation -- as suggested by Matteo.
@Ignore
@Test(timeout = 180000)
public void testQuarantineMissingFamdir() throws Exception {
  TableName table = TableName.valueOf(name.getMethodName());
  ExecutorService exec = new ScheduledThreadPoolExecutor(10);

  // inject a fault in the hfcc created.
  final FileSystem fs = FileSystem.get(conf);
  HBaseFsck hbck = new HBaseFsck(conf, exec) {
    @Override
    public HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles)
        throws IOException {
      return new HFileCorruptionChecker(conf, executor, sidelineCorruptHFiles) {
        boolean attemptedFirstFamDir = false;

        @Override
        protected void checkColFamDir(Path p) throws IOException {
          if (!attemptedFirstFamDir) {
            attemptedFirstFamDir = true;
            assertTrue(fs.delete(p, true)); // make sure delete happened.
          }
          super.checkColFamDir(p);
        }
      };
    }
  };
  doQuarantineTest(table, hbck, 3, 0, 0, 0, 1);
}

/**
 * This creates a table and simulates the race situation where a concurrent compaction or split
 * has removed a region dir before the corruption checker got to it.
 */
@Test(timeout = 180000)
public void testQuarantineMissingRegionDir() throws Exception {
  TableName table = TableName.valueOf(name.getMethodName());
  ExecutorService exec = new ScheduledThreadPoolExecutor(10);

  // inject a fault in the hfcc created.
  final FileSystem fs = FileSystem.get(conf);
  HBaseFsck hbck = new HBaseFsck(conf, exec) {
    @Override
    public HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles)
        throws IOException {
      return new HFileCorruptionChecker(conf, executor, sidelineCorruptHFiles) {
        boolean attemptedFirstRegionDir = false;

        @Override
        protected void checkRegionDir(Path p) throws IOException {
          if (!attemptedFirstRegionDir) {
            attemptedFirstRegionDir = true;
            assertTrue(fs.delete(p, true)); // make sure delete happened.
          }
          super.checkRegionDir(p);
        }
      };
    }
  };
  doQuarantineTest(table, hbck, 3, 0, 0, 0, 1);
}

/**
 * Test fixing a lingering reference file.
 */
@Test
public void testLingeringReferenceFile() throws Exception {
  TableName table = TableName.valueOf("testLingeringReferenceFile");
  try {
    setupTable(table);
    assertEquals(ROWKEYS.length, countRows());

    // Mess it up by creating a fake reference file
    FileSystem fs = FileSystem.get(conf);
    Path tableDir = FSUtils.getTableDir(FSUtils.getRootDir(conf), table);
    Path regionDir = FSUtils.getRegionDirs(fs, tableDir).get(0);
    Path famDir = new Path(regionDir, FAM_STR);
    Path fakeReferenceFile = new Path(famDir, "fbce357483ceea.12144538");
    fs.create(fakeReferenceFile);

    HBaseFsck hbck = doFsck(conf, false);
    assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.LINGERING_REFERENCE_HFILE });
    // fix reference file
    doFsck(conf, true);
    // check that reference file was fixed
    assertNoErrors(doFsck(conf, false));
  } finally {
    deleteTable(table);
  }
}

/**
 * Test missing REGIONINFO_QUALIFIER in hbase:meta
 */
@Test
public void testMissingRegionInfoQualifier() throws Exception {
  TableName table = TableName.valueOf("testMissingRegionInfoQualifier");
  try {
    setupTable(table);

    // Mess it up by removing the RegionInfo for one region.
    final List<Delete> deletes = new LinkedList<Delete>();
    HTable meta = new HTable(conf, HTableDescriptor.META_TABLEDESC.getTableName());
    MetaScanner.metaScan(conf, new MetaScanner.MetaScannerVisitor() {

      @Override
      public boolean processRow(Result rowResult) throws IOException {
        HRegionInfo hri = MetaScanner.getHRegionInfo(rowResult);
        if (hri != null && !hri.getTable().isSystemTable()) {
          Delete delete = new Delete(rowResult.getRow());
          delete.deleteColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER);
          deletes.add(delete);
        }
        return true;
      }

      @Override
      public void close() throws IOException {
      }
    });
    meta.delete(deletes);

    // Mess it up by creating a fake hbase:meta entry with no associated RegionInfo
    meta.put(new Put(Bytes.toBytes(table + ",,1361911384013.810e28f59a57da91c66"))
      .add(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER, Bytes.toBytes("node1:60020")));
    meta.put(new Put(Bytes.toBytes(table + ",,1361911384013.810e28f59a57da91c66"))
      .add(HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER,
        Bytes.toBytes(1362150791183L)));
    meta.close();

    HBaseFsck hbck = doFsck(conf, false);
    assertTrue(hbck.getErrors().getErrorList().contains(ERROR_CODE.EMPTY_META_CELL));

    // fix the empty meta cells
    hbck = doFsck(conf, true);

    // check that the empty meta cells were fixed
    assertFalse(hbck.getErrors().getErrorList().contains(ERROR_CODE.EMPTY_META_CELL));
  } finally {
    deleteTable(table);
  }
}

/**
 * Test the pluggable error reporter. It can be plugged in
 * from a system property or the configuration.
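 * For example, the test below swaps in a custom reporter with
 * conf.set("hbasefsck.errorreporter", MockErrorReporter.class.getName());
 * any class implementing ErrorReporter can be supplied the same way.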
 */
@Test
public void testErrorReporter() throws Exception {
  try {
    MockErrorReporter.calledCount = 0;
    doFsck(conf, false);
    assertEquals(MockErrorReporter.calledCount, 0);

    conf.set("hbasefsck.errorreporter", MockErrorReporter.class.getName());
    doFsck(conf, false);
    assertTrue(MockErrorReporter.calledCount > 20);
  } finally {
    conf.set("hbasefsck.errorreporter", PrintingErrorReporter.class.getName());
    MockErrorReporter.calledCount = 0;
  }
}

static class MockErrorReporter implements ErrorReporter {
  static int calledCount = 0;

  @Override
  public void clear() {
    calledCount++;
  }

  @Override
  public void report(String message) {
    calledCount++;
  }

  @Override
  public void reportError(String message) {
    calledCount++;
  }

  @Override
  public void reportError(ERROR_CODE errorCode, String message) {
    calledCount++;
  }

  @Override
  public void reportError(ERROR_CODE errorCode, String message, TableInfo table) {
    calledCount++;
  }

  @Override
  public void reportError(ERROR_CODE errorCode, String message, TableInfo table, HbckInfo info) {
    calledCount++;
  }

  @Override
  public void reportError(ERROR_CODE errorCode, String message,
      TableInfo table, HbckInfo info1, HbckInfo info2) {
    calledCount++;
  }

  @Override
  public int summarize() {
    return ++calledCount;
  }

  @Override
  public void detail(String details) {
    calledCount++;
  }

  @Override
  public ArrayList<ERROR_CODE> getErrorList() {
    calledCount++;
    return new ArrayList<ERROR_CODE>();
  }

  @Override
  public void progress() {
    calledCount++;
  }

  @Override
  public void print(String message) {
    calledCount++;
  }

  @Override
  public void resetErrors() {
    calledCount++;
  }

  @Override
  public boolean tableHasErrors(TableInfo table) {
    calledCount++;
    return false;
  }
}

@Test(timeout = 60000)
public void testCheckTableLocks() throws Exception {
  IncrementingEnvironmentEdge edge = new IncrementingEnvironmentEdge(0);
  EnvironmentEdgeManager.injectEdge(edge);

  // check no errors
  HBaseFsck hbck = doFsck(conf, false);
  assertNoErrors(hbck);

  ServerName mockName = ServerName.valueOf("localhost", 60000, 1);

  // obtain one lock
  final TableLockManager tableLockManager =
    TableLockManager.createTableLockManager(conf, TEST_UTIL.getZooKeeperWatcher(), mockName);
  TableLock writeLock = tableLockManager.writeLock(TableName.valueOf("foo"),
    "testCheckTableLocks");
  writeLock.acquire();
  hbck = doFsck(conf, false);
  assertNoErrors(hbck); // should not have expired, no problems

  edge.incrementTime(conf.getLong(TableLockManager.TABLE_LOCK_EXPIRE_TIMEOUT,
    TableLockManager.DEFAULT_TABLE_LOCK_EXPIRE_TIMEOUT_MS)); // let table lock expire

  hbck = doFsck(conf, false);
  assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.EXPIRED_TABLE_LOCK });
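  // Take a second write lock on the same table from another thread; it queues up behind the
  // expired lock above, so the next hbck run should see one expired lock and one live one.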
  final CountDownLatch latch = new CountDownLatch(1);
  new Thread() {
    @Override
    public void run() {
      TableLock readLock = tableLockManager.writeLock(TableName.valueOf("foo"),
        "testCheckTableLocks");
      try {
        latch.countDown();
        readLock.acquire();
      } catch (IOException ex) {
        fail();
      } catch (IllegalStateException ex) {
        return; // expected, since this will be reaped under us.
      }
      fail("should not have come here");
    };
  }.start();

  latch.await(); // wait until thread starts
  Threads.sleep(300); // wait some more to ensure writeLock.acquire() is called

  hbck = doFsck(conf, false);
  // still one expired, one not-expired
  assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.EXPIRED_TABLE_LOCK });

  edge.incrementTime(conf.getLong(TableLockManager.TABLE_LOCK_EXPIRE_TIMEOUT,
    TableLockManager.DEFAULT_TABLE_LOCK_EXPIRE_TIMEOUT_MS)); // let table lock expire

  hbck = doFsck(conf, false);
  // both are expired
  assertErrors(hbck, new ERROR_CODE[] {
    ERROR_CODE.EXPIRED_TABLE_LOCK, ERROR_CODE.EXPIRED_TABLE_LOCK });

  conf.setLong(TableLockManager.TABLE_LOCK_EXPIRE_TIMEOUT, 1);
  // reaping from ZKInterProcessWriteLock uses znode cTime,
  // which is not injectable through EnvironmentEdge
  Threads.sleep(10);
  hbck = doFsck(conf, true); // now fix both cases

  hbck = doFsck(conf, false);
  assertNoErrors(hbck);

  // ensure that locks are deleted
  writeLock = tableLockManager.writeLock(TableName.valueOf("foo"),
    "should acquire without blocking");
  writeLock.acquire(); // this should not block.
  writeLock.release(); // release for clean state
}

@Test
public void testMetaOffline() throws Exception {
  // check no errors
  HBaseFsck hbck = doFsck(conf, false);
  assertNoErrors(hbck);
  deleteMetaRegion(conf, true, false, false);
  hbck = doFsck(conf, false);
  // ERROR_CODE.UNKNOWN shows up because we reportError with a message about the hbase:meta
  // inconsistency and whether we will be fixing it or not.
  assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NO_META_REGION, ERROR_CODE.UNKNOWN });
  hbck = doFsck(conf, true);
  assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NO_META_REGION, ERROR_CODE.UNKNOWN });
  hbck = doFsck(conf, false);
  assertNoErrors(hbck);
}

private void deleteMetaRegion(Configuration conf, boolean unassign, boolean hdfs,
    boolean regionInfoOnly) throws IOException, InterruptedException {
  HConnection connection = HConnectionManager.getConnection(conf);
  HRegionLocation metaLocation = connection.locateRegion(TableName.META_TABLE_NAME,
    HConstants.EMPTY_START_ROW);
  ServerName hsa = metaLocation.getServerName();
  HRegionInfo hri = metaLocation.getRegionInfo();
  if (unassign) {
    LOG.info("Undeploying meta region " + hri + " from server " + hsa);
    undeployRegion(new HBaseAdmin(conf), hsa, hri);
  }

  if (regionInfoOnly) {
    LOG.info("deleting hdfs .regioninfo data: " + hri.toString() + hsa.toString());
    Path rootDir = FSUtils.getRootDir(conf);
    FileSystem fs = rootDir.getFileSystem(conf);
    Path p = new Path(rootDir + "/" + HTableDescriptor.META_TABLEDESC.getNameAsString(),
      hri.getEncodedName());
    Path hriPath = new Path(p, HRegionFileSystem.REGION_INFO_FILE);
    fs.delete(hriPath, true);
  }

  if (hdfs) {
    LOG.info("deleting hdfs data: " + hri.toString() + hsa.toString());
    Path rootDir = FSUtils.getRootDir(conf);
    FileSystem fs = rootDir.getFileSystem(conf);
    Path p = new Path(rootDir + "/" + HTableDescriptor.META_TABLEDESC.getNameAsString(),
      hri.getEncodedName());
    HBaseFsck.debugLsr(conf, p);
    boolean success = fs.delete(p, true);
    LOG.info("Deleted " + p + " successfully? " + success);
    HBaseFsck.debugLsr(conf, p);
  }
}

@Test
public void testTableWithNoRegions() throws Exception {
  // We might end up with empty regions in a table
  // see also testNoHdfsTable()
  TableName table = TableName.valueOf(name.getMethodName());
  try {
    // create table with one region
    HTableDescriptor desc = new HTableDescriptor(table);
    HColumnDescriptor hcd = new HColumnDescriptor(Bytes.toString(FAM));
    desc.addFamily(hcd); // If a table has no CF's it doesn't get checked
    TEST_UTIL.getHBaseAdmin().createTable(desc);
    tbl = new HTable(TEST_UTIL.getConfiguration(), table, executorService);

    // Mess it up by leaving a hole in the assignment, meta, and hdfs data
    deleteRegion(conf, tbl.getTableDescriptor(), HConstants.EMPTY_START_ROW,
      HConstants.EMPTY_END_ROW, false, false, true);

    HBaseFsck hbck = doFsck(conf, false);
    assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NOT_IN_HDFS });

    doFsck(conf, true); // fix hole
    doFsck(conf, true); // check that hole fixed
    assertNoErrors(doFsck(conf, false));
  } finally {
    deleteTable(table);
  }
}

@Test
public void testHbckAfterRegionMerge() throws Exception {
  TableName table = TableName.valueOf("testMergeRegionFilesInHdfs");
  HTable meta = null;
  try {
    // disable CatalogJanitor
    TEST_UTIL.getHBaseCluster().getMaster().setCatalogJanitorEnabled(false);
    setupTable(table);
    assertEquals(ROWKEYS.length, countRows());

    // make sure data is in the regions; if it were only in the hlog it could be lost
    TEST_UTIL.getHBaseAdmin().flush(table.getName());
    HRegionInfo region1 = tbl.getRegionLocation("A").getRegionInfo();
    HRegionInfo region2 = tbl.getRegionLocation("B").getRegionInfo();

    int regionCountBeforeMerge = tbl.getRegionLocations().size();

    assertNotEquals(region1, region2);

    // do a region merge
    HBaseAdmin admin = TEST_UTIL.getHBaseAdmin();
    admin.mergeRegions(region1.getEncodedNameAsBytes(), region2.getEncodedNameAsBytes(), false);

    // wait until the regions are merged
    long timeout = System.currentTimeMillis() + 30 * 1000;
    while (true) {
      if (tbl.getRegionLocations().size() < regionCountBeforeMerge) {
        break;
      } else if (System.currentTimeMillis() > timeout) {
        fail("Timed out waiting on region " + region1.getEncodedName() + " and "
          + region2.getEncodedName() + " to be merged");
      }
      Thread.sleep(10);
    }

    assertEquals(ROWKEYS.length, countRows());

    HBaseFsck hbck = doFsck(conf, false);
    assertNoErrors(hbck); // no errors
  } finally {
    TEST_UTIL.getHBaseCluster().getMaster().setCatalogJanitorEnabled(true);
    deleteTable(table);
    IOUtils.closeQuietly(meta);
  }
}

@Test
public void testRegionBoundariesCheck() throws Exception {
  HBaseFsck hbck = doFsck(conf, false);
  assertNoErrors(hbck); // no errors
  try {
    hbck.checkRegionBoundaries();
  } catch (IllegalArgumentException e) {
    if (e.getMessage().endsWith("not a valid DFS filename.")) {
      fail("Table directory path is not valid." + e.getMessage());
    }
  }
}

@org.junit.Rule
public TestName name = new TestName();
}