/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs.server.datanode;

import com.google.common.base.Joiner;
import com.google.common.collect.Lists;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.ReconfigurationException;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.BlockMissingException;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.DFSTestUtil;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.MiniDFSNNTopology;
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
import org.apache.hadoop.hdfs.protocolPB.DatanodeProtocolClientSideTranslatorPB;
import org.apache.hadoop.hdfs.server.common.Storage;
import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi;
import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeSpi;
import org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.FsDatasetTestUtil;
import org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.FsVolumeImpl;
import org.apache.hadoop.hdfs.server.protocol.BlockReportContext;
import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
import org.apache.hadoop.hdfs.server.protocol.DatanodeStorage;
import org.apache.hadoop.hdfs.server.protocol.StorageBlockReport;
import org.apache.hadoop.test.GenericTestUtils;
import org.junit.After;
import org.junit.Test;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.concurrent.BrokenBarrierException;
import java.util.concurrent.CyclicBarrier;
import java.util.concurrent.TimeoutException;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.mockito.Mockito;
import org.mockito.invocation.InvocationOnMock;
import org.mockito.stubbing.Answer;

import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY;
import org.apache.hadoop.hdfs.server.protocol.BlockReport;
import static org.hamcrest.CoreMatchers.anyOf;
import static org.hamcrest.CoreMatchers.containsString;
import static org.hamcrest.CoreMatchers.not;
import static org.hamcrest.core.Is.is;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertThat;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import org.junit.Ignore;
import static org.mockito.Matchers.any;
import static org.mockito.Matchers.anyString;
import static org.mockito.Mockito.doAnswer;
import static org.mockito.Mockito.timeout;
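/**
 * Tests hot-swapping of DataNode data volumes: entries are added to and
 * removed from dfs.datanode.data.dir on a running DataNode via
 * DataNode#reconfigurePropertyImpl, and the tests then verify block
 * placement across volumes, block reports, re-replication, and storage
 * lock handling.
 */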
public class TestDataNodeHotSwapVolumes {
  private static final Log LOG =
      LogFactory.getLog(TestDataNodeHotSwapVolumes.class);
  private static final int BLOCK_SIZE = 512;
  private MiniDFSCluster cluster;

  @After
  public void tearDown() {
    shutdown();
  }

  private void startDFSCluster(int numNameNodes, int numDataNodes)
      throws IOException {
    shutdown();
    Configuration conf = new Configuration();
    conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);

    /*
     * Lower the DN heartbeat, DF rate, and recheck interval to one second
     * so state about failures and datanode death propagates faster.
     */
    conf.setInt(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 1);
    conf.setInt(DFSConfigKeys.DFS_DF_INTERVAL_KEY, 1000);
    conf.setInt(DFSConfigKeys.DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_KEY, 1000);
    /* Allow 1 volume failure */
    conf.setInt(DFSConfigKeys.DFS_DATANODE_FAILED_VOLUMES_TOLERATED_KEY, 1);

    MiniDFSNNTopology nnTopology =
        MiniDFSNNTopology.simpleHOPSTopology(numNameNodes);

    cluster = new MiniDFSCluster.Builder(conf)
        .nnTopology(nnTopology)
        .numDataNodes(numDataNodes)
        .build();
    cluster.waitActive();
  }

  private void shutdown() {
    if (cluster != null) {
      cluster.shutdown();
      cluster = null;
    }
  }

  private void createFile(Path path, int numBlocks)
      throws IOException, InterruptedException, TimeoutException {
    final short replicateFactor = 1;
    createFile(path, numBlocks, replicateFactor);
  }

  private void createFile(Path path, int numBlocks, short replicateFactor)
      throws IOException, InterruptedException, TimeoutException {
    createFile(0, path, numBlocks, replicateFactor);
  }

  private void createFile(int fsIdx, Path path, int numBlocks)
      throws IOException, InterruptedException, TimeoutException {
    final short replicateFactor = 1;
    createFile(fsIdx, path, numBlocks, replicateFactor);
  }

  private void createFile(int fsIdx, Path path, int numBlocks,
      short replicateFactor)
      throws IOException, TimeoutException, InterruptedException {
    final int seed = 0;
    final DistributedFileSystem fs = cluster.getFileSystem(fsIdx);
    DFSTestUtil.createFile(fs, path, BLOCK_SIZE * numBlocks,
        replicateFactor, seed);
    DFSTestUtil.waitReplication(fs, path, replicateFactor);
  }

  /**
   * Verify whether a file has enough content.
   */
  private static void verifyFileLength(FileSystem fs, Path path, int numBlocks)
      throws IOException {
    FileStatus status = fs.getFileStatus(path);
    assertEquals(numBlocks * BLOCK_SIZE, status.getLen());
  }

  /** Return the number of replicas for a given block in the file. */
  private static int getNumReplicas(FileSystem fs, Path file, int blockIdx)
      throws IOException {
    BlockLocation locs[] = fs.getFileBlockLocations(file, 0, Long.MAX_VALUE);
    return locs.length < blockIdx + 1 ? 0 : locs[blockIdx].getNames().length;
  }

  /**
   * Wait for the block to have exactly the expected number of replicas.
   */
  private static void waitReplication(FileSystem fs, Path file, int blockIdx,
      int numReplicas)
      throws IOException, TimeoutException, InterruptedException {
    int attempts = 50;  // Wait 5 seconds.
    while (attempts > 0) {
      int actualReplicas = getNumReplicas(fs, file, blockIdx);
      if (actualReplicas == numReplicas) {
        return;
      }
      System.out.printf("Block %d of file %s has %d replicas (desired %d).\n",
          blockIdx, file.toString(), actualReplicas, numReplicas);
      Thread.sleep(100);
      attempts--;
    }
    throw new TimeoutException("Timed out waiting for the " + blockIdx +
        "-th block of " + file + " to have " + numReplicas + " replicas.");
  }

  /** Parses data dirs from DataNode's configuration. */
  private static List<String> getDataDirs(DataNode datanode) {
    return new ArrayList<String>(datanode.getConf().getTrimmedStringCollection(
        DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY));
  }

  /** Force the DataNode to report missing blocks immediately. */
  private static void triggerDeleteReport(DataNode datanode)
      throws IOException {
    datanode.scheduleAllBlockReport(0);
    DataNodeTestUtils.triggerDeletionReport(datanode);
  }

  @Test
  public void testParseChangedVolumes() throws IOException {
    startDFSCluster(1, 1);
    DataNode dn = cluster.getDataNodes().get(0);
    Configuration conf = dn.getConf();

    String oldPaths = conf.get(DFS_DATANODE_DATA_DIR_KEY);
    List<StorageLocation> oldLocations = new ArrayList<StorageLocation>();
    for (String path : oldPaths.split(",")) {
      oldLocations.add(StorageLocation.parse(path));
    }
    assertFalse(oldLocations.isEmpty());

    String newPaths = oldLocations.get(0).getFile().getAbsolutePath() +
        ",/foo/path1,/foo/path2";
    DataNode.ChangedVolumes changedVolumes = dn.parseChangedVolumes(newPaths);
    List<StorageLocation> newVolumes = changedVolumes.newLocations;
    assertEquals(2, newVolumes.size());
    assertEquals(new File("/foo/path1").getAbsolutePath(),
        newVolumes.get(0).getFile().getAbsolutePath());
    assertEquals(new File("/foo/path2").getAbsolutePath(),
        newVolumes.get(1).getFile().getAbsolutePath());

    List<StorageLocation> removedVolumes = changedVolumes.deactivateLocations;
    assertEquals(1, removedVolumes.size());
    assertEquals(oldLocations.get(1).getFile(),
        removedVolumes.get(0).getFile());

    assertEquals(1, changedVolumes.unchangedLocations.size());
    assertEquals(oldLocations.get(0).getFile(),
        changedVolumes.unchangedLocations.get(0).getFile());
  }

  @Test
  public void testParseChangedVolumesFailures() throws IOException {
    startDFSCluster(1, 1);
    DataNode dn = cluster.getDataNodes().get(0);
    try {
      dn.parseChangedVolumes("");
      fail("Should throw IOException: empty inputs.");
    } catch (IOException e) {
      GenericTestUtils.assertExceptionContains("No directory is specified.", e);
    }
  }

  /** Add volumes to the first DataNode. */
  private void addVolumes(int numNewVolumes)
      throws ReconfigurationException, IOException {
    File dataDir = new File(cluster.getDataDirectory());
    DataNode dn = cluster.getDataNodes().get(0);  // First DataNode.
    Configuration conf = dn.getConf();
    String oldDataDir = conf.get(DFS_DATANODE_DATA_DIR_KEY);

    List<File> newVolumeDirs = new ArrayList<File>();
    StringBuilder newDataDirBuf = new StringBuilder(oldDataDir);
    int startIdx = oldDataDir.split(",").length + 1;
    // Find the first available (non-taken) directory name for data volume.
    while (true) {
      File volumeDir = new File(dataDir, "data" + startIdx);
      if (!volumeDir.exists()) {
        break;
      }
      startIdx++;
    }
    for (int i = startIdx; i < startIdx + numNewVolumes; i++) {
      File volumeDir = new File(dataDir, "data" + String.valueOf(i));
      newVolumeDirs.add(volumeDir);
      volumeDir.mkdirs();
      newDataDirBuf.append(",");
      newDataDirBuf.append(
          StorageLocation.parse(volumeDir.toString()).toString());
    }

    String newDataDir = newDataDirBuf.toString();
    dn.reconfigurePropertyImpl(DFS_DATANODE_DATA_DIR_KEY, newDataDir);

    // Verify the configuration value is appropriately set.
    String[] effectiveDataDirs = conf.get(DFS_DATANODE_DATA_DIR_KEY).split(",");
    String[] expectDataDirs = newDataDir.split(",");
    assertEquals(expectDataDirs.length, effectiveDataDirs.length);
    for (int i = 0; i < expectDataDirs.length; i++) {
      StorageLocation expectLocation = StorageLocation.parse(expectDataDirs[i]);
      StorageLocation effectiveLocation =
          StorageLocation.parse(effectiveDataDirs[i]);
      assertEquals(expectLocation.getStorageType(),
          effectiveLocation.getStorageType());
      assertEquals(expectLocation.getFile().getCanonicalFile(),
          effectiveLocation.getFile().getCanonicalFile());
    }

    // Check that all newly created volumes are appropriately formatted.
    for (File volumeDir : newVolumeDirs) {
      File curDir = new File(volumeDir, "current");
      assertTrue(curDir.exists());
      assertTrue(curDir.isDirectory());
    }
  }

  /** Return, for every DataNode, the number of blocks reported per volume
   *  to the given namesystem. */
  private List<List<Integer>> getNumBlocksReport(int namesystemIdx) {
    List<List<Integer>> results = new ArrayList<List<Integer>>();
    final String bpid = cluster.getNamesystem(namesystemIdx).getBlockPoolId();
    List<Map<DatanodeStorage, BlockReport>> blockReports =
        cluster.getAllBlockReports(bpid);
    for (Map<DatanodeStorage, BlockReport> datanodeReport : blockReports) {
      List<Integer> numBlocksPerDN = new ArrayList<Integer>();
      for (BlockReport blocks : datanodeReport.values()) {
        numBlocksPerDN.add(blocks.getNumberOfBlocks());
      }
      results.add(numBlocksPerDN);
    }
    return results;
  }

  /**
   * Test adding one volume on a running MiniDFSCluster with only one NameNode.
   */
  @Test(timeout = 60000)
  public void testAddOneNewVolume()
      throws IOException, ReconfigurationException,
      InterruptedException, TimeoutException {
    startDFSCluster(1, 1);
    String bpid = cluster.getNamesystem().getBlockPoolId();
    final int numBlocks = 10;

    addVolumes(1);

    Path testFile = new Path("/test");
    createFile(testFile, numBlocks);

    List<Map<DatanodeStorage, BlockReport>> blockReports =
        cluster.getAllBlockReports(bpid);
    assertEquals(1, blockReports.size());  // 1 DataNode
    assertEquals(3, blockReports.get(0).size());  // 3 volumes

    // FSVolumeList uses Round-Robin block chooser by default. Thus the new
    // blocks should be evenly located in all volumes.
    int minNumBlocks = Integer.MAX_VALUE;
    int maxNumBlocks = Integer.MIN_VALUE;
    for (BlockReport blockList : blockReports.get(0).values()) {
      minNumBlocks = Math.min(minNumBlocks, blockList.getNumberOfBlocks());
      maxNumBlocks = Math.max(maxNumBlocks, blockList.getNumberOfBlocks());
    }
    assertTrue(Math.abs(maxNumBlocks - minNumBlocks) <= 1);
    verifyFileLength(cluster.getFileSystem(), testFile, numBlocks);
  }

  @Test(timeout = 60000)
  public void testAddVolumesDuringWrite()
      throws IOException, InterruptedException, TimeoutException,
      ReconfigurationException {
    startDFSCluster(1, 1);
    String bpid = cluster.getNamesystem().getBlockPoolId();
    Path testFile = new Path("/test");
    createFile(testFile, 4);  // Each volume has 2 blocks.

    addVolumes(2);

    // Continue to write the same file, thus the new volumes will have blocks.
    DFSTestUtil.appendFile(cluster.getFileSystem(), testFile, BLOCK_SIZE * 8);
    verifyFileLength(cluster.getFileSystem(), testFile, 8 + 4);

    // After appending data, there should be [2, 2, 4, 4] blocks in each volume
    // respectively.
    List<Integer> expectedNumBlocks = Arrays.asList(2, 2, 4, 4);

    List<Map<DatanodeStorage, BlockReport>> blockReports =
        cluster.getAllBlockReports(bpid);
    assertEquals(1, blockReports.size());  // 1 DataNode
    assertEquals(4, blockReports.get(0).size());  // 4 volumes
    Map<DatanodeStorage, BlockReport> dnReport = blockReports.get(0);
    List<Integer> actualNumBlocks = new ArrayList<Integer>();
    for (BlockReport blockList : dnReport.values()) {
      actualNumBlocks.add(blockList.getNumberOfBlocks());
    }
    Collections.sort(actualNumBlocks);
    assertEquals(expectedNumBlocks, actualNumBlocks);
  }

  @Test(timeout = 60000)
  @Ignore  // HOPS does not support federation
  public void testAddVolumesToFederationNN()
      throws IOException, TimeoutException, InterruptedException,
      ReconfigurationException {
    // Starts a cluster with 2 NameNodes and 1 DataNode. The DataNode has
    // 2 volumes.
    final int numNameNodes = 2;
    final int numDataNodes = 1;
    startDFSCluster(numNameNodes, numDataNodes);
    Path testFile = new Path("/test");
    // Create a file on the first namespace with 4 blocks.
    createFile(0, testFile, 4);
    // Create a file on the second namespace with 4 blocks.
    createFile(1, testFile, 4);

    // Add 2 volumes to the first DataNode.
    final int numNewVolumes = 2;
    addVolumes(numNewVolumes);

    // Append to the file on the first namespace.
    DFSTestUtil.appendFile(cluster.getFileSystem(0), testFile, BLOCK_SIZE * 8);

    List<List<Integer>> actualNumBlocks = getNumBlocksReport(0);
    assertEquals(cluster.getDataNodes().size(), actualNumBlocks.size());
    List<Integer> blocksOnFirstDN = actualNumBlocks.get(0);
    Collections.sort(blocksOnFirstDN);
    assertEquals(Arrays.asList(2, 2, 4, 4), blocksOnFirstDN);

    // Verify the second namespace also has the new volumes and they are empty.
    actualNumBlocks = getNumBlocksReport(1);
    assertEquals(4, actualNumBlocks.get(0).size());
    assertEquals(numNewVolumes,
        Collections.frequency(actualNumBlocks.get(0), 0));
  }

  @Test(timeout = 60000)
  public void testRemoveOneVolume()
      throws ReconfigurationException, InterruptedException, TimeoutException,
      IOException {
    startDFSCluster(1, 1);
    final short replFactor = 1;
    Path testFile = new Path("/test");
    createFile(testFile, 10, replFactor);

    DataNode dn = cluster.getDataNodes().get(0);
    Collection<String> oldDirs = getDataDirs(dn);
    String newDirs = oldDirs.iterator().next();  // Keep the first volume.
    dn.reconfigurePropertyImpl(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY, newDirs);
    assertFileLocksReleased(
        new ArrayList<String>(oldDirs).subList(1, oldDirs.size()));
    dn.scheduleAllBlockReport(0);

    try {
      DFSTestUtil.readFile(cluster.getFileSystem(), testFile);
      fail("Expect to throw BlockMissingException.");
    } catch (BlockMissingException e) {
      GenericTestUtils.assertExceptionContains("Could not obtain block", e);
    }

    Path newFile = new Path("/newFile");
    createFile(newFile, 6);

    String bpid = cluster.getNamesystem().getBlockPoolId();
    List<Map<DatanodeStorage, BlockReport>> blockReports =
        cluster.getAllBlockReports(bpid);
    assertEquals((int) replFactor, blockReports.size());

    BlockReport blocksForVolume1 =
        blockReports.get(0).values().iterator().next();
    // The first volume has half of the testFile and all of newFile.
    assertEquals(10 / 2 + 6, blocksForVolume1.getNumberOfBlocks());
  }

  @Test(timeout = 60000)
  public void testReplicatingAfterRemoveVolume()
      throws InterruptedException, TimeoutException, IOException,
      ReconfigurationException {
    startDFSCluster(1, 2);

    final FileSystem fs = cluster.getFileSystem();
    final short replFactor = 2;
    Path testFile = new Path("/test");
    createFile(testFile, 4, replFactor);

    DataNode dn = cluster.getDataNodes().get(0);
    Collection<String> oldDirs = getDataDirs(dn);
    String newDirs = oldDirs.iterator().next();  // Keep the first volume.
    dn.reconfigurePropertyImpl(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY, newDirs);
    assertFileLocksReleased(
        new ArrayList<String>(oldDirs).subList(1, oldDirs.size()));

    triggerDeleteReport(dn);

    waitReplication(fs, testFile, 1, 1);
    DFSTestUtil.waitReplication(fs, testFile, replFactor);
  }

  @Test
  public void testAddVolumeFailures() throws IOException {
    startDFSCluster(1, 1);
    final String dataDir = cluster.getDataDirectory();

    DataNode dn = cluster.getDataNodes().get(0);
    List<String> newDirs = Lists.newArrayList();
    final int NUM_NEW_DIRS = 4;
    for (int i = 0; i < NUM_NEW_DIRS; i++) {
      File newVolume = new File(dataDir, "new_vol" + i);
      newDirs.add(newVolume.toString());
      if (i % 2 == 0) {
        // Make addVolume() fail.
        newVolume.createNewFile();
      }
    }

    String newValue = dn.getConf().get(DFS_DATANODE_DATA_DIR_KEY) + "," +
        Joiner.on(",").join(newDirs);
    try {
      dn.reconfigurePropertyImpl(DFS_DATANODE_DATA_DIR_KEY, newValue);
      fail("Expect to throw IOException.");
    } catch (ReconfigurationException e) {
      String errorMessage = e.getCause().getMessage();
      String messages[] = errorMessage.split("\\r?\\n");
      assertEquals(2, messages.length);
      assertThat(messages[0], containsString("new_vol0"));
      assertThat(messages[1], containsString("new_vol2"));
    }

    // Make sure that vol0 and vol2's metadata are not left in memory.
    FsDatasetSpi<?> dataset = dn.getFSDataset();
    for (FsVolumeSpi volume : dataset.getVolumes()) {
      assertThat(volume.getBasePath(),
          is(not(anyOf(is(newDirs.get(0)), is(newDirs.get(2))))));
    }
    DataStorage storage = dn.getStorage();
    for (int i = 0; i < storage.getNumStorageDirs(); i++) {
      Storage.StorageDirectory sd = storage.getStorageDir(i);
      assertThat(sd.getRoot().toString(),
          is(not(anyOf(is(newDirs.get(0)), is(newDirs.get(2))))));
    }

    // The newly effective conf does not have vol0 and vol2.
    String[] effectiveVolumes =
        dn.getConf().get(DFS_DATANODE_DATA_DIR_KEY).split(",");
    assertEquals(4, effectiveVolumes.length);
    for (String ev : effectiveVolumes) {
      assertThat(StorageLocation.parse(ev).getFile().getCanonicalPath(),
          is(not(anyOf(is(newDirs.get(0)), is(newDirs.get(2))))));
    }
  }

  /**
   * Asserts that the storage lock file in each given directory has been
   * released. This method works by trying to acquire the lock file itself. If
   * locking fails here, then the main code must have failed to release it.
   *
   * @param dirs every storage directory to check
   * @throws IOException if there is an unexpected I/O error
   */
  private static void assertFileLocksReleased(Collection<String> dirs)
      throws IOException {
    for (String dir : dirs) {
      try {
        FsDatasetTestUtil.assertFileLockReleased(dir);
      } catch (IOException e) {
        LOG.warn(e);
      }
    }
  }

  @Test(timeout = 180000)
  public void testRemoveVolumeBeingWritten()
      throws InterruptedException, TimeoutException, ReconfigurationException,
      IOException, BrokenBarrierException {
    // Test removing volumes on each DataNode in the write pipeline in turn.
    for (int i = 0; i < 3; i++) {
      testRemoveVolumeBeingWrittenForDatanode(i);
    }
  }

  /**
   * Test removing a data volume on a particular DataNode while the volume is
   * actively being written.
   * @param dataNodeIdx the index of the DataNode to remove a volume from.
   */
  private void testRemoveVolumeBeingWrittenForDatanode(int dataNodeIdx)
      throws IOException, ReconfigurationException, TimeoutException,
      InterruptedException, BrokenBarrierException {
    // Starts DFS cluster with 3 DataNodes to form a pipeline.
    startDFSCluster(1, 3);

    final short REPLICATION = 3;
    final DataNode dn = cluster.getDataNodes().get(dataNodeIdx);
    final FileSystem fs = cluster.getFileSystem();
    final Path testFile = new Path("/test");
    final long lastTimeDiskErrorCheck = dn.getLastDiskErrorCheck();

    FSDataOutputStream out = fs.create(testFile, REPLICATION);

    Random rb = new Random(0);
    byte[] writeBuf = new byte[BLOCK_SIZE / 2];  // Half of the block.
    rb.nextBytes(writeBuf);
    out.write(writeBuf);
    out.hflush();

    // Make FsDatasetSpi#finalizeBlock a time-consuming operation, so that if
    // the BlockReceiver releases the volume reference before finalizeBlock(),
    // the blocks on the volume will be removed and finalizeBlock() will throw
    // an IOException.
    final FsDatasetSpi<? extends FsVolumeSpi> data = dn.data;
    dn.data = Mockito.spy(data);
    doAnswer(new Answer<Object>() {
      public Object answer(InvocationOnMock invocation)
          throws IOException, InterruptedException {
        Thread.sleep(1000);
        // Bypass the argument to FsDatasetImpl#finalizeBlock to verify that
        // the block is not removed, since the volume reference should not
        // be released at this point.
        data.finalizeBlock((ExtendedBlock) invocation.getArguments()[0]);
        return null;
      }
    }).when(dn.data).finalizeBlock(any(ExtendedBlock.class));

    final CyclicBarrier barrier = new CyclicBarrier(2);

    List<String> oldDirs = getDataDirs(dn);
    final String newDirs = oldDirs.get(1);  // Remove the first volume.
    final List<Exception> exceptions = new ArrayList<>();
    Thread reconfigThread = new Thread() {
      public void run() {
        try {
          barrier.await();
          dn.reconfigurePropertyImpl(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY,
              newDirs);
        } catch (ReconfigurationException |
            InterruptedException |
            BrokenBarrierException e) {
          exceptions.add(e);
        }
      }
    };
    reconfigThread.start();

    barrier.await();
    rb.nextBytes(writeBuf);
    out.write(writeBuf);
    out.hflush();
    out.close();

    reconfigThread.join();

    // Verify that the file has enough replicas.
    DFSTestUtil.waitReplication(fs, testFile, REPLICATION);
    // Read the content back.
    byte[] content = DFSTestUtil.readFileBuffer(fs, testFile);
    assertEquals(BLOCK_SIZE, content.length);

    // If an IOException is thrown from BlockReceiver#run, it triggers
    // DataNode#checkDiskError(). So we can check whether checkDiskError() was
    // called to see whether BlockReceiver#run() hit an IOException.
    assertEquals(lastTimeDiskErrorCheck, dn.getLastDiskErrorCheck());

    if (!exceptions.isEmpty()) {
      throw new IOException(exceptions.get(0).getCause());
    }
  }

  @Test(timeout = 60000)
  public void testAddBackRemovedVolume()
      throws IOException, TimeoutException, InterruptedException,
      ReconfigurationException {
    startDFSCluster(1, 2);
    // Create some data on every volume.
    createFile(new Path("/test"), 32);

    DataNode dn = cluster.getDataNodes().get(0);
    Configuration conf = dn.getConf();
    String oldDataDir = conf.get(DFS_DATANODE_DATA_DIR_KEY);
    String keepDataDir = oldDataDir.split(",")[0];
    String removeDataDir = oldDataDir.split(",")[1];

    dn.reconfigurePropertyImpl(DFS_DATANODE_DATA_DIR_KEY, keepDataDir);
    for (int i = 0; i < cluster.getNumNameNodes(); i++) {
      String bpid = cluster.getNamesystem(i).getBlockPoolId();
      BlockPoolSliceStorage bpsStorage = dn.getStorage().getBPStorage(bpid);
      // Make sure that there is no block pool level storage under removeDataDir.
      for (int j = 0; j < bpsStorage.getNumStorageDirs(); j++) {
        Storage.StorageDirectory sd = bpsStorage.getStorageDir(j);
        assertFalse(sd.getRoot().getAbsolutePath().startsWith(
            new File(removeDataDir).getAbsolutePath()));
      }
      assertEquals(dn.getStorage().getBPStorage(bpid).getNumStorageDirs(), 1);
    }

    // Bring the removed directory back. This only succeeds if all metadata
    // about this directory was removed in the previous step.
    dn.reconfigurePropertyImpl(DFS_DATANODE_DATA_DIR_KEY, oldDataDir);
  }

  /** Get the FsVolume on the given basePath. */
  private FsVolumeImpl getVolume(DataNode dn, File basePath) {
    for (FsVolumeSpi vol : dn.getFSDataset().getVolumes()) {
      if (vol.getBasePath().equals(basePath.getPath())) {
        return (FsVolumeImpl) vol;
      }
    }
    return null;
  }

  /**
   * Verify that {@link DataNode#checkDiskErrors()} removes all metadata in the
   * DataNode upon a volume failure. Thus we can run reconfig on the same
   * configuration to reload the new volume on the same directory as the
   * failed one.
   */
  @Test(timeout = 120000)
  public void testDirectlyReloadAfterCheckDiskError()
      throws IOException, TimeoutException, InterruptedException,
      ReconfigurationException {
    startDFSCluster(1, 2);
    createFile(new Path("/test"), 32, (short) 2);

    DataNode dn = cluster.getDataNodes().get(0);
    final String oldDataDir = dn.getConf().get(DFS_DATANODE_DATA_DIR_KEY);
    File dirToFail = new File(cluster.getDataDirectory(), "data_0_0");

    FsVolumeImpl failedVolume = getVolume(dn, dirToFail);
    assertTrue("No FsVolume was found for " + dirToFail, failedVolume != null);
    long used = failedVolume.getDfsUsed();

    DataNodeTestUtils.injectDataDirFailure(dirToFail);
    // Call and wait for the DataNode to detect the disk failure.
    long lastDiskErrorCheck = dn.getLastDiskErrorCheck();
    dn.checkDiskErrorAsync();
    while (dn.getLastDiskErrorCheck() == lastDiskErrorCheck) {
      Thread.sleep(100);
    }

    createFile(new Path("/test1"), 32, (short) 2);
    assertEquals(used, failedVolume.getDfsUsed());

    DataNodeTestUtils.restoreDataDirFromFailure(dirToFail);
    dn.reconfigurePropertyImpl(DFS_DATANODE_DATA_DIR_KEY, oldDataDir);

    createFile(new Path("/test2"), 32, (short) 2);
    FsVolumeImpl restoredVolume = getVolume(dn, dirToFail);
    assertTrue(restoredVolume != null);
    assertTrue(restoredVolume != failedVolume);
    // More data has been written to this volume.
    assertTrue(restoredVolume.getDfsUsed() > used);
  }

  /** Test that a full block report is sent after hot swapping volumes. */
  @Test(timeout = 100000)
  public void testFullBlockReportAfterRemovingVolumes()
      throws IOException, ReconfigurationException, InterruptedException {

    Configuration conf = new Configuration();
    conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);

    // Similar to TestTriggerBlockReport, set a really long value for
    // dfs.heartbeat.interval, so that incremental block reports and heartbeats
    // won't be sent during this test unless they're triggered
    // manually.
    conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 10800000L);
    conf.setLong(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 1080L);

    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2).build();
    cluster.waitActive();

    // Wait for the first block report to finish, otherwise the second one
    // won't be triggered.
    Thread.sleep(10000);

    final DataNode dn = cluster.getDataNodes().get(0);
    DatanodeProtocolClientSideTranslatorPB spy =
        DataNodeTestUtils.spyOnBposToNN(dn, cluster.getNameNode());

    // Remove a data dir from the datanode.
    File dataDirToKeep = new File(cluster.getDataDirectory(), "data1");
    dn.reconfigurePropertyImpl(DFS_DATANODE_DATA_DIR_KEY,
        dataDirToKeep.toString());

    // We should get 1 full report.
    Mockito.verify(spy, timeout(60000).times(1))
        .blockReport(any(DatanodeRegistration.class),
            anyString(),
            any(StorageBlockReport[].class),
            any(BlockReportContext.class));
  }
}