Java tutorial: org.apache.hadoop.hdfs.TestDFSStripedOutputStreamWithFailure

The listing below is an Apache Hadoop HDFS test class that writes erasure-coded (striped) files while stopping DataNodes at chosen kill positions, then verifies the written data, the generation-stamp bumps, and the block-token expiry path.
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs;

import com.google.common.base.Preconditions;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.commons.logging.impl.Log4JLogger;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.CommonConfigurationKeys;
import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType;
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier;
import org.apache.hadoop.hdfs.security.token.block.BlockTokenSecretManager;
import org.apache.hadoop.hdfs.security.token.block.SecurityTestUtil;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockPlacementPolicy;
import org.apache.hadoop.hdfs.server.datanode.DataNode;
import org.apache.hadoop.hdfs.server.namenode.NameNode;
import org.apache.hadoop.io.erasurecode.ErasureCodeNative;
import org.apache.hadoop.io.erasurecode.rawcoder.NativeRSRawErasureCoderFactory;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.test.GenericTestUtils;
import org.apache.hadoop.util.StringUtils;
import org.apache.log4j.Level;
import org.junit.Assert;
import org.junit.Test;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
import java.util.Random;
import java.util.Stack;
import java.util.concurrent.atomic.AtomicInteger;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;

public class TestDFSStripedOutputStreamWithFailure {
  public static final Log LOG = LogFactory.getLog(
      TestDFSStripedOutputStreamWithFailure.class);
  static {
    GenericTestUtils.setLogLevel(DFSOutputStream.LOG, Level.ALL);
    GenericTestUtils.setLogLevel(DataStreamer.LOG, Level.ALL);
    GenericTestUtils.setLogLevel(DFSClient.LOG, Level.ALL);
    ((Log4JLogger) LogFactory.getLog(BlockPlacementPolicy.class))
        .getLogger().setLevel(Level.ALL);
  }

  private static final int NUM_DATA_BLOCKS = StripedFileTestUtil.NUM_DATA_BLOCKS;
  private static final int NUM_PARITY_BLOCKS = StripedFileTestUtil.NUM_PARITY_BLOCKS;
  private static final int CELL_SIZE = StripedFileTestUtil.BLOCK_STRIPED_CELL_SIZE;
  private static final int STRIPES_PER_BLOCK = 4;
  private static final int BLOCK_SIZE = CELL_SIZE * STRIPES_PER_BLOCK;
  private static final int BLOCK_GROUP_SIZE = BLOCK_SIZE * NUM_DATA_BLOCKS;
  private static final int FLUSH_POS =
      9 * DFSConfigKeys.DFS_BYTES_PER_CHECKSUM_DEFAULT + 1;

  static {
    System.out.println("NUM_DATA_BLOCKS = " + NUM_DATA_BLOCKS);
    System.out.println("NUM_PARITY_BLOCKS= " + NUM_PARITY_BLOCKS);
    System.out.println("CELL_SIZE = " + CELL_SIZE + " (="
        + StringUtils.TraditionalBinaryPrefix.long2String(CELL_SIZE, "B", 2) + ")");
    System.out.println("BLOCK_SIZE = " + BLOCK_SIZE + " (="
        + StringUtils.TraditionalBinaryPrefix.long2String(BLOCK_SIZE, "B", 2) + ")");
    System.out.println("BLOCK_GROUP_SIZE = " + BLOCK_GROUP_SIZE + " (="
        + StringUtils.TraditionalBinaryPrefix.long2String(BLOCK_GROUP_SIZE, "B", 2) + ")");
  }

  static List<Integer> newLengths() {
    final List<Integer> lengths = new ArrayList<>();
    lengths.add(FLUSH_POS + 2);
    for (int b = 0; b <= 2; b++) {
      for (int c = 0; c < STRIPES_PER_BLOCK * NUM_DATA_BLOCKS; c++) {
        for (int delta = -1; delta <= 1; delta++) {
          final int length = b * BLOCK_GROUP_SIZE + c * CELL_SIZE + delta;
          System.out.println(lengths.size() + ": length=" + length
              + ", (b, c, d) = (" + b + ", " + c + ", " + delta + ")");
          lengths.add(length);
        }
      }
    }
    return lengths;
  }

  private static final int[][] dnIndexSuite = getDnIndexSuite();

  private static int[][] getDnIndexSuite() {
    final int maxNumLevel = 2;
    final int maxPerLevel = 8;
    List<List<Integer>> allLists = new ArrayList<>();
    int numIndex = NUM_PARITY_BLOCKS;
    for (int i = 0; i < maxNumLevel && numIndex > 1; i++) {
      List<List<Integer>> lists =
          combinations(NUM_DATA_BLOCKS + NUM_PARITY_BLOCKS, numIndex);
      if (lists.size() > maxPerLevel) {
        Collections.shuffle(lists);
        lists = lists.subList(0, maxPerLevel);
      }
      allLists.addAll(lists);
      numIndex--;
    }
    int[][] dnIndexSuite = new int[allLists.size()][];
    for (int i = 0; i < dnIndexSuite.length; i++) {
      int[] list = new int[allLists.get(i).size()];
      for (int j = 0; j < list.length; j++) {
        list[j] = allLists.get(i).get(j);
      }
      dnIndexSuite[i] = list;
    }
    return dnIndexSuite;
  }

  // get all combinations of k integers from {0,...,n-1}
  private static List<List<Integer>> combinations(int n, int k) {
    List<List<Integer>> res = new LinkedList<List<Integer>>();
    if (k >= 1 && n >= k) {
      getComb(n, k, new Stack<Integer>(), res);
    }
    return res;
  }

  private static void getComb(int n, int k, Stack<Integer> stack,
      List<List<Integer>> res) {
    if (stack.size() == k) {
      List<Integer> list = new ArrayList<Integer>(stack);
      res.add(list);
    } else {
      int next = stack.empty() ? 0 : stack.peek() + 1;
      while (next < n) {
        stack.push(next);
        getComb(n, k, stack, res);
        next++;
      }
    }
    if (!stack.empty()) {
      stack.pop();
    }
  }

  private int[] getKillPositions(int fileLen, int num) {
    int[] positions = new int[num];
    for (int i = 0; i < num; i++) {
      positions[i] = fileLen * (i + 1) / (num + 1);
    }
    return positions;
  }

  private static final List<Integer> LENGTHS = newLengths();

  static Integer getLength(int i) {
    return i >= 0 && i < LENGTHS.size() ? LENGTHS.get(i) : null;
  }

  private static final Random RANDOM = new Random();

  private MiniDFSCluster cluster;
  private DistributedFileSystem dfs;
  private final Path dir = new Path("/"
      + TestDFSStripedOutputStreamWithFailure.class.getSimpleName());

  private void setup(Configuration conf) throws IOException {
    final int numDNs = NUM_DATA_BLOCKS + NUM_PARITY_BLOCKS;
    if (ErasureCodeNative.isNativeCodeLoaded()) {
      conf.set(
          CommonConfigurationKeys.IO_ERASURECODE_CODEC_RS_DEFAULT_RAWCODER_KEY,
          NativeRSRawErasureCoderFactory.class.getCanonicalName());
    }
    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(numDNs).build();
    cluster.waitActive();
    dfs = cluster.getFileSystem();
    dfs.mkdirs(dir);
    dfs.setErasureCodingPolicy(dir, null);
  }

  private void tearDown() {
    if (cluster != null) {
      cluster.shutdown();
    }
  }

  private HdfsConfiguration newHdfsConfiguration() {
    final HdfsConfiguration conf = new HdfsConfiguration();
    conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);
    conf.setBoolean(DFSConfigKeys.DFS_NAMENODE_REPLICATION_CONSIDERLOAD_KEY,
        false);
    conf.setInt(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 1);
    conf.setInt(DFSConfigKeys.DFS_NAMENODE_REPLICATION_MAX_STREAMS_KEY, 0);
    return conf;
  }

  @Test(timeout = 240000)
  public void testMultipleDatanodeFailure56() throws Exception {
    runTestWithMultipleFailure(getLength(56));
  }

  /**
   * Randomly pick a length and run tests with multiple data failures
   * TODO: enable this later
   */
  //@Test(timeout=240000)
  public void testMultipleDatanodeFailureRandomLength() throws Exception {
    int lenIndex = RANDOM.nextInt(LENGTHS.size());
    LOG.info("run testMultipleDatanodeFailureRandomLength with length index: "
        + lenIndex);
    runTestWithMultipleFailure(getLength(lenIndex));
  }

  @Test(timeout = 240000)
  public void testBlockTokenExpired() throws Exception {
    final int length = NUM_DATA_BLOCKS * (BLOCK_SIZE - CELL_SIZE);
    final HdfsConfiguration conf = newHdfsConfiguration();
    conf.setBoolean(DFSConfigKeys.DFS_BLOCK_ACCESS_TOKEN_ENABLE_KEY, true);
    conf.setInt(
        CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_KEY, 0);
    // Set short retry timeouts so this test runs faster
    conf.setInt(HdfsClientConfigKeys.Retry.WINDOW_BASE_KEY, 10);
    for (int dn = 0; dn < NUM_DATA_BLOCKS + NUM_PARITY_BLOCKS; dn += 2) {
      try {
        setup(conf);
        runTest(length, new int[] { length / 2 }, new int[] { dn }, true);
      } catch (Exception e) {
        LOG.error("failed, dn=" + dn + ", length=" + length);
        throw e;
      } finally {
        tearDown();
      }
    }
  }

  @Test(timeout = 90000)
  public void testAddBlockWhenNoSufficientDataBlockNumOfNodes()
      throws IOException {
    HdfsConfiguration conf = new HdfsConfiguration();
    conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);
    try {
      setup(conf);
      ArrayList<DataNode> dataNodes = cluster.getDataNodes();
      // shutdown few datanodes to avoid getting sufficient data blocks number
      // of datanodes
      int numDatanodes = dataNodes.size();
      while (numDatanodes >= NUM_DATA_BLOCKS) {
        cluster.stopDataNode(0);
        numDatanodes--;
      }
      cluster.restartNameNodes();
      cluster.triggerHeartbeats();
      DatanodeInfo[] info = dfs.getClient().datanodeReport(
          DatanodeReportType.LIVE);
      assertEquals("Mismatches number of live Dns ", numDatanodes, info.length);
      final Path dirFile = new Path(dir, "ecfile");
      FSDataOutputStream out;
      try {
        out = dfs.create(dirFile, true);
        out.write("something".getBytes());
        out.flush();
        out.close();
        Assert.fail("Failed to validate available dns against blkGroupSize");
      } catch (IOException ioe) {
        // expected
        GenericTestUtils.assertExceptionContains("Failed to get "
            + NUM_DATA_BLOCKS + " nodes from namenode: blockGroupSize= "
            + (NUM_DATA_BLOCKS + NUM_PARITY_BLOCKS) + ", blocks.length= "
            + numDatanodes, ioe);
      }
    } finally {
      tearDown();
    }
  }

  @Test(timeout = 90000)
  public void testAddBlockWhenNoSufficientParityNumOfNodes()
      throws IOException {
    HdfsConfiguration conf = new HdfsConfiguration();
    conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);
    try {
      setup(conf);
      ArrayList<DataNode> dataNodes = cluster.getDataNodes();
      // shutdown few data nodes to avoid writing parity blocks
      int killDns = (NUM_PARITY_BLOCKS - 1);
      int numDatanodes = dataNodes.size() - killDns;
      for (int i = 0; i < killDns; i++) {
        cluster.stopDataNode(i);
      }
      cluster.restartNameNodes();
      cluster.triggerHeartbeats();
      DatanodeInfo[] info = dfs.getClient().datanodeReport(
          DatanodeReportType.LIVE);
      assertEquals("Mismatches number of live Dns ", numDatanodes, info.length);
      Path srcPath = new Path(dir, "testAddBlockWhenNoSufficientParityNodes");
      int fileLength = StripedFileTestUtil.BLOCK_STRIPED_CELL_SIZE - 1000;
      final byte[] expected = StripedFileTestUtil.generateBytes(fileLength);
      DFSTestUtil.writeFile(dfs, srcPath, new String(expected));
      LOG.info("writing finished. Seek and read the file to verify.");
      StripedFileTestUtil.verifySeek(dfs, srcPath, fileLength);
    } finally {
      tearDown();
    }
  }

  void runTest(final int length) {
    final HdfsConfiguration conf = newHdfsConfiguration();
    for (int dn = 0; dn < NUM_DATA_BLOCKS + NUM_PARITY_BLOCKS; dn++) {
      try {
        LOG.info("runTest: dn=" + dn + ", length=" + length);
        setup(conf);
        runTest(length, new int[] { length / 2 }, new int[] { dn }, false);
      } catch (Throwable e) {
        final String err = "failed, dn=" + dn + ", length=" + length
            + StringUtils.stringifyException(e);
        LOG.error(err);
        Assert.fail(err);
      } finally {
        tearDown();
      }
    }
  }

  void runTestWithMultipleFailure(final int length) throws Exception {
    final HdfsConfiguration conf = newHdfsConfiguration();
    for (int[] dnIndex : dnIndexSuite) {
      int[] killPos = getKillPositions(length, dnIndex.length);
      try {
        LOG.info("runTestWithMultipleFailure: length==" + length
            + ", killPos=" + Arrays.toString(killPos)
            + ", dnIndex=" + Arrays.toString(dnIndex));
        setup(conf);
        runTest(length, killPos, dnIndex, false);
      } catch (Throwable e) {
        final String err = "failed, killPos=" + Arrays.toString(killPos)
            + ", dnIndex=" + Arrays.toString(dnIndex) + ", length=" + length;
        LOG.error(err);
        throw e;
      } finally {
        tearDown();
      }
    }
  }

  /**
   * runTest implementation
   * @param length file length
   * @param killPos killing positions in ascending order
   * @param dnIndex DN index to kill when meets killing positions
   * @param tokenExpire wait token to expire when kill a DN
   * @throws Exception
   */
  private void runTest(final int length, final int[] killPos,
      final int[] dnIndex, final boolean tokenExpire) throws Exception {
    if (killPos[0] <= FLUSH_POS) {
      LOG.warn("killPos=" + Arrays.toString(killPos) + " <= FLUSH_POS="
          + FLUSH_POS + ", length=" + length
          + ", dnIndex=" + Arrays.toString(dnIndex));
      return; //skip test
    }
    Preconditions.checkArgument(length > killPos[0],
        "length=%s <= killPos=%s", length, killPos);
    Preconditions.checkArgument(killPos.length == dnIndex.length);

    final Path p = new Path(dir, "dn" + Arrays.toString(dnIndex)
        + "len" + length + "kill" + Arrays.toString(killPos));
    final String fullPath = p.toString();
    LOG.info("fullPath=" + fullPath);

    if (tokenExpire) {
      final NameNode nn = cluster.getNameNode();
      final BlockManager bm = nn.getNamesystem().getBlockManager();
      final BlockTokenSecretManager sm = bm.getBlockTokenSecretManager();
      // set a short token lifetime (1 second)
      SecurityTestUtil.setBlockTokenLifetime(sm, 1000L);
    }

    final AtomicInteger pos = new AtomicInteger();
    final FSDataOutputStream out = dfs.create(p);
    final DFSStripedOutputStream stripedOut =
        (DFSStripedOutputStream) out.getWrappedStream();

    long firstGS = -1; // first GS of this block group which never proceeds blockRecovery
    long oldGS = -1; // the old GS before bumping
    List<Long> gsList = new ArrayList<>();
    final List<DatanodeInfo> killedDN = new ArrayList<>();
    int numKilled = 0;
    for (; pos.get() < length;) {
      final int i = pos.getAndIncrement();
      if (numKilled < killPos.length && i == killPos[numKilled]) {
        assertTrue(firstGS != -1);
        final long gs = getGenerationStamp(stripedOut);
        if (numKilled == 0) {
          assertEquals(firstGS, gs);
        } else {
          //TODO: implement hflush/hsync and verify gs strict greater than oldGS
          assertTrue(gs >= oldGS);
        }
        oldGS = gs;

        if (tokenExpire) {
          DFSTestUtil.flushInternal(stripedOut);
          waitTokenExpires(out);
        }

        killedDN.add(
            killDatanode(cluster, stripedOut, dnIndex[numKilled], pos));
        numKilled++;
      }

      write(out, i);

      if (i % BLOCK_GROUP_SIZE == FLUSH_POS) {
        firstGS = getGenerationStamp(stripedOut);
        oldGS = firstGS;
      }
      if (i > 0 && (i + 1) % BLOCK_GROUP_SIZE == 0) {
        gsList.add(oldGS);
      }
    }
    gsList.add(oldGS);
    out.close();
    assertEquals(dnIndex.length, numKilled);

    StripedFileTestUtil.waitBlockGroupsReported(dfs, fullPath, numKilled);
    cluster.triggerBlockReports();
    StripedFileTestUtil.checkData(dfs, p, length, killedDN, gsList);
  }

  static void write(FSDataOutputStream out, int i) throws IOException {
    try {
      out.write(StripedFileTestUtil.getByte(i));
    } catch (IOException ioe) {
      throw new IOException("Failed at i=" + i, ioe);
    }
  }

  static long getGenerationStamp(DFSStripedOutputStream out)
      throws IOException {
    final long gs = out.getBlock().getGenerationStamp();
    LOG.info("getGenerationStamp returns " + gs);
    return gs;
  }

  static DatanodeInfo getDatanodes(StripedDataStreamer streamer) {
    for (;;) {
      DatanodeInfo[] datanodes = streamer.getNodes();
      if (datanodes == null) {
        // try peeking following block.
        final LocatedBlock lb = streamer.peekFollowingBlock();
        if (lb != null) {
          datanodes = lb.getLocations();
        }
      }

      if (datanodes != null) {
        Assert.assertEquals(1, datanodes.length);
        Assert.assertNotNull(datanodes[0]);
        return datanodes[0];
      }

      try {
        Thread.sleep(100);
      } catch (InterruptedException ie) {
        Assert.fail(StringUtils.stringifyException(ie));
        return null;
      }
    }
  }

  static DatanodeInfo killDatanode(MiniDFSCluster cluster,
      DFSStripedOutputStream out, final int dnIndex, final AtomicInteger pos) {
    final StripedDataStreamer s = out.getStripedDataStreamer(dnIndex);
    final DatanodeInfo datanode = getDatanodes(s);
    LOG.info("killDatanode " + dnIndex + ": " + datanode + ", pos=" + pos);
    if (datanode != null) {
      cluster.stopDataNode(datanode.getXferAddr());
    }
    return datanode;
  }

  private void waitTokenExpires(FSDataOutputStream out) throws IOException {
    Token<BlockTokenIdentifier> token = DFSTestUtil.getBlockToken(out);
    while (!SecurityTestUtil.isBlockTokenExpired(token)) {
      try {
        Thread.sleep(10);
      } catch (InterruptedException ignored) {
      }
    }
  }

  public static abstract class TestBase {
    static final long TIMEOUT = 240000;

    int getBase() {
      final String name = getClass().getSimpleName();
      int i = name.length() - 1;
      for (; i >= 0 && Character.isDigit(name.charAt(i)); i--)
        ;
      return Integer.parseInt(name.substring(i + 1));
    }

    private final TestDFSStripedOutputStreamWithFailure test =
        new TestDFSStripedOutputStreamWithFailure();

    private void run(int offset) {
      final int i = offset + getBase();
      final Integer length = getLength(i);
      if (length == null) {
        System.out.println("Skip test " + i + " since length=null.");
        return;
      }
      if (RANDOM.nextInt(16) != 0) {
        System.out.println("Test " + i + ", length=" + length
            + ", is not chosen to run.");
        return;
      }
      System.out.println("Run test " + i + ", length=" + length);
      test.runTest(length);
    }

    @Test(timeout = TIMEOUT) public void test0() { run(0); }
    @Test(timeout = TIMEOUT) public void test1() { run(1); }
    @Test(timeout = TIMEOUT) public void test2() { run(2); }
    @Test(timeout = TIMEOUT) public void test3() { run(3); }
    @Test(timeout = TIMEOUT) public void test4() { run(4); }
    @Test(timeout = TIMEOUT) public void test5() { run(5); }
    @Test(timeout = TIMEOUT) public void test6() { run(6); }
    @Test(timeout = TIMEOUT) public void test7() { run(7); }
    @Test(timeout = TIMEOUT) public void test8() { run(8); }
    @Test(timeout = TIMEOUT) public void test9() { run(9); }
  }
}