Java tutorial
/*
 * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0
 * (the "License"). You may not use this work except in compliance with the License, which is
 * available at www.apache.org/licenses/LICENSE-2.0
 *
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
 * either express or implied, as more fully set forth in the License.
 *
 * See the NOTICE file distributed with this work for information regarding copyright ownership.
 */

package alluxio.client.hadoop;

import alluxio.Constants;
import alluxio.conf.PropertyKey;
import alluxio.conf.ServerConfiguration;
import alluxio.hadoop.FileSystem;
import alluxio.hadoop.HadoopConfigurationUtils;
import alluxio.testutils.BaseIntegrationTest;
import alluxio.testutils.LocalAlluxioClusterResource;

import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PositionedReadable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.SequenceFile.CompressionType;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.junit.AfterClass;
import org.junit.AssumptionViolatedException;
import org.junit.BeforeClass;
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.rules.TestRule;
import org.junit.runner.Description;
import org.junit.runners.model.Statement;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.BufferedReader;
import java.io.Closeable;
import java.io.DataInputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.PrintStream;
import java.net.URI;
import java.util.Date;
import java.util.Random;
import java.util.StringTokenizer;

import javax.annotation.Nullable;

/**
 * Distributed i/o benchmark.
 * <p>
 * This test writes into or reads from a specified number of files. Number of bytes to write or
 * read is specified as a parameter to the test. Each file is accessed in a separate map task.
 * <p>
 * The reducer collects the following statistics:
 * <ul>
 * <li>number of tasks completed</li>
 * <li>number of bytes written/read</li>
 * <li>execution time</li>
 * <li>io rate</li>
 * <li>io rate squared</li>
 * </ul>
 *
 * Finally, the following information is appended to a local file:
 * <ul>
 * <li>read or write test</li>
 * <li>date and time the test finished</li>
 * <li>number of files</li>
 * <li>total number of bytes processed</li>
 * <li>throughput in mb/sec (total number of bytes / sum of processing times)</li>
 * <li>average i/o rate in mb/sec per file</li>
 * <li>standard deviation of i/o rate</li>
 * </ul>
 */
public class DFSIOIntegrationTest extends BaseIntegrationTest implements Tool {
  /**
   * A rule that is used to enforce supported hadoop client versions before running this test.
   * The value of the system property "alluxio.hadoop.version" is injected by the surefire plugin.
   */
  private static class HadoopVersionRule implements TestRule {
    @Override
    public Statement apply(Statement base, Description description) {
      return new Statement() {
        @Override
        public void evaluate() throws Throwable {
          String hadoopVersion = System.getProperty("alluxio.hadoop.version");
          if (hadoopVersion != null
              && (hadoopVersion.startsWith("2.4") || hadoopVersion.startsWith("2.5")
                  || hadoopVersion.startsWith("2.6"))) {
            throw new AssumptionViolatedException("Hadoop version not supported. Skipping test!");
          } else {
            base.evaluate();
          }
        }
      };
    }
  }

  // Constants for DFSIOIntegrationTest
  private static final Logger LOG = LoggerFactory.getLogger(DFSIOIntegrationTest.class);
  private static final int DEFAULT_BUFFER_SIZE = 4096;
  private static final String BASE_FILE_NAME = "test_io_";
  private static final String DEFAULT_RES_FILE_NAME = "DFSIOIntegrationTest_results.log";
  private static final long MEGA = ByteMultiple.MB.value();
  private static final int DEFAULT_NR_BYTES = 16384;
  private static final int DEFAULT_NR_FILES = 4;
  private static boolean sGenerateReportFile = false;
  private static final String USAGE =
      "Usage: " + DFSIOIntegrationTest.class.getSimpleName() + " [genericOptions]"
          + " -read [-random | -backward | -skip [-skipSize Size]] |"
          + " -write | -append | -clean"
          + " [-compression codecClassName]"
          + " [-nrFiles N]"
          + " [-size Size[B|KB|MB|GB|TB]]"
          + " [-resFile resultFileName] [-bufferSize Bytes]"
          + " [-rootDir]";

  private org.apache.hadoop.conf.Configuration mConfig;

  @ClassRule
  public static LocalAlluxioClusterResource sLocalAlluxioClusterResource =
      new LocalAlluxioClusterResource.Builder()
          .setProperty(PropertyKey.USER_FILE_WRITE_TYPE_DEFAULT, "CACHE_THROUGH").build();
  private static URI sLocalAlluxioClusterUri = null;

  static {
    org.apache.hadoop.conf.Configuration.addDefaultResource("hdfs-default.xml");
    org.apache.hadoop.conf.Configuration.addDefaultResource("hdfs-site.xml");
    org.apache.hadoop.conf.Configuration.addDefaultResource("mapred-default.xml");
    org.apache.hadoop.conf.Configuration.addDefaultResource("mapred-site.xml");
  }

  /**
   * Represents different types of tests.
   */
  private enum TestType {
    TEST_TYPE_READ("read"),
    TEST_TYPE_WRITE("write"),
    TEST_TYPE_CLEANUP("cleanup"),
    TEST_TYPE_APPEND("append"),
    TEST_TYPE_READ_RANDOM("random read"),
    TEST_TYPE_READ_BACKWARD("backward read"),
    TEST_TYPE_READ_SKIP("skip read");

    private String mType;

    TestType(String t) {
      mType = t;
    }

    @Override // String
    public String toString() {
      return mType;
    }
  }
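  // Size strings accepted on the command line combine a number with one of the ByteMultiple
  // suffixes below; for example, parseSize("8MB") (defined further down) yields 8 * 1048576 bytes,
  // and a bare "16" defaults to MB. The concrete values in this note are illustrative only.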
  /**
   * Represents the five supported byte multiples.
   */
  enum ByteMultiple {
    B(1L), KB(0x400L), MB(0x100000L), GB(0x40000000L), TB(0x10000000000L);

    private long mMultiplier;

    ByteMultiple(long mult) {
      mMultiplier = mult;
    }

    long value() {
      return mMultiplier;
    }

    static ByteMultiple parseString(String sMultiple) {
      if (sMultiple == null || sMultiple.isEmpty()) {
        // MB by default
        return MB;
      }
      String sMU = sMultiple.toUpperCase();
      if (B.name().toUpperCase().endsWith(sMU)) {
        return B;
      }
      if (KB.name().toUpperCase().endsWith(sMU)) {
        return KB;
      }
      if (MB.name().toUpperCase().endsWith(sMU)) {
        return MB;
      }
      if (GB.name().toUpperCase().endsWith(sMU)) {
        return GB;
      }
      if (TB.name().toUpperCase().endsWith(sMU)) {
        return TB;
      }
      throw new IllegalArgumentException("Unsupported ByteMultiple " + sMultiple);
    }
  }

  public DFSIOIntegrationTest() {
    mConfig = new org.apache.hadoop.conf.Configuration();
  }

  private static String getBaseDir(org.apache.hadoop.conf.Configuration conf) {
    return conf.get("test.dfsio.build.data", "/benchmarks/DFSIOIntegrationTest");
  }

  private static Path getControlDir(org.apache.hadoop.conf.Configuration conf) {
    return new Path(getBaseDir(conf), "io_control");
  }

  private static Path getWriteDir(org.apache.hadoop.conf.Configuration conf) {
    return new Path(getBaseDir(conf), "io_write");
  }

  private static Path getReadDir(org.apache.hadoop.conf.Configuration conf) {
    return new Path(getBaseDir(conf), "io_read");
  }

  private static Path getAppendDir(org.apache.hadoop.conf.Configuration conf) {
    return new Path(getBaseDir(conf), "io_append");
  }

  private static Path getRandomReadDir(org.apache.hadoop.conf.Configuration conf) {
    return new Path(getBaseDir(conf), "io_random_read");
  }

  private static Path getDataDir(org.apache.hadoop.conf.Configuration conf) {
    return new Path(getBaseDir(conf), "io_data");
  }

  private static DFSIOIntegrationTest sBench;

  @ClassRule
  public static HadoopVersionRule sHadoopVersionRule = new HadoopVersionRule();

  @BeforeClass
  public static void beforeClass() throws Exception {
    // Init DFSIOIntegrationTest
    sBench = new DFSIOIntegrationTest();
    sBench.getConf().setBoolean("dfs.support.append", true);
    sLocalAlluxioClusterUri = URI.create(sLocalAlluxioClusterResource.get().getMasterURI());
    sBench.getConf().set("fs.defaultFS", sLocalAlluxioClusterUri.toString());
    sBench.getConf().set("fs.default.name", sLocalAlluxioClusterUri.toString());
    sBench.getConf().set("fs." + Constants.SCHEME + ".impl", FileSystem.class.getName());

    org.apache.hadoop.fs.FileSystem fs = org.apache.hadoop.fs.FileSystem.get(
        sLocalAlluxioClusterUri, HadoopConfigurationUtils.mergeAlluxioConfiguration(
            sBench.getConf(), ServerConfiguration.global()));
    sBench.createControlFile(fs, DEFAULT_NR_BYTES, DEFAULT_NR_FILES);

    // Check write here, as it is required for other tests
    writeTest();
  }

  @AfterClass
  public static void afterClass() throws Exception {
    // Clear DFSIOIntegrationTest
    org.apache.hadoop.fs.FileSystem fs = org.apache.hadoop.fs.FileSystem.get(
        sLocalAlluxioClusterUri, HadoopConfigurationUtils.mergeAlluxioConfiguration(
            sBench.getConf(), ServerConfiguration.global()));
    sBench.cleanup(fs);
  }
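  // Note: beforeClass() above points fs.defaultFS at the local Alluxio cluster and maps the
  // Alluxio URI scheme (Constants.SCHEME) to alluxio.hadoop.FileSystem, so the generic
  // org.apache.hadoop.fs.FileSystem calls in the tests below all run against Alluxio.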
  /**
   * Writes into files, then calculates and collects the write test statistics.
   */
  public static void writeTest() throws Exception {
    org.apache.hadoop.fs.FileSystem fs = org.apache.hadoop.fs.FileSystem.get(
        sLocalAlluxioClusterUri, HadoopConfigurationUtils.mergeAlluxioConfiguration(
            sBench.getConf(), ServerConfiguration.global()));
    long tStart = System.currentTimeMillis();
    sBench.mapperWriteTest(fs);
    long execTime = System.currentTimeMillis() - tStart;
    sBench.analyzeResult(fs, TestType.TEST_TYPE_WRITE, execTime);
  }

  @Test(timeout = 50000)
  public void read() throws Exception {
    org.apache.hadoop.fs.FileSystem fs = org.apache.hadoop.fs.FileSystem.get(
        sLocalAlluxioClusterUri, HadoopConfigurationUtils.mergeAlluxioConfiguration(
            sBench.getConf(), ServerConfiguration.global()));
    long tStart = System.currentTimeMillis();
    sBench.mapperReadTest(fs);
    long execTime = System.currentTimeMillis() - tStart;
    sBench.analyzeResult(fs, TestType.TEST_TYPE_READ, execTime);
  }

  @Test(timeout = 50000)
  public void readRandom() throws Exception {
    org.apache.hadoop.fs.FileSystem fs = org.apache.hadoop.fs.FileSystem.get(
        sLocalAlluxioClusterUri, HadoopConfigurationUtils.mergeAlluxioConfiguration(
            sBench.getConf(), ServerConfiguration.global()));
    long tStart = System.currentTimeMillis();
    sBench.getConf().setLong("test.io.skip.size", 0);
    sBench.randomReadTest(fs);
    long execTime = System.currentTimeMillis() - tStart;
    sBench.analyzeResult(fs, TestType.TEST_TYPE_READ_RANDOM, execTime);
  }

  @Test(timeout = 50000)
  public void readBackward() throws Exception {
    org.apache.hadoop.fs.FileSystem fs = org.apache.hadoop.fs.FileSystem.get(
        sLocalAlluxioClusterUri, HadoopConfigurationUtils.mergeAlluxioConfiguration(
            sBench.getConf(), ServerConfiguration.global()));
    long tStart = System.currentTimeMillis();
    sBench.getConf().setLong("test.io.skip.size", -DEFAULT_BUFFER_SIZE);
    sBench.randomReadTest(fs);
    long execTime = System.currentTimeMillis() - tStart;
    sBench.analyzeResult(fs, TestType.TEST_TYPE_READ_BACKWARD, execTime);
  }

  @Test(timeout = 50000)
  public void readSkip() throws Exception {
    org.apache.hadoop.fs.FileSystem fs = org.apache.hadoop.fs.FileSystem.get(
        sLocalAlluxioClusterUri, HadoopConfigurationUtils.mergeAlluxioConfiguration(
            sBench.getConf(), ServerConfiguration.global()));
    long tStart = System.currentTimeMillis();
    sBench.getConf().setLong("test.io.skip.size", 1);
    sBench.randomReadTest(fs);
    long execTime = System.currentTimeMillis() - tStart;
    sBench.analyzeResult(fs, TestType.TEST_TYPE_READ_SKIP, execTime);
  }

  @Test(timeout = 50000)
  public void readLargeSkip() throws Exception {
    org.apache.hadoop.fs.FileSystem fs = org.apache.hadoop.fs.FileSystem.get(
        sLocalAlluxioClusterUri, HadoopConfigurationUtils.mergeAlluxioConfiguration(
            sBench.getConf(), ServerConfiguration.global()));
    long tStart = System.currentTimeMillis();
    sBench.getConf().setLong("test.io.skip.size", 5000);
    sBench.randomReadTest(fs);
    long execTime = System.currentTimeMillis() - tStart;
    sBench.analyzeResult(fs, TestType.TEST_TYPE_READ_SKIP, execTime);
  }

  // TODO(hy): Should activate this unit test after ALLUXIO-25 has been solved
  // @Test (timeout = 50000)
  public void append() throws Exception {
    org.apache.hadoop.fs.FileSystem fs = org.apache.hadoop.fs.FileSystem.get(
        sLocalAlluxioClusterUri, HadoopConfigurationUtils.mergeAlluxioConfiguration(
            sBench.getConf(), ServerConfiguration.global()));
    long tStart = System.currentTimeMillis();
    sBench.mapperAppendTest(fs);
    long execTime = System.currentTimeMillis() - tStart;
    sBench.analyzeResult(fs, TestType.TEST_TYPE_APPEND, execTime);
  }
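  // Note on the control files created below: each control file is a small SequenceFile holding a
  // single (Text fileName, LongWritable sizeInBytes) record. runIOTest() points
  // SequenceFileInputFormat at the control directory, so each map task typically receives one
  // record naming the data file it should access and the number of bytes to process.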
  @SuppressWarnings("deprecation")
  private void createControlFile(org.apache.hadoop.fs.FileSystem fs, long nrBytes, // in bytes
      int nrFiles) throws IOException {
    LOG.info("creating control file: " + nrBytes + " bytes, " + nrFiles + " files");

    Path controlDir = getControlDir(mConfig);

    if (!fs.exists(controlDir)) {
      fs.delete(controlDir, true);

      for (int i = 0; i < nrFiles; i++) {
        String name = getFileName(i);
        Path controlFile = new Path(controlDir, "in_file_" + name);
        SequenceFile.Writer writer = null;
        try {
          writer = SequenceFile.createWriter(fs, mConfig, controlFile, Text.class,
              LongWritable.class, CompressionType.NONE);
          writer.append(new Text(name), new LongWritable(nrBytes));
        } catch (Exception e) {
          throw new IOException(e.getLocalizedMessage());
        } finally {
          if (writer != null) {
            writer.close();
          }
          writer = null;
        }
      }
    }
    LOG.info("created control files for: " + nrFiles + " files");
  }

  private static String getFileName(int fIdx) {
    return BASE_FILE_NAME + Integer.toString(fIdx);
  }

  /**
   * Write/Read mapper base class.
   * <p>
   * Collects the following statistics per task:
   * <ul>
   * <li>number of tasks completed</li>
   * <li>number of bytes written/read</li>
   * <li>execution time</li>
   * <li>i/o rate</li>
   * <li>i/o rate squared</li>
   * </ul>
   */
  private abstract static class IOStatMapper extends AbstractIOMapper<Long> {
    protected CompressionCodec mCompressionCodec;

    IOStatMapper() {}

    @Override // Mapper
    public void configure(JobConf conf) {
      super.configure(conf);

      // grab compression
      String compression = getConf().get("test.io.compression.class", null);
      Class<? extends CompressionCodec> codec;

      // try to initialize codec
      try {
        codec = (compression == null) ? null
            : Class.forName(compression).asSubclass(CompressionCodec.class);
      } catch (Exception e) {
        throw new RuntimeException("Compression codec not found: ", e);
      }

      if (codec != null) {
        mCompressionCodec = ReflectionUtils.newInstance(codec, getConf());
      }
    }

    @Override // AbstractIOMapper
    void collectStats(OutputCollector<Text, Text> output, String name, long execTime, Long objSize)
        throws IOException {
      long totalSize = objSize;
      float ioRateMbSec = (float) totalSize * 1000 / (execTime * MEGA);
      LOG.info("Number of bytes processed = " + totalSize);
      LOG.info("Exec time = " + execTime);
      LOG.info("IO rate = " + ioRateMbSec);

      output.collect(new Text(AccumulatingReducer.VALUE_TYPE_LONG + "tasks"),
          new Text(String.valueOf(1)));
      output.collect(new Text(AccumulatingReducer.VALUE_TYPE_LONG + "size"),
          new Text(String.valueOf(totalSize)));
      output.collect(new Text(AccumulatingReducer.VALUE_TYPE_LONG + "time"),
          new Text(String.valueOf(execTime)));
      output.collect(new Text(AccumulatingReducer.VALUE_TYPE_FLOAT + "rate"),
          new Text(String.valueOf(ioRateMbSec * 1000)));
      output.collect(new Text(AccumulatingReducer.VALUE_TYPE_FLOAT + "sqrate"),
          new Text(String.valueOf(ioRateMbSec * ioRateMbSec * 1000)));
    }
  }
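  // Note: collectStats() above reports "rate" and "sqrate" scaled up by 1000; analyzeResult()
  // divides the accumulated values by 1000 again before computing the average i/o rate and its
  // standard deviation, so the scaling cancels out in the final report.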
  /**
   * Write mapper class.
   */
  public static class WriteMapper extends IOStatMapper {

    public WriteMapper() {
      for (int i = 0; i < mBufferSize; i++) {
        mBuffer[i] = (byte) ('0' + i % 50);
      }
    }

    @Override // AbstractIOMapper
    public Closeable getIOStream(String name) throws IOException {
      // create file
      OutputStream out = mFS.create(new Path(getDataDir(getConf()), name), true, mBufferSize);
      if (mCompressionCodec != null) {
        out = mCompressionCodec.createOutputStream(out);
      }
      LOG.info("out = " + out.getClass().getName());
      return out;
    }

    @Override // AbstractIOMapper, totalSize is in bytes
    public Long doIO(Reporter reporter, String name, long totalSize) throws IOException {
      OutputStream out = (OutputStream) this.mStream;
      // write to the file
      long nrRemaining;
      for (nrRemaining = totalSize; nrRemaining > 0; nrRemaining -= mBufferSize) {
        int curSize = (mBufferSize < nrRemaining) ? mBufferSize : (int) nrRemaining;
        out.write(mBuffer, 0, curSize);
        reporter.setStatus("writing " + name + "@" + (totalSize - nrRemaining) + "/" + totalSize
            + " ::host = " + mHostname);
      }
      return totalSize;
    }
  }

  private void mapperWriteTest(org.apache.hadoop.fs.FileSystem fs) throws IOException {
    Path writeDir = getWriteDir(mConfig);
    fs.delete(getDataDir(mConfig), true);
    fs.delete(writeDir, true);

    runIOTest(WriteMapper.class, writeDir);
  }

  private void runIOTest(Class<? extends Mapper<Text, LongWritable, Text, Text>> mapperClass,
      Path outputDir) throws IOException {
    JobConf job = new JobConf(mConfig, DFSIOIntegrationTest.class);

    FileInputFormat.setInputPaths(job, getControlDir(mConfig));
    job.setInputFormat(SequenceFileInputFormat.class);

    job.setMapperClass(mapperClass);
    job.setReducerClass(AccumulatingReducer.class);

    FileOutputFormat.setOutputPath(job, outputDir);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setNumReduceTasks(1);
    JobClient.runJob(job);
  }

  /**
   * Append mapper class.
   */
  public static class AppendMapper extends IOStatMapper {

    public AppendMapper() {
      for (int i = 0; i < mBufferSize; i++) {
        mBuffer[i] = (byte) ('0' + i % 50);
      }
    }

    @Override // AbstractIOMapper
    public Closeable getIOStream(String name) throws IOException {
      // open file for append
      OutputStream out = mFS.append(new Path(getDataDir(getConf()), name), mBufferSize);
      if (mCompressionCodec != null) {
        out = mCompressionCodec.createOutputStream(out);
      }
      LOG.info("out = " + out.getClass().getName());
      return out;
    }

    @Override // AbstractIOMapper, totalSize is in Bytes
    public Long doIO(Reporter reporter, String name, long totalSize) throws IOException {
      OutputStream out = (OutputStream) this.mStream;
      // write to the file
      long nrRemaining;
      for (nrRemaining = totalSize; nrRemaining > 0; nrRemaining -= mBufferSize) {
        int curSize = (mBufferSize < nrRemaining) ? mBufferSize : (int) nrRemaining;
        out.write(mBuffer, 0, curSize);
        reporter.setStatus("writing " + name + "@" + (totalSize - nrRemaining) + "/" + totalSize
            + " ::host = " + mHostname);
      }
      return totalSize;
    }
  }

  private void mapperAppendTest(org.apache.hadoop.fs.FileSystem fs) throws IOException {
    Path appendDir = getAppendDir(mConfig);
    fs.delete(appendDir, true);
    runIOTest(AppendMapper.class, appendDir);
  }
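  // Note: runIOTest() above submits a single-reducer MapReduce job whose input is the control
  // directory; AccumulatingReducer (defined elsewhere in this package) aggregates the per-task
  // statistics into one part-00000 output file that analyzeResult() later parses.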
  /**
   * Read mapper class.
   */
  public static class ReadMapper extends IOStatMapper {

    public ReadMapper() {}

    @Override // AbstractIOMapper
    public Closeable getIOStream(String name) throws IOException {
      // open file
      InputStream in = mFS.open(new Path(getDataDir(getConf()), name));
      if (mCompressionCodec != null) {
        in = mCompressionCodec.createInputStream(in);
      }
      LOG.info("in = " + in.getClass().getName());
      return in;
    }

    @Override // AbstractIOMapper, totalSize in Bytes
    public Long doIO(Reporter reporter, String name, long totalSize) throws IOException {
      InputStream in = (InputStream) this.mStream;
      long actualSize = 0;
      while (actualSize < totalSize) {
        int curSize = in.read(mBuffer, 0, mBufferSize);
        if (curSize < 0) {
          break;
        }
        actualSize += curSize;
        reporter.setStatus(
            "reading " + name + "@" + actualSize + "/" + totalSize + " ::host = " + mHostname);
      }
      return actualSize;
    }
  }

  private void mapperReadTest(org.apache.hadoop.fs.FileSystem fs) throws IOException {
    Path readDir = getReadDir(mConfig);
    fs.delete(readDir, true);
    runIOTest(ReadMapper.class, readDir);
  }

  /**
   * Mapper class for random reads. The mapper chooses a position in the file and reads bufferSize
   * bytes starting at the chosen position. It stops after reading the totalSize bytes, specified
   * by size.
   *
   * There are three types of reads:
   * <ul>
   * <li>random read always chooses a random position to read from: skipSize = 0</li>
   * <li>backward read reads the file in reverse order: skipSize < 0</li>
   * <li>skip-read skips skipSize bytes after every read: skipSize > 0</li>
   * </ul>
   */
  public static class RandomReadMapper extends IOStatMapper {
    private Random mRnd;
    private long mFileSize;
    private long mSkipSize;

    @Override // Mapper
    public void configure(JobConf conf) {
      super.configure(conf);
      mSkipSize = conf.getLong("test.io.skip.size", 0);
    }

    public RandomReadMapper() {
      mRnd = new Random();
    }

    @Override // AbstractIOMapper
    public Closeable getIOStream(String name) throws IOException {
      Path filePath = new Path(getDataDir(getConf()), name);
      mFileSize = mFS.getFileStatus(filePath).getLen();
      InputStream in = mFS.open(filePath);
      if (mCompressionCodec != null) {
        in = new FSDataInputStream(mCompressionCodec.createInputStream(in));
      }
      LOG.info("in = " + in.getClass().getName());
      LOG.info("skipSize = " + mSkipSize);
      return in;
    }

    @Override // AbstractIOMapper, totalSize in Bytes
    public Long doIO(Reporter reporter, String name, long totalSize) throws IOException {
      PositionedReadable in = (PositionedReadable) this.mStream;
      long actualSize = 0;
      for (long pos = nextOffset(-1); actualSize < totalSize; pos = nextOffset(pos)) {
        int curSize = in.read(pos, mBuffer, 0, mBufferSize);
        if (curSize < 0) {
          break;
        }
        actualSize += curSize;
        reporter.setStatus(
            "reading " + name + "@" + actualSize + "/" + totalSize + " ::host = " + mHostname);
      }
      return actualSize;
    }
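    // For illustration (values assume bufferSize = 4096): with skipSize = 0 every call below
    // returns a random offset within the file; with skipSize = 1 successive offsets are
    // 0, 4097, 8194, ...; with skipSize = -4096 reads walk backwards from the end of the file
    // one buffer at a time.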
    /**
     * Get next offset for reading. If current < 0 then choose the initial offset according to the
     * read type.
     *
     * @param current the current offset
     * @return the next offset for reading
     */
    private long nextOffset(long current) {
      if (mSkipSize == 0) {
        return mRnd.nextInt((int) (mFileSize));
      }
      if (mSkipSize > 0) {
        return (current < 0) ? 0 : (current + mBufferSize + mSkipSize);
      }
      // skipSize < 0
      return (current < 0) ? Math.max(0, mFileSize - mBufferSize)
          : Math.max(0, current + mSkipSize);
    }
  }

  private void randomReadTest(org.apache.hadoop.fs.FileSystem fs) throws IOException {
    Path readDir = getRandomReadDir(mConfig);
    fs.delete(readDir, true);
    runIOTest(RandomReadMapper.class, readDir);
  }

  // fileSize is in Bytes
  private void sequentialTest(org.apache.hadoop.fs.FileSystem fs, TestType testType, long fileSize,
      int nrFiles) throws IOException {
    IOStatMapper ioer;
    switch (testType) {
      case TEST_TYPE_READ:
        ioer = new ReadMapper();
        break;
      case TEST_TYPE_WRITE:
        ioer = new WriteMapper();
        break;
      case TEST_TYPE_APPEND:
        ioer = new AppendMapper();
        break;
      case TEST_TYPE_READ_RANDOM:
      case TEST_TYPE_READ_BACKWARD:
      case TEST_TYPE_READ_SKIP:
        ioer = new RandomReadMapper();
        break;
      default:
        return;
    }
    for (int i = 0; i < nrFiles; i++) {
      ioer.doIO(Reporter.NULL, BASE_FILE_NAME + Integer.toString(i), fileSize);
    }
    ioer.close();
  }

  /**
   * Runs the integration test for DFS IO.
   *
   * @param args arguments
   */
  public static void main(String[] args) {
    DFSIOIntegrationTest bench = new DFSIOIntegrationTest();
    int res;
    try {
      res = ToolRunner.run(bench, args);
    } catch (Exception e) {
      System.err.print(StringUtils.stringifyException(e));
      res = -2;
    }
    if (res == -1) {
      System.err.print(USAGE);
    }
    System.exit(res);
  }

  @Override // Tool
  public int run(String[] args) throws IOException {
    TestType testType = null;
    int bufferSize = DEFAULT_BUFFER_SIZE;
    long nrBytes = MEGA;
    int nrFiles = 1;
    long skipSize = 0;
    String resFileName = DEFAULT_RES_FILE_NAME;
    String compressionClass = null;
    boolean isSequential = false;
    String version = DFSIOIntegrationTest.class.getSimpleName() + ".1.7";

    sGenerateReportFile = true;

    LOG.info(version);
    if (args.length == 0) {
      System.err.println("Missing arguments.");
      return -1;
    }

    for (int i = 0; i < args.length; i++) { // parse command line
      if (args[i].startsWith("-read")) {
        testType = TestType.TEST_TYPE_READ;
      } else if (args[i].equals("-write")) {
        testType = TestType.TEST_TYPE_WRITE;
      } else if (args[i].equals("-append")) {
        testType = TestType.TEST_TYPE_APPEND;
      } else if (args[i].equals("-random")) {
        if (testType != TestType.TEST_TYPE_READ) {
          return -1;
        }
        testType = TestType.TEST_TYPE_READ_RANDOM;
      } else if (args[i].equals("-backward")) {
        if (testType != TestType.TEST_TYPE_READ) {
          return -1;
        }
        testType = TestType.TEST_TYPE_READ_BACKWARD;
      } else if (args[i].equals("-skip")) {
        if (testType != TestType.TEST_TYPE_READ) {
          return -1;
        }
        testType = TestType.TEST_TYPE_READ_SKIP;
      } else if (args[i].equals("-clean")) {
        testType = TestType.TEST_TYPE_CLEANUP;
      } else if (args[i].startsWith("-seq")) {
        isSequential = true;
      } else if (args[i].startsWith("-compression")) {
        compressionClass = args[++i];
      } else if (args[i].equals("-nrFiles")) {
        nrFiles = Integer.parseInt(args[++i]);
      } else if (args[i].equals("-fileSize") || args[i].equals("-size")) {
        nrBytes = parseSize(args[++i]);
      } else if (args[i].equals("-skipSize")) {
        skipSize = parseSize(args[++i]);
      } else if (args[i].equals("-bufferSize")) {
        bufferSize = Integer.parseInt(args[++i]);
      } else if (args[i].equals("-resFile")) {
        resFileName = args[++i];
      } else {
        System.err.println("Illegal argument: " + args[i]);
        return -1;
      }
    }
    if (testType == null) {
      return -1;
    }
    if (testType == TestType.TEST_TYPE_READ_BACKWARD) {
      skipSize = -bufferSize;
    } else if (testType == TestType.TEST_TYPE_READ_SKIP && skipSize == 0) {
      skipSize = bufferSize;
    }

    LOG.info("nrFiles = " + nrFiles);
    LOG.info("nrBytes (MB) = " + toMB(nrBytes));
    LOG.info("bufferSize = " + bufferSize);
    if (skipSize > 0) {
      LOG.info("skipSize = " + skipSize);
    }
    LOG.info("baseDir = " + getBaseDir(mConfig));

    if (compressionClass != null) {
      mConfig.set("test.io.compression.class", compressionClass);
      LOG.info("compressionClass = " + compressionClass);
    }

    mConfig.setInt("test.io.file.buffer.size", bufferSize);
    mConfig.setLong("test.io.skip.size", skipSize);
    mConfig.setBoolean("dfs.support.append", true);
    org.apache.hadoop.fs.FileSystem fs = org.apache.hadoop.fs.FileSystem.get(mConfig);

    if (isSequential) {
      long tStart = System.currentTimeMillis();
      sequentialTest(fs, testType, nrBytes, nrFiles);
      long execTime = System.currentTimeMillis() - tStart;
      String resultLine = "Seq Test exec time sec: " + (float) execTime / 1000;
      LOG.info(resultLine);
      return 0;
    }
    if (testType == TestType.TEST_TYPE_CLEANUP) {
      cleanup(fs);
      return 0;
    }
    createControlFile(fs, nrBytes, nrFiles);
    long tStart = System.currentTimeMillis();
    switch (testType) {
      case TEST_TYPE_WRITE:
        mapperWriteTest(fs);
        break;
      case TEST_TYPE_READ:
        mapperReadTest(fs);
        break;
      case TEST_TYPE_APPEND:
        mapperAppendTest(fs);
        break;
      case TEST_TYPE_READ_RANDOM:
      case TEST_TYPE_READ_BACKWARD:
      case TEST_TYPE_READ_SKIP:
        randomReadTest(fs);
        break;
      default:
    }
    long execTime = System.currentTimeMillis() - tStart;

    analyzeResult(fs, testType, execTime, resFileName);
    return 0;
  }

  @Override // Configurable
  public org.apache.hadoop.conf.Configuration getConf() {
    return this.mConfig;
  }

  @Override // Configurable
  public void setConf(org.apache.hadoop.conf.Configuration conf) {
    mConfig = conf;
  }

  /**
   * Returns size in bytes.
   *
   * @param arg = {d}[B|KB|MB|GB|TB]
   * @return the size in bytes
   */
  static long parseSize(String arg) {
    String[] args = arg.split("\\D", 2); // get digits
    assert args.length <= 2;
    long nrBytes = Long.parseLong(args[0]);
    String bytesMult = arg.substring(args[0].length()); // get byte multiple
    return nrBytes * ByteMultiple.parseString(bytesMult).value();
  }

  static float toMB(long bytes) {
    return ((float) bytes) / MEGA;
  }

  private void analyzeResult(org.apache.hadoop.fs.FileSystem fs, TestType testType, long execTime,
      String resFileName) throws IOException {
    Path reduceFile = getReduceFilePath(testType);
    long tasks = 0;
    long size = 0;
    long time = 0;
    float rate = 0;
    float sqrate = 0;
    DataInputStream in = null;
    BufferedReader lines = null;
    try {
      in = new DataInputStream(fs.open(reduceFile));
      lines = new BufferedReader(new InputStreamReader(in));
      String line;
      while ((line = lines.readLine()) != null) {
        StringTokenizer tokens = new StringTokenizer(line, " \t\n\r\f%");
        String attr = tokens.nextToken();
        if (attr.endsWith(":tasks")) {
          tasks = Long.parseLong(tokens.nextToken());
        } else if (attr.endsWith(":size")) {
          size = Long.parseLong(tokens.nextToken());
        } else if (attr.endsWith(":time")) {
          time = Long.parseLong(tokens.nextToken());
        } else if (attr.endsWith(":rate")) {
          rate = Float.parseFloat(tokens.nextToken());
        } else if (attr.endsWith(":sqrate")) {
          sqrate = Float.parseFloat(tokens.nextToken());
        }
      }
    } finally {
      if (in != null) {
        in.close();
      }
      if (lines != null) {
        lines.close();
      }
    }
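    // The reducer has summed the per-file values, so dividing by the number of tasks gives the
    // mean i/o rate (med), and the standard deviation follows from E[X^2] - (E[X])^2 using the
    // accumulated squared rates; the extra factor of 1000 added in collectStats is divided out.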
    double med = rate / 1000 / tasks;
    double stdDev = Math.sqrt(Math.abs(sqrate / 1000 / tasks - med * med));
    String[] resultLines = {
        "----- DFSIOIntegrationTest ----- : " + testType,
        " Date & time: " + new Date(System.currentTimeMillis()),
        " Number of files: " + tasks,
        "Total MBytes processed: " + toMB(size),
        " Throughput mb/sec: " + size * 1000.0 / (time * MEGA),
        "Average IO rate mb/sec: " + med,
        " IO rate std deviation: " + stdDev,
        " Test exec time sec: " + (float) execTime / 1000,
        ""};

    PrintStream res = null;
    try {
      if (sGenerateReportFile) {
        res = new PrintStream(new FileOutputStream(new File(resFileName), true));
      }
      for (String resultLine : resultLines) {
        LOG.info(resultLine);
        if (sGenerateReportFile) {
          res.println(resultLine);
        } else {
          System.out.println(resultLine);
        }
      }
    } finally {
      if (res != null) {
        res.close();
      }
    }
  }

  private void analyzeResult(org.apache.hadoop.fs.FileSystem fs, TestType testType, long execTime)
      throws IOException {
    analyzeResult(fs, testType, execTime, DEFAULT_RES_FILE_NAME);
  }

  @Nullable
  private Path getReduceFilePath(TestType testType) {
    switch (testType) {
      case TEST_TYPE_WRITE:
        return new Path(getWriteDir(mConfig), "part-00000");
      case TEST_TYPE_APPEND:
        return new Path(getAppendDir(mConfig), "part-00000");
      case TEST_TYPE_READ:
        return new Path(getReadDir(mConfig), "part-00000");
      case TEST_TYPE_READ_RANDOM:
      case TEST_TYPE_READ_BACKWARD:
      case TEST_TYPE_READ_SKIP:
        return new Path(getRandomReadDir(mConfig), "part-00000");
      default:
    }
    return null;
  }

  private void cleanup(org.apache.hadoop.fs.FileSystem fs) throws IOException {
    LOG.info("Cleaning up test files");
    fs.delete(new Path(getBaseDir(mConfig)), true);
  }
}
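The benchmark can also be driven outside of JUnit through its Tool interface. The sketch below is
illustrative only: it assumes a reachable cluster is already configured via the ambient Hadoop
configuration (fs.defaultFS and related properties), and the file count, size, and result-file
path are arbitrary example values taken from the USAGE string above.

import alluxio.client.hadoop.DFSIOIntegrationTest;

import org.apache.hadoop.util.ToolRunner;

public class DFSIODriverExample {
  public static void main(String[] args) throws Exception {
    // Write 4 files of 16KB each and append a summary to the (example) result file.
    ToolRunner.run(new DFSIOIntegrationTest(), new String[] {
        "-write", "-nrFiles", "4", "-size", "16KB", "-resFile", "/tmp/DFSIO_results.log"});
    // Read the same files back and record read throughput.
    ToolRunner.run(new DFSIOIntegrationTest(), new String[] {
        "-read", "-nrFiles", "4", "-size", "16KB", "-resFile", "/tmp/DFSIO_results.log"});
    // Remove the benchmark directories.
    ToolRunner.run(new DFSIOIntegrationTest(), new String[] {"-clean"});
  }
}

ToolRunner also processes generic Hadoop options before handing the remaining arguments to run(),
which is why the USAGE string advertises [genericOptions].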