// Source listing: com.cloudera.HioBench — an HDFS random/sequential read I/O benchmark.

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.cloudera;

import java.io.InputStream;
import java.io.BufferedOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.PrintWriter;
import java.lang.Thread;
import java.lang.System;
import java.net.URI;
import java.util.ArrayList;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.Random;
import java.util.StringTokenizer;

import org.apache.commons.codec.binary.Hex;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.security.AccessControlException;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**
 * This benchmark tests I/O in HDFS.
 */
/**
 * HioBench benchmarks read I/O against HDFS.
 *
 * All configuration comes from Java system properties (see {@link #usage(int)}).
 * The benchmark writes (or reuses) a test file whose byte at position {@code p}
 * is {@code (byte)(p & 0xff)}, then spawns {@code hio.nthreads} reader threads.
 * Each thread reads {@code hio.ngigs.to.read} (or {@code hio.nmegs.to.read})
 * worth of data — either at random offsets or sequentially — and verifies every
 * chunk against the expected byte pattern.
 */
public class HioBench { //extends Configured {
    /**
     * Print the usage message to stderr and terminate the JVM.
     *
     * @param retval process exit status. This method never returns.
     */
    private static void usage(int retval) {
        System.err.println("HioBench: tests random HDFS I/O.\n" + "\n" + "Java system properties to set:\n"
                + "hio.nthreads [number-of-threads]   Number of simultaneous threads\n"
                + "hio.ngigs.to.read [gigabytes]      Number of gigabytes to read in each thread\n"
                + "hio.nmegs.to.read [megs]           Number of megabytes to read in each thread\n"
                + "hio.skip.checksum [boolean]        If true, we will skip HDFS checksum (default false)\n"
                + "hio.read.chunk.bytes [bytes]       Number of bytes to read in each chunk (default 1048576)\n"
                + "hio.read.offset.alignment [bytes]  Number of bytes that random read offset will be aligned (default 1)\n"
                + "hio.ngigs.in.file [gigabytes]      Number of gigabytes in the file to write\n"
                + "hio.hdfs.uri [uri]                 The HDFS URI to talk to.\n"
                + "hio.hdfs.file.name [name]          The name of the input file to use.\n"
                + "                                   If the file already exists, we will use it\n"
                + "                                   rather than rewriting it.\n"
                + "dump.conf                          If set, we will dump out our\n"
                + "                                   configuration to stdout when\n"
                + "                                   starting up.\n" + "\n" + "A few notes about configuration:\n"
                + "If you want to be sure that your reads hit the disk, you need to set\n"
                + "hio.ngigs.in.file to something much higher than the available memory size.\n"
                + "Otherwise, you're mostly reading from the page cache, which may or may not\n"
                + "be what you want.\n" + "\n"
                + "The more threads you have, the more 'seeky' your workload will be, since\n"
                + "each thread independently seeks and reads.\n");
        System.exit(retval);
    }

    /**
     * Read a required integer system property, or print usage and exit.
     *
     * @param key the system property name
     * @return the parsed integer value
     */
    static int getIntOrDie(String key) {
        String val = System.getProperty(key);
        if (val == null) {
            System.err.println("You must set the integer property " + key + "\n\n");
            usage(1); // exits; parseInt below only runs when val != null
        }
        return Integer.parseInt(val);
    }

    /**
     * Read an optional integer system property.
     *
     * @param key        the system property name
     * @param defaultVal value to return when the property is unset
     * @return the parsed integer value, or {@code defaultVal}
     */
    static int getIntWithDefault(String key, int defaultVal) {
        String val = System.getProperty(key);
        if (val == null) {
            return defaultVal;
        }
        return Integer.parseInt(val);
    }

    /**
     * Read an optional boolean system property.
     *
     * @param key        the system property name
     * @param defaultVal value to return when the property is unset
     * @return the parsed boolean value, or {@code defaultVal}
     */
    static boolean getBooleanWithDefault(String key, boolean defaultVal) {
        String val = System.getProperty(key);
        if (val == null) {
            return defaultVal;
        }
        return Boolean.parseBoolean(val);
    }

    /**
     * Read a required string system property, or print usage and exit.
     *
     * @param key the system property name
     * @return the property value
     */
    static String getStringOrDie(String key) {
        String val = System.getProperty(key);
        if (val == null) {
            System.err.println("You must set the string property " + key + "\n\n");
            usage(1); // exits; the return below only runs when val != null
        }
        return val;
    }

    /**
     * Read an optional string system property.
     *
     * @param key        the system property name
     * @param defaultVal value to return when the property is unset
     * @return the property value, or {@code defaultVal}
     */
    static String getStringWithDefault(String key, String defaultVal) {
        String val = System.getProperty(key);
        if (val == null) {
            return defaultVal;
        }
        return val;
    }

    /**
     * Fill {@code arr[0..fillLen)} with the pattern the test file contains at
     * file offset {@code off}: byte i of the fill is {@code (off + i) & 0xff}.
     *
     * @param arr     destination buffer
     * @param off     file offset the fill corresponds to
     * @param fillLen number of bytes to fill
     */
    static void fillArrayWithExpected(byte arr[], long off, int fillLen) {
        byte incr = (byte) (off & 0xff);
        for (int i = 0; i < fillLen; i++) {
            arr[i] = incr++;
        }
    }

    /**
     * Create the test file (replication 1) and fill it with the expected byte
     * pattern, 64K at a time, up to {@code options.nBytesInFile} bytes.
     *
     * @param fs the filesystem to write to
     * @throws IOException on any write failure
     */
    static void writeFile(FileSystem fs) throws IOException {
        FSDataOutputStream fos = fs.create(options.filePath, (short) 1);
        byte arr[] = new byte[65536];
        try {
            for (long off = 0; off < options.nBytesInFile;) {
                long rem = options.nBytesInFile - off;
                int lim = rem > arr.length ? arr.length : (int) rem;
                // Only the first 'lim' bytes are written, so only fill those.
                fillArrayWithExpected(arr, off, lim);
                fos.write(arr, 0, lim);
                off += lim;
            }
        } finally {
            fos.close();
        }
    }

    /**
     * Immutable snapshot of all benchmark settings, parsed once from system
     * properties at startup.
     */
    static private class Options {
        public final int nThreads;
        public final long nBytesToRead;
        public final int nReadChunkBytes;
        public final int nGigsInFile;
        public final long nBytesInFile;
        public final String hdfsUri;
        public final String filename;
        public final Path filePath;
        public final String testType;
        public final boolean skipChecksum;
        public final boolean dumpConf;
        public final int nReadAlign;

        public Options() {
            nThreads = getIntOrDie("hio.nthreads");
            long nGigsToRead = getIntWithDefault("hio.ngigs.to.read", 0);
            long nMegsToRead = getIntWithDefault("hio.nmegs.to.read", 0);
            // Exactly one of the two read-size properties must be set.
            if ((nGigsToRead != 0) && (nMegsToRead != 0)) {
                throw new RuntimeException("can't set both hio.ngigs.to.read and " + "hio.nmegs.to.read!");
            } else if (nGigsToRead != 0) {
                nBytesToRead = nGigsToRead * 1024L * 1024L * 1024L;
            } else if (nMegsToRead != 0) {
                nBytesToRead = nMegsToRead * 1024L * 1024L;
            } else {
                throw new RuntimeException("you must set either hio.ngigs.to.read or " + "hio.nmegs.to.read.");
            }
            nReadChunkBytes = getIntWithDefault("hio.read.chunk.bytes", 1048576);
            nGigsInFile = getIntOrDie("hio.ngigs.in.file");
            nBytesInFile = nGigsInFile * 1024L * 1024L * 1024L;
            hdfsUri = getStringOrDie("hio.hdfs.uri");
            filename = getStringWithDefault("hio.hdfs.file.name", "/hio_bench_test." + System.currentTimeMillis());
            dumpConf = (System.getProperty("dump.conf") != null);
            testType = getStringWithDefault("hio.hdfs.test.type", "random");
            skipChecksum = getBooleanWithDefault("hio.skip.checksum", false);
            nReadAlign = getIntWithDefault("hio.read.offset.alignment", 1);
            filePath = new Path(filename);
        }
    }

    // Global benchmark configuration; assigned once in main() before any
    // worker thread is created.
    private static Options options;

    /**
     * One benchmark reader thread. Opens its own stream on the test file and
     * repeatedly invokes its BenchReader until nBytesToRead bytes have been
     * read and verified. Any failure is captured in {@link #getException()}.
     */
    static private class WorkerThread extends Thread {
        private final boolean shouldPrint;
        private final FSDataInputStream fis;
        private Throwable exception = null;
        // Check the clock only every N reads to keep the hot loop cheap.
        private final static int TRIES_BETWEEN_TIMECHECK = 20;
        private final static int MILLISECONDS_BETWEEN_PRINT = 5000;
        private final BenchReader benchReader;

        /** Strategy for one read step of the benchmark loop. */
        interface BenchReader {
            /** Read and verify one chunk. */
            void read(FSDataInputStream fis) throws IOException;

            /** One-time setup before the read loop starts. */
            void init(FSDataInputStream fis) throws IOException;
        }

        /** Reads chunks at random (optionally aligned) offsets via pread. */
        static class RandomSeekBenchReader implements BenchReader {
            final private byte expect[];
            final private byte got[];
            final private Options options;
            final private Random random = new Random(System.nanoTime());

            RandomSeekBenchReader(Options options) {
                this.expect = new byte[options.nReadChunkBytes];
                this.got = new byte[options.nReadChunkBytes];
                this.options = options;
            }

            public void init(FSDataInputStream fis) throws IOException {
                // No setup needed; every read carries its own offset.
            }

            public void read(FSDataInputStream fis) throws IOException {
                // Using modulo here isn't great, but it's good enough here.
                long off = random.nextLong();
                if (off < 0)
                    off = -off;
                // Keep the chunk strictly inside the file.
                off %= (options.nBytesInFile - options.nReadChunkBytes - 1);
                // Round down to the requested alignment.
                off = (off / options.nReadAlign) * options.nReadAlign;
                fillArrayWithExpected(expect, off, expect.length);
                readFully(fis, off, got, 0, got.length);
                compareArrays(expect, got);
            }
        }

        /** Reads chunks sequentially, wrapping to offset 0 at end of file. */
        static class SequentialBenchReader implements BenchReader {
            final private byte expect[];
            final private byte got[];
            private long off;
            final private Options options;

            SequentialBenchReader(Options options, int idx) {
                this.expect = new byte[options.nReadChunkBytes];
                this.got = new byte[options.nReadChunkBytes];

                /*
                 * If there are 3 threads, put each one 1/3 through the file.
                 * Example: if file length is 100, start threads at 0, 33, 66.
                 *
                 * 0----------------100
                 * 0    33     66
                 *
                 * and so forth.
                 */
                this.off = idx * options.nBytesInFile;
                this.off /= options.nThreads;

                this.options = options;
            }

            public void init(FSDataInputStream fis) throws IOException {
                fis.seek(this.off);
            }

            public void read(FSDataInputStream fis) throws IOException {
                // Wrap around rather than reading past end of file.
                if (off + expect.length >= options.nBytesInFile) {
                    off = 0;
                    fis.seek(off);
                }
                fillArrayWithExpected(expect, off, expect.length);
                readFully(fis, got, 0, got.length);
                compareArrays(expect, got);
                off += got.length;
            }
        }

        /**
         * Build the BenchReader matching {@code options.testType}.
         *
         * @param options benchmark configuration
         * @param idx     this thread's index (used for sequential start offset)
         * @return the reader strategy
         * @throws RuntimeException if testType is neither "random" nor "sequential"
         */
        static BenchReader createBenchReader(Options options, int idx) {
            if (options.testType.equals("random")) {
                return new RandomSeekBenchReader(options);
            } else if (options.testType.equals("sequential")) {
                return new SequentialBenchReader(options, idx);
            } else {
                throw new RuntimeException("can't understand testType " + options.testType
                        + ": valid values are 'random' and 'sequential'");
            }
        }

        public WorkerThread(boolean shouldPrint, FileSystem fs, BenchReader benchReader) throws IOException {
            this.shouldPrint = shouldPrint;
            this.fis = fs.open(options.filePath);
            this.benchReader = benchReader;
        }

        /**
         * Read exactly {@code len} bytes from the stream's current position
         * into {@code buf} starting at {@code arrayOff}.
         *
         * @throws IOException on premature EOF or any read failure
         */
        public static void readFully(FSDataInputStream in, byte buf[], int arrayOff, int len) throws IOException {
            while (len > 0) {
                int ret = in.read(buf, arrayOff, len);
                if (ret < 0) {
                    throw new IOException(
                            "Premature EOF from inputStream reading " + len + " bytes from " + options.filename);
                }
                len -= ret;
                arrayOff += ret;
            }
        }

        /**
         * Positioned read: read exactly {@code len} bytes starting at file
         * offset {@code off + arrayOff} into {@code buf} at {@code arrayOff},
         * without moving the stream's seek pointer.
         *
         * @throws IOException on premature EOF or any read failure
         */
        public static void readFully(FSDataInputStream in, long off, byte buf[], int arrayOff, int len)
                throws IOException {
            // Track the file position explicitly instead of deriving it from
            // the (int) array offset on every iteration.
            long pos = off + arrayOff;
            while (len > 0) {
                int ret = in.read(pos, buf, arrayOff, len);
                if (ret < 0) {
                    throw new IOException("Premature EOF from inputStream reading " + len + " bytes from offset "
                            + off + " in " + options.filename);
                }
                len -= ret;
                arrayOff += ret;
                pos += ret;
            }
        }

        /**
         * Verify that {@code got} matches {@code expect} byte-for-byte.
         *
         * @throws IOException describing the first mismatching index, with both
         *                     buffers hex-dumped for diagnosis
         */
        public static void compareArrays(byte expect[], byte got[]) throws IOException {
            int bad = -1;
            for (int i = 0; i < expect.length; i++) {
                if (got[i] != expect[i]) {
                    bad = i;
                    break;
                }
            }
            if (bad != -1) {
                throw new IOException("compareArrays: error on byte " + bad + ".\n" + "Expected: "
                        + Hex.encodeHexString(expect) + "\n" + "Got:      " + Hex.encodeHexString(got) + "\n");
            }
        }

        public void run() {
            int checkTimeCounter = 0;
            long prevPrintTime = 0;
            try {
                long amtRead = 0;

                benchReader.init(fis);
                while (amtRead < options.nBytesToRead) {
                    benchReader.read(fis);
                    amtRead += options.nReadChunkBytes;

                    // Only thread 0 prints progress, and at most once per
                    // MILLISECONDS_BETWEEN_PRINT.
                    if (shouldPrint) {
                        if (checkTimeCounter++ == TRIES_BETWEEN_TIMECHECK) {
                            long now = System.currentTimeMillis();
                            if (now > prevPrintTime + MILLISECONDS_BETWEEN_PRINT) {
                                prevPrintTime = now;
                                float percent = amtRead * 100;
                                percent /= options.nBytesToRead;
                                System.out.println("thread1: read amtRead = " + amtRead + " out of "
                                        + options.nBytesToRead + " (" + percent + "%" + ")");
                            }
                            checkTimeCounter = 0;
                        }
                    }
                }
            } catch (Throwable t) {
                // Record the failure; main() inspects it after join().
                t.printStackTrace(System.err);
                exception = t;
            }
        }

        /** @return the failure captured by run(), or null on success. */
        public Throwable getException() {
            return exception;
        }
    }

    /**
     * Format a byte count with a human-readable binary unit.
     *
     * @param size the byte count
     * @return e.g. "1.500000 MBytes"
     */
    static String prettyPrintByteSize(float size) {
        if (size < 1024) {
            return String.format("%f bytes", size);
        } else if (size < (1024 * 1024)) {
            return String.format("%f KBytes", size / 1024);
        } else if (size < (1024 * 1024 * 1024)) {
            return String.format("%f MBytes", size / (1024 * 1024));
        } else {
            return String.format("%f GBytes", size / (1024 * 1024 * 1024));
        }
    }

    public static void main(String[] args) throws Exception {
        options = new Options();
        final Configuration conf = new Configuration();
        if (options.dumpConf) {
            PrintWriter writer = new PrintWriter(System.out);
            Configuration.dumpConfiguration(conf, writer);
            // PrintWriter buffers; without a flush the dump may never appear.
            writer.flush();
        }
        final FileSystem fs = FileSystem.get(new URI(options.hdfsUri), conf);
        fs.setVerifyChecksum(!options.skipChecksum);

        // Create the test file unless one of the right length already exists.
        if (!fs.exists(options.filePath)) {
            System.out.println("no file at " + options.filePath + "; writing " + "new file now with length "
                    + options.nGigsInFile + " gigs...");
            writeFile(fs);
            System.out.println("done.");
        } else if (fs.getFileStatus(options.filePath).getLen() != options.nBytesInFile) {
            System.out.println("existing file " + options.filename + " has length "
                    + fs.getFileStatus(options.filePath).getLen()
                    + ", but we wanted length " + options.nBytesInFile + ".  Re-creating.");
            writeFile(fs);
            System.out.println("done.");
        } else {
            System.out.println(
                    "using existing file at " + options.filePath + " of length " + options.nGigsInFile + " gigs.");
        }

        long nanoStart = System.nanoTime();
        WorkerThread threads[] = new WorkerThread[options.nThreads];
        for (int i = 0; i < options.nThreads; i++) {
            // Only thread 0 prints progress updates.
            threads[i] = new WorkerThread(i == 0, fs, WorkerThread.createBenchReader(options, i));
        }
        for (int i = 0; i < options.nThreads; i++) {
            threads[i].start();
        }
        for (int i = 0; i < options.nThreads; i++) {
            threads[i].join();
        }
        for (int i = 0; i < options.nThreads; i++) {
            Throwable t = threads[i].getException();
            if (t != null) {
                System.err.println("there were exceptions.  Aborting.");
                System.exit(1);
            }
        }
        long nanoEnd = System.nanoTime();
        fs.close();
        // Compute aggregate throughput in long math before converting to float.
        long totalIo = options.nThreads;
        totalIo *= options.nBytesToRead;
        float nanoDiff = nanoEnd - nanoStart;
        float seconds = nanoDiff / 1000000000;
        System.out.println(String.format("Using %d threads, read %s in %f seconds", options.nThreads,
                prettyPrintByteSize(totalIo), seconds));
        float rate = totalIo / seconds;
        System.out.println("Average rate was " + prettyPrintByteSize(rate) + "/s");
    }
}