com.bigdata.diane.MiniTestDFSIO.java Source code

Java tutorial

Introduction

Here is the source code for com.bigdata.diane.MiniTestDFSIO.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.bigdata.diane;

import java.io.DataInputStream;
import java.io.IOException;
import java.io.OutputStream;

import org.apache.hadoop.fs.*;
import org.apache.hadoop.fs.FileSystem;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.SequenceFile.CompressionType;
import org.apache.hadoop.mapred.*;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.junit.Test;

/**
 * Distributed i/o benchmark.
 * <p>
 * This test writes into or reads from a specified number of files.
 * File size is specified as a parameter to the test. 
 * Each file is accessed in a separate map task.
 * <p>
 * The reducer collects the following statistics:
 * <ul>                     
 * <li>number of tasks completed</li>
 * <li>number of bytes written/read</li>
 * <li>execution time</li>
 * <li>io rate</li>
 * <li>io rate squared</li>
 * </ul>
 *    
 * Finally, the following information is meant to be appended to a local file
 * (note: this class contains no result-analysis step that writes such a file):
 * <ul>
 * <li>read or write test</li>
 * <li>date and time the test finished</li>   
 * <li>number of files</li>
 * <li>total number of bytes processed</li>
 * <li>throughput in mb/sec (total number of bytes / sum of processing times)</li>
 * <li>average i/o rate in mb/sec per file</li>
 * <li>standard deviation of i/o rate </li>
 * </ul>
 */

public class MiniTestDFSIO extends Configured implements Tool {
    // Constants

    /** Logger shared by the driver and the nested mapper classes. */
    private static final Log LOG = LogFactory.getLog(MiniTestDFSIO.class);
    /** Test type used by default in {@code run}. */
    private static final int TEST_TYPE_READ = 0;
    /** Test type selected by the {@code -write} command-line switch. */
    private static final int TEST_TYPE_WRITE = 1;
    /** Default value stored under the {@code test.io.file.buffer.size} configuration key. */
    private static final int DEFAULT_BUFFER_SIZE = 1000000;
    /** Prefix of every data file name; the file index is appended to it. */
    private static final String BASE_FILE_NAME = "test_io_";

    /** Number of bytes in one megabyte (2^20). */
    private static final long MEGA = 0x100000;
    /** Root of all benchmark directories; taken from the {@code test.build.data} system property when set. */
    private static final String TEST_ROOT_DIR = System.getProperty("test.build.data", "/benchmarks/TestDFSIO");
    /** Directory holding the control files that are the job input. */
    private static final Path CONTROL_DIR = new Path(TEST_ROOT_DIR, "io_control");
    /** Output directory of the write job. */
    private static final Path WRITE_DIR = new Path(TEST_ROOT_DIR, "io_write");
    /** Directory holding the data files written and read by the mappers. */
    private static final Path DATA_DIR = new Path(TEST_ROOT_DIR, "io_data");
    static {
        // Register the HDFS resource files as default configuration resources.
        // (A leftover debug banner that was printed to stdout on every class load
        // has been removed from this initializer.)
        Configuration.addDefaultResource("hdfs-default.xml");
        Configuration.addDefaultResource("hdfs-site.xml");
    }

    /**
     * Runs the benchmark with its built-in defaults: 10 files of size 10 each,
     * using a freshly created {@link Configuration}.
     *
     * @throws Exception if the benchmark fails
     */
    @Test
    public void testIOs() throws Exception {
        final int defaultFileSize = 10;
        final int defaultFileCount = 10;
        testIOs(defaultFileSize, defaultFileCount, new Configuration());
    }

    /**
     * Runs the write benchmark with the given parameters: creates the control
     * files first and then launches the write job.
     *
     * @param fileSize size of each file (passed on as megabytes to the control files)
     * @param nrFiles number of files
     * @param fsConfig configuration used to obtain the file system and to set up the job
     * @throws IOException if a file system or job operation fails
     * @throws InterruptedException if the thread is interrupted during control-file creation
     */
    public static void testIOs(int fileSize, int nrFiles, Configuration fsConfig)
            throws IOException, InterruptedException {
        final FileSystem fileSystem = FileSystem.get(fsConfig);

        // Step 1: describe the work; step 2: run the job that performs it.
        createControlFile(fileSystem, fileSize, nrFiles, fsConfig);
        writeTest(fileSystem, fsConfig);
    }

    /**
     * Creates one control file per data file under {@code CONTROL_DIR}.
     * <p>
     * Each control file is an uncompressed {@link SequenceFile} with a single
     * record that maps the data file name to the requested file size. Control
     * files left over from an earlier run are removed first.
     * After the files are written the method sleeps for 20 seconds.
     *
     * @param fs file system on which the control files are created
     * @param fileSize size of each data file, in megabytes
     * @param nrFiles number of control files to create
     * @param fsConfig configuration handed to the sequence-file writer
     * @throws IOException if a control file cannot be deleted, written or closed
     * @throws InterruptedException if the thread is interrupted during the final sleep
     */
    private static void createControlFile(FileSystem fs, int fileSize, // in MB
            int nrFiles, Configuration fsConfig) throws InterruptedException, IOException {
        LOG.info("creating control file: " + fileSize + " mega bytes, " + nrFiles + " files");

        // CONTROL_DIR is the input directory of the job, so control files left by a
        // previous run with more files would otherwise be processed as extra tasks.
        fs.delete(CONTROL_DIR, true);

        for (int i = 0; i < nrFiles; i++) {
            String name = getFileName(i);
            Path controlFile = new Path(CONTROL_DIR, "in_file_" + name);
            SequenceFile.Writer writer = null;
            try {
                writer = SequenceFile.createWriter(fs, fsConfig, controlFile, Text.class, LongWritable.class,
                        CompressionType.NONE);
                writer.append(new Text(name), new LongWritable(fileSize));
            } catch (Exception e) {
                // Keep the original exception as the cause so its stack trace is not lost.
                throw new IOException(e.getLocalizedMessage(), e);
            } finally {
                if (writer != null) {
                    writer.close();
                }
            }
        }
        LOG.info("created control files for: " + nrFiles + " files now sleep 20 seconds");
        // NOTE(review): the reason for this fixed pause is not evident from this file.
        Thread.sleep(20000);
    }

    /**
     * Builds the name of the data file with the given index by appending the
     * index to the common file-name prefix.
     */
    private static String getFileName(int fIdx) {
        return BASE_FILE_NAME + fIdx;
    }

    /**
     * Common base class of the read and write mappers.
     * <p>
     * For every finished task it emits these statistics:
     * <ul>
     * <li>a task count of 1</li>
     * <li>number of bytes written/read</li>
     * <li>execution time</li>
     * <li>i/o rate (scaled by 1000)</li>
     * <li>i/o rate squared (scaled by 1000)</li>
     * </ul>
     */
    private abstract static class IOStatMapper<T> extends IOMapperBase<T> {
        IOStatMapper() {
        }

        /**
         * Logs the figures of one task and emits them as key/value text pairs.
         *
         * @param output collector receiving the statistics
         * @param name name of the processed file (not used here)
         * @param execTime execution time of the task; the rate formula treats it as milliseconds
         * @param objSize number of bytes processed by the task
         * @throws IOException if the collector fails
         */
        void collectStats(OutputCollector<Text, Text> output, String name, long execTime, Long objSize)
                throws IOException {
            final long bytesProcessed = objSize.longValue();
            // bytes * 1000 / (time * 2^20): megabytes per second when time is in milliseconds.
            final float rateMbPerSec = (float) bytesProcessed * 1000 / (execTime * MEGA);

            LOG.info("Number of bytes processed = " + bytesProcessed);
            LOG.info("Exec time = " + execTime);
            LOG.info("IO rate = " + rateMbPerSec);

            emit(output, AccumulatingReducer.VALUE_TYPE_LONG + "tasks", String.valueOf(1));
            emit(output, AccumulatingReducer.VALUE_TYPE_LONG + "size", String.valueOf(bytesProcessed));
            emit(output, AccumulatingReducer.VALUE_TYPE_LONG + "time", String.valueOf(execTime));
            emit(output, AccumulatingReducer.VALUE_TYPE_FLOAT + "rate", String.valueOf(rateMbPerSec * 1000));
            emit(output, AccumulatingReducer.VALUE_TYPE_FLOAT + "sqrate",
                    String.valueOf(rateMbPerSec * rateMbPerSec * 1000));
        }

        /** Wraps key and value into {@link Text} objects and hands them to the collector. */
        private static void emit(OutputCollector<Text, Text> output, String key, String value)
                throws IOException {
            output.collect(new Text(key), new Text(value));
        }
    }

    /**
     * Mapper that writes one data file of the requested size.
     */
    public static class WriteMapper extends IOStatMapper<Long> {

        /**
         * Fills the shared buffer with a repeating pattern of 50 consecutive
         * byte values starting at {@code '0'}.
         */
        public WriteMapper() {
            for (int pos = 0; pos < bufferSize; pos++) {
                buffer[pos] = (byte) ('0' + pos % 50);
            }
        }

        /**
         * Creates (overwriting if present) the file {@code name} under the data
         * directory and writes the requested amount of data, one buffer at a time.
         *
         * @param reporter receives a status line after every chunk written
         * @param name name of the file to write
         * @param totalSize amount of data to write, in megabytes
         * @return number of bytes requested to be written
         * @throws IOException if creating, writing or closing the file fails
         */
        public Long doIO(Reporter reporter, String name, long totalSize) throws IOException {
            final long totalBytes = totalSize * MEGA;
            final OutputStream out = fs.create(new Path(DATA_DIR, name), true, bufferSize);

            try {
                long remaining = totalBytes;
                while (remaining > 0) {
                    // The last chunk may be shorter than a full buffer.
                    final int chunk = (int) Math.min(bufferSize, remaining);
                    out.write(buffer, 0, chunk);
                    reporter.setStatus("writing " + name + "@" + (totalBytes - remaining) + "/" + totalBytes
                            + " ::host = " + hostName);
                    remaining -= bufferSize;
                }
            } finally {
                out.close();
            }
            return Long.valueOf(totalBytes);
        }
    }

    /**
     * Runs the write job after recursively deleting the data directory and the
     * output directory of a previous write job.
     *
     * @param fs file system holding the benchmark directories
     * @param fsConfig configuration the job is derived from
     * @throws IOException if a delete or the job fails
     */
    private static void writeTest(FileSystem fs, Configuration fsConfig) throws IOException {
        final boolean recursive = true;
        fs.delete(DATA_DIR, recursive);
        fs.delete(WRITE_DIR, recursive);

        runIOTest(WriteMapper.class, WRITE_DIR, fsConfig);
    }

    /**
     * Configures and runs one benchmark job: the control directory is the input,
     * the given mapper does the I/O, and a single {@code AccumulatingReducer}
     * task aggregates the statistics into {@code outputDir}.
     *
     * @param mapperClass mapper performing the I/O
     * @param outputDir directory receiving the job output
     * @param fsConfig configuration the job configuration is derived from
     * @throws IOException if the job fails
     */
    @SuppressWarnings("deprecation")
    private static void runIOTest(Class<? extends Mapper<Text, LongWritable, Text, Text>> mapperClass,
            Path outputDir, Configuration fsConfig) throws IOException {
        final JobConf jobConf = new JobConf(fsConfig, MiniTestDFSIO.class);

        // Input: the control files, stored as sequence files.
        FileInputFormat.setInputPaths(jobConf, CONTROL_DIR);
        jobConf.setInputFormat(SequenceFileInputFormat.class);

        // Processing classes.
        jobConf.setMapperClass(mapperClass);
        jobConf.setReducerClass(AccumulatingReducer.class);

        // Output: text key/value pairs produced by one reduce task.
        FileOutputFormat.setOutputPath(jobConf, outputDir);
        jobConf.setOutputKeyClass(Text.class);
        jobConf.setOutputValueClass(Text.class);
        jobConf.setNumReduceTasks(1);

        JobClient.runJob(jobConf);
    }

    /**
     * Mapper that reads one data file.
     */
    public static class ReadMapper extends IOStatMapper<Long> {

        public ReadMapper() {
        }

        /**
         * Opens the file {@code name} under the data directory and reads it
         * buffer by buffer until the requested amount has been read or the end
         * of the file is reached.
         *
         * @param reporter receives a status line after every successful read
         * @param name name of the file to read
         * @param totalSize amount of data to read, in megabytes
         * @return number of bytes actually read
         * @throws IOException if opening, reading or closing the file fails
         */
        public Long doIO(Reporter reporter, String name, long totalSize) throws IOException {
            final long totalBytes = totalSize * MEGA;
            final DataInputStream in = fs.open(new Path(DATA_DIR, name));
            long bytesRead = 0;
            try {
                int count;
                // A negative count signals end of file.
                while (bytesRead < totalBytes && (count = in.read(buffer, 0, bufferSize)) >= 0) {
                    bytesRead += count;
                    reporter.setStatus(
                            "reading " + name + "@" + bytesRead + "/" + totalBytes + " ::host = " + hostName);
                }
            } finally {
                in.close();
            }
            return Long.valueOf(bytesRead);
        }
    }

    /**
     * Runs the read or write I/O directly in the calling thread, one file after
     * another, without launching a job.
     *
     * @param fs file system of the benchmark (not used by this method)
     * @param testType {@code TEST_TYPE_READ} or {@code TEST_TYPE_WRITE}; any other value is a no-op
     * @param fileSize size of each file, in megabytes
     * @param nrFiles number of files to process
     * @throws Exception if the I/O fails
     */
    private static void sequentialTest(FileSystem fs, int testType, int fileSize, int nrFiles) throws Exception {
        // NOTE(review): the mapper is constructed directly here; whether its inherited
        // file-system field is initialised without a job configure step cannot be
        // seen from this file - confirm against IOMapperBase.
        IOStatMapper<Long> ioer;
        if (testType == TEST_TYPE_READ) {
            ioer = new ReadMapper();
        } else if (testType == TEST_TYPE_WRITE) {
            ioer = new WriteMapper();
        } else {
            return;
        }
        for (int i = 0; i < nrFiles; i++) {
            // doIO() converts megabytes to bytes itself; passing MEGA * fileSize
            // (as before) scaled the size twice.
            ioer.doIO(Reporter.NULL, getFileName(i), fileSize);
        }
    }

    /**
     * Command-line entry point: runs the tool through {@link ToolRunner} and
     * exits the JVM with the tool's return code.
     */
    public static void main(String[] args) throws Exception {
        System.exit(ToolRunner.run(new MiniTestDFSIO(), args));
    }

    /**
     * Parses the command line and runs the benchmark.
     * <p>
     * Recognised options are {@code -write}, {@code -nrFiles N},
     * {@code -fileSize MB} and {@code -bufferSize Bytes}; every other argument is
     * ignored. The control files are always created; the write job is launched
     * only when {@code -write} is given.
     * <p>
     * NOTE(review): the usage text also advertises {@code -read}, {@code -clean}
     * and {@code -resFile}, which this method does not act on.
     *
     * @param args command-line arguments
     * @return 0 on success, -1 on a usage error or when the benchmark fails
     * @throws Exception declared by {@link Tool}; failures of the benchmark itself
     *         are reported on stderr and turned into the return code -1
     */
    @Override
    public int run(String[] args) throws Exception {
        int testType = TEST_TYPE_READ;
        int bufferSize = DEFAULT_BUFFER_SIZE;
        int fileSize = 1;
        int nrFiles = 1;
        // Never switched on by any command-line option in this class.
        boolean isSequential = false;

        String className = MiniTestDFSIO.class.getSimpleName();
        String usage = "Usage: " + className + " -read | -write | -clean "
                + "[-nrFiles N] [-fileSize MB] [-resFile resultFileName] " + "[-bufferSize Bytes] ";

        if (args.length == 0) {
            System.err.println(usage);
            return -1;
        }
        for (int i = 0; i < args.length; i++) { // parse command line
            String arg = args[i];
            if (arg.startsWith("-write")) {
                testType = TEST_TYPE_WRITE;
            } else if (arg.equals("-nrFiles") || arg.equals("-fileSize") || arg.equals("-bufferSize")) {
                // These options need a numeric value; fail with the usage text
                // instead of an ArrayIndexOutOfBounds/NumberFormat exception.
                if (i + 1 >= args.length) {
                    System.err.println("Missing value for " + arg);
                    System.err.println(usage);
                    return -1;
                }
                int value;
                try {
                    value = Integer.parseInt(args[++i]);
                } catch (NumberFormatException e) {
                    System.err.println("Invalid value for " + arg + ": " + args[i]);
                    System.err.println(usage);
                    return -1;
                }
                if (arg.equals("-nrFiles")) {
                    nrFiles = value;
                } else if (arg.equals("-fileSize")) {
                    fileSize = value;
                } else {
                    bufferSize = value;
                }
            }
        }

        try {
            Configuration fsConfig = new Configuration(getConf());
            fsConfig.setInt("test.io.file.buffer.size", bufferSize);
            FileSystem fs = FileSystem.get(fsConfig);

            if (isSequential) {
                long tStart = System.currentTimeMillis();
                sequentialTest(fs, testType, fileSize, nrFiles);
                long execTime = System.currentTimeMillis() - tStart;
                String resultLine = "Seq Test exec time sec: " + (float) execTime / 1000;
                LOG.info(resultLine);
                return 0;
            }

            createControlFile(fs, fileSize, nrFiles, fsConfig);
            if (testType == TEST_TYPE_WRITE) {
                writeTest(fs, fsConfig);
            }
        } catch (Exception e) {
            if (e instanceof InterruptedException) {
                // Restore the interrupt flag so callers can still observe it.
                Thread.currentThread().interrupt();
            }
            System.err.print(StringUtils.stringifyException(e));
            return -1;
        }
        return 0;
    }

}