org.apache.hadoop.fs.TestAppendStress.java Source code


Introduction

Here is the source code for org.apache.hadoop.fs.TestAppendStress.java, a stress test for HDFS append support: it creates files, repeatedly appends random data to them (either in-process against a MiniDFSCluster or at scale via a map-only MapReduce job), and verifies each file's length and CRC afterward.
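
Before the full listing, here is a minimal sketch of driving the test programmatically through the Tool API. The driver class name and the -nFiles/-round values are illustrative only, and it assumes TestAppendStress and a suitable Hadoop configuration are on the classpath:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.ToolRunner;

public class AppendStressDriver {
    public static void main(String[] args) throws Exception {
        // ToolRunner parses generic Hadoop options, then calls run() on the Tool.
        int exitCode = ToolRunner.run(new Configuration(),
                new org.apache.hadoop.fs.TestAppendStress(),
                new String[] { "-nFiles", "100", "-round", "10" });
        System.exit(exitCode);
    }
}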

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.fs;

import static org.junit.Assert.assertTrue;

import java.io.IOException;
import java.io.InputStream;
import java.util.Date;
import java.util.Random;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.protocol.FSConstants;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.SequenceFile.CompressionType;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.util.DataChecksum;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.junit.Test;

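/**
 * Stress test for HDFS append support: creates a set of files, performs a
 * configurable number of append rounds of random data on each, and verifies
 * the resulting length and CRC. It runs either as an in-process JUnit test
 * against a MiniDFSCluster or at scale as a map-only MapReduce job (one map
 * task per file) through the Tool interface.
 */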
public class TestAppendStress extends Configured implements Tool {
    private static final Log LOG = LogFactory.getLog(TestAppendStress.class);

    private static final String BASE_FILE_NAME = "test_append_stress";
    private static String TEST_ROOT_DIR = System.getProperty("test.build.data", "/benchmarks/TestAppendStress");
    private static Path CONTROL_DIR = new Path(TEST_ROOT_DIR, "io_control");
    private static Path DATA_DIR = new Path(TEST_ROOT_DIR, "io_data");
    private static Path APPEND_DIR = new Path(TEST_ROOT_DIR, "io_append");

    private static final String JOB_START_TIME_LABEL = "job_start_time";

    private static final int SIZE_RANGE = 1024 * 1024 * 4; // 4MB
    private static final int ROUND_DEFAULT = 100;
    private static final int NUM_FILES_DEFAULT = 1000;

    private static int numFiles = NUM_FILES_DEFAULT;
    private static int round = ROUND_DEFAULT;

    private static final String USAGE = "Usage: " + TestAppendStress.class.getSimpleName()
            + " [-nFiles N] [-round N]";

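    /**
     * In-process smoke test: brings up a two-datanode MiniDFSCluster with
     * append support enabled and runs a shortened ten-round append/verify
     * cycle against a single file.
     */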
    @Test
    public void testAppend() throws Exception {
        Configuration fsConfig = new Configuration();
        fsConfig.setBoolean("dfs.support.append", true);
        MiniDFSCluster cluster = null;
        try {
            cluster = new MiniDFSCluster(fsConfig, 2, true, null);
            FileSystem fs = cluster.getFileSystem();
            Path filePath = new Path(DATA_DIR, "file1");
            round = 10;
            assertTrue(doAppendTest(fs, filePath, new Random(), null));
        } finally {
            if (cluster != null) {
                cluster.shutdown();
            }
        }
    }

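    /**
     * Command-line entry point (Tool API): parses -nFiles and -round, wipes
     * the test root directory, writes one control file per test file, and
     * submits the map-only append job.
     */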
    @Override
    public int run(String[] args) throws Exception {
        if (args.length == 0) {
            System.err.println(USAGE);
            return -1;
        }

        for (int i = 0; i < args.length; i++) {
            if (args[i].equals("-nFiles")) {
                numFiles = Integer.parseInt(args[++i]);
            } else if (args[i].equals("-round")) {
                round = Integer.parseInt(args[++i]);
            }
        }

        LOG.info("nFiles = " + numFiles);
        LOG.info("round = " + round);

        Configuration conf = getConf();

        try {
            FileSystem fs = FileSystem.get(conf);

            LOG.info("Cleaning up test files");
            fs.delete(new Path(TEST_ROOT_DIR), true);

            createControlFile(fs, numFiles, conf);
            startAppendJob(conf);
        } catch (Exception e) {
            System.err.print(StringUtils.stringifyException(e));
            return -1;
        }

        return 0;
    }

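    /**
     * Configures and submits the map-only job: each control file under
     * CONTROL_DIR becomes one map task, and the job start time is recorded
     * in the configuration so mappers can derive their random seeds from it.
     */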
    private void startAppendJob(Configuration conf) throws IOException {
        JobConf job = new JobConf(conf, TestAppendStress.class);

        job.set(JOB_START_TIME_LABEL, new Date().toString());
        FileInputFormat.setInputPaths(job, CONTROL_DIR);
        FileOutputFormat.setOutputPath(job, APPEND_DIR);
        job.setInputFormat(SequenceFileInputFormat.class);

        job.setMapperClass(AppendMapper.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        job.setNumReduceTasks(0);
        JobClient.runJob(job);
    }

    private static String getFileName(int fIdx) {
        return BASE_FILE_NAME + Integer.toString(fIdx);
    }

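    /**
     * Writes one single-record SequenceFile per test file into CONTROL_DIR;
     * each record's key is a test file name, which the mapper receives as
     * its input key.
     */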
    private static void createControlFile(FileSystem fs, int nrFiles, Configuration fsConfig) throws IOException {
        fs.delete(CONTROL_DIR, true);

        for (int i = 0; i < nrFiles; i++) {
            String name = getFileName(i);
            Path controlFile = new Path(CONTROL_DIR, "in_file_" + name);
            SequenceFile.Writer writer = null;
            try {
                writer = SequenceFile.createWriter(fs, fsConfig, controlFile, Text.class, LongWritable.class,
                        CompressionType.NONE);
                writer.append(new Text(name), new LongWritable(0));
            } catch (Exception e) {
                // Wrap rather than flatten to a message string, preserving the cause.
                throw new IOException(e);
            } finally {
                if (writer != null)
                    writer.close();
                writer = null;
            }
        }
        LOG.info("created control files for: " + nrFiles + " files");
    }

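    /**
     * Writes len random bytes to out in chunks of up to 1MB, feeding each
     * chunk into the running checksum and syncing at random chunk boundaries.
     */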
    private static void writeToFile(Random random, FSDataOutputStream out, int len, DataChecksum checksum)
            throws IOException {
        if (len == 0) {
            return;
        }

        LOG.info("Write " + len + " bytes to file.");
        int bufferSize = 1024 * 1024;
        byte[] buffer = new byte[bufferSize];
        int toLen = len;
        while (toLen > 0) {
            random.nextBytes(buffer);
            int numWrite = Math.min(toLen, buffer.length);
            out.write(buffer, 0, numWrite);
            checksum.update(buffer, 0, numWrite);
            toLen -= numWrite;

            // Randomly sync so the data is flushed to the datanodes at
            // unpredictable offsets within each write.
            if (random.nextBoolean()) {
                out.sync();
            }
        }
    }

    /**
     * Verify the file: the length must match the expected length, and both
     * the checksum accumulated during writing and a checksum recomputed by
     * re-reading the file must match the CRC reported by the file system.
     */
    private static boolean verifyFile(FileSystem fs, Path filePath, int fileLen, DataChecksum checksum)
            throws IOException {
        FileStatus stat = fs.getFileStatus(filePath);
        if (stat.getLen() != fileLen) {
            return false;
        }

        // Note: getFileCrc is not part of the stock Apache Hadoop FileSystem
        // API; this test targets a branch (such as Facebook's hadoop-20) that
        // exposes a whole-file CRC.
        int fileCRC = fs.getFileCrc(filePath);

        LOG.info("Expected checksum: " + (int) checksum.getValue() + ", got: " + fileCRC);

        InputStream in = fs.open(filePath);
        DataChecksum newChecksum = DataChecksum.newDataChecksum(FSConstants.CHECKSUM_TYPE, 1);
        try {
            int toRead = fileLen;
            byte[] buffer = new byte[1024 * 1024];
            while (toRead > 0) {
                int numRead = in.read(buffer);
                if (numRead < 0) {
                    throw new IOException("Unexpected EOF reading " + filePath
                            + " with " + toRead + " bytes still expected");
                }
                newChecksum.update(buffer, 0, numRead);
                toRead -= numRead;
            }
        } finally {
            in.close();
        }

        LOG.info("Read CRC: " + (int) newChecksum.getValue());
        return (int) checksum.getValue() == fileCRC && (int) newChecksum.getValue() == fileCRC;
    }

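    /**
     * Map task for the stress job. The random seed is derived from the file
     * name plus the job start time, so every task in a given job (including
     * re-executed attempts) replays the same byte sequence for its file.
     */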
    public static class AppendMapper extends Configured implements Mapper<Text, LongWritable, Text, Text> {

        private FileSystem fs;
        private Random random = null;
        private JobConf conf;

        public AppendMapper() {
        }

        @Override
        public void configure(JobConf job) {
            conf = job;
            try {
                fs = FileSystem.get(job);
            } catch (IOException e) {
                throw new RuntimeException("Cannot create file system.", e);
            }
        }

        @Override
        public void close() throws IOException {

        }

        @Override
        public void map(Text key, LongWritable value, OutputCollector<Text, Text> output, Reporter reporter)
                throws IOException {
            String name = key.toString();
            String seedStr = name + conf.get(JOB_START_TIME_LABEL);
            LOG.info("random seed string: " + seedStr);
            random = new Random(seedStr.hashCode());
            Path filePath = new Path(DATA_DIR, name);

            if (!doAppendTest(fs, filePath, random, reporter)) {
                throw new RuntimeException("Append operation failed, filePath: " + filePath);
            }
        }
    }

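    /**
     * One full append cycle: create the file with a random-length initial
     * write, then reopen it for append "round" times, writing up to
     * SIZE_RANGE random bytes each time, and finally verify length and CRC.
     */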
    private static boolean doAppendTest(FileSystem fs, Path filePath, Random random, Reporter reporter)
            throws IOException {
        if (reporter == null) {
            reporter = Reporter.NULL;
        }

        FSDataOutputStream out = fs.create(filePath);
        DataChecksum checksum = DataChecksum.newDataChecksum(FSConstants.CHECKSUM_TYPE, 1);
        checksum.reset();

        int fileLen = 0;
        // Initial write: up to one default block plus SIZE_RANGE bytes, so
        // the first append starts at a varying offset within or beyond a block.
        int len = random.nextInt((int) (SIZE_RANGE + fs.getDefaultBlockSize()));
        fileLen += len;
        writeToFile(random, out, len, checksum);
        out.close();

        reporter.progress();
        for (int i = 0; i < round; i++) {
            out = fs.append(filePath);

            len = random.nextInt(SIZE_RANGE);
            fileLen += len;
            writeToFile(random, out, len, checksum);
            out.close();
            reporter.progress();
        }

        return verifyFile(fs, filePath, fileLen, checksum);
    }

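    // Standard ToolRunner entry point. An illustrative invocation, assuming
    // the class is on the Hadoop classpath:
    //   hadoop org.apache.hadoop.fs.TestAppendStress -nFiles 1000 -round 100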
    public static void main(String[] args) throws Exception {
        int res = ToolRunner.run(new TestAppendStress(), args);
        System.exit(res);
    }
}