com.intel.hibench.DFSIOWriter.java Source code

Introduction

Here is the source code for com.intel.hibench.DFSIOWriter.java
Source

/*
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
package com.intel.hibench;

import co.cask.cdap.api.Resources;
import co.cask.cdap.api.annotation.UseDataSet;
import co.cask.cdap.api.dataset.table.Put;
import co.cask.cdap.api.dataset.table.Table;
import co.cask.cdap.api.mapreduce.AbstractMapReduce;
import co.cask.cdap.api.mapreduce.MapReduceContext;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.*;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;

/**
 * Map-only MapReduce benchmark job that writes generated text to the job output.
 *
 * <p>Each input split yields exactly one record, so every map task runs
 * {@link Generator#map} once and keeps emitting a pre-generated sentence until the
 * configured number of bytes has been written. The start time, end time and
 * configured byte count are recorded in the {@code benchData} table under row
 * {@link #ONE}, in columns {@link #ONE}, {@link #TWO} and {@link #THREE} respectively.
 *
 * <p>Created by peilunzh on 5/15/2015.
 */
public class DFSIOWriter extends AbstractMapReduce {

    private static final Logger LOG = LoggerFactory.getLogger(DFSIOWriter.class);

    // We put the benchmark result in this dataset.
    @UseDataSet("benchData")
    private Table benchData;

    /** Number of bytes a map task writes when no size has been configured (100 MiB). */
    private static final long DEFAULT_BYTES_TO_WRITE = 100L * 1024 * 1024;

    /**
     * Key used both for the runtime argument (size in megabytes) and for the Hadoop
     * job configuration entry (size in bytes) that is handed to the mappers.
     */
    private static final String BENCH_SIZE = "size";

    /** Vocabulary the generated sentence is drawn from. */
    private static final String[] WORDS = { "diurnalness", "Homoiousian", "spiranthic", "tetragynian", "silverhead",
            "ungreat", "lithograph", "exploiter", "physiologian", "by", "hellbender", "Filipendula", "undeterring",
            "antiscolic", "pentagamist", "hypoid", };

    // Wall-clock timestamps (milliseconds) of the most recent run; the initial
    // values are placeholders that are overwritten in beforeSubmit/onFinish.
    public static double startTime = 2;
    public static double endTime = 1;

    // Row and column keys used in the benchData table.
    static final byte[] ONE = { '1' };
    static final byte[] TWO = { '2' };
    static final byte[] THREE = { '3' };

    /**
     * Declares the output dataset and the memory given to each mapper.
     */
    @Override
    public void configure() {
        setOutputDataset("lines");
        setMapperResources(new Resources(1024));
    }

    /**
     * Records the start time, wires up the map-only Hadoop job and, when the
     * {@code size} runtime argument is present, passes the requested size on to
     * the mappers.
     *
     * @param context the MapReduce context providing the Hadoop job and runtime arguments
     * @throws NumberFormatException if the {@code size} argument is not a valid long
     * @throws Exception if the underlying job or dataset operations fail
     */
    @Override
    public void beforeSubmit(MapReduceContext context) throws Exception {
        startTime = System.currentTimeMillis();
        benchData.put(new Put(ONE, ONE, startTime));

        Job job = context.getHadoopJob();
        job.setInputFormatClass(RandomInputFormat.class);
        job.setMapperClass(Generator.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        // Map-only job: mapper output goes straight to the output.
        job.setNumReduceTasks(0);

        String sizeStr = context.getRuntimeArguments().get(BENCH_SIZE);
        if (sizeStr != null) {
            LOG.info("size we get in config is : {}", sizeStr);
            // The runtime argument is given in megabytes; mappers work in bytes.
            long totalBytes = Long.parseLong(sizeStr.trim()) * 1024 * 1024;
            job.getConfiguration().setLong(BENCH_SIZE, totalBytes);
            benchData.put(new Put(ONE, THREE, totalBytes));
        }
    }

    /**
     * Records the end time of the run in the benchmark table.
     *
     * @param succeeded whether the job succeeded (not used)
     * @param context the MapReduce context (not used)
     */
    @Override
    public void onFinish(boolean succeeded, MapReduceContext context) throws Exception {
        endTime = System.currentTimeMillis();
        benchData.put(new Put(ONE, TWO, endTime));
    }

    /**
     * Mapper that repeatedly writes one pre-generated sentence as both key and value.
     */
    public static class Generator extends Mapper<Text, Text, Text, Text> {

        // Must be declared before aSentence: generateSentence() uses it during
        // field initialization.
        private final Random random = new Random();
        Text aSentence = generateSentence(10);
        int bufferSize = aSentence.getLength();

        /**
         * Emits the sentence until the configured number of bytes has been accounted for.
         *
         * @param key the input key (ignored)
         * @param value the input value (ignored)
         * @param context the task context used to read the size and write output
         */
        @Override
        public void map(Text key, Text value, Context context) throws IOException, InterruptedException {
            long totalBytes = context.getConfiguration().getLong(BENCH_SIZE, DEFAULT_BYTES_TO_WRITE);
            LOG.info("Bytes we get in map is: {}", totalBytes);

            // Each record carries the sentence twice: once as key, once as value.
            final long bytesPerRecord = 2L * bufferSize;
            while (totalBytes > 0) {
                context.write(aSentence, aSentence);
                totalBytes -= bytesPerRecord;
            }
        }

        /**
         * Builds a sentence of randomly chosen words, each followed by a single space.
         *
         * @param noWords the number of words to include
         * @return the generated sentence
         */
        private Text generateSentence(int noWords) {
            StringBuilder sentence = new StringBuilder();
            for (int i = 0; i < noWords; ++i) {
                sentence.append(WORDS[random.nextInt(WORDS.length)]).append(' ');
            }
            return new Text(sentence.toString());
        }
    }

    /**
     * Input format that fabricates dummy splits so that the requested number of
     * map tasks is started; no real input is read.
     */
    static class RandomInputFormat extends InputFormat<Text, Text> {

        /**
         * Generate the requested number of file splits, with the filename
         * set to the filename of the output file.
         *
         * @param job the job context supplying the configuration
         * @return one dummy split per configured map task (one if not configured)
         */
        @Override
        public List<InputSplit> getSplits(JobContext job) throws IOException {
            Path outDir = FileOutputFormat.getOutputPath(job);
            int numSplits = job.getConfiguration().getInt(MRJobConfig.NUM_MAPS, 1);
            List<InputSplit> result = new ArrayList<InputSplit>(Math.max(numSplits, 0));
            for (int i = 0; i < numSplits; ++i) {
                String splitName = "dummy-split-" + i;
                // getOutputPath returns null when no file output path is configured;
                // only the split's file name is consumed downstream, so a relative
                // path is sufficient in that case.
                Path splitPath = (outDir == null) ? new Path(splitName) : new Path(outDir, splitName);
                result.add(new FileSplit(splitPath, 0, 1, (String[]) null));
            }
            return result;
        }

        /**
         * Return a single record (filename, "") where the filename is taken from
         * the file split.
         */
        static class RandomRecordReader extends RecordReader<Text, Text> {
            // Set to null once the single record has been handed out.
            Path name;
            Text key = null;
            Text value = new Text();

            public RandomRecordReader(Path p) {
                name = p;
            }

            /** Nothing to initialize: the reader is fully set up by its constructor. */
            @Override
            public void initialize(InputSplit split, TaskAttemptContext context)
                    throws IOException, InterruptedException {
            }

            /**
             * Advances to the single record on the first call.
             *
             * @return {@code true} on the first call (when a path was supplied), {@code false} afterwards
             */
            @Override
            public boolean nextKeyValue() {
                if (name == null) {
                    return false;
                }
                key = new Text();
                key.set(name.getName());
                name = null;
                return true;
            }

            @Override
            public Text getCurrentKey() {
                return key;
            }

            @Override
            public Text getCurrentValue() {
                return value;
            }

            @Override
            public void close() {
            }

            /**
             * @return 0.0 while the single record is still pending, 1.0 once it has been consumed
             */
            @Override
            public float getProgress() {
                return (name == null) ? 1.0f : 0.0f;
            }
        }

        /**
         * Creates a reader that yields the split's file name as its only record.
         *
         * @param split the split to read; expected to be a {@link FileSplit}
         * @param context the task attempt context (not used)
         */
        @Override
        public RecordReader<Text, Text> createRecordReader(InputSplit split, TaskAttemptContext context)
                throws IOException, InterruptedException {
            return new RandomRecordReader(((FileSplit) split).getPath());
        }
    }

}