org.hdp.WordCountDataCompression.WordCountDataCompressionJob.java Source code

Java tutorial

Introduction

Here is the source code for org.hdp.WordCountDataCompression.WordCountDataCompressionJob.java

Source

package org.hdp.WordCountDataCompression;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.SnappyCodec;

/**
 * Driver for a word-count MapReduce job whose final output files are
 * compressed with the Snappy codec.
 *
 * <p>Usage: {@code WordCountDataCompressionJob <input path> <output path>}.
 * Generic Hadoop options are handled by {@link ToolRunner} before
 * {@link #run(String[])} is invoked.
 */
public class WordCountDataCompressionJob extends Configured implements Tool {

    /** Exit code returned when the job fails or cannot be set up. */
    private static final int EXIT_FAILURE = 1;

    /** Exit code returned when the command line is malformed. */
    private static final int EXIT_USAGE = 2;

    /**
     * Configures and runs the word-count job, blocking until it completes.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} is the
     *             output path; an existing output path is deleted recursively
     *             before the job starts
     * @return {@code 0} if the job succeeded, {@code 1} if it failed or the
     *         existing output path could not be removed, {@code 2} if the
     *         arguments are missing
     * @throws Exception if job setup, file-system access, or job submission fails
     */
    @Override
    public int run(String[] args) throws Exception {
        if (args == null || args.length < 2) {
            System.err.println("Usage: WordCountDataCompressionJob <input path> <output path>");
            return EXIT_USAGE;
        }

        Configuration conf = getConf();
        Path in = new Path(args[0]);
        Path out = new Path(args[1]);

        // Resolve the file system from the output path itself so that a fully
        // qualified URI (e.g. hdfs://...) is honoured even when it differs from
        // the default file system of the configuration.
        FileSystem fs = out.getFileSystem(conf);
        // Check the same path that is deleted: the job refuses to start if its
        // output directory already exists.
        if (fs.exists(out) && !fs.delete(out, true)) {
            System.err.println("Could not delete existing output path: " + out);
            return EXIT_FAILURE;
        }

        Job job = Job.getInstance(conf, "Word Count Job with compression");
        // Lets Hadoop locate the jar containing the mapper/reducer on the cluster.
        job.setJarByClass(WordCountDataCompressionJob.class);

        FileInputFormat.setInputPaths(job, in);
        FileOutputFormat.setOutputPath(job, out);

        // Job.getInstance copies the configuration, so output compression must
        // be enabled on the job. Both the on/off flag and the codec are needed.
        FileOutputFormat.setCompressOutput(job, true);
        FileOutputFormat.setOutputCompressorClass(job, SnappyCodec.class);

        job.setMapperClass(WordCountMapperDataCompression.class);
        job.setReducerClass(WordCountReducerDataCompression.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        return job.waitForCompletion(true) ? 0 : EXIT_FAILURE;
    }

    /**
     * Command-line entry point; terminates the JVM with the job's exit code.
     *
     * @param args command-line arguments, e.g.
     *             {@code /test/wordcount/ip/LICENSE.txt /test/wordcount/op}
     */
    public static void main(String[] args) {
        int exitCode;
        try {
            exitCode = ToolRunner.run(new Configuration(), new WordCountDataCompressionJob(), args);
        } catch (Exception e) {
            e.printStackTrace();
            // An exception must not be reported to the shell as success.
            exitCode = EXIT_FAILURE;
        }
        System.exit(exitCode);
    }
}