com.tfm.utad.reducerdata.ReducerDataPig.java Source code

Java tutorial

Introduction

Here is the source code for com.tfm.utad.reducerdata.ReducerDataPig.java

Source

/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package com.tfm.utad.reducerdata;

import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Date;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.Counters;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class ReducerDataPig {

    private static final String HDFS_LOCALHOST_LOCALDOMAIN = "hdfs://172.16.134.128/";
    private static final String FS_DEFAULT_FS = "fs.defaultFS";

    private final static Logger LOG = LoggerFactory.getLogger(ReducerDataPig.class);

    public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {

        SimpleDateFormat sdf = new SimpleDateFormat("YYYY-MM-dd-HH-mm-ss");
        Date date = new Date();

        Path inputPath = new Path("/home/jab/camus/reducer-data-pig");
        Path outputDir = new Path("/home/jab/camus/pigdata/" + sdf.format(date));

        // Create configuration
        Configuration conf = new Configuration(true);
        conf.set(FS_DEFAULT_FS, HDFS_LOCALHOST_LOCALDOMAIN);
        FileSystem fs = FileSystem.get(conf);
        Path filesPath = new Path(inputPath + "/*");
        FileStatus[] files = fs.globStatus(filesPath);

        // Create job
        Job job = new Job(conf, "ReducerDataPig");
        job.setJarByClass(ReducerDataPig.class);

        // Setup MapReduce
        job.setMapperClass(ReducerDataPigMapper.class);
        job.setReducerClass(ReducerDataPigReducer.class);
        job.setNumReduceTasks(1);

        // Specify key / value
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(ReducerPigKey.class);

        // Input
        FileInputFormat.addInputPath(job, inputPath);
        job.setInputFormatClass(SequenceFileInputFormat.class);

        // Output
        FileOutputFormat.setOutputPath(job, outputDir);
        job.setOutputFormatClass(TextOutputFormat.class);

        // Delete output if exists
        if (fs.exists(outputDir)) {
            fs.delete(outputDir, true);
        }

        // Execute job
        int code = job.waitForCompletion(true) ? 0 : 1;
        if (code == 0) {
            Counters counters = job.getCounters();
            Counter malformedCounter = counters.findCounter(ReducerDataEnum.MALFORMED_DATA);
            LOG.info("Counter malformed data: " + malformedCounter.getValue());
            for (FileStatus fStatus : files) {
                LOG.info("File name:" + fStatus.getPath());
                if (fStatus.isFile()) {
                    LOG.info("Removing file in path:" + fStatus.getPath());
                    fs.delete(fStatus.getPath(), false);
                }
            }
        }
    }
}