net.team1.dev.HousingAnalysis.java Source code

Java tutorial

Introduction

Here is the source code for net.team1.dev.HousingAnalysis.java

Source

// ----------------------------------------------------------------------------
// Copyright 2017 team1@course_bigdata, Saint Joseph's University
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// ----------------------------------------------------------------------------

package net.team1.dev;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.*;
import org.apache.hadoop.mapred.lib.MultipleInputs;

import java.io.IOException;
import java.util.HashMap;
import java.util.logging.Logger;

/**
 * A class for the map/reduce process
 */
public class HousingAnalysis {

    /**
     * The main entry point for the map/reduce runner.
     *
     * @param args 2 args: \<input dir\> \<output dir\>
     * @throws Exception Throws IOException
     */
    public static void main(String[] args) throws Exception {
        Path inputDir = new Path(args[0]);
        Path outputDir = new Path(args[1]);
        FileSystem fs = FileSystem.get(new Configuration());

        if (!fs.exists(inputDir))
            throw new IOException("The input path does not exist.");
        if (fs.isFile(inputDir))
            throw new IOException("The input path is a file.");
        if (fs.exists(outputDir))
            fs.delete(outputDir, true);

        // set job configuration
        JobConf conf = new JobConf(HousingAnalysis.class);
        conf.setJobName("housinganalysis");
        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(Text.class);
        conf.setOutputFormat(TextOutputFormat.class);
        conf.setCombinerClass(HousingReducer.class);
        conf.setReducerClass(HousingReducer.class);

        // set multiple input files
        HashMap<Path, Class<? extends Mapper>> inputMappers = getInputFilePaths(inputDir, fs);
        for (Path p : inputMappers.keySet()) {
            MultipleInputs.addInputPath(conf, p, TextInputFormat.class, inputMappers.get(p));
            LOG.info(p.getName() + ": " + inputMappers.get(p).getName());
        }

        // set output
        FileOutputFormat.setOutputPath(conf, outputDir);

        // start the job
        JobClient.runJob(conf);
    }

    /**
     * Walk through the input directory to get all the input files and construct the file path and mapper class dictionary.
     *
     * @param inputDir The path of the input directory
     * @param fs       The HDFS file system object.
     * @return A {@link HashMap} whose keys are the file path and values are the corresponding mapper classes.
     * @throws IOException IOException
     */
    private static HashMap<Path, Class<? extends Mapper>> getInputFilePaths(Path inputDir, FileSystem fs)
            throws IOException {
        HashMap<Path, Class<? extends Mapper>> mappers = new HashMap<>();
        FileStatus files[] = fs.listStatus(inputDir, new PathFilter() {
            @Override
            public boolean accept(Path path) {
                String name = path.getName();
                return name.endsWith(".txt") && name.startsWith("thads");
            }
        });
        for (FileStatus f : files) {
            Path p = f.getPath();
            String fileName = p.getName();
            if (fileName.contains("2013"))
                mappers.put(p, Mapper2013.class);
            else if (fileName.contains("2003"))
                mappers.put(p, Mapper2003.class);
            else if (fileName.contains("2005"))
                mappers.put(p, Mapper2005.class);
            else if (fileName.contains("2007"))
                mappers.put(p, Mapper2007.class);
            else if (fileName.contains("2009"))
                mappers.put(p, Mapper2009.class);
            else if (fileName.contains("2011"))
                mappers.put(p, Mapper2011.class);
        }
        return mappers;
    }

    private static final Logger LOG = Logger.getLogger(HousingAnalysis.class.getName());
}