com.zfylin.demo.bigdata.hadoop.mr.WordCount2.java Source code

Introduction

Here is the source code for com.zfylin.demo.bigdata.hadoop.mr.WordCount2.java

Source

package com.zfylin.demo.bigdata.hadoop.mr;

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * <p/>
 * http://www.apache.org/licenses/LICENSE-2.0
 * <p/>
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.*;

import java.io.IOException;
import java.util.Iterator;
import java.util.StringTokenizer;

/**
 * Word count implemented with the classic Hadoop MapReduce (mapred) API.
 */
public class WordCount2 {
    /**
     * Mapper
     */
    public static class WordCountMapper extends MapReduceBase implements Mapper<Object, Text, Text, IntWritable> {
        private final static IntWritable one = new IntWritable(1);
        private Text word = new Text();

        /**
         * The map function, called once per input record (one line of text).
         * For each word in the line it emits the pair (word, 1); these
         * key/value pairs connect the map phase to the reduce phase.
         */
        @Override
        public void map(Object key, Text value, OutputCollector<Text, IntWritable> output, Reporter reporter)
                throws IOException {
            /**
             * StringTokenizer splits a string into tokens.
             *
             * java.util.StringTokenizer constructors:
             * 1. StringTokenizer(String str)
             *    Tokenizes str using the default delimiters:
             *    space, tab (\t), newline (\n), and carriage return (\r).
             * 2. StringTokenizer(String str, String delim)
             *    Tokenizes str using the characters in delim as delimiters.
             * 3. StringTokenizer(String str, String delim, boolean returnDelims)
             *    As above, but when returnDelims is true the delimiters
             *    themselves are also returned as tokens.
             *
             * The default delimiters (space, \t, \n, \r) are used here.
             */
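            // For example, the input line "hello world hello" yields the tokens
            // "hello", "world", "hello", and the loop below emits
            // ("hello", 1), ("world", 1), ("hello", 1).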
            StringTokenizer itr = new StringTokenizer(value.toString());
            while (itr.hasMoreTokens()) {
                word.set(itr.nextToken());
                output.collect(word, one);
            }
        }
    }

    /**
     * The reduce function receives the map output grouped by key:
     * for each word, an iterator over every count emitted for that word.
     * Summing those counts yields the word's total frequency.
     */
    public static class WordCountReducer extends MapReduceBase
            implements Reducer<Text, IntWritable, Text, IntWritable> {
        private IntWritable result = new IntWritable();

        @Override
        public void reduce(Text key, Iterator<IntWritable> values, OutputCollector<Text, IntWritable> output,
                Reporter reporter) throws IOException {
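            // For example, key = "hello" with values [1, 1, 1] emits ("hello", 3).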
            int sum = 0;
            while (values.hasNext()) {
                sum += values.next().get();
            }
            result.set(sum);
            output.collect(key, result);
        }
    }

    public static void main(String[] args) throws Exception {
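        // Run HDFS operations as the "hdfs" user, regardless of the local OS user.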
        System.setProperty("HADOOP_USER_NAME", "hdfs");

        // Input path: an HDFS directory (here, a Hive table's warehouse directory).
        String input = "hdfs://hadoop-master:8020/data/hive/warehouse/channel_test.db/tbl_student";
        /**
         * HDFS output path.
         * The directory must not already exist, or the job will fail at submission.
         */
        String output = "hdfs://hadoop-master:8020/data/hive/warehouse/channel_test.db/tbl_student/output/";

        JobConf conf = new JobConf(WordCount2.class);
        /**
         * Fixes "ERROR: Exception message: /bin/bash: line 0: fg: no job control"
         * when submitting the job from a Windows client to a Linux cluster.
         */
        conf.set("mapreduce.app-submission.cross-platform", "true");

        conf.setJobName("WordCount");
        //        conf.addResource("classpath:/hadoop/core-site.xml");
        //        conf.addResource("classpath:/hadoop/hdfs-site.xml");
        //        conf.addResource("classpath:/hadoop/mapred-site.xml");
        // Output key type: Text (the word).
        conf.setOutputKeyClass(Text.class);
        // Output value type: IntWritable (the count).
        conf.setOutputValueClass(IntWritable.class);
        // Mapper implementation.
        conf.setMapperClass(WordCountMapper.class);
        /**
         * Set the combiner class.
         * The combiner runs on each map task's local output before it is
         * shuffled to the reducers, pre-aggregating values for the same key
         * and cutting down the data transferred over the network.
         * It works here because the reduce logic (summing counts) is
         * associative and commutative, so the reducer class can be reused.
         */
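        // For example, a map task that emits ("the", 1) three times sends a
        // single ("the", 3) to the reducer once the combiner has run.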
        conf.setCombinerClass(WordCountReducer.class);
        // Reducer implementation.
        conf.setReducerClass(WordCountReducer.class);
        /**
         * Use TextInputFormat:
         * each line of the input files becomes one record, with the line's
         * byte offset as the key (LongWritable) and the line's contents as
         * the value (Text).
         */
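        // For example, a file starting with "hello world\n" produces the
        // record key = 0 (byte offset), value = "hello world".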
        conf.setInputFormat(TextInputFormat.class);
        /**
         * Use TextOutputFormat:
         * writes each key/value pair as a tab-separated line of text,
         * calling toString() on both the key and the value.
         */
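        // For example, the pair ("hello", 3) is written as the line "hello\t3".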
        conf.setOutputFormat(TextOutputFormat.class);
        // Input path(s) for the job.
        FileInputFormat.setInputPaths(conf, new Path(input));
        // Output path for the job.
        FileOutputFormat.setOutputPath(conf, new Path(output));
        // Submit the MapReduce job and block until it completes.
        JobClient.runJob(conf);
        System.exit(0);
    }

}
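
Note that FileOutputFormat refuses to write into an existing directory, so rerunning the job against the same output path fails until the old directory is removed. Below is a minimal sketch of clearing it from the client before resubmitting; the class name CleanOutputDir is hypothetical, and the path is assumed to match the one used in the source above.

package com.zfylin.demo.bigdata.hadoop.mr;

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

/**
 * Hypothetical helper (not part of the listing above): deletes a previous
 * run's output directory so that WordCount2 can be resubmitted without
 * failing on an existing path.
 */
public class CleanOutputDir {
    public static void main(String[] args) throws Exception {
        String output = "hdfs://hadoop-master:8020/data/hive/warehouse/channel_test.db/tbl_student/output/";

        FileSystem fs = FileSystem.get(URI.create(output), new Configuration());
        // delete(path, recursive = true) returns true only if the path existed and was removed.
        if (fs.delete(new Path(output), true)) {
            System.out.println("Removed existing output directory: " + output);
        }
    }
}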