Java tutorial: Hadoop MapReduce WordCount (classic org.apache.hadoop.mapred API)
package com.zfylin.demo.bigdata.hadoop.mr;

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.*;

import java.io.IOException;
import java.util.Iterator;
import java.util.StringTokenizer;

/**
 * WordCount written against the classic MapReduce API (org.apache.hadoop.mapred).
 */
public class WordCount2 {

    /**
     * Mapper: tokenizes each input line and emits a (word, 1) pair per token.
     */
    public static class WordCountMapper extends MapReduceBase
            implements Mapper<Object, Text, Text, IntWritable> {

        private final static IntWritable one = new IntWritable(1);
        private Text word = new Text();

        /**
         * map() is called once per input record: the key is the byte offset
         * of the line, the value is the line itself. The pairs emitted here
         * are shuffled to the reducers grouped by key.
         */
        @Override
        public void map(Object key, Text value,
                        OutputCollector<Text, IntWritable> output,
                        Reporter reporter) throws IOException {
            /*
             * java.util.StringTokenizer splits a string into tokens:
             * 1. StringTokenizer(String str)
             *    tokenizes str on the default delimiters: space, tab (\t),
             *    newline (\n) and carriage return (\r)
             * 2. StringTokenizer(String str, String delim)
             *    tokenizes str on the delimiters listed in delim
             * 3. StringTokenizer(String str, String delim, boolean returnDelims)
             *    as above, optionally returning the delimiters as tokens too
             */
            StringTokenizer itr = new StringTokenizer(value.toString());
            while (itr.hasMoreTokens()) {
                word.set(itr.nextToken());
                output.collect(word, one);
            }
        }
    }

    /**
     * Reducer: receives each word together with all the 1s the mappers
     * emitted for it, and sums them into the final count.
     */
    public static class WordCountReducer extends MapReduceBase
            implements Reducer<Text, IntWritable, Text, IntWritable> {

        private IntWritable result = new IntWritable();

        @Override
        public void reduce(Text key, Iterator<IntWritable> values,
                           OutputCollector<Text, IntWritable> output,
                           Reporter reporter) throws IOException {
            int sum = 0;
            while (values.hasNext()) {
                sum += values.next().get();
            }
            result.set(sum);
            output.collect(key, result);
        }
    }

    public static void main(String[] args) throws Exception {
        // Run as the hdfs user so the job is allowed to write to HDFS.
        System.setProperty("HADOOP_USER_NAME", "hdfs");

        // Input directory on HDFS.
        String input = "hdfs://hadoop-master:8020/data/hive/warehouse/channel_test.db/tbl_student";
        // Output directory on HDFS; it must not exist before the job runs.
        String output = "hdfs://hadoop-master:8020/data/hive/warehouse/channel_test.db/tbl_student/output/";

        JobConf conf = new JobConf(WordCount2.class);
        /*
         * Needed when submitting from Windows to a Linux cluster; without it
         * the job fails with:
         * ERROR: Exception message: /bin/bash: line 0: fg: no job control
         */
        conf.set("mapreduce.app-submission.cross-platform", "true");
        conf.setJobName("WordCount");

//        conf.addResource("classpath:/hadoop/core-site.xml");
//        conf.addResource("classpath:/hadoop/hdfs-site.xml");
//        conf.addResource("classpath:/hadoop/mapred-site.xml");

        // Output key type: the word.
        conf.setOutputKeyClass(Text.class);
        // Output value type: the count (int).
        conf.setOutputValueClass(IntWritable.class);

        // Mapper class.
        conf.setMapperClass(WordCountMapper.class);
        /*
         * Combiner: runs the reduce logic on each mapper's local output
         * before the shuffle, cutting down the data sent over the network.
         * Reusing the reducer works here because summing counts is
         * associative and commutative.
         */
        conf.setCombinerClass(WordCountReducer.class);
        // Reducer class.
        conf.setReducerClass(WordCountReducer.class);

        /*
         * TextInputFormat reads the input line by line, handing the mapper
         * a LongWritable byte offset as the key and the line as a Text value.
         */
        conf.setInputFormat(TextInputFormat.class);
        /*
         * TextOutputFormat writes each (key, value) pair as a line of text,
         * converting both parts with toString().
         */
        conf.setOutputFormat(TextOutputFormat.class);

        // Input path(s).
        FileInputFormat.setInputPaths(conf, new Path(input));
        // Output path.
        FileOutputFormat.setOutputPath(conf, new Path(output));

        // Submit the job and block until it finishes.
        JobClient.runJob(conf);
        System.exit(0);
    }
}
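The listing above uses the classic org.apache.hadoop.mapred API (MapReduceBase, JobConf, JobClient). For comparison, here is a sketch of the same job against the newer org.apache.hadoop.mapreduce API, which replaces OutputCollector/Reporter with a single Context and JobConf/JobClient with Job. The class name WordCountNewApi is illustrative, and the hard-coded HDFS paths are replaced by command-line arguments; neither is part of the original listing.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;
import java.util.StringTokenizer;

public class WordCountNewApi {

    // Same tokenize-and-emit logic as WordCountMapper, but writing
    // through the new API's Context instead of an OutputCollector.
    public static class TokenizerMapper
            extends Mapper<Object, Text, Text, IntWritable> {

        private final static IntWritable one = new IntWritable(1);
        private Text word = new Text();

        @Override
        public void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            StringTokenizer itr = new StringTokenizer(value.toString());
            while (itr.hasMoreTokens()) {
                word.set(itr.nextToken());
                context.write(word, one);
            }
        }
    }

    // Same summing logic as WordCountReducer; the new API hands the
    // values in as an Iterable rather than an Iterator.
    public static class IntSumReducer
            extends Reducer<Text, IntWritable, Text, IntWritable> {

        private IntWritable result = new IntWritable();

        @Override
        public void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }
            result.set(sum);
            context.write(key, result);
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "word count");
        job.setJarByClass(WordCountNewApi.class);
        job.setMapperClass(TokenizerMapper.class);
        job.setCombinerClass(IntSumReducer.class);   // reuse the reducer as combiner
        job.setReducerClass(IntSumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        // Input and output paths come from the command line here,
        // instead of the hard-coded HDFS paths used above.
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

With either version, package the class into a jar and submit it with the hadoop jar command. Note that FileOutputFormat refuses to overwrite an existing directory, so the output path must be deleted (or changed) before each run.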