Java tutorial
/* * Copyright 2013 * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package de.tudarmstadt.lt.n2n.hadoop; import java.io.IOException; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapred.FileInputFormat; import org.apache.hadoop.mapred.FileOutputFormat; import org.apache.hadoop.mapred.JobClient; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.KeyValueTextInputFormat; import org.apache.hadoop.mapred.MapReduceBase; import org.apache.hadoop.mapred.Mapper; import org.apache.hadoop.mapred.OutputCollector; import org.apache.hadoop.mapred.Reporter; import org.apache.hadoop.mapred.TextOutputFormat; import org.apache.hadoop.util.GenericOptionsParser; /** * * @author Steffen Remus */ public class FlipJoBims { public static void main(String[] args) throws Exception { JobConf conf = new JobConf(FlipJoBims.class); /* begin necessary for UKP cluster */ conf.setMemoryForMapTask(1000L); // 1 GB /* necessary for UKP cdh3 */ conf.setMemoryForReduceTask(1000L); // 1 GB /* necessary for UKP cdh3 */ FileOutputFormat.setCompressOutput(conf, true); // compress output FileOutputFormat.setOutputCompressorClass(conf, org.apache.hadoop.io.compress.BZip2Codec.class); /* use the bzip2 codec for compression */ conf.setCompressMapOutput(true); // compress mapper output /* end necessary for UKP cluster */ conf.setJobName(FlipJoBims.class.getSimpleName()); args = new GenericOptionsParser(conf, args).getRemainingArgs(); conf.setInputFormat(KeyValueTextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); conf.setMapperClass(FlipJoBims.Map.class); conf.setNumReduceTasks(0); // conf.setReducerClass(IdentityReducer.class); conf.setMapOutputKeyClass(Text.class); conf.setOutputKeyClass(Text.class); conf.setMapOutputValueClass(Text.class); conf.setOutputValueClass(Text.class); FileInputFormat.setInputPaths(conf, new Path(args[0])); FileOutputFormat.setOutputPath(conf, new Path(args[1])); JobClient.runJob(conf); } public static class Map extends MapReduceBase implements Mapper<Text, Text, Text, Text> { private Text _key = new Text(); @Override public void map(Text key, Text value, OutputCollector<Text, Text> collector, Reporter reporter) throws IOException { // tab_index String value_as_string = value.toString(); int tab_index = value.toString().indexOf('\t'); _key.set(value_as_string.substring(0, tab_index)); value.set(key.toString() + value_as_string.substring(tab_index)); collector.collect(_key, value); } } }