List of usage examples for org.apache.hadoop.mapreduce Job setReducerClass
public void setReducerClass(Class<? extends Reducer> cls) throws IllegalStateException
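Before the examples, a minimal sketch of the pattern every source file on this page follows: subclass Reducer, then register it on the Job. The WordCountReducer name and the summing logic here are illustrative, not taken from any of the source files below.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Reducer;

// Hypothetical reducer: sums the integer counts emitted for each key.
public class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        int sum = 0;
        for (IntWritable value : values) {
            sum += value.get();
        }
        context.write(key, new IntWritable(sum));
    }
}

Registration on a configured job; setReducerClass throws IllegalStateException if the job has already been submitted:

Job job = Job.getInstance(new Configuration(), "word count");
job.setReducerClass(WordCountReducer.class);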
From source file:com.jhkt.playgroundArena.hadoop.tasks.jobs.AverageMultipleOutputJob.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    Job job = new Job(conf, AverageMultipleOutputJob.class.getSimpleName());
    job.setJarByClass(AverageMultipleOutputJob.class);

    Path in = new Path(args[0]);
    Path out = new Path(args[1]);
    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);

    job.setJobName("Sample Multiple Output Job");
    job.setMapperClass(AverageMapper.class);
    job.setReducerClass(AverageMultipleOutputReducer.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IntWritable.class);

    MultipleOutputs.addNamedOutput(job, "greaterThan1000", TextOutputFormat.class, Text.class,
            DoubleWritable.class);
    MultipleOutputs.addNamedOutput(job, "lessThan1000", TextOutputFormat.class, Text.class,
            DoubleWritable.class);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
    return 0;
}
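The source of AverageMultipleOutputReducer is not shown on this page. A plausible sketch of how such a reducer could route records to the two named outputs registered above; the generic types, threshold logic, and averaging are assumptions:

import java.io.IOException;

import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;

// Hypothetical sketch -- not the actual AverageMultipleOutputReducer source.
public class AverageMultipleOutputReducer
        extends Reducer<IntWritable, IntWritable, Text, DoubleWritable> {

    private MultipleOutputs<Text, DoubleWritable> multipleOutputs;

    @Override
    protected void setup(Context context) {
        multipleOutputs = new MultipleOutputs<Text, DoubleWritable>(context);
    }

    @Override
    protected void reduce(IntWritable key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        long sum = 0;
        long count = 0;
        for (IntWritable value : values) {
            sum += value.get();
            count++;
        }
        double average = (double) sum / count;
        // Route the record to one of the named outputs registered with addNamedOutput.
        String namedOutput = average > 1000 ? "greaterThan1000" : "lessThan1000";
        multipleOutputs.write(namedOutput, new Text(key.toString()), new DoubleWritable(average));
    }

    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        multipleOutputs.close(); // flushes the named output files
    }
}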
From source file:com.jhkt.playgroundArena.hadoop.tasks.jobs.BloomFilterJob.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    Job job = new Job(conf, BloomFilterJob.class.getSimpleName());
    job.setJarByClass(BloomFilterJob.class);

    Path in = new Path(args[0]);
    Path out = new Path(args[1]);
    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);

    job.setJobName("Sample BloomFilter Job");
    job.setMapperClass(BloomFilterMapper.class);
    job.setReducerClass(BloomFilterReducer.class);
    job.setNumReduceTasks(1);

    job.setInputFormatClass(TextInputFormat.class);

    /*
     * We want our reducer to output the final BloomFilter as a binary file. Hadoop
     * doesn't ship an output format for that, so we use NullOutputFormat.class and
     * write the file ourselves.
     *
     * In general, life gets a little more dangerous when you deviate from MapReduce's
     * input/output framework and start working with your own files. Your tasks are no
     * longer guaranteed to be idempotent, and you need to understand how various
     * failure scenarios can affect your tasks. For example, your files may be only
     * partially written when some tasks are restarted. Our example here is safe(r)
     * because all the file operations take place together, only once, in the close()
     * method and in only one reducer. A more careful/paranoid implementation would
     * check each individual file operation more closely.
     */
    job.setOutputFormatClass(NullOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(BloomFilter.class);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
    return 0;
}
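The comment above describes writing the filter exactly once from the single reducer. A hedged sketch of what BloomFilterReducer might look like; the filter parameters and the output path are assumptions, not the project's actual source:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.util.bloom.BloomFilter;
import org.apache.hadoop.util.hash.Hash;

// Hypothetical sketch -- not the actual BloomFilterReducer source.
public class BloomFilterReducer extends Reducer<Text, BloomFilter, Text, BloomFilter> {

    // Vector size, hash count, and hash type are illustrative values; they must
    // match whatever the mappers used to build their partial filters.
    private final BloomFilter combined = new BloomFilter(1000000, 6, Hash.MURMUR_HASH);

    @Override
    protected void reduce(Text key, Iterable<BloomFilter> values, Context context) {
        // OR together the partial filters built by the mappers.
        for (BloomFilter partial : values) {
            combined.or(partial);
        }
    }

    @Override
    protected void cleanup(Context context) throws IOException {
        // Write the final filter exactly once, from this single reducer.
        Configuration conf = context.getConfiguration();
        Path outputPath = new Path("/tmp/bloomfilter.bin"); // assumed location
        FileSystem fs = FileSystem.get(conf);
        FSDataOutputStream out = fs.create(outputPath);
        try {
            combined.write(out); // BloomFilter implements Writable
        } finally {
            out.close();
        }
    }
}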
From source file:com.jhkt.playgroundArena.hadoop.tasks.jobs.CountJob.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    Job job = new Job(conf, CountJob.class.getSimpleName());
    job.setJarByClass(CountJob.class);

    Path in = new Path(args[0]);
    Path out = new Path(args[1]);
    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);

    job.setJobName("Sample Count Job");
    job.setMapperClass(CountMapper.class);
    job.setReducerClass(CountReducer.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IntWritable.class);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
    return 0;
}
From source file:com.juniarto.secondsorter.SsJob.java
public int run(String[] allArgs) throws Exception {
    Configuration conf = getConf();
    Job job = new Job(conf, "secondary sort");
    job.setJarByClass(SsJob.class);

    job.setPartitionerClass(NaturalKeyPartitioner.class);
    job.setGroupingComparatorClass(NaturalKeyGroupingComparator.class);
    job.setSortComparatorClass(CompositeKeyComparator.class);

    job.setMapOutputKeyClass(TextDsi.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapperClass(SsMapper.class);
    job.setReducerClass(SsReducer.class);
    job.setNumReduceTasks(2);

    String[] args = new GenericOptionsParser(getConf(), allArgs).getRemainingArgs();
    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    //job.submit();
    long time1 = System.nanoTime();
    boolean status = job.waitForCompletion(true);
    long time2 = System.nanoTime();
    long timeSpent = time2 - time1;
    LOG.info("TIME: " + timeSpent);
    return status ? 0 : 1; // propagate job success/failure instead of always returning 0
}
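The partitioner, grouping comparator, and sort comparator sources aren't shown here. For a secondary sort, the partitioner must hash only the natural (grouping) part of the composite key so that all values for one natural key reach the same reducer. A hedged sketch, assuming TextDsi exposes its natural-key component via a getText() accessor (that name is an assumption):

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapreduce.Partitioner;

// Hypothetical sketch -- not the actual NaturalKeyPartitioner source.
// Partitions on the natural (Text) part of the composite key only, so every
// composite key sharing a natural key lands on the same reducer; the sort
// comparator then orders the secondary component within that group.
public class NaturalKeyPartitioner extends Partitioner<TextDsi, IntWritable> {
    @Override
    public int getPartition(TextDsi key, IntWritable value, int numPartitions) {
        return (key.getText().hashCode() & Integer.MAX_VALUE) % numPartitions;
    }
}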
From source file:com.justgiving.raven.kissmetrics.jsonenricher.KissmetricsJsonToEnrichedJsonDriver.java
License:Open Source License
public static void main(String[] args) throws Exception {
    logger.info("Logger - Converting Kissmetrics Json to Valid Json files");
    System.out.println("Converting Kissmetrics Json to Valid Json files");
    System.out.println("defaultCharacterEncoding by property: " + System.getProperty("file.encoding"));
    System.out.println("defaultCharacterEncoding by code: " + getDefaultCharEncoding());
    System.out.println("defaultCharacterEncoding by charSet: " + Charset.defaultCharset());

    Job job = Job.getInstance();
    job.setJarByClass(KissmetricsJsonToEnrichedJsonDriver.class);
    job.setJobName("Kissmetrics Json to valid and enriched Json files");
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    // Optional third argument overrides the default reducer count.
    int numberOfReducers = 2;
    if (args.length > 2 && args[2] != null) {
        numberOfReducers = Integer.parseInt(args[2]);
        if (numberOfReducers <= 0) {
            numberOfReducers = 2;
        }
    }

    job.setMapperClass(com.justgiving.raven.kissmetrics.jsonenricher.KissmetricsJsonToEnrichedJsonMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setReducerClass(com.justgiving.raven.kissmetrics.jsonenricher.KissmetricsJsonToEnrichedJsonReducer.class);
    job.setNumReduceTasks(numberOfReducers);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.justgiving.raven.kissmetrics.schema.KissmetricsJsonToSchemaDriver.java
License:Open Source License
public static void main(String[] args) throws Exception {
    // Optional third argument overrides the default reducer count.
    int numberOfReducers = 1;
    if (args.length > 2 && args[2] != null) {
        numberOfReducers = Integer.parseInt(args[2]);
        if (numberOfReducers <= 0) {
            numberOfReducers = 1;
        }
    }

    System.out.println("Kissmetrics Json Schema Extractor");

    Job job = Job.getInstance();
    job.setJarByClass(KissmetricsJsonToSchemaDriver.class);
    job.setJobName("Kissmetrics Json Schema Extractor");
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(com.justgiving.raven.kissmetrics.schema.KissmetricsJsonToSchemaMapper.class);
    job.setReducerClass(com.justgiving.raven.kissmetrics.schema.KissmetricsJsonToSchemaReducer.class);
    job.setNumReduceTasks(numberOfReducers);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.jyz.study.hadoop.hbase.mapreduce.HFileOutputFormatBase.java
License:Apache License
/**
 * Configure a MapReduce Job to perform an incremental load into the given
 * table. This
 * <ul>
 * <li>Inspects the table to configure a total order partitioner</li>
 * <li>Uploads the partitions file to the cluster and adds it to the
 * DistributedCache</li>
 * <li>Sets the number of reduce tasks to match the current number of
 * regions</li>
 * <li>Sets the output key/value class to match HFileOutputFormat's
 * requirements</li>
 * <li>Sets the reducer up to perform the appropriate sorting (either
 * KeyValueSortReducer or PutSortReducer)</li>
 * </ul>
 * The user should be sure to set the map output value class to either
 * KeyValue or Put before running this function.
 */
public static void configureIncrementalLoad(Job job, HTable table,
        Class<? extends HFileOutputFormatBase> hfileOutputFormatBase) throws IOException {
    Configuration conf = job.getConfiguration();
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(KeyValue.class);
    job.setOutputFormatClass(hfileOutputFormatBase);

    // Based on the configured map output class, set the correct reducer to
    // properly sort the incoming values.
    // TODO it would be nice to pick one or the other of these formats.
    if (KeyValue.class.equals(job.getMapOutputValueClass())) {
        job.setReducerClass(KeyValueSortReducer.class);
    } else if (Put.class.equals(job.getMapOutputValueClass())) {
        job.setReducerClass(PutSortReducer.class);
    } else if (Text.class.equals(job.getMapOutputValueClass())) {
        job.setReducerClass(TextSortReducer.class);
    } else {
        LOG.warn("Unknown map output value type:" + job.getMapOutputValueClass());
    }

    conf.setStrings("io.serializations", conf.get("io.serializations"),
            MutationSerialization.class.getName(), ResultSerialization.class.getName(),
            KeyValueSerialization.class.getName());

    // Use table's region boundaries for TotalOrderPartitioner split points.
    LOG.info("Looking up current regions for table " + Bytes.toString(table.getTableName()));
    List<ImmutableBytesWritable> startKeys = getRegionStartKeys(table);
    LOG.info("Configuring " + startKeys.size() + " reduce partitions "
            + "to match current region count");
    job.setNumReduceTasks(startKeys.size());

    configurePartitioner(job, startKeys);
    // Set compression algorithms based on column families
    configureCompression(table, conf);
    configureBloomType(table, conf);
    configureBlockSize(table, conf);

    TableMapReduceUtil.addDependencyJars(job);
    TableMapReduceUtil.initCredentials(job);
    LOG.info("Incremental table " + Bytes.toString(table.getTableName()) + " output configured.");
}
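A hedged usage sketch for this helper: a driver fragment whose mapper emits KeyValue map-output values, so the branch above selects KeyValueSortReducer. The table name, paths, and the MyBulkLoadMapper/MyHFileOutputFormat class names are assumptions for illustration:

// Hypothetical driver fragment -- shows how configureIncrementalLoad picks the reducer.
Configuration conf = HBaseConfiguration.create();
Job job = new Job(conf, "hfile-bulk-load");
job.setJarByClass(MyBulkLoadMapper.class);        // assumed mapper class
job.setMapperClass(MyBulkLoadMapper.class);
job.setMapOutputKeyClass(ImmutableBytesWritable.class);
job.setMapOutputValueClass(KeyValue.class);       // drives the KeyValueSortReducer branch

FileInputFormat.setInputPaths(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));

HTable table = new HTable(conf, "my_table");      // assumed table name
// One call sets the reducer, partitioner, reduce-task count, and output format.
HFileOutputFormatBase.configureIncrementalLoad(job, table, MyHFileOutputFormat.class);

System.exit(job.waitForCompletion(true) ? 0 : 1);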
From source file:com.kangfoo.study.hadoop1.mp.typeformat.TestMapreduceMultipleInputs.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 3) {
        System.err.println("Usage: TestMapreduceMultipleInputs <in1> <in2> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "TestMapreduceMultipleInputs");
    job.setJarByClass(TestMapreduceMultipleInputs.class);
    // job.setMapperClass(Mapper1.class);          // not needed: mappers are bound per input path below
    // job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    MultipleInputs.addInputPath(job, new Path(otherArgs[0]), TextInputFormat.class, Mapper1.class);
    MultipleInputs.addInputPath(job, new Path(otherArgs[1]), SequenceFileInputFormat.class, Mapper2.class);

    // FileInputFormat.addInputPath(job, new Path(otherArgs[0]));  // replaced by MultipleInputs
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[2]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
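Mapper1 and Mapper2 aren't shown on this page. For the shared IntSumReducer to consume both streams, each mapper must emit the same (Text, IntWritable) map-output pair. A minimal hedged sketch; the tokenizing logic and the sequence file's key/value types are assumptions:

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// Hypothetical sketches -- both mappers must agree on (Text, IntWritable) output types.
public class Mapper1 extends Mapper<LongWritable, Text, Text, IntWritable> {
    private static final IntWritable ONE = new IntWritable(1);

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Plain text input: emit (token, 1) for each whitespace-separated token.
        for (String token : value.toString().split("\\s+")) {
            context.write(new Text(token), ONE);
        }
    }
}

public class Mapper2 extends Mapper<Text, IntWritable, Text, IntWritable> {
    @Override
    protected void map(Text key, IntWritable value, Context context)
            throws IOException, InterruptedException {
        // Assumes the sequence file already stores (Text, IntWritable) pairs; pass through.
        context.write(key, value);
    }
}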
From source file:com.kangfoo.study.hadoop1.mp.typeformat.TestMapreduceSequenceInputFormat.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: TestMapreduceSequenceInputFormat <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "TestMapreduceSequenceInputFormat");
    job.setJarByClass(TestMapreduceSequenceInputFormat.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setInputFormatClass(SequenceFileInputFormat.class); // read input as sequence files
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.kangfoo.study.hadoop1.mp.typeformat.TestMapreduceTextInputFormat.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: TestMapreduceTextInputFormat <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "TestMapreduceTextInputFormat");
    job.setJarByClass(TestMapreduceTextInputFormat.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}