List of usage examples for org.apache.hadoop.mapreduce Job setPartitionerClass
public void setPartitionerClass(Class<? extends Partitioner> cls) throws IllegalStateException
From source file:org.apache.tez.mapreduce.examples.MRRSleepJob.java
License:Apache License
@VisibleForTesting public Job createJob(int numMapper, int numReducer, int iReduceStagesCount, int numIReducer, long mapSleepTime, int mapSleepCount, long reduceSleepTime, int reduceSleepCount, long iReduceSleepTime, int iReduceSleepCount) throws IOException { Configuration conf = getConf(); conf.setLong(MAP_SLEEP_TIME, mapSleepTime); conf.setLong(REDUCE_SLEEP_TIME, reduceSleepTime); conf.setLong(IREDUCE_SLEEP_TIME, iReduceSleepTime); conf.setInt(MAP_SLEEP_COUNT, mapSleepCount); conf.setInt(REDUCE_SLEEP_COUNT, reduceSleepCount); conf.setInt(IREDUCE_SLEEP_COUNT, iReduceSleepCount); conf.setInt(MRJobConfig.NUM_MAPS, numMapper); conf.setInt(IREDUCE_STAGES_COUNT, iReduceStagesCount); conf.setInt(IREDUCE_TASKS_COUNT, numIReducer); // Configure intermediate reduces conf.setInt(org.apache.tez.mapreduce.hadoop.MRJobConfig.MRR_INTERMEDIATE_STAGES, iReduceStagesCount); LOG.info("Running MRR with " + iReduceStagesCount + " IR stages"); for (int i = 1; i <= iReduceStagesCount; ++i) { // Set reducer class for intermediate reduce conf.setClass(// w w w . j a va2 s. 
com MultiStageMRConfigUtil.getPropertyNameForIntermediateStage(i, "mapreduce.job.reduce.class"), ISleepReducer.class, Reducer.class); // Set reducer output key class conf.setClass( MultiStageMRConfigUtil.getPropertyNameForIntermediateStage(i, "mapreduce.map.output.key.class"), IntWritable.class, Object.class); // Set reducer output value class conf.setClass(MultiStageMRConfigUtil.getPropertyNameForIntermediateStage(i, "mapreduce.map.output.value.class"), IntWritable.class, Object.class); conf.setInt(MultiStageMRConfigUtil.getPropertyNameForIntermediateStage(i, "mapreduce.job.reduces"), numIReducer); } Job job = Job.getInstance(conf, "sleep"); job.setNumReduceTasks(numReducer); job.setJarByClass(MRRSleepJob.class); job.setNumReduceTasks(numReducer); job.setMapperClass(SleepMapper.class); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(IntWritable.class); job.setReducerClass(SleepReducer.class); job.setOutputFormatClass(NullOutputFormat.class); job.setInputFormatClass(SleepInputFormat.class); job.setPartitionerClass(MRRSleepJobPartitioner.class); job.setSpeculativeExecution(false); job.setJobName("Sleep job"); FileInputFormat.addInputPath(job, new Path("ignored")); return job; }
From source file:org.apache.tez.mapreduce.examples.SecondarySort.java
License:Apache License
@Override public int run(String[] args) throws Exception { Configuration conf = getConf(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length != 2) { System.err.println("Usage: secondarysort <in> <out>"); ToolRunner.printGenericCommandUsage(System.out); return 2; }/*from www .j a v a 2s .c o m*/ Job job = new Job(conf, "secondary sort"); job.setJarByClass(SecondarySort.class); job.setMapperClass(MapClass.class); job.setReducerClass(Reduce.class); // group and partition by the first int in the pair job.setPartitionerClass(FirstPartitioner.class); job.setGroupingComparatorClass(FirstGroupingComparator.class); // the map output is IntPair, IntWritable job.setMapOutputKeyClass(IntPair.class); job.setMapOutputValueClass(IntWritable.class); // the reduce output is Text, IntWritable job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); FileInputFormat.addInputPath(job, new Path(otherArgs[0])); FileOutputFormat.setOutputPath(job, new Path(otherArgs[1])); return job.waitForCompletion(true) ? 0 : 1; }
From source file:org.archive.wayback.hadoop.CDXSortDriver.java
License:Apache License
/** * The main driver for sort program. Invoke this method to submit the * map/reduce job.// w w w. j a v a 2 s.c o m * * @throws IOException * When there is communication problems with the job tracker. */ public int run(String[] args) throws Exception { String delim = " "; long desiredMaps = 10; boolean compressOutput = false; boolean compressedInput = false; boolean gzipRange = false; List<String> otherArgs = new ArrayList<String>(); int mapMode = CDXCanonicalizingMapper.MODE_FULL; for (int i = 0; i < args.length; ++i) { try { if ("-m".equals(args[i])) { desiredMaps = Integer.parseInt(args[++i]); } else if ("--compress-output".equals(args[i])) { compressOutput = true; } else if ("--compressed-input".equals(args[i])) { compressedInput = true; } else if ("--gzip-range".equals(args[i])) { gzipRange = true; } else if ("--delimiter".equals(args[i])) { delim = args[++i]; } else if ("--map-full".equals(args[i])) { mapMode = CDXCanonicalizingMapper.MODE_FULL; } else if ("--map-global".equals(args[i])) { mapMode = CDXCanonicalizingMapper.MODE_GLOBAL; } else { otherArgs.add(args[i]); } } catch (NumberFormatException except) { System.out.println("ERROR: Integer expected instead of " + args[i]); return printUsage(); } catch (ArrayIndexOutOfBoundsException except) { System.out.println("ERROR: Required parameter missing from " + args[i - 1]); return printUsage(); // exits } } // Make sure there are exactly 3 parameters left: split input output if (otherArgs.size() != 3) { System.out.println("ERROR: Wrong number of parameters: " + otherArgs.size() + " instead of 3."); return printUsage(); } String splitPathString = otherArgs.get(0); String inputPathString = otherArgs.get(1); String outputPathString = otherArgs.get(2); Path splitPath = new Path(splitPathString); Path inputPath = new Path(inputPathString); Path outputPath = new Path(outputPathString); Job job = new Job(getConf(), "cdx-sort"); Configuration conf = job.getConfiguration(); job.setJarByClass(CDXSortDriver.class); 
job.setMapperClass(CDXCanonicalizingMapper.class); job.setReducerClass(CDXReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); // configure the "map mode" CDXCanonicalizingMapper.setMapMode(conf, mapMode); // set up the delimter: conf.set(TEXT_OUTPUT_DELIM_CONFIG, delim); if (compressOutput) { FileOutputFormat.setCompressOutput(job, true); FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class); } // set up the Partitioner, including number of reduce tasks: FileSystem fs = inputPath.getFileSystem(conf); int splitCount = countLinesInPath(splitPath, conf); System.err.println("Split/Reduce count:" + splitCount); job.setNumReduceTasks(splitCount); AlphaPartitioner.setPartitionPath(conf, splitPathString); job.setPartitionerClass(AlphaPartitioner.class); // calculate the byte size to get the correct number of map tasks: FileStatus inputStatus = fs.getFileStatus(inputPath); long inputLen = inputStatus.getLen(); long bytesPerMap = (int) inputLen / desiredMaps; FileInputFormat.addInputPath(job, inputPath); FileInputFormat.setMaxInputSplitSize(job, bytesPerMap); if (gzipRange) { job.setInputFormatClass(GZIPRangeLineDereferencingInputFormat.class); } else { job.setInputFormatClass(LineDereferencingInputFormat.class); if (compressedInput) { LineDereferencingRecordReader.forceCompressed(conf); } } FileOutputFormat.setOutputPath(job, outputPath); return (job.waitForCompletion(true) ? 0 : 1); }
From source file:org.avenir.association.AssociationRuleMiner.java
License:Apache License
@Override public int run(String[] args) throws Exception { Job job = new Job(getConf()); String jobName = "Association rule mining from frequent item sets"; job.setJobName(jobName);//from w ww . j a va 2 s .c o m job.setJarByClass(AssociationRuleMiner.class); FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); Utility.setConfiguration(job.getConfiguration(), "avenir"); job.setMapperClass(AssociationRuleMiner.RuleMinerMapper.class); job.setReducerClass(AssociationRuleMiner.RuleMinerReducer.class); job.setGroupingComparatorClass(SecondarySort.TuplePairGroupComprator.class); job.setPartitionerClass(SecondarySort.TuplePairPartitioner.class); job.setMapOutputKeyClass(Tuple.class); job.setMapOutputValueClass(Tuple.class); job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(Text.class); int numReducer = job.getConfiguration().getInt("arm.num.reducer", -1); numReducer = -1 == numReducer ? job.getConfiguration().getInt("num.reducer", 1) : numReducer; job.setNumReduceTasks(numReducer); int status = job.waitForCompletion(true) ? 0 : 1; return status; }
From source file:org.avenir.explore.ClassPartitionGenerator.java
License:Apache License
@Override public int run(String[] args) throws Exception { Job job = new Job(getConf()); String jobName = "Candidate split generator for attributes"; job.setJobName(jobName);//from w w w .j a v a 2 s.c o m job.setJarByClass(ClassPartitionGenerator.class); Utility.setConfiguration(job.getConfiguration(), "avenir"); String[] paths = getPaths(args, job); FileInputFormat.addInputPath(job, new Path(paths[0])); FileOutputFormat.setOutputPath(job, new Path(paths[1])); job.setMapperClass(ClassPartitionGenerator.PartitionGeneratorMapper.class); job.setReducerClass(ClassPartitionGenerator.PartitionGeneratorReducer.class); job.setCombinerClass(ClassPartitionGenerator.PartitionGeneratorCombiner.class); job.setMapOutputKeyClass(Tuple.class); job.setMapOutputValueClass(IntWritable.class); job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(Text.class); job.setPartitionerClass(AttributeSplitPartitioner.class); job.setNumReduceTasks(job.getConfiguration().getInt("num.reducer", 1)); int status = job.waitForCompletion(true) ? 0 : 1; return status; }
From source file:org.avenir.knn.FeatureCondProbJoiner.java
License:Apache License
@Override public int run(String[] args) throws Exception { Job job = new Job(getConf()); String jobName = "Training vector feature cond probability joiner MR"; job.setJobName(jobName);// w w w. j ava 2 s .co m job.setJarByClass(FeatureCondProbJoiner.class); FileInputFormat.addInputPaths(job, args[0]); FileOutputFormat.setOutputPath(job, new Path(args[1])); job.setMapperClass(FeatureCondProbJoiner.JoinerMapper.class); job.setReducerClass(FeatureCondProbJoiner.JoinerReducer.class); job.setMapOutputKeyClass(Tuple.class); job.setMapOutputValueClass(Tuple.class); job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(Text.class); job.setGroupingComparatorClass(SecondarySort.TuplePairGroupComprator.class); job.setPartitionerClass(SecondarySort.TuplePairPartitioner.class); Utility.setConfiguration(job.getConfiguration()); job.setNumReduceTasks(job.getConfiguration().getInt("num.reducer", 1)); int status = job.waitForCompletion(true) ? 0 : 1; return status; }
From source file:org.avenir.knn.NearestNeighbor.java
License:Apache License
/**
 * Sets up and runs the k-nearest-neighbor job.
 *
 * @param args {@code args[0]} is the input path, {@code args[1]} the output path
 * @return 0 when the job completes successfully, 1 otherwise
 * @throws Exception if job setup or execution fails
 */
@Override
public int run(String[] args) throws Exception {
  Job job = new Job(getConf());
  job.setJobName("K nerest neighbor(KNN) MR");
  job.setJarByClass(NearestNeighbor.class);

  FileInputFormat.addInputPath(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));

  job.setMapperClass(NearestNeighbor.TopMatchesMapper.class);
  job.setReducerClass(NearestNeighbor.TopMatchesReducer.class);

  job.setMapOutputKeyClass(Tuple.class);
  job.setMapOutputValueClass(Tuple.class);

  job.setOutputKeyClass(NullWritable.class);
  job.setOutputValueClass(Text.class);

  job.setGroupingComparatorClass(SecondarySort.TuplePairGroupComprator.class);
  job.setPartitionerClass(SecondarySort.TuplePairPartitioner.class);

  // Applied after the class setters and before the reducer count is read.
  Utility.setConfiguration(job.getConfiguration());
  int reducerCount = job.getConfiguration().getInt("num.reducer", 1);
  job.setNumReduceTasks(reducerCount);

  if (job.waitForCompletion(true)) {
    return 0;
  }
  return 1;
}
From source file:org.avenir.reinforce.RandomFirstGreedyBandit.java
License:Apache License
/**
 * Sets up and runs the random-first greedy bandit job.
 *
 * @param args {@code args[0]} is the input path, {@code args[1]} the output path
 * @return 0 when the job completes successfully, 1 otherwise
 * @throws Exception if job setup or execution fails
 */
@Override
public int run(String[] args) throws Exception {
  Job job = new Job(getConf());
  job.setJobName("Random first greedy bandit problem");
  job.setJarByClass(RandomFirstGreedyBandit.class);

  FileInputFormat.addInputPath(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));

  Utility.setConfiguration(job.getConfiguration(), "avenir");

  job.setMapperClass(RandomFirstGreedyBandit.BanditMapper.class);
  job.setReducerClass(RandomFirstGreedyBandit.BanditReducer.class);

  job.setMapOutputKeyClass(Tuple.class);
  job.setMapOutputValueClass(Text.class);

  job.setGroupingComparatorClass(SecondarySort.TuplePairGroupComprator.class);
  job.setPartitionerClass(SecondarySort.TupleTextPartitioner.class);

  job.setOutputKeyClass(NullWritable.class);
  job.setOutputValueClass(Text.class);

  int reducerCount = job.getConfiguration().getInt("num.reducer", 1);
  job.setNumReduceTasks(reducerCount);

  if (job.waitForCompletion(true)) {
    return 0;
  }
  return 1;
}
From source file:org.avenir.sequence.CandidateGenerationWithSelfJoin.java
License:Apache License
@Override public int run(String[] args) throws Exception { Job job = new Job(getConf()); String jobName = "Generates k candidate sequence"; job.setJobName(jobName);//from w ww .ja va 2 s .c o m job.setJarByClass(CandidateGenerationWithSelfJoin.class); FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); Utility.setConfiguration(job.getConfiguration(), "avenir"); job.setMapperClass(CandidateGenerationWithSelfJoin.CandidateGenerationMapper.class); job.setReducerClass(CandidateGenerationWithSelfJoin.CandidateGenerationReducer.class); job.setMapOutputKeyClass(Tuple.class); job.setMapOutputValueClass(Tuple.class); job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(Text.class); job.setGroupingComparatorClass(SecondarySort.TuplePairGroupComprator.class); job.setPartitionerClass(SecondarySort.TuplePairPartitioner.class); int numReducer = job.getConfiguration().getInt("cgs.num.reducer", -1); numReducer = -1 == numReducer ? job.getConfiguration().getInt("num.reducer", 1) : numReducer; job.setNumReduceTasks(numReducer); int status = job.waitForCompletion(true) ? 0 : 1; return status; }
From source file:org.avenir.tree.DataPartitioner.java
License:Apache License
@Override public int run(String[] args) throws Exception { Job job = new Job(getConf()); String jobName = "Partitions data by some split"; job.setJobName(jobName);/*from w w w. j a v a2 s . c o m*/ job.setJarByClass(DataPartitioner.class); Utility.setConfiguration(job.getConfiguration(), "avenir"); debugOn = job.getConfiguration().getBoolean("debug.on", false); if (debugOn) { LOG.setLevel(Level.DEBUG); } job.setMapperClass(DataPartitioner.PartitionerMapper.class); job.setReducerClass(DataPartitioner.PartitionerReducer.class); //find best split and create output path String inPath = getNodePath(job); if (debugOn) System.out.println("inPath:" + inPath); Split split = findBestSplitKey(job, inPath); String outPath = inPath + "/" + "split=" + split.getIndex(); if (debugOn) System.out.println("outPath:" + outPath); FileInputFormat.addInputPath(job, new Path(inPath)); FileOutputFormat.setOutputPath(job, new Path(outPath)); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(Text.class); job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(Text.class); job.setPartitionerClass(SecondarySort.RawIntKeyTextPartitioner.class); int numReducers = split.getSegmentCount(); if (debugOn) System.out.println("numReducers:" + numReducers); job.setNumReduceTasks(numReducers); int status = job.waitForCompletion(true) ? 0 : 1; //move output to segment directories if (status == 0) { moveOutputToSegmentDir(outPath, split.getSegmentCount(), job.getConfiguration()); } return status; }