Example usage for org.apache.hadoop.mapreduce Job setPartitionerClass

List of usage examples for org.apache.hadoop.mapreduce Job setPartitionerClass

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce Job setPartitionerClass.

Prototype

public void setPartitionerClass(Class<? extends Partitioner> cls) throws IllegalStateException 

Source Link

Document

Set the Partitioner for the job.

Usage

From source file:org.apache.tez.mapreduce.examples.MRRSleepJob.java

License:Apache License

/**
 * Builds the map / intermediate-reduce / reduce sleep job.
 *
 * <p>All sleep timings and counts are written to the configuration returned by
 * {@code getConf()} before the {@link Job} is created from it. For every
 * intermediate stage (numbered from 1) the stage-scoped reducer class, output
 * key/value classes and reducer count are registered as well.
 *
 * @param numMapper          number of map tasks
 * @param numReducer         number of final-stage reduce tasks
 * @param iReduceStagesCount number of intermediate reduce stages
 * @param numIReducer        number of reduce tasks in each intermediate stage
 * @param mapSleepTime       value stored under {@code MAP_SLEEP_TIME}
 * @param mapSleepCount      value stored under {@code MAP_SLEEP_COUNT}
 * @param reduceSleepTime    value stored under {@code REDUCE_SLEEP_TIME}
 * @param reduceSleepCount   value stored under {@code REDUCE_SLEEP_COUNT}
 * @param iReduceSleepTime   value stored under {@code IREDUCE_SLEEP_TIME}
 * @param iReduceSleepCount  value stored under {@code IREDUCE_SLEEP_COUNT}
 * @return the configured, not yet submitted, job
 * @throws IOException if the job instance cannot be created
 */
@VisibleForTesting
public Job createJob(int numMapper, int numReducer, int iReduceStagesCount, int numIReducer, long mapSleepTime,
        int mapSleepCount, long reduceSleepTime, int reduceSleepCount, long iReduceSleepTime,
        int iReduceSleepCount) throws IOException {
    Configuration conf = getConf();
    conf.setLong(MAP_SLEEP_TIME, mapSleepTime);
    conf.setLong(REDUCE_SLEEP_TIME, reduceSleepTime);
    conf.setLong(IREDUCE_SLEEP_TIME, iReduceSleepTime);
    conf.setInt(MAP_SLEEP_COUNT, mapSleepCount);
    conf.setInt(REDUCE_SLEEP_COUNT, reduceSleepCount);
    conf.setInt(IREDUCE_SLEEP_COUNT, iReduceSleepCount);
    conf.setInt(MRJobConfig.NUM_MAPS, numMapper);
    conf.setInt(IREDUCE_STAGES_COUNT, iReduceStagesCount);
    conf.setInt(IREDUCE_TASKS_COUNT, numIReducer);

    // Configure intermediate reduces
    conf.setInt(org.apache.tez.mapreduce.hadoop.MRJobConfig.MRR_INTERMEDIATE_STAGES, iReduceStagesCount);
    LOG.info("Running MRR with " + iReduceStagesCount + " IR stages");

    for (int i = 1; i <= iReduceStagesCount; ++i) {
        // Reducer class for intermediate stage i
        conf.setClass(
                MultiStageMRConfigUtil.getPropertyNameForIntermediateStage(i, "mapreduce.job.reduce.class"),
                ISleepReducer.class, Reducer.class);
        // Output key class of stage i, stored under the stage-scoped map-output key property
        conf.setClass(
                MultiStageMRConfigUtil.getPropertyNameForIntermediateStage(i, "mapreduce.map.output.key.class"),
                IntWritable.class, Object.class);
        // Output value class of stage i, stored under the stage-scoped map-output value property
        conf.setClass(
                MultiStageMRConfigUtil.getPropertyNameForIntermediateStage(i, "mapreduce.map.output.value.class"),
                IntWritable.class, Object.class);
        // Number of reduce tasks for stage i
        conf.setInt(MultiStageMRConfigUtil.getPropertyNameForIntermediateStage(i, "mapreduce.job.reduces"),
                numIReducer);
    }

    Job job = Job.getInstance(conf, "sleep");
    job.setJarByClass(MRRSleepJob.class);
    // The original set the reduce-task count twice; once is sufficient.
    job.setNumReduceTasks(numReducer);
    job.setMapperClass(SleepMapper.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setReducerClass(SleepReducer.class);
    job.setOutputFormatClass(NullOutputFormat.class);
    job.setInputFormatClass(SleepInputFormat.class);
    job.setPartitionerClass(MRRSleepJobPartitioner.class);
    job.setSpeculativeExecution(false);
    job.setJobName("Sleep job");

    // FileInputFormat needs some input path to be set; the literal path is "ignored".
    // NOTE(review): presumably SleepInputFormat never reads it — confirm.
    FileInputFormat.addInputPath(job, new Path("ignored"));
    return job;
}

From source file:org.apache.tez.mapreduce.examples.SecondarySort.java

License:Apache License

/**
 * Configures and runs the secondary-sort job.
 *
 * @param args command-line arguments; after generic Hadoop options are
 *             stripped, exactly two must remain: {@code <in> <out>}
 * @return 0 if the job completes successfully, 1 if it fails, 2 if the
 *         arguments are invalid
 * @throws Exception if job setup or execution fails
 */
@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        // Keep the whole usage message on one stream (stderr).
        System.err.println("Usage: secondarysort <in> <out>");
        ToolRunner.printGenericCommandUsage(System.err);
        return 2;
    }
    // Job.getInstance replaces the deprecated Job(Configuration, String) constructor.
    Job job = Job.getInstance(conf, "secondary sort");
    job.setJarByClass(SecondarySort.class);
    job.setMapperClass(MapClass.class);
    job.setReducerClass(Reduce.class);

    // group and partition by the first int in the pair
    job.setPartitionerClass(FirstPartitioner.class);
    job.setGroupingComparatorClass(FirstGroupingComparator.class);

    // the map output is IntPair, IntWritable
    job.setMapOutputKeyClass(IntPair.class);
    job.setMapOutputValueClass(IntWritable.class);

    // the reduce output is Text, IntWritable
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:org.archive.wayback.hadoop.CDXSortDriver.java

License:Apache License

/**
 * The main driver for the sort program. Invoke this method to submit the
 * map/reduce job.
 *
 * <p>Recognized options: {@code -m <maps>}, {@code --compress-output},
 * {@code --compressed-input}, {@code --gzip-range}, {@code --delimiter <str>},
 * {@code --map-full} and {@code --map-global}. Exactly three positional
 * arguments must remain: the split file, the input path and the output path.
 *
 * @param args command-line options and positional arguments as described above
 * @return 0 if the job succeeds, 1 if it fails, or the result of
 *         {@code printUsage()} when the arguments are invalid
 * @throws Exception if job setup or execution fails, e.g. an IOException when
 *             there are communication problems with the job tracker
 */
public int run(String[] args) throws Exception {

    String delim = " ";

    long desiredMaps = 10;
    boolean compressOutput = false;
    boolean compressedInput = false;
    boolean gzipRange = false;
    List<String> otherArgs = new ArrayList<String>();
    int mapMode = CDXCanonicalizingMapper.MODE_FULL;
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-m".equals(args[i])) {
                desiredMaps = Integer.parseInt(args[++i]);
            } else if ("--compress-output".equals(args[i])) {
                compressOutput = true;
            } else if ("--compressed-input".equals(args[i])) {
                compressedInput = true;
            } else if ("--gzip-range".equals(args[i])) {
                gzipRange = true;
            } else if ("--delimiter".equals(args[i])) {
                delim = args[++i];
            } else if ("--map-full".equals(args[i])) {
                mapMode = CDXCanonicalizingMapper.MODE_FULL;
            } else if ("--map-global".equals(args[i])) {
                mapMode = CDXCanonicalizingMapper.MODE_GLOBAL;
            } else {
                otherArgs.add(args[i]);
            }
        } catch (NumberFormatException except) {
            // i already points at the offending value here
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            // i was advanced past the end, so i - 1 is the option lacking its value
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage();
        }
    }

    // A non-positive map count would divide by zero (or yield a negative split size) below.
    if (desiredMaps <= 0) {
        System.out.println("ERROR: Number of maps must be positive, got " + desiredMaps);
        return printUsage();
    }

    // Make sure there are exactly 3 parameters left: split input output
    if (otherArgs.size() != 3) {
        System.out.println("ERROR: Wrong number of parameters: " + otherArgs.size() + " instead of 3.");
        return printUsage();
    }

    String splitPathString = otherArgs.get(0);
    String inputPathString = otherArgs.get(1);
    String outputPathString = otherArgs.get(2);

    Path splitPath = new Path(splitPathString);
    Path inputPath = new Path(inputPathString);
    Path outputPath = new Path(outputPathString);

    Job job = new Job(getConf(), "cdx-sort");
    Configuration conf = job.getConfiguration();
    job.setJarByClass(CDXSortDriver.class);

    job.setMapperClass(CDXCanonicalizingMapper.class);

    job.setReducerClass(CDXReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    // configure the "map mode"
    CDXCanonicalizingMapper.setMapMode(conf, mapMode);

    // set up the delimiter:
    conf.set(TEXT_OUTPUT_DELIM_CONFIG, delim);

    if (compressOutput) {
        FileOutputFormat.setCompressOutput(job, true);
        FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
    }

    // set up the Partitioner, including number of reduce tasks:
    FileSystem fs = inputPath.getFileSystem(conf);

    // one reduce task per line of the split file
    int splitCount = countLinesInPath(splitPath, conf);
    System.err.println("Split/Reduce count:" + splitCount);
    job.setNumReduceTasks(splitCount);

    AlphaPartitioner.setPartitionPath(conf, splitPathString);
    job.setPartitionerClass(AlphaPartitioner.class);

    // calculate the byte size to get the correct number of map tasks.
    // Divide as long: casting the length to int first would truncate
    // (or turn negative) for inputs of 2 GiB and larger.
    FileStatus inputStatus = fs.getFileStatus(inputPath);
    long inputLen = inputStatus.getLen();
    long bytesPerMap = inputLen / desiredMaps;

    FileInputFormat.addInputPath(job, inputPath);
    FileInputFormat.setMaxInputSplitSize(job, bytesPerMap);
    if (gzipRange) {
        job.setInputFormatClass(GZIPRangeLineDereferencingInputFormat.class);
    } else {
        job.setInputFormatClass(LineDereferencingInputFormat.class);
        if (compressedInput) {
            LineDereferencingRecordReader.forceCompressed(conf);
        }
    }
    FileOutputFormat.setOutputPath(job, outputPath);

    return (job.waitForCompletion(true) ? 0 : 1);
}

From source file:org.avenir.association.AssociationRuleMiner.java

License:Apache License

/**
 * Configures and runs the association rule mining job.
 *
 * @param args {@code args[0]} is the input path, {@code args[1]} the output path
 * @return 0 if the job completes successfully, 1 otherwise
 * @throws Exception if job setup or execution fails
 */
@Override
public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    job.setJobName("Association rule mining from frequent item sets");
    job.setJarByClass(AssociationRuleMiner.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    Utility.setConfiguration(job.getConfiguration(), "avenir");

    // map and reduce stages
    job.setMapperClass(AssociationRuleMiner.RuleMinerMapper.class);
    job.setReducerClass(AssociationRuleMiner.RuleMinerReducer.class);

    // grouping comparator and partitioner supplied by SecondarySort
    job.setGroupingComparatorClass(SecondarySort.TuplePairGroupComprator.class);
    job.setPartitionerClass(SecondarySort.TuplePairPartitioner.class);

    // intermediate (map output) types
    job.setMapOutputKeyClass(Tuple.class);
    job.setMapOutputValueClass(Tuple.class);

    // final output types
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    // job-specific reducer count, falling back to the shared "num.reducer" setting
    int reducerCount = job.getConfiguration().getInt("arm.num.reducer", -1);
    if (reducerCount == -1) {
        reducerCount = job.getConfiguration().getInt("num.reducer", 1);
    }
    job.setNumReduceTasks(reducerCount);

    boolean succeeded = job.waitForCompletion(true);
    return succeeded ? 0 : 1;
}

From source file:org.avenir.explore.ClassPartitionGenerator.java

License:Apache License

/**
 * Configures and runs the candidate split generator job.
 *
 * @param args command-line arguments, resolved to an input and an output
 *             path by {@code getPaths(args, job)}
 * @return 0 if the job completes successfully, 1 otherwise
 * @throws Exception if job setup or execution fails
 */
@Override
public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    job.setJobName("Candidate split generator for attributes");
    job.setJarByClass(ClassPartitionGenerator.class);
    Utility.setConfiguration(job.getConfiguration(), "avenir");

    // element 0 is used as the input path, element 1 as the output path
    String[] inOutPaths = getPaths(args, job);
    FileInputFormat.addInputPath(job, new Path(inOutPaths[0]));
    FileOutputFormat.setOutputPath(job, new Path(inOutPaths[1]));

    // map, combine and reduce stages
    job.setMapperClass(ClassPartitionGenerator.PartitionGeneratorMapper.class);
    job.setReducerClass(ClassPartitionGenerator.PartitionGeneratorReducer.class);
    job.setCombinerClass(ClassPartitionGenerator.PartitionGeneratorCombiner.class);

    // intermediate (map output) types
    job.setMapOutputKeyClass(Tuple.class);
    job.setMapOutputValueClass(IntWritable.class);

    // final output types
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    job.setPartitionerClass(AttributeSplitPartitioner.class);

    // reducer count comes from "num.reducer", defaulting to 1
    int reducerCount = job.getConfiguration().getInt("num.reducer", 1);
    job.setNumReduceTasks(reducerCount);

    boolean succeeded = job.waitForCompletion(true);
    return succeeded ? 0 : 1;
}

From source file:org.avenir.knn.FeatureCondProbJoiner.java

License:Apache License

/**
 * Configures and runs the training vector / feature conditional probability
 * joiner job.
 *
 * @param args {@code args[0]} is passed to {@code FileInputFormat.addInputPaths}
 *             as the input path specification, {@code args[1]} is the output path
 * @return 0 if the job completes successfully, 1 otherwise
 * @throws Exception if job setup or execution fails
 */
@Override
public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    job.setJobName("Training vector feature cond probability joiner  MR");
    job.setJarByClass(FeatureCondProbJoiner.class);

    FileInputFormat.addInputPaths(job, args[0]);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    // map and reduce stages
    job.setMapperClass(FeatureCondProbJoiner.JoinerMapper.class);
    job.setReducerClass(FeatureCondProbJoiner.JoinerReducer.class);

    // intermediate (map output) types
    job.setMapOutputKeyClass(Tuple.class);
    job.setMapOutputValueClass(Tuple.class);

    // final output types
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    // grouping comparator and partitioner supplied by SecondarySort
    job.setGroupingComparatorClass(SecondarySort.TuplePairGroupComprator.class);
    job.setPartitionerClass(SecondarySort.TuplePairPartitioner.class);

    Utility.setConfiguration(job.getConfiguration());

    // reducer count comes from "num.reducer", defaulting to 1
    int reducerCount = job.getConfiguration().getInt("num.reducer", 1);
    job.setNumReduceTasks(reducerCount);

    boolean succeeded = job.waitForCompletion(true);
    return succeeded ? 0 : 1;
}

From source file:org.avenir.knn.NearestNeighbor.java

License:Apache License

/**
 * Configures and runs the k-nearest-neighbor job.
 *
 * @param args {@code args[0]} is the input path, {@code args[1]} the output path
 * @return 0 if the job completes successfully, 1 otherwise
 * @throws Exception if job setup or execution fails
 */
@Override
public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    job.setJobName("K nerest neighbor(KNN)  MR");
    job.setJarByClass(NearestNeighbor.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    // map and reduce stages
    job.setMapperClass(NearestNeighbor.TopMatchesMapper.class);
    job.setReducerClass(NearestNeighbor.TopMatchesReducer.class);

    // intermediate (map output) types
    job.setMapOutputKeyClass(Tuple.class);
    job.setMapOutputValueClass(Tuple.class);

    // final output types
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    // grouping comparator and partitioner supplied by SecondarySort
    job.setGroupingComparatorClass(SecondarySort.TuplePairGroupComprator.class);
    job.setPartitionerClass(SecondarySort.TuplePairPartitioner.class);

    Utility.setConfiguration(job.getConfiguration());

    // reducer count comes from "num.reducer", defaulting to 1
    int reducerCount = job.getConfiguration().getInt("num.reducer", 1);
    job.setNumReduceTasks(reducerCount);

    boolean succeeded = job.waitForCompletion(true);
    return succeeded ? 0 : 1;
}

From source file:org.avenir.reinforce.RandomFirstGreedyBandit.java

License:Apache License

/**
 * Configures and runs the random-first greedy bandit job.
 *
 * @param args {@code args[0]} is the input path, {@code args[1]} the output path
 * @return 0 if the job completes successfully, 1 otherwise
 * @throws Exception if job setup or execution fails
 */
@Override
public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    job.setJobName("Random first greedy  bandit problem");
    job.setJarByClass(RandomFirstGreedyBandit.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    Utility.setConfiguration(job.getConfiguration(), "avenir");

    // map and reduce stages
    job.setMapperClass(RandomFirstGreedyBandit.BanditMapper.class);
    job.setReducerClass(RandomFirstGreedyBandit.BanditReducer.class);

    // intermediate (map output) types
    job.setMapOutputKeyClass(Tuple.class);
    job.setMapOutputValueClass(Text.class);

    // grouping comparator and partitioner supplied by SecondarySort
    job.setGroupingComparatorClass(SecondarySort.TuplePairGroupComprator.class);
    job.setPartitionerClass(SecondarySort.TupleTextPartitioner.class);

    // final output types
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    // reducer count comes from "num.reducer", defaulting to 1
    int reducerCount = job.getConfiguration().getInt("num.reducer", 1);
    job.setNumReduceTasks(reducerCount);

    boolean succeeded = job.waitForCompletion(true);
    return succeeded ? 0 : 1;
}

From source file:org.avenir.sequence.CandidateGenerationWithSelfJoin.java

License:Apache License

/**
 * Configures and runs the k-candidate sequence generation job.
 *
 * @param args {@code args[0]} is the input path, {@code args[1]} the output path
 * @return 0 if the job completes successfully, 1 otherwise
 * @throws Exception if job setup or execution fails
 */
@Override
public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    job.setJobName("Generates k candidate sequence");
    job.setJarByClass(CandidateGenerationWithSelfJoin.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    Utility.setConfiguration(job.getConfiguration(), "avenir");

    // map and reduce stages
    job.setMapperClass(CandidateGenerationWithSelfJoin.CandidateGenerationMapper.class);
    job.setReducerClass(CandidateGenerationWithSelfJoin.CandidateGenerationReducer.class);

    // intermediate (map output) types
    job.setMapOutputKeyClass(Tuple.class);
    job.setMapOutputValueClass(Tuple.class);

    // final output types
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    // grouping comparator and partitioner supplied by SecondarySort
    job.setGroupingComparatorClass(SecondarySort.TuplePairGroupComprator.class);
    job.setPartitionerClass(SecondarySort.TuplePairPartitioner.class);

    // job-specific reducer count, falling back to the shared "num.reducer" setting
    int reducerCount = job.getConfiguration().getInt("cgs.num.reducer", -1);
    if (reducerCount == -1) {
        reducerCount = job.getConfiguration().getInt("num.reducer", 1);
    }
    job.setNumReduceTasks(reducerCount);

    boolean succeeded = job.waitForCompletion(true);
    return succeeded ? 0 : 1;
}

From source file:org.avenir.tree.DataPartitioner.java

License:Apache License

/**
 * Configures and runs the job that partitions data by a split, then moves the
 * job output into segment directories when the job succeeds.
 *
 * <p>The input path comes from {@code getNodePath(job)}; the output path is the
 * input path with {@code /split=<index>} appended, where the index is taken
 * from the split returned by {@code findBestSplitKey}.
 *
 * @param args command-line arguments (not read directly by this method)
 * @return 0 if the job completes successfully, 1 otherwise
 * @throws Exception if job setup or execution fails
 */
@Override
public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    job.setJobName("Partitions data by some split");
    job.setJarByClass(DataPartitioner.class);

    Utility.setConfiguration(job.getConfiguration(), "avenir");
    debugOn = job.getConfiguration().getBoolean("debug.on", false);
    if (debugOn) {
        LOG.setLevel(Level.DEBUG);
    }

    job.setMapperClass(DataPartitioner.PartitionerMapper.class);
    job.setReducerClass(DataPartitioner.PartitionerReducer.class);

    // find best split and derive the output path from it
    String nodePath = getNodePath(job);
    if (debugOn) {
        System.out.println("inPath:" + nodePath);
    }
    Split bestSplit = findBestSplitKey(job, nodePath);
    String splitOutPath = nodePath + "/split=" + bestSplit.getIndex();
    if (debugOn) {
        System.out.println("outPath:" + splitOutPath);
    }

    FileInputFormat.addInputPath(job, new Path(nodePath));
    FileOutputFormat.setOutputPath(job, new Path(splitOutPath));

    // intermediate (map output) types
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(Text.class);

    // final output types
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    // one reduce task per segment of the chosen split
    job.setPartitionerClass(SecondarySort.RawIntKeyTextPartitioner.class);
    int segmentCount = bestSplit.getSegmentCount();
    if (debugOn) {
        System.out.println("numReducers:" + segmentCount);
    }
    job.setNumReduceTasks(segmentCount);

    boolean succeeded = job.waitForCompletion(true);
    if (succeeded) {
        // move output to segment directories
        moveOutputToSegmentDir(splitOutPath, bestSplit.getSegmentCount(), job.getConfiguration());
        return 0;
    }
    return 1;
}