Example usage of org.apache.hadoop.mapreduce.Job.setPartitionerClass

Introduction

This page collects example usages of the setPartitionerClass method of org.apache.hadoop.mapreduce.Job.

Prototype

public void setPartitionerClass(Class<? extends Partitioner> cls) throws IllegalStateException

Source Link

Document

Set the Partitioner for the job.

Usage

From source file:org.apache.tez.mapreduce.examples.MRRSleepJob.java

License:Apache License

/**
 * Builds the multi-stage (map, intermediate reduce, final reduce) sleep job.
 *
 * <p>The sleep settings are written into the configuration returned by {@code getConf()},
 * every intermediate reduce stage is configured with {@code ISleepReducer} and
 * {@code IntWritable} key/value classes, and a {@code Job} is then created from that
 * configuration.
 *
 * @param numMapper          value stored under {@code MRJobConfig.NUM_MAPS}
 * @param numReducer         number of reduce tasks set on the returned job
 * @param iReduceStagesCount number of intermediate reduce stages to configure
 * @param numIReducer        number of reduce tasks configured for each intermediate stage
 * @param mapSleepTime       value stored under {@code MAP_SLEEP_TIME}
 * @param mapSleepCount      value stored under {@code MAP_SLEEP_COUNT}
 * @param reduceSleepTime    value stored under {@code REDUCE_SLEEP_TIME}
 * @param reduceSleepCount   value stored under {@code REDUCE_SLEEP_COUNT}
 * @param iReduceSleepTime   value stored under {@code IREDUCE_SLEEP_TIME}
 * @param iReduceSleepCount  value stored under {@code IREDUCE_SLEEP_COUNT}
 * @return the configured, not yet submitted job
 * @throws IOException if the job instance cannot be created
 */
@VisibleForTesting
public Job createJob(int numMapper, int numReducer, int iReduceStagesCount, int numIReducer, long mapSleepTime,
        int mapSleepCount, long reduceSleepTime, int reduceSleepCount, long iReduceSleepTime,
        int iReduceSleepCount) throws IOException {
    Configuration conf = getConf();
    conf.setLong(MAP_SLEEP_TIME, mapSleepTime);
    conf.setLong(REDUCE_SLEEP_TIME, reduceSleepTime);
    conf.setLong(IREDUCE_SLEEP_TIME, iReduceSleepTime);
    conf.setInt(MAP_SLEEP_COUNT, mapSleepCount);
    conf.setInt(REDUCE_SLEEP_COUNT, reduceSleepCount);
    conf.setInt(IREDUCE_SLEEP_COUNT, iReduceSleepCount);
    conf.setInt(MRJobConfig.NUM_MAPS, numMapper);
    conf.setInt(IREDUCE_STAGES_COUNT, iReduceStagesCount);
    conf.setInt(IREDUCE_TASKS_COUNT, numIReducer);

    // Configure intermediate reduces
    conf.setInt(org.apache.tez.mapreduce.hadoop.MRJobConfig.MRR_INTERMEDIATE_STAGES, iReduceStagesCount);
    LOG.info("Running MRR with " + iReduceStagesCount + " IR stages");

    // Intermediate stages are numbered from 1
    for (int i = 1; i <= iReduceStagesCount; ++i) {
        // Reducer class for this intermediate stage
        conf.setClass(
                MultiStageMRConfigUtil.getPropertyNameForIntermediateStage(i, "mapreduce.job.reduce.class"),
                ISleepReducer.class, Reducer.class);
        // Key class for this stage, stored under the stage-specific map-output key property
        conf.setClass(
                MultiStageMRConfigUtil.getPropertyNameForIntermediateStage(i, "mapreduce.map.output.key.class"),
                IntWritable.class, Object.class);
        // Value class for this stage, stored under the stage-specific map-output value property
        conf.setClass(
                MultiStageMRConfigUtil.getPropertyNameForIntermediateStage(i, "mapreduce.map.output.value.class"),
                IntWritable.class, Object.class);
        // Number of reduce tasks for this stage
        conf.setInt(MultiStageMRConfigUtil.getPropertyNameForIntermediateStage(i, "mapreduce.job.reduces"),
                numIReducer);
    }

    Job job = Job.getInstance(conf, "sleep");
    job.setJarByClass(MRRSleepJob.class);
    // Set once; the original issued this identical call twice.
    job.setNumReduceTasks(numReducer);
    job.setMapperClass(SleepMapper.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setReducerClass(SleepReducer.class);
    job.setOutputFormatClass(NullOutputFormat.class);
    job.setInputFormatClass(SleepInputFormat.class);
    job.setPartitionerClass(MRRSleepJobPartitioner.class);
    job.setSpeculativeExecution(false);
    // Replaces the "sleep" name given to Job.getInstance above
    job.setJobName("Sleep job");

    // NOTE(review): the literal "ignored" suggests SleepInputFormat does not read this path - confirm
    FileInputFormat.addInputPath(job, new Path("ignored"));
    return job;
}

From source file:org.apache.tez.mapreduce.examples.SecondarySort.java

License:Apache License

/**
 * Configures and runs the secondary-sort job.
 *
 * <p>After generic Hadoop options have been parsed out of {@code args}, exactly two
 * arguments must remain: the input path and the output path.
 *
 * @param args command-line arguments, possibly including generic Hadoop options
 * @return {@code 2} if the remaining argument count is not two; otherwise {@code 0}
 *         if {@code waitForCompletion} reports success and {@code 1} if it does not
 * @throws Exception if option parsing, job setup or job execution fails
 */
@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        // Keep the whole usage message on stderr so it is not split across two streams.
        System.err.println("Usage: secondarysort <in> <out>");
        ToolRunner.printGenericCommandUsage(System.err);
        return 2;
    }

    // Job.getInstance replaces the deprecated Job(Configuration, String) constructor.
    Job job = Job.getInstance(conf, "secondary sort");
    job.setJarByClass(SecondarySort.class);
    job.setMapperClass(MapClass.class);
    job.setReducerClass(Reduce.class);

    // group and partition by the first int in the pair
    job.setPartitionerClass(FirstPartitioner.class);
    job.setGroupingComparatorClass(FirstGroupingComparator.class);

    // the map output is IntPair, IntWritable
    job.setMapOutputKeyClass(IntPair.class);
    job.setMapOutputValueClass(IntWritable.class);

    // the reduce output is Text, IntWritable
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:org.archive.wayback.hadoop.CDXSortDriver.java

License:Apache License

/**
 * The main driver for the sort program. Invoke this method to submit the
 * map/reduce job.
 *
 * <p>Recognised options: {@code -m <maps>}, {@code --compress-output},
 * {@code --compressed-input}, {@code --gzip-range}, {@code --delimiter <delim>},
 * {@code --map-full} and {@code --map-global}. Exactly three positional arguments
 * must remain, in this order: split path, input path, output path.
 *
 * @param args command-line arguments
 * @return the value of {@code printUsage()} when the arguments are invalid; otherwise
 *         {@code 0} if the job completed successfully and {@code 1} if it did not
 * @throws Exception if job setup or execution fails, for example an
 *             {@code IOException} when there are communication problems with
 *             the job tracker
 */
public int run(String[] args) throws Exception {

    String delim = " ";

    long desiredMaps = 10;
    boolean compressOutput = false;
    boolean compressedInput = false;
    boolean gzipRange = false;
    List<String> otherArgs = new ArrayList<String>();
    int mapMode = CDXCanonicalizingMapper.MODE_FULL;
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-m".equals(args[i])) {
                desiredMaps = Integer.parseInt(args[++i]);
            } else if ("--compress-output".equals(args[i])) {
                compressOutput = true;
            } else if ("--compressed-input".equals(args[i])) {
                compressedInput = true;
            } else if ("--gzip-range".equals(args[i])) {
                gzipRange = true;
            } else if ("--delimiter".equals(args[i])) {
                delim = args[++i];
            } else if ("--map-full".equals(args[i])) {
                mapMode = CDXCanonicalizingMapper.MODE_FULL;
            } else if ("--map-global".equals(args[i])) {
                mapMode = CDXCanonicalizingMapper.MODE_GLOBAL;
            } else {
                otherArgs.add(args[i]);
            }
        } catch (NumberFormatException except) {
            // i was already advanced past the flag, so args[i] is the offending value
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            // the flag was the last argument; after ++i it sits at args[i - 1]
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage();
        }
    }

    // The map count is used as a divisor below, so it must be strictly positive.
    if (desiredMaps <= 0) {
        System.out.println("ERROR: Number of maps must be positive instead of " + desiredMaps);
        return printUsage();
    }

    // Make sure there are exactly 3 parameters left: split input output
    if (otherArgs.size() != 3) {
        System.out.println("ERROR: Wrong number of parameters: " + otherArgs.size() + " instead of 3.");
        return printUsage();
    }

    String splitPathString = otherArgs.get(0);
    String inputPathString = otherArgs.get(1);
    String outputPathString = otherArgs.get(2);

    Path splitPath = new Path(splitPathString);
    Path inputPath = new Path(inputPathString);
    Path outputPath = new Path(outputPathString);

    Job job = new Job(getConf(), "cdx-sort");
    Configuration conf = job.getConfiguration();
    job.setJarByClass(CDXSortDriver.class);

    job.setMapperClass(CDXCanonicalizingMapper.class);

    job.setReducerClass(CDXReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    // configure the "map mode"
    CDXCanonicalizingMapper.setMapMode(conf, mapMode);

    // set up the delimiter:
    conf.set(TEXT_OUTPUT_DELIM_CONFIG, delim);

    if (compressOutput) {
        FileOutputFormat.setCompressOutput(job, true);
        FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
    }

    // set up the Partitioner, including number of reduce tasks:
    FileSystem fs = inputPath.getFileSystem(conf);

    // one reduce task per line of the split file
    int splitCount = countLinesInPath(splitPath, conf);
    System.err.println("Split/Reduce count:" + splitCount);
    job.setNumReduceTasks(splitCount);

    AlphaPartitioner.setPartitionPath(conf, splitPathString);
    job.setPartitionerClass(AlphaPartitioner.class);

    // calculate the byte size to get the correct number of map tasks.
    // Divide as long: the former (int) cast bound to inputLen alone and
    // truncated any input length above Integer.MAX_VALUE before the division.
    FileStatus inputStatus = fs.getFileStatus(inputPath);
    long inputLen = inputStatus.getLen();
    long bytesPerMap = inputLen / desiredMaps;

    FileInputFormat.addInputPath(job, inputPath);
    FileInputFormat.setMaxInputSplitSize(job, bytesPerMap);
    if (gzipRange) {
        job.setInputFormatClass(GZIPRangeLineDereferencingInputFormat.class);
    } else {
        job.setInputFormatClass(LineDereferencingInputFormat.class);
        if (compressedInput) {
            LineDereferencingRecordReader.forceCompressed(conf);
        }
    }
    FileOutputFormat.setOutputPath(job, outputPath);

    return (job.waitForCompletion(true) ? 0 : 1);
}

From source file:org.avenir.association.AssociationRuleMiner.java

License:Apache License

/**
 * Configures and runs the association rule mining job.
 *
 * @param args {@code args[0]} is used as the input path, {@code args[1]} as the output path
 * @return {@code 0} if {@code waitForCompletion} reports success, {@code 1} otherwise
 * @throws Exception if job setup or execution fails
 */
@Override
public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    job.setJobName("Association rule mining from frequent item sets");
    job.setJarByClass(AssociationRuleMiner.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    // NOTE(review): presumably loads the "avenir" project settings into the job configuration - confirm
    Utility.setConfiguration(job.getConfiguration(), "avenir");

    // map side
    job.setMapperClass(AssociationRuleMiner.RuleMinerMapper.class);
    job.setMapOutputKeyClass(Tuple.class);
    job.setMapOutputValueClass(Tuple.class);

    // shuffle: partitioning and grouping
    job.setPartitionerClass(SecondarySort.TuplePairPartitioner.class);
    job.setGroupingComparatorClass(SecondarySort.TuplePairGroupComprator.class);

    // reduce side
    job.setReducerClass(AssociationRuleMiner.RuleMinerReducer.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    // "arm.num.reducer" takes precedence; fall back to "num.reducer", then to 1
    int reducerCount = job.getConfiguration().getInt("arm.num.reducer", -1);
    if (reducerCount == -1) {
        reducerCount = job.getConfiguration().getInt("num.reducer", 1);
    }
    job.setNumReduceTasks(reducerCount);

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:org.avenir.explore.ClassPartitionGenerator.java

License:Apache License

/**
 * Configures and runs the candidate split generator job.
 *
 * @param args command-line arguments, passed to {@code getPaths} to obtain the
 *             input path (index 0) and output path (index 1)
 * @return {@code 0} if {@code waitForCompletion} reports success, {@code 1} otherwise
 * @throws Exception if job setup or execution fails
 */
@Override
public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    job.setJobName("Candidate split generator for attributes");
    job.setJarByClass(ClassPartitionGenerator.class);

    // NOTE(review): presumably loads the "avenir" project settings into the job configuration - confirm
    Utility.setConfiguration(job.getConfiguration(), "avenir");

    String[] inOutPaths = getPaths(args, job);
    FileInputFormat.addInputPath(job, new Path(inOutPaths[0]));
    FileOutputFormat.setOutputPath(job, new Path(inOutPaths[1]));

    // map side
    job.setMapperClass(ClassPartitionGenerator.PartitionGeneratorMapper.class);
    job.setCombinerClass(ClassPartitionGenerator.PartitionGeneratorCombiner.class);
    job.setMapOutputKeyClass(Tuple.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setPartitionerClass(AttributeSplitPartitioner.class);

    // reduce side
    job.setReducerClass(ClassPartitionGenerator.PartitionGeneratorReducer.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    // reducer count comes from "num.reducer", defaulting to 1
    int reducerCount = job.getConfiguration().getInt("num.reducer", 1);
    job.setNumReduceTasks(reducerCount);

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:org.avenir.knn.FeatureCondProbJoiner.java

License:Apache License

/**
 * Configures and runs the feature conditional probability joiner job.
 *
 * @param args {@code args[0]} is handed to {@code FileInputFormat.addInputPaths},
 *             {@code args[1]} is used as the output path
 * @return {@code 0} if {@code waitForCompletion} reports success, {@code 1} otherwise
 * @throws Exception if job setup or execution fails
 */
@Override
public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    job.setJobName("Training vector feature cond probability joiner  MR");
    job.setJarByClass(FeatureCondProbJoiner.class);

    FileInputFormat.addInputPaths(job, args[0]);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    // map side
    job.setMapperClass(FeatureCondProbJoiner.JoinerMapper.class);
    job.setMapOutputKeyClass(Tuple.class);
    job.setMapOutputValueClass(Tuple.class);

    // shuffle: partitioning and grouping
    job.setPartitionerClass(SecondarySort.TuplePairPartitioner.class);
    job.setGroupingComparatorClass(SecondarySort.TuplePairGroupComprator.class);

    // reduce side
    job.setReducerClass(FeatureCondProbJoiner.JoinerReducer.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    // Applied after the class settings and before the reducer count is read
    Utility.setConfiguration(job.getConfiguration());

    // reducer count comes from "num.reducer", defaulting to 1
    int reducerCount = job.getConfiguration().getInt("num.reducer", 1);
    job.setNumReduceTasks(reducerCount);

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:org.avenir.knn.NearestNeighbor.java

License:Apache License

/**
 * Configures and runs the nearest neighbor job.
 *
 * @param args {@code args[0]} is used as the input path, {@code args[1]} as the output path
 * @return {@code 0} if {@code waitForCompletion} reports success, {@code 1} otherwise
 * @throws Exception if job setup or execution fails
 */
@Override
public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    job.setJobName("K nerest neighbor(KNN)  MR");
    job.setJarByClass(NearestNeighbor.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    // map side
    job.setMapperClass(NearestNeighbor.TopMatchesMapper.class);
    job.setMapOutputKeyClass(Tuple.class);
    job.setMapOutputValueClass(Tuple.class);

    // shuffle: partitioning and grouping
    job.setPartitionerClass(SecondarySort.TuplePairPartitioner.class);
    job.setGroupingComparatorClass(SecondarySort.TuplePairGroupComprator.class);

    // reduce side
    job.setReducerClass(NearestNeighbor.TopMatchesReducer.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    // Applied after the class settings and before the reducer count is read
    Utility.setConfiguration(job.getConfiguration());

    // reducer count comes from "num.reducer", defaulting to 1
    int reducerCount = job.getConfiguration().getInt("num.reducer", 1);
    job.setNumReduceTasks(reducerCount);

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:org.avenir.reinforce.RandomFirstGreedyBandit.java

License:Apache License

/**
 * Configures and runs the random-first greedy bandit job.
 *
 * @param args {@code args[0]} is used as the input path, {@code args[1]} as the output path
 * @return {@code 0} if {@code waitForCompletion} reports success, {@code 1} otherwise
 * @throws Exception if job setup or execution fails
 */
@Override
public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    job.setJobName("Random first greedy  bandit problem");
    job.setJarByClass(RandomFirstGreedyBandit.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    // NOTE(review): presumably loads the "avenir" project settings into the job configuration - confirm
    Utility.setConfiguration(job.getConfiguration(), "avenir");

    // map side
    job.setMapperClass(RandomFirstGreedyBandit.BanditMapper.class);
    job.setMapOutputKeyClass(Tuple.class);
    job.setMapOutputValueClass(Text.class);

    // shuffle: partitioning and grouping
    job.setPartitionerClass(SecondarySort.TupleTextPartitioner.class);
    job.setGroupingComparatorClass(SecondarySort.TuplePairGroupComprator.class);

    // reduce side
    job.setReducerClass(RandomFirstGreedyBandit.BanditReducer.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    // reducer count comes from "num.reducer", defaulting to 1
    int reducerCount = job.getConfiguration().getInt("num.reducer", 1);
    job.setNumReduceTasks(reducerCount);

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:org.avenir.sequence.CandidateGenerationWithSelfJoin.java

License:Apache License

/**
 * Configures and runs the candidate sequence generation job.
 *
 * @param args {@code args[0]} is used as the input path, {@code args[1]} as the output path
 * @return {@code 0} if {@code waitForCompletion} reports success, {@code 1} otherwise
 * @throws Exception if job setup or execution fails
 */
@Override
public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    job.setJobName("Generates k candidate sequence");
    job.setJarByClass(CandidateGenerationWithSelfJoin.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    // NOTE(review): presumably loads the "avenir" project settings into the job configuration - confirm
    Utility.setConfiguration(job.getConfiguration(), "avenir");

    // map side
    job.setMapperClass(CandidateGenerationWithSelfJoin.CandidateGenerationMapper.class);
    job.setMapOutputKeyClass(Tuple.class);
    job.setMapOutputValueClass(Tuple.class);

    // shuffle: partitioning and grouping
    job.setPartitionerClass(SecondarySort.TuplePairPartitioner.class);
    job.setGroupingComparatorClass(SecondarySort.TuplePairGroupComprator.class);

    // reduce side
    job.setReducerClass(CandidateGenerationWithSelfJoin.CandidateGenerationReducer.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    // "cgs.num.reducer" takes precedence; fall back to "num.reducer", then to 1
    int reducerCount = job.getConfiguration().getInt("cgs.num.reducer", -1);
    if (reducerCount == -1) {
        reducerCount = job.getConfiguration().getInt("num.reducer", 1);
    }
    job.setNumReduceTasks(reducerCount);

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:org.avenir.tree.DataPartitioner.java

License:Apache License

/**
 * Configures and runs the data partitioning job, then moves its output into
 * segment directories when the job succeeds.
 *
 * <p>The input path comes from {@code getNodePath}; the output path is that path
 * with {@code /split=<index>} appended, where the index is taken from the split
 * returned by {@code findBestSplitKey}. The number of reduce tasks equals the
 * split's segment count.
 *
 * @param args command-line arguments; not read directly by this method
 * @return {@code 0} if {@code waitForCompletion} reports success, {@code 1} otherwise
 * @throws Exception if job setup, execution or the output move fails
 */
@Override
public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    job.setJobName("Partitions data by some split");
    job.setJarByClass(DataPartitioner.class);

    // NOTE(review): presumably loads the "avenir" project settings into the job configuration - confirm
    Utility.setConfiguration(job.getConfiguration(), "avenir");
    debugOn = job.getConfiguration().getBoolean("debug.on", false);
    if (debugOn) {
        LOG.setLevel(Level.DEBUG);
    }

    job.setMapperClass(DataPartitioner.PartitionerMapper.class);
    job.setReducerClass(DataPartitioner.PartitionerReducer.class);

    // find the best split and derive the output path from it
    String nodePath = getNodePath(job);
    if (debugOn) {
        System.out.println("inPath:" + nodePath);
    }
    Split split = findBestSplitKey(job, nodePath);
    String splitOutputPath = nodePath + "/split=" + split.getIndex();
    if (debugOn) {
        System.out.println("outPath:" + splitOutputPath);
    }

    FileInputFormat.addInputPath(job, new Path(nodePath));
    FileOutputFormat.setOutputPath(job, new Path(splitOutputPath));

    // map output types
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(Text.class);

    // final output types
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    // one reduce task per segment of the chosen split
    job.setPartitionerClass(SecondarySort.RawIntKeyTextPartitioner.class);
    int segmentCount = split.getSegmentCount();
    if (debugOn) {
        System.out.println("numReducers:" + segmentCount);
    }
    job.setNumReduceTasks(segmentCount);

    int exitCode = job.waitForCompletion(true) ? 0 : 1;
    // move output to segment directories, only after a successful run
    if (exitCode == 0) {
        moveOutputToSegmentDir(splitOutputPath, split.getSegmentCount(), job.getConfiguration());
    }
    return exitCode;
}