List of usage examples for org.apache.hadoop.mapreduce.Job.setMapOutputValueClass
public void setMapOutputValueClass(Class<?> theClass) throws IllegalStateException
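setMapOutputValueClass declares the value type the mapper emits. It is only needed when the intermediate (map output) value class differs from the final output value class set via setOutputValueClass, since the map output types default to the job's output types; it throws IllegalStateException if the job has already been submitted. Before the examples, here is a minimal sketch of the typical pattern (WordCountDriver, WordCountMapper, and WordCountReducer are hypothetical names, not taken from the examples below):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordCountDriver {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "word count");
        job.setJarByClass(WordCountDriver.class);
        job.setMapperClass(WordCountMapper.class);   // hypothetical mapper: emits (Text, IntWritable)
        job.setReducerClass(WordCountReducer.class); // hypothetical reducer: writes (Text, LongWritable)

        // Intermediate (map output) types differ from the final types below,
        // so they must be declared explicitly.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);

        // Final (reducer output) types.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}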
From source file: csc555.ebratt.depaul.edu.GildedSorterDriver.java
License: Open Source License

/**
 * Runs the driver by creating a new Hadoop Job based on the configuration.
 * Defines the input/output paths based on the first two arguments. Allows
 * for an optional combiner based on the third argument.
 *
 * @param args
 *            [0] the input directory on HDFS
 * @param args
 *            [1] the output directory on HDFS
 * @param args
 *            [2] whether or not to use a combiner ("yes"); if so,
 *            GildedSorterReducer.class is used as the combiner
 * @throws Exception
 *             if there is an issue with any of the arguments
 */
@Override
public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    StringBuffer sb = new StringBuffer();
    sb.append("sorted gild counts");
    job.setJobName(sb.toString());
    Path in = new Path(args[0]);
    Path out = new Path(args[1]);
    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);

    // to ensure output is sorted
    job.setNumReduceTasks(1);

    // Mapper and Reducer classes to use
    job.setMapperClass(GildedSorterMapper.class);
    job.setReducerClass(GildedSorterReducer.class);

    // Mapper output classes
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(Text.class);

    // Input format class
    job.setInputFormatClass(TextInputFormat.class);

    // Reducer output classes
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Text.class);

    // Output format class
    job.setOutputFormatClass(TextOutputFormat.class);

    // Combiner
    if (args[2].equals("yes")) {
        job.setCombinerClass(GildedSorterReducer.class);
    }

    // sort in descending order
    job.setSortComparatorClass(LongWritable.DecreasingComparator.class);

    // The JAR file to run
    job.setJarByClass(GildedSorterDriver.class);

    boolean success = job.waitForCompletion(true);
    System.exit(success ? 0 : 1);
    return 0;
}
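These drivers call getConf() and override run(String[]), which is the org.apache.hadoop.util.Tool pattern, so they are presumably launched through ToolRunner. The examples' actual main methods are not shown; a hedged sketch of a typical launcher for the driver above:

// Hypothetical launcher (not part of the example source). ToolRunner strips
// the generic Hadoop options (-D, -conf, -fs, ...) before passing the
// remaining arguments to run(String[]).
// Requires: org.apache.hadoop.conf.Configuration, org.apache.hadoop.util.ToolRunner
public static void main(String[] args) throws Exception {
    int exitCode = ToolRunner.run(new Configuration(), new GildedSorterDriver(), args);
    System.exit(exitCode);
}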
From source file: csc555.ebratt.depaul.edu.GildPercentDriverPass1.java
License: Open Source License

/**
 * Runs the driver by creating a new Hadoop Job based on the configuration.
 * Defines the input/output paths based on the first two arguments. Allows
 * for an optional combiner based on the third argument.
 *
 * @param args
 *            [0] the input directory on HDFS
 * @param args
 *            [1] the output directory on HDFS
 * @param args
 *            [2] whether or not to use a combiner ("yes"); if so,
 *            GildPercentReducerPass1.class is used as the combiner
 * @throws Exception
 *             if there is an issue with any of the arguments
 */
@Override
public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    String groupBy = getConf().get("groupBy");
    StringBuffer sb = new StringBuffer();
    sb.append("gild percent of: ");
    sb.append(groupBy);
    job.setJobName(sb.toString());
    Path in = new Path(args[0]);
    Path out = new Path(args[1]);
    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);

    // testing -- ensure each node gets 2 reducers
    JobConf jobConf = new JobConf(getConf(), GildPercentDriverPass1.class);
    JobClient jobClient = new JobClient(jobConf);
    ClusterStatus cluster = jobClient.getClusterStatus();
    job.setNumReduceTasks(cluster.getTaskTrackers() * 2);

    // Mapper and Reducer classes to use
    job.setMapperClass(GildPercentMapperPass1.class);
    job.setReducerClass(GildPercentReducerPass1.class);

    // Mapper output classes
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(DoubleWritable.class);

    // Input format class
    job.setInputFormatClass(TextInputFormat.class);

    // Reducer output classes
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(DoubleWritable.class);

    // Output format class
    job.setOutputFormatClass(TextOutputFormat.class);

    // Combiner
    if (args[2].equals("yes")) {
        job.setCombinerClass(GildPercentReducerPass1.class);
    }

    // The JAR file to run
    job.setJarByClass(GildPercentDriverPass1.class);

    boolean success = job.waitForCompletion(true);
    System.exit(success ? 0 : 1);
    return 0;
}
From source file: csc555.ebratt.depaul.edu.GildPercentDriverPass2.java
License: Open Source License

/**
 * Runs the driver by creating a new Hadoop Job based on the configuration.
 * Defines the input/output paths based on the first two arguments. Allows
 * for an optional combiner based on the third argument.
 *
 * @param args
 *            [0] the input directory on HDFS
 * @param args
 *            [1] the output directory on HDFS
 * @param args
 *            [2] whether or not to use a combiner ("yes"); if so,
 *            GildPercentReducerPass2.class is used as the combiner
 * @throws Exception
 *             if there is an issue with any of the arguments
 */
@Override
public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    StringBuffer sb = new StringBuffer();
    sb.append("sorted gild percent");
    job.setJobName(sb.toString());
    Path in = new Path(args[0]);
    Path out = new Path(args[1]);
    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);

    // to ensure output is sorted
    job.setNumReduceTasks(1);

    // Mapper and Reducer classes to use
    job.setMapperClass(GildPercentMapperPass2.class);
    job.setReducerClass(GildPercentReducerPass2.class);

    // Mapper output classes
    job.setMapOutputKeyClass(DoubleWritable.class);
    job.setMapOutputValueClass(Text.class);

    // Input format class
    job.setInputFormatClass(TextInputFormat.class);

    // Reducer output classes
    job.setOutputKeyClass(DoubleWritable.class);
    job.setOutputValueClass(Text.class);

    // Output format class
    job.setOutputFormatClass(TextOutputFormat.class);

    // Combiner
    if (args[2].equals("yes")) {
        job.setCombinerClass(GildPercentReducerPass2.class);
    }

    // sort in descending order
    job.setSortComparatorClass(DoubleWritableDescendingComparator.class);

    // The JAR file to run
    job.setJarByClass(GildPercentDriverPass2.class);

    boolean success = job.waitForCompletion(true);
    System.exit(success ? 0 : 1);
    return 0;
}
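Unlike LongWritable, DoubleWritable does not ship with a decreasing comparator, which is presumably why this example references a custom DoubleWritableDescendingComparator. Its source is not included in this excerpt; a conventional implementation might look like the following sketch (an assumption, not the example's actual code):

import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;

// Sketch of a descending comparator for DoubleWritable keys, following the
// standard WritableComparator pattern.
public class DoubleWritableDescendingComparator extends WritableComparator {
    protected DoubleWritableDescendingComparator() {
        super(DoubleWritable.class, true); // true: create key instances for deserialization
    }

    @Override
    @SuppressWarnings("rawtypes")
    public int compare(WritableComparable a, WritableComparable b) {
        // Negate the natural (ascending) order to sort largest-first.
        return -((DoubleWritable) a).compareTo((DoubleWritable) b);
    }
}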
From source file: csc555.ebratt.depaul.edu.RCTop10Driver.java
License: Open Source License

/**
 * Runs the driver by creating a new Hadoop Job based on the configuration.
 * Defines the input/output paths based on the first two arguments.
 *
 * @param args
 *            [0] the input directory on HDFS
 * @param args
 *            [1] the output directory on HDFS
 * @throws Exception
 *             if there is an issue with any of the arguments
 */
public int run(String[] args) throws Exception {
    Job job = new Job(getConf(), "Top 10 Reddit");
    Path in = new Path(args[0]);
    Path out = new Path(args[1]);
    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);

    // ensure one reduce task for ranking
    job.setNumReduceTasks(1);

    // Mapper and Reducer classes to use
    job.setMapperClass(RCTop10Mapper.class);
    job.setReducerClass(RCTop10Reducer.class);

    // Mapper output classes
    job.setMapOutputKeyClass(GroupByCountPair.class);
    job.setMapOutputValueClass(Text.class);

    // set custom partitioner
    job.setPartitionerClass(GroupByCountPairPartitioner.class);

    // set custom grouping comparator
    job.setGroupingComparatorClass(GroupByGroupingComparator.class);

    // Input format class
    job.setInputFormatClass(KeyValueTextInputFormat.class);

    // Reducer output classes
    job.setOutputKeyClass(GroupByCountPair.class);
    job.setOutputValueClass(Text.class);

    // Output format class
    job.setOutputFormatClass(TextOutputFormat.class);

    // The JAR file to run
    job.setJarByClass(RCTop10Driver.class);

    boolean success = job.waitForCompletion(true);
    System.exit(success ? 0 : 1);
    return 0;
}
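This driver uses the classic secondary-sort pattern: a composite map output key (GroupByCountPair), a custom partitioner that routes every record of one group to the same reducer, and a grouping comparator so that all values for a group arrive in a single reduce call. Neither helper class is shown in this excerpt; the following is only a sketch of what the grouping comparator conventionally looks like, where getGroupBy() is an assumed accessor for the natural (grouping) part of the key:

import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;

// Sketch only: GroupByCountPair's real API is not shown in this excerpt,
// and getGroupBy() is a hypothetical accessor.
public class GroupByGroupingComparator extends WritableComparator {
    protected GroupByGroupingComparator() {
        super(GroupByCountPair.class, true);
    }

    @Override
    @SuppressWarnings("rawtypes")
    public int compare(WritableComparable a, WritableComparable b) {
        // Compare on the group field only, ignoring the count, so all counts
        // for one group are presented to a single reduce() call.
        return ((GroupByCountPair) a).getGroupBy()
                .compareTo(((GroupByCountPair) b).getGroupBy());
    }
}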
From source file: csc555.ebratt.depaul.edu.RCWordCountAcronymsDriver.java
License: Open Source License

/**
 * Runs the driver by creating a new Hadoop Job based on the configuration.
 * Defines the input/output paths based on the first two arguments. Allows
 * for an optional combiner based on the fourth argument.
 *
 * @param args
 *            [0] the input directory on HDFS
 * @param args
 *            [1] the output directory on HDFS
 * @param args
 *            [3] whether or not to use a combiner ("yes"); if so,
 *            RCWordCountReducer.class is used as the combiner
 * @throws Exception
 *             if there is an issue with any of the arguments
 */
@Override
public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    String aggregate = getConf().get("aggregate");
    String groupBy = getConf().get("groupBy");
    StringBuffer sb = new StringBuffer();
    sb.append("count of acronyms in: ");
    sb.append(aggregate);
    sb.append("; grouped by: ");
    sb.append(groupBy);
    job.setJobName(sb.toString());
    Path in = new Path(args[0]);
    Path out = new Path(args[1]);
    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);

    // testing -- ensure each node gets 2 reducers
    JobConf jobConf = new JobConf(getConf(), RCWordCountAcronymsDriver.class);
    JobClient jobClient = new JobClient(jobConf);
    ClusterStatus cluster = jobClient.getClusterStatus();
    job.setNumReduceTasks(cluster.getTaskTrackers() * 2);

    // Mapper and Reducer classes to use
    job.setMapperClass(RCWordCountMapper.class);
    job.setReducerClass(RCWordCountReducer.class);

    // Mapper output classes
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    // Input format class
    job.setInputFormatClass(TextInputFormat.class);

    // Reducer output classes
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    // Output format class
    job.setOutputFormatClass(TextOutputFormat.class);

    // Combiner
    if (args[3].equals("yes")) {
        job.setCombinerClass(RCWordCountReducer.class);
    }

    // The JAR file to run
    job.setJarByClass(RCWordCountAcronymsDriver.class);

    boolean success = job.waitForCompletion(true);
    System.exit(success ? 0 : 1);
    return 0;
}
From source file: csc555.ebratt.depaul.edu.RCWordCountDriver.java
License: Open Source License

/**
 * Runs the driver by creating a new Hadoop Job based on the configuration.
 * Defines the input/output paths based on the first two arguments. Allows
 * for an optional combiner based on the fourth argument.
 *
 * @param args
 *            [0] the input directory on HDFS
 * @param args
 *            [1] the output directory on HDFS
 * @param args
 *            [3] whether or not to use a combiner ("yes"); if so,
 *            RCWordCountReducer.class is used as the combiner
 * @throws Exception
 *             if there is an issue with any of the arguments
 */
@Override
public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    String aggregate = getConf().get("aggregate");
    String groupBy = getConf().get("groupBy");
    StringBuffer sb = new StringBuffer();
    sb.append("count of: ");
    sb.append(aggregate);
    sb.append("; grouped by: ");
    sb.append(groupBy);
    job.setJobName(sb.toString());
    Path in = new Path(args[0]);
    Path out = new Path(args[1]);
    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);

    // testing -- ensure each node gets 5 reducers
    JobConf jobConf = new JobConf(getConf(), RCWordCountDriver.class);
    JobClient jobClient = new JobClient(jobConf);
    ClusterStatus cluster = jobClient.getClusterStatus();
    job.setNumReduceTasks(cluster.getTaskTrackers() * 5);

    // Mapper and Reducer classes to use
    job.setMapperClass(RCWordCountMapper.class);
    job.setReducerClass(RCWordCountReducer.class);

    // Mapper output classes
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    // Input format class
    job.setInputFormatClass(TextInputFormat.class);

    // Reducer output classes
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    // Output format class
    job.setOutputFormatClass(TextOutputFormat.class);

    // Combiner
    if (args[3].equals("yes")) {
        job.setCombinerClass(RCWordCountReducer.class);
    }

    // The JAR file to run
    job.setJarByClass(RCWordCountDriver.class);

    boolean success = job.waitForCompletion(true);
    System.exit(success ? 0 : 1);
    return 0;
}
From source file: csc555.ebratt.depaul.edu.VoteCountDriver.java
License: Open Source License

/**
 * Runs the driver by creating a new Hadoop Job based on the configuration.
 * Defines the input/output paths based on the first two arguments. Allows
 * for an optional combiner based on the third argument.
 *
 * @param args
 *            [0] the input directory on HDFS
 * @param args
 *            [1] the output directory on HDFS
 * @param args
 *            [2] whether or not to use a combiner ("yes"); if so,
 *            LongSumReducer.class is used as the combiner
 * @throws Exception
 *             if there is an issue with any of the arguments
 */
@Override
public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    String groupBy = getConf().get("groupBy");
    StringBuffer sb = new StringBuffer();
    sb.append("count of votes grouped by: ");
    sb.append(groupBy);
    job.setJobName(sb.toString());
    Path in = new Path(args[0]);
    Path out = new Path(args[1]);
    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);

    // testing -- ensure each node gets 2 reducers
    JobConf jobConf = new JobConf(getConf(), VoteCountDriver.class);
    JobClient jobClient = new JobClient(jobConf);
    ClusterStatus cluster = jobClient.getClusterStatus();
    job.setNumReduceTasks(cluster.getTaskTrackers() * 2);

    // Mapper and Reducer classes to use
    job.setMapperClass(VoteCountMapper.class);
    job.setReducerClass(LongSumReducer.class);

    // Mapper output classes
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);

    // Input format class
    job.setInputFormatClass(TextInputFormat.class);

    // Reducer output classes
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    // Output format class
    job.setOutputFormatClass(TextOutputFormat.class);

    // Combiner
    if (args[2].equals("yes")) {
        job.setCombinerClass(LongSumReducer.class);
    }

    // The JAR file to run
    job.setJarByClass(VoteCountDriver.class);

    boolean success = job.waitForCompletion(true);
    System.exit(success ? 0 : 1);
    return 0;
}
From source file: csc555.ebratt.depaul.edu.VoteSorterDriver.java
License: Open Source License

/**
 * Runs the driver by creating a new Hadoop Job based on the configuration.
 * Defines the input/output paths based on the first two arguments. Allows
 * for an optional combiner based on the third argument.
 *
 * @param args
 *            [0] the input directory on HDFS
 * @param args
 *            [1] the output directory on HDFS
 * @param args
 *            [2] whether or not to use a combiner ("yes"); if so,
 *            VoteSorterReducer.class is used as the combiner
 * @throws Exception
 *             if there is an issue with any of the arguments
 */
@Override
public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    StringBuffer sb = new StringBuffer();
    sb.append("sorted vote counts");
    job.setJobName(sb.toString());
    Path in = new Path(args[0]);
    Path out = new Path(args[1]);
    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);

    // to ensure output is sorted
    job.setNumReduceTasks(1);

    // Mapper and Reducer classes to use
    job.setMapperClass(VoteSorterMapper.class);
    job.setReducerClass(VoteSorterReducer.class);

    // Mapper output classes
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(Text.class);

    // Input format class
    job.setInputFormatClass(TextInputFormat.class);

    // Reducer output classes
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Text.class);

    // Output format class
    job.setOutputFormatClass(TextOutputFormat.class);

    // Combiner
    if (args[2].equals("yes")) {
        job.setCombinerClass(VoteSorterReducer.class);
    }

    // sort in descending order
    job.setSortComparatorClass(LongWritable.DecreasingComparator.class);

    // The JAR file to run
    job.setJarByClass(VoteSorterDriver.class);

    boolean success = job.waitForCompletion(true);
    System.exit(success ? 0 : 1);
    return 0;
}
From source file: de.bankmark.bigbench.queries.q18.MRlinearRegression.java
License: Apache License

@Override
public int run(String[] args) throws Exception {
    int NUMBER_REDUCERS = 1;
    Job job = Job.getInstance(getConf());
    job.setJarByClass(MRlinearRegression.class);
    if (args.length != 2) {
        usage(job);
        return 2;
    }
    System.out.println("input:");
    job.setJobName(MRlinearRegression.class.getSimpleName() + "::" + args[0] + "->" + args[1]);
    Path input = new Path(args[0]);
    Path output = new Path(args[1]);
    System.out.println("Input: " + input + " out -> " + output);
    FileInputFormat.addInputPath(job, input);
    FileOutputFormat.setOutputPath(job, output);

    job.setMapperClass(MRlinearRegression.LRmapper.class);
    job.setReducerClass(MRlinearRegression.LRreducer.class);
    job.setNumReduceTasks(NUMBER_REDUCERS);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(DoubleArrayWritable.class);

    return job.waitForCompletion(true) ? 0 : 1;
}
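The map output value class here is DoubleArrayWritable, a custom type whose source is not part of this excerpt. Hadoop's stock ArrayWritable has no no-arg constructor recording the element class, so the framework cannot instantiate it reflectively during deserialization; the conventional idiom is a thin subclass that fixes the element type. A sketch, assuming this example follows that idiom:

import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.io.DoubleWritable;

// Conventional ArrayWritable subclass. The no-arg constructor is required
// so Hadoop can create instances reflectively when reading map output.
public class DoubleArrayWritable extends ArrayWritable {
    public DoubleArrayWritable() {
        super(DoubleWritable.class);
    }

    public DoubleArrayWritable(DoubleWritable[] values) {
        super(DoubleWritable.class, values);
    }
}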
From source file: de.bankmark.bigbench.queries.q28.ToSequenceFile.java
License: Apache License

@Override
public int run(String[] args) throws Exception {
    Job job = Job.getInstance(getConf());
    job.setJarByClass(ToSequenceFile.class);
    if (args.length != 2) {
        usage(job);
        return 2;
    }
    System.out.println("input:");
    job.setJobName(ToSequenceFile.class.getSimpleName() + "::" + args[0] + "->" + args[1]);
    Path input = new Path(args[0]);
    Path output = new Path(args[1]);
    System.out.println("Input: " + input + " out -> " + output);
    FileInputFormat.addInputPath(job, input);
    SequenceFileOutputFormat.setOutputPath(job, output);

    job.setMapperClass(IdentityMapper.class);
    job.setReducerClass(Reducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    // Zero reduce tasks makes this a map-only job: the Reducer set above
    // never runs, and map output is written directly as the final output.
    job.setNumReduceTasks(0);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    return job.waitForCompletion(true) ? 0 : 1;
}