List of usage examples for org.apache.hadoop.mapreduce.Job#setCombinerClass
public void setCombinerClass(Class<? extends Reducer> cls) throws IllegalStateException
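The combiner must be a Reducer whose input and output key/value types match the map output types, and it has to be registered before the job is submitted (a call on an already submitted job throws IllegalStateException). A minimal driver sketch is shown below; the TokenizerMapper, IntSumReducer, and MyDriver names are assumed WordCount-style placeholders, not classes from this page.

    // Minimal sketch: register the combiner before submitting the job.
    // TokenizerMapper, IntSumReducer, and MyDriver are assumed placeholder classes.
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "combiner example");
    job.setJarByClass(MyDriver.class);          // hypothetical driver class
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);  // must be set before job submission
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);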
From source file:gr.ntua.h2rdf.sampler.TotalOrderPrep.java
License:Open Source License
public Job createSubmittableJob(String[] args) throws IOException, ClassNotFoundException {
    Job sample_job = new Job();
    // Remember the real input format so the sampling input format can use
    // it under the hood
    sample_job.getConfiguration().setBoolean(ARG_INPUTFORMAT, true);
    sample_job.setInputFormatClass(TextInputFormat.class);
    //sample_job.getConfiguration().set("mapred.fairscheduler.pool", "pool9");

    // Base the sample size on the number of reduce tasks that will be used
    // by the real job, but only use 1 reducer for this job (maps output very little)
    sample_job.setNumReduceTasks(1);

    // Make this job's output a temporary file: the input file for the real job's
    // TotalOrderPartitioner
    Path partition = new Path("partitions/");
    //partition.getFileSystem(job.getConfiguration()).deleteOnExit(partition);
    conf = new Configuration();
    FileSystem fs;
    try {
        fs = FileSystem.get(conf);
        if (fs.exists(partition)) {
            fs.delete(partition, true);
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    FileOutputFormat.setOutputPath(sample_job, partition);
    FileInputFormat.setInputPaths(sample_job, new Path(args[0]));
    //TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), new Path(partition, "part-r-00000"));
    //job.setPartitionerClass(TotalOrderPartitioner.class);

    // If there's a combiner, turn it into an identity reducer to prevent
    // destruction of keys.
    sample_job.setCombinerClass(Combiner.class);

    sample_job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    sample_job.setMapOutputValueClass(ImmutableBytesWritable.class);
    sample_job.setOutputKeyClass(ImmutableBytesWritable.class);
    sample_job.setOutputValueClass(NullWritable.class);
    sample_job.setPartitionerClass(HashPartitioner.class);
    sample_job.setOutputFormatClass(SequenceFileOutputFormat.class);
    sample_job.setJarByClass(TotalOrderPrep.class);
    sample_job.setMapperClass(Map.class);
    sample_job.setReducerClass(PartitioningReducer.class);
    sample_job.setJobName("(Sampler)");
    sample_job.getConfiguration().setBoolean("mapred.map.tasks.speculative.execution", false);
    sample_job.getConfiguration().setBoolean("mapred.reduce.tasks.speculative.execution", false);
    return sample_job;
}
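The Combiner class registered above is defined elsewhere in the h2rdf sources and is not shown on this page. Given the comment about keeping it an identity reducer so keys are not destroyed, an identity-style combiner for this job might look roughly like the sketch below; the class body is an assumption, not the actual h2rdf code, and it relies on the usual org.apache.hadoop.mapreduce.Reducer and org.apache.hadoop.hbase.io.ImmutableBytesWritable imports.

    // Hypothetical identity-style combiner: forwards each map output pair unchanged,
    // so the sampler's keys survive intact. The real h2rdf Combiner may differ.
    public static class Combiner
            extends Reducer<ImmutableBytesWritable, ImmutableBytesWritable,
                            ImmutableBytesWritable, ImmutableBytesWritable> {
        @Override
        protected void reduce(ImmutableBytesWritable key, Iterable<ImmutableBytesWritable> values,
                              Context context) throws IOException, InterruptedException {
            for (ImmutableBytesWritable value : values) {
                context.write(key, value);
            }
        }
    }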
From source file:gws.WordCount.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    conf.addResource(new Path("/home/ucas/bigdata/hadoop-2.6.2/etc/hadoop/core-site.xml"));
    conf.addResource(new Path("/home/ucas/bigdata/hadoop-2.6.2/etc/hadoop/hdfs-site.xml"));
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: wordcount <in> [<in>...] <out>");
        System.exit(2);
    }
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumCombiner.class);
    job.setReducerClass(IntSumReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    // add the input paths as given by command line
    for (int i = 0; i < otherArgs.length - 1; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    // add the output path as given by the command line
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
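Because this job declares Text as the final output value class while the map output values are IntWritable, the reducer cannot double as the combiner, so a dedicated IntSumCombiner is registered instead. Its body is not shown on this page; a typical int-summing combiner would look roughly like the following sketch (the class body is an assumption, not the gws source).

    // Hypothetical combiner that pre-aggregates counts on the map side.
    // Its input and output types must both match the map output types (Text, IntWritable).
    public static class IntSumCombiner extends Reducer<Text, IntWritable, Text, IntWritable> {
        private final IntWritable result = new IntWritable();

        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }
            result.set(sum);
            context.write(key, result);
        }
    }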
From source file:hadoop.examples.DBCountPageView.java
License:Apache License
public int run(String[] args) throws Exception {
    String driverClassName = DRIVER_CLASS;
    String url = DB_URL;
    if (args.length > 1) {
        driverClassName = args[0];
        url = args[1];
    }
    initialize(driverClassName, url);
    Configuration conf = getConf();
    DBConfiguration.configureDB(conf, driverClassName, url);

    Job job = new Job(conf);
    job.setJobName("Count Pageviews of URLs");
    job.setJarByClass(DBCountPageView.class);
    job.setMapperClass(PageviewMapper.class);
    job.setCombinerClass(LongSumReducer.class);
    job.setReducerClass(PageviewReducer.class);

    DBInputFormat.setInput(job, AccessRecord.class, "Access", null, "url", AccessFieldNames);
    DBOutputFormat.setOutput(job, "Pageview", PageviewFieldNames);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);
    job.setOutputKeyClass(PageviewRecord.class);
    job.setOutputValueClass(NullWritable.class);

    int ret;
    try {
        ret = job.waitForCompletion(true) ? 0 : 1;
        boolean correct = verify();
        if (!correct) {
            throw new RuntimeException("Evaluation was not correct!");
        }
    } finally {
        shutdown();
    }
    return ret;
}
From source file:hadoop.examples.mapreduce.WordCountV1.java
License:Open Source License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.print("Usage: wordcountV1 <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCountV1.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:hadoop.examples.mapreduce.WordCountV2.java
License:Open Source License
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    List<String> other_args = new ArrayList<String>();
    args = new GenericOptionsParser(conf, args).getRemainingArgs();
    for (int i = 0; i < args.length; i++) {
        if ("-skip".equals(args[i])) {
            DistributedCache.addCacheFile(new Path(args[++i]).toUri(), conf);
            conf.setBoolean("wordcount.skip.patterns", true);
        } else if ("-D".equals(args[i])) {
            String[] arr = args[++i].split("=");
            conf.setBoolean(arr[0], Boolean.valueOf(arr[1]));
        } else {
            other_args.add(args[i]);
        }
    }

    Job job = new Job(conf);
    job.setJarByClass(WordCountV2.class);
    job.setJobName("word count version 2");
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setMapperClass(TokenizeMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    FileInputFormat.setInputPaths(job, new Path(other_args.get(0)));
    FileOutputFormat.setOutputPath(job, new Path(other_args.get(1)));
    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:hadoop.examples.WordCount.java
License:Apache License
public static void main(String[] args) throws Exception {
    System.out.println("hello 4");
    System.out.print("gegin");
    System.out.print("gegin2");
    String hdfspath = "hdfs://10.2.12.93:9000/user/root/";
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(hdfspath + "input"));
    FileOutputFormat.setOutputPath(job, new Path(hdfspath + "output7"));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:hadoop.examples.WordMean.java
License:Apache License
public int run(String[] args) throws Exception { if (args.length != 2) { System.err.println("Usage: wordmean <in> <out>"); return 0; }//from ww w. j a va2 s . c o m Configuration conf = getConf(); @SuppressWarnings("deprecation") Job job = new Job(conf, "word mean"); job.setJarByClass(WordMean.class); job.setMapperClass(WordMeanMapper.class); job.setCombinerClass(WordMeanReducer.class); job.setReducerClass(WordMeanReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(LongWritable.class); FileInputFormat.addInputPath(job, new Path(args[0])); Path outputpath = new Path(args[1]); FileOutputFormat.setOutputPath(job, outputpath); boolean result = job.waitForCompletion(true); mean = readAndCalcMean(outputpath, conf); return (result ? 0 : 1); }
From source file:hadoop.examples.WordMedian.java
License:Apache License
public int run(String[] args) throws Exception { if (args.length != 2) { System.err.println("Usage: wordmedian <in> <out>"); return 0; }// w w w .ja v a 2s . c o m setConf(new Configuration()); Configuration conf = getConf(); @SuppressWarnings("deprecation") Job job = new Job(conf, "word median"); job.setJarByClass(WordMedian.class); job.setMapperClass(WordMedianMapper.class); job.setCombinerClass(WordMedianReducer.class); job.setReducerClass(WordMedianReducer.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(IntWritable.class); FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); boolean result = job.waitForCompletion(true); // Wait for JOB 1 -- get middle value to check for Median long totalWords = job.getCounters().getGroup(TaskCounter.class.getCanonicalName()) .findCounter("MAP_OUTPUT_RECORDS", "Map output records").getValue(); int medianIndex1 = (int) Math.ceil((totalWords / 2.0)); int medianIndex2 = (int) Math.floor((totalWords / 2.0)); median = readAndFindMedian(args[1], medianIndex1, medianIndex2, conf); return (result ? 0 : 1); }
From source file:hadoop.examples.WordStandardDeviation.java
License:Apache License
public int run(String[] args) throws Exception { if (args.length != 2) { System.err.println("Usage: wordstddev <in> <out>"); return 0; }// w w w .j a va 2 s. c o m Configuration conf = getConf(); @SuppressWarnings("deprecation") Job job = new Job(conf, "word stddev"); job.setJarByClass(WordStandardDeviation.class); job.setMapperClass(WordStandardDeviationMapper.class); job.setCombinerClass(WordStandardDeviationReducer.class); job.setReducerClass(WordStandardDeviationReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(LongWritable.class); FileInputFormat.addInputPath(job, new Path(args[0])); Path outputpath = new Path(args[1]); FileOutputFormat.setOutputPath(job, outputpath); boolean result = job.waitForCompletion(true); // read output and calculate standard deviation stddev = readAndCalcStdDev(outputpath, conf); return (result ? 0 : 1); }
From source file:hadoop.TestingDriver.java
License:Open Source License
public int run(String[] args) throws Exception {
    Configuration conf = getConf();

    String input = conf.get("gc.TestingDriver.input");
    String output = conf.get("gc.TestingDriver.output");
    String jobname = conf.get("gc.TestingDriver.name");
    String dataset = conf.get("gc.TestingDriver.dataset");
    if (input == null || output == null || dataset == null || jobname == null) {
        System.out.println(" Incorrect parameters ");
        System.exit(0);
    }
    conf = addPathToDC(conf, conf.get("gc.TestingDriver.dataset") + "*");

    Job job = new Job(conf);
    job.setJarByClass(TestingDriverMapper.class);
    job.setJobName(jobname);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(PIFArray.class);
    job.setMapperClass(TestingDriverMapper.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(PIFArray.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setCombinerClass(TestingDriverReducer.class);
    job.setReducerClass(TestingDriverReducer.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    FileInputFormat.setInputPaths(job, input);
    FileOutputFormat.setOutputPath(job, new Path(output));

    System.out.println(" Input dir = " + conf.get("gc.TestingDriver.input"));
    System.out.println(" Output dir = " + conf.get("gc.TestingDriver.output"));
    System.out.println(" Testing Input = " + conf.get("gc.TestingDriver.dataset"));
    System.out.println(" Name = " + conf.get("gc.TestingDriver.name"));

    if (job.waitForCompletion(true) == false) {
        System.err.println(" Job " + jobname + " Failed (miserably)");
        System.exit(2);
    }
    return 0;
}