List of usage examples for org.apache.hadoop.mapreduce Job getInstance
public static Job getInstance(Configuration conf, String jobName) throws IOException

Note: the overload getInstance(Cluster ignored, Configuration conf) is deprecated; all of the examples below use the Configuration-based factory methods shown here.
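A minimal sketch of the basic usage pattern (class and job names here are illustrative, not taken from any of the source files below):

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class MinimalJobFactory {
    public static Job newJob() throws IOException {
        Configuration conf = new Configuration();
        // getInstance copies the Configuration, so later changes made
        // through the Job do not leak back into the shared conf object.
        return Job.getInstance(conf, "minimal-job");
    }
}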
From source file:com.example.Driver.java
License:Open Source License
public int run(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "Your job name");
    job.setJarByClass(Driver.class);

    logger.info("job " + job.getJobName() + " [" + job.getJar() + "] started with the following arguments: "
            + Arrays.toString(args));

    if (args.length < 2) {
        logger.warn("this jar requires 2 parameters: \"" + job.getJar() + " input_files output_directory\"");
        return 1;
    }

    job.setMapperClass(WordcountMapper.class);
    logger.info("mapper class is " + job.getMapperClass());

    // job.setMapOutputKeyClass(Text.class);
    // job.setMapOutputValueClass(IntWritable.class);
    logger.info("mapper output key class is " + job.getMapOutputKeyClass());
    logger.info("mapper output value class is " + job.getMapOutputValueClass());

    job.setReducerClass(WordcountReducer.class);
    logger.info("reducer class is " + job.getReducerClass());
    job.setCombinerClass(WordcountReducer.class);
    logger.info("combiner class is " + job.getCombinerClass());

    // When not running any reducer: job.setNumReduceTasks(0);
    logger.info("number of reduce tasks is " + job.getNumReduceTasks());

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    logger.info("output key class is " + job.getOutputKeyClass());
    logger.info("output value class is " + job.getOutputValueClass());

    job.setInputFormatClass(TextInputFormat.class);
    logger.info("input format class is " + job.getInputFormatClass());
    job.setOutputFormatClass(TextOutputFormat.class);
    logger.info("output format class is " + job.getOutputFormatClass());

    Path filePath = new Path(args[0]);
    logger.info("input path " + filePath);
    FileInputFormat.setInputPaths(job, filePath);

    Path outputPath = new Path(args[1]);
    logger.info("output path " + outputPath);
    FileOutputFormat.setOutputPath(job, outputPath);

    return job.waitForCompletion(true) ? 0 : 1;
}
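The driver references WordcountMapper and WordcountReducer, which the listing does not show; a minimal sketch of how such classes are conventionally written (the class bodies and tokenization are assumptions, not code from com.example):

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;

// Hypothetical mapper: emits (word, 1) for every token in the line.
public class WordcountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
    private static final IntWritable ONE = new IntWritable(1);
    private final Text word = new Text();

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        StringTokenizer tokens = new StringTokenizer(value.toString());
        while (tokens.hasMoreTokens()) {
            word.set(tokens.nextToken());
            context.write(word, ONE);
        }
    }
}

// Hypothetical reducer (also usable as the combiner above): sums the counts.
class WordcountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
    private final IntWritable result = new IntWritable();

    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        int sum = 0;
        for (IntWritable v : values) {
            sum += v.get();
        }
        result.set(sum);
        context.write(key, result);
    }
}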
From source file:com.fanlehai.hadoop.serialize.avro.MapReduceAvroWordCount.java
License:Apache License
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        printUsage();
    }
    FileSystem.get(new Configuration()).delete(new Path(args[1]), true);

    Job job = Job.getInstance(super.getConf(), "AvroWordCount");
    job.setJarByClass(MapReduceAvroWordCount.class);
    job.setJobName("AvroWordCount");

    // We call setOutputKeySchema first so we can override the
    // configuration parameters it sets.
    AvroJob.setOutputKeySchema(job, Pair.getPairSchema(Schema.create(Type.STRING), Schema.create(Type.INT)));
    job.setOutputValueClass(NullWritable.class);

    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setSortComparatorClass(Text.Comparator.class);

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    // waitForCompletion returns true on success, so map success to exit code 0.
    return job.waitForCompletion(true) ? 0 : 1;
}
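The Map and Reduce inner classes are likewise not shown. In this pattern the reducer emits an Avro Pair record matching the Pair(STRING, INT) schema registered with AvroJob.setOutputKeySchema; a sketch under that assumption:

import java.io.IOException;

import org.apache.avro.mapred.AvroKey;
import org.apache.avro.mapred.Pair;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

// Sketch of a reducer whose output matches the Pair(STRING, INT)
// schema registered above with AvroJob.setOutputKeySchema.
public class Reduce extends Reducer<Text, IntWritable, AvroKey<Pair<CharSequence, Integer>>, NullWritable> {
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        int sum = 0;
        for (IntWritable value : values) {
            sum += value.get();
        }
        context.write(new AvroKey<Pair<CharSequence, Integer>>(
                new Pair<CharSequence, Integer>(key.toString(), sum)),
                NullWritable.get());
    }
}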
From source file:com.fanlehai.hadoop.serialize.avro.MapReduceColorCount.java
License:Apache License
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        printUsage();
    }
    FileSystem.get(new Configuration()).delete(new Path(args[1]), true);

    Job job = Job.getInstance(super.getConf(), "ColorCount");
    job.setJarByClass(MapReduceColorCount.class);
    job.setJobName("Color Count");

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setInputFormatClass(AvroKeyInputFormat.class);
    job.setMapperClass(ColorCountMapper.class);
    AvroJob.setInputKeySchema(job, User.getClassSchema());
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setOutputFormatClass(AvroKeyValueOutputFormat.class);
    job.setReducerClass(ColorCountReducer.class);
    AvroJob.setOutputKeySchema(job, Schema.create(Schema.Type.STRING));
    AvroJob.setOutputValueSchema(job, Schema.create(Schema.Type.INT));

    // waitForCompletion returns true on success, so map success to exit code 0.
    return job.waitForCompletion(true) ? 0 : 1;
}
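ColorCountMapper and ColorCountReducer are not included in the listing; the sketch below follows the canonical Avro ColorCount example and matches the job wiring above (the favoriteColor field on the generated User class is an assumption carried over from that example, not verified against this repository):

import java.io.IOException;

import org.apache.avro.mapred.AvroKey;
import org.apache.avro.mapred.AvroValue;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;

// Reads AvroKey<User> records (schema registered via setInputKeySchema)
// and emits (favoriteColor, 1).
public class ColorCountMapper extends Mapper<AvroKey<User>, NullWritable, Text, IntWritable> {
    @Override
    protected void map(AvroKey<User> key, NullWritable value, Context context)
            throws IOException, InterruptedException {
        CharSequence color = key.datum().getFavoriteColor();
        if (color == null) {
            color = "none";
        }
        context.write(new Text(color.toString()), new IntWritable(1));
    }
}

// Sums the counts and emits Avro (STRING, INT) key/value pairs, matching
// the schemas registered with setOutputKeySchema/setOutputValueSchema.
class ColorCountReducer extends Reducer<Text, IntWritable, AvroKey<CharSequence>, AvroValue<Integer>> {
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        int sum = 0;
        for (IntWritable value : values) {
            sum += value.get();
        }
        context.write(new AvroKey<CharSequence>(key.toString()), new AvroValue<Integer>(sum));
    }
}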
From source file:com.fanlehai.hadoop.serialize.json.multiline.ExampleJob.java
License:Apache License
/**
 * The MapReduce driver - setup and launch the job.
 *
 * @param args the command-line arguments
 * @return the process exit code
 * @throws Exception if something goes wrong
 */
@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: ExampleJob <in dir> <out dir>");
        ToolRunner.printGenericCommandUsage(System.err);
        System.exit(2);
    }

    String input = args[0];
    String output = args[1];

    Configuration conf = super.getConf();
    writeInput(conf, new Path(input));

    Job job = Job.getInstance(getConf(), "ExampleJob");
    job.setJarByClass(ExampleJob.class);
    job.setMapperClass(Map.class);
    job.setNumReduceTasks(0);

    Path outputPath = new Path(output);
    FileInputFormat.setInputPaths(job, input);
    FileOutputFormat.setOutputPath(job, outputPath);

    // Use the JSON input format.
    job.setInputFormatClass(MultiLineJsonInputFormat.class);

    // Specify the JSON attribute name which is used to determine which
    // JSON elements are supplied to the mapper.
    MultiLineJsonInputFormat.setInputJsonMember(job, "colorName");

    if (job.waitForCompletion(true)) {
        return 0;
    }
    return 1;
}
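Since run(String[]) uses getConf(), ExampleJob presumably extends Configured and implements Tool; a typical (assumed) entry point wiring it through ToolRunner, so that generic Hadoop options are parsed before run is invoked:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.ToolRunner;

// Assumed launcher: ToolRunner strips generic options (-D, -files, ...)
// from args, injects the Configuration, and calls ExampleJob.run.
public class ExampleJobLauncher {
    public static void main(String[] args) throws Exception {
        int exitCode = ToolRunner.run(new Configuration(), new ExampleJob(), args);
        System.exit(exitCode);
    }
}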
From source file:com.flipkart.fdp.migration.distcp.core.MirrorDistCPDriver.java
License:Apache License
private Job createJob(Configuration configuration) throws Exception {
    System.out.println("Initializing BlueShift v 2.0...");
    System.out.println("Configuration: " + dcmConfig.toString());

    Job job = Job.getInstance(configuration, "BlueShift v 2.0 - " + dcmConfig.getBatchName());
    job.setJarByClass(MirrorDistCPDriver.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(MirrorMapper.class);
    job.setReducerClass(MirrorReducer.class);

    job.setInputFormatClass(MirrorFileInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    FileOutputFormat.setOutputPath(job, stateManager.getReportPath());

    job.setNumReduceTasks(configuration.getInt("mapreduce.reduce.tasks", 1));

    System.out.println("Job Initialization Complete, the status of the Mirror job will be written to: "
            + stateManager.getReportPath());
    return job;
}
From source file:com.github.milind.GlobalNumberAddition.java
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "Global Addition of Numbers");
    job.setJarByClass(GlobalNumberAddition.class);
    job.setMapperClass(GlobalNumberAdditionMapper.class);
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.github.milind.GlobalNumberAverage.java
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "Global Average of Numbers");
    job.setJarByClass(GlobalNumberAverage.class);
    job.setMapperClass(GlobalNumberAverageMapper.class);
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(DoubleWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.github.milind.NumberAdditionPerLine.java
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "Addition of Numbers Per Line");
    job.setJarByClass(NumberAdditionPerLine.class);
    job.setMapperClass(NumberAdditionPerLineMapper.class);
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
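All three drivers above configure map-only jobs (setNumReduceTasks(0)), so each mapper's output pairs are written directly by the output format. The mapper classes are not included in the listing; a hypothetical sketch of the per-line addition mapper (class body and parsing details are assumptions):

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// Hypothetical mapper for the map-only job above: sums the
// whitespace-separated integers on each input line and emits
// (line, sum). With zero reducers, these pairs go straight to the output.
public class NumberAdditionPerLineMapper
        extends Mapper<LongWritable, Text, Text, IntWritable> {

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        int sum = 0;
        for (String token : value.toString().trim().split("\\s+")) {
            if (!token.isEmpty()) {
                sum += Integer.parseInt(token);
            }
        }
        context.write(value, new IntWritable(sum));
    }
}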
From source file:com.github.sample.mapreduce.WordCount.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    System.out.println("coder");
    // conf.addResource("etc/hadoop/hadoop-local.xml");
    // conf.setBoolean("mapreduce.output.fileoutputformat.compress", true);
    // conf.setClass("mapreduce.output.fileoutputformat.compress.codec", GzipCodec.class, CompressionCodec.class);

    // fs.default.name is the deprecated predecessor of fs.defaultFS.
    conf.set("fs.default.name", "hdfs://localhost:9000");
    // conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
    // conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());

    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: wordcount <in> [<in>...] <out>");
        System.exit(2);
    }

    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    for (int i = 0; i < otherArgs.length - 1; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.github.ygf.pagerank.InLinks.java
License:Apache License
private void computeInLinks(Configuration conf, Path linksFile, Path outputDir) throws Exception {
    // This job computes the number of in-links for every page. The
    // implementation is very similar to the classic word count example.
    Job job = Job.getInstance(conf, "InLinks:Computation");
    job.setJarByClass(InLinks.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setMapperClass(InLinksMapper.class);
    job.setCombinerClass(InLinksReducer.class);
    job.setReducerClass(InLinksReducer.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(job, linksFile);
    FileOutputFormat.setOutputPath(job, new Path(outputDir, "inlinks"));

    job.waitForCompletion(true);
}
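InLinksMapper and InLinksReducer are not shown; below is a word-count-style sketch consistent with the IntWritable key/value wiring above, assuming each line of the links file holds a source page ID, a colon, and the IDs it links to (the actual input format is not given in the listing):

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;

// Emits (linkedPage, 1) for every out-link on the line; summing these
// per key yields each page's in-link count. Line format is assumed to
// be "source: target1 target2 ..." with integer page IDs.
public class InLinksMapper extends Mapper<LongWritable, Text, IntWritable, IntWritable> {
    private static final IntWritable ONE = new IntWritable(1);
    private final IntWritable page = new IntWritable();

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String[] parts = value.toString().split(":");
        if (parts.length < 2) {
            return; // page with no out-links
        }
        for (String target : parts[1].trim().split("\\s+")) {
            if (!target.isEmpty()) {
                page.set(Integer.parseInt(target));
                context.write(page, ONE);
            }
        }
    }
}

// Sums the 1s per page; also usable as the combiner, as wired above.
class InLinksReducer extends Reducer<IntWritable, IntWritable, IntWritable, IntWritable> {
    private final IntWritable total = new IntWritable();

    @Override
    protected void reduce(IntWritable key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        int sum = 0;
        for (IntWritable v : values) {
            sum += v.get();
        }
        total.set(sum);
        context.write(key, total);
    }
}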