List of usage examples for org.apache.hadoop.mapreduce.Job.getInstance
@Deprecated public static Job getInstance(Cluster ignored, Configuration conf) throws IOException
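The Cluster argument of this overload is ignored (hence the parameter name), and the overload is deprecated; Job.getInstance(Configuration) or Job.getInstance(Configuration, String) is the usual replacement. A minimal sketch of a driver using the non-deprecated form (the class and job names here are illustrative, not taken from the examples below):

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class JobInstanceExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // Preferred replacement for the deprecated Job.getInstance(Cluster, Configuration):
        Job job = Job.getInstance(conf, "example job");
        System.out.println(job.getJobName());
    }
}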
From source file:com.airline.analytics.AirlineDelayAnalytics.java
@Override
public int run(String[] strings) throws Exception {
    Job job = Job.getInstance(getConf(), "Hadoop Airline Delay Analytics");
    job.setJarByClass(AirlineDelayAnalytics.class);
    job.setMapperClass(AirlineMapper.class);
    // job.setCombinerClass(AirlineReducer.class);
    job.setReducerClass(AirlineReducer.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job, new Path(strings[0]));
    FileOutputFormat.setOutputPath(job, new Path(strings[1]));
    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:com.airline.analytics.AirlineUniqueRoutesAnalytics.java
@Override
public int run(String[] strings) throws Exception {
    Job job = Job.getInstance(getConf(), "Hadoop Airline Origin Destination Analytics");
    job.setJarByClass(getClass());
    // Distributed Cache
    job.addCacheFile(new URI("/airline/codes.csv"));
    job.setMapperClass(AirlineMapper.class);
    // job.setCombinerClass(AirlineReducer.class);
    job.setReducerClass(AirlineReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(strings[0]));
    FileOutputFormat.setOutputPath(job, new Path(strings[1]));
    return job.waitForCompletion(true) ? 0 : 1;
}
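The driver above only registers /airline/codes.csv with the distributed cache; the consuming side is not part of this listing. A hypothetical mapper-side sketch (the class name and CSV parsing are assumptions, not the original AirlineMapper) showing how a file added via job.addCacheFile() is typically read back in setup():

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.net.URI;
import java.util.HashMap;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class CachedCodesMapperSketch extends Mapper<LongWritable, Text, Text, IntWritable> {
    private final HashMap<String, String> codes = new HashMap<String, String>();

    @Override
    protected void setup(Context context) throws IOException {
        URI[] cacheFiles = context.getCacheFiles();
        if (cacheFiles != null) {
            for (URI cacheFile : cacheFiles) {
                // Cached files are localized into the task's working directory
                // under their base name, so they can be opened as local files.
                String localName = new Path(cacheFile.getPath()).getName();
                BufferedReader reader = new BufferedReader(new FileReader(localName));
                try {
                    String line;
                    while ((line = reader.readLine()) != null) {
                        String[] parts = line.split(",", 2);
                        if (parts.length == 2) {
                            codes.put(parts[0], parts[1]);
                        }
                    }
                } finally {
                    reader.close();
                }
            }
        }
    }
}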
From source file:com.alectenharmsel.hadoop.qa.LineCount.java
License:Apache License
public static void main(String[] args) throws Exception {
    GenericOptionsParser parse = new GenericOptionsParser(new Configuration(), args);
    Configuration conf = parse.getConfiguration();
    String[] remainingArgs = parse.getRemainingArgs();
    if (remainingArgs.length != 2) {
        System.err.println("Usage: LineCount <input> <output>");
        System.exit(-1);
    }
    Job job = Job.getInstance(conf, "LineCount");
    job.setJarByClass(LineCount.class);
    job.setMapperClass(Map.class);
    job.setCombinerClass(Reduce.class);
    job.setReducerClass(Reduce.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    FileInputFormat.addInputPath(job, new Path(remainingArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(remainingArgs[1]));
    boolean success = job.waitForCompletion(true);
    int res = success ? 0 : 1;
    System.exit(res);
}
From source file:com.alectenharmsel.research.hadoop.LcCounters.java
License:Apache License
public static void main(String[] args) throws Exception {
    GenericOptionsParser parse = new GenericOptionsParser(new Configuration(), args);
    Configuration conf = parse.getConfiguration();
    String[] remainingArgs = parse.getRemainingArgs();
    if (remainingArgs.length != 2) {
        System.err.println("Usage: LineCount <input> <output>");
        System.exit(-1);
    }
    Job job = Job.getInstance(conf, "LineCount");
    job.setJarByClass(LineCount.class);
    job.setMapperClass(Map.class);
    job.setCombinerClass(Reduce.class);
    job.setReducerClass(Reduce.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    FileInputFormat.addInputPath(job, new Path(remainingArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(remainingArgs[1]));
    boolean success = job.waitForCompletion(true);

    // Get the counter here and print it
    Counters counters = job.getCounters();
    long total = counters.findCounter(LcCounters.NUM_LINES).getValue();
    System.out.println(Long.toString(total));

    int res = success ? 0 : 1;
    System.exit(res);
}
From source file:com.alectenharmsel.research.hadoop.MoabLicenseInfo.java
License:Apache License
public static void main(String[] args) throws Exception {
    GenericOptionsParser parser = new GenericOptionsParser(new Configuration(), args);
    Configuration conf = parser.getConfiguration();
    conf.set("mapreduce.output.textoutputformat.separator", ",");
    String[] remainingArgs = parser.getRemainingArgs();
    if (remainingArgs.length != 2) {
        System.err.println("Usage: LineCount <input> <output>");
        System.exit(-1);
    }
    Job job = Job.getInstance(conf, "MoabLicenseInfo");
    job.setJarByClass(MoabLicenseInfo.class);
    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job, new Path(remainingArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(remainingArgs[1]));
    int res = job.waitForCompletion(true) ? 0 : 1;
    System.exit(res);
}
From source file:com.aliyun.emr.example.WordCount.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    GenericOptionsParser optionParser = new GenericOptionsParser(conf, args);
    String[] remainingArgs = optionParser.getRemainingArgs();
    if (!(remainingArgs.length == 2 || remainingArgs.length == 4)) {
        System.err.println("Usage: wordcount <in> <out> [-skip skipPatternFile]");
        System.exit(2);
    }
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    List<String> otherArgs = new ArrayList<String>();
    for (int i = 0; i < remainingArgs.length; ++i) {
        if ("-skip".equals(remainingArgs[i])) {
            job.addCacheFile(new Path(EMapReduceOSSUtil.buildOSSCompleteUri(remainingArgs[++i], conf)).toUri());
            job.getConfiguration().setBoolean("wordcount.skip.patterns", true);
        } else {
            otherArgs.add(remainingArgs[i]);
        }
    }
    FileInputFormat.addInputPath(job, new Path(EMapReduceOSSUtil.buildOSSCompleteUri(otherArgs.get(0), conf)));
    FileOutputFormat.setOutputPath(job, new Path(EMapReduceOSSUtil.buildOSSCompleteUri(otherArgs.get(1), conf)));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.aliyun.openservices.tablestore.hadoop.RowCounter.java
License:Apache License
public static void main(String[] args) throws Exception {
    if (!parseArgs(args)) {
        printUsage();
        System.exit(1);
    }
    if (endpoint == null || accessKeyId == null || accessKeySecret == null || table == null
            || outputPath == null) {
        printUsage();
        System.exit(1);
    }
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "row count");
    job.setJarByClass(RowCounter.class);
    job.setMapperClass(RowCounterMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    job.setInputFormatClass(TableStoreInputFormat.class);
    TableStore.setCredential(job, accessKeyId, accessKeySecret, securityToken);
    TableStore.setEndpoint(job, endpoint, instance);
    TableStoreInputFormat.addCriteria(job, fetchCriteria());
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.aliyun.openservices.tablestore.hadoop.TableStoreOutputFormatExample.java
License:Apache License
public static void main(String[] args) throws Exception {
    if (!parseArgs(args)) {
        printUsage();
        System.exit(1);
    }
    if (endpoint == null || accessKeyId == null || accessKeySecret == null || inputTable == null
            || outputTable == null) {
        printUsage();
        System.exit(1);
    }
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, TableStoreOutputFormatExample.class.getName());
    job.setMapperClass(OwnerMapper.class);
    job.setReducerClass(IntoTableReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(MapWritable.class);
    job.setInputFormatClass(TableStoreInputFormat.class);
    job.setOutputFormatClass(TableStoreOutputFormat.class);
    TableStore.setCredential(job, accessKeyId, accessKeySecret, securityToken);
    TableStore.setEndpoint(job, endpoint, instance);
    TableStoreInputFormat.addCriteria(job, fetchCriteria());
    TableStoreOutputFormat.setOutputTable(job, outputTable);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.awcoleman.ExampleJobSummaryLogWithOutput.BinRecToAvroRecDriver.java
License:Apache License
public int run(String[] args) throws Exception {
    String input = null;
    String output = null;
    if (args.length < 2) {
        System.err.printf("Usage: %s <input> <output>\n", this.getClass().getSimpleName());
        return -1;
    } else {
        input = args[0];
        output = args[1];
    }

    Job job = Job.getInstance(getConf(), "BinRecToAvroRecDriver");
    Configuration conf = job.getConfiguration();

    // Add job log to hold Driver logging (and any summary info about the dataset, job, or counters we want to write)
    String fapath = createTempFileAppender(job);

    // Get schema
    Schema outSchema = ReflectData.get().getSchema(com.awcoleman.examples.avro.BinRecForPartitions.class);
    job.getConfiguration().set("outSchema", outSchema.toString());

    // Job conf settings
    job.setJarByClass(BinRecToAvroRecDriver.class);
    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);
    job.setInputFormatClass(BinRecInputFormat.class);
    job.setOutputFormatClass(AvroKeyOutputFormat.class);
    AvroJob.setOutputKeySchema(job, outSchema);
    AvroJob.setMapOutputKeySchema(job, Schema.create(Schema.Type.STRING));
    AvroJob.setMapOutputValueSchema(job, outSchema);

    // Job output compression
    FileOutputFormat.setCompressOutput(job, true);
    job.getConfiguration().set(AvroJob.CONF_OUTPUT_CODEC, DataFileConstants.DEFLATE_CODEC);

    // Input and output paths
    FileInputFormat.setInputPaths(job, new Path(input));
    Path outPath = new Path(output);
    FileOutputFormat.setOutputPath(job, outPath);
    outPath.getFileSystem(conf).delete(outPath, true);

    boolean jobCompletionStatus = job.waitForCompletion(true);

    // Print custom counters before exiting
    Counters counters = job.getCounters();
    for (MYJOB_CNTRS customCounter : MYJOB_CNTRS.values()) {
        Counter thisCounter = counters.findCounter(customCounter);
        System.out.println("Custom Counter " + customCounter + "=" + thisCounter.getValue());
    }
    long mycnt1 = job.getCounters()
            .findCounter("com.awcoleman.TestingGettingContainerLogger.BinRecToAvroRecDriver$MYJOB_CNTRS", "MYCNT1")
            .getValue();
    long mycnt2 = job.getCounters()
            .findCounter("com.awcoleman.TestingGettingContainerLogger.BinRecToAvroRecDriver$MYJOB_CNTRS", "MYCNT2")
            .getValue();
    long mycnt3 = job.getCounters()
            .findCounter("com.awcoleman.TestingGettingContainerLogger.BinRecToAvroRecDriver$MYJOB_CNTRS", "MYCNT3")
            .getValue();

    long myfakekpi = mycnt1 - mycnt2;
    String msgMyfakekpi = "The Fake KPI of the Dataset: " + String.format("%,d", myfakekpi);
    System.out.println(msgMyfakekpi);
    logger.info(msgMyfakekpi);

    // Finished, so move job log to HDFS in _log dir, clean up
    copyTempFileAppenderToHDFSOutpath(job, fapath, output);

    return jobCompletionStatus ? 0 : 1;
}
From source file:com.baynote.kafka.hadoop.KafkaJobBuilder.java
License:Apache License
/**
 * Creates a {@link Job} based on how {@code this} {@link KafkaJobBuilder} has been configured. There are no
 * side-effects on {@code this} instance when you call this method, so you can call it multiple times.
 *
 * @param conf
 *            the job conf.
 * @return a fully configured {@link Job}.
 * @throws Exception error
 * @throws IllegalArgumentException
 *             if any required parameters are not set.
 */
public Job configureJob(final Configuration conf) throws Exception {
    validateSettings();
    final Job job = Job.getInstance(conf, getDefaultedJobName());

    // set queue inputs
    if (getQueueMappers().size() == 1) {
        job.setInputFormatClass(KafkaInputFormat.class);
        final TopicConf topicConf = Iterables.getOnlyElement(getQueueMappers());
        KafkaInputFormat.setTopic(job, topicConf.getTopic());
        KafkaInputFormat.setConsumerGroup(job, topicConf.getConsumerGroup());
        job.setMapperClass(topicConf.getMapper());
    } else {
        job.setInputFormatClass(MultipleKafkaInputFormat.class);
        for (final TopicConf topicConf : getQueueMappers()) {
            MultipleKafkaInputFormat.addTopic(job, topicConf.getTopic(), topicConf.getConsumerGroup(),
                    topicConf.getMapper());
        }
    }
    if (getMapOutputKeyClass() != null) {
        job.setMapOutputKeyClass(getMapOutputKeyClass());
    }
    if (getMapOutputValueClass() != null) {
        job.setMapOutputValueClass(getMapOutputValueClass());
    }
    if (getReducerClass() == null) {
        job.setNumReduceTasks(0);
    } else {
        job.setReducerClass(getReducerClass());
        job.setNumReduceTasks(getNumReduceTasks());
    }
    if (getPartitionerClass() != null) {
        job.setPartitionerClass(getPartitionerClass());
    }

    // set output
    job.setOutputFormatClass(getOutputFormatClass());
    job.setOutputKeyClass(getOutputKeyClass());
    job.setOutputValueClass(getOutputValueClass());
    if (getOutputFormat() == SupportedOutputFormat.TEXT_FILE) {
        TextOutputFormat.setOutputPath(job, getDefaultedOutputPath());
    } else if (getOutputFormat() == SupportedOutputFormat.SEQUENCE_FILE) {
        SequenceFileOutputFormat.setOutputPath(job, getDefaultedOutputPath());
    }
    if (usingS3()) {
        job.getConfiguration().set("fs.s3n.awsAccessKeyId", getS3AccessKey());
        job.getConfiguration().set("fs.s3n.awsSecretAccessKey", getS3SecretyKey());
        job.getConfiguration().set("fs.s3.awsAccessKeyId", getS3AccessKey());
        job.getConfiguration().set("fs.s3.awsSecretAccessKey", getS3SecretyKey());
    }
    if (isLazyOutputFormat()) {
        LazyOutputFormat.setOutputFormatClass(job, getOutputFormatClass());
    }

    // setup kafka input format specifics
    KafkaInputFormat.setZkConnect(job, getZkConnect());
    KafkaInputFormat.setKafkaFetchSizeBytes(job, getKafkaFetchSizeBytes());

    job.setSpeculativeExecution(false);
    job.setJarByClass(getClass());

    // memory settings for mappers
    if (!Strings.isNullOrEmpty(getTaskMemorySettings())) {
        job.getConfiguration().set("mapred.child.java.opts", getTaskMemorySettings());
    }

    return job;
}
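As the javadoc notes, configureJob has no side effects on the builder, so one configured builder can hand out multiple independent jobs. A minimal usage sketch under that assumption (the builder's setter API is not shown in this excerpt, so its setup is left as a comment):

public void submitTwice(KafkaJobBuilder builder, Configuration confA, Configuration confB) throws Exception {
    // builder is assumed to be fully configured elsewhere (topics, mappers, output format)
    Job first = builder.configureJob(confA);
    Job second = builder.configureJob(confB); // no state leaks over from the first call
    first.submit();
    second.submit();
}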