List of usage examples for org.apache.hadoop.mapreduce Job waitForCompletion
public boolean waitForCompletion(boolean verbose) throws IOException, InterruptedException, ClassNotFoundException
From source file:com.cloudera.avro.MapReduceColorCount.java
License:Apache License
public int run(String[] args) throws Exception { if (args.length != 2) { System.err.println("Usage: MapReduceColorCount <input path> <output path>"); return -1; }/*from ww w . j a v a2 s.c o m*/ Job job = new Job(getConf()); job.setJarByClass(MapReduceColorCount.class); job.setJobName("Color Count"); FileInputFormat.setInputPaths(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); job.setInputFormatClass(AvroKeyInputFormat.class); job.setMapperClass(ColorCountMapper.class); AvroJob.setInputKeySchema(job, User.getClassSchema()); AvroJob.setMapOutputValueSchema(job, User.getClassSchema()); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); job.setOutputFormatClass(AvroKeyValueOutputFormat.class); job.setReducerClass(ColorCountReducer.class); AvroJob.setOutputKeySchema(job, Schema.create(Schema.Type.STRING)); AvroJob.setOutputValueSchema(job, Schema.create(Schema.Type.INT)); return (job.waitForCompletion(true) ? 0 : 1); }
From source file:com.cloudera.ByteCount.java
License:Apache License
public static void main(String[] args) throws Exception { JobConf conf = new JobConf(new Configuration()); // Trim off the hadoop-specific args String[] remArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); // Pull in properties Options options = new Options(); Option property = OptionBuilder.withArgName("property=value").hasArgs(2).withValueSeparator() .withDescription("use value for given property").create("D"); options.addOption(property);/* w w w. j a v a 2 s. c o m*/ Option skipChecksums = new Option("skipChecksums", "skip checksums"); options.addOption(skipChecksums); Option profile = new Option("profile", "profile tasks"); options.addOption(profile); CommandLineParser parser = new BasicParser(); CommandLine line = parser.parse(options, remArgs); Properties properties = line.getOptionProperties("D"); for (Entry<Object, Object> prop : properties.entrySet()) { conf.set(prop.getKey().toString(), prop.getValue().toString()); System.out.println("Set config key " + prop.getKey() + " to " + prop.getValue()); } if (line.hasOption("skipChecksums")) { conf.setBoolean("bytecount.skipChecksums", true); System.out.println("Skipping checksums"); } if (line.hasOption("profile")) { conf.setBoolean("mapred.task.profile", true); conf.set("mapred.task.profile.params", "-agentlib:hprof=cpu=samples,depth=100,interval=1ms,lineno=y,thread=y,file=%s"); conf.set(MRJobConfig.NUM_MAP_PROFILES, "0"); conf.set("mapred.task.profile.maps", "1"); System.out.println("Profiling map tasks"); } // Get the positional arguments out remArgs = line.getArgs(); if (remArgs.length != 2) { System.err.println("Usage: ByteCount <inputBase> <outputBase>"); System.exit(1); } String inputBase = remArgs[0]; String outputBase = remArgs[1]; Job job = Job.getInstance(conf); job.setInputFormatClass(ByteBufferInputFormat.class); job.setMapOutputKeyClass(ByteWritable.class); job.setMapOutputValueClass(LongWritable.class); job.setMapperClass(ByteCountMapper.class); job.setReducerClass(ByteCountReducer.class); job.setCombinerClass(ByteCountReducer.class); job.setOutputKeyClass(ByteWritable.class); job.setOutputValueClass(LongWritable.class); FileInputFormat.addInputPath(job, new Path(inputBase)); FileOutputFormat.setOutputPath(job, new Path(outputBase)); job.setJarByClass(ByteCount.class); boolean success = job.waitForCompletion(true); Counters counters = job.getCounters(); System.out.println("\tRead counters"); printCounter(counters, READ_COUNTER.BYTES_READ); printCounter(counters, READ_COUNTER.LOCAL_BYTES_READ); printCounter(counters, READ_COUNTER.SCR_BYTES_READ); printCounter(counters, READ_COUNTER.ZCR_BYTES_READ); System.exit(success ? 0 : 1); }
From source file:com.cloudera.castagna.logparser.mr.StatusCodesStats.java
License:Apache License
@Override public int run(String[] args) throws Exception { if (args.length != 2) { System.err.printf("Usage: %s [generic options] <input> <output>\n", getClass().getName()); ToolRunner.printGenericCommandUsage(System.err); return -1; }//from w w w . ja v a2 s. c o m Configuration configuration = getConf(); boolean useCompression = configuration.getBoolean(Constants.OPTION_USE_COMPRESSION, Constants.OPTION_USE_COMPRESSION_DEFAULT); if (useCompression) { configuration.setBoolean("mapred.compress.map.output", true); configuration.set("mapred.output.compression.type", "BLOCK"); configuration.set("mapred.map.output.compression.codec", "org.apache.hadoop.io.compress.GzipCodec"); } boolean overrideOutput = configuration.getBoolean(Constants.OPTION_OVERWRITE_OUTPUT, Constants.OPTION_OVERWRITE_OUTPUT_DEFAULT); FileSystem fs = FileSystem.get(new Path(args[1]).toUri(), configuration); if (overrideOutput) { fs.delete(new Path(args[1]), true); } Job job = Job.getInstance(configuration); job.setJobName(Constants.STATUS_CODES_STATS); job.setJarByClass(getClass()); FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); job.setInputFormatClass(TextInputFormat.class); job.setMapperClass(StatusCodesStatsMapper.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(Text.class); job.setCombinerClass(StatusCodesStatsCombiner.class); job.setReducerClass(StatusCodesStatsReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); Utils.setReducers(job, configuration, log); job.setOutputFormatClass(TextOutputFormat.class); if (log.isDebugEnabled()) Utils.log(job, log); return job.waitForCompletion(true) ? 0 : 1; }
From source file:com.cloudera.castagna.logparser.mr.TranscodeLogs.java
License:Apache License
@Override public int run(String[] args) throws Exception { if (args.length != 2) { System.err.printf("Usage: %s [generic options] <input> <output>\n", getClass().getName()); ToolRunner.printGenericCommandUsage(System.err); return -1; }//from ww w.j a v a 2 s. c o m Configuration configuration = getConf(); boolean overrideOutput = configuration.getBoolean(Constants.OPTION_OVERWRITE_OUTPUT, Constants.OPTION_OVERWRITE_OUTPUT_DEFAULT); FileSystem fs = FileSystem.get(new Path(args[1]).toUri(), configuration); if (overrideOutput) { fs.delete(new Path(args[1]), true); } Job job = Job.getInstance(configuration); job.setJobName(Constants.STATUS_CODES_STATS); job.setJarByClass(getClass()); FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); job.setInputFormatClass(TextInputFormat.class); job.setMapperClass(TranscodeLogsMapper.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(Text.class); job.setNumReduceTasks(0); job.setOutputFormatClass(TextOutputFormat.class); if (log.isDebugEnabled()) Utils.log(job, log); return job.waitForCompletion(true) ? 0 : 1; }
From source file:com.cloudera.hbase.WordCount.java
License:Open Source License
public int run(String[] args) throws Exception { if (args.length != 2) { System.err.println("Usage: wordcount <in> <out>"); return 2; }/*from w w w. j a va2s .co m*/ Configuration conf = getConf(); Job job = new Job(conf, "word count"); job.setJarByClass(WordCount.class); job.setMapperClass(Map.class); job.setCombinerClass(Reduce.class); job.setReducerClass(Reduce.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); return job.waitForCompletion(true) ? 0 : 1; }
From source file:com.cloudera.recordservice.avro.mapreduce.ColorCount.java
License:Apache License
/** * Run the MR2 color count with generic records, and return a map of favorite colors to * the number of users.//from ww w . ja v a2s . c o m */ public static java.util.Map<String, Integer> countColors() throws IOException, ClassNotFoundException, InterruptedException { String output = TestUtil.getTempDirectory(); Path outputPath = new Path(output); JobConf conf = new JobConf(ColorCount.class); conf.setInt("mapreduce.job.reduces", 1); Job job = Job.getInstance(conf); job.setJarByClass(ColorCount.class); job.setJobName("MR2 Color Count With Generic Records"); RecordServiceConfig.setInputTable(job.getConfiguration(), "rs", "users"); job.setInputFormatClass(com.cloudera.recordservice.avro.mapreduce.AvroKeyInputFormat.class); FileOutputFormat.setOutputPath(job, outputPath); job.setMapperClass(Map.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); job.setOutputFormatClass(AvroKeyValueOutputFormat.class); job.setReducerClass(Reduce.class); AvroJob.setOutputKeySchema(job, Schema.create(Schema.Type.STRING)); AvroJob.setOutputValueSchema(job, Schema.create(Schema.Type.INT)); job.waitForCompletion(false); // Read the result and return it. Since we set the number of reducers to 1, // there is always just one file containing the value. SeekableInput input = new FsInput(new Path(output + "/part-r-00000.avro"), conf); DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>(); FileReader<GenericRecord> fileReader = DataFileReader.openReader(input, reader); java.util.Map<String, Integer> colorMap = new HashMap<String, Integer>(); for (GenericRecord datum : fileReader) { colorMap.put(datum.get(0).toString(), Integer.parseInt(datum.get(1).toString())); } return colorMap; }
From source file:com.cloudera.recordservice.examples.mapreduce.MapReduceAgeCount.java
License:Apache License
public int run(String[] args) throws Exception { org.apache.log4j.BasicConfigurator.configure(); if (args.length != 2) { System.err.println("Usage: MapReduceAgeCount <input path> <output path>"); return -1; }//www.j a va 2s . co m Job job = Job.getInstance(getConf()); job.setJarByClass(MapReduceAgeCount.class); job.setJobName("Age Count"); // RECORDSERVICE: // To read from a table instead of a path, comment out // FileInputFormat.setInputPaths() and instead use: // FileInputFormat.setInputPaths(job, new Path(args[0])); RecordServiceConfig.setInputTable(job.getConfiguration(), null, args[0]); // RECORDSERVICE: // Use the RecordService version of the AvroKeyValueInputFormat job.setInputFormatClass(com.cloudera.recordservice.avro.mapreduce.AvroKeyValueInputFormat.class); FileOutputFormat.setOutputPath(job, new Path(args[1])); job.setMapperClass(AgeCountMapper.class); // Set schema for input key and value. AvroJob.setInputKeySchema(job, UserKey.getClassSchema()); AvroJob.setInputValueSchema(job, UserValue.getClassSchema()); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); job.setOutputFormatClass(AvroKeyValueOutputFormat.class); job.setReducerClass(AgeCountReducer.class); AvroJob.setOutputKeySchema(job, Schema.create(Schema.Type.STRING)); AvroJob.setOutputValueSchema(job, Schema.create(Schema.Type.INT)); return (job.waitForCompletion(true) ? 0 : 1); }
From source file:com.cloudera.recordservice.examples.mapreduce.MapReduceColorCount.java
License:Apache License
@Override public int run(String[] args) throws Exception { org.apache.log4j.BasicConfigurator.configure(); if (args.length != 2) { System.err.println("Usage: MapReduceColorCount <input path> <output path>"); return -1; }/*from ww w. j a v a 2 s . co m*/ Job job = Job.getInstance(getConf()); job.setJarByClass(MapReduceColorCount.class); job.setJobName("Color Count"); // RECORDSERVICE: // To read from a table instead of a path, comment out // FileInputFormat.setInputPaths() and instead use: //FileInputFormat.setInputPaths(job, new Path(args[0])); RecordServiceConfig.setInputTable(job.getConfiguration(), "rs", "users"); // RECORDSERVICE: // Use the RecordService version of the AvroKeyInputFormat job.setInputFormatClass(com.cloudera.recordservice.avro.mapreduce.AvroKeyInputFormat.class); //job.setInputFormatClass(AvroKeyInputFormat.class); FileOutputFormat.setOutputPath(job, new Path(args[1])); job.setMapperClass(ColorCountMapper.class); AvroJob.setInputKeySchema(job, User.getClassSchema()); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); job.setOutputFormatClass(AvroKeyValueOutputFormat.class); job.setReducerClass(ColorCountReducer.class); AvroJob.setOutputKeySchema(job, Schema.create(Schema.Type.STRING)); AvroJob.setOutputValueSchema(job, Schema.create(Schema.Type.INT)); return (job.waitForCompletion(true) ? 0 : 1); }
From source file:com.cloudera.recordservice.examples.mapreduce.RecordCount.java
License:Apache License
@Override public int run(String[] args) throws Exception { if (args.length != 2) { System.err.println("Usage: RecordCount <input_query> <output_path>"); System.exit(1);/*from w ww . j a va 2 s . co m*/ } String inputQuery = args[0]; String output = args[1]; Job job = Job.getInstance(getConf()); job.setJobName("recordcount"); job.setJarByClass(RecordCount.class); job.setMapperClass(Map.class); job.setCombinerClass(Reduce.class); job.setReducerClass(Reduce.class); job.setNumReduceTasks(1); job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(LongWritable.class); RecordServiceConfig.setInputQuery(job.getConfiguration(), inputQuery); job.setInputFormatClass(RecordServiceInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); FileSystem fs = FileSystem.get(job.getConfiguration()); Path outputPath = new Path(output); if (fs.exists(outputPath)) fs.delete(outputPath, true); FileOutputFormat.setOutputPath(job, outputPath); return job.waitForCompletion(true) ? 0 : 1; }
From source file:com.cloudera.recordservice.examples.terasort.TeraChecksum.java
License:Apache License
@Override public int run(String[] args) throws Exception { boolean useRecordService = false; Job job = Job.getInstance(getConf()); if (args.length != 2 && args.length != 3) { usage();/*www. j a v a 2s . c o m*/ return 2; } if (args.length == 3) { useRecordService = Boolean.parseBoolean(args[2]); } FileOutputFormat.setOutputPath(job, new Path(args[1])); job.setJobName("TeraSum"); job.setJarByClass(TeraChecksum.class); job.setMapperClass(ChecksumMapper.class); job.setReducerClass(ChecksumReducer.class); job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(Unsigned16.class); // force a single reducer job.setNumReduceTasks(1); if (useRecordService) { RecordServiceConfig.setInputTable(job.getConfiguration(), null, args[0]); job.setInputFormatClass(RecordServiceTeraInputFormat.class); } else { TeraInputFormat.setInputPaths(job, new Path(args[0])); job.setInputFormatClass(TeraInputFormat.class); } return job.waitForCompletion(true) ? 0 : 1; }