List of usage examples for org.apache.hadoop.mapreduce.Job#waitForCompletion
public boolean waitForCompletion(boolean verbose) throws IOException, InterruptedException, ClassNotFoundException
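waitForCompletion(true) submits the job, blocks until it finishes while polling and printing progress, and returns true only if the job succeeded. As a quick orientation before the per-project examples below, here is a minimal driver sketch; MyMapper, MyReducer, and the command-line paths are hypothetical placeholders and are not taken from any of the sources listed.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class MinimalDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "waitForCompletion example");
        job.setJarByClass(MinimalDriver.class);
        job.setMapperClass(MyMapper.class);      // hypothetical mapper class
        job.setReducerClass(MyReducer.class);    // hypothetical reducer class
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // Submit, block until the job finishes, and print progress because verbose == true.
        boolean success = job.waitForCompletion(true);
        System.exit(success ? 0 : 1);
    }
}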
From source file: com.jet.hadoop.wordcount.WordCount.java
License: Apache License
public static void main(String[] args) throws Exception {
    long timeBegin = System.currentTimeMillis();
    System.out.println("hadoop wordcount begins at " + timeBegin);

    // Fall back to local test paths when no arguments are supplied.
    if (args == null || args.length == 0) {
        args = new String[2];
        args[0] = "E:\\Work\\input\\hello.txt";
        args[1] = "E:\\Work\\output";
    }

    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: wordcount <in> [<in>...] <out>");
        System.exit(2);
    }

    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    // job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    // job.setNumReduceTasks(2);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    for (int i = 0; i < otherArgs.length - 1; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));

    boolean result = job.waitForCompletion(true);

    long timeEnd = System.currentTimeMillis();
    System.out.println("hadoop wordcount ended at " + timeEnd);
    System.out.println("hadoop wordcount took " + (timeEnd - timeBegin) / 1000 + " seconds.");
    System.exit(result ? 0 : 1);
}
From source file: com.jhkt.playgroundArena.hadoop.tasks.jobs.AverageJob.java
License: Apache License
@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    Job job = new Job(conf, AverageJob.class.getSimpleName());
    job.setJarByClass(AverageJob.class);

    Path in = new Path(args[0]);
    Path out = new Path(args[1]);
    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);

    job.setJobName("Sample Average Job");
    job.setMapperClass(AverageMapper.class);
    job.setCombinerClass(AverageCombiner.class);
    job.setReducerClass(AverageReducer.class);

    job.setInputFormatClass(TextInputFormat.class);
    //job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);

    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IntWritable.class);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
    return 0;
}
From source file: com.jhkt.playgroundArena.hadoop.tasks.jobs.AverageMultipleOutputJob.java
License: Apache License
@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    Job job = new Job(conf, AverageMultipleOutputJob.class.getSimpleName());
    job.setJarByClass(AverageMultipleOutputJob.class);

    Path in = new Path(args[0]);
    Path out = new Path(args[1]);
    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);

    job.setJobName("Sample Multiple Output Job");
    job.setMapperClass(AverageMapper.class);
    job.setReducerClass(AverageMultipleOutputReducer.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IntWritable.class);

    MultipleOutputs.addNamedOutput(job, "greaterThan1000", TextOutputFormat.class, Text.class,
            DoubleWritable.class);
    MultipleOutputs.addNamedOutput(job, "lessThan1000", TextOutputFormat.class, Text.class,
            DoubleWritable.class);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
    return 0;
}
From source file: com.jhkt.playgroundArena.hadoop.tasks.jobs.BloomFilterJob.java
License: Apache License
@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    Job job = new Job(conf, BloomFilterJob.class.getSimpleName());
    job.setJarByClass(BloomFilterJob.class);

    Path in = new Path(args[0]);
    Path out = new Path(args[1]);
    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);

    job.setJobName("Sample BloomFilter Job");
    job.setMapperClass(BloomFilterMapper.class);
    job.setReducerClass(BloomFilterReducer.class);
    job.setNumReduceTasks(1);
    job.setInputFormatClass(TextInputFormat.class);

    /*
     * We want our reducer to output the final BloomFilter as a binary file. I think
     * Hadoop doesn't have this format [check later], so we use NullOutputFormat.class.
     *
     * In general, life gets a little more dangerous when you deviate from MapReduce's
     * input/output framework and start working with your own files. Your tasks are no
     * longer guaranteed to be idempotent, and you need to understand how various failure
     * scenarios can affect your tasks. For example, your files may only be partially
     * written when some tasks are restarted. Our example here is safe(r) because all the
     * file operations take place together, only once, in the close() method and in only
     * one reducer. A more careful/paranoid implementation would check each individual
     * file operation more closely.
     */
    job.setOutputFormatClass(NullOutputFormat.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(BloomFilter.class);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
    return 0;
}
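The comment in the example above mentions writing the final BloomFilter out by hand from a single reducer. A minimal sketch of what that step could look like, assuming the reducer keeps the accumulated filter in a field named filter and writes it to a hypothetical HDFS path (org.apache.hadoop.util.bloom.BloomFilter implements Writable, so it can serialize itself to any DataOutput); this is not taken from the original BloomFilterReducer:

// Sketch only: assumes a field `private BloomFilter filter;` that reduce() has been
// OR-ing partial filters into. Requires imports for FileSystem, Path, and FSDataOutputStream.
@Override
protected void cleanup(Context context) throws IOException, InterruptedException {
    FileSystem fs = FileSystem.get(context.getConfiguration());
    Path out = new Path("/tmp/bloomfilter.bin");   // hypothetical output location
    try (FSDataOutputStream stream = fs.create(out, true)) {
        filter.write(stream);   // BloomFilter implements Writable
    }
}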
From source file: com.jhkt.playgroundArena.hadoop.tasks.jobs.ChainJob.java
License: Apache License
@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    Job job = new Job(conf, ChainJob.class.getSimpleName());
    job.setJobName("Sample Chain Job");
    job.setJarByClass(ChainJob.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    Path in = new Path(args[0]);
    Path out = new Path(args[1]);
    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);

    ChainMapper.addMapper(job, ReverseMapper.class, Text.class, Text.class, Text.class, Text.class,
            new Configuration(false));
    ChainMapper.addMapper(job, AverageMapper.class, Text.class, Text.class, Text.class, AverageWritable.class,
            new Configuration(false));
    ChainReducer.setReducer(job, AverageReducer.class, Text.class, AverageWritable.class, Text.class,
            DoubleWritable.class, new Configuration(false));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
    return 0;
}
From source file: com.jhkt.playgroundArena.hadoop.tasks.jobs.CountJob.java
License: Apache License
@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    Job job = new Job(conf, CountJob.class.getSimpleName());
    job.setJarByClass(CountJob.class);

    Path in = new Path(args[0]);
    Path out = new Path(args[1]);
    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);

    job.setJobName("Sample Count Job");
    job.setMapperClass(CountMapper.class);
    job.setReducerClass(CountReducer.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IntWritable.class);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
    return 0;
}
From source file: com.jhkt.playgroundArena.hadoop.tasks.jobs.DistributedCacheJob.java
License: Apache License
@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    Job job = new Job(conf, DistributedCacheJob.class.getSimpleName());
    job.setJarByClass(DistributedCacheJob.class);

    /*
     * The following would disseminate the file to all the nodes; the path defaults to HDFS.
     * The second and third arguments denote the input and output paths of the standard Hadoop
     * job. Note that we've limited the number of data sources to two. This is not an inherent
     * limitation of the technique, but a simplification that makes our code easier to follow.
     */
    //job.addCacheFile(new Path(args[0]).toUri());

    Path in = new Path(args[1]);
    Path out = new Path(args[2]);
    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);

    job.setJobName("Sample DistributedCache Job");
    job.setMapperClass(DistributedCacheMapper.class);

    /*
     * The reducer class is omitted: the join is performed in the map phase, so the job is
     * configured with no reduce tasks.
     */
    job.setNumReduceTasks(0);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
    return 0;
}
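The job.addCacheFile call in the example above is commented out. For reference, a minimal sketch (an assumption, not part of the original DistributedCacheMapper) of how a map-side join usually picks the cached file back up: register the URI on the Job in the driver, then read it in the mapper's setup():

// Driver side (hypothetical: the cache file path arrives as args[0]):
//     job.addCacheFile(new Path(args[0]).toUri());

import java.io.IOException;
import java.net.URI;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class CacheAwareMapper extends Mapper<LongWritable, Text, Text, Text> {

    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        // URIs registered via job.addCacheFile(...) in the driver show up here.
        URI[] cached = context.getCacheFiles();
        if (cached != null && cached.length > 0) {
            FileSystem fs = FileSystem.get(context.getConfiguration());
            try (FSDataInputStream in = fs.open(new Path(cached[0]))) {
                // ... load the lookup data for the map-side join into a field ...
            }
        }
    }
}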
From source file: com.jimmy.ibeifeng.hdfs_api.pv.java
License: Apache License
public int run(String[] arg0) throws Exception {
    Path input = new Path("hdfs://192.168.1.113:9000/user/jimmy/pv/input");
    Path output = new Path("hdfs://192.168.1.113:9000/user/jimmy/pv/output");

    // Clear the output path (via the project's JobUtil helper) so the job can be rerun.
    JobUtil.delete(super.getConf(), output);

    Job job = prepareJob(input, output, TextInputFormat.class, TokenizerMapper.class, Text.class, Text.class,
            IntSumReducer.class, Text.class, Text.class, TextOutputFormat.class);
    return job.waitForCompletion(true) ? 0 : 1;
}
From source file: com.jimmy.ibeifeng.hdfs_api.uv.java
License: Apache License
public int run(String[] arg0) throws Exception {
    Path input = new Path("hdfs://192.168.1.113:9000/user/jimmy/pv/input");
    Path output = new Path("hdfs://192.168.1.113:9000/user/jimmy/pv/output");

    // Clear the output path (via the project's JobUtil helper) so the job can be rerun.
    JobUtil.delete(super.getConf(), output);

    Job job = prepareJob(input, output, TextInputFormat.class, UVMapper.class, Text.class, Text.class,
            UVReducer.class, Text.class, Text.class, TextOutputFormat.class);
    return job.waitForCompletion(true) ? 0 : 1;
}
From source file: com.jumptap.h2redis.RedisDriver.java
License: Open Source License
@Override
public int run(String[] args) throws Exception {
    if (args.length < 5) {
        usage();
        return 1;
    }

    // Parse key=value arguments into a map.
    Map<String, String> argMap = new HashMap<String, String>();
    String[] kv;
    for (String arg : args) {
        kv = arg.split("=");
        if (kv.length != 2) {
            usage();
            return 1;
        }
        argMap.put(kv[0].trim(), kv[1]);
    }

    Configuration conf = getConf();
    String[] hostPort = argMap.get(REDIS_CMD).split(":");
    conf.set(REDIS_HOST, hostPort[0].trim());
    conf.setInt(REDIS_PORT, Integer.valueOf(hostPort[1].trim()));
    conf.setInt(REDIS_KEY_FIELD, Integer.valueOf(argMap.get(KEY_CMD).trim()));
    conf.setInt(REDIS_HASHKEY_FIELD, Integer.valueOf(argMap.get(HASH_KEY_CMD).trim()));
    conf.setInt(REDIS_HASHVAL_FIELD, Integer.valueOf(argMap.get(HASH_VAL_CMD).trim()));

    if (argMap.containsKey(REDIS_DB_CMD)) {
        conf.set(REDIS_DB, argMap.get(REDIS_DB_CMD).trim());
    }
    if (argMap.containsKey(REDIS_PW_CMD)) {
        conf.set(REDIS_PW, argMap.get(REDIS_PW_CMD).trim());
    }
    if (argMap.containsKey(KEY_PFX_CMD)) {
        conf.set(REDIS_KEY_PREFIX, argMap.get(KEY_PFX_CMD).trim());
    }
    if (argMap.containsKey(HASH_KEY_PFX_CMD)) {
        conf.set(REDIS_HASHKEY_PREFIX, argMap.get(HASH_KEY_PFX_CMD).trim());
    }
    if (argMap.containsKey(KEY_PFX_DELIM_CMD)) {
        conf.set(REDIS_KEY_PREFIX_DELIM, argMap.get(KEY_PFX_DELIM_CMD).trim());
    }
    if (argMap.containsKey(KEY_FILTER_CMD)) {
        conf.setPattern(REDIS_KEY_FILTER, Pattern.compile(argMap.get(KEY_FILTER_CMD).trim()));
    }
    if (argMap.containsKey(HASH_FILTER_CMD)) {
        conf.setPattern(REDIS_HASH_FILTER, Pattern.compile(argMap.get(HASH_FILTER_CMD).trim()));
    }
    if (argMap.containsKey(VAL_FILTER_CMD)) {
        conf.setPattern(REDIS_VAL_FILTER, Pattern.compile(argMap.get(VAL_FILTER_CMD).trim()));
    }
    if (argMap.containsKey(TTL_CMD)) {
        conf.setInt(REDIS_KEY_TTL, Integer.valueOf(argMap.get(TTL_CMD).trim()));
    }
    if (argMap.containsKey(TS_KEY_CMD)) {
        conf.set(REDIS_KEY_TS, argMap.get(TS_KEY_CMD).trim());
    } else {
        conf.set(REDIS_KEY_TS, "redis.lastupdate");
    }

    Job job = new Job(conf, "RedisDriver");
    FileInputFormat.addInputPath(job, new Path(argMap.get(INPUT_CMD)));
    job.setJarByClass(RedisDriver.class);
    job.setMapperClass(RedisOutputMapper.class);
    job.setNumReduceTasks(0);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(RedisOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    return job.waitForCompletion(true) ? 0 : 1;
}