List of usage examples for the org.apache.hadoop.mapreduce.Job constructor
Job(Configuration conf, String jobName) throws IOException
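Before the per-project examples, a minimal self-contained driver may help show the constructor in isolation. This is a sketch, not code from any of the source files below: MinimalJobDriver is a hypothetical class name, no mapper or reducer is configured so Hadoop's defaults copy each (offset, line) record straight through, and note that in Hadoop 2.x this constructor is deprecated in favor of Job.getInstance(Configuration, String).

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class MinimalJobDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // The constructor documented above: takes a copy of conf and sets the job name.
        Job job = new Job(conf, "minimal job");
        job.setJarByClass(MinimalJobDriver.class);
        // No mapper/reducer set, so the default identity Mapper runs;
        // zero reduce tasks makes this a map-only pass-through job.
        job.setNumReduceTasks(0);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}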
From source file:com.mozilla.main.ReadHBaseWriteHdfs.java
License:LGPL
@Override
public int run(String[] args) throws Exception {
    Configuration conf = new Configuration();
    conf.set("mapred.job.queue.name", "prod");
    Job job = new Job(conf, "ReadHBaseWriteHDFS");
    job.setJarByClass(ReadHBaseWriteHdfs.class);
    Scan scan = new Scan();
    scan.addFamily("data".getBytes());
    TableMapReduceUtil.initTableMapperJob(TABLE_NAME, scan, ReadHBaseWriteHdfsMapper.class, Text.class,
            Text.class, job);
    job.setReducerClass(ReadHBaseWriteHdfsReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setNumReduceTasks(1000);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
    SequenceFileOutputFormat.setOutputPath(job, new Path(args[0]));
    job.waitForCompletion(true);
    if (job.isSuccessful()) {
        System.out.println("DONE");
    }
    return 0;
}
From source file:com.mvdb.platform.action.VersionMerge.java
License:Apache License
public static void main(String[] args) throws Exception {
    logger.error("error1");
    logger.warn("warning1");
    logger.info("info1");
    logger.debug("debug1");
    logger.trace("trace1");
    ActionUtils.setUpInitFileProperty();
    // LoggerContext lc = (LoggerContext) LoggerFactory.getILoggerFactory();
    // StatusPrinter.print(lc);

    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    // Also add lastMergedTimeStamp, mergeUptoTimestamp, and the passive db name (mv1 or mv2)
    if (otherArgs.length != 3) {
        System.err.println("Usage: versionmerge <customer-directory> <last-merged-dirname> <last-copied-dirname>");
        System.exit(2);
    }
    // Example: file:/home/umesh/.mvdb/etl/data/alpha
    // Example: hdfs://localhost:9000/data/alpha
    String customerDirectory = otherArgs[0];
    String lastMergedDirName = otherArgs[1];
    String lastCopiedDirName = otherArgs[2];

    org.apache.hadoop.conf.Configuration conf1 = new org.apache.hadoop.conf.Configuration();
    // conf1.addResource(new Path("/home/umesh/ops/hadoop-1.2.0/conf/core-site.xml"));
    FileSystem hdfsFileSystem = FileSystem.get(conf1);
    Path topPath = new Path(customerDirectory);

    // Clean scratch db
    Path passiveDbPath = new Path(topPath, "db/mv1");
    Path tempDbPath = new Path(topPath, "db/tmp-" + (int) (Math.random() * 100000));
    if (hdfsFileSystem.exists(tempDbPath)) {
        boolean success = hdfsFileSystem.delete(tempDbPath, true);
        if (success == false) {
            System.err.println(String.format("Unable to delete temp directory %s", tempDbPath.toString()));
            System.exit(1);
        }
    }

    // The last three parameters are hardcoded; the nulls must be replaced later after changing the input parameters.
    Path[] inputPaths = getInputPaths(hdfsFileSystem, topPath, lastMergedDirName, lastCopiedDirName, null);
    Set<String> tableNameSet = new HashSet<String>();
    for (Path path : inputPaths) {
        tableNameSet.add(path.getName());
    }

    Job job = new Job(conf, "versionmerge");
    job.setJarByClass(VersionMerge.class);
    job.setMapperClass(VersionMergeMapper.class);
    job.setReducerClass(VersionMergeReducer.class);
    job.setMapOutputKeyClass(MergeKey.class);
    job.setMapOutputValueClass(BytesWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(BytesWritable.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    String lastDirName = null;
    if (inputPaths != null && inputPaths.length > 1) {
        lastDirName = inputPaths[inputPaths.length - 2].getParent().getName();
    }
    for (Path inputPath : inputPaths) {
        FileInputFormat.addInputPath(job, inputPath);
    }
    FileOutputFormat.setOutputPath(job, tempDbPath);

    for (String table : tableNameSet) {
        if (table.endsWith(".dat") == false) {
            continue;
        }
        table = table.replaceAll("-", "");
        table = table.replaceAll(".dat", "");
        MultipleOutputs.addNamedOutput(job, table, SequenceFileOutputFormat.class, Text.class,
                BytesWritable.class);
    }

    boolean success = job.waitForCompletion(true);
    System.out.println("Success:" + success);
    System.out.println(ManagementFactory.getRuntimeMXBean().getName());
    if (success && lastDirName != null) {
        ActionUtils.setConfigurationValue(new Path(customerDirectory).getName(),
                ConfigurationKeys.LAST_MERGE_TO_MVDB_DIRNAME, lastDirName);
    }
    // hdfsFileSystem.delete(passiveDbPath, true);
    // hdfsFileSystem.rename(tempDbPath, passiveDbPath);
    System.exit(success ? 0 : 1);
}
From source file:com.mvdb.platform.scratch.action.WordCount.java
License:Apache License
public static void main(String[] args) throws Exception {
    logger.error("error1");
    logger.warn("warning1");
    logger.info("info1");
    logger.debug("debug1");
    logger.trace("trace1");
    ILoggerFactory lc = LoggerFactory.getILoggerFactory();
    System.err.println("lc:" + lc);
    // print logback's internal status
    // StatusPrinter.print(lc);

    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs != null) {
        for (String arg : otherArgs) {
            System.out.println(arg);
        }
    }
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.mycompany.hadooptrain.WordCount.java
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Path inputPath = new Path(args[0]);
    Path outputDir = new Path(args[1]);

    // Create configuration
    Configuration conf = new Configuration(true);

    // Create job
    Job job = new Job(conf, "WordCount");
    job.setJarByClass(WordCountMapper.class);

    // Setup MapReduce
    job.setMapperClass(WordCountMapper.class);
    job.setReducerClass(WordCountReducer.class);
    job.setNumReduceTasks(1);

    // Specify key / value
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    // Input
    FileInputFormat.addInputPath(job, inputPath);
    job.setInputFormatClass(TextInputFormat.class);

    // Output
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setOutputFormatClass(TextOutputFormat.class);

    // Delete output if it exists
    FileSystem hdfs = FileSystem.get(conf);
    if (hdfs.exists(outputDir))
        hdfs.delete(outputDir, true);

    // Execute job
    int code = job.waitForCompletion(true) ? 0 : 1;
    System.exit(code);
}
From source file:com.nearinfinity.blur.mapreduce.BlurTask.java
License:Apache License
public Job configureJob(Configuration configuration) throws IOException {
    if (getIndexingType() == INDEXING_TYPE.UPDATE) {
        checkTable();
    }
    ByteArrayOutputStream os = new ByteArrayOutputStream();
    DataOutputStream output = new DataOutputStream(os);
    write(output);
    output.close();
    String blurTask = new String(Base64.encodeBase64(os.toByteArray()));
    configuration.set(BLUR_BLURTASK, blurTask);

    Job job = new Job(configuration, "Blur Indexer");
    job.setReducerClass(BlurReducer.class);
    job.setOutputKeyClass(BytesWritable.class);
    job.setOutputValueClass(BlurMutate.class);
    job.setNumReduceTasks(getNumReducers(configuration));
    return job;
}
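Note that configureJob() only builds and returns the Job; unlike the main()-based drivers elsewhere on this page, it does not submit it. A hedged sketch of what calling code might do with the returned job (the BlurTask setup and input configuration are assumed to happen elsewhere):

BlurTask task = new BlurTask(); // assumed: task fields are populated before this point
Job job = task.configureJob(new Configuration());
// Input paths and the input format are assumed to be configured by the caller.
System.exit(job.waitForCompletion(true) ? 0 : 1);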
From source file:com.neu.cs6240.AvgTimeToAnswer.AvgTimeToAnsPerHashTag.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    conf.set("mapred.textoutputformat.separator", ",");
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: AvgTimeToAnsPerHashTag <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "AvgTimeToAnsPerHashTag");
    job.setJarByClass(AvgTimeToAnsPerHashTag.class);
    job.setMapperClass(AvgTimeToAnsPerHashTagMapper.class);
    job.setReducerClass(AvgTimeToAnsPerHashTagReducer.class);
    job.setCombinerClass(AvgTimeToAnsPerHashTagReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setPartitionerClass(AvgTimeToAnsPerHashTagPartitioner.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.neu.cs6240.TopKExperts.JoinQA.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: JoinQA <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "JoinQA");
    job.setJarByClass(JoinQA.class);
    job.setMapperClass(JoinQAMapper.class);
    job.setReducerClass(JoinQAReducer.class);
    job.setOutputKeyClass(JoinQAKey.class);
    job.setOutputValueClass(JoinQAValue.class);
    job.setPartitionerClass(JoinQAPartitioner.class);
    job.setGroupingComparatorClass(JoinQAGroupComparator.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    boolean isSucess = false;
    isSucess = job.waitForCompletion(true);
    if (isSucess) {
        // On successful completion of JoinQA, start UserAnswerCountPerHashTag
        System.out.println("MR - JoinQA complete. Starting UserAnswerCountPerHashTag...");
        String[] argsForMR2 = new String[2];
        argsForMR2[0] = otherArgs[1];
        argsForMR2[1] = otherArgs[1] + "MR2";
        isSucess = UserAnswerCountPerHashTag.initUserAnswerCountPerHashTag(argsForMR2);
        if (isSucess) {
            // On successful completion of UserAnswerCountPerHashTag, start TopKPerHashTag
            System.out.println("MR - UserAnswerCountPerHashTag complete. Starting TopKPerHashTag...");
            String[] argsForMR3 = new String[2];
            argsForMR3[0] = argsForMR2[1];
            argsForMR3[1] = argsForMR2[1] + "MR3";
            isSucess = TopKPerHashTag.initTopKPerHashTag(argsForMR3);
            if (isSucess) {
                // All three MR jobs completed successfully
                System.out.println("All MR - Successful.");
            } else {
                // TopKPerHashTag MR failed
                System.out.println("MR - TopKPerHashTag failed.");
            }
        } else {
            // On unsuccessful completion of UserAnswerCountPerHashTag, end the MR chain
            System.out.println("MR - UserAnswerCountPerHashTag failed.");
        }
    } else {
        // On unsuccessful completion of JoinQA, end the MR chain
        System.out.println("MR - JoinQA failed.");
    }
    System.exit(isSucess ? 0 : 1);
}
From source file:com.neu.cs6240.TopKExperts.TopKPerHashTag.java
License:Apache License
public static boolean initTopKPerHashTag(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: TopKPerHashTag <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "TopKPerHashTag");
    job.setJarByClass(TopKPerHashTag.class);
    job.setMapperClass(TopKPerHashTagMapper.class);
    job.setReducerClass(TopKPerHashTagReducer.class);
    job.setOutputKeyClass(TopKPerHashTagKey.class);
    job.setOutputValueClass(Text.class);
    job.setPartitionerClass(TopKPerHashTagPartitioner.class);
    job.setGroupingComparatorClass(TopKPerHashTagGroupComparator.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    return job.waitForCompletion(true);
}
From source file:com.neu.cs6240.TopKExperts.UserAnswerCountPerHashTag.java
License:Apache License
public static boolean initUserAnswerCountPerHashTag(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: UserAnswerCountPerHashTag <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "UserAnswerCountPerHashTag");
    job.setJarByClass(UserAnswerCountPerHashTag.class);
    job.setMapperClass(UserAnswerCountPerHashTagMapper.class);
    job.setReducerClass(UserAnswerCountPerHashTagReducer.class);
    job.setOutputKeyClass(UserAnswerCountPerHashTagKey.class);
    job.setOutputValueClass(IntWritable.class);
    job.setPartitionerClass(UserAnswerCountPerHashTagPartitioner.class);
    job.setGroupingComparatorClass(UserAnswerCountPerHashTagGroupComparator.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    return job.waitForCompletion(true);
}
From source file:com.neu.cs6240.Xml2csv.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Set the XML tags that delimit each record for the splitter
    conf.set("xmlinput.start", "<row ");
    conf.set("xmlinput.end", " />");
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: Xml2csv <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "Converts Posts.xml to .csv");
    job.setJarByClass(Xml2csv.class);
    job.setInputFormatClass(XmlInputFormat.class);
    job.setMapperClass(PostsMapper.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(IntWritable.class);
    job.setNumReduceTasks(0);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
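A note on the two conf.set() calls: XmlInputFormat here appears to be the Mahout-style reader that scans the input for the byte sequences given by xmlinput.start and xmlinput.end, so with these values each self-closing element of the form <row ... /> in Posts.xml becomes one record handed to PostsMapper. This reading is inferred from the configuration keys, not from the XmlInputFormat source itself.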