List of usage examples for the org.apache.hadoop.mapreduce.Job constructor
Job(JobStatus status, JobConf conf) throws IOException
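The signature above is one of several Job constructors; the examples below all go through the deprecated public constructor Job(Configuration conf, String jobName), for which Job.getInstance(Configuration, String) is the preferred replacement in Hadoop 2.x and later. A minimal construction sketch, assuming a stock Hadoop 2.x classpath (the job name "example" is a placeholder):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class JobConstructionSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Deprecated constructor, as used throughout the examples below.
        Job deprecatedStyle = new Job(conf, "example");
        // Preferred factory method since Hadoop 2.x.
        Job preferredStyle = Job.getInstance(conf, "example");
    }
}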
From source file:com.lakala.hbase.IndexBuilder2.java
License:Apache License
/**
 * Job configuration.
 */
public static Job configureJob(Configuration conf, String[] args) throws IOException {
    String tableName = args[0];
    String columnFamily = args[1];
    System.out.println("****" + tableName);
    conf.set(TableInputFormat.SCAN, convertScanToString(new Scan()));
    conf.set(TableInputFormat.INPUT_TABLE, tableName);
    conf.set("index.tablename", tableName);
    conf.set("index.familyname", columnFamily);
    String[] fields = new String[args.length - 2];
    for (int i = 0; i < fields.length; i++) {
        fields[i] = args[i + 2];
    }
    conf.setStrings("index.fields", fields);
    // Note: this overwrites the "index.familyname" value set from args[1] above.
    conf.set("index.familyname", "attributes");
    Job job = new Job(conf, tableName);
    job.setJarByClass(IndexBuilder.class);
    job.setMapperClass(Map.class);
    job.setNumReduceTasks(0); // map-only job
    job.setInputFormatClass(TableInputFormat.class);
    job.setOutputFormatClass(MultiTableOutputFormat.class);
    return job;
}
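The convertScanToString helper called above (and again in the FolderCount example below) is not shown. A hedged sketch of its assumed shape, on an HBase 0.96+ classpath: serialize the Scan to its protobuf form and Base64-encode it for storage in the job configuration.

import java.io.IOException;

import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.protobuf.generated.ClientProtos;
import org.apache.hadoop.hbase.util.Base64;

public class ScanSerialization {
    // Assumed shape of the helper; HBase 0.96+ ships an equivalent as
    // TableMapReduceUtil.convertScanToString(Scan).
    static String convertScanToString(Scan scan) throws IOException {
        ClientProtos.Scan proto = ProtobufUtil.toScan(scan);
        return Base64.encodeBytes(proto.toByteArray());
    }
}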
From source file:com.lakhani.anchorgraph.anchorgraph.java
public int run(String[] args) throws Exception {
    Configuration conf = new Configuration();
    //DistributedCache.addCacheFile(new URI("hdfs://zphdc1n1:8020/user/clakhani/anchorgraph/centroids.txt"), conf);
    conf.set("numberCentroids", args[3]);
    conf.set("numberFeatures", args[4]);
    Job job = new Job(conf, "anchorgraph");
    job.addCacheFile(new URI(args[2]));
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setMapperClass(Map.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    // Note: args[2] is also used as the cache-file URI above; the output path
    // was presumably meant to be a distinct argument.
    FileOutputFormat.setOutputPath(job, new Path(args[2]));
    job.setJarByClass(anchorgraph.class);
    // waitForCompletion() submits the job itself, so an explicit submit()
    // beforehand is redundant. By ToolRunner convention, return 0 on success.
    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:com.lakhani.anchorgraph.testCache.java
public int run(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = new Job(conf, "testCache");
    job.addCacheFile(new URI("hdfs://zphdc1n1:8020/user/clakhani/anchorgraph/centroids.txt"));
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setMapperClass(Map.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setJarByClass(testCache.class);
    // waitForCompletion() submits the job itself; return 0 on success per
    // the ToolRunner convention.
    return job.waitForCompletion(true) ? 0 : 1;
}
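The Map class this driver references is not shown. A hedged sketch of how a mapper might consume the cached centroids file, assuming Hadoop 2.x (context.getCacheFiles()); the class name and per-line format are placeholders, and the real Map class in testCache.java may differ:

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.net.URI;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// Hypothetical mapper illustrating cache-file access.
public class CentroidCacheMapper extends Mapper<LongWritable, Text, Text, Text> {
    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        URI[] cacheFiles = context.getCacheFiles();
        if (cacheFiles != null && cacheFiles.length > 0) {
            // Files added with job.addCacheFile() are localized into the task's
            // working directory under their original file names.
            BufferedReader reader = new BufferedReader(new FileReader("centroids.txt"));
            try {
                String line;
                while ((line = reader.readLine()) != null) {
                    // parse one centroid per line (format assumed)
                }
            } finally {
                reader.close();
            }
        }
    }
}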
From source file:com.lakhani.anchorgraph.wordcount.java
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = new Job(conf, "wordcount");
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setMapperClass(Map.class);
    job.setCombinerClass(Reduce.class);
    job.setReducerClass(Reduce.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setJarByClass(wordcount.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.waitForCompletion(true);
}
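The Map and Reduce classes this driver wires up are not shown; a minimal sketch of the canonical word-count pair, hedged in that the original classes may tokenize differently. The types must match the driver's declared Text/IntWritable outputs.

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;

public class WordCountClasses {
    public static class Map extends Mapper<LongWritable, Text, Text, IntWritable> {
        private static final IntWritable ONE = new IntWritable(1);
        private final Text word = new Text();

        @Override
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            StringTokenizer tokens = new StringTokenizer(value.toString());
            while (tokens.hasMoreTokens()) {
                word.set(tokens.nextToken());
                context.write(word, ONE);
            }
        }
    }

    // Used as both combiner and reducer, which is safe because summation is
    // associative and commutative.
    public static class Reduce extends Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        public void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable v : values) {
                sum += v.get();
            }
            context.write(key, new IntWritable(sum));
        }
    }
}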
From source file:com.leon.hadoop.loganalyse.DistributedGrep.java
License:Open Source License
public int run(String[] args) throws Exception {
    Configuration conf = new Configuration();
    GenericOptionsParser parser = new GenericOptionsParser(conf, args);
    String[] otherArgs = parser.getRemainingArgs();
    if (otherArgs.length != 3) {
        System.err.println("Usage: DistributedGrep <regex> <in> <out>");
        ToolRunner.printGenericCommandUsage(System.err);
        System.exit(2);
    }
    @SuppressWarnings("deprecation")
    Job job = new Job(conf, "Distributed Grep");
    job.setJarByClass(DistributedGrep.class);
    job.setMapperClass(GrepMapper.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    job.getConfiguration().set(REGEX_KEY, otherArgs[0]);
    FileInputFormat.addInputPath(job, new Path(otherArgs[1]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[2]));
    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;
}
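The GrepMapper is not shown. A hedged sketch of what it plausibly looks like, compiling the pattern stored under REGEX_KEY in setup() and emitting matching lines; the key string and class body here are assumptions, and the output types match the driver's NullWritable/Text declarations:

import java.io.IOException;
import java.util.regex.Pattern;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class GrepMapperSketch extends Mapper<LongWritable, Text, NullWritable, Text> {
    // Placeholder: the real key is whatever the driver's REGEX_KEY constant holds.
    private static final String REGEX_KEY = "grep.regex";
    private Pattern pattern;

    @Override
    protected void setup(Context context) {
        pattern = Pattern.compile(context.getConfiguration().get(REGEX_KEY));
    }

    @Override
    public void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        if (pattern.matcher(value.toString()).find()) {
            context.write(NullWritable.get(), value);
        }
    }
}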
From source file:com.lightboxtechnologies.nsrl.HashLoader.java
License:Apache License
public static void main(String[] args) throws Exception {
    final Configuration conf = new Configuration();
    final String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 6) {
        System.err.println("Usage: HashLoader <mfgfile> <osfile> <prodfile> <hashfile> <outpath> <num_reducers>");
        System.exit(2);
    }
    final String mfg_filename = otherArgs[0];
    final String os_filename = otherArgs[1];
    final String prod_filename = otherArgs[2];
    final String hash_filename = otherArgs[3];
    final String output_filename = otherArgs[4];
    conf.set("mfg_filename", mfg_filename);
    conf.set("os_filename", os_filename);
    conf.set("prod_filename", prod_filename);
    conf.setLong("timestamp", System.currentTimeMillis());
    SKJobFactory.addDependencies(conf);
    final Job job = new Job(conf, "HashLoader");
    job.setJarByClass(HashLoader.class);
    job.setMapperClass(HashLoaderMapper.class);
    job.setReducerClass(KeyValueSortReducer.class);
    job.setNumReduceTasks(Integer.parseInt(otherArgs[5]));
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(KeyValue.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(HFileOutputFormat.class);
    TextInputFormat.addInputPath(job, new Path(hash_filename));
    HFileOutputFormat.setOutputPath(job, new Path(output_filename));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
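HFileOutputFormat writes HFiles rather than inserting rows directly, so the job's output still has to be handed to HBase afterwards. A hedged sketch of that follow-up step, assuming HBase 0.98/1.x where LoadIncrementalHFiles.doBulkLoad(Path, HTable) is available; the table name is a placeholder:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles;

public class CompleteBulkLoad {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        // "nsrl" is a placeholder; substitute the real target table name.
        HTable table = new HTable(conf, "nsrl");
        try {
            // args[0] is the HFile output directory produced by the job above.
            new LoadIncrementalHFiles(conf).doBulkLoad(new Path(args[0]), table);
        } finally {
            table.close();
        }
    }
}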
From source file:com.lightboxtechnologies.nsrl.TableDumper.java
License:Apache License
public static void main(String[] args) throws Exception {
    final Configuration conf = HBaseConfiguration.create();
    final String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: TableDumper <table> <outpath>");
        System.exit(2);
    }
    final String table_name = otherArgs[0];
    final String output_filename = otherArgs[1];
    final Job job = new Job(conf, "TableDumper");
    job.setJarByClass(TableDumper.class);
    final Scan scan = new Scan();
    TableMapReduceUtil.initTableMapperJob(table_name, scan, TableDumperMapper.class, Text.class, Text.class, job);
    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, new Path(output_filename));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
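The TableDumperMapper is not shown; initTableMapperJob wires it to receive (ImmutableBytesWritable, Result) pairs from the scan. A hedged sketch with the declared Text/Text output types; the real mapper's output formatting is unknown:

import java.io.IOException;

import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.Text;

// Hypothetical mapper: emit the row key and a string rendering of the row.
public class TableDumperMapperSketch extends TableMapper<Text, Text> {
    @Override
    public void map(ImmutableBytesWritable row, Result result, Context context)
            throws IOException, InterruptedException {
        context.write(new Text(Bytes.toStringBinary(row.get())),
                new Text(result.toString()));
    }
}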
From source file:com.lightboxtechnologies.spectrum.FolderCount.java
License:Apache License
public static void main(String[] args) throws Exception {
    final Configuration conf = new Configuration();
    final String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: FolderCount <table> <outpath>");
        System.exit(2);
    }
    final Job job = new Job(conf, "FolderCount");
    job.setJarByClass(FolderCount.class);
    job.setMapperClass(FolderCountMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setNumReduceTasks(1);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setInputFormatClass(FsEntryHBaseInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    final Scan scan = new Scan();
    scan.addFamily(HBaseTables.ENTRIES_COLFAM_B);
    job.getConfiguration().set(TableInputFormat.INPUT_TABLE, otherArgs[0]);
    job.getConfiguration().set(TableInputFormat.SCAN, convertScanToString(scan));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.lightboxtechnologies.spectrum.MRCoffeeJob.java
License:Apache License
public static int run(String imageID, String outpath, String[] command, Configuration conf)
        throws ClassNotFoundException, DecoderException, IOException, InterruptedException {
    conf.setStrings("command", command);
    conf.setLong("timestamp", System.currentTimeMillis());
    final Job job = new Job(conf, "MRCoffeeJob");
    job.setJarByClass(MRCoffeeJob.class);
    job.setMapperClass(MRCoffeeMapper.class);
    // Commented-out lines below preserve an alternative configuration that
    // produced HFiles via a sort reducer instead of text output.
    // job.setReducerClass(KeyValueSortReducer.class);
    // job.setNumReduceTasks(1);
    job.setNumReduceTasks(0); // map-only job
    FsEntryHBaseInputFormat.setupJob(job, imageID);
    job.setInputFormatClass(FsEntryHBaseInputFormat.class);
    job.setOutputKeyClass(ImmutableHexWritable.class);
    // job.setOutputValueClass(KeyValue.class);
    job.setOutputValueClass(JsonWritable.class);
    // job.setOutputFormatClass(HFileOutputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    // HFileOutputFormat.setOutputPath(job, new Path(outpath));
    TextOutputFormat.setOutputPath(job, new Path(outpath));
    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:com.linkedin.oneclick.wordcount.WordCount.java
License:Apache License
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    Job job = new Job(conf, "Word Count");
    job.setJarByClass(WordCount.class);
    String workDirectory = args.length >= 1 ? args[0] : "wordcount";
    Path input = new Path(workDirectory, "input.txt");
    FileSystem fs = input.getFileSystem(conf);
    fs.mkdirs(input.getParent());
    copy(resourceInputStream(getClass().getResource("/onegin.txt")), createOutputStream(conf, input), conf);
    job.setInputFormatClass(TextInputFormat.class);
    job.setMapperClass(WordCountMapper.class);
    FileInputFormat.addInputPath(job, input);
    job.setCombinerClass(WordCountReducer.class);
    job.setReducerClass(WordCountReducer.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    Path output = clean(conf, new Path(workDirectory, "wordcount"));
    FileOutputFormat.setOutputPath(job, output);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    return job.waitForCompletion(true) ? 0 : -1;
}
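This driver leans on several helpers (copy, resourceInputStream, createOutputStream, clean) that are not shown. A hedged sketch of clean(), on the assumption that it simply removes any pre-existing output directory, since FileOutputFormat fails at submission if the output path already exists:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class OutputCleaner {
    // Assumed behavior: delete the path if present and return it so the
    // caller can pass it straight to FileOutputFormat.setOutputPath().
    static Path clean(Configuration conf, Path path) throws IOException {
        FileSystem fs = path.getFileSystem(conf);
        if (fs.exists(path)) {
            fs.delete(path, true); // recursive delete
        }
        return path;
    }
}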