List of usage examples for the org.apache.hadoop.mapreduce.Job constructor
Job(Configuration conf, String jobName) throws IOException
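All of the examples below construct jobs with the Job(Configuration, String) constructor, which is deprecated in Hadoop 2.x and later. As a point of reference, here is a minimal sketch of the equivalent setup using the non-deprecated Job.getInstance(Configuration, String) factory method; the class and method names in the sketch are illustrative only and do not come from the source files listed on this page.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

// Illustrative driver class, not taken from any of the examples below.
public class JobFactoryExample {

    // Creates a job the modern way; mapper, reducer, and input/output classes
    // would then be set on it exactly as in the examples that follow.
    public static Job newJob(Configuration conf, String name) throws IOException {
        // Job.getInstance(conf, name) is the non-deprecated replacement for
        // the new Job(conf, name) constructor used throughout this page.
        Job job = Job.getInstance(conf, name);
        job.setJarByClass(JobFactoryExample.class);
        return job;
    }
}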
From source file:com.siwind.routingloop.WordCount.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: wordcount <in> [<in>...] <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    for (int i = 0; i < otherArgs.length - 1; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    Path outputpath = new Path(otherArgs[otherArgs.length - 1]);
    FileSystem.get(conf).delete(outputpath, true);
    FileOutputFormat.setOutputPath(job, outputpath);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.skp.experiment.cf.als.hadoop.UploloadToHbaseTableJob.java
License:Apache License
/**
 * Job configuration.
 */
public static Job configureJob(Configuration conf, String[] args) throws IOException {
    Path inputPath = new Path(args[0]);
    String tableName = args[1];
    Job job = new Job(conf, NAME + "_" + tableName);
    job.setJarByClass(Uploader.class);
    FileInputFormat.setInputPaths(job, inputPath);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setMapperClass(Uploader.class);
    // No reducers. Just write straight to table. Call initTableReducerJob
    // because it sets up the TableOutputFormat.
    TableMapReduceUtil.initTableReducerJob(tableName, null, job);
    job.setNumReduceTasks(0);
    return job;
}
From source file:com.sohu.rdc.inf.cdn.offline.WordCount.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: wordcount <in> [<in>...] <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setInputFormatClass(LzoTextInputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    for (int i = 0; i < otherArgs.length - 1; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.splicemachine.mrio.api.core.SMInputFormatIT.java
License:Apache License
@Test
public void testSparkIntegrationWithInputFormat() throws IOException {
    config.set(MRConstants.SPLICE_TABLE_NAME, tableWatcherA.toString());
    Job job = new Job(config, "Test Scan");
    JavaPairRDD<RowLocation, ExecRow> table = sparkWatcher.jsc.newAPIHadoopRDD(job.getConfiguration(),
            SMInputFormat.class, RowLocation.class, ExecRow.class);
    List<Tuple2<RowLocation, ExecRow>> data = table.collect();
    int i = 0;
    for (Tuple2<RowLocation, ExecRow> tuple : data) {
        i++;
        Assert.assertNotNull(tuple._1());
        Assert.assertNotNull(tuple._2());
    }
    Assert.assertEquals("Incorrect Results Returned", 2, i);
}
From source file:com.splicemachine.mrio.api.core.SMInputFormatIT.java
License:Apache License
@Test
public void testCountOverMultipleRegionsInSpark() throws IOException {
    config.set(MRConstants.SPLICE_TABLE_NAME, tableWatcherB.toString());
    Job job = new Job(config, "Test Scan");
    JavaPairRDD<RowLocation, ExecRow> table = sparkWatcher.jsc.newAPIHadoopRDD(job.getConfiguration(),
            SMInputFormat.class, RowLocation.class, ExecRow.class);
    List<Tuple2<RowLocation, ExecRow>> data = table.collect();
    int i = 0;
    for (Tuple2<RowLocation, ExecRow> tuple : data) {
        i++;
        Assert.assertNotNull(tuple._1());
        Assert.assertNotNull(tuple._2());
    }
    Assert.assertEquals("Incorrect Results Returned", 10000, i);
}
From source file:com.splunk.shuttl.integration.hadoop.hbase.CSVJobFactory.java
License:Apache License
/**
 * @return the hadoopConfiguration
 * @throws IOException
 */
public static Job getConfiguredJob(String[] arguments) throws IOException {
    Configuration jobConfiguration = new Configuration(true);
    // Load hbase-site.xml
    HBaseConfiguration.addHbaseResources(jobConfiguration);
    jobConfiguration.set("fs.default.name", arguments[0]);
    jobConfiguration.set("mapred.job.tracker", arguments[1]);
    jobConfiguration.set(JobConfigurationConstants.FILENAME, arguments[2]);
    jobConfiguration.set(JobConfigurationConstants.OUTPUT_PATH, arguments[3]);
    jobConfiguration.set(JobConfigurationConstants.TABLE_NAME, arguments[4]);
    jobConfiguration.set(JobConfigurationConstants.COLUMN_FAMILY, "d");
    Job job = new Job(jobConfiguration, "BucketToHbase");
    job.setJarByClass(CSVMapper.class);
    job.setMapperClass(CSVMapper.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(KeyValue.class);
    job.setInputFormatClass(TextInputFormat.class);
    return job;
}
From source file:com.springsource.insight.plugin.hadoop.WordCount.java
License:Open Source License
public int run(String[] args) throws Exception {
    String INPUT = "src/test/resources";
    String OUTPUT = "target/out";
    Configuration conf = new Configuration();
    File targetFolder = FileUtil.detectTargetFolder(getClass());
    if (targetFolder == null) {
        throw new IllegalStateException("Cannot detect target folder");
    }
    File tempFolder = new File(targetFolder, "temp");
    conf.set("hadoop.tmp.dir", tempFolder.getAbsolutePath());
    Job job = new Job(conf, "wordcount");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(WordCountMapper.class);
    job.setCombinerClass(WordCountReducer.class);
    job.setReducerClass(WordCountReducer.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    FileUtils.deleteDirectory(new File(OUTPUT)); // delete old output data
    FileInputFormat.addInputPath(job, new Path(INPUT));
    FileOutputFormat.setOutputPath(job, new Path(OUTPUT));
    return job.waitForCompletion(true) ? 0 : -1;
}
From source file:com.stride.cartrek.core.hbase.RowKeyDistributorTestBase.java
License:Apache License
private void testMapReduceInternal(long origKeyPrefix, Scan scan, int numValues, int startWithValue,
        int seekIntervalMinValue, int seekIntervalMaxValue)
        throws IOException, InterruptedException, ClassNotFoundException {
    int valuesCountInSeekInterval = writeTestData(origKeyPrefix, numValues, startWithValue,
            seekIntervalMinValue, seekIntervalMaxValue);
    // Reading data
    Configuration conf = testingUtility.getConfiguration();
    Job job = new Job(conf, "testMapReduceInternal()-Job");
    job.setJarByClass(this.getClass());
    TableMapReduceUtil.initTableMapperJob(TABLE_NAME, scan, RowCounterMapper.class,
            ImmutableBytesWritable.class, Result.class, job);
    // Substituting standard TableInputFormat which was set in
    // TableMapReduceUtil.initTableMapperJob(...)
    job.setInputFormatClass(WdTableInputFormat.class);
    keyDistributor.addInfo(job.getConfiguration());
    job.setOutputFormatClass(NullOutputFormat.class);
    job.setNumReduceTasks(0);
    boolean succeeded = job.waitForCompletion(true);
    Assert.assertTrue(succeeded);
    long mapInputRecords = job.getCounters().findCounter(RowCounterMapper.Counters.ROWS).getValue();
    Assert.assertEquals(valuesCountInSeekInterval, mapInputRecords);
}
From source file:com.talis.labs.pagerank.mapreduce.CheckConvergence.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: CheckConvergence <input path> <output path>");
        return -1;
    }
    FileSystem.get(getConf()).delete(new Path(args[1]), true);
    Job job = new Job(getConf(), "CheckConvergence");
    job.setJarByClass(getClass());
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setMapperClass(CheckConvergenceMapper.class);
    job.setCombinerClass(CheckConvergenceReducer.class);
    job.setReducerClass(CheckConvergenceReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(DoubleWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(DoubleWritable.class);
    job.setNumReduceTasks(1);
    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:com.talis.labs.pagerank.mapreduce.CheckingData.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: CheckingData <input path> <output path>");
        return -1;
    }
    FileSystem.get(getConf()).delete(new Path(args[1]), true);
    Job job = new Job(getConf(), "CheckingData");
    job.setJarByClass(getClass());
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setMapperClass(CheckingDataMapper.class);
    job.setReducerClass(CheckingDataReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    return job.waitForCompletion(true) ? 0 : 1;
}