List of usage examples for the org.apache.hadoop.mapreduce.Job constructor
Job(Configuration conf, String jobName) throws IOException
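Before the per-project examples, here is a minimal, self-contained sketch of the same pattern. It is not taken from any of the source files below; the class name and job name are illustrative, and it uses Hadoop's identity Mapper/Reducer so it compiles without extra classes. Note that newer Hadoop releases deprecate this constructor in favor of the Job.getInstance(Configuration, String) factory method, shown here.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class MinimalJobExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();

        // The examples below call the deprecated constructor directly:
        //   Job job = new Job(conf, "minimal-example");
        // On current Hadoop releases the equivalent, non-deprecated form is:
        Job job = Job.getInstance(conf, "minimal-example");

        job.setJarByClass(MinimalJobExample.class);
        // Identity mapper/reducer: records pass through unchanged.
        job.setMapperClass(Mapper.class);
        job.setReducerClass(Reducer.class);
        // TextInputFormat (the default) produces LongWritable offsets and Text lines.
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}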
From source file:com.placeiq.piqconnect.Runner.java
License:Apache License
private Job buildJob3(Path input, Path output) throws Exception {
    Configuration conf = getConf();
    conf.setInt(Constants.PROP_BLOCK_SIZE, blockSize);

    Job job = new Job(conf, "data-piqid.piqconnect.FinalResultBuilder");
    job.setJarByClass(Runner.class);
    job.setMapperClass(FinalResultBuilder._Mapper.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(VLongWritable.class);
    job.setOutputValueClass(VLongWritable.class);

    FileInputFormat.setInputPaths(job, input);
    FileOutputFormat.setOutputPath(job, output);
    FileOutputFormat.setCompressOutput(job, true);
    setCompression(job);
    return job;
}
From source file:com.renren.hadoop.oiv.tools.MoveData.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 3 && otherArgs.length != 4) {
        System.err.println("Usage: MoveData <data_from_dir> <data_to_dir> <job_out_dir> [<black_list_file>]");
        System.exit(2);
    }
    String blackListFile = null;
    if (otherArgs.length == 4) {
        blackListFile = otherArgs[3];
    } else {
        System.err.println("Warn: black_list_file param is not given");
    }
    String dataFromDir = otherArgs[0];
    conf.set(DATA_TO_DIR, otherArgs[1]);
    // Configuration.set() rejects null values, so only set the black list when one was supplied.
    if (blackListFile != null) {
        conf.set(BlackListManager.BLACK_LIST_FILE, blackListFile);
    }
    String jobOutDir = otherArgs[2];

    Job job = new Job(conf, "clearData");
    job.setJarByClass(MoveData.class);
    job.setMapperClass(MoveDataMapper.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(dataFromDir));
    FileOutputFormat.setOutputPath(job, new Path(jobOutDir));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.rockstor.compact.CompactDataTool.java
License:Apache License
private Job createSubmittableJob(Configuration conf) throws IOException {
    Job job = new Job(conf, NAME);
    job.setJarByClass(CompactDataTool.class);
    job.setInputFormatClass(CompactDirInputFormat.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapperClass(CompactDataMapper.class);
    job.setNumReduceTasks(0);
    job.setOutputFormatClass(NullOutputFormat.class);
    LOG.info("init job " + NAME + " OK");
    return job;
}
From source file:com.rockstor.compact.GenGarbageIndexTool.java
License:Apache License
private Job createSubmittableJob(Configuration conf) throws IOException {
    Job job = new Job(conf, NAME);
    job.setJarByClass(GenGarbageIndexTool.class);

    Scan scan = new Scan();
    TableMapReduceUtil.initTableMapperJob(GarbageChunkDB.TAB_NAME, scan, GarbageChunkMapper.class,
            ImmutableBytesWritable.class, ImmutableBytesWritable.class, job);
    TableMapReduceUtil.setScannerCaching(job, batchSize);

    job.setReducerClass(GarbageChunkReduce.class);
    job.setPartitionerClass(GarbageChunkPartition.class);
    job.setCombinerClass(GarbageChunkCombine.class);
    job.setNumReduceTasks(Compactor.getInstance().getReduceNum());
    job.setOutputFormatClass(NullOutputFormat.class);

    LOG.info("init job " + NAME + " finished!");
    return job;
}
From source file:com.rockstor.compact.RecoveryTool.java
License:Apache License
private Job createSubmittableJob(Configuration conf) throws IOException {
    Job job = new Job(conf, NAME);
    job.setJarByClass(RecoveryTool.class);
    job.setInputFormatClass(CompactDirInputFormat.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapperClass(RecoveryMapper.class);
    job.setNumReduceTasks(0);
    job.setOutputFormatClass(NullOutputFormat.class);
    LOG.info("init job " + NAME + " OK!");
    return job;
}
From source file:com.sanjay.mapreduce.SiCombiner.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: wordcount <in> [<in>...] <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(SiCombiner.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setPartitionerClass(WordPartitioner.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setNumReduceTasks(5);
    for (int i = 0; i < otherArgs.length - 1; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.sematext.hbase.hut.RollbackUpdatesMrJob.java
License:Apache License
/**
 * Sets up the actual job.
 *
 * @param conf The current configuration.
 * @param args The command line parameters.
 * @return The newly created job.
 * @throws IOException When setting up the job fails.
 */
public static Job createSubmittableJob(Configuration conf, String[] args) throws IOException {
    String tableName = args[0];
    conf.set("mapred.map.tasks.speculative.execution", "false");

    Job job = new Job(conf, NAME + "_" + tableName);
    job.setJobName(NAME + "_" + tableName);
    job.setJarByClass(RollbackUpdatesMapper.class);
    // TODO: Allow passing filter and subset of rows/columns.
    Scan s = new Scan();
    // Optional arguments.
    long startTime = args.length > 1 ? Long.parseLong(args[1]) : 0L;
    long endTime = args.length > 2 ? Long.parseLong(args[2]) : Long.MAX_VALUE;
    // TODO: consider using scan.setTimeRange() for limiting scanned data range. It may
    // not be a good way to do it if tss are artificial in HutPuts though
    // s.setTimeRange(startTime, endTime);
    job.getConfiguration().set(RollbackUpdatesMapper.HUT_ROLLBACK_UPDATE_MIN_TIME_ATTR,
            String.valueOf(startTime));
    job.getConfiguration().set(RollbackUpdatesMapper.HUT_ROLLBACK_UPDATE_MAX_TIME_ATTR,
            String.valueOf(endTime));
    s.setFilter(new HutWriteTimeRowsFilter(endTime, startTime));
    s.setCacheBlocks(false);
    // TODO: allow user change using job params
    s.setCaching(512);

    LOG.info("Using scan: " + s.toString());

    // TODO: allow better limiting of data to be fetched
    if (conf.get(TableInputFormat.SCAN_COLUMN_FAMILY) != null) {
        s.addFamily(Bytes.toBytes(conf.get(TableInputFormat.SCAN_COLUMN_FAMILY)));
    }

    LOG.info("starttime (inclusive): " + startTime + " (" + new Date(startTime) + ")"
            + ", endtime (inclusive): " + endTime + " (" + new Date(endTime) + ")");

    TableMapReduceUtil.initTableMapperJob(tableName, s, RollbackUpdatesMapper.class, null, null, job);
    TableMapReduceUtil.initTableReducerJob(tableName, null, job);
    // No reducers. Just write straight to output files.
    job.setNumReduceTasks(0);
    return job;
}
From source file:com.sematext.hbase.wd.RowKeyDistributorTestBase.java
License:Apache License
private void testMapReduceInternal(long origKeyPrefix, Scan scan, int numValues, int startWithValue,
        int seekIntervalMinValue, int seekIntervalMaxValue)
        throws IOException, InterruptedException, ClassNotFoundException {
    int valuesCountInSeekInterval = writeTestData(origKeyPrefix, numValues, startWithValue,
            seekIntervalMinValue, seekIntervalMaxValue);

    // Reading data
    Configuration conf = testingUtility.getConfiguration();
    Job job = new Job(conf, "testMapReduceInternal()-Job");
    job.setJarByClass(this.getClass());
    TableMapReduceUtil.initTableMapperJob(TABLE_NAME, scan, RowCounterMapper.class,
            ImmutableBytesWritable.class, Result.class, job);

    // Substituting standard TableInputFormat which was set in TableMapReduceUtil.initTableMapperJob(...)
    job.setInputFormatClass(WdTableInputFormat.class);
    keyDistributor.addInfo(job.getConfiguration());

    job.setOutputFormatClass(NullOutputFormat.class);
    job.setNumReduceTasks(0);

    boolean succeeded = job.waitForCompletion(true);
    Assert.assertTrue(succeeded);

    long mapInputRecords = job.getCounters().findCounter(RowCounterMapper.Counters.ROWS).getValue();
    Assert.assertEquals(valuesCountInSeekInterval, mapInputRecords);
}
From source file:com.shopping.hbase.mapreduce.Import.java
License:Apache License
/**
 * Job configuration.
 */
protected Job configureJob(Configuration conf, String inputPathName, String tableName) throws IOException {
    Path inputPath = new Path(inputPathName);
    Job job = new Job(conf, NAME + "_" + tableName);
    job.setJarByClass(Importer.class);
    FileInputFormat.setInputPaths(job, inputPath);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    // job.setInputFormatClass(TextInputFormat.class);
    job.setMapperClass(Importer.class);
    // No reducers. Just write straight to table. Call initTableReducerJob
    // because it sets up the TableOutputFormat.
    TableMapReduceUtil.initTableReducerJob(tableName, null, job);
    job.setNumReduceTasks(0);
    return job;
}
From source file:com.shopping.hbase.sample.mapreduce.SampleUploader.java
License:Apache License
/**
 * Job configuration.
 */
public static Job configureJob(Configuration conf, String[] args) throws IOException {
    Path inputPath = new Path(args[0]);
    String tableName = args[1];
    Job job = new Job(conf, NAME + "_" + tableName);
    job.setJarByClass(Uploader.class);
    FileInputFormat.setInputPaths(job, inputPath);
    job.setInputFormatClass(TextInputFormat.class);
    job.setMapperClass(Uploader.class);
    // No reducers. Just write straight to table. Call initTableReducerJob
    // because it sets up the TableOutputFormat.
    TableMapReduceUtil.initTableReducerJob(tableName, null, job);
    job.setNumReduceTasks(0);
    return job;
}