List of usage examples for org.apache.hadoop.mapreduce Job getInstance
public static Job getInstance(Configuration conf, String jobName) throws IOException

Creates a new Job bound to the given Configuration, with the given job name. This is the overload used by the examples below; a deprecated variant, getInstance(Cluster ignored, Configuration conf), also exists.
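Before the per-project examples, here is a minimal, self-contained driver sketch of the pattern they all share: create a Job via Job.getInstance(conf, jobName), configure mapper, reducer, key/value classes and paths, then wait for completion. The class names (MyJobDriver, TokenMapper, SumReducer), the job name "my job", and the argument layout are placeholders chosen for illustration, not taken from any of the listed projects.

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class MyJobDriver {

    // Hypothetical word-count mapper, included only to keep the sketch self-contained.
    public static class TokenMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        private final static IntWritable ONE = new IntWritable(1);
        private final Text word = new Text();

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            StringTokenizer itr = new StringTokenizer(value.toString());
            while (itr.hasMoreTokens()) {
                word.set(itr.nextToken());
                context.write(word, ONE);
            }
        }
    }

    // Hypothetical summing reducer.
    public static class SumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable v : values) {
                sum += v.get();
            }
            context.write(key, new IntWritable(sum));
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Job.getInstance(conf, jobName) is the non-deprecated replacement for "new Job(conf, name)".
        Job job = Job.getInstance(conf, "my job");
        job.setJarByClass(MyJobDriver.class);       // ship the jar containing this driver class
        job.setMapperClass(TokenMapper.class);
        job.setReducerClass(SumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));     // args[0]: input directory
        FileOutputFormat.setOutputPath(job, new Path(args[1]));   // args[1]: output directory (must not exist)
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}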
From source file:com.j.distributed.counter.CounterJob.java
@Override
public int run(String... options) throws Exception {
    Job job = Job.getInstance(getConf(), getClass().toString());
    job.setJarByClass(getClass());
    job.setMapperClass(CounterMapper.class);
    job.setCombinerClass(CounterReducer.class);
    job.setReducerClass(CounterReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(options[0]));
    FileOutputFormat.setOutputPath(job, new Path(options[1]));
    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:com.j.distributed.sorter.SorterJob.java
@Override
public int run(String... options) throws Exception {
    Job job = Job.getInstance(getConf(), getClass().toString());
    job.setJarByClass(getClass());
    job.setMapperClass(SorterMapper.class);
    job.setCombinerClass(SorterReducer.class);
    job.setReducerClass(SorterReducer.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Text.class);
    job.setSortComparatorClass(LongWritable.DecreasingComparator.class);
    FileInputFormat.addInputPath(job, new Path(options[1]));
    FileOutputFormat.setOutputPath(job, new Path(options[2]));
    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:com.jet.hadoop.wordcount.WordCount.java
License:Apache License
public static void main(String[] args) throws Exception {
    long timeBegin = System.currentTimeMillis();
    System.out.println("hadoop wordcount begins at " + timeBegin);

    if (args == null || args.length == 0) {
        args = new String[2];
        args[0] = "E:\\Work\\input\\hello.txt";
        args[1] = "E:\\Work\\output";
    }

    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: wordcount <in> [<in>...] <out>");
        System.exit(2);
    }

    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    // job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    // job.setNumReduceTasks(2);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    for (int i = 0; i < otherArgs.length - 1; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));

    boolean result = job.waitForCompletion(true);

    long timeEnd = System.currentTimeMillis();
    System.out.println("hadoop wordcount ended at " + timeEnd);
    System.out.println("hadoop wordcount cost time " + (timeEnd - timeBegin) / 1000 + " seconds.");
    System.exit(result ? 0 : 1);
}
From source file:com.kylinolap.job.hadoop.cube.CubeHFileJob.java
License:Apache License
public int run(String[] args) throws Exception {
    Options options = new Options();
    try {
        options.addOption(OPTION_JOB_NAME);
        options.addOption(OPTION_CUBE_NAME);
        options.addOption(OPTION_INPUT_PATH);
        options.addOption(OPTION_OUTPUT_PATH);
        options.addOption(OPTION_HTABLE_NAME);
        parseOptions(options, args);

        Path output = new Path(getOptionValue(OPTION_OUTPUT_PATH));
        String cubeName = getOptionValue(OPTION_CUBE_NAME).toUpperCase();
        CubeManager cubeMgr = CubeManager.getInstance(KylinConfig.getInstanceFromEnv());
        CubeInstance cube = cubeMgr.getCube(cubeName);

        job = Job.getInstance(getConf(), getOptionValue(OPTION_JOB_NAME));

        File JarFile = new File(KylinConfig.getInstanceFromEnv().getKylinJobJarPath());
        if (JarFile.exists()) {
            job.setJar(KylinConfig.getInstanceFromEnv().getKylinJobJarPath());
        } else {
            job.setJarByClass(this.getClass());
        }

        addInputDirs(getOptionValue(OPTION_INPUT_PATH), job);
        FileOutputFormat.setOutputPath(job, output);

        job.setInputFormatClass(SequenceFileInputFormat.class);
        job.setMapperClass(CubeHFileMapper.class);
        job.setReducerClass(KeyValueSortReducer.class);

        // set job configuration
        job.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName);
        Configuration conf = HBaseConfiguration.create(getConf());
        // add metadata to distributed cache
        attachKylinPropsAndMetadata(cube, job.getConfiguration());

        String tableName = getOptionValue(OPTION_HTABLE_NAME).toUpperCase();
        HTable htable = new HTable(conf, tableName);

        // Automatic config!
        HFileOutputFormat.configureIncrementalLoad(job, htable);

        // set block replication to 3 for hfiles
        conf.set(DFSConfigKeys.DFS_REPLICATION_KEY, "3");

        this.deletePath(job.getConfiguration(), output);

        return waitForCompletion(job);
    } catch (Exception e) {
        printUsage(options);
        log.error(e.getLocalizedMessage(), e);
        return 2;
    }
}
From source file:com.kylinolap.job.hadoop.cube.CuboidJob.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    Options options = new Options();
    try {
        options.addOption(OPTION_JOB_NAME);
        options.addOption(OPTION_CUBE_NAME);
        options.addOption(OPTION_SEGMENT_NAME);
        options.addOption(OPTION_INPUT_PATH);
        options.addOption(OPTION_OUTPUT_PATH);
        options.addOption(OPTION_NCUBOID_LEVEL);
        options.addOption(OPTION_INPUT_FORMAT);
        parseOptions(options, args);

        Path input = new Path(getOptionValue(OPTION_INPUT_PATH));
        Path output = new Path(getOptionValue(OPTION_OUTPUT_PATH));
        String cubeName = getOptionValue(OPTION_CUBE_NAME).toUpperCase();
        int nCuboidLevel = Integer.parseInt(getOptionValue(OPTION_NCUBOID_LEVEL));
        String segmentName = getOptionValue(OPTION_SEGMENT_NAME);

        KylinConfig config = KylinConfig.getInstanceFromEnv();
        CubeManager cubeMgr = CubeManager.getInstance(config);
        CubeInstance cube = cubeMgr.getCube(cubeName);

        job = Job.getInstance(getConf(), getOptionValue(OPTION_JOB_NAME));
        System.out.println("Starting: " + job.getJobName());
        FileInputFormat.setInputPaths(job, input);

        File jarFile = new File(config.getKylinJobJarPath());
        if (jarFile.exists()) {
            job.setJar(config.getKylinJobJarPath());
        } else {
            job.setJarByClass(this.getClass());
        }

        // Mapper
        if (this.mapperClass == null) {
            throw new Exception("Mapper class is not set!");
        }
        boolean isInputTextFormat = false;
        if (hasOption(OPTION_INPUT_FORMAT)
                && ("textinputformat".equalsIgnoreCase(getOptionValue(OPTION_INPUT_FORMAT)))) {
            isInputTextFormat = true;
        }
        if (isInputTextFormat) {
            job.setInputFormatClass(TextInputFormat.class);
        } else {
            job.setInputFormatClass(SequenceFileInputFormat.class);
        }
        job.setMapperClass(this.mapperClass);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        // for base cuboid shuffle skew, some rowkey aggregates far more records than others
        job.setCombinerClass(CuboidReducer.class);

        // Reducer
        job.setReducerClass(CuboidReducer.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        FileOutputFormat.setOutputPath(job, output);

        // set job configuration
        job.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName);
        job.getConfiguration().set(BatchConstants.CFG_CUBE_SEGMENT_NAME, segmentName);
        // add metadata to distributed cache
        attachKylinPropsAndMetadata(cube, job.getConfiguration());

        setReduceTaskNum(job, config, cubeName, nCuboidLevel);

        this.deletePath(job.getConfiguration(), output);

        return waitForCompletion(job);
    } catch (Exception e) {
        printUsage(options);
        log.error(e.getLocalizedMessage(), e);
        return 2;
    }
}
From source file:com.kylinolap.job.hadoop.cube.FactDistinctColumnsJob.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    Options options = new Options();
    try {
        options.addOption(OPTION_JOB_NAME);
        options.addOption(OPTION_CUBE_NAME);
        options.addOption(OPTION_INPUT_PATH);
        options.addOption(OPTION_INPUT_FORMAT);
        options.addOption(OPTION_OUTPUT_PATH);
        parseOptions(options, args);

        job = Job.getInstance(getConf(), getOptionValue(OPTION_JOB_NAME));
        String cubeName = getOptionValue(OPTION_CUBE_NAME);
        Path input = new Path(getOptionValue(OPTION_INPUT_PATH));
        String inputFormat = getOptionValue(OPTION_INPUT_FORMAT);
        Path output = new Path(getOptionValue(OPTION_OUTPUT_PATH));

        // ----------------------------------------------------------------------------

        job.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName);
        System.out.println("Starting: " + job.getJobName());

        setupMapInput(input, inputFormat);
        setupReduceOutput(output);

        // add metadata to distributed cache
        CubeManager cubeMgr = CubeManager.getInstance(KylinConfig.getInstanceFromEnv());
        // CubeSegment seg = cubeMgr.getCube(cubeName).getTheOnlySegment();
        attachKylinPropsAndMetadata(cubeMgr.getCube(cubeName), job.getConfiguration());

        return waitForCompletion(job);
    } catch (Exception e) {
        printUsage(options);
        log.error(e.getLocalizedMessage(), e);
        addErrorLog(e);
        return 2;
    }
}
From source file:com.kylinolap.job.hadoop.cube.MergeCuboidJob.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    Options options = new Options();
    try {
        options.addOption(OPTION_JOB_NAME);
        options.addOption(OPTION_CUBE_NAME);
        options.addOption(OPTION_SEGMENT_NAME);
        options.addOption(OPTION_INPUT_PATH);
        options.addOption(OPTION_OUTPUT_PATH);
        parseOptions(options, args);

        String cubeName = getOptionValue(OPTION_CUBE_NAME).toUpperCase();
        String segmentName = getOptionValue(OPTION_SEGMENT_NAME).toUpperCase();
        KylinConfig config = KylinConfig.getInstanceFromEnv();
        CubeManager cubeMgr = CubeManager.getInstance(config);
        CubeInstance cube = cubeMgr.getCube(cubeName);
        // CubeSegment cubeSeg = cubeMgr.findSegment(cube, segmentName);

        // start job
        String jobName = getOptionValue(OPTION_JOB_NAME);
        System.out.println("Starting: " + jobName);
        job = Job.getInstance(getConf(), jobName);

        // set job configuration - basic
        File JarFile = new File(config.getKylinJobJarPath());
        if (JarFile.exists()) {
            job.setJar(config.getKylinJobJarPath());
        } else {
            job.setJarByClass(this.getClass());
        }
        // setJobJar(job);

        addInputDirs(getOptionValue(OPTION_INPUT_PATH), job);
        Path output = new Path(getOptionValue(OPTION_OUTPUT_PATH));
        FileOutputFormat.setOutputPath(job, output);

        // Mapper
        job.setInputFormatClass(SequenceFileInputFormat.class);
        job.setMapperClass(MergeCuboidMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        // Reducer - only one
        job.setReducerClass(CuboidReducer.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        // set job configuration
        job.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName);
        job.getConfiguration().set(BatchConstants.CFG_CUBE_SEGMENT_NAME, segmentName);
        // add metadata to distributed cache
        attachKylinPropsAndMetadata(cube, job.getConfiguration());

        setReduceTaskNum(job, config, cubeName, 0);

        this.deletePath(job.getConfiguration(), output);

        return waitForCompletion(job);
    } catch (Exception e) {
        printUsage(options);
        log.error(e.getLocalizedMessage(), e);
        return 2;
    }
}
From source file:com.kylinolap.job.hadoop.cube.RangeKeyDistributionJob.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    Options options = new Options();
    try {
        options.addOption(OPTION_INPUT_PATH);
        options.addOption(OPTION_OUTPUT_PATH);
        options.addOption(OPTION_JOB_NAME);
        options.addOption(OPTION_CUBE_NAME);
        parseOptions(options, args);

        // start job
        String jobName = getOptionValue(OPTION_JOB_NAME);
        job = Job.getInstance(getConf(), jobName);

        File JarFile = new File(KylinConfig.getInstanceFromEnv().getKylinJobJarPath());
        if (JarFile.exists()) {
            job.setJar(KylinConfig.getInstanceFromEnv().getKylinJobJarPath());
        } else {
            job.setJarByClass(this.getClass());
        }

        addInputDirs(getOptionValue(OPTION_INPUT_PATH), job);
        Path output = new Path(getOptionValue(OPTION_OUTPUT_PATH));
        FileOutputFormat.setOutputPath(job, output);
        // job.getConfiguration().set("dfs.block.size", "67108864");

        // Mapper
        job.setInputFormatClass(SequenceFileInputFormat.class);
        job.setMapperClass(RangeKeyDistributionMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);

        // Reducer - only one
        job.setReducerClass(RangeKeyDistributionReducer.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);
        job.setNumReduceTasks(1);

        this.deletePath(job.getConfiguration(), output);

        String cubeName = getOptionValue(OPTION_CUBE_NAME).toUpperCase();
        CubeManager cubeMgr = CubeManager.getInstance(KylinConfig.getInstanceFromEnv());
        CubeInstance cube = cubeMgr.getCube(cubeName);
        CubeCapacity cubeCapacity = cube.getDescriptor().getCapacity();
        job.getConfiguration().set(BatchConstants.CUBE_CAPACITY, cubeCapacity.toString());

        return waitForCompletion(job);
    } catch (Exception e) {
        printUsage(options);
        log.error(e.getLocalizedMessage(), e);
        return 2;
    }
}
From source file:com.kylinolap.job.hadoop.cube.RowKeyDistributionCheckerJob.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    Options options = new Options();
    try {
        options.addOption(OPTION_INPUT_PATH);
        options.addOption(OPTION_OUTPUT_PATH);
        options.addOption(OPTION_JOB_NAME);
        options.addOption(rowKeyStatsFilePath);
        parseOptions(options, args);

        String statsFilePath = getOptionValue(rowKeyStatsFilePath);

        // start job
        String jobName = getOptionValue(OPTION_JOB_NAME);
        job = Job.getInstance(getConf(), jobName);

        job.setJarByClass(this.getClass());

        addInputDirs(getOptionValue(OPTION_INPUT_PATH), job);
        Path output = new Path(getOptionValue(OPTION_OUTPUT_PATH));
        FileOutputFormat.setOutputPath(job, output);

        // Mapper
        job.setInputFormatClass(SequenceFileInputFormat.class);
        job.setMapperClass(RowKeyDistributionCheckerMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);

        // Reducer - only one
        job.setReducerClass(RowKeyDistributionCheckerReducer.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);
        job.setNumReduceTasks(1);

        job.getConfiguration().set("rowKeyStatsFilePath", statsFilePath);

        this.deletePath(job.getConfiguration(), output);

        return waitForCompletion(job);
    } catch (Exception e) {
        printUsage(options);
        log.error(e.getLocalizedMessage(), e);
        return 2;
    }
}
From source file:com.kylinolap.job.hadoop.invertedindex.IICreateHFileJob.java
License:Apache License
public int run(String[] args) throws Exception {
    Options options = new Options();
    try {
        options.addOption(OPTION_JOB_NAME);
        options.addOption(OPTION_CUBE_NAME);
        options.addOption(OPTION_INPUT_PATH);
        options.addOption(OPTION_OUTPUT_PATH);
        options.addOption(OPTION_HTABLE_NAME);
        parseOptions(options, args);

        Path output = new Path(getOptionValue(OPTION_OUTPUT_PATH));

        job = Job.getInstance(getConf(), getOptionValue(OPTION_JOB_NAME));

        File JarFile = new File(KylinConfig.getInstanceFromEnv().getKylinJobJarPath());
        if (JarFile.exists()) {
            job.setJar(KylinConfig.getInstanceFromEnv().getKylinJobJarPath());
        } else {
            job.setJarByClass(this.getClass());
        }

        addInputDirs(getOptionValue(OPTION_INPUT_PATH), job);
        FileOutputFormat.setOutputPath(job, output);

        job.setInputFormatClass(SequenceFileInputFormat.class);
        job.setMapperClass(IICreateHFileMapper.class);
        job.setMapOutputKeyClass(ImmutableBytesWritable.class);
        job.setMapOutputValueClass(KeyValue.class);

        String tableName = getOptionValue(OPTION_HTABLE_NAME);
        HTable htable = new HTable(getConf(), tableName);
        HFileOutputFormat.configureIncrementalLoad(job, htable);

        this.deletePath(job.getConfiguration(), output);

        return waitForCompletion(job);
    } catch (Exception e) {
        printUsage(options);
        log.error(e.getLocalizedMessage(), e);
        return 2;
    }
}