List of usage examples for org.apache.hadoop.mapreduce Job getInstance
public static Job getInstance(Configuration conf, String jobName) throws IOException

Creates a new Job bound to the given Configuration, with the given job name. This is the overload used by the examples below; a deprecated variant, getInstance(Cluster ignored, Configuration conf), also exists.
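Before the per-project examples, here is a minimal, self-contained driver sketch of the pattern they all share: create a Job via Job.getInstance(conf, jobName), configure mapper, reducer, key/value classes and paths, then wait for completion. The class names (MyJobDriver, TokenMapper, SumReducer), the job name "my job", and the argument layout are placeholders chosen for illustration, not taken from any of the listed projects.

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class MyJobDriver {

    // Hypothetical word-count mapper, included only to keep the sketch self-contained.
    public static class TokenMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        private final static IntWritable ONE = new IntWritable(1);
        private final Text word = new Text();

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            StringTokenizer itr = new StringTokenizer(value.toString());
            while (itr.hasMoreTokens()) {
                word.set(itr.nextToken());
                context.write(word, ONE);
            }
        }
    }

    // Hypothetical summing reducer.
    public static class SumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable v : values) {
                sum += v.get();
            }
            context.write(key, new IntWritable(sum));
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Job.getInstance(conf, jobName) is the non-deprecated replacement for "new Job(conf, name)".
        Job job = Job.getInstance(conf, "my job");
        job.setJarByClass(MyJobDriver.class);       // ship the jar containing this driver class
        job.setMapperClass(TokenMapper.class);
        job.setReducerClass(SumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));     // args[0]: input directory
        FileOutputFormat.setOutputPath(job, new Path(args[1]));   // args[1]: output directory (must not exist)
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}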
From source file:com.j.distributed.counter.CounterJob.java
@Override
public int run(String... options) throws Exception {
    Job job = Job.getInstance(getConf(), getClass().toString());
    job.setJarByClass(getClass());
    job.setMapperClass(CounterMapper.class);
    job.setCombinerClass(CounterReducer.class);
    job.setReducerClass(CounterReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(options[0]));
    FileOutputFormat.setOutputPath(job, new Path(options[1]));
    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:com.j.distributed.sorter.SorterJob.java
@Override
public int run(String... options) throws Exception {
    Job job = Job.getInstance(getConf(), getClass().toString());
    job.setJarByClass(getClass());
    job.setMapperClass(SorterMapper.class);
    job.setCombinerClass(SorterReducer.class);
    job.setReducerClass(SorterReducer.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Text.class);
    job.setSortComparatorClass(LongWritable.DecreasingComparator.class);
    FileInputFormat.addInputPath(job, new Path(options[1]));
    FileOutputFormat.setOutputPath(job, new Path(options[2]));
    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:com.jet.hadoop.wordcount.WordCount.java
License:Apache License
public static void main(String[] args) throws Exception {
    long timeBegin = System.currentTimeMillis();
    System.out.println("hadoop wordcount begins at " + timeBegin);

    if (args == null || args.length == 0) {
        args = new String[2];
        args[0] = "E:\\Work\\input\\hello.txt";
        args[1] = "E:\\Work\\output";
    }

    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: wordcount <in> [<in>...] <out>");
        System.exit(2);
    }

    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    // job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    // job.setNumReduceTasks(2);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    for (int i = 0; i < otherArgs.length - 1; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));

    boolean result = job.waitForCompletion(true);

    long timeEnd = System.currentTimeMillis();
    System.out.println("hadoop wordcount ended at " + timeEnd);
    System.out.println("hadoop wordcount cost time " + (timeEnd - timeBegin) / 1000 + " seconds.");
    System.exit(result ? 0 : 1);
}
From source file:com.kylinolap.job.hadoop.cube.CubeHFileJob.java
License:Apache License
public int run(String[] args) throws Exception {
    Options options = new Options();
    try {
        options.addOption(OPTION_JOB_NAME);
        options.addOption(OPTION_CUBE_NAME);
        options.addOption(OPTION_INPUT_PATH);
        options.addOption(OPTION_OUTPUT_PATH);
        options.addOption(OPTION_HTABLE_NAME);
        parseOptions(options, args);

        Path output = new Path(getOptionValue(OPTION_OUTPUT_PATH));
        String cubeName = getOptionValue(OPTION_CUBE_NAME).toUpperCase();
        CubeManager cubeMgr = CubeManager.getInstance(KylinConfig.getInstanceFromEnv());
        CubeInstance cube = cubeMgr.getCube(cubeName);

        job = Job.getInstance(getConf(), getOptionValue(OPTION_JOB_NAME));

        File JarFile = new File(KylinConfig.getInstanceFromEnv().getKylinJobJarPath());
        if (JarFile.exists()) {
            job.setJar(KylinConfig.getInstanceFromEnv().getKylinJobJarPath());
        } else {
            job.setJarByClass(this.getClass());
        }

        addInputDirs(getOptionValue(OPTION_INPUT_PATH), job);
        FileOutputFormat.setOutputPath(job, output);

        job.setInputFormatClass(SequenceFileInputFormat.class);
        job.setMapperClass(CubeHFileMapper.class);
        job.setReducerClass(KeyValueSortReducer.class);

        // set job configuration
        job.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName);
        Configuration conf = HBaseConfiguration.create(getConf());
        // add metadata to distributed cache
        attachKylinPropsAndMetadata(cube, job.getConfiguration());

        String tableName = getOptionValue(OPTION_HTABLE_NAME).toUpperCase();
        HTable htable = new HTable(conf, tableName);

        // Automatic config!
        HFileOutputFormat.configureIncrementalLoad(job, htable);

        // set block replication to 3 for hfiles
        conf.set(DFSConfigKeys.DFS_REPLICATION_KEY, "3");

        this.deletePath(job.getConfiguration(), output);

        return waitForCompletion(job);
    } catch (Exception e) {
        printUsage(options);
        log.error(e.getLocalizedMessage(), e);
        return 2;
    }
}
From source file:com.kylinolap.job.hadoop.cube.CuboidJob.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    Options options = new Options();
    try {
        options.addOption(OPTION_JOB_NAME);
        options.addOption(OPTION_CUBE_NAME);
        options.addOption(OPTION_SEGMENT_NAME);
        options.addOption(OPTION_INPUT_PATH);
        options.addOption(OPTION_OUTPUT_PATH);
        options.addOption(OPTION_NCUBOID_LEVEL);
        options.addOption(OPTION_INPUT_FORMAT);
        parseOptions(options, args);

        Path input = new Path(getOptionValue(OPTION_INPUT_PATH));
        Path output = new Path(getOptionValue(OPTION_OUTPUT_PATH));
        String cubeName = getOptionValue(OPTION_CUBE_NAME).toUpperCase();
        int nCuboidLevel = Integer.parseInt(getOptionValue(OPTION_NCUBOID_LEVEL));
        String segmentName = getOptionValue(OPTION_SEGMENT_NAME);

        KylinConfig config = KylinConfig.getInstanceFromEnv();
        CubeManager cubeMgr = CubeManager.getInstance(config);
        CubeInstance cube = cubeMgr.getCube(cubeName);

        job = Job.getInstance(getConf(), getOptionValue(OPTION_JOB_NAME));
        System.out.println("Starting: " + job.getJobName());
        FileInputFormat.setInputPaths(job, input);

        File jarFile = new File(config.getKylinJobJarPath());
        if (jarFile.exists()) {
            job.setJar(config.getKylinJobJarPath());
        } else {
            job.setJarByClass(this.getClass());
        }

        // Mapper
        if (this.mapperClass == null) {
            throw new Exception("Mapper class is not set!");
        }
        boolean isInputTextFormat = false;
        if (hasOption(OPTION_INPUT_FORMAT)
                && ("textinputformat".equalsIgnoreCase(getOptionValue(OPTION_INPUT_FORMAT)))) {
            isInputTextFormat = true;
        }
        if (isInputTextFormat) {
            job.setInputFormatClass(TextInputFormat.class);
        } else {
            job.setInputFormatClass(SequenceFileInputFormat.class);
        }
        job.setMapperClass(this.mapperClass);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        // for base cuboid shuffle skew, some rowkey aggregates far more records than others
        job.setCombinerClass(CuboidReducer.class);

        // Reducer
        job.setReducerClass(CuboidReducer.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        FileOutputFormat.setOutputPath(job, output);

        // set job configuration
        job.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName);
        job.getConfiguration().set(BatchConstants.CFG_CUBE_SEGMENT_NAME, segmentName);
        // add metadata to distributed cache
        attachKylinPropsAndMetadata(cube, job.getConfiguration());

        setReduceTaskNum(job, config, cubeName, nCuboidLevel);

        this.deletePath(job.getConfiguration(), output);

        return waitForCompletion(job);
    } catch (Exception e) {
        printUsage(options);
        log.error(e.getLocalizedMessage(), e);
        return 2;
    }
}
From source file:com.kylinolap.job.hadoop.cube.FactDistinctColumnsJob.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    Options options = new Options();
    try {
        options.addOption(OPTION_JOB_NAME);
        options.addOption(OPTION_CUBE_NAME);
        options.addOption(OPTION_INPUT_PATH);
        options.addOption(OPTION_INPUT_FORMAT);
        options.addOption(OPTION_OUTPUT_PATH);
        parseOptions(options, args);

        job = Job.getInstance(getConf(), getOptionValue(OPTION_JOB_NAME));
        String cubeName = getOptionValue(OPTION_CUBE_NAME);
        Path input = new Path(getOptionValue(OPTION_INPUT_PATH));
        String inputFormat = getOptionValue(OPTION_INPUT_FORMAT);
        Path output = new Path(getOptionValue(OPTION_OUTPUT_PATH));

        // ----------------------------------------------------------------------------

        job.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName);
        System.out.println("Starting: " + job.getJobName());

        setupMapInput(input, inputFormat);
        setupReduceOutput(output);

        // add metadata to distributed cache
        CubeManager cubeMgr = CubeManager.getInstance(KylinConfig.getInstanceFromEnv());
        // CubeSegment seg = cubeMgr.getCube(cubeName).getTheOnlySegment();
        attachKylinPropsAndMetadata(cubeMgr.getCube(cubeName), job.getConfiguration());

        return waitForCompletion(job);
    } catch (Exception e) {
        printUsage(options);
        log.error(e.getLocalizedMessage(), e);
        addErrorLog(e);
        return 2;
    }
}
From source file:com.kylinolap.job.hadoop.cube.MergeCuboidJob.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    Options options = new Options();
    try {
        options.addOption(OPTION_JOB_NAME);
        options.addOption(OPTION_CUBE_NAME);
        options.addOption(OPTION_SEGMENT_NAME);
        options.addOption(OPTION_INPUT_PATH);
        options.addOption(OPTION_OUTPUT_PATH);
        parseOptions(options, args);

        String cubeName = getOptionValue(OPTION_CUBE_NAME).toUpperCase();
        String segmentName = getOptionValue(OPTION_SEGMENT_NAME).toUpperCase();
        KylinConfig config = KylinConfig.getInstanceFromEnv();
        CubeManager cubeMgr = CubeManager.getInstance(config);
        CubeInstance cube = cubeMgr.getCube(cubeName);
        // CubeSegment cubeSeg = cubeMgr.findSegment(cube, segmentName);

        // start job
        String jobName = getOptionValue(OPTION_JOB_NAME);
        System.out.println("Starting: " + jobName);
        job = Job.getInstance(getConf(), jobName);

        // set job configuration - basic
        File JarFile = new File(config.getKylinJobJarPath());
        if (JarFile.exists()) {
            job.setJar(config.getKylinJobJarPath());
        } else {
            job.setJarByClass(this.getClass());
        }
        // setJobJar(job);

        addInputDirs(getOptionValue(OPTION_INPUT_PATH), job);
        Path output = new Path(getOptionValue(OPTION_OUTPUT_PATH));
        FileOutputFormat.setOutputPath(job, output);

        // Mapper
        job.setInputFormatClass(SequenceFileInputFormat.class);
        job.setMapperClass(MergeCuboidMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        // Reducer - only one
        job.setReducerClass(CuboidReducer.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        // set job configuration
        job.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName);
        job.getConfiguration().set(BatchConstants.CFG_CUBE_SEGMENT_NAME, segmentName);
        // add metadata to distributed cache
        attachKylinPropsAndMetadata(cube, job.getConfiguration());

        setReduceTaskNum(job, config, cubeName, 0);

        this.deletePath(job.getConfiguration(), output);

        return waitForCompletion(job);
    } catch (Exception e) {
        printUsage(options);
        log.error(e.getLocalizedMessage(), e);
        return 2;
    }
}
From source file:com.kylinolap.job.hadoop.cube.RangeKeyDistributionJob.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    Options options = new Options();
    try {
        options.addOption(OPTION_INPUT_PATH);
        options.addOption(OPTION_OUTPUT_PATH);
        options.addOption(OPTION_JOB_NAME);
        options.addOption(OPTION_CUBE_NAME);
        parseOptions(options, args);

        // start job
        String jobName = getOptionValue(OPTION_JOB_NAME);
        job = Job.getInstance(getConf(), jobName);

        File JarFile = new File(KylinConfig.getInstanceFromEnv().getKylinJobJarPath());
        if (JarFile.exists()) {
            job.setJar(KylinConfig.getInstanceFromEnv().getKylinJobJarPath());
        } else {
            job.setJarByClass(this.getClass());
        }

        addInputDirs(getOptionValue(OPTION_INPUT_PATH), job);
        Path output = new Path(getOptionValue(OPTION_OUTPUT_PATH));
        FileOutputFormat.setOutputPath(job, output);
        // job.getConfiguration().set("dfs.block.size", "67108864");

        // Mapper
        job.setInputFormatClass(SequenceFileInputFormat.class);
        job.setMapperClass(RangeKeyDistributionMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);

        // Reducer - only one
        job.setReducerClass(RangeKeyDistributionReducer.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);
        job.setNumReduceTasks(1);

        this.deletePath(job.getConfiguration(), output);

        String cubeName = getOptionValue(OPTION_CUBE_NAME).toUpperCase();
        CubeManager cubeMgr = CubeManager.getInstance(KylinConfig.getInstanceFromEnv());
        CubeInstance cube = cubeMgr.getCube(cubeName);
        CubeCapacity cubeCapacity = cube.getDescriptor().getCapacity();
        job.getConfiguration().set(BatchConstants.CUBE_CAPACITY, cubeCapacity.toString());

        return waitForCompletion(job);
    } catch (Exception e) {
        printUsage(options);
        log.error(e.getLocalizedMessage(), e);
        return 2;
    }
}
From source file:com.kylinolap.job.hadoop.cube.RowKeyDistributionCheckerJob.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    Options options = new Options();
    try {
        options.addOption(OPTION_INPUT_PATH);
        options.addOption(OPTION_OUTPUT_PATH);
        options.addOption(OPTION_JOB_NAME);
        options.addOption(rowKeyStatsFilePath);
        parseOptions(options, args);

        String statsFilePath = getOptionValue(rowKeyStatsFilePath);

        // start job
        String jobName = getOptionValue(OPTION_JOB_NAME);
        job = Job.getInstance(getConf(), jobName);

        job.setJarByClass(this.getClass());

        addInputDirs(getOptionValue(OPTION_INPUT_PATH), job);
        Path output = new Path(getOptionValue(OPTION_OUTPUT_PATH));
        FileOutputFormat.setOutputPath(job, output);

        // Mapper
        job.setInputFormatClass(SequenceFileInputFormat.class);
        job.setMapperClass(RowKeyDistributionCheckerMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);

        // Reducer - only one
        job.setReducerClass(RowKeyDistributionCheckerReducer.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);
        job.setNumReduceTasks(1);

        job.getConfiguration().set("rowKeyStatsFilePath", statsFilePath);

        this.deletePath(job.getConfiguration(), output);

        return waitForCompletion(job);
    } catch (Exception e) {
        printUsage(options);
        log.error(e.getLocalizedMessage(), e);
        return 2;
    }
}
From source file:com.kylinolap.job.hadoop.invertedindex.IICreateHFileJob.java
License:Apache License
public int run(String[] args) throws Exception {
    Options options = new Options();
    try {
        options.addOption(OPTION_JOB_NAME);
        options.addOption(OPTION_CUBE_NAME);
        options.addOption(OPTION_INPUT_PATH);
        options.addOption(OPTION_OUTPUT_PATH);
        options.addOption(OPTION_HTABLE_NAME);
        parseOptions(options, args);

        Path output = new Path(getOptionValue(OPTION_OUTPUT_PATH));

        job = Job.getInstance(getConf(), getOptionValue(OPTION_JOB_NAME));

        File JarFile = new File(KylinConfig.getInstanceFromEnv().getKylinJobJarPath());
        if (JarFile.exists()) {
            job.setJar(KylinConfig.getInstanceFromEnv().getKylinJobJarPath());
        } else {
            job.setJarByClass(this.getClass());
        }

        addInputDirs(getOptionValue(OPTION_INPUT_PATH), job);
        FileOutputFormat.setOutputPath(job, output);

        job.setInputFormatClass(SequenceFileInputFormat.class);
        job.setMapperClass(IICreateHFileMapper.class);
        job.setMapOutputKeyClass(ImmutableBytesWritable.class);
        job.setMapOutputValueClass(KeyValue.class);

        String tableName = getOptionValue(OPTION_HTABLE_NAME);
        HTable htable = new HTable(getConf(), tableName);
        HFileOutputFormat.configureIncrementalLoad(job, htable);

        this.deletePath(job.getConfiguration(), output);

        return waitForCompletion(job);
    } catch (Exception e) {
        printUsage(options);
        log.error(e.getLocalizedMessage(), e);
        return 2;
    }
}