Example usage for org.apache.hadoop.mapreduce Job getInstance

Introduction

On this page you can find example usage for org.apache.hadoop.mapreduce Job getInstance.

Prototype

@Deprecated
public static Job getInstance(Cluster ignored, Configuration conf) throws IOException 

Document

Creates a new Job with no particular Cluster and the given Configuration.
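
This overload is deprecated: the Cluster argument is ignored, and the Configuration-only overloads are the usual replacement. The following minimal sketch (not taken from the sources below; the class name is illustrative) shows the commonly used forms:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class GetInstanceSketch {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();

        // Preferred modern forms: both replace the deprecated
        // Job.getInstance(Cluster, Configuration) shown in the prototype.
        Job named = Job.getInstance(conf, "example job");
        Job unnamed = Job.getInstance(conf);

        System.out.println(named.getJobName());
    }
}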

Usage

From source file:com.airline.analytics.AirlineDelayAnalytics.java

@Override
public int run(String[] strings) throws Exception {

    Job job = Job.getInstance(getConf(), "Hadoop Airline Delay Analytics");

    job.setJarByClass(AirlineDelayAnalytics.class);

    job.setMapperClass(AirlineMapper.class);
    // job.setCombinerClass(AirlineReducer.class);
    job.setReducerClass(AirlineReducer.class);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Text.class);

    FileInputFormat.addInputPath(job, new Path(strings[0]));
    FileOutputFormat.setOutputPath(job, new Path(strings[1]));

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.airline.analytics.AirlineUniqueRoutesAnalytics.java

@Override
public int run(String[] strings) throws Exception {

    Job job = Job.getInstance(getConf(), "Hadoop Airline Orign Destination Analytics");

    job.setJarByClass(getClass());

    // Distributed Cache
    job.addCacheFile(new URI("/airline/codes.csv"));

    job.setMapperClass(AirlineMapper.class);
    // job.setCombinerClass(AirlineReducer.class);
    job.setReducerClass(AirlineReducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(job, new Path(strings[0]));
    FileOutputFormat.setOutputPath(job, new Path(strings[1]));

    return job.waitForCompletion(true) ? 0 : 1;
}
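
The example above registers /airline/codes.csv via job.addCacheFile but only shows the driver side of the distributed cache. As a hedged sketch (AirlineMapper's real implementation is not shown on this page, so the class below is illustrative), a mapper would typically resolve the cached file in setup():

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class CachedCodesMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
    private final Map<String, String> codes = new HashMap<>();

    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        // Files registered with job.addCacheFile() are localized on each node
        // and, on YARN, symlinked into the task working directory under their
        // base name, so "codes.csv" can be opened as a local file.
        if (context.getCacheFiles() != null && context.getCacheFiles().length > 0) {
            try (BufferedReader reader = new BufferedReader(new FileReader("codes.csv"))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    String[] parts = line.split(",", 2);
                    if (parts.length == 2) {
                        codes.put(parts[0].trim(), parts[1].trim());
                    }
                }
            }
        }
    }
}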

From source file:com.alectenharmsel.hadoop.qa.LineCount.java

License:Apache License

public static void main(String[] args) throws Exception {
    GenericOptionsParser parse = new GenericOptionsParser(new Configuration(), args);
    Configuration conf = parse.getConfiguration();

    String[] remainingArgs = parse.getRemainingArgs();
    if (remainingArgs.length != 2) {
        System.err.println("Usage: LineCount <input> <output>");
        System.exit(-1);
    }

    Job job = Job.getInstance(conf, "LineCount");
    job.setJarByClass(LineCount.class);

    job.setMapperClass(Map.class);
    job.setCombinerClass(Reduce.class);
    job.setReducerClass(Reduce.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    FileInputFormat.addInputPath(job, new Path(remainingArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(remainingArgs[1]));

    boolean success = job.waitForCompletion(true);

    int res = success ? 0 : 1;
    System.exit(res);
}

From source file:com.alectenharmsel.research.hadoop.LcCounters.java

License:Apache License

public static void main(String[] args) throws Exception {
    GenericOptionsParser parse = new GenericOptionsParser(new Configuration(), args);
    Configuration conf = parse.getConfiguration();

    String[] remainingArgs = parse.getRemainingArgs();
    if (remainingArgs.length != 2) {
        System.err.println("Usage: LineCount <input> <output>");
        System.exit(-1);
    }

    Job job = Job.getInstance(conf, "LineCount");
    job.setJarByClass(LineCount.class);

    job.setMapperClass(Map.class);
    job.setCombinerClass(Reduce.class);
    job.setReducerClass(Reduce.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    FileInputFormat.addInputPath(job, new Path(remainingArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(remainingArgs[1]));

    boolean success = job.waitForCompletion(true);

    //Get the counter here and print it
    Counters counters = job.getCounters();
    long total = counters.findCounter(LcCounters.NUM_LINES).getValue();
    System.out.println(Long.toString(total));

    int res = success ? 0 : 1;
    System.exit(res);
}
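
The driver above reads back a custom enum counter (LcCounters.NUM_LINES) after the job finishes. As a hedged sketch (the project's actual Map class is not shown on this page), the task-side increment typically looks like this:

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class LineCountingMapper extends Mapper<LongWritable, Text, Text, LongWritable> {
    // Hadoop groups enum counters under the enum's fully qualified class name.
    enum LcCounters { NUM_LINES }

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Each input line bumps the counter; the driver reads the aggregated
        // value via job.getCounters().findCounter(...) after completion.
        context.getCounter(LcCounters.NUM_LINES).increment(1);
        context.write(new Text("lines"), new LongWritable(1));
    }
}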

From source file:com.alectenharmsel.research.hadoop.MoabLicenseInfo.java

License:Apache License

public static void main(String[] args) throws Exception {
    GenericOptionsParser parser = new GenericOptionsParser(new Configuration(), args);
    Configuration conf = parser.getConfiguration();
    conf.set("mapreduce.output.textoutputformat.separator", ",");

    String[] remainingArgs = parser.getRemainingArgs();
    if (remainingArgs.length != 2) {
        System.err.println("Usage: LineCount <input> <output>");
        System.exit(-1);
    }

    Job job = Job.getInstance(conf, "MoabLicenseInfo");
    job.setJarByClass(MoabLicenseInfo.class);

    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    FileInputFormat.addInputPath(job, new Path(remainingArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(remainingArgs[1]));

    int res = job.waitForCompletion(true) ? 0 : 1;
    System.exit(res);
}

From source file:com.aliyun.emr.example.WordCount.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    GenericOptionsParser optionParser = new GenericOptionsParser(conf, args);
    String[] remainingArgs = optionParser.getRemainingArgs();
    if (!(remainingArgs.length == 2 || remainingArgs.length == 4)) {
        System.err.println("Usage: wordcount <in> <out> [-skip skipPatternFile]");
        System.exit(2);
    }
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    List<String> otherArgs = new ArrayList<String>();
    for (int i = 0; i < remainingArgs.length; ++i) {
        if ("-skip".equals(remainingArgs[i])) {
            job.addCacheFile(new Path(EMapReduceOSSUtil.buildOSSCompleteUri(remainingArgs[++i], conf)).toUri());
            job.getConfiguration().setBoolean("wordcount.skip.patterns", true);
        } else {
            otherArgs.add(remainingArgs[i]);
        }
    }
    FileInputFormat.addInputPath(job, new Path(EMapReduceOSSUtil.buildOSSCompleteUri(otherArgs.get(0), conf)));
    FileOutputFormat.setOutputPath(job,
            new Path(EMapReduceOSSUtil.buildOSSCompleteUri(otherArgs.get(1), conf)));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.aliyun.openservices.tablestore.hadoop.RowCounter.java

License:Apache License

public static void main(String[] args) throws Exception {
    if (!parseArgs(args)) {
        printUsage();
        System.exit(1);
    }
    if (endpoint == null || accessKeyId == null || accessKeySecret == null || table == null
            || outputPath == null) {
        printUsage();
        System.exit(1);
    }

    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "row count");
    job.setJarByClass(RowCounter.class);
    job.setMapperClass(RowCounterMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    job.setInputFormatClass(TableStoreInputFormat.class);

    TableStore.setCredential(job, accessKeyId, accessKeySecret, securityToken);
    TableStore.setEndpoint(job, endpoint, instance);
    TableStoreInputFormat.addCriteria(job, fetchCriteria());
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.aliyun.openservices.tablestore.hadoop.TableStoreOutputFormatExample.java

License:Apache License

public static void main(String[] args) throws Exception {
    if (!parseArgs(args)) {
        printUsage();
        System.exit(1);
    }
    if (endpoint == null || accessKeyId == null || accessKeySecret == null || inputTable == null
            || outputTable == null) {
        printUsage();
        System.exit(1);
    }

    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, TableStoreOutputFormatExample.class.getName());
    job.setMapperClass(OwnerMapper.class);
    job.setReducerClass(IntoTableReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(MapWritable.class);
    job.setInputFormatClass(TableStoreInputFormat.class);
    job.setOutputFormatClass(TableStoreOutputFormat.class);

    TableStore.setCredential(job, accessKeyId, accessKeySecret, securityToken);
    TableStore.setEndpoint(job, endpoint, instance);
    TableStoreInputFormat.addCriteria(job, fetchCriteria());
    TableStoreOutputFormat.setOutputTable(job, outputTable);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.awcoleman.ExampleJobSummaryLogWithOutput.BinRecToAvroRecDriver.java

License:Apache License

public int run(String[] args) throws Exception {

    String input = null;
    String output = null;

    if (args.length < 2) {
        System.err.printf("Usage: %s <input> <output>\n", this.getClass().getSimpleName());
        return -1;
    } else {
        input = args[0];
        output = args[1];
    }

    Job job = Job.getInstance(getConf(), "BinRecToAvroRecDriver");
    Configuration conf = job.getConfiguration();

    //Add job log to hold Driver logging (and any summary info about the dataset, job, or counters we want to write)
    String fapath = createTempFileAppender(job);

    //get schema
    Schema outSchema = ReflectData.get().getSchema(com.awcoleman.examples.avro.BinRecForPartitions.class);
    job.getConfiguration().set("outSchema", outSchema.toString());

    //Job conf settings
    job.setJarByClass(BinRecToAvroRecDriver.class);
    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);
    job.setInputFormatClass(BinRecInputFormat.class);
    job.setOutputFormatClass(AvroKeyOutputFormat.class);
    AvroJob.setOutputKeySchema(job, outSchema);

    AvroJob.setMapOutputKeySchema(job, Schema.create(Schema.Type.STRING));
    AvroJob.setMapOutputValueSchema(job, outSchema);

    //Job output compression
    FileOutputFormat.setCompressOutput(job, true);
    job.getConfiguration().set(AvroJob.CONF_OUTPUT_CODEC, DataFileConstants.DEFLATE_CODEC);

    //Input and Output Paths
    FileInputFormat.setInputPaths(job, new Path(input));
    Path outPath = new Path(output);
    FileOutputFormat.setOutputPath(job, outPath);
    outPath.getFileSystem(conf).delete(outPath, true);

    boolean jobCompletionStatus = job.waitForCompletion(true);

    //Print Custom Counters before exiting
    Counters counters = job.getCounters();
    for (MYJOB_CNTRS customCounter : MYJOB_CNTRS.values()) {
        Counter thisCounter = counters.findCounter(customCounter);
        System.out.println("Custom Counter " + customCounter + "=" + thisCounter.getValue());
    }

    long mycnt1 = job.getCounters()
            .findCounter("com.awcoleman.TestingGettingContainerLogger.BinRecToAvroRecDriver$MYJOB_CNTRS",
                    "MYCNT1")
            .getValue();
    long mycnt2 = job.getCounters()
            .findCounter("com.awcoleman.TestingGettingContainerLogger.BinRecToAvroRecDriver$MYJOB_CNTRS",
                    "MYCNT2")
            .getValue();
    long mycnt3 = job.getCounters()
            .findCounter("com.awcoleman.TestingGettingContainerLogger.BinRecToAvroRecDriver$MYJOB_CNTRS",
                    "MYCNT3")
            .getValue();

    long myfakekpi = mycnt1 - mycnt2;

    String msgMyfakekpi = "The Fake KPI of the Dataset: " + String.format("%,d", myfakekpi);
    System.out.println(msgMyfakekpi);
    logger.info(msgMyfakekpi);

    //Finished, so move job log to HDFS in _log dir, clean
    copyTempFileAppenderToHDFSOutpath(job, fapath, output);

    return jobCompletionStatus ? 0 : 1;
}
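
Note how the counters above are looked up by group and name strings: for enum counters, Hadoop uses the enum's fully qualified class name as the group, which is why the nested-class "$MYJOB_CNTRS" notation appears in the findCounter() calls.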

From source file:com.baynote.kafka.hadoop.KafkaJobBuilder.java

License:Apache License

/**
 * Creates a {@link Job} based on how {@code this} {@link KafkaJobBuilder} has been configured. There are no
 * side-effects on {@code this} instance when you call this method, so you can call it multiple times.
 *
 * @param conf
 *            the job conf.
 * @return a fully configured {@link Job}.
 * @throws Exception error
 * @throws IllegalArgumentException
 *             if any required parameters are not set.
 */
public Job configureJob(final Configuration conf) throws Exception {
    validateSettings();
    final Job job = Job.getInstance(conf, getDefaultedJobName());

    // set queue inputs
    if (getQueueMappers().size() == 1) {
        job.setInputFormatClass(KafkaInputFormat.class);
        final TopicConf topicConf = Iterables.getOnlyElement(getQueueMappers());
        KafkaInputFormat.setTopic(job, topicConf.getTopic());
        KafkaInputFormat.setConsumerGroup(job, topicConf.getConsumerGroup());
        job.setMapperClass(topicConf.getMapper());
    } else {
        job.setInputFormatClass(MultipleKafkaInputFormat.class);
        for (final TopicConf topicConf : getQueueMappers()) {
            MultipleKafkaInputFormat.addTopic(job, topicConf.getTopic(), topicConf.getConsumerGroup(),
                    topicConf.getMapper());
        }
    }

    if (getMapOutputKeyClass() != null) {
        job.setMapOutputKeyClass(getMapOutputKeyClass());
    }

    if (getMapOutputValueClass() != null) {
        job.setMapOutputValueClass(getMapOutputValueClass());
    }

    if (getReducerClass() == null) {
        job.setNumReduceTasks(0);
    } else {
        job.setReducerClass(getReducerClass());
        job.setNumReduceTasks(getNumReduceTasks());
    }

    if (getPartitionerClass() != null) {
        job.setPartitionerClass(getPartitionerClass());
    }

    // set output
    job.setOutputFormatClass(getOutputFormatClass());
    job.setOutputKeyClass(getOutputKeyClass());
    job.setOutputValueClass(getOutputValueClass());
    if (getOutputFormat() == SupportedOutputFormat.TEXT_FILE) {
        TextOutputFormat.setOutputPath(job, getDefaultedOutputPath());
    } else if (getOutputFormat() == SupportedOutputFormat.SEQUENCE_FILE) {
        SequenceFileOutputFormat.setOutputPath(job, getDefaultedOutputPath());
    }

    if (usingS3()) {
        job.getConfiguration().set("fs.s3n.awsAccessKeyId", getS3AccessKey());
        job.getConfiguration().set("fs.s3n.awsSecretAccessKey", getS3SecretyKey());
        job.getConfiguration().set("fs.s3.awsAccessKeyId", getS3AccessKey());
        job.getConfiguration().set("fs.s3.awsSecretAccessKey", getS3SecretyKey());
    }

    if (isLazyOutputFormat()) {
        LazyOutputFormat.setOutputFormatClass(job, getOutputFormatClass());
    }

    // setup kafka input format specifics
    KafkaInputFormat.setZkConnect(job, getZkConnect());
    KafkaInputFormat.setKafkaFetchSizeBytes(job, getKafkaFetchSizeBytes());

    job.setSpeculativeExecution(false);
    job.setJarByClass(getClass());

    // memory settings for mappers
    if (!Strings.isNullOrEmpty(getTaskMemorySettings())) {
        job.getConfiguration().set("mapred.child.java.opts", getTaskMemorySettings());
    }

    return job;
}
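
Since configureJob() is documented above as having no side effects on the builder, one builder can produce several independent jobs. A minimal sketch (only configureJob() is taken from this page; the helper method and the way the builder is obtained are illustrative):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

import com.baynote.kafka.hadoop.KafkaJobBuilder;

public class BuilderReuseSketch {
    // configureJob() does not mutate the builder, so each call yields a
    // fresh, fully configured Job instance.
    static void submitTwice(KafkaJobBuilder builder, Configuration conf) throws Exception {
        Job first = builder.configureJob(conf);
        Job second = builder.configureJob(conf); // independent of 'first'
        first.submit();
        second.submit();
    }
}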