List of usage examples for the org.apache.hadoop.mapreduce.Job constructor
Job(Configuration conf, String jobName) throws IOException
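All of the examples below use this two-argument constructor, which has been deprecated since Hadoop 2.x in favor of the static factory Job.getInstance(Configuration, String). A minimal sketch of both forms (the class name JobConstructionSketch and the job name string are illustrative):

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class JobConstructionSketch {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();

        // Deprecated two-argument constructor, as used in the examples below.
        @SuppressWarnings("deprecation")
        Job legacy = new Job(conf, "example-job");

        // Preferred replacement since Hadoop 2.x: the static factory method.
        Job modern = Job.getInstance(conf, "example-job");
    }
}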
From source file:cn.lhfei.hadoop.ch05.v2.MaxTemperatureDriver.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.printf("Usage: %s [generic options] <input> <output>\n", getClass().getSimpleName());
        ToolRunner.printGenericCommandUsage(System.err);
        return -1;
    }

    Job job = new Job(getConf(), "Max temperature");
    job.setJarByClass(getClass());

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(MaxTemperatureMapper.class);
    job.setCombinerClass(MaxTemperatureReducer.class);
    job.setReducerClass(MaxTemperatureReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    return job.waitForCompletion(true) ? 0 : 1;
}
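This driver follows the Hadoop Tool/ToolRunner pattern, so generic options (-D, -files, and so on) are parsed before run is called and the configuration arrives via getConf(). The listing above does not include the entry point, so the following main is a sketch of the standard ToolRunner idiom, not code from the source file:

// Hypothetical entry point for the driver above.
public static void main(String[] args) throws Exception {
    int exitCode = ToolRunner.run(new MaxTemperatureDriver(), args);
    System.exit(exitCode);
}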
From source file:com.aerospike.hadoop.examples.aggregateintinput.AggregateIntInput.java
License:Apache License
public int run(final String[] args) throws Exception {
    final Configuration conf = getConf();

    @SuppressWarnings("deprecation")
    final Job job = new Job(conf, "AerospikeAggregateIntInput");

    log.info("run starting on bin " + binName);

    job.setJarByClass(AggregateIntInput.class);
    job.setInputFormatClass(AerospikeInputFormat.class);
    job.setMapperClass(Map.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(LongWritable.class);
    // job.setCombinerClass(Reduce.class); // no combiner
    job.setReducerClass(Reduce.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Text.class);

    FileOutputFormat.setOutputPath(job, new Path(args[0]));

    int status = job.waitForCompletion(true) ? 0 : 1;
    log.info("run finished, status=" + status);

    return status;
}
From source file:com.ailk.oci.ocnosql.tools.load.csvbulkload.CsvBulkLoadTool.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    HBaseConfiguration.addHbaseResources(getConf());
    Configuration conf = getConf();
    String quorum = conf.get("hbase.zookeeper.quorum");
    String clientPort = conf.get("hbase.zookeeper.property.clientPort");
    LOG.info("hbase.zookeeper.quorum=" + quorum);
    LOG.info("hbase.zookeeper.property.clientPort=" + clientPort);
    LOG.info("phoenix.query.dateFormat=" + conf.get("phoenix.query.dateFormat"));

    CommandLine cmdLine = null;
    try {
        cmdLine = parseOptions(args);
        LOG.info("JdbcUrl=" + getJdbcUrl(quorum + ":" + clientPort));
    } catch (IllegalStateException e) {
        printHelpAndExit(e.getMessage(), getOptions());
    }

    Class.forName(DriverManager.class.getName());
    Connection conn = DriverManager.getConnection(getJdbcUrl(quorum + ":" + clientPort));

    String tableName = cmdLine.getOptionValue(TABLE_NAME_OPT.getOpt());
    String schemaName = cmdLine.getOptionValue(SCHEMA_NAME_OPT.getOpt());
    String qualifiedTableName = getQualifiedTableName(schemaName, tableName);
    List<ColumnInfo> importColumns = buildImportColumns(conn, cmdLine, qualifiedTableName);
    LOG.info("tableName=" + tableName);
    LOG.info("schemaName=" + schemaName);
    LOG.info("qualifiedTableName=" + qualifiedTableName);

    configureOptions(cmdLine, importColumns, getConf());

    try {
        validateTable(conn, schemaName, tableName);
    } finally {
        conn.close();
    }

    Path inputPath = new Path(cmdLine.getOptionValue(INPUT_PATH_OPT.getOpt()));
    Path outputPath = null;
    if (cmdLine.hasOption(OUTPUT_PATH_OPT.getOpt())) {
        outputPath = new Path(cmdLine.getOptionValue(OUTPUT_PATH_OPT.getOpt()));
    } else {
        outputPath = new Path("/tmp/" + UUID.randomUUID());
    }
    LOG.info("Configuring HFile output path to {}", outputPath);

    Job job = new Job(getConf(),
            "Phoenix MapReduce import for " + getConf().get(PhoenixCsvToKeyValueMapper.TABLE_NAME_CONFKEY));

    // Allow overriding the job jar setting by using a -D system property at startup
    if (job.getJar() == null) {
        job.setJarByClass(PhoenixCsvToKeyValueMapper.class);
    }

    job.setInputFormatClass(TextInputFormat.class);
    FileInputFormat.addInputPath(job, inputPath);
    FileSystem.get(getConf());
    FileOutputFormat.setOutputPath(job, outputPath);
    job.setMapperClass(PhoenixCsvToKeyValueMapper.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(KeyValue.class);

    HTable htable = new HTable(getConf(), qualifiedTableName);

    // Auto configure partitioner and reducer according to the Main Data table
    HFileOutputFormat.configureIncrementalLoad(job, htable);

    LOG.info("Running MapReduce import job from {} to {}", inputPath, outputPath);
    boolean success = job.waitForCompletion(true);
    if (!success) {
        LOG.error("Import job failed, check JobTracker for details");
        return 1;
    }

    LOG.info("Loading HFiles from {}", outputPath);
    LoadIncrementalHFiles loader = new LoadIncrementalHFiles(getConf());
    loader.doBulkLoad(outputPath, htable);
    htable.close();

    LOG.info("Incremental load complete");
    LOG.info("Removing output directory {}", outputPath);
    if (!FileSystem.get(getConf()).delete(outputPath, true)) {
        LOG.error("Removing output directory {} failed", outputPath);
    }
    return 0;
}
From source file:com.ailk.oci.ocnosql.tools.load.mutiple.MutipleColumnImportTsv.java
License:Apache License
/**
 * Sets up the actual job.
 *
 * @param conf The current configuration.
 * @return The newly created job.
 * @throws IOException When setting up the job fails.
 */
public static Job createSubmittableJob(Configuration conf, String tableName, String inputPath,
        String tmpOutputPath) throws IOException, ClassNotFoundException {
    // Support non-XML supported characters
    // by re-encoding the passed separator as a Base64 string.
    String actualSeparator = conf.get(CommonConstants.SEPARATOR);
    if (actualSeparator != null) {
        conf.set(CommonConstants.SEPARATOR, Base64.encodeBytes(actualSeparator.getBytes()));
    }
    String tableNameConf = conf.get(CommonConstants.TABLE_NAME);
    if (tableNameConf == null) {
        conf.set(CommonConstants.TABLE_NAME, tableName);
    }

    // See if a non-default Mapper was set
    String mapperClassName = conf.get(MAPPER_CONF_KEY);
    Class mapperClass = mapperClassName != null ? Class.forName(mapperClassName) : DEFAULT_MAPPER;

    Path inputDir = new Path(inputPath);
    Job job = new Job(conf, NAME + "_" + tableName);
    job.setJarByClass(MutipleColumnImportTsv.class);
    FileInputFormat.setInputPaths(job, inputDir);

    // The InputFormat can be overridden with -Dimporttsv.inputFormat; it defaults to TextInputFormat.
    String inputFmtName = conf.get(CommonConstants.INPUTFORMAT,
            "org.apache.hadoop.mapreduce.lib.input.TextInputFormat");
    LOG.info(CommonConstants.INPUTFORMAT + " is " + inputFmtName);
    Class<? extends InputFormat> inputFmtClass = Class.forName(inputFmtName).asSubclass(InputFormat.class);
    job.setInputFormatClass(inputFmtClass);
    job.setMapperClass(mapperClass);

    String hfileOutPath = tmpOutputPath;
    if (hfileOutPath != null) {
        if (!doesTableExist(tableName)) {
            createTable(conf, tableName);
        }
        HTable table = new HTable(conf, tableName);
        // job.setReducerClass(MutipleColumnReducer.class);
        Path outputDir = new Path(hfileOutPath);
        FileOutputFormat.setOutputPath(job, outputDir);
        job.setMapOutputKeyClass(ImmutableBytesWritable.class);
        job.setMapOutputValueClass(Put.class);
        HFileOutputFormat.configureIncrementalLoad(job, table);
    } else {
        // No reducers. Just write straight to table. Call initTableReducerJob
        // to set up the TableOutputFormat.
        TableMapReduceUtil.initTableReducerJob(tableName, null, job);
        job.setNumReduceTasks(0);
    }

    TableMapReduceUtil.addDependencyJars(job);
    TableMapReduceUtil.addDependencyJars(job.getConfiguration(),
            com.google.common.base.Function.class /* Guava used by TsvParser */);
    return job;
}
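Note that createSubmittableJob only configures and returns the Job; a caller still has to submit it. The real driver for MutipleColumnImportTsv is not part of this listing, so the snippet below is a hypothetical caller (table name and paths are placeholders):

// Hypothetical driver; not from the source file above.
public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    Job job = MutipleColumnImportTsv.createSubmittableJob(
            conf, "my_table", "/input/tsv", "/tmp/hfile-staging");
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}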
From source file:com.ailk.oci.ocnosql.tools.load.single.SingleColumnImportTsv.java
License:Apache License
/**
 * Sets up the actual importtsv MapReduce job.
 *
 * @param conf The current configuration.
 * @return The newly created job.
 * @throws IOException When setting up the job fails.
 */
public static Job createSubmittableJob(Configuration conf, String tableName, String inputPath,
        String tmpOutputPath) throws IOException, ClassNotFoundException {
    // Support non-XML supported characters
    // by re-encoding the passed separator as a Base64 string.
    String actualSeparator = conf.get(CommonConstants.SEPARATOR);
    if (actualSeparator != null) {
        conf.set(CommonConstants.SEPARATOR, Base64.encodeBytes(actualSeparator.getBytes()));
    }

    // See if a non-default Mapper was set; the default is SingleColumnImporterMapper.
    String mapperClassName = conf.get(MAPPER_CONF_KEY);
    Class mapperClass = mapperClassName != null ? Class.forName(mapperClassName) : DEFAULT_MAPPER;

    Path inputDir = new Path(inputPath);

    // Create the job.
    Job job = new Job(conf, NAME + "_" + tableName);
    // Set the jar by finding where a given class came from.
    job.setJarByClass(SingleColumnImportTsv.class);

    FileInputFormat.setInputPaths(job, inputDir);

    // The InputFormat can be overridden with -Dimporttsv.inputFormat; it defaults to TextInputFormat.
    String inputFmtName = conf.get(CommonConstants.INPUTFORMAT,
            "org.apache.hadoop.mapreduce.lib.input.TextInputFormat");
    LOG.info(CommonConstants.INPUTFORMAT + " is " + inputFmtName);
    Class<? extends InputFormat> inputFmtClass = Class.forName(inputFmtName).asSubclass(InputFormat.class);
    job.setInputFormatClass(inputFmtClass);
    job.setMapperClass(mapperClass);

    String hfileOutPath = tmpOutputPath;
    if (hfileOutPath != null) {
        // Create the table if it does not already exist.
        if (!doesTableExist(tableName)) {
            createTable(conf, tableName);
        }
        HTable table = new HTable(conf, tableName);
        job.setReducerClass(SingleColumnReducer.class);

        Path outputDir = new Path(hfileOutPath);
        FileOutputFormat.setOutputPath(job, outputDir);
        job.setMapOutputKeyClass(ImmutableBytesWritable.class);
        job.setMapOutputValueClass(TextArrayWritable.class);

        // Configure the partitioner, output format, and reducer for the incremental load.
        configureIncrementalLoad(job, table);
    } else {
        // No reducers. Just write straight to table (Puts). Call initTableReducerJob
        // to set up the TableOutputFormat.
        TableMapReduceUtil.initTableReducerJob(tableName, null, job);
        job.setNumReduceTasks(0);
    }

    TableMapReduceUtil.addDependencyJars(job);
    TableMapReduceUtil.addDependencyJars(job.getConfiguration(),
            com.google.common.base.Function.class /* Guava used by TsvParser */);
    return job;
}
From source file:com.alectenharmsel.research.FileCombine.java
License:Apache License
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: FileCombine <input> <output>");
        System.exit(-1);
    }

    Job job = new Job(getConf(), "FileCombine");
    job.setJarByClass(FileCombine.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(FileCombineMapper.class);
    job.setReducerClass(FileCombineReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:com.alectenharmsel.research.hadoop.CodeTokenizer.java
License:Apache License
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: CodeTokenizer <input> <output>");
        System.exit(-1);
    }

    Configuration conf = getConf();
    Job job = new Job(conf, "SrcTok");
    job.setJarByClass(CodeTokenizer.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;
}
From source file:com.alectenharmsel.research.hadoop.FileCombine.java
License:Apache License
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: FileCombine <input> <output>");
        System.exit(-1);
    }

    Job job = new Job(getConf(), "FileCombine");
    job.setJarByClass(FileCombine.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:com.alectenharmsel.research.LcCounters.java
License:Apache License
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: LineCounter <input> <output>");
        System.exit(-1);
    }

    Job job = new Job(getConf(), "LineCount");
    job.setJarByClass(LineCount.class);
    job.setInputFormatClass(WholeBlockInputFormat.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(LineCountMapper.class);
    job.setReducerClass(LineCountReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    Configuration check = job.getConfiguration();
    boolean success = job.waitForCompletion(true);

    // Get the counter here, output to a file called total in the dir
    Counters counters = job.getCounters();

    // Throw it in the file
    Path outPath = new Path(args[1]);
    FileSystem fs = outPath.getFileSystem(check);
    OutputStream out = fs.create(new Path(outPath, "total"));
    String total = counters.findCounter(LcCounters.NUM_LINES).getValue() + "\n";
    out.write(total.getBytes());
    out.close();

    return success ? 0 : 1;
}
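The total written above comes from the custom LcCounters.NUM_LINES counter, which is read from the completed job and dumped to a "total" file in the output directory. The mapper-side increment is not shown in this listing, so the sketch below of how the counter would typically be bumped is an assumption:

// The enum is referenced above; the increment site inside LineCountMapper is assumed.
public enum LcCounters {
    NUM_LINES
}

// Hypothetical, inside LineCountMapper.map():
// context.getCounter(LcCounters.NUM_LINES).increment(linesInBlock);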
From source file:com.alectenharmsel.research.MoabLicenses.java
License:Apache License
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: MoabLicenses <input> <output>");
        System.exit(-1);
    }

    Configuration conf = getConf();
    Job job = new Job(conf, "MoabLicenses");
    job.setJarByClass(MoabLicenses.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(MoabLicensesMapper.class);
    job.setReducerClass(MoabLicensesReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;
}