Example usage for the org.apache.hadoop.mapreduce.Job constructor

List of usage examples for the org.apache.hadoop.mapreduce.Job constructor

Introduction

On this page you can find example usages of the org.apache.hadoop.mapreduce.Job constructor.

Prototype

Job(Configuration conf, String jobName) throws IOException 
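
This constructor builds a job from a Configuration plus a human-readable job name; it is deprecated in current Hadoop releases in favor of Job.getInstance(Configuration, String). Below is a minimal, self-contained sketch of the pattern the usage examples follow. The MinimalJobDriver class name, the identity Mapper/Reducer, and the args-based paths are placeholders chosen for illustration, not taken from the examples.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class MinimalJobDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();

        // Deprecated constructor shown because it is what the examples below use;
        // Job.getInstance(conf, "minimal example") is the non-deprecated equivalent.
        @SuppressWarnings("deprecation")
        Job job = new Job(conf, "minimal example");
        job.setJarByClass(MinimalJobDriver.class);

        // Identity Mapper/Reducer keep this sketch self-contained; real jobs plug in
        // their own classes, as the WordMedian and HAWQ examples below do.
        job.setMapperClass(Mapper.class);
        job.setReducerClass(Reducer.class);
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // Submit and block until the job finishes; the exit code reflects success.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}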

Usage

From source file:com.phantom.hadoop.examples.WordMedian.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: wordmedian <in> <out>");
        return 0;
    }

    setConf(new Configuration());
    Configuration conf = getConf();

    @SuppressWarnings("deprecation")
    Job job = new Job(conf, "word median");
    job.setJarByClass(WordMedian.class);
    job.setMapperClass(WordMedianMapper.class);
    job.setCombinerClass(WordMedianReducer.class);
    job.setReducerClass(WordMedianReducer.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    boolean result = job.waitForCompletion(true);

    // Job 1 is complete -- use its counters to locate the middle value(s) needed for the median

    long totalWords = job.getCounters().getGroup(TaskCounter.class.getCanonicalName())
            .findCounter("MAP_OUTPUT_RECORDS", "Map output records").getValue();
    int medianIndex1 = (int) Math.ceil((totalWords / 2.0));
    int medianIndex2 = (int) Math.floor((totalWords / 2.0));

    median = readAndFindMedian(args[1], medianIndex1, medianIndex2, conf);

    return (result ? 0 : 1);
}

From source file:com.phantom.hadoop.examples.WordStandardDeviation.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: wordstddev <in> <out>");
        return 0;
    }

    Configuration conf = getConf();

    @SuppressWarnings("deprecation")
    Job job = new Job(conf, "word stddev");
    job.setJarByClass(WordStandardDeviation.class);
    job.setMapperClass(WordStandardDeviationMapper.class);
    job.setCombinerClass(WordStandardDeviationReducer.class);
    job.setReducerClass(WordStandardDeviationReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    Path outputpath = new Path(args[1]);
    FileOutputFormat.setOutputPath(job, outputpath);
    boolean result = job.waitForCompletion(true);

    // read output and calculate standard deviation
    stddev = readAndCalcStdDev(outputpath, conf);

    return (result ? 0 : 1);
}

From source file:com.pivotal.hawq.mapreduce.demo.HAWQInputFormatDemo.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length < 3) {
        printUsage();
    }

    String dbURL = args[0];
    String tableName = args[1];
    String outputPath = args[2];
    String username = (args.length >= 4) ? args[3] : null;
    String password = (args.length >= 5) ? args[4] : null;

    Job job = new Job(getConf(), "HAWQInputFormatDemo");
    job.setJarByClass(HAWQInputFormatDemo.class);

    job.setInputFormatClass(HAWQInputFormat.class);

    HAWQInputFormat.setInput(job.getConfiguration(), dbURL, username, password, tableName);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setMapperClass(HAWQEchoMapper.class);
    job.setNumReduceTasks(0);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    long startTime = System.currentTimeMillis();
    int returnCode = job.waitForCompletion(true) ? 0 : 1;
    long endTime = System.currentTimeMillis();

    System.out.println("Time elapsed: " + (endTime - startTime) + " milliseconds");

    return returnCode;
}

From source file:com.pivotal.hawq.mapreduce.demo.HAWQInputFormatDemo2.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        printUsage();
    }

    String metadataFile = args[0];
    String outputPath = args[1];

    Job job = new Job(getConf(), "HAWQInputFormatDemo2");
    job.setJarByClass(HAWQInputFormatDemo2.class);

    job.setInputFormatClass(HAWQInputFormat.class);

    HAWQInputFormat.setInput(job.getConfiguration(), metadataFile);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setMapperClass(HAWQEchoMapper.class);
    job.setNumReduceTasks(0);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    long startTime = System.currentTimeMillis();
    int returnCode = job.waitForCompletion(true) ? 0 : 1;
    long endTime = System.currentTimeMillis();

    System.out.println("Time elapsed: " + (endTime - startTime) + " milliseconds");

    return returnCode;
}

From source file:com.pivotal.hawq.mapreduce.MapReduceClusterDriver.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length != 3 && args.length != 4) {
        System.err.printf("Usage: %s [generic options] <tableName> <dburl> <output> [<mapper_classname>]\n",
                getClass().getSimpleName());
        ToolRunner.printGenericCommandUsage(System.err);
        return -1;
    }

    String tableName = args[0];
    String dbUrl = args[1];
    Path outputPath = new Path(args[2]);
    Class<? extends Mapper> mapperClass = (args.length == 3) ? HAWQTableMapper.class
            : (Class<? extends Mapper>) Class.forName(args[3]);

    // delete previous output
    FileSystem fs = FileSystem.get(getConf());
    if (fs.exists(outputPath))
        fs.delete(outputPath, true);
    fs.close();

    Job job = new Job(getConf(), "job_read_" + tableName);
    job.setJarByClass(MapReduceClusterDriver.class);

    job.setInputFormatClass(HAWQInputFormat.class);
    HAWQInputFormat.setInput(job.getConfiguration(), dbUrl, null, null, tableName);
    FileOutputFormat.setOutputPath(job, outputPath);

    job.setMapperClass(mapperClass);
    job.setReducerClass(HAWQTableReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.pivotal.hawq.mapreduce.parquet.HAWQParquetOutputDriver.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    Job job = new Job(getConf(), "HAWQParquetOutputFormat");
    job.setJarByClass(HAWQParquetOutputDriver.class);

    job.setOutputFormatClass(HAWQParquetOutputFormat.class);

    /*
    // int2 int4 int8
    HAWQSchema schema = new HAWQSchema("t_int",
    HAWQSchema.required_field(HAWQPrimitiveField.PrimitiveType.INT2, "col_short"),
    HAWQSchema.optional_field(HAWQPrimitiveField.PrimitiveType.INT4, "col_int"),
    HAWQSchema.required_field(HAWQPrimitiveField.PrimitiveType.INT8, "col_long")
    );
    job.setMapperClass(WriteIntMapper.class);
    */

    /*
    // varchar
    HAWQSchema schema = new HAWQSchema("t_varchar",
    HAWQSchema.required_field(HAWQPrimitiveField.PrimitiveType.VARCHAR, "col_varchar")
    );
    job.setMapperClass(WriteVarcharMapper.class);
    */

    /*
    // float4 float8
    HAWQSchema schema = new HAWQSchema("t_floating",
    HAWQSchema.required_field(HAWQPrimitiveField.PrimitiveType.FLOAT4, "col_float"),
    HAWQSchema.required_field(HAWQPrimitiveField.PrimitiveType.FLOAT8, "col_long")
    );
    job.setMapperClass(WriteFloatingNumberMapper.class);
    */

    // boolean
    //      HAWQSchema schema = new HAWQSchema("t_boolean",
    //            HAWQSchema.required_field(HAWQPrimitiveField.PrimitiveType.BOOL, "col_bool"));
    //      job.setMapperClass(WriteBooleanMapper.class);

    // byte array
    HAWQSchema schema = new HAWQSchema("t_bytea",
            HAWQSchema.required_field(HAWQPrimitiveField.PrimitiveType.BYTEA, "col_bytea"));
    job.setMapperClass(WriteByteArrayMapper.class);

    HAWQParquetOutputFormat.setSchema(job, schema);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    HAWQParquetOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setNumReduceTasks(0);
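    // With zero reducers this is a map-only job: the mapper's (Void, HAWQRecord)
    // output goes straight to HAWQParquetOutputFormat.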

    job.setMapOutputKeyClass(Void.class);
    job.setMapOutputValueClass(HAWQRecord.class);

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.placeiq.piqconnect.BlocksBuilder.java

License:Apache License

protected Job configStage1() throws Exception {
    FileSystem fs = FileSystem.get(getConf());
    fs.delete(pathOutput, true); // remove any previous output (may be unnecessary)

    Configuration conf = getConf();
    conf.setInt(Constants.PROP_BLOCK_SIZE, blockSize);
    conf.setBoolean(Constants.PROP_IS_VECTOR, isVector);
    conf.set("mapred.output.compression.type", "BLOCK"); // useful ?

    Job job = new Job(conf, "data-piqid.piqconnect.BlocksBuilder");
    job.setJarByClass(BlocksBuilder.class);
    job.setMapperClass(MapStage1.class);
    job.setReducerClass(RedStage1.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setNumReduceTasks(numberOfReducers);
    job.setMapOutputKeyClass(BlockIndexWritable.class);
    job.setMapOutputValueClass(LightBlockWritable.class);
    job.setOutputKeyClass(BlockIndexWritable.class);
    job.setOutputValueClass(BlockWritable.class);

    FileInputFormat.setInputPaths(job, pathEdges);
    SequenceFileOutputFormat.setOutputPath(job, pathOutput);
    SequenceFileOutputFormat.setCompressOutput(job, true);

    Runner.setCompression(job);

    return job;
}

From source file:com.placeiq.piqconnect.InitialVectorGenerator.java

License:Apache License

private Job buildJob() throws Exception {
    Configuration conf = getConf();
    conf.setLong("numberOfNodes", numberOfNodes);

    Job job = new Job(conf, "data-piqid.piqconnect.ConCmptIVGen_Stage1");
    job.setJarByClass(InitialVectorGenerator.class);
    job.setMapperClass(_Mapper.class);
    job.setReducerClass(_Reducer.class);
    job.setNumReduceTasks(numberOfReducers);
    job.setOutputKeyClass(VLongWritable.class);
    job.setOutputValueClass(Text.class);

    FileInputFormat.setInputPaths(job, pathBitmask);
    FileOutputFormat.setOutputPath(job, pathVector);
    FileOutputFormat.setCompressOutput(job, true);

    return job;
}

From source file:com.placeiq.piqconnect.Runner.java

License:Apache License

private Job buildJob1(Path input1, Path input2, Path output) throws Exception {
    Configuration conf = getConf();
    conf.setInt(Constants.PROP_BLOCK_SIZE, blockSize);
    conf.set("mapred.output.compression.type", "BLOCK");

    Job job = new Job(conf, "data-piqid.piqconnect.IterationStage1");
    job.setJarByClass(Runner.class);

    job.setMapperClass(IterationStage1._Mapper.class);
    job.setReducerClass(IterationStage1._Reducer.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setNumReduceTasks(numberOfReducers);
    job.setMapOutputKeyClass(IterationStage1.JoinKey.class);
    job.setMapOutputValueClass(BlockWritable.class);
    job.setOutputKeyClass(VLongWritable.class);
    job.setOutputValueClass(BlockWritable.class);
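    // IndexComparator/IndexPartitioner/SortComparator configure grouping, partitioning
    // and sort order for the composite JoinKey (a secondary-sort style setup).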
    job.setGroupingComparatorClass(IterationStage1.IndexComparator.class);
    job.setPartitionerClass(IterationStage1.IndexPartitioner.class);
    job.setSortComparatorClass(IterationStage1.SortComparator.class);

    FileInputFormat.setInputPaths(job, input1, input2);
    SequenceFileOutputFormat.setOutputPath(job, output);
    SequenceFileOutputFormat.setCompressOutput(job, true);

    setCompression(job);

    return job;
}

From source file:com.placeiq.piqconnect.Runner.java

License:Apache License

private Job buildJob2(Path input, Path output) throws Exception {
    Configuration conf = getConf();
    conf.setInt(Constants.PROP_BLOCK_SIZE, blockSize);

    Job job = new Job(conf, "data-piqid.piqconnect.IterationStage2");
    job.setJarByClass(Runner.class);

    job.setMapperClass(Mapper.class);
    job.setReducerClass(IterationStage2._Reducer.class);
    job.setNumReduceTasks(numberOfReducers);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setMapOutputKeyClass(VLongWritable.class);
    job.setMapOutputValueClass(BlockWritable.class);
    job.setOutputKeyClass(BlockIndexWritable.class);
    job.setOutputValueClass(BlockWritable.class);
    job.setSortComparatorClass(VLongWritableComparator.class);

    SequenceFileInputFormat.setInputPaths(job, input);
    FileOutputFormat.setOutputPath(job, output);
    FileOutputFormat.setCompressOutput(job, true);

    setCompression(job);
    return job;
}