Example usage for the org.apache.hadoop.mapreduce.Job constructor

Introduction

This page collects example usages of the org.apache.hadoop.mapreduce.Job constructor.

Prototype

Job(Configuration conf, String jobName) throws IOException
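
All of the examples below construct a job with this Job(Configuration conf, String jobName) form and then configure it through setters. The following is a minimal, self-contained sketch of that pattern (the driver class name and the paths are placeholders, not taken from any example on this page):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class MinimalJobDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // construct the job from a Configuration and a human-readable job name
        Job job = new Job(conf, "minimal example");
        job.setJarByClass(MinimalJobDriver.class);
        // mapper, reducer, and key/value classes would be set here, as in the examples below
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        // block until the job finishes and propagate success or failure as the exit code
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}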

Usage

From source file:com.github.seqware.queryengine.plugins.hbasemr.MRHBasePluginRunner.java

License:Open Source License

public MRHBasePluginRunner(MapReducePlugin mapReducePlugin, FeatureSet inputSet, Object... parameters) {
    this.mapReducePlugin = mapReducePlugin;
    try {
        CreateUpdateManager manager = SWQEFactory.getModelManager();
        //outputSet should attach to the original reference
        this.outputSet = manager.buildFeatureSet().setReferenceID(inputSet.getReferenceID()).build();
        manager.close();

        // do setup for Map/Reduce from the HBase API
        String tableName = generateTableName(inputSet);
        String destTableName = generateTableName(outputSet);

        Configuration conf = new Configuration();
        HBaseStorage.configureHBaseConfig(conf);
        HBaseConfiguration.addHbaseResources(conf);

        // we need to pass the parameters for a featureset, maybe we can take advantage of our serializers
        byte[] sSet = SWQEFactory.getSerialization().serialize(inputSet);
        byte[] dSet = SWQEFactory.getSerialization().serialize(outputSet);

        String[] str_params = serializeParametersToString(parameters, mapReducePlugin, sSet, dSet);

        File file = new File(new URI(Constants.Term.DEVELOPMENT_DEPENDENCY.getTermValue(String.class)));
        if (file.exists()) {
            conf.setStrings("tmpjars", Constants.Term.DEVELOPMENT_DEPENDENCY.getTermValue(String.class));
        }
        conf.setStrings(EXT_PARAMETERS, str_params);
        conf.set("mapreduce.map.java.opts", "-Xmx4096m  -verbose:gc");
        conf.set("mapreduce.reduce.java.opts", "-Xmx4096m  -verbose:gc");
        conf.set("mapreduce.map.ulimit", "4194304");
        conf.set("mapreduce.reduce.ulimit", "4194304");
        conf.set("mapreduce.map.memory.mb", "4096");
        conf.set("mapreduce.reduce.memory.mb", "4096");
        conf.set("mapreduce.map.memory.physical.mb", "4096");
        conf.set("mapreduce.reduce.memory.physical.mb", "4096");
        // the above settings all seem to be ignored by hboot
        // TODO: only this one works, but as far as I know we're using mapreduce, not mapred.
        // Strange
        conf.set("mapred.child.java.opts", "-Xmx2048m -verbose:gc");

        this.job = new Job(conf, mapReducePlugin.getClass().getSimpleName());

        Scan scan = new Scan();
        scan.setMaxVersions(); // we need all version data
        scan.setCaching(500); // 1 is the default in Scan, which will be bad for MapReduce jobs
        scan.setCacheBlocks(false); // don't set to true for MR jobs
        byte[] qualifierBytes = Bytes.toBytes(inputSet.getSGID().getUuid().toString());
        scan.addColumn(HBaseStorage.getTEST_FAMILY_INBYTES(), qualifierBytes);
        scan.setFilter(new QualifierFilter(CompareFilter.CompareOp.EQUAL, new BinaryComparator(qualifierBytes)));

        // handle the part that changes from job to job
        // pluginInterface.performVariableInit(tableName, destTableName, scan);
        TableMapReduceUtil.initTableMapperJob(tableName, // input HBase table name
                scan, // Scan instance to control CF and attribute selection
                PluginRunnerMapper.class, // mapper
                mapReducePlugin.getMapOutputKeyClass(), // mapper output key 
                mapReducePlugin.getMapOutputValueClass(), // mapper output value
                job);
        job.setOutputFormatClass(mapReducePlugin.getOutputClass()); // because we aren't emitting anything from mapper
        job.setReducerClass(MRHBasePluginRunner.PluginRunnerReducer.class); // reducer class
        job.setNumReduceTasks(mapReducePlugin.getNumReduceTasks());

        if (mapReducePlugin.getResultMechanism() == PluginInterface.ResultMechanism.FILE) {
            FileContext fileContext = FileContext.getFileContext(this.job.getConfiguration());
            Path path = new Path(
                    "/tmp/" + new BigInteger(20, new SecureRandom()).toString(32) + mapReducePlugin.toString());
            path = fileContext.makeQualified(path);
            TextOutputFormat.setOutputPath(job, path); // adjust directories as required
        }

        TableMapReduceUtil.addDependencyJars(job);
        job.setJarByClass(MRHBasePluginRunner.class);
        // submit the job, but do not block
        job.submit();
    } catch (URISyntaxException ex) {
        Logger.getLogger(MRHBasePluginRunner.class.getName()).fatal(null, ex);
    } catch (InterruptedException ex) {
        Logger.getLogger(MRHBasePluginRunner.class.getName()).fatal(null, ex);
    } catch (ClassNotFoundException ex) {
        Logger.getLogger(MRHBasePluginRunner.class.getName()).fatal(null, ex);
    } catch (IOException ex) {
        Logger.getLogger(MRHBasePluginRunner.class.getName()).fatal(null, ex);
    }
}
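
The constructor above ends with job.submit(), which returns immediately rather than blocking on the MapReduce job. A caller that wants to find out how the job ended can poll it through the standard Job API; the helper below is a hedged sketch of that (the class and method names are hypothetical, not part of the SeqWare code):

import java.io.IOException;

import org.apache.hadoop.mapreduce.Job;

public final class JobPoller {
    /** Polls a job that was started with submit() until it finishes; returns true on success. */
    public static boolean awaitCompletion(Job job, long intervalMillis)
            throws IOException, InterruptedException {
        while (!job.isComplete()) {       // ask the cluster whether the job has finished yet
            Thread.sleep(intervalMillis); // back off between status checks
        }
        return job.isSuccessful();        // true only if the job completed without failing
    }
}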

From source file:com.github.seqware.queryengine.plugins.runners.hbasemr.MRHBasePluginRunner.java

License:Open Source License

/**
 *
 * @param mapReducePlugin the particular plugin to instantiate and run
 * @param reference a reference (has to be provided in lieu of a feature set) 
 * @param inputSet a set of feature sets to operate on
 * @param parameters an arbitrary number of external parameters for plugin developers to provide to their plugins
 */
public MRHBasePluginRunner(MapReducePlugin mapReducePlugin, Reference reference, List<FeatureSet> inputSet,
        Object... parameters) {
    // handle null inputSet
    if (inputSet == null) {
        inputSet = new ArrayList<FeatureSet>();
    }
    // we should either have a reference or more than one input set
    assert (reference != null || inputSet.size() > 0);
    // all feature sets should have the same reference
    if (inputSet.size() > 0) {
        SGID ref = inputSet.iterator().next().getReference().getSGID();
        for (FeatureSet set : inputSet) {
            assert (set.getReferenceID().equals(ref));
        }
    }

    SGID referenceSGID = reference != null ? reference.getSGID() : inputSet.iterator().next().getReferenceID();

    this.mapReducePlugin = mapReducePlugin;
    try {
        CreateUpdateManager manager = SWQEFactory.getModelManager();
        //outputSet should attach to the original reference
        this.outputSet = manager.buildFeatureSet().setReferenceID(referenceSGID).build();
        manager.close();

        // do setup for Map/Reduce from the HBase API
        String tableName = generateTableName(outputSet);
        String destTableName = generateTableName(outputSet);

        Configuration conf = new Configuration();
        HBaseStorage.configureHBaseConfig(conf);
        HBaseConfiguration.addHbaseResources(conf);

        // we need to pass the parameters for a featureset, maybe we can take advantage of our serializers
        byte[][] sSet = new byte[inputSet.size()][];//SWQEFactory.getSerialization().serialize(inputSet);
        for (int i = 0; i < sSet.length; i++) {
            sSet[i] = SWQEFactory.getSerialization().serialize(inputSet.get(i));
        }
        byte[] dSet = SWQEFactory.getSerialization().serialize(outputSet);

        String[] str_params = serializeParametersToString(parameters, mapReducePlugin, sSet, dSet);

        File file = new File(new URI(Constants.Term.DEVELOPMENT_DEPENDENCY.getTermValue(String.class)));
        if (file.exists()) {
            conf.setStrings("tmpjars", Constants.Term.DEVELOPMENT_DEPENDENCY.getTermValue(String.class));
        }
        conf.setStrings(EXT_PARAMETERS, str_params);
        conf.set("mapreduce.map.java.opts", "-Xmx4096m  -verbose:gc");
        conf.set("mapreduce.reduce.java.opts", "-Xmx4096m  -verbose:gc");
        conf.set("mapreduce.map.ulimit", "4194304");
        conf.set("mapreduce.reduce.ulimit", "4194304");
        conf.set("mapreduce.map.memory.mb", "4096");
        conf.set("mapreduce.reduce.memory.mb", "4096");
        conf.set("mapreduce.map.memory.physical.mb", "4096");
        conf.set("mapreduce.reduce.memory.physical.mb", "4096");

        conf.set("mapred.job.map.memory.mb", "4096");
        conf.set("mapred.job.reduce.memory.mb", "4096");

        // the above settings all seem to be ignored by hboot
        // TODO: only this one works, but as far as I know we're using mapreduce, not mapred.
        // Strange
        conf.set("mapred.child.java.opts", "-Xmx2048m -verbose:gc");

        this.job = new Job(conf, mapReducePlugin.getClass().getSimpleName());

        Scan scan = new Scan();
        scan.setMaxVersions(); // we need all version data
        scan.setCaching(500); // 1 is the default in Scan, which will be bad for MapReduce jobs
        scan.setCacheBlocks(false); // don't set to true for MR jobs
        for (FeatureSet set : inputSet) {
            byte[] qualifierBytes = Bytes.toBytes(set.getSGID().getUuid().toString());
            scan.addColumn(HBaseStorage.getTEST_FAMILY_INBYTES(), qualifierBytes);
        }
        // this might be redundant, check this!!!! 
        // scan.setFilter(new QualifierFilter(CompareFilter.CompareOp.EQUAL, new BinaryComparator(qualifierBytes)));

        // handle the part that changes from job to job
        // pluginInterface.performVariableInit(tableName, destTableName, scan);
        TableMapReduceUtil.initTableMapperJob(tableName, // input HBase table name
                scan, // Scan instance to control CF and attribute selection
                PluginRunnerMapper.class, // mapper
                mapReducePlugin.getMapOutputKeyClass(), // mapper output key 
                mapReducePlugin.getMapOutputValueClass(), // mapper output value
                job);
        TableMapReduceUtil.initTableReducerJob(tableName, PluginRunnerReducer.class, job);

        if (mapReducePlugin.getOutputClass() != null) {
            job.setOutputFormatClass(mapReducePlugin.getOutputClass());
        }
        job.setReducerClass(MRHBasePluginRunner.PluginRunnerReducer.class); // reducer class

        if (mapReducePlugin.getResultMechanism() == PluginInterface.ResultMechanism.FILE) {
            FileContext fileContext = FileContext.getFileContext(this.job.getConfiguration());
            FileSystem fs = FileSystem.get(job.getConfiguration());
            Path path = new Path(fs.getHomeDirectory(),
                    new BigInteger(20, new SecureRandom()).toString(32) + mapReducePlugin.toString());
            path = fileContext.makeQualified(path);
            TextOutputFormat.setOutputPath(job, path); // adjust directories as required
        }

        job.setJarByClass(MRHBasePluginRunner.class);
        TableMapReduceUtil.addDependencyJars(job);
        TableMapReduceUtil.addDependencyJars(conf, MRHBasePluginRunner.class,
                MRHBasePluginRunner.PluginRunnerMapper.class, MRHBasePluginRunner.PluginRunnerReducer.class);
        // submit the job, but do not block
        job.submit();
    } catch (URISyntaxException ex) {
        Logger.getLogger(MRHBasePluginRunner.class.getName()).fatal(null, ex);
    } catch (InterruptedException ex) {
        Logger.getLogger(MRHBasePluginRunner.class.getName()).fatal(null, ex);
    } catch (ClassNotFoundException ex) {
        Logger.getLogger(MRHBasePluginRunner.class.getName()).fatal(null, ex);
    } catch (IOException ex) {
        Logger.getLogger(MRHBasePluginRunner.class.getName()).fatal(null, ex);
    }
}

From source file:com.goldsaxfoundation.bigdata.Module5.SimpleMapReduce.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();

    Job job = new Job(conf, "wordcount");
    job.setJarByClass(SimpleMapReduce.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.waitForCompletion(true);
}
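
The Job(Configuration, String) constructor used here is deprecated in Hadoop 2.x and later in favor of the static factory Job.getInstance. A minimal sketch of the same word-count driver written with that factory (the driver class name is a placeholder; the mapper and reducer wiring stays exactly as in the example above):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordCountDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Job.getInstance(conf, name) is the non-deprecated replacement for new Job(conf, name)
        Job job = Job.getInstance(conf, "wordcount");
        job.setJarByClass(WordCountDriver.class);
        // set Map.class, Reduce.class, output key/value classes, and input/output formats as above
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}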

From source file:com.hadoop.examples.secondSort.SecondarySort.java

License:Apache License

public static void main(String[] args) throws Exception {
    // load the Hadoop configuration
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: secondarysort <in> <out>");
        System.exit(2);
    }
    // create the job
    Job job = new Job(conf, "secondary sort");
    job.setJarByClass(SecondarySort.class);
    // Mapper
    job.setMapperClass(MapClass.class);
    // A Combiner cannot be used here: it would consume/emit <Text, IntWritable>, while the reducer expects <IntPair, IntWritable>
    //job.setCombinerClass(Reduce.class);

    // Reducer
    job.setReducerClass(Reduce.class);

    // group and partition by the first int in the pair
    job.setPartitionerClass(FirstPartitioner.class);
    // setSortComparatorClass() would override how Hadoop sorts the keys;
    // here the sort order comes from IntPair's compareTo() implementation
    //job.setSortComparatorClass(cls);
    // group values in the reducer by the first int only
    job.setGroupingComparatorClass(FirstGroupingComparator.class);

    // the map output is <IntPair, IntWritable>
    job.setMapOutputKeyClass(IntPair.class);
    job.setMapOutputValueClass(IntWritable.class);

    // the reduce output is <Text, IntWritable>, written with the (default) TextOutputFormat
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    // submit the job and wait for it to finish
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.hadoop.mapreduce.TestLzoLazyLoading.java

License:Open Source License

private void runWordCount(Configuration cf, boolean compressIn, boolean compressOut)
        throws IOException, InterruptedException, ClassNotFoundException {
    Configuration thisConf = new Configuration(cf);
    if (compressIn) {
        thisConf.setBoolean("mapred.compression.lzo.test.codec-checked-after-map", true);
    }

    if (compressOut) {
        thisConf.setBoolean("mapred.compression.lzo.test.codec-checked-after-reduce", true);
    }
    Path pathIn = new Path(TEST_ROOT_DIR + "/in");
    Path pathOut = new Path(TEST_ROOT_DIR + "/out");
    localFs.delete(pathIn, true);
    localFs.delete(pathOut, true);
    writeFile(makeFileName("in/part1", compressIn), "this is a test\nof word count test\ntest\n");
    writeFile(makeFileName("in/part2", compressIn), "more test");
    Job job = new Job(thisConf, "word count");
    job.setMapperClass(MyMapper.class);
    job.setCombinerClass(MyCombiner.class);
    job.setReducerClass(MyReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    if (compressOut) {
        FileOutputFormat.setCompressOutput(job, true);
        FileOutputFormat.setOutputCompressorClass(job, LzoCodec.class);
    }
    FileInputFormat.addInputPath(job, pathIn);
    FileOutputFormat.setOutputPath(job, pathOut);
    job.submit();
    assertEquals("IsLzoChecked (client)?", compressIn, LzoCodec.isNativeLzoChecked());
    assertTrue(job.waitForCompletion(false));
    String result = readFile(makeFileName("out/part-r-00000", compressOut));
    System.out.println(result);
    assertEquals("a\t1\ncount\t1\nis\t1\nmore\t1\nof\t1\ntest\t4\nthis\t1\nword\t1\n", result);
}

From source file:com.hadoop.secondarysort.SecondarySortDESC.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    // if (otherArgs.length != 2) {
    // System.err.println("Usage: secondarysort <in> <out>");
    // System.exit(2);
    // }

    // JobConf jobConf = new JobConf();

    Job job = new Job(conf, "secondary sort");
    job.setJarByClass(SecondarySortDESC.class);
    job.setMapperClass(MapClass.class);
    job.setReducerClass(Reduce.class);

    // group and partition by the first int in the pair
    job.setPartitionerClass(FirstPartitioner.class);
    job.setGroupingComparatorClass(FirstGroupingComparator.class);
    // conf.setClass("mapred.output.key.comparator.class",
    // KeyComparator.class, RawComparator.class);
    // job.setSortComparatorClass(SecondGroupingComparator.class);
    // the map output is IntPair, IntWritable
    job.setMapOutputKeyClass(IntPair.class);
    job.setMapOutputValueClass(IntWritable.class);

    // the reduce output is Text, IntWritable
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(job, new Path(inPath));
    FileOutputFormat.setOutputPath(job, new Path(outPath));
    FileSystem fileSystem = FileSystem.get(conf);
    if (fileSystem.exists(new Path(outPath))) {
        // recursively delete any existing output directory so the job can run again
        fileSystem.delete(new Path(outPath), true);
    }
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.hhscyber.nl.tweets.hbase2.Hbase2.java

@Override
public int run(String[] args) throws Exception {
    Job client = new Job(getConf(), "hbasetest");
    client.setSpeculativeExecution(false);
    client.setMaxMapAttempts(2);
    client.setJarByClass(Hbase2.class);
    client.setOutputKeyClass(Text.class);
    client.setOutputValueClass(Text.class);
    client.setInputFormatClass(TextInputFormat.class);
    TextInputFormat.addInputPath(client, new Path("input/1441737001"));//test one folder
    TextOutputFormat.setOutputPath(client, new Path("output4"));

    client.setMapperClass(Hbase2Mapper.class);
    client.setReducerClass(Hbase2Reducer.class);

    try {
        client.waitForCompletion(true);
    } catch (IOException | InterruptedException | ClassNotFoundException e) {
        System.out.println(e);
    }
    return 0;
}

From source file:com.hn.cluster.hadoop.mrs.SecondarySort.java

License:Apache License

public static void main(String[] args) throws Exception {
    // load the Hadoop configuration
    Configuration conf = new Configuration();
    // create the job
    Job job = new Job(conf, "secondary sort");
    job.setJarByClass(SecondarySort.class);
    // Mapper
    job.setMapperClass(MapClass.class);
    // Reducer
    job.setReducerClass(Reduce.class);

    // partition by the first int in the pair
    job.setPartitionerClass(FirstPartitioner.class);
    // group by the first int in the pair
    job.setGroupingComparatorClass(FirstGroupingComparator.class);

    // the map output is <IntPair, IntWritable>
    job.setMapOutputKeyClass(IntPair.class);
    job.setMapOutputValueClass(IntWritable.class);

    // the reduce output is <Text, IntWritable>
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    /**
     * The InputFormat splits the input into splits, and a RecordReader turns each split
     * into <key, value> pairs (<LongWritable, Text> here) that are fed to the map phase.
     * The map phase emits <IntPair, IntWritable> pairs, which the partitioner set via
     * job.setPartitionerClass routes to the reducers.
     */
    job.setInputFormatClass(TextInputFormat.class);
    // the OutputFormat's RecordWriter writes the final <key, value> pairs
    job.setOutputFormatClass(TextOutputFormat.class);

    // input path on HDFS
    FileInputFormat.addInputPath(job, new Path("hdfs://192.168.1.12:9000/input/input/soso.txt"));
    // output path on HDFS
    FileOutputFormat.setOutputPath(job, new Path("hdfs://192.168.1.12:9000/output/sort/"));
    // submit the job and wait for it to finish
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.howbuy.hadoop.mr.online.SecondarySort.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: secondarysrot <in> <out>");
        System.exit(2);/*from  ww  w. j  a va  2s .  c  o  m*/
    }
    Job job = new Job(conf, "secondary sort");
    job.setJarByClass(SecondarySort.class);
    job.setMapperClass(MapClass.class);
    job.setReducerClass(Reduce.class);

    // group and partition by the first int in the pair
    job.setPartitionerClass(FirstPartitioner.class);
    job.setGroupingComparatorClass(FirstGroupingComparator.class);

    // the map output is IntPair, IntWritable
    job.setMapOutputKeyClass(IntPair.class);
    job.setMapOutputValueClass(IntWritable.class);

    // the reduce output is Text, IntWritable
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setInputFormatClass(TextInputFormat.class);
    // job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setNumReduceTasks(3);

    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.huihui.mr.WordCount.java

License:Apache License

public static void main(String[] args) throws Exception {

    /*
     * Hadoop does not manage its settings with java.util.Properties or Apache Jakarta
     * Commons Configuration; it has its own configuration API, org.apache.hadoop.conf.Configuration.
     */
    Configuration conf = new Configuration();
    /*
     * Hadoop's GenericOptionsParser handles the generic command-line options
     * (fs, jt, libjars, files, archives, D, tokenCacheFile), for example
     * -D mapreduce.job.queuename. getRemainingArgs() returns whatever is left,
     * e.g. "xrli/STJoin_in" and "xrli/STJoin_out", as otherArgs.
     */
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }
    conf.set("fs.defaultFS", "hdfs://localhost:9000");
    // create the job
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount.class);

    // set the mapper, combiner, and reducer classes and the output key/value types
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    // Path wraps a URI that identifies a file or directory, typically on HDFS
    String input = "hdfs://localhost:9000/input/";
    String output = "hdfs://localhost:9000/user/hdfs/log_kpi/browser1";
    FileInputFormat.addInputPath(job, new Path(input));
    FileOutputFormat.setOutputPath(job, new Path(output));
    // submit the job and exit with its completion status
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}