List of usage examples for the org.apache.hadoop.mapreduce.Job constructor
Job(Configuration conf, String jobName) throws IOException
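All of the examples below follow the same basic pattern: build a Configuration, construct the Job with Job(Configuration, String), wire up the mapper, reducer and I/O classes, then submit. A minimal, hedged sketch of that pattern (the driver, mapper and reducer class names and the argument paths are placeholders, not taken from any source file below; newer Hadoop releases prefer Job.getInstance(conf, name) over this constructor):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class ExampleDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Job(Configuration, String) creates and names the job; it throws IOException
        Job job = new Job(conf, "example job");
        job.setJarByClass(ExampleDriver.class);
        job.setMapperClass(ExampleMapper.class);     // placeholder mapper class
        job.setReducerClass(ExampleReducer.class);   // placeholder reducer class
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));   // input directory
        FileOutputFormat.setOutputPath(job, new Path(args[1])); // output directory (must not already exist)
        System.exit(job.waitForCompletion(true) ? 0 : 1);       // block until the job finishes
    }
}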
From source file:com.github.seqware.queryengine.plugins.hbasemr.MRHBasePluginRunner.java
License:Open Source License
public MRHBasePluginRunner(MapReducePlugin mapReducePlugin, FeatureSet inputSet, Object... parameters) {
    this.mapReducePlugin = mapReducePlugin;
    try {
        CreateUpdateManager manager = SWQEFactory.getModelManager();
        // outputSet should attach to the original reference
        this.outputSet = manager.buildFeatureSet().setReferenceID(inputSet.getReferenceID()).build();
        manager.close();
        // do setup for Map/Reduce from the HBase API
        String tableName = generateTableName(inputSet);
        String destTableName = generateTableName(outputSet);
        Configuration conf = new Configuration();
        HBaseStorage.configureHBaseConfig(conf);
        HBaseConfiguration.addHbaseResources(conf);
        // we need to pass the parameters for a feature set; maybe we can take advantage of our serializers
        byte[] sSet = SWQEFactory.getSerialization().serialize(inputSet);
        byte[] dSet = SWQEFactory.getSerialization().serialize(outputSet);
        String[] str_params = serializeParametersToString(parameters, mapReducePlugin, sSet, dSet);
        File file = new File(new URI(Constants.Term.DEVELOPMENT_DEPENDENCY.getTermValue(String.class)));
        if (file.exists()) {
            conf.setStrings("tmpjars", Constants.Term.DEVELOPMENT_DEPENDENCY.getTermValue(String.class));
        }
        conf.setStrings(EXT_PARAMETERS, str_params);
        conf.set("mapreduce.map.java.opts", "-Xmx4096m -verbose:gc");
        conf.set("mapreduce.reduce.java.opts", "-Xmx4096m -verbose:gc");
        conf.set("mapreduce.map.ulimit", "4194304");
        conf.set("mapreduce.reduce.ulimit", "4194304");
        conf.set("mapreduce.map.memory.mb", "4096");
        conf.set("mapreduce.reduce.memory.mb", "4096");
        conf.set("mapreduce.map.memory.physical.mb", "4096");
        conf.set("mapreduce.reduce.memory.physical.mb", "4096");
        // the above settings all seem to be ignored by hboot
        // TODO: only this one works, but as far as I know, we're using mapreduce not mapred. Strange
        conf.set("mapred.child.java.opts", "-Xmx2048m -verbose:gc");
        this.job = new Job(conf, mapReducePlugin.getClass().getSimpleName());
        Scan scan = new Scan();
        scan.setMaxVersions(); // we need all version data
        scan.setCaching(500); // 1 is the default in Scan, which will be bad for MapReduce jobs
        scan.setCacheBlocks(false); // don't set to true for MR jobs
        byte[] qualiferBytes = Bytes.toBytes(inputSet.getSGID().getUuid().toString());
        scan.addColumn(HBaseStorage.getTEST_FAMILY_INBYTES(), qualiferBytes);
        scan.setFilter(new QualifierFilter(CompareFilter.CompareOp.EQUAL, new BinaryComparator(qualiferBytes)));
        // handle the part that changes from job to job
        // pluginInterface.performVariableInit(tableName, destTableName, scan);
        TableMapReduceUtil.initTableMapperJob(
                tableName, // input HBase table name
                scan, // Scan instance to control CF and attribute selection
                PluginRunnerMapper.class, // mapper
                mapReducePlugin.getMapOutputKeyClass(), // mapper output key
                mapReducePlugin.getMapOutputValueClass(), // mapper output value
                job);
        job.setOutputFormatClass(mapReducePlugin.getOutputClass()); // because we aren't emitting anything from mapper
        job.setReducerClass(MRHBasePluginRunner.PluginRunnerReducer.class); // reducer class
        job.setNumReduceTasks(mapReducePlugin.getNumReduceTasks());
        if (mapReducePlugin.getResultMechanism() == PluginInterface.ResultMechanism.FILE) {
            FileContext fileContext = FileContext.getFileContext(this.job.getConfiguration());
            Path path = new Path(
                    "/tmp/" + new BigInteger(20, new SecureRandom()).toString(32) + mapReducePlugin.toString());
            path = fileContext.makeQualified(path);
            TextOutputFormat.setOutputPath(job, path); // adjust directories as required
        }
        TableMapReduceUtil.addDependencyJars(job);
        job.setJarByClass(MRHBasePluginRunner.class);
        // submit the job, but do not block
        job.submit();
    } catch (URISyntaxException ex) {
        Logger.getLogger(MRHBasePluginRunner.class.getName()).fatal(null, ex);
    } catch (InterruptedException ex) {
        Logger.getLogger(MRHBasePluginRunner.class.getName()).fatal(null, ex);
    } catch (ClassNotFoundException ex) {
        Logger.getLogger(MRHBasePluginRunner.class.getName()).fatal(null, ex);
    } catch (IOException ex) {
        Logger.getLogger(MRHBasePluginRunner.class.getName()).fatal(null, ex);
    }
}
From source file:com.github.seqware.queryengine.plugins.runners.hbasemr.MRHBasePluginRunner.java
License:Open Source License
/**
 * @param mapReducePlugin the particular plugin to instantiate and run
 * @param reference a reference (has to be provided in lieu of a feature set)
 * @param inputSet a set of feature sets to operate on
 * @param parameters an arbitrary number of external parameters for plugin developers to provide to their plugins
 */
public MRHBasePluginRunner(MapReducePlugin mapReducePlugin, Reference reference, List<FeatureSet> inputSet,
        Object... parameters) {
    // handle null inputSet
    if (inputSet == null) {
        inputSet = new ArrayList<FeatureSet>();
    }
    // we should either have a reference or more than one input set
    assert (reference != null || inputSet.size() > 0);
    // all feature sets should have the same reference
    if (inputSet.size() > 0) {
        SGID ref = inputSet.iterator().next().getReference().getSGID();
        for (FeatureSet set : inputSet) {
            assert (set.getReferenceID().equals(ref));
        }
    }
    SGID referenceSGID = reference != null ? reference.getSGID() : inputSet.iterator().next().getReferenceID();
    this.mapReducePlugin = mapReducePlugin;
    try {
        CreateUpdateManager manager = SWQEFactory.getModelManager();
        // outputSet should attach to the original reference
        this.outputSet = manager.buildFeatureSet().setReferenceID(referenceSGID).build();
        manager.close();
        // do setup for Map/Reduce from the HBase API
        String tableName = generateTableName(outputSet);
        String destTableName = generateTableName(outputSet);
        Configuration conf = new Configuration();
        HBaseStorage.configureHBaseConfig(conf);
        HBaseConfiguration.addHbaseResources(conf);
        // we need to pass the parameters for a feature set; maybe we can take advantage of our serializers
        byte[][] sSet = new byte[inputSet.size()][]; // SWQEFactory.getSerialization().serialize(inputSet);
        for (int i = 0; i < sSet.length; i++) {
            sSet[i] = SWQEFactory.getSerialization().serialize(inputSet.get(i));
        }
        byte[] dSet = SWQEFactory.getSerialization().serialize(outputSet);
        String[] str_params = serializeParametersToString(parameters, mapReducePlugin, sSet, dSet);
        File file = new File(new URI(Constants.Term.DEVELOPMENT_DEPENDENCY.getTermValue(String.class)));
        if (file.exists()) {
            conf.setStrings("tmpjars", Constants.Term.DEVELOPMENT_DEPENDENCY.getTermValue(String.class));
        }
        conf.setStrings(EXT_PARAMETERS, str_params);
        conf.set("mapreduce.map.java.opts", "-Xmx4096m -verbose:gc");
        conf.set("mapreduce.reduce.java.opts", "-Xmx4096m -verbose:gc");
        conf.set("mapreduce.map.ulimit", "4194304");
        conf.set("mapreduce.reduce.ulimit", "4194304");
        conf.set("mapreduce.map.memory.mb", "4096");
        conf.set("mapreduce.reduce.memory.mb", "4096");
        conf.set("mapreduce.map.memory.physical.mb", "4096");
        conf.set("mapreduce.reduce.memory.physical.mb", "4096");
        conf.set("mapred.job.map.memory.mb", "4096");
        conf.set("mapred.job.reduce.memory.mb", "4096");
        // the above settings all seem to be ignored by hboot
        // TODO: only this one works, but as far as I know, we're using mapreduce not mapred. Strange
        conf.set("mapred.child.java.opts", "-Xmx2048m -verbose:gc");
        this.job = new Job(conf, mapReducePlugin.getClass().getSimpleName());
        Scan scan = new Scan();
        scan.setMaxVersions(); // we need all version data
        scan.setCaching(500); // 1 is the default in Scan, which will be bad for MapReduce jobs
        scan.setCacheBlocks(false); // don't set to true for MR jobs
        for (FeatureSet set : inputSet) {
            byte[] qualiferBytes = Bytes.toBytes(set.getSGID().getUuid().toString());
            scan.addColumn(HBaseStorage.getTEST_FAMILY_INBYTES(), qualiferBytes);
        }
        // this might be redundant, check this!!!!
        // scan.setFilter(new QualifierFilter(CompareFilter.CompareOp.EQUAL, new BinaryComparator(qualiferBytes)));
        // handle the part that changes from job to job
        // pluginInterface.performVariableInit(tableName, destTableName, scan);
        TableMapReduceUtil.initTableMapperJob(
                tableName, // input HBase table name
                scan, // Scan instance to control CF and attribute selection
                PluginRunnerMapper.class, // mapper
                mapReducePlugin.getMapOutputKeyClass(), // mapper output key
                mapReducePlugin.getMapOutputValueClass(), // mapper output value
                job);
        TableMapReduceUtil.initTableReducerJob(tableName, PluginRunnerReducer.class, job);
        if (mapReducePlugin.getOutputClass() != null) {
            job.setOutputFormatClass(mapReducePlugin.getOutputClass());
        }
        job.setReducerClass(MRHBasePluginRunner.PluginRunnerReducer.class); // reducer class
        if (mapReducePlugin.getResultMechanism() == PluginInterface.ResultMechanism.FILE) {
            FileContext fileContext = FileContext.getFileContext(this.job.getConfiguration());
            FileSystem fs = FileSystem.get(job.getConfiguration());
            Path path = new Path(fs.getHomeDirectory(),
                    new BigInteger(20, new SecureRandom()).toString(32) + mapReducePlugin.toString());
            path = fileContext.makeQualified(path);
            TextOutputFormat.setOutputPath(job, path); // adjust directories as required
        }
        job.setJarByClass(MRHBasePluginRunner.class);
        TableMapReduceUtil.addDependencyJars(job);
        TableMapReduceUtil.addDependencyJars(conf, MRHBasePluginRunner.class,
                MRHBasePluginRunner.PluginRunnerMapper.class, MRHBasePluginRunner.PluginRunnerReducer.class);
        // submit the job, but do not block
        job.submit();
    } catch (URISyntaxException ex) {
        Logger.getLogger(MRHBasePluginRunner.class.getName()).fatal(null, ex);
    } catch (InterruptedException ex) {
        Logger.getLogger(MRHBasePluginRunner.class.getName()).fatal(null, ex);
    } catch (ClassNotFoundException ex) {
        Logger.getLogger(MRHBasePluginRunner.class.getName()).fatal(null, ex);
    } catch (IOException ex) {
        Logger.getLogger(MRHBasePluginRunner.class.getName()).fatal(null, ex);
    }
}
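Both runner constructors end with job.submit() rather than job.waitForCompletion(true), so the MapReduce job runs asynchronously. A caller that needs the result has to poll or block on the Job handle itself; a hedged fragment of what that could look like (runner.getJob() is an assumed accessor, not shown in the snippets above, and the surrounding method would need to declare IOException and InterruptedException):

Job job = runner.getJob();      // assumed accessor for the runner's private job field
while (!job.isComplete()) {     // Job.isComplete() polls the cluster for job state
    Thread.sleep(1000);
}
if (!job.isSuccessful()) {
    throw new IOException("plugin job failed: " + job.getJobName());
}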
From source file:com.goldsaxfoundation.bigdata.Module5.SimpleMapReduce.java
License:Apache License
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); Job job = new Job(conf, "wordcount"); job.setJarByClass(SimpleMapReduce.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); job.setMapperClass(Map.class); job.setReducerClass(Reduce.class); job.setInputFormatClass(TextInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); job.waitForCompletion(true);//from w ww .j a va 2 s. c o m }
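The Map and Reduce classes wired in above are not part of this snippet. A minimal sketch of what a matching word-count mapper and reducer could look like, written as static nested classes inside the SimpleMapReduce driver (the class bodies and tokenization are assumptions, not taken from the source file):

import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;

// Emits (word, 1) for every token in a line of input.
public static class Map extends Mapper<LongWritable, Text, Text, IntWritable> {
    private static final IntWritable ONE = new IntWritable(1);
    private final Text word = new Text();

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        StringTokenizer tokens = new StringTokenizer(value.toString());
        while (tokens.hasMoreTokens()) {
            word.set(tokens.nextToken());
            context.write(word, ONE);
        }
    }
}

// Sums the counts emitted for each word.
public static class Reduce extends Reducer<Text, IntWritable, Text, IntWritable> {
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        int sum = 0;
        for (IntWritable value : values) {
            sum += value.get();
        }
        context.write(key, new IntWritable(sum));
    }
}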
From source file:com.hadoop.examples.secondSort.SecondarySort.java
License:Apache License
public static void main(String[] args) throws Exception {
    // load the Hadoop configuration
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: secondarysort <in> <out>");
        System.exit(2);
    }
    // create the job
    Job job = new Job(conf, "secondary sort");
    job.setJarByClass(SecondarySort.class);
    // Mapper
    job.setMapperClass(MapClass.class);
    // do not reuse Reduce as a Combiner: the Combiner would emit <Text, IntWritable>
    // while the reduce input must remain <IntPair, IntWritable>
    //job.setCombinerClass(Reduce.class);
    // Reducer
    job.setReducerClass(Reduce.class);
    // group and partition by the first int in the pair
    job.setPartitionerClass(FirstPartitioner.class);
    // setSortComparatorClass() would override how Hadoop sorts the map output keys;
    // IntPair already implements compareTo(), so the default sort is used here
    //job.setSortComparatorClass(cls);
    job.setGroupingComparatorClass(FirstGroupingComparator.class);
    // the map output is IntPair, IntWritable
    job.setMapOutputKeyClass(IntPair.class);
    job.setMapOutputValueClass(IntWritable.class);
    // the reduce output is Text, IntWritable (written by the default TextOutputFormat)
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    // submit the job and wait for completion
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
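Several of the secondary-sort examples on this page rely on a FirstPartitioner and FirstGroupingComparator defined elsewhere in their source files. A hedged sketch of how such classes are commonly written as static nested classes next to the driver (assuming an IntPair key that exposes getFirst(), as in Hadoop's own SecondarySort example; not the actual code from these projects):

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
import org.apache.hadoop.mapreduce.Partitioner;

// Partitions solely on the first int of the pair, so all records sharing a
// first value go to the same reducer.
public static class FirstPartitioner extends Partitioner<IntPair, IntWritable> {
    @Override
    public int getPartition(IntPair key, IntWritable value, int numPartitions) {
        return Math.abs(key.getFirst() * 127) % numPartitions;
    }
}

// Groups reducer input by the first int only, so one reduce() call sees all
// values for that first value, already sorted by the full IntPair ordering.
public static class FirstGroupingComparator extends WritableComparator {
    protected FirstGroupingComparator() {
        super(IntPair.class, true);
    }

    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        IntPair left = (IntPair) a;
        IntPair right = (IntPair) b;
        return Integer.compare(left.getFirst(), right.getFirst());
    }
}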
From source file:com.hadoop.mapreduce.TestLzoLazyLoading.java
License:Open Source License
private void runWordCount(Configuration cf, boolean compressIn, boolean compressOut)
        throws IOException, InterruptedException, ClassNotFoundException {
    Configuration thisConf = new Configuration(cf);
    if (compressIn) {
        thisConf.setBoolean("mapred.compression.lzo.test.codec-checked-after-map", true);
    }
    if (compressOut) {
        thisConf.setBoolean("mapred.compression.lzo.test.codec-checked-after-reduce", true);
    }
    Path pathIn = new Path(TEST_ROOT_DIR + "/in");
    Path pathOut = new Path(TEST_ROOT_DIR + "/out");
    localFs.delete(pathIn, true);
    localFs.delete(pathOut, true);
    writeFile(makeFileName("in/part1", compressIn), "this is a test\nof word count test\ntest\n");
    writeFile(makeFileName("in/part2", compressIn), "more test");
    Job job = new Job(thisConf, "word count");
    job.setMapperClass(MyMapper.class);
    job.setCombinerClass(MyCombiner.class);
    job.setReducerClass(MyReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    if (compressOut) {
        FileOutputFormat.setCompressOutput(job, true);
        FileOutputFormat.setOutputCompressorClass(job, LzoCodec.class);
    }
    FileInputFormat.addInputPath(job, pathIn);
    FileOutputFormat.setOutputPath(job, pathOut);
    job.submit();
    assertEquals("IsLzoChecked (client)?", compressIn, LzoCodec.isNativeLzoChecked());
    assertTrue(job.waitForCompletion(false));
    String result = readFile(makeFileName("out/part-r-00000", compressOut));
    System.out.println(result);
    assertEquals("a\t1\ncount\t1\nis\t1\nmore\t1\nof\t1\ntest\t4\nthis\t1\nword\t1\n", result);
}
From source file:com.hadoop.secondarysort.SecondarySortDESC.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    // if (otherArgs.length != 2) {
    //     System.err.println("Usage: secondarysrot <in> <out>");
    //     System.exit(2);
    // }
    // JobConf jobConf = new JobConf();
    Job job = new Job(conf, "secondary sort");
    job.setJarByClass(SecondarySortDESC.class);
    job.setMapperClass(MapClass.class);
    job.setReducerClass(Reduce.class);
    // group and partition by the first int in the pair
    job.setPartitionerClass(FirstPartitioner.class);
    job.setGroupingComparatorClass(FirstGroupingComparator.class);
    // conf.setClass("mapred.output.key.comparator.class", KeyComparator.class, RawComparator.class);
    // job.setSortComparatorClass(SecondGroupingComparator.class);
    // the map output is IntPair, IntWritable
    job.setMapOutputKeyClass(IntPair.class);
    job.setMapOutputValueClass(IntWritable.class);
    // the reduce output is Text, IntWritable
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(inPath));
    FileOutputFormat.setOutputPath(job, new Path(outPath));
    FileSystem fileSystem = FileSystem.get(conf);
    if (fileSystem.exists(new Path(outPath))) {
        fileSystem.delete(new Path(outPath));
    }
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.hhscyber.nl.tweets.hbase2.Hbase2.java
@Override
public int run(String[] args) throws Exception {
    Job client = new Job(getConf(), "hbasetest");
    client.setSpeculativeExecution(false);
    client.setMaxMapAttempts(2);
    client.setJarByClass(Hbase2.class);
    client.setOutputKeyClass(Text.class);
    client.setOutputValueClass(Text.class);
    client.setInputFormatClass(TextInputFormat.class);
    TextInputFormat.addInputPath(client, new Path("input/1441737001")); // test one folder
    TextOutputFormat.setOutputPath(client, new Path("output4"));
    client.setMapperClass(Hbase2Mapper.class);
    client.setReducerClass(Hbase2Reducer.class);
    try {
        client.waitForCompletion(true);
    } catch (IOException | InterruptedException | ClassNotFoundException e) {
        System.out.println(e);
    }
    return 0;
}
From source file:com.hn.cluster.hadoop.mrs.SecondarySort.java
License:Apache License
public static void main(String[] args) throws Exception {
    // load the Hadoop configuration
    Configuration conf = new Configuration();
    // create the job
    Job job = new Job(conf, "secondary sort");
    job.setJarByClass(SecondarySort.class);
    // Mapper
    job.setMapperClass(MapClass.class);
    // Reducer
    job.setReducerClass(Reduce.class);
    // partition by the first int in the pair
    job.setPartitionerClass(FirstPartitioner.class);
    // group by the first int in the pair
    job.setGroupingComparatorClass(FirstGroupingComparator.class);
    // map output key
    job.setMapOutputKeyClass(IntPair.class);
    // map output value
    job.setMapOutputValueClass(IntWritable.class);
    // reduce output key (written by TextOutputFormat)
    job.setOutputKeyClass(Text.class);
    // reduce output value
    job.setOutputValueClass(IntWritable.class);
    /*
     * The InputFormat splits the input and supplies a RecordReader; the
     * RecordReader turns each split into <LongWritable, Text> key/value pairs
     * for the Mapper. The map emits <IntPair, IntWritable> pairs, and the
     * partitioner set via job.setPartitionerClass() decides which reducer
     * each map output record is sent to.
     */
    job.setInputFormatClass(TextInputFormat.class);
    // the OutputFormat supplies the RecordWriter that writes the results
    job.setOutputFormatClass(TextOutputFormat.class);
    // input path on HDFS
    FileInputFormat.addInputPath(job, new Path("hdfs://192.168.1.12:9000/input/input/soso.txt"));
    // output path on HDFS
    FileOutputFormat.setOutputPath(job, new Path("hdfs://192.168.1.12:9000/output/sort/"));
    // submit the job and wait for completion
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.howbuy.hadoop.mr.online.SecondarySort.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: secondarysort <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "secondary sort");
    job.setJarByClass(SecondarySort.class);
    job.setMapperClass(MapClass.class);
    job.setReducerClass(Reduce.class);
    // group and partition by the first int in the pair
    job.setPartitionerClass(FirstPartitioner.class);
    job.setGroupingComparatorClass(FirstGroupingComparator.class);
    // the map output is IntPair, IntWritable
    job.setMapOutputKeyClass(IntPair.class);
    job.setMapOutputValueClass(IntWritable.class);
    // the reduce output is Text, IntWritable
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setInputFormatClass(TextInputFormat.class);
    // job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setNumReduceTasks(3);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.huihui.mr.WordCount.java
License:Apache License
public static void main(String[] args) throws Exception {
    /*
     * Hadoop does not use java.util.Properties or Apache Jakarta Commons
     * Configuration for its configuration; it provides its own API,
     * org.apache.hadoop.conf.Configuration.
     */
    Configuration conf = new Configuration();
    /*
     * GenericOptionsParser handles the standard Hadoop command-line options
     * (fs, jt, libjars, files, archives, D, tokenCacheFile - for example
     * -D mapreduce.job.queuename). getRemainingArgs() returns whatever is
     * left over (such as "xrli/STJoin_in" and "xrli/STJoin_out") as otherArgs.
     */
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }
    conf.set("fs.defaultFS", "hdfs://localhost:9000");
    // create the job
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount.class);
    // set the mapper, combiner and reducer classes
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    // a Path wraps a URI; build the input and output Paths here
    String input = "hdfs://localhost:9000/input/";
    String output = "hdfs://localhost:9000/user/hdfs/log_kpi/browser1";
    FileInputFormat.addInputPath(job, new Path(input));
    FileOutputFormat.setOutputPath(job, new Path(output));
    // submit the job and wait for completion
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}