List of usage examples for the org.apache.hadoop.mapreduce.Job constructor
Job(Configuration conf, String jobName) throws IOException
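Every example below builds a Job directly from a Configuration and a job name. As a minimal, self-contained sketch of that pattern (the class name MinimalWordCount and the argument handling are illustrative assumptions, not taken from any example on this page; newer code would call Job.getInstance(conf, name) instead of the deprecated constructor):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.map.TokenCounterMapper;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.reduce.IntSumReducer;

public class MinimalWordCount {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Deprecated constructor used throughout this page; Job.getInstance(conf, name)
        // is the modern equivalent.
        Job job = new Job(conf, "minimal word count");
        job.setJarByClass(MinimalWordCount.class);
        job.setMapperClass(TokenCounterMapper.class); // built-in mapper: emits (token, 1)
        job.setCombinerClass(IntSumReducer.class);    // built-in reducer: sums the counts
        job.setReducerClass(IntSumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}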
From source file:com.cg.mapreduce.fpgrowth.mahout.fpm.PFPGrowth.java
License:Apache License
/**
 * Run the Parallel FPGrowth Map/Reduce Job to calculate the Top K features of group dependent shards
 */
public static void startParallelFPGrowth(Parameters params, Configuration conf)
        throws IOException, InterruptedException, ClassNotFoundException {
    conf.set(PFP_PARAMETERS, params.toString());
    conf.set("mapred.compress.map.output", "true");
    conf.set("mapred.output.compression.type", "BLOCK");
    Path input = new Path(params.get(INPUT));
    Job job = new Job(conf, "PFP Growth Driver running over input" + input);
    job.setJarByClass(PFPGrowth.class);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(TransactionTree.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(TopKStringPatterns.class);

    FileInputFormat.addInputPath(job, input);
    Path outPath = new Path(params.get(OUTPUT), FPGROWTH);
    FileOutputFormat.setOutputPath(job, outPath);

    HadoopUtil.delete(conf, outPath);

    job.setInputFormatClass(TextInputFormat.class);
    job.setMapperClass(ParallelFPGrowthMapper.class);
    job.setCombinerClass(ParallelFPGrowthCombiner.class);
    job.setReducerClass(ParallelFPGrowthReducer.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    boolean succeeded = job.waitForCompletion(true);
    if (!succeeded) {
        throw new IllegalStateException("Job failed!");
    }
}
From source file:com.cg.mapreduce.myfpgrowth.PFPGrowth.java
License:Apache License
/**
 * Count the frequencies of various features in parallel using Map/Reduce
 */
public static void startParallelCounting(Parameters params, Configuration conf)
        throws IOException, InterruptedException, ClassNotFoundException {
    conf.set(PFP_PARAMETERS, params.toString());
    conf.set("mapred.compress.map.output", "true");
    conf.set("mapred.output.compression.type", "BLOCK");

    String input = params.get(INPUT);
    Job job = new Job(conf, "Parallel Counting Driver running over input: " + input);
    job.setJarByClass(PFPGrowth.class);
    // Job job = initJob(conf);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    FileInputFormat.addInputPath(job, new Path(input));
    Path outPath = new Path(params.get(OUTPUT), PARALLEL_COUNTING);
    FileOutputFormat.setOutputPath(job, outPath);

    HadoopUtil.delete(conf, outPath);

    job.setInputFormatClass(TextInputFormat.class);
    job.setMapperClass(ParallelCountingMapper.class);
    job.setCombinerClass(ParallelCountingReducer.class);
    job.setReducerClass(ParallelCountingReducer.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    boolean succeeded = job.waitForCompletion(true);
    if (!succeeded) {
        throw new IllegalStateException("Job failed!");
    }
}
From source file:com.cg.mapreduce.myfpgrowth.PFPGrowth.java
License:Apache License
/**
 * Run the Parallel FPGrowth Map/Reduce Job to calculate the Top K features of group dependent shards
 */
public static void startParallelFPGrowth(Parameters params, Configuration conf)
        throws IOException, InterruptedException, ClassNotFoundException {
    conf.set(PFP_PARAMETERS, params.toString());
    conf.set("mapred.compress.map.output", "true");
    conf.set("mapred.output.compression.type", "BLOCK");
    Path input = new Path(params.get(INPUT));
    Job job = new Job(conf, "PFP Growth Driver running over input" + input);
    job.setJarByClass(PFPGrowth.class);
    // Job job = initJob(conf);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(ArrayList.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    FileInputFormat.addInputPath(job, input);
    Path outPath = new Path(params.get(OUTPUT), FPGROWTH);
    FileOutputFormat.setOutputPath(job, outPath);

    HadoopUtil.delete(conf, outPath);

    job.setInputFormatClass(TextInputFormat.class);
    job.setMapperClass(ParallelFPGrowthMapper.class);
    //job.setCombinerClass(ParallelFPGrowthCombiner.class);
    job.setReducerClass(ParallelFPGrowthReducer.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    boolean succeeded = job.waitForCompletion(true);
    if (!succeeded) {
        throw new IllegalStateException("Job failed!");
    }
}
From source file:com.cg.mapreduce.myfpgrowth.PFPGrowth.java
License:Apache License
private static Job initJob(Configuration conf) {
    conf.addResource(new Path("D:/program/hadoop-2.6.0/etc/hadoop/core-site.xml"));
    conf.addResource(new Path("D:/program/hadoop-2.6.0/etc/hadoop/hdfs-default.xml"));
    conf.addResource(new Path("D:/program/hadoop-2.6.0/etc/hadoop/hdfs-site.xml"));
    conf.addResource(new Path("D:/program/hadoop-2.6.0/etc/hadoop/yarn-default.xml"));
    conf.addResource(new Path("D:/program/hadoop-2.6.0/etc/hadoop/yarn-site.xml"));
    conf.addResource(new Path("D:/program/hadoop-2.6.0/etc/hadoop/mapred-site.xml"));
    conf.set("HADOOP_USER_NAME", "hadoop");
    conf.set("mapred.reduce.tasks", "3");
    Job job = null;
    try {
        File jarFile = EJob.createTempJar("bin");
        EJob.addClasspath("D:/program/hadoop-2.6.0/etc/hadoop/");
        ClassLoader classLoader = EJob.getClassLoader();
        Thread.currentThread().setContextClassLoader(classLoader);
        job = new Job(conf, "PFP");
        ((JobConf) job.getConfiguration()).setJar(jarFile.toString());
    } catch (IOException e) {
        e.printStackTrace();
    }
    return job;
}
From source file:com.cloudera.accumulo.upgrade.compatibility.DataCompatibilityLoad.java
License:Open Source License
@Override
public int run(String[] args) throws Exception {
    final String jobName = this.getClass().getName();
    options.parseArgs(jobName, args);
    final Job job = new Job(getConf(), jobName);
    if (-1 == options.test.numRows) {
        options.test.numRows = job.getConfiguration().getInt("mapred.map.tasks",
                DataCompatibilityTestCli.DEFAULT_NUM_ROWS);
    }
    job.setJarByClass(this.getClass());
    job.setInputFormatClass(DataLoadInputFormat.class);

    DataLoadInputFormat.setTabletServers(job,
            options.connection.getConnector().instanceOperations().getTabletServers());
    DataLoadInputFormat.setNumRows(job, options.test.numRows);
    DataLoadInputFormat.setNumQualifiersPerFamily(job, options.test.qualifiers);

    job.getConfiguration().set(VISIBILITY, new String(options.visibility.visibility.getExpression(), "UTF-8"));

    final TableOperations ops = options.connection.getConnector().tableOperations();
    final List<String> names = options.test.getTableNamesAndConfigureThem(ops);
    for (String name : names) {
        final int numSplits = ops.getSplits(name, options.test.numRows).size();
        if (options.test.numRows > numSplits) {
            log.info("adding splits to table '" + name + "', to bring it from " + numSplits + " to "
                    + options.test.numRows + ".");
            final SortedSet<Text> splits = new TreeSet<Text>();
            // for cases where we're adding way more splits than there are currently possible servers
            // to handle them, do a pre-pre-split
            // N.B. If we've just created this table, there will be 0 splits because we'll just have
            // the initial tablet.
            if (0 == numSplits || options.test.numRows / numSplits > 10) {
                log.info("splitting in two waves due to the number of splits we need to add.");
                // TODO turtles all the way down.
                final int prepre = options.test.numRows / (0 == numSplits ? 10 : numSplits * 10);
                for (int i = 0; i < prepre; i++) {
                    splits.add(new Text(new StringBuilder(Long.toString(i)).reverse().toString()));
                }
                ops.addSplits(name, splits);
                log.debug("delay 30s for splits to get assigned off host.");
                try {
                    Thread.currentThread().sleep(30 * 1000);
                } catch (InterruptedException exception) {
                    log.warn("interrupted from sleep early.");
                }
                splits.clear();
            }
            for (int i = 0; i < options.test.numRows; i++) {
                splits.add(new Text(new StringBuilder(Long.toString(i)).reverse().toString()));
            }
            ops.addSplits(name, splits);
        }
    }
    log.debug("delay 30s for splits to get assigned off host.");
    try {
        Thread.currentThread().sleep(30 * 1000);
    } catch (InterruptedException exception) {
        log.warn("interrupted from sleep early.");
    }

    job.getConfiguration().setStrings(OUTPUT_TABLE_NAMES, names.toArray(new String[0]));

    job.setMapperClass(DataLoadMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Mutation.class);

    job.setNumReduceTasks(0);

    log.info("launching map-only job to insert " + options.test.numRows + " rows of "
            + (FAMILIES.length * options.test.qualifiers) + " cells each into each of the tables " + names);
    options.output.useAccumuloOutputFormat(job);

    job.waitForCompletion(true);

    return job.isSuccessful() ? 0 : 1;
}
From source file:com.cloudera.accumulo.upgrade.compatibility.DataCompatibilityVerify.java
License:Open Source License
@Override
public int run(String[] args) throws Exception {
    final String jobName = this.getClass().getName();
    options.parseArgs(jobName, args);
    try {
        final int totalMapSlots = getConf().getInt("mapred.map.tasks",
                DataCompatibilityTestCli.DEFAULT_NUM_ROWS);
        if (-1 == options.test.numRows) {
            options.test.numRows = totalMapSlots;
        }
        final TableOperations ops = options.connection.getConnector().tableOperations();
        final List<String> names = options.test.getTableNames(ops);
        int totalReduceSlots = getConf().getInt("mapred.reduce.tasks", 0);
        if (-1 != options.test.numReduceSlots) {
            totalReduceSlots = options.test.numReduceSlots;
        }
        if (0 == totalReduceSlots) {
            totalReduceSlots = names.size();
        }
        final int reducesPerJob = Math.max(1, totalReduceSlots / names.size());

        final List<Job> jobs = new ArrayList();
        for (String name : names) {
            final Job job = new Job(getConf(), jobName + " " + name);
            job.setJarByClass(this.getClass());
            options.input.useAccumuloInputFormat(job, name);
            job.setMapperClass(DataVerifyMapper.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(LongWritable.class);
            job.setReducerClass(LongSumReducer.class);
            job.setCombinerClass(LongSumReducer.class);
            job.setOutputFormatClass(TextOutputFormat.class);
            TextOutputFormat.setOutputPath(job, new Path(options.test.output, name));
            job.setNumReduceTasks(reducesPerJob);
            job.submit();
            jobs.add(job);
        }

        boolean success = true;
        final long numCellsPerRow = options.test.qualifiers * DataCompatibilityLoad.FAMILIES.length;
        final long numCellsPerFamily = options.test.qualifiers * options.test.numRows;
        for (Job job : jobs) {
            success &= job.waitForCompletion(true);
            final CounterGroup group = job.getCounters().getGroup(DataVerifyMapper.class.getName());
            if (null == group) {
                log.error("Job '" + job.getJobName() + "' doesn't have counters for the verification mapper.");
                success = false;
            } else {
                final Counter badCounter = group.findCounter(BAD_COUNTER);
                if (null != badCounter && 0 < badCounter.getValue()) {
                    log.error("Job '" + job.getJobName() + "' has " + badCounter.getValue()
                            + " entries with bad checksums.");
                    success = false;
                }
                int numRows = 0;
                int numFamilies = 0;
                for (Counter counter : group) {
                    if (counter.getName().startsWith(ROW_COUNTER_PREFIX)) {
                        numRows++;
                        if (numCellsPerRow != counter.getValue()) {
                            log.error("Job '" + job.getJobName() + "', counter '" + counter.getName()
                                    + "' should have " + numCellsPerRow + " cells, but instead has "
                                    + counter.getValue());
                            success = false;
                        }
                    } else if (counter.getName().startsWith(FAMILY_COUNTER_PREFIX)) {
                        numFamilies++;
                        if (numCellsPerFamily != counter.getValue()) {
                            log.error("Job '" + job.getJobName() + "', counter '" + counter.getName()
                                    + "' should have " + numCellsPerFamily + " cells, but instead has "
                                    + counter.getValue());
                            success = false;
                        }
                    }
                }
                if (options.test.numRows != numRows) {
                    log.error("Job '" + job.getJobName() + "' is supposed to have " + options.test.numRows
                            + " rows, but has " + numRows);
                    success = false;
                }
                if (DataCompatibilityLoad.FAMILIES.length != numFamilies) {
                    log.error("Job '" + job.getJobName() + "' is supposed to have "
                            + DataCompatibilityLoad.FAMILIES.length + " families, but has " + numFamilies);
                    success = false;
                }
            }
        }
        if (success) {
            log.info("All internal checks passed.");
        } else {
            log.info("Some checks failed. see log.");
        }
        return success ? 0 : 1;
    } finally {
        options.input.close();
    }
}
From source file:com.cloudera.hbase.WordCount.java
License:Open Source License
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        return 2;
    }
    Configuration conf = getConf();
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(Map.class);
    job.setCombinerClass(Reduce.class);
    job.setReducerClass(Reduce.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    return job.waitForCompletion(true) ? 0 : 1;
}
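As the use of getConf() suggests, this WordCount is written in the Hadoop Tool style; such classes are normally launched through ToolRunner, which applies the generic options (-D, -conf, -libjars, ...) to the Configuration before run() is invoked. A sketch of such a main method, assuming WordCount extends Configured and implements Tool (the driver class name is a hypothetical placeholder):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.ToolRunner;

public class WordCountDriver {
    public static void main(String[] args) throws Exception {
        // ToolRunner parses the generic Hadoop options into the Configuration
        // and then calls WordCount.run() with the remaining arguments.
        int exitCode = ToolRunner.run(new Configuration(), new WordCount(), args);
        System.exit(exitCode);
    }
}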
From source file:com.cloudera.test.UseHCat.java
License:Apache License
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    args = new GenericOptionsParser(conf, args).getRemainingArgs();

    // Get the input and output table names as arguments
    String inputTableName = args[0];
    String outputTableName = args[1];
    // Assume the default database
    String dbName = null;

    Job job = new Job(conf, "UseHCat");
    HCatInputFormat.setInput(job, dbName, inputTableName);
    job.setJarByClass(UseHCat.class);
    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);

    // An HCatalog record as input
    job.setInputFormatClass(HCatInputFormat.class);

    // Mapper emits a string as key and an integer as value
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    // Ignore the key for the reducer output; emitting an HCatalog record as value
    job.setOutputKeyClass(WritableComparable.class);
    job.setOutputValueClass(DefaultHCatRecord.class);
    job.setOutputFormatClass(HCatOutputFormat.class);

    HCatOutputFormat.setOutput(job, OutputJobInfo.create(dbName, outputTableName, null));
    HCatSchema s = HCatOutputFormat.getTableSchema(job);
    System.err.println("INFO: output schema explicitly set for writing:" + s);
    HCatOutputFormat.setSchema(job, s);
    return (job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.cqx.mr.MRSearchAuto.java
public void searchHBase(int numOfDays) throws IOException, InterruptedException, ClassNotFoundException {
    long startTime;
    long endTime;

    Configuration conf = HBaseConfiguration.create();
    conf.set("hbase.zookeeper.quorum", "node2,node3,node4");
    conf.set("fs.default.name", "hdfs://node1");
    conf.set("mapred.job.tracker", "node1:54311");

    // Search criteria passed through the configuration to the mapper
    conf.set("search.license", "C87310");
    conf.set("search.color", "10");
    conf.set("search.direction", "2");

    Job job = new Job(conf, "MRSearchHBase");
    System.out.println("search.license: " + conf.get("search.license"));
    job.setNumReduceTasks(0);
    job.setJarByClass(MRSearchAuto.class);
    Scan scan = new Scan();
    scan.addFamily(FAMILY_NAME);
    byte[] startRow = Bytes.toBytes("2011010100000");
    byte[] stopRow;
    switch (numOfDays) {
    case 1:
        stopRow = Bytes.toBytes("2011010200000");
        break;
    case 10:
        stopRow = Bytes.toBytes("2011011100000");
        break;
    case 30:
        stopRow = Bytes.toBytes("2011020100000");
        break;
    case 365:
        stopRow = Bytes.toBytes("2012010100000");
        break;
    default:
        stopRow = Bytes.toBytes("2011010101000");
    }
    // Restrict the scan to the selected row key range
    scan.setStartRow(startRow);
    scan.setStopRow(stopRow);

    TableMapReduceUtil.initTableMapperJob(TABLE_NAME, scan, SearchMapper.class, ImmutableBytesWritable.class,
            Text.class, job);
    Path outPath = new Path("searchresult");
    HDFS_File file = new HDFS_File();
    file.DelFile(conf, outPath.getName(), true); // delete the output directory if it already exists
    FileOutputFormat.setOutputPath(job, outPath);

    startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    endTime = System.currentTimeMillis();
    System.out.println("Time used: " + (endTime - startTime));
    System.out.println("startRow:" + Text.decode(startRow));
    System.out.println("stopRow: " + Text.decode(stopRow));
}
From source file:com.daleway.training.hadoop.condprob.ConditionalProbabilityPairs.java
License:Apache License
public static Job createJob(Configuration conf, String inputPath, String outputPath) throws IOException {
    Job job = new Job(conf, "pair wise count");
    job.setJarByClass(ConditionalProbabilityPairs.class);
    job.setMapperClass(TokenizerMapper.class);
    //job.setCombinerClass(IntSumReducer.class);
    job.setPartitionerClass(ProbDistPartitioner.class);
    job.setReducerClass(IntSumReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setNumReduceTasks(5);
    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    return job;
}