List of usage examples for org.apache.hadoop.mapreduce.Job#setMapOutputValueClass
public void setMapOutputValueClass(Class<?> theClass) throws IllegalStateException
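Before the project examples below, here is a minimal, self-contained sketch of where setMapOutputValueClass fits in a typical driver. The WordCountDriver, TokenizerMapper, and IntSumReducer class names are hypothetical placeholders for illustration only; they are not taken from any example on this page.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordCountDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "word count");
        job.setJarByClass(WordCountDriver.class);
        job.setMapperClass(TokenizerMapper.class);  // hypothetical Mapper<Object, Text, Text, IntWritable>
        job.setReducerClass(IntSumReducer.class);   // hypothetical Reducer<Text, IntWritable, Text, IntWritable>
        // Declare the map output types explicitly. This is needed whenever the
        // map output types differ from the job's final output types, and it must
        // happen before the job is submitted; afterwards the call throws
        // IllegalStateException.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

In the snippets that follow, the map output value type ranges from concrete Writable classes (Put, Text, CallDetailRecord) to a Class<?> variable resolved at runtime (modelClass), which is why the parameter is declared as Class<?>.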
From source file:com.asakusafw.runtime.stage.AbstractStageClient.java
License:Apache License
@SuppressWarnings("rawtypes") private void configureShuffle(Job job, VariableTable variables) { Class<? extends Reducer> reducer = getReducerClassOrNull(); if (reducer != null) { if (LOG.isDebugEnabled()) { LOG.debug(MessageFormat.format("Reducer: {0}", reducer.getName())); //$NON-NLS-1$ }/*from w w w.j ava 2s .co m*/ job.setReducerClass(reducer); } else { if (LOG.isDebugEnabled()) { LOG.debug("Reducer: N/A"); //$NON-NLS-1$ } job.setNumReduceTasks(0); return; } Class<? extends Writable> outputKeyClass = or(getShuffleKeyClassOrNull(), NullWritable.class); Class<? extends Writable> outputValueClass = or(getShuffleValueClassOrNull(), NullWritable.class); if (LOG.isDebugEnabled()) { LOG.debug(MessageFormat.format("Shuffle: key={0}, value={1}", //$NON-NLS-1$ outputKeyClass.getName(), outputValueClass.getName())); } job.setMapOutputKeyClass(outputKeyClass); job.setMapOutputValueClass(outputValueClass); Class<? extends Reducer> combiner = getCombinerClassOrNull(); if (combiner != null) { if (LOG.isDebugEnabled()) { LOG.debug(MessageFormat.format("Combiner: {0}", combiner.getName())); //$NON-NLS-1$ } job.setCombinerClass(combiner); } else { if (LOG.isDebugEnabled()) { LOG.debug("Combiner: N/A"); //$NON-NLS-1$ } } Class<? extends Partitioner> partitioner = getPartitionerClassOrNull(); if (partitioner != null) { if (LOG.isDebugEnabled()) { LOG.debug(MessageFormat.format("Partitioner: {0}", partitioner.getName())); //$NON-NLS-1$ } job.setPartitionerClass(partitioner); } else { if (LOG.isDebugEnabled()) { LOG.debug("Partitioner: DEFAULT"); //$NON-NLS-1$ } } Class<? extends RawComparator> groupingComparator = getGroupingComparatorClassOrNull(); if (groupingComparator != null) { if (LOG.isDebugEnabled()) { LOG.debug(MessageFormat.format("GroupingComparator: {0}", groupingComparator.getName())); //$NON-NLS-1$ } job.setGroupingComparatorClass(groupingComparator); } else { if (LOG.isDebugEnabled()) { LOG.debug("GroupingComparator: DEFAULT"); //$NON-NLS-1$ } } Class<? extends RawComparator> sortComparator = getSortComparatorClassOrNull(); if (sortComparator != null) { if (LOG.isDebugEnabled()) { LOG.debug(MessageFormat.format("SortComparator: {0}", sortComparator.getName())); //$NON-NLS-1$ } job.setSortComparatorClass(sortComparator); } else { if (LOG.isDebugEnabled()) { LOG.debug("SortComparator: DEFAULT"); //$NON-NLS-1$ } } }
From source file:com.asakusafw.thundergate.runtime.cache.mapreduce.CacheBuildClient.java
License:Apache License
private void updateMerge() throws IOException, InterruptedException {
    Job job = newJob();
    List<StageInput> inputList = new ArrayList<>();
    inputList.add(new StageInput(storage.getHeadContents("*").toString(), TemporaryInputFormat.class,
            MergeJoinBaseMapper.class));
    inputList.add(new StageInput(storage.getPatchContents("*").toString(), TemporaryInputFormat.class,
            MergeJoinPatchMapper.class));
    StageInputDriver.set(job, inputList);
    job.setInputFormatClass(StageInputFormat.class);
    job.setMapperClass(StageInputMapper.class);
    job.setMapOutputKeyClass(PatchApplyKey.class);
    job.setMapOutputValueClass(modelClass);
    // combiner may have no effect in normal cases
    job.setReducerClass(MergeJoinReducer.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(modelClass);
    job.setPartitionerClass(PatchApplyKey.Partitioner.class);
    job.setSortComparatorClass(PatchApplyKey.SortComparator.class);
    job.setGroupingComparatorClass(PatchApplyKey.GroupComparator.class);
    TemporaryOutputFormat.setOutputPath(job, getNextDirectory());
    job.setOutputFormatClass(TemporaryOutputFormat.class);
    job.getConfiguration().setClass("mapred.output.committer.class", LegacyBridgeOutputCommitter.class,
            org.apache.hadoop.mapred.OutputCommitter.class);
    LOG.info(MessageFormat.format("applying patch (merge join): {0} / {1} -> {2}",
            storage.getPatchContents("*"), storage.getHeadContents("*"), getNextContents()));
    try {
        boolean succeed = job.waitForCompletion(true);
        LOG.info(MessageFormat.format("applied patch (merge join): succeed={0}, {1} / {2} -> {3}", succeed,
                storage.getPatchContents("*"), storage.getHeadContents("*"), getNextContents()));
        if (succeed == false) {
            throw new IOException(MessageFormat.format("failed to apply patch (merge join): {0} / {1} -> {2}",
                    storage.getPatchContents("*"), storage.getHeadContents("*"), getNextContents()));
        }
    } catch (ClassNotFoundException e) {
        throw new IOException(e);
    }
    putMeta();
}
From source file:com.asakusafw.thundergate.runtime.cache.mapreduce.CacheBuildClient.java
License:Apache License
private void updateTable() throws IOException, InterruptedException {
    Job job = newJob();
    List<StageInput> inputList = new ArrayList<>();
    inputList.add(new StageInput(storage.getHeadContents("*").toString(), TemporaryInputFormat.class,
            TableJoinBaseMapper.class));
    inputList.add(new StageInput(storage.getPatchContents("*").toString(), TemporaryInputFormat.class,
            TableJoinPatchMapper.class));
    StageInputDriver.set(job, inputList);
    StageResourceDriver.add(job, storage.getPatchContents("*").toString(), TableJoinBaseMapper.RESOURCE_KEY);
    job.setInputFormatClass(StageInputFormat.class);
    job.setMapperClass(StageInputMapper.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(modelClass);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(modelClass);
    TemporaryOutputFormat.setOutputPath(job, getNextDirectory());
    job.setOutputFormatClass(TemporaryOutputFormat.class);
    job.getConfiguration().setClass("mapred.output.committer.class", LegacyBridgeOutputCommitter.class,
            org.apache.hadoop.mapred.OutputCommitter.class);
    job.setNumReduceTasks(0);
    LOG.info(MessageFormat.format("applying patch (table join): {0} / {1} -> {2}",
            storage.getPatchContents("*"), storage.getHeadContents("*"), getNextContents()));
    try {
        boolean succeed = job.waitForCompletion(true);
        LOG.info(MessageFormat.format("applied patch (table join): succeed={0}, {1} / {2} -> {3}", succeed,
                storage.getPatchContents("*"), storage.getHeadContents("*"), getNextContents()));
        if (succeed == false) {
            throw new IOException(MessageFormat.format("failed to apply patch (table join): {0} / {1} -> {2}",
                    storage.getPatchContents("*"), storage.getHeadContents("*"), getNextContents()));
        }
    } catch (ClassNotFoundException e) {
        throw new IOException(e);
    }
    putMeta();
}
From source file:com.asakusafw.thundergate.runtime.cache.mapreduce.CacheBuildClient.java
License:Apache License
private void create() throws InterruptedException, IOException {
    Job job = newJob();
    List<StageInput> inputList = new ArrayList<>();
    inputList.add(new StageInput(storage.getPatchContents("*").toString(), TemporaryInputFormat.class,
            CreateCacheMapper.class));
    StageInputDriver.set(job, inputList);
    job.setInputFormatClass(StageInputFormat.class);
    job.setMapperClass(StageInputMapper.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(modelClass);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(modelClass);
    TemporaryOutputFormat.setOutputPath(job, getNextDirectory());
    job.setOutputFormatClass(TemporaryOutputFormat.class);
    job.getConfiguration().setClass("mapred.output.committer.class", LegacyBridgeOutputCommitter.class,
            org.apache.hadoop.mapred.OutputCommitter.class);
    job.setNumReduceTasks(0);
    LOG.info(MessageFormat.format("applying patch (no join): {0} / (empty) -> {2}",
            storage.getPatchContents("*"), storage.getHeadContents("*"), getNextContents()));
    try {
        boolean succeed = job.waitForCompletion(true);
        LOG.info(MessageFormat.format("applied patch (no join): succeed={0}, {1} / (empty) -> {3}", succeed,
                storage.getPatchContents("*"), storage.getHeadContents("*"), getNextContents()));
        if (succeed == false) {
            throw new IOException(MessageFormat.format("failed to apply patch (no join): {0} / (empty) -> {2}",
                    storage.getPatchContents("*"), storage.getHeadContents("*"), getNextContents()));
        }
    } catch (ClassNotFoundException e) {
        throw new IOException(e);
    }
    putMeta();
}
From source file:com.asp.tranlog.ImportTsv.java
License:Apache License
/**
 * Sets up the actual job.
 *
 * @param conf
 *            The current configuration.
 * @param args
 *            The command line parameters.
 * @return The newly created job.
 * @throws IOException
 *             When setting up the job fails.
 * @throws ClassNotFoundException
 *             When the configured mapper class cannot be loaded.
 */
public static Job createSubmittableJob(Configuration conf, String[] args)
        throws IOException, ClassNotFoundException {
    // Support non-XML supported characters
    // by re-encoding the passed separator as a Base64 string.
    String actualSeparator = conf.get(SEPARATOR_CONF_KEY);
    if (actualSeparator != null) {
        conf.set(SEPARATOR_CONF_KEY, new String(Base64.encodeBytes(actualSeparator.getBytes())));
    }

    // See if a non-default Mapper was set
    String mapperClassName = conf.get(MAPPER_CONF_KEY);
    Class mapperClass = mapperClassName != null ? Class.forName(mapperClassName) : DEFAULT_MAPPER;

    String tableName = args[0];
    Path inputDir = new Path(args[1]);
    Job job = new Job(conf, NAME + "_" + tableName);
    job.setJarByClass(mapperClass);
    FileInputFormat.setInputPaths(job, inputDir);

    String inputCodec = conf.get(INPUT_LZO_KEY);
    if (inputCodec == null) {
        FileInputFormat.setMaxInputSplitSize(job, 67108864l); // max split size = 64m
        job.setInputFormatClass(TextInputFormat.class);
    } else {
        if (inputCodec.equalsIgnoreCase("lzo")) {
            job.setInputFormatClass(LzoTextInputFormat.class);
        } else {
            usage("not supported compression codec!");
            System.exit(-1);
        }
    }

    job.setMapperClass(mapperClass);

    String hfileOutPath = conf.get(BULK_OUTPUT_CONF_KEY);
    if (hfileOutPath != null) {
        HTable table = new HTable(conf, tableName);
        job.setReducerClass(PutSortReducer.class);
        Path outputDir = new Path(hfileOutPath);
        FileOutputFormat.setOutputPath(job, outputDir);
        job.setMapOutputKeyClass(ImmutableBytesWritable.class);
        job.setMapOutputValueClass(Put.class);
        HFileOutputFormat.configureIncrementalLoad(job, table);
    } else {
        // No reducers. Just write straight to table. Call initTableReducerJob
        // to set up the TableOutputFormat.
        TableMapReduceUtil.initTableReducerJob(tableName, null, job);
        job.setNumReduceTasks(0);
    }

    TableMapReduceUtil.addDependencyJars(job);
    TableMapReduceUtil.addDependencyJars(job.getConfiguration(),
            com.google.common.base.Function.class /* Guava used by TsvParser */);
    return job;
}
From source file:com.avira.couchdoop.demo.BenchmarkUpdater.java
License:Apache License
public Job configureJob(Configuration conf, String input) throws IOException {
    conf.setInt("mapreduce.map.failures.maxpercent", 5);
    conf.setInt("mapred.max.map.failures.percent", 5);
    conf.setInt("mapred.max.tracker.failures", 20);

    Job job = Job.getInstance(conf);
    job.setJarByClass(BenchmarkUpdater.class);

    // User classpath takes precedence in favor of Hadoop classpath.
    // This is because the Couchbase client requires a newer version of
    // org.apache.httpcomponents:httpcore.
    job.setUserClassesTakesPrecedence(true);

    // Input
    FileInputFormat.setInputPaths(job, input);

    // Mapper
    job.setMapperClass(BenchmarkUpdateMapper.class);
    job.setMapOutputKeyClass(String.class);
    job.setMapOutputValueClass(CouchbaseAction.class);

    // Reducer
    job.setNumReduceTasks(0);

    // Output
    job.setOutputFormatClass(CouchbaseOutputFormat.class);
    job.setMapOutputKeyClass(String.class);
    job.setMapOutputValueClass(CouchbaseAction.class);

    return job;
}
From source file:com.avira.couchdoop.jobs.CouchbaseExporter.java
License:Apache License
public Job configureJob(Configuration conf, String input) throws IOException {
    conf.setInt("mapreduce.map.failures.maxpercent", 5);
    conf.setInt("mapred.max.map.failures.percent", 5);
    conf.setInt("mapred.max.tracker.failures", 20);

    Job job = Job.getInstance(conf);
    job.setJarByClass(CouchbaseExporter.class);

    // Input
    FileInputFormat.setInputPaths(job, input);

    // Mapper
    job.setMapperClass(CsvToCouchbaseMapper.class);
    job.setMapOutputKeyClass(String.class);
    job.setMapOutputValueClass(CouchbaseAction.class);

    // Reducer
    job.setNumReduceTasks(0);

    // Output
    job.setOutputFormatClass(CouchbaseOutputFormat.class);
    job.setOutputKeyClass(String.class);
    job.setOutputValueClass(CouchbaseAction.class);

    return job;
}
From source file:com.avira.couchdoop.jobs.CouchbaseViewImporter.java
License:Apache License
public Job configureJob(Configuration conf, String output) throws IOException {
    conf.setInt("mapreduce.map.failures.maxpercent", 5);
    conf.setInt("mapred.max.map.failures.percent", 5);
    conf.setInt("mapred.max.tracker.failures", 20);

    Job job = Job.getInstance(conf);
    job.setJarByClass(CouchbaseViewImporter.class);

    // Input
    job.setInputFormatClass(CouchbaseViewInputFormat.class);

    // Mapper
    job.setMapperClass(CouchbaseViewToFileMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    // Reducer
    job.setNumReduceTasks(0);

    // Output
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    FileOutputFormat.setOutputPath(job, new Path(output));

    return job;
}
From source file:com.awcoleman.BouncyCastleGenericCDRHadoopWithWritable.BasicDriverMapReduce.java
License:Apache License
public int run(String[] args) throws Exception {
    if (args.length < 2) {
        System.out.println("Missing input and output filenames. Exiting.");
        System.exit(1);
    }

    @SuppressWarnings("deprecation")
    Job job = new Job(super.getConf());
    job.setJarByClass(BasicDriverMapReduce.class);
    job.setJobName("BasicDriverMapReduce");
    job.setMapperClass(BasicMapper.class);
    job.setReducerClass(BasicReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(CallDetailRecord.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    job.setInputFormatClass(RawFileAsBinaryInputFormat.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:com.bah.applefox.main.plugins.fulltextindex.FTLoader.java
License:Apache License
/**
 * run takes the command-line arguments (in this case from a configuration
 * file), creates a new job, configures it, initiates it, waits for
 * completion, and returns 0 if it is successful (1 if it is not).
 *
 * @param args
 *            the command-line arguments (in this case from a configuration
 *            file)
 * @return 0 if the job ran successfully and 1 if it did not
 */
public int run(String[] args) throws Exception {
    try {
        // Initialize variables
        FTLoader.articleFile = args[8];
        FTLoader.maxNGrams = Integer.parseInt(args[9]);
        FTLoader.stopWords = getStopWords();
        FTLoader.dTable = args[10];
        FTLoader.urlCheckedTable = args[11];
        FTLoader.divsFile = args[20];
        FTLoader.exDivs = getExDivs();

        // Give the job a name
        String jobName = this.getClass().getSimpleName() + "_" + System.currentTimeMillis();

        // Create job and set the jar
        Job job = new Job(getConf(), jobName);
        job.setJarByClass(this.getClass());

        String urlTable = args[5];

        job.setInputFormatClass(AccumuloInputFormat.class);
        InputFormatBase.setZooKeeperInstance(job.getConfiguration(), args[0], args[1]);
        InputFormatBase.setInputInfo(job.getConfiguration(), args[2], args[3].getBytes(), urlTable,
                new Authorizations());

        job.setMapperClass(MapperClass.class);
        job.setMapOutputKeyClass(Key.class);
        job.setMapOutputValueClass(Value.class);

        job.setReducerClass(ReducerClass.class);
        job.setNumReduceTasks(Integer.parseInt(args[4]));

        job.setOutputFormatClass(AccumuloOutputFormat.class);
        job.setOutputKeyClass(Key.class);
        job.setOutputValueClass(Value.class);
        AccumuloOutputFormat.setZooKeeperInstance(job.getConfiguration(), args[0], args[1]);
        AccumuloOutputFormat.setOutputInfo(job.getConfiguration(), args[2], args[3].getBytes(), true, urlTable);

        job.waitForCompletion(true);

        return job.isSuccessful() ? 0 : 1;
    } catch (IOException e) {
        if (e.getMessage() != null) {
            log.error(e.getMessage());
        } else {
            log.error(e.getStackTrace());
        }
    } catch (InterruptedException e) {
        if (e.getMessage() != null) {
            log.error(e.getMessage());
        } else {
            log.error(e.getStackTrace());
        }
    } catch (ClassNotFoundException e) {
        if (e.getMessage() != null) {
            log.error(e.getMessage());
        } else {
            log.error(e.getStackTrace());
        }
    }
    return 1;
}