List of usage examples for org.apache.hadoop.mapreduce Job setMapOutputValueClass
public void setMapOutputValueClass(Class<?> theClass) throws IllegalStateException
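Before the project examples below, a minimal self-contained sketch of the typical pattern: setMapOutputValueClass is only needed when the mapper's output value type differs from the type set with setOutputValueClass (the intermediate types default to the final output types), and it must be called before the job is submitted or the method throws IllegalStateException. The class and job names here are illustrative and not taken from any of the projects listed.

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class MapOutputValueClassExample {

    // Emits (word, 1) pairs; the map output value type is IntWritable.
    public static class TokenMapper extends Mapper<Object, Text, Text, IntWritable> {
        private static final IntWritable ONE = new IntWritable(1);
        private final Text word = new Text();

        @Override
        protected void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            StringTokenizer itr = new StringTokenizer(value.toString());
            while (itr.hasMoreTokens()) {
                word.set(itr.nextToken());
                context.write(word, ONE);
            }
        }
    }

    // Sums the counts and writes the final value as Text, a different type than the map output value.
    public static class SumReducer extends Reducer<Text, IntWritable, Text, Text> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable v : values) {
                sum += v.get();
            }
            context.write(key, new Text(Integer.toString(sum)));
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "map output value class example");
        job.setJarByClass(MapOutputValueClassExample.class);
        job.setMapperClass(TokenMapper.class);
        job.setReducerClass(SumReducer.class);
        // The intermediate (map output) value type differs from the final output value type,
        // so it is declared explicitly before submission.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}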
From source file:com.mvdb.platform.action.VersionMerge.java
License:Apache License
public static void main(String[] args) throws Exception {
    logger.error("error1");
    logger.warn("warning1");
    logger.info("info1");
    logger.debug("debug1");
    logger.trace("trace1");
    ActionUtils.setUpInitFileProperty();
    // LoggerContext lc = (LoggerContext) LoggerFactory.getILoggerFactory();
    // StatusPrinter.print(lc);
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    // Also add lastMergedTimeStamp and mergeUptoTimestamp and passive db name which would be mv1 or mv2
    if (otherArgs.length != 3) {
        System.err.println("Usage: versionmerge <customer-directory> <last-merged-dirname> <last-copied-dirname>");
        System.exit(2);
    }
    // Example: file:/home/umesh/.mvdb/etl/data/alpha
    // Example: hdfs://localhost:9000/data/alpha
    String customerDirectory = otherArgs[0];
    String lastMergedDirName = otherArgs[1];
    String lastCopiedDirName = otherArgs[2];
    org.apache.hadoop.conf.Configuration conf1 = new org.apache.hadoop.conf.Configuration();
    // conf1.addResource(new Path("/home/umesh/ops/hadoop-1.2.0/conf/core-site.xml"));
    FileSystem hdfsFileSystem = FileSystem.get(conf1);
    Path topPath = new Path(customerDirectory);
    // Clean scratch db
    Path passiveDbPath = new Path(topPath, "db/mv1");
    Path tempDbPath = new Path(topPath, "db/tmp-" + (int) (Math.random() * 100000));
    if (hdfsFileSystem.exists(tempDbPath)) {
        boolean success = hdfsFileSystem.delete(tempDbPath, true);
        if (success == false) {
            System.err.println(String.format("Unable to delete temp directory %s", tempDbPath.toString()));
            System.exit(1);
        }
    }
    // Last three parameters are hardcoded and the nulls must be replaced later after changing input parameters.
    Path[] inputPaths = getInputPaths(hdfsFileSystem, topPath, lastMergedDirName, lastCopiedDirName, null);
    Set<String> tableNameSet = new HashSet<String>();
    for (Path path : inputPaths) {
        tableNameSet.add(path.getName());
    }
    Job job = new Job(conf, "versionmerge");
    job.setJarByClass(VersionMerge.class);
    job.setMapperClass(VersionMergeMapper.class);
    job.setReducerClass(VersionMergeReducer.class);
    job.setMapOutputKeyClass(MergeKey.class);
    job.setMapOutputValueClass(BytesWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(BytesWritable.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    String lastDirName = null;
    if (inputPaths != null && inputPaths.length > 1) {
        lastDirName = inputPaths[(inputPaths.length) - 2].getParent().getName();
    }
    for (Path inputPath : inputPaths) {
        FileInputFormat.addInputPath(job, inputPath);
    }
    FileOutputFormat.setOutputPath(job, tempDbPath);
    for (String table : tableNameSet) {
        if (table.endsWith(".dat") == false) {
            continue;
        }
        table = table.replaceAll("-", "");
        table = table.replaceAll(".dat", "");
        MultipleOutputs.addNamedOutput(job, table, SequenceFileOutputFormat.class, Text.class, BytesWritable.class);
    }
    boolean success = job.waitForCompletion(true);
    System.out.println("Success:" + success);
    System.out.println(ManagementFactory.getRuntimeMXBean().getName());
    if (success && lastDirName != null) {
        ActionUtils.setConfigurationValue(new Path(customerDirectory).getName(),
                ConfigurationKeys.LAST_MERGE_TO_MVDB_DIRNAME, lastDirName);
    }
    // hdfsFileSystem.delete(passiveDbPath, true);
    // hdfsFileSystem.rename(tempDbPath, passiveDbPath);
    System.exit(success ? 0 : 1);
}
From source file:com.netflix.Aegisthus.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    job.setJarByClass(Aegisthus.class);
    CommandLine cl = getOptions(args);
    if (cl == null) {
        return 1;
    }
    job.setInputFormatClass(AegisthusInputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setMapperClass(Map.class);
    job.setReducerClass(CassReducer.class);
    List<Path> paths = Lists.newArrayList();
    if (cl.hasOption(OPT_INPUT)) {
        for (String input : cl.getOptionValues(OPT_INPUT)) {
            paths.add(new Path(input));
        }
    }
    if (cl.hasOption(OPT_INPUTDIR)) {
        paths.addAll(getDataFiles(job.getConfiguration(), cl.getOptionValue(OPT_INPUTDIR)));
    }
    TextInputFormat.setInputPaths(job, paths.toArray(new Path[0]));
    TextOutputFormat.setOutputPath(job, new Path(cl.getOptionValue(OPT_OUTPUT)));
    job.submit();
    System.out.println(job.getJobID());
    System.out.println(job.getTrackingURL());
    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;
}
From source file:com.pagerankcalculator.TwitterPageRank.java
public int sortPagerank(String in, String out)
        throws IOException, InterruptedException, ClassNotFoundException {
    Job job = Job.getInstance(getConf());
    job.setJobName("[" + TwitterPageRank.AUTHOR + "]: Job#3 Sorting Page Rank");
    job.setJarByClass(TwitterPageRank.class);
    job.setMapOutputKeyClass(DoubleWritable.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setMapperClass(PageRankSortingMapper.class);
    job.setReducerClass(PageRankSortingReducer.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setNumReduceTasks(1);
    LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);
    job.setSortComparatorClass(DoubleSortDescComparator.class);
    Path inputFilePath = new Path(in);
    Path outputFilePath = new Path(out);
    FileInputFormat.addInputPath(job, inputFilePath);
    FileOutputFormat.setOutputPath(job, outputFilePath);
    FileSystem fs = FileSystem.newInstance(getConf());
    if (fs.exists(outputFilePath)) {
        fs.delete(outputFilePath, true);
    }
    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:com.phantom.hadoop.examples.BaileyBorweinPlouffe.java
License:Apache License
/** Create and setup a job */
private static Job createJob(String name, Configuration conf) throws IOException {
    final Job job = new Job(conf, NAME + "_" + name);
    final Configuration jobconf = job.getConfiguration();
    job.setJarByClass(BaileyBorweinPlouffe.class);

    // setup mapper
    job.setMapperClass(BbpMapper.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(BytesWritable.class);

    // setup reducer
    job.setReducerClass(BbpReducer.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(BytesWritable.class);
    job.setNumReduceTasks(1);

    // setup input
    job.setInputFormatClass(BbpInputFormat.class);

    // disable task timeout
    jobconf.setLong(MRJobConfig.TASK_TIMEOUT, 0);

    // do not use speculative execution
    jobconf.setBoolean(MRJobConfig.MAP_SPECULATIVE, false);
    jobconf.setBoolean(MRJobConfig.REDUCE_SPECULATIVE, false);
    return job;
}
From source file:com.phantom.hadoop.examples.DBCountPageView.java
License:Apache License
@Override
// Usage DBCountPageView [driverClass dburl]
public int run(String[] args) throws Exception {
    String driverClassName = DRIVER_CLASS;
    String url = DB_URL;
    if (args.length > 1) {
        driverClassName = args[0];
        url = args[1];
    }
    initialize(driverClassName, url);
    Configuration conf = getConf();
    DBConfiguration.configureDB(conf, driverClassName, url);
    Job job = new Job(conf);
    job.setJobName("Count Pageviews of URLs");
    job.setJarByClass(DBCountPageView.class);
    job.setMapperClass(PageviewMapper.class);
    job.setCombinerClass(LongSumReducer.class);
    job.setReducerClass(PageviewReducer.class);
    DBInputFormat.setInput(job, AccessRecord.class, "Access", null, "url", AccessFieldNames);
    DBOutputFormat.setOutput(job, "Pageview", PageviewFieldNames);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);
    job.setOutputKeyClass(PageviewRecord.class);
    job.setOutputValueClass(NullWritable.class);
    int ret;
    try {
        ret = job.waitForCompletion(true) ? 0 : 1;
        boolean correct = verify();
        if (!correct) {
            throw new RuntimeException("Evaluation was not correct!");
        }
    } finally {
        shutdown();
    }
    return ret;
}
From source file:com.phantom.hadoop.examples.SecondarySort.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: secondarysort <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "secondary sort");
    job.setJarByClass(SecondarySort.class);
    job.setMapperClass(MapClass.class);
    job.setReducerClass(Reduce.class);

    // group and partition by the first int in the pair
    job.setPartitionerClass(FirstPartitioner.class);
    job.setGroupingComparatorClass(FirstGroupingComparator.class);

    // the map output is IntPair, IntWritable
    job.setMapOutputKeyClass(IntPair.class);
    job.setMapOutputValueClass(IntWritable.class);

    // the reduce output is Text, IntWritable
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.philiphubbard.digraph.MRBuildVertices.java
License:Open Source License
public static void setupJob(Job job, Path inputPath, Path outputPath) throws IOException {
    job.setJarByClass(MRBuildVertices.class);
    job.setMapperClass(MRBuildVertices.Mapper.class);
    job.setCombinerClass(MRBuildVertices.Reducer.class);
    job.setReducerClass(MRBuildVertices.Reducer.class);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(BytesWritable.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(BytesWritable.class);

    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    FileInputFormat.addInputPath(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);
}
From source file:com.philiphubbard.digraph.MRCompressChains.java
License:Open Source License
public static void setupIterationJob(Job job, Path inputPathOrig, Path outputPathOrig) throws IOException {
    job.setJarByClass(MRCompressChains.class);
    job.setMapperClass(MRCompressChains.Mapper.class);
    job.setCombinerClass(MRCompressChains.Reducer.class);
    job.setReducerClass(MRCompressChains.Reducer.class);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(BytesWritable.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(BytesWritable.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    Path inputPath;
    if (iter == 0)
        inputPath = inputPathOrig;
    else
        inputPath = new Path(outputPathOrig.toString() + (iter - 1));
    Path outputPath = new Path(outputPathOrig.toString() + iter);

    FileInputFormat.setInputPaths(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);
}
From source file:com.pinterest.terrapin.hadoop.examples.WordCount.java
License:Apache License
public int run(String[] args) throws Exception {
    TerrapinUploaderOptions options = TerrapinUploaderOptions.initFromSystemProperties();

    // Create the job, setting the inputs and map output key and map output value classes.
    // Also, set reducer and mapper.
    Job job = Job.getInstance(super.getConf(), "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setReducerClass(IntSumReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));

    // Wrap around Hadoop Loader job to write the data to a terrapin fileset.
    return new HadoopJobLoader(options, job).waitForCompletion() ? 0 : 1;
}
From source file:com.pivotal.gfxd.demo.mapreduce.LoadAverage.java
License:Open Source License
/**
 * This method is assuming fs.default.name as args[0]
 *
 * @param args
 * @return
 * @throws Exception
 */
@Override
public int run(String[] args) throws Exception {
    System.out.println("Starting MapReduce Job");
    GfxdDataSerializable.initTypes();
    Configuration conf = new Configuration();
    //Configuration conf = getConf();

    Path outputPath = new Path("/output");
    String hdfsHomeDir = "/sensorStore"; //args[1];
    String tableName = "RAW_SENSOR";
    String outTableName = "LOAD_AVERAGES_SHADOW";
    String gfxdURL = conf.get("gemfirexd.url", "jdbc:gemfirexd://localhost:1527");
    // conf.set("fs.default.name", args[0]);
    String hdfsUrl = conf.get("fs.defaultFS");
    FileSystem hdfs = FileSystem.get(new URI(hdfsUrl), conf);

    // Retrieve last run timestamp
    long now = System.currentTimeMillis();
    long lastStart = getLastStart(hdfs);

    outputPath.getFileSystem(conf).delete(outputPath, true);

    conf.set(RowInputFormat.HOME_DIR, hdfsHomeDir);
    conf.set(RowInputFormat.INPUT_TABLE, tableName);
    conf.setBoolean(RowInputFormat.CHECKPOINT_MODE, false);
    conf.setLong(RowInputFormat.START_TIME_MILLIS, lastStart);
    conf.setLong(RowInputFormat.END_TIME_MILLIS, now);
    conf.set(RowOutputFormat.OUTPUT_URL, gfxdURL);
    conf.set(RowOutputFormat.OUTPUT_TABLE, outTableName);

    // print config to troubleshoot possible issues
    // Configuration.dumpConfiguration(conf, new PrintWriter(System.out));

    Job job = Job.getInstance(conf, "LoadAverage");
    job.setNumReduceTasks(1);
    job.setInputFormatClass(RowInputFormat.class);

    // configure mapper and reducer
    job.setJarByClass(LoadAverage.class);
    job.setMapperClass(LoadAverageMapper.class);
    job.setReducerClass(LoadAverageReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LoadKey.class);

    TextOutputFormat.setOutputPath(job, outputPath);
    job.setOutputFormatClass(RowOutputFormat.class);
    job.setOutputKeyClass(Key.class);
    job.setOutputValueClass(LoadAverageModel.class);

    boolean jobSuccess = job.waitForCompletion(true);
    if (jobSuccess) {
        writeLastStart(hdfs, now);
    }
    return jobSuccess ? 0 : 1;
}