List of usage examples for the org.apache.hadoop.mapreduce.Job constructor
Job(Configuration conf, String jobName) throws IOException
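Every example on this page follows the same driver pattern: build a Job from a Configuration, set the jar, mapper/reducer and key/value classes, point the input and output formats at filesystem paths, and block on waitForCompletion(). The Job(Configuration, String) constructor used throughout is deprecated in Hadoop 2.x; Job.getInstance(Configuration, String) is the supported replacement. A minimal sketch of that pattern follows; MyDriver, MyMapper and MyReducer are hypothetical placeholders, not classes from the files below.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class MyDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Job.getInstance(conf, name) is the non-deprecated replacement for new Job(conf, name)
        Job job = Job.getInstance(conf, "example job");
        job.setJarByClass(MyDriver.class);
        job.setMapperClass(MyMapper.class);
        job.setReducerClass(MyReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}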
From source file: com.phantom.hadoop.examples.WordMedian.java
License: Apache License
@Override public int run(String[] args) throws Exception { if (args.length != 2) { System.err.println("Usage: wordmedian <in> <out>"); return 0; }/*ww w . j a va 2 s .c o m*/ setConf(new Configuration()); Configuration conf = getConf(); @SuppressWarnings("deprecation") Job job = new Job(conf, "word median"); job.setJarByClass(WordMedian.class); job.setMapperClass(WordMedianMapper.class); job.setCombinerClass(WordMedianReducer.class); job.setReducerClass(WordMedianReducer.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(IntWritable.class); FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); boolean result = job.waitForCompletion(true); // Wait for JOB 1 -- get middle value to check for Median long totalWords = job.getCounters().getGroup(TaskCounter.class.getCanonicalName()) .findCounter("MAP_OUTPUT_RECORDS", "Map output records").getValue(); int medianIndex1 = (int) Math.ceil((totalWords / 2.0)); int medianIndex2 = (int) Math.floor((totalWords / 2.0)); median = readAndFindMedian(args[1], medianIndex1, medianIndex2, conf); return (result ? 0 : 1); }
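The counter read after waitForCompletion() uses the string-based group/counter lookup. On Hadoop 2.x the same map-output-record count can also be fetched through the TaskCounter enum, which avoids hard-coding the counter's display name; a small sketch, assuming the same job object as above:

    long totalWords = job.getCounters().findCounter(TaskCounter.MAP_OUTPUT_RECORDS).getValue();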
From source file: com.phantom.hadoop.examples.WordStandardDeviation.java
License: Apache License
@Override public int run(String[] args) throws Exception { if (args.length != 2) { System.err.println("Usage: wordstddev <in> <out>"); return 0; }//w w w . j a v a 2 s . c om Configuration conf = getConf(); @SuppressWarnings("deprecation") Job job = new Job(conf, "word stddev"); job.setJarByClass(WordStandardDeviation.class); job.setMapperClass(WordStandardDeviationMapper.class); job.setCombinerClass(WordStandardDeviationReducer.class); job.setReducerClass(WordStandardDeviationReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(LongWritable.class); FileInputFormat.addInputPath(job, new Path(args[0])); Path outputpath = new Path(args[1]); FileOutputFormat.setOutputPath(job, outputpath); boolean result = job.waitForCompletion(true); // read output and calculate standard deviation stddev = readAndCalcStdDev(outputpath, conf); return (result ? 0 : 1); }
From source file: com.pivotal.hawq.mapreduce.demo.HAWQInputFormatDemo.java
License: Apache License
@Override public int run(String[] args) throws Exception { if (args.length < 3) { printUsage();/* ww w .j av a 2 s . c om*/ } String dbURL = args[0]; String tableName = args[1]; String outputPath = args[2]; String username = (args.length >= 4) ? args[3] : null; String password = (args.length >= 5) ? args[4] : null; Job job = new Job(getConf(), "HAWQInputFormatDemo"); job.setJarByClass(HAWQInputFormatDemo.class); job.setInputFormatClass(HAWQInputFormat.class); HAWQInputFormat.setInput(job.getConfiguration(), dbURL, username, password, tableName); FileOutputFormat.setOutputPath(job, new Path(outputPath)); job.setMapperClass(HAWQEchoMapper.class); job.setNumReduceTasks(0); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); long startTime = System.currentTimeMillis(); int returnCode = job.waitForCompletion(true) ? 0 : 1; long endTime = System.currentTimeMillis(); System.out.println("Time elapsed: " + (endTime - startTime) + " milliseconds"); return returnCode; }
From source file: com.pivotal.hawq.mapreduce.demo.HAWQInputFormatDemo2.java
License: Apache License
@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        printUsage();
    }

    String metadataFile = args[0];
    String outputPath = args[1];

    Job job = new Job(getConf(), "HAWQInputFormatDemo2");
    job.setJarByClass(HAWQInputFormatDemo2.class);

    job.setInputFormatClass(HAWQInputFormat.class);
    HAWQInputFormat.setInput(job.getConfiguration(), metadataFile);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setMapperClass(HAWQEchoMapper.class);
    job.setNumReduceTasks(0);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    long startTime = System.currentTimeMillis();
    int returnCode = job.waitForCompletion(true) ? 0 : 1;
    long endTime = System.currentTimeMillis();

    System.out.println("Time elapsed: " + (endTime - startTime) + " milliseconds");

    return returnCode;
}
From source file: com.pivotal.hawq.mapreduce.MapReduceClusterDriver.java
License: Apache License
@Override public int run(String[] args) throws Exception { if (args.length != 3 && args.length != 4) { System.err.printf("Usage: %s [generic options] <tableName> <dburl> <output> [<mapper_classname>]\n", getClass().getSimpleName()); ToolRunner.printGenericCommandUsage(System.err); return -1; }//from ww w . ja v a2s. c om String tableName = args[0]; String dbUrl = args[1]; Path outputPath = new Path(args[2]); Class<? extends Mapper> mapperClass = (args.length == 3) ? HAWQTableMapper.class : (Class<? extends Mapper>) Class.forName(args[3]); // delete previous output FileSystem fs = FileSystem.get(getConf()); if (fs.exists(outputPath)) fs.delete(outputPath, true); fs.close(); Job job = new Job(getConf(), "job_read_" + tableName); job.setJarByClass(MapReduceClusterDriver.class); job.setInputFormatClass(HAWQInputFormat.class); HAWQInputFormat.setInput(job.getConfiguration(), dbUrl, null, null, tableName); FileOutputFormat.setOutputPath(job, outputPath); job.setMapperClass(mapperClass); job.setReducerClass(HAWQTableReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); return job.waitForCompletion(true) ? 0 : 1; }
From source file: com.pivotal.hawq.mapreduce.parquet.HAWQParquetOutputDriver.java
License: Apache License
@Override public int run(String[] args) throws Exception { Job job = new Job(getConf(), "HAWQParquetOutputFormat"); job.setJarByClass(HAWQParquetOutputDriver.class); job.setOutputFormatClass(HAWQParquetOutputFormat.class); /*/* ww w .j a v a2 s .c om*/ // int2 int4 int8 HAWQSchema schema = new HAWQSchema("t_int", HAWQSchema.required_field(HAWQPrimitiveField.PrimitiveType.INT2, "col_short"), HAWQSchema.optional_field(HAWQPrimitiveField.PrimitiveType.INT4, "col_int"), HAWQSchema.required_field(HAWQPrimitiveField.PrimitiveType.INT8, "col_long") ); job.setMapperClass(WriteIntMapper.class); */ /* // varchar HAWQSchema schema = new HAWQSchema("t_varchar", HAWQSchema.required_field(HAWQPrimitiveField.PrimitiveType.VARCHAR, "col_varchar") ); job.setMapperClass(WriteVarcharMapper.class); */ /* // float4 float8 HAWQSchema schema = new HAWQSchema("t_floating", HAWQSchema.required_field(HAWQPrimitiveField.PrimitiveType.FLOAT4, "col_float"), HAWQSchema.required_field(HAWQPrimitiveField.PrimitiveType.FLOAT8, "col_long") ); job.setMapperClass(WriteFloatingNumberMapper.class); */ // boolean // HAWQSchema schema = new HAWQSchema("t_boolean", // HAWQSchema.required_field(HAWQPrimitiveField.PrimitiveType.BOOL, "col_bool")); // job.setMapperClass(WriteBooleanMapper.class); // byte array HAWQSchema schema = new HAWQSchema("t_bytea", HAWQSchema.required_field(HAWQPrimitiveField.PrimitiveType.BYTEA, "col_bytea")); job.setMapperClass(WriteByteArrayMapper.class); HAWQParquetOutputFormat.setSchema(job, schema); FileInputFormat.addInputPath(job, new Path(args[0])); HAWQParquetOutputFormat.setOutputPath(job, new Path(args[1])); job.setNumReduceTasks(0); job.setMapOutputKeyClass(Void.class); job.setMapOutputValueClass(HAWQRecord.class); return job.waitForCompletion(true) ? 0 : 1; }
From source file: com.placeiq.piqconnect.BlocksBuilder.java
License: Apache License
protected Job configStage1() throws Exception {
    FileSystem fs = FileSystem.get(getConf());
    fs.delete(pathOutput, true); // useful ?

    Configuration conf = getConf();
    conf.setInt(Constants.PROP_BLOCK_SIZE, blockSize);
    conf.setBoolean(Constants.PROP_IS_VECTOR, isVector);
    conf.set("mapred.output.compression.type", "BLOCK"); // useful ?

    Job job = new Job(conf, "data-piqid.piqconnect.BlocksBuilder");
    job.setJarByClass(BlocksBuilder.class);
    job.setMapperClass(MapStage1.class);
    job.setReducerClass(RedStage1.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setNumReduceTasks(numberOfReducers);
    job.setMapOutputKeyClass(BlockIndexWritable.class);
    job.setMapOutputValueClass(LightBlockWritable.class);
    job.setOutputKeyClass(BlockIndexWritable.class);
    job.setOutputValueClass(BlockWritable.class);

    FileInputFormat.setInputPaths(job, pathEdges);
    SequenceFileOutputFormat.setOutputPath(job, pathOutput);
    SequenceFileOutputFormat.setCompressOutput(job, true);
    Runner.setCompression(job);

    return job;
}
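The two "useful ?" lines request BLOCK compression for the SequenceFile output through the legacy mapred.output.compression.type key. With the new-API SequenceFileOutputFormat used here, the same intent can be expressed with the typed helper; a sketch, assuming the same job object and an org.apache.hadoop.io.SequenceFile import:

    // ask for block-level compression of the SequenceFile output
    SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);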
From source file: com.placeiq.piqconnect.InitialVectorGenerator.java
License: Apache License
private Job buildJob() throws Exception {
    Configuration conf = getConf();
    conf.setLong("numberOfNodes", numberOfNodes);

    Job job = new Job(conf, "data-piqid.piqconnect.ConCmptIVGen_Stage1");
    job.setJarByClass(InitialVectorGenerator.class);
    job.setMapperClass(_Mapper.class);
    job.setReducerClass(_Reducer.class);
    job.setNumReduceTasks(numberOfReducers);
    job.setOutputKeyClass(VLongWritable.class);
    job.setOutputValueClass(Text.class);

    FileInputFormat.setInputPaths(job, pathBitmask);
    FileOutputFormat.setOutputPath(job, pathVector);
    FileOutputFormat.setCompressOutput(job, true);

    return job;
}
From source file: com.placeiq.piqconnect.Runner.java
License: Apache License
private Job buildJob1(Path input1, Path input2, Path output) throws Exception {
    Configuration conf = getConf();
    conf.setInt(Constants.PROP_BLOCK_SIZE, blockSize);
    conf.set("mapred.output.compression.type", "BLOCK");

    Job job = new Job(conf, "data-piqid.piqconnect.IterationStage1");
    job.setJarByClass(Runner.class);
    job.setMapperClass(IterationStage1._Mapper.class);
    job.setReducerClass(IterationStage1._Reducer.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setNumReduceTasks(numberOfReducers);
    job.setMapOutputKeyClass(IterationStage1.JoinKey.class);
    job.setMapOutputValueClass(BlockWritable.class);
    job.setOutputKeyClass(VLongWritable.class);
    job.setOutputValueClass(BlockWritable.class);
    job.setGroupingComparatorClass(IterationStage1.IndexComparator.class);
    job.setPartitionerClass(IterationStage1.IndexPartitioner.class);
    job.setSortComparatorClass(IterationStage1.SortComparator.class);

    FileInputFormat.setInputPaths(job, input1, input2);
    SequenceFileOutputFormat.setOutputPath(job, output);
    SequenceFileOutputFormat.setCompressOutput(job, true);
    setCompression(job);

    return job;
}
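Judging by the class names, the partitioner/comparator wiring in buildJob1 is the standard secondary-sort setup: IndexPartitioner routes each JoinKey to a reducer, SortComparator orders the full keys within a partition, and IndexComparator, installed as the grouping comparator, decides which consecutive keys are handed to a single reduce() call, so all values for one block index arrive grouped even though their complete keys differ.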
From source file: com.placeiq.piqconnect.Runner.java
License: Apache License
private Job buildJob2(Path input, Path output) throws Exception {
    Configuration conf = getConf();
    conf.setInt(Constants.PROP_BLOCK_SIZE, blockSize);

    Job job = new Job(conf, "data-piqid.piqconnect.IterationStage2");
    job.setJarByClass(Runner.class);
    job.setMapperClass(Mapper.class);
    job.setReducerClass(IterationStage2._Reducer.class);
    job.setNumReduceTasks(numberOfReducers);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setMapOutputKeyClass(VLongWritable.class);
    job.setMapOutputValueClass(BlockWritable.class);
    job.setOutputKeyClass(BlockIndexWritable.class);
    job.setOutputValueClass(BlockWritable.class);
    job.setSortComparatorClass(VLongWritableComparator.class);

    SequenceFileInputFormat.setInputPaths(job, input);
    FileOutputFormat.setOutputPath(job, output);
    FileOutputFormat.setCompressOutput(job, true);
    setCompression(job);

    return job;
}