List of usage examples for org.apache.hadoop.mapreduce Job setMapOutputValueClass
public void setMapOutputValueClass(Class<?> theClass) throws IllegalStateException
From source file:com.pivotal.hawq.mapreduce.parquet.HAWQParquetOutputDriver.java
License:Apache License
@Override public int run(String[] args) throws Exception { Job job = new Job(getConf(), "HAWQParquetOutputFormat"); job.setJarByClass(HAWQParquetOutputDriver.class); job.setOutputFormatClass(HAWQParquetOutputFormat.class); /*/*from w ww .ja v a 2 s . co m*/ // int2 int4 int8 HAWQSchema schema = new HAWQSchema("t_int", HAWQSchema.required_field(HAWQPrimitiveField.PrimitiveType.INT2, "col_short"), HAWQSchema.optional_field(HAWQPrimitiveField.PrimitiveType.INT4, "col_int"), HAWQSchema.required_field(HAWQPrimitiveField.PrimitiveType.INT8, "col_long") ); job.setMapperClass(WriteIntMapper.class); */ /* // varchar HAWQSchema schema = new HAWQSchema("t_varchar", HAWQSchema.required_field(HAWQPrimitiveField.PrimitiveType.VARCHAR, "col_varchar") ); job.setMapperClass(WriteVarcharMapper.class); */ /* // float4 float8 HAWQSchema schema = new HAWQSchema("t_floating", HAWQSchema.required_field(HAWQPrimitiveField.PrimitiveType.FLOAT4, "col_float"), HAWQSchema.required_field(HAWQPrimitiveField.PrimitiveType.FLOAT8, "col_long") ); job.setMapperClass(WriteFloatingNumberMapper.class); */ // boolean // HAWQSchema schema = new HAWQSchema("t_boolean", // HAWQSchema.required_field(HAWQPrimitiveField.PrimitiveType.BOOL, "col_bool")); // job.setMapperClass(WriteBooleanMapper.class); // byte array HAWQSchema schema = new HAWQSchema("t_bytea", HAWQSchema.required_field(HAWQPrimitiveField.PrimitiveType.BYTEA, "col_bytea")); job.setMapperClass(WriteByteArrayMapper.class); HAWQParquetOutputFormat.setSchema(job, schema); FileInputFormat.addInputPath(job, new Path(args[0])); HAWQParquetOutputFormat.setOutputPath(job, new Path(args[1])); job.setNumReduceTasks(0); job.setMapOutputKeyClass(Void.class); job.setMapOutputValueClass(HAWQRecord.class); return job.waitForCompletion(true) ? 0 : 1; }
From source file:com.placeiq.piqconnect.BlocksBuilder.java
License:Apache License
protected Job configStage1() throws Exception { FileSystem fs = FileSystem.get(getConf()); fs.delete(pathOutput, true); // useful ? Configuration conf = getConf(); conf.setInt(Constants.PROP_BLOCK_SIZE, blockSize); conf.setBoolean(Constants.PROP_IS_VECTOR, isVector); conf.set("mapred.output.compression.type", "BLOCK"); // useful ? Job job = new Job(conf, "data-piqid.piqconnect.BlocksBuilder"); job.setJarByClass(BlocksBuilder.class); job.setMapperClass(MapStage1.class); job.setReducerClass(RedStage1.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); job.setNumReduceTasks(numberOfReducers); job.setMapOutputKeyClass(BlockIndexWritable.class); job.setMapOutputValueClass(LightBlockWritable.class); job.setOutputKeyClass(BlockIndexWritable.class); job.setOutputValueClass(BlockWritable.class); FileInputFormat.setInputPaths(job, pathEdges); SequenceFileOutputFormat.setOutputPath(job, pathOutput); SequenceFileOutputFormat.setCompressOutput(job, true); Runner.setCompression(job);// w w w .j av a 2s .c o m return job; }
From source file:com.placeiq.piqconnect.Runner.java
License:Apache License
private Job buildJob1(Path input1, Path input2, Path output) throws Exception { Configuration conf = getConf(); conf.setInt(Constants.PROP_BLOCK_SIZE, blockSize); conf.set("mapred.output.compression.type", "BLOCK"); Job job = new Job(conf, "data-piqid.piqconnect.IterationStage1"); job.setJarByClass(Runner.class); job.setMapperClass(IterationStage1._Mapper.class); job.setReducerClass(IterationStage1._Reducer.class); job.setInputFormatClass(SequenceFileInputFormat.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); job.setNumReduceTasks(numberOfReducers); job.setMapOutputKeyClass(IterationStage1.JoinKey.class); job.setMapOutputValueClass(BlockWritable.class); job.setOutputKeyClass(VLongWritable.class); job.setOutputValueClass(BlockWritable.class); job.setGroupingComparatorClass(IterationStage1.IndexComparator.class); job.setPartitionerClass(IterationStage1.IndexPartitioner.class); job.setSortComparatorClass(IterationStage1.SortComparator.class); FileInputFormat.setInputPaths(job, input1, input2); SequenceFileOutputFormat.setOutputPath(job, output); SequenceFileOutputFormat.setCompressOutput(job, true); setCompression(job);/*from w ww . j a v a 2s .c om*/ return job; }
From source file:com.placeiq.piqconnect.Runner.java
License:Apache License
private Job buildJob2(Path input, Path output) throws Exception { Configuration conf = getConf(); conf.setInt(Constants.PROP_BLOCK_SIZE, blockSize); Job job = new Job(conf, "data-piqid.piqconnect.IterationStage2"); job.setJarByClass(Runner.class); job.setMapperClass(Mapper.class); job.setReducerClass(IterationStage2._Reducer.class); job.setNumReduceTasks(numberOfReducers); job.setInputFormatClass(SequenceFileInputFormat.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); job.setMapOutputKeyClass(VLongWritable.class); job.setMapOutputValueClass(BlockWritable.class); job.setOutputKeyClass(BlockIndexWritable.class); job.setOutputValueClass(BlockWritable.class); job.setSortComparatorClass(VLongWritableComparator.class); SequenceFileInputFormat.setInputPaths(job, input); FileOutputFormat.setOutputPath(job, output); FileOutputFormat.setCompressOutput(job, true); setCompression(job);/*from w ww. j a v a 2 s. c o m*/ return job; }
From source file:com.rim.logdriver.util.Cat.java
License:Apache License
@Override public int run(String[] args) throws Exception { Configuration conf = getConf(); // Configuration processed by ToolRunner // If run by Oozie, then load the Oozie conf too if (System.getProperty("oozie.action.conf.xml") != null) { conf.addResource(new URL("file://" + System.getProperty("oozie.action.conf.xml"))); }/*from w w w .j a v a2s .co m*/ FileSystem fs = FileSystem.get(conf); // The command line options List<Path> paths = new ArrayList<Path>(); Path outputDir = null; // Load input files from the command line if (args.length < 2) { System.out.println("usage: [genericOptions] input [input ...] output"); System.exit(1); } // Get the files we need from the command line. for (int i = 0; i < args.length - 1; i++) { for (FileStatus f : fs.globStatus(new Path(args[i]))) { paths.add(f.getPath()); } } outputDir = new Path(args[args.length - 1]); Job job = new Job(conf); Configuration jobConf = job.getConfiguration(); job.setJarByClass(Cat.class); jobConf.setIfUnset("mapred.job.name", "Cat Files"); // To propagate credentials within Oozie if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) { jobConf.set("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION")); } // Good output separators include things that are unsupported by XML. So we // just send the byte value of the character through. The restriction here // is that it can't be more than 1 byte when UTF-8 encoded, since it will be // read by Pig which only deals with single byte separators. { String outputSeparator = jobConf.get("logdriver.output.field.separator", DEFAULT_OUTPUT_SEPARATOR); byte[] bytes = outputSeparator.getBytes(UTF_8); if (bytes.length != 1) { LOG.error("The output separator must be a single byte in UTF-8."); return 1; } jobConf.set("logdriver.output.field.separator", Byte.toString(bytes[0])); } job.setInputFormatClass(BoomInputFormat.class); job.setMapperClass(CatMapper.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(NullWritable.class); job.setNumReduceTasks(0); job.setOutputFormatClass(TextOutputFormat.class); TextOutputFormat.setOutputPath(job, outputDir); for (Path path : paths) { BoomInputFormat.addInputPath(job, path); } // Run the job. if (conf.getBoolean("job.wait", DEFAULT_WAIT_JOB)) { return job.waitForCompletion(true) ? 0 : 1; } else { job.submit(); return 0; } }
From source file:com.rim.logdriver.util.FastSearch.java
License:Apache License
@Override public int run(String[] args) throws Exception { Configuration conf = getConf(); // Configuration processed by ToolRunner // If run by Oozie, then load the Oozie conf too if (System.getProperty("oozie.action.conf.xml") != null) { conf.addResource(new URL("file://" + System.getProperty("oozie.action.conf.xml"))); }// w w w .j av a 2 s .c o m FileSystem fs = FileSystem.get(conf); // The command line options String searchString = null; List<Path> paths = new ArrayList<Path>(); Path outputDir = null; // Load input files from the command line if (args.length < 3) { System.out.println("usage: [genericOptions] searchString input [input ...] output"); System.exit(1); } // Get the files we need from the command line. searchString = args[0]; for (int i = 1; i < args.length - 1; i++) { for (FileStatus f : fs.globStatus(new Path(args[i]))) { paths.add(f.getPath()); } } outputDir = new Path(args[args.length - 1]); Job job = new Job(conf); Configuration jobConf = job.getConfiguration(); job.setJarByClass(FastSearch.class); jobConf.setIfUnset("mapred.job.name", "Search Files"); // To propagate credentials within Oozie if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) { jobConf.set("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION")); } // Good output separators include things that are unsupported by XML. So we // just send the byte value of the character through. The restriction here // is that it can't be more than 1 byte when UTF-8 encoded, since it will be // read by Pig which only deals with single byte separators. { String outputSeparator = jobConf.get("logdriver.output.field.separator", DEFAULT_OUTPUT_SEPARATOR); byte[] bytes = outputSeparator.getBytes(UTF_8); if (bytes.length != 1) { LOG.error("The output separator must be a single byte in UTF-8."); return 1; } jobConf.set("logdriver.output.field.separator", Byte.toString(bytes[0])); } jobConf.set("logdriver.search.string", Base64.encodeBase64String(searchString.getBytes("UTF-8"))); job.setInputFormatClass(AvroBlockInputFormat.class); job.setMapperClass(SearchMapper.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(NullWritable.class); job.setNumReduceTasks(0); // And set the output as usual job.setOutputFormatClass(TextOutputFormat.class); TextOutputFormat.setOutputPath(job, outputDir); for (Path path : paths) { AvroBlockInputFormat.addInputPath(job, path); } // Run the job. if (conf.getBoolean("job.wait", DEFAULT_WAIT_JOB)) { return job.waitForCompletion(true) ? 0 : 1; } else { job.submit(); return 0; } }
From source file:com.rim.logdriver.util.Grep.java
License:Apache License
@Override public int run(String[] args) throws Exception { Configuration conf = getConf(); // Configuration processed by ToolRunner // If run by Oozie, then load the Oozie conf too if (System.getProperty("oozie.action.conf.xml") != null) { conf.addResource(new URL("file://" + System.getProperty("oozie.action.conf.xml"))); }//from w ww. j a va2s.com FileSystem fs = FileSystem.get(conf); // The command line options String regex = null; List<Path> paths = new ArrayList<Path>(); Path outputDir = null; // Load input files from the command line if (args.length < 3) { System.out.println("usage: [genericOptions] regex input [input ...] output"); System.exit(1); } // Get the files we need from the command line. regex = args[0]; for (int i = 1; i < args.length - 1; i++) { for (FileStatus f : fs.globStatus(new Path(args[i]))) { paths.add(f.getPath()); } } outputDir = new Path(args[args.length - 1]); Job job = new Job(conf); Configuration jobConf = job.getConfiguration(); job.setJarByClass(Grep.class); jobConf.setIfUnset("mapred.job.name", "Grep Files"); // To propagate credentials within Oozie if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) { jobConf.set("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION")); } // Good output separators include things that are unsupported by XML. So we // just send the byte value of the character through. The restriction here // is that it can't be more than 1 byte when UTF-8 encoded, since it will be // read by Pig which only deals with single byte separators. { String outputSeparator = jobConf.get("logdriver.output.field.separator", DEFAULT_OUTPUT_SEPARATOR); byte[] bytes = outputSeparator.getBytes(UTF_8); if (bytes.length != 1) { LOG.error("The output separator must be a single byte in UTF-8."); return 1; } jobConf.set("logdriver.output.field.separator", Byte.toString(bytes[0])); } jobConf.set("logdriver.grep.regex", Base64.encodeBase64String(regex.getBytes("UTF-8"))); job.setInputFormatClass(BoomInputFormat.class); job.setMapperClass(GrepMapper.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(NullWritable.class); job.setNumReduceTasks(0); // And set the output as usual job.setOutputFormatClass(TextOutputFormat.class); TextOutputFormat.setOutputPath(job, outputDir); for (Path path : paths) { BoomInputFormat.addInputPath(job, path); } // Run the job. if (conf.getBoolean("job.wait", DEFAULT_WAIT_JOB)) { return job.waitForCompletion(true) ? 0 : 1; } else { job.submit(); return 0; } }
From source file:com.rim.logdriver.util.MultiSearch.java
License:Apache License
@Override public int run(String[] args) throws Exception { Configuration conf = getConf(); // Configuration processed by ToolRunner // If run by Oozie, then load the Oozie conf too if (System.getProperty("oozie.action.conf.xml") != null) { conf.addResource(new URL("file://" + System.getProperty("oozie.action.conf.xml"))); }//ww w. j a v a 2s . c om FileSystem fs = FileSystem.get(conf); // The command line options String searchStringDir = null; List<Path> paths = new ArrayList<Path>(); Path outputDir = null; // Load input files from the command line if (args.length < 3) { System.out.println("usage: [genericOptions] searchStringDirectory input [input ...] output"); System.exit(1); } // Get the files we need from the command line. searchStringDir = args[0]; // We are going to be reading all the files in this directory a lot. So // let's up the replication factor by a lot so that they're easy to read. for (FileStatus f : fs.listStatus(new Path(searchStringDir))) { fs.setReplication(f.getPath(), (short) 16); } for (int i = 1; i < args.length - 1; i++) { for (FileStatus f : fs.globStatus(new Path(args[i]))) { paths.add(f.getPath()); } } outputDir = new Path(args[args.length - 1]); Job job = new Job(conf); Configuration jobConf = job.getConfiguration(); job.setJarByClass(MultiSearch.class); jobConf.setIfUnset("mapred.job.name", "MultiSearch"); // To propagate credentials within Oozie if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) { jobConf.set("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION")); } // Good output separators include things that are unsupported by XML. So we // just send the byte value of the character through. The restriction here // is that it can't be more than 1 byte when UTF-8 encoded, since it will be // read by Pig which only deals with single byte separators. { String outputSeparator = jobConf.get("logdriver.output.field.separator", DEFAULT_OUTPUT_SEPARATOR); byte[] bytes = outputSeparator.getBytes(UTF_8); if (bytes.length != 1) { LOG.error("The output separator must be a single byte in UTF-8."); return 1; } jobConf.set("logdriver.output.field.separator", Byte.toString(bytes[0])); } jobConf.set("logdriver.search.string.dir", searchStringDir); // This search is generally too fast to make good use of 128MB blocks, so // let's set the value to 256MB (if it's not set already) if (jobConf.get("mapred.max.split.size") == null) { jobConf.setLong("mapred.max.split.size", 256 * 1024 * 1024); } job.setInputFormatClass(AvroBlockInputFormat.class); job.setMapperClass(SearchMapper.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(NullWritable.class); job.setNumReduceTasks(0); job.setOutputFormatClass(TextOutputFormat.class); TextOutputFormat.setOutputPath(job, outputDir); for (Path path : paths) { AvroBlockInputFormat.addInputPath(job, path); } // Run the job. if (conf.getBoolean("job.wait", DEFAULT_WAIT_JOB)) { return job.waitForCompletion(true) ? 0 : 1; } else { job.submit(); return 0; } }
From source file:com.rim.logdriver.util.Search.java
License:Apache License
@Override public int run(String[] args) throws Exception { Configuration conf = getConf(); // Configuration processed by ToolRunner // If run by Oozie, then load the Oozie conf too if (System.getProperty("oozie.action.conf.xml") != null) { conf.addResource(new URL("file://" + System.getProperty("oozie.action.conf.xml"))); }/* w w w. j av a2s .c o m*/ FileSystem fs = FileSystem.get(conf); // The command line options String searchString = null; List<Path> paths = new ArrayList<Path>(); Path outputDir = null; // Load input files from the command line if (args.length < 3) { System.out.println("usage: [genericOptions] searchString input [input ...] output"); System.exit(1); } // Get the files we need from the command line. searchString = args[0]; for (int i = 1; i < args.length - 1; i++) { for (FileStatus f : fs.globStatus(new Path(args[i]))) { paths.add(f.getPath()); } } outputDir = new Path(args[args.length - 1]); Job job = new Job(conf); Configuration jobConf = job.getConfiguration(); job.setJarByClass(Search.class); jobConf.setIfUnset("mapred.job.name", "Search Files"); // To propagate credentials within Oozie if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) { jobConf.set("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION")); } // Good output separators include things that are unsupported by XML. So we // just send the byte value of the character through. The restriction here // is that it can't be more than 1 byte when UTF-8 encoded, since it will be // read by Pig which only deals with single byte separators. { String outputSeparator = jobConf.get("logdriver.output.field.separator", DEFAULT_OUTPUT_SEPARATOR); byte[] bytes = outputSeparator.getBytes(UTF_8); if (bytes.length != 1) { LOG.error("The output separator must be a single byte in UTF-8."); return 1; } jobConf.set("logdriver.output.field.separator", Byte.toString(bytes[0])); } jobConf.set("logdriver.search.string", searchString); job.setInputFormatClass(BoomInputFormat.class); job.setMapperClass(SearchMapper.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(NullWritable.class); job.setNumReduceTasks(0); // And set the output as usual job.setOutputFormatClass(TextOutputFormat.class); TextOutputFormat.setOutputPath(job, outputDir); for (Path path : paths) { BoomInputFormat.addInputPath(job, path); } // Run the job. if (conf.getBoolean("job.wait", DEFAULT_WAIT_JOB)) { return job.waitForCompletion(true) ? 0 : 1; } else { job.submit(); return 0; } }
From source file:com.rockstor.compact.CompactDataTool.java
License:Apache License
private Job createSubmittableJob(Configuration conf) throws IOException { Job job = new Job(conf, NAME); job.setJarByClass(CompactDataTool.class); job.setInputFormatClass(CompactDirInputFormat.class); job.setMapOutputValueClass(NullWritable.class); job.setMapOutputKeyClass(NullWritable.class); job.setMapperClass(CompactDataMapper.class); job.setNumReduceTasks(0);/*from ww w . j av a2 s .c om*/ job.setOutputFormatClass(NullOutputFormat.class); LOG.info("init job " + NAME + " OK"); return job; }