List of usage examples for org.apache.hadoop.mapreduce Job getInstance
public static Job getInstance(Configuration conf) throws IOException
@Deprecated public static Job getInstance(Cluster ignored) throws IOException
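Before the project-specific examples below, here is a minimal, self-contained driver sketch of the common Job.getInstance(Configuration) pattern. The class name and the command-line input/output paths are placeholders for illustration and are not taken from any of the source files listed on this page.

// Minimal sketch: create a Job from a Configuration, run a map-only identity
// pass-through over text input, and block until completion.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class GetInstanceExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);            // preferred factory method
        job.setJobName("getInstance-example");
        job.setJarByClass(GetInstanceExample.class);
        job.setMapperClass(Mapper.class);           // identity mapper
        job.setNumReduceTasks(0);                   // map-only job
        job.setOutputKeyClass(LongWritable.class);  // TextInputFormat key type
        job.setOutputValueClass(Text.class);        // TextInputFormat value type
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}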
From source file:com.ery.hadoop.mrddx.client.MRJOBClient.java
@Override
public void run(Map<String, String> paramMap) throws Exception {
    // License.checkLicense();
    Configuration conf = new Configuration();
    for (String key : paramMap.keySet()) {
        String value = paramMap.get(key);
        if (null != value) {
            value = value.replaceAll("\\\\n", "\n");
            value = value.replaceAll("\\\\r", "\r");
            conf.set(key, value);
            paramMap.put(key, value);
        }
    }
    String debug = paramMap.get(MRConfiguration.INTERNAL_JOB_LOG_DEBUG);
    if (null != debug) {
        String rownum = paramMap.get(MRConfiguration.INTERNAL_JOB_LOG_DEBUG_ROWNUM);
        conf.setInt(MRConfiguration.INTERNAL_JOB_LOG_DEBUG, Integer.parseInt(debug));
        conf.setInt(MRConfiguration.INTERNAL_JOB_LOG_DEBUG_ROWNUM, Integer.parseInt(rownum));
    }
    this.printParameter(paramMap);
    MRJOBService mrJobService = new MRJOBService();
    Job job = Job.getInstance(conf);
    job.setJarByClass(MRJOBService.class);
    mrJobService.run(paramMap, job);
    // if (mrJobService.isJobRun(conf)) {
    // } else {
    //     JobConf jobConf = new JobConf(conf, MRJOBService.class);
    //     mrJobService.run(paramMap, jobConf);
    // }
}
From source file:com.examples.ch03.ParseWeblogs_Ex_1.java
public int run(String[] args) throws Exception {
    Path inputPath = new Path("apache_clf.txt");
    Path outputPath = new Path("output");
    Configuration conf = getConf();
    Job weblogJob = Job.getInstance(conf);
    weblogJob.setJobName("Weblog Transformer");
    weblogJob.setJarByClass(getClass());
    weblogJob.setNumReduceTasks(0);
    weblogJob.setMapperClass(CLFMapper_Ex_1.class);
    weblogJob.setMapOutputKeyClass(Text.class);
    weblogJob.setMapOutputValueClass(Text.class);
    weblogJob.setOutputKeyClass(Text.class);
    weblogJob.setOutputValueClass(Text.class);
    weblogJob.setInputFormatClass(TextInputFormat.class);
    weblogJob.setOutputFormatClass(TextOutputFormat.class);
    FileInputFormat.setInputPaths(weblogJob, inputPath);
    FileOutputFormat.setOutputPath(weblogJob, outputPath);
    if (weblogJob.waitForCompletion(true)) {
        return 0;
    }
    return 1;
}
From source file:com.fanlehai.hadoop.join.CompositeJoin.java
License:Apache License
/**
 * The main driver for the sort program. Invoke this method to submit the
 * map/reduce job.
 *
 * @throws IOException
 *             when there are communication problems with the job tracker.
 */
@SuppressWarnings("rawtypes")
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    JobClient client = new JobClient(conf);
    ClusterStatus cluster = client.getClusterStatus();
    int num_reduces = (int) (cluster.getMaxReduceTasks() * 0.9);
    String join_reduces = conf.get(REDUCES_PER_HOST);
    if (join_reduces != null) {
        num_reduces = cluster.getTaskTrackers() * Integer.parseInt(join_reduces);
    }
    Job job = Job.getInstance(conf);
    job.setJobName("join");
    job.setJarByClass(CompositeJoin.class);
    job.setMapperClass(Mapper.class);
    job.setReducerClass(Reducer.class);
    Class<? extends InputFormat> inputFormatClass = KeyValueTextInputFormat.class; // SequenceFileInputFormat.class
    Class<? extends OutputFormat> outputFormatClass = SequenceFileOutputFormat.class;
    Class<? extends WritableComparable> outputKeyClass = Text.class; // BytesWritable.class
    Class<? extends Writable> outputValueClass = Text.class; // TupleWritable.class
    String op = "inner";
    List<String> otherArgs = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-r".equals(args[i])) {
                num_reduces = Integer.parseInt(args[++i]);
            } else if ("-inFormat".equals(args[i])) {
                inputFormatClass = Class.forName(args[++i]).asSubclass(InputFormat.class);
            } else if ("-outFormat".equals(args[i])) {
                outputFormatClass = Class.forName(args[++i]).asSubclass(OutputFormat.class);
            } else if ("-outKey".equals(args[i])) {
                outputKeyClass = Class.forName(args[++i]).asSubclass(WritableComparable.class);
            } else if ("-outValue".equals(args[i])) {
                outputValueClass = Class.forName(args[++i]).asSubclass(Writable.class);
            } else if ("-joinOp".equals(args[i])) {
                op = args[++i];
            } else {
                otherArgs.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage(); // exits
        }
    }
    // Set user-supplied (possibly default) job configs
    job.setNumReduceTasks(num_reduces);
    if (otherArgs.size() < 2) {
        System.out.println("ERROR: Wrong number of parameters: ");
        return printUsage();
    }
    String strOut = otherArgs.remove(otherArgs.size() - 1);
    FileSystem.get(new Configuration()).delete(new Path(strOut), true);
    FileOutputFormat.setOutputPath(job, new Path(strOut));
    List<Path> plist = new ArrayList<Path>(otherArgs.size());
    for (String s : otherArgs) {
        plist.add(new Path(s));
    }
    job.setInputFormatClass(CompositeInputFormat.class);
    job.getConfiguration().set(CompositeInputFormat.JOIN_EXPR,
            CompositeInputFormat.compose(op, inputFormatClass, plist.toArray(new Path[0])));
    job.setOutputFormatClass(outputFormatClass);
    job.setMapperClass(MapComposite.class);
    job.setOutputKeyClass(outputKeyClass);
    job.setOutputValueClass(outputValueClass);
    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    int ret = job.waitForCompletion(true) ? 0 : 1;
    Date end_time = new Date();
    System.out.println("Job ended: " + end_time);
    System.out.println("The job took " + (end_time.getTime() - startTime.getTime()) / 1000 + " seconds.");
    return ret;
}
From source file:com.github.libsml.commons.util.HadoopUtils.java
License:Apache License
/**
 * Create a map-only Hadoop Job out of the passed in parameters. Does not set the
 * Job name.
 *
 * @see #getCustomJobName(String, JobContext, Class, Class)
 */
public static Job prepareJob(Path inputPath, Path outputPath, Class<? extends InputFormat> inputFormat,
        Class<? extends Mapper> mapper, Class<? extends Writable> mapperKey,
        Class<? extends Writable> mapperValue, Class<? extends OutputFormat> outputFormat, Configuration conf)
        throws IOException {
    // Job job = new Job(new Configuration(conf));
    Job job = Job.getInstance(conf);
    Configuration jobConf = job.getConfiguration();
    if (mapper.equals(Mapper.class)) {
        throw new IllegalStateException("Can't figure out the user class jar file from mapper/reducer");
    }
    job.setJarByClass(mapper);
    job.setInputFormatClass(inputFormat);
    jobConf.set("mapred.input.dir", inputPath.toString());
    job.setMapperClass(mapper);
    job.setMapOutputKeyClass(mapperKey);
    job.setMapOutputValueClass(mapperValue);
    job.setOutputKeyClass(mapperKey);
    job.setOutputValueClass(mapperValue);
    jobConf.setBoolean("mapred.compress.map.output", true);
    job.setNumReduceTasks(0);
    job.setOutputFormatClass(outputFormat);
    jobConf.set("mapred.output.dir", outputPath.toString());
    return job;
}
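A hypothetical caller of the map-only prepareJob helper above might look like the following. MyMapper, the input/output paths, and the chosen key/value types are assumptions made for illustration; they do not come from the original source file.

// Hypothetical usage sketch for HadoopUtils.prepareJob(...) shown above.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class PrepareJobExample {
    public static void main(String[] args) throws Exception {
        // MyMapper is a placeholder Mapper<LongWritable, Text, Text, IntWritable>
        // assumed to exist elsewhere in the caller's project.
        Job job = HadoopUtils.prepareJob(
                new Path(args[0]), new Path(args[1]),           // input and output paths
                TextInputFormat.class,
                MyMapper.class, Text.class, IntWritable.class,  // mapper and its output types
                TextOutputFormat.class,
                new Configuration());
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}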
From source file:com.github.libsml.commons.util.HadoopUtils.java
License:Apache License
/**
 *
 * @param inputPaths
 * @param outputPath
 * @param inputFormat
 * @param inputKey
 * @param inputValue
 * @param mapper
 * @param mapperKey
 * @param mapperValue
 * @param combiner
 * @param reducer
 * @param outputKey
 * @param outputValue
 * @param outputFormat
 * @param conf
 * @param overwrite
 * @param isCompress
 * @return
 * @throws IOException
 */
public static Job prepareAvroJob(String inputPaths, String outputPath,
        Class<? extends InputFormat> inputFormat, Object inputKey, Object inputValue,
        Class<? extends Mapper> mapper, Object mapperKey, Object mapperValue,
        Class<? extends Reducer> combiner, Class<? extends Reducer> reducer, Object outputKey,
        Object outputValue, Class<? extends OutputFormat> outputFormat, Configuration conf,
        boolean overwrite, boolean isCompress) throws IOException {
    Job job = Job.getInstance(conf);
    Configuration jobConf = job.getConfiguration();
    if (inputKey instanceof Schema) {
        if (inputValue instanceof Schema) {
            inputFormat = inputFormat == null ? AvroKeyValueInputFormat.class : inputFormat;
        }
        inputFormat = inputFormat == null ? AvroKeyInputFormat.class : inputFormat;
    }
    if (inputFormat != null) {
        job.setInputFormatClass(inputFormat);
    }
    if (inputKey instanceof Schema) {
        AvroJob.setInputKeySchema(job, (Schema) inputKey);
    }
    if (inputValue instanceof Schema) {
        AvroJob.setInputValueSchema(job, (Schema) inputValue);
    }
    if (outputKey instanceof Schema) {
        if (outputValue instanceof Schema) {
            outputFormat = outputFormat == null ? AvroKeyValueOutputFormat.class : outputFormat;
        }
        outputFormat = outputFormat == null ? AvroKeyOutputFormat.class : outputFormat;
    }
    if (outputFormat != null) {
        job.setOutputFormatClass(outputFormat);
    }
    if (outputKey instanceof Schema) {
        AvroJob.setOutputKeySchema(job, (Schema) outputKey);
    } else if (outputKey instanceof Class) {
        job.setOutputKeyClass((Class) outputKey);
    }
    if (outputValue instanceof Schema) {
        AvroJob.setOutputValueSchema(job, (Schema) outputValue);
    } else if (outputValue instanceof Class) {
        job.setOutputValueClass((Class) outputValue);
    }
    if (reducer == null) {
        job.setNumReduceTasks(0);
        if (mapperKey instanceof Schema) {
            AvroJob.setMapOutputKeySchema(job, (Schema) mapperKey);
        } else if (mapperKey instanceof Class) {
            job.setOutputKeyClass((Class) mapperKey);
        }
        if (mapperValue instanceof Schema) {
            AvroJob.setOutputValueSchema(job, (Schema) mapperValue);
        } else if (mapperValue instanceof Class) {
            job.setOutputValueClass((Class) mapperValue);
        }
        job.setJarByClass(mapper);
    } else if (reducer.equals(Reducer.class)) {
        if (mapper.equals(Mapper.class)) {
            throw new IllegalStateException("Can't figure out the user class jar file from mapper/reducer");
        }
        job.setJarByClass(mapper);
    } else {
        job.setJarByClass(reducer);
    }
    FileInputFormat.setInputPaths(job, inputPaths);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    if (isCompress) {
        FileOutputFormat.setCompressOutput(job, true);
        FileOutputFormat.setOutputCompressorClass(job, DeflateCodec.class);
    }
    job.setMapperClass(mapper);
    if (mapperKey instanceof Schema) {
        AvroJob.setMapOutputKeySchema(job, (Schema) mapperKey);
    } else if (mapperKey instanceof Class) {
        job.setMapOutputKeyClass((Class) mapperKey);
    }
    if (mapperValue instanceof Schema) {
        AvroJob.setMapOutputValueSchema(job, (Schema) mapperValue);
    } else if (mapperValue instanceof Class) {
        job.setMapOutputValueClass((Class) mapperValue);
    }
    if (reducer != null) {
        job.setReducerClass(reducer);
    }
    if (combiner != null) {
        job.setCombinerClass(combiner);
    }
    if (overwrite) {
        HadoopUtils.delete(jobConf, new Path(outputPath));
    }
    return job;
}
From source file:com.github.libsml.commons.util.HadoopUtils.java
License:Apache License
public static Job prepareAvroJob(String inputPaths, Path outputPath, Schema inputKeySchema,
        Class<? extends Mapper> mapper, Class<? extends Writable> mapperKey,
        Class<? extends Writable> mapperValue, Class<? extends Reducer> combiner,
        Class<? extends Reducer> reducer, Schema outputKeySchema,
        Class<? extends Writable> outputValue, Configuration conf, boolean overwrite) throws IOException {
    Job job = Job.getInstance(conf);
    Configuration jobConf = job.getConfiguration();
    if (reducer.equals(Reducer.class)) {
        if (mapper.equals(Mapper.class)) {
            throw new IllegalStateException("Can't figure out the user class jar file from mapper/reducer");
        }
        job.setJarByClass(mapper);
    } else {
        job.setJarByClass(reducer);
    }
    FileInputFormat.setInputPaths(job, inputPaths);
    FileOutputFormat.setOutputPath(job, outputPath);
    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, DeflateCodec.class);
    job.setInputFormatClass(AvroKeyInputFormat.class);
    AvroJob.setInputKeySchema(job, inputKeySchema);
    job.setMapperClass(mapper);
    if (mapperKey != null) {
        job.setMapOutputKeyClass(mapperKey);
    }
    if (mapperValue != null) {
        job.setMapOutputValueClass(mapperValue);
    }
    if (combiner != null) {
        job.setCombinerClass(combiner);
    }
    job.setOutputFormatClass(AvroKeyOutputFormat.class);
    job.setReducerClass(reducer);
    AvroJob.setOutputKeySchema(job, outputKeySchema);
    job.setOutputValueClass(outputValue);
    if (overwrite) {
        HadoopUtils.delete(jobConf, outputPath);
    }
    return job;
}
From source file:com.github.libsml.commons.util.HadoopUtils.java
License:Apache License
public static Job prepareAvroJob(String inputPaths, Path outputPath, Schema inputKeySchema,
        Class<? extends Mapper> mapper, Class<? extends Writable> mapperKey,
        Class<? extends Writable> mapperValue, Class<? extends Reducer> combiner,
        Class<? extends Reducer> reducer, Class<? extends Writable> outputKey,
        Class<? extends Writable> outputValue, Configuration conf, boolean overwrite) throws IOException {
    Job job = Job.getInstance(conf);
    Configuration jobConf = job.getConfiguration();
    if (reducer.equals(Reducer.class)) {
        if (mapper.equals(Mapper.class)) {
            throw new IllegalStateException("Can't figure out the user class jar file from mapper/reducer");
        }
        job.setJarByClass(mapper);
    } else {
        job.setJarByClass(reducer);
    }
    FileInputFormat.setInputPaths(job, inputPaths);
    FileOutputFormat.setOutputPath(job, outputPath);
    // FileOutputFormat.setCompressOutput(job, true);
    // FileOutputFormat.setOutputCompressorClass(job, DeflateCodec.class);
    job.setInputFormatClass(AvroKeyInputFormat.class);
    AvroJob.setInputKeySchema(job, inputKeySchema);
    job.setMapperClass(mapper);
    if (mapperKey != null) {
        job.setMapOutputKeyClass(mapperKey);
    }
    if (mapperValue != null) {
        job.setMapOutputValueClass(mapperValue);
    }
    if (combiner != null) {
        job.setCombinerClass(combiner);
    }
    job.setReducerClass(reducer);
    job.setOutputKeyClass(outputKey);
    job.setOutputValueClass(outputValue);
    if (overwrite) {
        HadoopUtils.delete(jobConf, outputPath);
    }
    return job;
}
From source file:com.github.libsml.commons.util.HadoopUtils.java
License:Apache License
public static Job prepareJob(String inputPath, String outputPath, Class<? extends InputFormat> inputFormat,
        Class<? extends Mapper> mapper, Class<? extends Writable> mapperKey,
        Class<? extends Writable> mapperValue, Class<? extends Reducer> reducer,
        Class<? extends Writable> reducerKey, Class<? extends Writable> reducerValue,
        Class<? extends OutputFormat> outputFormat, Configuration conf) throws IOException {
    // Job job = new Job(new Configuration(conf));
    Job job = Job.getInstance(conf);
    Configuration jobConf = job.getConfiguration();
    if (reducer.equals(Reducer.class)) {
        if (mapper.equals(Mapper.class)) {
            throw new IllegalStateException("Can't figure out the user class jar file from mapper/reducer");
        }
        job.setJarByClass(mapper);
    } else {
        job.setJarByClass(reducer);
    }
    job.setInputFormatClass(inputFormat);
    jobConf.set("mapred.input.dir", inputPath);
    job.setMapperClass(mapper);
    if (mapperKey != null) {
        job.setMapOutputKeyClass(mapperKey);
    }
    if (mapperValue != null) {
        job.setMapOutputValueClass(mapperValue);
    }
    jobConf.setBoolean("mapred.compress.map.output", true);
    job.setReducerClass(reducer);
    job.setOutputKeyClass(reducerKey);
    job.setOutputValueClass(reducerValue);
    job.setOutputFormatClass(outputFormat);
    jobConf.set("mapred.output.dir", outputPath);
    return job;
}
From source file:com.github.sakserv.minicluster.mapreduce.Driver.java
License:Apache License
public static void main(String[] args) throws Exception {
    if (args.length != 2) {
        System.out.println("usage: [input] [output]");
        System.exit(-1);
    }
    if (null == configuration) {
        configuration = new Configuration();
    }
    Job job = Job.getInstance(configuration);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setMapperClass(WordMapper.class);
    job.setReducerClass(SumReducer.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setJarByClass(Driver.class);
    job.waitForCompletion(true);
}
From source file:com.github.sandgorgon.parmr.Main.java
License:Open Source License
@Override
public int run(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.println("Usage: parmr <input file> <output path>");
        return -1;
    }
    Configuration conf = super.getConf();
    conf.set("mapreduce.job.queuename", "prod");
    Job job = Job.getInstance(conf);
    job.setJobName(jobName);
    job.setJarByClass(Main.class);

    // Parquet Schema
    // Read from the input file itself the schema that we will be assuming
    Path infile = new Path(args[0]);
    List<Footer> footers = ParquetFileReader.readFooters(conf,
            infile.getFileSystem(conf).getFileStatus(infile), true);
    MessageType schema = footers.get(0).getParquetMetadata().getFileMetaData().getSchema();

    // Avro Schema
    // Convert the Parquet schema to an Avro schema
    AvroSchemaConverter avroSchemaConverter = new AvroSchemaConverter();
    Schema avroSchema = avroSchemaConverter.convert(schema);

    // Set the Mapper
    job.setMapperClass(UserMapper.class);

    // This works for predicate pushdown on record assembly read.
    AvroParquetInputFormat.setUnboundRecordFilter(job, UserRecordFilter.class);

    AvroParquetInputFormat.addInputPath(job, new Path(args[0]));
    AvroParquetInputFormat.setAvroReadSchema(job, avroSchema);
    job.setInputFormatClass(AvroParquetInputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    // If you needed to return an avro object from the mapper, refer to this...
    //job.setMapOutputValueClass(AvroValue.class);
    //AvroJob.setMapOutputValueSchema(job, avroSchema);

    // Reducer
    job.setReducerClass(UserReducer.class);

    // Output
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    // If we need to return an avro class again, refer to this...
    //job.setOutputFormatClass(AvroParquetOutputFormat.class);
    //AvroParquetOutputFormat.setOutputPath(job, new Path(args[1]));
    //AvroParquetOutputFormat.setSchema(job, avroSchema);
    //job.setOutputKeyClass(Void.class);
    //job.setOutputValueClass(GenericRecord.class);

    // Rough way of testing the projection side of things.
    AvroParquetInputFormat.setRequestedProjection(job,
            Schema.parse("{\"namespace\": \"com.github.sandgorgon.parmr.avro\",\n"
                    + " \"type\": \"record\",\n"
                    + " \"name\": \"User\",\n"
                    + " \"fields\": [\n"
                    + "     {\"name\": \"name\", \"type\": \"string\"},\n"
                    + "     {\"name\": \"favorite_number\",  \"type\": [\"int\", \"null\"]}\n"
                    // + "     {\"name\": \"favorite_color\", \"type\": [\"string\", \"null\"]}\n"
                    + " ]\n"
                    + "}\n"
                    + ""));

    // Do the deed!
    int completion = job.waitForCompletion(true) ? 0 : 1;
    return completion;
}