List of usage examples for the org.apache.hadoop.mapreduce.Job constructor
Job(Configuration conf, String jobName) throws IOException
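Before the examples: all of them use the two-argument Job(Configuration, String) form. As a minimal, self-contained sketch of this constructor (a runnable identity job; input and output paths taken from args, and the class name MinimalJobDriver is an illustration, not from the examples below):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class MinimalJobDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Deprecated in Hadoop 2.x; Job.getInstance(conf, "minimal-job") is the preferred factory.
        Job job = new Job(conf, "minimal-job");
        job.setJarByClass(MinimalJobDriver.class);
        job.setMapperClass(Mapper.class);   // identity mapper
        job.setReducerClass(Reducer.class); // identity reducer
        job.setOutputKeyClass(LongWritable.class); // TextInputFormat keys: byte offsets
        job.setOutputValueClass(Text.class);       // TextInputFormat values: lines
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

Hadoop 2.x deprecates these constructors in favor of Job.getInstance(Configuration, String), which the first example below flags with a TODO.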
From source file:com.tdunning.plume.local.lazy.MapRedExecutor.java
License:Apache License
/**
 * This method returns a Job instance out of a {@link MSCR} entity. It puts the Class of
 * the {@link PlumeWorkflow} argument and the MSCR id in the hadoop configuration.
 *
 * @param mscr The MSCR to convert
 * @param workFlow The workflow whose class will be instantiated by hadoop mappers/reducers
 * @param workFlowOutputPath The temporary output path used by the workflow
 * @param outputPath The output path of the MapRed job
 * @return A hadoop-executable MapRed Job
 *
 * @throws IOException
 */
static Job getMapRed(final MSCR mscr, PlumeWorkflow workFlow, String workFlowOutputPath, String outputPath)
        throws IOException {

    Configuration conf = new Configuration();
    conf.set(WORKFLOW_NAME, workFlow.getClass().getName());
    conf.setInt(MSCR_ID, mscr.getId());
    conf.set(TEMP_OUTPUT_PATH, workFlowOutputPath);

    Job job = new Job(conf, "MSCR"); // TODO deprecation
    job.setMapOutputKeyClass(PlumeObject.class);
    job.setMapOutputValueClass(PlumeObject.class);
    job.setJarByClass(MapRedExecutor.class);

    /**
     * Define multiple inputs
     */
    for (PCollection<?> input : mscr.getInputs()) {
        if (!(input instanceof LazyCollection)) {
            throw new IllegalArgumentException("Can't create MapRed from MSCR whose inputs are not LazyTable");
        }
        LazyCollection<Text> l = (LazyCollection<Text>) input;
        if (!(l.isMaterialized() && l.getFile() != null)) {
            // Collections have plume ID only if they are intermediate results - TODO better naming for this
            if (l.getPlumeId().length() < 1) {
                throw new IllegalArgumentException(
                        "Can't create MapRed from MSCR inputs that are not materialized to a file");
            }
        }
        PCollectionType<?> rType = l.getType();
        Class<? extends InputFormat> format = SequenceFileInputFormat.class;
        if (rType instanceof PTableType) {
            PTableType<?, ?> tType = (PTableType<?, ?>) rType;
            if (tType.valueType() instanceof StringType && tType.keyType() instanceof StringType) {
                format = KeyValueTextInputFormat.class;
            }
            MultipleInputs.addInputPath(job, new Path(l.getFile()), format, MSCRMapper.class);
        } else {
            if (rType.elementType() instanceof StringType) {
                format = TextInputFormat.class;
            }
            MultipleInputs.addInputPath(job, new Path(l.getFile()), format, MSCRMapper.class);
        }
    }

    /**
     * Define multiple outputs
     */
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    for (Map.Entry<PCollection<?>, Integer> entry : mscr.getNumberedChannels().entrySet()) {
        PCollectionType<?> rType = ((LazyCollection<?>) mscr.getOutputChannels().get(entry.getKey()).output)
                .getType();
        if (rType instanceof PTableType) {
            PTableType<?, ?> tType = (PTableType<?, ?>) rType;
            Class<? extends OutputFormat> outputFormat = SequenceFileOutputFormat.class;
            if (tType.keyType() instanceof StringType && tType.valueType() instanceof StringType) {
                outputFormat = TextOutputFormat.class;
            }
            MultipleOutputs.addNamedOutput(job, entry.getValue() + "", outputFormat,
                    getHadoopType(tType.keyType()), getHadoopType(tType.valueType()));
        } else {
            Class<? extends OutputFormat> outputFormat = SequenceFileOutputFormat.class;
            if (rType.elementType() instanceof StringType) {
                outputFormat = TextOutputFormat.class;
            }
            MultipleOutputs.addNamedOutput(job, entry.getValue() + "", outputFormat, NullWritable.class,
                    getHadoopType(rType.elementType()));
        }
    }

    /**
     * Define Reducer & Combiner
     */
    job.setCombinerClass(MSCRCombiner.class);
    job.setReducerClass(MSCRReducer.class);
    job.setNumReduceTasks(1);
    return job;
}
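The named outputs registered with MultipleOutputs.addNamedOutput above are written on the reduce side through a MultipleOutputs instance. The following is a minimal sketch of that pattern; the reducer class and its channel-routing logic are assumptions, not taken from MSCRReducer:

import java.io.IOException;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;

// Sketch of a reducer writing to the named outputs registered above.
public static class SketchReducer extends Reducer<PlumeObject, PlumeObject, NullWritable, Text> {
    private MultipleOutputs<NullWritable, Text> mos;

    @Override
    protected void setup(Context context) {
        mos = new MultipleOutputs<NullWritable, Text>(context);
    }

    @Override
    protected void reduce(PlumeObject key, Iterable<PlumeObject> values, Context context)
            throws IOException, InterruptedException {
        // Hypothetical: route each record to the channel it belongs to;
        // the channel id must match a name passed to addNamedOutput.
        String channelId = "0";
        mos.write(channelId, NullWritable.get(), new Text(key.toString()));
    }

    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        mos.close(); // flush all named outputs
    }
}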
From source file:com.teradata.compaction.mapreduce.MergeParquetFilesMR.java
License:Apache License
public static void main(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: java -jar MergeParquetFilesMR path_to_input_folder path_to_output_folder");
        System.exit(1);
    }
    Configuration conf = new Configuration();
    Job job = new Job(conf, "MergeParquet");

    final Path inputPath = new Path(args[0]);
    final Path out = new Path(args[1]);
    Schema schemaParquetFile = getBaseSchema(inputPath, conf);

    job.setJarByClass(MergeParquetFilesMR.class);
    job.setMapperClass(SampleParquetMapper.class);
    job.setReducerClass(SampleParquetReducer.class);
    job.setInputFormatClass(AvroParquetInputFormat.class);
    job.setOutputFormatClass(AvroParquetOutputFormat.class);
    job.setMapOutputKeyClass(NullWritable.class);

    AvroJob.setMapOutputValueSchema(job, schemaParquetFile);
    AvroParquetOutputFormat.setSchema(job, schemaParquetFile);

    FileInputFormat.addInputPath(job, inputPath);
    AvroParquetOutputFormat.setOutputPath(job, out);
    job.setNumReduceTasks(1);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
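getBaseSchema is not part of this listing. One plausible reconstruction (an assumption: it derives the Avro schema from the footer of the first Parquet file under the input folder via parquet-avro's AvroSchemaConverter; the parquet.* package names match the older parquet-mr releases this code appears to target, while newer releases use org.apache.parquet.*):

import java.io.IOException;
import org.apache.avro.Schema;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import parquet.avro.AvroSchemaConverter;
import parquet.hadoop.ParquetFileReader;
import parquet.hadoop.metadata.ParquetMetadata;
import parquet.schema.MessageType;

// Hypothetical reconstruction of getBaseSchema: read the footer of the
// first Parquet file under inputPath and convert its schema to Avro.
private static Schema getBaseSchema(Path inputPath, Configuration conf) throws IOException {
    FileSystem fs = inputPath.getFileSystem(conf);
    for (FileStatus status : fs.listStatus(inputPath)) {
        if (status.isFile() && status.getPath().getName().endsWith(".parquet")) {
            ParquetMetadata footer = ParquetFileReader.readFooter(conf, status.getPath());
            MessageType parquetSchema = footer.getFileMetaData().getSchema();
            return new AvroSchemaConverter().convert(parquetSchema);
        }
    }
    throw new IOException("No Parquet files found under " + inputPath);
}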
From source file:com.tetsuyaodaka.hadoop.math.matrix.MatrixMult.java
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Date startProc = new Date(System.currentTimeMillis());
    System.out.println("process started at " + startProc);

    Configuration conf = new Configuration();
    int I = Integer.parseInt(args[3]);  // num of rows of MatrixA
    int K = Integer.parseInt(args[4]);  // num of rows of MatrixB'
    int IB = Integer.parseInt(args[5]); // row-block size of MatrixA
    int KB = Integer.parseInt(args[6]); // row-block size of MatrixB'

    int M = (I % IB == 0) ? I / IB : I / IB + 1; // num of row blocks of MatrixA
    int N = (K % KB == 0) ? K / KB : K / KB + 1; // num of row blocks of MatrixB'

    conf.set("I", args[3]);
    conf.set("K", args[4]);
    conf.set("IB", args[5]);
    conf.set("KB", args[6]);
    conf.set("M", Integer.toString(M));
    conf.set("N", Integer.toString(N));

    Job job = new Job(conf, "MatrixMultiplication");
    job.setJarByClass(MatrixMult.class);
    job.setReducerClass(Reduce.class);
    job.setMapOutputKeyClass(MatrixMult.IndexPair.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    // Register a dedicated mapper for each input matrix.
    MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, MapA.class); // matrix A
    MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, MapB.class); // matrix B
    FileOutputFormat.setOutputPath(job, new Path(args[2])); // output path

    System.out.println("num of MatrixA RowBlock(M) is " + M);
    System.out.println("num of MatrixB RowBlock(N) is " + N);

    boolean success = job.waitForCompletion(true);
    Date endProc = new Date(System.currentTimeMillis());
    System.out.println("process ended at " + endProc);
    System.out.println(success);
}
From source file:com.tetsuyaodaka.hadoop.math.matrix.MatrixMultiplication.java
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Date startProc = new Date(System.currentTimeMillis());
    System.out.println("process started at " + startProc);

    Configuration conf = new Configuration();
    int I = Integer.parseInt(args[3]);  // num of rows of MatrixA
    int K = Integer.parseInt(args[4]);  // num of rows of MatrixB'
    int IB = Integer.parseInt(args[5]); // row-block size of MatrixA
    int KB = Integer.parseInt(args[6]); // row-block size of MatrixB'

    int M = (I % IB == 0) ? I / IB : I / IB + 1; // num of row blocks of MatrixA
    int N = (K % KB == 0) ? K / KB : K / KB + 1; // num of row blocks of MatrixB'

    conf.set("I", args[3]);
    conf.set("K", args[4]);
    conf.set("IB", args[5]);
    conf.set("KB", args[6]);
    conf.set("M", Integer.toString(M));
    conf.set("N", Integer.toString(N));

    Job job = new Job(conf, "MatrixMultiplication");
    job.setJarByClass(MatrixMultiplication.class);
    job.setReducerClass(Reduce.class);
    job.setMapOutputKeyClass(MatrixMultiplication.IndexPair.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    // Register a dedicated mapper for each input matrix.
    MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, MapA.class); // matrix A
    MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, MapB.class); // matrix B
    FileOutputFormat.setOutputPath(job, new Path(args[2])); // output path

    System.out.println("num of MatrixA RowBlock(M) is " + M);
    System.out.println("num of MatrixB ColBlock(N) is " + N);

    boolean success = job.waitForCompletion(true);
    Date endProc = new Date(System.currentTimeMillis());
    System.out.println("process ended at " + endProc);
    System.out.println(success);
}
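Both matrix jobs key their map output on a custom IndexPair class whose body is not shown in this listing. A minimal WritableComparable sketch consistent with that usage (field names and sort order are assumptions) would be:

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.WritableComparable;

// Hypothetical sketch of the IndexPair key: a pair of block indices that
// Hadoop can serialize and sort. The real class body is not shown above.
public static class IndexPair implements WritableComparable<IndexPair> {
    public int row; // row-block index
    public int col; // column-block index

    public void write(DataOutput out) throws IOException {
        out.writeInt(row);
        out.writeInt(col);
    }

    public void readFields(DataInput in) throws IOException {
        row = in.readInt();
        col = in.readInt();
    }

    public int compareTo(IndexPair o) {
        int c = Integer.compare(row, o.row);
        return c != 0 ? c : Integer.compare(col, o.col);
    }

    @Override
    public int hashCode() {
        return 31 * row + col; // used by the default HashPartitioner
    }

    @Override
    public boolean equals(Object o) {
        return o instanceof IndexPair && ((IndexPair) o).row == row && ((IndexPair) o).col == col;
    }
}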
From source file:com.tetsuyaodaka.hadoop.math.matrix.TransformMatrix.java
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Date startProc = new Date(System.currentTimeMillis());
    System.out.println("process started at " + startProc);

    Configuration conf = new Configuration();
    if (args[2].equals("yes")) {
        conf.set("transpose", "true"); // transpose the matrix
    } else {
        conf.set("transpose", "false");
    }

    Job job = new Job(conf, "TransformMatrix");
    job.setJarByClass(TransformMatrix.class);
    job.setReducerClass(Reduce.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Text.class);

    // Register the mapper for the input matrix.
    MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, Map.class); // matrix A
    FileOutputFormat.setOutputPath(job, new Path(args[1])); // output path

    boolean success = job.waitForCompletion(true);
    Date endProc = new Date(System.currentTimeMillis());
    System.out.println("process ended at " + endProc);
    System.out.println(success);
}
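The "transpose" flag stored on the Configuration above is read back on the task side. A sketch of how the Map class might consume it (the class body is an assumption, not taken from TransformMatrix.Map):

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// Hypothetical mapper-side read of the flag the driver set with conf.set("transpose", ...).
public static class Map extends Mapper<LongWritable, Text, IntWritable, Text> {
    private boolean transpose;

    @Override
    protected void setup(Context context) {
        transpose = context.getConfiguration().getBoolean("transpose", false);
    }

    // map() would key each value by row index, or by column index when
    // transpose is true; the actual emit logic is not shown in the listing.
}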
From source file:com.tfm.utad.reducerdata.ReducerDataPig.java
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd-HH-mm-ss");
    Date date = new Date();
    Path inputPath = new Path("/home/jab/camus/reducer-data-pig");
    Path outputDir = new Path("/home/jab/camus/pigdata/" + sdf.format(date));

    // Create configuration
    Configuration conf = new Configuration(true);
    conf.set(FS_DEFAULT_FS, HDFS_LOCALHOST_LOCALDOMAIN);
    FileSystem fs = FileSystem.get(conf);
    Path filesPath = new Path(inputPath + "/*");
    FileStatus[] files = fs.globStatus(filesPath);

    // Create job
    Job job = new Job(conf, "ReducerDataPig");
    job.setJarByClass(ReducerDataPig.class);

    // Setup MapReduce
    job.setMapperClass(ReducerDataPigMapper.class);
    job.setReducerClass(ReducerDataPigReducer.class);
    job.setNumReduceTasks(1);

    // Specify key / value
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(ReducerPigKey.class);

    // Input
    FileInputFormat.addInputPath(job, inputPath);
    job.setInputFormatClass(SequenceFileInputFormat.class);

    // Output
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setOutputFormatClass(TextOutputFormat.class);

    // Delete output if exists
    if (fs.exists(outputDir)) {
        fs.delete(outputDir, true);
    }

    // Execute job
    int code = job.waitForCompletion(true) ? 0 : 1;
    if (code == 0) {
        Counters counters = job.getCounters();
        Counter malformedCounter = counters.findCounter(ReducerDataEnum.MALFORMED_DATA);
        LOG.info("Counter malformed data: " + malformedCounter.getValue());
        for (FileStatus fStatus : files) {
            LOG.info("File name: " + fStatus.getPath());
            if (fStatus.isFile()) {
                LOG.info("Removing file in path: " + fStatus.getPath());
                fs.delete(fStatus.getPath(), false);
            }
        }
    }
}
From source file:com.tfm.utad.reducerdata.ReducerDataVertica.java
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd-HH-mm-ss");
    Date date = new Date();
    Path inputPath = new Path("/home/jab/camus/reducer-data-vertica");
    Path outputDir = new Path("/home/jab/camus/verticadb/" + sdf.format(date));

    // Create configuration
    Configuration conf = new Configuration(true);
    conf.set(FS_DEFAULT_FS, HDFS_LOCALHOST_LOCALDOMAIN);
    FileSystem fs = FileSystem.get(conf);
    Path filesPath = new Path(inputPath + "/*");
    FileStatus[] files = fs.globStatus(filesPath);

    // Create job
    Job job = new Job(conf, "ReducerDataVertica");
    job.setJarByClass(ReducerDataVertica.class);

    // Setup MapReduce
    job.setMapperClass(ReducerDataVerticaMapper.class);
    job.setReducerClass(ReducerDataVerticaReducer.class);
    job.setNumReduceTasks(1);

    // Specify key / value
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(ReducerVerticaValue.class);

    // Input
    FileInputFormat.addInputPath(job, inputPath);
    job.setInputFormatClass(SequenceFileInputFormat.class);

    // Output
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setOutputFormatClass(TextOutputFormat.class);

    // Delete output if exists
    if (fs.exists(outputDir)) {
        fs.delete(outputDir, true);
    }

    // Execute job
    int code = job.waitForCompletion(true) ? 0 : 1;
    if (code == 0) {
        Counters counters = job.getCounters();
        Counter malformedCounter = counters.findCounter(ReducerDataEnum.MALFORMED_DATA);
        LOG.info("Counter malformed data: " + malformedCounter.getValue());
        for (FileStatus fStatus : files) {
            LOG.info("File name: " + fStatus.getPath());
            if (fStatus.isFile()) {
                LOG.info("Removing file in path: " + fStatus.getPath());
                fs.delete(fStatus.getPath(), false);
            }
        }
    }
}
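Both of the preceding drivers read ReducerDataEnum.MALFORMED_DATA back from the job's counters, so the mapper must increment it. A sketch of the mapper-side half (the key/value types and the isParseable validity check are assumptions):

// Hypothetical mapper fragment incrementing the counter the drivers read back.
@Override
protected void map(LongWritable key, Text value, Context context)
        throws IOException, InterruptedException {
    if (!isParseable(value)) { // hypothetical validity check
        // Visible to the driver through job.getCounters().findCounter(...)
        context.getCounter(ReducerDataEnum.MALFORMED_DATA).increment(1);
        return;
    }
    // ... emit the parsed record as usual ...
}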
From source file:com.tomslabs.grid.avro.AvroWordCount.java
License:Apache License
public static Job createSubmitableJob(final Configuration conf, final Path inputPath, final Path outputPath)
        throws IOException {
    conf.set(AvroFileOutputFormat.OUTPUT_SCHEMA, WordCountSchema.getSchema().toString());
    conf.setInt("mapred.max.split.size", 1024000);
    conf.setInt("mapred.reduce.tasks", 10);
    conf.setBoolean("mapred.reduce.tasks.speculative.execution", true);

    final Job job = new Job(conf, "Word Count");
    job.setJarByClass(AvroWordCount.class);
    job.setInputFormatClass(AvroFileInputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setMapperClass(WordCountMapper.class);
    job.setReducerClass(WordCountReducer.class);
    job.setOutputKeyClass(GenericRecord.class);
    job.setOutputValueClass(NullWritable.class);
    job.setOutputFormatClass(AvroFileOutputFormat.class);
    AvroFileOutputFormat.setDeflateLevel(job, 3);

    FileInputFormat.addInputPath(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);
    return job;
}
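createSubmitableJob only configures the job; a caller still has to submit it. A minimal driver sketch (the argument handling is an assumption, not from AvroWordCount):

// Hypothetical entry point: build the job via createSubmitableJob and run it.
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = createSubmitableJob(conf, new Path(args[0]), new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}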