Example usage for org.apache.hadoop.mapreduce Job Job

List of usage examples for org.apache.hadoop.mapreduce Job Job

Introduction

On this page you can find example usage of the org.apache.hadoop.mapreduce Job constructor.

Prototype

Job(Configuration conf, String jobName) throws IOException
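
A minimal driver sketch using this constructor (class and path names here are placeholders; note that in Hadoop 2.x this constructor is deprecated in favor of Job.getInstance(Configuration, String)):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class MyJobDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Build a Job from a Configuration and a human-readable job name.
        Job job = new Job(conf, "my job"); // deprecated; prefer Job.getInstance(conf, "my job")
        job.setJarByClass(MyJobDriver.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}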

Usage

From source file:com.tdunning.plume.local.lazy.MapRedExecutor.java

License:Apache License

/**
 * This method returns a Job instance out of a {@link MSCR} entity. It puts the class name of
 * the {@link PlumeWorkflow} argument and the MSCR id in the Hadoop configuration.
 *
 * @param mscr The MSCR to convert
 * @param workFlow The workflow whose class will be instantiated by hadoop mappers/reducers
 * @param workFlowOutputPath The temporary output path stored in the job configuration
 * @param outputPath The output path of the MapRed job
 * @return A hadoop-executable MapRed Job
 *
 * @throws IOException
 */
static Job getMapRed(final MSCR mscr, PlumeWorkflow workFlow, String workFlowOutputPath, String outputPath)
        throws IOException {

    Configuration conf = new Configuration();
    conf.set(WORKFLOW_NAME, workFlow.getClass().getName());
    conf.setInt(MSCR_ID, mscr.getId());
    conf.set(TEMP_OUTPUT_PATH, workFlowOutputPath);

    Job job = new Job(conf, "MSCR"); // TODO deprecation

    job.setMapOutputKeyClass(PlumeObject.class);
    job.setMapOutputValueClass(PlumeObject.class);

    job.setJarByClass(MapRedExecutor.class);

    /**
     * Define multiple inputs
     */
    for (PCollection<?> input : mscr.getInputs()) {
        if (!(input instanceof LazyCollection)) {
            throw new IllegalArgumentException("Can't create MapRed from MSCR whose inputs are not LazyTable");
        }
        LazyCollection<Text> l = (LazyCollection<Text>) input;
        if (!(l.isMaterialized() && l.getFile() != null)) {
            // Collections have plume ID only if they are intermediate results - TODO better naming for this
            if (l.getPlumeId().length() < 1) {
                throw new IllegalArgumentException(
                        "Can't create MapRed from MSCR inputs that are not materialized to a file");
            }
        }
        PCollectionType<?> rType = l.getType();
        Class<? extends InputFormat> format = SequenceFileInputFormat.class;
        if (rType instanceof PTableType) {
            PTableType<?, ?> tType = (PTableType<?, ?>) rType;
            if (tType.valueType() instanceof StringType && tType.keyType() instanceof StringType) {
                format = KeyValueTextInputFormat.class;
            }
            MultipleInputs.addInputPath(job, new Path(l.getFile()), format, MSCRMapper.class);
        } else {
            if (rType.elementType() instanceof StringType) {
                format = TextInputFormat.class;
            }
            MultipleInputs.addInputPath(job, new Path(l.getFile()), format, MSCRMapper.class);
        }
    }
    /**
     * Define multiple outputs
     */
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    for (Map.Entry<PCollection<?>, Integer> entry : mscr.getNumberedChannels().entrySet()) {
        PCollectionType<?> rType = ((LazyCollection<?>) mscr.getOutputChannels().get(entry.getKey()).output)
                .getType();
        if (rType instanceof PTableType) {
            PTableType<?, ?> tType = (PTableType<?, ?>) rType;
            Class<? extends OutputFormat> outputFormat = SequenceFileOutputFormat.class;
            if (tType.keyType() instanceof StringType && tType.valueType() instanceof StringType) {
                outputFormat = TextOutputFormat.class;
            }
            MultipleOutputs.addNamedOutput(job, entry.getValue() + "", outputFormat,
                    getHadoopType(tType.keyType()), getHadoopType(tType.valueType()));
        } else {
            Class<? extends OutputFormat> outputFormat = SequenceFileOutputFormat.class;
            if (rType.elementType() instanceof StringType) {
                outputFormat = TextOutputFormat.class;
            }
            MultipleOutputs.addNamedOutput(job, entry.getValue() + "", outputFormat, NullWritable.class,
                    getHadoopType(rType.elementType()));
        }
    }
    /**
     * Define Reducer & Combiner
     */
    job.setCombinerClass(MSCRCombiner.class);
    job.setReducerClass(MSCRReducer.class);

    job.setNumReduceTasks(1);
    return job;
}
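
On the reduce side, each channel registered above with MultipleOutputs.addNamedOutput is addressed by its numeric name. A minimal sketch of a reducer writing to such a named output, assuming Text keys and values (the internals of MSCRReducer are not shown in the source, so this is illustrative only):

import java.io.IOException;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;

public class NamedOutputReducer extends Reducer<Text, Text, Text, Text> {
    private MultipleOutputs<Text, Text> mos;

    @Override
    protected void setup(Context context) {
        mos = new MultipleOutputs<Text, Text>(context);
    }

    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        for (Text value : values) {
            mos.write("0", key, value); // "0" is a channel id registered via addNamedOutput
        }
    }

    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        mos.close(); // flush and close all named outputs
    }
}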

From source file:com.teradata.compaction.mapreduce.MergeParquetFilesMR.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = new Job(conf, "MergeParquet");

    if (args.length != 2) {
        System.err.println("Usage: java -jar MergeParquetFilesMR path_to_input_folder path_to_output_folder ");
        System.exit(0);//from  w w  w . j a v a2 s  . com
    }

    final Path inputPath = new Path(args[0]);
    final Path out = new Path(args[1]);

    Schema schemaParquetFile = getBaseSchema(inputPath, conf);
    job.setJarByClass(MergeParquetFilesMR.class);
    job.setMapperClass(SampleParquetMapper.class);
    job.setReducerClass(SampleParquetReducer.class);
    job.setInputFormatClass(AvroParquetInputFormat.class);
    job.setOutputFormatClass(AvroParquetOutputFormat.class);
    job.setMapOutputKeyClass(NullWritable.class);

    AvroJob.setMapOutputValueSchema(job, schemaParquetFile);
    AvroParquetOutputFormat.setSchema(job, schemaParquetFile);
    FileInputFormat.addInputPath(job, inputPath);
    AvroParquetOutputFormat.setOutputPath(job, out);
    job.setNumReduceTasks(1);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
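
Packaged as a jar, this driver would be launched along the lines of hadoop jar MergeParquetFilesMR.jar path_to_input_folder path_to_output_folder (the jar name is assumed from the usage string; the two folder arguments match the check in main).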

From source file:com.tetsuyaodaka.hadoop.math.matrix.MatrixMult.java

public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {

    Date startProc = new Date(System.currentTimeMillis());
    System.out.println("process started at " + startProc);

    Configuration conf = new Configuration();
    int I = Integer.parseInt(args[3]); // number of rows of MatrixA
    int K = Integer.parseInt(args[4]); // number of rows of MatrixB'

    int IB = Integer.parseInt(args[5]); // row-block size of MatrixA
    int KB = Integer.parseInt(args[6]); // row-block size of MatrixB'

    int M = 0;
    if (I % IB == 0) {
        M = I / IB;
    } else {
        M = I / IB + 1;
    }

    int N = 0;
    if (K % KB == 0) {
        N = K / KB;
    } else {
        N = K / KB + 1;
    }

    conf.set("I", args[3]); // Num of Row of MatrixA
    conf.set("K", args[4]); // Num of Row of MatrixB'
    conf.set("IB", args[5]); // RowBlock Size of MatrixA
    conf.set("KB", args[6]); // RowBlock Size of MatrixB'
    conf.set("M", new Integer(M).toString());
    conf.set("N", new Integer(N).toString());

    Job job = new Job(conf, "MatrixMultiplication");
    job.setJarByClass(MatrixMult.class);

    job.setReducerClass(Reduce.class);

    job.setMapOutputKeyClass(MatrixMult.IndexPair.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    // Add one mapper per input matrix
    MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, MapA.class); // matrix A
    MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, MapB.class); // matrix B
    FileOutputFormat.setOutputPath(job, new Path(args[2])); // output path

    System.out.println("num of MatrixA RowBlock(M) is " + M);
    System.out.println("num of MatrixB RowBlock(N) is " + N);

    boolean success = job.waitForCompletion(true);

    Date endProc = new Date(System.currentTimeMillis());
    System.out.println("process ended at " + endProc);

    System.out.println(success);
}
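
The if/else blocks above compute ceiling divisions; for positive operands the same values can be obtained in one step with the usual integer idiom:

    int M = (I + IB - 1) / IB; // ceil(I / IB)
    int N = (K + KB - 1) / KB; // ceil(K / KB)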

From source file:com.tetsuyaodaka.hadoop.math.matrix.MatrixMultiplication.java

public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {

    Date startProc = new Date(System.currentTimeMillis());
    System.out.println("process started at " + startProc);

    Configuration conf = new Configuration();
    int I = Integer.parseInt(args[3]); // number of rows of MatrixA
    int K = Integer.parseInt(args[4]); // number of rows of MatrixB'

    int IB = Integer.parseInt(args[5]); // row-block size of MatrixA
    int KB = Integer.parseInt(args[6]); // row-block size of MatrixB'

    int M = 0;
    if (I % IB == 0) {
        M = I / IB;
    } else {
        M = I / IB + 1;
    }

    int N = 0;
    if (K % KB == 0) {
        N = K / KB;
    } else {
        N = K / KB + 1;
    }

    conf.set("I", args[3]); // Num of Row of MatrixA
    conf.set("K", args[4]); // Num of Row of MatrixB'
    conf.set("IB", args[5]); // RowBlock Size of MatrixA
    conf.set("KB", args[6]); // RowBlock Size of MatrixB'
    conf.set("M", new Integer(M).toString());
    conf.set("N", new Integer(N).toString());

    Job job = new Job(conf, "MatrixMultiplication");
    job.setJarByClass(MatrixMultiplication.class);

    job.setReducerClass(Reduce.class);

    job.setMapOutputKeyClass(MatrixMultiplication.IndexPair.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    // Add one mapper per input matrix
    MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, MapA.class); // matrix A
    MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, MapB.class); // matrix B
    FileOutputFormat.setOutputPath(job, new Path(args[2])); // output path

    System.out.println("num of MatrixA RowBlock(M) is " + M);
    System.out.println("num of MatrixB ColBlock(N) is " + N);

    boolean success = job.waitForCompletion(true);

    Date endProc = new Date(System.currentTimeMillis());
    System.out.println("process ended at " + endProc);

    System.out.println(success);
}

From source file:com.tetsuyaodaka.hadoop.math.matrix.TransformMatrix.java

public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {

    Date startProc = new Date(System.currentTimeMillis());
    System.out.println("process started at " + startProc);

    Configuration conf = new Configuration();
    if (args[2].equals("yes")) {
        conf.set("transpose", "true"); // transpose
    } else {
        conf.set("transpose", "false");
    }

    Job job = new Job(conf, "TransformMatrix");
    job.setJarByClass(TransformMatrix.class);

    job.setReducerClass(Reduce.class);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Text.class);

    // Add the mapper for the input matrix
    MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, Map.class); // input matrix
    FileOutputFormat.setOutputPath(job, new Path(args[1])); // output path

    boolean success = job.waitForCompletion(true);

    Date endProc = new Date(System.currentTimeMillis());
    System.out.println("process ended at " + endProc);

    System.out.println(success);
}
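
The "transpose" flag stored in the Configuration travels with the job and can be read back inside the tasks. A minimal sketch of a mapper consulting it (the body of the source's Map class is not shown, so this is illustrative only):

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class TransposeAwareMapper extends Mapper<LongWritable, Text, IntWritable, Text> {
    private boolean transpose;

    @Override
    protected void setup(Context context) {
        // Reads the value the driver stored with conf.set("transpose", "true"/"false")
        transpose = context.getConfiguration().getBoolean("transpose", false);
    }
}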

From source file:com.tfm.utad.reducerdata.ReducerDataPig.java

public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {

    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd-HH-mm-ss");
    Date date = new Date();

    Path inputPath = new Path("/home/jab/camus/reducer-data-pig");
    Path outputDir = new Path("/home/jab/camus/pigdata/" + sdf.format(date));

    // Create configuration
    Configuration conf = new Configuration(true);
    conf.set(FS_DEFAULT_FS, HDFS_LOCALHOST_LOCALDOMAIN);
    FileSystem fs = FileSystem.get(conf);
    Path filesPath = new Path(inputPath + "/*");
    FileStatus[] files = fs.globStatus(filesPath);

    // Create job
    Job job = new Job(conf, "ReducerDataPig");
    job.setJarByClass(ReducerDataPig.class);

    // Setup MapReduce
    job.setMapperClass(ReducerDataPigMapper.class);
    job.setReducerClass(ReducerDataPigReducer.class);
    job.setNumReduceTasks(1);

    // Specify key / value
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(ReducerPigKey.class);

    // Input
    FileInputFormat.addInputPath(job, inputPath);
    job.setInputFormatClass(SequenceFileInputFormat.class);

    // Output
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setOutputFormatClass(TextOutputFormat.class);

    // Delete output if exists
    if (fs.exists(outputDir)) {
        fs.delete(outputDir, true);
    }

    // Execute job
    int code = job.waitForCompletion(true) ? 0 : 1;
    if (code == 0) {
        Counters counters = job.getCounters();
        Counter malformedCounter = counters.findCounter(ReducerDataEnum.MALFORMED_DATA);
        LOG.info("Counter malformed data: " + malformedCounter.getValue());
        for (FileStatus fStatus : files) {
            LOG.info("File name:" + fStatus.getPath());
            if (fStatus.isFile()) {
                LOG.info("Removing file in path:" + fStatus.getPath());
                fs.delete(fStatus.getPath(), false);
            }
        }
    }
}
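
The MALFORMED_DATA counter read after the job finishes is a user-defined counter. A minimal sketch of the enum and of a task incrementing it (the enum's actual definition and the task types are not shown in the source, so this sketch is an assumption):

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// Hypothetical definition matching the counter used above.
enum ReducerDataEnum {
    MALFORMED_DATA
}

public class ParsingMapper extends Mapper<LongWritable, Text, LongWritable, Text> {
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        try {
            context.write(key, value); // ... parse and emit ...
        } catch (RuntimeException e) {
            // Count records that fail to parse; the driver reads this via job.getCounters().
            context.getCounter(ReducerDataEnum.MALFORMED_DATA).increment(1);
        }
    }
}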

From source file:com.tfm.utad.reducerdata.ReducerDataVertica.java

public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {

    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd-HH-mm-ss");
    Date date = new Date();

    Path inputPath = new Path("/home/jab/camus/reducer-data-vertica");
    Path outputDir = new Path("/home/jab/camus/verticadb/" + sdf.format(date));

    // Create configuration
    Configuration conf = new Configuration(true);
    conf.set(FS_DEFAULT_FS, HDFS_LOCALHOST_LOCALDOMAIN);
    FileSystem fs = FileSystem.get(conf);
    Path filesPath = new Path(inputPath + "/*");
    FileStatus[] files = fs.globStatus(filesPath);

    // Create job
    Job job = new Job(conf, "ReducerDataVertica");
    job.setJarByClass(ReducerDataVertica.class);

    // Setup MapReduce
    job.setMapperClass(ReducerDataVerticaMapper.class);
    job.setReducerClass(ReducerDataVerticaReducer.class);
    job.setNumReduceTasks(1);

    // Specify key / value
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(ReducerVerticaValue.class);

    // Input
    FileInputFormat.addInputPath(job, inputPath);
    job.setInputFormatClass(SequenceFileInputFormat.class);

    // Output
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setOutputFormatClass(TextOutputFormat.class);

    // Delete output if exists
    if (fs.exists(outputDir)) {
        fs.delete(outputDir, true);
    }

    // Execute job
    int code = job.waitForCompletion(true) ? 0 : 1;
    if (code == 0) {
        Counters counters = job.getCounters();
        Counter malformedCounter = counters.findCounter(ReducerDataEnum.MALFORMED_DATA);
        LOG.info("Counter malformed data: " + malformedCounter.getValue());
        for (FileStatus fStatus : files) {
            LOG.info("File name:" + fStatus.getPath());
            if (fStatus.isFile()) {
                LOG.info("Removing file in path:" + fStatus.getPath());
                fs.delete(fStatus.getPath(), false);
            }
        }
    }
}

From source file:com.tomslabs.grid.avro.AvroWordCount.java

License:Apache License

public static Job createSubmitableJob(final Configuration conf, final Path inputPath, final Path outputPath)
        throws IOException {

    conf.set(AvroFileOutputFormat.OUTPUT_SCHEMA, WordCountSchema.getSchema().toString());

    conf.setInt("mapred.max.split.size", 1024000);
    conf.setInt("mapred.reduce.tasks", 10);
    conf.setBoolean("mapred.reduce.tasks.speculative.execution", true);
    final Job job = new Job(conf, "Word Count");
    job.setJarByClass(AvroWordCount.class);

    job.setInputFormatClass(AvroFileInputFormat.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setMapperClass(WordCountMapper.class);

    job.setReducerClass(WordCountReducer.class);

    job.setOutputKeyClass(GenericRecord.class);
    job.setOutputValueClass(NullWritable.class);
    job.setOutputFormatClass(AvroFileOutputFormat.class);
    AvroFileOutputFormat.setDeflateLevel(job, 3);

    FileInputFormat.addInputPath(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);

    return job;
}
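
A typical driver for this factory method, sketched under the assumption of a standard two-argument main:

public static void main(String[] args) throws Exception {
    Job job = createSubmitableJob(new Configuration(), new Path(args[0]), new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}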