Example usage for org.apache.hadoop.mapreduce Job setMapOutputValueClass

List of usage examples for org.apache.hadoop.mapreduce Job setMapOutputValueClass

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce Job setMapOutputValueClass.

Prototype

public void setMapOutputValueClass(Class<?> theClass) throws IllegalStateException 

Source Link

Document

Set the value class for the map output data. This allows the map output value class to differ from the job's final output value class.
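
A minimal, self-contained sketch of the call (the class and job names here are illustrative, not taken from any example below). The map output value class only needs to be set explicitly when the mapper emits a different value type than the job's final output; if left unset, it defaults to the class given to setOutputValueClass. The call must be made before the job is submitted, otherwise it throws IllegalStateException.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;

public class SetMapOutputValueClassSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "sketch");
        // the mapper emits (Text, IntWritable) pairs into the shuffle...
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        // ...while the reducer writes (Text, Text) records
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
    }
}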

Usage

From source file:com.telefonica.iot.tidoop.mrlib.jobs.Filter.java

License:Open Source License

@Override
public int run(String[] args) throws Exception {
    // check the number of arguments, show the usage if it is wrong
    if (args.length != 3) {
        showUsage();
        return -1;
    } // if

    // get the arguments
    String input = args[0];
    String output = args[1];
    String regex = args[2];

    // create and configure a MapReduce job
    Configuration conf = this.getConf();
    conf.set(Constants.PARAM_REGEX, regex);
    Job job = Job.getInstance(conf, "tidoop-mr-lib-filter");
    job.setNumReduceTasks(1);
    job.setJarByClass(Filter.class);
    job.setMapperClass(LineFilter.class);
    job.setCombinerClass(LinesCombiner.class);
    job.setReducerClass(LinesJoiner.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job, new Path(input));
    FileOutputFormat.setOutputPath(job, new Path(output));

    // run the MapReduce job
    return job.waitForCompletion(true) ? 0 : 1;
}
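
Note that in this example the map output value class (Text) happens to match the final output value class, while the map output key (Text) differs from the final key (NullWritable). Whenever the intermediate types differ from the final ones like this, they must be declared explicitly with setMapOutputKeyClass/setMapOutputValueClass so the shuffle can serialize and sort them.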

From source file:com.telefonica.iot.tidoop.mrlib.jobs.MapOnly.java

License:Open Source License

@Override
public int run(String[] args) throws Exception {
    // check the number of arguments, show the usage if it is wrong
    if (args.length != 3) {
        showUsage();
        return -1;
    } // if

    // get the arguments
    String input = args[0];
    String output = args[1];
    String mapFunction = args[2];

    // create and configure a MapReduce job
    Configuration conf = this.getConf();
    conf.set(Constants.PARAM_FUNCTION, mapFunction);
    Job job = Job.getInstance(conf, "tidoop-mr-lib-maponly");
    job.setNumReduceTasks(0);
    job.setJarByClass(MapOnly.class);
    job.setMapperClass(CustomMapper.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job, new Path(input));
    FileOutputFormat.setOutputPath(job, new Path(output));

    // run the MapReduce job
    return job.waitForCompletion(true) ? 0 : 1;
}
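
With setNumReduceTasks(0) this is a map-only job: the mapper's records go straight to the output format, so there is no shuffle and the map output classes match the final output classes (NullWritable/Text). The explicit setMapOutputValueClass call is effectively redundant here, but it documents the mapper's contract.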

From source file:com.test.hadoop.unoExample.CardDriver.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    String input, output;
    if (args.length == 2) {
        input = args[0];
        output = args[1];
    } else {
        System.err.println("Incorrect number of arguments.  Expected: input output");
        return -1;
    }

    Job job = Job.getInstance(getConf());
    job.setJarByClass(CardDriver.class);
    job.setJobName(this.getClass().getName());

    FileInputFormat.setInputPaths(job, new Path(input));
    FileOutputFormat.setOutputPath(job, new Path(output));

    job.setMapperClass(CardMapper.class);
    job.setReducerClass(CardTotalReducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;
}
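
Here the intermediate and final types are identical (Text/IntWritable), so the setMapOutputKeyClass/setMapOutputValueClass calls could be omitted: when they are not set, Hadoop falls back to the classes given to setOutputKeyClass/setOutputValueClass. Declaring them explicitly is still common practice, since it keeps the mapper's types visible in the driver.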

From source file:com.tetsuyaodaka.hadoop.math.matrix.MatrixMult.java

public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {

    Date startProc = new Date(System.currentTimeMillis());
    System.out.println("process started at " + startProc);

    Configuration conf = new Configuration();
    int I = Integer.parseInt(args[3]); // number of rows of MatrixA
    int K = Integer.parseInt(args[4]); // number of rows of MatrixB'

    int IB = Integer.parseInt(args[5]); // row-block size of MatrixA
    int KB = Integer.parseInt(args[6]); // row-block size of MatrixB'

    int M = 0;
    if (I % IB == 0) {
        M = I / IB;
    } else {
        M = I / IB + 1;
    }

    int N = 0;
    if (K % KB == 0) {
        N = K / KB;
    } else {
        N = K / KB + 1;
    }

    conf.set("I", args[3]); // Num of Row of MatrixA
    conf.set("K", args[4]); // Num of Row of MatrixB'
    conf.set("IB", args[5]); // RowBlock Size of MatrixA
    conf.set("KB", args[6]); // RowBlock Size of MatrixB'
    conf.set("M", new Integer(M).toString());
    conf.set("N", new Integer(N).toString());

    Job job = new Job(conf, "MatrixMultiplication");
    job.setJarByClass(MatrixMult.class);

    job.setReducerClass(Reduce.class);

    job.setMapOutputKeyClass(MatrixMult.IndexPair.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    // configure the Mappers, one per input matrix
    MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, MapA.class); // matrixA
    MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, MapB.class); // matrixB
    FileOutputFormat.setOutputPath(job, new Path(args[2])); // output path

    System.out.println("num of MatrixA RowBlock(M) is " + M);
    System.out.println("num of MatrixB RowBlock(N) is " + N);

    boolean success = job.waitForCompletion(true);

    Date endProc = new Date(System.currentTimeMillis());
    System.out.println("process ended at " + endProc);

    System.out.println(success);
}

From source file:com.tetsuyaodaka.hadoop.math.matrix.MatrixMultiplication.java

public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {

    Date startProc = new Date(System.currentTimeMillis());
    System.out.println("process started at " + startProc);

    Configuration conf = new Configuration();
    int I = Integer.parseInt(args[3]); // number of rows of MatrixA
    int K = Integer.parseInt(args[4]); // number of rows of MatrixB'

    int IB = Integer.parseInt(args[5]); // row-block size of MatrixA
    int KB = Integer.parseInt(args[6]); // row-block size of MatrixB'

    int M = 0;
    if (I % IB == 0) {
        M = I / IB;
    } else {
        M = I / IB + 1;
    }

    int N = 0;
    if (K % KB == 0) {
        N = K / KB;
    } else {
        N = K / KB + 1;
    }

    conf.set("I", args[3]); // Num of Row of MatrixA
    conf.set("K", args[4]); // Num of Row of MatrixB'
    conf.set("IB", args[5]); // RowBlock Size of MatrixA
    conf.set("KB", args[6]); // RowBlock Size of MatrixB'
    conf.set("M", new Integer(M).toString());
    conf.set("N", new Integer(N).toString());

    Job job = new Job(conf, "MatrixMultiplication");
    job.setJarByClass(MatrixMultiplication.class);

    job.setReducerClass(Reduce.class);

    job.setMapOutputKeyClass(MatrixMultiplication.IndexPair.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    // configure the Mappers, one per input matrix
    MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, MapA.class); // matrixA
    MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, MapB.class); // matrixB
    FileOutputFormat.setOutputPath(job, new Path(args[2])); // output path

    System.out.println("num of MatrixA RowBlock(M) is " + M);
    System.out.println("num of MatrixB ColBlock(N) is " + N);

    boolean success = job.waitForCompletion(true);

    Date endProc = new Date(System.currentTimeMillis());
    System.out.println("process ended at " + endProc);

    System.out.println(success);
}
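
Both matrix examples above use a custom class (IndexPair) as the map output key; its source is not reproduced on this page. Any custom map output key must implement WritableComparable so the shuffle can serialize and sort it. The following is a minimal sketch of what such a pair key might look like; the field names and comparison order are assumptions, not the actual IndexPair implementation.

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.WritableComparable;

public class IndexPairSketch implements WritableComparable<IndexPairSketch> {
    private int row; // block row index (assumed)
    private int col; // block column index (assumed)

    public IndexPairSketch() {
        // Hadoop instantiates keys reflectively, so a no-arg constructor is required
    }

    public IndexPairSketch(int row, int col) {
        this.row = row;
        this.col = col;
    }

    @Override
    public void write(DataOutput out) throws IOException {
        out.writeInt(row);
        out.writeInt(col);
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        row = in.readInt();
        col = in.readInt();
    }

    @Override
    public int compareTo(IndexPairSketch other) {
        int c = Integer.compare(row, other.row);
        return c != 0 ? c : Integer.compare(col, other.col);
    }

    @Override
    public int hashCode() {
        // a stable hash matters: the default HashPartitioner uses it
        return 31 * row + col;
    }

    @Override
    public boolean equals(Object o) {
        if (!(o instanceof IndexPairSketch)) {
            return false;
        }
        IndexPairSketch p = (IndexPairSketch) o;
        return row == p.row && col == p.col;
    }
}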

From source file:com.tetsuyaodaka.hadoop.math.matrix.TransformMatrix.java

public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {

    Date startProc = new Date(System.currentTimeMillis());
    System.out.println("process started at " + startProc);

    Configuration conf = new Configuration();
    if (args[2].equals("yes")) {
        conf.set("transpose", "true"); // transpose
    } else {/* w ww .java2s.co m*/
        conf.set("transpose", "false"); // 
    }

    Job job = new Job(conf, "MatrixMultiplication");
    job.setJarByClass(TransformMatrix.class);

    job.setReducerClass(Reduce.class);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Text.class);

    // configure the Mapper
    MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, Map.class); // matrixA
    FileOutputFormat.setOutputPath(job, new Path(args[1])); // output path

    boolean success = job.waitForCompletion(true);

    Date endProc = new Date(System.currentTimeMillis());
    System.out.println("process ended at " + endProc);

    System.out.println(success);
}
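
The transpose flag is shipped to the tasks through the Configuration; the Map and Reduce classes (not shown here) would read it back with context.getConfiguration().get("transpose"). Passing parameters through the Configuration like this is the standard way to parameterize Hadoop tasks, since the driver's objects are not serialized to the cluster.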

From source file:com.toddbodnar.simpleHadoop.distributedHadoopDriver.java

/**
 * Runs a job.
 *
 * @param theJob the MapReduceJob to be run
 * @param verbose if true, output progress information
 */
public static void run(MapReduceJob theJob, boolean verbose)
        throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = GetConfiguration.get();
    Job job = Job.getInstance(conf, theJob.toString());
    job.setJarByClass(distributedHadoopDriver.class);

    job.setMapperClass(theJob.getMapper().getClass());
    job.setReducerClass(theJob.getReducer().getClass());

    job.setMapOutputKeyClass(theJob.getKeyType());
    job.setMapOutputValueClass(theJob.getValueType());

    theJob.writeConfig(job.getConfiguration());

    hdfsFile input = hdfsFile.transferToHDFS(theJob.getInput().getFile());
    if (!input.equals(theJob.getInput().getFile())) {
        garbage_collector.noteCreated(input);
    }
    if (theJob.getClass().equals(join.class)) {
        join jobLeftJoin = (join) theJob;

        hdfsFile input2 = hdfsFile.transferToHDFS(jobLeftJoin.getOtherInput().getFile());
        if (!input2.equals(jobLeftJoin.getOtherInput().getFile())) {
            garbage_collector.noteCreated(input2);
        }

        Mapper[] maps = jobLeftJoin.getMapperPairs();
        MultipleInputs.addInputPath(job, input.getPath(), TextInputFormat.class, maps[0].getClass());
        MultipleInputs.addInputPath(job, input2.getPath(), TextInputFormat.class, maps[1].getClass());
    } else {
        MultipleInputs.addInputPath(job, input.getPath(), TextInputFormat.class);
    }

    job.getConfiguration().set(TextOutputFormat.SEPERATOR, "");

    job.setOutputFormatClass(TextOutputFormat.class);

    //FileInputFormat.setInputPaths(job, new Path(theJob.getInput().getFile().getLocation()));
    Path out = new Path(settings.hdfs_prefix + "/TMP_TABLE_" + theJob.hashCode());
    FileOutputFormat.setOutputPath(job, out);

    boolean success = job.waitForCompletion(verbose);

    if (!success) {
        System.err.println("Error processing " + theJob);
        return;
    }

    FileSystem fs = FileSystem.get(GetConfiguration.get());

    fs.delete(new Path(out, "_SUCCESS"), false);

    table output = new table(new hdfsFile(out), theJob.getOutput().getColNames());
    output.setSeperator(theJob.getOutput().getSeperator());

    theJob.setOutput(output);

    garbage_collector.noteCreated(output.getFile());
}
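
Because setMapOutputKeyClass and setMapOutputValueClass take plain Class arguments, a generic driver like this one can choose the intermediate types at runtime from the job description (theJob.getKeyType() and theJob.getValueType()) instead of hard-coding them.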

From source file:com.tomslabs.grid.avro.AvroWordCount.java

License:Apache License

public static Job createSubmitableJob(final Configuration conf, final Path inputPath, final Path outputPath)
        throws IOException {

    conf.set(AvroFileOutputFormat.OUTPUT_SCHEMA, WordCountSchema.getSchema().toString());

    conf.setInt("mapred.max.split.size", 1024000);
    conf.setInt("mapred.reduce.tasks", 10);
    conf.setBoolean("mapred.reduce.tasks.speculative.execution", true);
    final Job job = Job.getInstance(conf, "Word Count");
    job.setJarByClass(AvroWordCount.class);

    job.setInputFormatClass(AvroFileInputFormat.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setMapperClass(WordCountMapper.class);

    job.setReducerClass(WordCountReducer.class);

    job.setOutputKeyClass(GenericRecord.class);
    job.setOutputValueClass(NullWritable.class);
    job.setOutputFormatClass(AvroFileOutputFormat.class);
    AvroFileOutputFormat.setDeflateLevel(job, 3);

    FileInputFormat.addInputPath(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);

    return job;
}
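
This job illustrates the case setMapOutputValueClass exists for: the intermediate records (Text/IntWritable) are completely decoupled from the final output (GenericRecord/NullWritable, written by a custom Avro output format), so the map output classes must be declared explicitly.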

From source file:com.toshiba.mwcloud.gs.hadoop.mapreduce.examples.GSWordCount.java

License:Apache License

/**
 * Run a MapReduce job of WordCount.
 *
 * @param args command arguments
 * @return 0 for normal termination of the job and 1 otherwise
 * @throws Exception if processing failed
 */
public int run(String[] args) throws Exception {
    GSConf gsConf = new GSConf();
    gsConf.parseArg(args);

    Configuration conf = getConf();
    gsConf.setup(conf);

    Job job = Job.getInstance(conf, APP_NAME);
    job.setJarByClass(GSWordCount.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(GSRowWritable.class);

    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);

    job.setInputFormatClass(GSRowInputFormat.class);
    job.setOutputFormatClass(GSRowOutputFormat.class);

    int res = job.waitForCompletion(true) ? 0 : 1;

    if (res == 0) {
        printResult(gsConf);
    }

    return res;
}

From source file:com.trexinhca.TrexinHCATest.java

License:Apache License

public static void main(String[] args) throws Exception {

    ks = KieServices.Factory.get();
    kContainer = ks.getKieClasspathContainer();
    ksession = TrexinHCATest.kContainer.newKieSession("MapReduceKS");
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: TrexinHCATest <in> [<in>...] <out>");
        System.exit(2);
    }
    Job job = Job.getInstance(conf);
    job.setJobName("HCATest");
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(TrexinHCAReducer.class);
    job.setReducerClass(TrexinHCAReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class); // key type emitted by the reducer, not an output-format class
    job.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    job.setJarByClass(TrexinHCATest.class);
    System.exit(job.waitForCompletion(true) ? 0 : 1);

}