Example usage for org.apache.hadoop.mapreduce Job setMapOutputValueClass

List of usage examples for org.apache.hadoop.mapreduce Job setMapOutputValueClass

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce Job setMapOutputValueClass.

Prototype

public void setMapOutputValueClass(Class<?> theClass) throws IllegalStateException 

Source Link

Document

Set the value class for the map output data. This allows the map output value class to differ from the job's final output value class.
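
A minimal, self-contained sketch of the call (the class and job names here are illustrative, not taken from any example below). The map output value class only needs to be set explicitly when the mapper emits a different value type than the job's final output; if left unset, it defaults to the class given to setOutputValueClass. The call must be made before the job is submitted, otherwise it throws IllegalStateException.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;

public class SetMapOutputValueClassSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "sketch");
        // the mapper emits (Text, IntWritable) pairs into the shuffle...
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        // ...while the reducer writes (Text, Text) records
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
    }
}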

Usage

From source file:com.telefonica.iot.tidoop.mrlib.jobs.Filter.java

License:Open Source License

@Override
public int run(String[] args) throws Exception {
    // check the number of arguments, show the usage if it is wrong
    if (args.length != 3) {
        showUsage();
        return -1;
    } // if

    // get the arguments
    String input = args[0];
    String output = args[1];
    String regex = args[2];

    // create and configure a MapReduce job
    Configuration conf = this.getConf();
    conf.set(Constants.PARAM_REGEX, regex);
    Job job = Job.getInstance(conf, "tidoop-mr-lib-filter");
    job.setNumReduceTasks(1);
    job.setJarByClass(Filter.class);
    job.setMapperClass(LineFilter.class);
    job.setCombinerClass(LinesCombiner.class);
    job.setReducerClass(LinesJoiner.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job, new Path(input));
    FileOutputFormat.setOutputPath(job, new Path(output));

    // run the MapReduce job
    return job.waitForCompletion(true) ? 0 : 1;
}
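
Note that in this example the map output value class (Text) happens to match the final output value class, while the map output key (Text) differs from the final key (NullWritable). Whenever the intermediate types differ from the final ones like this, they must be declared explicitly with setMapOutputKeyClass/setMapOutputValueClass so the shuffle can serialize and sort them.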

From source file:com.telefonica.iot.tidoop.mrlib.jobs.MapOnly.java

License:Open Source License

@Override
public int run(String[] args) throws Exception {
    // check the number of arguments, show the usage if it is wrong
    if (args.length != 3) {
        showUsage();
        return -1;
    } // if

    // get the arguments
    String input = args[0];
    String output = args[1];
    String mapFunction = args[2];

    // create and configure a MapReduce job
    Configuration conf = this.getConf();
    conf.set(Constants.PARAM_FUNCTION, mapFunction);
    Job job = Job.getInstance(conf, "tidoop-mr-lib-maponly");
    job.setNumReduceTasks(0);
    job.setJarByClass(MapOnly.class);
    job.setMapperClass(CustomMapper.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job, new Path(input));
    FileOutputFormat.setOutputPath(job, new Path(output));

    // run the MapReduce job
    return job.waitForCompletion(true) ? 0 : 1;
}
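
With setNumReduceTasks(0) this is a map-only job: the mapper's records go straight to the output format, so there is no shuffle and the map output classes match the final output classes (NullWritable/Text). The explicit setMapOutputValueClass call is effectively redundant here, but it documents the mapper's contract.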

From source file:com.test.hadoop.unoExample.CardDriver.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    String input, output;
    if (args.length == 2) {
        input = args[0];
        output = args[1];
    } else {
        System.err.println("Incorrect number of arguments.  Expected: input output");
        return -1;
    }

    Job job = Job.getInstance(getConf());
    job.setJarByClass(CardDriver.class);
    job.setJobName(this.getClass().getName());

    FileInputFormat.setInputPaths(job, new Path(input));
    FileOutputFormat.setOutputPath(job, new Path(output));

    job.setMapperClass(CardMapper.class);
    job.setReducerClass(CardTotalReducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;
}
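
Here the intermediate and final types are identical (Text/IntWritable), so the setMapOutputKeyClass/setMapOutputValueClass calls could be omitted: when they are not set, Hadoop falls back to the classes given to setOutputKeyClass/setOutputValueClass. Declaring them explicitly is still common practice, since it keeps the mapper's types visible in the driver.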

From source file:com.tetsuyaodaka.hadoop.math.matrix.MatrixMult.java

public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {

    Date startProc = new Date(System.currentTimeMillis());
    System.out.println("process started at " + startProc);

    Configuration conf = new Configuration();
    int I = Integer.parseInt(args[3]); // number of rows of MatrixA
    int K = Integer.parseInt(args[4]); // number of rows of MatrixB'

    int IB = Integer.parseInt(args[5]); // row-block size of MatrixA
    int KB = Integer.parseInt(args[6]); // row-block size of MatrixB'

    int M = 0;
    if (I % IB == 0) {
        M = I / IB;
    } else {
        M = I / IB + 1;
    }

    int N = 0;
    if (K % KB == 0) {
        N = K / KB;
    } else {
        N = K / KB + 1;
    }

    conf.set("I", args[3]); // Num of Row of MatrixA
    conf.set("K", args[4]); // Num of Row of MatrixB'
    conf.set("IB", args[5]); // RowBlock Size of MatrixA
    conf.set("KB", args[6]); // RowBlock Size of MatrixB'
    conf.set("M", new Integer(M).toString());
    conf.set("N", new Integer(N).toString());

    Job job = new Job(conf, "MatrixMultiplication");
    job.setJarByClass(MatrixMult.class);

    job.setReducerClass(Reduce.class);

    job.setMapOutputKeyClass(MatrixMult.IndexPair.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    // configure the Mappers, one per input matrix
    MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, MapA.class); // matrixA
    MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, MapB.class); // matrixB
    FileOutputFormat.setOutputPath(job, new Path(args[2])); // output path

    System.out.println("num of MatrixA RowBlock(M) is " + M);
    System.out.println("num of MatrixB RowBlock(N) is " + N);

    boolean success = job.waitForCompletion(true);

    Date endProc = new Date(System.currentTimeMillis());
    System.out.println("process ended at " + endProc);

    System.out.println(success);
}

From source file:com.tetsuyaodaka.hadoop.math.matrix.MatrixMultiplication.java

public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {

    Date startProc = new Date(System.currentTimeMillis());
    System.out.println("process started at " + startProc);

    Configuration conf = new Configuration();
    int I = Integer.parseInt(args[3]); // number of rows of MatrixA
    int K = Integer.parseInt(args[4]); // number of rows of MatrixB'

    int IB = Integer.parseInt(args[5]); // row-block size of MatrixA
    int KB = Integer.parseInt(args[6]); // row-block size of MatrixB'

    int M = 0;
    if (I % IB == 0) {
        M = I / IB;
    } else {
        M = I / IB + 1;
    }

    int N = 0;
    if (K % KB == 0) {
        N = K / KB;
    } else {
        N = K / KB + 1;
    }

    conf.set("I", args[3]); // Num of Row of MatrixA
    conf.set("K", args[4]); // Num of Row of MatrixB'
    conf.set("IB", args[5]); // RowBlock Size of MatrixA
    conf.set("KB", args[6]); // RowBlock Size of MatrixB'
    conf.set("M", new Integer(M).toString());
    conf.set("N", new Integer(N).toString());

    Job job = new Job(conf, "MatrixMultiplication");
    job.setJarByClass(MatrixMultiplication.class);

    job.setReducerClass(Reduce.class);

    job.setMapOutputKeyClass(MatrixMultiplication.IndexPair.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    // configure the Mappers, one per input matrix
    MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, MapA.class); // matrixA
    MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, MapB.class); // matrixB
    FileOutputFormat.setOutputPath(job, new Path(args[2])); // output path

    System.out.println("num of MatrixA RowBlock(M) is " + M);
    System.out.println("num of MatrixB ColBlock(N) is " + N);

    boolean success = job.waitForCompletion(true);

    Date endProc = new Date(System.currentTimeMillis());
    System.out.println("process ended at " + endProc);

    System.out.println(success);
}
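
Both matrix examples above use a custom class (IndexPair) as the map output key; its source is not reproduced on this page. Any custom map output key must implement WritableComparable so the shuffle can serialize and sort it. The following is a minimal sketch of what such a pair key might look like; the field names and comparison order are assumptions, not the actual IndexPair implementation.

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.WritableComparable;

public class IndexPairSketch implements WritableComparable<IndexPairSketch> {
    private int row; // block row index (assumed)
    private int col; // block column index (assumed)

    public IndexPairSketch() {
        // Hadoop instantiates keys reflectively, so a no-arg constructor is required
    }

    public IndexPairSketch(int row, int col) {
        this.row = row;
        this.col = col;
    }

    @Override
    public void write(DataOutput out) throws IOException {
        out.writeInt(row);
        out.writeInt(col);
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        row = in.readInt();
        col = in.readInt();
    }

    @Override
    public int compareTo(IndexPairSketch other) {
        int c = Integer.compare(row, other.row);
        return c != 0 ? c : Integer.compare(col, other.col);
    }

    @Override
    public int hashCode() {
        // a stable hash matters: the default HashPartitioner uses it
        return 31 * row + col;
    }

    @Override
    public boolean equals(Object o) {
        if (!(o instanceof IndexPairSketch)) {
            return false;
        }
        IndexPairSketch p = (IndexPairSketch) o;
        return row == p.row && col == p.col;
    }
}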

From source file:com.tetsuyaodaka.hadoop.math.matrix.TransformMatrix.java

public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {

    Date startProc = new Date(System.currentTimeMillis());
    System.out.println("process started at " + startProc);

    Configuration conf = new Configuration();
    if (args[2].equals("yes")) {
        conf.set("transpose", "true"); // transpose
    } else {/* w ww .java2s.co m*/
        conf.set("transpose", "false"); // 
    }

    Job job = new Job(conf, "MatrixMultiplication");
    job.setJarByClass(TransformMatrix.class);

    job.setReducerClass(Reduce.class);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Text.class);

    // configure the Mapper
    MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, Map.class); // matrixA
    FileOutputFormat.setOutputPath(job, new Path(args[1])); // output path

    boolean success = job.waitForCompletion(true);

    Date endProc = new Date(System.currentTimeMillis());
    System.out.println("process ended at " + endProc);

    System.out.println(success);
}
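
The transpose flag is shipped to the tasks through the Configuration; the Map and Reduce classes (not shown here) would read it back with context.getConfiguration().get("transpose"). Passing parameters through the Configuration like this is the standard way to parameterize Hadoop tasks, since the driver's objects are not serialized to the cluster.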

From source file:com.toddbodnar.simpleHadoop.distributedHadoopDriver.java

/**
 * Runs a job.
 *
 * @param theJob the MapReduceJob to be run
 * @param verbose if true, output progress information
 */
public static void run(MapReduceJob theJob, boolean verbose)
        throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = GetConfiguration.get();
    Job job = Job.getInstance(conf, theJob.toString());
    job.setJarByClass(distributedHadoopDriver.class);

    job.setMapperClass(theJob.getMapper().getClass());
    job.setReducerClass(theJob.getReducer().getClass());

    job.setMapOutputKeyClass(theJob.getKeyType());
    job.setMapOutputValueClass(theJob.getValueType());

    theJob.writeConfig(job.getConfiguration());

    hdfsFile input = hdfsFile.transferToHDFS(theJob.getInput().getFile());
    if (!input.equals(theJob.getInput().getFile())) {
        garbage_collector.noteCreated(input);
    }
    if (theJob.getClass().equals(join.class)) {
        join jobLeftJoin = (join) theJob;

        hdfsFile input2 = hdfsFile.transferToHDFS(jobLeftJoin.getOtherInput().getFile());
        if (!input2.equals(jobLeftJoin.getOtherInput().getFile())) {
            garbage_collector.noteCreated(input2);
        }

        Mapper[] maps = jobLeftJoin.getMapperPairs();
        MultipleInputs.addInputPath(job, input.getPath(), TextInputFormat.class, maps[0].getClass());
        MultipleInputs.addInputPath(job, input2.getPath(), TextInputFormat.class, maps[1].getClass());
    } else {
        MultipleInputs.addInputPath(job, input.getPath(), TextInputFormat.class);
    }

    job.getConfiguration().set(TextOutputFormat.SEPERATOR, "");

    job.setOutputFormatClass(TextOutputFormat.class);

    //FileInputFormat.setInputPaths(job, new Path(theJob.getInput().getFile().getLocation()));
    Path out = new Path(settings.hdfs_prefix + "/TMP_TABLE_" + theJob.hashCode());
    FileOutputFormat.setOutputPath(job, out);

    boolean success = job.waitForCompletion(verbose);

    if (!success) {
        System.err.println("Error processing " + theJob);
        return;
    }

    FileSystem fs = FileSystem.get(GetConfiguration.get());

    fs.delete(new Path(out, "_SUCCESS"), false);

    table output = new table(new hdfsFile(out), theJob.getOutput().getColNames());
    output.setSeperator(theJob.getOutput().getSeperator());

    theJob.setOutput(output);

    garbage_collector.noteCreated(output.getFile());
}
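
Because setMapOutputKeyClass and setMapOutputValueClass take plain Class arguments, a generic driver like this one can choose the intermediate types at runtime from the job description (theJob.getKeyType() and theJob.getValueType()) instead of hard-coding them.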

From source file:com.tomslabs.grid.avro.AvroWordCount.java

License:Apache License

public static Job createSubmitableJob(final Configuration conf, final Path inputPath, final Path outputPath)
        throws IOException {

    conf.set(AvroFileOutputFormat.OUTPUT_SCHEMA, WordCountSchema.getSchema().toString());

    conf.setInt("mapred.max.split.size", 1024000);
    conf.setInt("mapred.reduce.tasks", 10);
    conf.setBoolean("mapred.reduce.tasks.speculative.execution", true);
    final Job job = Job.getInstance(conf, "Word Count");
    job.setJarByClass(AvroWordCount.class);

    job.setInputFormatClass(AvroFileInputFormat.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setMapperClass(WordCountMapper.class);

    job.setReducerClass(WordCountReducer.class);

    job.setOutputKeyClass(GenericRecord.class);
    job.setOutputValueClass(NullWritable.class);
    job.setOutputFormatClass(AvroFileOutputFormat.class);
    AvroFileOutputFormat.setDeflateLevel(job, 3);

    FileInputFormat.addInputPath(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);

    return job;
}
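
This job illustrates the case setMapOutputValueClass exists for: the intermediate records (Text/IntWritable) are completely decoupled from the final output (GenericRecord/NullWritable, written by a custom Avro output format), so the map output classes must be declared explicitly.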

From source file:com.toshiba.mwcloud.gs.hadoop.mapreduce.examples.GSWordCount.java

License:Apache License

/**
 * Run a MapReduce job of WordCount.
 *
 * @param args command arguments
 * @return 0 for normal termination of the job and 1 otherwise
 * @throws Exception if processing failed
 */
public int run(String[] args) throws Exception {
    GSConf gsConf = new GSConf();
    gsConf.parseArg(args);

    Configuration conf = getConf();
    gsConf.setup(conf);

    Job job = Job.getInstance(conf, APP_NAME);
    job.setJarByClass(GSWordCount.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(GSRowWritable.class);

    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);

    job.setInputFormatClass(GSRowInputFormat.class);
    job.setOutputFormatClass(GSRowOutputFormat.class);

    int res = job.waitForCompletion(true) ? 0 : 1;

    if (res == 0) {
        printResult(gsConf);
    }

    return res;
}

From source file:com.trexinhca.TrexinHCATest.java

License:Apache License

public static void main(String[] args) throws Exception {

    ks = KieServices.Factory.get();
    kContainer = ks.getKieClasspathContainer();
    ksession = TrexinHCATest.kContainer.newKieSession("MapReduceKS");
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: TrexinHCATest <in> [<in>...] <out>");
        System.exit(2);
    }
    Job job = Job.getInstance(conf);
    job.setJobName("HCATest");
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(TrexinHCAReducer.class);
    job.setReducerClass(TrexinHCAReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class); // key type emitted by the reducer, not an output-format class
    job.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    job.setJarByClass(TrexinHCATest.class);
    System.exit(job.waitForCompletion(true) ? 0 : 1);

}