Example usage for org.apache.hadoop.mapreduce Job setMapOutputValueClass

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce Job setMapOutputValueClass.

Prototype

public void setMapOutputValueClass(Class<?> theClass) throws IllegalStateException 

Document

Set the value class for the map output data.
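For orientation, here is a minimal driver sketch showing where setMapOutputValueClass fits into a typical job setup. If the intermediate types are not set explicitly, Hadoop assumes they are the same as the final output types configured via setOutputKeyClass/setOutputValueClass, so this call matters mainly when the map output types differ from the reduce output types (or when you want to state them explicitly). The WordCountMapper and WordCountReducer classes below are hypothetical placeholders for a mapper and reducer that emit (Text, IntWritable) pairs.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class WordCountDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "word count");
        job.setJarByClass(WordCountDriver.class);

        // Hypothetical mapper/reducer emitting (Text, IntWritable) pairs.
        job.setMapperClass(WordCountMapper.class);
        job.setReducerClass(WordCountReducer.class);

        // Intermediate (map output) record types; these must match what the
        // mapper actually emits, otherwise the job fails at runtime.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);

        // Final output types written by the reducer.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        TextInputFormat.addInputPath(job, new Path(args[0]));
        TextOutputFormat.setOutputPath(job, new Path(args[1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

Several of the examples below (the secondary-sort drivers, for instance) declare intermediate types that differ from the final output types, which is exactly the situation these setters exist for.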

Usage

From source file:com.jbw.recommendsystem.cooc.CoocMRD.java

@Override
public int run(String[] strings) throws Exception {
    Configuration conf = getConf();
    Path in = new Path(conf.get("input"));
    Path out = new Path(conf.get("output"));

    Job surJob = Job.getInstance(conf);
    surJob.setJarByClass(CoocMRD.class);
    surJob.setJobName("Coor");

    surJob.setMapperClass(CoocMapper.class);
    surJob.setReducerClass(IntSumReducer.class);

    surJob.setMapOutputKeyClass(Text.class);
    surJob.setMapOutputValueClass(IntWritable.class);

    surJob.setOutputKeyClass(Text.class);
    surJob.setOutputValueClass(IntWritable.class);

    surJob.setInputFormatClass(TextInputFormat.class);
    surJob.setOutputFormatClass(TextOutputFormat.class);

    TextInputFormat.addInputPath(surJob, in);
    TextOutputFormat.setOutputPath(surJob, out);

    return surJob.waitForCompletion(true) ? 0 : 1;
}

From source file:com.jbw.recommendsystem.guiyihua.GYHMRD.java

@Override
public int run(String[] strings) throws Exception {
    Configuration conf = getConf();
    Path in = new Path(conf.get("in"));
    Path out = new Path(conf.get("out"));

    Job job = Job.getInstance(conf);
    job.setJarByClass(GYHMRD.class);
    job.setJobName("fdsjh");
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(Text.class);
    job.setInputFormatClass(TextInputFormat.class);

    job.setPartitionerClass(XXPartition.class);

    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Text.class);

    job.setNumReduceTasks(2);

    TextInputFormat.addInputPath(job, in);
    TextOutputFormat.setOutputPath(job, out);
    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.jbw.recommendsystem.iteamlist.ItemMRD.java

@Override
public int run(String[] strings) throws Exception {
    Configuration conf = getConf();
    Path in = new Path(conf.get("input"));
    Path out = new Path(conf.get("output"));

    Job surJob = Job.getInstance(conf);
    surJob.setJarByClass(ItemMRD.class);
    surJob.setJobName("item");

    surJob.setMapperClass(ItemMapper.class);
    surJob.setReducerClass(ItemReducer.class);

    surJob.setMapOutputKeyClass(Text.class);
    surJob.setMapOutputValueClass(Text.class);

    surJob.setOutputKeyClass(Text.class);
    surJob.setOutputValueClass(Text.class);

    surJob.setInputFormatClass(TextInputFormat.class);
    surJob.setOutputFormatClass(TextOutputFormat.class);

    TextInputFormat.addInputPath(surJob, in);
    TextOutputFormat.setOutputPath(surJob, out);

    return surJob.waitForCompletion(true) ? 0 : 1;
}

From source file:com.jbw.recommendsystem.martrixlist.MartrixListMRD.java

@Override
public int run(String[] strings) throws Exception {
    Configuration conf = getConf();
    Path in = new Path(conf.get("input"));
    Path out = new Path(conf.get("output"));

    Job surJob = Job.getInstance(conf);
    surJob.setJarByClass(MartrixListMRD.class);
    surJob.setJobName("user");

    surJob.setMapperClass(MListMapper.class);
    surJob.setReducerClass(MListReducer.class);

    surJob.setMapOutputKeyClass(Text.class);
    surJob.setMapOutputValueClass(Text.class);

    surJob.setOutputKeyClass(Text.class);
    surJob.setOutputValueClass(Text.class);

    surJob.setInputFormatClass(TextInputFormat.class);
    surJob.setOutputFormatClass(TextOutputFormat.class);

    TextInputFormat.addInputPath(surJob, in);
    TextOutputFormat.setOutputPath(surJob, out);

    return surJob.waitForCompletion(true) ? 0 : 1;
}

From source file:com.juniarto.secondsorter.SsJob.java

public int run(String[] allArgs) throws Exception {
    Configuration conf = getConf();
    Job job = new Job(conf, "secondary sort");

    job.setJarByClass(SsJob.class);
    job.setPartitionerClass(NaturalKeyPartitioner.class);
    job.setGroupingComparatorClass(NaturalKeyGroupingComparator.class);
    job.setSortComparatorClass(CompositeKeyComparator.class);

    job.setMapOutputKeyClass(TextDsi.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapperClass(SsMapper.class);
    job.setReducerClass(SsReducer.class);
    job.setNumReduceTasks(2);

    String[] args = new GenericOptionsParser(getConf(), allArgs).getRemainingArgs();
    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    //job.submit();

    long time1 = System.nanoTime();
    boolean status = job.waitForCompletion(true);
    long time2 = System.nanoTime();
    long timeSpent = time2 - time1;
    LOG.info("TIME: " + timeSpent);
    return status ? 0 : 1;

}

From source file:com.justgiving.raven.kissmetrics.jsonenricher.KissmetricsJsonToEnrichedJsonDriver.java

License:Open Source License

public static void main(String[] args) throws Exception {

    logger.info("Logger - Converting Kissmetrics Json to Valid Json files");
    System.out.println("Converting Kissmetrics Json to Valid Json files");
    System.out.println("defaultCharacterEncoding by property: " + System.getProperty("file.encoding"));
    System.out.println("defaultCharacterEncoding by code: " + getDefaultCharEncoding());
    System.out.println("defaultCharacterEncoding by charSet: " + Charset.defaultCharset());

    Job job = Job.getInstance();
    job.setJarByClass(KissmetricsJsonToEnrichedJsonDriver.class);
    job.setJobName("Kissmetrics Json to valid and enriched Json files");
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    // Number of reducers: defaults to 2, optionally overridden by args[2]
    int numberOfReducers = 2;
    if (args.length > 2 && args[2] != null) {
        numberOfReducers = Integer.parseInt(args[2]);
        if (numberOfReducers <= 0) {
            numberOfReducers = 2;
        }
    }

    job.setMapperClass(com.justgiving.raven.kissmetrics.jsonenricher.KissmetricsJsonToEnrichedJsonMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setReducerClass(
            com.justgiving.raven.kissmetrics.jsonenricher.KissmetricsJsonToEnrichedJsonReducer.class);
    job.setNumReduceTasks(numberOfReducers);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.kasabi.labs.freebase.mr.Freebase2RDFDriver.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    if (log.isDebugEnabled()) {
        log.debug("run({})", Utils.toString(args));
    }

    if (args.length != 2) {
        System.err.printf("Usage: %s [generic options] <input> <output>\n", getClass().getName());
        ToolRunner.printGenericCommandUsage(System.err);
        return -1;
    }

    Configuration configuration = getConf();
    boolean useCompression = configuration.getBoolean(Constants.OPTION_USE_COMPRESSION,
            Constants.OPTION_USE_COMPRESSION_DEFAULT);

    if (useCompression) {
        configuration.setBoolean("mapred.compress.map.output", true);
        configuration.set("mapred.output.compression.type", "BLOCK");
        configuration.set("mapred.map.output.compression.codec", "org.apache.hadoop.io.compress.GzipCodec");
    }

    boolean overrideOutput = configuration.getBoolean(Constants.OPTION_OVERRIDE_OUTPUT,
            Constants.OPTION_OVERRIDE_OUTPUT_DEFAULT);
    FileSystem fs = FileSystem.get(new Path(args[1]).toUri(), configuration);
    if (overrideOutput) {
        fs.delete(new Path(args[1]), true);
    }

    Job job = new Job(configuration);
    job.setJobName("Freebase2RDFDriver");
    job.setJarByClass(getClass());

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setInputFormatClass(TextInputFormat.class);

    job.setMapperClass(Freebase2RDFMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    job.setReducerClass(Freebase2RDFReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    Utils.setReducers(job, configuration, log);

    job.setOutputFormatClass(TextOutputFormat.class);

    if (log.isDebugEnabled())
        Utils.log(job, log);

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.kk.hadoop.SecondarySort.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: secondarysrot <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "secondary sort");
    job.setJarByClass(SecondarySort.class);
    job.setMapperClass(MapClass.class);
    job.setReducerClass(Reduce.class);

    job.setNumReduceTasks(2);

    // group and partition by the first int in the pair
    job.setPartitionerClass(FirstPartitioner.class);

    // the map output is IntPair, IntWritable
    job.setMapOutputKeyClass(IntPair.class);
    job.setMapOutputValueClass(IntWritable.class);

    // the reduce output is Text, IntWritable
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.kse.bigdata.main.Driver.java

License:Apache License

public static void main(String[] args) throws Exception {
    /**********************************************************************************
     **    Merge the source files into one.                                          **
     **    Should change the directories of each file before executing the program.  **
     **********************************************************************************/
    //        String inputFileDirectory = "/media/bk/??/BigData_Term_Project/Debug";
    //        String resultFileDirectory = "/media/bk/??/BigData_Term_Project/debug.csv";
    //        File resultFile = new File(resultFileDirectory);
    //        if(!resultFile.exists())
    //            new SourceFileMerger(inputFileDirectory, resultFileDirectory).mergeFiles();

    /**********************************************************************************
     * Hadoop Operation.
     * Before starting, check the length of the sequence we want to predict.
     **********************************************************************************/

    Configuration conf = new Configuration();

    // Enable Snappy compression for intermediate (map output) data
    conf.setBoolean("mapred.compress.map.output", true);
    conf.set("mapred.map.output.compression.codec", "org.apache.hadoop.io.compress.SnappyCodec");

    //Enable Profiling
    //conf.setBoolean("mapred.task.profile", true);

    String testPath = null;
    String inputPath = null;
    String outputPath = null;

    int sampleSize = 1;
    ArrayList<String> results = new ArrayList<String>();

    for (int index = 0; index < args.length; index++) {

        /*
         * Mandatory command
         */
        //Extract input path string from command line.
        if (args[index].equals("-in"))
            inputPath = args[index + 1];

        //Extract output path string from command line.
        if (args[index].equals("-out"))
            outputPath = args[index + 1];

        //Extract test data path string from command line.
        if (args[index].equals("-test"))
            testPath = args[index + 1];

        /*
         * Optional command
         */
        //Extract a number of neighbors.
        if (args[index].equals("-nn"))
            conf.setInt(Reduce.NUMBER_OF_NEAREAST_NEIGHBOR, Integer.parseInt(args[index + 1]));

        //Whether job uses normalization or not.
        if (args[index].equals("-norm"))
            conf.setBoolean(Map.NORMALIZATION, true);

        //Extract the number of sample size to test.
        if (args[index].equals("-s"))
            sampleSize = Integer.valueOf(args[index + 1]);

        //Whether job uses mean or median
        //[Default : mean]
        if (args[index].equals("-med"))
            conf.setBoolean(Reduce.MEDIAN, true);
    }

    String outputFileName = "part-r-00000";
    SequenceSampler sampler = new SequenceSampler(testPath, sampleSize);
    LinkedList<Sequence> testSequences = sampler.getRandomSample();

    //        Test Sequence
    //        String testSeqString = "13.591-13.674-13.778-13.892-13.958-14.049-14.153-14.185-14.169-14.092-13.905-13.702-13.438-13.187-13.0-12.914-12.868-12.766-12.62-12.433-12.279-12.142-12.063-12.025-100";
    //        Sequence testSeq = new Sequence(testSeqString);
    //        LinkedList<Sequence> testSequences = new LinkedList<>();
    //        testSequences.add(testSeq);

    for (Sequence seq : testSequences) {

        /*
         ********************  Hadoop Launch ***********************
         */

        System.out.println(seq.getTailString());

        conf.set(Map.INPUT_SEQUENCE, seq.toString());

        Job job = new Job(conf);
        job.setJarByClass(Driver.class);
        job.setJobName("term-project-driver");

        job.setMapperClass(Map.class);
        job.setMapOutputKeyClass(NullWritable.class);
        job.setMapOutputValueClass(Text.class);

        //          Should think of another way to implement the combiner class;
        //          the current implementation does not help the job.
        //          job.setCombinerClass(Combiner.class);

        // Use a single reduce task so the 100 nearest neighbors are kept in one sorted set.
        job.setNumReduceTasks(1);
        job.setReducerClass(Reduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        FileInputFormat.setInputPaths(job, new Path(inputPath));
        FileOutputFormat.setOutputPath(job, new Path(outputPath));

        job.waitForCompletion(true);

        /*
         * if job finishes, get result of the job and store it in results(list).
         */
        try {
            FileSystem hdfs = FileSystem.get(new Configuration());
            BufferedReader fileReader = new BufferedReader(
                    new InputStreamReader(hdfs.open(new Path(outputPath + "/" + outputFileName))));

            String line;
            while ((line = fileReader.readLine()) != null) {
                results.add(seq.getSeqString() + " " + line);
            }

            fileReader.close();

            hdfs.delete(new Path(outputPath), true);
            hdfs.close();

        } catch (IOException e) {
            e.printStackTrace();
            System.exit(1);
        }
    }

    /*
     * if all jobs finish, store results of jobs to output/result.txt file.
     */
    String finalOutputPath = "output/result.csv";
    try {
        FileSystem hdfs = FileSystem.get(new Configuration());
        Path file = new Path(finalOutputPath);
        if (hdfs.exists(file)) {
            hdfs.delete(file, true);
        }

        OutputStream os = hdfs.create(file);
        PrintWriter printWriter = new PrintWriter(new OutputStreamWriter(os, "UTF-8"));

        //CSV File Header
        printWriter.println("Actual,Predicted,MER,MAE");
        printWriter.flush();

        for (String result : results) {
            String[] tokens = result.split("\\s+");

            printWriter.println(tokens[0] + "," + tokens[1] + "," + tokens[2] + "," + tokens[3]);
            printWriter.flush();
        }

        printWriter.close();
        hdfs.close();
    } catch (IOException e) {
        e.printStackTrace();
        System.exit(1);
    }

}

From source file:com.lightboxtechnologies.spectrum.PythonJob.java

License:Apache License

public static int run(String imageID, String friendlyName, String outpath, String pymap, String pyred,
        String format, Configuration conf) throws Exception {
    if (conf == null) {
        conf = HBaseConfiguration.create();
    }
    final Job job = SKJobFactory.createJobFromConf(imageID, friendlyName, "PythonJob", conf);
    job.setJarByClass(PythonJob.class);

    job.setMapperClass(PythonMapper.class);
    PyEngine py = new PyEngine();
    configPyTask(job, py, "map", pymap);
    job.setMapOutputKeyClass(py.getKeyClass());
    job.setMapOutputValueClass(py.getValueClass());

    int numReduces = 1;
    job.setOutputKeyClass(py.getKeyClass());
    job.setOutputValueClass(py.getValueClass());
    if (pyred.equals("none")) {
        numReduces = 0;
    } else if (pyred.equals("identity")) {
        job.setReducerClass(Reducer.class);
        job.setOutputKeyClass(py.getKeyClass());
        job.setOutputValueClass(py.getValueClass());
    } else if (pyred.equals("LongSumReducer")) {
        job.setReducerClass(LongSumReducer.class);
        job.setCombinerClass(LongSumReducer.class);
    } else {
        job.setReducerClass(PythonReducer.class);
        configPyTask(job, py, "reduce", pyred);
        job.setOutputKeyClass(py.getKeyClass());
        job.setOutputValueClass(py.getValueClass());
    }
    job.setNumReduceTasks(numReduces);

    // it is possible to run over a flat json file...
    // String input = otherArgs[0];
    // if (input.endsWith(".json") == true) {
    //   job.setInputFormatClass(FsEntryJsonInputFormat.class);
    //   FsEntryJsonInputFormat.addInputPath(job, new Path(input));
    // }
    // else {

    FsEntryHBaseInputFormat.setupJob(job, imageID);
    job.setInputFormatClass(FsEntryHBaseInputFormat.class);

    if (format != null && format.equals("SequenceFileOutputFormat")) {
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);
    } else {
        job.setOutputFormatClass(TextOutputFormat.class);
    }
    FileOutputFormat.setOutputPath(job, new Path(outpath));
    return job.waitForCompletion(true) ? 0 : 1;
}