List of usage examples for org.apache.hadoop.mapreduce.Job#setMapOutputValueClass

public void setMapOutputValueClass(Class<?> theClass) throws IllegalStateException
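setMapOutputValueClass is only required when the mapper's output value type differs from the job's final output value type (set via setOutputValueClass); by default Hadoop assumes the two match. It throws IllegalStateException if called after the job has been submitted. A minimal sketch, assuming hypothetical StatsMapper and StatsReducer classes with the signatures noted in the comments:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class StatsJob {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "stats");
        job.setJarByClass(StatsJob.class);

        job.setMapperClass(StatsMapper.class);   // hypothetical: emits <Text, DoubleWritable>
        job.setReducerClass(StatsReducer.class); // hypothetical: emits <Text, Text>

        // Needed because the map output value type (DoubleWritable) differs
        // from the final output value type (Text) declared below.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(DoubleWritable.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}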
From source file:com.google.cloud.bigtable.mapreduce.Import.java
License:Open Source License
/**
 * Sets up the actual job.
 *
 * @param conf The current configuration.
 * @param args The command line parameters.
 * @return The newly created job.
 * @throws IOException When setting up the job fails.
 */
public static Job createSubmittableJob(Configuration conf, String[] args) throws IOException {
    TableName tableName = TableName.valueOf(args[0]);
    conf.set(TABLE_NAME, tableName.getNameAsString());
    Path inputDir = new Path(args[1]);
    Job job = Job.getInstance(conf, conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName));
    job.setJarByClass(Importer.class);
    FileInputFormat.setInputPaths(job, inputDir);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    String hfileOutPath = conf.get(BULK_OUTPUT_CONF_KEY);

    // Make sure we get the filter in the jars.
    try {
        Class<? extends Filter> filter = conf.getClass(FILTER_CLASS_CONF_KEY, null, Filter.class);
        if (filter != null) {
            TableMapReduceUtil.addDependencyJars(conf, filter);
        }
    } catch (Exception e) {
        throw new IOException(e);
    }

    if (hfileOutPath != null) {
        job.setMapperClass(KeyValueImporter.class);
        try (Connection conn = ConnectionFactory.createConnection(conf);
                Table table = conn.getTable(tableName);
                RegionLocator regionLocator = conn.getRegionLocator(tableName)) {
            job.setReducerClass(KeyValueSortReducer.class);
            Path outputDir = new Path(hfileOutPath);
            FileOutputFormat.setOutputPath(job, outputDir);
            job.setMapOutputKeyClass(ImmutableBytesWritable.class);
            job.setMapOutputValueClass(KeyValue.class);
            HFileOutputFormat2.configureIncrementalLoad(job, table, regionLocator);
            TableMapReduceUtil.addDependencyJars(job.getConfiguration(),
                    com.google.common.base.Preconditions.class);
        }
    } else {
        // No reducers. Just write straight to table. Call initTableReducerJob
        // because it sets up the TableOutputFormat.
        job.setMapperClass(Importer.class);
        TableMapReduceUtil.initTableReducerJob(tableName.getNameAsString(), null, job);
        job.setNumReduceTasks(0);
    }
    return job;
}
From source file:com.gsinnovations.howdah.AbstractJob.java
License:Apache License
protected Job prepareJob(Path inputPath, Path outputPath, Class<? extends InputFormat> inputFormat,
        Class<? extends Mapper> mapper, Class<? extends Writable> mapperKey,
        Class<? extends Writable> mapperValue, Class<? extends Reducer> reducer,
        Class<? extends Writable> reducerKey, Class<? extends Writable> reducerValue,
        Class<? extends OutputFormat> outputFormat) throws IOException {

    Job job = new Job(new Configuration(getConf()));
    Configuration jobConf = job.getConfiguration();

    if (reducer.equals(Reducer.class)) {
        if (mapper.equals(Mapper.class)) {
            throw new IllegalStateException("Can't figure out the user class jar file from mapper/reducer");
        }
        job.setJarByClass(mapper);
    } else {
        job.setJarByClass(reducer);
    }

    job.setInputFormatClass(inputFormat);
    jobConf.set("mapred.input.dir", inputPath.toString());

    job.setMapperClass(mapper);
    job.setMapOutputKeyClass(mapperKey);
    job.setMapOutputValueClass(mapperValue);
    jobConf.setBoolean("mapred.compress.map.output", true);

    job.setReducerClass(reducer);
    job.setOutputKeyClass(reducerKey);
    job.setOutputValueClass(reducerValue);

    job.setJobName(getCustomJobName(job, mapper, reducer));
    job.setOutputFormatClass(outputFormat);
    jobConf.set("mapred.output.dir", outputPath.toString());

    return job;
}
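A hedged usage sketch of this helper (MyMapper and MyReducer are hypothetical classes extending Mapper and Reducer):

Job job = prepareJob(new Path("in"), new Path("out"),
        SequenceFileInputFormat.class,
        MyMapper.class, Text.class, IntWritable.class,
        MyReducer.class, Text.class, IntWritable.class,
        SequenceFileOutputFormat.class);
job.waitForCompletion(true);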
From source file:com.gsinnovations.howdah.Driver.java
License:Apache License
public static void job(Path input, Path output, int numReduceTasks)
        throws IOException, ClassNotFoundException, InterruptedException {
    Configuration conf = new Configuration();
    Job job = new Job(conf);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapperClass(TikaMapper.class);
    //job.setCombinerClass(KMeansCombiner.class);
    //job.setReducerClass(KMeansReducer.class);
    job.setNumReduceTasks(numReduceTasks);

    FileInputFormat.addInputPath(job, input);
    FileOutputFormat.setOutputPath(job, output);
    job.setJarByClass(Driver.class);

    HadoopUtil.overwriteOutput(output);
    job.waitForCompletion(true);
}
From source file:com.gsvic.csmr.CSMRBase.java
License:Apache License
public static void generatePairs(String in, String out)
        throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration();
    path = out;
    Job job;
    Path input, output;
    input = new Path(in);
    output = new Path(path + "/CSMRPairs");

    job = new Job(conf);
    job.setJobName("CSMR Pairs Job");
    job.setJarByClass(CSMRBase.class);

    FileInputFormat.addInputPath(job, input);
    FileOutputFormat.setOutputPath(job, output);

    job.setMapperClass(CSMRMapper.class);
    job.setReducerClass(CSMRReducer.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(DocumentWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(VectorArrayWritable.class);

    job.waitForCompletion(true);
}
From source file:com.gsvic.csmr.CSMRBase.java
License:Apache License
public static void StartCSMR() throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration();
    Job job;
    job = new Job(conf);
    job.setJobName("CSMR Cosine Similarity Job");
    job.setJarByClass(CSMRBase.class);

    FileInputFormat.addInputPath(job, new Path(path + "/CSMRPairs/part-r-00000"));
    FileOutputFormat.setOutputPath(job, new Path(path + "/Results"));

    job.setMapperClass(Mapper.class);
    job.setReducerClass(CosineSimilarityReducer.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(VectorArrayWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(DoubleWritable.class);

    // Exit with 0 on success, 1 on failure.
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.hadoop.examples.secondSort.SecondarySort.java
License:Apache License
public static void main(String[] args) throws Exception {
    // Load the Hadoop configuration and parse generic options.
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: secondarysort <in> <out>");
        System.exit(2);
    }

    // Create the job.
    Job job = new Job(conf, "secondary sort");
    job.setJarByClass(SecondarySort.class);

    // Mapper
    job.setMapperClass(MapClass.class);
    // Reduce cannot be reused as a Combiner here: a Combiner would emit
    // <Text, IntWritable> while Reduce consumes <IntPair, IntWritable>.
    //job.setCombinerClass(Reduce.class);
    // Reducer
    job.setReducerClass(Reduce.class);

    // Group and partition by the first int in the pair.
    job.setPartitionerClass(FirstPartitioner.class);
    // setSortComparatorClass() would override how Hadoop sorts the keys;
    // here the ordering comes from IntPair.compareTo().
    //job.setSortComparatorClass(cls);
    job.setGroupingComparatorClass(FirstGroupingComparator.class);

    // The map output is <IntPair, IntWritable>.
    job.setMapOutputKeyClass(IntPair.class);
    job.setMapOutputValueClass(IntWritable.class);

    // The reduce output is <Text, IntWritable>, written by TextOutputFormat.
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    // Submit the job and wait for completion.
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
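The design choice worth noting in this secondary-sort example: the composite map output key (IntPair) differs from the final output key (Text), so both setMapOutputKeyClass and setMapOutputValueClass must be set explicitly. The partitioner and grouping comparator inspect only the first int of the pair, which is what lets values arrive at each reduce call sorted by the second int.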
From source file:com.hadoop.secondarysort.SecondarySortDESC.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    // if (otherArgs.length != 2) {
    //     System.err.println("Usage: secondarysort <in> <out>");
    //     System.exit(2);
    // }

    // JobConf jobConf = new JobConf();
    Job job = new Job(conf, "secondary sort");
    job.setJarByClass(SecondarySortDESC.class);
    job.setMapperClass(MapClass.class);
    job.setReducerClass(Reduce.class);

    // Group and partition by the first int in the pair.
    job.setPartitionerClass(FirstPartitioner.class);
    job.setGroupingComparatorClass(FirstGroupingComparator.class);
    // conf.setClass("mapred.output.key.comparator.class",
    //         KeyComparator.class, RawComparator.class);
    // job.setSortComparatorClass(SecondGroupingComparator.class);

    // The map output is <IntPair, IntWritable>.
    job.setMapOutputKeyClass(IntPair.class);
    job.setMapOutputValueClass(IntWritable.class);

    // The reduce output is <Text, IntWritable>.
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(job, new Path(inPath));
    FileOutputFormat.setOutputPath(job, new Path(outPath));

    // Remove any previous output directory so the job can run again.
    FileSystem fileSystem = FileSystem.get(conf);
    if (fileSystem.exists(new Path(outPath))) {
        fileSystem.delete(new Path(outPath), true);
    }

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.hhscyber.nl.tweets.svm.train.Train.java
/**
 * @param args the command line arguments
 * @throws java.io.IOException
 */
public static void main(String[] args) throws IOException {
    Conf conf = new Conf(args, "");
    FileSystem hdfs = FileSystem.get(conf);
    hdfs.delete(new Path("trainer"), true);

    Job client = new HBJob(conf, "SVMTrainer");
    client.setJarByClass(Train.class);
    client.setMapOutputKeyClass(Text.class);
    client.setMapOutputValueClass(Text.class);

    client.setInputFormatClass(TextInputFormat.class);
    TextInputFormat.addInputPath(client, new Path("svmclass"));
    client.setNumReduceTasks(1);
    client.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(client, new Path("trainer"));

    client.setMapperClass(TrainMapper.class);
    client.setReducerClass(TrainReducer.class);

    try {
        client.waitForCompletion(true);
    } catch (IOException | InterruptedException | ClassNotFoundException e) {
        // Failures are silently swallowed here; production code should log or rethrow.
    }
}
From source file:com.hn.cluster.hadoop.mrs.SecondarySort.java
License:Apache License
public static void main(String[] args) throws Exception {
    // Load the Hadoop configuration.
    Configuration conf = new Configuration();

    // Create the job.
    Job job = new Job(conf, "secondary sort");
    job.setJarByClass(SecondarySort.class);

    // Mapper
    job.setMapperClass(MapClass.class);
    // Reducer
    job.setReducerClass(Reduce.class);

    // Partition by the first int in the pair.
    job.setPartitionerClass(FirstPartitioner.class);
    // Group by the first int in the pair.
    job.setGroupingComparatorClass(FirstGroupingComparator.class);

    // Map output key
    job.setMapOutputKeyClass(IntPair.class);
    // Map output value
    job.setMapOutputValueClass(IntWritable.class);

    // Reduce output key, written by TextOutputFormat.
    job.setOutputKeyClass(Text.class);
    // Reduce output value
    job.setOutputValueClass(IntWritable.class);

    /**
     * The InputFormat splits the input into splits, and a RecordReader turns
     * each split into <key, value> records: here <LongWritable, Text> fed to
     * the Mapper. The map phase emits a list of <IntPair, IntWritable> pairs,
     * which the partitioner set via job.setPartitionerClass() distributes to
     * the reducers.
     */
    job.setInputFormatClass(TextInputFormat.class);
    // The OutputFormat supplies the RecordWriter used for the final output.
    job.setOutputFormatClass(TextOutputFormat.class);

    // Input path on HDFS.
    FileInputFormat.addInputPath(job, new Path("hdfs://192.168.1.12:9000/input/input/soso.txt"));
    // Output path on HDFS.
    FileOutputFormat.setOutputPath(job, new Path("hdfs://192.168.1.12:9000/output/sort/"));

    // Submit the job and wait for completion.
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.hortonworks.mapreduce.URLCount.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    Configuration conf = this.getConf();
    conf.set("mapreduce.input.keyvaluelinerecordreader.key.value.separator", " ");

    Job job = Job.getInstance(conf, "URLCount");
    job.setJarByClass(getClass());
    job.setInputFormatClass(KeyValueTextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setMapperClass(URLCountM.class);
    job.setReducerClass(URLCountR.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Text.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    return job.waitForCompletion(true) ? 0 : -1;
}