Example usage for org.apache.hadoop.mapreduce Job setMapOutputValueClass

List of usage examples for org.apache.hadoop.mapreduce Job setMapOutputValueClass

Introduction

In this page you can find the example usage for org.apache.hadoop.mapreduce Job setMapOutputValueClass.

Prototype

public void setMapOutputValueClass(Class<?> theClass) throws IllegalStateException 

Source Link

Document

Set the value class for the map output data.

Usage

From source file:com.ailk.oci.ocnosql.tools.load.single.SingleColumnImportTsv.java

License:Apache License

/**
 * Sets up the actual job. importtsvmapreduce job
 *
 * @param conf  The current configuration.
 * @return The newly created job./*from w ww .j a  v  a2s .  co  m*/
 * @throws IOException When setting up the job fails.
 */
public static Job createSubmittableJob(Configuration conf, String tableName, String inputPath,
        String tmpOutputPath) throws IOException, ClassNotFoundException {

    // Support non-XML supported characters
    // by re-encoding the passed separator as a Base64 string.
    //???BASE64?
    String actualSeparator = conf.get(CommonConstants.SEPARATOR);
    if (actualSeparator != null) {
        conf.set(CommonConstants.SEPARATOR, Base64.encodeBytes(actualSeparator.getBytes()));
    }

    // See if a non-default Mapper was set?mapper?SingleColumnImporterMapper
    String mapperClassName = conf.get(MAPPER_CONF_KEY);
    Class mapperClass = mapperClassName != null ? Class.forName(mapperClassName) : DEFAULT_MAPPER;

    Path inputDir = new Path(inputPath);
    //?job
    Job job = new Job(conf, NAME + "_" + tableName);
    //Set the Jar by finding where a given class came from.
    job.setJarByClass(SingleColumnImportTsv.class);
    //
    FileInputFormat.setInputPaths(job, inputDir);
    //jobinputformat

    //??Dimporttsv.inputFormatInputFormat,TextInputFormat
    //??Dimporttsv.inputFormatInputFormat,TextInputFormat
    String inputFmtName = conf.get(CommonConstants.INPUTFORMAT,
            "org.apache.hadoop.mapreduce.lib.input.TextInputFormat");
    LOG.info(CommonConstants.INPUTFORMAT + " is " + inputFmtName);
    Class<? extends InputFormat> inputFmtClass = Class.forName(inputFmtName).asSubclass(InputFormat.class);
    job.setInputFormatClass(inputFmtClass);
    job.setMapperClass(mapperClass);

    //mapper
    job.setMapperClass(mapperClass);

    String hfileOutPath = tmpOutputPath;
    if (hfileOutPath != null) {
        //?
        if (!doesTableExist(tableName)) {
            createTable(conf, tableName);
        }
        HTable table = new HTable(conf, tableName);
        //reducer
        job.setReducerClass(SingleColumnReducer.class);

        Path outputDir = new Path(hfileOutPath);
        //
        FileOutputFormat.setOutputPath(job, outputDir);
        job.setMapOutputKeyClass(ImmutableBytesWritable.class);
        job.setMapOutputValueClass(TextArrayWritable.class);
        //job?partition?outputformat?reduce
        configureIncrementalLoad(job, table);

    } else {//put
        // No reducers.  Just write straight to table.  Call initTableReducerJob
        // to set up the TableOutputFormat.
        TableMapReduceUtil.initTableReducerJob(tableName, null, job);
        job.setNumReduceTasks(0);
    }

    TableMapReduceUtil.addDependencyJars(job);
    TableMapReduceUtil.addDependencyJars(job.getConfiguration(),
            com.google.common.base.Function.class /* Guava used by TsvParser */);
    return job;
}

From source file:com.airline.analytics.AirlineDelayAnalytics.java

@Override
public int run(String[] strings) throws Exception {

    Job job = Job.getInstance(getConf(), "Hadoop Airline Delay Analytics");

    job.setJarByClass(AirlineDelayAnalytics.class);

    job.setMapperClass(AirlineMapper.class);
    // job.setCombinerClass(AirlineReducer.class);
    job.setReducerClass(AirlineReducer.class);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Text.class);

    FileInputFormat.addInputPath(job, new Path(strings[0]));
    FileOutputFormat.setOutputPath(job, new Path(strings[1]));

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.airline.analytics.AirlineUniqueRoutesAnalytics.java

@Override
public int run(String[] strings) throws Exception {

    Job job = Job.getInstance(getConf(), "Hadoop Airline Orign Destination Analytics");

    job.setJarByClass(getClass());//from  w  w w. j av a2  s.  c o m

    // Distributed Cache
    job.addCacheFile(new URI("/airline/codes.csv"));

    job.setMapperClass(AirlineMapper.class);
    // job.setCombinerClass(AirlineReducer.class);
    job.setReducerClass(AirlineReducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(job, new Path(strings[0]));
    FileOutputFormat.setOutputPath(job, new Path(strings[1]));

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.alectenharmsel.research.hadoop.CodeTokenizer.java

License:Apache License

public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: MoabLicenses <input> <output>");
        System.exit(-1);/*from   ww  w .j av  a2  s .  c o  m*/
    }

    Configuration conf = getConf();
    Job job = new Job(conf, "SrcTok");
    job.setJarByClass(CodeTokenizer.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    boolean success = job.waitForCompletion(true);

    return success ? 0 : 1;
}

From source file:com.alectenharmsel.research.SrcTok.java

License:Apache License

public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: MoabLicenses <input> <output>");
        System.exit(-1);/*from  w  w  w  . java2 s.  c om*/
    }

    Configuration conf = getConf();
    Job job = new Job(conf, "SrcTok");
    job.setJarByClass(SrcTok.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(SrcTokMapper.class);
    job.setReducerClass(SrcTokReducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    boolean success = job.waitForCompletion(true);

    return success ? 0 : 1;
}

From source file:com.alexholmes.hadooputils.combine.avro.mapreduce.CombineAvroKeyValueInputFormatTest.java

License:Apache License

@Test
public void testKeyValueInput() throws ClassNotFoundException, IOException, InterruptedException {
    // Create a test input file.
    File inputFile = createInputFile();

    // Configure the job input.
    Job job = new Job();
    FileInputFormat.setInputPaths(job, new Path(inputFile.getAbsolutePath()));
    job.setInputFormatClass(CombineAvroKeyValueInputFormat.class);
    AvroJob.setInputKeySchema(job, Schema.create(Schema.Type.INT));
    AvroJob.setInputValueSchema(job, Schema.create(Schema.Type.STRING));

    // Configure a mapper.
    job.setMapperClass(IndexMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    // Configure a reducer.
    job.setReducerClass(IndexReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(AvroValue.class);
    AvroJob.setOutputValueSchema(job, Schema.createArray(Schema.create(Schema.Type.INT)));

    // Configure the output format.
    job.setOutputFormatClass(AvroKeyValueOutputFormat.class);
    Path outputPath = new Path(mTempDir.getRoot().getPath(), "out-index");
    FileOutputFormat.setOutputPath(job, outputPath);

    // Run the job.
    assertTrue(job.waitForCompletion(true));

    // Verify that the output Avro container file as the expected data.
    File avroFile = new File(outputPath.toString(), "part-r-00000.avro");
    DatumReader<GenericRecord> datumReader = new SpecificDatumReader<GenericRecord>(AvroKeyValue
            .getSchema(Schema.create(Schema.Type.STRING), Schema.createArray(Schema.create(Schema.Type.INT))));
    DataFileReader<GenericRecord> avroFileReader = new DataFileReader<GenericRecord>(avroFile, datumReader);
    assertTrue(avroFileReader.hasNext());

    AvroKeyValue<CharSequence, List<Integer>> appleRecord = new AvroKeyValue<CharSequence, List<Integer>>(
            avroFileReader.next());/*from w  w w  .  ja v  a 2 s .  c om*/
    assertNotNull(appleRecord.get());
    assertEquals("apple", appleRecord.getKey().toString());
    List<Integer> appleDocs = appleRecord.getValue();
    assertEquals(3, appleDocs.size());
    assertTrue(appleDocs.contains(1));
    assertTrue(appleDocs.contains(2));
    assertTrue(appleDocs.contains(3));

    assertTrue(avroFileReader.hasNext());
    AvroKeyValue<CharSequence, List<Integer>> bananaRecord = new AvroKeyValue<CharSequence, List<Integer>>(
            avroFileReader.next());
    assertNotNull(bananaRecord.get());
    assertEquals("banana", bananaRecord.getKey().toString());
    List<Integer> bananaDocs = bananaRecord.getValue();
    assertEquals(2, bananaDocs.size());
    assertTrue(bananaDocs.contains(1));
    assertTrue(bananaDocs.contains(2));

    assertTrue(avroFileReader.hasNext());
    AvroKeyValue<CharSequence, List<Integer>> carrotRecord = new AvroKeyValue<CharSequence, List<Integer>>(
            avroFileReader.next());
    assertEquals("carrot", carrotRecord.getKey().toString());
    List<Integer> carrotDocs = carrotRecord.getValue();
    assertEquals(1, carrotDocs.size());
    assertTrue(carrotDocs.contains(1));

    assertFalse(avroFileReader.hasNext());
    avroFileReader.close();
}

From source file:com.alexholmes.hadooputils.combine.seqfile.mapreduce.CombineSequenceFileJob.java

License:Apache License

/**
 * The driver for the MapReduce job./*from  w w  w.  jav  a 2s. c o  m*/
 *
 * @param conf           configuration
 * @param inputDirAsString  input directory in CSV-form
 * @param outputDirAsString output directory
 * @return true if the job completed successfully
 * @throws java.io.IOException         if something went wrong
 * @throws java.net.URISyntaxException if a URI wasn't correctly formed
 */
public boolean runJob(final Configuration conf, final String inputDirAsString, final String outputDirAsString)
        throws IOException, URISyntaxException, ClassNotFoundException, InterruptedException {

    Job job = new Job(conf);

    job.setJarByClass(CombineSequenceFileJob.class);
    job.setJobName("seqfilecombiner");

    job.setNumReduceTasks(0);

    //        job.setMapperClass(IdentityMapper.class);

    job.setInputFormatClass(CombineSequenceFileInputFormat.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    FileInputFormat.setInputPaths(job, inputDirAsString);
    FileOutputFormat.setOutputPath(job, new Path(outputDirAsString));

    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    boolean jobResult = job.waitForCompletion(true);
    Date endTime = new Date();
    System.out.println("Job ended: " + endTime);
    System.out.println("The job took "
            + TimeUnit.MILLISECONDS.toSeconds(endTime.getTime() - startTime.getTime()) + " seconds.");

    return jobResult;
}

From source file:com.aliyun.openservices.tablestore.hadoop.TableStoreOutputFormatExample.java

License:Apache License

public static void main(String[] args) throws Exception {
    if (!parseArgs(args)) {
        printUsage();//  w ww . j a v  a 2s  .co  m
        System.exit(1);
    }
    if (endpoint == null || accessKeyId == null || accessKeySecret == null || inputTable == null
            || outputTable == null) {
        printUsage();
        System.exit(1);
    }

    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, TableStoreOutputFormatExample.class.getName());
    job.setMapperClass(OwnerMapper.class);
    job.setReducerClass(IntoTableReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(MapWritable.class);
    job.setInputFormatClass(TableStoreInputFormat.class);
    job.setOutputFormatClass(TableStoreOutputFormat.class);

    TableStore.setCredential(job, accessKeyId, accessKeySecret, securityToken);
    TableStore.setEndpoint(job, endpoint, instance);
    TableStoreInputFormat.addCriteria(job, fetchCriteria());
    TableStoreOutputFormat.setOutputTable(job, outputTable);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.antbrains.crf.hadoop.CalcFeatureWeights.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

    if (otherArgs.length != 3 && otherArgs.length != 4) {
        System.err.println("CalcFeatureWeights <inDir> <tmpDir> <outDir> [startStep]");
        System.exit(-1);/*from   ww w. ja  va2  s  . com*/
    }
    int startStep = 1;
    if (otherArgs.length == 4) {
        startStep = Integer.valueOf(otherArgs[otherArgs.length - 1]);
    }
    FileSystem fs = FileSystem.get(conf);
    if (startStep <= 1) {
        System.out.println("calc");
        fs.delete(new Path(otherArgs[1]), true);
        Job job = new Job(conf, CalcFeatureWeights.class.getSimpleName());
        job.setNumReduceTasks(1);
        job.setJarByClass(CalcFeatureWeights.class);
        job.setMapperClass(CalcFeatureMapper.class);
        job.setReducerClass(CalcFeatureReducer.class);

        job.setOutputFormatClass(SequenceFileOutputFormat.class);

        job.setInputFormatClass(SequenceFileInputFormat.class);

        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(MyKey.class);

        job.setOutputKeyClass(MyKey.class);
        job.setOutputValueClass(MyValue.class);
        FileInputFormat.setInputPaths(job, new Path(otherArgs[0]));

        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

        boolean res = job.waitForCompletion(true);
        if (!res) {
            System.err.println("step1 failed");
            return;
        }
    }

    if (startStep <= 2)
    // sort
    {
        fs.delete(new Path(otherArgs[2]), true);
        System.out.println("sort");
        Job job = new Job(conf, CalcFeatureWeights.class.getSimpleName());

        job.setNumReduceTasks(1);
        job.setJarByClass(CalcFeatureWeights.class);
        job.setMapperClass(IdentityMapper.class);
        job.setReducerClass(IdentityReducer.class);

        job.setOutputFormatClass(SequenceFileOutputFormat.class);

        job.setInputFormatClass(SequenceFileInputFormat.class);

        job.setMapOutputKeyClass(MyKey.class);
        job.setMapOutputValueClass(MyValue.class);
        job.setOutputKeyClass(MyKey.class);
        job.setOutputValueClass(MyValue.class);

        FileInputFormat.setInputPaths(job, new Path(otherArgs[1]));

        FileOutputFormat.setOutputPath(job, new Path(otherArgs[2]));

        boolean res = job.waitForCompletion(true);
        if (!res) {
            System.err.println("step2 failed");
            return;
        }
    }

}

From source file:com.asakusafw.runtime.mapreduce.simple.SimpleJobRunnerTest.java

License:Apache License

/**
 * Test for map-reduce job./*from   w  w  w.j  av a 2s . co m*/
 * @throws Exception if failed
 */
@Test
public void map_reduce() throws Exception {
    Job job = newJob();
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapperClass(WordCountMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);

    job.setSortComparatorClass(Text.Comparator.class);
    job.setGroupingComparatorClass(Text.Comparator.class);

    job.setReducerClass(WordCountReducer.class);
    job.setNumReduceTasks(1);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    File inputDir = folder.newFolder();
    File inputFile = new File(inputDir, "input.txt");
    write(inputFile, new String[] { "a b c d", "a a b c", "c", });

    File outputDir = folder.newFolder();
    outputDir.delete();

    FileInputFormat.setInputPaths(job, new Path(inputFile.toURI()));
    FileOutputFormat.setOutputPath(job, new Path(outputDir.toURI()));
    assertThat(new SimpleJobRunner().run(job), is(true));
    assertThat(toMap(read(outputDir)), is(map(new String[] { "a", "3", "b", "2", "c", "3", "d", "1", })));
}