Example usage for org.apache.hadoop.mapreduce Job setMapOutputValueClass

Introduction

In this page you can find the example usage for org.apache.hadoop.mapreduce Job setMapOutputValueClass.

Prototype

public void setMapOutputValueClass(Class<?> theClass) throws IllegalStateException

Source Link

Document

Set the value class for the map output data.

Usage

From source file:com.ailk.oci.ocnosql.tools.load.single.SingleColumnImportTsv.java

License:Apache License

/**
 * Sets up the actual job. importtsvmapreduce job
 *
 * @param conf  The current configuration.
 * @return The newly created job./*from w ww .j a  v  a2s .  co  m*/
 * @throws IOException When setting up the job fails.
 */
public static Job createSubmittableJob(Configuration conf, String tableName, String inputPath,
        String tmpOutputPath) throws IOException, ClassNotFoundException {

    // Support non-XML supported characters
    // by re-encoding the passed separator as a Base64 string.
    //???BASE64?
    String actualSeparator = conf.get(CommonConstants.SEPARATOR);
    if (actualSeparator != null) {
        conf.set(CommonConstants.SEPARATOR, Base64.encodeBytes(actualSeparator.getBytes()));
    }

    // See if a non-default Mapper was set?mapper?SingleColumnImporterMapper
    String mapperClassName = conf.get(MAPPER_CONF_KEY);
    Class mapperClass = mapperClassName != null ? Class.forName(mapperClassName) : DEFAULT_MAPPER;

    Path inputDir = new Path(inputPath);
    //?job
    Job job = new Job(conf, NAME + "_" + tableName);
    //Set the Jar by finding where a given class came from.
    job.setJarByClass(SingleColumnImportTsv.class);
    //
    FileInputFormat.setInputPaths(job, inputDir);
    //jobinputformat

    //??Dimporttsv.inputFormatInputFormat,TextInputFormat
    //??Dimporttsv.inputFormatInputFormat,TextInputFormat
    String inputFmtName = conf.get(CommonConstants.INPUTFORMAT,
            "org.apache.hadoop.mapreduce.lib.input.TextInputFormat");
    LOG.info(CommonConstants.INPUTFORMAT + " is " + inputFmtName);
    Class<? extends InputFormat> inputFmtClass = Class.forName(inputFmtName).asSubclass(InputFormat.class);
    job.setInputFormatClass(inputFmtClass);
    job.setMapperClass(mapperClass);

    //mapper
    job.setMapperClass(mapperClass);

    String hfileOutPath = tmpOutputPath;
    if (hfileOutPath != null) {
        //?
        if (!doesTableExist(tableName)) {
            createTable(conf, tableName);
        }
        HTable table = new HTable(conf, tableName);
        //reducer
        job.setReducerClass(SingleColumnReducer.class);

        Path outputDir = new Path(hfileOutPath);
        //
        FileOutputFormat.setOutputPath(job, outputDir);
        job.setMapOutputKeyClass(ImmutableBytesWritable.class);
        job.setMapOutputValueClass(TextArrayWritable.class);
        //job?partition?outputformat?reduce
        configureIncrementalLoad(job, table);

    } else {//put
        // No reducers.  Just write straight to table.  Call initTableReducerJob
        // to set up the TableOutputFormat.
        TableMapReduceUtil.initTableReducerJob(tableName, null, job);
        job.setNumReduceTasks(0);
    }

    TableMapReduceUtil.addDependencyJars(job);
    TableMapReduceUtil.addDependencyJars(job.getConfiguration(),
            com.google.common.base.Function.class /* Guava used by TsvParser */);
    return job;
}

From source file:com.airline.analytics.AirlineDelayAnalytics.java

@Override
public int run(String[] strings) throws Exception {

    Job job = Job.getInstance(getConf(), "Hadoop Airline Delay Analytics");

    job.setJarByClass(AirlineDelayAnalytics.class);

    job.setMapperClass(AirlineMapper.class);
    // job.setCombinerClass(AirlineReducer.class);
    job.setReducerClass(AirlineReducer.class);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Text.class);

    FileInputFormat.addInputPath(job, new Path(strings[0]));
    FileOutputFormat.setOutputPath(job, new Path(strings[1]));

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.airline.analytics.AirlineUniqueRoutesAnalytics.java

@Override
public int run(String[] strings) throws Exception {

    Job job = Job.getInstance(getConf(), "Hadoop Airline Orign Destination Analytics");

    job.setJarByClass(getClass());//from  w  w w. j av a2  s.  c o m

    // Distributed Cache
    job.addCacheFile(new URI("/airline/codes.csv"));

    job.setMapperClass(AirlineMapper.class);
    // job.setCombinerClass(AirlineReducer.class);
    job.setReducerClass(AirlineReducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(job, new Path(strings[0]));
    FileOutputFormat.setOutputPath(job, new Path(strings[1]));

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.alectenharmsel.research.hadoop.CodeTokenizer.java

License:Apache License

public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: MoabLicenses <input> <output>");
        System.exit(-1);/*from   ww  w .j av  a2  s .  c o  m*/
    }

    Configuration conf = getConf();
    Job job = new Job(conf, "SrcTok");
    job.setJarByClass(CodeTokenizer.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    boolean success = job.waitForCompletion(true);

    return success ? 0 : 1;
}

From source file:com.alectenharmsel.research.SrcTok.java

License:Apache License

public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: MoabLicenses <input> <output>");
        System.exit(-1);/*from  w  w  w  . java2 s.  c om*/
    }

    Configuration conf = getConf();
    Job job = new Job(conf, "SrcTok");
    job.setJarByClass(SrcTok.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(SrcTokMapper.class);
    job.setReducerClass(SrcTokReducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    boolean success = job.waitForCompletion(true);

    return success ? 0 : 1;
}

From source file:com.alexholmes.hadooputils.combine.avro.mapreduce.CombineAvroKeyValueInputFormatTest.java

License:Apache License

@Test
public void testKeyValueInput() throws ClassNotFoundException, IOException, InterruptedException {
    // Create a test input file.
    File inputFile = createInputFile();

    // Configure the job input.
    Job job = new Job();
    FileInputFormat.setInputPaths(job, new Path(inputFile.getAbsolutePath()));
    job.setInputFormatClass(CombineAvroKeyValueInputFormat.class);
    AvroJob.setInputKeySchema(job, Schema.create(Schema.Type.INT));
    AvroJob.setInputValueSchema(job, Schema.create(Schema.Type.STRING));

    // Configure a mapper.
    job.setMapperClass(IndexMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    // Configure a reducer.
    job.setReducerClass(IndexReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(AvroValue.class);
    AvroJob.setOutputValueSchema(job, Schema.createArray(Schema.create(Schema.Type.INT)));

    // Configure the output format.
    job.setOutputFormatClass(AvroKeyValueOutputFormat.class);
    Path outputPath = new Path(mTempDir.getRoot().getPath(), "out-index");
    FileOutputFormat.setOutputPath(job, outputPath);

    // Run the job.
    assertTrue(job.waitForCompletion(true));

    // Verify that the output Avro container file as the expected data.
    File avroFile = new File(outputPath.toString(), "part-r-00000.avro");
    DatumReader<GenericRecord> datumReader = new SpecificDatumReader<GenericRecord>(AvroKeyValue
            .getSchema(Schema.create(Schema.Type.STRING), Schema.createArray(Schema.create(Schema.Type.INT))));
    DataFileReader<GenericRecord> avroFileReader = new DataFileReader<GenericRecord>(avroFile, datumReader);
    assertTrue(avroFileReader.hasNext());

    AvroKeyValue<CharSequence, List<Integer>> appleRecord = new AvroKeyValue<CharSequence, List<Integer>>(
            avroFileReader.next());/*from w  w w  .  ja v  a 2 s .  c om*/
    assertNotNull(appleRecord.get());
    assertEquals("apple", appleRecord.getKey().toString());
    List<Integer> appleDocs = appleRecord.getValue();
    assertEquals(3, appleDocs.size());
    assertTrue(appleDocs.contains(1));
    assertTrue(appleDocs.contains(2));
    assertTrue(appleDocs.contains(3));

    assertTrue(avroFileReader.hasNext());
    AvroKeyValue<CharSequence, List<Integer>> bananaRecord = new AvroKeyValue<CharSequence, List<Integer>>(
            avroFileReader.next());
    assertNotNull(bananaRecord.get());
    assertEquals("banana", bananaRecord.getKey().toString());
    List<Integer> bananaDocs = bananaRecord.getValue();
    assertEquals(2, bananaDocs.size());
    assertTrue(bananaDocs.contains(1));
    assertTrue(bananaDocs.contains(2));

    assertTrue(avroFileReader.hasNext());
    AvroKeyValue<CharSequence, List<Integer>> carrotRecord = new AvroKeyValue<CharSequence, List<Integer>>(
            avroFileReader.next());
    assertEquals("carrot", carrotRecord.getKey().toString());
    List<Integer> carrotDocs = carrotRecord.getValue();
    assertEquals(1, carrotDocs.size());
    assertTrue(carrotDocs.contains(1));

    assertFalse(avroFileReader.hasNext());
    avroFileReader.close();
}

From source file:com.alexholmes.hadooputils.combine.seqfile.mapreduce.CombineSequenceFileJob.java

License:Apache License

/**
 * The driver for the MapReduce job./*from  w w  w.  jav  a 2s. c o  m*/
 *
 * @param conf           configuration
 * @param inputDirAsString  input directory in CSV-form
 * @param outputDirAsString output directory
 * @return true if the job completed successfully
 * @throws java.io.IOException         if something went wrong
 * @throws java.net.URISyntaxException if a URI wasn't correctly formed
 */
public boolean runJob(final Configuration conf, final String inputDirAsString, final String outputDirAsString)
        throws IOException, URISyntaxException, ClassNotFoundException, InterruptedException {

    Job job = new Job(conf);

    job.setJarByClass(CombineSequenceFileJob.class);
    job.setJobName("seqfilecombiner");

    job.setNumReduceTasks(0);

    //        job.setMapperClass(IdentityMapper.class);

    job.setInputFormatClass(CombineSequenceFileInputFormat.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    FileInputFormat.setInputPaths(job, inputDirAsString);
    FileOutputFormat.setOutputPath(job, new Path(outputDirAsString));

    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    boolean jobResult = job.waitForCompletion(true);
    Date endTime = new Date();
    System.out.println("Job ended: " + endTime);
    System.out.println("The job took "
            + TimeUnit.MILLISECONDS.toSeconds(endTime.getTime() - startTime.getTime()) + " seconds.");

    return jobResult;
}

From source file:com.aliyun.openservices.tablestore.hadoop.TableStoreOutputFormatExample.java

License:Apache License

public static void main(String[] args) throws Exception {
    if (!parseArgs(args)) {
        printUsage();//  w ww . j a v  a 2s  .co  m
        System.exit(1);
    }
    if (endpoint == null || accessKeyId == null || accessKeySecret == null || inputTable == null
            || outputTable == null) {
        printUsage();
        System.exit(1);
    }

    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, TableStoreOutputFormatExample.class.getName());
    job.setMapperClass(OwnerMapper.class);
    job.setReducerClass(IntoTableReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(MapWritable.class);
    job.setInputFormatClass(TableStoreInputFormat.class);
    job.setOutputFormatClass(TableStoreOutputFormat.class);

    TableStore.setCredential(job, accessKeyId, accessKeySecret, securityToken);
    TableStore.setEndpoint(job, endpoint, instance);
    TableStoreInputFormat.addCriteria(job, fetchCriteria());
    TableStoreOutputFormat.setOutputTable(job, outputTable);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.antbrains.crf.hadoop.CalcFeatureWeights.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

    if (otherArgs.length != 3 && otherArgs.length != 4) {
        System.err.println("CalcFeatureWeights <inDir> <tmpDir> <outDir> [startStep]");
        System.exit(-1);/*from   ww w. ja  va2  s  . com*/
    }
    int startStep = 1;
    if (otherArgs.length == 4) {
        startStep = Integer.valueOf(otherArgs[otherArgs.length - 1]);
    }
    FileSystem fs = FileSystem.get(conf);
    if (startStep <= 1) {
        System.out.println("calc");
        fs.delete(new Path(otherArgs[1]), true);
        Job job = new Job(conf, CalcFeatureWeights.class.getSimpleName());
        job.setNumReduceTasks(1);
        job.setJarByClass(CalcFeatureWeights.class);
        job.setMapperClass(CalcFeatureMapper.class);
        job.setReducerClass(CalcFeatureReducer.class);

        job.setOutputFormatClass(SequenceFileOutputFormat.class);

        job.setInputFormatClass(SequenceFileInputFormat.class);

        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(MyKey.class);

        job.setOutputKeyClass(MyKey.class);
        job.setOutputValueClass(MyValue.class);
        FileInputFormat.setInputPaths(job, new Path(otherArgs[0]));

        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

        boolean res = job.waitForCompletion(true);
        if (!res) {
            System.err.println("step1 failed");
            return;
        }
    }

    if (startStep <= 2)
    // sort
    {
        fs.delete(new Path(otherArgs[2]), true);
        System.out.println("sort");
        Job job = new Job(conf, CalcFeatureWeights.class.getSimpleName());

        job.setNumReduceTasks(1);
        job.setJarByClass(CalcFeatureWeights.class);
        job.setMapperClass(IdentityMapper.class);
        job.setReducerClass(IdentityReducer.class);

        job.setOutputFormatClass(SequenceFileOutputFormat.class);

        job.setInputFormatClass(SequenceFileInputFormat.class);

        job.setMapOutputKeyClass(MyKey.class);
        job.setMapOutputValueClass(MyValue.class);
        job.setOutputKeyClass(MyKey.class);
        job.setOutputValueClass(MyValue.class);

        FileInputFormat.setInputPaths(job, new Path(otherArgs[1]));

        FileOutputFormat.setOutputPath(job, new Path(otherArgs[2]));

        boolean res = job.waitForCompletion(true);
        if (!res) {
            System.err.println("step2 failed");
            return;
        }
    }

}

From source file:com.asakusafw.runtime.mapreduce.simple.SimpleJobRunnerTest.java

License:Apache License

/**
 * Test for map-reduce job./*from   w  w  w.j  av a 2s . co m*/
 * @throws Exception if failed
 */
@Test
public void map_reduce() throws Exception {
    Job job = newJob();
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapperClass(WordCountMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);

    job.setSortComparatorClass(Text.Comparator.class);
    job.setGroupingComparatorClass(Text.Comparator.class);

    job.setReducerClass(WordCountReducer.class);
    job.setNumReduceTasks(1);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    File inputDir = folder.newFolder();
    File inputFile = new File(inputDir, "input.txt");
    write(inputFile, new String[] { "a b c d", "a a b c", "c", });

    File outputDir = folder.newFolder();
    outputDir.delete();

    FileInputFormat.setInputPaths(job, new Path(inputFile.toURI()));
    FileOutputFormat.setOutputPath(job, new Path(outputDir.toURI()));
    assertThat(new SimpleJobRunner().run(job), is(true));
    assertThat(toMap(read(outputDir)), is(map(new String[] { "a", "3", "b", "2", "c", "3", "d", "1", })));
}