Example usage for org.apache.hadoop.mapreduce Job setMapOutputValueClass

Introduction

On this page you can find example usage for org.apache.hadoop.mapreduce Job setMapOutputValueClass.

Prototype

public void setMapOutputValueClass(Class<?> theClass) throws IllegalStateException 

Document

Set the value class for the map output data. This allows the map output value class to differ from the final output value class.
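
For illustration, here is a minimal, self-contained driver sketch (not drawn from the projects below; the class names are hypothetical). The mapper emits IntWritable counts while the reducer writes LongWritable totals, so the map output value class cannot be inferred from the final output value class and must be set explicitly:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordCountDriver {

    // Emits (Text, IntWritable): one count per token.
    public static class TokenMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        private static final IntWritable ONE = new IntWritable(1);
        private final Text word = new Text();

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            for (String token : value.toString().split("\\s+")) {
                if (!token.isEmpty()) {
                    word.set(token);
                    context.write(word, ONE);
                }
            }
        }
    }

    // Emits (Text, LongWritable): a different value type than the map output.
    public static class SumReducer extends Reducer<Text, IntWritable, Text, LongWritable> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            long sum = 0;
            for (IntWritable v : values) {
                sum += v.get();
            }
            context.write(key, new LongWritable(sum));
        }
    }

    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "word count");
        job.setJarByClass(WordCountDriver.class);
        job.setMapperClass(TokenMapper.class);
        job.setReducerClass(SumReducer.class);

        // The mapper's value type (IntWritable) differs from the final output
        // value type (LongWritable), so it must be declared here; otherwise it
        // would default to the class set by setOutputValueClass.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);

        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}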

Usage

From source file:com.rockstor.compact.RecoveryTool.java

License:Apache License

private Job createSubmittableJob(Configuration conf) throws IOException {
    Job job = new Job(conf, NAME);
    job.setJarByClass(RecoveryTool.class);

    job.setInputFormatClass(CompactDirInputFormat.class);
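    // map-only job whose output is discarded (NullOutputFormat below),
    // so NullWritable serves as both map output key and value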
    job.setMapOutputValueClass(NullWritable.class);
    job.setMapOutputKeyClass(NullWritable.class);

    job.setMapperClass(RecoveryMapper.class);

    job.setNumReduceTasks(0);

    job.setOutputFormatClass(NullOutputFormat.class);
    LOG.info("init job " + NAME + " OK!");
    return job;
}

From source file:com.sa.npopa.samples.hbase.FindBadMOBReferences.java

License:Apache License

public static Job createSubmittableJob(Configuration conf, String[] args) throws IOException {
    String tableName = args[0];

    Job job = Job.getInstance(conf, conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName));
    job.setJarByClass(FindBadMOBReferences.class);

    Scan scan = new Scan();
    scan.setCacheBlocks(false);
    scan.setBatch(10);
    scan.setAttribute(MobConstants.MOB_SCAN_RAW, Bytes.toBytes(Boolean.TRUE));
    scan.setAttribute(MobConstants.MOB_SCAN_REF_ONLY, Bytes.toBytes(Boolean.TRUE));
    scan.addFamily(Bytes.toBytes("J"));
    //scan.setRowPrefixFilter(Bytes.toBytes("a00"));

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
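    // (TableMapReduceUtil.initTableMapperJob below sets the map output classes again
    // from its outputKeyClass/outputValueClass arguments, so the two calls above are
    // redundant but harmless)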
    // job.setOutputFormatClass(NullOutputFormat.class);
    TableMapReduceUtil.initTableMapperJob(tableName, scan, FindBadMOBReferencesMapper.class, Text.class,
            Text.class, job);

    //job.setNumReduceTasks(0);
    job.setReducerClass(FindBadMOBReferencesReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setNumReduceTasks(1);

    FileOutputFormat.setOutputPath(job, new Path("/tmp/out"));
    return job;
}

From source file:com.savy3.nonequijoin.MapOutputSampler.java

License:Apache License

/**
 * Driver for InputSampler MapReduce Job
 */
public static void runMap(Job job, Path sampleInputPath)
        throws IOException, IllegalStateException, ClassNotFoundException, InterruptedException {
    LOG.info("Running a MapReduce job on sample input file " + sampleInputPath.toString());

    Configuration conf = new Configuration();
    conf.setBoolean("mapreduce.job.ubertask.enable", true);
    conf.set("numSamples", "" + (job.getNumReduceTasks() - 1));
    Job sampleJob = new Job(conf);
    sampleJob.setMapperClass(job.getMapperClass());
    sampleJob.setReducerClass(SampleKeyReducer.class);
    sampleJob.setJarByClass(job.getMapperClass());
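    // mirror the parent job's map output types so its mapper can run unchanged in the sample job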
    sampleJob.setMapOutputKeyClass(job.getMapOutputKeyClass());
    sampleJob.setMapOutputValueClass(job.getMapOutputValueClass());
    sampleJob.setOutputKeyClass(job.getMapOutputKeyClass());
    sampleJob.setOutputValueClass(NullWritable.class);
    sampleJob.setInputFormatClass(SequenceFileInputFormat.class);
    sampleJob.setOutputFormatClass(SequenceFileOutputFormat.class);

    SequenceFileInputFormat.addInputPath(sampleJob, sampleInputPath);
    FileSystem fs = FileSystem.get(conf);

    Path out = new Path(sampleInputPath.getParent(), "mapOut");
    fs.delete(out, true);

    SequenceFileOutputFormat.setOutputPath(sampleJob, out);

    sampleJob.waitForCompletion(true);

    LOG.info("Sample MapReduce job output file " + out.toString());

    Path partFile = new Path(out, "part-r-00000");
    Path tmpFile = new Path("/_tmp");
    fs.delete(tmpFile, true);
    fs.rename(partFile, tmpFile);
    fs.delete(sampleInputPath.getParent(), true);
    fs.rename(new Path("/_tmp"), sampleInputPath.getParent());

    LOG.info("Sample partitioning file copied to location " + sampleInputPath.getParent().toString());
}

From source file:com.shopzilla.hadoop.mapreduce.MiniMRClusterContextMRTest.java

License:Apache License

@Test
public void testWordCount() throws Exception {
    Path input = new Path("/user/test/keywords_data");
    Path output = new Path("/user/test/word_count");

    Job job = new Job(configuration);

    job.setJobName("Word Count Test");

    job.setMapperClass(WordCountMapper.class);
    job.setReducerClass(SumReducer.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    job.setNumReduceTasks(1);
    FileInputFormat.setInputPaths(job, input);
    FileOutputFormat.setOutputPath(job, output);

    assertTrue("All files from /data classpath directory should have been copied into HDFS",
            miniMRClusterContext.getFileSystem().exists(input));

    job.waitForCompletion(true);

    assertTrue("Output file should have been created", miniMRClusterContext.getFileSystem().exists(output));

    final LinkedList<String> expectedLines = new LinkedList<String>();
    expectedLines.add("goodbye\t1");
    expectedLines.add("hello\t1");
    expectedLines.add("world\t2");

    miniMRClusterContext.processData(output, new Function<String, Void>() {
        @Override
        public Void apply(String line) {
            assertEquals(expectedLines.pop(), line);
            return null;
        }
    });
    assertEquals(0, expectedLines.size());
}

From source file:com.sirius.hadoop.job.onlinetime.OnlineTimeJob.java

License:Apache License

public Job build() throws Exception {
    //init
    Job job = Job.getInstance(getConf(), "onlinetime");
    job.setJarByClass(OnlineTimeJob.class);

    //map
    job.setMapperClass(StatusMapper.class);
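    // map output types (StatusKey/OnlineRecord) differ from the final
    // Text/LongWritable output set below, so both must be declared explicitly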
    job.setMapOutputKeyClass(StatusKey.class);
    job.setMapOutputValueClass(OnlineRecord.class);

    //custom partition
    job.setPartitionerClass(StatusKeyPartitioner.class);

    //reduce
    job.setGroupingComparatorClass(StatusKeyGroupComparator.class);
    job.setReducerClass(StatusReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    //input
    FileInputFormat.setInputPaths(job, new Path("/subscriber_status/subscriber_status.json"));

    //output
    FileOutputFormat.setOutputPath(job, out);
    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, Lz4Codec.class);

    return job;
}

From source file:com.soteradefense.dga.louvain.mapreduce.CommunityCompression.java

License:Apache License

public int run(String[] args) throws Exception {
    Configuration mrConf = this.getConf();
    for (java.util.Map.Entry<String, String> entry : dgaConfiguration.getSystemProperties().entrySet()) {
        mrConf.set(entry.getKey(), entry.getValue());
    }

    Job job = Job.getInstance(mrConf);
    job.setJarByClass(CommunityCompression.class);
    Path in = new Path(inputPath);
    Path out = new Path(outputPath);

    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);
    job.setJobName("CommunityCompression");

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LouvainVertexWritable.class);

    job.setMapperClass(CommunityCompression.Map.class);
    job.setReducerClass(CommunityCompression.Reduce.class);

    logger.debug("Running Mapreduce step with job configuration: {}", job);

    return job.waitForCompletion(false) ? 0 : 1;
}

From source file:com.soteradefense.dga.louvain.mapreduce.LouvainTableSynthesizer.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    Job job = null;
    try {
        int iteration = 0;
        if (!basePath.endsWith("/"))
            basePath = basePath + "/";
        String inputPath = basePath + GIRAPH_FOLDER_BASE_NAME + FILE_NAME_SEPARATOR + iteration;
        String joinPath = basePath + GIRAPH_FOLDER_BASE_NAME + FILE_NAME_SEPARATOR + (iteration + 1);
        String outputPath = basePath + TABLE_BASE_NAME + FILE_NAME_SEPARATOR + iteration;
        Configuration mrConf = this.getConf();
        job = Job.getInstance(mrConf);

        for (Map.Entry<String, String> entry : dgaConfiguration.getSystemProperties().entrySet()) {
            mrConf.set(entry.getKey(), entry.getValue());
        }

        FileSystem fs = FileSystem.get(job.getConfiguration());
        boolean nextFileExists = fs.exists(new Path(joinPath));
        while (nextFileExists) {
            System.out.println("Processing " + inputPath + " and " + joinPath);
            job = Job.getInstance(mrConf);
            job.setJobName("Louvain Table Synthesizer " + iteration);

            job.setJarByClass(LouvainTableSynthesizer.class);

            job.setMapperClass(LouvainTableSynthesizerMapper.class);
            job.setReducerClass(LouvainTableSynthesizerReducer.class);

            job.setInputFormatClass(TextInputFormat.class);
            job.setOutputFormatClass(TextOutputFormat.class);

            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(Text.class);

            //Reducer Output
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(NullWritable.class);

            //Add both input folders
            Path in = new Path(inputPath);
            Path joinIn = new Path(joinPath);
            Path out = new Path(outputPath);
            FileInputFormat.addInputPath(job, in);
            FileInputFormat.addInputPath(job, joinIn);
            FileOutputFormat.setOutputPath(job, out);

            job.waitForCompletion(true);
            //Set the new temp input path
            inputPath = outputPath;
            iteration++;
            outputPath = basePath + TABLE_BASE_NAME + FILE_NAME_SEPARATOR + iteration;
            joinPath = basePath + GIRAPH_FOLDER_BASE_NAME + FILE_NAME_SEPARATOR + (iteration + 1);
            nextFileExists = fs.exists(new Path(joinPath));
        }

    } catch (IOException e) {
        e.printStackTrace();
        return -1;
    } catch (InterruptedException e) {
        e.printStackTrace();
        return -1;
    } catch (ClassNotFoundException e) {
        e.printStackTrace();
        return -1;
    }
    return 0;
}

From source file:com.soteradefense.dga.LouvainRunner.java

License:Apache License

private int runMapreduceJob(String inputPath, String outputPath, DGAConfiguration conf) throws Exception {
    Configuration mrConf = new Configuration();
    for (Map.Entry<String, String> entry : conf.getSystemProperties().entrySet()) {
        mrConf.set(entry.getKey(), entry.getValue());
    }

    Job job = Job.getInstance(mrConf);
    job.setJarByClass(LouvainRunner.class);
    Path in = new Path(inputPath);
    Path out = new Path(outputPath);

    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);
    job.setJobName("CommunityCompression");

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LouvainVertexWritable.class);

    job.setMapperClass(CommunityCompression.Map.class);
    job.setReducerClass(CommunityCompression.Reduce.class);

    logger.debug("Running Mapreduce step with job configuration: {}", job);

    return job.waitForCompletion(false) ? 0 : 1;
}

From source file:com.splicemachine.mrio.api.SpliceTableMapReduceUtil.java

License:Apache License

/**
 * Use this before submitting a TableMap job. It will appropriately set up
 * the job.
 *
 * @param table  The Splice table name to read from.
 * @param scan  The scan instance with the columns, time range etc.
 * @param mapper  The mapper class to use.
 * @param outputKeyClass  The class of the output key.
 * @param outputValueClass  The class of the output value.
 * @param job  The current job to adjust.  Make sure the passed job is
 * carrying all necessary HBase configuration.
 * @param addDependencyJars upload HBase jars and jars for any of the configured
 *           job classes via the distributed cache (tmpjars).
 * @throws IOException When setting up the details fails.
 */
public static void initTableMapperJob(String table, Scan scan, Class<? extends Mapper> mapper,
        Class<? extends WritableComparable> outputKeyClass, Class<? extends Object> outputValueClass, Job job,
        boolean addDependencyJars, Class<? extends InputFormat> inputFormatClass) throws IOException {
    job.setInputFormatClass(inputFormatClass);
    if (outputValueClass != null)
        job.setMapOutputValueClass(outputValueClass);
    if (outputKeyClass != null)
        job.setMapOutputKeyClass(outputKeyClass);
    if (mapper != null)
        job.setMapperClass(mapper);
    job.getConfiguration().set(MRConstants.SPLICE_INPUT_TABLE_NAME, table);
    job.getConfiguration().set(TableInputFormat.SCAN, convertScanToString(scan));
    if (addDependencyJars) {
        addDependencyJars(job);
    }

}

From source file:com.splunk.shuttl.integration.hadoop.hbase.CSVJobFactory.java

License:Apache License

/**
 * @return the configured Job
 * @throws IOException
 */
public static Job getConfiguredJob(String[] arguments) throws IOException {

    Configuration jobConfiguration = new Configuration(true);
    // Load hbase-site.xml
    HBaseConfiguration.addHbaseResources(jobConfiguration);

    jobConfiguration.set("fs.default.name", arguments[0]);
    jobConfiguration.set("mapred.job.tracker", arguments[1]);
    jobConfiguration.set(JobConfigurationConstants.FILENAME, arguments[2]);
    jobConfiguration.set(JobConfigurationConstants.OUTPUT_PATH, arguments[3]);
    jobConfiguration.set(JobConfigurationConstants.TABLE_NAME, arguments[4]);

    jobConfiguration.set(JobConfigurationConstants.COLUMN_FAMILY, "d");

    Job job = new Job(jobConfiguration, "BucketToHbase");
    job.setJarByClass(CSVMapper.class);

    job.setMapperClass(CSVMapper.class);
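    // (ImmutableBytesWritable, KeyValue) is the map output pair commonly used
    // when preparing HBase bulk loads (e.g. with HFileOutputFormat)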
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(KeyValue.class);

    job.setInputFormatClass(TextInputFormat.class);

    return job;
}