List of usage examples for the org.apache.hadoop.mapreduce.Job constructor
Job(Configuration conf, String jobName) throws IOException
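Before the per-project examples, a minimal self-contained driver may help show the constructor in isolation. This is a sketch, not code from any of the source files below: MinimalJobDriver is a hypothetical class name, no mapper or reducer is configured so Hadoop's defaults copy each (offset, line) record straight through, and note that in Hadoop 2.x this constructor is deprecated in favor of Job.getInstance(Configuration, String).

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class MinimalJobDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // The constructor documented above: takes a copy of conf and sets the job name.
        Job job = new Job(conf, "minimal job");
        job.setJarByClass(MinimalJobDriver.class);
        // No mapper/reducer set, so the default identity Mapper runs;
        // zero reduce tasks makes this a map-only pass-through job.
        job.setNumReduceTasks(0);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}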
From source file:com.mozilla.main.ReadHBaseWriteHdfs.java
License:LGPL
@Override
public int run(String[] args) throws Exception {
    Configuration conf = new Configuration();
    conf.set("mapred.job.queue.name", "prod");
    Job job = new Job(conf, "ReadHBaseWriteHDFS");
    job.setJarByClass(ReadHBaseWriteHdfs.class);
    Scan scan = new Scan();
    scan.addFamily("data".getBytes());
    TableMapReduceUtil.initTableMapperJob(TABLE_NAME, scan, ReadHBaseWriteHdfsMapper.class, Text.class,
            Text.class, job);
    job.setReducerClass(ReadHBaseWriteHdfsReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setNumReduceTasks(1000);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
    SequenceFileOutputFormat.setOutputPath(job, new Path(args[0]));
    job.waitForCompletion(true);
    if (job.isSuccessful()) {
        System.out.println("DONE");
    }
    return 0;
}
From source file:com.mvdb.platform.action.VersionMerge.java
License:Apache License
public static void main(String[] args) throws Exception {
    logger.error("error1");
    logger.warn("warning1");
    logger.info("info1");
    logger.debug("debug1");
    logger.trace("trace1");
    ActionUtils.setUpInitFileProperty();
    // LoggerContext lc = (LoggerContext) LoggerFactory.getILoggerFactory();
    // StatusPrinter.print(lc);

    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    // Also add lastMergedTimeStamp, mergeUptoTimestamp, and the passive db name (mv1 or mv2)
    if (otherArgs.length != 3) {
        System.err.println("Usage: versionmerge <customer-directory> <last-merged-dirname> <last-copied-dirname>");
        System.exit(2);
    }
    // Example: file:/home/umesh/.mvdb/etl/data/alpha
    // Example: hdfs://localhost:9000/data/alpha
    String customerDirectory = otherArgs[0];
    String lastMergedDirName = otherArgs[1];
    String lastCopiedDirName = otherArgs[2];

    org.apache.hadoop.conf.Configuration conf1 = new org.apache.hadoop.conf.Configuration();
    // conf1.addResource(new Path("/home/umesh/ops/hadoop-1.2.0/conf/core-site.xml"));
    FileSystem hdfsFileSystem = FileSystem.get(conf1);
    Path topPath = new Path(customerDirectory);

    // Clean scratch db
    Path passiveDbPath = new Path(topPath, "db/mv1");
    Path tempDbPath = new Path(topPath, "db/tmp-" + (int) (Math.random() * 100000));
    if (hdfsFileSystem.exists(tempDbPath)) {
        boolean success = hdfsFileSystem.delete(tempDbPath, true);
        if (success == false) {
            System.err.println(String.format("Unable to delete temp directory %s", tempDbPath.toString()));
            System.exit(1);
        }
    }

    // The last three parameters are hardcoded; the nulls must be replaced later after changing the input parameters.
    Path[] inputPaths = getInputPaths(hdfsFileSystem, topPath, lastMergedDirName, lastCopiedDirName, null);
    Set<String> tableNameSet = new HashSet<String>();
    for (Path path : inputPaths) {
        tableNameSet.add(path.getName());
    }

    Job job = new Job(conf, "versionmerge");
    job.setJarByClass(VersionMerge.class);
    job.setMapperClass(VersionMergeMapper.class);
    job.setReducerClass(VersionMergeReducer.class);
    job.setMapOutputKeyClass(MergeKey.class);
    job.setMapOutputValueClass(BytesWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(BytesWritable.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    String lastDirName = null;
    if (inputPaths != null && inputPaths.length > 1) {
        lastDirName = inputPaths[inputPaths.length - 2].getParent().getName();
    }
    for (Path inputPath : inputPaths) {
        FileInputFormat.addInputPath(job, inputPath);
    }
    FileOutputFormat.setOutputPath(job, tempDbPath);

    for (String table : tableNameSet) {
        if (table.endsWith(".dat") == false) {
            continue;
        }
        table = table.replaceAll("-", "");
        table = table.replaceAll(".dat", "");
        MultipleOutputs.addNamedOutput(job, table, SequenceFileOutputFormat.class, Text.class,
                BytesWritable.class);
    }

    boolean success = job.waitForCompletion(true);
    System.out.println("Success:" + success);
    System.out.println(ManagementFactory.getRuntimeMXBean().getName());
    if (success && lastDirName != null) {
        ActionUtils.setConfigurationValue(new Path(customerDirectory).getName(),
                ConfigurationKeys.LAST_MERGE_TO_MVDB_DIRNAME, lastDirName);
    }
    // hdfsFileSystem.delete(passiveDbPath, true);
    // hdfsFileSystem.rename(tempDbPath, passiveDbPath);
    System.exit(success ? 0 : 1);
}
From source file:com.mvdb.platform.scratch.action.WordCount.java
License:Apache License
public static void main(String[] args) throws Exception {
    logger.error("error1");
    logger.warn("warning1");
    logger.info("info1");
    logger.debug("debug1");
    logger.trace("trace1");
    ILoggerFactory lc = LoggerFactory.getILoggerFactory();
    System.err.println("lc:" + lc);
    // print logback's internal status
    // StatusPrinter.print(lc);

    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs != null) {
        for (String arg : otherArgs) {
            System.out.println(arg);
        }
    }
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.mycompany.hadooptrain.WordCount.java
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Path inputPath = new Path(args[0]);
    Path outputDir = new Path(args[1]);

    // Create configuration
    Configuration conf = new Configuration(true);

    // Create job
    Job job = new Job(conf, "WordCount");
    job.setJarByClass(WordCountMapper.class);

    // Setup MapReduce
    job.setMapperClass(WordCountMapper.class);
    job.setReducerClass(WordCountReducer.class);
    job.setNumReduceTasks(1);

    // Specify key / value
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    // Input
    FileInputFormat.addInputPath(job, inputPath);
    job.setInputFormatClass(TextInputFormat.class);

    // Output
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setOutputFormatClass(TextOutputFormat.class);

    // Delete output if it exists
    FileSystem hdfs = FileSystem.get(conf);
    if (hdfs.exists(outputDir))
        hdfs.delete(outputDir, true);

    // Execute job
    int code = job.waitForCompletion(true) ? 0 : 1;
    System.exit(code);
}
From source file:com.nearinfinity.blur.mapreduce.BlurTask.java
License:Apache License
public Job configureJob(Configuration configuration) throws IOException {
    if (getIndexingType() == INDEXING_TYPE.UPDATE) {
        checkTable();
    }
    ByteArrayOutputStream os = new ByteArrayOutputStream();
    DataOutputStream output = new DataOutputStream(os);
    write(output);
    output.close();
    String blurTask = new String(Base64.encodeBase64(os.toByteArray()));
    configuration.set(BLUR_BLURTASK, blurTask);

    Job job = new Job(configuration, "Blur Indexer");
    job.setReducerClass(BlurReducer.class);
    job.setOutputKeyClass(BytesWritable.class);
    job.setOutputValueClass(BlurMutate.class);
    job.setNumReduceTasks(getNumReducers(configuration));
    return job;
}
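Note that configureJob() only builds and returns the Job; unlike the main()-based drivers elsewhere on this page, it does not submit it. A hedged sketch of what calling code might do with the returned job (the BlurTask setup and input configuration are assumed to happen elsewhere):

BlurTask task = new BlurTask(); // assumed: task fields are populated before this point
Job job = task.configureJob(new Configuration());
// Input paths and the input format are assumed to be configured by the caller.
System.exit(job.waitForCompletion(true) ? 0 : 1);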
From source file:com.neu.cs6240.AvgTimeToAnswer.AvgTimeToAnsPerHashTag.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    conf.set("mapred.textoutputformat.separator", ",");
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: AvgTimeToAnsPerHashTag <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "AvgTimeToAnsPerHashTag");
    job.setJarByClass(AvgTimeToAnsPerHashTag.class);
    job.setMapperClass(AvgTimeToAnsPerHashTagMapper.class);
    job.setReducerClass(AvgTimeToAnsPerHashTagReducer.class);
    job.setCombinerClass(AvgTimeToAnsPerHashTagReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setPartitionerClass(AvgTimeToAnsPerHashTagPartitioner.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.neu.cs6240.TopKExperts.JoinQA.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: JoinQA <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "JoinQA");
    job.setJarByClass(JoinQA.class);
    job.setMapperClass(JoinQAMapper.class);
    job.setReducerClass(JoinQAReducer.class);
    job.setOutputKeyClass(JoinQAKey.class);
    job.setOutputValueClass(JoinQAValue.class);
    job.setPartitionerClass(JoinQAPartitioner.class);
    job.setGroupingComparatorClass(JoinQAGroupComparator.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    boolean isSucess = false;
    isSucess = job.waitForCompletion(true);
    if (isSucess) {
        // On successful completion of JoinQA, start UserAnswerCountPerHashTag
        System.out.println("MR - JoinQA complete. Starting UserAnswerCountPerHashTag...");
        String[] argsForMR2 = new String[2];
        argsForMR2[0] = otherArgs[1];
        argsForMR2[1] = otherArgs[1] + "MR2";
        isSucess = UserAnswerCountPerHashTag.initUserAnswerCountPerHashTag(argsForMR2);
        if (isSucess) {
            // On successful completion of UserAnswerCountPerHashTag, start TopKPerHashTag
            System.out.println("MR - UserAnswerCountPerHashTag complete. Starting TopKPerHashTag...");
            String[] argsForMR3 = new String[2];
            argsForMR3[0] = argsForMR2[1];
            argsForMR3[1] = argsForMR2[1] + "MR3";
            isSucess = TopKPerHashTag.initTopKPerHashTag(argsForMR3);
            if (isSucess) {
                // All three MR jobs completed successfully
                System.out.println("All MR - Successful.");
            } else {
                // TopKPerHashTag MR failed
                System.out.println("MR - TopKPerHashTag failed.");
            }
        } else {
            // On unsuccessful completion of UserAnswerCountPerHashTag, end the MR chain
            System.out.println("MR - UserAnswerCountPerHashTag failed.");
        }
    } else {
        // On unsuccessful completion of JoinQA, end the MR chain
        System.out.println("MR - JoinQA failed.");
    }
    System.exit(isSucess ? 0 : 1);
}
From source file:com.neu.cs6240.TopKExperts.TopKPerHashTag.java
License:Apache License
public static boolean initTopKPerHashTag(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: TopKPerHashTag <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "TopKPerHashTag");
    job.setJarByClass(TopKPerHashTag.class);
    job.setMapperClass(TopKPerHashTagMapper.class);
    job.setReducerClass(TopKPerHashTagReducer.class);
    job.setOutputKeyClass(TopKPerHashTagKey.class);
    job.setOutputValueClass(Text.class);
    job.setPartitionerClass(TopKPerHashTagPartitioner.class);
    job.setGroupingComparatorClass(TopKPerHashTagGroupComparator.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    return job.waitForCompletion(true);
}
From source file:com.neu.cs6240.TopKExperts.UserAnswerCountPerHashTag.java
License:Apache License
public static boolean initUserAnswerCountPerHashTag(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: UserAnswerCountPerHashTag <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "UserAnswerCountPerHashTag");
    job.setJarByClass(UserAnswerCountPerHashTag.class);
    job.setMapperClass(UserAnswerCountPerHashTagMapper.class);
    job.setReducerClass(UserAnswerCountPerHashTagReducer.class);
    job.setOutputKeyClass(UserAnswerCountPerHashTagKey.class);
    job.setOutputValueClass(IntWritable.class);
    job.setPartitionerClass(UserAnswerCountPerHashTagPartitioner.class);
    job.setGroupingComparatorClass(UserAnswerCountPerHashTagGroupComparator.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    return job.waitForCompletion(true);
}
From source file:com.neu.cs6240.Xml2csv.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Set the XML tags that delimit each record for the splitter
    conf.set("xmlinput.start", "<row ");
    conf.set("xmlinput.end", " />");
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: Xml2csv <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "Converts Posts.xml to .csv");
    job.setJarByClass(Xml2csv.class);
    job.setInputFormatClass(XmlInputFormat.class);
    job.setMapperClass(PostsMapper.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(IntWritable.class);
    job.setNumReduceTasks(0);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
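A note on the two conf.set() calls: XmlInputFormat here appears to be the Mahout-style reader that scans the input for the byte sequences given by xmlinput.start and xmlinput.end, so with these values each self-closing element of the form <row ... /> in Posts.xml becomes one record handed to PostsMapper. This reading is inferred from the configuration keys, not from the XmlInputFormat source itself.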