List of usage examples for org.apache.hadoop.mapreduce Job waitForCompletion
public boolean waitForCompletion(boolean verbose) throws IOException, InterruptedException, ClassNotFoundException
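The method submits the job to the cluster if it has not been submitted yet and blocks until the job finishes; when verbose is true it prints progress to the console, and it returns true on success. A minimal usage sketch before the full examples below (MyDriver, WordMapper and WordReducer are hypothetical placeholder classes, not taken from any of the sources listed here):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

// Minimal sketch; WordMapper/WordReducer/MyDriver are placeholders for illustration only.
Configuration conf = new Configuration();
Job job = Job.getInstance(conf, "example job");
job.setJarByClass(MyDriver.class);
job.setMapperClass(WordMapper.class);
job.setReducerClass(WordReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
FileInputFormat.setInputPaths(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));

// waitForCompletion(true) submits the job (if not already submitted), prints
// progress because verbose is true, and blocks until the job completes.
// Its boolean result is conventionally mapped to the process exit code.
System.exit(job.waitForCompletion(true) ? 0 : 1);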
From source file:com.example.Driver.java
License:Open Source License
public int run(String[] args) throws Exception { Configuration conf = new Configuration(); Job job = Job.getInstance(conf, "Your job name"); job.setJarByClass(Driver.class); logger.info("job " + job.getJobName() + " [" + job.getJar() + "] started with the following arguments: " + Arrays.toString(args)); if (args.length < 2) { logger.warn("to run this jar are necessary at 2 parameters \"" + job.getJar() + " input_files output_directory"); return 1; }//from w w w .j a v a2 s .c o m job.setMapperClass(WordcountMapper.class); logger.info("mapper class is " + job.getMapperClass()); //job.setMapOutputKeyClass(Text.class); //job.setMapOutputValueClass(IntWritable.class); logger.info("mapper output key class is " + job.getMapOutputKeyClass()); logger.info("mapper output value class is " + job.getMapOutputValueClass()); job.setReducerClass(WordcountReducer.class); logger.info("reducer class is " + job.getReducerClass()); job.setCombinerClass(WordcountReducer.class); logger.info("combiner class is " + job.getCombinerClass()); //When you are not runnign any Reducer //OR job.setNumReduceTasks(0); // logger.info("number of reduce task is " + job.getNumReduceTasks()); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); logger.info("output key class is " + job.getOutputKeyClass()); logger.info("output value class is " + job.getOutputValueClass()); job.setInputFormatClass(TextInputFormat.class); logger.info("input format class is " + job.getInputFormatClass()); job.setOutputFormatClass(TextOutputFormat.class); logger.info("output format class is " + job.getOutputFormatClass()); Path filePath = new Path(args[0]); logger.info("input path " + filePath); FileInputFormat.setInputPaths(job, filePath); Path outputPath = new Path(args[1]); logger.info("output path " + outputPath); FileOutputFormat.setOutputPath(job, outputPath); job.waitForCompletion(true); return 0; }
From source file:com.examples.ch03.ParseWeblogs_Ex_1.java
public int run(String[] args) throws Exception { Path inputPath = new Path("apache_clf.txt"); Path outputPath = new Path("output"); Configuration conf = getConf(); Job weblogJob = Job.getInstance(conf); weblogJob.setJobName("Weblog Transformer"); weblogJob.setJarByClass(getClass()); weblogJob.setNumReduceTasks(0);//from ww w . ja v a 2 s. c o m weblogJob.setMapperClass(CLFMapper_Ex_1.class); weblogJob.setMapOutputKeyClass(Text.class); weblogJob.setMapOutputValueClass(Text.class); weblogJob.setOutputKeyClass(Text.class); weblogJob.setOutputValueClass(Text.class); weblogJob.setInputFormatClass(TextInputFormat.class); weblogJob.setOutputFormatClass(TextOutputFormat.class); FileInputFormat.setInputPaths(weblogJob, inputPath); FileOutputFormat.setOutputPath(weblogJob, outputPath); if (weblogJob.waitForCompletion(true)) { return 0; } return 1; }
From source file:com.facebook.hiveio.mapreduce.output.WritingTool.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    handleCommandLine(args, conf);
    HadoopUtils.setMapAttempts(conf, 1);
    adjustConfigurationForHive(conf);
    HiveTools.setupJob(conf);

    Job job = new Job(conf, "hive-io-writing");
    if (job.getJar() == null) {
        job.setJarByClass(getClass());
    }
    job.setMapperClass(SampleMapper.class);
    job.setInputFormatClass(SampleInputFormat.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(HiveWritableRecord.class);
    job.setOutputFormatClass(SampleOutputFormat.class);
    job.setNumReduceTasks(0);

    job.submit();
    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:com.fanlehai.hadoop.join.CompositeJoin.java
License:Apache License
/**
 * The main driver for the sort program. Invoke this method to submit the
 * map/reduce job.
 *
 * @throws IOException
 *             When there are communication problems with the job tracker.
 */
@SuppressWarnings("rawtypes")
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    JobClient client = new JobClient(conf);
    ClusterStatus cluster = client.getClusterStatus();
    int num_reduces = (int) (cluster.getMaxReduceTasks() * 0.9);
    String join_reduces = conf.get(REDUCES_PER_HOST);
    if (join_reduces != null) {
        num_reduces = cluster.getTaskTrackers() * Integer.parseInt(join_reduces);
    }

    Job job = Job.getInstance(conf);
    job.setJobName("join");
    job.setJarByClass(CompositeJoin.class);
    job.setMapperClass(Mapper.class);
    job.setReducerClass(Reducer.class);

    Class<? extends InputFormat> inputFormatClass = KeyValueTextInputFormat.class; // SequenceFileInputFormat.class;
    Class<? extends OutputFormat> outputFormatClass = SequenceFileOutputFormat.class;
    Class<? extends WritableComparable> outputKeyClass = Text.class; // BytesWritable.class;
    Class<? extends Writable> outputValueClass = Text.class; // TupleWritable.class;
    String op = "inner";
    List<String> otherArgs = new ArrayList<String>();

    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-r".equals(args[i])) {
                num_reduces = Integer.parseInt(args[++i]);
            } else if ("-inFormat".equals(args[i])) {
                inputFormatClass = Class.forName(args[++i]).asSubclass(InputFormat.class);
            } else if ("-outFormat".equals(args[i])) {
                outputFormatClass = Class.forName(args[++i]).asSubclass(OutputFormat.class);
            } else if ("-outKey".equals(args[i])) {
                outputKeyClass = Class.forName(args[++i]).asSubclass(WritableComparable.class);
            } else if ("-outValue".equals(args[i])) {
                outputValueClass = Class.forName(args[++i]).asSubclass(Writable.class);
            } else if ("-joinOp".equals(args[i])) {
                op = args[++i];
            } else {
                otherArgs.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage(); // exits
        }
    }

    // Set user-supplied (possibly default) job configs
    job.setNumReduceTasks(num_reduces);

    if (otherArgs.size() < 2) {
        System.out.println("ERROR: Wrong number of parameters: ");
        return printUsage();
    }

    String strOut = otherArgs.remove(otherArgs.size() - 1);
    FileSystem.get(new Configuration()).delete(new Path(strOut), true);
    FileOutputFormat.setOutputPath(job, new Path(strOut));

    List<Path> plist = new ArrayList<Path>(otherArgs.size());
    for (String s : otherArgs) {
        plist.add(new Path(s));
    }

    job.setInputFormatClass(CompositeInputFormat.class);
    job.getConfiguration().set(CompositeInputFormat.JOIN_EXPR,
            CompositeInputFormat.compose(op, inputFormatClass, plist.toArray(new Path[0])));
    job.setOutputFormatClass(outputFormatClass);

    job.setMapperClass(MapComposite.class);

    job.setOutputKeyClass(outputKeyClass);
    job.setOutputValueClass(outputValueClass);

    Date startTime = new Date();
    System.out.println("Job started: " + startTime);

    int ret = job.waitForCompletion(true) ? 0 : 1;

    Date end_time = new Date();
    System.out.println("Job ended: " + end_time);
    System.out.println("The job took " + (end_time.getTime() - startTime.getTime()) / 1000 + " seconds.");
    return ret;
}
From source file:com.fanlehai.hadoop.serialize.avro.MapReduceAvroWordCount.java
License:Apache License
public int run(String[] args) throws Exception { if (args.length != 2) { printUsage();/*from w w w . jav a 2 s . com*/ } FileSystem.get(new Configuration()).delete(new Path(args[1]), true); Job job = Job.getInstance(super.getConf(), "AvroWordCount"); job.setJarByClass(MapReduceAvroWordCount.class); job.setJobName("AvroWordCount"); // We call setOutputSchema first so we can override the configuration // parameters it sets AvroJob.setOutputKeySchema(job, Pair.getPairSchema(Schema.create(Type.STRING), Schema.create(Type.INT))); job.setOutputValueClass(NullWritable.class); job.setMapperClass(Map.class); job.setReducerClass(Reduce.class); job.setInputFormatClass(TextInputFormat.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); job.setSortComparatorClass(Text.Comparator.class); FileInputFormat.setInputPaths(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); return job.waitForCompletion(true) ? 1 : 0; }
From source file:com.fanlehai.hadoop.serialize.avro.MapReduceColorCount.java
License:Apache License
public int run(String[] args) throws Exception { if (args.length != 2) { printUsage();/* w w w. ja va 2 s . c o m*/ } FileSystem.get(new Configuration()).delete(new Path(args[1]), true); Job job = Job.getInstance(super.getConf(), "MapReduceAvroWordCount"); job.setJarByClass(MapReduceColorCount.class); job.setJobName("Color Count"); FileInputFormat.setInputPaths(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); job.setInputFormatClass(AvroKeyInputFormat.class); job.setMapperClass(ColorCountMapper.class); AvroJob.setInputKeySchema(job, User.getClassSchema()); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); job.setOutputFormatClass(AvroKeyValueOutputFormat.class); job.setReducerClass(ColorCountReducer.class); AvroJob.setOutputKeySchema(job, Schema.create(Schema.Type.STRING)); AvroJob.setOutputValueSchema(job, Schema.create(Schema.Type.INT)); return job.waitForCompletion(true) ? 1 : 0; }
From source file:com.fanlehai.hadoop.serialize.json.multiline.ExampleJob.java
License:Apache License
/**
 * The MapReduce driver - setup and launch the job.
 *
 * @param args
 *            the command-line arguments
 * @return the process exit code
 * @throws Exception
 *             if something goes wrong
 */
@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: ExampleJob <in dir> <out dir>");
        ToolRunner.printGenericCommandUsage(System.err);
        System.exit(2);
    }

    String input = args[0];
    String output = args[1];

    Configuration conf = super.getConf();

    writeInput(conf, new Path(input));

    Job job = Job.getInstance(getConf(), "ExampleJob");
    job.setJarByClass(ExampleJob.class);
    job.setMapperClass(Map.class);
    job.setNumReduceTasks(0);

    Path outputPath = new Path(output);

    FileInputFormat.setInputPaths(job, input);
    FileOutputFormat.setOutputPath(job, outputPath);

    // use the JSON input format
    job.setInputFormatClass(MultiLineJsonInputFormat.class);

    // specify the JSON attribute name which is used to determine which
    // JSON elements are supplied to the mapper
    MultiLineJsonInputFormat.setInputJsonMember(job, "colorName");

    if (job.waitForCompletion(true)) {
        return 0;
    }
    return 1;
}
From source file:com.flipkart.fdp.migration.distcp.core.MirrorDistCPDriver.java
License:Apache License
public int run(String[] args) throws Exception {
    configuration = getConf();

    MirrorFileInputFormat.setExclusionsFileList(configuration, excludeList);
    MirrorFileInputFormat.setInclusionFileList(configuration, includeList);

    System.out.println("Inclusion File List: " + MirrorFileInputFormat.getInclusionFileList(configuration));

    // Setting task timeout to 2 hrs
    configuration.setLong("mapred.task.timeout", 1000 * 60 * 60 * 2);

    populateConfFromDCMConfig();

    int jobReturnValue = 0;

    stateManager = StateManagerFactory.getStateManager(configuration, dcmConfig);

    System.out.println("Instantiated " + dcmConfig.getStateManagerType()
            + " StateManager, Starting Batch Execution with RunID: " + stateManager.getRunId());

    try {
        stateManager.beginBatch();
    } catch (Exception e) {
        System.out.println("Exception starting batch: " + e.getMessage());
        e.printStackTrace();
        return 1;
    }

    try {
        if (localmode) {
            System.out.println("Running Blueshift in Local Mode...");
            configuration.set("mapreduce.framework.name", "local");
        } else {
            System.out.println("Running Blueshift in Distributed Mode...");
        }

        Job job = createJob(configuration);

        System.out.println("Launching Job - Blueshift v 2.0 - " + dcmConfig.getBatchName());

        job.waitForCompletion(true);

        System.out.println("Job Complete...");

        jobReturnValue = processJobCounters(job);
    } catch (Throwable t) {
        jobReturnValue = 1;
        System.out.println("Job Failed...");
        t.printStackTrace();
    }

    stateManager.completeBatch(jobReturnValue != 0 ? Status.FAILED : Status.COMPLETED);

    return jobReturnValue;
}
From source file:com.flytxt.yesbank.processor.HdfsToHbaseEngine.java
License:Open Source License
public static void main(String[] args) throws Exception { if (args.length < 1) { System.out.println("Hdfs to Hbase Engine requires the model Id as Input ..."); System.exit(1);// ww w .j a v a 2 s. c o m } String modelId = args[0]; DBConnection dbConnection = DBConnection.getInstance(); dbConnection.loadDbProperties(); dbConnection.initializeDataBaseConnection(); String hfdsInputLoc = dbConnection.getHdfsInputDirectory(modelId); if (hfdsInputLoc != null) { Configuration conf = new Configuration(); String params = args[0]; conf.set("test", params); Job job = new Job(conf); // Configuration conf = new Configuration(); // Job job = Job.getInstance(conf, "hfds to hbase Engine"); job.setJarByClass(HdfsToHbaseEngine.class); job.setMapperClass(HdfsEngineMapper.class); // job.setCombinerClass(test.class); // job.setReducerClass(test.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); FileInputFormat.addInputPath(job, new Path(hfdsInputLoc)); job.setOutputFormatClass(NullOutputFormat.class); // FileOutputFormat.setOutputPath(job, new Path("/output_dir")); // conf.set("argParamValue", args[2]); // System.out.println("second argument value ------" + args[0]); // System.out.println(" in main method conf.getStrings --------" + // conf.get("argParamValue")); System.exit(job.waitForCompletion(true) ? 0 : 1); } else { System.out.println(" Hdfs Input Location deos not exists .. Unable to process the Request ...."); System.exit(0); } }
From source file:com.flytxt.yesbank.test.ModelProcessor.java
License:Open Source License
public static void main(String[] args) throws Exception {
    String modelId = args[0];

    ModelProcessor modelProcessor = new ModelProcessor();
    modelProcessor.loadDbProperties();
    modelProcessor.initializeDataBaseConnection();

    String hfdsInputLoc = modelProcessor.getHdfsInputDirectory(modelId);

    if (hfdsInputLoc != null) {
        Configuration conf = new Configuration();
        String params = args[0];
        conf.set("test", params);

        Job job = new Job(conf);
        // Configuration conf = new Configuration();
        // Job job = Job.getInstance(conf, "word count");
        job.setJarByClass(ModelProcessor.class);
        job.setMapperClass(HdfsProcessMapper.class);
        // job.setCombinerClass(IntSumReducer.class);
        // job.setReducerClass(IntSumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(hfdsInputLoc));
        job.setOutputFormatClass(NullOutputFormat.class);
        // FileOutputFormat.setOutputPath(job, new Path("/output_dir"));
        // conf.set("argParamValue", args[2]);
        // System.out.println("second argument value ------" + args[0]);
        // System.out.println(" in main method conf.getStrings --------" + conf.get("argParamValue"));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    } else {
        System.out.println("Hdfs input location does not exist. Unable to process the request.");
        System.exit(0);
    }
}