Example usage for org.apache.hadoop.mapreduce Job waitForCompletion

List of usage examples for org.apache.hadoop.mapreduce Job waitForCompletion

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce Job waitForCompletion.

Prototype

public boolean waitForCompletion(boolean verbose)
        throws IOException, InterruptedException, ClassNotFoundException 

Document

Submit the job to the cluster and wait for it to finish.
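
As a quick orientation before the full examples below, here is a minimal, self-contained driver sketch showing the typical call pattern. The class name, the pass-through mapper, and the path arguments are illustrative assumptions, not taken from any of the examples on this page.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class WaitForCompletionExample {

    // Identity-style mapper, included only to keep the sketch self-contained.
    public static class PassThroughMapper extends Mapper<LongWritable, Text, LongWritable, Text> {
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            context.write(key, value);
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = new Job(conf, "waitForCompletion example");
        job.setJarByClass(WaitForCompletionExample.class);

        job.setMapperClass(PassThroughMapper.class);
        job.setNumReduceTasks(0); // map-only job
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        FileInputFormat.setInputPaths(job, new Path(args[0]));   // input path (hypothetical)
        FileOutputFormat.setOutputPath(job, new Path(args[1]));  // output path (hypothetical)

        // Submit the job and block until it finishes; "true" prints progress to the console.
        boolean succeeded = job.waitForCompletion(true);
        System.exit(succeeded ? 0 : 1);
    }
}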

Usage

From source file: com.datasalt.utils.mapred.joiner.TestMultiJoinerGlob.java

License: Apache License

@Test
public void test() throws IOException, InterruptedException, ClassNotFoundException {

    Configuration conf = getConf();
    MultiJoiner multiJoiner = new MultiJoiner("MultiJoiner Test", conf);
    multiJoiner.setReducer(TestReducer.class);
    multiJoiner.setOutputKeyClass(Text.class);
    multiJoiner.setOutputValueClass(Text.class);
    multiJoiner.setOutputFormat(TextOutputFormat.class);
    multiJoiner.setOutputPath(new Path(OUTPUT_FOR_TEST));

    Job job = multiJoiner
            .addChanneledInput(0, new Path("src/test/resources/glob-folder/*"), A.class, TextInputFormat.class,
                    AMapper.class)
            .addChanneledInput(1, new Path("src/test/resources/multijoiner.test.b.txt"), B.class,
                    TextInputFormat.class, BMapper.class)
            .getJob();
    job.waitForCompletion(true);
    assertTrue(job.isSuccessful());

    HadoopUtils.deleteIfExists(FileSystem.get(conf), new Path(OUTPUT_FOR_TEST));
}

From source file: com.datasalt.utils.mapred.joiner.TestMultiJoinerMultiChannel.java

License: Apache License

@Test
public void test() throws IOException, InterruptedException, ClassNotFoundException {

    Configuration conf = getConf();
    MultiJoiner multiJoiner = new MultiJoiner("MultiJoiner Test", conf);
    multiJoiner.setReducer(TestReducer.class);
    multiJoiner.setOutputKeyClass(Text.class);
    multiJoiner.setOutputValueClass(Text.class);
    multiJoiner.setOutputFormat(TextOutputFormat.class);
    multiJoiner.setOutputPath(new Path(OUTPUT_FOR_TEST));

    Job job = multiJoiner.addInput(new Path("src/test/resources/multijoiner.test.a.txt"), TextInputFormat.class,
            ABMapper.class).setChannelDatumClass(0, A.class).setChannelDatumClass(1, B.class).getJob();

    job.waitForCompletion(true);
    assertTrue(job.isSuccessful());

    HadoopUtils.deleteIfExists(FileSystem.get(conf), new Path(OUTPUT_FOR_TEST));
}

From source file: com.datasalt.utils.mapred.joiner.TestMultiJoinerSameClass.java

License: Apache License

@Test
public void test() throws IOException, InterruptedException, ClassNotFoundException {

    Configuration conf = getConf();
    MultiJoiner multiJoiner = new MultiJoiner("MultiJoiner Test", conf);
    multiJoiner.setReducer(TestReducerSameClass.class);
    multiJoiner.setOutputKeyClass(Text.class);
    multiJoiner.setOutputValueClass(Text.class);
    multiJoiner.setOutputFormat(TextOutputFormat.class);
    multiJoiner.setOutputPath(new Path(OUTPUT_FOR_TEST));
    Job job = multiJoiner
            .addChanneledInput(0, new Path("src/test/resources/multijoiner.test.a.txt"), A.class,
                    TextInputFormat.class, AMapperSameClass.class)
            .addChanneledInput(1, new Path("src/test/resources/multijoiner.test.same.class.a.txt"), A.class,
                    TextInputFormat.class, AMapperSameClass.class)
            .getJob();
    job.waitForCompletion(true);
    assertTrue(job.isSuccessful());

    HadoopUtils.deleteIfExists(FileSystem.get(conf), new Path(OUTPUT_FOR_TEST));
}

From source file: com.datasalt.utils.mapred.joiner.TestMultiJoinerSecondarySort.java

License: Apache License

@Test
public void test() throws IOException, InterruptedException, ClassNotFoundException {

    Configuration conf = getConf();
    MultiJoiner multiJoiner = new MultiJoiner("MultiJoiner Test", conf);
    multiJoiner.setReducer(TestReducerSecondarySort.class);
    multiJoiner.setOutputKeyClass(Text.class);
    multiJoiner.setOutputValueClass(Text.class);
    multiJoiner.setOutputFormat(TextOutputFormat.class);
    multiJoiner.setOutputPath(new Path(OUTPUT_FOR_TEST));
    Job job = multiJoiner.setMultiJoinPairClass(MultiJoinPairText.class)
            .addChanneledInput(0, new Path("src/test/resources/multijoiner.test.a.2.txt"), A.class,
                    TextInputFormat.class, AMapperSecondarySort.class)
            .addChanneledInput(1, new Path("src/test/resources/multijoiner.test.b.2.txt"), B.class,
                    TextInputFormat.class, BMapperSecondarySort.class)
            .getJob();
    job.waitForCompletion(true);
    assertTrue(job.isSuccessful());

    HadoopUtils.deleteIfExists(FileSystem.get(conf), new Path(OUTPUT_FOR_TEST));
}

From source file: com.declum.squzer.example.hbase.table2file.Export.java

License: Apache License

/**
 * Main entry point.
 * 
 * @param args
 *            The command line parameters.
 * @throws Exception
 *             When running the job fails.
 */
public static void main(String[] args) throws Exception {
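    // Note: the command-line arguments are replaced with hardcoded test values on the next line.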
    args = new String[] { "test", "Out" };
    Configuration conf = HBaseConfiguration.create();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        usage("Wrong number of arguments: " + otherArgs.length);
        System.exit(-1);
    }
    Job job = createSubmittableJob(conf, otherArgs);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file: com.digitalpebble.behemoth.io.sequencefile.SequenceFileConverterJob.java

License: Apache License

public int run(String[] args) throws Exception {
    int result = 0;
    addInputOption();
    addOutputOption();
    if (parseArguments(args) == null) {
        return -1;
    }
    Path input = getInputPath();
    Path output = getOutputPath();

    Job job = prepareJob(input, output, SequenceFileInputFormat.class, SequenceFileConverterMapper.class,
            Text.class, BehemothDocument.class, SequenceFileOutputFormat.class);
    job.setJobName("Convert Sequence File: " + input);
    job.waitForCompletion(true);

    if (log.isInfoEnabled()) {
        log.info("Conversion: done");
    }
    return result;
}

From source file: com.digitalpebble.behemoth.mahout.BehemothDocumentProcessor.java

License: Apache License

/**
 * Converts the input documents into a token array using the
 * {@link StringTuple}. The input documents have to be in the
 * {@link org.apache.hadoop.io.SequenceFile} format.
 *
 * @param input
 *            input directory of the documents in
 *            {@link org.apache.hadoop.io.SequenceFile} format
 * @param output
 *            output directory where the {@link StringTuple} token array of
 *            each document is to be created
 * @param type
 *            The annotation type representing the tokens
 * @param feature
 *            The name of the feature holding the token value
 * @throws IOException
 * @throws ClassNotFoundException
 * @throws InterruptedException
 */
public static void tokenizeDocuments(Path input, String type, String feature, Path output)
        throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration();
    // this conf parameter needs to be set to enable serialisation of conf
    // values
    conf.set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization,"
            + "org.apache.hadoop.io.serializer.WritableSerialization");
    conf.set(TOKEN_TYPE, type);
    conf.set(FEATURE_NAME, feature);

    Job job = new Job(conf);
    job.setJobName("DocumentProcessor::BehemothTokenizer: input-folder: " + input);
    job.setJarByClass(BehemothDocumentProcessor.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(StringTuple.class);
    FileInputFormat.setInputPaths(job, input);
    FileOutputFormat.setOutputPath(job, output);

    job.setMapperClass(BehemothTokenizerMapper.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setNumReduceTasks(0);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    HadoopUtil.delete(conf, output);

    boolean succeeded = job.waitForCompletion(true);
    if (!succeeded)
        throw new IllegalStateException("Job failed!");
}
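
A hypothetical invocation of the method above might look like the following; the paths, annotation type, and feature name are made-up values for illustration, and the call site must declare or handle IOException, InterruptedException, and ClassNotFoundException.

BehemothDocumentProcessor.tokenizeDocuments(
        new Path("/data/behemoth-corpus"),   // hypothetical input directory of Behemoth documents
        "Token",                             // hypothetical annotation type representing the tokens
        "string",                            // hypothetical feature name carrying the token text
        new Path("/data/tokenized-docs"));   // hypothetical output directory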

From source file: com.digitalpebble.behemoth.mahout.BehemothDocumentProcessor.java

License: Apache License

/**
 * Converts the input documents into a token array using the
 * {@link StringTuple}. The input documents have to be in the
 * {@link org.apache.hadoop.io.SequenceFile} format.
 *
 * @param input
 *            input directory of the documents in
 *            {@link org.apache.hadoop.io.SequenceFile} format
 * @param output
 *            output directory where the {@link StringTuple} token array of
 *            each document is to be created
 * @param analyzerClass
 *            The Lucene {@link Analyzer} for tokenizing the UTF-8 text
 */
public static void tokenizeDocuments(Path input, Class<? extends Analyzer> analyzerClass, Path output,
        Configuration baseConf) throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration(baseConf);
    // this conf parameter needs to be set to enable serialisation of conf
    // values
    conf.set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization,"
            + "org.apache.hadoop.io.serializer.WritableSerialization");
    conf.set(ANALYZER_CLASS, analyzerClass.getName());

    Job job = new Job(conf);
    job.setJobName("DocumentProcessor::LuceneTokenizer: input-folder: " + input);
    job.setJarByClass(BehemothDocumentProcessor.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(StringTuple.class);
    FileInputFormat.setInputPaths(job, input);
    FileOutputFormat.setOutputPath(job, output);

    job.setMapperClass(LuceneTokenizerMapper.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setNumReduceTasks(0);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    HadoopUtil.delete(conf, output);

    boolean succeeded = job.waitForCompletion(true);
    if (!succeeded)
        throw new IllegalStateException("Job failed!");

}

From source file: com.digitalpebble.behemoth.mahout.BehemothDocumentProcessor.java

License: Apache License

public static void dumpLabels(Path input, Path output, Configuration baseConf)
        throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration(baseConf);
    // this conf parameter needs to be set to enable serialisation of conf
    // values
    conf.set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization,"
            + "org.apache.hadoop.io.serializer.WritableSerialization");

    Job job = new Job(conf);
    job.setJobName("DocumentProcessor::LabelDumper: input-folder: " + input);
    job.setJarByClass(BehemothDocumentProcessor.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.setInputPaths(job, input);
    FileOutputFormat.setOutputPath(job, output);

    job.setMapperClass(BehemothLabelMapper.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setNumReduceTasks(0);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    HadoopUtil.delete(conf, output);

    boolean succeeded = job.waitForCompletion(true);
    if (!succeeded)
        throw new IllegalStateException("Job failed!");

}

From source file: com.digitalpebble.behemoth.mahout.DocumentProcessor.java

License: Apache License

/**
 * Converts the input documents into a token array using the
 * {@link StringTuple}. The input documents have to be in the
 * {@link org.apache.hadoop.io.SequenceFile} format.
 *
 * @param input
 *            input directory of the documents in
 *            {@link org.apache.hadoop.io.SequenceFile} format
 * @param output
 *            output directory where the {@link StringTuple} token array of
 *            each document is to be created
 * @param type
 *            The annotation type representing the tokens
 * @param feature
 *            The name of the feature holding the token value
 * @throws IOException
 * @throws ClassNotFoundException
 * @throws InterruptedException
 */
public static void tokenizeDocuments(Path input, String type, String feature, Path output)
        throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration();
    // this conf parameter needs to be set to enable serialisation of conf
    // values
    conf.set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization,"
            + "org.apache.hadoop.io.serializer.WritableSerialization");
    conf.set(TOKEN_TYPE, type);
    conf.set(FEATURE_NAME, feature);

    Job job = new Job(conf);
    job.setJobName("DocumentProcessor::DocumentTokenizer: input-folder: " + input);
    job.setJarByClass(DocumentProcessor.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(StringTuple.class);
    FileInputFormat.setInputPaths(job, input);
    FileOutputFormat.setOutputPath(job, output);

    job.setMapperClass(SequenceFileTokenizerMapper.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setNumReduceTasks(0);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    HadoopUtil.delete(conf, output);

    job.waitForCompletion(true);
}