Example usage for org.apache.hadoop.mapreduce Job waitForCompletion

Introduction

This page collects example usages of org.apache.hadoop.mapreduce Job waitForCompletion from open-source projects.

Prototype

public boolean waitForCompletion(boolean verbose)
        throws IOException, InterruptedException, ClassNotFoundException 

Document

Submit the job to the cluster and wait for it to finish.
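
The boolean argument controls whether progress is printed while waiting, and the return value is true only if the job succeeded. Below is a minimal, self-contained driver sketch of the typical call pattern; the identity Mapper/Reducer and the /tmp paths are placeholders chosen for illustration (Job.getInstance assumes the Hadoop 2.x+ API), not taken from the examples that follow.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WaitForCompletionExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "waitForCompletion example");
        job.setJarByClass(WaitForCompletionExample.class);

        // The base Mapper/Reducer classes act as identity functions,
        // so no custom map/reduce logic is needed for this sketch.
        job.setMapperClass(Mapper.class);
        job.setReducerClass(Reducer.class);
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);

        // Placeholder paths; substitute real HDFS locations.
        FileInputFormat.addInputPath(job, new Path("/tmp/example-in"));
        FileOutputFormat.setOutputPath(job, new Path("/tmp/example-out"));

        // Submits the job and blocks until it finishes. Passing true
        // prints progress while waiting; the return value is true
        // only if the job completed successfully.
        boolean success = job.waitForCompletion(true);
        System.exit(success ? 0 : 1);
    }
}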

Usage

From source file: com.datasalt.pangool.tuplemr.mapred.lib.output.TestTupleTextInputOutputFormat.java

License: Apache License

@Test
public void testNulls()
        throws IOException, InterruptedException, ClassNotFoundException, TupleMRException, URISyntaxException {

    String line1 = "\"Joe\",\\N,,\"\\\"Joan\\\"\",\"\"";

    CommonUtils.writeTXT(line1, new File(IN));
    Configuration conf = getConf();
    FileSystem fS = FileSystem.get(conf);
    Path outPath = new Path(OUT);
    Path inPath = new Path(IN);
    HadoopUtils.deleteIfExists(fS, outPath);

    Schema schema = new Schema("schema",
            Fields.parse("name:string,name2:string,age:int,name3:string,emptystring:string"));

    MapOnlyJobBuilder mO = new MapOnlyJobBuilder(conf);
    mO.addInput(inPath,
            new TupleTextInputFormat(schema, false, true, ',', '"', '\\', FieldSelector.NONE,
                    TupleTextInputFormat.NO_NULL_STRING),
            new MapOnlyMapper<ITuple, NullWritable, NullWritable, NullWritable>() {

                protected void map(ITuple key, NullWritable value, Context context,
                        MultipleOutputsCollector collector) throws IOException, InterruptedException {

                    try {
                        Assert.assertNull(key.get("name2"));
                        Assert.assertNull(key.get("age"));
                        Assert.assertEquals("Joe", key.get("name"));
                        Assert.assertEquals("\"Joan\"", key.get("name3"));
                        Assert.assertEquals("", key.get("emptystring"));
                    } catch (Throwable t) {
                        t.printStackTrace();
                        throw new RuntimeException(t);
                    }
                }
            });

    mO.setOutput(outPath, new HadoopOutputFormat(NullOutputFormat.class), NullWritable.class,
            NullWritable.class);
    Job job = mO.createJob();
    try {
        assertTrue(job.waitForCompletion(true));
    } finally {
        mO.cleanUpInstanceFiles();
    }

    HadoopUtils.deleteIfExists(fS, inPath);
    HadoopUtils.deleteIfExists(fS, outPath);
}

From source file: com.datasalt.pangool.tuplemr.mapred.lib.output.TestTupleTextInputOutputFormat.java

License: Apache License

@Test
public void testNumberNulls()
        throws IOException, InterruptedException, ClassNotFoundException, TupleMRException, URISyntaxException {

    String line1 = ",-, ,.";

    CommonUtils.writeTXT(line1, new File(IN));
    Configuration conf = getConf();
    FileSystem fS = FileSystem.get(conf);
    Path outPath = new Path(OUT);
    Path inPath = new Path(IN);
    HadoopUtils.deleteIfExists(fS, outPath);

    Schema schema = new Schema("schema", Fields.parse("n1:int,n2:long,n3:float,n4:double"));

    MapOnlyJobBuilder mO = new MapOnlyJobBuilder(conf);
    mO.addInput(inPath,
            new TupleTextInputFormat(schema, false, true, ',', '"', '\\', FieldSelector.NONE,
                    TupleTextInputFormat.NO_NULL_STRING),
            new MapOnlyMapper<ITuple, NullWritable, NullWritable, NullWritable>() {

                protected void map(ITuple key, NullWritable value, Context context,
                        MultipleOutputsCollector collector) throws IOException, InterruptedException {

                    try {
                        Assert.assertNull(key.get("n1"));
                        Assert.assertNull(key.get("n2"));
                        Assert.assertNull(key.get("n3"));
                        Assert.assertNull(key.get("n4"));
                    } catch (Throwable t) {
                        t.printStackTrace();
                        throw new RuntimeException(t);
                    }
                }
            });

    mO.setOutput(outPath, new HadoopOutputFormat(NullOutputFormat.class), NullWritable.class,
            NullWritable.class);
    Job job = mO.createJob();
    try {
        assertTrue(job.waitForCompletion(true));
    } finally {
        mO.cleanUpInstanceFiles();
    }

    HadoopUtils.deleteIfExists(fS, inPath);
    HadoopUtils.deleteIfExists(fS, outPath);
}

From source file: com.datasalt.pangool.tuplemr.mapred.lib.output.TestTupleTextInputOutputFormat.java

License: Apache License

@Test
public void testQuotes()
        throws IOException, InterruptedException, ClassNotFoundException, TupleMRException, URISyntaxException {

    String line1 = "\"MYS\",\"Malaysia\",\"Asia\",\"Southeast Asia\",329758.00,1957,22244000,70.8,69213.00,97884.00,\"Malaysia\",\"Constitutional Monarchy, Federation\",\"Salahuddin Abdul Aziz Shah Alhaj\",2464,\"MY\"";

    CommonUtils.writeTXT(line1, new File(IN));
    Configuration conf = getConf();
    FileSystem fS = FileSystem.get(conf);
    Path outPath = new Path(OUT);
    Path inPath = new Path(IN);
    HadoopUtils.deleteIfExists(fS, outPath);

    Schema schema = new Schema("schema",
            Fields.parse("code:string," + "name:string," + "continent:string," + "region:string,"
                    + "surface_area:double," + "indep_year:int," + "population:int," + "life_expectancy:double,"
                    + "gnp:double," + "gnp_old:double," + "local_name:string," + "government_form:string,"
                    + "head_of_state:string," + "capital:int," + "code2:string"));

    MapOnlyJobBuilder mO = new MapOnlyJobBuilder(conf);
    mO.addInput(inPath,
            new TupleTextInputFormat(schema, false, false, ',', '"', '\\', FieldSelector.NONE,
                    TupleTextInputFormat.NO_NULL_STRING),
            new MapOnlyMapper<ITuple, NullWritable, NullWritable, NullWritable>() {

                protected void map(ITuple key, NullWritable value, Context context,
                        MultipleOutputsCollector collector) throws IOException, InterruptedException {

                    try {
                        Assert.assertEquals("Constitutional Monarchy, Federation",
                                key.get("government_form").toString());
                        Assert.assertEquals("Salahuddin Abdul Aziz Shah Alhaj",
                                key.get("head_of_state").toString());
                        Assert.assertEquals(2464, key.get("capital"));
                    } catch (Throwable t) {
                        t.printStackTrace();
                        throw new RuntimeException(t);
                    }
                }
            });
    mO.setOutput(outPath, new HadoopOutputFormat(NullOutputFormat.class), NullWritable.class,
            NullWritable.class);
    Job job = mO.createJob();
    try {
        assertTrue(job.waitForCompletion(true));
    } finally {
        mO.cleanUpInstanceFiles();
    }

    HadoopUtils.deleteIfExists(fS, inPath);
    HadoopUtils.deleteIfExists(fS, outPath);
}

From source file: com.datasalt.pangool.tuplemr.mapred.lib.output.TestTupleTextInputOutputFormat.java

License: Apache License

@Test
public void testFixedWidthNulls()
        throws IOException, InterruptedException, ClassNotFoundException, TupleMRException, URISyntaxException {

    String line1 = "1000  - ";
    int fieldsPos[] = new int[] { 0, 3, 5, 7 };

    CommonUtils.writeTXT(line1, new File(IN));
    Configuration conf = getConf();
    FileSystem fS = FileSystem.get(conf);
    Path outPath = new Path(OUT);
    Path inPath = new Path(IN);
    HadoopUtils.deleteIfExists(fS, outPath);

    Schema schema = new Schema("schema", Fields.parse("name:string,name2:string"));

    MapOnlyJobBuilder mO = new MapOnlyJobBuilder(conf);
    mO.addInput(inPath, new TupleTextInputFormat(schema, fieldsPos, false, "-"),
            new MapOnlyMapper<ITuple, NullWritable, NullWritable, NullWritable>() {

                protected void map(ITuple key, NullWritable value, Context context,
                        MultipleOutputsCollector collector) throws IOException, InterruptedException {

                    try {
                        Assert.assertNull(key.get("name2"));
                        Assert.assertEquals("1000", key.get("name"));
                    } catch (Throwable t) {
                        t.printStackTrace();
                        throw new RuntimeException(t);
                    }
                }
            });

    mO.setOutput(outPath, new HadoopOutputFormat(NullOutputFormat.class), NullWritable.class,
            NullWritable.class);
    Job job = mO.createJob();
    try {
        assertTrue(job.waitForCompletion(true));
    } finally {
        mO.cleanUpInstanceFiles();
    }

    HadoopUtils.deleteIfExists(fS, inPath);
    HadoopUtils.deleteIfExists(fS, outPath);
}

From source file: com.datasalt.pangool.tuplemr.serialization.TestTupleFieldSerialization.java

License: Apache License

@SuppressWarnings("deprecation")
@Test
public void test() throws Exception {
    initHadoop();
    trash(INPUT1, INPUT2, OUTPUT);

    // Prepare input
    BufferedWriter writer;

    // INPUT1
    writer = new BufferedWriter(new FileWriter(INPUT1));
    writer.write("foo1" + "\t" + "30" + "\n");
    writer.write("foo2" + "\t" + "20" + "\n");
    writer.write("foo3" + "\t" + "140" + "\n");
    writer.write("foo4" + "\t" + "110" + "\n");
    writer.write("foo5" + "\t" + "220" + "\n");
    writer.write("foo6" + "\t" + "260" + "\n");
    writer.close();

    // INPUT2
    writer = new BufferedWriter(new FileWriter(INPUT2));
    writer.write("4.5" + "\t" + "true" + "\n");
    writer.write("4.6" + "\t" + "false" + "\n");
    writer.close();

    TupleMRBuilder builder = new TupleMRBuilder(getConf());

    final Schema tupleSchema1 = new Schema("tupleSchema1", Fields.parse("a:string, b:int"));
    final Schema tupleSchema2 = new Schema("tupleSchema2", Fields.parse("c:double, d:boolean"));

    List<Field> fields = new ArrayList<Field>();
    fields.add(Field.create("partitionId", Type.INT));
    fields.add(Fields.createTupleField("tuple1", tupleSchema1));
    final Schema schema1 = new Schema("tupleInTuple1", fields);

    fields.clear();
    fields.add(Field.create("partitionId", Type.INT));
    fields.add(Fields.createTupleField("tuple2", tupleSchema2));
    final Schema schema2 = new Schema("tupleInTuple2", fields);

    builder.addIntermediateSchema(schema1);
    builder.addIntermediateSchema(schema2);

    builder.addInput(new Path(INPUT1), new HadoopInputFormat(TextInputFormat.class),
            new TupleMapper<LongWritable, Text>() {

                ITuple tupleInTuple1 = new Tuple(schema1);
                ITuple tuple1 = new Tuple(tupleSchema1);

                @Override
                public void map(LongWritable key, Text value, TupleMRContext context, Collector collector)
                        throws IOException, InterruptedException {
                    String[] split = value.toString().split("\t");
                    tuple1.set("a", split[0]);
                    tuple1.set("b", Integer.parseInt(split[1]));

                    tupleInTuple1.set("partitionId", 0);
                    tupleInTuple1.set("tuple1", tuple1);
                    collector.write(tupleInTuple1);
                }
            });

    builder.addInput(new Path(INPUT2), new HadoopInputFormat(TextInputFormat.class),
            new TupleMapper<LongWritable, Text>() {

                ITuple tupleInTuple2 = new Tuple(schema2);
                ITuple tuple2 = new Tuple(tupleSchema2);

                @Override
                public void map(LongWritable key, Text value, TupleMRContext context, Collector collector)
                        throws IOException, InterruptedException {
                    String[] split = value.toString().split("\t");
                    tuple2.set("c", Double.parseDouble(split[0]));
                    tuple2.set("d", Boolean.parseBoolean(split[1]));

                    tupleInTuple2.set("partitionId", 0);
                    tupleInTuple2.set("tuple2", tuple2);
                    collector.write(tupleInTuple2);
                }
            });

    builder.setTupleReducer(new TupleReducer<Text, NullWritable>() {

        public void reduce(ITuple group, Iterable<ITuple> tuples, TupleMRContext context, Collector collector)
                throws IOException, InterruptedException, TupleMRException {

            Iterator<ITuple> iterator = tuples.iterator();
            ITuple currentTuple;

            assertEquals(0, group.get("partitionId"));

            currentTuple = iterator.next();
            assertEquals("foo1", ((ITuple) currentTuple.get("tuple1")).get("a").toString());
            assertEquals(30, ((ITuple) currentTuple.get("tuple1")).get("b"));

            currentTuple = iterator.next();
            assertEquals("foo2", ((ITuple) currentTuple.get("tuple1")).get("a").toString());
            assertEquals(20, ((ITuple) currentTuple.get("tuple1")).get("b"));

            currentTuple = iterator.next();
            assertEquals("foo3", ((ITuple) currentTuple.get("tuple1")).get("a").toString());
            assertEquals(140, ((ITuple) currentTuple.get("tuple1")).get("b"));

            currentTuple = iterator.next();
            assertEquals("foo4", ((ITuple) currentTuple.get("tuple1")).get("a").toString());
            assertEquals(110, ((ITuple) currentTuple.get("tuple1")).get("b"));

            currentTuple = iterator.next();
            assertEquals("foo5", ((ITuple) currentTuple.get("tuple1")).get("a").toString());
            assertEquals(220, ((ITuple) currentTuple.get("tuple1")).get("b"));

            currentTuple = iterator.next();
            assertEquals("foo6", ((ITuple) currentTuple.get("tuple1")).get("a").toString());
            assertEquals(260, ((ITuple) currentTuple.get("tuple1")).get("b"));

            // Second data source BEGINS
            currentTuple = iterator.next();
            assertEquals(4.5, ((ITuple) currentTuple.get("tuple2")).get("c"));
            assertEquals(true, ((ITuple) currentTuple.get("tuple2")).get("d"));

            currentTuple = iterator.next();
            assertEquals(4.6, ((ITuple) currentTuple.get("tuple2")).get("c"));
            assertEquals(false, ((ITuple) currentTuple.get("tuple2")).get("d"));
        };
    });
    builder.setGroupByFields("partitionId");
    builder.setOutput(new Path(OUTPUT), new HadoopOutputFormat(TextOutputFormat.class), Text.class,
            NullWritable.class);
    Job job = builder.createJob();
    try {
        job.waitForCompletion(true);
    } finally {
        builder.cleanUpInstanceFiles();
    }
    trash(INPUT1, INPUT2, OUTPUT);
}

From source file: com.datasalt.pangool.utils.test.AbstractHadoopTestLibrary.java

License: Apache License

public void assertRun(Job job) throws IOException, InterruptedException, ClassNotFoundException {
    FileSystem fs = FileSystem.get(job.getConfiguration());
    HadoopUtils.deleteIfExists(fs, FileOutputFormat.getOutputPath(job));
    // Close input writers first
    for (Map.Entry<String, Object> entry : inputs.entrySet()) {
        Object in = entry.getValue();
        if (in instanceof SequenceFile.Writer) {
            ((SequenceFile.Writer) in).close();
        } else if (in instanceof TupleFile.Writer) {
            ((TupleFile.Writer) in).close();
        }
    }
    job.waitForCompletion(true);
    Assert.assertTrue(job.isSuccessful());

}

From source file: com.datasalt.utils.mapred.counter.TestMapRedCounter.java

License: Apache License

public void testWithMinimumCountOtherThan1(boolean withCombiner)
        throws IOException, InterruptedException, ClassNotFoundException, CloneNotSupportedException {
    Configuration conf = BaseConfigurationFactory.getInstance().getConf();
    Job job;

    /*
     * Set minimum count
     */
    conf.setInt(MapRedCounter.MINIMUM_COUNT_FOR_GROUP_CONF_PREFIX + "0", 2);
    conf.setInt(MapRedCounter.MINIMUM_COUNT_FOR_GROUP_CONF_PREFIX + "1", 2);
    conf.setInt(MapRedCounter.MINIMUM_COUNT_FOR_GROUP_CONF_PREFIX + "2", 2);

    if (withCombiner) {
        job = MapRedCounter.buildMapRedCounterJob("counter", SequenceFileOutputFormat.class, OUTPUT_COUNT,
                conf);
    } else {
        job = MapRedCounter.buildMapRedCounterJobWithoutCombiner("counter", SequenceFileOutputFormat.class,
                OUTPUT_COUNT, conf);
    }

    MapRedCounter.addInput(job, new Path(SINGLE_LINE_FILE), TextInputFormat.class, TestMapper.class);

    job.waitForCompletion(true);

    HashMap<String, Long> itemCount = itemCountAsMap(getFs(),
            OUTPUT_COUNT + "/" + MapRedCounter.Outputs.COUNTFILE + "/part-r-00000");
    HashMap<String, LongPairWritable> itemGroupCount = itemGroupCountAsMap(getFs(),
            OUTPUT_COUNT + "/" + MapRedCounter.Outputs.COUNTDISTINCTFILE + "/part-r-00000");

    assertCount(2, "2:c6d3:c", itemCount);
    assertCount(2, "2:c6d3:b", itemCount);
    assertCount(2, "2:c6d3:a", itemCount);

    assertCount(2, "1:c3d2:a", itemCount);

    assertCount(2, "1:c2d1:a", itemCount);

    assertGroupCount(6, 3, "2:c6d3", itemGroupCount);
    assertGroupCount(2, 1, "1:c3d2", itemGroupCount);
    assertGroupCount(2, 1, "1:c2d1", itemGroupCount);
}

From source file: com.datasalt.utils.mapred.counter.TestMapRedCounter.java

License: Apache License

public void test(boolean withCombiner)
        throws IOException, InterruptedException, ClassNotFoundException, CloneNotSupportedException {
    Configuration conf = getConf();
    Job job;

    if (withCombiner) {
        job = MapRedCounter.buildMapRedCounterJob("counter", SequenceFileOutputFormat.class, OUTPUT_COUNT,
                conf);
    } else {
        job = MapRedCounter.buildMapRedCounterJobWithoutCombiner("counter", SequenceFileOutputFormat.class,
                OUTPUT_COUNT, conf);
    }

    MapRedCounter.addInput(job, new Path(SINGLE_LINE_FILE), TextInputFormat.class, TestMapper.class);

    job.waitForCompletion(true);

    HashMap<String, Long> itemCount = itemCountAsMap(getFs(),
            OUTPUT_COUNT + "/" + MapRedCounter.Outputs.COUNTFILE + "/part-r-00000");
    HashMap<String, LongPairWritable> itemGroupCount = itemGroupCountAsMap(getFs(),
            OUTPUT_COUNT + "/" + MapRedCounter.Outputs.COUNTDISTINCTFILE + "/part-r-00000");

    assertCount(1, "0:single:isingle", itemCount);
    assertCount(1, "1:c2d2:a", itemCount);
    assertCount(1, "1:c2d2:b", itemCount);
    assertCount(2, "1:c2d1:a", itemCount);
    assertCount(2, "1:c3d2:a", itemCount);
    assertCount(1, "1:c3d2:b", itemCount);
    assertCount(1, "2:c4d3:a", itemCount);
    assertCount(1, "2:c4d3:b", itemCount);
    assertCount(2, "2:c4d3:c", itemCount);

    assertGroupCount(1, 1, "0:single", itemGroupCount);
    assertGroupCount(2, 2, "1:c2d2", itemGroupCount);
    assertGroupCount(2, 1, "1:c2d1", itemGroupCount);
    assertGroupCount(3, 2, "1:c3d2", itemGroupCount);
    assertGroupCount(4, 3, "2:c4d3", itemGroupCount);
    assertGroupCount(6, 3, "2:c6d3", itemGroupCount);
}

From source file: com.datasalt.utils.mapred.joiner.TestJoinOneToMany.java

License: Apache License

@Test
public void test() throws IOException, InterruptedException, ClassNotFoundException {
    File input = new File(INPUT1);
    if (input.exists()) {
        while (!input.delete())
            ;
    }
    Files.write("line", input, Charset.defaultCharset());
    input = new File(INPUT2);
    if (input.exists()) {
        while (!input.delete())
            ;
    }
    Files.write("line", input, Charset.defaultCharset());

    Configuration conf = getConf();
    Job job = getMultiJoiner(conf);
    job.waitForCompletion(true);
    assertTrue(job.isSuccessful());

    File out = new File(OUTPUT, "part-r-00000");
    List<String> lines = Files.readLines(out, Charset.defaultCharset());
    System.out.println(lines);

    assertEquals(6, lines.size());
    assertTrue(lines.contains("1 foo"));
    assertTrue(lines.contains("1 bar"));
    assertTrue(lines.contains("2 oh la la"));
    assertTrue(lines.contains("2 blah blah"));
    assertTrue(lines.contains("-1 bluu"));
    assertTrue(lines.contains("3 snull"));

    assertTrue(firstWasSecondClass == 1);
    assertTrue(noSecondClass == true);

    cleanUp();
}

From source file: com.datasalt.utils.mapred.joiner.TestMultiJoiner.java

License: Apache License

@Test
public void test() throws IOException, InterruptedException, ClassNotFoundException {

    Configuration conf = getConf();
    MultiJoiner multiJoiner = new MultiJoiner("MultiJoiner Test", conf);
    multiJoiner.setReducer(TestReducer.class);
    multiJoiner.setOutputKeyClass(Text.class);
    multiJoiner.setOutputValueClass(Text.class);
    multiJoiner.setOutputFormat(TextOutputFormat.class);
    multiJoiner.setOutputPath(new Path(OUTPUT_FOR_TEST));

    Job job = multiJoiner
            .addChanneledInput(2, new Path("src/test/resources/multijoiner.test.a.txt"), A.class,
                    TextInputFormat.class, AMapper.class)
            .addChanneledInput(4, new Path("src/test/resources/multijoiner.test.b.txt"), B.class,
                    TextInputFormat.class, BMapper.class)
            .getJob();
    job.waitForCompletion(true);
    assertTrue(job.isSuccessful());

    HadoopUtils.deleteIfExists(FileSystem.get(conf), new Path(OUTPUT_FOR_TEST));
}