Usage examples for org.apache.hadoop.mapreduce.Job#waitForCompletion
public boolean waitForCompletion(boolean verbose) throws IOException, InterruptedException, ClassNotFoundException
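waitForCompletion(verbose) submits the job, blocks until it finishes, and returns true on success; when verbose is true it also polls and prints progress to the console. Before the examples, here is a minimal, self-contained driver sketch showing the typical call pattern. The class names (WordCountDriver, TokenizerMapper, IntSumReducer) and the word-count logic are illustrative only; they are not taken from the examples below.

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordCountDriver {

    public static class TokenizerMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        private static final IntWritable ONE = new IntWritable(1);
        private final Text word = new Text();

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            StringTokenizer itr = new StringTokenizer(value.toString());
            while (itr.hasMoreTokens()) {
                word.set(itr.nextToken());
                context.write(word, ONE);
            }
        }
    }

    public static class IntSumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable v : values) {
                sum += v.get();
            }
            context.write(key, new IntWritable(sum));
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "word count");
        job.setJarByClass(WordCountDriver.class);
        job.setMapperClass(TokenizerMapper.class);
        job.setReducerClass(IntSumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        // Submit the job, print progress to the console (verbose = true),
        // and block until it finishes; the boolean result reports success.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}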
From source file: com.datasalt.pangool.tuplemr.mapred.lib.output.TestTupleTextInputOutputFormat.java
License: Apache License

@Test
public void testNulls() throws IOException, InterruptedException, ClassNotFoundException, TupleMRException,
        URISyntaxException {
    String line1 = "\"Joe\",\\N,,\"\\\"Joan\\\"\",\"\"";
    CommonUtils.writeTXT(line1, new File(IN));
    Configuration conf = getConf();
    FileSystem fS = FileSystem.get(conf);
    Path outPath = new Path(OUT);
    Path inPath = new Path(IN);
    HadoopUtils.deleteIfExists(fS, outPath);

    Schema schema = new Schema("schema",
            Fields.parse("name:string,name2:string,age:int,name3:string,emptystring:string"));

    MapOnlyJobBuilder mO = new MapOnlyJobBuilder(conf);
    mO.addInput(inPath,
            new TupleTextInputFormat(schema, false, true, ',', '"', '\\', FieldSelector.NONE,
                    TupleTextInputFormat.NO_NULL_STRING),
            new MapOnlyMapper<ITuple, NullWritable, NullWritable, NullWritable>() {
                protected void map(ITuple key, NullWritable value, Context context,
                        MultipleOutputsCollector collector) throws IOException, InterruptedException {
                    try {
                        Assert.assertNull(key.get("name2"));
                        Assert.assertNull(key.get("age"));
                        Assert.assertEquals("Joe", key.get("name"));
                        Assert.assertEquals("\"Joan\"", key.get("name3"));
                        Assert.assertEquals("", key.get("emptystring"));
                    } catch (Throwable t) {
                        t.printStackTrace();
                        throw new RuntimeException(t);
                    }
                }
            });
    mO.setOutput(outPath, new HadoopOutputFormat(NullOutputFormat.class), NullWritable.class,
            NullWritable.class);
    Job job = mO.createJob();
    try {
        assertTrue(job.waitForCompletion(true));
    } finally {
        mO.cleanUpInstanceFiles();
    }
    HadoopUtils.deleteIfExists(fS, inPath);
    HadoopUtils.deleteIfExists(fS, outPath);
}
From source file: com.datasalt.pangool.tuplemr.mapred.lib.output.TestTupleTextInputOutputFormat.java
License: Apache License

@Test
public void testNumberNulls() throws IOException, InterruptedException, ClassNotFoundException,
        TupleMRException, URISyntaxException {
    String line1 = ",-, ,.";
    CommonUtils.writeTXT(line1, new File(IN));
    Configuration conf = getConf();
    FileSystem fS = FileSystem.get(conf);
    Path outPath = new Path(OUT);
    Path inPath = new Path(IN);
    HadoopUtils.deleteIfExists(fS, outPath);

    Schema schema = new Schema("schema", Fields.parse("n1:int,n2:long,n3:float,n4:double"));

    MapOnlyJobBuilder mO = new MapOnlyJobBuilder(conf);
    mO.addInput(inPath,
            new TupleTextInputFormat(schema, false, true, ',', '"', '\\', FieldSelector.NONE,
                    TupleTextInputFormat.NO_NULL_STRING),
            new MapOnlyMapper<ITuple, NullWritable, NullWritable, NullWritable>() {
                protected void map(ITuple key, NullWritable value, Context context,
                        MultipleOutputsCollector collector) throws IOException, InterruptedException {
                    try {
                        Assert.assertNull(key.get("n1"));
                        Assert.assertNull(key.get("n2"));
                        Assert.assertNull(key.get("n3"));
                        Assert.assertNull(key.get("n4"));
                    } catch (Throwable t) {
                        t.printStackTrace();
                        throw new RuntimeException(t);
                    }
                }
            });
    mO.setOutput(outPath, new HadoopOutputFormat(NullOutputFormat.class), NullWritable.class,
            NullWritable.class);
    Job job = mO.createJob();
    try {
        assertTrue(job.waitForCompletion(true));
    } finally {
        mO.cleanUpInstanceFiles();
    }
    HadoopUtils.deleteIfExists(fS, inPath);
    HadoopUtils.deleteIfExists(fS, outPath);
}
From source file: com.datasalt.pangool.tuplemr.mapred.lib.output.TestTupleTextInputOutputFormat.java
License: Apache License

@Test
public void testQuotes() throws IOException, InterruptedException, ClassNotFoundException, TupleMRException,
        URISyntaxException {
    String line1 = "\"MYS\",\"Malaysia\",\"Asia\",\"Southeast Asia\",329758.00,1957,22244000,70.8,69213.00,97884.00,\"Malaysia\",\"Constitutional Monarchy, Federation\",\"Salahuddin Abdul Aziz Shah Alhaj\",2464,\"MY\"";
    CommonUtils.writeTXT(line1, new File(IN));
    Configuration conf = getConf();
    FileSystem fS = FileSystem.get(conf);
    Path outPath = new Path(OUT);
    Path inPath = new Path(IN);
    HadoopUtils.deleteIfExists(fS, outPath);

    Schema schema = new Schema("schema",
            Fields.parse("code:string," + "name:string," + "continent:string," + "region:string,"
                    + "surface_area:double," + "indep_year:int," + "population:int,"
                    + "life_expectancy:double," + "gnp:double," + "gnp_old:double," + "local_name:string,"
                    + "government_form:string," + "head_of_state:string," + "capital:int," + "code2:string"));

    MapOnlyJobBuilder mO = new MapOnlyJobBuilder(conf);
    mO.addInput(inPath,
            new TupleTextInputFormat(schema, false, false, ',', '"', '\\', FieldSelector.NONE,
                    TupleTextInputFormat.NO_NULL_STRING),
            new MapOnlyMapper<ITuple, NullWritable, NullWritable, NullWritable>() {
                protected void map(ITuple key, NullWritable value, Context context,
                        MultipleOutputsCollector collector) throws IOException, InterruptedException {
                    try {
                        Assert.assertEquals("Constitutional Monarchy, Federation",
                                key.get("government_form").toString());
                        Assert.assertEquals("Salahuddin Abdul Aziz Shah Alhaj",
                                key.get("head_of_state").toString());
                        Assert.assertEquals(2464, key.get("capital"));
                    } catch (Throwable t) {
                        t.printStackTrace();
                        throw new RuntimeException(t);
                    }
                }
            });
    mO.setOutput(outPath, new HadoopOutputFormat(NullOutputFormat.class), NullWritable.class,
            NullWritable.class);
    Job job = mO.createJob();
    try {
        assertTrue(job.waitForCompletion(true));
    } finally {
        mO.cleanUpInstanceFiles();
    }
    HadoopUtils.deleteIfExists(fS, inPath);
    HadoopUtils.deleteIfExists(fS, outPath);
}
From source file: com.datasalt.pangool.tuplemr.mapred.lib.output.TestTupleTextInputOutputFormat.java
License: Apache License

@Test
public void testFixedWidthNulls() throws IOException, InterruptedException, ClassNotFoundException,
        TupleMRException, URISyntaxException {
    String line1 = "1000 - ";
    int fieldsPos[] = new int[] { 0, 3, 5, 7 };
    CommonUtils.writeTXT(line1, new File(IN));
    Configuration conf = getConf();
    FileSystem fS = FileSystem.get(conf);
    Path outPath = new Path(OUT);
    Path inPath = new Path(IN);
    HadoopUtils.deleteIfExists(fS, outPath);

    Schema schema = new Schema("schema", Fields.parse("name:string,name2:string"));

    MapOnlyJobBuilder mO = new MapOnlyJobBuilder(conf);
    mO.addInput(inPath, new TupleTextInputFormat(schema, fieldsPos, false, "-"),
            new MapOnlyMapper<ITuple, NullWritable, NullWritable, NullWritable>() {
                protected void map(ITuple key, NullWritable value, Context context,
                        MultipleOutputsCollector collector) throws IOException, InterruptedException {
                    try {
                        Assert.assertNull(key.get("name2"));
                        Assert.assertEquals("1000", key.get("name"));
                    } catch (Throwable t) {
                        t.printStackTrace();
                        throw new RuntimeException(t);
                    }
                }
            });
    mO.setOutput(outPath, new HadoopOutputFormat(NullOutputFormat.class), NullWritable.class,
            NullWritable.class);
    Job job = mO.createJob();
    try {
        assertTrue(job.waitForCompletion(true));
    } finally {
        mO.cleanUpInstanceFiles();
    }
    HadoopUtils.deleteIfExists(fS, inPath);
    HadoopUtils.deleteIfExists(fS, outPath);
}
From source file: com.datasalt.pangool.tuplemr.serialization.TestTupleFieldSerialization.java
License: Apache License

@SuppressWarnings("deprecation")
@Test
public void test() throws Exception {
    initHadoop();
    trash(INPUT1, INPUT2, OUTPUT);

    // Prepare input
    BufferedWriter writer;

    // INPUT1
    writer = new BufferedWriter(new FileWriter(INPUT1));
    writer.write("foo1" + "\t" + "30" + "\n");
    writer.write("foo2" + "\t" + "20" + "\n");
    writer.write("foo3" + "\t" + "140" + "\n");
    writer.write("foo4" + "\t" + "110" + "\n");
    writer.write("foo5" + "\t" + "220" + "\n");
    writer.write("foo6" + "\t" + "260" + "\n");
    writer.close();

    // INPUT2
    writer = new BufferedWriter(new FileWriter(INPUT2));
    writer.write("4.5" + "\t" + "true" + "\n");
    writer.write("4.6" + "\t" + "false" + "\n");
    writer.close();

    TupleMRBuilder builder = new TupleMRBuilder(getConf());

    final Schema tupleSchema1 = new Schema("tupleSchema1", Fields.parse("a:string, b:int"));
    final Schema tupleSchema2 = new Schema("tupleSchema2", Fields.parse("c:double, d:boolean"));

    List<Field> fields = new ArrayList<Field>();
    fields.add(Field.create("partitionId", Type.INT));
    fields.add(Fields.createTupleField("tuple1", tupleSchema1));
    final Schema schema1 = new Schema("tupleInTuple1", fields);

    fields.clear();
    fields.add(Field.create("partitionId", Type.INT));
    fields.add(Fields.createTupleField("tuple2", tupleSchema2));
    final Schema schema2 = new Schema("tupleInTuple2", fields);

    builder.addIntermediateSchema(schema1);
    builder.addIntermediateSchema(schema2);

    builder.addInput(new Path(INPUT1), new HadoopInputFormat(TextInputFormat.class),
            new TupleMapper<LongWritable, Text>() {

                ITuple tupleInTuple1 = new Tuple(schema1);
                ITuple tuple1 = new Tuple(tupleSchema1);

                @Override
                public void map(LongWritable key, Text value, TupleMRContext context, Collector collector)
                        throws IOException, InterruptedException {
                    String[] split = value.toString().split("\t");
                    tuple1.set("a", split[0]);
                    tuple1.set("b", Integer.parseInt(split[1]));
                    tupleInTuple1.set("partitionId", 0);
                    tupleInTuple1.set("tuple1", tuple1);
                    collector.write(tupleInTuple1);
                }
            });

    builder.addInput(new Path(INPUT2), new HadoopInputFormat(TextInputFormat.class),
            new TupleMapper<LongWritable, Text>() {

                ITuple tupleInTuple2 = new Tuple(schema2);
                ITuple tuple2 = new Tuple(tupleSchema2);

                @Override
                public void map(LongWritable key, Text value, TupleMRContext context, Collector collector)
                        throws IOException, InterruptedException {
                    String[] split = value.toString().split("\t");
                    tuple2.set("c", Double.parseDouble(split[0]));
                    tuple2.set("d", Boolean.parseBoolean(split[1]));
                    tupleInTuple2.set("partitionId", 0);
                    tupleInTuple2.set("tuple2", tuple2);
                    collector.write(tupleInTuple2);
                }
            });

    builder.setTupleReducer(new TupleReducer<Text, NullWritable>() {

        public void reduce(ITuple group, Iterable<ITuple> tuples, TupleMRContext context, Collector collector)
                throws IOException, InterruptedException, TupleMRException {

            Iterator<ITuple> iterator = tuples.iterator();
            ITuple currentTuple;

            assertEquals(0, group.get("partitionId"));

            currentTuple = iterator.next();
            assertEquals("foo1", ((ITuple) currentTuple.get("tuple1")).get("a").toString());
            assertEquals(30, ((ITuple) currentTuple.get("tuple1")).get("b"));

            currentTuple = iterator.next();
            assertEquals("foo2", ((ITuple) currentTuple.get("tuple1")).get("a").toString());
            assertEquals(20, ((ITuple) currentTuple.get("tuple1")).get("b"));

            currentTuple = iterator.next();
            assertEquals("foo3", ((ITuple) currentTuple.get("tuple1")).get("a").toString());
            assertEquals(140, ((ITuple) currentTuple.get("tuple1")).get("b"));

            currentTuple = iterator.next();
            assertEquals("foo4", ((ITuple) currentTuple.get("tuple1")).get("a").toString());
            assertEquals(110, ((ITuple) currentTuple.get("tuple1")).get("b"));

            currentTuple = iterator.next();
            assertEquals("foo5", ((ITuple) currentTuple.get("tuple1")).get("a").toString());
            assertEquals(220, ((ITuple) currentTuple.get("tuple1")).get("b"));

            currentTuple = iterator.next();
            assertEquals("foo6", ((ITuple) currentTuple.get("tuple1")).get("a").toString());
            assertEquals(260, ((ITuple) currentTuple.get("tuple1")).get("b"));

            // Second data source BEGINS
            currentTuple = iterator.next();
            assertEquals(4.5, ((ITuple) currentTuple.get("tuple2")).get("c"));
            assertEquals(true, ((ITuple) currentTuple.get("tuple2")).get("d"));

            currentTuple = iterator.next();
            assertEquals(4.6, ((ITuple) currentTuple.get("tuple2")).get("c"));
            assertEquals(false, ((ITuple) currentTuple.get("tuple2")).get("d"));
        }
    });

    builder.setGroupByFields("partitionId");
    builder.setOutput(new Path(OUTPUT), new HadoopOutputFormat(TextOutputFormat.class), Text.class,
            NullWritable.class);

    Job job = builder.createJob();
    try {
        job.waitForCompletion(true);
    } finally {
        builder.cleanUpInstanceFiles();
    }
    trash(INPUT1, INPUT2, OUTPUT);
}
From source file: com.datasalt.pangool.utils.test.AbstractHadoopTestLibrary.java
License: Apache License

public void assertRun(Job job) throws IOException, InterruptedException, ClassNotFoundException {
    FileSystem fs = FileSystem.get(job.getConfiguration());
    HadoopUtils.deleteIfExists(fs, FileOutputFormat.getOutputPath(job));
    // Close input writers first
    for (Map.Entry<String, Object> entry : inputs.entrySet()) {
        Object in = entry.getValue();
        if (in instanceof SequenceFile.Writer) {
            ((SequenceFile.Writer) in).close();
        } else if (in instanceof TupleFile.Writer) {
            ((TupleFile.Writer) in).close();
        }
    }
    job.waitForCompletion(true);
    Assert.assertTrue(job.isSuccessful());
}
From source file: com.datasalt.utils.mapred.counter.TestMapRedCounter.java
License: Apache License

public void testWithMinimumCountOtherThan1(boolean withCombiner) throws IOException, InterruptedException,
        ClassNotFoundException, CloneNotSupportedException {
    Configuration conf = BaseConfigurationFactory.getInstance().getConf();
    Job job;

    // Set minimum count
    conf.setInt(MapRedCounter.MINIMUM_COUNT_FOR_GROUP_CONF_PREFIX + "0", 2);
    conf.setInt(MapRedCounter.MINIMUM_COUNT_FOR_GROUP_CONF_PREFIX + "1", 2);
    conf.setInt(MapRedCounter.MINIMUM_COUNT_FOR_GROUP_CONF_PREFIX + "2", 2);

    if (withCombiner) {
        job = MapRedCounter.buildMapRedCounterJob("counter", SequenceFileOutputFormat.class, OUTPUT_COUNT,
                conf);
    } else {
        job = MapRedCounter.buildMapRedCounterJobWithoutCombiner("counter", SequenceFileOutputFormat.class,
                OUTPUT_COUNT, conf);
    }

    MapRedCounter.addInput(job, new Path(SINGLE_LINE_FILE), TextInputFormat.class, TestMapper.class);
    job.waitForCompletion(true);

    HashMap<String, Long> itemCount = itemCountAsMap(getFs(),
            OUTPUT_COUNT + "/" + MapRedCounter.Outputs.COUNTFILE + "/part-r-00000");
    HashMap<String, LongPairWritable> itemGroupCount = itemGroupCountAsMap(getFs(),
            OUTPUT_COUNT + "/" + MapRedCounter.Outputs.COUNTDISTINCTFILE + "/part-r-00000");

    assertCount(2, "2:c6d3:c", itemCount);
    assertCount(2, "2:c6d3:b", itemCount);
    assertCount(2, "2:c6d3:a", itemCount);
    assertCount(2, "1:c3d2:a", itemCount);
    assertCount(2, "1:c2d1:a", itemCount);

    assertGroupCount(6, 3, "2:c6d3", itemGroupCount);
    assertGroupCount(2, 1, "1:c3d2", itemGroupCount);
    assertGroupCount(2, 1, "1:c2d1", itemGroupCount);
}
From source file: com.datasalt.utils.mapred.counter.TestMapRedCounter.java
License: Apache License

public void test(boolean withCombiner) throws IOException, InterruptedException, ClassNotFoundException,
        CloneNotSupportedException {
    Configuration conf = getConf();
    Job job;

    if (withCombiner) {
        job = MapRedCounter.buildMapRedCounterJob("counter", SequenceFileOutputFormat.class, OUTPUT_COUNT,
                conf);
    } else {
        job = MapRedCounter.buildMapRedCounterJobWithoutCombiner("counter", SequenceFileOutputFormat.class,
                OUTPUT_COUNT, conf);
    }

    MapRedCounter.addInput(job, new Path(SINGLE_LINE_FILE), TextInputFormat.class, TestMapper.class);
    job.waitForCompletion(true);

    HashMap<String, Long> itemCount = itemCountAsMap(getFs(),
            OUTPUT_COUNT + "/" + MapRedCounter.Outputs.COUNTFILE + "/part-r-00000");
    HashMap<String, LongPairWritable> itemGroupCount = itemGroupCountAsMap(getFs(),
            OUTPUT_COUNT + "/" + MapRedCounter.Outputs.COUNTDISTINCTFILE + "/part-r-00000");

    assertCount(1, "0:single:isingle", itemCount);
    assertCount(1, "1:c2d2:a", itemCount);
    assertCount(1, "1:c2d2:b", itemCount);
    assertCount(2, "1:c2d1:a", itemCount);
    assertCount(2, "1:c3d2:a", itemCount);
    assertCount(1, "1:c3d2:b", itemCount);
    assertCount(1, "2:c4d3:a", itemCount);
    assertCount(1, "2:c4d3:b", itemCount);
    assertCount(2, "2:c4d3:c", itemCount);

    assertGroupCount(1, 1, "0:single", itemGroupCount);
    assertGroupCount(2, 2, "1:c2d2", itemGroupCount);
    assertGroupCount(2, 1, "1:c2d1", itemGroupCount);
    assertGroupCount(3, 2, "1:c3d2", itemGroupCount);
    assertGroupCount(4, 3, "2:c4d3", itemGroupCount);
    assertGroupCount(6, 3, "2:c6d3", itemGroupCount);
}
From source file: com.datasalt.utils.mapred.joiner.TestJoinOneToMany.java
License: Apache License

@Test
public void test() throws IOException, InterruptedException, ClassNotFoundException {
    File input = new File(INPUT1);
    if (input.exists()) {
        while (!input.delete())
            ;
    }
    Files.write("line", input, Charset.defaultCharset());

    input = new File(INPUT2);
    if (input.exists()) {
        while (!input.delete())
            ;
    }
    Files.write("line", input, Charset.defaultCharset());

    Configuration conf = getConf();
    Job job = getMultiJoiner(conf);
    job.waitForCompletion(true);
    assertTrue(job.isSuccessful());

    File out = new File(OUTPUT, "part-r-00000");
    List<String> lines = Files.readLines(out, Charset.defaultCharset());
    System.out.println(lines);
    assertEquals(6, lines.size());
    assertTrue(lines.contains("1 foo"));
    assertTrue(lines.contains("1 bar"));
    assertTrue(lines.contains("2 oh la la"));
    assertTrue(lines.contains("2 blah blah"));
    assertTrue(lines.contains("-1 bluu"));
    assertTrue(lines.contains("3 snull"));
    assertTrue(firstWasSecondClass == 1);
    assertTrue(noSecondClass == true);
    cleanUp();
}
From source file: com.datasalt.utils.mapred.joiner.TestMultiJoiner.java
License: Apache License

@Test
public void test() throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = getConf();

    MultiJoiner multiJoiner = new MultiJoiner("MultiJoiner Test", conf);
    multiJoiner.setReducer(TestReducer.class);
    multiJoiner.setOutputKeyClass(Text.class);
    multiJoiner.setOutputValueClass(Text.class);
    multiJoiner.setOutputFormat(TextOutputFormat.class);
    multiJoiner.setOutputPath(new Path(OUTPUT_FOR_TEST));

    Job job = multiJoiner
            .addChanneledInput(2, new Path("src/test/resources/multijoiner.test.a.txt"), A.class,
                    TextInputFormat.class, AMapper.class)
            .addChanneledInput(4, new Path("src/test/resources/multijoiner.test.b.txt"), B.class,
                    TextInputFormat.class, BMapper.class)
            .getJob();

    job.waitForCompletion(true);
    assertTrue(job.isSuccessful());
    HadoopUtils.deleteIfExists(FileSystem.get(conf), new Path(OUTPUT_FOR_TEST));
}