Java tutorial
/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.spark.streaming; import java.io.*; import java.lang.Iterable; import java.nio.charset.Charset; import java.util.*; import org.apache.commons.lang.mutable.MutableBoolean; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; import scala.Tuple2; import org.junit.Assert; import static org.junit.Assert.*; import org.junit.Test; import com.google.common.base.Optional; import com.google.common.collect.Lists; import com.google.common.io.Files; import com.google.common.collect.Sets; import org.apache.spark.HashPartitioner; import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.function.*; import org.apache.spark.storage.StorageLevel; import org.apache.spark.streaming.api.java.*; import org.apache.spark.util.Utils; import org.apache.spark.SparkConf; // The test suite itself is Serializable so that anonymous Function implementations can be // serialized, as an alternative to converting these anonymous classes to static inner classes; // see http://stackoverflow.com/questions/758570/. public class JavaAPISuite extends LocalJavaStreamingContext implements Serializable { public void equalIterator(Iterator<?> a, Iterator<?> b) { while (a.hasNext() && b.hasNext()) { Assert.assertEquals(a.next(), b.next()); } Assert.assertEquals(a.hasNext(), b.hasNext()); } public void equalIterable(Iterable<?> a, Iterable<?> b) { equalIterator(a.iterator(), b.iterator()); } @Test public void testInitialization() { Assert.assertNotNull(ssc.sparkContext()); } @SuppressWarnings("unchecked") @Test public void testCount() { List<List<Integer>> inputData = Arrays.asList(Arrays.asList(1, 2, 3, 4), Arrays.asList(3, 4, 5), Arrays.asList(3)); List<List<Long>> expected = Arrays.asList(Arrays.asList(4L), Arrays.asList(3L), Arrays.asList(1L)); JavaDStream<Integer> stream = JavaTestUtils.attachTestInputStream(ssc, inputData, 1); JavaDStream<Long> count = stream.count(); JavaTestUtils.attachTestOutputStream(count); List<List<Long>> result = JavaTestUtils.runStreams(ssc, 3, 3); assertOrderInvariantEquals(expected, result); } @SuppressWarnings("unchecked") @Test public void testMap() { List<List<String>> inputData = Arrays.asList(Arrays.asList("hello", "world"), Arrays.asList("goodnight", "moon")); List<List<Integer>> expected = Arrays.asList(Arrays.asList(5, 5), Arrays.asList(9, 4)); JavaDStream<String> stream = JavaTestUtils.attachTestInputStream(ssc, inputData, 1); JavaDStream<Integer> letterCount = stream.map(new Function<String, Integer>() { @Override public Integer call(String s) throws Exception { return s.length(); } }); JavaTestUtils.attachTestOutputStream(letterCount); List<List<Integer>> result = JavaTestUtils.runStreams(ssc, 2, 2); assertOrderInvariantEquals(expected, result); } @SuppressWarnings("unchecked") @Test public void testWindow() { List<List<Integer>> inputData = Arrays.asList(Arrays.asList(1, 2, 3), Arrays.asList(4, 5, 6), Arrays.asList(7, 8, 9)); List<List<Integer>> expected = Arrays.asList(Arrays.asList(1, 2, 3), Arrays.asList(4, 5, 6, 1, 2, 3), Arrays.asList(7, 8, 9, 4, 5, 6), Arrays.asList(7, 8, 9)); JavaDStream<Integer> stream = JavaTestUtils.attachTestInputStream(ssc, inputData, 1); JavaDStream<Integer> windowed = stream.window(new Duration(2000)); JavaTestUtils.attachTestOutputStream(windowed); List<List<Integer>> result = JavaTestUtils.runStreams(ssc, 4, 4); assertOrderInvariantEquals(expected, result); } @SuppressWarnings("unchecked") @Test public void testWindowWithSlideDuration() { List<List<Integer>> inputData = Arrays.asList(Arrays.asList(1, 2, 3), Arrays.asList(4, 5, 6), Arrays.asList(7, 8, 9), Arrays.asList(10, 11, 12), Arrays.asList(13, 14, 15), Arrays.asList(16, 17, 18)); List<List<Integer>> expected = Arrays.asList(Arrays.asList(1, 2, 3, 4, 5, 6), Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12), Arrays.asList(7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18), Arrays.asList(13, 14, 15, 16, 17, 18)); JavaDStream<Integer> stream = JavaTestUtils.attachTestInputStream(ssc, inputData, 1); JavaDStream<Integer> windowed = stream.window(new Duration(4000), new Duration(2000)); JavaTestUtils.attachTestOutputStream(windowed); List<List<Integer>> result = JavaTestUtils.runStreams(ssc, 8, 4); assertOrderInvariantEquals(expected, result); } @SuppressWarnings("unchecked") @Test public void testFilter() { List<List<String>> inputData = Arrays.asList(Arrays.asList("giants", "dodgers"), Arrays.asList("yankees", "red socks")); List<List<String>> expected = Arrays.asList(Arrays.asList("giants"), Arrays.asList("yankees")); JavaDStream<String> stream = JavaTestUtils.attachTestInputStream(ssc, inputData, 1); JavaDStream<String> filtered = stream.filter(new Function<String, Boolean>() { @Override public Boolean call(String s) throws Exception { return s.contains("a"); } }); JavaTestUtils.attachTestOutputStream(filtered); List<List<String>> result = JavaTestUtils.runStreams(ssc, 2, 2); assertOrderInvariantEquals(expected, result); } @SuppressWarnings("unchecked") @Test public void testRepartitionMorePartitions() { List<List<Integer>> inputData = Arrays.asList(Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10), Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)); JavaDStream<Integer> stream = JavaTestUtils.attachTestInputStream(ssc, inputData, 2); JavaDStreamLike<Integer, JavaDStream<Integer>, JavaRDD<Integer>> repartitioned = stream.repartition(4); JavaTestUtils.attachTestOutputStream(repartitioned); List<List<List<Integer>>> result = JavaTestUtils.runStreamsWithPartitions(ssc, 2, 2); Assert.assertEquals(2, result.size()); for (List<List<Integer>> rdd : result) { Assert.assertEquals(4, rdd.size()); Assert.assertEquals(10, rdd.get(0).size() + rdd.get(1).size() + rdd.get(2).size() + rdd.get(3).size()); } } @SuppressWarnings("unchecked") @Test public void testRepartitionFewerPartitions() { List<List<Integer>> inputData = Arrays.asList(Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10), Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)); JavaDStream<Integer> stream = JavaTestUtils.attachTestInputStream(ssc, inputData, 4); JavaDStreamLike<Integer, JavaDStream<Integer>, JavaRDD<Integer>> repartitioned = stream.repartition(2); JavaTestUtils.attachTestOutputStream(repartitioned); List<List<List<Integer>>> result = JavaTestUtils.runStreamsWithPartitions(ssc, 2, 2); Assert.assertEquals(2, result.size()); for (List<List<Integer>> rdd : result) { Assert.assertEquals(2, rdd.size()); Assert.assertEquals(10, rdd.get(0).size() + rdd.get(1).size()); } } @SuppressWarnings("unchecked") @Test public void testGlom() { List<List<String>> inputData = Arrays.asList(Arrays.asList("giants", "dodgers"), Arrays.asList("yankees", "red socks")); List<List<List<String>>> expected = Arrays.asList(Arrays.asList(Arrays.asList("giants", "dodgers")), Arrays.asList(Arrays.asList("yankees", "red socks"))); JavaDStream<String> stream = JavaTestUtils.attachTestInputStream(ssc, inputData, 1); JavaDStream<List<String>> glommed = stream.glom(); JavaTestUtils.attachTestOutputStream(glommed); List<List<List<String>>> result = JavaTestUtils.runStreams(ssc, 2, 2); Assert.assertEquals(expected, result); } @SuppressWarnings("unchecked") @Test public void testMapPartitions() { List<List<String>> inputData = Arrays.asList(Arrays.asList("giants", "dodgers"), Arrays.asList("yankees", "red socks")); List<List<String>> expected = Arrays.asList(Arrays.asList("GIANTSDODGERS"), Arrays.asList("YANKEESRED SOCKS")); JavaDStream<String> stream = JavaTestUtils.attachTestInputStream(ssc, inputData, 1); JavaDStream<String> mapped = stream.mapPartitions(new FlatMapFunction<Iterator<String>, String>() { @Override public Iterable<String> call(Iterator<String> in) { String out = ""; while (in.hasNext()) { out = out + in.next().toUpperCase(); } return Lists.newArrayList(out); } }); JavaTestUtils.attachTestOutputStream(mapped); List<List<String>> result = JavaTestUtils.runStreams(ssc, 2, 2); Assert.assertEquals(expected, result); } private class IntegerSum implements Function2<Integer, Integer, Integer> { @Override public Integer call(Integer i1, Integer i2) throws Exception { return i1 + i2; } } private class IntegerDifference implements Function2<Integer, Integer, Integer> { @Override public Integer call(Integer i1, Integer i2) throws Exception { return i1 - i2; } } @SuppressWarnings("unchecked") @Test public void testReduce() { List<List<Integer>> inputData = Arrays.asList(Arrays.asList(1, 2, 3), Arrays.asList(4, 5, 6), Arrays.asList(7, 8, 9)); List<List<Integer>> expected = Arrays.asList(Arrays.asList(6), Arrays.asList(15), Arrays.asList(24)); JavaDStream<Integer> stream = JavaTestUtils.attachTestInputStream(ssc, inputData, 1); JavaDStream<Integer> reduced = stream.reduce(new IntegerSum()); JavaTestUtils.attachTestOutputStream(reduced); List<List<Integer>> result = JavaTestUtils.runStreams(ssc, 3, 3); Assert.assertEquals(expected, result); } @SuppressWarnings("unchecked") @Test public void testReduceByWindowWithInverse() { testReduceByWindow(true); } @SuppressWarnings("unchecked") @Test public void testReduceByWindowWithoutInverse() { testReduceByWindow(false); } @SuppressWarnings("unchecked") private void testReduceByWindow(boolean withInverse) { List<List<Integer>> inputData = Arrays.asList(Arrays.asList(1, 2, 3), Arrays.asList(4, 5, 6), Arrays.asList(7, 8, 9)); List<List<Integer>> expected = Arrays.asList(Arrays.asList(6), Arrays.asList(21), Arrays.asList(39), Arrays.asList(24)); JavaDStream<Integer> stream = JavaTestUtils.attachTestInputStream(ssc, inputData, 1); JavaDStream<Integer> reducedWindowed = null; if (withInverse) { reducedWindowed = stream.reduceByWindow(new IntegerSum(), new IntegerDifference(), new Duration(2000), new Duration(1000)); } else { reducedWindowed = stream.reduceByWindow(new IntegerSum(), new Duration(2000), new Duration(1000)); } JavaTestUtils.attachTestOutputStream(reducedWindowed); List<List<Integer>> result = JavaTestUtils.runStreams(ssc, 4, 4); Assert.assertEquals(expected, result); } @SuppressWarnings("unchecked") @Test public void testQueueStream() { List<List<Integer>> expected = Arrays.asList(Arrays.asList(1, 2, 3), Arrays.asList(4, 5, 6), Arrays.asList(7, 8, 9)); JavaSparkContext jsc = new JavaSparkContext(ssc.ssc().sc()); JavaRDD<Integer> rdd1 = ssc.sparkContext().parallelize(Arrays.asList(1, 2, 3)); JavaRDD<Integer> rdd2 = ssc.sparkContext().parallelize(Arrays.asList(4, 5, 6)); JavaRDD<Integer> rdd3 = ssc.sparkContext().parallelize(Arrays.asList(7, 8, 9)); LinkedList<JavaRDD<Integer>> rdds = Lists.newLinkedList(); rdds.add(rdd1); rdds.add(rdd2); rdds.add(rdd3); JavaDStream<Integer> stream = ssc.queueStream(rdds); JavaTestUtils.attachTestOutputStream(stream); List<List<Integer>> result = JavaTestUtils.runStreams(ssc, 3, 3); Assert.assertEquals(expected, result); } @SuppressWarnings("unchecked") @Test public void testTransform() { List<List<Integer>> inputData = Arrays.asList(Arrays.asList(1, 2, 3), Arrays.asList(4, 5, 6), Arrays.asList(7, 8, 9)); List<List<Integer>> expected = Arrays.asList(Arrays.asList(3, 4, 5), Arrays.asList(6, 7, 8), Arrays.asList(9, 10, 11)); JavaDStream<Integer> stream = JavaTestUtils.attachTestInputStream(ssc, inputData, 1); JavaDStream<Integer> transformed = stream.transform(new Function<JavaRDD<Integer>, JavaRDD<Integer>>() { @Override public JavaRDD<Integer> call(JavaRDD<Integer> in) throws Exception { return in.map(new Function<Integer, Integer>() { @Override public Integer call(Integer i) throws Exception { return i + 2; } }); } }); JavaTestUtils.attachTestOutputStream(transformed); List<List<Integer>> result = JavaTestUtils.runStreams(ssc, 3, 3); assertOrderInvariantEquals(expected, result); } @SuppressWarnings("unchecked") @Test public void testVariousTransform() { // tests whether all variations of transform can be called from Java List<List<Integer>> inputData = Arrays.asList(Arrays.asList(1)); JavaDStream<Integer> stream = JavaTestUtils.attachTestInputStream(ssc, inputData, 1); List<List<Tuple2<String, Integer>>> pairInputData = Arrays .asList(Arrays.asList(new Tuple2<String, Integer>("x", 1))); JavaPairDStream<String, Integer> pairStream = JavaPairDStream .fromJavaDStream(JavaTestUtils.attachTestInputStream(ssc, pairInputData, 1)); JavaDStream<Integer> transformed1 = stream.transform(new Function<JavaRDD<Integer>, JavaRDD<Integer>>() { @Override public JavaRDD<Integer> call(JavaRDD<Integer> in) throws Exception { return null; } }); JavaDStream<Integer> transformed2 = stream .transform(new Function2<JavaRDD<Integer>, Time, JavaRDD<Integer>>() { @Override public JavaRDD<Integer> call(JavaRDD<Integer> in, Time time) throws Exception { return null; } }); JavaPairDStream<String, Integer> transformed3 = stream .transformToPair(new Function<JavaRDD<Integer>, JavaPairRDD<String, Integer>>() { @Override public JavaPairRDD<String, Integer> call(JavaRDD<Integer> in) throws Exception { return null; } }); JavaPairDStream<String, Integer> transformed4 = stream .transformToPair(new Function2<JavaRDD<Integer>, Time, JavaPairRDD<String, Integer>>() { @Override public JavaPairRDD<String, Integer> call(JavaRDD<Integer> in, Time time) throws Exception { return null; } }); JavaDStream<Integer> pairTransformed1 = pairStream .transform(new Function<JavaPairRDD<String, Integer>, JavaRDD<Integer>>() { @Override public JavaRDD<Integer> call(JavaPairRDD<String, Integer> in) throws Exception { return null; } }); JavaDStream<Integer> pairTransformed2 = pairStream .transform(new Function2<JavaPairRDD<String, Integer>, Time, JavaRDD<Integer>>() { @Override public JavaRDD<Integer> call(JavaPairRDD<String, Integer> in, Time time) throws Exception { return null; } }); JavaPairDStream<String, String> pairTransformed3 = pairStream .transformToPair(new Function<JavaPairRDD<String, Integer>, JavaPairRDD<String, String>>() { @Override public JavaPairRDD<String, String> call(JavaPairRDD<String, Integer> in) throws Exception { return null; } }); JavaPairDStream<String, String> pairTransformed4 = pairStream .transformToPair(new Function2<JavaPairRDD<String, Integer>, Time, JavaPairRDD<String, String>>() { @Override public JavaPairRDD<String, String> call(JavaPairRDD<String, Integer> in, Time time) throws Exception { return null; } }); } @SuppressWarnings("unchecked") @Test public void testTransformWith() { List<List<Tuple2<String, String>>> stringStringKVStream1 = Arrays.asList( Arrays.asList(new Tuple2<String, String>("california", "dodgers"), new Tuple2<String, String>("new york", "yankees")), Arrays.asList(new Tuple2<String, String>("california", "sharks"), new Tuple2<String, String>("new york", "rangers"))); List<List<Tuple2<String, String>>> stringStringKVStream2 = Arrays.asList( Arrays.asList(new Tuple2<String, String>("california", "giants"), new Tuple2<String, String>("new york", "mets")), Arrays.asList(new Tuple2<String, String>("california", "ducks"), new Tuple2<String, String>("new york", "islanders"))); List<HashSet<Tuple2<String, Tuple2<String, String>>>> expected = Arrays.asList( Sets.newHashSet( new Tuple2<String, Tuple2<String, String>>("california", new Tuple2<String, String>("dodgers", "giants")), new Tuple2<String, Tuple2<String, String>>("new york", new Tuple2<String, String>("yankees", "mets"))), Sets.newHashSet( new Tuple2<String, Tuple2<String, String>>("california", new Tuple2<String, String>("sharks", "ducks")), new Tuple2<String, Tuple2<String, String>>("new york", new Tuple2<String, String>("rangers", "islanders")))); JavaDStream<Tuple2<String, String>> stream1 = JavaTestUtils.attachTestInputStream(ssc, stringStringKVStream1, 1); JavaPairDStream<String, String> pairStream1 = JavaPairDStream.fromJavaDStream(stream1); JavaDStream<Tuple2<String, String>> stream2 = JavaTestUtils.attachTestInputStream(ssc, stringStringKVStream2, 1); JavaPairDStream<String, String> pairStream2 = JavaPairDStream.fromJavaDStream(stream2); JavaPairDStream<String, Tuple2<String, String>> joined = pairStream1.transformWithToPair(pairStream2, new Function3<JavaPairRDD<String, String>, JavaPairRDD<String, String>, Time, JavaPairRDD<String, Tuple2<String, String>>>() { @Override public JavaPairRDD<String, Tuple2<String, String>> call(JavaPairRDD<String, String> rdd1, JavaPairRDD<String, String> rdd2, Time time) throws Exception { return rdd1.join(rdd2); } }); JavaTestUtils.attachTestOutputStream(joined); List<List<Tuple2<String, Tuple2<String, String>>>> result = JavaTestUtils.runStreams(ssc, 2, 2); List<HashSet<Tuple2<String, Tuple2<String, String>>>> unorderedResult = Lists.newArrayList(); for (List<Tuple2<String, Tuple2<String, String>>> res : result) { unorderedResult.add(Sets.newHashSet(res)); } Assert.assertEquals(expected, unorderedResult); } @SuppressWarnings("unchecked") @Test public void testVariousTransformWith() { // tests whether all variations of transformWith can be called from Java List<List<Integer>> inputData1 = Arrays.asList(Arrays.asList(1)); List<List<String>> inputData2 = Arrays.asList(Arrays.asList("x")); JavaDStream<Integer> stream1 = JavaTestUtils.attachTestInputStream(ssc, inputData1, 1); JavaDStream<String> stream2 = JavaTestUtils.attachTestInputStream(ssc, inputData2, 1); List<List<Tuple2<String, Integer>>> pairInputData1 = Arrays .asList(Arrays.asList(new Tuple2<String, Integer>("x", 1))); List<List<Tuple2<Double, Character>>> pairInputData2 = Arrays .asList(Arrays.asList(new Tuple2<Double, Character>(1.0, 'x'))); JavaPairDStream<String, Integer> pairStream1 = JavaPairDStream .fromJavaDStream(JavaTestUtils.attachTestInputStream(ssc, pairInputData1, 1)); JavaPairDStream<Double, Character> pairStream2 = JavaPairDStream .fromJavaDStream(JavaTestUtils.attachTestInputStream(ssc, pairInputData2, 1)); JavaDStream<Double> transformed1 = stream1.transformWith(stream2, new Function3<JavaRDD<Integer>, JavaRDD<String>, Time, JavaRDD<Double>>() { @Override public JavaRDD<Double> call(JavaRDD<Integer> rdd1, JavaRDD<String> rdd2, Time time) throws Exception { return null; } }); JavaDStream<Double> transformed2 = stream1.transformWith(pairStream1, new Function3<JavaRDD<Integer>, JavaPairRDD<String, Integer>, Time, JavaRDD<Double>>() { @Override public JavaRDD<Double> call(JavaRDD<Integer> rdd1, JavaPairRDD<String, Integer> rdd2, Time time) throws Exception { return null; } }); JavaPairDStream<Double, Double> transformed3 = stream1.transformWithToPair(stream2, new Function3<JavaRDD<Integer>, JavaRDD<String>, Time, JavaPairRDD<Double, Double>>() { @Override public JavaPairRDD<Double, Double> call(JavaRDD<Integer> rdd1, JavaRDD<String> rdd2, Time time) throws Exception { return null; } }); JavaPairDStream<Double, Double> transformed4 = stream1.transformWithToPair(pairStream1, new Function3<JavaRDD<Integer>, JavaPairRDD<String, Integer>, Time, JavaPairRDD<Double, Double>>() { @Override public JavaPairRDD<Double, Double> call(JavaRDD<Integer> rdd1, JavaPairRDD<String, Integer> rdd2, Time time) throws Exception { return null; } }); JavaDStream<Double> pairTransformed1 = pairStream1.transformWith(stream2, new Function3<JavaPairRDD<String, Integer>, JavaRDD<String>, Time, JavaRDD<Double>>() { @Override public JavaRDD<Double> call(JavaPairRDD<String, Integer> rdd1, JavaRDD<String> rdd2, Time time) throws Exception { return null; } }); JavaDStream<Double> pairTransformed2_ = pairStream1.transformWith(pairStream1, new Function3<JavaPairRDD<String, Integer>, JavaPairRDD<String, Integer>, Time, JavaRDD<Double>>() { @Override public JavaRDD<Double> call(JavaPairRDD<String, Integer> rdd1, JavaPairRDD<String, Integer> rdd2, Time time) throws Exception { return null; } }); JavaPairDStream<Double, Double> pairTransformed3 = pairStream1.transformWithToPair(stream2, new Function3<JavaPairRDD<String, Integer>, JavaRDD<String>, Time, JavaPairRDD<Double, Double>>() { @Override public JavaPairRDD<Double, Double> call(JavaPairRDD<String, Integer> rdd1, JavaRDD<String> rdd2, Time time) throws Exception { return null; } }); JavaPairDStream<Double, Double> pairTransformed4 = pairStream1.transformWithToPair(pairStream2, new Function3<JavaPairRDD<String, Integer>, JavaPairRDD<Double, Character>, Time, JavaPairRDD<Double, Double>>() { @Override public JavaPairRDD<Double, Double> call(JavaPairRDD<String, Integer> rdd1, JavaPairRDD<Double, Character> rdd2, Time time) throws Exception { return null; } }); } @SuppressWarnings("unchecked") @Test public void testStreamingContextTransform() { List<List<Integer>> stream1input = Arrays.asList(Arrays.asList(1), Arrays.asList(2)); List<List<Integer>> stream2input = Arrays.asList(Arrays.asList(3), Arrays.asList(4)); List<List<Tuple2<Integer, String>>> pairStream1input = Arrays.asList( Arrays.asList(new Tuple2<Integer, String>(1, "x")), Arrays.asList(new Tuple2<Integer, String>(2, "y"))); List<List<Tuple2<Integer, Tuple2<Integer, String>>>> expected = Arrays.asList( Arrays.asList(new Tuple2<Integer, Tuple2<Integer, String>>(1, new Tuple2<Integer, String>(1, "x"))), Arrays.asList( new Tuple2<Integer, Tuple2<Integer, String>>(2, new Tuple2<Integer, String>(2, "y")))); JavaDStream<Integer> stream1 = JavaTestUtils.attachTestInputStream(ssc, stream1input, 1); JavaDStream<Integer> stream2 = JavaTestUtils.attachTestInputStream(ssc, stream2input, 1); JavaPairDStream<Integer, String> pairStream1 = JavaPairDStream .fromJavaDStream(JavaTestUtils.attachTestInputStream(ssc, pairStream1input, 1)); List<JavaDStream<?>> listOfDStreams1 = Arrays.<JavaDStream<?>>asList(stream1, stream2); // This is just to test whether this transform to JavaStream compiles JavaDStream<Long> transformed1 = ssc.transform(listOfDStreams1, new Function2<List<JavaRDD<?>>, Time, JavaRDD<Long>>() { @Override public JavaRDD<Long> call(List<JavaRDD<?>> listOfRDDs, Time time) { Assert.assertEquals(2, listOfRDDs.size()); return null; } }); List<JavaDStream<?>> listOfDStreams2 = Arrays.<JavaDStream<?>>asList(stream1, stream2, pairStream1.toJavaDStream()); JavaPairDStream<Integer, Tuple2<Integer, String>> transformed2 = ssc.transformToPair(listOfDStreams2, new Function2<List<JavaRDD<?>>, Time, JavaPairRDD<Integer, Tuple2<Integer, String>>>() { @Override public JavaPairRDD<Integer, Tuple2<Integer, String>> call(List<JavaRDD<?>> listOfRDDs, Time time) { Assert.assertEquals(3, listOfRDDs.size()); JavaRDD<Integer> rdd1 = (JavaRDD<Integer>) listOfRDDs.get(0); JavaRDD<Integer> rdd2 = (JavaRDD<Integer>) listOfRDDs.get(1); JavaRDD<Tuple2<Integer, String>> rdd3 = (JavaRDD<Tuple2<Integer, String>>) listOfRDDs .get(2); JavaPairRDD<Integer, String> prdd3 = JavaPairRDD.fromJavaRDD(rdd3); PairFunction<Integer, Integer, Integer> mapToTuple = new PairFunction<Integer, Integer, Integer>() { @Override public Tuple2<Integer, Integer> call(Integer i) throws Exception { return new Tuple2<Integer, Integer>(i, i); } }; return rdd1.union(rdd2).mapToPair(mapToTuple).join(prdd3); } }); JavaTestUtils.attachTestOutputStream(transformed2); List<List<Tuple2<Integer, Tuple2<Integer, String>>>> result = JavaTestUtils.runStreams(ssc, 2, 2); Assert.assertEquals(expected, result); } @SuppressWarnings("unchecked") @Test public void testFlatMap() { List<List<String>> inputData = Arrays.asList(Arrays.asList("go", "giants"), Arrays.asList("boo", "dodgers"), Arrays.asList("athletics")); List<List<String>> expected = Arrays.asList(Arrays.asList("g", "o", "g", "i", "a", "n", "t", "s"), Arrays.asList("b", "o", "o", "d", "o", "d", "g", "e", "r", "s"), Arrays.asList("a", "t", "h", "l", "e", "t", "i", "c", "s")); JavaDStream<String> stream = JavaTestUtils.attachTestInputStream(ssc, inputData, 1); JavaDStream<String> flatMapped = stream.flatMap(new FlatMapFunction<String, String>() { @Override public Iterable<String> call(String x) { return Lists.newArrayList(x.split("(?!^)")); } }); JavaTestUtils.attachTestOutputStream(flatMapped); List<List<String>> result = JavaTestUtils.runStreams(ssc, 3, 3); assertOrderInvariantEquals(expected, result); } @SuppressWarnings("unchecked") @Test public void testPairFlatMap() { List<List<String>> inputData = Arrays.asList(Arrays.asList("giants"), Arrays.asList("dodgers"), Arrays.asList("athletics")); List<List<Tuple2<Integer, String>>> expected = Arrays.asList( Arrays.asList(new Tuple2<Integer, String>(6, "g"), new Tuple2<Integer, String>(6, "i"), new Tuple2<Integer, String>(6, "a"), new Tuple2<Integer, String>(6, "n"), new Tuple2<Integer, String>(6, "t"), new Tuple2<Integer, String>(6, "s")), Arrays.asList(new Tuple2<Integer, String>(7, "d"), new Tuple2<Integer, String>(7, "o"), new Tuple2<Integer, String>(7, "d"), new Tuple2<Integer, String>(7, "g"), new Tuple2<Integer, String>(7, "e"), new Tuple2<Integer, String>(7, "r"), new Tuple2<Integer, String>(7, "s")), Arrays.asList(new Tuple2<Integer, String>(9, "a"), new Tuple2<Integer, String>(9, "t"), new Tuple2<Integer, String>(9, "h"), new Tuple2<Integer, String>(9, "l"), new Tuple2<Integer, String>(9, "e"), new Tuple2<Integer, String>(9, "t"), new Tuple2<Integer, String>(9, "i"), new Tuple2<Integer, String>(9, "c"), new Tuple2<Integer, String>(9, "s"))); JavaDStream<String> stream = JavaTestUtils.attachTestInputStream(ssc, inputData, 1); JavaPairDStream<Integer, String> flatMapped = stream .flatMapToPair(new PairFlatMapFunction<String, Integer, String>() { @Override public Iterable<Tuple2<Integer, String>> call(String in) throws Exception { List<Tuple2<Integer, String>> out = Lists.newArrayList(); for (String letter : in.split("(?!^)")) { out.add(new Tuple2<Integer, String>(in.length(), letter)); } return out; } }); JavaTestUtils.attachTestOutputStream(flatMapped); List<List<Tuple2<Integer, String>>> result = JavaTestUtils.runStreams(ssc, 3, 3); Assert.assertEquals(expected, result); } @SuppressWarnings("unchecked") @Test public void testUnion() { List<List<Integer>> inputData1 = Arrays.asList(Arrays.asList(1, 1), Arrays.asList(2, 2), Arrays.asList(3, 3)); List<List<Integer>> inputData2 = Arrays.asList(Arrays.asList(4, 4), Arrays.asList(5, 5), Arrays.asList(6, 6)); List<List<Integer>> expected = Arrays.asList(Arrays.asList(1, 1, 4, 4), Arrays.asList(2, 2, 5, 5), Arrays.asList(3, 3, 6, 6)); JavaDStream<Integer> stream1 = JavaTestUtils.attachTestInputStream(ssc, inputData1, 2); JavaDStream<Integer> stream2 = JavaTestUtils.attachTestInputStream(ssc, inputData2, 2); JavaDStream<Integer> unioned = stream1.union(stream2); JavaTestUtils.attachTestOutputStream(unioned); List<List<Integer>> result = JavaTestUtils.runStreams(ssc, 3, 3); assertOrderInvariantEquals(expected, result); } /* * Performs an order-invariant comparison of lists representing two RDD streams. This allows * us to account for ordering variation within individual RDD's which occurs during windowing. */ public static <T> void assertOrderInvariantEquals(List<List<T>> expected, List<List<T>> actual) { List<Set<T>> expectedSets = new ArrayList<Set<T>>(); for (List<T> list : expected) { expectedSets.add(Collections.unmodifiableSet(new HashSet<T>(list))); } List<Set<T>> actualSets = new ArrayList<Set<T>>(); for (List<T> list : actual) { actualSets.add(Collections.unmodifiableSet(new HashSet<T>(list))); } Assert.assertEquals(expectedSets, actualSets); } // PairDStream Functions @SuppressWarnings("unchecked") @Test public void testPairFilter() { List<List<String>> inputData = Arrays.asList(Arrays.asList("giants", "dodgers"), Arrays.asList("yankees", "red socks")); List<List<Tuple2<String, Integer>>> expected = Arrays.asList( Arrays.asList(new Tuple2<String, Integer>("giants", 6)), Arrays.asList(new Tuple2<String, Integer>("yankees", 7))); JavaDStream<String> stream = JavaTestUtils.attachTestInputStream(ssc, inputData, 1); JavaPairDStream<String, Integer> pairStream = stream.mapToPair(new PairFunction<String, String, Integer>() { @Override public Tuple2<String, Integer> call(String in) throws Exception { return new Tuple2<String, Integer>(in, in.length()); } }); JavaPairDStream<String, Integer> filtered = pairStream .filter(new Function<Tuple2<String, Integer>, Boolean>() { @Override public Boolean call(Tuple2<String, Integer> in) throws Exception { return in._1().contains("a"); } }); JavaTestUtils.attachTestOutputStream(filtered); List<List<Tuple2<String, Integer>>> result = JavaTestUtils.runStreams(ssc, 2, 2); Assert.assertEquals(expected, result); } @SuppressWarnings("unchecked") private List<List<Tuple2<String, String>>> stringStringKVStream = Arrays.asList(Arrays.asList( new Tuple2<String, String>("california", "dodgers"), new Tuple2<String, String>("california", "giants"), new Tuple2<String, String>("new york", "yankees"), new Tuple2<String, String>("new york", "mets")), Arrays.asList(new Tuple2<String, String>("california", "sharks"), new Tuple2<String, String>("california", "ducks"), new Tuple2<String, String>("new york", "rangers"), new Tuple2<String, String>("new york", "islanders"))); @SuppressWarnings("unchecked") private List<List<Tuple2<String, Integer>>> stringIntKVStream = Arrays.asList( Arrays.asList(new Tuple2<String, Integer>("california", 1), new Tuple2<String, Integer>("california", 3), new Tuple2<String, Integer>("new york", 4), new Tuple2<String, Integer>("new york", 1)), Arrays.asList(new Tuple2<String, Integer>("california", 5), new Tuple2<String, Integer>("california", 5), new Tuple2<String, Integer>("new york", 3), new Tuple2<String, Integer>("new york", 1))); @SuppressWarnings("unchecked") @Test public void testPairMap() { // Maps pair -> pair of different type List<List<Tuple2<String, Integer>>> inputData = stringIntKVStream; List<List<Tuple2<Integer, String>>> expected = Arrays.asList( Arrays.asList(new Tuple2<Integer, String>(1, "california"), new Tuple2<Integer, String>(3, "california"), new Tuple2<Integer, String>(4, "new york"), new Tuple2<Integer, String>(1, "new york")), Arrays.asList(new Tuple2<Integer, String>(5, "california"), new Tuple2<Integer, String>(5, "california"), new Tuple2<Integer, String>(3, "new york"), new Tuple2<Integer, String>(1, "new york"))); JavaDStream<Tuple2<String, Integer>> stream = JavaTestUtils.attachTestInputStream(ssc, inputData, 1); JavaPairDStream<String, Integer> pairStream = JavaPairDStream.fromJavaDStream(stream); JavaPairDStream<Integer, String> reversed = pairStream .mapToPair(new PairFunction<Tuple2<String, Integer>, Integer, String>() { @Override public Tuple2<Integer, String> call(Tuple2<String, Integer> in) throws Exception { return in.swap(); } }); JavaTestUtils.attachTestOutputStream(reversed); List<List<Tuple2<Integer, String>>> result = JavaTestUtils.runStreams(ssc, 2, 2); Assert.assertEquals(expected, result); } @SuppressWarnings("unchecked") @Test public void testPairMapPartitions() { // Maps pair -> pair of different type List<List<Tuple2<String, Integer>>> inputData = stringIntKVStream; List<List<Tuple2<Integer, String>>> expected = Arrays.asList( Arrays.asList(new Tuple2<Integer, String>(1, "california"), new Tuple2<Integer, String>(3, "california"), new Tuple2<Integer, String>(4, "new york"), new Tuple2<Integer, String>(1, "new york")), Arrays.asList(new Tuple2<Integer, String>(5, "california"), new Tuple2<Integer, String>(5, "california"), new Tuple2<Integer, String>(3, "new york"), new Tuple2<Integer, String>(1, "new york"))); JavaDStream<Tuple2<String, Integer>> stream = JavaTestUtils.attachTestInputStream(ssc, inputData, 1); JavaPairDStream<String, Integer> pairStream = JavaPairDStream.fromJavaDStream(stream); JavaPairDStream<Integer, String> reversed = pairStream .mapPartitionsToPair(new PairFlatMapFunction<Iterator<Tuple2<String, Integer>>, Integer, String>() { @Override public Iterable<Tuple2<Integer, String>> call(Iterator<Tuple2<String, Integer>> in) throws Exception { LinkedList<Tuple2<Integer, String>> out = new LinkedList<Tuple2<Integer, String>>(); while (in.hasNext()) { Tuple2<String, Integer> next = in.next(); out.add(next.swap()); } return out; } }); JavaTestUtils.attachTestOutputStream(reversed); List<List<Tuple2<Integer, String>>> result = JavaTestUtils.runStreams(ssc, 2, 2); Assert.assertEquals(expected, result); } @SuppressWarnings("unchecked") @Test public void testPairMap2() { // Maps pair -> single List<List<Tuple2<String, Integer>>> inputData = stringIntKVStream; List<List<Integer>> expected = Arrays.asList(Arrays.asList(1, 3, 4, 1), Arrays.asList(5, 5, 3, 1)); JavaDStream<Tuple2<String, Integer>> stream = JavaTestUtils.attachTestInputStream(ssc, inputData, 1); JavaPairDStream<String, Integer> pairStream = JavaPairDStream.fromJavaDStream(stream); JavaDStream<Integer> reversed = pairStream.map(new Function<Tuple2<String, Integer>, Integer>() { @Override public Integer call(Tuple2<String, Integer> in) throws Exception { return in._2(); } }); JavaTestUtils.attachTestOutputStream(reversed); List<List<Integer>> result = JavaTestUtils.runStreams(ssc, 2, 2); Assert.assertEquals(expected, result); } @SuppressWarnings("unchecked") @Test public void testPairToPairFlatMapWithChangingTypes() { // Maps pair -> pair List<List<Tuple2<String, Integer>>> inputData = Arrays.asList( Arrays.asList(new Tuple2<String, Integer>("hi", 1), new Tuple2<String, Integer>("ho", 2)), Arrays.asList(new Tuple2<String, Integer>("hi", 1), new Tuple2<String, Integer>("ho", 2))); List<List<Tuple2<Integer, String>>> expected = Arrays.asList( Arrays.asList(new Tuple2<Integer, String>(1, "h"), new Tuple2<Integer, String>(1, "i"), new Tuple2<Integer, String>(2, "h"), new Tuple2<Integer, String>(2, "o")), Arrays.asList(new Tuple2<Integer, String>(1, "h"), new Tuple2<Integer, String>(1, "i"), new Tuple2<Integer, String>(2, "h"), new Tuple2<Integer, String>(2, "o"))); JavaDStream<Tuple2<String, Integer>> stream = JavaTestUtils.attachTestInputStream(ssc, inputData, 1); JavaPairDStream<String, Integer> pairStream = JavaPairDStream.fromJavaDStream(stream); JavaPairDStream<Integer, String> flatMapped = pairStream .flatMapToPair(new PairFlatMapFunction<Tuple2<String, Integer>, Integer, String>() { @Override public Iterable<Tuple2<Integer, String>> call(Tuple2<String, Integer> in) throws Exception { List<Tuple2<Integer, String>> out = new LinkedList<Tuple2<Integer, String>>(); for (Character s : in._1().toCharArray()) { out.add(new Tuple2<Integer, String>(in._2(), s.toString())); } return out; } }); JavaTestUtils.attachTestOutputStream(flatMapped); List<List<Tuple2<Integer, String>>> result = JavaTestUtils.runStreams(ssc, 2, 2); Assert.assertEquals(expected, result); } @SuppressWarnings("unchecked") @Test public void testPairGroupByKey() { List<List<Tuple2<String, String>>> inputData = stringStringKVStream; List<List<Tuple2<String, List<String>>>> expected = Arrays.asList( Arrays.asList(new Tuple2<String, List<String>>("california", Arrays.asList("dodgers", "giants")), new Tuple2<String, List<String>>("new york", Arrays.asList("yankees", "mets"))), Arrays.asList(new Tuple2<String, List<String>>("california", Arrays.asList("sharks", "ducks")), new Tuple2<String, List<String>>("new york", Arrays.asList("rangers", "islanders")))); JavaDStream<Tuple2<String, String>> stream = JavaTestUtils.attachTestInputStream(ssc, inputData, 1); JavaPairDStream<String, String> pairStream = JavaPairDStream.fromJavaDStream(stream); JavaPairDStream<String, Iterable<String>> grouped = pairStream.groupByKey(); JavaTestUtils.attachTestOutputStream(grouped); List<List<Tuple2<String, Iterable<String>>>> result = JavaTestUtils.runStreams(ssc, 2, 2); Assert.assertEquals(expected.size(), result.size()); Iterator<List<Tuple2<String, Iterable<String>>>> resultItr = result.iterator(); Iterator<List<Tuple2<String, List<String>>>> expectedItr = expected.iterator(); while (resultItr.hasNext() && expectedItr.hasNext()) { Iterator<Tuple2<String, Iterable<String>>> resultElements = resultItr.next().iterator(); Iterator<Tuple2<String, List<String>>> expectedElements = expectedItr.next().iterator(); while (resultElements.hasNext() && expectedElements.hasNext()) { Tuple2<String, Iterable<String>> resultElement = resultElements.next(); Tuple2<String, List<String>> expectedElement = expectedElements.next(); Assert.assertEquals(expectedElement._1(), resultElement._1()); equalIterable(expectedElement._2(), resultElement._2()); } Assert.assertEquals(resultElements.hasNext(), expectedElements.hasNext()); } } @SuppressWarnings("unchecked") @Test public void testPairReduceByKey() { List<List<Tuple2<String, Integer>>> inputData = stringIntKVStream; List<List<Tuple2<String, Integer>>> expected = Arrays.asList( Arrays.asList(new Tuple2<String, Integer>("california", 4), new Tuple2<String, Integer>("new york", 5)), Arrays.asList(new Tuple2<String, Integer>("california", 10), new Tuple2<String, Integer>("new york", 4))); JavaDStream<Tuple2<String, Integer>> stream = JavaTestUtils.attachTestInputStream(ssc, inputData, 1); JavaPairDStream<String, Integer> pairStream = JavaPairDStream.fromJavaDStream(stream); JavaPairDStream<String, Integer> reduced = pairStream.reduceByKey(new IntegerSum()); JavaTestUtils.attachTestOutputStream(reduced); List<List<Tuple2<String, Integer>>> result = JavaTestUtils.runStreams(ssc, 2, 2); Assert.assertEquals(expected, result); } @SuppressWarnings("unchecked") @Test public void testCombineByKey() { List<List<Tuple2<String, Integer>>> inputData = stringIntKVStream; List<List<Tuple2<String, Integer>>> expected = Arrays.asList( Arrays.asList(new Tuple2<String, Integer>("california", 4), new Tuple2<String, Integer>("new york", 5)), Arrays.asList(new Tuple2<String, Integer>("california", 10), new Tuple2<String, Integer>("new york", 4))); JavaDStream<Tuple2<String, Integer>> stream = JavaTestUtils.attachTestInputStream(ssc, inputData, 1); JavaPairDStream<String, Integer> pairStream = JavaPairDStream.fromJavaDStream(stream); JavaPairDStream<String, Integer> combined = pairStream .<Integer>combineByKey(new Function<Integer, Integer>() { @Override public Integer call(Integer i) throws Exception { return i; } }, new IntegerSum(), new IntegerSum(), new HashPartitioner(2)); JavaTestUtils.attachTestOutputStream(combined); List<List<Tuple2<String, Integer>>> result = JavaTestUtils.runStreams(ssc, 2, 2); Assert.assertEquals(expected, result); } @SuppressWarnings("unchecked") @Test public void testCountByValue() { List<List<String>> inputData = Arrays.asList(Arrays.asList("hello", "world"), Arrays.asList("hello", "moon"), Arrays.asList("hello")); List<List<Tuple2<String, Long>>> expected = Arrays.asList( Arrays.asList(new Tuple2<String, Long>("hello", 1L), new Tuple2<String, Long>("world", 1L)), Arrays.asList(new Tuple2<String, Long>("hello", 1L), new Tuple2<String, Long>("moon", 1L)), Arrays.asList(new Tuple2<String, Long>("hello", 1L))); JavaDStream<String> stream = JavaTestUtils.attachTestInputStream(ssc, inputData, 1); JavaPairDStream<String, Long> counted = stream.countByValue(); JavaTestUtils.attachTestOutputStream(counted); List<List<Tuple2<String, Long>>> result = JavaTestUtils.runStreams(ssc, 3, 3); Assert.assertEquals(expected, result); } @SuppressWarnings("unchecked") @Test public void testGroupByKeyAndWindow() { List<List<Tuple2<String, Integer>>> inputData = stringIntKVStream; List<List<Tuple2<String, List<Integer>>>> expected = Arrays.asList( Arrays.asList(new Tuple2<String, List<Integer>>("california", Arrays.asList(1, 3)), new Tuple2<String, List<Integer>>("new york", Arrays.asList(1, 4))), Arrays.asList(new Tuple2<String, List<Integer>>("california", Arrays.asList(1, 3, 5, 5)), new Tuple2<String, List<Integer>>("new york", Arrays.asList(1, 1, 3, 4))), Arrays.asList(new Tuple2<String, List<Integer>>("california", Arrays.asList(5, 5)), new Tuple2<String, List<Integer>>("new york", Arrays.asList(1, 3)))); JavaDStream<Tuple2<String, Integer>> stream = JavaTestUtils.attachTestInputStream(ssc, inputData, 1); JavaPairDStream<String, Integer> pairStream = JavaPairDStream.fromJavaDStream(stream); JavaPairDStream<String, Iterable<Integer>> groupWindowed = pairStream .groupByKeyAndWindow(new Duration(2000), new Duration(1000)); JavaTestUtils.attachTestOutputStream(groupWindowed); List<List<Tuple2<String, List<Integer>>>> result = JavaTestUtils.runStreams(ssc, 3, 3); Assert.assertEquals(expected.size(), result.size()); for (int i = 0; i < result.size(); i++) { Assert.assertEquals(convert(expected.get(i)), convert(result.get(i))); } } private HashSet<Tuple2<String, HashSet<Integer>>> convert(List<Tuple2<String, List<Integer>>> listOfTuples) { List<Tuple2<String, HashSet<Integer>>> newListOfTuples = new ArrayList<Tuple2<String, HashSet<Integer>>>(); for (Tuple2<String, List<Integer>> tuple : listOfTuples) { newListOfTuples.add(convert(tuple)); } return new HashSet<Tuple2<String, HashSet<Integer>>>(newListOfTuples); } private Tuple2<String, HashSet<Integer>> convert(Tuple2<String, List<Integer>> tuple) { return new Tuple2<String, HashSet<Integer>>(tuple._1(), new HashSet<Integer>(tuple._2())); } @SuppressWarnings("unchecked") @Test public void testReduceByKeyAndWindow() { List<List<Tuple2<String, Integer>>> inputData = stringIntKVStream; List<List<Tuple2<String, Integer>>> expected = Arrays.asList( Arrays.asList(new Tuple2<String, Integer>("california", 4), new Tuple2<String, Integer>("new york", 5)), Arrays.asList(new Tuple2<String, Integer>("california", 14), new Tuple2<String, Integer>("new york", 9)), Arrays.asList(new Tuple2<String, Integer>("california", 10), new Tuple2<String, Integer>("new york", 4))); JavaDStream<Tuple2<String, Integer>> stream = JavaTestUtils.attachTestInputStream(ssc, inputData, 1); JavaPairDStream<String, Integer> pairStream = JavaPairDStream.fromJavaDStream(stream); JavaPairDStream<String, Integer> reduceWindowed = pairStream.reduceByKeyAndWindow(new IntegerSum(), new Duration(2000), new Duration(1000)); JavaTestUtils.attachTestOutputStream(reduceWindowed); List<List<Tuple2<String, Integer>>> result = JavaTestUtils.runStreams(ssc, 3, 3); Assert.assertEquals(expected, result); } @SuppressWarnings("unchecked") @Test public void testUpdateStateByKey() { List<List<Tuple2<String, Integer>>> inputData = stringIntKVStream; List<List<Tuple2<String, Integer>>> expected = Arrays.asList( Arrays.asList(new Tuple2<String, Integer>("california", 4), new Tuple2<String, Integer>("new york", 5)), Arrays.asList(new Tuple2<String, Integer>("california", 14), new Tuple2<String, Integer>("new york", 9)), Arrays.asList(new Tuple2<String, Integer>("california", 14), new Tuple2<String, Integer>("new york", 9))); JavaDStream<Tuple2<String, Integer>> stream = JavaTestUtils.attachTestInputStream(ssc, inputData, 1); JavaPairDStream<String, Integer> pairStream = JavaPairDStream.fromJavaDStream(stream); JavaPairDStream<String, Integer> updated = pairStream .updateStateByKey(new Function2<List<Integer>, Optional<Integer>, Optional<Integer>>() { @Override public Optional<Integer> call(List<Integer> values, Optional<Integer> state) { int out = 0; if (state.isPresent()) { out = out + state.get(); } for (Integer v : values) { out = out + v; } return Optional.of(out); } }); JavaTestUtils.attachTestOutputStream(updated); List<List<Tuple2<String, Integer>>> result = JavaTestUtils.runStreams(ssc, 3, 3); Assert.assertEquals(expected, result); } @SuppressWarnings("unchecked") @Test public void testUpdateStateByKeyWithInitial() { List<List<Tuple2<String, Integer>>> inputData = stringIntKVStream; List<Tuple2<String, Integer>> initial = Arrays.asList(new Tuple2<String, Integer>("california", 1), new Tuple2<String, Integer>("new york", 2)); JavaRDD<Tuple2<String, Integer>> tmpRDD = ssc.sparkContext().parallelize(initial); JavaPairRDD<String, Integer> initialRDD = JavaPairRDD.fromJavaRDD(tmpRDD); List<List<Tuple2<String, Integer>>> expected = Arrays.asList( Arrays.asList(new Tuple2<String, Integer>("california", 5), new Tuple2<String, Integer>("new york", 7)), Arrays.asList(new Tuple2<String, Integer>("california", 15), new Tuple2<String, Integer>("new york", 11)), Arrays.asList(new Tuple2<String, Integer>("california", 15), new Tuple2<String, Integer>("new york", 11))); JavaDStream<Tuple2<String, Integer>> stream = JavaTestUtils.attachTestInputStream(ssc, inputData, 1); JavaPairDStream<String, Integer> pairStream = JavaPairDStream.fromJavaDStream(stream); JavaPairDStream<String, Integer> updated = pairStream .updateStateByKey(new Function2<List<Integer>, Optional<Integer>, Optional<Integer>>() { @Override public Optional<Integer> call(List<Integer> values, Optional<Integer> state) { int out = 0; if (state.isPresent()) { out = out + state.get(); } for (Integer v : values) { out = out + v; } return Optional.of(out); } }, new HashPartitioner(1), initialRDD); JavaTestUtils.attachTestOutputStream(updated); List<List<Tuple2<String, Integer>>> result = JavaTestUtils.runStreams(ssc, 3, 3); assertOrderInvariantEquals(expected, result); } @SuppressWarnings("unchecked") @Test public void testReduceByKeyAndWindowWithInverse() { List<List<Tuple2<String, Integer>>> inputData = stringIntKVStream; List<List<Tuple2<String, Integer>>> expected = Arrays.asList( Arrays.asList(new Tuple2<String, Integer>("california", 4), new Tuple2<String, Integer>("new york", 5)), Arrays.asList(new Tuple2<String, Integer>("california", 14), new Tuple2<String, Integer>("new york", 9)), Arrays.asList(new Tuple2<String, Integer>("california", 10), new Tuple2<String, Integer>("new york", 4))); JavaDStream<Tuple2<String, Integer>> stream = JavaTestUtils.attachTestInputStream(ssc, inputData, 1); JavaPairDStream<String, Integer> pairStream = JavaPairDStream.fromJavaDStream(stream); JavaPairDStream<String, Integer> reduceWindowed = pairStream.reduceByKeyAndWindow(new IntegerSum(), new IntegerDifference(), new Duration(2000), new Duration(1000)); JavaTestUtils.attachTestOutputStream(reduceWindowed); List<List<Tuple2<String, Integer>>> result = JavaTestUtils.runStreams(ssc, 3, 3); Assert.assertEquals(expected, result); } @SuppressWarnings("unchecked") @Test public void testCountByValueAndWindow() { List<List<String>> inputData = Arrays.asList(Arrays.asList("hello", "world"), Arrays.asList("hello", "moon"), Arrays.asList("hello")); List<HashSet<Tuple2<String, Long>>> expected = Arrays.asList( Sets.newHashSet(new Tuple2<String, Long>("hello", 1L), new Tuple2<String, Long>("world", 1L)), Sets.newHashSet(new Tuple2<String, Long>("hello", 2L), new Tuple2<String, Long>("world", 1L), new Tuple2<String, Long>("moon", 1L)), Sets.newHashSet(new Tuple2<String, Long>("hello", 2L), new Tuple2<String, Long>("moon", 1L))); JavaDStream<String> stream = JavaTestUtils.attachTestInputStream(ssc, inputData, 1); JavaPairDStream<String, Long> counted = stream.countByValueAndWindow(new Duration(2000), new Duration(1000)); JavaTestUtils.attachTestOutputStream(counted); List<List<Tuple2<String, Long>>> result = JavaTestUtils.runStreams(ssc, 3, 3); List<HashSet<Tuple2<String, Long>>> unorderedResult = Lists.newArrayList(); for (List<Tuple2<String, Long>> res : result) { unorderedResult.add(Sets.newHashSet(res)); } Assert.assertEquals(expected, unorderedResult); } @SuppressWarnings("unchecked") @Test public void testPairTransform() { List<List<Tuple2<Integer, Integer>>> inputData = Arrays.asList( Arrays.asList(new Tuple2<Integer, Integer>(3, 5), new Tuple2<Integer, Integer>(1, 5), new Tuple2<Integer, Integer>(4, 5), new Tuple2<Integer, Integer>(2, 5)), Arrays.asList(new Tuple2<Integer, Integer>(2, 5), new Tuple2<Integer, Integer>(3, 5), new Tuple2<Integer, Integer>(4, 5), new Tuple2<Integer, Integer>(1, 5))); List<List<Tuple2<Integer, Integer>>> expected = Arrays.asList( Arrays.asList(new Tuple2<Integer, Integer>(1, 5), new Tuple2<Integer, Integer>(2, 5), new Tuple2<Integer, Integer>(3, 5), new Tuple2<Integer, Integer>(4, 5)), Arrays.asList(new Tuple2<Integer, Integer>(1, 5), new Tuple2<Integer, Integer>(2, 5), new Tuple2<Integer, Integer>(3, 5), new Tuple2<Integer, Integer>(4, 5))); JavaDStream<Tuple2<Integer, Integer>> stream = JavaTestUtils.attachTestInputStream(ssc, inputData, 1); JavaPairDStream<Integer, Integer> pairStream = JavaPairDStream.fromJavaDStream(stream); JavaPairDStream<Integer, Integer> sorted = pairStream .transformToPair(new Function<JavaPairRDD<Integer, Integer>, JavaPairRDD<Integer, Integer>>() { @Override public JavaPairRDD<Integer, Integer> call(JavaPairRDD<Integer, Integer> in) throws Exception { return in.sortByKey(); } }); JavaTestUtils.attachTestOutputStream(sorted); List<List<Tuple2<Integer, Integer>>> result = JavaTestUtils.runStreams(ssc, 2, 2); Assert.assertEquals(expected, result); } @SuppressWarnings("unchecked") @Test public void testPairToNormalRDDTransform() { List<List<Tuple2<Integer, Integer>>> inputData = Arrays.asList( Arrays.asList(new Tuple2<Integer, Integer>(3, 5), new Tuple2<Integer, Integer>(1, 5), new Tuple2<Integer, Integer>(4, 5), new Tuple2<Integer, Integer>(2, 5)), Arrays.asList(new Tuple2<Integer, Integer>(2, 5), new Tuple2<Integer, Integer>(3, 5), new Tuple2<Integer, Integer>(4, 5), new Tuple2<Integer, Integer>(1, 5))); List<List<Integer>> expected = Arrays.asList(Arrays.asList(3, 1, 4, 2), Arrays.asList(2, 3, 4, 1)); JavaDStream<Tuple2<Integer, Integer>> stream = JavaTestUtils.attachTestInputStream(ssc, inputData, 1); JavaPairDStream<Integer, Integer> pairStream = JavaPairDStream.fromJavaDStream(stream); JavaDStream<Integer> firstParts = pairStream .transform(new Function<JavaPairRDD<Integer, Integer>, JavaRDD<Integer>>() { @Override public JavaRDD<Integer> call(JavaPairRDD<Integer, Integer> in) throws Exception { return in.map(new Function<Tuple2<Integer, Integer>, Integer>() { @Override public Integer call(Tuple2<Integer, Integer> in) { return in._1(); } }); } }); JavaTestUtils.attachTestOutputStream(firstParts); List<List<Integer>> result = JavaTestUtils.runStreams(ssc, 2, 2); Assert.assertEquals(expected, result); } @SuppressWarnings("unchecked") @Test public void testMapValues() { List<List<Tuple2<String, String>>> inputData = stringStringKVStream; List<List<Tuple2<String, String>>> expected = Arrays.asList( Arrays.asList(new Tuple2<String, String>("california", "DODGERS"), new Tuple2<String, String>("california", "GIANTS"), new Tuple2<String, String>("new york", "YANKEES"), new Tuple2<String, String>("new york", "METS")), Arrays.asList(new Tuple2<String, String>("california", "SHARKS"), new Tuple2<String, String>("california", "DUCKS"), new Tuple2<String, String>("new york", "RANGERS"), new Tuple2<String, String>("new york", "ISLANDERS"))); JavaDStream<Tuple2<String, String>> stream = JavaTestUtils.attachTestInputStream(ssc, inputData, 1); JavaPairDStream<String, String> pairStream = JavaPairDStream.fromJavaDStream(stream); JavaPairDStream<String, String> mapped = pairStream.mapValues(new Function<String, String>() { @Override public String call(String s) throws Exception { return s.toUpperCase(); } }); JavaTestUtils.attachTestOutputStream(mapped); List<List<Tuple2<String, String>>> result = JavaTestUtils.runStreams(ssc, 2, 2); Assert.assertEquals(expected, result); } @SuppressWarnings("unchecked") @Test public void testFlatMapValues() { List<List<Tuple2<String, String>>> inputData = stringStringKVStream; List<List<Tuple2<String, String>>> expected = Arrays.asList( Arrays.asList(new Tuple2<String, String>("california", "dodgers1"), new Tuple2<String, String>("california", "dodgers2"), new Tuple2<String, String>("california", "giants1"), new Tuple2<String, String>("california", "giants2"), new Tuple2<String, String>("new york", "yankees1"), new Tuple2<String, String>("new york", "yankees2"), new Tuple2<String, String>("new york", "mets1"), new Tuple2<String, String>("new york", "mets2")), Arrays.asList(new Tuple2<String, String>("california", "sharks1"), new Tuple2<String, String>("california", "sharks2"), new Tuple2<String, String>("california", "ducks1"), new Tuple2<String, String>("california", "ducks2"), new Tuple2<String, String>("new york", "rangers1"), new Tuple2<String, String>("new york", "rangers2"), new Tuple2<String, String>("new york", "islanders1"), new Tuple2<String, String>("new york", "islanders2"))); JavaDStream<Tuple2<String, String>> stream = JavaTestUtils.attachTestInputStream(ssc, inputData, 1); JavaPairDStream<String, String> pairStream = JavaPairDStream.fromJavaDStream(stream); JavaPairDStream<String, String> flatMapped = pairStream .flatMapValues(new Function<String, Iterable<String>>() { @Override public Iterable<String> call(String in) { List<String> out = new ArrayList<String>(); out.add(in + "1"); out.add(in + "2"); return out; } }); JavaTestUtils.attachTestOutputStream(flatMapped); List<List<Tuple2<String, String>>> result = JavaTestUtils.runStreams(ssc, 2, 2); Assert.assertEquals(expected, result); } @SuppressWarnings("unchecked") @Test public void testCoGroup() { List<List<Tuple2<String, String>>> stringStringKVStream1 = Arrays.asList( Arrays.asList(new Tuple2<String, String>("california", "dodgers"), new Tuple2<String, String>("new york", "yankees")), Arrays.asList(new Tuple2<String, String>("california", "sharks"), new Tuple2<String, String>("new york", "rangers"))); List<List<Tuple2<String, String>>> stringStringKVStream2 = Arrays.asList( Arrays.asList(new Tuple2<String, String>("california", "giants"), new Tuple2<String, String>("new york", "mets")), Arrays.asList(new Tuple2<String, String>("california", "ducks"), new Tuple2<String, String>("new york", "islanders"))); List<List<Tuple2<String, Tuple2<List<String>, List<String>>>>> expected = Arrays.asList( Arrays.asList( new Tuple2<String, Tuple2<List<String>, List<String>>>("california", new Tuple2<List<String>, List<String>>(Arrays.asList("dodgers"), Arrays.asList("giants"))), new Tuple2<String, Tuple2<List<String>, List<String>>>("new york", new Tuple2<List<String>, List<String>>(Arrays.asList("yankees"), Arrays.asList("mets")))), Arrays.asList( new Tuple2<String, Tuple2<List<String>, List<String>>>("california", new Tuple2<List<String>, List<String>>(Arrays.asList("sharks"), Arrays.asList("ducks"))), new Tuple2<String, Tuple2<List<String>, List<String>>>("new york", new Tuple2<List<String>, List<String>>(Arrays.asList("rangers"), Arrays.asList("islanders"))))); JavaDStream<Tuple2<String, String>> stream1 = JavaTestUtils.attachTestInputStream(ssc, stringStringKVStream1, 1); JavaPairDStream<String, String> pairStream1 = JavaPairDStream.fromJavaDStream(stream1); JavaDStream<Tuple2<String, String>> stream2 = JavaTestUtils.attachTestInputStream(ssc, stringStringKVStream2, 1); JavaPairDStream<String, String> pairStream2 = JavaPairDStream.fromJavaDStream(stream2); JavaPairDStream<String, Tuple2<Iterable<String>, Iterable<String>>> grouped = pairStream1 .cogroup(pairStream2); JavaTestUtils.attachTestOutputStream(grouped); List<List<Tuple2<String, Tuple2<Iterable<String>, Iterable<String>>>>> result = JavaTestUtils .runStreams(ssc, 2, 2); Assert.assertEquals(expected.size(), result.size()); Iterator<List<Tuple2<String, Tuple2<Iterable<String>, Iterable<String>>>>> resultItr = result.iterator(); Iterator<List<Tuple2<String, Tuple2<List<String>, List<String>>>>> expectedItr = expected.iterator(); while (resultItr.hasNext() && expectedItr.hasNext()) { Iterator<Tuple2<String, Tuple2<Iterable<String>, Iterable<String>>>> resultElements = resultItr.next() .iterator(); Iterator<Tuple2<String, Tuple2<List<String>, List<String>>>> expectedElements = expectedItr.next() .iterator(); while (resultElements.hasNext() && expectedElements.hasNext()) { Tuple2<String, Tuple2<Iterable<String>, Iterable<String>>> resultElement = resultElements.next(); Tuple2<String, Tuple2<List<String>, List<String>>> expectedElement = expectedElements.next(); Assert.assertEquals(expectedElement._1(), resultElement._1()); equalIterable(expectedElement._2()._1(), resultElement._2()._1()); equalIterable(expectedElement._2()._2(), resultElement._2()._2()); } Assert.assertEquals(resultElements.hasNext(), expectedElements.hasNext()); } } @SuppressWarnings("unchecked") @Test public void testJoin() { List<List<Tuple2<String, String>>> stringStringKVStream1 = Arrays.asList( Arrays.asList(new Tuple2<String, String>("california", "dodgers"), new Tuple2<String, String>("new york", "yankees")), Arrays.asList(new Tuple2<String, String>("california", "sharks"), new Tuple2<String, String>("new york", "rangers"))); List<List<Tuple2<String, String>>> stringStringKVStream2 = Arrays.asList( Arrays.asList(new Tuple2<String, String>("california", "giants"), new Tuple2<String, String>("new york", "mets")), Arrays.asList(new Tuple2<String, String>("california", "ducks"), new Tuple2<String, String>("new york", "islanders"))); List<List<Tuple2<String, Tuple2<String, String>>>> expected = Arrays.asList( Arrays.asList( new Tuple2<String, Tuple2<String, String>>("california", new Tuple2<String, String>("dodgers", "giants")), new Tuple2<String, Tuple2<String, String>>("new york", new Tuple2<String, String>("yankees", "mets"))), Arrays.asList( new Tuple2<String, Tuple2<String, String>>("california", new Tuple2<String, String>("sharks", "ducks")), new Tuple2<String, Tuple2<String, String>>("new york", new Tuple2<String, String>("rangers", "islanders")))); JavaDStream<Tuple2<String, String>> stream1 = JavaTestUtils.attachTestInputStream(ssc, stringStringKVStream1, 1); JavaPairDStream<String, String> pairStream1 = JavaPairDStream.fromJavaDStream(stream1); JavaDStream<Tuple2<String, String>> stream2 = JavaTestUtils.attachTestInputStream(ssc, stringStringKVStream2, 1); JavaPairDStream<String, String> pairStream2 = JavaPairDStream.fromJavaDStream(stream2); JavaPairDStream<String, Tuple2<String, String>> joined = pairStream1.join(pairStream2); JavaTestUtils.attachTestOutputStream(joined); List<List<Tuple2<String, Tuple2<String, String>>>> result = JavaTestUtils.runStreams(ssc, 2, 2); Assert.assertEquals(expected, result); } @SuppressWarnings("unchecked") @Test public void testLeftOuterJoin() { List<List<Tuple2<String, String>>> stringStringKVStream1 = Arrays.asList( Arrays.asList(new Tuple2<String, String>("california", "dodgers"), new Tuple2<String, String>("new york", "yankees")), Arrays.asList(new Tuple2<String, String>("california", "sharks"))); List<List<Tuple2<String, String>>> stringStringKVStream2 = Arrays.asList( Arrays.asList(new Tuple2<String, String>("california", "giants")), Arrays.asList(new Tuple2<String, String>("new york", "islanders")) ); List<List<Long>> expected = Arrays.asList(Arrays.asList(2L), Arrays.asList(1L)); JavaDStream<Tuple2<String, String>> stream1 = JavaTestUtils.attachTestInputStream(ssc, stringStringKVStream1, 1); JavaPairDStream<String, String> pairStream1 = JavaPairDStream.fromJavaDStream(stream1); JavaDStream<Tuple2<String, String>> stream2 = JavaTestUtils.attachTestInputStream(ssc, stringStringKVStream2, 1); JavaPairDStream<String, String> pairStream2 = JavaPairDStream.fromJavaDStream(stream2); JavaPairDStream<String, Tuple2<String, Optional<String>>> joined = pairStream1.leftOuterJoin(pairStream2); JavaDStream<Long> counted = joined.count(); JavaTestUtils.attachTestOutputStream(counted); List<List<Long>> result = JavaTestUtils.runStreams(ssc, 2, 2); Assert.assertEquals(expected, result); } @SuppressWarnings("unchecked") @Test public void testCheckpointMasterRecovery() throws InterruptedException { List<List<String>> inputData = Arrays.asList(Arrays.asList("this", "is"), Arrays.asList("a", "test"), Arrays.asList("counting", "letters")); List<List<Integer>> expectedInitial = Arrays.asList(Arrays.asList(4, 2)); List<List<Integer>> expectedFinal = Arrays.asList(Arrays.asList(1, 4), Arrays.asList(8, 7)); File tempDir = Files.createTempDir(); tempDir.deleteOnExit(); ssc.checkpoint(tempDir.getAbsolutePath()); JavaDStream<String> stream = JavaCheckpointTestUtils.attachTestInputStream(ssc, inputData, 1); JavaDStream<Integer> letterCount = stream.map(new Function<String, Integer>() { @Override public Integer call(String s) throws Exception { return s.length(); } }); JavaCheckpointTestUtils.attachTestOutputStream(letterCount); List<List<Integer>> initialResult = JavaTestUtils.runStreams(ssc, 1, 1); assertOrderInvariantEquals(expectedInitial, initialResult); Thread.sleep(1000); ssc.stop(); ssc = new JavaStreamingContext(tempDir.getAbsolutePath()); // Tweak to take into consideration that the last batch before failure // will be re-processed after recovery List<List<Integer>> finalResult = JavaCheckpointTestUtils.runStreams(ssc, 2, 3); assertOrderInvariantEquals(expectedFinal, finalResult.subList(1, 3)); Utils.deleteRecursively(tempDir); } @SuppressWarnings("unchecked") @Test public void testContextGetOrCreate() throws InterruptedException { final SparkConf conf = new SparkConf().setMaster("local[2]").setAppName("test").set("newContext", "true"); File emptyDir = Files.createTempDir(); emptyDir.deleteOnExit(); StreamingContextSuite contextSuite = new StreamingContextSuite(); String corruptedCheckpointDir = contextSuite.createCorruptedCheckpoint(); String checkpointDir = contextSuite.createValidCheckpoint(); // Function to create JavaStreamingContext without any output operations // (used to detect the new context) final MutableBoolean newContextCreated = new MutableBoolean(false); Function0<JavaStreamingContext> creatingFunc = new Function0<JavaStreamingContext>() { public JavaStreamingContext call() { newContextCreated.setValue(true); return new JavaStreamingContext(conf, Seconds.apply(1)); } }; newContextCreated.setValue(false); ssc = JavaStreamingContext.getOrCreate(emptyDir.getAbsolutePath(), creatingFunc); Assert.assertTrue("new context not created", newContextCreated.isTrue()); ssc.stop(); newContextCreated.setValue(false); ssc = JavaStreamingContext.getOrCreate(corruptedCheckpointDir, creatingFunc, new org.apache.hadoop.conf.Configuration(), true); Assert.assertTrue("new context not created", newContextCreated.isTrue()); ssc.stop(); newContextCreated.setValue(false); ssc = JavaStreamingContext.getOrCreate(checkpointDir, creatingFunc, new org.apache.hadoop.conf.Configuration()); Assert.assertTrue("old context not recovered", newContextCreated.isFalse()); ssc.stop(); // Function to create JavaStreamingContext using existing JavaSparkContext // without any output operations (used to detect the new context) Function<JavaSparkContext, JavaStreamingContext> creatingFunc2 = new Function<JavaSparkContext, JavaStreamingContext>() { public JavaStreamingContext call(JavaSparkContext context) { newContextCreated.setValue(true); return new JavaStreamingContext(context, Seconds.apply(1)); } }; JavaSparkContext sc = new JavaSparkContext(conf); newContextCreated.setValue(false); ssc = JavaStreamingContext.getOrCreate(emptyDir.getAbsolutePath(), creatingFunc2, sc); Assert.assertTrue("new context not created", newContextCreated.isTrue()); ssc.stop(false); newContextCreated.setValue(false); ssc = JavaStreamingContext.getOrCreate(corruptedCheckpointDir, creatingFunc2, sc, true); Assert.assertTrue("new context not created", newContextCreated.isTrue()); ssc.stop(false); newContextCreated.setValue(false); ssc = JavaStreamingContext.getOrCreate(checkpointDir, creatingFunc2, sc); Assert.assertTrue("old context not recovered", newContextCreated.isFalse()); ssc.stop(); } /* TEST DISABLED: Pending a discussion about checkpoint() semantics with TD @SuppressWarnings("unchecked") @Test public void testCheckpointofIndividualStream() throws InterruptedException { List<List<String>> inputData = Arrays.asList( Arrays.asList("this", "is"), Arrays.asList("a", "test"), Arrays.asList("counting", "letters")); List<List<Integer>> expected = Arrays.asList( Arrays.asList(4,2), Arrays.asList(1,4), Arrays.asList(8,7)); JavaDStream stream = JavaCheckpointTestUtils.attachTestInputStream(ssc, inputData, 1); JavaDStream letterCount = stream.map(new Function<String, Integer>() { @Override public Integer call(String s) throws Exception { return s.length(); } }); JavaCheckpointTestUtils.attachTestOutputStream(letterCount); letterCount.checkpoint(new Duration(1000)); List<List<Integer>> result1 = JavaCheckpointTestUtils.runStreams(ssc, 3, 3); assertOrderInvariantEquals(expected, result1); } */ // Input stream tests. These mostly just test that we can instantiate a given InputStream with // Java arguments and assign it to a JavaDStream without producing type errors. Testing of the // InputStream functionality is deferred to the existing Scala tests. @Test public void testSocketTextStream() { JavaReceiverInputDStream<String> test = ssc.socketTextStream("localhost", 12345); } @Test public void testSocketString() { class Converter implements Function<InputStream, Iterable<String>> { public Iterable<String> call(InputStream in) throws IOException { BufferedReader reader = new BufferedReader(new InputStreamReader(in)); List<String> out = new ArrayList<String>(); while (true) { String line = reader.readLine(); if (line == null) { break; } out.add(line); } return out; } } JavaDStream<String> test = ssc.socketStream("localhost", 12345, new Converter(), StorageLevel.MEMORY_ONLY()); } @SuppressWarnings("unchecked") @Test public void testTextFileStream() throws IOException { File testDir = Utils.createTempDir(System.getProperty("java.io.tmpdir"), "spark"); List<List<String>> expected = fileTestPrepare(testDir); JavaDStream<String> input = ssc.textFileStream(testDir.toString()); JavaTestUtils.attachTestOutputStream(input); List<List<String>> result = JavaTestUtils.runStreams(ssc, 1, 1); assertOrderInvariantEquals(expected, result); } @SuppressWarnings("unchecked") @Test public void testFileStream() throws IOException { File testDir = Utils.createTempDir(System.getProperty("java.io.tmpdir"), "spark"); List<List<String>> expected = fileTestPrepare(testDir); JavaPairInputDStream<LongWritable, Text> inputStream = ssc.fileStream(testDir.toString(), LongWritable.class, Text.class, TextInputFormat.class, new Function<Path, Boolean>() { @Override public Boolean call(Path v1) throws Exception { return Boolean.TRUE; } }, true); JavaDStream<String> test = inputStream.map(new Function<Tuple2<LongWritable, Text>, String>() { @Override public String call(Tuple2<LongWritable, Text> v1) throws Exception { return v1._2().toString(); } }); JavaTestUtils.attachTestOutputStream(test); List<List<String>> result = JavaTestUtils.runStreams(ssc, 1, 1); assertOrderInvariantEquals(expected, result); } @Test public void testRawSocketStream() { JavaReceiverInputDStream<String> test = ssc.rawSocketStream("localhost", 12345); } private List<List<String>> fileTestPrepare(File testDir) throws IOException { File existingFile = new File(testDir, "0"); Files.write("0\n", existingFile, Charset.forName("UTF-8")); assertTrue(existingFile.setLastModified(1000) && existingFile.lastModified() == 1000); List<List<String>> expected = Arrays.asList(Arrays.asList("0")); return expected; } @SuppressWarnings("unchecked") // SPARK-5795: no logic assertions, just testing that intended API invocations compile private void compileSaveAsJavaAPI(JavaPairDStream<LongWritable, Text> pds) { pds.saveAsNewAPIHadoopFiles("", "", LongWritable.class, Text.class, org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat.class); pds.saveAsHadoopFiles("", "", LongWritable.class, Text.class, org.apache.hadoop.mapred.SequenceFileOutputFormat.class); // Checks that a previous common workaround for this API still compiles pds.saveAsNewAPIHadoopFiles("", "", LongWritable.class, Text.class, (Class) org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat.class); pds.saveAsHadoopFiles("", "", LongWritable.class, Text.class, (Class) org.apache.hadoop.mapred.SequenceFileOutputFormat.class); } }