Java tutorial
/* * Copyright 2012 - 2016 Splice Machine, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); you may not use * this file except in compliance with the License. You may obtain a copy of the * License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software distributed * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR * CONDITIONS OF ANY KIND, either express or implied. See the License for the * specific language governing permissions and limitations under the License. */ package com.splicemachine.stream; import com.google.common.net.HostAndPort; import com.splicemachine.db.iapi.error.StandardException; import com.splicemachine.db.iapi.sql.execute.ExecRow; import com.splicemachine.derby.impl.SpliceSpark; import com.splicemachine.derby.stream.BaseStreamTest; import org.apache.commons.collections.IteratorUtils; import org.apache.log4j.Logger; import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.function.Function; import org.junit.BeforeClass; import org.junit.Test; import scala.Tuple2; import java.io.Serializable; import java.util.*; import java.util.concurrent.*; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; /** * Created by dgomezferro on 6/1/16. 
*/
public class StreamableRDDTest extends BaseStreamTest implements Serializable {
    private static final Logger LOG = Logger.getLogger(StreamableRDDTest.class);

    /** Listener server shared by every test; started once in {@link #setup()}. */
    private static StreamListenerServer server;

    /**
     * Starts the shared {@link StreamListenerServer}, constructed with port argument {@code 0}.
     *
     * @throws StandardException if the server cannot be created or started
     */
    @BeforeClass
    public static void setup() throws StandardException {
        server = new StreamListenerServer(0);
        server.start();
    }

    // ------------------------------------------------------------------
    // Shared helpers
    // ------------------------------------------------------------------

    /**
     * Builds {@code count} key/value pairs; pair {@code i} is
     * {@code (getExecRow(i, 1), getExecRow(i, 2))}.
     */
    private List<Tuple2<ExecRow, ExecRow>> buildRows(int count) throws StandardException {
        List<Tuple2<ExecRow, ExecRow>> rows = new ArrayList<>(count);
        for (int i = 0; i < count; ++i) {
            rows.add(new Tuple2<ExecRow, ExecRow>(getExecRow(i, 1), getExecRow(i, 2)));
        }
        return rows;
    }

    /** Parallelizes {@code rowCount} generated pairs over {@code partitions} partitions. */
    private JavaPairRDD<ExecRow, ExecRow> parallelizeRows(int rowCount, int partitions) throws StandardException {
        return SpliceSpark.getContext().parallelizePairs(buildRows(rowCount), partitions);
    }

    /** Creates a {@link StreamableRDD} that streams {@code rows} to {@code listener} through the shared server. */
    private static StreamableRDD newStreamableRDD(JavaRDD<ExecRow> rows, StreamListener<ExecRow> listener)
            throws StandardException {
        HostAndPort hostAndPort = server.getHostAndPort();
        return new StreamableRDD(rows, listener.getUuid(), hostAndPort.getHostText(), hostAndPort.getPort());
    }

    /**
     * Runs {@code srdd.submit()} on a new thread so the calling test can consume the listener's
     * iterator while the job is still producing rows.
     *
     * <p>A failure is logged together with its stack trace before being rethrown, so that a broken
     * submission is visible in the test output instead of dying silently in the worker thread.
     *
     * @return the already started thread
     */
    private static Thread submitAsync(final StreamableRDD srdd) {
        Thread submitter = new Thread("streamable-rdd-submitter") {
            @Override
            public void run() {
                try {
                    srdd.submit();
                } catch (Exception e) {
                    LOG.error("Asynchronous submission of the streamable RDD failed", e);
                    throw new RuntimeException(e);
                }
            }
        };
        submitter.start();
        return submitter;
    }

    /** Drains {@code rows}, asserting every element is non-null, and returns how many were seen. */
    private static int countNonNullRows(Iterator<ExecRow> rows) {
        int count = 0;
        while (rows.hasNext()) {
            assertNotNull(rows.next());
            count++;
        }
        return count;
    }

    /**
     * Drains {@code rows}, asserting that column 1 of the n-th row (0-based) equals {@code first + n}.
     *
     * @return the number of rows consumed
     */
    private static int assertSequentialRows(Iterator<ExecRow> rows, int first) throws StandardException {
        int count = 0;
        while (rows.hasNext()) {
            ExecRow row = rows.next();
            assertNotNull(row);
            assertEquals(first + count, row.getColumn(1).getInt());
            count++;
        }
        return count;
    }

    /** Returns a task that drains {@code listener}'s iterator into a list on the executing thread. */
    private static Callable<List<ExecRow>> collectRows(final StreamListener<ExecRow> listener) {
        return new Callable<List<ExecRow>>() {
            @Override
            @SuppressWarnings("unchecked") // IteratorUtils.toList yields a raw List; elements come from an Iterator<ExecRow>
            public List<ExecRow> call() throws Exception {
                return IteratorUtils.toList(listener.getIterator());
            }
        };
    }

    // ------------------------------------------------------------------
    // Tests
    // ------------------------------------------------------------------

    /** Streams the ten fixture rows synchronously and checks all ten arrive with column 1 below 10. */
    @Test
    public void testBasicStream() throws Exception {
        StreamListener<ExecRow> sl = new StreamListener<>();
        server.register(sl);

        JavaPairRDD<ExecRow, ExecRow> rdd = SpliceSpark.getContext().parallelizePairs(tenRows, 10);
        StreamableRDD srdd = newStreamableRDD(rdd.values(), sl);
        srdd.submit();

        Iterator<ExecRow> it = sl.getIterator();
        int count = 0;
        while (it.hasNext()) {
            ExecRow execRow = it.next();
            LOG.trace(execRow);
            count++;
            assertNotNull(execRow);
            assertTrue(execRow.getColumn(1).getInt() < 10);
        }
        assertEquals(10, count);
    }

    /** Shuffles the fixture rows, sorts them by column 1 in Spark and checks they are streamed in strictly increasing order. */
    @Test
    public void testOrder() throws Exception {
        StreamListener<ExecRow> sl = new StreamListener<>();
        server.register(sl);

        List<Tuple2<ExecRow, ExecRow>> shuffledRows = new ArrayList<>(tenRows);
        Collections.shuffle(shuffledRows);

        JavaPairRDD<ExecRow, ExecRow> rdd = SpliceSpark.getContext().parallelizePairs(shuffledRows, 10);
        JavaRDD<ExecRow> sorted = rdd.values().sortBy(new Function<ExecRow, Integer>() {
            @Override
            public Integer call(ExecRow execRow) throws Exception {
                return execRow.getColumn(1).getInt();
            }
        }, true, 4);
        StreamableRDD srdd = newStreamableRDD(sorted, sl);
        srdd.submit();

        Iterator<ExecRow> it = sl.getIterator();
        int count = 0;
        int last = -1;
        while (it.hasNext()) {
            ExecRow execRow = it.next();
            LOG.trace(execRow);
            count++;
            assertNotNull(execRow);
            int value = execRow.getColumn(1).getInt();
            assertTrue("Results not in order", value > last);
            last = value;
        }
        assertEquals(10, count);
    }

    /** Streams 10,000 rows over 6 partitions while the job is submitted from a background thread. */
    @Test
    public void testBlocking() throws StandardException {
        StreamListener<ExecRow> sl = new StreamListener<>();
        server.register(sl);

        submitAsync(newStreamableRDD(parallelizeRows(10000, 6).values(), sl));

        assertEquals(10000, countNonNullRows(sl.getIterator()));
    }

    /** Streams 100,000 rows over 12 partitions while the job is submitted from a background thread. */
    @Test
    public void testBlockingLarge() throws StandardException {
        StreamListener<ExecRow> sl = new StreamListener<>();
        server.register(sl);

        submitAsync(newStreamableRDD(parallelizeRows(100000, 12).values(), sl));

        assertEquals(100000, countNonNullRows(sl.getIterator()));
    }

    /** Same as {@link #testBlockingLarge()} but with an odd number (13) of partitions. */
    @Test
    public void testBlockingLargeOddPartitions() throws StandardException {
        StreamListener<ExecRow> sl = new StreamListener<>();
        server.register(sl);

        submitAsync(newStreamableRDD(parallelizeRows(100000, 13).values(), sl));

        assertEquals(100000, countNonNullRows(sl.getIterator()));
    }

    /** Limit 100 / offset 2000 over 4000 rows in a single partition: expects rows 2000..2099 in order. */
    @Test
    public void testSmallOffsetLimit() throws StandardException {
        int limit = 100;
        int offset = 2000;
        int total = 4000;

        StreamListener<ExecRow> sl = new StreamListener<>(limit, offset);
        server.register(sl);

        submitAsync(newStreamableRDD(parallelizeRows(total, 1).values(), sl));

        assertEquals(limit, assertSequentialRows(sl.getIterator(), offset));
    }

    /**
     * Limit 2000 / offset 0 over 4000 rows in a single partition, with the same explicit
     * {@code batches} (2) and {@code batchSize} (512) handed to both the listener and the RDD.
     */
    @Test
    public void testSmallLimit() throws StandardException {
        int limit = 2000;
        int offset = 0;
        int total = 4000;
        int batches = 2;
        int batchSize = 512;

        StreamListener<ExecRow> sl = new StreamListener<>(limit, offset, batches, batchSize);
        HostAndPort hostAndPort = server.getHostAndPort();
        server.register(sl);

        JavaPairRDD<ExecRow, ExecRow> rdd = parallelizeRows(total, 1);
        final StreamableRDD srdd = new StreamableRDD(rdd.values(), null, sl.getUuid(),
                hostAndPort.getHostText(), hostAndPort.getPort(), batches, batchSize);
        submitAsync(srdd);

        assertEquals(limit, assertSequentialRows(sl.getIterator(), offset));
    }

    /** Limit 400 / offset 30000 over 100,000 rows in 13 partitions: expects rows 30000..30399 in order. */
    @Test
    public void testOffsetLimit() throws StandardException {
        StreamListener<ExecRow> sl = new StreamListener<>(400, 30000);
        server.register(sl);

        submitAsync(newStreamableRDD(parallelizeRows(100000, 13).values(), sl));

        assertEquals(400, assertSequentialRows(sl.getIterator(), 30000));
    }

    /** Limit 400 / offset 0 over 100,000 rows in 13 partitions: expects rows 0..399 in order. */
    @Test
    public void testLimit() throws StandardException {
        StreamListener<ExecRow> sl = new StreamListener<>(400, 0);
        server.register(sl);

        submitAsync(newStreamableRDD(parallelizeRows(100000, 13).values(), sl));

        assertEquals(400, assertSequentialRows(sl.getIterator(), 0));
    }

    /**
     * Offset 60000 with a limit argument of {@code -1} over 100,000 rows: expects the remaining
     * 40,000 rows in order (the expected count implies {@code -1} means "no limit" - confirm
     * against {@code StreamListener}).
     */
    @Test
    public void testOffset() throws StandardException {
        StreamListener<ExecRow> sl = new StreamListener<>(-1, 60000);
        server.register(sl);

        submitAsync(newStreamableRDD(parallelizeRows(100000, 13).values(), sl));

        assertEquals(100000 - 60000, assertSequentialRows(sl.getIterator(), 60000));
    }

    /**
     * Runs three streaming jobs over the same data at once (the second one rewrites column 1 to 0)
     * and checks that each listener receives its own complete, consistent stream.
     */
    @Test
    public void testConcurrentQueries() throws StandardException, ExecutionException, InterruptedException {
        final int total = 100000;
        final StreamListener<ExecRow> sl1 = new StreamListener<>();
        final StreamListener<ExecRow> sl2 = new StreamListener<>();
        final StreamListener<ExecRow> sl3 = new StreamListener<>();
        server.register(sl1);
        server.register(sl2);
        server.register(sl3);

        JavaPairRDD<ExecRow, ExecRow> rdd = parallelizeRows(total, 12);
        final StreamableRDD srdd1 = newStreamableRDD(rdd.values(), sl1);
        final StreamableRDD srdd2 = newStreamableRDD(rdd.values().map(new Function<ExecRow, ExecRow>() {
            @Override
            public ExecRow call(ExecRow o) throws Exception {
                o.getColumn(1).setValue(0);
                return o;
            }
        }), sl2);
        final StreamableRDD srdd3 = newStreamableRDD(rdd.values(), sl3);
        for (final StreamableRDD srdd : Arrays.asList(srdd1, srdd2, srdd3)) {
            submitAsync(srdd);
        }

        // We collect them asynchronously into memory so we are able to iterate over them at the same time. Otherwise
        // tasks for the third RDD might be blocked by tasks in other RDDs, and we are not consuming elements from the
        // other iterators so they can become unblocked.
        ExecutorService executor = Executors.newFixedThreadPool(3);
        List<ExecRow> rows1;
        List<ExecRow> rows2;
        List<ExecRow> rows3;
        try {
            Future<List<ExecRow>> future1 = executor.submit(collectRows(sl1));
            Future<List<ExecRow>> future2 = executor.submit(collectRows(sl2));
            Future<List<ExecRow>> future3 = executor.submit(collectRows(sl3));
            rows1 = future1.get();
            rows2 = future2.get();
            rows3 = future3.get();
        } finally {
            // Always release the pool's worker threads, even when a collector fails.
            executor.shutdownNow();
        }

        // Check every stream's size up front: a short stream then fails with a clear message instead
        // of a NoSuchElementException, and a stream that is too long no longer goes unnoticed.
        assertEquals(total, rows1.size());
        assertEquals(total, rows2.size());
        assertEquals(total, rows3.size());

        Iterator<ExecRow> it1 = rows1.iterator();
        Iterator<ExecRow> it2 = rows2.iterator();
        Iterator<ExecRow> it3 = rows3.iterator();
        while (it1.hasNext()) {
            ExecRow r1 = it1.next();
            ExecRow r2 = it2.next();
            ExecRow r3 = it3.next();
            assertNotNull(r1);
            assertNotNull(r2);
            assertNotNull(r3);
            assertEquals(0, r2.getColumn(1).getInt());
            assertEquals(r1.getColumn(1), r3.getColumn(1));
            assertEquals(r1.getColumn(2), r2.getColumn(2));
        }
    }
}