com.datatorrent.contrib.kafka.KafkaInputOperatorTest.java Source code

Introduction

Here is the source code for com.datatorrent.contrib.kafka.KafkaInputOperatorTest.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package com.datatorrent.contrib.kafka;

import java.io.File;
import java.io.IOException;
import java.util.Collection;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;

import org.apache.apex.malhar.lib.wal.FSWindowDataManager;
import org.apache.commons.io.FileUtils;
import org.apache.hadoop.conf.Configuration;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TestWatcher;
import org.junit.runner.Description;
import org.slf4j.LoggerFactory;

import com.datatorrent.api.Attribute;
import com.datatorrent.api.Context;
import com.datatorrent.api.DAG;
import com.datatorrent.api.DAG.Locality;
import com.datatorrent.api.DefaultInputPort;
import com.datatorrent.api.LocalMode;
import com.datatorrent.api.Operator;
import com.datatorrent.api.Partitioner;
import com.datatorrent.common.util.FSStorageAgent;
import com.datatorrent.common.util.BaseOperator;
import com.datatorrent.lib.helper.OperatorContextTestHelper;
import com.datatorrent.lib.partitioner.StatelessPartitionerTest;
import com.datatorrent.lib.testbench.CollectorTestSink;
import com.datatorrent.stram.StramLocalCluster;

public class KafkaInputOperatorTest extends KafkaOperatorTestBase {
    static final org.slf4j.Logger logger = LoggerFactory.getLogger(KafkaInputOperatorTest.class);
    static AtomicInteger tupleCount = new AtomicInteger();
    static CountDownLatch latch;
    static boolean isSuicide = false;
    static int suicideTrigger = 3000;

    /**
     * Test operator that collects tuples emitted by KafkaSinglePortStringInputOperator.
     *
     * @param <T> the tuple type
     */
    public static class CollectorModule<T> extends BaseOperator {
        public final transient CollectorInputPort<T> inputPort = new CollectorInputPort<T>("myInput", this);
    }

    public static class CollectorInputPort<T> extends DefaultInputPort<T> {

        private int k = 0;

        public CollectorInputPort(String id, Operator module) {
            super();
        }

        @Override
        public void process(T tuple) {
            if (isSuicide && k++ == suicideTrigger) {
                //you can only kill yourself once
                isSuicide = false;
                throw new RuntimeException();
            }
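            // END_TUPLE is a marker the test producer sends after the real messages;
            // it is not counted as data, but it counts down the latch the tests wait on.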
            if (tuple.equals(KafkaOperatorTestBase.END_TUPLE)) {
                if (latch != null) {
                    latch.countDown();
                }
                return;
            }
            tupleCount.incrementAndGet();
        }
    }

    /**
     * Test AbstractKafkaSinglePortInputOperator (i.e. an input adapter for
     * Kafka, a.k.a. consumer). The operator receives data from an outside test
     * generator through the Kafka message bus and feeds that data into the
     * Malhar streaming platform.
     *
     * [Generate messages and send them to the Kafka message bus] ==> [Receive the
     * messages through the Kafka input adapter (i.e. consumer) and emit them on
     * the output port via emitTuples() during the onMessage call]
     *
     * @throws Exception
     */
    public void testKafkaInputOperator(int sleepTime, final int totalCount, KafkaConsumer consumer, boolean isValid,
            boolean idempotent) throws Exception {
        // initialize the latch for this test
        latch = new CountDownLatch(1);

        // Start producer
        KafkaTestProducer p = new KafkaTestProducer(TEST_TOPIC);
        p.setSendCount(totalCount);
        new Thread(p).start();

        // Create DAG for testing.
        LocalMode lma = LocalMode.newInstance();
        DAG dag = lma.getDAG();

        // Create KafkaSinglePortStringInputOperator
        KafkaSinglePortStringInputOperator node = dag.addOperator("Kafka message consumer",
                KafkaSinglePortStringInputOperator.class);
        if (isSuicide) {
            // use aggressive checkpointing settings so the test fails if wrong offsets are checkpointed
            dag.setAttribute(Context.DAGContext.CHECKPOINT_WINDOW_COUNT, 1);
            dag.setAttribute(Context.OperatorContext.STORAGE_AGENT,
                    new FSStorageAgent("target/ck", new Configuration()));
            node.setMaxTuplesPerWindow(500);
        }

        if (idempotent) {
            node.setWindowDataManager(new FSWindowDataManager());
        }
        consumer.setTopic(TEST_TOPIC);

        node.setConsumer(consumer);

        consumer.setCacheSize(5000);

        if (isValid) {
            node.setZookeeper("localhost:" + KafkaOperatorTestBase.TEST_ZOOKEEPER_PORT[0]);
        }

        // Create Test tuple collector
        CollectorModule<String> collector = dag.addOperator("TestMessageCollector", new CollectorModule<String>());

        // Connect ports
        dag.addStream("Kafka message", node.outputPort, collector.inputPort).setLocality(Locality.CONTAINER_LOCAL);

        // Create local cluster
        final LocalMode.Controller lc = lma.getController();
        lc.setHeartbeatMonitoringEnabled(false);

        lc.runAsync();

        // Wait up to 300s for the consumer to finish consuming all the messages
        Assert.assertTrue("TIMEOUT: 300s ", latch.await(300000, TimeUnit.MILLISECONDS));

        // Check results
        Assert.assertTrue("Expected count >= " + totalCount + "; Actual count " + tupleCount.intValue(),
                totalCount <= tupleCount.intValue());
        logger.debug(String.format("Number of emitted tuples: %d", tupleCount.intValue()));

        p.close();
        lc.shutdown();
    }

    @Test
    public void testKafkaInputOperator_Highlevel() throws Exception {
        int totalCount = 10000;
        Properties props = new Properties();
        props.put("group.id", "group1");
        // The consumer's initial offset must be reset to the earliest unconsumed
        // message (the Kafka 0.8 auto.offset.reset=smallest behavior); otherwise it
        // won't receive any messages produced before it connected.
        KafkaConsumer k = new HighlevelKafkaConsumer(props);
        k.setInitialOffset("earliest");
        testKafkaInputOperator(1000, totalCount, k, true, false);
    }

    @Test
    public void testKafkaInputOperator_Simple() throws Exception {
        int totalCount = 10000;
        KafkaConsumer k = new SimpleKafkaConsumer();
        k.setInitialOffset("earliest");
        testKafkaInputOperator(1000, totalCount, k, true, false);
    }

    @Test
    public void testKafkaInputOperator_SimpleSuicide() throws Exception {
        int totalCount = 10000;
        KafkaConsumer k = new SimpleKafkaConsumer();
        k.setInitialOffset("earliest");
        isSuicide = true;
        testKafkaInputOperator(1000, totalCount, k, true, false);
    }

    @Test
    public void testKafkaInputOperator_Simple_Idempotent() throws Exception {
        int totalCount = 10000;
        KafkaConsumer k = new SimpleKafkaConsumer();
        k.setInitialOffset("earliest");
        testKafkaInputOperator(1000, totalCount, k, true, true);
    }

    @Test
    public void testKafkaInputOperator_Invalid() throws Exception {
        int totalCount = 10000;
        SimpleKafkaConsumer consumer = new SimpleKafkaConsumer();
        try {
            testKafkaInputOperator(1000, totalCount, consumer, false, false);
        } catch (Exception e) {
            // invalid host setup expect to fail here
            Assert.assertEquals("Error creating local cluster", e.getMessage());
        }
    }

    @Override
    @Before
    public void beforeTest() {
        tupleCount.set(0);
        File syncCheckPoint = new File("target", "ck");
        File localFiles = new File("target/" + StramLocalCluster.class.getName());
        try {
            FileUtils.deleteQuietly(syncCheckPoint);
            FileUtils.deleteQuietly(localFiles);
        } catch (Exception e) {
            throw new RuntimeException(e);
        } finally {
            super.beforeTest();
        }
    }

    public static class TestMeta extends TestWatcher {
        String baseDir;
        String recoveryDir;
        KafkaSinglePortStringInputOperator operator;
        CollectorTestSink<Object> sink;
        Context.OperatorContext context;

        @Override
        protected void starting(Description description) {
            String methodName = description.getMethodName();
            String className = description.getClassName();
            baseDir = "target/" + className + "/" + methodName;
            recoveryDir = "recovery";
            try {
                FileUtils.deleteDirectory(new File(baseDir, recoveryDir));
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        }
    }

    @Rule
    public TestMeta testMeta = new TestMeta();

    @Test
    public void testRecoveryAndIdempotency() throws Exception {
        int totalCount = 1500;

        // initialize the latch for this test
        latch = new CountDownLatch(50);

        // Start producer
        KafkaTestProducer p = new KafkaTestProducer(TEST_TOPIC);
        p.setSendCount(totalCount);
        new Thread(p).start();

        KafkaSinglePortStringInputOperator operator = createAndDeployOperator(true);
        latch.await(4000, TimeUnit.MILLISECONDS);
        operator.beginWindow(1);
        operator.emitTuples();
        operator.endWindow();
        operator.beginWindow(2);
        operator.emitTuples();
        operator.endWindow();

        // simulate failure followed by re-deployment of the operator
        testMeta.sink.collectedTuples.clear();
        operator.teardown();
        operator.deactivate();

        operator = createAndDeployOperator(true);
        Assert.assertEquals("largest recovery window", 2,
                operator.getWindowDataManager().getLargestCompletedWindow());

        operator.beginWindow(1);
        operator.emitTuples();
        operator.endWindow();
        operator.beginWindow(2);
        operator.emitTuples();
        operator.endWindow();
        latch.await(3000, TimeUnit.MILLISECONDS);
        // Emitting data after all recovery windows are replayed
        operator.beginWindow(3);
        operator.emitTuples();
        operator.endWindow();

        Assert.assertEquals("Total messages collected ", totalCount, testMeta.sink.collectedTuples.size());
        testMeta.sink.collectedTuples.clear();
        operator.teardown();
        operator.deactivate();
    }

    @Test
    public void testRecoveryAndExactlyOnce() throws Exception {
        int totalCount = 1500;

        // initialize the latch for this test
        latch = new CountDownLatch(50);

        // Start producer
        KafkaTestProducer p = new KafkaTestProducer(TEST_TOPIC);
        p.setSendCount(totalCount);
        new Thread(p).start();

        KafkaSinglePortStringInputOperator operator = createAndDeployOperator(false);
        latch.await(4000, TimeUnit.MILLISECONDS);
        operator.beginWindow(1);
        operator.emitTuples();
        operator.endWindow();
        operator.beginWindow(2);
        operator.emitTuples();
        operator.endWindow();
        operator.checkpointed(2);
        operator.committed(2);
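        // Capture the offsets committed at window 2; a redeployed operator seeded with
        // these offsets should resume from the checkpoint instead of re-emitting
        // everything from the beginning.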
        Map<KafkaPartition, Long> offsetStats = operator.offsetStats;
        int collectedTuplesAfterCheckpoint = testMeta.sink.collectedTuples.size();
        // simulate failure followed by re-deployment of the operator
        testMeta.sink.collectedTuples.clear();
        operator.teardown();
        operator.deactivate();
        operator = createOperator(false);
        operator.offsetStats = offsetStats;
        operator.setup(testMeta.context);
        operator.activate(testMeta.context);
        latch.await(4000, TimeUnit.MILLISECONDS);
        // Emitting data after all recovery windows are replayed
        operator.beginWindow(3);
        operator.emitTuples();
        operator.endWindow();
        operator.beginWindow(4);
        operator.emitTuples();
        operator.endWindow();
        latch.await(3000, TimeUnit.MILLISECONDS);

        Assert.assertEquals("Total messages collected ", totalCount - collectedTuplesAfterCheckpoint + 1,
                testMeta.sink.collectedTuples.size());
        testMeta.sink.collectedTuples.clear();
        operator.teardown();
        operator.deactivate();
    }

    private KafkaSinglePortStringInputOperator createOperator(boolean isIdempotency) {
        Attribute.AttributeMap attributeMap = new Attribute.AttributeMap.DefaultAttributeMap();
        attributeMap.put(Context.OperatorContext.SPIN_MILLIS, 500);
        attributeMap.put(Context.DAGContext.APPLICATION_PATH, testMeta.baseDir);

        testMeta.context = new OperatorContextTestHelper.TestIdOperatorContext(1, attributeMap);
        testMeta.operator = new KafkaSinglePortStringInputOperator();

        KafkaConsumer consumer = new SimpleKafkaConsumer();
        consumer.setTopic(TEST_TOPIC);
        consumer.setInitialOffset("earliest");

        if (isIdempotency) {
            FSWindowDataManager storageManager = new FSWindowDataManager();
            storageManager.setStatePath(testMeta.recoveryDir);
            testMeta.operator.setWindowDataManager(storageManager);
        }

        testMeta.operator.setConsumer(consumer);
        testMeta.operator.setZookeeper("localhost:" + KafkaOperatorTestBase.TEST_ZOOKEEPER_PORT[0]);
        testMeta.operator.setMaxTuplesPerWindow(500);

        List<Partitioner.Partition<AbstractKafkaInputOperator<KafkaConsumer>>> partitions = new LinkedList<Partitioner.Partition<AbstractKafkaInputOperator<KafkaConsumer>>>();

        Collection<Partitioner.Partition<AbstractKafkaInputOperator<KafkaConsumer>>> newPartitions = testMeta.operator
                .definePartitions(partitions, new StatelessPartitionerTest.PartitioningContextImpl(null, 0));
        Assert.assertEquals(1, newPartitions.size());

        KafkaSinglePortStringInputOperator operator = (KafkaSinglePortStringInputOperator) newPartitions.iterator()
                .next().getPartitionedInstance();

        testMeta.sink = new CollectorTestSink<Object>();
        testMeta.operator.outputPort.setSink(testMeta.sink);
        operator.outputPort.setSink(testMeta.sink);
        return operator;
    }

    private KafkaSinglePortStringInputOperator createAndDeployOperator(boolean isIdempotency) {
        KafkaSinglePortStringInputOperator operator = createOperator(isIdempotency);
        operator.setup(testMeta.context);
        operator.activate(testMeta.context);

        return operator;

    }

    @Test
    public void testMaxTotalSize() throws InterruptedException {
        int totalCount = 1500;
        int maxTotalSize = 500;

        // initialize the latch for this test
        latch = new CountDownLatch(1);

        // Start producer
        KafkaTestProducer p = new KafkaTestProducer(TEST_TOPIC);
        p.setSendCount(totalCount);
        Thread t = new Thread(p);
        t.start();

        Attribute.AttributeMap attributeMap = new Attribute.AttributeMap.DefaultAttributeMap();
        attributeMap.put(Context.DAGContext.APPLICATION_PATH, testMeta.baseDir);

        Context.OperatorContext context = new OperatorContextTestHelper.TestIdOperatorContext(1, attributeMap);
        KafkaSinglePortStringInputOperator operator = new KafkaSinglePortStringInputOperator();

        KafkaConsumer consumer = new SimpleKafkaConsumer();
        consumer.setTopic(TEST_TOPIC);
        consumer.setInitialOffset("earliest");

        operator.setConsumer(consumer);
        operator.setZookeeper("localhost:" + KafkaOperatorTestBase.TEST_ZOOKEEPER_PORT[0]);
        operator.setMaxTotalMsgSizePerWindow(maxTotalSize);

        List<Partitioner.Partition<AbstractKafkaInputOperator<KafkaConsumer>>> partitions = new LinkedList<Partitioner.Partition<AbstractKafkaInputOperator<KafkaConsumer>>>();

        Collection<Partitioner.Partition<AbstractKafkaInputOperator<KafkaConsumer>>> newPartitions = operator
                .definePartitions(partitions, new StatelessPartitionerTest.PartitioningContextImpl(null, 0));
        Assert.assertEquals(1, newPartitions.size());

        operator = (KafkaSinglePortStringInputOperator) newPartitions.iterator().next().getPartitionedInstance();

        CollectorTestSink<Object> sink = new CollectorTestSink<Object>();
        operator.outputPort.setSink(sink);
        operator.setup(context);
        operator.activate(context);
        latch.await(4000, TimeUnit.MILLISECONDS);
        operator.beginWindow(1);
        operator.emitTuples();
        operator.endWindow();

        t.join();

        operator.deactivate();
        operator.teardown();
        int size = 0;
        for (Object o : sink.collectedTuples) {
            size += ((String) o).getBytes().length;
        }
        Assert.assertTrue("Total emitted size comparison", size < maxTotalSize);
    }

    @Test
    public void testZookeeper() throws Exception {
        // initialize the latch for this test
        latch = new CountDownLatch(50);

        testMeta.operator = new KafkaSinglePortStringInputOperator();

        KafkaConsumer consumer = new SimpleKafkaConsumer();
        consumer.setTopic(TEST_TOPIC);

        testMeta.operator.setConsumer(consumer);
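        // Multi-cluster connect string: each entry is "<clusterAlias>::<ZooKeeper connect string>",
        // with entries separated by ';' (see the assertions below for the parsed result).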
        testMeta.operator
                .setZookeeper("cluster1::node0,node1,node2:2181,node3:2182/chroot/dir;cluster2::node4:2181");
        latch.await(500, TimeUnit.MILLISECONDS);

        Assert.assertEquals("Total size of clusters ", 2, testMeta.operator.getConsumer().zookeeperMap.size());
        Assert.assertEquals("Connection url for cluster1 ", "node0,node1,node2:2181,node3:2182/chroot/dir",
                testMeta.operator.getConsumer().zookeeperMap.get("cluster1").iterator().next());
        Assert.assertEquals("Connection url for cluster 2 ", "node4:2181",
                testMeta.operator.getConsumer().zookeeperMap.get("cluster2").iterator().next());
    }

}
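
The tests above drive the operator by hand inside a local cluster. As a minimal sketch of how the same KafkaSinglePortStringInputOperator might be wired into a standalone Apex application, the following is illustrative only: the application class name, package, topic, ZooKeeper address, and the ConsoleOutputOperator sink are assumptions, not part of the test code above.

package com.example.kafka;  // illustrative package, not part of the test

import org.apache.hadoop.conf.Configuration;

import com.datatorrent.api.DAG;
import com.datatorrent.api.StreamingApplication;
import com.datatorrent.contrib.kafka.KafkaSinglePortStringInputOperator;
import com.datatorrent.contrib.kafka.SimpleKafkaConsumer;
import com.datatorrent.lib.io.ConsoleOutputOperator;

public class KafkaInputApplication implements StreamingApplication {
    @Override
    public void populateDAG(DAG dag, Configuration conf) {
        // Kafka input operator, configured the same way as in the test
        KafkaSinglePortStringInputOperator input =
                dag.addOperator("KafkaInput", new KafkaSinglePortStringInputOperator());
        SimpleKafkaConsumer consumer = new SimpleKafkaConsumer();
        consumer.setTopic("test_topic");            // illustrative topic name
        consumer.setInitialOffset("earliest");      // start from the earliest unconsumed offset
        input.setConsumer(consumer);
        input.setZookeeper("localhost:2181");       // illustrative ZooKeeper address

        // Print consumed messages to the console instead of the test's CollectorTestSink
        ConsoleOutputOperator console = dag.addOperator("Console", new ConsoleOutputOperator());
        dag.addStream("messages", input.outputPort, console.input);
    }
}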