is.artefact.flume.source.kafka.TestKafkaSource.java Source code

Introduction

Here is the source code for is.artefact.flume.source.kafka.TestKafkaSource.java, a JUnit test suite that runs the KafkaSource Flume source against an embedded Kafka broker.

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package is.artefact.flume.source.kafka;

import com.google.common.base.Charsets;
import com.google.common.collect.Lists;
import org.junit.Assert;
import kafka.common.TopicExistsException;
import org.apache.avro.io.BinaryEncoder;
import org.apache.avro.io.EncoderFactory;
import org.apache.avro.specific.SpecificDatumWriter;
import org.apache.flume.ChannelException;
import org.apache.flume.Context;
import org.apache.flume.Event;
import org.apache.flume.EventDeliveryException;
import org.apache.flume.FlumeException;
import org.apache.flume.PollableSource.Status;
import org.apache.flume.channel.ChannelProcessor;
import org.apache.flume.source.avro.AvroFlumeEvent;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import org.mockito.invocation.InvocationOnMock;
import org.mockito.stubbing.Answer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.regex.Pattern;

import static is.artefact.flume.source.kafka.KafkaSourceConstants.AVRO_EVENT;
import static is.artefact.flume.source.kafka.KafkaSourceConstants.BATCH_DURATION_MS;
import static is.artefact.flume.source.kafka.KafkaSourceConstants.BATCH_SIZE;
import static is.artefact.flume.source.kafka.KafkaSourceConstants.BOOTSTRAP_SERVERS;
import static is.artefact.flume.source.kafka.KafkaSourceConstants.DEFAULT_AUTO_COMMIT;
import static is.artefact.flume.source.kafka.KafkaSourceConstants.KAFKA_CONSUMER_PREFIX;
import static is.artefact.flume.source.kafka.KafkaSourceConstants.PARTITION_HEADER;
import static is.artefact.flume.source.kafka.KafkaSourceConstants.TIMESTAMP_HEADER;
import static is.artefact.flume.source.kafka.KafkaSourceConstants.TOPICS;
import static is.artefact.flume.source.kafka.KafkaSourceConstants.TOPICS_REGEX;
import static is.artefact.flume.source.kafka.KafkaSourceConstants.TOPIC_HEADER;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.mockito.Matchers.any;
import static org.mockito.Mockito.doAnswer;
import static org.mockito.Mockito.mock;

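/**
 * Tests for {@link KafkaSource}: each test starts an embedded Kafka broker,
 * produces records to it, and asserts on what the source hands to a mocked
 * {@link ChannelProcessor}.
 */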
public class TestKafkaSource {
    private static final Logger log = LoggerFactory.getLogger(TestKafkaSource.class);

    private KafkaSource kafkaSource;
    private KafkaSourceEmbeddedKafka kafkaServer;
    private Context context;
    private List<Event> events;

    private final Set<String> usedTopics = new HashSet<String>();
    private String topic0 = "test1";
    private String topic1 = "topic1";

    @SuppressWarnings("unchecked")
    @Before
    public void setup() throws Exception {
        kafkaSource = new KafkaSource();
        kafkaServer = new KafkaSourceEmbeddedKafka(null);
        try {
            kafkaServer.createTopic(topic0, 1);
            usedTopics.add(topic0);
            kafkaServer.createTopic(topic1, 3);
            usedTopics.add(topic1);
        } catch (TopicExistsException e) {
            // benign: the topic is left over from a previous run of the suite
            log.debug("Topic already exists", e);
        }
        context = prepareDefaultContext("flume-group");
        kafkaSource.setChannelProcessor(createGoodChannel());
    }

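    // Builds a minimal source configuration pointing at the embedded broker;
    // consumer settings such as group.id pass through the KAFKA_CONSUMER_PREFIX.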
    private Context prepareDefaultContext(String groupId) {
        Context context = new Context();
        context.put(BOOTSTRAP_SERVERS, kafkaServer.getBootstrapServers());
        context.put(KAFKA_CONSUMER_PREFIX + "group.id", groupId);
        return context;
    }

    @After
    public void tearDown() throws Exception {
        kafkaSource.stop();
        kafkaServer.stop();
    }

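    // Verifies batch-size-capped polls (three events per process() call) and
    // that committed offsets survive a stop/start of the source.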
    @SuppressWarnings("unchecked")
    @Test
    public void testOffsets() throws InterruptedException, EventDeliveryException {
        long batchDuration = 2000;
        context.put(TOPICS, topic1);
        context.put(BATCH_DURATION_MS, String.valueOf(batchDuration));
        context.put(BATCH_SIZE, "3");
        kafkaSource.configure(context);
        kafkaSource.start();
        Thread.sleep(500L);
        Status status = kafkaSource.process();
        assertEquals(Status.BACKOFF, status);
        assertEquals(0, events.size());
        kafkaServer.produce(topic1, "", "record1");
        kafkaServer.produce(topic1, "", "record2");
        Thread.sleep(500L);
        status = kafkaSource.process();
        assertEquals(Status.READY, status);
        assertEquals(2, events.size());
        events.clear();
        kafkaServer.produce(topic1, "", "record3");
        kafkaServer.produce(topic1, "", "record4");
        kafkaServer.produce(topic1, "", "record5");
        Thread.sleep(500L);
        assertEquals(Status.READY, kafkaSource.process());
        assertEquals(3, events.size());
        assertEquals("record3", new String(events.get(0).getBody(), Charsets.UTF_8));
        assertEquals("record4", new String(events.get(1).getBody(), Charsets.UTF_8));
        assertEquals("record5", new String(events.get(2).getBody(), Charsets.UTF_8));
        events.clear();
        kafkaServer.produce(topic1, "", "record6");
        kafkaServer.produce(topic1, "", "record7");
        kafkaServer.produce(topic1, "", "record8");
        kafkaServer.produce(topic1, "", "record9");
        kafkaServer.produce(topic1, "", "record10");
        Thread.sleep(500L);
        assertEquals(Status.READY, kafkaSource.process());
        assertEquals(3, events.size());
        assertEquals("record6", new String(events.get(0).getBody(), Charsets.UTF_8));
        assertEquals("record7", new String(events.get(1).getBody(), Charsets.UTF_8));
        assertEquals("record8", new String(events.get(2).getBody(), Charsets.UTF_8));
        events.clear();
        kafkaServer.produce(topic1, "", "record11");
        // status must be READY: process() polls up to the batch duration,
        // which is long enough to pick up the leftover records plus record11.
        assertEquals(Status.READY, kafkaSource.process());
        assertEquals(3, events.size());
        assertEquals("record9", new String(events.get(0).getBody(), Charsets.UTF_8));
        assertEquals("record10", new String(events.get(1).getBody(), Charsets.UTF_8));
        assertEquals("record11", new String(events.get(2).getBody(), Charsets.UTF_8));
        events.clear();
        kafkaServer.produce(topic1, "", "record12");
        kafkaServer.produce(topic1, "", "record13");
        // stop kafka source
        kafkaSource.stop();
        // start again
        kafkaSource = new KafkaSource();
        kafkaSource.setChannelProcessor(createGoodChannel());
        kafkaSource.configure(context);
        kafkaSource.start();
        kafkaServer.produce(topic1, "", "record14");
        Thread.sleep(1000L);
        assertEquals(Status.READY, kafkaSource.process());
        assertEquals(3, events.size());
        assertEquals("record12", new String(events.get(0).getBody(), Charsets.UTF_8));
        assertEquals("record13", new String(events.get(1).getBody(), Charsets.UTF_8));
        assertEquals("record14", new String(events.get(2).getBody(), Charsets.UTF_8));
        events.clear();
    }

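    // A single produced record yields one READY poll, then BACKOFF once the
    // topic is drained.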
    @SuppressWarnings("unchecked")
    @Test
    public void testProcessItNotEmpty() throws EventDeliveryException, SecurityException, NoSuchFieldException,
            IllegalArgumentException, IllegalAccessException, InterruptedException {
        context.put(TOPICS, topic0);
        context.put(BATCH_SIZE, "1");
        kafkaSource.configure(context);
        kafkaSource.start();

        Thread.sleep(500L);

        kafkaServer.produce(topic0, "", "hello, world");

        Thread.sleep(500L);
        Assert.assertEquals(Status.READY, kafkaSource.process());
        Assert.assertEquals(Status.BACKOFF, kafkaSource.process());
        Assert.assertEquals(1, events.size());

        Assert.assertEquals("hello, world", new String(events.get(0).getBody(), Charsets.UTF_8));
    }

    @SuppressWarnings("unchecked")
    @Test
    public void testProcessItNotEmptyBatch() throws EventDeliveryException, SecurityException, NoSuchFieldException,
            IllegalArgumentException, IllegalAccessException, InterruptedException {
        context.put(TOPICS, topic0);
        context.put(BATCH_SIZE, "2");
        kafkaSource.configure(context);
        kafkaSource.start();

        Thread.sleep(500L);

        kafkaServer.produce(topic0, "", "hello, world");
        kafkaServer.produce(topic0, "", "foo, bar");

        Thread.sleep(500L);

        Status status = kafkaSource.process();
        assertEquals(Status.READY, status);
        Assert.assertEquals("hello, world", new String(events.get(0).getBody(), Charsets.UTF_8));
        Assert.assertEquals("foo, bar", new String(events.get(1).getBody(), Charsets.UTF_8));

    }

    @SuppressWarnings("unchecked")
    @Test
    public void testProcessItEmpty() throws EventDeliveryException, SecurityException, NoSuchFieldException,
            IllegalArgumentException, IllegalAccessException, InterruptedException {
        context.put(TOPICS, topic0);
        kafkaSource.configure(context);
        kafkaSource.start();
        Thread.sleep(500L);

        Status status = kafkaSource.process();
        assertEquals(Status.BACKOFF, status);
    }

    @SuppressWarnings("unchecked")
    @Test
    public void testNonExistingTopic() throws EventDeliveryException, SecurityException, NoSuchFieldException,
            IllegalArgumentException, IllegalAccessException, InterruptedException {
        context.put(TOPICS, "faketopic");
        kafkaSource.configure(context);
        kafkaSource.start();
        Thread.sleep(500L);

        Status status = kafkaSource.process();
        assertEquals(Status.BACKOFF, status);
    }

    @SuppressWarnings("unchecked")
    @Test(expected = FlumeException.class)
    public void testNonExistingKafkaServer() throws EventDeliveryException, SecurityException, NoSuchFieldException,
            IllegalArgumentException, IllegalAccessException, InterruptedException {
        context.put(TOPICS, topic0);
        context.put(BOOTSTRAP_SERVERS, "blabla:666");
        kafkaSource.configure(context);
        kafkaSource.start();
        Thread.sleep(500L);

        Status status = kafkaSource.process();
        assertEquals(Status.BACKOFF, status);
    }

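    // With a large backlog, process() must return within BATCH_DURATION_MS
    // (plus a small error margin) instead of draining the whole topic.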
    @Test
    public void testBatchTime() throws InterruptedException, EventDeliveryException {
        context.put(TOPICS, topic0);
        context.put(BATCH_DURATION_MS, "250");
        kafkaSource.configure(context);
        kafkaSource.start();

        Thread.sleep(500L);

        for (int i = 1; i < 5000; i++) {
            kafkaServer.produce(topic0, "", "hello, world " + i);
        }
        Thread.sleep(500L);

        long error = 50;
        long startTime = System.currentTimeMillis();
        Status status = kafkaSource.process();
        long endTime = System.currentTimeMillis();
        assertEquals(Status.READY, status);
        assertTrue(endTime - startTime < (context.getLong(BATCH_DURATION_MS) + error));
    }

    // Consume an event, stop the source, start it again, and make sure we do
    // not consume the same event twice.
    @Test
    public void testCommit() throws InterruptedException, EventDeliveryException {
        context.put(TOPICS, topic0);
        context.put(BATCH_SIZE, "1");
        kafkaSource.configure(context);
        kafkaSource.start();

        Thread.sleep(500L);

        kafkaServer.produce(topic0, "", "hello, world");

        Thread.sleep(500L);

        Assert.assertEquals(Status.READY, kafkaSource.process());
        kafkaSource.stop();
        Thread.sleep(500L);
        kafkaSource.start();
        Thread.sleep(500L);
        Assert.assertEquals(Status.BACKOFF, kafkaSource.process());
    }

    // Swap in a failing channel processor and verify the same event can be
    // consumed again once the channel is repaired.
    @Test
    public void testNonCommit() throws EventDeliveryException, InterruptedException {
        context.put(TOPICS, topic0);
        context.put(BATCH_SIZE, "1");
        context.put(BATCH_DURATION_MS, "30000");
        kafkaSource.configure(context);
        kafkaSource.start();
        Thread.sleep(500L);

        kafkaServer.produce(topic0, "", "hello, world");
        Thread.sleep(500L);

        kafkaSource.setChannelProcessor(createBadChannel());
        log.debug("processing from kafka to bad channel");
        Assert.assertEquals(Status.BACKOFF, kafkaSource.process());

        log.debug("repairing channel");
        kafkaSource.setChannelProcessor(createGoodChannel());

        log.debug("re-process to good channel - this should work");
        kafkaSource.process();
        Assert.assertEquals("hello, world", new String(events.get(0).getBody(), Charsets.UTF_8));
    }

    @Test
    public void testTwoBatches() throws InterruptedException, EventDeliveryException {
        context.put(TOPICS, topic0);
        context.put(BATCH_SIZE, "1");
        context.put(BATCH_DURATION_MS, "30000");
        kafkaSource.configure(context);
        kafkaSource.start();
        Thread.sleep(500L);

        kafkaServer.produce(topic0, "", "event 1");
        Thread.sleep(500L);

        kafkaSource.process();
        Assert.assertEquals("event 1", new String(events.get(0).getBody(), Charsets.UTF_8));
        events.clear();

        kafkaServer.produce(topic0, "", "event 2");
        Thread.sleep(500L);
        kafkaSource.process();
        Assert.assertEquals("event 2", new String(events.get(0).getBody(), Charsets.UTF_8));
    }

    @Test
    public void testTwoBatchesWithAutocommit() throws InterruptedException, EventDeliveryException {
        context.put(TOPICS, topic0);
        context.put(BATCH_SIZE, "1");
        context.put(BATCH_DURATION_MS, "30000");
        context.put(KAFKA_CONSUMER_PREFIX + "enable.auto.commit", "true");
        kafkaSource.configure(context);
        kafkaSource.start();
        Thread.sleep(500L);

        kafkaServer.produce(topic0, "", "event 1");
        Thread.sleep(500L);

        kafkaSource.process();
        Assert.assertEquals("event 1", new String(events.get(0).getBody(), Charsets.UTF_8));
        events.clear();

        kafkaServer.produce(topic0, "", "event 2");
        Thread.sleep(500L);
        kafkaSource.process();
        Assert.assertEquals("event 2", new String(events.get(0).getBody(), Charsets.UTF_8));
    }

    @SuppressWarnings("unchecked")
    @Test
    public void testNullKey() throws EventDeliveryException, SecurityException, NoSuchFieldException,
            IllegalArgumentException, IllegalAccessException, InterruptedException {
        context.put(TOPICS, topic0);
        context.put(BATCH_SIZE, "1");
        kafkaSource.configure(context);
        kafkaSource.start();

        Thread.sleep(500L);

        kafkaServer.produce(topic0, null, "hello, world");

        Thread.sleep(500L);

        Assert.assertEquals(Status.READY, kafkaSource.process());
        Assert.assertEquals(Status.BACKOFF, kafkaSource.process());
        Assert.assertEquals(1, events.size());

        Assert.assertEquals("hello, world", new String(events.get(0).getBody(), Charsets.UTF_8));
    }

    @Test
    public void testSourceProperties() {
        Context context = new Context();
        context.put(TOPICS, "test1, test2");
        context.put(TOPICS_REGEX, "^stream[0-9]$");
        context.put(BOOTSTRAP_SERVERS, "bootstrap-servers-list");
        KafkaSource source = new KafkaSource();
        source.doConfigure(context);

        //check that kafka.topics.regex has higher priority than topics
        //type of subscriber should be PatternSubscriber
        KafkaSource.Subscriber<Pattern> subscriber = source.getSubscriber();
        Pattern pattern = subscriber.get();
        Assert.assertTrue(pattern.matcher("stream1").find());
    }

    @Test
    public void testKafkaProperties() {
        Context context = new Context();
        context.put(TOPICS, "test1, test2");
        context.put(KAFKA_CONSUMER_PREFIX + ConsumerConfig.GROUP_ID_CONFIG, "override.default.group.id");
        context.put(KAFKA_CONSUMER_PREFIX + "fake.property", "kafka.property.value");
        context.put(BOOTSTRAP_SERVERS, "real-bootstrap-servers-list");
        context.put(KAFKA_CONSUMER_PREFIX + "bootstrap.servers", "bad-bootstrap-servers-list");
        KafkaSource source = new KafkaSource();
        source.doConfigure(context);
        Properties kafkaProps = source.getConsumerProps();

        //check that we have defaults set
        assertEquals(String.valueOf(DEFAULT_AUTO_COMMIT),
                kafkaProps.getProperty(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG));
        //check that kafka properties override the default and get correct name
        assertEquals("override.default.group.id", kafkaProps.getProperty(ConsumerConfig.GROUP_ID_CONFIG));
        //check that any kafka property gets in
        assertEquals("kafka.property.value", kafkaProps.getProperty("fake.property"));
        //check that documented property overrides defaults
        assertEquals("real-bootstrap-servers-list", kafkaProps.getProperty("bootstrap.servers"));
    }

    @Test
    public void testPatternBasedSubscription() {
        Context context = new Context();

        context.put(TOPICS_REGEX, "^topic[0-9]$");
        context.put(BOOTSTRAP_SERVERS, "real-bootstrap-servers-list");
        KafkaSource source = new KafkaSource();
        source.doConfigure(context);
        KafkaSource.Subscriber<Pattern> subscriber = source.getSubscriber();
        for (int i = 0; i < 10; i++) {
            Assert.assertTrue(subscriber.get().matcher("topic" + i).find());
        }
        Assert.assertFalse(subscriber.get().matcher("topic").find());
    }

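    // Round-trips Avro-encoded Flume events: bodies and headers written with
    // SpecificDatumWriter must come back out of the source with AVRO_EVENT on.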
    @Test
    public void testAvroEvent() throws InterruptedException, EventDeliveryException, IOException {
        SpecificDatumWriter<AvroFlumeEvent> writer;
        ByteArrayOutputStream tempOutStream;
        BinaryEncoder encoder;
        byte[] bytes;

        context.put(TOPICS, topic0);
        context.put(BATCH_SIZE, "1");
        context.put(AVRO_EVENT, "true");
        kafkaSource.configure(context);
        kafkaSource.start();

        Thread.sleep(500L);

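        // Binary-encode an AvroFlumeEvent into bytes, as a producing agent would.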
        tempOutStream = new ByteArrayOutputStream();
        writer = new SpecificDatumWriter<AvroFlumeEvent>(AvroFlumeEvent.class);

        Map<CharSequence, CharSequence> headers = new HashMap<CharSequence, CharSequence>();
        headers.put("header1", "value1");
        headers.put("header2", "value2");

        AvroFlumeEvent e = new AvroFlumeEvent(headers, ByteBuffer.wrap("hello, world".getBytes(Charsets.UTF_8)));
        encoder = EncoderFactory.get().directBinaryEncoder(tempOutStream, null);
        writer.write(e, encoder);
        encoder.flush();
        bytes = tempOutStream.toByteArray();

        kafkaServer.produce(topic0, "", bytes);

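        // The second event also carries timestamp/partition/topic headers inside
        // the Avro payload; the source must preserve them.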
        String currentTimestamp = Long.toString(System.currentTimeMillis());

        headers.put(TIMESTAMP_HEADER, currentTimestamp);
        headers.put(PARTITION_HEADER, "1");
        headers.put(TOPIC_HEADER, "topic0");

        e = new AvroFlumeEvent(headers, ByteBuffer.wrap("hello, world2".getBytes(Charsets.UTF_8)));
        tempOutStream.reset();
        encoder = EncoderFactory.get().directBinaryEncoder(tempOutStream, null);
        writer.write(e, encoder);
        encoder.flush();
        bytes = tempOutStream.toByteArray();

        kafkaServer.produce(topic0, "", bytes);

        Thread.sleep(500L);
        Assert.assertEquals(Status.READY, kafkaSource.process());
        Assert.assertEquals(Status.READY, kafkaSource.process());
        Assert.assertEquals(Status.BACKOFF, kafkaSource.process());

        Assert.assertEquals(2, events.size());

        Event event = events.get(0);

        Assert.assertEquals("hello, world", new String(event.getBody(), Charsets.UTF_8));

        Assert.assertEquals("value1", e.getHeaders().get("header1"));
        Assert.assertEquals("value2", e.getHeaders().get("header2"));

        event = events.get(1);

        Assert.assertEquals("hello, world2", new String(event.getBody(), Charsets.UTF_8));

        Assert.assertEquals("value1", e.getHeaders().get("header1"));
        Assert.assertEquals("value2", e.getHeaders().get("header2"));
        Assert.assertEquals(currentTimestamp, e.getHeaders().get(TIMESTAMP_HEADER));
        Assert.assertEquals(e.getHeaders().get(PARTITION_HEADER), "1");
        Assert.assertEquals(e.getHeaders().get(TOPIC_HEADER), "topic0");

    }

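    // Mocked ChannelProcessor that captures every delivered batch into the
    // shared 'events' list so tests can assert on what the source emitted.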
    ChannelProcessor createGoodChannel() {

        ChannelProcessor channelProcessor = mock(ChannelProcessor.class);

        events = Lists.newArrayList();

        doAnswer(new Answer<Void>() {
            @Override
            public Void answer(InvocationOnMock invocation) throws Throwable {
                events.addAll((List<Event>) invocation.getArguments()[0]);
                return null;
            }
        }).when(channelProcessor).processEventBatch(any(List.class));

        return channelProcessor;
    }

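    // Mocked ChannelProcessor that always throws ChannelException, simulating a
    // broken channel so the source must not commit the offsets it consumed.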
    ChannelProcessor createBadChannel() {
        ChannelProcessor channelProcessor = mock(ChannelProcessor.class);

        doAnswer(new Answer<Void>() {
            @Override
            public Void answer(InvocationOnMock invocation) throws Throwable {
                throw new ChannelException("channel intentional broken");
            }
        }).when(channelProcessor).processEventBatch(any(List.class));

        return channelProcessor;
    }
}
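
For reference, the snippet below is a minimal sketch (not part of the class above) of how the source under test can be driven by hand, using only calls that appear in the tests: configure(), start(), process(), and stop(). The broker address, topic name, and the caller-supplied ChannelProcessor are illustrative assumptions; a real agent would normally wire the source through a Flume configuration file instead.

// A minimal driver sketch, assuming the same imports as the test class above.
// "localhost:9092" and "my-topic" are placeholders, and channelProcessor must
// be a real ChannelProcessor supplied by the caller.
static void drainBacklog(ChannelProcessor channelProcessor) throws EventDeliveryException {
    KafkaSource source = new KafkaSource();
    Context context = new Context();
    context.put(BOOTSTRAP_SERVERS, "localhost:9092"); // assumed broker address
    context.put(TOPICS, "my-topic");                  // assumed topic name
    context.put(BATCH_SIZE, "100");
    source.setChannelProcessor(channelProcessor);
    source.configure(context);
    source.start();
    try {
        // Each READY poll delivered a batch to the channel; BACKOFF means the
        // poll came back empty and the backlog is drained.
        while (source.process() == Status.READY) {
            // keep polling
        }
    } finally {
        source.stop();
    }
}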