com.netflix.suro.sink.kafka.KafkaSinkV2.java Source code

Here is the source code for com.netflix.suro.sink.kafka.KafkaSinkV2.java.

Source

/*
 * Copyright 2014 Netflix, Inc.
 *
 *    Licensed under the Apache License, Version 2.0 (the "License");
 *    you may not use this file except in compliance with the License.
 *    You may obtain a copy of the License at
 *
 *        http://www.apache.org/licenses/LICENSE-2.0
 *
 *    Unless required by applicable law or agreed to in writing, software
 *    distributed under the License is distributed on an "AS IS" BASIS,
 *    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *    See the License for the specific language governing permissions and
 *    limitations under the License.
 */

package com.netflix.suro.sink.kafka;

import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.core.type.TypeReference;
import com.google.common.base.Preconditions;
import com.google.common.collect.Maps;
import com.netflix.servo.monitor.Monitors;
import com.netflix.suro.message.Message;
import com.netflix.suro.message.MessageContainer;
import com.netflix.suro.queue.MemoryQueue4Sink;
import com.netflix.suro.queue.MessageQueue4Sink;
import com.netflix.suro.sink.QueuedSink;
import com.netflix.suro.sink.Sink;
import com.netflix.suro.sink.ThreadPoolQueuedSink;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.clients.producer.RecordMetadata;
import org.apache.kafka.common.Metric;
import org.apache.kafka.common.MetricName;
import org.apache.kafka.common.errors.UnknownTopicOrPartitionException;
import org.apache.kafka.common.serialization.ByteArraySerializer;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
import java.util.concurrent.atomic.AtomicLong;

/**
 * Kafka 0.8.2 sink, using the new Java-native producer rather than the old Scala producer.
 * Failed requests are re-queued indefinitely; cancelled or interrupted requests
 * are dropped rather than retried.
 * 
 * The configuration parameters for the new kafka producer are listed in:
 * http://kafka.apache.org/documentation.html#newproducerconfigs
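 *
 * The optional keyTopicMap maps a routing key to the name of the message field
 * whose hash is used as the Kafka partitioning key for that topic. A hypothetical
 * sink configuration might look like this (field names follow the constructor's
 * {@code @JsonProperty} annotations; the values are illustrative only):
 * <pre>
 * {
 *     "type": "KafkaV2",
 *     "client.id": "suro-kafka",
 *     "metadata.broker.list": "broker1:9092,broker2:9092",
 *     "batchSize": 200,
 *     "batchTimeout": 1000,
 *     "keyTopicMap": { "my_topic": "userId" }
 * }
 * </pre>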
 *
 * @author jbae
 * @author starzia
 */
public class KafkaSinkV2 extends ThreadPoolQueuedSink implements Sink {
    public final static String TYPE = "KafkaV2";

    private String clientId;
    private final Map<String, String> keyTopicMap;

    private final KafkaProducer<byte[], byte[]> producer;
    private long msgId = 0;
    private AtomicLong receivedCount = new AtomicLong(0);
    private AtomicLong sentCount = new AtomicLong(0);
    private AtomicLong sentByteCount = new AtomicLong(0);
    /** number of times a message send failed without retrying */
    private AtomicLong droppedCount = new AtomicLong(0);
    /** number of times a message send failed but was requeued */
    private AtomicLong requeuedCount = new AtomicLong(0);

    @JsonCreator
    public KafkaSinkV2(@JsonProperty("queue4Sink") MessageQueue4Sink queue4Sink,
            @JsonProperty("client.id") String clientId,
            @JsonProperty("metadata.broker.list") String bootstrapServers,
            @JsonProperty("compression.codec") String codec, @JsonProperty("send.buffer.bytes") int sendBufferBytes,
            @JsonProperty("batchSize") int batchSize, @JsonProperty("batchTimeout") int batchTimeout,
            @JsonProperty("request.timeout.ms") int requestTimeout, @JsonProperty("kafka.etc") Properties etcProps,
            @JsonProperty("keyTopicMap") Map<String, String> keyTopicMap,
            @JsonProperty("jobQueueSize") int jobQueueSize, @JsonProperty("corePoolSize") int corePoolSize,
            @JsonProperty("maxPoolSize") int maxPoolSize, @JsonProperty("jobTimeout") long jobTimeout,
            @JsonProperty("pauseOnLongQueue") boolean pauseOnLongQueue) {
        super(jobQueueSize, corePoolSize, maxPoolSize, jobTimeout,
                KafkaSinkV2.class.getSimpleName() + "-" + clientId);

        Preconditions.checkNotNull(bootstrapServers);
        Preconditions.checkNotNull(clientId);

        this.clientId = clientId;
        initialize("kafka_" + clientId, queue4Sink == null ? new MemoryQueue4Sink(10000) : queue4Sink, batchSize,
                batchTimeout, pauseOnLongQueue);

        Properties props = new Properties();
        props.put("client.id", clientId);
        // metadata.broker.list was renamed to bootstrap.servers in the new kafka producer
        props.put("bootstrap.servers", bootstrapServers);
        if (codec != null) {
            // compression.codec was renamed to compression.type in the new kafka producer
            props.put("compression.type", codec);
        }
        if (sendBufferBytes > 0) {
            props.put("send.buffer.bytes", Integer.toString(sendBufferBytes));
        }
        if (requestTimeout > 0) {
            props.put("request.timeout.ms", Integer.toString(requestTimeout));
        }

        if (etcProps != null) {
            props.putAll(etcProps);
        }

        this.keyTopicMap = keyTopicMap != null ? keyTopicMap : Maps.<String, String>newHashMap();

        producer = new KafkaProducer<>(props, new ByteArraySerializer(), new ByteArraySerializer());

        Monitors.registerObject(clientId, this);
    }

    @Override
    public void writeTo(MessageContainer message) {
        long key = msgId++;
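        // Default to a monotonically increasing id, which spreads unkeyed messages
        // across partitions; if keyTopicMap names a key field for this routing key,
        // use that field's hash instead so equal values land on the same partition.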
        if (!keyTopicMap.isEmpty()) {
            try {
                Map<String, Object> msgMap = message.getEntity(new TypeReference<Map<String, Object>>() {});
                Object keyField = msgMap.get(keyTopicMap.get(message.getRoutingKey()));
                if (keyField != null) {
                    key = keyField.hashCode();
                }
            } catch (Exception e) {
                QueuedSink.log.error("Exception on getting key field: " + e.getMessage());
            }
        }
        QueuedSink.log.trace("KafkaSink writeTo()");
        receivedCount.incrementAndGet();
        enqueue(new SuroKeyedMessage(key, message.getMessage()));
    }

    @Override
    public void open() {
        setName(KafkaSinkV2.class.getSimpleName() + "-" + clientId);
        start();
    }

    @Override
    protected void beforePolling() throws IOException {
        /* do nothing */
    }

    @Override
    protected void write(List<Message> msgList) {
        QueuedSink.log.trace("KafkaSink write() with {} messages", msgList.size());
        // prepare "final" copies of the messages to be used in the anonymous class below
        final ArrayList<SuroKeyedMessage> msgCopies = new ArrayList<SuroKeyedMessage>(msgList.size());
        for (Message m : msgList) {
            SuroKeyedMessage sKeyedMsg = (SuroKeyedMessage) m;
            msgCopies.add(new SuroKeyedMessage(sKeyedMsg.getKey(), new Message(m.getRoutingKey(), m.getPayload())));
        }

        // The new KafkaProducer does not have an interface for sending multiple messages,
        // so we loop and create lots of Runnables -- this seems inefficient, but the
        // alternative has its own problems.  If we created one "big Runnable" that loops
        // over messages we would drain the queue4Sink too quickly -- all the messages
        // would end up queued in the in-memory job queue storing the Runnables.
        for (final SuroKeyedMessage m : msgCopies) {
            senders.submit(new Runnable() {
                @Override
                public void run() {
                    String topic = m.getRoutingKey();

                    // calculate the kafka partition, with backward compatibility with old kafka producer
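                    // (int) (key ^ (key >>> 32)) folds the 64-bit key to 32 bits exactly
                    // as Long.hashCode() does, and Math.abs maps the hash into
                    // [0, numPartitions). Note that Math.abs(Integer.MIN_VALUE) is still
                    // negative, so one pathological hash value can yield a bad partition.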
                    int numPartitions = producer.partitionsFor(topic).size();
                    int partition = Math.abs((int) (m.getKey() ^ (m.getKey() >>> 32))) % numPartitions;

                    ProducerRecord<byte[], byte[]> r = new ProducerRecord<>(topic, partition,
                            null /* don't store the key */, m.getPayload());
                    QueuedSink.log.trace("Will send message to Kafka");
                    long startTimeMs = System.currentTimeMillis();
                    // send
                    Future<RecordMetadata> responseFtr = producer.send(r);
                    QueuedSink.log.trace("Started aysnc producer");
                    boolean failure = true;
                    boolean retry = true;
                    if (responseFtr.isCancelled()) {
                        QueuedSink.log.warn("Kafka producer request was cancelled");
                        // we assume that cancelled requests should not be retried.
                        retry = false;
                    }
                    try {
                        // wait for request to finish
                        RecordMetadata response = responseFtr.get();
                        if (response.topic() == null) {
                            QueuedSink.log.warn("Kafka producer got null topic in response");
                        }
                        sentCount.incrementAndGet();
                        sentByteCount.addAndGet(m.getPayload().length);
                        failure = false;
                        retry = false;
                    } catch (InterruptedException e) {
                        // Assume interruption means we're shutting down, so don't retry;
                        // restore the interrupt flag for the executor's thread.
                        QueuedSink.log.warn("Caught InterruptedException: " + e);
                        Thread.currentThread().interrupt();
                        retry = false;
                    } catch (UnknownTopicOrPartitionException e) {
                        QueuedSink.log.warn("Caught UnknownTopicOrPartitionException for topic: "
                                + m.getRoutingKey()
                                + ". This may simply be because the KafkaProducer does not yet have metadata about the brokers."
                                + " Request will be retried.");
                    } catch (ExecutionException e) {
                        QueuedSink.log.warn("Caught ExecutionException: " + e);
                    } catch (Exception e) {
                        QueuedSink.log.warn("Caught Exception: " + e);
                    }
                    long durationMs = System.currentTimeMillis() - startTimeMs;

                    if (failure) {
                        QueuedSink.log.warn("Kafka producer send failed after {} milliseconds", durationMs);
                        if (retry) {
                            // count and re-enqueue only the sends that will actually be retried
                            requeuedCount.incrementAndGet();
                            enqueue(m);
                        } else {
                            QueuedSink.log.info("Dropped message");
                            droppedCount.incrementAndGet();
                        }
                    } else {
                        QueuedSink.log.trace("Kafka producer send succeeded after {} milliseconds", durationMs);
                    }
                }
            });
        }
    }

    @Override
    protected void innerClose() {
        super.innerClose();

        producer.close();
    }

    @Override
    public String recvNotice() {
        return null;
    }

    @Override
    public String getStat() {
        Map<MetricName, ? extends Metric> metrics = producer.metrics();
        StringBuilder sb = new StringBuilder();
        // add kafka producer stats, which are rates
        for (Map.Entry<MetricName, ? extends Metric> e : metrics.entrySet()) {
            sb.append("kafka.").append(e.getKey()).append(": ").append(e.getValue().value()).append('\n');
        }
        // also report our counters
        sb.append("messages-in-queue4sink: ").append(this.queue4Sink.size()).append('\n');
        sb.append("queued-jobs: ").append(this.jobQueue.size()).append('\n');
        sb.append("active-threads: ").append(this.senders.getActiveCount()).append('\n');
        sb.append("received-messages: ").append(this.receivedCount.get()).append('\n');
        sb.append("sent-messages: ").append(this.sentCount.get()).append('\n');
        sb.append("sent-bytes: ").append(this.sentByteCount.get()).append('\n');
        sb.append("dropped-messages: ").append(this.droppedCount.get()).append('\n');
        sb.append("requeued-messages: ").append(this.requeuedCount.get()).append('\n');

        return sb.toString();
    }
}
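
For context, here is a minimal sketch of driving this sink directly from Java rather than through Suro's JSON sink configuration. The broker address, pool sizes, and the DefaultMessageContainer wrapper are illustrative assumptions for this example, not part of the file above.

import com.fasterxml.jackson.databind.ObjectMapper;
import com.netflix.suro.message.DefaultMessageContainer;
import com.netflix.suro.message.Message;
import com.netflix.suro.sink.kafka.KafkaSinkV2;

public class KafkaSinkV2Example {
    public static void main(String[] args) {
        // a null queue4Sink falls back to the in-memory MemoryQueue4Sink(10000) default
        KafkaSinkV2 sink = new KafkaSinkV2(
                null,               // queue4Sink
                "suro-kafka",       // client.id
                "localhost:9092",   // metadata.broker.list (assumed local broker)
                null,               // compression codec (optional)
                0,                  // send.buffer.bytes (<= 0 leaves the default)
                200,                // batchSize
                1000,               // batchTimeout, in milliseconds
                0,                  // request.timeout.ms (<= 0 leaves the default)
                null,               // kafka.etc extra properties
                null,               // keyTopicMap (none: sequential ids pick partitions)
                100, 2, 4,          // jobQueueSize, corePoolSize, maxPoolSize
                10000,              // jobTimeout, in milliseconds
                false);             // pauseOnLongQueue
        sink.open();
        sink.writeTo(new DefaultMessageContainer(
                new Message("my_topic", "hello kafka".getBytes()),
                new ObjectMapper()));
        sink.close();               // drains the queue and closes the producer
    }
}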