com.yahoo.ads.pb.kafka.KafkaSimpleConsumer.java Source code

Introduction

Here is the source code for com.yahoo.ads.pb.kafka.KafkaSimpleConsumer.java.

Source

/*
 * Copyright 2014 Yahoo! Inc. Licensed under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or
 * agreed to in writing, software distributed under the License is distributed on
 * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied. See the License for the specific language governing permissions and
 * limitations under the License. See accompanying LICENSE file.
 */

package com.yahoo.ads.pb.kafka;

import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;

import org.apache.commons.configuration.Configuration;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.esotericsoftware.kryo.Kryo;
import com.esotericsoftware.kryo.io.Input;
import com.yahoo.ads.pb.util.ConfigurationManager;

import kafka.api.FetchRequest;
import kafka.api.FetchRequestBuilder;
import kafka.api.PartitionOffsetRequestInfo;
import kafka.cluster.Broker;
import kafka.common.ErrorMapping;
import kafka.common.TopicAndPartition;
import kafka.javaapi.FetchResponse;
import kafka.javaapi.OffsetRequest;
import kafka.javaapi.OffsetResponse;
import kafka.javaapi.PartitionMetadata;
import kafka.javaapi.TopicMetadata;
import kafka.javaapi.TopicMetadataRequest;
import kafka.javaapi.TopicMetadataResponse;
import kafka.javaapi.consumer.SimpleConsumer;
import kafka.message.Message;
import kafka.message.MessageAndOffset;
import kafka.serializer.Decoder;
import kafka.serializer.Encoder;
import kafka.utils.VerifiableProperties;

/**
 * Refer to the Kafka 0.8.0 SimpleConsumer example:
 * https://cwiki.apache.org/confluence/display/KAFKA/0.8.0+SimpleConsumer+Example
 *
 * This class is not thread safe; the caller must ensure that all methods are called from a single thread.
 */
public class KafkaSimpleConsumer {
    public static class CrossColoEncoder implements Encoder<byte[]> {
        public CrossColoEncoder(VerifiableProperties v) {

        }

        @Override
        public byte[] toBytes(byte[] msg) {
            // Pass-through: the YCR-based encryption that once lived here was removed,
            // so messages are sent as-is.
            return msg;
        }

    }

    public static class CrossColoDedoder implements Decoder<byte[]> {
        public CrossColoDedoder() {
        }

        @Override
        public byte[] fromBytes(byte[] msg) {
            // Sanity-check that the payload deserializes as a KeyValue; the raw bytes are
            // returned unchanged either way. (The YCR-based decryption that once lived here
            // was removed.)
            try {
                Kryo kryo = new Kryo();
                Input input = new Input(msg);
                kryo.readObject(input, KeyValue.class);
                input.close();
            } catch (Exception e) {
                logger.warn("message does not deserialize as a KeyValue", e);
            }

            return msg;
        }

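        /** Extracts the payload bytes from a raw Kafka Message and passes them through fromBytes. */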
        public byte[] fromMessage(Message msg) {
            ByteBuffer bb = msg.payload();
            byte[] result = new byte[bb.remaining()];
            bb.get(result);
            return fromBytes(result);
        }
    }

    public static final List<BytesMessageWithOffset> EMPTY_MSGS = Collections.emptyList();

    private static final Logger logger = LoggerFactory.getLogger(KafkaSimpleConsumer.class);
    private static final Configuration conf = ConfigurationManager.getConfiguration();

    private static final CrossColoDedoder decoder = new CrossColoDedoder();

    private final List<KafkaBroker> allBrokers;
    private final String topic;
    private final int partitionId;
    private final String clientId;

    private volatile Broker leaderBroker;
    private List<KafkaBroker> replicaBrokers;
    private SimpleConsumer consumer = null;

    public KafkaSimpleConsumer(String topic, int partitionId, String clientId) {
        this(topic, partitionId, clientId, true);
    }

    @SuppressWarnings("unchecked")
    public KafkaSimpleConsumer(String topic, int partitionId, String clientId, boolean crossColo) {
        List<String> brokers = conf.getList("kafka.metadata.broker.list");
        List<KafkaBroker> brokerList = new ArrayList<KafkaBroker>();
        for (String broker : brokers) {
            String[] tokens = broker.split(":");
            if (tokens.length != 2) {
                logger.warn("invalid broker address {}; expected format host:port", broker);
                continue;
            }
            String host = tokens[0];
            int port = -1;
            try {
                port = Integer.parseInt(tokens[1]);
            } catch (NumberFormatException e) {
                logger.warn("invalid broker address {}; the port must be a number", broker);
                continue;
            }
            brokerList.add(new KafkaBroker(host, port));
        }
        this.allBrokers = Collections.unmodifiableList(brokerList);
        this.topic = topic;
        this.partitionId = partitionId;
        this.clientId = String.format("%s_%d_%s", topic, partitionId, clientId);

        this.replicaBrokers = new ArrayList<>();
        this.replicaBrokers.addAll(this.allBrokers);
    }

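    /**
     * Returns the earliest available offset of the partition, or -1 if the consumer is not
     * connected or the lookup fails.
     */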
    public long getEarlistOffset() {
        if (consumer == null)
            return -1;

        try {
            return getOffset(true);
        } catch (Exception e) {
            return -1;
        }
    }

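    /**
     * Returns the latest available offset of the partition, or -1 if the consumer is not
     * connected or the lookup fails.
     */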
    public long getLatestOffset() {
        if (consumer == null)
            return -1;

        try {
            return getOffset(false);
        } catch (Exception e) {
            return -1;
        }
    }

    /**
     * Copy constructor: copies the internal state of another KafkaSimpleConsumer into a newly
     * constructed instance, mainly the cached leaderBroker. Use it to give each thread its own
     * consumer, since a single instance must not be shared across threads.
     *
     * @param other the consumer whose state is copied
     */
    public KafkaSimpleConsumer(KafkaSimpleConsumer other) {
        this.allBrokers = other.allBrokers;
        this.topic = other.topic;
        this.partitionId = other.partitionId;
        this.clientId = other.clientId;
        this.leaderBroker = other.leaderBroker;
        this.replicaBrokers = new ArrayList<>();
        this.replicaBrokers.addAll(this.allBrokers);
    }

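    /**
     * Lazily creates the underlying SimpleConsumer, first discovering the partition leader if
     * it is not yet known; blocks until a leader is found.
     */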
    private SimpleConsumer ensureConsumer(Broker leader) throws InterruptedException {
        if (consumer == null) {
            while (leaderBroker == null) {
                leaderBroker = findNewLeader(leader);
            }

            logger.info("create SimpleConsumer for {} - {}, leader broker {}:{}", topic, partitionId,
                    leaderBroker.host(), leaderBroker.port());

            consumer = new SimpleConsumer(leaderBroker.host(), leaderBroker.port(),
                    conf.getInt("Pistachio.Kafka.soTimeout"), conf.getInt("Pistachio.Kafka.bufferSize"), clientId);
        }
        return consumer;
    }

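    /** Immutable pair of a decoded message payload and the offset at which the next fetch should resume. */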
    public static class BytesMessageWithOffset {
        final byte[] msg;
        final long offset;

        public BytesMessageWithOffset(byte[] msg, long offset) {
            this.msg = msg;
            this.offset = offset;
        }

        public byte[] message() {
            return msg;
        }

        public long offset() {
            return offset;
        }
    }

    static class KafkaBroker {
        final String host;
        final int port;

        KafkaBroker(String host, int port) {
            this.host = host;
            this.port = port;
        }

        @Override
        public String toString() {
            return String.format("%s:%d", host, port);
        }
    }

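    /**
     * Keeps only messages at or beyond the requested offset and decodes their payloads. Each
     * result carries nextOffset() so that the following fetch resumes after the message.
     */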
    private Iterable<BytesMessageWithOffset> filterAndDecode(Iterable<MessageAndOffset> kafkaMessages,
            long offset) {
        List<BytesMessageWithOffset> ret = new LinkedList<>();
        for (MessageAndOffset msgAndOffset : kafkaMessages) {
            if (msgAndOffset.offset() >= offset) {
                byte[] payload = decoder.fromMessage(msgAndOffset.message());
                // record nextOffset so that the next fetch resumes after this message instead of re-reading it
                ret.add(new BytesMessageWithOffset(payload, msgAndOffset.nextOffset()));
            }
        }
        return ret;
    }

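    /**
     * Requests the earliest or latest offset for the partition, returning -1 on error and
     * translating an interrupt during the NIO call into InterruptedException.
     */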
    private long getOffset(boolean earliest) throws InterruptedException {
        TopicAndPartition topicAndPartition = new TopicAndPartition(topic, partitionId);
        Map<TopicAndPartition, PartitionOffsetRequestInfo> requestInfo = new HashMap<TopicAndPartition, PartitionOffsetRequestInfo>();
        requestInfo.put(topicAndPartition, new PartitionOffsetRequestInfo(
                earliest ? kafka.api.OffsetRequest.EarliestTime() : kafka.api.OffsetRequest.LatestTime(), 1));
        OffsetRequest request = new OffsetRequest(requestInfo, kafka.api.OffsetRequest.CurrentVersion(), clientId);
        OffsetResponse response = null;
        try {
            response = consumer.getOffsetsBefore(request);
        } catch (Exception e) {
            // e could be an instance of ClosedByInterruptException as SimpleConsumer.getOffsetsBefore uses nio
            if (Thread.interrupted()) {
                logger.info("caught {} while interrupted in getOffset({}) for {} - {}",
                        e.getClass().getName(), earliest, topic, partitionId);

                throw new InterruptedException();
            }

            logger.error("caught exception in getOffsetsBefore {} - {}", topic, partitionId, e);
            return -1;
        }
        if (response.hasError()) {
            logger.error("error fetching offset from broker {}, reason: {}", leaderBroker.host(),
                    response.errorCode(topic, partitionId));
            return -1;
        }
        long[] offsets = response.offsets(topic, partitionId);
        return earliest ? offsets[0] : offsets[offsets.length - 1];
    }

    /**
     * Fetches messages starting at the given offset, retrying through leader failover. If the
     * requested offset is out of range, the fetch restarts from the earliest available offset.
     */
    public Iterable<BytesMessageWithOffset> fetch(long offset, int timeoutMs) throws InterruptedException {
        List<BytesMessageWithOffset> newOffsetMsg = new ArrayList<BytesMessageWithOffset>();
        FetchResponse response = null;
        Broker previousLeader = leaderBroker;
        while (true) {
            ensureConsumer(previousLeader);

            FetchRequest request = new FetchRequestBuilder().clientId(clientId)
                    .addFetch(topic, partitionId, offset, 100000000).maxWait(timeoutMs).minBytes(1).build();

            //logger.debug("fetch offset {}", offset);

            try {
                response = consumer.fetch(request);
            } catch (Exception e) {
                // e could be an instance of ClosedByInterruptException as SimpleConsumer.fetch uses nio
                if (Thread.interrupted()) {
                    logger.info("caught {} while interrupted in fetch for {} - {} with offset {}",
                            e.getClass().getName(), topic, partitionId, offset);

                    throw new InterruptedException();
                }
                logger.warn("caught exception in fetch {} - {}", topic, partitionId, e);
                response = null;
            }

            if (response == null || response.hasError()) {
                short errorCode = response != null ? response.errorCode(topic, partitionId)
                        : ErrorMapping.UnknownCode();
                logger.debug("fetch {} - {} with offset {} encounters error: {}", topic, partitionId, offset,
                        errorCode);

                boolean needNewLeader = false;
                if (errorCode == ErrorMapping.RequestTimedOutCode()) {
                    //TODO: leave it here
                } else if (errorCode == ErrorMapping.OffsetOutOfRangeCode()) {
                    //TODO: fetch the earliest offset or latest offset ?
                    // seems no obvious correct way to handle it
                    long earliestOffset = getOffset(true);
                    logger.debug("got earliest offset {} for {} - {}", earliestOffset, topic, partitionId);
                    if (earliestOffset < 0) {
                        needNewLeader = true;
                    } else {
                        newOffsetMsg.add(new BytesMessageWithOffset(null, earliestOffset));
                        offset = earliestOffset;
                        continue;
                    }
                } else {
                    needNewLeader = true;
                }

                if (needNewLeader) {
                    stopConsumer();
                    previousLeader = leaderBroker;
                    leaderBroker = null;
                    continue;
                }
            } else {
                break;
            }
        }

        return response != null ? filterAndDecode(response.messageSet(topic, partitionId), offset)
                : (newOffsetMsg.size() > 0 ? newOffsetMsg : EMPTY_MSGS);
    }

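    /** Closes and discards the underlying SimpleConsumer, if one exists. */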
    private void stopConsumer() {
        if (consumer != null) {
            try {
                consumer.close();
                logger.info("stop consumer for {} - {}, leader broker {}", topic, partitionId, leaderBroker);
            } catch (Exception e) {
                logger.warn("stop consumer for {} - {} failed", topic, partitionId, e);
            } finally {
                consumer = null;
            }
        }
    }

    // stop the consumer
    public void stop() {
        stopConsumer();
        logger.info("KafkaSimpleConsumer stopped for {} - {}", topic, partitionId);
    }

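    /**
     * Asks each known replica broker for topic metadata and returns this partition's metadata,
     * refreshing replicaBrokers from it; returns null if no broker could answer.
     */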
    private PartitionMetadata findLeader() throws InterruptedException {
        List<String> topics = new ArrayList<String>();
        topics.add(topic);

        for (KafkaBroker broker : replicaBrokers) {
            SimpleConsumer consumer = null;
            try {
                logger.debug("findLeader, try broker {}:{}", broker.host, broker.port);
                consumer = new SimpleConsumer(broker.host, broker.port, conf.getInt("Pistachio.Kafka.soTimeout"),
                        conf.getInt("Pistachio.Kafka.bufferSize"), clientId + "leaderLookup");
                TopicMetadataResponse resp = consumer.send(new TopicMetadataRequest(topics));

                // only one topic was requested, so the response contains just its metadata
                List<TopicMetadata> metaData = resp.topicsMetadata();
                for (TopicMetadata item : metaData) {
                    for (PartitionMetadata part : item.partitionsMetadata()) {
                        if (part.partitionId() == partitionId) {
                            replicaBrokers.clear();
                            for (Broker replica : part.replicas()) {
                                replicaBrokers.add(new KafkaBroker(replica.host(), replica.port()));
                            }
                            return part;
                        }
                    }
                }
            } catch (Exception e) {
                // e could be an instance of ClosedByInterruptException as SimpleConsumer.send uses nio
                if (Thread.interrupted()) {
                    logger.info("caught {} while interrupted in findLeader for {} - {}",
                            e.getClass().getName(), topic, partitionId);

                    throw new InterruptedException();
                }
                logger.warn("error communicating with Broker {} to find leader for {} - {}", broker, topic,
                        partitionId, e);
            } finally {
                if (consumer != null) {
                    try {
                        consumer.close();
                    } catch (Exception e) {
                    }
                }
            }
        }

        return null;
    }

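    /**
     * Polls findLeader() until a usable leader is found. The old leader is accepted again only
     * after the first retry, to give ZooKeeper a moment to recover; after repeated failures the
     * replica list is reset to the full broker list.
     */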
    private Broker findNewLeader(Broker oldLeader) throws InterruptedException {
        long retryCnt = 0;
        while (true) {
            PartitionMetadata metadata = findLeader();
            logger.debug("findNewLeader - meta leader {}, previous leader {}", metadata, oldLeader);
            if (metadata != null && metadata.leader() != null
                    && (oldLeader == null || (!(oldLeader.host().equalsIgnoreCase(metadata.leader().host())
                            && (oldLeader.port() == metadata.leader().port())) || retryCnt != 0))) {
                // first time through if the leader hasn't changed give ZooKeeper a second to recover
                // second time, assume the broker did recover before failover, or it was a non-Broker issue
                logger.info("findNewLeader - using new leader {} from meta data, previous leader {}",
                        metadata.leader(), oldLeader);
                return metadata.leader();
            }
            //TODO: backoff retry
            Thread.sleep(1000L);
            retryCnt++;
            // if no leader can be found among the current replicaBrokers, fall back to the full broker list
            if (retryCnt >= 3 && (retryCnt - 3) % 5 == 0) {
                logger.warn("cannot find leader for {} - {} after {} retries", topic, partitionId, retryCnt);
                replicaBrokers.clear();
                replicaBrokers.addAll(allBrokers);
            }
        }
    }

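    /**
     * Returns the latest offset of the partition, retrying through leader failover until a
     * valid response is received.
     */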
    public long getLastOffset() throws InterruptedException {
        OffsetResponse response = null;
        Broker previousLeader = leaderBroker;
        while (true) {
            TopicAndPartition topicAndPartition = new TopicAndPartition(topic, partitionId);
            Map<TopicAndPartition, PartitionOffsetRequestInfo> requestInfo = new HashMap<TopicAndPartition, PartitionOffsetRequestInfo>();
            requestInfo.put(topicAndPartition,
                    new PartitionOffsetRequestInfo(kafka.api.OffsetRequest.LatestTime(), 1));
            kafka.javaapi.OffsetRequest request = new kafka.javaapi.OffsetRequest(requestInfo,
                    kafka.api.OffsetRequest.CurrentVersion(), clientId);

            ensureConsumer(previousLeader);
            try {
                response = consumer.getOffsetsBefore(request);
            } catch (Exception e) {
                // e could be an instance of ClosedByInterruptException as SimpleConsumer.getOffsetsBefore uses nio
                if (Thread.interrupted()) {
                    logger.info("caught {} while interrupted in getLastOffset for {} - {}",
                            e.getClass().getName(), topic, partitionId);

                    throw new InterruptedException();
                }
                logger.warn("caught exception in getLastOffset {} - {}", topic, partitionId, e);
                response = null;
            }
            if (response == null || response.hasError()) {
                short errorCode = response != null ? response.errorCode(topic, partitionId)
                        : ErrorMapping.UnknownCode();

                logger.warn("error fetching offset for {} - {} from broker, reason: {}", topic, partitionId,
                        errorCode);

                stopConsumer();
                previousLeader = leaderBroker;
                leaderBroker = null;
                continue;
            }
            break;
        }
        long[] offsets = response.offsets(topic, partitionId);
        return offsets[offsets.length - 1];
    }

    public static void main(String[] args) {
        if (args.length < 3) {
            System.out.println("Usage: KafkaSimpleConsumer $topic $clientId $offset");
            return;
        }

        KafkaSimpleConsumer consumer = new KafkaSimpleConsumer(args[0], 0, args[1]);

        long offset = Long.parseLong(args[2]);

        try {
            while (true) {
                Iterable<BytesMessageWithOffset> msgs = consumer.fetch(offset, 1000);
                for (BytesMessageWithOffset msg : msgs) {
                    byte[] payload = msg.message();
                    // the payload can be null when fetch() substitutes an offset-reset marker
                    System.out.println("received msg >>> " + (payload == null ? "<offset reset>" : new String(payload)));
                    offset = msg.offset();
                }
                Thread.sleep(1000L);
            }
        } catch (InterruptedException e) {
            // interrupted: fall through and stop the consumer
        }

        consumer.stop();
    }
}
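
Usage example

The main method above shows a basic polling loop. Because a single instance must stay on one
thread, the copy constructor is the intended way to give another thread its own consumer that
starts from the same cached leader. Below is a minimal sketch under that assumption; the topic
name, partition, and client id are hypothetical, and a reachable broker list is assumed under
the kafka.metadata.broker.list configuration key.

// Poll messages on the main thread while a helper thread watches the log-end offset.
KafkaSimpleConsumer consumer = new KafkaSimpleConsumer("my-topic", 0, "my-client");
KafkaSimpleConsumer monitor = new KafkaSimpleConsumer(consumer); // per-thread copy

Thread watcher = new Thread(() -> {
    try {
        while (!Thread.currentThread().isInterrupted()) {
            System.out.println("latest offset: " + monitor.getLastOffset());
            Thread.sleep(5000L);
        }
    } catch (InterruptedException e) {
        // shutting down
    } finally {
        monitor.stop();
    }
});
watcher.start();

long offset = 0;
try {
    while (true) {
        for (KafkaSimpleConsumer.BytesMessageWithOffset m : consumer.fetch(offset, 1000)) {
            offset = m.offset(); // fetch() already returns the next offset to use
        }
    }
} catch (InterruptedException e) {
    // interrupted: shut down
} finally {
    watcher.interrupt();
    consumer.stop();
}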