consumer.kafka.ProcessedOffsetManager.java Source code


Introduction

Here is the source code for consumer.kafka.ProcessedOffsetManager.java. The class gathers, per Kafka partition, the offsets processed in each Spark Streaming batch and commits the highest offset for every partition to ZooKeeper, so that a restarted consumer can resume from the last processed position.

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package consumer.kafka;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.function.VoidFunction;
import org.apache.spark.streaming.api.java.JavaDStream;
import org.apache.spark.streaming.api.java.JavaPairDStream;
import org.apache.spark.streaming.dstream.DStream;
import scala.Tuple2;
import scala.reflect.ClassTag;

import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;

public class ProcessedOffsetManager<T> {

    private static final Log LOG = LogFactory.getLog(ProcessedOffsetManager.class);

    /**
     * Reduces each batch of consumed messages to the offsets seen per partition,
     * then groups those offsets by partition id (using a single grouping task).
     */
    public static <T> JavaPairDStream<Integer, Iterable<Long>> getPartitionOffset(
            JavaDStream<MessageAndMetadata<T>> unionStreams, Properties props) {
        JavaPairDStream<Integer, Long> partitionOffsetStream = unionStreams
                .mapPartitionsToPair(new PartitionOffsetPair<>());
        JavaPairDStream<Integer, Iterable<Long>> partitionOffset = partitionOffsetStream.groupByKey(1);
        return partitionOffset;
    }

    /**
     * Collects the per-partition offsets of every batch on the driver and
     * writes the highest processed offset for each partition to ZooKeeper.
     */
    @SuppressWarnings("deprecation")
    public static void persists(JavaPairDStream<Integer, Iterable<Long>> partitionOffset, Properties props) {
        partitionOffset.foreachRDD(new VoidFunction<JavaPairRDD<Integer, Iterable<Long>>>() {
            @Override
            public void call(JavaPairRDD<Integer, Iterable<Long>> po) throws Exception {
                List<Tuple2<Integer, Iterable<Long>>> poList = po.collect();
                doPersists(poList, props);
            }
        });
    }

    /**
     * Scala-friendly overload: wraps the raw DStream in a JavaDStream and
     * delegates to the JavaDStream variant above.
     */
    public static <T> DStream<Tuple2<Integer, Iterable<Long>>> getPartitionOffset(
            DStream<MessageAndMetadata<T>> unionStreams, Properties props) {
        ClassTag<MessageAndMetadata<T>> messageMetaClassTag = ScalaUtil.<T>getMessageAndMetadataClassTag();
        JavaDStream<MessageAndMetadata<T>> javaDStream = new JavaDStream<MessageAndMetadata<T>>(unionStreams,
                messageMetaClassTag);
        JavaPairDStream<Integer, Iterable<Long>> partitionOffset = getPartitionOffset(javaDStream, props);
        return partitionOffset.dstream();
    }

    /**
     * Scala-friendly overload of persists: wraps the raw DStream and commits
     * the collected offsets exactly as the JavaPairDStream variant does.
     */
    @SuppressWarnings("deprecation")
    public static void persists(DStream<Tuple2<Integer, Iterable<Long>>> partitionOffset, Properties props) {
        ClassTag<Tuple2<Integer, Iterable<Long>>> tuple2ClassTag = ScalaUtil
                .<Integer, Iterable<Long>>getTuple2ClassTag();
        JavaDStream<Tuple2<Integer, Iterable<Long>>> jPartitionOffset = new JavaDStream<Tuple2<Integer, Iterable<Long>>>(
                partitionOffset, tuple2ClassTag);
        jPartitionOffset.foreachRDD(new VoidFunction<JavaRDD<Tuple2<Integer, Iterable<Long>>>>() {
            @Override
            public void call(JavaRDD<Tuple2<Integer, Iterable<Long>>> po) throws Exception {
                List<Tuple2<Integer, Iterable<Long>>> poList = po.collect();
                doPersists(poList, props);
            }
        });
    }

    /** Reduces each partition's offset list to its maximum and persists the result. */
    private static void doPersists(List<Tuple2<Integer, Iterable<Long>>> poList, Properties props) {
        Map<Integer, Long> partitionOffsetMap = new HashMap<Integer, Long>();
        for (Tuple2<Integer, Iterable<Long>> tuple : poList) {
            int partition = tuple._1();
            Long offset = getMaximum(tuple._2());
            partitionOffsetMap.put(partition, offset);
        }
        persistProcessedOffsets(props, partitionOffsetMap);
    }

    /** Returns the largest element of the iterable, or null if it is empty. */
    private static <T extends Comparable<T>> T getMaximum(Iterable<T> values) {
        T max = null;
        for (T value : values) {
            if (max == null || max.compareTo(value) < 0) {
                max = value;
            }
        }
        return max;
    }

    /**
     * Writes each partition's processed offset to its ZooKeeper node, closing
     * the ZooKeeper connection on both success and failure.
     */
    private static void persistProcessedOffsets(Properties props, Map<Integer, Long> partitionOffsetMap) {
        ZkState state = new ZkState(props.getProperty(Config.ZOOKEEPER_CONSUMER_CONNECTION));
        for (Map.Entry<Integer, Long> po : partitionOffsetMap.entrySet()) {
            String path = processedPath(po.getKey(), props);
            try {
                state.writeBytes(path, po.getValue().toString().getBytes());
                LOG.info("Wrote processed offset " + po.getValue() + " for partition " + po.getKey());
            } catch (Exception ex) {
                LOG.error(
                        "Error while committing processed offset " + po.getValue() + " for partition " + po.getKey(),
                        ex);
                state.close();
                throw ex;
            }
        }
        state.close();
    }

    /**
     * Builds the ZooKeeper path under which a partition's processed offset is
     * stored: consumerZkPath/consumerId/processed/topic/partition.
     */
    public static String processedPath(int partition, Properties props) {
        String consumerZkPath = "/consumers";
        if (props.getProperty("zookeeper.consumer.path") != null) {
            consumerZkPath = props.getProperty("zookeeper.consumer.path");
        }
        return consumerZkPath + "/" + props.getProperty(Config.KAFKA_CONSUMER_ID) + "/processed/"
                + props.getProperty(Config.KAFKA_TOPIC) + "/" + partition;
    }
}
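
Usage

The sketch below shows how the two public entry points are meant to be wired into a Spark Streaming job. It is a minimal outline, not the project's official example: how unionStreams is built and the concrete property values are assumptions, and ProcessedOffsetExample itself is a hypothetical class; only the ProcessedOffsetManager calls and the Config constants come from the class above.

package consumer.kafka;

import java.util.Properties;

import org.apache.spark.streaming.api.java.JavaDStream;
import org.apache.spark.streaming.api.java.JavaPairDStream;

public class ProcessedOffsetExample {

    // unionStreams would come from this project's Kafka receiver; building it
    // is outside the scope of ProcessedOffsetManager, so it is a parameter here.
    static void commitProcessedOffsets(
            JavaDStream<MessageAndMetadata<byte[]>> unionStreams, Properties props) {
        // 1. Per batch, collect the offsets seen in each partition.
        JavaPairDStream<Integer, Iterable<Long>> partitionOffsets =
                ProcessedOffsetManager.getPartitionOffset(unionStreams, props);
        // 2. ... process the message stream itself here ...
        // 3. After processing, commit the highest offset per partition to ZooKeeper.
        ProcessedOffsetManager.persists(partitionOffsets, props);
    }

    static Properties exampleProperties() {
        Properties props = new Properties();
        // Keys are the project's Config constants; the values are illustrative.
        props.setProperty(Config.ZOOKEEPER_CONSUMER_CONNECTION, "localhost:2181");
        props.setProperty(Config.KAFKA_CONSUMER_ID, "my-consumer");
        props.setProperty(Config.KAFKA_TOPIC, "my-topic");
        return props;
    }
}

Note that persists collects the grouped offsets to the driver before writing, so the value committed for a partition is the maximum offset observed in that batch; offsets are therefore only recorded after a batch has been handed off for processing.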