consumer.kafka.ProcessedOffsetManager.java Source code

Introduction

Here is the source code for consumer.kafka.ProcessedOffsetManager.java. The class records, for each Kafka partition, the highest offset processed in a Spark Streaming batch and persists it as JSON to a consumer-specific ZooKeeper path.

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package consumer.kafka;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Properties;

import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.PairFlatMapFunction;
import org.apache.spark.streaming.api.java.JavaDStream;
import org.apache.spark.streaming.api.java.JavaPairDStream;
import org.apache.spark.streaming.dstream.DStream;

import scala.Tuple2;
import scala.reflect.ClassTag;

import com.google.common.collect.ImmutableMap;

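/**
 * Records, for each Kafka partition, the highest offset processed in a Spark
 * Streaming batch and persists it as a small JSON document under a
 * consumer-specific ZooKeeper path.
 */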
public class ProcessedOffsetManager {

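    /**
     * Writes each partition's offset to ZooKeeper as JSON (consumer id, topic,
     * partition, offset and an empty broker placeholder), closing the ZkState
     * connection whether the writes succeed or fail.
     */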
    private static void persistProcessedOffsets(Properties props, Map<Integer, Long> partitionOffsetMap) {
        ZkState state = new ZkState(props.getProperty(Config.ZOOKEEPER_CONSUMER_CONNECTION));
        for (Map.Entry<Integer, Long> po : partitionOffsetMap.entrySet()) {
            Map<Object, Object> data = (Map<Object, Object>) ImmutableMap.builder()
                    .put("consumer", ImmutableMap.of("id", props.getProperty(Config.KAFKA_CONSUMER_ID)))
                    .put("offset", po.getValue()).put("partition", po.getKey())
                    .put("broker", ImmutableMap.of("host", "", "port", ""))
                    .put("topic", props.getProperty(Config.KAFKA_TOPIC)).build();
            String path = processedPath(po.getKey(), props);
            try {
                state.writeJSON(path, data);
            } catch (Exception ex) {
                state.close();
                throw ex;
            }
        }
        state.close();
    }

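    /** Builds the ZooKeeper node path: {consumerPath}/{consumerId}/{topic}/processed/partition_{partition}. */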
    private static String processedPath(int partition, Properties props) {
        return props.getProperty(Config.ZOOKEEPER_CONSUMER_PATH) + "/" + props.getProperty(Config.KAFKA_CONSUMER_ID)
                + "/" + props.getProperty(Config.KAFKA_TOPIC) + "/processed/" + "partition_" + partition;
    }

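    /**
     * Turns the message stream into a (partition, offsets) pair stream. Each RDD
     * partition's iterator is drained to its last record and that record's offset
     * is emitted (records are assumed to arrive in offset order, so this is the
     * highest offset of the batch for that partition); the pairs are then grouped
     * by Kafka partition into a single Spark partition.
     */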
    public static JavaPairDStream<Integer, Iterable<Long>> getPartitionOffset(
            JavaDStream<MessageAndMetadata> unionStreams) {

        JavaPairDStream<Integer, Long> partitionOffsetStream = unionStreams
                .mapPartitionsToPair(new PairFlatMapFunction<Iterator<MessageAndMetadata>, Integer, Long>() {
                    @Override
                    public Iterable<Tuple2<Integer, Long>> call(Iterator<MessageAndMetadata> entry)
                            throws Exception {
                        MessageAndMetadata mmeta = null;
                        List<Tuple2<Integer, Long>> l = new ArrayList<Tuple2<Integer, Long>>();
                        while (entry.hasNext()) {
                            mmeta = entry.next();
                        }
                        if (mmeta != null) {
                            l.add(new Tuple2<Integer, Long>(mmeta.getPartition().partition, mmeta.getOffset()));
                        }
                        return l;
                    }
                });
        JavaPairDStream<Integer, Iterable<Long>> partitionOffset = partitionOffsetStream.groupByKey(1);
        return partitionOffset;
    }

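    /**
     * For every batch, collects the grouped offsets to the driver, keeps the
     * maximum offset per partition and persists it to ZooKeeper.
     */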
    @SuppressWarnings("deprecation")
    public static void persists(JavaPairDStream<Integer, Iterable<Long>> partitionOffset, Properties props) {
        partitionOffset.foreachRDD(new Function<JavaPairRDD<Integer, Iterable<Long>>, Void>() {
            @Override
            public Void call(JavaPairRDD<Integer, Iterable<Long>> po) throws Exception {
                List<Tuple2<Integer, Iterable<Long>>> poList = po.collect();
                Map<Integer, Long> partitionOffsetMap = new HashMap<Integer, Long>();
                for (Tuple2<Integer, Iterable<Long>> tuple : poList) {
                    int partition = tuple._1();
                    Long offset = getMaximum(tuple._2());
                    partitionOffsetMap.put(partition, offset);
                }
                persistProcessedOffsets(props, partitionOffsetMap);
                return null;
            }

            public <T extends Comparable<T>> T getMaximum(Iterable<T> values) {
                T max = null;
                for (T value : values) {
                    if (max == null || max.compareTo(value) < 0) {
                        max = value;
                    }
                }
                return max;
            }
        });
    }

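    /** Scala-API variant of {@link #getPartitionOffset(JavaDStream)}: wraps the DStream and applies the same per-partition logic. */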
    public static DStream<Tuple2<Integer, Iterable<Long>>> getPartitionOffset(
            DStream<MessageAndMetadata> unionStreams) {
        ClassTag<MessageAndMetadata> messageMetaClassTag = ScalaUtil
                .<MessageAndMetadata>getClassTag(MessageAndMetadata.class);
        JavaDStream<MessageAndMetadata> javaDStream = new JavaDStream<MessageAndMetadata>(unionStreams,
                messageMetaClassTag);
        JavaPairDStream<Integer, Long> partitionOffsetStream = javaDStream
                .mapPartitionsToPair(new PairFlatMapFunction<Iterator<MessageAndMetadata>, Integer, Long>() {
                    @Override
                    public Iterable<Tuple2<Integer, Long>> call(Iterator<MessageAndMetadata> entry)
                            throws Exception {
                        MessageAndMetadata mmeta = null;
                        List<Tuple2<Integer, Long>> l = new ArrayList<Tuple2<Integer, Long>>();
                        while (entry.hasNext()) {
                            mmeta = entry.next();
                        }
                        if (mmeta != null) {
                            l.add(new Tuple2<Integer, Long>(mmeta.getPartition().partition, mmeta.getOffset()));
                        }
                        return l;
                    }
                });
        JavaPairDStream<Integer, Iterable<Long>> partitionOffset = partitionOffsetStream.groupByKey(1);
        return partitionOffset.dstream();
    }

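    /** Scala-API variant of {@link #persists(JavaPairDStream, Properties)}: wraps the DStream and performs the same per-batch ZooKeeper write. */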
    @SuppressWarnings("deprecation")
    public static void persists(DStream<Tuple2<Integer, Iterable<Long>>> partitionOffset, Properties props) {
        ClassTag<Tuple2<Integer, Iterable<Long>>> tuple2ClassTag = ScalaUtil
                .<Integer, Iterable<Long>>getTuple2ClassTag();
        JavaDStream<Tuple2<Integer, Iterable<Long>>> jpartitionOffset = new JavaDStream<Tuple2<Integer, Iterable<Long>>>(
                partitionOffset, tuple2ClassTag);
        jpartitionOffset.foreachRDD(new Function<JavaRDD<Tuple2<Integer, Iterable<Long>>>, Void>() {
            @Override
            public Void call(JavaRDD<Tuple2<Integer, Iterable<Long>>> po) throws Exception {
                List<Tuple2<Integer, Iterable<Long>>> poList = po.collect();
                Map<Integer, Long> partitionOffsetMap = new HashMap<Integer, Long>();
                for (Tuple2<Integer, Iterable<Long>> tuple : poList) {
                    int partition = tuple._1();
                    Long offset = getMaximum(tuple._2());
                    partitionOffsetMap.put(partition, offset);
                }
                persistProcessedOffsets(props, partitionOffsetMap);
                return null;
            }

            public <T extends Comparable<T>> T getMaximum(Iterable<T> values) {
                T max = null;
                for (T value : values) {
                    if (max == null || max.compareTo(value) < 0) {
                        max = value;
                    }
                }
                return max;
            }
        });
    }
}
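
Usage

The sketch below shows how an application typically uses the two public entry points above once it has the JavaDStream<MessageAndMetadata> produced by this consumer's receiver. The example class name, its package, and the way the input stream and Properties are obtained are illustrative assumptions, not part of the file above; only the two ProcessedOffsetManager calls and the MessageAndMetadata type come from the listing.

package consumer.kafka.examples;

import java.util.Properties;

import org.apache.spark.streaming.api.java.JavaDStream;
import org.apache.spark.streaming.api.java.JavaPairDStream;

import consumer.kafka.MessageAndMetadata;
import consumer.kafka.ProcessedOffsetManager;

/** Illustrative wiring of ProcessedOffsetManager into a streaming job. */
public class OffsetTrackingSketch {

    /**
     * @param unionStreams the unified JavaDStream<MessageAndMetadata> produced by
     *                     this project's Kafka receiver (how it is created is
     *                     outside the scope of the listing above)
     * @param props        consumer properties carrying the Config.* keys that
     *                     ProcessedOffsetManager reads: ZooKeeper connection,
     *                     consumer id, topic and consumer base path
     */
    public static void trackProcessedOffsets(
            JavaDStream<MessageAndMetadata> unionStreams, Properties props) {

        // Derive a (partition -> offsets seen this batch) pair stream.
        JavaPairDStream<Integer, Iterable<Long>> partitionOffsets =
                ProcessedOffsetManager.getPartitionOffset(unionStreams);

        // On every batch, write the highest offset per partition to ZooKeeper.
        ProcessedOffsetManager.persists(partitionOffsets, props);
    }
}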