Timestamp Apache Kafka - Java Big Data

Java examples for Big Data: Apache Kafka

Description

This example consumes lines from Kafka with Spark Streaming, appends the current time plus the elapsed time since each record's creation timestamp, and publishes the result back to Kafka.

Demo Code



import java.util.Map;
import java.util.Properties;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;

import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.Producer;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.common.serialization.StringDeserializer;
import org.apache.spark.streaming.kafka010.*;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.VoidFunction;
import org.apache.spark.storage.StorageLevel;
import org.apache.spark.streaming.Durations;
import org.apache.spark.streaming.api.java.*;

public final class TimestampKafka {

    public static void main(String[] args) throws Exception {
        if (args.length != 3) {
            System.err
                    .println("Usage: TimestampKafka <topics> <batch-interval (ms)> <checkpointing>");
            System.err.println("\t <checkpointing>: [0|1]");
            System.exit(1);
        }

        //SPARK CONFIGURATION
        SparkConf sparkConf = new SparkConf().setAppName("TimestampKafka");
        JavaStreamingContext jStreamingContext = new JavaStreamingContext(
                sparkConf, Durations.milliseconds(Integer.parseInt(args[1])));

        if (Integer.parseInt(args[2]) == 1) {
            jStreamingContext.checkpoint("file:///home/spark");
        }

        //KAFKA CONSUMER CONFIGURATION
        Map<String, Object> param = new HashMap<>();
        param.put("bootstrap.servers", "192.168.0.155:9092");
        param.put("key.deserializer", StringDeserializer.class);
        param.put("value.deserializer", StringDeserializer.class);
        param.put("group.id", "spark");
        param.put("auto.offset.reset", "latest");
        param.put("enable.auto.commit", false);

        Collection<String> topics = Arrays.asList(args[0].split(","));

        final JavaInputDStream<ConsumerRecord<String, String>> message = KafkaUtils
                .createDirectStream(jStreamingContext,
                        LocationStrategies.PreferConsistent(),
                        ConsumerStrategies.<String, String>Subscribe(topics, param));

        //MAIN PROGRAM 
        JavaDStream<String> line = message.map(new MapperKafka()).persist(
                StorageLevel.MEMORY_ONLY());

        //Add timestamp and calculate the difference with the creation time
        JavaDStream<String> lineTS = line.map(new TimestampAdder());

        //Send the result to Kafka
        lineTS.foreachRDD(new KafkaPublisher());

        jStreamingContext.start();
        jStreamingContext.awaitTermination();

    }

    //FUNCTIONS used in the program implementations:

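    //Extracts the String value from each consumed ConsumerRecord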
    public static class MapperKafka implements
            Function<ConsumerRecord<String, String>, String> {
        private static final long serialVersionUID = 1L;

        public String call(ConsumerRecord<String, String> record)
                throws Exception {
            return record.value();
        }
    }

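    //Appends the current time (epoch ms) and the elapsed time since the
    //record's creation time, read from the second space-separated field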
    public static class TimestampAdder implements Function<String, String> {
        private static final long serialVersionUID = 1L;

        public String call(String line) {
            long currentTime = System.currentTimeMillis();
            String totalTime = String.valueOf(currentTime
                    - Long.parseLong(line.split(" ")[1]));
            String newLine = line.concat(" " + currentTime + " " + totalTime);

            return newLine;
        }
    }

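    //Publishes every line of each RDD to the "testRes" Kafka topic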
    public static class KafkaPublisher implements
            VoidFunction<JavaRDD<String>> {
        private static final long serialVersionUID = 1L;

        public void call(JavaRDD<String> rdd) throws Exception {

            //KAFKA PRODUCER
            Properties props = new Properties();
            props.put("bootstrap.servers", "192.168.0.155:9092");
            props.put("acks", "0");
            props.put("retries", 0);
            props.put("batch.size", 16384);
            props.put("linger.ms", 0);
            props.put("buffer.memory", 33554432);
            props.put("key.serializer",
                    "org.apache.kafka.common.serialization.StringSerializer");
            props.put("value.serializer",
                    "org.apache.kafka.common.serialization.StringSerializer");

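            //One producer per partition amortizes connection setup across records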
            rdd.foreachPartition(new VoidFunction<Iterator<String>>() {
                private static final long serialVersionUID = 1L;

                public void call(Iterator<String> partitionOfRecords)
                        throws Exception {
                    Producer<String, String> producer = new KafkaProducer<>(
                            props);
                    while (partitionOfRecords.hasNext()) {
                        producer.send(new ProducerRecord<String, String>(
                                "testRes", partitionOfRecords.next()));
                    }
                    producer.close();
                }
            });
        }
    }

}
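
The pipeline assumes each consumed line carries its creation time as the second space-separated field, in epoch milliseconds; TimestampAdder subtracts that value from the current time to measure end-to-end latency. Below is a minimal sketch of a matching test producer, assuming the broker address from the demo and a hypothetical input topic named "test"; the class name, topic name, and message count are illustrative, not part of the original example.

import java.util.Properties;

import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.Producer;
import org.apache.kafka.clients.producer.ProducerRecord;

public final class TimestampKafkaDriver {

    public static void main(String[] args) throws Exception {
        Properties props = new Properties();
        props.put("bootstrap.servers", "192.168.0.155:9092"); //same broker as the demo
        props.put("key.serializer",
                "org.apache.kafka.common.serialization.StringSerializer");
        props.put("value.serializer",
                "org.apache.kafka.common.serialization.StringSerializer");

        //KafkaProducer implements Closeable, so try-with-resources closes it
        try (Producer<String, String> producer = new KafkaProducer<>(props)) {
            for (int i = 0; i < 100; i++) {
                //Second field is the creation time that TimestampAdder parses
                String line = "record-" + i + " " + System.currentTimeMillis();
                producer.send(new ProducerRecord<>("test", line)); //"test" is an assumed topic
                Thread.sleep(100);
            }
        }
    }
}

With such a producer running, the job itself can be submitted with, for example, spark-submit --class TimestampKafka <application-jar> test 1000 0 (comma-separated topics, a 1000 ms batch interval, checkpointing off), matching the usage message above.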
