Java examples for Big Data: Apache Kafka
Timestamp Apache Kafka

This example reads records from Kafka with Spark Streaming, appends the current time and the elapsed time since each record was created, and publishes the result back to Kafka.
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Properties;

import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.Producer;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.common.serialization.StringDeserializer;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.VoidFunction;
import org.apache.spark.storage.StorageLevel;
import org.apache.spark.streaming.Durations;
import org.apache.spark.streaming.api.java.*;
import org.apache.spark.streaming.kafka010.*;

public final class TimestampKafka {

    public static void main(String[] args) throws Exception {
        if (args.length != 3) {
            System.err.println("Usage: TimestampKafka <topics> <batch-interval (ms)> <checkpointing>");
            System.err.println("\t <checkpointing>: [0|1]");
            System.exit(1);
        }

        // SPARK CONFIGURATION
        SparkConf sparkConf = new SparkConf().setAppName("TimestampKafka");
        JavaStreamingContext jStreamingContext = new JavaStreamingContext(sparkConf,
                Durations.milliseconds(Integer.parseInt(args[1])));
        if (Integer.parseInt(args[2]) == 1) {
            jStreamingContext.checkpoint("file:///home/spark");
        }

        // KAFKA CONSUMER CONFIGURATION
        Map<String, Object> param = new HashMap<>();
        param.put("bootstrap.servers", "192.168.0.155:9092");
        param.put("key.deserializer", StringDeserializer.class);
        param.put("value.deserializer", StringDeserializer.class);
        param.put("group.id", "spark");
        param.put("auto.offset.reset", "latest");
        param.put("enable.auto.commit", false);

        Collection<String> topics = Arrays.asList(args[0].split(","));

        final JavaInputDStream<ConsumerRecord<String, String>> message = KafkaUtils.createDirectStream(
                jStreamingContext,
                LocationStrategies.PreferConsistent(),
                ConsumerStrategies.<String, String>Subscribe(topics, param));

        // MAIN PROGRAM
        // Extract the record values and cache them, since the stream feeds two stages below
        JavaDStream<String> line = message.map(new MapperKafka()).persist(StorageLevel.MEMORY_ONLY());

        // Add a timestamp and calculate the difference with the creation time
        JavaDStream<String> lineTS = line.map(new TimestampAdder());

        // Send the result to Kafka
        lineTS.foreachRDD(new KafkaPublisher());

        jStreamingContext.start();
        jStreamingContext.awaitTermination();
    }

    // FUNCTIONS used in the program implementations:

    // Extracts the value of each Kafka record
    public static class MapperKafka implements Function<ConsumerRecord<String, String>, String> {
        private static final long serialVersionUID = 1L;

        public String call(ConsumerRecord<String, String> record) throws Exception {
            return record.value();
        }
    }

    // Appends the current time and the elapsed time since the record's creation timestamp
    // (the second space-separated field of each input line)
    public static class TimestampAdder implements Function<String, String> {
        private static final long serialVersionUID = 1L;

        public String call(String line) {
            long currentTime = System.currentTimeMillis();
            String totalTime = String.valueOf(currentTime - Long.parseLong(line.split(" ")[1]));
            return line.concat(" " + currentTime + " " + totalTime);
        }
    }

    // Publishes each partition of the result RDD to the Kafka topic "testRes"
    public static class KafkaPublisher implements VoidFunction<JavaRDD<String>> {
        private static final long serialVersionUID = 1L;

        public void call(JavaRDD<String> rdd) throws Exception {
            // KAFKA PRODUCER configuration; declared final so the
            // anonymous inner class below can capture it
            final Properties props = new Properties();
            props.put("bootstrap.servers", "192.168.0.155:9092");
            props.put("acks", "0");
            props.put("retries", 0);
            props.put("batch.size", 16384);
            props.put("linger.ms", 0);
            props.put("buffer.memory", 33554432);
            props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
            props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");

            rdd.foreachPartition(new VoidFunction<Iterator<String>>() {
                private static final long serialVersionUID = 1L;

                public void call(Iterator<String> partitionOfRecords) throws Exception {
                    // One producer per partition, created on the executor
                    Producer<String, String> producer = new KafkaProducer<>(props);
                    while (partitionOfRecords.hasNext()) {
                        producer.send(new ProducerRecord<String, String>("testRes", partitionOfRecords.next()));
                    }
                    producer.close();
                }
            });
        }
    }
}