// Java tutorial: Spark Streaming + Kafka word-count example
/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package com.anhth12.test;

import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import java.util.HashMap;
import java.util.Map;
import kafka.serializer.StringDecoder;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.storage.StorageLevel;
import org.apache.spark.streaming.Duration;
import org.apache.spark.streaming.Time;
import org.apache.spark.streaming.api.java.JavaDStream;
import org.apache.spark.streaming.api.java.JavaPairDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;
import org.apache.spark.streaming.kafka.KafkaUtils;
import scala.Tuple2;

/**
 * Spark Streaming word-count test: consumes comma-separated messages from the
 * Kafka topic {@code LambdaInput} (located via ZooKeeper) and prints the
 * per-batch word counts on the driver.
 *
 * @author Tong Hoang Anh
 */
public class Main {

    public static void main(String[] args) {
        // ---- Spark configuration ------------------------------------------------
        SparkConf conf = new SparkConf();
        conf.setMaster("spark://192.168.56.101:7077");
        conf.setAppName("TEST");
        // FIX: the original keys "spark.executor.instance", "spark.executor.core"
        // and "spark.clean.ttl" are misspelled and were silently ignored by Spark;
        // the real keys are "spark.executor.instances", "spark.executor.cores"
        // and "spark.cleaner.ttl".
        conf.setIfMissing("spark.executor.instances", Integer.toString(1));
        conf.setIfMissing("spark.executor.cores", Integer.toString(1));
        conf.setIfMissing("spark.executor.memory", "512m");
        conf.setIfMissing("spark.driver.memory", "512m");
        String blockIntervalString = Long.toString(1000L); // FIX: 'L' suffix, not 'l'
        // NOTE(review): blockInterval (1000 ms) is larger than the batch interval
        // (100 ms, set below); Spark requires blockInterval <= batch duration or
        // receivers produce empty blocks -- confirm which interval is intended.
        conf.setIfMissing("spark.streaming.blockInterval", blockIntervalString);
        conf.setIfMissing("spark.streaming.gracefulStopTimeout", blockIntervalString);
        conf.setIfMissing("spark.cleaner.ttl", Integer.toString(20 * 3000));
        conf.setIfMissing("spark.logConf", "true");
        conf.setIfMissing("spark.ui.port", Integer.toString(4040));
        try {
            // Ship this application's own code location to the executors.
            conf.setJars(new String[] {
                Main.class.getProtectionDomain().getCodeSource().getLocation().toURI().getPath() });
        } catch (Exception e) {
            throw new IllegalStateException(e);
        }

        // 100 ms micro-batches (see the blockInterval review note above).
        JavaStreamingContext streamingContext =
                new JavaStreamingContext(new JavaSparkContext(conf), new Duration(100));

        // ---- Kafka source -------------------------------------------------------
        // kafkaParams is only needed by the decoder-based createStream overload
        // (commented below); the simple overload used here takes the ZooKeeper
        // quorum and group id directly.
        Map<String, String> kafkaParams = new HashMap<>();
        kafkaParams.put("zookeeper.connect", "192.168.56.101:2181");
        kafkaParams.put("group.id", "LAMBDA-BATCHLAYER-" + System.currentTimeMillis());
        kafkaParams.put("serializer.encoding", "UTF-8");

        // One receiver thread for the "LambdaInput" topic.
        Map<String, Integer> topicMap = Maps.newHashMap();
        topicMap.put("LambdaInput", 1);
        JavaPairDStream<String, String> dstream =
                KafkaUtils.createStream(streamingContext, "192.168.56.101:2181", "GROUP", topicMap);
        // Alternative overload with explicit decoders and storage level:
        // KafkaUtils.createStream(streamingContext, String.class, String.class,
        //     StringDecoder.class, StringDecoder.class, kafkaParams, topicMap,
        //     StorageLevel.MEMORY_AND_DISK_2());

        // ---- Word-count pipeline ------------------------------------------------
        // NOTE: the println calls inside these functions execute on the executors,
        // so their output lands in executor logs, not on the driver console.
        JavaDStream<String> lines = dstream.map(new Function<Tuple2<String, String>, String>() {
            @Override
            public String call(Tuple2<String, String> tuple2) {
                System.out.println("message: " + tuple2._2());
                return tuple2._2(); // keep the message value; discard the Kafka key
            }
        });

        // Each line is a comma-separated list of words.
        JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
            @Override
            public Iterable<String> call(String x) {
                System.out.println("x: " + x);
                return Lists.newArrayList(x.split(","));
            }
        });

        // Classic (word, 1) -> reduceByKey count per batch.
        JavaPairDStream<String, Integer> wordCounts = words
                .mapToPair(new PairFunction<String, String, Integer>() {
                    @Override
                    public Tuple2<String, Integer> call(String s) {
                        return new Tuple2<String, Integer>(s, 1);
                    }
                })
                .reduceByKey(new Function2<Integer, Integer, Integer>() {
                    @Override
                    public Integer call(Integer i1, Integer i2) {
                        return i1 + i2;
                    }
                });
        wordCounts.print();

        streamingContext.start();
        streamingContext.awaitTermination();
    }

    /**
     * Debug helper: prints each generation's batch time and element count.
     * Currently unreferenced by {@link #main} -- presumably kept for manual
     * {@code foreachRDD} experiments; verify before deleting.
     */
    private static class PrintFunction implements Function2<JavaPairRDD<String, String>, Time, Void> {
        @Override
        public Void call(JavaPairRDD<String, String> t1, Time t2) throws Exception {
            // take(1) is a cheap emptiness probe; count() would scan the whole RDD.
            if (t1.take(1).isEmpty()) {
                System.out.println("No data in current generation's RDD; nothing to do");
                return null;
            }
            System.out.println("-->Time: " + t2.toString() + " count: " + t1.count());
            return null;
        }
    }
}