com.anhth12.test.Main.java Source code


Introduction

Here is the source code for com.anhth12.test.Main.java, a Spark Streaming word-count job that consumes comma-separated messages from a Kafka topic.

Source

/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package com.anhth12.test;

import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import java.util.HashMap;
import java.util.Map;
import kafka.serializer.StringDecoder;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.storage.StorageLevel;
import org.apache.spark.streaming.Duration;
import org.apache.spark.streaming.Time;
import org.apache.spark.streaming.api.java.JavaDStream;
import org.apache.spark.streaming.api.java.JavaPairDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;
import org.apache.spark.streaming.kafka.KafkaUtils;
import scala.Tuple2;

/**
 *
 * @author Tong Hoang Anh
 */
public class Main {

    public static void main(String[] args) {

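        // Run against the standalone master on 192.168.56.101; "TEST" is the
        // application name shown in the Spark UI.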
        SparkConf conf = new SparkConf();
        conf.setMaster("spark://192.168.56.101:7077");
        conf.setAppName("TEST");

        conf.setIfMissing("spark.executor.instance", Integer.toString(1));
        conf.setIfMissing("spark.executor.core", Integer.toString(1));
        conf.setIfMissing("spark.executor.memory", "512m");
        conf.setIfMissing("spark.driver.memory", "512m");

        String blockIntervalString = Long.toString(1000L);
        conf.setIfMissing("spark.streaming.blockInterval", blockIntervalString);
        conf.setIfMissing("spark.streaming.gracefulStopTimeout", blockIntervalString);
        // spark.cleaner.ttl is specified in seconds.
        conf.setIfMissing("spark.cleaner.ttl", Integer.toString(20 * 3000));
        conf.setIfMissing("spark.logConf", "true");
        conf.setIfMissing("spark.ui.port", Integer.toString(4040));

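        // Ship this program's own jar to the workers so that the anonymous
        // Function classes defined below can be deserialized on the executors.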
        try {
            conf.setJars(new String[] {
                    Main.class.getProtectionDomain().getCodeSource().getLocation().toURI().getPath() });
        } catch (Exception e) {
            throw new IllegalStateException(e);
        }

        // Batch interval of 100 ms. Note that this is shorter than the 1000 ms
        // spark.streaming.blockInterval configured above, so most batches will
        // contain no blocks; the two intervals usually need to be aligned.
        JavaStreamingContext streamingContext = new JavaStreamingContext(new JavaSparkContext(conf),
                new Duration(100));

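        // Kafka consumer settings. Note that only the commented-out
        // createStream overload below consumes this map; the active call uses
        // the simpler (zkQuorum, groupId, topics) form.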
        Map<String, String> kafkaParams = new HashMap<>();
        kafkaParams.put("zookeeper.connect", "192.168.56.101:2181");
        kafkaParams.put("group.id", "LAMBDA-BATCHLAYER-" + System.currentTimeMillis());
        kafkaParams.put("serializer.encoding", "UTF-8");

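        // Read the "LambdaInput" topic with a single receiver thread.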
        Map<String, Integer> topicMap = Maps.newHashMap();
        topicMap.put("LambdaInput", 1);

        JavaPairDStream<String, String> dstream = KafkaUtils.createStream(streamingContext, "192.168.56.101:2181",
                "GROUP", topicMap);
        //        JavaPairDStream<String, String> dstream = KafkaUtils.createStream(streamingContext,
        //                String.class,
        //                String.class,
        //                StringDecoder.class,
        //                StringDecoder.class,
        //                kafkaParams, topicMap,
        //                StorageLevel.MEMORY_AND_DISK_2());

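        // Keep only the message payloads, discarding the Kafka message keys.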
        JavaDStream<String> lines = dstream.map(new Function<Tuple2<String, String>, String>() {
            @Override
            public String call(Tuple2<String, String> tuple2) {
                System.out.println("message: " + tuple2._2());
                return tuple2._2();
            }
        });

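        // Split each comma-separated message into individual words.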
        JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
            @Override
            public Iterable<String> call(String x) {
                System.out.println("x: " + x);
                return Lists.newArrayList(x.split(","));
            }
        });

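        // Classic word count: emit a (word, 1) pair per word, then sum the
        // counts per word within each batch.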
        JavaPairDStream<String, Integer> wordCounts = words.mapToPair(new PairFunction<String, String, Integer>() {
            @Override
            public Tuple2<String, Integer> call(String s) {
                return new Tuple2<String, Integer>(s, 1);
            }
        }).reduceByKey(new Function2<Integer, Integer, Integer>() {
            @Override
            public Integer call(Integer i1, Integer i2) {
                return i1 + i2;
            }
        });

        wordCounts.print();

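        // print() is this job's output operation; Spark Streaming requires at
        // least one output operation, or start() fails because no output
        // operations are registered.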
        streamingContext.start();
        streamingContext.awaitTermination();

    }

    /**
     * Logs each generated RDD's Time and element count; see the usage note
     * after the class for how to wire it in with foreachRDD.
     */
    private static class PrintFunction implements Function2<JavaPairRDD<String, String>, Time, Void> {

        @Override
        public Void call(JavaPairRDD<String, String> t1, Time t2) throws Exception {
            if (t1.take(1).isEmpty()) {
                System.out.println("No data in current generation's RDD; nothing to do");
                return null;
            }
            System.out.println("-->Time: " + t2.toString() + " count: " + t1.count());
            return null;
        }

    }
}
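
Note that the PrintFunction helper is declared but never wired into the pipeline. A minimal sketch of how it could be used, assuming the Spark 1.x foreachRDD overload that accepts a Function2<R, Time, Void> (add the call before streamingContext.start()):

        dstream.foreachRDD(new PrintFunction());

This logs each micro-batch's Time and element count, in addition to the first few word counts that print() already shows.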