Demonstrates Apache Spark Streaming by consuming data from Twitter and printing the number of tweets per user - Java Social Media

Java examples for Social Media: Twitter

Description

Demonstrates Apache Spark Streaming by consuming a stream of tweets from Twitter and, for each batch, printing the number of English-language tweets posted by each user.

Demo Code



    import org.apache.spark.SparkConf;
    import org.apache.spark.api.java.JavaPairRDD;
    import org.apache.spark.api.java.function.VoidFunction;
    import org.apache.spark.streaming.Duration;
    import org.apache.spark.streaming.api.java.JavaDStream;
    import org.apache.spark.streaming.api.java.JavaPairDStream;
    import org.apache.spark.streaming.api.java.JavaReceiverInputDStream;
    import org.apache.spark.streaming.api.java.JavaStreamingContext;
    import org.apache.spark.streaming.twitter.TwitterUtils;

    import scala.Tuple2;
    import twitter4j.Status;
    import twitter4j.auth.Authorization;
    import twitter4j.auth.OAuthAuthorization;
    import twitter4j.conf.Configuration;
    import twitter4j.conf.ConfigurationBuilder;

    import com.google.common.collect.Iterables;

    /**
     * Spark Streaming demo: reads a stream of tweets from Twitter and, for every batch,
     * prints one {@code screenName,count} line per user giving the number of
     * English-language tweets that user posted in the batch.
     *
     * <p>The OAuth credentials below are placeholders and must be replaced with real
     * values before the program is run.
     */
    public class SparkTwitterDataProcessor {

        /** Length of each streaming batch, in milliseconds. */
        private static final long BATCH_INTERVAL_MS = 10000L;

        /**
         * Builds the streaming pipeline, starts it, and blocks until it terminates.
         *
         * @param args command-line arguments (unused)
         * @throws InterruptedException if the thread is interrupted while waiting for
         *         the streaming context to terminate
         */
        public static void main(String[] args) throws InterruptedException {
            final SparkConf sparkConf = new SparkConf()
                    .setAppName("Twitter Data Processing")
                    .setMaster("local[10]");
            final JavaStreamingContext streamingContext =
                    new JavaStreamingContext(sparkConf, new Duration(BATCH_INTERVAL_MS));
            try {
                // The stream is opened with an empty keyword-filter array.
                final JavaReceiverInputDStream<Status> inputDStream = TwitterUtils.createStream(
                        streamingContext, buildTwitterAuthorization(), new String[]{});

                printCounts(countEnglishTweetsByUser(inputDStream));

                streamingContext.start();
                streamingContext.awaitTermination();
            } finally {
                // Release the context even when start-up or the streaming job fails.
                streamingContext.stop();
            }
        }

        /** Creates the twitter4j OAuth authorization from the (placeholder) credentials. */
        private static Authorization buildTwitterAuthorization() {
            final Configuration conf = new ConfigurationBuilder().setDebugEnabled(false)
                    .setOAuthConsumerKey("<put-your-consumer-key>")
                    .setOAuthConsumerSecret("<put-your-consumer-key-secret>")
                    .setOAuthAccessToken("<put-your-access-token>")
                    .setOAuthAccessTokenSecret("<put-your-access-token-secret>")
                    .build();
            return new OAuthAuthorization(conf);
        }

        /**
         * Keeps only tweets whose language is "en" (case-insensitive) and counts them
         * per user screen name within each batch.
         */
        private static JavaPairDStream<String, Integer> countEnglishTweetsByUser(
                JavaDStream<Status> tweets) {
            final JavaDStream<Status> enTweetsDStream =
                    tweets.filter(status -> "en".equalsIgnoreCase(status.getLang()));
            // Emit (user, 1) and sum with reduceByKey rather than grouping every tweet
            // text by user just to measure the size of the group.
            final JavaPairDStream<String, Integer> onePerTweet = enTweetsDStream.mapToPair(
                    status -> new Tuple2<String, Integer>(status.getUser().getScreenName(), 1));
            return onePerTweet.reduceByKey((left, right) -> left + right);
        }

        /** Prints every (user, count) pair of every batch as {@code user,count}. */
        private static void printCounts(JavaPairDStream<String, Integer> tweetsCountByUser) {
            // The explicit cast selects the VoidFunction overload of foreachRDD.
            tweetsCountByUser.foreachRDD((VoidFunction<JavaPairRDD<String, Integer>>) pairRDD ->
                    pairRDD.foreach(t -> System.out.println(t._1() + "," + t._2())));
        }
    }

Related Tutorials