Java tutorial
/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ import scala.Tuple2; import com.google.common.collect.Lists; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.*; import org.apache.spark.api.java.StorageLevels; import org.apache.spark.streaming.Durations; import org.apache.spark.streaming.api.java.JavaDStream; import org.apache.spark.streaming.api.java.JavaPairDStream; import org.apache.spark.streaming.api.java.JavaReceiverInputDStream; import org.apache.spark.streaming.api.java.JavaStreamingContext; import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.*; import org.apache.spark.streaming.twitter.TwitterUtils; import twitter4j.conf.ConfigurationBuilder; import twitter4j.TwitterFactory; import twitter4j.Twitter; import twitter4j.Status; import twitter4j.StatusListener; import twitter4j.TwitterStream; import twitter4j.TwitterException; import twitter4j.TwitterStreamFactory; import twitter4j.StatusDeletionNotice; import twitter4j.StallWarning; import twitter4j.conf.Configuration; import java.util.regex.Pattern; import java.util.ArrayList; import java.util.List; import java.io.IOException; import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.Iterator; import
java.util.Calendar;
import java.text.SimpleDateFormat;

/**
 * Spark Streaming example that extracts hashtags from a Twitter stream, counts
 * them over a sliding window and prints the most frequent ones to standard
 * output.
 *
 * <p>Usage: {@code TwitterHashTagCount <windowSeconds> <slideSeconds>}
 */
public final class TwitterHashTagCount {

    /** Maximum number of (count, hashtag) pairs printed for each window. */
    private static final int TOP_TAG_LIMIT = 10;

    /**
     * Matches a hashtag and captures its text without the leading '#'.
     *
     * <p>Compiled once instead of once per tweet. UNICODE_CHARACTER_CLASS makes
     * the word-character and word-boundary classes Unicode-aware, so hashtags
     * written in non-ASCII scripts are captured as well.
     */
    private static final Pattern HASH_TAG_PATTERN =
            Pattern.compile("#(\\w+)\\b", Pattern.UNICODE_CHARACTER_CLASS);

    /**
     * Copies the OAuth credentials held by {@code TwitterOAuthKey} into the
     * {@code twitter4j.oauth.*} system properties.
     */
    // set OAuth credentials
    private static void setTwitterOAuth() {
        // NOTE(review): presumably twitter4j reads these properties when the
        // stream is created - confirm against the twitter4j configuration docs.
        System.setProperty("twitter4j.oauth.consumerKey", TwitterOAuthKey.consumerKey);
        System.setProperty("twitter4j.oauth.consumerSecret", TwitterOAuthKey.consumerSecret);
        System.setProperty("twitter4j.oauth.accessToken", TwitterOAuthKey.accessToken);
        System.setProperty("twitter4j.oauth.accessTokenSecret", TwitterOAuthKey.accessTokenSecret);
    }

    /**
     * Collects every hashtag found in the given text.
     *
     * @param text tweet text; may be {@code null}
     * @return the hashtags in order of appearance, without the leading '#';
     *         empty when the text is {@code null} or contains no hashtag
     */
    private static List<String> extractHashTags(String text) {
        List<String> hashTags = new ArrayList<>();
        if (text == null) {
            // A status without text simply contributes no hashtags.
            return hashTags;
        }
        Matcher matcher = HASH_TAG_PATTERN.matcher(text);
        while (matcher.find()) {
            hashTags.add(matcher.group(1));
        }
        return hashTags;
    }

    /**
     * Prints a timestamped banner followed by the first {@link #TOP_TAG_LIMIT}
     * (count, hashtag) pairs of the given RDD.
     *
     * @param rdd (count, hashtag) pairs; the caller is expected to have sorted them
     */
    private static void printTopHashTags(JavaPairRDD<Integer, String> rdd) {
        List<Tuple2<Integer, String>> topTags = rdd.take(TOP_TAG_LIMIT);
        System.out.println("-------------------------");
        // SimpleDateFormat is not thread-safe, so a fresh instance is created
        // for each call instead of being shared through a static field.
        String timeStamp =
                new SimpleDateFormat("yyyyMMdd_HHmmss").format(Calendar.getInstance().getTime());
        System.out.println(" " + timeStamp);
        System.out.println("-------------------------");
        for (Tuple2<Integer, String> tag : topTags) {
            System.out.println(tag.toString());
        }
    }

    /**
     * Builds and runs the streaming job: tweets -> hashtags -> windowed counts
     * -> pairs sorted by descending count -> printed top entries. Blocks until
     * the streaming context terminates.
     *
     * @param window window length in seconds
     * @param slide  slide interval in seconds
     */
    private static void twitterStreaming(int window, int slide) {
        // Create the context with a 1 second batch size
        // NOTE(review): the application name looks copied from another example;
        // it is left unchanged here because it is visible at runtime.
        SparkConf sparkConf = new SparkConf().setAppName("JavaNetworkWordCount");
        JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, Durations.seconds(1));

        JavaReceiverInputDStream<Status> stream = TwitterUtils.createStream(ssc);

        // Explicitly typed function objects keep the flatMap/foreachRDD calls
        // bound to the same functional interfaces the original code used.
        FlatMapFunction<Status, String> mapFunc = status -> extractHashTags(status.getText());
        VoidFunction<JavaPairRDD<Integer, String>> outFunc = TwitterHashTagCount::printTopHashTags;

        JavaDStream<String> hashTags = stream.flatMap(mapFunc);

        // Pair every hashtag with 1 so the counts can be summed per key.
        JavaPairDStream<String, Integer> tagOnes =
                hashTags.mapToPair(tag -> new Tuple2<>(tag, 1));

        JavaPairDStream<String, Integer> windowedCounts = tagOnes.reduceByKeyAndWindow(
                Integer::sum, Durations.seconds(window), Durations.seconds(slide));

        // Swap to (count, hashtag) so the pairs can be sorted by count.
        JavaPairDStream<Integer, String> countToTag =
                windowedCounts.mapToPair(pair -> pair.swap());

        // sortByKey(false): descending order, highest counts first.
        JavaPairDStream<Integer, String> sortedByCount = countToTag.transformToPair(
                (JavaPairRDD<Integer, String> rdd) -> rdd.sortByKey(false));

        sortedByCount.foreachRDD(outFunc);

        ssc.start();
        ssc.awaitTermination();
    }

    /**
     * Parses a command-line duration given in whole seconds.
     *
     * @param name  human-readable argument name used in error messages
     * @param value raw command-line value
     * @return the parsed number of seconds, always greater than zero
     * @throws IllegalArgumentException if the value is not an integer or is not positive
     */
    private static int parsePositiveSeconds(String name, String value) {
        final int seconds;
        try {
            seconds = Integer.parseInt(value);
        } catch (NumberFormatException e) {
            throw new IllegalArgumentException(
                    name + " must be an integer number of seconds, got: " + value, e);
        }
        if (seconds <= 0) {
            throw new IllegalArgumentException(
                    name + " must be greater than zero, got: " + value);
        }
        return seconds;
    }

    /**
     * Entry point. Expects exactly two arguments: the window length and the
     * slide interval, both in seconds.
     *
     * <p>With a wrong argument count a hint is printed and the method returns.
     * With a non-numeric or non-positive value an error is printed to standard
     * error and the JVM exits with status 1.
     *
     * @param args {@code [windowSeconds, slideSeconds]}
     */
    public static void main(String[] args) {
        if (args.length != 2) {
            System.out.println("please input window time and slide time");
            return;
        }

        final int window;
        final int slide;
        try {
            window = parsePositiveSeconds("window time", args[0]);
            slide = parsePositiveSeconds("slide time", args[1]);
        } catch (IllegalArgumentException e) {
            // Fail with a readable message instead of an uncaught stack trace,
            // while keeping a non-zero exit status for the invalid-input case.
            System.err.println(e.getMessage());
            System.exit(1);
            return; // not reached at run time; satisfies definite assignment
        }

        System.out.println("Windows time = " + args[0] + " seconds");
        System.out.println("Windows slide = " + args[1] + " seconds");

        setTwitterOAuth();
        twitterStreaming(window, slide);
    }
}