Java tutorial
/*
 * Copyright 2015 Cask Data, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package co.cask.hydrator.plugin.realtime.source;

import co.cask.cdap.api.annotation.Description;
import co.cask.cdap.api.annotation.Name;
import co.cask.cdap.api.annotation.Plugin;
import co.cask.cdap.api.data.format.StructuredRecord;
import co.cask.cdap.api.data.schema.Schema;
import co.cask.cdap.etl.api.Emitter;
import co.cask.cdap.etl.api.PipelineConfigurer;
import co.cask.cdap.etl.api.realtime.RealtimeContext;
import co.cask.cdap.etl.api.realtime.RealtimeSource;
import co.cask.cdap.etl.api.realtime.SourceState;
import co.cask.hydrator.common.ReferencePluginConfig;
import co.cask.hydrator.common.ReferenceRealtimeSource;
import com.google.common.collect.Queues;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import twitter4j.GeoLocation;
import twitter4j.StallWarning;
import twitter4j.Status;
import twitter4j.StatusDeletionNotice;
import twitter4j.StatusListener;
import twitter4j.TwitterStream;
import twitter4j.TwitterStreamFactory;
import twitter4j.conf.ConfigurationBuilder;

import java.util.Date;
import java.util.Queue;
import javax.annotation.Nullable;

/**
 * Implementation of {@link RealtimeSource} that reads data from Twitter API.
 * Users should pass in the following runtime arguments with appropriate OAuth credentials
 * ConsumerKey, ConsumerSecret, AccessToken, AccessTokenSecret.
*/ @Plugin(type = "realtimesource") @Name("Twitter") @Description("Samples tweets in real-time") public class TwitterSource extends ReferenceRealtimeSource<StructuredRecord> { private static final Logger LOG = LoggerFactory.getLogger(TwitterSource.class); private static final String CONSUMER_KEY = "ConsumerKey"; private static final String CONSUMER_SECRET = "ConsumerSecret"; private static final String ACCESS_TOKEN = "AccessToken"; private static final String ACCESS_SECRET = "AccessTokenSecret"; private static final String ID = "id"; private static final String MSG = "message"; private static final String LANG = "lang"; private static final String TIME = "time"; private static final String FAVC = "favCount"; private static final String RTC = "rtCount"; private static final String SRC = "source"; private static final String GLAT = "geoLat"; private static final String GLNG = "geoLong"; private static final String ISRT = "isRetweet"; private static final Schema SCHEMA = Schema.recordOf("tweet", Schema.Field.of(ID, Schema.of(Schema.Type.LONG)), Schema.Field.of(MSG, Schema.of(Schema.Type.STRING)), Schema.Field.of(LANG, Schema.nullableOf(Schema.of(Schema.Type.STRING))), Schema.Field.of(TIME, Schema.nullableOf(Schema.of(Schema.Type.LONG))), Schema.Field.of(FAVC, Schema.of(Schema.Type.INT)), Schema.Field.of(RTC, Schema.of(Schema.Type.INT)), Schema.Field.of(SRC, Schema.nullableOf(Schema.of(Schema.Type.STRING))), Schema.Field.of(GLAT, Schema.nullableOf(Schema.of(Schema.Type.DOUBLE))), Schema.Field.of(GLNG, Schema.nullableOf(Schema.of(Schema.Type.DOUBLE))), Schema.Field.of(ISRT, Schema.of(Schema.Type.BOOLEAN))); private TwitterStream twitterStream; private StatusListener statusListener; private Queue<Status> tweetQ = Queues.newConcurrentLinkedQueue(); private final TwitterConfig twitterConfig; public TwitterSource(TwitterConfig twitterConfig) { super(twitterConfig); this.twitterConfig = twitterConfig; } /** * Config class for TwitterSource. 
*/ public static class TwitterConfig extends ReferencePluginConfig { @Name(CONSUMER_KEY) @Description("Consumer Key") private String consumerKey; @Name(CONSUMER_SECRET) @Description("Consumer Secret") private String consumeSecret; @Name(ACCESS_TOKEN) @Description("Access Token") private String accessToken; @Name(ACCESS_SECRET) @Description("Access Token Secret") private String accessTokenSecret; public TwitterConfig(String referenceName, String consumerKey, String consumeSecret, String accessToken, String accessTokenSecret) { super(referenceName); this.consumerKey = consumerKey; this.consumeSecret = consumeSecret; this.accessToken = accessToken; this.accessTokenSecret = accessTokenSecret; } } @Override public void configurePipeline(PipelineConfigurer pipelineConfigurer) { super.configurePipeline(pipelineConfigurer); pipelineConfigurer.getStageConfigurer().setOutputSchema(SCHEMA); } private StructuredRecord convertTweet(Status tweet) { StructuredRecord.Builder recordBuilder = StructuredRecord.builder(SCHEMA); recordBuilder.set(ID, tweet.getId()); recordBuilder.set(MSG, tweet.getText()); recordBuilder.set(LANG, tweet.getLang()); Date tweetDate = tweet.getCreatedAt(); if (tweetDate != null) { recordBuilder.set(TIME, tweetDate.getTime()); } recordBuilder.set(FAVC, tweet.getFavoriteCount()); recordBuilder.set(RTC, tweet.getRetweetCount()); recordBuilder.set(SRC, tweet.getSource()); if (tweet.getGeoLocation() != null) { recordBuilder.set(GLAT, tweet.getGeoLocation().getLatitude()); recordBuilder.set(GLNG, tweet.getGeoLocation().getLongitude()); } recordBuilder.set(ISRT, tweet.isRetweet()); return recordBuilder.build(); } @Nullable @Override public SourceState poll(Emitter<StructuredRecord> writer, SourceState currentState) { if (!tweetQ.isEmpty()) { Status status = tweetQ.remove(); StructuredRecord tweet = convertTweet(status); writer.emit(tweet); } return currentState; } @Override public void initialize(RealtimeContext context) throws Exception { 
super.initialize(context); // Disable chatty logging from twitter4j. System.setProperty("twitter4j.loggerFactory", "twitter4j.NullLoggerFactory"); statusListener = new StatusListener() { @Override public void onStatus(Status status) { tweetQ.add(status); } @Override public void onDeletionNotice(StatusDeletionNotice statusDeletionNotice) { // No-op } @Override public void onTrackLimitationNotice(int i) { // No-op } @Override public void onScrubGeo(long l, long l1) { // No-op } @Override public void onStallWarning(StallWarning stallWarning) { // No-op } @Override public void onException(Exception e) { // No-op } }; ConfigurationBuilder configurationBuilder = new ConfigurationBuilder(); configurationBuilder.setDebugEnabled(false).setOAuthConsumerKey(twitterConfig.consumerKey) .setOAuthConsumerSecret(twitterConfig.consumeSecret).setOAuthAccessToken(twitterConfig.accessToken) .setOAuthAccessTokenSecret(twitterConfig.accessTokenSecret); twitterStream = new TwitterStreamFactory(configurationBuilder.build()).getInstance(); twitterStream.addListener(statusListener); twitterStream.sample(); } @Override public void destroy() { if (twitterStream != null) { twitterStream.shutdown(); } } }