Java tutorial
/* * Copyright 2014 Borja Gil Perez <borjagilperez at github.com>. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package source; import java.util.HashMap; import java.util.Map; import org.apache.flume.Context; import org.apache.flume.Event; import org.apache.flume.EventDrivenSource; import org.apache.flume.channel.ChannelProcessor; import org.apache.flume.conf.Configurable; import org.apache.flume.event.EventBuilder; import org.apache.flume.source.AbstractSource; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import twitter4j.FilterQuery; import twitter4j.StallWarning; import twitter4j.Status; import twitter4j.StatusDeletionNotice; import twitter4j.StatusListener; import twitter4j.TwitterStream; import twitter4j.TwitterStreamFactory; import twitter4j.conf.ConfigurationBuilder; import twitter4j.json.DataObjectFactory; /** * Twitter Flume Source, which pulls data from Twitter's streaming API. * Currently, this supports pulling from the Streaming API with two request * parameters: locations and keywords. * * @author Borja Gil Perez <borjagilperez at github.com> */ public class TwitterSource extends AbstractSource implements EventDrivenSource, Configurable { private static final Logger logger = LoggerFactory.getLogger(TwitterSource.class); // Information necessary for accessing the Twitter API private String consumerKey; private String consumerSecret; private String accessToken; private String accessTokenSecret; private double[][] locations = new double[2][2]; private String[] keywords; // The actual Twitter stream. It's set up to collect raw JSON data private TwitterStream twitterStream; /** * The initialization method for the Source. The context contains all the * Flume configuration info, and can be used to retrieve any configuration * values necessary to set up the Source. * * @param context Key-value store used to pass configuration information * throughout the system. */ @Override public void configure(Context context) { consumerKey = context.getString(TwitterSourceConstants.CONSUMER_KEY); consumerSecret = context.getString(TwitterSourceConstants.CONSUMER_SECRET); accessToken = context.getString(TwitterSourceConstants.ACCESS_TOKEN); accessTokenSecret = context.getString(TwitterSourceConstants.ACCESS_TOKEN_SECRET); String swString = context.getString(TwitterSourceConstants.SW_LNG_LAT); String neString = context.getString(TwitterSourceConstants.NE_LNG_LAT); if (swString != null && neString != null) { String[] sw = swString.split(","); String[] ne = neString.split(","); if (sw.length == 2 && ne.length == 2) { for (int i = 0; i < 2; i++) { locations[0][i] = Double.parseDouble(sw[i].trim()); locations[1][i] = Double.parseDouble(ne[i].trim()); } } else { locations = null; } } else { locations = null; } String keywordString = context.getString(TwitterSourceConstants.KEYWORDS); if (keywordString != null) { keywords = keywordString.split(","); for (int i = 0; i < keywords.length; i++) { keywords[i] = keywords[i].trim(); } } ConfigurationBuilder cb = new ConfigurationBuilder(); cb.setOAuthConsumerKey(consumerKey); cb.setOAuthConsumerSecret(consumerSecret); cb.setOAuthAccessToken(accessToken); cb.setOAuthAccessTokenSecret(accessTokenSecret); cb.setJSONStoreEnabled(true); cb.setIncludeEntitiesEnabled(true); cb.setIncludeRTsEnabled(true); twitterStream = new TwitterStreamFactory(cb.build()).getInstance(); } /** * Start processing events. This uses the Twitter Streaming API to sample * Twitter, and process tweets. */ @Override public void start() { // The channel is the piece of Flume that sits between the Source and // Sink, and is used to process events final ChannelProcessor channel = getChannelProcessor(); final Map<String, String> headers = new HashMap<String, String>(); // The StatusListener is a twitter4j API, which can be added to a // Twitter stream, and will execute methods every time a message comes // in through the stream StatusListener listener = new StatusListener() { // The onStatus method is executed every time a new tweet comes in public void onStatus(Status status) { // The EventBuilder is used to build an event using the headers // and the raw JSON of a tweet logger.debug(status.getUser().getScreenName() + ": " + status.getText()); headers.put("timestamp", String.valueOf(status.getCreatedAt().getTime())); Event event = EventBuilder.withBody(DataObjectFactory.getRawJSON(status).getBytes(), headers); channel.processEvent(event); } // This listener will ignore everything except for new tweets public void onDeletionNotice(StatusDeletionNotice statusDeletionNotice) { } public void onTrackLimitationNotice(int numberOfLimitedStatuses) { } public void onScrubGeo(long userId, long upToStatusId) { } public void onException(Exception ex) { } public void onStallWarning(StallWarning warning) { } }; logger.debug("Setting up Twitter sample stream using consumer key {} and" + " access token {}", new String[] { consumerKey, accessToken }); // Set up the stream's listener (defined above), and set any necessary // security information twitterStream.addListener(listener); // Set up a filter to pull out industry-relevant tweets logger.debug("Starting up Twitter filtering..."); FilterQuery query = new FilterQuery().count(0); if (locations == null) { logger.debug("No locations specified"); } else { String debugString = "Locations specified: "; debugString += "SW={" + locations[0][0] + ", " + locations[0][1] + "}, "; debugString += "NE={" + locations[1][0] + ", " + locations[1][1] + "}"; logger.debug(debugString); query.locations(locations); } if (keywords == null) { logger.debug("No keywords specified"); } else { String debugString = keywords.length + " keywords specified: "; for (int i = 0; i < keywords.length; i++) { debugString += keywords[i]; if (i != keywords.length - 1) { debugString += ", "; } } logger.debug(debugString); query.track(keywords); } twitterStream.filter(query); super.start(); } /** * Stops the Source's event processing and shuts down the Twitter stream. */ @Override public void stop() { logger.debug("Shutting down Twitter stream..."); twitterStream.shutdown(); super.stop(); } }