Java tutorial
/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * <p/> * http://www.apache.org/licenses/LICENSE-2.0 * <p/> * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.pulzitinc.flume.source; import org.apache.flume.Context; import org.apache.flume.Event; import org.apache.flume.EventDrivenSource; import org.apache.flume.channel.ChannelProcessor; import org.apache.flume.conf.Configurable; import org.apache.flume.event.EventBuilder; import org.apache.flume.source.AbstractSource; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import twitter4j.*; import twitter4j.conf.ConfigurationBuilder; import twitter4j.json.DataObjectFactory; import java.util.HashMap; import java.util.Map; /** * A Flume Source, which pulls data from Twitter's streaming API. Currently, * this only supports pulling from the sample API, and only gets new status * updates. */ public class TwitterSource extends AbstractSource implements EventDrivenSource, Configurable { private static final Logger logger = LoggerFactory.getLogger(TwitterSource.class); /** Information necessary for accessing the Twitter API */ private String consumerKey; private String consumerSecret; private String accessToken; private String accessTokenSecret; /** The actual Twitter stream. It's set up to collect raw JSON data */ private TwitterStream twitterStream; private long[] accountsToFollow; /** * The initialization method for the Source. The context contains all the * Flume configuration info, and can be used to retrieve any configuration * values necessary to set up the Source. */ @Override public void configure(Context context) { logger.info(context.toString()); consumerKey = context.getString(TwitterSourceConstants.CONSUMER_KEY_KEY); consumerSecret = context.getString(TwitterSourceConstants.CONSUMER_SECRET_KEY); accessToken = context.getString(TwitterSourceConstants.ACCESS_TOKEN_KEY); accessTokenSecret = context.getString(TwitterSourceConstants.ACCESS_TOKEN_SECRET_KEY); String accountIdsString = context.getString(TwitterSourceConstants.ACCOUNT_IDS_KEY, ""); if (accountIdsString.trim().length() == 0) { throw new IllegalStateException("No accounts to follow provided"); } else { String[] accountIds = accountIdsString.split(","); accountsToFollow = new long[accountIds.length]; for (int i = 0; i < accountIds.length; i++) { accountsToFollow[i] = Long.valueOf(accountIds[i]); } } ConfigurationBuilder cb = new ConfigurationBuilder(); cb.setOAuthConsumerKey(consumerKey); cb.setOAuthConsumerSecret(consumerSecret); cb.setOAuthAccessToken(accessToken); cb.setOAuthAccessTokenSecret(accessTokenSecret); cb.setJSONStoreEnabled(true); cb.setIncludeEntitiesEnabled(true); twitterStream = new TwitterStreamFactory(cb.build()).getInstance(); } /** * Start processing events. This uses the Twitter Streaming API to sample * Twitter, and process tweets. */ @Override public void start() { // The channel is the piece of Flume that sits between the Source and Sink, // and is used to process events. final ChannelProcessor channel = getChannelProcessor(); final Map<String, String> headers = new HashMap<String, String>(); // The StatusListener is a twitter4j API, which can be added to a Twitter // stream, and will execute methods every time a message comes in through // the stream. UserStreamListener listener = new UserStreamAdapter() { // The onStatus method is executed every time a new tweet comes in. public void onStatus(Status status) { // The EventBuilder is used to build an event using the headers and // the raw JSON of a tweet logger.debug(status.getUser().getScreenName() + ": " + status.getText() + " - " + TwitterObjectFactory.getRawJSON(status)); headers.put("timestamp", String.valueOf(status.getCreatedAt().getTime())); Event event = EventBuilder.withBody(DataObjectFactory.getRawJSON(status).getBytes(), headers); channel.processEvent(event); } public void onException(Exception ex) { ex.printStackTrace(); } }; logger.debug("Setting up Twitter stream using consumer key {} and access token {}", new String[] { consumerKey, accessToken }); // Set up the stream's listener (defined above), twitterStream.addListener(listener); logger.debug("Starting up Twitter consuming..."); twitterStream.user(); super.start(); } /** * Stops the Source's event processing and shuts down the Twitter stream. */ @Override public void stop() { logger.debug("Shutting down Twitter sample stream..."); twitterStream.shutdown(); super.stop(); } }