com.pulzitinc.flume.source.TwitterSource.java Source code

Java tutorial

Introduction

Here is the source code for com.pulzitinc.flume.source.TwitterSource.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * <p/>
 * http://www.apache.org/licenses/LICENSE-2.0
 * <p/>
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.pulzitinc.flume.source;

import org.apache.flume.Context;
import org.apache.flume.Event;
import org.apache.flume.EventDrivenSource;
import org.apache.flume.channel.ChannelProcessor;
import org.apache.flume.conf.Configurable;
import org.apache.flume.event.EventBuilder;
import org.apache.flume.source.AbstractSource;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import twitter4j.*;
import twitter4j.conf.ConfigurationBuilder;
import twitter4j.json.DataObjectFactory;

import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;

/**
 * A Flume Source which pulls data from Twitter's streaming API. It opens the
 * stream via {@code TwitterStream.user()} and turns every status update it
 * receives into a Flume event whose body is the raw JSON of the tweet.
 */
public class TwitterSource extends AbstractSource implements EventDrivenSource, Configurable {

    private static final Logger logger = LoggerFactory.getLogger(TwitterSource.class);

    /** Name of the event header that carries the tweet creation time (epoch millis). */
    private static final String TIMESTAMP_HEADER = "timestamp";

    /** Information necessary for accessing the Twitter API */
    private String consumerKey;
    private String consumerSecret;
    private String accessToken;
    private String accessTokenSecret;

    /** The actual Twitter stream. It's set up to collect raw JSON data */
    private TwitterStream twitterStream;

    /**
     * Account IDs read from the configuration.
     * NOTE(review): nothing in this class hands these IDs to the stream
     * ({@link #start()} calls {@code twitterStream.user()} without arguments).
     * Confirm whether a follow filter was intended.
     */
    private long[] accountsToFollow;

    /**
     * The initialization method for the Source. Reads the OAuth credentials
     * and the comma-separated list of account IDs from the Flume context and
     * builds the (not yet started) Twitter stream.
     *
     * @param context the Flume configuration for this source
     * @throws IllegalStateException if no account IDs are configured
     * @throws NumberFormatException if an account ID is not a valid long
     */
    @Override
    public void configure(Context context) {
        // Deliberately not logging the whole context: it contains the OAuth
        // secrets that are read just below.
        consumerKey = context.getString(TwitterSourceConstants.CONSUMER_KEY_KEY);
        consumerSecret = context.getString(TwitterSourceConstants.CONSUMER_SECRET_KEY);
        accessToken = context.getString(TwitterSourceConstants.ACCESS_TOKEN_KEY);
        accessTokenSecret = context.getString(TwitterSourceConstants.ACCESS_TOKEN_SECRET_KEY);

        accountsToFollow = parseAccountIds(context.getString(TwitterSourceConstants.ACCOUNT_IDS_KEY, ""));
        logger.info("Configured Twitter source with {} account id(s)", accountsToFollow.length);

        ConfigurationBuilder cb = new ConfigurationBuilder();
        cb.setOAuthConsumerKey(consumerKey);
        cb.setOAuthConsumerSecret(consumerSecret);
        cb.setOAuthAccessToken(accessToken);
        cb.setOAuthAccessTokenSecret(accessTokenSecret);
        // Raw JSON must be retained so the listener can forward it as the event body.
        cb.setJSONStoreEnabled(true);
        cb.setIncludeEntitiesEnabled(true);

        twitterStream = new TwitterStreamFactory(cb.build()).getInstance();
    }

    /**
     * Parses a comma-separated list of numeric account IDs. Whitespace around
     * each entry is ignored and blank entries are skipped.
     *
     * @param accountIdsString the raw configuration value, never {@code null}
     * @return the parsed IDs, in configuration order; never empty
     * @throws IllegalStateException if the value contains no IDs at all
     * @throws NumberFormatException if a non-blank entry is not a valid long
     */
    private static long[] parseAccountIds(String accountIdsString) {
        String[] tokens = accountIdsString.split(",");
        long[] ids = new long[tokens.length];
        int count = 0;
        for (String token : tokens) {
            String trimmed = token.trim();
            if (trimmed.length() == 0) {
                continue;
            }
            ids[count++] = Long.parseLong(trimmed);
        }
        if (count == 0) {
            throw new IllegalStateException("No accounts to follow provided");
        }
        return count == ids.length ? ids : Arrays.copyOf(ids, count);
    }

    /**
     * Start processing events. Registers a listener on the Twitter stream,
     * opens the stream with {@code user()}, and forwards every received
     * status to the channel processor as a Flume event.
     */
    @Override
    public void start() {
        // The channel is the piece of Flume that sits between the Source and Sink,
        // and is used to process events.
        final ChannelProcessor channel = getChannelProcessor();

        // The UserStreamListener is a twitter4j API, which can be added to a Twitter
        // stream, and will execute methods every time a message comes in through
        // the stream.
        UserStreamListener listener = new UserStreamAdapter() {
            // The onStatus method is executed every time a new tweet comes in.
            @Override
            public void onStatus(Status status) {
                // Fetch the raw JSON once; it is used for both logging and the event body.
                String rawJson = TwitterObjectFactory.getRawJSON(status);
                if (rawJson == null) {
                    logger.warn("No raw JSON available for status {}; skipping event", status.getId());
                    return;
                }

                if (logger.isDebugEnabled()) {
                    logger.debug("{}: {} - {}",
                            new Object[] { status.getUser().getScreenName(), status.getText(), rawJson });
                }

                // Build a fresh header map per event so no mutable state is shared
                // between listener invocations.
                Map<String, String> headers = new HashMap<String, String>();
                headers.put(TIMESTAMP_HEADER, String.valueOf(status.getCreatedAt().getTime()));

                // Encode explicitly as UTF-8 rather than with the platform default charset.
                Event event = EventBuilder.withBody(rawJson.getBytes(StandardCharsets.UTF_8), headers);

                channel.processEvent(event);
            }

            @Override
            public void onException(Exception ex) {
                logger.error("Error while consuming the Twitter stream", ex);
            }
        };

        logger.debug("Setting up Twitter stream using consumer key {} and access token {}",
                consumerKey, accessToken);

        // Set up the stream's listener (defined above),
        twitterStream.addListener(listener);

        logger.debug("Starting up Twitter consuming...");

        twitterStream.user();

        super.start();
    }

    /**
     * Stops the Source's event processing and shuts down the Twitter stream.
     * Safe to call even if {@link #configure(Context)} never created a stream.
     */
    @Override
    public void stop() {
        logger.debug("Shutting down Twitter user stream...");
        if (twitterStream != null) {
            twitterStream.shutdown();
        }
        super.stop();
    }
}