com.datatorrent.contrib.twitter.TwitterSampleInput.java Source code

Java tutorial

Introduction

Here is the source code for com.datatorrent.contrib.twitter.TwitterSampleInput.java

Source

/*
 * Copyright (c) 2013 DataTorrent, Inc. ALL Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.datatorrent.contrib.twitter;

import java.util.concurrent.ArrayBlockingQueue;

import javax.validation.constraints.Min;
import javax.validation.constraints.NotNull;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import twitter4j.*;
import twitter4j.conf.ConfigurationBuilder;

import com.datatorrent.api.ActivationListener;
import com.datatorrent.api.Context.OperatorContext;
import com.datatorrent.api.DefaultOutputPort;
import com.datatorrent.api.InputOperator;

/**
 * Read input from Twitter. <p> <br>
 *
 * This test can only be run from command line using command line interface script.
 * You need to set twitter authentication credentials in $HOME/.dt/dt-site.xml file in order to run this.
 * The authentication requires following 4 information.
 * Your application consumer key,
 * Your application consumer secret,
 * Your twitter access token, and
 * Your twitter access token secret.
 *
 * @since 0.3.2
 */
public class TwitterSampleInput implements InputOperator, ActivationListener<OperatorContext>, StatusListener {
    public final transient DefaultOutputPort<Status> status = new DefaultOutputPort<Status>();
    public final transient DefaultOutputPort<String> text = new DefaultOutputPort<String>();
    public final transient DefaultOutputPort<String> url = new DefaultOutputPort<String>();
    public final transient DefaultOutputPort<String> hashtag = new DefaultOutputPort<String>();

    /* the following 3 ports are not implemented so far */
    public final transient DefaultOutputPort<?> userMention = null;
    public final transient DefaultOutputPort<?> media = null;
    /**
     * Enable debugging.
     */
    private boolean debug;
    /**
     * For tapping into the tweets.
     */
    private transient Thread operatorThread;
    private transient TwitterStream ts;
    private transient ArrayBlockingQueue<Status> statuses = new ArrayBlockingQueue<Status>(1024 * 1024);
    transient int count;
    /**
     * The state which we would like to save for this operator.
     */
    private int feedMultiplier = 1;
    @Min(0)
    private int feedMultiplierVariance = 0;

    /* Following twitter access credentials should be set before using this operator. */
    @NotNull
    private String consumerKey;
    @NotNull
    private String consumerSecret;
    @NotNull
    private String accessToken;
    @NotNull
    private String accessTokenSecret;
    /* If twitter connection breaks then do we need to reconnect or exit */
    private boolean reConnect;

    @Override
    public void setup(OperatorContext context) {
        operatorThread = Thread.currentThread();

        if (feedMultiplier != 1) {
            logger.info("Load set to be {}% of the entire twitter feed", feedMultiplier);
        }

        ConfigurationBuilder cb = new ConfigurationBuilder();
        cb.setDebugEnabled(debug).setOAuthConsumerKey(consumerKey).setOAuthConsumerSecret(consumerSecret)
                .setOAuthAccessToken(accessToken).setOAuthAccessTokenSecret(accessTokenSecret);

        ts = new TwitterStreamFactory(cb.build()).getInstance();
    }

    @Override
    public void teardown() {
        ts = null;
    }

    @Override
    public void onStatus(Status status) {
        int randomMultiplier = feedMultiplier;

        if (feedMultiplierVariance > 0) {
            int min = feedMultiplier - feedMultiplierVariance;
            if (min < 0) {
                min = 0;
            }

            int max = feedMultiplier + feedMultiplierVariance;
            randomMultiplier = min + (int) (Math.random() * ((max - min) + 1));
        }
        try {
            for (int i = randomMultiplier; i-- > 0;) {
                statuses.put(status);
                count++;
            }
        } catch (InterruptedException ex) {
            logger.debug("Streaming interrupted; Passing the inerruption to the operator", ex);
            operatorThread.interrupt();
        }
    }

    @Override
    public void endWindow() {
        if (count % 16 == 0) {
            logger.debug("processed {} statuses", count);
        }
    }

    @Override
    public void onDeletionNotice(StatusDeletionNotice statusDeletionNotice) {
        // do nothing
    }

    @Override
    public void onTrackLimitationNotice(int numberOfLimitedStatuses) {
        // do nothing
    }

    @Override
    public void onScrubGeo(long userId, long upToStatusId) {
        // do nothing
    }

    @Override
    public void onException(Exception ex) {
        logger.error("Sampling Error", ex);
        logger.debug("reconnect: {}", reConnect);
        ts.shutdown();
        if (reConnect) {
            try {
                Thread.sleep(1000);
            } catch (Exception e) {
            }
            setUpTwitterConnection();
        } else {
            operatorThread.interrupt();
        }
    }

    private void setUpTwitterConnection() {
        ConfigurationBuilder cb = new ConfigurationBuilder();
        cb.setDebugEnabled(debug).setOAuthConsumerKey(consumerKey).setOAuthConsumerSecret(consumerSecret)
                .setOAuthAccessToken(accessToken).setOAuthAccessTokenSecret(accessTokenSecret);

        ts = new TwitterStreamFactory(cb.build()).getInstance();
        ts.addListener(TwitterSampleInput.this);
        // we can only listen to tweets containing links by callng ts.links().
        // it seems it requires prior signed agreement with twitter.
        ts.sample();
    }

    @Override
    public void beginWindow(long windowId) {
    }

    @Override
    public void activate(OperatorContext context) {
        ts.addListener(this);
        // we can only listen to tweets containing links by callng ts.links().
        // it seems it requires prior signed agreement with twitter.
        ts.sample();
    }

    @Override
    public void deactivate() {
        ts.shutdown();
    }

    public void setFeedMultiplier(int multiplier) {
        this.feedMultiplier = multiplier;
    }

    public int getFeedMultiplier() {
        return this.feedMultiplier;
    }

    public void setFeedMultiplierVariance(int multiplierVariance) {
        this.feedMultiplierVariance = multiplierVariance;
    }

    public int getFeedMultiplierVariance() {
        return this.feedMultiplierVariance;
    }

    @Override
    public void emitTuples() {
        for (int size = statuses.size(); size-- > 0;) {
            Status s = statuses.poll();
            if (status.isConnected()) {
                status.emit(s);
            }

            if (text.isConnected()) {
                text.emit(s.getText());
            }

            if (url.isConnected()) {
                URLEntity[] entities = s.getURLEntities();
                if (entities != null) {
                    for (URLEntity ue : entities) {
                        url.emit((ue.getExpandedURL() == null ? ue.getURL() : ue.getExpandedURL()).toString());
                    }
                }
            }

            if (hashtag.isConnected()) {
                HashtagEntity[] hashtagEntities = s.getHashtagEntities();
                if (hashtagEntities != null) {
                    for (HashtagEntity he : hashtagEntities) {
                        hashtag.emit(he.getText());
                    }
                }
            }
        }
    }

    /**
     * @param debug the debug to set
     */
    public void setDebug(boolean debug) {
        this.debug = debug;
    }

    /**
     * @param consumerKey the consumerKey to set
     */
    public void setConsumerKey(String consumerKey) {
        this.consumerKey = consumerKey;
    }

    /**
     * @param consumerSecret the consumerSecret to set
     */
    public void setConsumerSecret(String consumerSecret) {
        this.consumerSecret = consumerSecret;
    }

    /**
     * @param accessToken the accessToken to set
     */
    public void setAccessToken(String accessToken) {
        this.accessToken = accessToken;
    }

    /**
     * @param accessTokenSecret the accessTokenSecret to set
     */
    public void setAccessTokenSecret(String accessTokenSecret) {
        this.accessTokenSecret = accessTokenSecret;
    }

    public boolean isReConnect() {
        return reConnect;
    }

    public void setReConnect(boolean reConnect) {
        this.reConnect = reConnect;
    }

    @Override
    @SuppressWarnings({ "ConstantConditions" })
    public int hashCode() {
        int hash = 7;
        hash = 11 * hash + (this.debug ? 1 : 0);
        hash = 11 * hash + this.feedMultiplier;
        hash = 11 * hash + this.feedMultiplierVariance;
        hash = 11 * hash + (this.consumerKey != null ? this.consumerKey.hashCode() : 0);
        hash = 11 * hash + (this.consumerSecret != null ? this.consumerSecret.hashCode() : 0);
        hash = 11 * hash + (this.accessToken != null ? this.accessToken.hashCode() : 0);
        hash = 11 * hash + (this.accessTokenSecret != null ? this.accessTokenSecret.hashCode() : 0);
        return hash;
    }

    @Override
    @SuppressWarnings({ "ConstantConditions" })
    public boolean equals(Object obj) {
        if (obj == null) {
            return false;
        }
        if (getClass() != obj.getClass()) {
            return false;
        }
        final TwitterSampleInput other = (TwitterSampleInput) obj;
        if (this.debug != other.debug) {
            return false;
        }
        if (this.feedMultiplier != other.feedMultiplier) {
            return false;
        }
        if (this.feedMultiplierVariance != other.feedMultiplierVariance) {
            return false;
        }
        if ((this.consumerKey == null) ? (other.consumerKey != null)
                : !this.consumerKey.equals(other.consumerKey)) {
            return false;
        }
        if ((this.consumerSecret == null) ? (other.consumerSecret != null)
                : !this.consumerSecret.equals(other.consumerSecret)) {
            return false;
        }
        if ((this.accessToken == null) ? (other.accessToken != null)
                : !this.accessToken.equals(other.accessToken)) {
            return false;
        }
        if ((this.accessTokenSecret == null) ? (other.accessTokenSecret != null)
                : !this.accessTokenSecret.equals(other.accessTokenSecret)) {
            return false;
        }
        return true;
    }

    @Override
    public String toString() {
        return "TwitterSampleInput{debug=" + debug + ", feedMultiplier=" + feedMultiplier
                + ", feedMultiplierVariance=" + feedMultiplierVariance + ", consumerKey=" + consumerKey
                + ", consumerSecret=" + consumerSecret + ", accessToken=" + accessToken + ", accessTokenSecret="
                + accessTokenSecret + '}';
    }

    private static final Logger logger = LoggerFactory.getLogger(TwitterSampleInput.class);
}