com.nlp.twitterstream.FilteredTwitterStream.java Source code

Java tutorial

Introduction

Here is the source code for com.nlp.twitterstream.FilteredTwitterStream.java

Source

/*
twitter-spell-check - Uses Natural Language Processing to spell-check tweets and add new data to dictionary
Copyright (C) 2013  Sudeep Duggal
    
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
    
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU Affero General Public License for more details.
    
You should have received a copy of the GNU Affero General Public License
along with this program.  If not, see <http://www.gnu.org/licenses/>.
    
Contact- Sudeep Duggal - sudeepduggal@gmail.com
*/

package com.nlp.twitterstream;

import java.util.Timer;
import java.util.TimerTask;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.TimeUnit;

import org.json.simple.JSONObject;
import org.json.simple.parser.JSONParser;
import org.json.simple.parser.ParseException;

import com.google.common.collect.Lists;

import com.nlp.spellcheck.SpellCheck;
import com.twitter.hbc.ClientBuilder;
import com.twitter.hbc.core.Constants;
import com.twitter.hbc.core.endpoint.StatusesFilterEndpoint;
import com.twitter.hbc.core.processor.StringDelimitedProcessor;
import com.twitter.hbc.httpclient.BasicClient;
import com.twitter.hbc.httpclient.auth.Authentication;
import com.twitter.hbc.httpclient.auth.OAuth1;

/**
 * Reads a filtered Twitter status stream, extracts the tweet text of every
 * received message and buffers it in {@link #streamMessageQueue} so that a
 * timer task can run the spell checker over it.
 *
 * <p>{@link #oauth(String, String, String, String)} (or {@link #getStream()},
 * which delegates to it) must be called first: it is the only place where
 * {@code client} and {@code queue} are assigned, and the other methods
 * dereference them.
 */
public class FilteredTwitterStream {

    // Raw delimited messages delivered by the streaming client; created in oauth()
    BlockingQueue<String> queue;

    // Streaming client; built and connected in oauth()
    BasicClient client;

    // Message of the client's exit event, recorded whenever the client is found done
    String clientDoneMsg = "";

    // Timer driving the periodic spell check; created in startSpellCheckTimer()
    Timer spellCheckTimer;

    // Queue for streaming message text (bounded to 10000 entries)
    LinkedBlockingQueue<String> streamMessageQueue = new LinkedBlockingQueue<String>(10000);

    /**
     * Get twitter stream.
     *
     * <p>Connects using the credentials held in {@code TwitterStreamNLPConstants}.
     * If the connecting thread is interrupted, the stack trace is printed and
     * the thread's interrupt flag is restored before returning.
     */
    public void getStream() {
        try {
            // Connect to stream
            oauth(TwitterStreamNLPConstants.consumerKey, TwitterStreamNLPConstants.consumerSecret,
                    TwitterStreamNLPConstants.token, TwitterStreamNLPConstants.secret);
        } catch (InterruptedException e) {
            // Restore the flag so code further up the stack can still see the interruption
            Thread.currentThread().interrupt();
            e.printStackTrace();
        }
    }

    /**
     * Authenticate via OAuth1 and open the filtered stream.
     *
     * <p>Creates a fresh message queue (capacity 10000), builds the client for
     * the track terms and languages configured in
     * {@code TwitterStreamNLPConstants}, connects it and requests a
     * reconnection if the client is already reported done afterwards.
     *
     * @param consumerKey OAuth consumer key
     * @param consumerSecret OAuth consumer secret
     * @param token OAuth access token
     * @param secret OAuth access token secret
     * @throws InterruptedException declared for callers; kept for interface compatibility
     */
    public void oauth(String consumerKey, String consumerSecret, String token, String secret)
            throws InterruptedException {

        queue = new LinkedBlockingQueue<String>(10000);

        // Create endpoints with track terms
        StatusesFilterEndpoint endpoint = new StatusesFilterEndpoint();
        endpoint.trackTerms(Lists.newArrayList(TwitterStreamNLPConstants.trackTermsFilter));
        endpoint.languages(Lists.newArrayList(TwitterStreamNLPConstants.languagesFilter));

        Authentication auth = new OAuth1(consumerKey, consumerSecret, token, secret);

        // Create a new BasicClient. By default gzip is enabled.
        client = new ClientBuilder().hosts(Constants.STREAM_HOST).endpoint(endpoint).authentication(auth)
                .processor(new StringDelimitedProcessor(queue)).build();

        // Establish a connection
        client.connect();

        if (!isConnectionActive()) {
            handleReconnection();
        }
    }

    /**
     * Move message texts from the raw stream queue into
     * {@link #streamMessageQueue}.
     *
     * <p>Polls the raw queue in {@code POLLTIME}-second steps while the client
     * is not done. Once the stream has been silent for at least
     * {@code RECONNECTATTEMPT} seconds a reconnection is requested and the
     * silence counter starts over. When the client is found done, one final
     * reconnection is requested and the method returns. If the polling thread
     * is interrupted, the interrupt flag is restored and the method returns.
     */
    public void parseMessages() {

        // Seconds of uninterrupted silence on the stream
        int silentSeconds = 0;

        while (isConnectionActive()) {
            String msg;
            try {
                msg = queue.poll(TwitterStreamNLPConstants.POLLTIME, TimeUnit.SECONDS);
            } catch (InterruptedException e) {
                // Restore the flag so the caller can react to the interruption
                Thread.currentThread().interrupt();
                e.printStackTrace();
                return;
            }

            if (msg == null) {
                System.out.println("Did not receive a message in 5 seconds");

                silentSeconds += TwitterStreamNLPConstants.POLLTIME;

                if (silentSeconds >= TwitterStreamNLPConstants.RECONNECTATTEMPT) {
                    handleReconnection();
                    // Start a new silence window; without the reset every further
                    // empty poll would immediately request another reconnect
                    silentSeconds = 0;
                }
                continue;
            }

            // A message arrived, so the stream is not silent any more
            silentSeconds = 0;

            // Get message text
            String msgText = getMessageText(msg);

            // Skip messages without text (null) and messages that could not be
            // parsed (empty string); there is nothing to spell-check in them
            if (msgText != null && msgText.length() > 0) {
                // Best effort: offer() drops the text when the bounded queue is full
                streamMessageQueue.offer(msgText);
            }
        }

        // Handle reconnection as connection not alive
        handleReconnection();
    }

    /**
     * Start timer for spell checking.
     *
     * <p>The task first runs after 5000 ms and is then rescheduled with a
     * 30000 ms delay. A timer started by an earlier call is cancelled first so
     * that its thread is not leaked.
     */
    public void startSpellCheckTimer() {
        if (spellCheckTimer != null) {
            spellCheckTimer.cancel();
        }
        spellCheckTimer = new Timer();
        spellCheckTimer.schedule(new SpellCheckTimerTask(), 5000, 30000);
    }

    /**
     * SpellCheck TimerTask class: drains {@link #streamMessageQueue} and runs
     * the spell checker on every buffered text.
     *
     * @author sudeep
     */
    class SpellCheckTimerTask extends TimerTask {

        @Override
        public void run() {
            String text;
            // poll() hands back null on an empty queue instead of throwing
            while ((text = streamMessageQueue.poll()) != null) {
                try {
                    SpellCheck spellCheckObj = new SpellCheck();
                    spellCheckObj.runSpellCheck(text);
                } catch (RuntimeException e) {
                    // An unchecked exception escaping run() terminates the Timer
                    // thread and cancels all future runs, so report it and go on
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Get message from twitter stream json.
     *
     * @param msg one JSON-encoded stream message
     * @return the value of the {@code "text"} field; {@code null} if the
     *         message is a JSON object without a string {@code "text"} field;
     *         the empty string if {@code msg} is {@code null}, cannot be
     *         parsed, or is not a JSON object
     */
    public String getMessageText(String msg) {

        if (msg == null) {
            return "";
        }

        Object parsed;
        try {
            // A parser instance is created per call, as the original code did
            parsed = new JSONParser().parse(msg);
        } catch (ParseException e) {
            e.printStackTrace();
            return "";
        }

        // Valid JSON that is not an object (array, literal, null) carries no tweet text
        if (!(parsed instanceof JSONObject)) {
            return "";
        }

        Object text = ((JSONObject) parsed).get("text");

        return (text instanceof String) ? (String) text : null;
    }

    /**
     * Check if connection closed.
     *
     * <p>When the client reports that it is done, the message of its exit
     * event is stored in {@link #clientDoneMsg} and printed.
     *
     * @return {@code true} while the client is not done, {@code false} otherwise
     */
    public boolean isConnectionActive() {
        if (!client.isDone()) {
            return true;
        }

        clientDoneMsg = client.getExitEvent().getMessage();

        System.out.println("Client connection closed unexpectedly: " + clientDoneMsg);

        return false;
    }

    /**
     * Request a reconnection of the client.
     *
     * <p>TODO: Handle reconnection with backoff strategies based on error code.
     *
     * @return {@code true} if the client is not done after the reconnect
     *         request, {@code false} if it is still reported done
     */
    public boolean handleReconnection() {

        // Attempt to reconnect immediately
        client.reconnect();

        return isConnectionActive();
    }

    /**
     * Close connection. Does nothing if no client has been created yet.
     */
    public void closeConnection() {
        if (client != null) {
            client.stop();
        }
    }

}