// Java tutorial
/* twitter-spell-check - Uses Natural Language Processing to spell-check tweets and add new data to dictionary Copyright (C) 2013 Sudeep Duggal This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. You should have received a copy of the GNU Affero General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. Contact- Sudeep Duggal - sudeepduggal@gmail.com */ package com.nlp.twitterstream; import java.util.Timer; import java.util.TimerTask; import java.util.concurrent.BlockingQueue; import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.TimeUnit; import org.json.simple.JSONObject; import org.json.simple.parser.JSONParser; import org.json.simple.parser.ParseException; import com.google.common.collect.Lists; import com.nlp.spellcheck.SpellCheck; import com.twitter.hbc.ClientBuilder; import com.twitter.hbc.core.Constants; import com.twitter.hbc.core.endpoint.StatusesFilterEndpoint; import com.twitter.hbc.core.processor.StringDelimitedProcessor; import com.twitter.hbc.httpclient.BasicClient; import com.twitter.hbc.httpclient.auth.Authentication; import com.twitter.hbc.httpclient.auth.OAuth1; public class FilteredTwitterStream { BlockingQueue<String> queue; BasicClient client; String clientDoneMsg = ""; Timer spellCheckTimer; // Queue for streaming message text LinkedBlockingQueue<String> streamMessageQueue = new LinkedBlockingQueue<String>(10000); /** * Get twitter stream */ public void getStream() { try { // Connect to stream oauth(TwitterStreamNLPConstants.consumerKey, 
TwitterStreamNLPConstants.consumerSecret, TwitterStreamNLPConstants.token, TwitterStreamNLPConstants.secret); } catch (InterruptedException e) { e.printStackTrace(); return; } } /** * Authenticate via OAuth1 * @param consumerKey * @param consumerSecret * @param token * @param secret * @throws InterruptedException */ public void oauth(String consumerKey, String consumerSecret, String token, String secret) throws InterruptedException { queue = new LinkedBlockingQueue<String>(10000); // Create endpoints with track terms StatusesFilterEndpoint endpoint = new StatusesFilterEndpoint(); endpoint.trackTerms(Lists.newArrayList(TwitterStreamNLPConstants.trackTermsFilter)); endpoint.languages(Lists.newArrayList(TwitterStreamNLPConstants.languagesFilter)); Authentication auth = new OAuth1(consumerKey, consumerSecret, token, secret); // Create a new BasicClient. By default gzip is enabled. client = new ClientBuilder().hosts(Constants.STREAM_HOST).endpoint(endpoint).authentication(auth) .processor(new StringDelimitedProcessor(queue)).build(); // Establish a connection client.connect(); if (!isConnectionActive()) { handleReconnection(); } } /** * Add messages from stream to DB */ public void parseMessages() { String msg = null; int reconnectTimer = 0; // in seconds - 90sec timer before reconnect with backoff strateges boolean isLastMsgNull = false; while (isConnectionActive()) { try { msg = queue.poll(TwitterStreamNLPConstants.POLLTIME, TimeUnit.SECONDS); } catch (InterruptedException e) { e.printStackTrace(); return; } if (msg == null) { System.out.println("Did not receive a message in 5 seconds"); if (isLastMsgNull) { reconnectTimer += TwitterStreamNLPConstants.POLLTIME; if (reconnectTimer >= TwitterStreamNLPConstants.RECONNECTATTEMPT) { handleReconnection(); } else { // DO nothing } } else { reconnectTimer = TwitterStreamNLPConstants.POLLTIME; } isLastMsgNull = true; } else { isLastMsgNull = false; // Get message text String msgText = getMessageText(msg); // Add to stream 
queue if (msgText != null) { // Add twitter stream to queue streamMessageQueue.offer(msgText); } } } // Handle reconnection as connection not alive handleReconnection(); } /** * Start timer for spell checking */ public void startSpellCheckTimer() { spellCheckTimer = new Timer(); spellCheckTimer.schedule(new SpellCheckTimerTask(), 5000, 30000); } /** * SpellCheck TimerTask class * @author sudeep */ class SpellCheckTimerTask extends TimerTask { @Override public void run() { while (!streamMessageQueue.isEmpty()) { SpellCheck spellCheckObj = new SpellCheck(); String res = spellCheckObj.runSpellCheck(streamMessageQueue.remove()); } } }; /** * Get message from twitter stream json * @param msg string * @return text in message */ public String getMessageText(String msg) { String text = ""; JSONParser jsonParser = new JSONParser(); Object obj = null; try { obj = jsonParser.parse(msg); } catch (ParseException e) { e.printStackTrace(); return ""; } JSONObject jsonObject = (JSONObject) obj; text = (String) jsonObject.get("text"); return text; } /** * Check if connection closed */ public boolean isConnectionActive() { if (client.isDone()) { clientDoneMsg = client.getExitEvent().getMessage(); System.out.println("Client connection closed unexpectedly: " + client.getExitEvent().getMessage()); return false; } else { return true; } } /** * TODO: Handle reconnection with backoff strategies * @return */ public boolean handleReconnection() { // Attempt to reconnect immediately client.reconnect(); if (!isConnectionActive()) { // TODO: Attempt to reconnect with backoff strategies based on error code return true; } else { return true; } } /** * Close connection */ public void closeConnection() { client.stop(); } }