// Java tutorial
/**
 * Copyright 2014 Otto (GmbH & Co KG)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.ottogroup.bi.asap.operator.twitter.consumer;

import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Properties;
import java.util.TimeZone;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.TimeUnit;

import org.apache.commons.lang3.StringUtils;
import org.apache.log4j.Logger;

import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.SerializationFeature;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.ottogroup.bi.asap.component.ComponentConfiguration;
import com.ottogroup.bi.asap.component.ComponentType;
import com.ottogroup.bi.asap.component.annotation.AsapComponent;
import com.ottogroup.bi.asap.component.source.Source;
import com.ottogroup.bi.asap.component.strategy.config.MessageWaitStrategyConfiguration;
import com.ottogroup.bi.asap.exception.RequiredInputMissingException;
import com.ottogroup.bi.asap.exception.handler.ExceptionHandlerType;
import com.ottogroup.bi.asap.exception.handler.config.ExceptionHandlerConfiguration;
import com.ottogroup.bi.asap.mailbox.Mailbox;
import com.ottogroup.bi.asap.mailbox.config.MailboxConfiguration;
import com.ottogroup.bi.asap.message.StreamingDataMessage;
import com.twitter.hbc.ClientBuilder;
import com.twitter.hbc.core.Constants;
import com.twitter.hbc.core.endpoint.Location;
import com.twitter.hbc.core.endpoint.StatusesFilterEndpoint;
import com.twitter.hbc.core.processor.StringDelimitedProcessor;
import com.twitter.hbc.httpclient.BasicClient;
import com.twitter.hbc.httpclient.auth.Authentication;
import com.twitter.hbc.httpclient.auth.OAuth1;

/**
 * Reads data from the <a href="http://twitter.com">twitter.com</a> stream and inserts them into the pipeline.
 * <p>
 * {@link #init(Properties)} validates the configuration and connects the streaming client which writes
 * raw messages into an internal bounded queue. {@link #run()} drains that queue, adds an
 * <code>@timestamp</code> attribute to each message (see {@link #timing(String)}) and hands the result
 * over to the configured {@link Mailbox}.
 *
 * @author mnxfst
 * @since Nov 28, 2014
 */
@AsapComponent(type = ComponentType.SOURCE, name = "twitterStreamConsumer", version = "0.0.1", description = "Reads data from twitter stream")
public class TwitterStreamConsumer implements Source {

    /** our faithful logging service ;-) */
    private static final Logger logger = Logger.getLogger(TwitterStreamConsumer.class);

    /** capacity of the internal queue buffering raw messages received from the streaming client */
    private static final int STREAM_QUEUE_CAPACITY = 100000;

    /**
     * formats the <code>@timestamp</code> attribute, eg. 2014-09-11T08:01:53.000Z - pinned to UTC as the
     * pattern emits a literal 'Z'. {@link SimpleDateFormat} is not synchronized, thus the instance must
     * only be used by one thread at a time
     */
    private final SimpleDateFormat sdf = createTimestampFormat();
    private final ObjectMapper jsonMapper = new ObjectMapper();

    ///////////////////////////////////////////////////////////////////////////////////
    // configuration options
    public static final String CFG_COMPONENT_ID = "twitter.component.id";
    public static final String CFG_TWITTER_CONSUMER_KEY = "twitter.consumer.key";
    public static final String CFG_TWITTER_CONSUMER_SECRET = "twitter.consumer.secret";
    public static final String CFG_TWITTER_TOKEN_KEY = "twitter.token.key";
    public static final String CFG_TWITTER_TOKEN_SECRET = "twitter.token.secret";
    public static final String CFG_TWITTER_TWEET_SEARCH_TERMS = "twitter.tweet.terms";
    public static final String CFG_TWITTER_TWEET_LANGUAGES = "twitter.tweet.languages";
    public static final String CFG_TWITTER_PROFILES = "twitter.profiles";
    //
    ///////////////////////////////////////////////////////////////////////////////////

    ///////////////////////////////////////////////////////////////////////////////////
    // required stream settings
    /** component identifier */
    private String id = null;
    /** consumer key issued by twitter.com */
    private String consumerKey = null;
    /** consumer secret issued by twitter.com */
    private String consumerSecret = null;
    /** token key issued by twitter.com */
    private String tokenKey = null;
    /** token secret issued by twitter.com */
    private String tokenSecret = null;
    /** terms to search for in twitter status stream - applied across all status updates */
    private final List<String> searchTerms = new ArrayList<>();
    /** twitter profiles to retrieve data from - applied in addition to search terms, data will be merged */
    private final List<Long> profiles = new ArrayList<>();
    /** languages to filter twitter stream for */
    private final List<String> languages = new ArrayList<>();
    /** locations to filter twitter stream for */
    private final List<Location> locations = new ArrayList<>();
    /** internal message queue used for buffering before data is being handed over to publisher */
    private final BlockingQueue<String> streamMessageQueue = new LinkedBlockingQueue<String>(STREAM_QUEUE_CAPACITY);
    //
    ///////////////////////////////////////////////////////////////////////////////////

    /** mailbox */
    private Mailbox mailbox = null;
    /** client handling communication with stream.twitter.com */
    private BasicClient twitterClient = null;
    /** state - volatile as {@link #shutdown()} modifies it while {@link #run()} keeps on reading it */
    private volatile boolean isRunning = false;
    /** message counter - incremented inside {@link #run()} only, volatile to make the value visible to readers */
    private volatile long numProcessedMessages = 0;

    /**
     * Reads the configuration, validates it and connects to stream.twitter.com. The client is created only
     * if none has been assigned before (see {@link #setTwitterClient(BasicClient)}).
     *
     * @param props configuration holding the <code>CFG_*</code> settings
     * @throws RequiredInputMissingException thrown in case the properties are missing or empty, any of
     *         component id, consumer key/secret or token key/secret is blank, or neither terms, languages,
     *         profiles nor locations are available to filter the stream for
     * @see com.ottogroup.bi.asap.component.Component#init(java.util.Properties)
     */
    public void init(Properties props) throws RequiredInputMissingException {

        if (props == null || props.isEmpty()) {
            throw new RequiredInputMissingException("Missing required configuration");
        }

        //////////////////////////////////////////////////////////
        // extract required configurational data
        this.consumerKey = props.getProperty(CFG_TWITTER_CONSUMER_KEY);
        this.consumerSecret = props.getProperty(CFG_TWITTER_CONSUMER_SECRET);
        this.tokenKey = props.getProperty(CFG_TWITTER_TOKEN_KEY);
        this.tokenSecret = props.getProperty(CFG_TWITTER_TOKEN_SECRET);
        this.id = props.getProperty(CFG_COMPONENT_ID);

        // blank tokens are skipped: otherwise an empty setting would count as a valid filter
        addNonBlankTrimmed(props.getProperty(CFG_TWITTER_TWEET_SEARCH_TERMS), this.searchTerms);
        addNonBlankTrimmed(props.getProperty(CFG_TWITTER_TWEET_LANGUAGES), this.languages);

        String inProfiles = props.getProperty(CFG_TWITTER_PROFILES);
        if (inProfiles != null) {
            for (String sp : inProfiles.split(",")) {
                if (StringUtils.isNotBlank(sp)) {
                    try {
                        this.profiles.add(Long.parseLong(sp.trim()));
                    } catch (NumberFormatException e) {
                        // invalid identifiers are reported and skipped, the remaining ones are still used
                        logger.error("Failed to parse profile identifier from input '" + sp + "'", e);
                    }
                }
            }
        }
        //
        //////////////////////////////////////////////////////////

        ////////////////////////////////////////////////////////////////////////////////////////////
        // validate provided input before attempting to establish connection with stream.twitter.com
        if (StringUtils.isBlank(id)) {
            throw new RequiredInputMissingException("Missing required component identifier");
        }
        if (StringUtils.isBlank(this.consumerKey)) {
            throw new RequiredInputMissingException(
                    "Missing required consumer key to establish connection with stream.twitter.com");
        }
        if (StringUtils.isBlank(this.consumerSecret)) {
            throw new RequiredInputMissingException(
                    "Missing required consumer secrect to establish connection with stream.twitter.com");
        }
        if (StringUtils.isBlank(this.tokenKey)) {
            throw new RequiredInputMissingException(
                    "Missing required token key to establish connection with stream.twitter.com");
        }
        if (StringUtils.isBlank(this.tokenSecret)) {
            throw new RequiredInputMissingException(
                    "Missing required token secret to establish connection with stream.twitter.com");
        }

        boolean isFilterTermsEmpty = this.searchTerms.isEmpty();
        boolean isLanguagesEmpty = this.languages.isEmpty();
        boolean isUserAccountEmpty = this.profiles.isEmpty();
        boolean isLocationsEmpty = this.locations.isEmpty();

        if (isFilterTermsEmpty && isLanguagesEmpty && isUserAccountEmpty && isLocationsEmpty) {
            throw new RequiredInputMissingException(
                    "Missing information what to filter twitter stream for: terms, languages, user accounts or locations");
        }
        //
        ////////////////////////////////////////////////////////////////////////////////////////////

        //////////////////////////////////////////////////////////
        // establish connection with stream.twitter.com
        Authentication auth = new OAuth1(this.consumerKey, this.consumerSecret, this.tokenKey, this.tokenSecret);

        StatusesFilterEndpoint filterEndpoint = new StatusesFilterEndpoint();
        if (!isFilterTermsEmpty) {
            filterEndpoint.trackTerms(searchTerms);
        }
        if (!isLanguagesEmpty) {
            filterEndpoint.languages(languages);
        }
        if (!isUserAccountEmpty) {
            filterEndpoint.followings(profiles);
        }
        if (!isLocationsEmpty) {
            filterEndpoint.locations(locations);
        }

        // a client assigned beforehand is kept as it is and neither replaced nor connected
        if (this.twitterClient == null) {
            this.twitterClient = new ClientBuilder().name(id).hosts(Constants.STREAM_HOST).endpoint(filterEndpoint)
                    .authentication(auth).processor(new StringDelimitedProcessor(streamMessageQueue)).build();
            this.twitterClient.connect();
        }
        //
        //////////////////////////////////////////////////////////

        this.isRunning = true;
    }

    /**
     * Signals {@link #run()} to leave its processing loop. The twitter client itself is stopped by
     * {@link #run()} after the loop has been left.
     *
     * @return always <code>true</code>
     * @see com.ottogroup.bi.asap.node.pipeline.component.DataComponent#shutdown()
     */
    public boolean shutdown() {
        this.isRunning = false;
        return true;
    }

    /**
     * Polls the internal message queue and forwards each message to the mailbox until the consumer is
     * shut down, the twitter client is done or the executing thread gets interrupted. Finally the twitter
     * client is stopped if it is still active.
     *
     * @see java.lang.Runnable#run()
     */
    public void run() {

        if (logger.isDebugEnabled()) {
            logger.debug("twitter stream consumer initialized [id=" + id + "]");
        }

        boolean interrupted = false;

        // keep on consuming until either the consumer or the client is interrupted
        while (this.isRunning && !this.twitterClient.isDone()) {
            try {
                String msg = streamMessageQueue.poll(100, TimeUnit.MILLISECONDS);
                if (msg != null) {
                    this.mailbox.insert(new StreamingDataMessage(this.id, timing(msg), System.currentTimeMillis()));
                    this.numProcessedMessages++;
                    // TODO implement back pressure handling
                }
            } catch (InterruptedException e) {
                // treat the interrupt as request to stop; the flag is restored after the client has been stopped
                interrupted = true;
                break;
            }
        }

        // stop the twitter client in case the consumer has been interrupted by external signal
        if (this.twitterClient != null && !this.twitterClient.isDone()) {
            this.twitterClient.stop();
        }

        logger.info("twitter stream consumer received " + this.numProcessedMessages + " messages");

        if (interrupted) {
            // hand the interrupt over to the code executing this runnable
            Thread.currentThread().interrupt();
        }
    }

    /**
     * Adds an <code>@timestamp</code> attribute to the provided JSON message. Its value is the content of
     * the <code>timestamp_ms</code> attribute (milliseconds since epoch) formatted as
     * <code>yyyy-MM-dd'T'HH:mm:ss.SSS'Z'</code> in UTC.
     *
     * @param msg message as received from the twitter stream
     * @return the message extended by <code>@timestamp</code>, or the unmodified input if it cannot be
     *         parsed, is not a JSON object or has no <code>timestamp_ms</code> value
     */
    protected String timing(String msg) {
        try {
            // kept as Object to allow a type check instead of a blind cast: the content may be any JSON value
            Object parsed = jsonMapper.readTree(msg);
            if (!(parsed instanceof ObjectNode)) {
                return msg;
            }

            ObjectNode node = (ObjectNode) parsed;
            // not every message is guaranteed to carry a timestamp - pass those on unmodified
            if (!node.has("timestamp_ms") || node.get("timestamp_ms").isNull()) {
                return msg;
            }

            long timestampMillis = node.get("timestamp_ms").asLong();
            node.put("@timestamp", sdf.format(new Date(timestampMillis)));
            return jsonMapper.writeValueAsString(node);
        } catch (IOException e) {
            // best effort: content which cannot be processed is forwarded as it is
            if (logger.isDebugEnabled()) {
                logger.debug("Failed to add timestamp to message, forwarding it unmodified", e);
            }
        }
        return msg;
    }

    /**
     * Creates the formatter used for the <code>@timestamp</code> attribute with its time zone set to UTC.
     * @return formatter for pattern <code>yyyy-MM-dd'T'HH:mm:ss.SSS'Z'</code>
     */
    private static SimpleDateFormat createTimestampFormat() {
        SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'");
        format.setTimeZone(TimeZone.getTimeZone("UTC"));
        return format;
    }

    /**
     * Splits the comma separated input and adds each non-blank element in trimmed form to the target list.
     * @param csv comma separated values, may be <code>null</code>
     * @param target list receiving the extracted values
     */
    private static void addNonBlankTrimmed(String csv, List<String> target) {
        if (csv == null) {
            return;
        }
        for (String token : csv.split(",")) {
            if (StringUtils.isNotBlank(token)) {
                target.add(StringUtils.trim(token));
            }
        }
    }

    /**
     * @see com.ottogroup.bi.asap.component.Component#getId()
     */
    public String getId() {
        return this.id;
    }

    /**
     * @see com.ottogroup.bi.asap.component.Component#setId(java.lang.String)
     */
    public void setId(String id) {
        this.id = id;
    }

    /**
     * @see com.ottogroup.bi.asap.component.Component#getTotalNumOfMessages()
     */
    public long getTotalNumOfMessages() {
        return numProcessedMessages;
    }

    /**
     * @see com.ottogroup.bi.asap.component.source.Source#setMailbox(com.ottogroup.bi.asap.mailbox.Mailbox)
     */
    public void setMailbox(Mailbox mailbox) {
        this.mailbox = mailbox;
    }

    ///////////////////////////////////////////////////////////////////////
    // REQUIRED FOR TESTING ONLY

    protected String getConsumerKey() {
        return consumerKey;
    }

    protected String getConsumerSecret() {
        return consumerSecret;
    }

    protected String getTokenKey() {
        return tokenKey;
    }

    protected String getTokenSecret() {
        return tokenSecret;
    }

    protected List<Long> getProfiles() {
        return profiles;
    }

    protected List<String> getLanguages() {
        return languages;
    }

    protected List<Location> getLocations() {
        return locations;
    }

    protected boolean isRunning() {
        return isRunning;
    }

    protected List<String> getSearchTerms() {
        return searchTerms;
    }

    protected BlockingQueue<String> getStreamMessageQueue() {
        return streamMessageQueue;
    }

    protected void setTwitterClient(BasicClient twitterClient) {
        this.twitterClient = twitterClient;
    }

    protected void setRunning(boolean isRunning) {
        this.isRunning = isRunning;
    }

    //
    ///////////////////////////////////////////////////////////////////////

    /**
     * Prints a sample component configuration for this consumer as indented JSON to stdout.
     * @param args not evaluated
     * @throws Exception thrown in case serializing the configuration fails
     */
    public static void main(String[] args) throws Exception {
        ObjectMapper m = new ObjectMapper();
        m.enable(SerializationFeature.INDENT_OUTPUT);

        ComponentConfiguration cfg = new ComponentConfiguration("twitter-stream-reader", ComponentType.SOURCE,
                "twitterStreamConsumer", "0.0.1");
        cfg.setComponentExceptionHandler(
                new ExceptionHandlerConfiguration(ExceptionHandlerType.COMPONENT_EXCEPTION_HANDLER,
                        "twitterComponentExceptionHandler", "log4jExceptionHandler", "0.0.1"));
        cfg.setExecutorExceptionHandler(
                new ExceptionHandlerConfiguration(ExceptionHandlerType.EXECUTOR_EXCEPTION_HANDLER,
                        "twitterExecutorExceptionHandler", "log4jExceptionHandler", "0.0.1"));
        cfg.setMessageWaitStrategy(new MessageWaitStrategyConfiguration("twitterExecutorWaitStrategy",
                "sleepingMessageWaitStrategy", "0.0.1"));
        cfg.setMailbox(
                new MailboxConfiguration("twitterConsumerMailbox", "oneToOneConcurrentArrayQueueMailbox", "0.0.1"));
        cfg.addSubscription("simpleFilteringOperator");
        cfg.addSetting(TwitterStreamConsumer.CFG_TWITTER_CONSUMER_KEY, "<consumer_id>");
        cfg.addSetting(TwitterStreamConsumer.CFG_TWITTER_CONSUMER_SECRET, "<consumer_secret>");
        cfg.addSetting(TwitterStreamConsumer.CFG_TWITTER_PROFILES, "1,2,3");
        cfg.addSetting(TwitterStreamConsumer.CFG_TWITTER_TOKEN_KEY, "<token_key>");
        cfg.addSetting(TwitterStreamConsumer.CFG_TWITTER_TOKEN_SECRET, "<token_secret>");
        cfg.addSetting(TwitterStreamConsumer.CFG_TWITTER_TWEET_LANGUAGES, "DE,FR,EN");
        cfg.addSetting(TwitterStreamConsumer.CFG_TWITTER_TWEET_SEARCH_TERMS, "SOCCER,FOOTBALL,FUSSBALL");

        System.out.println(m.writeValueAsString(cfg));
    }
}