Java tutorial
/* * Licensed to Elasticsearch under one or more contributor * license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright * ownership. Elasticsearch licenses this file to you under * the Apache License, Version 2.0 (the "License"); you may * not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.elasticsearch.river.twitter; import org.elasticsearch.ExceptionsHelper; import org.elasticsearch.action.bulk.BulkItemResponse; import org.elasticsearch.action.bulk.BulkProcessor; import org.elasticsearch.action.bulk.BulkRequest; import org.elasticsearch.action.bulk.BulkResponse; import org.elasticsearch.client.Client; import org.elasticsearch.client.Requests; import org.elasticsearch.cluster.block.ClusterBlockException; import org.elasticsearch.common.Strings; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.unit.TimeValue; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentFactory; import org.elasticsearch.common.xcontent.support.XContentMapValues; import org.elasticsearch.indices.IndexAlreadyExistsException; import org.elasticsearch.river.AbstractRiverComponent; import org.elasticsearch.river.River; import org.elasticsearch.river.RiverName; import org.elasticsearch.river.RiverSettings; import org.elasticsearch.threadpool.ThreadPool; import twitter4j.*; import twitter4j.conf.ConfigurationBuilder; import twitter4j.json.DataObjectFactory; import java.util.List; import java.util.Map; /** * */ public class TwitterRiver extends AbstractRiverComponent implements River { private final ThreadPool threadPool; private final Client client; private String oauthConsumerKey = null; private String oauthConsumerSecret = null; private String oauthAccessToken = null; private String oauthAccessTokenSecret = null; private String proxyHost; private String proxyPort; private String proxyUser; private String proxyPassword; private boolean raw = false; private boolean ignoreRetweet = false; private final String indexName; private final String typeName; private final int bulkSize; private final int maxConcurrentBulk; private final TimeValue bulkFlushInterval; private FilterQuery filterQuery; private String streamType; private volatile TwitterStream stream; private volatile BulkProcessor bulkProcessor; private volatile boolean closed = false; @SuppressWarnings({ "unchecked" }) @Inject public TwitterRiver(RiverName riverName, RiverSettings settings, Client client, ThreadPool threadPool) { super(riverName, settings); this.client = client; this.threadPool = threadPool; if (settings.settings().containsKey("twitter")) { Map<String, Object> twitterSettings = (Map<String, Object>) settings.settings().get("twitter"); // Check removed properties if (twitterSettings.get("user") != null || twitterSettings.get("password") != null) { logger.warn( "user and password are not supported anymore. See https://github.com/elasticsearch/elasticsearch-river-twitter/issues/28"); } raw = XContentMapValues.nodeBooleanValue(twitterSettings.get("raw"), false); ignoreRetweet = XContentMapValues.nodeBooleanValue(twitterSettings.get("ignore_retweet"), false); if (twitterSettings.containsKey("oauth")) { Map<String, Object> oauth = (Map<String, Object>) twitterSettings.get("oauth"); if (oauth.containsKey("consumerKey")) { oauthConsumerKey = XContentMapValues.nodeStringValue(oauth.get("consumerKey"), null); } if (oauth.containsKey("consumer_key")) { oauthConsumerKey = XContentMapValues.nodeStringValue(oauth.get("consumer_key"), null); } if (oauth.containsKey("consumerSecret")) { oauthConsumerSecret = XContentMapValues.nodeStringValue(oauth.get("consumerSecret"), null); } if (oauth.containsKey("consumer_secret")) { oauthConsumerSecret = XContentMapValues.nodeStringValue(oauth.get("consumer_secret"), null); } if (oauth.containsKey("accessToken")) { oauthAccessToken = XContentMapValues.nodeStringValue(oauth.get("accessToken"), null); } if (oauth.containsKey("access_token")) { oauthAccessToken = XContentMapValues.nodeStringValue(oauth.get("access_token"), null); } if (oauth.containsKey("accessTokenSecret")) { oauthAccessTokenSecret = XContentMapValues.nodeStringValue(oauth.get("accessTokenSecret"), null); } if (oauth.containsKey("access_token_secret")) { oauthAccessTokenSecret = XContentMapValues.nodeStringValue(oauth.get("access_token_secret"), null); } } if (twitterSettings.containsKey("proxy")) { Map<String, Object> proxy = (Map<String, Object>) twitterSettings.get("proxy"); if (proxy.containsKey("host")) { proxyHost = XContentMapValues.nodeStringValue(proxy.get("host"), null); } if (proxy.containsKey("port")) { proxyPort = XContentMapValues.nodeStringValue(proxy.get("port"), null); } if (proxy.containsKey("user")) { proxyUser = XContentMapValues.nodeStringValue(proxy.get("user"), null); } if (proxy.containsKey("password")) { proxyPassword = XContentMapValues.nodeStringValue(proxy.get("password"), null); } } streamType = XContentMapValues.nodeStringValue(twitterSettings.get("type"), "sample"); Map<String, Object> filterSettings = (Map<String, Object>) twitterSettings.get("filter"); if (streamType.equals("filter") && filterSettings == null) { stream = null; indexName = null; typeName = "status"; bulkSize = 100; this.maxConcurrentBulk = 1; this.bulkFlushInterval = TimeValue.timeValueSeconds(5); logger.warn("no filter defined for type filter. Disabling river..."); return; } if (filterSettings != null) { streamType = "filter"; filterQuery = new FilterQuery(); filterQuery.count(XContentMapValues.nodeIntegerValue(filterSettings.get("count"), 0)); Object tracks = filterSettings.get("tracks"); boolean filterSet = false; if (tracks != null) { if (tracks instanceof List) { List<String> lTracks = (List<String>) tracks; filterQuery.track(lTracks.toArray(new String[lTracks.size()])); } else { filterQuery.track(Strings.commaDelimitedListToStringArray(tracks.toString())); } filterSet = true; } Object follow = filterSettings.get("follow"); if (follow != null) { if (follow instanceof List) { List lFollow = (List) follow; long[] followIds = new long[lFollow.size()]; for (int i = 0; i < lFollow.size(); i++) { Object o = lFollow.get(i); if (o instanceof Number) { followIds[i] = ((Number) o).intValue(); } else { followIds[i] = Long.parseLong(o.toString()); } } filterQuery.follow(followIds); } else { String[] ids = Strings.commaDelimitedListToStringArray(follow.toString()); long[] followIds = new long[ids.length]; for (int i = 0; i < ids.length; i++) { followIds[i] = Long.parseLong(ids[i]); } filterQuery.follow(followIds); } filterSet = true; } Object locations = filterSettings.get("locations"); if (locations != null) { if (locations instanceof List) { List lLocations = (List) locations; double[][] dLocations = new double[lLocations.size()][]; for (int i = 0; i < lLocations.size(); i++) { Object loc = lLocations.get(i); double lat; double lon; if (loc instanceof List) { List lLoc = (List) loc; if (lLoc.get(0) instanceof Number) { lon = ((Number) lLoc.get(0)).doubleValue(); } else { lon = Double.parseDouble(lLoc.get(0).toString()); } if (lLoc.get(1) instanceof Number) { lat = ((Number) lLoc.get(1)).doubleValue(); } else { lat = Double.parseDouble(lLoc.get(1).toString()); } } else { String[] sLoc = Strings.commaDelimitedListToStringArray(loc.toString()); lon = Double.parseDouble(sLoc[0]); lat = Double.parseDouble(sLoc[1]); } dLocations[i] = new double[] { lon, lat }; } filterQuery.locations(dLocations); } else { String[] sLocations = Strings.commaDelimitedListToStringArray(locations.toString()); double[][] dLocations = new double[sLocations.length / 2][]; int dCounter = 0; for (int i = 0; i < sLocations.length; i++) { double lon = Double.parseDouble(sLocations[i]); double lat = Double.parseDouble(sLocations[++i]); dLocations[dCounter++] = new double[] { lon, lat }; } filterQuery.locations(dLocations); } filterSet = true; } Object language = filterSettings.get("language"); if (language != null) { if (filterSet) { if (language instanceof List) { List<String> lLanguage = (List<String>) language; filterQuery.language(lLanguage.toArray(new String[lLanguage.size()])); } else { filterQuery.language(Strings.commaDelimitedListToStringArray(language.toString())); } } else { indexName = null; typeName = "status"; bulkSize = 100; this.maxConcurrentBulk = 1; this.bulkFlushInterval = TimeValue.timeValueSeconds(5); logger.warn( "can not set language filter without tracks, follow or locations. Disabling river."); return; } } } } logger.info("creating twitter stream river"); if (raw && logger.isDebugEnabled()) { logger.debug("will index twitter raw content..."); } if (oauthAccessToken == null && oauthConsumerKey == null && oauthConsumerSecret == null && oauthAccessTokenSecret == null) { stream = null; indexName = null; typeName = "status"; bulkSize = 100; this.maxConcurrentBulk = 1; this.bulkFlushInterval = TimeValue.timeValueSeconds(5); logger.warn("no oauth specified, disabling river..."); return; } if (settings.settings().containsKey("index")) { Map<String, Object> indexSettings = (Map<String, Object>) settings.settings().get("index"); indexName = XContentMapValues.nodeStringValue(indexSettings.get("index"), riverName.name()); typeName = XContentMapValues.nodeStringValue(indexSettings.get("type"), "status"); this.bulkSize = XContentMapValues.nodeIntegerValue(indexSettings.get("bulk_size"), 100); this.bulkFlushInterval = TimeValue.parseTimeValue( XContentMapValues.nodeStringValue(indexSettings.get("flush_interval"), "5s"), TimeValue.timeValueSeconds(5)); this.maxConcurrentBulk = XContentMapValues.nodeIntegerValue(indexSettings.get("max_concurrent_bulk"), 1); } else { indexName = riverName.name(); typeName = "status"; bulkSize = 100; this.maxConcurrentBulk = 1; this.bulkFlushInterval = TimeValue.timeValueSeconds(5); } stream = buildTwitterStream(); } /** * Twitter Stream Builder * @return */ private TwitterStream buildTwitterStream() { logger.debug("creating TwitterStreamFactory"); ConfigurationBuilder cb = new ConfigurationBuilder(); cb.setOAuthConsumerKey(oauthConsumerKey).setOAuthConsumerSecret(oauthConsumerSecret) .setOAuthAccessToken(oauthAccessToken).setOAuthAccessTokenSecret(oauthAccessTokenSecret); if (proxyHost != null) cb.setHttpProxyHost(proxyHost); if (proxyPort != null) cb.setHttpProxyPort(Integer.parseInt(proxyPort)); if (proxyUser != null) cb.setHttpProxyUser(proxyUser); if (proxyPassword != null) cb.setHttpProxyPassword(proxyPassword); if (raw) cb.setJSONStoreEnabled(true); // We force SSL usage cb.setUseSSL(true); TwitterStream stream = new TwitterStreamFactory(cb.build()).getInstance(); stream.addListener(new StatusHandler()); return stream; } /** * Start twitter stream */ private void startTwitterStream() { logger.info("starting {} twitter stream", streamType); if (streamType.equals("filter") || filterQuery != null) { stream.filter(filterQuery); } else if (streamType.equals("firehose")) { stream.firehose(0); } else { stream.sample(); } } @Override public void start() { if (stream == null) { return; } try { // We push ES mapping only if raw is false if (!raw) { String mapping = XContentFactory.jsonBuilder().startObject().startObject(typeName) .startObject("properties").startObject("location").field("type", "geo_point").endObject() .startObject("language").field("type", "string").field("index", "not_analyzed").endObject() .startObject("user").startObject("properties").startObject("screen_name") .field("type", "string").field("index", "not_analyzed").endObject().endObject().endObject() .startObject("mention").startObject("properties").startObject("screen_name") .field("type", "string").field("index", "not_analyzed").endObject().endObject().endObject() .startObject("in_reply").startObject("properties").startObject("user_screen_name") .field("type", "string").field("index", "not_analyzed").endObject().endObject().endObject() .endObject().endObject().endObject().string(); client.admin().indices().prepareCreate(indexName).addMapping(typeName, mapping).execute() .actionGet(); } } catch (Exception e) { if (ExceptionsHelper.unwrapCause(e) instanceof IndexAlreadyExistsException) { // that's fine } else if (ExceptionsHelper.unwrapCause(e) instanceof ClusterBlockException) { // ok, not recovered yet..., lets start indexing and hope we recover by the first bulk // TODO: a smarter logic can be to register for cluster event listener here, and only start sampling when the block is removed... } else { logger.warn("failed to create index [{}], disabling river...", e, indexName); return; } } // Creating bulk processor this.bulkProcessor = BulkProcessor.builder(client, new BulkProcessor.Listener() { @Override public void beforeBulk(long executionId, BulkRequest request) { logger.debug("Going to execute new bulk composed of {} actions", request.numberOfActions()); } @Override public void afterBulk(long executionId, BulkRequest request, BulkResponse response) { logger.debug("Executed bulk composed of {} actions", request.numberOfActions()); if (response.hasFailures()) { logger.warn("There was failures while executing bulk", response.buildFailureMessage()); if (logger.isDebugEnabled()) { for (BulkItemResponse item : response.getItems()) { if (item.isFailed()) { logger.debug("Error for {}/{}/{} for {} operation: {}", item.getIndex(), item.getType(), item.getId(), item.getOpType(), item.getFailureMessage()); } } } } } @Override public void afterBulk(long executionId, BulkRequest request, Throwable failure) { logger.warn("Error executing bulk", failure); } }).setBulkActions(bulkSize).setConcurrentRequests(maxConcurrentBulk).setFlushInterval(bulkFlushInterval) .build(); startTwitterStream(); } private void reconnect() { if (closed) { logger.debug("can not reconnect twitter on a closed river"); return; } try { stream.cleanUp(); } catch (Exception e) { logger.debug("failed to cleanup after failure", e); } try { stream.shutdown(); } catch (Exception e) { logger.debug("failed to shutdown after failure", e); } if (closed) { return; } try { stream = buildTwitterStream(); startTwitterStream(); } catch (Exception e) { if (closed) { close(); return; } // TODO, we can update the status of the river to RECONNECT logger.warn("failed to connect after failure, throttling", e); threadPool.schedule(TimeValue.timeValueSeconds(10), ThreadPool.Names.GENERIC, new Runnable() { @Override public void run() { reconnect(); } }); } } @Override public void close() { this.closed = true; logger.info("closing twitter stream river"); bulkProcessor.close(); if (stream != null) { stream.cleanUp(); stream.shutdown(); } } private class StatusHandler extends StatusAdapter { @Override public void onStatus(Status status) { try { // #24: We want to ignore retweets (default to false) https://github.com/elasticsearch/elasticsearch-river-twitter/issues/24 if (!ignoreRetweet || status.isRetweet()) { if (logger.isTraceEnabled()) { logger.trace("status {} : {}", status.getUser().getName(), status.getText()); } // If we want to index tweets as is, we don't need to convert it to JSon doc if (raw) { String rawJSON = DataObjectFactory.getRawJSON(status); bulkProcessor.add(Requests.indexRequest(indexName).type(typeName) .id(Long.toString(status.getId())).create(true).source(rawJSON)); } else { XContentBuilder builder = XContentFactory.jsonBuilder().startObject(); builder.field("text", status.getText()); builder.field("created_at", status.getCreatedAt()); builder.field("source", status.getSource()); builder.field("truncated", status.isTruncated()); builder.field("language", status.getIsoLanguageCode()); if (status.getUserMentionEntities() != null) { builder.startArray("mention"); for (UserMentionEntity user : status.getUserMentionEntities()) { builder.startObject(); builder.field("id", user.getId()); builder.field("name", user.getName()); builder.field("screen_name", user.getScreenName()); builder.field("start", user.getStart()); builder.field("end", user.getEnd()); builder.endObject(); } builder.endArray(); } if (status.getRetweetCount() != -1) { builder.field("retweet_count", status.getRetweetCount()); } if (status.isRetweet() && status.getRetweetedStatus() != null) { builder.startObject("retweet"); builder.field("id", status.getRetweetedStatus().getId()); if (status.getRetweetedStatus().getUser() != null) { builder.field("user_id", status.getRetweetedStatus().getUser().getId()); builder.field("user_screen_name", status.getRetweetedStatus().getUser().getScreenName()); if (status.getRetweetedStatus().getRetweetCount() != -1) { builder.field("retweet_count", status.getRetweetedStatus().getRetweetCount()); } } builder.endObject(); } if (status.getInReplyToStatusId() != -1) { builder.startObject("in_reply"); builder.field("status", status.getInReplyToStatusId()); if (status.getInReplyToUserId() != -1) { builder.field("user_id", status.getInReplyToUserId()); builder.field("user_screen_name", status.getInReplyToScreenName()); } builder.endObject(); } if (status.getHashtagEntities() != null) { builder.startArray("hashtag"); for (HashtagEntity hashtag : status.getHashtagEntities()) { builder.startObject(); builder.field("text", hashtag.getText()); builder.field("start", hashtag.getStart()); builder.field("end", hashtag.getEnd()); builder.endObject(); } builder.endArray(); } if (status.getContributors() != null && status.getContributors().length > 0) { builder.array("contributor", status.getContributors()); } if (status.getGeoLocation() != null) { builder.startObject("location"); builder.field("lat", status.getGeoLocation().getLatitude()); builder.field("lon", status.getGeoLocation().getLongitude()); builder.endObject(); } if (status.getPlace() != null) { builder.startObject("place"); builder.field("id", status.getPlace().getId()); builder.field("name", status.getPlace().getName()); builder.field("type", status.getPlace().getPlaceType()); builder.field("full_name", status.getPlace().getFullName()); builder.field("street_address", status.getPlace().getStreetAddress()); builder.field("country", status.getPlace().getCountry()); builder.field("country_code", status.getPlace().getCountryCode()); builder.field("url", status.getPlace().getURL()); builder.endObject(); } if (status.getURLEntities() != null) { builder.startArray("link"); for (URLEntity url : status.getURLEntities()) { if (url != null) { builder.startObject(); if (url.getURL() != null) { builder.field("url", url.getURL()); } if (url.getDisplayURL() != null) { builder.field("display_url", url.getDisplayURL()); } if (url.getExpandedURL() != null) { builder.field("expand_url", url.getExpandedURL()); } builder.field("start", url.getStart()); builder.field("end", url.getEnd()); builder.endObject(); } } builder.endArray(); } builder.startObject("user"); builder.field("id", status.getUser().getId()); builder.field("name", status.getUser().getName()); builder.field("screen_name", status.getUser().getScreenName()); builder.field("location", status.getUser().getLocation()); builder.field("description", status.getUser().getDescription()); builder.field("profile_image_url", status.getUser().getProfileImageURL()); builder.field("profile_image_url_https", status.getUser().getProfileImageURLHttps()); builder.endObject(); builder.endObject(); bulkProcessor.add(Requests.indexRequest(indexName).type(typeName) .id(Long.toString(status.getId())).create(true).source(builder)); } } else if (logger.isTraceEnabled()) { logger.trace("ignoring status cause retweet {} : {}", status.getUser().getName(), status.getText()); } } catch (Exception e) { logger.warn("failed to construct index request", e); } } @Override public void onDeletionNotice(StatusDeletionNotice statusDeletionNotice) { if (statusDeletionNotice.getStatusId() != -1) { bulkProcessor.add(Requests.deleteRequest(indexName).type(typeName) .id(Long.toString(statusDeletionNotice.getStatusId()))); } } @Override public void onTrackLimitationNotice(int numberOfLimitedStatuses) { logger.info("received track limitation notice, number_of_limited_statuses {}", numberOfLimitedStatuses); } @Override public void onException(Exception ex) { logger.warn("stream failure, restarting stream...", ex); threadPool.generic().execute(new Runnable() { @Override public void run() { reconnect(); } }); } } }