org.loklak.scraper.TwitterRiver.java Source code

Java tutorial

Introduction

Here is the source code for org.loklak.scraper.TwitterRiver.java

Source

/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.loklak.scraper;

/**
 * THIS CLASS WAS TAKEN FROM AN ELASTICSEARCH PLUGIN AND IS NOT (YET) USED IN THIS APPLICATION
 * IT IS ONLY HERE AS A REFERENCE FOR COMPATIBLE DATA STRUCTURES
 * IT MAY SERVE AS A TEMPLATE TO IMPORT TWEETS USING THE TWITTER4J LIBRARY
 */

import org.elasticsearch.ExceptionsHelper;
import org.elasticsearch.action.bulk.BulkItemResponse;
import org.elasticsearch.action.bulk.BulkProcessor;
import org.elasticsearch.action.bulk.BulkRequest;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.client.Client;
import org.elasticsearch.client.Requests;
import org.elasticsearch.cluster.block.ClusterBlockException;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.support.XContentMapValues;
import org.elasticsearch.indices.IndexAlreadyExistsException;
import org.elasticsearch.river.AbstractRiverComponent;
import org.elasticsearch.river.River;
import org.elasticsearch.river.RiverName;
import org.elasticsearch.river.RiverSettings;
import org.elasticsearch.threadpool.ThreadPool;

import twitter4j.FilterQuery;
import twitter4j.HashtagEntity;
import twitter4j.PagableResponseList;
import twitter4j.Status;
import twitter4j.StatusAdapter;
import twitter4j.StatusDeletionNotice;
import twitter4j.Twitter;
import twitter4j.TwitterException;
import twitter4j.TwitterFactory;
import twitter4j.TwitterObjectFactory;
import twitter4j.TwitterStream;
import twitter4j.TwitterStreamFactory;
import twitter4j.URLEntity;
import twitter4j.User;
import twitter4j.UserMentionEntity;
import twitter4j.UserStreamAdapter;
import twitter4j.conf.Configuration;
import twitter4j.conf.ConfigurationBuilder;

import java.util.ArrayList;
import java.util.List;
import java.util.Map;

public class TwitterRiver extends AbstractRiverComponent implements River {

    // Thread pool used to run start() asynchronously and to schedule reconnect attempts.
    private final ThreadPool threadPool;

    // Elasticsearch client used for cluster health, index/mapping management and bulk indexing.
    private final Client client;

    // OAuth credentials, read from the river's "twitter.oauth" settings or, as a fallback,
    // from the node settings "river.twitter.oauth.*". Any of them may be null.
    private final String oauthConsumerKey;
    private final String oauthConsumerSecret;
    private final String oauthAccessToken;
    private final String oauthAccessTokenSecret;

    // Delay before a failed reconnect is retried (defaults to 10 seconds).
    private final TimeValue retryAfter;

    // Optional HTTP proxy settings; each value is only applied when non-null.
    private final String proxyHost;
    private final String proxyPort;
    private final String proxyUser;
    private final String proxyPassword;

    // When true, statuses are indexed as raw JSON and no default mapping is pushed.
    private final boolean raw;
    // When true, retweets are not indexed.
    private final boolean ignoreRetweet;
    // Read from the "geo_as_array" setting; its use is outside the code visible here.
    private final boolean geoAsArray;

    // Target index; null when the river was disabled in the constructor.
    private final String indexName;

    // Target document type (defaults to "status").
    private final String typeName;

    // Bulk processor tuning: actions per bulk, concurrent bulk requests and flush interval.
    private final int bulkSize;
    private final int maxConcurrentBulk;
    private final TimeValue bulkFlushInterval;

    // Filter applied to the stream, or null when no "filter" settings were given.
    private final FilterQuery filterQuery;

    // Stream type passed to startTwitterStream ("sample", "filter", "firehose", "user");
    // null when the river was disabled in the constructor.
    private final String streamType;

    // Lifecycle state. It is written by close(), reconnect() and the task submitted by
    // start(), and read from stream callbacks, so it is volatile like the two fields below.
    private volatile RiverStatus riverStatus;

    private volatile TwitterStream stream;

    private volatile BulkProcessor bulkProcessor;

    @SuppressWarnings({ "unchecked", "rawtypes" })
    @Inject
    public TwitterRiver(RiverName riverName, RiverSettings riverSettings, Client client, ThreadPool threadPool,
            Settings settings) {
        super(riverName, riverSettings);
        this.riverStatus = RiverStatus.UNKNOWN;
        this.client = client;
        this.threadPool = threadPool;

        String riverStreamType;

        if (riverSettings.settings().containsKey("twitter")) {
            Map<String, Object> twitterSettings = (Map<String, Object>) riverSettings.settings().get("twitter");

            raw = XContentMapValues.nodeBooleanValue(twitterSettings.get("raw"), false);
            ignoreRetweet = XContentMapValues.nodeBooleanValue(twitterSettings.get("ignore_retweet"), false);
            geoAsArray = XContentMapValues.nodeBooleanValue(twitterSettings.get("geo_as_array"), false);

            if (twitterSettings.containsKey("oauth")) {
                Map<String, Object> oauth = (Map<String, Object>) twitterSettings.get("oauth");
                if (oauth.containsKey("consumer_key")) {
                    oauthConsumerKey = XContentMapValues.nodeStringValue(oauth.get("consumer_key"), null);
                } else {
                    oauthConsumerKey = settings.get("river.twitter.oauth.consumer_key");
                }
                if (oauth.containsKey("consumer_secret")) {
                    oauthConsumerSecret = XContentMapValues.nodeStringValue(oauth.get("consumer_secret"), null);
                } else {
                    oauthConsumerSecret = settings.get("river.twitter.oauth.consumer_secret");
                }
                if (oauth.containsKey("access_token")) {
                    oauthAccessToken = XContentMapValues.nodeStringValue(oauth.get("access_token"), null);
                } else {
                    oauthAccessToken = settings.get("river.twitter.oauth.access_token");
                }
                if (oauth.containsKey("access_token_secret")) {
                    oauthAccessTokenSecret = XContentMapValues.nodeStringValue(oauth.get("access_token_secret"),
                            null);
                } else {
                    oauthAccessTokenSecret = settings.get("river.twitter.oauth.access_token_secret");
                }
            } else {
                oauthConsumerKey = settings.get("river.twitter.oauth.consumer_key");
                oauthConsumerSecret = settings.get("river.twitter.oauth.consumer_secret");
                oauthAccessToken = settings.get("river.twitter.oauth.access_token");
                oauthAccessTokenSecret = settings.get("river.twitter.oauth.access_token_secret");
            }

            if (twitterSettings.containsKey("retry_after")) {
                retryAfter = XContentMapValues.nodeTimeValue(twitterSettings.get("retry_after"),
                        TimeValue.timeValueSeconds(10));
            } else {
                retryAfter = XContentMapValues.nodeTimeValue(settings.get("river.twitter.retry_after"),
                        TimeValue.timeValueSeconds(10));
            }

            if (twitterSettings.containsKey("proxy")) {
                Map<String, Object> proxy = (Map<String, Object>) twitterSettings.get("proxy");
                proxyHost = XContentMapValues.nodeStringValue(proxy.get("host"), null);
                proxyPort = XContentMapValues.nodeStringValue(proxy.get("port"), null);
                proxyUser = XContentMapValues.nodeStringValue(proxy.get("user"), null);
                proxyPassword = XContentMapValues.nodeStringValue(proxy.get("password"), null);
            } else {
                // Let's see if we have that in node settings
                proxyHost = settings.get("river.twitter.proxy.host");
                proxyPort = settings.get("river.twitter.proxy.port");
                proxyUser = settings.get("river.twitter.proxy.user");
                proxyPassword = settings.get("river.twitter.proxy.password");
            }

            riverStreamType = XContentMapValues.nodeStringValue(twitterSettings.get("type"), "sample");
            Map<String, Object> filterSettings = (Map<String, Object>) twitterSettings.get("filter");

            if (riverStreamType.equals("filter") && filterSettings == null) {
                filterQuery = null;
                stream = null;
                streamType = null;
                indexName = null;
                typeName = "status";
                bulkSize = 100;
                this.maxConcurrentBulk = 1;
                this.bulkFlushInterval = TimeValue.timeValueSeconds(5);
                logger.warn("no filter defined for type filter. Disabling river...");
                return;
            }

            if (filterSettings != null) {
                riverStreamType = "filter";
                filterQuery = new FilterQuery();
                filterQuery.count(XContentMapValues.nodeIntegerValue(filterSettings.get("count"), 0));
                Object tracks = filterSettings.get("tracks");
                boolean filterSet = false;
                if (tracks != null) {
                    if (tracks instanceof List) {
                        List<String> lTracks = (List<String>) tracks;
                        filterQuery.track(lTracks.toArray(new String[lTracks.size()]));
                    } else {
                        filterQuery.track(Strings.commaDelimitedListToStringArray(tracks.toString()));
                    }
                    filterSet = true;
                }
                Object follow = filterSettings.get("follow");
                if (follow != null) {
                    if (follow instanceof List) {
                        List lFollow = (List) follow;
                        long[] followIds = new long[lFollow.size()];
                        for (int i = 0; i < lFollow.size(); i++) {
                            Object o = lFollow.get(i);
                            if (o instanceof Number) {
                                followIds[i] = ((Number) o).intValue();
                            } else {
                                followIds[i] = Long.parseLong(o.toString());
                            }
                        }
                        filterQuery.follow(followIds);
                    } else {
                        String[] ids = Strings.commaDelimitedListToStringArray(follow.toString());
                        long[] followIds = new long[ids.length];
                        for (int i = 0; i < ids.length; i++) {
                            followIds[i] = Long.parseLong(ids[i]);
                        }
                        filterQuery.follow(followIds);
                    }
                    filterSet = true;
                }
                Object locations = filterSettings.get("locations");
                if (locations != null) {
                    if (locations instanceof List) {
                        List lLocations = (List) locations;
                        double[][] dLocations = new double[lLocations.size()][];
                        for (int i = 0; i < lLocations.size(); i++) {
                            Object loc = lLocations.get(i);
                            double lat;
                            double lon;
                            if (loc instanceof List) {
                                List lLoc = (List) loc;
                                if (lLoc.get(0) instanceof Number) {
                                    lon = ((Number) lLoc.get(0)).doubleValue();
                                } else {
                                    lon = Double.parseDouble(lLoc.get(0).toString());
                                }
                                if (lLoc.get(1) instanceof Number) {
                                    lat = ((Number) lLoc.get(1)).doubleValue();
                                } else {
                                    lat = Double.parseDouble(lLoc.get(1).toString());
                                }
                            } else {
                                String[] sLoc = Strings.commaDelimitedListToStringArray(loc.toString());
                                lon = Double.parseDouble(sLoc[0]);
                                lat = Double.parseDouble(sLoc[1]);
                            }
                            dLocations[i] = new double[] { lon, lat };
                        }
                        filterQuery.locations(dLocations);
                    } else {
                        String[] sLocations = Strings.commaDelimitedListToStringArray(locations.toString());
                        double[][] dLocations = new double[sLocations.length / 2][];
                        int dCounter = 0;
                        for (int i = 0; i < sLocations.length; i++) {
                            double lon = Double.parseDouble(sLocations[i]);
                            double lat = Double.parseDouble(sLocations[++i]);
                            dLocations[dCounter++] = new double[] { lon, lat };
                        }
                        filterQuery.locations(dLocations);
                    }
                    filterSet = true;
                }
                Object userLists = filterSettings.get("user_lists");
                if (userLists != null) {
                    if (userLists instanceof List) {
                        List<String> lUserlists = (List<String>) userLists;
                        String[] tUserlists = lUserlists.toArray(new String[lUserlists.size()]);
                        filterQuery.follow(getUsersListMembers(tUserlists));
                    } else {
                        String[] tUserlists = Strings.commaDelimitedListToStringArray(userLists.toString());
                        filterQuery.follow(getUsersListMembers(tUserlists));
                    }
                    filterSet = true;
                }

                // We should have something to filter
                if (!filterSet) {
                    streamType = null;
                    indexName = null;
                    typeName = "status";
                    bulkSize = 100;
                    this.maxConcurrentBulk = 1;
                    this.bulkFlushInterval = TimeValue.timeValueSeconds(5);
                    logger.warn(
                            "can not set language filter without tracks, follow, locations or user_lists. Disabling river.");
                    return;
                }

                Object language = filterSettings.get("language");
                if (language != null) {
                    if (language instanceof List) {
                        List<String> lLanguage = (List<String>) language;
                        filterQuery.language(lLanguage.toArray(new String[lLanguage.size()]));
                    } else {
                        filterQuery.language(Strings.commaDelimitedListToStringArray(language.toString()));
                    }
                }
            } else {
                filterQuery = null;
            }
        } else {
            // No specific settings. We need to use some defaults
            riverStreamType = "sample";
            raw = false;
            ignoreRetweet = false;
            geoAsArray = false;
            oauthConsumerKey = settings.get("river.twitter.oauth.consumer_key");
            oauthConsumerSecret = settings.get("river.twitter.oauth.consumer_secret");
            oauthAccessToken = settings.get("river.twitter.oauth.access_token");
            oauthAccessTokenSecret = settings.get("river.twitter.oauth.access_token_secret");
            retryAfter = XContentMapValues.nodeTimeValue(settings.get("river.twitter.retry_after"),
                    TimeValue.timeValueSeconds(10));
            filterQuery = null;
            proxyHost = null;
            proxyPort = null;
            proxyUser = null;
            proxyPassword = null;
        }

        if (oauthAccessToken == null || oauthConsumerKey == null || oauthConsumerSecret == null
                || oauthAccessTokenSecret == null) {
            stream = null;
            streamType = null;
            indexName = null;
            typeName = "status";
            bulkSize = 100;
            this.maxConcurrentBulk = 1;
            this.bulkFlushInterval = TimeValue.timeValueSeconds(5);
            logger.warn("no oauth specified, disabling river...");
            return;
        }

        if (riverSettings.settings().containsKey("index")) {
            Map<String, Object> indexSettings = (Map<String, Object>) riverSettings.settings().get("index");
            indexName = XContentMapValues.nodeStringValue(indexSettings.get("index"), riverName.name());
            typeName = XContentMapValues.nodeStringValue(indexSettings.get("type"), "status");
            this.bulkSize = XContentMapValues.nodeIntegerValue(indexSettings.get("bulk_size"), 100);
            this.bulkFlushInterval = TimeValue.parseTimeValue(
                    XContentMapValues.nodeStringValue(indexSettings.get("flush_interval"), "5s"),
                    TimeValue.timeValueSeconds(5));
            this.maxConcurrentBulk = XContentMapValues.nodeIntegerValue(indexSettings.get("max_concurrent_bulk"),
                    1);
        } else {
            indexName = riverName.name();
            typeName = "status";
            bulkSize = 100;
            this.maxConcurrentBulk = 1;
            this.bulkFlushInterval = TimeValue.timeValueSeconds(5);
        }

        logger.info("creating twitter stream river");
        if (raw && logger.isDebugEnabled()) {
            logger.debug("will index twitter raw content...");
        }

        streamType = riverStreamType;
        this.riverStatus = RiverStatus.INITIALIZED;
    }

    /**
     * Collects the ids of all members of the given user lists so they can be followed.
     *
     * <p>Each entry is split on {@code /} and its first two parts are passed to
     * {@code Twitter.getUserListMembers}; every page of members is fetched until the
     * next cursor is 0. An entry with fewer than two parts, or one for which Twitter
     * reports an error, is logged and skipped; ids already collected are kept.
     *
     * @param tUserlists user lists to resolve, one {@code <part>/<part>} string per list.
     *                   Should be public lists.
     * @return the collected member ids, in fetch order (possibly empty, never null)
     */
    private long[] getUsersListMembers(String[] tUserlists) {
        logger.debug("Fetching user id of given lists");
        List<Long> listUserIdToFollow = new ArrayList<Long>();
        Configuration cb = buildTwitterConfiguration();
        Twitter twitterImpl = new TwitterFactory(cb).getInstance();

        //For each list given in parameter
        for (String listId : tUserlists) {
            logger.debug("Adding users of list {} ", listId);
            String[] splitListId = listId.split("/");
            if (splitListId.length < 2) {
                // Without this guard splitListId[1] throws ArrayIndexOutOfBoundsException,
                // which is not caught below and would abort the river's construction.
                logger.error("Invalid user list [{}]: expected two parts separated by '/', skipping", listId);
                continue;
            }
            try {
                long cursor = -1;
                PagableResponseList<User> itUserListMembers;
                do {
                    itUserListMembers = twitterImpl.getUserListMembers(splitListId[0], splitListId[1], cursor);
                    for (User member : itUserListMembers) {
                        long userId = member.getId();
                        listUserIdToFollow.add(userId);
                    }
                } while ((cursor = itUserListMembers.getNextCursor()) != 0);

            } catch (TwitterException te) {
                // Cause first, then format arguments, as in the other logger calls of this
                // class that carry an exception.
                logger.error("Failed to get list members for : {}", te, listId);
            }
        }

        //Just casting from Long to long
        long ret[] = new long[listUserIdToFollow.size()];
        int pos = 0;
        for (Long userId : listUserIdToFollow) {
            ret[pos] = userId;
            pos++;
        }
        return ret;
    }

    /**
     * Assembles the Twitter4J configuration from this river's OAuth credentials,
     * optional proxy settings and raw-JSON flag.
     *
     * @return the built configuration
     */
    private Configuration buildTwitterConfiguration() {
        logger.debug("creating twitter configuration");
        final ConfigurationBuilder builder = new ConfigurationBuilder();

        // OAuth credentials are always applied, in the same order as before.
        builder.setOAuthConsumerKey(oauthConsumerKey);
        builder.setOAuthConsumerSecret(oauthConsumerSecret);
        builder.setOAuthAccessToken(oauthAccessToken);
        builder.setOAuthAccessTokenSecret(oauthAccessTokenSecret);

        // Proxy values are optional; only the ones that were configured are set.
        if (proxyHost != null) {
            builder.setHttpProxyHost(proxyHost);
        }
        if (proxyPort != null) {
            final int port = Integer.parseInt(proxyPort);
            builder.setHttpProxyPort(port);
        }
        if (proxyUser != null) {
            builder.setHttpProxyUser(proxyUser);
        }
        if (proxyPassword != null) {
            builder.setHttpProxyPassword(proxyPassword);
        }

        // The JSON store is switched on only when raw content is indexed.
        if (raw) {
            builder.setJSONStoreEnabled(true);
        }

        logger.debug("twitter configuration created");
        return builder.build();
    }

    /**
     * Creates the Twitter stream on first use, then opens it in the mode selected by
     * the stream type and filter query, unless the river is stopping or stopped.
     */
    private void startTwitterStream() {
        logger.info("starting {} twitter stream", streamType);

        // The stream and its listener are created once; later calls reuse the instance.
        if (stream == null) {
            logger.debug("creating twitter stream");

            stream = new TwitterStreamFactory(buildTwitterConfiguration()).getInstance();
            final boolean userStream = streamType.equals("user");
            if (userStream) {
                stream.addListener(new UserStreamHandler());
            } else {
                stream.addListener(new StatusHandler());
            }

            logger.debug("twitter stream created");
        }

        final boolean closing = riverStatus == RiverStatus.STOPPED || riverStatus == RiverStatus.STOPPING;
        if (!closing) {
            // A configured filter query wins over every other stream type.
            final boolean filtered = streamType.equals("filter") || filterQuery != null;
            if (filtered) {
                stream.filter(filterQuery);
            } else if (streamType.equals("firehose")) {
                stream.firehose(0);
            } else if (streamType.equals("user")) {
                stream.user();
            } else {
                stream.sample();
            }
        }

        logger.debug("{} twitter stream started!", streamType);
    }

    /**
     * Starts the river asynchronously on the generic thread pool.
     *
     * <p>The submitted task waits for at least yellow health on the {@code _river} index,
     * then (unless raw indexing is enabled) creates the target index and applies a default
     * mapping when the type has none, builds the bulk processor, and finally starts the
     * Twitter stream. Every step is skipped once the river is stopping or stopped. A failure
     * to create the index or to apply the mapping marks the river as stopped and aborts.
     */
    @Override
    public void start() {
        this.riverStatus = RiverStatus.STARTING;
        // Let's start this in another thread so we won't stop the start process
        threadPool.generic().execute(new Runnable() {
            @Override
            public void run() {
                if (riverStatus != RiverStatus.STOPPED && riverStatus != RiverStatus.STOPPING) {
                    // We are first waiting for a yellow state at least
                    logger.debug("waiting for yellow status");
                    client.admin().cluster().prepareHealth("_river").setWaitForYellowStatus().get();
                    logger.debug("yellow or green status received");
                }

                if (riverStatus != RiverStatus.STOPPED && riverStatus != RiverStatus.STOPPING) {
                    // We push ES mapping only if raw is false
                    if (!raw) {
                        try {
                            logger.debug("Trying to create index [{}]", indexName);
                            client.admin().indices().prepareCreate(indexName).execute().actionGet();
                            logger.debug("index created [{}]", indexName);
                        } catch (Exception e) {
                            if (ExceptionsHelper.unwrapCause(e) instanceof IndexAlreadyExistsException) {
                                // that's fine
                                logger.debug("Index [{}] already exists, skipping...", indexName);
                            } else if (ExceptionsHelper.unwrapCause(e) instanceof ClusterBlockException) {
                                // ok, not recovered yet..., lets start indexing and hope we recover by the first bulk
                                // TODO: a smarter logic can be to register for cluster event listener here, and only start sampling when the block is removed...
                                logger.debug(
                                        "Cluster is blocked for now. Index [{}] can not be created, skipping...",
                                        indexName);
                            } else {
                                logger.warn("failed to create index [{}], disabling river...", e, indexName);
                                riverStatus = RiverStatus.STOPPED;
                                return;
                            }
                        }

                        if (client.admin().indices().prepareGetMappings(indexName).setTypes(typeName).get()
                                .getMappings().isEmpty()) {
                            try {
                                // Default mapping: "location" as geo_point, and "language" plus the
                                // screen-name fields of user/mention/in_reply/retweet as not_analyzed strings.
                                String mapping = XContentFactory.jsonBuilder().startObject().startObject(typeName)
                                        .startObject("properties").startObject("location")
                                        .field("type", "geo_point").endObject().startObject("language")
                                        .field("type", "string").field("index", "not_analyzed").endObject()
                                        .startObject("user").startObject("properties").startObject("screen_name")
                                        .field("type", "string").field("index", "not_analyzed").endObject()
                                        .endObject().endObject().startObject("mention").startObject("properties")
                                        .startObject("screen_name").field("type", "string")
                                        .field("index", "not_analyzed").endObject().endObject().endObject()
                                        .startObject("in_reply").startObject("properties")
                                        .startObject("user_screen_name").field("type", "string")
                                        .field("index", "not_analyzed").endObject().endObject().endObject()
                                        .startObject("retweet").startObject("properties")
                                        .startObject("user_screen_name").field("type", "string")
                                        .field("index", "not_analyzed").endObject().endObject().endObject()
                                        .endObject().endObject().endObject().string();
                                logger.debug("Applying default mapping for [{}]/[{}]: {}", indexName, typeName,
                                        mapping);
                                client.admin().indices().preparePutMapping(indexName).setType(typeName)
                                        .setSource(mapping).execute().actionGet();
                            } catch (Exception e) {
                                logger.warn("failed to apply default mapping [{}]/[{}], disabling river...", e,
                                        indexName, typeName);
                                // Mark the river as stopped, as the index-creation failure path above
                                // does, instead of leaving it in STARTING.
                                riverStatus = RiverStatus.STOPPED;
                                return;
                            }
                        } else {
                            logger.debug("Mapping already exists for [{}]/[{}], skipping...", indexName, typeName);
                        }
                    }
                }

                // Creating bulk processor
                logger.debug("creating bulk processor [{}]", indexName);
                bulkProcessor = BulkProcessor.builder(client, new BulkProcessor.Listener() {
                    @Override
                    public void beforeBulk(long executionId, BulkRequest request) {
                        logger.debug("Going to execute new bulk composed of {} actions", request.numberOfActions());
                    }

                    @Override
                    public void afterBulk(long executionId, BulkRequest request, BulkResponse response) {
                        logger.debug("Executed bulk composed of {} actions", request.numberOfActions());
                        if (response.hasFailures()) {
                            // The placeholder is required: without it the failure message
                            // argument never appears in the log line.
                            logger.warn("There was failures while executing bulk: {}",
                                    response.buildFailureMessage());
                            if (logger.isDebugEnabled()) {
                                for (BulkItemResponse item : response.getItems()) {
                                    if (item.isFailed()) {
                                        logger.debug("Error for {}/{}/{} for {} operation: {}", item.getIndex(),
                                                item.getType(), item.getId(), item.getOpType(),
                                                item.getFailureMessage());
                                    }
                                }
                            }
                        }
                    }

                    @Override
                    public void afterBulk(long executionId, BulkRequest request, Throwable failure) {
                        logger.warn("Error executing bulk", failure);
                    }
                }).setBulkActions(bulkSize).setConcurrentRequests(maxConcurrentBulk)
                        .setFlushInterval(bulkFlushInterval).build();

                logger.debug("Bulk processor created with bulkSize [{}], bulkFlushInterval [{}]", bulkSize,
                        bulkFlushInterval);
                if (riverStatus != RiverStatus.STOPPED && riverStatus != RiverStatus.STOPPING) {
                    startTwitterStream();
                    riverStatus = RiverStatus.RUNNING;
                }
            }
        });
    }

    /**
     * Tears down the current stream, if any, and starts it again.
     *
     * <p>Nothing happens when the river is stopping or stopped. If restarting the stream
     * fails while the river is still open, another attempt is scheduled on the generic
     * thread pool after {@code retryAfter}; if the river was closed in the meantime,
     * {@link #close()} is called instead.
     */
    private void reconnect() {
        final boolean closedOnEntry = riverStatus == RiverStatus.STOPPING || riverStatus == RiverStatus.STOPPED;
        if (closedOnEntry) {
            logger.debug("can not reconnect twitter on a closed river");
            return;
        }

        riverStatus = RiverStatus.STARTING;

        // Best-effort teardown: a failure in either step is logged and ignored.
        if (stream != null) {
            try {
                logger.debug("cleanup stream");
                stream.cleanUp();
            } catch (Exception cleanupFailure) {
                logger.debug("failed to cleanup after failure", cleanupFailure);
            }
            try {
                logger.debug("shutdown stream");
                stream.shutdown();
            } catch (Exception shutdownFailure) {
                logger.debug("failed to shutdown after failure", shutdownFailure);
            }
        }

        // The status is read again because the river may have been closed during teardown.
        final boolean closedAfterTeardown = riverStatus == RiverStatus.STOPPING
                || riverStatus == RiverStatus.STOPPED;
        if (closedAfterTeardown) {
            logger.debug("can not reconnect twitter on a closed river");
            return;
        }

        try {
            startTwitterStream();
            riverStatus = RiverStatus.RUNNING;
        } catch (Exception startFailure) {
            final boolean closedWhileStarting = riverStatus == RiverStatus.STOPPING
                    || riverStatus == RiverStatus.STOPPED;
            if (closedWhileStarting) {
                logger.debug("river is closing. we won't reconnect.");
                close();
                return;
            }
            // TODO, we can update the status of the river to RECONNECT
            logger.warn("failed to connect after failure, throttling", startFailure);
            final Runnable retry = new Runnable() {
                @Override
                public void run() {
                    reconnect();
                }
            };
            threadPool.schedule(retryAfter, ThreadPool.Names.GENERIC, retry);
        }
    }

    /**
     * Stops the river: marks it as stopping, closes the bulk processor and shuts the
     * Twitter stream down (each only if it was created), then marks it as stopped.
     */
    @Override
    public void close() {
        riverStatus = RiverStatus.STOPPING;
        logger.info("closing twitter stream river");

        final boolean hasBulkProcessor = bulkProcessor != null;
        if (hasBulkProcessor) {
            bulkProcessor.close();
        }

        final boolean hasStream = stream != null;
        if (hasStream) {
            // stream.cleanUp() is deliberately not called here:
            // - shutdown() already performs it
            // - calling it as well leads to a thread leak
            //   (see TwitterStreamImpl.cleanUp() and TwitterStreamImpl.shutdown())
            stream.shutdown();
        }

        riverStatus = RiverStatus.STOPPED;
    }

    private class StatusHandler extends StatusAdapter {

        @Override
        public void onStatus(Status status) {
            if (riverStatus != RiverStatus.STOPPED && riverStatus != RiverStatus.STOPPING) {
                try {
                    // #24: We want to ignore retweets (default to false) https://github.com/elasticsearch/elasticsearch-river-twitter/issues/24
                    if (status.isRetweet() && ignoreRetweet) {
                        if (logger.isTraceEnabled()) {
                            logger.trace("ignoring status cause retweet {} : {}", status.getUser().getName(),
                                    status.getText());
                        }
                    } else {
                        if (logger.isTraceEnabled()) {
                            logger.trace("status {} : {}", status.getUser().getName(), status.getText());
                        }

                        // If we want to index tweets as is, we don't need to convert it to JSon doc
                        if (raw) {
                            String rawJSON = TwitterObjectFactory.getRawJSON(status);
                            if (riverStatus != RiverStatus.STOPPED && riverStatus != RiverStatus.STOPPING) {
                                bulkProcessor.add(Requests.indexRequest(indexName).type(typeName)
                                        .id(Long.toString(status.getId())).source(rawJSON));
                            }
                        } else {
                            XContentBuilder builder = XContentFactory.jsonBuilder().startObject();
                            builder.field("text", status.getText());
                            builder.field("created_at", status.getCreatedAt());
                            builder.field("source", status.getSource());
                            builder.field("truncated", status.isTruncated());
                            builder.field("language", status.getLang());

                            if (status.getUserMentionEntities() != null) {
                                builder.startArray("mention");
                                for (UserMentionEntity user : status.getUserMentionEntities()) {
                                    builder.startObject();
                                    builder.field("id", user.getId());
                                    builder.field("name", user.getName());
                                    builder.field("screen_name", user.getScreenName());
                                    builder.field("start", user.getStart());
                                    builder.field("end", user.getEnd());
                                    builder.endObject();
                                }
                                builder.endArray();
                            }

                            if (status.getRetweetCount() != -1) {
                                builder.field("retweet_count", status.getRetweetCount());
                            }

                            if (status.isRetweet() && status.getRetweetedStatus() != null) {
                                builder.startObject("retweet");
                                builder.field("id", status.getRetweetedStatus().getId());
                                if (status.getRetweetedStatus().getUser() != null) {
                                    builder.field("user_id", status.getRetweetedStatus().getUser().getId());
                                    builder.field("user_screen_name",
                                            status.getRetweetedStatus().getUser().getScreenName());
                                    if (status.getRetweetedStatus().getRetweetCount() != -1) {
                                        builder.field("retweet_count",
                                                status.getRetweetedStatus().getRetweetCount());
                                    }
                                }
                                builder.endObject();
                            }

                            if (status.getInReplyToStatusId() != -1) {
                                builder.startObject("in_reply");
                                builder.field("status", status.getInReplyToStatusId());
                                if (status.getInReplyToUserId() != -1) {
                                    builder.field("user_id", status.getInReplyToUserId());
                                    builder.field("user_screen_name", status.getInReplyToScreenName());
                                }
                                builder.endObject();
                            }

                            if (status.getHashtagEntities() != null) {
                                builder.startArray("hashtag");
                                for (HashtagEntity hashtag : status.getHashtagEntities()) {
                                    builder.startObject();
                                    builder.field("text", hashtag.getText());
                                    builder.field("start", hashtag.getStart());
                                    builder.field("end", hashtag.getEnd());
                                    builder.endObject();
                                }
                                builder.endArray();
                            }
                            if (status.getContributors() != null && status.getContributors().length > 0) {
                                builder.array("contributor", status.getContributors());
                            }
                            if (status.getGeoLocation() != null) {
                                if (geoAsArray) {
                                    builder.startArray("location");
                                    builder.value(status.getGeoLocation().getLongitude());
                                    builder.value(status.getGeoLocation().getLatitude());
                                    builder.endArray();
                                } else {
                                    builder.startObject("location");
                                    builder.field("lat", status.getGeoLocation().getLatitude());
                                    builder.field("lon", status.getGeoLocation().getLongitude());
                                    builder.endObject();
                                }
                            }
                            if (status.getPlace() != null) {
                                builder.startObject("place");
                                builder.field("id", status.getPlace().getId());
                                builder.field("name", status.getPlace().getName());
                                builder.field("type", status.getPlace().getPlaceType());
                                builder.field("full_name", status.getPlace().getFullName());
                                builder.field("street_address", status.getPlace().getStreetAddress());
                                builder.field("country", status.getPlace().getCountry());
                                builder.field("country_code", status.getPlace().getCountryCode());
                                builder.field("url", status.getPlace().getURL());
                                builder.endObject();
                            }
                            if (status.getURLEntities() != null) {
                                builder.startArray("link");
                                for (URLEntity url : status.getURLEntities()) {
                                    if (url != null) {
                                        builder.startObject();
                                        if (url.getURL() != null) {
                                            builder.field("url", url.getURL());
                                        }
                                        if (url.getDisplayURL() != null) {
                                            builder.field("display_url", url.getDisplayURL());
                                        }
                                        if (url.getExpandedURL() != null) {
                                            builder.field("expand_url", url.getExpandedURL());
                                        }
                                        builder.field("start", url.getStart());
                                        builder.field("end", url.getEnd());
                                        builder.endObject();
                                    }
                                }
                                builder.endArray();
                            }

                            builder.startObject("user");
                            builder.field("id", status.getUser().getId());
                            builder.field("name", status.getUser().getName());
                            builder.field("screen_name", status.getUser().getScreenName());
                            builder.field("location", status.getUser().getLocation());
                            builder.field("description", status.getUser().getDescription());
                            builder.field("profile_image_url", status.getUser().getProfileImageURL());
                            builder.field("profile_image_url_https", status.getUser().getProfileImageURLHttps());

                            builder.endObject();

                            builder.endObject();
                            if (riverStatus != RiverStatus.STOPPED && riverStatus != RiverStatus.STOPPING) {
                                bulkProcessor.add(Requests.indexRequest(indexName).type(typeName)
                                        .id(Long.toString(status.getId())).source(builder));
                            }
                        }
                    }

                } catch (Exception e) {
                    logger.warn("failed to construct index request", e);
                }
            } else {
                logger.debug("river is closing. ignoring tweet [{}]", status.getId());
            }
        }

        @Override
        public void onDeletionNotice(StatusDeletionNotice statusDeletionNotice) {
            if (riverStatus != RiverStatus.STOPPED && riverStatus != RiverStatus.STOPPING) {
                if (statusDeletionNotice.getStatusId() != -1) {
                    bulkProcessor.add(Requests.deleteRequest(indexName).type(typeName)
                            .id(Long.toString(statusDeletionNotice.getStatusId())));
                }
            } else {
                logger.debug("river is closing. ignoring deletion of tweet [{}]",
                        statusDeletionNotice.getStatusId());
            }
        }

        @Override
        public void onTrackLimitationNotice(int numberOfLimitedStatuses) {
            logger.info("received track limitation notice, number_of_limited_statuses {}", numberOfLimitedStatuses);
        }

        @Override
        public void onException(Exception ex) {
            logger.warn("stream failure, restarting stream...", ex);
            threadPool.generic().execute(new Runnable() {
                @Override
                public void run() {
                    reconnect();
                }
            });
        }
    }

    /**
     * User-stream listener that forwards status, deletion and failure events to an
     * internal {@link StatusHandler}; every other user-stream callback keeps the
     * behaviour inherited from {@code UserStreamAdapter}.
     */
    private class UserStreamHandler extends UserStreamAdapter {

        /** Handler that does the actual work for the forwarded events. */
        private final StatusHandler delegate = new StatusHandler();

        /** Forwards the received status to the wrapped handler. */
        @Override
        public void onStatus(Status status) {
            delegate.onStatus(status);
        }

        /** Forwards the deletion notice to the wrapped handler. */
        @Override
        public void onDeletionNotice(StatusDeletionNotice statusDeletionNotice) {
            delegate.onDeletionNotice(statusDeletionNotice);
        }

        /** Forwards the stream failure to the wrapped handler. */
        @Override
        public void onException(Exception ex) {
            delegate.onException(ex);
        }
    }

    /**
     * Lifecycle states of the river.
     * <p>
     * The stream handlers ignore incoming events while the river is {@link #STOPPING}
     * or {@link #STOPPED}.
     */
    public enum RiverStatus {
        UNKNOWN,
        INITIALIZED,
        STARTING,
        RUNNING,
        /** Set at the beginning of {@code close()}. */
        STOPPING,
        /** Set once {@code close()} has finished. */
        STOPPED
    }
}