de.twitterlivesearch.twitter.TwitterStreamListener.java Source code

Java tutorial

Introduction

Here is the source code for de.twitterlivesearch.twitter.TwitterStreamListener.java

Source

/*
 * Copyright 2015 Tobias Larscheid, Jan Schmitz-Hermes, Felix Nordhusen, Florian Scheil
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package de.twitterlivesearch.twitter;

import java.io.IOException;
import java.util.List;

import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.IntField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.store.Directory;

import twitter4j.DirectMessage;
import twitter4j.StallWarning;
import twitter4j.Status;
import twitter4j.StatusDeletionNotice;
import twitter4j.StatusListener;
import twitter4j.User;
import twitter4j.UserList;
import twitter4j.UserStreamListener;
import de.twitterlivesearch.analysis.AnalyzerMapping;
import de.twitterlivesearch.analysis.FieldNames;
import de.twitterlivesearch.analysis.Searcher;
import de.twitterlivesearch.analysis.Tokenizer;
import de.twitterlivesearch.filter.FilterManager;
import de.twitterlivesearch.model.IdGenerator;
import de.twitterlivesearch.model.QueryManager;
import de.twitterlivesearch.model.QueryWrapper;
import de.twitterlivesearch.model.TweetHolder;

/**
 * This is a wrapper class which simplifies handling of UserStreamListener and
 * StatusListener in Twitter4J. Since twitter4j expects the user to decide which
 * listener should be used, we can easily use this listener for Gardenhose and
 * Userstream. <br />
 * There are several possibilities to handle different events in this class;
 * currently there is only an action if a new tweet is incoming, though. Please
 * feel free to add events as desired. <br />
 * <br />
 * Current implementation: <br />
 * <ul>
 * <li>A new incoming tweet is added to the index.</li>
 * <li>in case the buffer is full an old tweet will be deleted</li>
 * <li>the incoming tweet is checked against all registered queries. In case the
 * tweet matches a query, the listener of the query is invoked.</li>
 * </ul>
 * <br />
 *
 * @author schmitzhermes
 *
 */
public class TwitterStreamListener implements UserStreamListener, StatusListener {
    private static final Logger log = LogManager.getLogger();

    private final TweetHolder tweetHolder;
    private final IndexWriter iwriter;
    private final Searcher searcher;

    /**
     * Creates a listener that indexes incoming tweets and notifies the
     * listeners of all registered queries a tweet matches.
     *
     * @param directory
     *            not used by this class; the parameter is kept so that
     *            existing callers continue to compile
     * @param tweetHolder
     *            holder of the tweet objects, addressed by the generated id
     * @param iwriter
     *            writer used to add documents to and delete documents from
     *            the index
     * @param searcher
     *            searcher used to check a newly indexed tweet against the
     *            registered queries
     */
    public TwitterStreamListener(Directory directory, TweetHolder tweetHolder, IndexWriter iwriter,
            Searcher searcher) {
        this.tweetHolder = tweetHolder;
        this.iwriter = iwriter;
        this.searcher = searcher;
    }

    /**
     * Handles a new incoming tweet: tokenizes its text, stores the tweet in
     * the tweet holder, replaces the index document with the same id and
     * finally informs the listeners of every registered query the tweet
     * matches. Tweets whose text is empty after tokenizing are ignored.
     *
     * @param status
     *            the incoming tweet
     */
    @Override
    public void onStatus(Status status) {
        log.trace("Incoming Tweet: {}", status.getText());
        String textForDoc = StringUtils.join(Tokenizer.getTokensForString(status.getText(), status.getLang()),
                AnalyzerMapping.getInstance().TOKEN_DELIMITER);
        // in case the document is empty after tokenizing (i.e. just stopwords)
        // we should not add it to the index
        if (textForDoc.isEmpty()) {
            return;
        }

        Integer id = IdGenerator.getInstance().getNextId();
        storeTweet(id, status);
        // Only match queries when the index really contains the new document.
        // Otherwise the index may still hold the previous document stored
        // under this id, and a hit on it would be reported with the new tweet.
        if (indexTweet(id, textForDoc)) {
            notifyMatchingListeners(id);
        }
    }

    /**
     * Puts the tweet into the tweet holder: an existing slot with the given id
     * is overwritten, otherwise the tweet is appended.
     */
    private void storeTweet(Integer id, Status status) {
        int slot = id;
        if (slot >= 0 && slot < tweetHolder.getTweets().size()) {
            tweetHolder.getTweets().set(slot, status);
        } else {
            tweetHolder.getTweets().add(status);
        }
    }

    /**
     * Replaces the index document that has the given id with a new document
     * for the tokenized tweet text and commits the change.
     *
     * @return {@code true} if deleting, adding and committing all succeeded,
     *         {@code false} if any of these steps failed (the failure is
     *         logged)
     */
    private boolean indexTweet(Integer id, String textForDoc) {
        Document doc = new Document();
        doc.add(new IntField(FieldNames.ID.getField(), id, Field.Store.YES));
        log.trace("Indexing Document: {}", textForDoc);
        doc.add(new Field(FieldNames.TEXT.getField(), textForDoc, TextField.TYPE_NOT_STORED));

        boolean indexed = true;
        try {
            // deleting the old document that was stored under this id
            iwriter.deleteDocuments(NumericRangeQuery.newIntRange(FieldNames.ID.getField(), id, id, true, true));
            // writing the new document
            iwriter.addDocument(doc);
        } catch (IOException e) {
            log.fatal("Error when writing or deleting from the index!", e);
            indexed = false;
        }
        // The commit is attempted even after a failed write, exactly as
        // before, so that a delete which did succeed still gets committed.
        try {
            iwriter.commit();
        } catch (IOException e) {
            log.fatal("Error when trying to commit to index!", e);
            indexed = false;
        }
        return indexed;
    }

    /**
     * Checks the tweet with the given id against every registered query and
     * invokes the listener of each query wrapper whose filter accepts the
     * tweet.
     */
    private void notifyMatchingListeners(Integer id) {
        for (String queryString : QueryManager.getInstance().getQueries()) {
            List<Document> hits = searcher.searchForTweets(id, queryString);
            if (hits.size() > 1) {
                // logged but deliberately not thrown, so that the remaining
                // queries are still processed
                IllegalStateException e = new IllegalStateException(
                        "The query with string " + queryString + " and id " + id + " returned " + hits.size()
                                + " documents. This can only happen if id is not unique!");
                log.fatal("Error when searching", e);
            }
            for (Document document : hits) {
                // the tweet depends on the document only, so it is looked up
                // once instead of once per query wrapper
                Status tweet = tweetHolder.getTweet(document.get(FieldNames.ID.getField()));
                // invoking every action listener registered for the given
                // query (must actually be a single document, because id must
                // be unique!)
                for (QueryWrapper qw : QueryManager.getInstance().getQueryWrappersForQuery(queryString)) {
                    if (FilterManager.tweetMatchesFilter(tweet, qw.getFilter())) {
                        qw.getListener().handleNewTweet(tweet);
                        log.trace("Informed Listener {} that new tweet is incoming: {} (tokenized)",
                                qw.getListener(), queryString);
                    }
                }
            }
        }
    }

    @Override
    public void onDeletionNotice(StatusDeletionNotice statusDeletionNotice) {
        // not handled
    }

    @Override
    public void onTrackLimitationNotice(int numberOfLimitedStatuses) {
        // not handled
    }

    @Override
    public void onScrubGeo(long userId, long upToStatusId) {
        // not handled
    }

    /**
     * Logs the stall warning so that it does not go unnoticed.
     */
    @Override
    public void onStallWarning(StallWarning warning) {
        log.warn("Received stall warning from the Twitter stream: {}", warning);
    }

    /**
     * Logs the exception reported by the stream instead of silently dropping
     * it.
     */
    @Override
    public void onException(Exception ex) {
        log.error("Exception in the Twitter stream", ex);
    }

    @Override
    public void onDeletionNotice(long directMessageId, long userId) {
        // not handled
    }

    @Override
    public void onFriendList(long[] friendIds) {
        // not handled
    }

    @Override
    public void onFavorite(User source, User target, Status favoritedStatus) {
        // not handled
    }

    @Override
    public void onUnfavorite(User source, User target, Status unfavoritedStatus) {
        // not handled
    }

    @Override
    public void onFollow(User source, User followedUser) {
        // not handled
    }

    @Override
    public void onUnfollow(User source, User unfollowedUser) {
        // not handled
    }

    @Override
    public void onDirectMessage(DirectMessage directMessage) {
        // not handled
    }

    @Override
    public void onUserListMemberAddition(User addedMember, User listOwner, UserList list) {
        // not handled
    }

    @Override
    public void onUserListMemberDeletion(User deletedMember, User listOwner, UserList list) {
        // not handled
    }

    @Override
    public void onUserListSubscription(User subscriber, User listOwner, UserList list) {
        // not handled
    }

    @Override
    public void onUserListUnsubscription(User subscriber, User listOwner, UserList list) {
        // not handled
    }

    @Override
    public void onUserListCreation(User listOwner, UserList list) {
        // not handled
    }

    @Override
    public void onUserListUpdate(User listOwner, UserList list) {
        // not handled
    }

    @Override
    public void onUserListDeletion(User listOwner, UserList list) {
        // not handled
    }

    @Override
    public void onUserProfileUpdate(User updatedUser) {
        // not handled
    }

    @Override
    public void onUserSuspension(long suspendedUser) {
        // not handled
    }

    @Override
    public void onUserDeletion(long deletedUser) {
        // not handled
    }

    @Override
    public void onBlock(User source, User blockedUser) {
        // not handled
    }

    @Override
    public void onUnblock(User source, User unblockedUser) {
        // not handled
    }

}