trendulo.ingest.twitter.TwitterStreamingStringSequenceSource.java Source code

Java tutorial

Introduction

Here is the source code for trendulo.ingest.twitter.TwitterStreamingStringSequenceSource.java

Source

// =================================================================================================
// Copyright 2012 Jared Winick
// -------------------------------------------------------------------------------------------------
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this work except in compliance with the License.
// You may obtain a copy of the License in the LICENSE file, or at:
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =================================================================================================

package trendulo.ingest.twitter;

import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
import java.util.zip.GZIPOutputStream;

import org.apache.log4j.Logger;

import trendulo.ingest.TemporalStringSequence;
import trendulo.ingest.TemporalStringSequenceSource;
import twitter4j.FilterQuery;
import twitter4j.Status;
import twitter4j.StatusDeletionNotice;
import twitter4j.StatusListener;
import twitter4j.TwitterStream;
import twitter4j.TwitterStreamFactory;
import twitter4j.auth.Authorization;
import twitter4j.auth.BasicAuthorization;
import twitter4j.json.DataObjectFactory;

/**
 * A {@link TemporalStringSequenceSource} backed by the Twitter Streaming API.
 *
 * <p>The instance registers itself as a {@link StatusListener} on a {@link TwitterStream}.
 * Every status delivered to {@link #onStatus(Status)} is optionally archived as one line of
 * raw JSON and then offered to a bounded in-memory queue. Consumers drain that queue through
 * {@link #nextStringSequence()}. When the queue is full, newly arriving statuses are dropped.
 *
 * <p>Statuses are produced by the Twitter4J callback (via {@code onStatus}) and consumed by
 * whoever calls {@code nextStringSequence}; the hand-off between the two goes through a
 * {@link BlockingQueue}.
 */
public class TwitterStreamingStringSequenceSource implements TemporalStringSequenceSource, StatusListener {

    private static final Logger log = Logger.getLogger(TwitterStreamingStringSequenceSource.class);

    /** Capacity of the queue that buffers statuses between the stream and the consumer. */
    public final static int DEFAULT_BUFFER_SIZE = 10000;

    /** Charset name used for archived JSON lines. */
    private static final String ARCHIVE_CHARSET = "UTF-8";

    private final BlockingQueue<Status> statusQueue;
    private final Authorization auth;
    private final TwitterStream twitterStream;
    /** Destination for archived raw JSON, or {@code null} when archiving is disabled or failed to open. */
    private final OutputStream archiveOutputStream;
    /** Optional filter applied in {@link #nextStringSequence()}; {@code null} accepts everything. */
    private final StatusFilter statusFilter;

    /**
     * Creates a source with no archive file and no status filter.
     *
     * @param username Twitter account name used for basic authorization
     * @param password Twitter account password used for basic authorization
     */
    public TwitterStreamingStringSequenceSource(String username, String password) {
        this(username, password, null, null);
    }

    /**
     * Creates a source that archives raw statuses to a file and applies no status filter.
     *
     * @param username Twitter account name used for basic authorization
     * @param password Twitter account password used for basic authorization
     * @param archiveTwitterFilePath path of the archive file to create, or {@code null} to disable archiving
     * @throws RuntimeException if the archive file already exists
     */
    public TwitterStreamingStringSequenceSource(String username, String password, String archiveTwitterFilePath) {
        this(username, password, archiveTwitterFilePath, null);
    }

    /**
     * Creates a source, opens the optional archive file, and starts the filtered Twitter stream.
     *
     * <p>If the archive file cannot be created, the error is logged and the source runs without
     * archiving. The stream is started from within this constructor, so status callbacks may
     * begin arriving before the constructor returns.
     *
     * @param username Twitter account name used for basic authorization
     * @param password Twitter account password used for basic authorization
     * @param archiveTwitterFilePath path of the archive file to create, or {@code null} to disable
     *            archiving; a path ending in {@code "gz"} is written gzip-compressed
     * @param statusFilter filter deciding which statuses are returned by
     *            {@link #nextStringSequence()}, or {@code null} to accept every status
     * @throws RuntimeException if the archive file already exists
     */
    public TwitterStreamingStringSequenceSource(String username, String password, String archiveTwitterFilePath,
            StatusFilter statusFilter) {

        this.statusFilter = statusFilter;
        this.statusQueue = new ArrayBlockingQueue<Status>(DEFAULT_BUFFER_SIZE);

        // If the user wants status messages archived, create an OutputStream
        this.archiveOutputStream = openArchive(archiveTwitterFilePath);

        // Configure the Twitter Streaming API client

        // this system property is required to access raw json
        System.setProperty("twitter4j.jsonStoreEnabled", "true");

        this.auth = new BasicAuthorization(username, password);
        this.twitterStream = new TwitterStreamFactory().getInstance(auth);
        this.twitterStream.addListener(this);

        // We only want status messages with a geo. This needs to be pulled
        // out so it is configurable
        // Right now just the lower 48 of the US
        FilterQuery filterQuery = new FilterQuery();
        double[][] locations = { { -126.0, 24.0 }, { -67.0, 49.0 } };
        filterQuery.locations(locations);
        this.twitterStream.filter(filterQuery);
    }

    /**
     * Opens the archive output stream for the given path.
     *
     * @param archiveTwitterFilePath archive path, or {@code null} when archiving is not requested
     * @return the opened stream, or {@code null} if no path was given or the file could not be created
     * @throws RuntimeException if a file already exists at the path, so an archive is never clobbered
     */
    private static OutputStream openArchive(String archiveTwitterFilePath) {
        if (archiveTwitterFilePath == null) {
            return null;
        }

        // Verify that the file doesn't already exist so we don't clobber the archive
        if (new File(archiveTwitterFilePath).exists()) {
            throw new RuntimeException(
                    "Archive File Already Exists. Please move/rename before starting Ingest: "
                            + archiveTwitterFilePath);
        }

        FileOutputStream fos = null;
        try {
            fos = new FileOutputStream(archiveTwitterFilePath);
            if (archiveTwitterFilePath.endsWith("gz")) {
                // The GZIPOutputStream constructor writes the gzip header and can therefore fail
                return new GZIPOutputStream(new BufferedOutputStream(fos));
            }
            return fos;
        } catch (IOException e) {
            log.error("Error creating archive file", e);
            // Do not leak the file handle if wrapping the stream failed
            if (fos != null) {
                try {
                    fos.close();
                } catch (IOException closeError) {
                    log.error("Error cleaning up OutputStream", closeError);
                }
            }
            return null;
        }
    }

    /**
     * Stops the Twitter stream and closes the archive file, if one is open.
     *
     * <p>The archive is closed even if stopping the stream throws.
     */
    public void shutdown() {
        try {
            twitterStream.cleanUp();
        } finally {
            if (archiveOutputStream != null) {
                try {
                    archiveOutputStream.close();
                } catch (IOException e) {
                    log.error("Error cleaning up OutputStream", e);
                }
            }
        }
    }

    /**
     * Blocks until a status accepted by the configured {@link StatusFilter} is available and
     * returns its text and creation time.
     *
     * <p>Statuses rejected by the filter are discarded and the method keeps waiting. With no
     * filter, every status is accepted.
     *
     * @return the next accepted status as a {@link TemporalStringSequence}, or {@code null} if the
     *         calling thread is interrupted while waiting (the interrupt flag is restored)
     */
    @Override
    public TemporalStringSequence nextStringSequence() {
        while (true) {
            final Status status;
            try {
                // Pull a status off the queue. Block until available
                status = statusQueue.take();
            } catch (InterruptedException e) {
                // Restore the flag so callers can observe the interruption, and stop waiting
                // instead of re-entering the blocking take()
                Thread.currentThread().interrupt();
                log.error("Interrupted while waiting for a status", e);
                return null;
            }

            // Every status is accepted if there is no filter
            if (statusFilter == null || statusFilter.accept(status)) {
                return new TemporalStringSequence(status.getText(), status.getCreatedAt().getTime());
            }
        }
    }

    /**
     * Logs an error reported by the Twitter stream.
     *
     * @param ex the exception reported by the stream
     */
    @Override
    public void onException(Exception ex) {
        log.error("Twitter stream reported an error", ex);
    }

    /**
     * Archives the status (when archiving is enabled) and offers it to the consumer queue.
     *
     * @param status the status received from the stream
     */
    @Override
    public void onStatus(Status status) {
        // Save the status to disk if the user has specified an archive file
        if (archiveOutputStream != null) {
            archiveStatus(status);
        }

        // Put the message on the status queue if there is room; otherwise it is dropped so
        // that the stream callback never blocks
        if (!statusQueue.offer(status) && log.isDebugEnabled()) {
            log.debug("Status queue full, dropping status");
        }
    }

    /**
     * Appends the raw JSON of a status to the archive as a single UTF-8 encoded line.
     *
     * <p>Write failures are logged and otherwise ignored so that archiving problems do not
     * interrupt ingest.
     *
     * @param status the status to archive
     */
    private void archiveStatus(Status status) {
        String json = DataObjectFactory.getRawJSON(status);
        log.trace(json);

        // Twitter4J may have no raw JSON stored for this object; skip rather than fail on null
        if (json == null) {
            log.warn("No raw JSON available for status, skipping archive");
            return;
        }

        try {
            // Explicit charset so non-ASCII tweet text does not depend on the platform default.
            // UnsupportedEncodingException is an IOException and is handled below.
            archiveOutputStream.write((json + "\n").getBytes(ARCHIVE_CHARSET));
        } catch (IOException e) {
            log.error("Error writing to archive", e);
        }
    }

    /**
     * Ignored; deletion notices are not processed by this source.
     *
     * @param statusDeletionNotice the deletion notice received from the stream
     */
    @Override
    public void onDeletionNotice(StatusDeletionNotice statusDeletionNotice) {
        // Intentionally a no-op
    }

    /**
     * Logs the number reported in a track limitation notice.
     *
     * @param numberOfLimitedStatuses the count reported by the stream
     */
    @Override
    public void onTrackLimitationNotice(int numberOfLimitedStatuses) {
        log.info("LIMITED STATUSES: " + numberOfLimitedStatuses);
    }

    /**
     * Ignored; scrub-geo notices are not processed by this source.
     *
     * @param userId the user id carried by the notice
     * @param upToStatusId the status id carried by the notice
     */
    @Override
    public void onScrubGeo(long userId, long upToStatusId) {
        // Intentionally a no-op
    }
}