trendulo.ingest.twitter.TwitterFileStringSequenceSource.java Source code

Java tutorial

Introduction

Here is the source code for trendulo.ingest.twitter.TwitterFileStringSequenceSource.java

Source

// =================================================================================================
// Copyright 2012 Jared Winick
// -------------------------------------------------------------------------------------------------
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this work except in compliance with the License.
// You may obtain a copy of the License in the LICENSE file, or at:
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =================================================================================================

package trendulo.ingest.twitter;

import java.io.BufferedReader;
import java.io.Closeable;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.zip.GZIPInputStream;

import trendulo.ingest.TemporalStringSequence;
import trendulo.ingest.TemporalStringSequenceSource;
import twitter4j.Status;
import twitter4j.TwitterException;
import twitter4j.internal.logging.Logger;
import twitter4j.json.DataObjectFactory;

/**
 * A {@link TemporalStringSequenceSource} that reads JSON-serialized Twitter statuses, one per
 * line, from a plain-text or GZIP-compressed file and exposes each accepted status as a
 * {@link TemporalStringSequence} built from the status text and creation time.
 *
 * <p>The underlying file stays open for the lifetime of this object; call {@link #close()}
 * when done to release it.
 */
public class TwitterFileStringSequenceSource implements TemporalStringSequenceSource, Closeable {

    private static final Logger log = Logger.getLogger(TwitterFileStringSequenceSource.class);

    /** Charset used to decode the file; passed by name so no extra import is required. */
    private static final String FILE_CHARSET = "UTF-8";

    private final String twitterFilePath;
    private final BufferedReader bufferedReader;
    /** Optional filter; {@code null} means every status is accepted. */
    private final StatusFilter statusFilter;

    /**
     * Creates a source that accepts every status in the file.
     *
     * @param twitterFilePath The full filesystem path or path on the classpath to the file of
     *    JSON-serialized Twitter Statuses.
     * @throws IOException if the file cannot be found or opened
     */
    public TwitterFileStringSequenceSource(String twitterFilePath) throws IOException {
        this(twitterFilePath, null);
    }

    /**
     * Creates a new TwitterFileStringSequenceSource. This class reads a file that has been
     * created by writing the stream of JSON strings from the Twitter Streaming API.
     *
     * @param twitterFilePath The full filesystem path or path on the classpath to the file of
     *    JSON-serialized Twitter Statuses. The file system is tried first, then the classpath.
     *    This file can be plain-text or GZIPed (designated by a .gz extension)
     * @param statusFilter a StatusFilter to accept/reject Status messages, or {@code null} to
     *    accept every status
     * @throws FileNotFoundException if the path resolves on neither the file system nor the
     *    classpath
     * @throws IOException if the file cannot be opened or is not valid GZIP data
     */
    public TwitterFileStringSequenceSource(String twitterFilePath, StatusFilter statusFilter) throws IOException {
        this.twitterFilePath = twitterFilePath;
        this.statusFilter = statusFilter;

        InputStream inputStream = openRawStream(twitterFilePath);
        BufferedReader reader;
        try {
            if (twitterFilePath.endsWith(".gz")) {
                inputStream = new GZIPInputStream(inputStream);
            }
            log.debug("Opening file: " + twitterFilePath);
            // Decode explicitly as UTF-8 instead of relying on the platform default charset,
            // which would corrupt non-ASCII text on hosts that default to something else.
            reader = new BufferedReader(new InputStreamReader(inputStream, FILE_CHARSET));
        } catch (IOException e) {
            // Do not leak the already-opened stream when wrapping it fails
            // (for example a .gz file with a corrupt header).
            closeQuietly(inputStream);
            throw e;
        }
        this.bufferedReader = reader;

        // NOTE(review): presumably needed by twitter4j's DataObjectFactory; this is a JVM-wide
        // setting, kept exactly as before - confirm it is still required.
        System.setProperty("twitter4j.jsonStoreEnabled", "true");
    }

    /**
     * Returns the next accepted status from the file as a {@link TemporalStringSequence}.
     *
     * <p>Blank lines and lines that cannot be parsed as a status are skipped (parse failures
     * are logged) so that one bad line is not mistaken for the end of the file. Statuses
     * rejected by the filter are skipped as well.
     *
     * @return the next sequence, or {@code null} when the end of the file has been reached or
     *    reading from the file failed (the failure is logged)
     */
    public TemporalStringSequence nextStringSequence() {
        try {
            String line;
            while ((line = bufferedReader.readLine()) != null) {
                if (line.trim().length() == 0) {
                    // Nothing to parse on an empty line; move on without logging an error.
                    continue;
                }

                Status status;
                try {
                    status = DataObjectFactory.createStatus(line);
                } catch (TwitterException e) {
                    // Keep reading: returning null here would look like end-of-file to callers.
                    log.error("Error parsing JSON status string: " + line, e);
                    continue;
                }

                // Every status is accepted if there is no filter
                if (statusFilter == null || statusFilter.accept(status)) {
                    return new TemporalStringSequence(status.getText(), status.getCreatedAt().getTime());
                }
            }
        } catch (IOException e) {
            log.error("Error reading from file: " + twitterFilePath, e);
        }

        return null;
    }

    /**
     * Closes the underlying file.
     *
     * @throws IOException if closing the reader fails
     */
    public void close() throws IOException {
        bufferedReader.close();
    }

    /**
     * Opens the raw (not yet decompressed) stream for the given path, trying the file system
     * first and the classpath second.
     */
    private InputStream openRawStream(String path) throws FileNotFoundException {
        InputStream inputStream;

        // Check if the file exists on the file system at the specified path
        try {
            inputStream = new FileInputStream(path);
        } catch (FileNotFoundException e) {
            log.debug(String.format("Absolute path [%s] not found. Trying classpath...", path));
            inputStream = getClass().getClassLoader().getResourceAsStream(path);
        }

        // A null stream at this point means the classpath lookup failed too
        if (inputStream == null) {
            throw new FileNotFoundException(
                    String.format("File [%s] not found on file system or classpath", path));
        }
        return inputStream;
    }

    /** Closes the stream, logging instead of propagating any failure. */
    private static void closeQuietly(InputStream inputStream) {
        try {
            inputStream.close();
        } catch (IOException closeFailure) {
            log.debug("Failed to close stream after open error: " + closeFailure.getMessage());
        }
    }

}