org.apache.streams.twitter.provider.TwitterStreamProvider.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.streams.twitter.provider.TwitterStreamProvider.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.streams.twitter.provider;

import org.apache.streams.config.ComponentConfigurator;
import org.apache.streams.config.StreamsConfiguration;
import org.apache.streams.config.StreamsConfigurator;
import org.apache.streams.core.DatumStatus;
import org.apache.streams.core.DatumStatusCountable;
import org.apache.streams.core.DatumStatusCounter;
import org.apache.streams.core.StreamsDatum;
import org.apache.streams.core.StreamsProvider;
import org.apache.streams.core.StreamsResultSet;
import org.apache.streams.jackson.StreamsJacksonMapper;
import org.apache.streams.twitter.TwitterStreamConfiguration;
import org.apache.streams.twitter.converter.TwitterDateTimeFormat;
import org.apache.streams.util.ComponentUtils;

import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.base.Preconditions;
import com.google.common.util.concurrent.Uninterruptibles;
import com.twitter.hbc.ClientBuilder;
import com.twitter.hbc.core.Constants;
import com.twitter.hbc.core.Hosts;
import com.twitter.hbc.core.HttpHosts;
import com.twitter.hbc.core.endpoint.StatusesFilterEndpoint;
import com.twitter.hbc.core.endpoint.StatusesFirehoseEndpoint;
import com.twitter.hbc.core.endpoint.StatusesSampleEndpoint;
import com.twitter.hbc.core.endpoint.StreamingEndpoint;
import com.twitter.hbc.core.endpoint.UserstreamEndpoint;
import com.twitter.hbc.httpclient.BasicClient;
import com.twitter.hbc.httpclient.auth.Authentication;
import com.twitter.hbc.httpclient.auth.BasicAuth;
import com.twitter.hbc.httpclient.auth.OAuth1;
import com.typesafe.config.Config;
import com.typesafe.config.ConfigFactory;
import com.typesafe.config.ConfigParseOptions;
import org.apache.commons.lang.NotImplementedException;
import org.joda.time.DateTime;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.PrintStream;
import java.io.Serializable;
import java.math.BigInteger;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Objects;
import java.util.Queue;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
import java.util.concurrent.LinkedBlockingDeque;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;

/**
 * TwitterStreamProvider wraps a hosebird client and passes received documents
 * to subscribing components.
 */
public class TwitterStreamProvider implements StreamsProvider, Serializable, DatumStatusCountable {

    /** Identifier returned by {@link #getId()}. */
    public static final String STREAMS_ID = "TwitterStreamProvider";

    /** Logger shared by all instances of this class. */
    private static final Logger LOGGER = LoggerFactory.getLogger(TwitterStreamProvider.class);

    /**
     * Command-line entry point: connects to the configured Twitter stream and writes every
     * received document to a file, one JSON string per line.
     *
     * <p>Supply (at least) the following required configuration in application.conf:
     * <pre>
     * twitter.oauth.consumerKey
     * twitter.oauth.consumerSecret
     * twitter.oauth.accessToken
     * twitter.oauth.accessTokenSecret
     * </pre>
     *
     * <p>Launch using:
     * <pre>
     * mvn exec:java -Dexec.mainClass=org.apache.streams.twitter.provider.TwitterStreamProvider -Dexec.args="application.conf tweets.json"
     * </pre>
     *
     * @param args {@code args[0]} is the path of the configuration file,
     *             {@code args[1]} is the path of the output file
     * @throws IllegalArgumentException if fewer than two arguments are supplied
     */
    public static void main(String[] args) {

        Preconditions.checkArgument(args.length >= 2);

        String configfile = args[0];
        String outfile = args[1];

        Config reference = ConfigFactory.load();
        File file = new File(configfile);
        assert (file.exists());
        Config testResourceConfig = ConfigFactory.parseFileAnySyntax(file,
                ConfigParseOptions.defaults().setAllowMissing(false));

        // Values from the supplied file take precedence over the default configuration.
        Config typesafe = testResourceConfig.withFallback(reference).resolve();

        StreamsConfiguration streamsConfiguration = StreamsConfigurator.detectConfiguration(typesafe);
        TwitterStreamConfiguration config = new ComponentConfigurator<>(TwitterStreamConfiguration.class)
                .detectConfiguration(typesafe, "twitter");
        TwitterStreamProvider provider = new TwitterStreamProvider(config);

        ObjectMapper mapper = StreamsJacksonMapper
                .getInstance(Collections.singletonList(TwitterDateTimeFormat.TWITTER_FORMAT));

        // try-with-resources closes (and therefore flushes) the output file on every exit path.
        try (PrintStream outStream = new PrintStream(new BufferedOutputStream(new FileOutputStream(outfile)))) {
            provider.prepare(config);
            provider.startStream();
            try {
                do {
                    // Wait one batch interval, then write out whatever arrived in the meantime.
                    Uninterruptibles.sleepUninterruptibly(streamsConfiguration.getBatchFrequencyMs(),
                            TimeUnit.MILLISECONDS);
                    for (StreamsDatum datum : provider.readCurrent()) {
                        try {
                            String json = mapper.writeValueAsString(datum.getDocument());
                            outStream.println(json);
                        } catch (JsonProcessingException ex) {
                            // A single unserialisable document must not abort the whole run.
                            LOGGER.warn("Unable to serialize document", ex);
                        }
                    }
                } while (provider.isRunning());
            } finally {
                // Stop the client even when the read loop fails unexpectedly.
                provider.cleanUp();
            }
        } catch (FileNotFoundException ex) {
            LOGGER.error("FileNotFoundException", ex);
        }
    }

    /** Capacity of the provider queue and the item-count threshold checked while draining it. */
    private static final int MAX_BATCH = 1000;

    /** Configuration read by {@link #prepare(Object)} to select the endpoint and credentials. */
    private TwitterStreamConfiguration config;

    /**
     * Returns the configuration this provider currently holds.
     *
     * @return the current stream configuration
     */
    public TwitterStreamConfiguration getConfig() {
        return this.config;
    }

    /**
     * Replaces the configuration held by this provider.
     *
     * @param config the stream configuration to use from now on
     */
    public void setConfig(TwitterStreamConfiguration config) {
        this.config = config;
    }

    // Futures offered by addDatum() and consumed by pollForDatum(); allocated in prepare().
    private volatile Queue<Future<List<StreamsDatum>>> providerQueue;

    // Credentials chosen in prepare(): basic auth when configured, otherwise OAuth1.
    private Authentication auth;
    // Streaming endpoint selected in prepare() from config.getEndpoint().
    protected StreamingEndpoint endpoint;
    // Hosebird client built at the end of prepare(); unassigned until then.
    private BasicClient client;
    // Set to true by startStream() and back to false by cleanUp().
    private AtomicBoolean running = new AtomicBoolean(false);
    // Processor handed to the client builder in prepare().
    protected TwitterStreamHelper processor = new TwitterStreamHelper(this);
    // Status counts recorded by addDatum() since the last readCurrent() call.
    private DatumStatusCounter countersCurrent = new DatumStatusCounter();
    // Running total; countersCurrent is added to it on every readCurrent() call.
    private DatumStatusCounter countersTotal = new DatumStatusCounter();

    /**
     * Creates a provider whose configuration is detected from the global
     * {@code StreamsConfigurator.config} under the {@code "twitter"} path.
     */
    public TwitterStreamProvider() {
        ComponentConfigurator<TwitterStreamConfiguration> configurator =
                new ComponentConfigurator<>(TwitterStreamConfiguration.class);
        this.config = configurator.detectConfiguration(StreamsConfigurator.config, "twitter");
    }

    /**
     * Creates a provider that uses the supplied configuration.
     *
     * @param config the stream configuration to hold
     */
    public TwitterStreamProvider(TwitterStreamConfiguration config) {
        this.config = config;
    }

    /**
     * Returns the identifier of this provider.
     *
     * @return {@link #STREAMS_ID}
     */
    @Override
    public String getId() {
        return TwitterStreamProvider.STREAMS_ID;
    }

    /**
     * Connects the client and then marks this provider as running.
     *
     * <p>The client is only assigned in {@link #prepare(Object)}, so that method has to
     * complete successfully before this one is called.
     */
    @Override
    public void startStream() {
        this.client.connect();
        this.running.set(true);
    }

    /**
     * Drains the datums that are currently queued into a new result set.
     *
     * <p>The result set gets a fresh counter holding the statuses recorded since the previous
     * call; those statuses are also added to the running total, after which the per-batch
     * counter is reset.
     *
     * @return a result set wrapping the drained datums
     */
    @Override
    public synchronized StreamsResultSet readCurrent() {
        // The method-level monitor already guards the whole body.
        Queue<StreamsDatum> drained = new LinkedBlockingDeque<>();
        drainTo(drained);

        StreamsResultSet resultSet = new StreamsResultSet(drained);
        resultSet.setCounter(new DatumStatusCounter());
        resultSet.getCounter().add(countersCurrent);

        countersTotal.add(countersCurrent);
        countersCurrent = new DatumStatusCounter();

        return resultSet;
    }

    /**
     * Not supported by this provider.
     *
     * @param sequence unused
     * @return never returns normally
     * @throws NotImplementedException always
     */
    @Override
    public StreamsResultSet readNew(BigInteger sequence) {
        throw new NotImplementedException();
    }

    /**
     * Not supported by this provider.
     *
     * @param start unused
     * @param end unused
     * @return never returns normally
     * @throws NotImplementedException always
     */
    @Override
    public StreamsResultSet readRange(DateTime start, DateTime end) {
        throw new NotImplementedException();
    }

    /**
     * Reports whether the stream has been started and the client has not finished.
     *
     * @return {@code true} while the running flag is set and the client is not done
     */
    @Override
    public boolean isRunning() {
        if (!running.get()) {
            return false;
        }
        return !client.isDone();
    }

    /**
     * Resolves endpoint and credentials from the held configuration and builds the client.
     *
     * <p>Endpoint selection by {@code config.getEndpoint()}:
     * <ul>
     *   <li>{@code "userstream"}: a user stream endpoint on the user stream host;</li>
     *   <li>{@code "sample"}: a filter endpoint when track terms or followings are configured,
     *       otherwise the sample endpoint;</li>
     *   <li>anything ending in {@code "firehose"}: the firehose endpoint.</li>
     * </ul>
     * Basic auth is preferred over OAuth when both are configured. When no endpoint or no
     * credentials can be resolved an error is logged and the method returns without building
     * the client.
     *
     * @param configurationObject not read by this implementation
     * @throws NullPointerException if the endpoint or a required credential value is null
     */
    @Override
    public void prepare(Object configurationObject) {

        final String endpointName = Objects.requireNonNull(config.getEndpoint());

        final Hosts hosebirdHosts;
        if ("userstream".equals(endpointName)) {

            hosebirdHosts = new HttpHosts(Constants.USERSTREAM_HOST);

            UserstreamEndpoint userstream = new UserstreamEndpoint();
            userstream.withFollowings(true);
            userstream.withUser(false);
            userstream.allReplies(false);
            this.endpoint = userstream;

        } else if ("sample".equals(endpointName)) {

            hosebirdHosts = new HttpHosts(Constants.STREAM_HOST);

            boolean hasTrack = config.getTrack() != null && !config.getTrack().isEmpty();
            boolean hasFollow = config.getFollow() != null && !config.getFollow().isEmpty();

            if (!hasTrack && !hasFollow) {
                // Nothing to filter on: fall back to the plain sample stream.
                this.endpoint = new StatusesSampleEndpoint();
            } else {
                LOGGER.debug("***\tPRESENT\t***");
                StatusesFilterEndpoint filter = new StatusesFilterEndpoint();
                if (hasTrack) {
                    filter.trackTerms(config.getTrack());
                }
                if (hasFollow) {
                    filter.followings(config.getFollow());
                }
                this.endpoint = filter;
            }

        } else if (endpointName.endsWith("firehose")) {

            hosebirdHosts = new HttpHosts(Constants.STREAM_HOST);
            this.endpoint = new StatusesFirehoseEndpoint();

        } else {
            LOGGER.error("NO ENDPOINT RESOLVED");
            return;
        }

        if (config.getBasicauth() != null) {

            Objects.requireNonNull(config.getBasicauth().getUsername());
            Objects.requireNonNull(config.getBasicauth().getPassword());

            this.auth = new BasicAuth(
                    config.getBasicauth().getUsername(),
                    config.getBasicauth().getPassword());

        } else if (config.getOauth() != null) {

            Objects.requireNonNull(config.getOauth().getConsumerKey());
            Objects.requireNonNull(config.getOauth().getConsumerSecret());
            Objects.requireNonNull(config.getOauth().getAccessToken());
            Objects.requireNonNull(config.getOauth().getAccessTokenSecret());

            this.auth = new OAuth1(
                    config.getOauth().getConsumerKey(),
                    config.getOauth().getConsumerSecret(),
                    config.getOauth().getAccessToken(),
                    config.getOauth().getAccessTokenSecret());

        } else {
            LOGGER.error("NO AUTH RESOLVED");
            return;
        }

        LOGGER.debug("host={}\tendpoint={}\taut={}", hosebirdHosts, endpoint, auth);

        this.providerQueue = new LinkedBlockingQueue<>(MAX_BATCH);

        ClientBuilder builder = new ClientBuilder()
                .name("apache/streams/streams-contrib/streams-provider-twitter")
                .hosts(hosebirdHosts)
                .endpoint(endpoint)
                .authentication(auth)
                .connectionTimeout(1200000)
                .processor(processor);
        this.client = builder.build();

    }

    /**
     * Shuts the provider down: stops the client, cleans up the processor and finally clears
     * the running flag.
     */
    @Override
    public void cleanUp() {
        client.stop();
        processor.cleanUp();
        running.set(false);
    }

    /**
     * Returns the running total of datum statuses.
     *
     * <p>The total is only updated by {@link #readCurrent()}, so statuses recorded since the
     * last read are not yet included.
     *
     * @return the cumulative status counter
     */
    @Override
    public DatumStatusCounter getDatumStatusCounter() {
        return this.countersTotal;
    }

    /**
     * Places a pending batch of datums on the provider queue and records the outcome in the
     * current status counter.
     *
     * @param future the pending result to enqueue
     * @return {@code true} if the future was enqueued, {@code false} if enqueueing threw
     */
    protected boolean addDatum(Future<List<StreamsDatum>> future) {
        try {
            ComponentUtils.offerUntilSuccess(future, providerQueue);
            countersCurrent.incrementStatus(DatumStatus.SUCCESS);
            return true;
        } catch (Exception ex) {
            countersCurrent.incrementStatus(DatumStatus.FAIL);
            // Pass the exception along so the reason for the failure is not lost.
            LOGGER.warn("Unable to enqueue item from Twitter stream", ex);
            return false;
        }
    }

    /**
     * Moves datums from the queued futures into {@code drain} until the provider queue is
     * empty or at least {@code MAX_BATCH} datums have been moved.
     *
     * <p>Each polled future contributes its whole list, so the final count may exceed
     * {@code MAX_BATCH} by up to the size of the last list minus one.
     *
     * @param drain the queue that receives the datums
     */
    protected void drainTo(Queue<StreamsDatum> drain) {
        int count = 0;
        // Strictly less-than: once MAX_BATCH datums have been moved the batch is full, so
        // no further future is polled.
        while (!providerQueue.isEmpty() && count < MAX_BATCH) {
            for (StreamsDatum datum : pollForDatum()) {
                ComponentUtils.offerUntilSuccess(datum, drain);
                count++;
            }
        }
    }

    /**
     * Removes the next future from the provider queue and waits for its result.
     *
     * <p>If the wait is interrupted, the provider is cleaned up and the thread's interrupt
     * flag is restored.
     *
     * @return the datums produced by the future, or an empty list when the queue is empty,
     *         the wait is interrupted, or the future completed exceptionally
     */
    protected List<StreamsDatum> pollForDatum() {
        Future<List<StreamsDatum>> next = providerQueue.poll();
        if (next == null) {
            // poll() returns null on an empty queue; treat that as "nothing to deliver".
            return new ArrayList<>();
        }
        try {
            return next.get();
        } catch (InterruptedException ex) {
            LOGGER.warn("Interrupted while waiting for future.  Initiate shutdown.");
            this.cleanUp();
            Thread.currentThread().interrupt();
            return new ArrayList<>();
        } catch (ExecutionException ex) {
            // Log the cause so failures inside the processing task can be diagnosed.
            LOGGER.warn("Error getting tweet from future", ex);
            return new ArrayList<>();
        }
    }
}