io.rakam.datasource.twitter.TweetProcessor.java Source code

Java tutorial

Introduction

Here is the source code for io.rakam.datasource.twitter.TweetProcessor.java

Source

/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.rakam.datasource.twitter;

import com.google.common.base.Throwables;
import io.rakam.ApiClient;
import io.rakam.ApiException;
import io.rakam.client.api.CollectApi;
import io.rakam.client.model.Event;
import io.rakam.client.model.EventContext;
import io.rakam.client.model.EventList;
import twitter4j.GeoLocation;
import twitter4j.HashtagEntity;
import twitter4j.Place;
import twitter4j.StallWarning;
import twitter4j.Status;
import twitter4j.StatusDeletionNotice;
import twitter4j.StatusListener;
import twitter4j.URLEntity;
import twitter4j.User;
import twitter4j.UserMentionEntity;

import java.time.Duration;
import java.time.LocalDateTime;
import java.time.temporal.ChronoUnit;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import java.util.Queue;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.stream.Collectors;

class TweetProcessor implements StatusListener {
    private final Logger LOGGER = Logger.getLogger(TweetProcessor.class.getName());
    private final Classify classifier = new Classify();
    private final CollectApi eventApi;
    private final EventContext eventContext;
    private final Queue<Event> buffer;
    private static final long maxFlushDurationInMillis = Duration.ofSeconds(10).toMillis();
    private static LocalDateTime lastFlush;
    private final AtomicInteger counter;
    private final String collection;

    public TweetProcessor(String apiUrl, String apiKey, AtomicInteger counter, String collection) {
        ApiClient apiClient = new ApiClient();
        apiClient.setBasePath(apiUrl);
        eventApi = new CollectApi(apiClient);
        this.counter = counter;
        this.collection = collection;
        buffer = new ConcurrentLinkedQueue<>();
        eventContext = new EventContext();
        eventContext.apiKey(apiKey);
    }

    @Override
    public void onStatus(Status status) {
        Map<String, Object> map = new HashMap<>();

        GeoLocation geoLocation = status.getGeoLocation();
        if (geoLocation != null) {
            map.put("latitude", geoLocation.getLatitude());
            map.put("longitude", geoLocation.getLongitude());
        }

        map.put("_time", status.getCreatedAt().getTime());
        Place place = status.getPlace();
        if (place != null) {
            map.put("country_code", place.getCountryCode());
            map.put("place", place.getName());
            map.put("place_type", place.getPlaceType());
            map.put("place_id", place.getId());
        }

        User user = status.getUser();
        map.put("_user", user.getId());
        map.put("user_lang", user.getLang());
        map.put("user_created", user.getCreatedAt());
        map.put("user_followers", user.getFollowersCount());
        map.put("user_status_count", user.getStatusesCount());
        map.put("user_verified", user.isVerified());

        map.put("id", status.getId());
        map.put("is_reply", status.getInReplyToUserId() > -1);
        map.put("is_retweet", status.isRetweet());
        map.put("has_media", status.getMediaEntities().length > 0);
        map.put("urls",
                Arrays.stream(status.getURLEntities()).map(URLEntity::getText).collect(Collectors.toList()));
        map.put("hashtags", Arrays.stream(status.getHashtagEntities()).map(HashtagEntity::getText)
                .collect(Collectors.toList()));
        map.put("user_mentions", Arrays.stream(status.getUserMentionEntities()).map(UserMentionEntity::getText)
                .collect(Collectors.toList()));
        map.put("language", "und".equals(status.getLang()) ? null : status.getLang());
        map.put("is_positive", classifier.isPositive(status.getText()));

        Event event = new Event().properties(map).collection(collection);
        buffer.add(event);

        commitIfNecessary();
    }

    @Override
    public void onDeletionNotice(StatusDeletionNotice statusDeletionNotice) {
    }

    private void commitIfNecessary() {
        int size = buffer.size();
        LocalDateTime now = LocalDateTime.now();
        if (lastFlush == null || lastFlush.until(now, ChronoUnit.MILLIS) > maxFlushDurationInMillis) {
            synchronized (this) {
                try {
                    EventList eventList = new EventList().api(eventContext);
                    Event[] events = new Event[size];
                    for (int i = 0; i < size; i++) {
                        events[i] = buffer.poll();
                    }
                    eventList.setEvents(Arrays.asList(events));
                    eventApi.batchEvents(eventList);
                    counter.addAndGet(size);
                } catch (ApiException e) {
                    throw Throwables.propagate(e);
                } finally {
                    lastFlush = now;
                }
            }
        }
    }

    @Override
    public void onTrackLimitationNotice(int limit) {
        LOGGER.log(Level.WARNING, String.format("We hit the Twitter API limits, maximum %s can be fetched", limit));
    }

    @Override
    public void onScrubGeo(long l, long l1) {
    }

    @Override
    public void onStallWarning(StallWarning warning) {
        LOGGER.log(Level.WARNING, String.format("Warning while sending tweets to Rakam: %s", warning.getMessage()));
    }

    @Override
    public void onException(Exception e) {
        LOGGER.log(Level.SEVERE, String.format("Error while sending tweets to Rakam: %s", e.getMessage()));
    }
}