org.gabrielebaldassarre.twitter.tweet.TalendFlowTweetBehaviour.java Source code

Java tutorial

Introduction

Here is the source code for org.gabrielebaldassarre.twitter.tweet.TalendFlowTweetBehaviour.java

Source

/*
   This file is part of tTwitterInput Talend component
    
Talend Bridge is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
    
Talend Bridge is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.
    
You should have received a copy of the GNU General Public License
along with Talend Bridge.  If not, see <http://www.gnu.org/licenses/>.
 */
package org.gabrielebaldassarre.twitter.tweet;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Observable;
import java.util.ResourceBundle;
import java.util.Map.Entry;
import java.math.BigDecimal;
import java.text.SimpleDateFormat;

import org.gabrielebaldassarre.tcomponent.bridge.TalendFlow;
import org.gabrielebaldassarre.tcomponent.bridge.TalendFlowBehaviour;
import org.gabrielebaldassarre.tcomponent.bridge.TalendColumn;
import org.gabrielebaldassarre.tcomponent.bridge.TalendRow;
import org.gabrielebaldassarre.tcomponent.bridge.TalendRowFactory;
import org.gabrielebaldassarre.tcomponent.bridge.TalendType;
import org.gabrielebaldassarre.tcomponent.bridge.TalendValue;
import org.gabrielebaldassarre.twitter.commodities.tweet.TweetField;

import com.google.common.base.Joiner;

import twitter4j.GeoLocation;
import twitter4j.HashtagEntity;
import twitter4j.Status;
import twitter4j.URLEntity;
import twitter4j.UserMentionEntity;
import twitter4j.json.DataObjectFactory;

/**
 * Visitor that parses a Twitter result set (a list of {@link Status}) and fills a
 * {@link TalendFlow} with one row per tweet.
 * <p>
 * Each output column is linked to a {@link TweetField} through
 * {@link #setColumnLink(TalendColumn, TweetField)}; for every tweet the linked field is
 * converted to the column's {@link TalendType} and stored in a freshly created row.
 * <p>
 * Instances hold mutable state (input, column links, validity flag) and perform no
 * synchronization.
 * 
 * @author Gabriele Baldassarre
 *
 */
public class TalendFlowTweetBehaviour extends Observable implements TalendFlowBehaviour {

    /** Base name of the resource bundle holding the exception messages. */
    private static final String BUNDLE_NAME = "tTwitterInput";

    /** Pattern used to render the tweet creation date as a sortable literal. */
    private static final String DATE_PATTERN = "yyyyMMddHHmmss";

    private final Map<TalendColumn, TweetField> associations;
    private TalendValue resultSet;
    private boolean valid;
    private boolean includeHash;
    private String entitiesSeparator;

    /**
     * Build a visitor to parse a resultset gained from {@link TalendFlowQueryResultBehaviour}
     * 
     * @param entitiesSeparator the substring to use as separator for serialized entity lists, ie '|' or ','
     * @param includeHash true if every entity must be prefixed by its reserved character, ie '#' or '@'
     */
    public TalendFlowTweetBehaviour(String entitiesSeparator, boolean includeHash) {
        this.associations = new HashMap<TalendColumn, TweetField>();
        this.entitiesSeparator = entitiesSeparator;
        this.includeHash = includeHash;
    }

    /**
     * Check if this flow is valid.
     * 
     * @return true only when the last call to {@link #visit(TalendFlow)} ran to completion
     */
    public Boolean isValid() {
        return valid;
    }

    /**
     * Return the string used to separate serialized entity lists
     * 
     * @return the string used as separator
     */
    public String getEntitiesSeparator() {
        return entitiesSeparator;
    }

    /**
     * Return true if entities are prefixed with their typical character, ie '#' or '@'
     * 
     * @return true if entities will be prefixed
     */
    public boolean includeHash() {
        return includeHash;
    }

    /**
     * Set to true if you want your entities to be prefixed by a specific character, ie '#' or '@'
     * 
     * @param includeHash true if you want the reserved character to be prefixed
     */
    public void includeHash(boolean includeHash) {
        this.includeHash = includeHash;
    }

    /**
     * Set the string to be used as serialized entities list separator, ie <em>"|", ","...</em>
     * 
     * @param entitiesSeparator the string to use
     */
    public void setEntitiesSeparator(String entitiesSeparator) {
        this.entitiesSeparator = entitiesSeparator;
    }

    /**
     * Visit a target {@link TalendFlow} for parsed or raw json output.
     * <p>
     * One row is created in the target flow for each tweet of the input; every linked column
     * of that row receives the corresponding tweet field. The validity flag is cleared on
     * entry and set again only if every tweet was processed without error.
     * 
     * @param target the data flow to fill
     * @throws IllegalStateException if no input was provided through {@link #setInput(TalendValue)}
     * @throws IllegalArgumentException if a linked column does not belong to the target flow,
     *         if a field cannot be converted to the type of its column, or if the linked
     *         field is not handled by this visitor
     */
    public void visit(TalendFlow target) {
        ResourceBundle rb = ResourceBundle.getBundle(BUNDLE_NAME, Locale.getDefault());

        TalendRowFactory rowFactory = target.getModel().getRowFactory();
        valid = false;

        if (resultSet == null) {
            throw new IllegalStateException(rb.getString("exception.inputIsNull"));
        }

        // SimpleDateFormat is not thread-safe: use a private instance per visit
        // instead of a formatter shared by every instance of this class.
        SimpleDateFormat dateFormat = new SimpleDateFormat(DATE_PATTERN);

        @SuppressWarnings("unchecked")
        List<Status> tweets = (List<Status>) resultSet.getValue();
        for (Status tweet : tweets) {
            TalendRow current = rowFactory.newRow(target);

            for (Map.Entry<TalendColumn, TweetField> link : associations.entrySet()) {
                TalendColumn column = link.getKey();

                if (!column.getFlow().equals(target)) {
                    throw new IllegalArgumentException(String.format(rb.getString("exception.columnNotInFlow"),
                            column.getName(), target.getName()));
                }

                fillColumn(current, column, link.getValue(), tweet, dateFormat, rb);
            }
        }
        valid = true;
    }

    /**
     * Link a column of visiting {@link TalendFlow} to a proper type as described on {@link TweetField}
     * 
     * @param column the column to associate with
     * @param data the type of output; if null, no link is established
     * @return a reference to the visitor itself
     * @throws IllegalArgumentException if data is not null and column is null
     */
    public TalendFlowTweetBehaviour setColumnLink(TalendColumn column, TweetField data) {
        if (data == null) {
            return this;
        }
        if (column == null) {
            ResourceBundle rb = ResourceBundle.getBundle(BUNDLE_NAME, Locale.getDefault());
            throw new IllegalArgumentException(rb.getString("exception.columnIsNull"));
        }

        associations.put(column, data);
        return this;
    }

    /**
     * Set a resultset from {@link TalendFlowQueryResultBehaviour} to be used as input for this parser
     * 
     * @param resultSet the {@link TalendValue} to be used as input
     * @throws RuntimeException if resultSet is null
     */
    public void setInput(TalendValue resultSet) {
        if (resultSet == null) {
            ResourceBundle rb = ResourceBundle.getBundle(BUNDLE_NAME, Locale.getDefault());
            throw new RuntimeException(rb.getString("exception.inputIsNull"));
        }
        this.resultSet = resultSet;
    }

    /**
     * Store a single tweet field into the given column of the current row, dispatching on
     * the kind of field linked to the column.
     */
    private void fillColumn(TalendRow current, TalendColumn column, TweetField field, Status tweet,
            SimpleDateFormat dateFormat, ResourceBundle rb) {
        switch (field) {
        case CREATION_DATE:
            setCreationDate(current, column, tweet, dateFormat, rb);
            break;
        case FROM_NAME:
            requireString(column, rb);
            current.setValue(column, tweet.getUser().getName());
            break;
        case FROM_USERID:
            setLong(current, column, tweet.getUser().getId(), rb);
            break;
        case FROM_SCREEN_NAME:
            requireString(column, rb);
            current.setValue(column, tweet.getUser().getScreenName());
            break;
        case HASHTAGS:
            setEntities(current, column, hashtagTexts(tweet), rb);
            break;
        case IS_FAVORITED:
            setFlag(current, column, tweet.isFavorited(), rb);
            break;
        case IS_POSSIBLY_SENSITIVE:
            setFlag(current, column, tweet.isPossiblySensitive(), rb);
            break;
        case IS_RETWEET:
            // The typed conversion is the final value: it must not be overwritten
            // by the raw boolean afterwards.
            setFlag(current, column, tweet.isRetweet(), rb);
            break;
        case LOCATION:
            setLocation(current, column, tweet.getGeoLocation(), rb);
            break;
        case REPLYTO_SCREEN_NAME:
            requireString(column, rb);
            current.setValue(column, tweet.getInReplyToScreenName());
            break;
        case REPLYTO_STATUSID:
            setLong(current, column, tweet.getInReplyToStatusId(), rb);
            break;
        case REPLYTO_USERID:
            setLong(current, column, tweet.getInReplyToUserId(), rb);
            break;
        case RETWEET_COUNT:
            setLong(current, column, tweet.getRetweetCount(), rb);
            break;
        case SOURCE:
            requireString(column, rb);
            current.setValue(column, tweet.getSource());
            break;
        case STATUS_ID:
            setLong(current, column, tweet.getId(), rb);
            break;
        case TEXT:
            requireString(column, rb);
            current.setValue(column, tweet.getText());
            break;
        case URL_ENTITIES:
        case URL_ENTITIES_STRING:
            setEntities(current, column, expandedUrls(tweet), rb);
            break;
        case USER_MENTIONS:
            setEntities(current, column, mentionIds(tweet), rb);
            break;
        case USER_MENTIONS_SCREEN_NAME:
            setEntities(current, column, mentionScreenNames(tweet), rb);
            break;
        case JSON:
            requireString(column, rb);
            current.setValue(column, DataObjectFactory.getRawJSON(tweet));
            break;
        default:
            throw new IllegalArgumentException(
                    String.format(rb.getString("exception.unparseableColumn"), column.getName()));
        }
    }

    /**
     * Store the creation date of the tweet, either as a date object or as its
     * {@code yyyyMMddHHmmss} literal converted to the column type.
     */
    private void setCreationDate(TalendRow current, TalendColumn column, Status tweet,
            SimpleDateFormat dateFormat, ResourceBundle rb) {
        String literalDate = dateFormat.format(tweet.getCreatedAt());

        // Every case ends with its own break: without it a numeric column would
        // fall through the following conversions.
        switch (column.getType()) {
        case BIGDECIMAL:
            current.setValue(column, new BigDecimal(literalDate));
            break;
        case LONG:
            current.setValue(column, Long.parseLong(literalDate));
            break;
        case DOUBLE:
            current.setValue(column, Double.parseDouble(literalDate));
            break;
        case FLOAT:
            // a float cannot hold 14 significant digits: the value is rounded
            current.setValue(column, Float.parseFloat(literalDate));
            break;
        case DATE:
            current.setValue(column, tweet.getCreatedAt());
            break;
        case STRING:
            current.setValue(column, literalDate);
            break;
        case INTEGER:
            // A 14-digit literal never fits in a 32-bit integer, so this type is
            // reported as not castable instead of failing while parsing.
        default:
            throw uncastable(column, rb);
        }
    }

    /**
     * Store a boolean tweet attribute, rendered as true/false, 1/0 or "1"/"0"
     * depending on the column type.
     */
    private void setFlag(TalendRow current, TalendColumn column, boolean flag, ResourceBundle rb) {
        switch (column.getType()) {
        case BIGDECIMAL:
            current.setValue(column, new BigDecimal(flag ? 1 : 0));
            break;
        case BOOLEAN:
            current.setValue(column, flag);
            break;
        case BYTE:
            current.setValue(column, (byte) (flag ? 1 : 0));
            break;
        case CHARACTER:
            current.setValue(column, (flag ? '1' : '0'));
            break;
        case DOUBLE:
            current.setValue(column, (flag ? 1d : 0d));
            break;
        case FLOAT:
            current.setValue(column, (flag ? 1f : 0f));
            break;
        case INTEGER:
            current.setValue(column, (flag ? 1 : 0));
            break;
        case LONG:
            current.setValue(column, (flag ? 1L : 0L));
            break;
        case SHORT:
            current.setValue(column, (short) (flag ? 1 : 0));
            break;
        case STRING:
            current.setValue(column, (flag ? "1" : "0"));
            break;
        default:
            throw uncastable(column, rb);
        }
    }

    /**
     * Store an integral tweet attribute (identifier or counter) converted to the column type.
     */
    private void setLong(TalendRow current, TalendColumn column, long value, ResourceBundle rb) {
        switch (column.getType()) {
        case BIGDECIMAL:
            current.setValue(column, new BigDecimal(value));
            break;
        case DOUBLE:
            // large 64-bit values lose precision when stored as double
            current.setValue(column, Double.valueOf(value));
            break;
        case FLOAT:
            // large 64-bit values lose precision when stored as float
            current.setValue(column, Float.valueOf(value));
            break;
        case LONG:
            current.setValue(column, Long.valueOf(value));
            break;
        case STRING:
            current.setValue(column, String.valueOf(value));
            break;
        default:
            throw uncastable(column, rb);
        }
    }

    /**
     * Store a list of entities, either as the list itself or joined into a single
     * string with the configured separator.
     */
    private void setEntities(TalendRow current, TalendColumn column, List<?> entities, ResourceBundle rb) {
        Object value;
        switch (column.getType()) {
        case STRING:
            value = Joiner.on(getEntitiesSeparator()).join(entities);
            break;
        case LIST:
            value = entities;
            break;
        default:
            throw uncastable(column, rb);
        }
        current.setValue(column, value);
    }

    /**
     * Store the tweet location, either as the location object or as
     * "latitude&lt;separator&gt;longitude"; a missing location gives a null string.
     */
    private void setLocation(TalendRow current, TalendColumn column, GeoLocation location, ResourceBundle rb) {
        switch (column.getType()) {
        case STRING:
            String literal = null;
            if (location != null) {
                literal = String.valueOf(location.getLatitude()) + getEntitiesSeparator()
                        + String.valueOf(location.getLongitude());
            }
            current.setValue(column, literal);
            break;
        case OBJECT:
            current.setValue(column, location);
            break;
        default:
            throw uncastable(column, rb);
        }
    }

    /** Collect the hashtag texts of a tweet, prefixed by '#' when prefixes are enabled. */
    private List<String> hashtagTexts(Status tweet) {
        HashtagEntity[] hashtags = tweet.getHashtagEntities();
        List<String> texts = new ArrayList<String>(hashtags.length);
        String prefix = includeHash() ? "#" : "";
        for (HashtagEntity hashtag : hashtags) {
            texts.add(prefix + hashtag.getText());
        }
        return texts;
    }

    /** Collect the expanded URLs of the URL entities of a tweet. */
    private List<String> expandedUrls(Status tweet) {
        URLEntity[] urlEntities = tweet.getURLEntities();
        List<String> urls = new ArrayList<String>(urlEntities.length);
        for (URLEntity urlEntity : urlEntities) {
            urls.add(urlEntity.getExpandedURL());
        }
        return urls;
    }

    /** Collect the user ids mentioned in a tweet. */
    private List<Long> mentionIds(Status tweet) {
        UserMentionEntity[] mentions = tweet.getUserMentionEntities();
        List<Long> ids = new ArrayList<Long>(mentions.length);
        for (UserMentionEntity mention : mentions) {
            ids.add(mention.getId());
        }
        return ids;
    }

    /** Collect the screen names mentioned in a tweet, prefixed by '@' when prefixes are enabled. */
    private List<String> mentionScreenNames(Status tweet) {
        UserMentionEntity[] mentions = tweet.getUserMentionEntities();
        List<String> names = new ArrayList<String>(mentions.length);
        String prefix = includeHash() ? "@" : "";
        for (UserMentionEntity mention : mentions) {
            names.add(prefix + mention.getScreenName());
        }
        return names;
    }

    /** Reject the column unless it is a string column; called before the value is read. */
    private static void requireString(TalendColumn column, ResourceBundle rb) {
        if (!TalendType.STRING.equals(column.getType())) {
            throw uncastable(column, rb);
        }
    }

    /** Build the exception reporting that a field cannot be converted to the column type. */
    private static IllegalArgumentException uncastable(TalendColumn column, ResourceBundle rb) {
        return new IllegalArgumentException(String.format(rb.getString("exception.uncastableColumn"),
                column.getType().getTypeString(), column.getName()));
    }
}