Usage examples for the twitter4j HashtagEntity.getText() method
/**
 * Returns the text of this hashtag entity.
 * NOTE(review): presumably the value excludes the leading '#' character — confirm against the twitter4j Javadoc.
 */
@Override String getText();
From source file:crawler.DataStream.java
License:Apache License
private void freqUpdate(Status status) { //String hashtags = "";// HashtagEntity[] tags = status.getHashtagEntities(); for (HashtagEntity t : tags) { String hashtag = "#" + t.getText().toLowerCase(); int val = 1; if (Settings.TFHashtagFreq.containsKey(hashtag)) { val += Settings.TFHashtagFreq.get(hashtag); }//from w w w . java 2 s. c o m Settings.TFHashtagFreq.put(hashtag, val); //hashtags+=t.getText()+",";// } //System.out.println(status.getCreatedAt()+", [" + hashtags + "] - " + status.getText());// }
From source file:DataCollections.TweetHelper.java
public String stringifyHashtags(HashtagEntity[] hashtags) { JSONArray jsonarray = new JSONArray(); for (HashtagEntity hashtag : hashtags) { jsonarray.add(hashtag.getText()); }// w w w .j a va2 s .c o m return jsonarray.toJSONString(); }
From source file:druid.examples.twitter.TwitterSpritzerFirehoseFactory.java
License:Open Source License
@Override public Firehose connect() throws IOException { final ConnectionLifeCycleListener connectionLifeCycleListener = new ConnectionLifeCycleListener() { @Override//from www . j a v a2 s .c o m public void onConnect() { log.info("Connected_to_Twitter"); } @Override public void onDisconnect() { log.info("Disconnect_from_Twitter"); } /** * called before thread gets cleaned up */ @Override public void onCleanUp() { log.info("Cleanup_twitter_stream"); } }; // ConnectionLifeCycleListener final TwitterStream twitterStream; final StatusListener statusListener; final int QUEUE_SIZE = 2000; /** This queue is used to move twitter events from the twitter4j thread to the druid ingest thread. */ final BlockingQueue<Status> queue = new ArrayBlockingQueue<Status>(QUEUE_SIZE); final LinkedList<String> dimensions = new LinkedList<String>(); final long startMsec = System.currentTimeMillis(); dimensions.add("htags"); dimensions.add("lang"); dimensions.add("utc_offset"); // // set up Twitter Spritzer // twitterStream = new TwitterStreamFactory().getInstance(); twitterStream.addConnectionLifeCycleListener(connectionLifeCycleListener); statusListener = new StatusListener() { // This is what really gets called to deliver stuff from twitter4j @Override public void onStatus(Status status) { // time to stop? if (Thread.currentThread().isInterrupted()) { throw new RuntimeException("Interrupted, time to stop"); } try { boolean success = queue.offer(status, 15L, TimeUnit.SECONDS); if (!success) { log.warn("queue too slow!"); } } catch (InterruptedException e) { throw new RuntimeException("InterruptedException", e); } } @Override public void onDeletionNotice(StatusDeletionNotice statusDeletionNotice) { //log.info("Got a status deletion notice id:" + statusDeletionNotice.getStatusId()); } @Override public void onTrackLimitationNotice(int numberOfLimitedStatuses) { // This notice will be sent each time a limited stream becomes unlimited. 
// If this number is high and or rapidly increasing, it is an indication that your predicate is too broad, and you should consider a predicate with higher selectivity. log.warn("Got track limitation notice:" + numberOfLimitedStatuses); } @Override public void onScrubGeo(long userId, long upToStatusId) { //log.info("Got scrub_geo event userId:" + userId + " upToStatusId:" + upToStatusId); } @Override public void onException(Exception ex) { ex.printStackTrace(); } @Override public void onStallWarning(StallWarning warning) { System.out.println("Got stall warning:" + warning); } }; twitterStream.addListener(statusListener); twitterStream.sample(); // creates a generic StatusStream log.info("returned from sample()"); return new Firehose() { private final Runnable doNothingRunnable = new Runnable() { public void run() { } }; private long rowCount = 0L; private boolean waitIfmax = (maxEventCount < 0L); private final Map<String, Object> theMap = new HashMap<String, Object>(2); // DIY json parsing // private final ObjectMapper omapper = new ObjectMapper(); private boolean maxTimeReached() { if (maxRunMinutes <= 0) { return false; } else { return (System.currentTimeMillis() - startMsec) / 60000L >= maxRunMinutes; } } private boolean maxCountReached() { return maxEventCount >= 0 && rowCount >= maxEventCount; } @Override public boolean hasMore() { if (maxCountReached() || maxTimeReached()) { return waitIfmax; } else { return true; } } @Override public InputRow nextRow() { // Interrupted to stop? if (Thread.currentThread().isInterrupted()) { throw new RuntimeException("Interrupted, time to stop"); } // all done? 
if (maxCountReached() || maxTimeReached()) { if (waitIfmax) { // sleep a long time instead of terminating try { log.info("reached limit, sleeping a long time..."); sleep(2000000000L); } catch (InterruptedException e) { throw new RuntimeException("InterruptedException", e); } } else { // allow this event through, and the next hasMore() call will be false } } if (++rowCount % 1000 == 0) { log.info("nextRow() has returned %,d InputRows", rowCount); } Status status; try { status = queue.take(); } catch (InterruptedException e) { throw new RuntimeException("InterruptedException", e); } HashtagEntity[] hts = status.getHashtagEntities(); if (hts != null && hts.length > 0) { List<String> hashTags = Lists.newArrayListWithExpectedSize(hts.length); for (HashtagEntity ht : hts) { hashTags.add(ht.getText()); } theMap.put("htags", Arrays.asList(hashTags.get(0))); } long retweetCount = status.getRetweetCount(); theMap.put("retweet_count", retweetCount); User user = status.getUser(); if (user != null) { theMap.put("follower_count", user.getFollowersCount()); theMap.put("friends_count", user.getFriendsCount()); theMap.put("lang", user.getLang()); theMap.put("utc_offset", user.getUtcOffset()); // resolution in seconds, -1 if not available? theMap.put("statuses_count", user.getStatusesCount()); } return new MapBasedInputRow(status.getCreatedAt().getTime(), dimensions, theMap); } @Override public Runnable commit() { // ephemera in, ephemera out. return doNothingRunnable; // reuse the same object each time } @Override public void close() throws IOException { log.info("CLOSE twitterstream"); twitterStream.shutdown(); // invokes twitterStream.cleanUp() } }; }
From source file:edu.cmu.cs.lti.discoursedb.io.twitter.converter.TwitterConverterService.java
License:Open Source License
/** * Maps a Tweet represented as a Twitter4J Status object to DiscourseDB * // w w w . j av a 2 s . c om * @param discourseName the name of the discourse * @param datasetName the dataset identifier * @param tweet the Tweet to store in DiscourseDB */ public void mapTweet(String discourseName, String datasetName, Status tweet, PemsStationMetaData pemsMetaData) { if (tweet == null) { return; } Assert.hasText(discourseName, "The discourse name has to be specified and cannot be empty."); Assert.hasText(datasetName, "The dataset name has to be specified and cannot be empty."); if (dataSourceService.dataSourceExists(String.valueOf(tweet.getId()), TweetSourceMapping.ID_TO_CONTRIBUTION, datasetName)) { log.trace("Tweet with id " + tweet.getId() + " already exists in database. Skipping"); return; } log.trace("Mapping Tweet " + tweet.getId()); Discourse discourse = discourseService.createOrGetDiscourse(discourseName); twitter4j.User tUser = tweet.getUser(); User user = null; if (!userService.findUserByDiscourseAndUsername(discourse, tUser.getScreenName()).isPresent()) { user = userService.createOrGetUser(discourse, tUser.getScreenName()); user.setRealname(tUser.getName()); user.setEmail(tUser.getEmail()); user.setLocation(tUser.getLocation()); user.setLanguage(tUser.getLang()); user.setStartTime(tweet.getUser().getCreatedAt()); AnnotationInstance userInfo = annoService.createTypedAnnotation("twitter_user_info"); annoService.addFeature(userInfo, annoService.createTypedFeature(String.valueOf(tUser.getFavouritesCount()), "favorites_count")); annoService.addFeature(userInfo, annoService.createTypedFeature(String.valueOf(tUser.getFollowersCount()), "followers_count")); annoService.addFeature(userInfo, annoService.createTypedFeature(String.valueOf(tUser.getFriendsCount()), "friends_count")); annoService.addFeature(userInfo, annoService.createTypedFeature(String.valueOf(tUser.getStatusesCount()), "statuses_count")); annoService.addFeature(userInfo, 
annoService.createTypedFeature(String.valueOf(tUser.getListedCount()), "listed_count")); if (tUser.getDescription() != null) { annoService.addFeature(userInfo, annoService.createTypedFeature(String.valueOf(tUser.getDescription()), "description")); } annoService.addAnnotation(user, userInfo); } Contribution curContrib = contributionService.createTypedContribution(ContributionTypes.TWEET); DataSourceInstance contribSource = dataSourceService.createIfNotExists(new DataSourceInstance( String.valueOf(tweet.getId()), TweetSourceMapping.ID_TO_CONTRIBUTION, datasetName)); curContrib.setStartTime(tweet.getCreatedAt()); dataSourceService.addSource(curContrib, contribSource); AnnotationInstance tweetInfo = annoService.createTypedAnnotation("twitter_tweet_info"); if (tweet.getSource() != null) { annoService.addFeature(tweetInfo, annoService.createTypedFeature(tweet.getSource(), "tweet_source")); } annoService.addFeature(tweetInfo, annoService.createTypedFeature(String.valueOf(tweet.getFavoriteCount()), "favorites_count")); if (tweet.getHashtagEntities() != null) { for (HashtagEntity hashtag : tweet.getHashtagEntities()) { annoService.addFeature(tweetInfo, annoService.createTypedFeature(hashtag.getText(), "hashtag")); } } if (tweet.getMediaEntities() != null) { for (MediaEntity media : tweet.getMediaEntities()) { //NOTE: additional info is available for MediaEntities annoService.addFeature(tweetInfo, annoService.createTypedFeature(media.getMediaURL(), "media_url")); } } //TODO this should be represented as a relation if the related tweet is part of the dataset if (tweet.getInReplyToStatusId() > 0) { annoService.addFeature(tweetInfo, annoService .createTypedFeature(String.valueOf(tweet.getInReplyToStatusId()), "in_reply_to_status_id")); } //TODO this should be represented as a relation if the related tweet is part of the dataset if (tweet.getInReplyToScreenName() != null) { annoService.addFeature(tweetInfo, annoService.createTypedFeature(tweet.getInReplyToScreenName(), 
"in_reply_to_screen_name")); } annoService.addAnnotation(curContrib, tweetInfo); GeoLocation geo = tweet.getGeoLocation(); if (geo != null) { AnnotationInstance coord = annoService.createTypedAnnotation("twitter_tweet_geo_location"); annoService.addFeature(coord, annoService.createTypedFeature(String.valueOf(geo.getLongitude()), "long")); annoService.addFeature(coord, annoService.createTypedFeature(String.valueOf(geo.getLatitude()), "lat")); annoService.addAnnotation(curContrib, coord); } Place place = tweet.getPlace(); if (place != null) { AnnotationInstance placeAnno = annoService.createTypedAnnotation("twitter_tweet_place"); annoService.addFeature(placeAnno, annoService.createTypedFeature(String.valueOf(place.getPlaceType()), "place_type")); if (place.getGeometryType() != null) { annoService.addFeature(placeAnno, annoService.createTypedFeature(String.valueOf(place.getGeometryType()), "geo_type")); } annoService.addFeature(placeAnno, annoService .createTypedFeature(String.valueOf(place.getBoundingBoxType()), "bounding_box_type")); annoService.addFeature(placeAnno, annoService.createTypedFeature(String.valueOf(place.getFullName()), "place_name")); if (place.getStreetAddress() != null) { annoService.addFeature(placeAnno, annoService.createTypedFeature(String.valueOf(place.getStreetAddress()), "street_address")); } annoService.addFeature(placeAnno, annoService.createTypedFeature(String.valueOf(place.getCountry()), "country")); if (place.getBoundingBoxCoordinates() != null) { annoService.addFeature(placeAnno, annoService.createTypedFeature( convertGeoLocationArray(place.getBoundingBoxCoordinates()), "bounding_box_lat_lon_array")); } if (place.getGeometryCoordinates() != null) { annoService.addFeature(placeAnno, annoService.createTypedFeature( convertGeoLocationArray(place.getGeometryCoordinates()), "geometry_lat_lon_array")); } annoService.addAnnotation(curContrib, placeAnno); } Content curContent = contentService.createContent(); curContent.setText(tweet.getText()); 
curContent.setAuthor(user); curContent.setStartTime(tweet.getCreatedAt()); curContrib.setCurrentRevision(curContent); curContrib.setFirstRevision(curContent); DataSourceInstance contentSource = dataSourceService.createIfNotExists(new DataSourceInstance( String.valueOf(tweet.getId()), TweetSourceMapping.ID_TO_CONTENT, datasetName)); dataSourceService.addSource(curContent, contentSource); if (pemsMetaData != null) { log.warn("PEMS station meta data mapping not implemented yet"); //TODO map pems meta data if available } }
From source file:io.druid.examples.twitter.TwitterSpritzerFirehoseFactory.java
License:Apache License
@Override public Firehose connect(InputRowParser parser) throws IOException { final ConnectionLifeCycleListener connectionLifeCycleListener = new ConnectionLifeCycleListener() { @Override//from w ww .ja v a2 s . com public void onConnect() { log.info("Connected_to_Twitter"); } @Override public void onDisconnect() { log.info("Disconnect_from_Twitter"); } /** * called before thread gets cleaned up */ @Override public void onCleanUp() { log.info("Cleanup_twitter_stream"); } }; // ConnectionLifeCycleListener final TwitterStream twitterStream; final StatusListener statusListener; final int QUEUE_SIZE = 2000; /** This queue is used to move twitter events from the twitter4j thread to the druid ingest thread. */ final BlockingQueue<Status> queue = new ArrayBlockingQueue<Status>(QUEUE_SIZE); final long startMsec = System.currentTimeMillis(); // // set up Twitter Spritzer // twitterStream = new TwitterStreamFactory().getInstance(); twitterStream.addConnectionLifeCycleListener(connectionLifeCycleListener); statusListener = new StatusListener() { // This is what really gets called to deliver stuff from twitter4j @Override public void onStatus(Status status) { // time to stop? if (Thread.currentThread().isInterrupted()) { throw new RuntimeException("Interrupted, time to stop"); } try { boolean success = queue.offer(status, 15L, TimeUnit.SECONDS); if (!success) { log.warn("queue too slow!"); } } catch (InterruptedException e) { throw new RuntimeException("InterruptedException", e); } } @Override public void onDeletionNotice(StatusDeletionNotice statusDeletionNotice) { //log.info("Got a status deletion notice id:" + statusDeletionNotice.getStatusId()); } @Override public void onTrackLimitationNotice(int numberOfLimitedStatuses) { // This notice will be sent each time a limited stream becomes unlimited. // If this number is high and or rapidly increasing, it is an indication that your predicate is too broad, and you should consider a predicate with higher selectivity. 
log.warn("Got track limitation notice:" + numberOfLimitedStatuses); } @Override public void onScrubGeo(long userId, long upToStatusId) { //log.info("Got scrub_geo event userId:" + userId + " upToStatusId:" + upToStatusId); } @Override public void onException(Exception ex) { ex.printStackTrace(); } @Override public void onStallWarning(StallWarning warning) { System.out.println("Got stall warning:" + warning); } }; twitterStream.addListener(statusListener); twitterStream.sample(); // creates a generic StatusStream log.info("returned from sample()"); return new Firehose() { private final Runnable doNothingRunnable = new Runnable() { public void run() { } }; private long rowCount = 0L; private boolean waitIfmax = (getMaxEventCount() < 0L); private final Map<String, Object> theMap = new TreeMap<>(); // DIY json parsing // private final ObjectMapper omapper = new ObjectMapper(); private boolean maxTimeReached() { if (getMaxRunMinutes() <= 0) { return false; } else { return (System.currentTimeMillis() - startMsec) / 60000L >= getMaxRunMinutes(); } } private boolean maxCountReached() { return getMaxEventCount() >= 0 && rowCount >= getMaxEventCount(); } @Override public boolean hasMore() { if (maxCountReached() || maxTimeReached()) { return waitIfmax; } else { return true; } } @Override public InputRow nextRow() { // Interrupted to stop? if (Thread.currentThread().isInterrupted()) { throw new RuntimeException("Interrupted, time to stop"); } // all done? 
if (maxCountReached() || maxTimeReached()) { if (waitIfmax) { // sleep a long time instead of terminating try { log.info("reached limit, sleeping a long time..."); sleep(2000000000L); } catch (InterruptedException e) { throw new RuntimeException("InterruptedException", e); } } else { // allow this event through, and the next hasMore() call will be false } } if (++rowCount % 1000 == 0) { log.info("nextRow() has returned %,d InputRows", rowCount); } Status status; try { status = queue.take(); } catch (InterruptedException e) { throw new RuntimeException("InterruptedException", e); } theMap.clear(); HashtagEntity[] hts = status.getHashtagEntities(); String text = status.getText(); theMap.put("text", (null == text) ? "" : text); theMap.put("htags", (hts.length > 0) ? Lists.transform(Arrays.asList(hts), new Function<HashtagEntity, String>() { @Nullable @Override public String apply(HashtagEntity input) { return input.getText(); } }) : ImmutableList.<String>of()); long[] lcontrobutors = status.getContributors(); List<String> contributors = new ArrayList<>(); for (long contrib : lcontrobutors) { contributors.add(String.format("%d", contrib)); } theMap.put("contributors", contributors); GeoLocation geoLocation = status.getGeoLocation(); if (null != geoLocation) { double lat = status.getGeoLocation().getLatitude(); double lon = status.getGeoLocation().getLongitude(); theMap.put("lat", lat); theMap.put("lon", lon); } else { theMap.put("lat", null); theMap.put("lon", null); } if (status.getSource() != null) { Matcher m = sourcePattern.matcher(status.getSource()); theMap.put("source", m.find() ? m.group(1) : status.getSource()); } theMap.put("retweet", status.isRetweet()); if (status.isRetweet()) { Status original = status.getRetweetedStatus(); theMap.put("retweet_count", original.getRetweetCount()); User originator = original.getUser(); theMap.put("originator_screen_name", originator != null ? 
originator.getScreenName() : ""); theMap.put("originator_follower_count", originator != null ? originator.getFollowersCount() : ""); theMap.put("originator_friends_count", originator != null ? originator.getFriendsCount() : ""); theMap.put("originator_verified", originator != null ? originator.isVerified() : ""); } User user = status.getUser(); final boolean hasUser = (null != user); theMap.put("follower_count", hasUser ? user.getFollowersCount() : 0); theMap.put("friends_count", hasUser ? user.getFriendsCount() : 0); theMap.put("lang", hasUser ? user.getLang() : ""); theMap.put("utc_offset", hasUser ? user.getUtcOffset() : -1); // resolution in seconds, -1 if not available? theMap.put("statuses_count", hasUser ? user.getStatusesCount() : 0); theMap.put("user_id", hasUser ? String.format("%d", user.getId()) : ""); theMap.put("screen_name", hasUser ? user.getScreenName() : ""); theMap.put("location", hasUser ? user.getLocation() : ""); theMap.put("verified", hasUser ? user.isVerified() : ""); theMap.put("ts", status.getCreatedAt().getTime()); List<String> dimensions = Lists.newArrayList(theMap.keySet()); return new MapBasedInputRow(status.getCreatedAt().getTime(), dimensions, theMap); } @Override public Runnable commit() { // ephemera in, ephemera out. return doNothingRunnable; // reuse the same object each time } @Override public void close() throws IOException { log.info("CLOSE twitterstream"); twitterStream.shutdown(); // invokes twitterStream.cleanUp() } }; }
From source file:org.apache.blur.demo.twitter.TwitterSearchQueueReader.java
License:Apache License
private RowMutation toRowMutation(Status tweet) { RowMutation rowMutation = new RowMutation(); rowMutation.setRowId(tweet.getUser().getScreenName()); rowMutation.setTable(tableName);//from w w w. ja v a 2 s . c o m rowMutation.setRowMutationType(RowMutationType.UPDATE_ROW); Record record = new Record(); record.setFamily("tweets"); record.setRecordId(tweet.getUser().getScreenName() + "-" + tweet.getId()); record.addToColumns(new Column("message", tweet.getText())); for (UserMentionEntity mention : tweet.getUserMentionEntities()) { record.addToColumns(new Column("mentions", mention.getScreenName())); } for (HashtagEntity tag : tweet.getHashtagEntities()) { record.addToColumns(new Column("hashtags", tag.getText())); } rowMutation.addToRecordMutations(new RecordMutation(RecordMutationType.REPLACE_ENTIRE_RECORD, record)); log.trace(rowMutation); return rowMutation; }
From source file:org.apache.druid.examples.twitter.TwitterSpritzerFirehoseFactory.java
License:Apache License
@Override public Firehose connect(InputRowParser parser, File temporaryDirectory) { final ConnectionLifeCycleListener connectionLifeCycleListener = new ConnectionLifeCycleListener() { @Override/*w w w.ja va2 s. co m*/ public void onConnect() { log.info("Connected_to_Twitter"); } @Override public void onDisconnect() { log.info("Disconnect_from_Twitter"); } /** * called before thread gets cleaned up */ @Override public void onCleanUp() { log.info("Cleanup_twitter_stream"); } }; // ConnectionLifeCycleListener final TwitterStream twitterStream; final StatusListener statusListener; final int QUEUE_SIZE = 2000; /** This queue is used to move twitter events from the twitter4j thread to the druid ingest thread. */ final BlockingQueue<Status> queue = new ArrayBlockingQueue<Status>(QUEUE_SIZE); final long startMsec = System.currentTimeMillis(); // // set up Twitter Spritzer // twitterStream = new TwitterStreamFactory().getInstance(); twitterStream.addConnectionLifeCycleListener(connectionLifeCycleListener); statusListener = new StatusListener() { // This is what really gets called to deliver stuff from twitter4j @Override public void onStatus(Status status) { // time to stop? if (Thread.currentThread().isInterrupted()) { throw new RuntimeException("Interrupted, time to stop"); } try { boolean success = queue.offer(status, 15L, TimeUnit.SECONDS); if (!success) { log.warn("queue too slow!"); } } catch (InterruptedException e) { throw new RuntimeException("InterruptedException", e); } } @Override public void onDeletionNotice(StatusDeletionNotice statusDeletionNotice) { //log.info("Got a status deletion notice id:" + statusDeletionNotice.getStatusId()); } @Override public void onTrackLimitationNotice(int numberOfLimitedStatuses) { // This notice will be sent each time a limited stream becomes unlimited. // If this number is high and or rapidly increasing, it is an indication that your predicate is too broad, and you should consider a predicate with higher selectivity. 
log.warn("Got track limitation notice:" + numberOfLimitedStatuses); } @Override public void onScrubGeo(long userId, long upToStatusId) { //log.info("Got scrub_geo event userId:" + userId + " upToStatusId:" + upToStatusId); } @Override public void onException(Exception ex) { log.error(ex, "Got exception"); } @Override public void onStallWarning(StallWarning warning) { log.warn("Got stall warning: %s", warning); } }; twitterStream.addListener(statusListener); twitterStream.sample(); // creates a generic StatusStream log.info("returned from sample()"); return new Firehose() { private final Runnable doNothingRunnable = new Runnable() { @Override public void run() { } }; private long rowCount = 0L; private boolean waitIfmax = (getMaxEventCount() < 0L); private final Map<String, Object> theMap = new TreeMap<>(); // DIY json parsing // private final ObjectMapper omapper = new ObjectMapper(); private boolean maxTimeReached() { if (getMaxRunMinutes() <= 0) { return false; } else { return (System.currentTimeMillis() - startMsec) / 60000L >= getMaxRunMinutes(); } } private boolean maxCountReached() { return getMaxEventCount() >= 0 && rowCount >= getMaxEventCount(); } @Override public boolean hasMore() { if (maxCountReached() || maxTimeReached()) { return waitIfmax; } else { return true; } } @Nullable @Override public InputRow nextRow() { // Interrupted to stop? if (Thread.currentThread().isInterrupted()) { throw new RuntimeException("Interrupted, time to stop"); } // all done? 
if (maxCountReached() || maxTimeReached()) { if (waitIfmax) { // sleep a long time instead of terminating try { log.info("reached limit, sleeping a long time..."); Thread.sleep(2000000000L); } catch (InterruptedException e) { throw new RuntimeException("InterruptedException", e); } } else { // allow this event through, and the next hasMore() call will be false } } if (++rowCount % 1000 == 0) { log.info("nextRow() has returned %,d InputRows", rowCount); } Status status; try { status = queue.take(); } catch (InterruptedException e) { throw new RuntimeException("InterruptedException", e); } theMap.clear(); HashtagEntity[] hts = status.getHashtagEntities(); String text = status.getText(); theMap.put("text", (null == text) ? "" : text); theMap.put("htags", (hts.length > 0) ? Lists.transform(Arrays.asList(hts), new Function<HashtagEntity, String>() { @Nullable @Override public String apply(HashtagEntity input) { return input.getText(); } }) : ImmutableList.<String>of()); long[] lcontrobutors = status.getContributors(); List<String> contributors = new ArrayList<>(); for (long contrib : lcontrobutors) { contributors.add(StringUtils.format("%d", contrib)); } theMap.put("contributors", contributors); GeoLocation geoLocation = status.getGeoLocation(); if (null != geoLocation) { double lat = status.getGeoLocation().getLatitude(); double lon = status.getGeoLocation().getLongitude(); theMap.put("lat", lat); theMap.put("lon", lon); } else { theMap.put("lat", null); theMap.put("lon", null); } if (status.getSource() != null) { Matcher m = sourcePattern.matcher(status.getSource()); theMap.put("source", m.find() ? m.group(1) : status.getSource()); } theMap.put("retweet", status.isRetweet()); if (status.isRetweet()) { Status original = status.getRetweetedStatus(); theMap.put("retweet_count", original.getRetweetCount()); User originator = original.getUser(); theMap.put("originator_screen_name", originator != null ? 
originator.getScreenName() : ""); theMap.put("originator_follower_count", originator != null ? originator.getFollowersCount() : ""); theMap.put("originator_friends_count", originator != null ? originator.getFriendsCount() : ""); theMap.put("originator_verified", originator != null ? originator.isVerified() : ""); } User user = status.getUser(); final boolean hasUser = (null != user); theMap.put("follower_count", hasUser ? user.getFollowersCount() : 0); theMap.put("friends_count", hasUser ? user.getFriendsCount() : 0); theMap.put("lang", hasUser ? user.getLang() : ""); theMap.put("utc_offset", hasUser ? user.getUtcOffset() : -1); // resolution in seconds, -1 if not available? theMap.put("statuses_count", hasUser ? user.getStatusesCount() : 0); theMap.put("user_id", hasUser ? StringUtils.format("%d", user.getId()) : ""); theMap.put("screen_name", hasUser ? user.getScreenName() : ""); theMap.put("location", hasUser ? user.getLocation() : ""); theMap.put("verified", hasUser ? user.isVerified() : ""); theMap.put("ts", status.getCreatedAt().getTime()); List<String> dimensions = Lists.newArrayList(theMap.keySet()); return new MapBasedInputRow(status.getCreatedAt().getTime(), dimensions, theMap); } @Override public Runnable commit() { // ephemera in, ephemera out. return doNothingRunnable; // reuse the same object each time } @Override public void close() { log.info("CLOSE twitterstream"); twitterStream.shutdown(); // invokes twitterStream.cleanUp() } }; }
From source file:org.gabrielebaldassarre.twitter.stream.tweet.TalendRowTweetBehaviour.java
License:Open Source License
public void visit(TalendFlow target) { ResourceBundle rb = ResourceBundle.getBundle("tTwitterStreamInput", Locale.getDefault()); valid = false;// www . ja va2s.c om if (status != null) { TalendRowFactory rowFactory = target.getModel().getRowFactory(); Status tweet = status; status = null; TalendRow current = rowFactory.newRow(target); Iterator<Entry<TalendColumn, TweetField>> col = associations.entrySet().iterator(); while (col.hasNext()) { List<String> h; List<Long> l; Map.Entry<TalendColumn, TweetField> row = (Map.Entry<TalendColumn, TweetField>) col.next(); if (target != null && !row.getKey().getFlow().equals(target)) { throw new IllegalArgumentException(String.format(rb.getString("exception.columnNotInFlow"), row.getKey().getName(), target.getName())); } switch (row.getValue()) { case CREATION_DATE: String literalDate = (new StringBuilder( TalendRowTweetBehaviour.DATEFORMAT.format(tweet.getCreatedAt()))).toString(); switch (row.getKey().getType()) { case BIGDECIMAL: current.setValue(row.getKey(), new BigDecimal(literalDate)); case LONG: current.setValue(row.getKey(), Long.parseLong(literalDate)); case DOUBLE: current.setValue(row.getKey(), Double.parseDouble(literalDate)); case FLOAT: current.setValue(row.getKey(), Float.parseFloat(literalDate)); case INTEGER: current.setValue(row.getKey(), Integer.parseInt(literalDate)); case DATE: current.setValue(row.getKey(), tweet.getCreatedAt()); break; case STRING: current.setValue(row.getKey(), literalDate); break; default: throw new IllegalArgumentException(String.format(rb.getString("exception.uncastableColumn"), row.getKey().getType().getTypeString(), row.getKey().getName())); } break; case FROM_NAME: switch (row.getKey().getType()) { case STRING: current.setValue(row.getKey(), tweet.getUser().getName()); break; default: throw new IllegalArgumentException(String.format(rb.getString("exception.uncastableColumn"), row.getKey().getType().getTypeString(), row.getKey().getName())); } break; case FROM_USERID: switch 
(row.getKey().getType()) { case BIGDECIMAL: current.setValue(row.getKey(), new BigDecimal(tweet.getUser().getId())); break; case DOUBLE: current.setValue(row.getKey(), new Double(tweet.getUser().getId())); break; case FLOAT: current.setValue(row.getKey(), new Float(tweet.getUser().getId())); break; case LONG: current.setValue(row.getKey(), new Long(tweet.getUser().getId())); break; case STRING: current.setValue(row.getKey(), String.valueOf((tweet.getUser().getId()))); break; default: throw new IllegalArgumentException(String.format(rb.getString("exception.uncastableColumn"), row.getKey().getType().getTypeString(), row.getKey().getName())); } break; case FROM_SCREEN_NAME: switch (row.getKey().getType()) { case STRING: current.setValue(row.getKey(), tweet.getUser().getScreenName()); break; default: throw new IllegalArgumentException(String.format(rb.getString("exception.uncastableColumn"), row.getKey().getType().getTypeString(), row.getKey().getName())); } break; case HASHTAGS: List<HashtagEntity> hashtags = Arrays.asList(tweet.getHashtagEntities()); h = new ArrayList<String>(hashtags.size()); for (HashtagEntity hashtag : hashtags) { h.add((includeHash() ? "#" : "") + hashtag.getText()); } switch (row.getKey().getType()) { case STRING: case LIST: current.setValue(row.getKey(), !TalendType.STRING.equals(row.getKey().getType()) ? h : Joiner.on(getEntitiesSeparator()).join(h)); break; default: throw new IllegalArgumentException(String.format(rb.getString("exception.uncastableColumn"), row.getKey().getType().getTypeString(), row.getKey().getName())); } break; case IS_FAVORITED: switch (row.getKey().getType()) { case BIGDECIMAL: current.setValue(row.getKey(), new BigDecimal(tweet.isFavorited() ? 1 : 0)); break; case BOOLEAN: current.setValue(row.getKey(), tweet.isFavorited()); break; case BYTE: current.setValue(row.getKey(), (byte) (tweet.isFavorited() ? 1 : 0)); break; case CHARACTER: current.setValue(row.getKey(), (tweet.isFavorited() ? 
'1' : '0')); break; case DOUBLE: current.setValue(row.getKey(), (double) (tweet.isFavorited() ? 1d : 0d)); break; case FLOAT: current.setValue(row.getKey(), (float) (tweet.isFavorited() ? 1f : 0f)); break; case INTEGER: current.setValue(row.getKey(), (tweet.isFavorited() ? 1 : 0)); break; case LONG: current.setValue(row.getKey(), (long) (tweet.isFavorited() ? 1l : 0l)); break; case SHORT: current.setValue(row.getKey(), (short) (tweet.isFavorited() ? (short) 1 : (short) 0)); break; case STRING: current.setValue(row.getKey(), (tweet.isFavorited() ? "1" : "0")); break; default: throw new IllegalArgumentException(String.format(rb.getString("exception.uncastableColumn"), row.getKey().getType().getTypeString(), row.getKey().getName())); } break; case IS_POSSIBLY_SENSITIVE: switch (row.getKey().getType()) { case BIGDECIMAL: current.setValue(row.getKey(), new BigDecimal(tweet.isPossiblySensitive() ? 1 : 0)); break; case BOOLEAN: current.setValue(row.getKey(), tweet.isPossiblySensitive()); break; case BYTE: current.setValue(row.getKey(), (byte) (tweet.isPossiblySensitive() ? 1 : 0)); break; case CHARACTER: current.setValue(row.getKey(), (tweet.isPossiblySensitive() ? '1' : '0')); break; case DOUBLE: current.setValue(row.getKey(), (double) (tweet.isPossiblySensitive() ? 1d : 0d)); break; case FLOAT: current.setValue(row.getKey(), (float) (tweet.isPossiblySensitive() ? 1f : 0f)); break; case INTEGER: current.setValue(row.getKey(), (tweet.isPossiblySensitive() ? 1 : 0)); break; case LONG: current.setValue(row.getKey(), (long) (tweet.isPossiblySensitive() ? 1l : 0l)); break; case SHORT: current.setValue(row.getKey(), (short) (tweet.isPossiblySensitive() ? (short) 1 : (short) 0)); break; case STRING: current.setValue(row.getKey(), (tweet.isPossiblySensitive() ? 
"1" : "0")); break; default: throw new IllegalArgumentException(String.format(rb.getString("exception.uncastableColumn"), row.getKey().getType().getTypeString(), row.getKey().getName())); } break; case IS_RETWEET: switch (row.getKey().getType()) { case BIGDECIMAL: current.setValue(row.getKey(), new BigDecimal(tweet.isRetweet() ? 1 : 0)); break; case BOOLEAN: current.setValue(row.getKey(), tweet.isRetweet()); break; case BYTE: current.setValue(row.getKey(), (byte) (tweet.isRetweet() ? 1 : 0)); break; case CHARACTER: current.setValue(row.getKey(), (tweet.isRetweet() ? '1' : '0')); break; case DOUBLE: current.setValue(row.getKey(), (double) (tweet.isRetweet() ? 1d : 0d)); break; case FLOAT: current.setValue(row.getKey(), (float) (tweet.isRetweet() ? 1f : 0f)); break; case INTEGER: current.setValue(row.getKey(), (tweet.isRetweet() ? 1 : 0)); break; case LONG: current.setValue(row.getKey(), (long) (tweet.isRetweet() ? 1l : 0l)); break; case SHORT: current.setValue(row.getKey(), (short) (tweet.isRetweet() ? (short) 1 : (short) 0)); break; case STRING: current.setValue(row.getKey(), (tweet.isRetweet() ? "1" : "0")); break; default: throw new IllegalArgumentException(String.format(rb.getString("exception.uncastableColumn"), row.getKey().getType().getTypeString(), row.getKey().getName())); } current.setValue(row.getKey(), tweet.isRetweet()); break; case LOCATION: GeoLocation g = tweet.getGeoLocation(); switch (row.getKey().getType()) { case STRING: current.setValue(row.getKey(), g != null ? 
String.valueOf(g.getLatitude()) + getEntitiesSeparator() + String.valueOf(g.getLongitude()) : null); break; case OBJECT: current.setValue(row.getKey(), g); break; default: throw new IllegalArgumentException(String.format(rb.getString("exception.uncastableColumn"), row.getKey().getType().getTypeString(), row.getKey().getName())); } break; case REPLYTO_SCREEN_NAME: switch (row.getKey().getType()) { case STRING: current.setValue(row.getKey(), tweet.getInReplyToScreenName()); break; default: throw new IllegalArgumentException(String.format(rb.getString("exception.uncastableColumn"), row.getKey().getType().getTypeString(), row.getKey().getName())); } break; case REPLYTO_STATUSID: switch (row.getKey().getType()) { case BIGDECIMAL: current.setValue(row.getKey(), new BigDecimal(tweet.getInReplyToStatusId())); break; case DOUBLE: current.setValue(row.getKey(), new Double(tweet.getInReplyToStatusId())); break; case FLOAT: current.setValue(row.getKey(), new Float(tweet.getInReplyToStatusId())); break; case LONG: current.setValue(row.getKey(), new Long(tweet.getInReplyToStatusId())); break; case STRING: current.setValue(row.getKey(), String.valueOf((tweet.getInReplyToStatusId()))); break; default: throw new IllegalArgumentException(String.format(rb.getString("exception.uncastableColumn"), row.getKey().getType().getTypeString(), row.getKey().getName())); } break; case REPLYTO_USERID: switch (row.getKey().getType()) { case BIGDECIMAL: current.setValue(row.getKey(), new BigDecimal(tweet.getInReplyToUserId())); break; case DOUBLE: current.setValue(row.getKey(), new Double(tweet.getInReplyToUserId())); break; case FLOAT: current.setValue(row.getKey(), new Float(tweet.getInReplyToUserId())); break; case LONG: current.setValue(row.getKey(), new Long(tweet.getInReplyToUserId())); break; case STRING: current.setValue(row.getKey(), String.valueOf((tweet.getInReplyToUserId()))); break; default: throw new IllegalArgumentException(String.format(rb.getString("exception.uncastableColumn"), 
row.getKey().getType().getTypeString(), row.getKey().getName())); } break; case RETWEET_COUNT: switch (row.getKey().getType()) { case BIGDECIMAL: current.setValue(row.getKey(), new BigDecimal(tweet.getRetweetCount())); break; case DOUBLE: current.setValue(row.getKey(), new Double(tweet.getRetweetCount())); break; case FLOAT: current.setValue(row.getKey(), new Float(tweet.getRetweetCount())); break; case LONG: current.setValue(row.getKey(), new Long(tweet.getRetweetCount())); break; case STRING: current.setValue(row.getKey(), String.valueOf((tweet.getRetweetCount()))); break; default: throw new IllegalArgumentException(String.format(rb.getString("exception.uncastableColumn"), row.getKey().getType().getTypeString(), row.getKey().getName())); } break; case SOURCE: switch (row.getKey().getType()) { case STRING: current.setValue(row.getKey(), tweet.getSource()); break; default: throw new IllegalArgumentException(String.format(rb.getString("exception.uncastableColumn"), row.getKey().getType().getTypeString(), row.getKey().getName())); } break; case STATUS_ID: switch (row.getKey().getType()) { case BIGDECIMAL: current.setValue(row.getKey(), new BigDecimal(tweet.getId())); break; case DOUBLE: current.setValue(row.getKey(), new Double(tweet.getId())); break; case FLOAT: current.setValue(row.getKey(), new Float(tweet.getId())); break; case LONG: current.setValue(row.getKey(), new Long(tweet.getId())); break; case STRING: current.setValue(row.getKey(), String.valueOf((tweet.getId()))); break; default: throw new IllegalArgumentException(String.format(rb.getString("exception.uncastableColumn"), row.getKey().getType().getTypeString(), row.getKey().getName())); } break; case TEXT: switch (row.getKey().getType()) { case STRING: current.setValue(row.getKey(), tweet.getText()); break; default: throw new IllegalArgumentException(String.format(rb.getString("exception.uncastableColumn"), row.getKey().getType().getTypeString(), row.getKey().getName())); } break; case URL_ENTITIES: case 
URL_ENTITIES_STRING: List<URLEntity> urlEntities = Arrays.asList(tweet.getURLEntities()); h = new ArrayList<String>(urlEntities.size()); for (URLEntity urlEntity : urlEntities) { h.add(urlEntity.getExpandedURL()); } switch (row.getKey().getType()) { case STRING: case LIST: current.setValue(row.getKey(), !TalendType.STRING.equals(row.getKey().getType()) ? h : Joiner.on(getEntitiesSeparator()).join(h)); break; default: throw new IllegalArgumentException(String.format(rb.getString("exception.uncastableColumn"), row.getKey().getType().getTypeString(), row.getKey().getName())); } break; case USER_MENTIONS: List<UserMentionEntity> userMentionsEntities = Arrays.asList(tweet.getUserMentionEntities()); l = new ArrayList<Long>(userMentionsEntities.size()); for (UserMentionEntity userMention : userMentionsEntities) { l.add(userMention.getId()); } switch (row.getKey().getType()) { case STRING: case LIST: current.setValue(row.getKey(), !TalendType.STRING.equals(row.getKey().getType()) ? l : Joiner.on(getEntitiesSeparator()).join(l)); break; default: throw new IllegalArgumentException(String.format(rb.getString("exception.uncastableColumn"), row.getKey().getType().getTypeString(), row.getKey().getName())); } break; case USER_MENTIONS_SCREEN_NAME: List<UserMentionEntity> userMentionsScreen = Arrays.asList(tweet.getUserMentionEntities()); h = new ArrayList<String>(userMentionsScreen.size()); for (UserMentionEntity userMention : userMentionsScreen) { h.add((includeHash() ? "@" : "") + userMention.getScreenName()); } switch (row.getKey().getType()) { case STRING: case LIST: current.setValue(row.getKey(), !TalendType.STRING.equals(row.getKey().getType()) ? 
h : Joiner.on(getEntitiesSeparator()).join(h)); break; default: throw new IllegalArgumentException(String.format(rb.getString("exception.uncastableColumn"), row.getKey().getType().getTypeString(), row.getKey().getName())); } break; default: throw new IllegalArgumentException( String.format(rb.getString("exception.unparseableColumn"), row.getKey().getName())); } } } valid = true; }
From source file:org.gabrielebaldassarre.twitter.stream.tweet.TalendRowTweetBehaviour.java
License:Open Source License
private boolean filter(Status status) { if (filter_links && status.getURLEntities().length == 0) return false; if (excludes.length == 0 && froms.length == 0 && ats.length == 0) return true; HashtagEntity[] hashtagEntities = status.getHashtagEntities(); List<String> hashtagEntityList = new ArrayList<String>(); for (HashtagEntity hashtagEntity : hashtagEntities) { hashtagEntityList.add(hashtagEntity.getText()); }//from w w w. jav a 2s.c om if (this.twitterLogicalOperator.equals(TwitterQueryBuilderLogicalOperator.AND)) { for (String exclude : excludes) { if (status.getText().contains(exclude)) return false; } for (String from : froms) { if (!status.getUser().getScreenName().equalsIgnoreCase(from)) return false; } for (String at : ats) { if (!hashtagEntityList.contains(at)) return false; } } else { for (String exclude : excludes) { if (!status.getText().contains(exclude)) return true; } for (String from : froms) { if (status.getUser().getScreenName().equalsIgnoreCase(from)) return true; } for (String at : ats) { if (hashtagEntityList.contains(at)) return true; } return false; } return true; }
From source file:org.gabrielebaldassarre.twitter.tweet.TalendFlowTweetBehaviour.java
License:Open Source License
/** * Visit a target {@link TalendFlow} for parsed or raw json output. * /*from ww w. j a v a 2 s . co m*/ * @param target the data flow to fill */ public void visit(TalendFlow target) { ResourceBundle rb = ResourceBundle.getBundle("tTwitterInput", Locale.getDefault()); TalendRowFactory rowFactory = target.getModel().getRowFactory(); valid = false; @SuppressWarnings("unchecked") List<Status> tweets = (List<Status>) resultSet.getValue(); for (int i = 0; i < tweets.size(); i++) { Status tweet = tweets.get(i); TalendRow current = rowFactory.newRow(target); Iterator<Entry<TalendColumn, TweetField>> col = associations.entrySet().iterator(); while (col.hasNext()) { List<String> h; List<Long> l; Map.Entry<TalendColumn, TweetField> row = (Map.Entry<TalendColumn, TweetField>) col.next(); if (target != null && !row.getKey().getFlow().equals(target)) { throw new IllegalArgumentException(String.format(rb.getString("exception.columnNotInFlow"), row.getKey().getName(), target.getName())); } switch (row.getValue()) { case CREATION_DATE: String literalDate = (new StringBuilder( TalendFlowTweetBehaviour.DATEFORMAT.format(tweet.getCreatedAt()))).toString(); switch (row.getKey().getType()) { case BIGDECIMAL: current.setValue(row.getKey(), new BigDecimal(literalDate)); case LONG: current.setValue(row.getKey(), Long.parseLong(literalDate)); case DOUBLE: current.setValue(row.getKey(), Double.parseDouble(literalDate)); case FLOAT: current.setValue(row.getKey(), Float.parseFloat(literalDate)); case INTEGER: current.setValue(row.getKey(), Integer.parseInt(literalDate)); case DATE: current.setValue(row.getKey(), tweet.getCreatedAt()); break; case STRING: current.setValue(row.getKey(), literalDate); break; default: throw new IllegalArgumentException(String.format(rb.getString("exception.uncastableColumn"), row.getKey().getType().getTypeString(), row.getKey().getName())); } break; case FROM_NAME: switch (row.getKey().getType()) { case STRING: current.setValue(row.getKey(), 
tweet.getUser().getName()); break; default: throw new IllegalArgumentException(String.format(rb.getString("exception.uncastableColumn"), row.getKey().getType().getTypeString(), row.getKey().getName())); } break; case FROM_USERID: switch (row.getKey().getType()) { case BIGDECIMAL: current.setValue(row.getKey(), new BigDecimal(tweet.getUser().getId())); break; case DOUBLE: current.setValue(row.getKey(), new Double(tweet.getUser().getId())); break; case FLOAT: current.setValue(row.getKey(), new Float(tweet.getUser().getId())); break; case LONG: current.setValue(row.getKey(), new Long(tweet.getUser().getId())); break; case STRING: current.setValue(row.getKey(), String.valueOf((tweet.getUser().getId()))); break; default: throw new IllegalArgumentException(String.format(rb.getString("exception.uncastableColumn"), row.getKey().getType().getTypeString(), row.getKey().getName())); } break; case FROM_SCREEN_NAME: switch (row.getKey().getType()) { case STRING: current.setValue(row.getKey(), tweet.getUser().getScreenName()); break; default: throw new IllegalArgumentException(String.format(rb.getString("exception.uncastableColumn"), row.getKey().getType().getTypeString(), row.getKey().getName())); } break; case HASHTAGS: List<HashtagEntity> hashtags = Arrays.asList(tweet.getHashtagEntities()); h = new ArrayList<String>(hashtags.size()); for (HashtagEntity hashtag : hashtags) { h.add((includeHash() ? "#" : "") + hashtag.getText()); } switch (row.getKey().getType()) { case STRING: case LIST: current.setValue(row.getKey(), !TalendType.STRING.equals(row.getKey().getType()) ? h : Joiner.on(getEntitiesSeparator()).join(h)); break; default: throw new IllegalArgumentException(String.format(rb.getString("exception.uncastableColumn"), row.getKey().getType().getTypeString(), row.getKey().getName())); } break; case IS_FAVORITED: switch (row.getKey().getType()) { case BIGDECIMAL: current.setValue(row.getKey(), new BigDecimal(tweet.isFavorited() ? 
1 : 0)); break; case BOOLEAN: current.setValue(row.getKey(), tweet.isFavorited()); break; case BYTE: current.setValue(row.getKey(), (byte) (tweet.isFavorited() ? 1 : 0)); break; case CHARACTER: current.setValue(row.getKey(), (tweet.isFavorited() ? '1' : '0')); break; case DOUBLE: current.setValue(row.getKey(), (double) (tweet.isFavorited() ? 1d : 0d)); break; case FLOAT: current.setValue(row.getKey(), (float) (tweet.isFavorited() ? 1f : 0f)); break; case INTEGER: current.setValue(row.getKey(), (tweet.isFavorited() ? 1 : 0)); break; case LONG: current.setValue(row.getKey(), (long) (tweet.isFavorited() ? 1l : 0l)); break; case SHORT: current.setValue(row.getKey(), (short) (tweet.isFavorited() ? (short) 1 : (short) 0)); break; case STRING: current.setValue(row.getKey(), (tweet.isFavorited() ? "1" : "0")); break; default: throw new IllegalArgumentException(String.format(rb.getString("exception.uncastableColumn"), row.getKey().getType().getTypeString(), row.getKey().getName())); } break; case IS_POSSIBLY_SENSITIVE: switch (row.getKey().getType()) { case BIGDECIMAL: current.setValue(row.getKey(), new BigDecimal(tweet.isPossiblySensitive() ? 1 : 0)); break; case BOOLEAN: current.setValue(row.getKey(), tweet.isPossiblySensitive()); break; case BYTE: current.setValue(row.getKey(), (byte) (tweet.isPossiblySensitive() ? 1 : 0)); break; case CHARACTER: current.setValue(row.getKey(), (tweet.isPossiblySensitive() ? '1' : '0')); break; case DOUBLE: current.setValue(row.getKey(), (double) (tweet.isPossiblySensitive() ? 1d : 0d)); break; case FLOAT: current.setValue(row.getKey(), (float) (tweet.isPossiblySensitive() ? 1f : 0f)); break; case INTEGER: current.setValue(row.getKey(), (tweet.isPossiblySensitive() ? 1 : 0)); break; case LONG: current.setValue(row.getKey(), (long) (tweet.isPossiblySensitive() ? 1l : 0l)); break; case SHORT: current.setValue(row.getKey(), (short) (tweet.isPossiblySensitive() ? 
(short) 1 : (short) 0)); break; case STRING: current.setValue(row.getKey(), (tweet.isPossiblySensitive() ? "1" : "0")); break; default: throw new IllegalArgumentException(String.format(rb.getString("exception.uncastableColumn"), row.getKey().getType().getTypeString(), row.getKey().getName())); } break; case IS_RETWEET: switch (row.getKey().getType()) { case BIGDECIMAL: current.setValue(row.getKey(), new BigDecimal(tweet.isRetweet() ? 1 : 0)); break; case BOOLEAN: current.setValue(row.getKey(), tweet.isRetweet()); break; case BYTE: current.setValue(row.getKey(), (byte) (tweet.isRetweet() ? 1 : 0)); break; case CHARACTER: current.setValue(row.getKey(), (tweet.isRetweet() ? '1' : '0')); break; case DOUBLE: current.setValue(row.getKey(), (double) (tweet.isRetweet() ? 1d : 0d)); break; case FLOAT: current.setValue(row.getKey(), (float) (tweet.isRetweet() ? 1f : 0f)); break; case INTEGER: current.setValue(row.getKey(), (tweet.isRetweet() ? 1 : 0)); break; case LONG: current.setValue(row.getKey(), (long) (tweet.isRetweet() ? 1l : 0l)); break; case SHORT: current.setValue(row.getKey(), (short) (tweet.isRetweet() ? (short) 1 : (short) 0)); break; case STRING: current.setValue(row.getKey(), (tweet.isRetweet() ? "1" : "0")); break; default: throw new IllegalArgumentException(String.format(rb.getString("exception.uncastableColumn"), row.getKey().getType().getTypeString(), row.getKey().getName())); } current.setValue(row.getKey(), tweet.isRetweet()); break; case LOCATION: GeoLocation g = tweet.getGeoLocation(); switch (row.getKey().getType()) { case STRING: current.setValue(row.getKey(), g != null ? 
String.valueOf(g.getLatitude()) + getEntitiesSeparator() + String.valueOf(g.getLongitude()) : null); break; case OBJECT: current.setValue(row.getKey(), g); break; default: throw new IllegalArgumentException(String.format(rb.getString("exception.uncastableColumn"), row.getKey().getType().getTypeString(), row.getKey().getName())); } break; case REPLYTO_SCREEN_NAME: switch (row.getKey().getType()) { case STRING: current.setValue(row.getKey(), tweet.getInReplyToScreenName()); break; default: throw new IllegalArgumentException(String.format(rb.getString("exception.uncastableColumn"), row.getKey().getType().getTypeString(), row.getKey().getName())); } break; case REPLYTO_STATUSID: switch (row.getKey().getType()) { case BIGDECIMAL: current.setValue(row.getKey(), new BigDecimal(tweet.getInReplyToStatusId())); break; case DOUBLE: current.setValue(row.getKey(), new Double(tweet.getInReplyToStatusId())); break; case FLOAT: current.setValue(row.getKey(), new Float(tweet.getInReplyToStatusId())); break; case LONG: current.setValue(row.getKey(), new Long(tweet.getInReplyToStatusId())); break; case STRING: current.setValue(row.getKey(), String.valueOf((tweet.getInReplyToStatusId()))); break; default: throw new IllegalArgumentException(String.format(rb.getString("exception.uncastableColumn"), row.getKey().getType().getTypeString(), row.getKey().getName())); } break; case REPLYTO_USERID: switch (row.getKey().getType()) { case BIGDECIMAL: current.setValue(row.getKey(), new BigDecimal(tweet.getInReplyToUserId())); break; case DOUBLE: current.setValue(row.getKey(), new Double(tweet.getInReplyToUserId())); break; case FLOAT: current.setValue(row.getKey(), new Float(tweet.getInReplyToUserId())); break; case LONG: current.setValue(row.getKey(), new Long(tweet.getInReplyToUserId())); break; case STRING: current.setValue(row.getKey(), String.valueOf((tweet.getInReplyToUserId()))); break; default: throw new IllegalArgumentException(String.format(rb.getString("exception.uncastableColumn"), 
row.getKey().getType().getTypeString(), row.getKey().getName())); } break; case RETWEET_COUNT: switch (row.getKey().getType()) { case BIGDECIMAL: current.setValue(row.getKey(), new BigDecimal(tweet.getRetweetCount())); break; case DOUBLE: current.setValue(row.getKey(), new Double(tweet.getRetweetCount())); break; case FLOAT: current.setValue(row.getKey(), new Float(tweet.getRetweetCount())); break; case LONG: current.setValue(row.getKey(), new Long(tweet.getRetweetCount())); break; case STRING: current.setValue(row.getKey(), String.valueOf((tweet.getRetweetCount()))); break; default: throw new IllegalArgumentException(String.format(rb.getString("exception.uncastableColumn"), row.getKey().getType().getTypeString(), row.getKey().getName())); } break; case SOURCE: switch (row.getKey().getType()) { case STRING: current.setValue(row.getKey(), tweet.getSource()); break; default: throw new IllegalArgumentException(String.format(rb.getString("exception.uncastableColumn"), row.getKey().getType().getTypeString(), row.getKey().getName())); } break; case STATUS_ID: switch (row.getKey().getType()) { case BIGDECIMAL: current.setValue(row.getKey(), new BigDecimal(tweet.getId())); break; case DOUBLE: current.setValue(row.getKey(), new Double(tweet.getId())); break; case FLOAT: current.setValue(row.getKey(), new Float(tweet.getId())); break; case LONG: current.setValue(row.getKey(), new Long(tweet.getId())); break; case STRING: current.setValue(row.getKey(), String.valueOf((tweet.getId()))); break; default: throw new IllegalArgumentException(String.format(rb.getString("exception.uncastableColumn"), row.getKey().getType().getTypeString(), row.getKey().getName())); } break; case TEXT: switch (row.getKey().getType()) { case STRING: current.setValue(row.getKey(), tweet.getText()); break; default: throw new IllegalArgumentException(String.format(rb.getString("exception.uncastableColumn"), row.getKey().getType().getTypeString(), row.getKey().getName())); } break; case URL_ENTITIES: case 
URL_ENTITIES_STRING: List<URLEntity> urlEntities = Arrays.asList(tweet.getURLEntities()); h = new ArrayList<String>(urlEntities.size()); for (URLEntity urlEntity : urlEntities) { h.add(urlEntity.getExpandedURL()); } switch (row.getKey().getType()) { case STRING: case LIST: current.setValue(row.getKey(), !TalendType.STRING.equals(row.getKey().getType()) ? h : Joiner.on(getEntitiesSeparator()).join(h)); break; default: throw new IllegalArgumentException(String.format(rb.getString("exception.uncastableColumn"), row.getKey().getType().getTypeString(), row.getKey().getName())); } break; case USER_MENTIONS: List<UserMentionEntity> userMentionsEntities = Arrays.asList(tweet.getUserMentionEntities()); l = new ArrayList<Long>(userMentionsEntities.size()); for (UserMentionEntity userMention : userMentionsEntities) { l.add(userMention.getId()); } switch (row.getKey().getType()) { case STRING: case LIST: current.setValue(row.getKey(), !TalendType.STRING.equals(row.getKey().getType()) ? l : Joiner.on(getEntitiesSeparator()).join(l)); break; default: throw new IllegalArgumentException(String.format(rb.getString("exception.uncastableColumn"), row.getKey().getType().getTypeString(), row.getKey().getName())); } break; case USER_MENTIONS_SCREEN_NAME: List<UserMentionEntity> userMentionsScreen = Arrays.asList(tweet.getUserMentionEntities()); h = new ArrayList<String>(userMentionsScreen.size()); for (UserMentionEntity userMention : userMentionsScreen) { h.add((includeHash() ? "@" : "") + userMention.getScreenName()); } switch (row.getKey().getType()) { case STRING: case LIST: current.setValue(row.getKey(), !TalendType.STRING.equals(row.getKey().getType()) ? 
h : Joiner.on(getEntitiesSeparator()).join(h)); break; default: throw new IllegalArgumentException(String.format(rb.getString("exception.uncastableColumn"), row.getKey().getType().getTypeString(), row.getKey().getName())); } break; case JSON: switch (row.getKey().getType()) { case STRING: current.setValue(row.getKey(), DataObjectFactory.getRawJSON(tweet)); break; default: throw new IllegalArgumentException(String.format(rb.getString("exception.uncastableColumn"), row.getKey().getType().getTypeString(), row.getKey().getName())); } break; default: throw new IllegalArgumentException( String.format(rb.getString("exception.unparseableColumn"), row.getKey().getName())); } } } valid = true; }