List of usage examples for twitter4j Status getHashtagEntities
HashtagEntity[] getHashtagEntities();
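A minimal, self-contained sketch of the call itself (not taken from any of the examples below; it assumes OAuth credentials are configured in twitter4j.properties, and the status id is a placeholder):

import twitter4j.HashtagEntity;
import twitter4j.Status;
import twitter4j.Twitter;
import twitter4j.TwitterException;
import twitter4j.TwitterFactory;

public class HashtagEntitiesExample {
    public static void main(String[] args) throws TwitterException {
        Twitter twitter = TwitterFactory.getSingleton(); // reads credentials from twitter4j.properties
        long statusId = 123456789L; // placeholder id, replace with a real tweet id
        Status status = twitter.showStatus(statusId);
        // The array is empty when the tweet has no hashtags; some of the examples
        // below additionally guard against null before iterating.
        for (HashtagEntity hashtag : status.getHashtagEntities()) {
            System.out.println("#" + hashtag.getText()
                    + " (offsets " + hashtag.getStart() + "-" + hashtag.getEnd() + ")");
        }
    }
}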
From source file:com.twitstreet.twitter.AdsListenerMgrImpl.java
License:Open Source License
@Override
public void start() {
    TwitterStream twitterStream = new TwitterStreamFactory().getInstance();
    Announcer announcer = announcerMgr.randomAnnouncerData();
    twitterStream.setOAuthConsumer(announcer.getConsumerKey(), announcer.getConsumerSecret());
    twitterStream.setOAuthAccessToken(new AccessToken(announcer.getAccessToken(), announcer.getAccessTokenSecret()));
    twitterStream.addListener(new StatusListener() {
        @Override public void onException(Exception arg0) { }

        @Override public void onTrackLimitationNotice(int arg0) { }

        @Override
        public void onStatus(Status status) {
            HashtagEntity[] hashtagEntities = status.getHashtagEntities();
            String screenName = status.getUser().getScreenName();
            User user = status.getUser();
            if (user != null && (System.currentTimeMillis() - lastMessage > TEN_MIN)) {
                lastMessage = System.currentTimeMillis();
                int action = (int) (ACTION_TYPES * Math.random());
                switch (action) {
                case REGULAR_TWEET:
                    LocalizationUtil lutil = LocalizationUtil.getInstance();
                    int sentenceSize = Integer
                            .parseInt(lutil.get("announcer.sentence.size", LocalizationUtil.DEFAULT_LANGUAGE));
                    int random = (int) (Math.random() * sentenceSize);
                    String rndMessage = lutil.get("announcer.sentence." + random, LocalizationUtil.DEFAULT_LANGUAGE);
                    announcerMgr.announceFromRandomAnnouncer(rndMessage);
                    break;
                case RETWEEET:
                    announcerMgr.retweet(status.getId());
                    break;
                case FAVOURITE:
                    announcerMgr.favourite(status.getId());
                    break;
                default:
                    String message = constructAdsMessage(screenName, hashtagEntities, status.getUser().getLang());
                    announcerMgr.reply(message, status.getId());
                    break;
                }
            }
        }

        @Override public void onScrubGeo(long arg0, long arg1) { }

        @Override public void onDeletionNotice(StatusDeletionNotice arg0) { }
    });

    FilterQuery filterQuery = new FilterQuery();
    filterQuery.count(0);
    filterQuery.track(FILTER_TERMS);
    twitterStream.filter(filterQuery);
}
From source file:com.twitter.graphjet.demo.TwitterStreamReader.java
License:Open Source License
public static void main(String[] argv) throws Exception {
    final TwitterStreamReaderArgs args = new TwitterStreamReaderArgs();
    CmdLineParser parser = new CmdLineParser(args, ParserProperties.defaults().withUsageWidth(90));

    try {
        parser.parseArgument(argv);
    } catch (CmdLineException e) {
        System.err.println(e.getMessage());
        parser.printUsage(System.err);
        return;
    }

    final Date demoStart = new Date();

    final MultiSegmentPowerLawBipartiteGraph userTweetBigraph = new MultiSegmentPowerLawBipartiteGraph(
            args.maxSegments, args.maxEdgesPerSegment, args.leftSize, args.leftDegree, args.leftPowerLawExponent,
            args.rightSize, args.rightDegree, args.rightPowerLawExponent, new IdentityEdgeTypeMask(),
            new NullStatsReceiver());

    final MultiSegmentPowerLawBipartiteGraph tweetHashtagBigraph = new MultiSegmentPowerLawBipartiteGraph(
            args.maxSegments, args.maxEdgesPerSegment, args.leftSize, args.leftDegree, args.leftPowerLawExponent,
            args.rightSize, args.rightDegree, args.rightPowerLawExponent, new IdentityEdgeTypeMask(),
            new NullStatsReceiver());

    // Note that we're keeping track of the nodes on the left and right sides externally, apart from the bigraphs,
    // because the bigraph currently does not provide an API for enumerating over nodes. Currently, this is liable
    // to run out of memory, but this is fine for the demo.
    Long2ObjectOpenHashMap<String> users = new Long2ObjectOpenHashMap<>();
    LongOpenHashSet tweets = new LongOpenHashSet();
    Long2ObjectOpenHashMap<String> hashtags = new Long2ObjectOpenHashMap<>();
    // It is accurate to think of these two data structures as holding all users and tweets observed on the stream
    // since the demo program was started.

    StatusListener listener = new StatusListener() {
        long statusCnt = 0;

        public void onStatus(Status status) {
            String screenname = status.getUser().getScreenName();
            long userId = status.getUser().getId();
            long tweetId = status.getId();
            long resolvedTweetId = status.isRetweet() ? status.getRetweetedStatus().getId() : status.getId();
            HashtagEntity[] hashtagEntities = status.getHashtagEntities();

            userTweetBigraph.addEdge(userId, resolvedTweetId, (byte) 0);

            if (!users.containsKey(userId)) {
                users.put(userId, screenname);
            }
            if (!tweets.contains(tweetId)) {
                tweets.add(tweetId);
            }
            if (!tweets.contains(resolvedTweetId)) {
                tweets.add(resolvedTweetId);
            }

            for (HashtagEntity entity : hashtagEntities) {
                long hashtagHash = (long) entity.getText().toLowerCase().hashCode();
                tweetHashtagBigraph.addEdge(tweetId, hashtagHash, (byte) 0);
                if (!hashtags.containsKey(hashtagHash)) {
                    hashtags.put(hashtagHash, entity.getText().toLowerCase());
                }
            }

            statusCnt++;

            // Note that status updates are currently performed synchronously (i.e., blocking). Best practices
            // dictate that they should happen on another thread so as to not interfere with ingest, but this is
            // okay for the purposes of the demo and the volume of the sample stream.

            // Minor status update: just print counters.
            if (statusCnt % args.minorUpdateInterval == 0) {
                long duration = (new Date().getTime() - demoStart.getTime()) / 1000;
                System.out.println(String.format(
                        "%tc: %,d statuses, %,d unique tweets, %,d unique hashtags (observed); "
                                + "%.2f edges/s; totalMemory(): %,d bytes, freeMemory(): %,d bytes",
                        new Date(), statusCnt, tweets.size(), hashtags.size(), (float) statusCnt / duration,
                        Runtime.getRuntime().totalMemory(), Runtime.getRuntime().freeMemory()));
            }

            // Major status update: iterate over right and left nodes.
            if (statusCnt % args.majorUpdateInterval == 0) {
                int leftCnt = 0;
                LongIterator leftIter = tweets.iterator();
                while (leftIter.hasNext()) {
                    if (userTweetBigraph.getLeftNodeDegree(leftIter.nextLong()) != 0)
                        leftCnt++;
                }

                int rightCnt = 0;
                LongIterator rightIter = hashtags.keySet().iterator();
                while (rightIter.hasNext()) {
                    if (userTweetBigraph.getRightNodeDegree(rightIter.nextLong()) != 0)
                        rightCnt++;
                }
                System.out.println(String.format("%tc: Current user-tweet graph state: %,d left nodes (users), "
                        + "%,d right nodes (tweets)", new Date(), leftCnt, rightCnt));
            }
        }

        public void onScrubGeo(long userId, long upToStatusId) { }

        public void onDeletionNotice(StatusDeletionNotice statusDeletionNotice) { }

        public void onTrackLimitationNotice(int numberOfLimitedStatuses) { }

        public void onStallWarning(StallWarning warning) { }

        public void onException(Exception e) {
            e.printStackTrace();
        }
    };

    TwitterStream twitterStream = new TwitterStreamFactory().getInstance();
    twitterStream.addListener(listener);
    twitterStream.sample();

    ServletContextHandler context = new ServletContextHandler(ServletContextHandler.SESSIONS);
    context.setContextPath("/");

    Server jettyServer = new Server(args.port);
    jettyServer.setHandler(context);

    context.addServlet(new ServletHolder(new TopUsersServlet(userTweetBigraph, users)),
            "/userTweetGraph/topUsers");
    context.addServlet(
            new ServletHolder(new TopTweetsServlet(userTweetBigraph, tweets, TopTweetsServlet.GraphType.USER_TWEET)),
            "/userTweetGraph/topTweets");
    context.addServlet(
            new ServletHolder(new TopTweetsServlet(tweetHashtagBigraph, tweets, TopTweetsServlet.GraphType.TWEET_HASHTAG)),
            "/tweetHashtagGraph/topTweets");
    context.addServlet(new ServletHolder(new TopHashtagsServlet(tweetHashtagBigraph, hashtags)),
            "/tweetHashtagGraph/topHashtags");
    context.addServlet(new ServletHolder(new GetEdgesServlet(userTweetBigraph, GetEdgesServlet.Side.LEFT)),
            "/userTweetGraphEdges/users");
    context.addServlet(new ServletHolder(new GetEdgesServlet(userTweetBigraph, GetEdgesServlet.Side.RIGHT)),
            "/userTweetGraphEdges/tweets");
    context.addServlet(new ServletHolder(new GetEdgesServlet(tweetHashtagBigraph, GetEdgesServlet.Side.LEFT)),
            "/tweetHashtagGraphEdges/tweets");
    context.addServlet(new ServletHolder(new GetEdgesServlet(tweetHashtagBigraph, GetEdgesServlet.Side.RIGHT)),
            "/tweetHashtagGraphEdges/hashtags");
    context.addServlet(new ServletHolder(new GetSimilarHashtagsServlet(tweetHashtagBigraph, hashtags)),
            "/similarHashtags");

    System.out.println(String.format("%tc: Starting service on port %d", new Date(), args.port));
    try {
        jettyServer.start();
        jettyServer.join();
    } finally {
        jettyServer.destroy();
    }
}
From source file:com.wso2.stream.connector.protocol.TweetContent.java
License:Open Source License
public OMElement createBodyContent(OMFactory omFactory, Status status) {
    OMElement tweet = omFactory.createOMElement(qTweet);

    OMElement text = omFactory.createOMElement(qText);
    tweet.addChild(text);
    text.addChild(omFactory.createOMText(status.getText()));

    OMElement createdAt = omFactory.createOMElement(qCreatedAt);
    tweet.addChild(createdAt);
    createdAt.addChild(omFactory.createOMText(status.getCreatedAt().toString()));

    OMElement latitude = omFactory.createOMElement(qLatitude);
    tweet.addChild(latitude);
    OMElement longitude = omFactory.createOMElement(qLongitude);
    tweet.addChild(longitude);
    if (status.getGeoLocation() != null) {
        latitude.addChild(omFactory.createOMText(String.valueOf(status.getGeoLocation().getLatitude())));
        longitude.addChild(omFactory.createOMText(String.valueOf(status.getGeoLocation().getLongitude())));
    }

    OMElement country = omFactory.createOMElement(qCountry);
    tweet.addChild(country);
    OMElement countryCode = omFactory.createOMElement(qCountryCode);
    tweet.addChild(countryCode);
    if (status.getPlace() != null) {
        country.addChild(omFactory.createOMText(status.getPlace().getCountry()));
        countryCode.addChild(omFactory.createOMText(status.getPlace().getCountryCode()));
    }

    OMElement location = omFactory.createOMElement(qLocation);
    tweet.addChild(location);
    if (status.getUser() != null) {
        location.addChild(omFactory.createOMText(status.getUser().getLocation()));
    }

    OMElement hashTags = omFactory.createOMElement(qHasTags);
    tweet.addChild(hashTags);
    if (status.getHashtagEntities().length > 0) {
        String tags = "";
        for (HashtagEntity h : status.getHashtagEntities()) {
            tags += h.getText() + ";";
        }
        tags = tags.substring(0, tags.length() - 1);
        hashTags.addChild(omFactory.createOMText(tags));
    }
    return tweet;
}
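A side note on the snippet above: the manual ";" concatenation followed by substring() trimming can be replaced with java.util.StringJoiner on Java 8+. A minimal sketch, independent of the Axiom OMElement code:

import java.util.StringJoiner;

import twitter4j.HashtagEntity;
import twitter4j.Status;

public class HashtagJoiner {
    /** Returns the tweet's hashtag texts joined with ';', or an empty string if there are none. */
    public static String joinHashtags(Status status) {
        StringJoiner tags = new StringJoiner(";");
        for (HashtagEntity h : status.getHashtagEntities()) {
            tags.add(h.getText());
        }
        return tags.toString();
    }
}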
From source file:crawler.DataStorage.java
License:Apache License
private static void sqlStore(Status status) throws SQLException {
    long sql_pid = Settings.pid;
    Settings.pid++;
    SimpleDateFormat tempDate = new SimpleDateFormat("yyyy-MM-dd, HH:mm:ss, z");
    String sqlCreateAt = tempDate.format(new java.util.Date(status.getCreatedAt().getTime()));
    double sqlGeoLocationLat = 0;
    double sqlGeoLocationLong = 0;
    if (status.getGeoLocation() != null) {
        sqlGeoLocationLat = status.getGeoLocation().getLatitude();
        sqlGeoLocationLong = status.getGeoLocation().getLongitude();
    }
    String sqlPlace = (status.getPlace() != null ? status.getPlace().getFullName() : "");
    long sqlId = status.getId();
    String sqlTweet = status.getText().replace("'", "''");
    String sqlSource = status.getSource().replace("'", "''");
    sqlSource = sqlSource.replace("\\", "\\\\");
    String sqlLang = status.getUser().getLang();
    String sqlScreenName = status.getUser().getScreenName();
    String sqlReplyTo = status.getInReplyToScreenName();
    long sqlRtCount = status.getRetweetCount();

    HashtagEntity[] hashs = status.getHashtagEntities();
    String sqlHashtags = "";
    for (HashtagEntity hash : hashs)
        sqlHashtags += hash.getText() + " ";

    pstm.setLong(1, sql_pid);
    pstm.setString(2, sqlCreateAt);
    pstm.setDouble(3, sqlGeoLocationLat);
    pstm.setDouble(4, sqlGeoLocationLong);
    pstm.setString(5, sqlPlace);
    pstm.setLong(6, sqlId);
    pstm.setString(7, sqlTweet);
    pstm.setString(8, sqlSource);
    pstm.setString(9, sqlLang);
    pstm.setString(10, sqlScreenName);
    pstm.setString(11, sqlReplyTo);
    pstm.setLong(12, sqlRtCount);
    pstm.setString(13, sqlHashtags);
    pstm.addBatch();
}
From source file:crawler.DataStream.java
License:Apache License
private void freqUpdate(Status status) {
    //String hashtags = "";
    HashtagEntity[] tags = status.getHashtagEntities();
    for (HashtagEntity t : tags) {
        String hashtag = "#" + t.getText().toLowerCase();
        int val = 1;
        if (Settings.TFHashtagFreq.containsKey(hashtag)) {
            val += Settings.TFHashtagFreq.get(hashtag);
        }
        Settings.TFHashtagFreq.put(hashtag, val);
        //hashtags += t.getText() + ",";
    }
    //System.out.println(status.getCreatedAt() + ", [" + hashtags + "] - " + status.getText());
}
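The containsKey/get/put counting pattern above can be written more compactly with Map.merge on Java 8+. A minimal sketch, assuming a plain Map<String, Integer> in place of the project-specific Settings.TFHashtagFreq:

import java.util.HashMap;
import java.util.Map;

import twitter4j.HashtagEntity;
import twitter4j.Status;

public class HashtagFrequency {
    private final Map<String, Integer> hashtagFreq = new HashMap<>();

    /** Increments a counter for each hashtag in the status, lower-cased and prefixed with '#'. */
    public void freqUpdate(Status status) {
        for (HashtagEntity tag : status.getHashtagEntities()) {
            String hashtag = "#" + tag.getText().toLowerCase();
            hashtagFreq.merge(hashtag, 1, Integer::sum); // insert 1, or add 1 to the existing count
        }
    }
}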
From source file:DataCollections.TweetHelper.java
public Tweet_dbo convertStatusToTweet_dbo(Status s) {
    Tweet_dbo tweet = new Tweet_dbo();
    tweet.values[Tweet_dbo.map.get("tweet_id")].setValue(String.valueOf(s.getId()));
    tweet.values[Tweet_dbo.map.get("user_id")].setValue(String.valueOf(s.getUser().getId()));
    tweet.values[Tweet_dbo.map.get("user_screenname")]
            .setValue(removeEscapeCharacters(s.getUser().getScreenName()));
    if (s.getGeoLocation() != null) {
        tweet.values[Tweet_dbo.map.get("lon")].setValue(String.valueOf(s.getGeoLocation().getLongitude()));
        tweet.values[Tweet_dbo.map.get("lat")].setValue(String.valueOf(s.getGeoLocation().getLatitude()));
    }
    //tweet.values[Tweet_dbo.map.get("f_search")].setValue("true");
    tweet.values[Tweet_dbo.map.get("text")].setValue(removeEscapeCharacters(s.getText()));
    tweet.values[Tweet_dbo.map.get("hashtags")].setValue(stringifyHashtags(s.getHashtagEntities()));
    tweet.values[Tweet_dbo.map.get("mentions")].setValue(stringiyMentions(s.getUserMentionEntities()));
    tweet.values[Tweet_dbo.map.get("favouritecount")].setValue(String.valueOf(s.getFavoriteCount()));
    tweet.values[Tweet_dbo.map.get("retweetcount")].setValue(String.valueOf(s.getRetweetCount()));
    return tweet;
}
From source file:druid.examples.twitter.TwitterSpritzerFirehoseFactory.java
License:Open Source License
@Override
public Firehose connect() throws IOException {
    final ConnectionLifeCycleListener connectionLifeCycleListener = new ConnectionLifeCycleListener() {
        @Override
        public void onConnect() {
            log.info("Connected_to_Twitter");
        }

        @Override
        public void onDisconnect() {
            log.info("Disconnect_from_Twitter");
        }

        /**
         * called before thread gets cleaned up
         */
        @Override
        public void onCleanUp() {
            log.info("Cleanup_twitter_stream");
        }
    }; // ConnectionLifeCycleListener

    final TwitterStream twitterStream;
    final StatusListener statusListener;
    final int QUEUE_SIZE = 2000;
    /** This queue is used to move twitter events from the twitter4j thread to the druid ingest thread. */
    final BlockingQueue<Status> queue = new ArrayBlockingQueue<Status>(QUEUE_SIZE);
    final LinkedList<String> dimensions = new LinkedList<String>();
    final long startMsec = System.currentTimeMillis();

    dimensions.add("htags");
    dimensions.add("lang");
    dimensions.add("utc_offset");

    //
    //   set up Twitter Spritzer
    //
    twitterStream = new TwitterStreamFactory().getInstance();
    twitterStream.addConnectionLifeCycleListener(connectionLifeCycleListener);
    statusListener = new StatusListener() {
        // This is what really gets called to deliver stuff from twitter4j
        @Override
        public void onStatus(Status status) {
            // time to stop?
            if (Thread.currentThread().isInterrupted()) {
                throw new RuntimeException("Interrupted, time to stop");
            }
            try {
                boolean success = queue.offer(status, 15L, TimeUnit.SECONDS);
                if (!success) {
                    log.warn("queue too slow!");
                }
            } catch (InterruptedException e) {
                throw new RuntimeException("InterruptedException", e);
            }
        }

        @Override
        public void onDeletionNotice(StatusDeletionNotice statusDeletionNotice) {
            //log.info("Got a status deletion notice id:" + statusDeletionNotice.getStatusId());
        }

        @Override
        public void onTrackLimitationNotice(int numberOfLimitedStatuses) {
            // This notice will be sent each time a limited stream becomes unlimited.
            // If this number is high and/or rapidly increasing, it is an indication that your predicate is too
            // broad, and you should consider a predicate with higher selectivity.
            log.warn("Got track limitation notice:" + numberOfLimitedStatuses);
        }

        @Override
        public void onScrubGeo(long userId, long upToStatusId) {
            //log.info("Got scrub_geo event userId:" + userId + " upToStatusId:" + upToStatusId);
        }

        @Override
        public void onException(Exception ex) {
            ex.printStackTrace();
        }

        @Override
        public void onStallWarning(StallWarning warning) {
            System.out.println("Got stall warning:" + warning);
        }
    };
    twitterStream.addListener(statusListener);
    twitterStream.sample(); // creates a generic StatusStream
    log.info("returned from sample()");

    return new Firehose() {
        private final Runnable doNothingRunnable = new Runnable() {
            public void run() {
            }
        };

        private long rowCount = 0L;
        private boolean waitIfmax = (maxEventCount < 0L);
        private final Map<String, Object> theMap = new HashMap<String, Object>(2);
        // DIY json parsing
        // private final ObjectMapper omapper = new ObjectMapper();

        private boolean maxTimeReached() {
            if (maxRunMinutes <= 0) {
                return false;
            } else {
                return (System.currentTimeMillis() - startMsec) / 60000L >= maxRunMinutes;
            }
        }

        private boolean maxCountReached() {
            return maxEventCount >= 0 && rowCount >= maxEventCount;
        }

        @Override
        public boolean hasMore() {
            if (maxCountReached() || maxTimeReached()) {
                return waitIfmax;
            } else {
                return true;
            }
        }

        @Override
        public InputRow nextRow() {
            // Interrupted to stop?
            if (Thread.currentThread().isInterrupted()) {
                throw new RuntimeException("Interrupted, time to stop");
            }

            // all done?
            if (maxCountReached() || maxTimeReached()) {
                if (waitIfmax) {
                    // sleep a long time instead of terminating
                    try {
                        log.info("reached limit, sleeping a long time...");
                        sleep(2000000000L);
                    } catch (InterruptedException e) {
                        throw new RuntimeException("InterruptedException", e);
                    }
                } else {
                    // allow this event through, and the next hasMore() call will be false
                }
            }
            if (++rowCount % 1000 == 0) {
                log.info("nextRow() has returned %,d InputRows", rowCount);
            }

            Status status;
            try {
                status = queue.take();
            } catch (InterruptedException e) {
                throw new RuntimeException("InterruptedException", e);
            }

            HashtagEntity[] hts = status.getHashtagEntities();
            if (hts != null && hts.length > 0) {
                List<String> hashTags = Lists.newArrayListWithExpectedSize(hts.length);
                for (HashtagEntity ht : hts) {
                    hashTags.add(ht.getText());
                }
                theMap.put("htags", Arrays.asList(hashTags.get(0)));
            }

            long retweetCount = status.getRetweetCount();
            theMap.put("retweet_count", retweetCount);

            User user = status.getUser();
            if (user != null) {
                theMap.put("follower_count", user.getFollowersCount());
                theMap.put("friends_count", user.getFriendsCount());
                theMap.put("lang", user.getLang());
                theMap.put("utc_offset", user.getUtcOffset()); // resolution in seconds, -1 if not available?
                theMap.put("statuses_count", user.getStatusesCount());
            }

            return new MapBasedInputRow(status.getCreatedAt().getTime(), dimensions, theMap);
        }

        @Override
        public Runnable commit() {
            // ephemera in, ephemera out.
            return doNothingRunnable; // reuse the same object each time
        }

        @Override
        public void close() throws IOException {
            log.info("CLOSE twitterstream");
            twitterStream.shutdown(); // invokes twitterStream.cleanUp()
        }
    };
}
From source file:edu.cmu.cs.lti.discoursedb.io.twitter.converter.TwitterConverterService.java
License:Open Source License
/**
 * Maps a Tweet represented as a Twitter4J Status object to DiscourseDB
 *
 * @param discourseName the name of the discourse
 * @param datasetName the dataset identifier
 * @param tweet the Tweet to store in DiscourseDB
 */
public void mapTweet(String discourseName, String datasetName, Status tweet, PemsStationMetaData pemsMetaData) {
    if (tweet == null) {
        return;
    }
    Assert.hasText(discourseName, "The discourse name has to be specified and cannot be empty.");
    Assert.hasText(datasetName, "The dataset name has to be specified and cannot be empty.");

    if (dataSourceService.dataSourceExists(String.valueOf(tweet.getId()), TweetSourceMapping.ID_TO_CONTRIBUTION,
            datasetName)) {
        log.trace("Tweet with id " + tweet.getId() + " already exists in database. Skipping");
        return;
    }
    log.trace("Mapping Tweet " + tweet.getId());

    Discourse discourse = discourseService.createOrGetDiscourse(discourseName);

    twitter4j.User tUser = tweet.getUser();
    User user = null;
    if (!userService.findUserByDiscourseAndUsername(discourse, tUser.getScreenName()).isPresent()) {
        user = userService.createOrGetUser(discourse, tUser.getScreenName());
        user.setRealname(tUser.getName());
        user.setEmail(tUser.getEmail());
        user.setLocation(tUser.getLocation());
        user.setLanguage(tUser.getLang());
        user.setStartTime(tweet.getUser().getCreatedAt());

        AnnotationInstance userInfo = annoService.createTypedAnnotation("twitter_user_info");
        annoService.addFeature(userInfo,
                annoService.createTypedFeature(String.valueOf(tUser.getFavouritesCount()), "favorites_count"));
        annoService.addFeature(userInfo,
                annoService.createTypedFeature(String.valueOf(tUser.getFollowersCount()), "followers_count"));
        annoService.addFeature(userInfo,
                annoService.createTypedFeature(String.valueOf(tUser.getFriendsCount()), "friends_count"));
        annoService.addFeature(userInfo,
                annoService.createTypedFeature(String.valueOf(tUser.getStatusesCount()), "statuses_count"));
        annoService.addFeature(userInfo,
                annoService.createTypedFeature(String.valueOf(tUser.getListedCount()), "listed_count"));
        if (tUser.getDescription() != null) {
            annoService.addFeature(userInfo,
                    annoService.createTypedFeature(String.valueOf(tUser.getDescription()), "description"));
        }
        annoService.addAnnotation(user, userInfo);
    }

    Contribution curContrib = contributionService.createTypedContribution(ContributionTypes.TWEET);
    DataSourceInstance contribSource = dataSourceService.createIfNotExists(new DataSourceInstance(
            String.valueOf(tweet.getId()), TweetSourceMapping.ID_TO_CONTRIBUTION, datasetName));
    curContrib.setStartTime(tweet.getCreatedAt());
    dataSourceService.addSource(curContrib, contribSource);

    AnnotationInstance tweetInfo = annoService.createTypedAnnotation("twitter_tweet_info");
    if (tweet.getSource() != null) {
        annoService.addFeature(tweetInfo, annoService.createTypedFeature(tweet.getSource(), "tweet_source"));
    }
    annoService.addFeature(tweetInfo,
            annoService.createTypedFeature(String.valueOf(tweet.getFavoriteCount()), "favorites_count"));

    if (tweet.getHashtagEntities() != null) {
        for (HashtagEntity hashtag : tweet.getHashtagEntities()) {
            annoService.addFeature(tweetInfo, annoService.createTypedFeature(hashtag.getText(), "hashtag"));
        }
    }

    if (tweet.getMediaEntities() != null) {
        for (MediaEntity media : tweet.getMediaEntities()) {
            //NOTE: additional info is available for MediaEntities
            annoService.addFeature(tweetInfo, annoService.createTypedFeature(media.getMediaURL(), "media_url"));
        }
    }

    //TODO this should be represented as a relation if the related tweet is part of the dataset
    if (tweet.getInReplyToStatusId() > 0) {
        annoService.addFeature(tweetInfo, annoService
                .createTypedFeature(String.valueOf(tweet.getInReplyToStatusId()), "in_reply_to_status_id"));
    }

    //TODO this should be represented as a relation if the related tweet is part of the dataset
    if (tweet.getInReplyToScreenName() != null) {
        annoService.addFeature(tweetInfo,
                annoService.createTypedFeature(tweet.getInReplyToScreenName(), "in_reply_to_screen_name"));
    }
    annoService.addAnnotation(curContrib, tweetInfo);

    GeoLocation geo = tweet.getGeoLocation();
    if (geo != null) {
        AnnotationInstance coord = annoService.createTypedAnnotation("twitter_tweet_geo_location");
        annoService.addFeature(coord, annoService.createTypedFeature(String.valueOf(geo.getLongitude()), "long"));
        annoService.addFeature(coord, annoService.createTypedFeature(String.valueOf(geo.getLatitude()), "lat"));
        annoService.addAnnotation(curContrib, coord);
    }

    Place place = tweet.getPlace();
    if (place != null) {
        AnnotationInstance placeAnno = annoService.createTypedAnnotation("twitter_tweet_place");
        annoService.addFeature(placeAnno,
                annoService.createTypedFeature(String.valueOf(place.getPlaceType()), "place_type"));
        if (place.getGeometryType() != null) {
            annoService.addFeature(placeAnno,
                    annoService.createTypedFeature(String.valueOf(place.getGeometryType()), "geo_type"));
        }
        annoService.addFeature(placeAnno, annoService
                .createTypedFeature(String.valueOf(place.getBoundingBoxType()), "bounding_box_type"));
        annoService.addFeature(placeAnno,
                annoService.createTypedFeature(String.valueOf(place.getFullName()), "place_name"));
        if (place.getStreetAddress() != null) {
            annoService.addFeature(placeAnno,
                    annoService.createTypedFeature(String.valueOf(place.getStreetAddress()), "street_address"));
        }
        annoService.addFeature(placeAnno,
                annoService.createTypedFeature(String.valueOf(place.getCountry()), "country"));
        if (place.getBoundingBoxCoordinates() != null) {
            annoService.addFeature(placeAnno, annoService.createTypedFeature(
                    convertGeoLocationArray(place.getBoundingBoxCoordinates()), "bounding_box_lat_lon_array"));
        }
        if (place.getGeometryCoordinates() != null) {
            annoService.addFeature(placeAnno, annoService.createTypedFeature(
                    convertGeoLocationArray(place.getGeometryCoordinates()), "geometry_lat_lon_array"));
        }
        annoService.addAnnotation(curContrib, placeAnno);
    }

    Content curContent = contentService.createContent();
    curContent.setText(tweet.getText());
    curContent.setAuthor(user);
    curContent.setStartTime(tweet.getCreatedAt());
    curContrib.setCurrentRevision(curContent);
    curContrib.setFirstRevision(curContent);
    DataSourceInstance contentSource = dataSourceService.createIfNotExists(new DataSourceInstance(
            String.valueOf(tweet.getId()), TweetSourceMapping.ID_TO_CONTENT, datasetName));
    dataSourceService.addSource(curContent, contentSource);

    if (pemsMetaData != null) {
        log.warn("PEMS station meta data mapping not implemented yet");
        //TODO map pems meta data if available
    }
}
From source file:gh.polyu.user.TrackUsers.java
License:Apache License
public void track(final int no, final int p) {
    final TwitterDBHandle handle = new TwitterDBHandle();
    handle.intialTwitterDBhandle();
    while (alive) {
        alive = false;
        StatusListener listener = new StatusListener() {
            ArrayList<_TweetLink> listlink = new ArrayList<_TweetLink>();
            int cnt = 0;
            String oldmonth = "20138";
            String table = "UserTweet20138";
            String oldday = "";
            String currentday = "";
            String currentmonth = "";
            long lastinsert = 0l;
            long nowinsert = 0l;
            int newday = 0;
            String newtime = "";

            @Override
            public void onStatus(Status status) {
                if (status.getId() == 123 && status.getText().equals("YOU are WORNG!.")) {
                    System.out.println("Connection Need to be rebuilt!!");
                    alive = true;
                } else if (status.getLang().equals("en")) {
                    _TweetLink tweet = new _TweetLink();
                    String Test = status.getText();
                    tweet.setText(Test);
                    Date time = status.getCreatedAt();
                    tweet.setTime(time);
                    tweet.setUserName(status.getUser().getName());

                    HashtagEntity[] hashtagentity = status.getHashtagEntities();
                    StringBuffer hashen = new StringBuffer();
                    for (int i = 0; i < hashtagentity.length; i++) {
                        hashen.append(hashtagentity[i].getText());
                        hashen.append(";");
                    }
                    tweet.setHashtag(hashen.toString());

                    URLEntity[] URLEn = status.getURLEntities();
                    StringBuffer URL = new StringBuffer();
                    for (int i = 0; i < URLEn.length; i++) {
                        URL.append(URLEn[i].getURL());
                        URL.append(";");
                    }
                    tweet.setURL(URL.toString());

                    // user mention
                    UserMentionEntity[] userEn = status.getUserMentionEntities();
                    StringBuffer mentuser = new StringBuffer();
                    for (int i = 0; i < userEn.length; i++) {
                        mentuser.append(userEn[i].getId());
                        mentuser.append(";");
                    }
                    tweet.setUerMention(mentuser.toString());
                    //if(mentuser.length()!=0);
                    //System.out.println("mentuser: "+ mentuser);

                    // tweetID
                    tweet.setTweetID(status.getId());
                    //if(ID!=null)

                    // original twitterID
                    tweet.setOriginID(status.getInReplyToStatusId());
                    // original user ID
                    tweet.setOriginUser(status.getInReplyToUserId());
                    // user ID
                    User users = status.getUser();
                    tweet.setTweetUser(users.getId());

                    // places
                    Place Pl = status.getPlace();
                    String place = "";
                    if (Pl != null) {
                        place = Pl.getFullName();
                        //System.out.println("place "+place);
                    }
                    tweet.setPlace(place);

                    // Retweetcount
                    long num = 0;
                    if (status.getRetweetedStatus() != null) {
                        num = status.getRetweetedStatus().getRetweetCount();
                        //System.out.println("retweetcount"+num);
                        tweet.setRetweetCount(num);
                        tweet.setRetweet(1);
                    } else {
                        tweet.setRetweetCount(0);
                        tweet.setRetweet(0);
                    }
                    //if(Retweet!=null)
                    //System.out.println("Retweetcount: "+ Retweet);

                    // isfavourate
                    boolean favourate = status.isFavorited();
                    /*if(favourate) { fav = 1; tweet.setFavourate(fav); fav = 0; System.out.println("isf "+ fav); }*/
                    // is retweet
                    //String other = status.toString();
                    //tweet.setOther(other);

                    listlink.add(tweet);

                    Calendar cal = Calendar.getInstance();
                    int year = cal.get(Calendar.YEAR);
                    int month = cal.get(Calendar.MONTH) + 1;
                    int day = cal.get(Calendar.DAY_OF_MONTH);
                    currentmonth = String.valueOf(year) + String.valueOf(month);
                    currentday = String.valueOf(day);
                    if (currentmonth.equals(oldmonth)) {
                        if (currentday.equals(oldday))
                            ;
                        else {
                            newday = 1;
                            SimpleDateFormat formatter = new SimpleDateFormat("MMddHH:mm:ss ");
                            Date curDate = new Date(System.currentTimeMillis());
                            newtime = formatter.format(curDate);
                        }
                    } else {
                        try {
                            handle.database_connection();
                            table = "UserTweet" + String.valueOf(year) + String.valueOf(month);
                            System.out.println("create new table " + table);
                            String CREATE_TABLE = "create table " + table + "(TweetID varchar(100), UserName varchar(200), TwitterUser varchar(145), OriginID varchar(100), OriginUser varchar(100), place varchar(100), RetweetCount varchar(100), isRetweet int(5), Text varchar(500), Time datetime," + "Hashtag varchar(200), URL varchar(200), UerMention varchar(200))";
                            Statement st = handle.conn.createStatement();
                            st.execute(CREATE_TABLE);
                            String Create_Index = "alter table " + table + " add index time (Time)";
                            st.execute(Create_Index);
                            String Create_Index2 = "alter table " + table + " add index userID (TwitterUser)";
                            st.execute(Create_Index2);
                            String key = "ALTER TABLE " + table + " ADD PRIMARY KEY (TweetID)";
                            st.execute(key);
                        } catch (SQLException e) {
                            e.printStackTrace();
                        }
                        handle.close_databasehandle();
                        try {
                            TwitterDBHandle handle2 = new TwitterDBHandle();
                            handle2.intialTwitterDBhandle2();
                            handle2.database_connection();
                            table = "UserTweet" + String.valueOf(year) + String.valueOf(month);
                            System.out.println("create new table " + table);
                            String CREATE_TABLE = "create table " + table + "(TweetID varchar(100), UserName varchar(200), TwitterUser varchar(145), OriginID varchar(100), OriginUser varchar(100), place varchar(100), RetweetCount varchar(100), isRetweet int(5), Text varchar(500), Time datetime," + "Hashtag varchar(200), URL varchar(200), UerMention varchar(200))";
                            Statement st = handle2.conn.createStatement();
                            st.execute(CREATE_TABLE);
                            String Create_Index = "alter table " + table + " add index time (Time)";
                            st.execute(Create_Index);
                            String Create_Index2 = "alter table " + table + " add index userID (TwitterUser)";
                            st.execute(Create_Index2);
                            String key = "ALTER TABLE " + table + " ADD PRIMARY KEY (TweetID)";
                            st.execute(key);
                            handle2.close_databasehandle();
                        } catch (SQLException e) {
                            e.printStackTrace();
                        }
                    }
                    //System.out.println("OTHER: "+ other);

                    if ((cnt++) % 1000 == 0) {
                        if (newday == 1) {
                            newday = 0;
                            oldday = currentday;
                            GmailSend gs = new GmailSend("cscchenyoyo@gmail.com", "910316ccy");
                            gs.send("THREAD" + p + " :" + "program no" + no + "message" + newtime, "I am still alive");
                            newtime = "";
                        }
                        try {
                            handle.database_connection();
                            handle.userTweet(table, listlink);
                            nowinsert = System.currentTimeMillis();
                            System.err.println("No: " + no + "program " + "totally " + cnt + " tweets downloaded!\n"
                                    + new Date(nowinsert) + " " + new Date(lastinsert));
                            lastinsert = nowinsert;
                            nowinsert = 0l;
                            handle.close_databasehandle();
                        } catch (SQLException e) {
                            handle.close_databasehandle();
                            e.printStackTrace(); // TODO Auto-generated catch block
                            TwitterDBHandle handle2 = new TwitterDBHandle();
                            handle2.intialTwitterDBhandle2();
                            handle2.database_connection();
                            try {
                                handle2.userTweet(table, listlink);
                                nowinsert = System.currentTimeMillis();
                                System.err.println("New Database No: " + no + "program " + "totally " + cnt
                                        + " tweets downloaded!\n" + new Date(nowinsert) + new Date(lastinsert));
                                lastinsert = nowinsert;
                                nowinsert = 0l;
                                handle2.close_databasehandle();
                            } catch (SQLException e1) {
                                // TODO Auto-generated catch block
                                GmailSend gs = new GmailSend("cscchenyoyo@gmail.com", "910316ccy");
                                try {
                                    gs.SendSSLMessage("cscchenyoyo@gmail.com", "program error", "both databases are down");
                                } catch (MessagingException ee) {
                                    // TODO Auto-generated catch block
                                    e.printStackTrace();
                                }
                            }
                            GmailSend gs = new GmailSend("cscchenyoyo@gmail.com", "910316ccy");
                            try {
                                gs.SendSSLMessage("cscchenyoyo@gmail.com", "program error", "change database to another one");
                            } catch (MessagingException ee) {
                                // TODO Auto-generated catch block
                                e.printStackTrace();
                            }
                        }
                        listlink.clear();
                    }
                }
            }

            @Override
            public void onDeletionNotice(StatusDeletionNotice statusDeletionNotice) {
                //System.out.println("Got a status deletion notice id:" + statusDeletionNotice.getStatusId());
            }

            @Override
            public void onTrackLimitationNotice(int numberOfLimitedStatuses) {
                //System.out.println("Got track limitation notice:" + numberOfLimitedStatuses);
            }

            @Override
            public void onScrubGeo(long userId, long upToStatusId) {
                //System.out.println("Got scrub_geo event userId:" + userId + " upToStatusId:" + upToStatusId);
            }

            @Override
            public void onStallWarning(StallWarning warning) {
                //System.out.println("Got stall warning:" + warning);
            }

            @Override
            public void onException(Exception ex) {
                ex.printStackTrace();
            }
        };

        TwitterStream twitterStream = new TwitterStreamFactory().getInstance();
        twitterOAuth twtOauth = new twitterOAuth();
        twtOauth.AuthoritywithS(twitterStream, key);
        twitterStream.addListener(listener);
        twitterStream.filter(new FilterQuery(0, follow));
        /* try { Thread.sleep(3000); } catch (InterruptedException e) { e.printStackTrace(); } */
    }
}
From source file:io.druid.examples.twitter.TwitterSpritzerFirehoseFactory.java
License:Apache License
@Override
public Firehose connect(InputRowParser parser) throws IOException {
    final ConnectionLifeCycleListener connectionLifeCycleListener = new ConnectionLifeCycleListener() {
        @Override
        public void onConnect() {
            log.info("Connected_to_Twitter");
        }

        @Override
        public void onDisconnect() {
            log.info("Disconnect_from_Twitter");
        }

        /**
         * called before thread gets cleaned up
         */
        @Override
        public void onCleanUp() {
            log.info("Cleanup_twitter_stream");
        }
    }; // ConnectionLifeCycleListener

    final TwitterStream twitterStream;
    final StatusListener statusListener;
    final int QUEUE_SIZE = 2000;
    /** This queue is used to move twitter events from the twitter4j thread to the druid ingest thread. */
    final BlockingQueue<Status> queue = new ArrayBlockingQueue<Status>(QUEUE_SIZE);
    final long startMsec = System.currentTimeMillis();

    //
    //   set up Twitter Spritzer
    //
    twitterStream = new TwitterStreamFactory().getInstance();
    twitterStream.addConnectionLifeCycleListener(connectionLifeCycleListener);
    statusListener = new StatusListener() {
        // This is what really gets called to deliver stuff from twitter4j
        @Override
        public void onStatus(Status status) {
            // time to stop?
            if (Thread.currentThread().isInterrupted()) {
                throw new RuntimeException("Interrupted, time to stop");
            }
            try {
                boolean success = queue.offer(status, 15L, TimeUnit.SECONDS);
                if (!success) {
                    log.warn("queue too slow!");
                }
            } catch (InterruptedException e) {
                throw new RuntimeException("InterruptedException", e);
            }
        }

        @Override
        public void onDeletionNotice(StatusDeletionNotice statusDeletionNotice) {
            //log.info("Got a status deletion notice id:" + statusDeletionNotice.getStatusId());
        }

        @Override
        public void onTrackLimitationNotice(int numberOfLimitedStatuses) {
            // This notice will be sent each time a limited stream becomes unlimited.
            // If this number is high and/or rapidly increasing, it is an indication that your predicate is too
            // broad, and you should consider a predicate with higher selectivity.
            log.warn("Got track limitation notice:" + numberOfLimitedStatuses);
        }

        @Override
        public void onScrubGeo(long userId, long upToStatusId) {
            //log.info("Got scrub_geo event userId:" + userId + " upToStatusId:" + upToStatusId);
        }

        @Override
        public void onException(Exception ex) {
            ex.printStackTrace();
        }

        @Override
        public void onStallWarning(StallWarning warning) {
            System.out.println("Got stall warning:" + warning);
        }
    };
    twitterStream.addListener(statusListener);
    twitterStream.sample(); // creates a generic StatusStream
    log.info("returned from sample()");

    return new Firehose() {
        private final Runnable doNothingRunnable = new Runnable() {
            public void run() {
            }
        };

        private long rowCount = 0L;
        private boolean waitIfmax = (getMaxEventCount() < 0L);
        private final Map<String, Object> theMap = new TreeMap<>();
        // DIY json parsing
        // private final ObjectMapper omapper = new ObjectMapper();

        private boolean maxTimeReached() {
            if (getMaxRunMinutes() <= 0) {
                return false;
            } else {
                return (System.currentTimeMillis() - startMsec) / 60000L >= getMaxRunMinutes();
            }
        }

        private boolean maxCountReached() {
            return getMaxEventCount() >= 0 && rowCount >= getMaxEventCount();
        }

        @Override
        public boolean hasMore() {
            if (maxCountReached() || maxTimeReached()) {
                return waitIfmax;
            } else {
                return true;
            }
        }

        @Override
        public InputRow nextRow() {
            // Interrupted to stop?
            if (Thread.currentThread().isInterrupted()) {
                throw new RuntimeException("Interrupted, time to stop");
            }

            // all done?
            if (maxCountReached() || maxTimeReached()) {
                if (waitIfmax) {
                    // sleep a long time instead of terminating
                    try {
                        log.info("reached limit, sleeping a long time...");
                        sleep(2000000000L);
                    } catch (InterruptedException e) {
                        throw new RuntimeException("InterruptedException", e);
                    }
                } else {
                    // allow this event through, and the next hasMore() call will be false
                }
            }
            if (++rowCount % 1000 == 0) {
                log.info("nextRow() has returned %,d InputRows", rowCount);
            }

            Status status;
            try {
                status = queue.take();
            } catch (InterruptedException e) {
                throw new RuntimeException("InterruptedException", e);
            }

            theMap.clear();

            HashtagEntity[] hts = status.getHashtagEntities();
            String text = status.getText();
            theMap.put("text", (null == text) ? "" : text);
            theMap.put("htags", (hts.length > 0)
                    ? Lists.transform(Arrays.asList(hts), new Function<HashtagEntity, String>() {
                        @Nullable
                        @Override
                        public String apply(HashtagEntity input) {
                            return input.getText();
                        }
                    })
                    : ImmutableList.<String>of());

            long[] lcontrobutors = status.getContributors();
            List<String> contributors = new ArrayList<>();
            for (long contrib : lcontrobutors) {
                contributors.add(String.format("%d", contrib));
            }
            theMap.put("contributors", contributors);

            GeoLocation geoLocation = status.getGeoLocation();
            if (null != geoLocation) {
                double lat = status.getGeoLocation().getLatitude();
                double lon = status.getGeoLocation().getLongitude();
                theMap.put("lat", lat);
                theMap.put("lon", lon);
            } else {
                theMap.put("lat", null);
                theMap.put("lon", null);
            }

            if (status.getSource() != null) {
                Matcher m = sourcePattern.matcher(status.getSource());
                theMap.put("source", m.find() ? m.group(1) : status.getSource());
            }

            theMap.put("retweet", status.isRetweet());

            if (status.isRetweet()) {
                Status original = status.getRetweetedStatus();
                theMap.put("retweet_count", original.getRetweetCount());

                User originator = original.getUser();
                theMap.put("originator_screen_name", originator != null ? originator.getScreenName() : "");
                theMap.put("originator_follower_count", originator != null ? originator.getFollowersCount() : "");
                theMap.put("originator_friends_count", originator != null ? originator.getFriendsCount() : "");
                theMap.put("originator_verified", originator != null ? originator.isVerified() : "");
            }

            User user = status.getUser();
            final boolean hasUser = (null != user);
            theMap.put("follower_count", hasUser ? user.getFollowersCount() : 0);
            theMap.put("friends_count", hasUser ? user.getFriendsCount() : 0);
            theMap.put("lang", hasUser ? user.getLang() : "");
            theMap.put("utc_offset", hasUser ? user.getUtcOffset() : -1); // resolution in seconds, -1 if not available?
            theMap.put("statuses_count", hasUser ? user.getStatusesCount() : 0);
            theMap.put("user_id", hasUser ? String.format("%d", user.getId()) : "");
            theMap.put("screen_name", hasUser ? user.getScreenName() : "");
            theMap.put("location", hasUser ? user.getLocation() : "");
            theMap.put("verified", hasUser ? user.isVerified() : "");

            theMap.put("ts", status.getCreatedAt().getTime());

            List<String> dimensions = Lists.newArrayList(theMap.keySet());

            return new MapBasedInputRow(status.getCreatedAt().getTime(), dimensions, theMap);
        }

        @Override
        public Runnable commit() {
            // ephemera in, ephemera out.
            return doNothingRunnable; // reuse the same object each time
        }

        @Override
        public void close() throws IOException {
            log.info("CLOSE twitterstream");
            twitterStream.shutdown(); // invokes twitterStream.cleanUp()
        }
    };
}