List of usage examples for twitter4j TwitterStream sample
TwitterStream.sample()
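All of the examples below follow the same twitter4j pattern: build a TwitterStream, register a listener, and call sample() to start consuming the public sample (Spritzer) stream. As a minimal orientation sketch (not taken from any of the source files below; the four credential strings are placeholders you must replace, and the class name is invented for illustration):

import twitter4j.*;
import twitter4j.conf.ConfigurationBuilder;

public class SampleStreamSketch {
    public static void main(String[] args) {
        ConfigurationBuilder cb = new ConfigurationBuilder()
                .setOAuthConsumerKey("CONSUMER_KEY")            // placeholder credentials
                .setOAuthConsumerSecret("CONSUMER_SECRET")
                .setOAuthAccessToken("ACCESS_TOKEN")
                .setOAuthAccessTokenSecret("ACCESS_TOKEN_SECRET");
        TwitterStream twitterStream = new TwitterStreamFactory(cb.build()).getInstance();
        // StatusAdapter is twitter4j's no-op StatusListener base class; override only what you need.
        twitterStream.addListener(new StatusAdapter() {
            @Override
            public void onStatus(Status status) {
                System.out.println("@" + status.getUser().getScreenName() + " - " + status.getText());
            }

            @Override
            public void onException(Exception ex) {
                ex.printStackTrace();
            }
        });
        // sample() returns immediately; an internal thread reads the stream and invokes the listener.
        twitterStream.sample();
    }
}

Note that sample() is asynchronous: the calling code (a Storm spout, a servlet container, a Druid firehose, etc. in the examples below) keeps running and typically consumes statuses from a queue filled by the listener.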
From source file:com.storm.demo.TwitterSampleSpout.java
License:Apache License
@Override
public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
    queue = new LinkedBlockingQueue<String>(1000);
    _collector = collector;
    // StatusListener listener = new StatusListener() {
    //
    //     @Override
    //     public void onStatus(Status status) {
    //         System.out.println(DataObjectFactory.getRawJSON(status));
    //         queue.offer(status);
    //     }
    //
    //     @Override
    //     public void onDeletionNotice(StatusDeletionNotice sdn) {
    //     }
    //
    //     @Override
    //     public void onTrackLimitationNotice(int i) {
    //     }
    //
    //     @Override
    //     public void onScrubGeo(long l, long l1) {
    //     }
    //
    //     @Override
    //     public void onException(Exception ex) {
    //     }
    //
    //     @Override
    //     public void onStallWarning(StallWarning arg0) {
    //         // TODO Auto-generated method stub
    //     }
    // };
    RawStreamListener rawListener = new RawStreamListener() {
        @Override
        public void onMessage(String rawJSON) {
            //System.out.println(rawJSON);
            queue.offer(rawJSON);
        }

        @Override
        public void onException(Exception ex) {
            ex.printStackTrace();
        }
    };

    TwitterStream twitterStream = new TwitterStreamFactory(
            new ConfigurationBuilder().setJSONStoreEnabled(true).build()).getInstance();
    //twitterStream.addListener(listener);
    twitterStream.addListener(rawListener);
    twitterStream.setOAuthConsumer(consumerKey, consumerSecret);
    AccessToken token = new AccessToken(accessToken, accessTokenSecret);
    twitterStream.setOAuthAccessToken(token);

    if (keyWords.length == 0) {
        twitterStream.sample();
    } else {
        FilterQuery query = new FilterQuery().track(keyWords);
        twitterStream.filter(query);
    }
}
From source file:com.twitter.graphjet.demo.TwitterStreamReader.java
License:Open Source License
public static void main(String[] argv) throws Exception {
    final TwitterStreamReaderArgs args = new TwitterStreamReaderArgs();
    CmdLineParser parser = new CmdLineParser(args, ParserProperties.defaults().withUsageWidth(90));

    try {
        parser.parseArgument(argv);
    } catch (CmdLineException e) {
        System.err.println(e.getMessage());
        parser.printUsage(System.err);
        return;
    }

    final Date demoStart = new Date();

    final MultiSegmentPowerLawBipartiteGraph userTweetBigraph = new MultiSegmentPowerLawBipartiteGraph(
            args.maxSegments, args.maxEdgesPerSegment,
            args.leftSize, args.leftDegree, args.leftPowerLawExponent,
            args.rightSize, args.rightDegree, args.rightPowerLawExponent,
            new IdentityEdgeTypeMask(), new NullStatsReceiver());

    final MultiSegmentPowerLawBipartiteGraph tweetHashtagBigraph = new MultiSegmentPowerLawBipartiteGraph(
            args.maxSegments, args.maxEdgesPerSegment,
            args.leftSize, args.leftDegree, args.leftPowerLawExponent,
            args.rightSize, args.rightDegree, args.rightPowerLawExponent,
            new IdentityEdgeTypeMask(), new NullStatsReceiver());

    // Note that we're keeping track of the nodes on the left and right sides externally, apart from the bigraphs,
    // because the bigraph currently does not provide an API for enumerating over nodes. Currently, this is liable
    // to run out of memory, but this is fine for the demo.
    Long2ObjectOpenHashMap<String> users = new Long2ObjectOpenHashMap<>();
    LongOpenHashSet tweets = new LongOpenHashSet();
    Long2ObjectOpenHashMap<String> hashtags = new Long2ObjectOpenHashMap<>();
    // It is accurate to think of these two data structures as holding all users and tweets observed on the stream
    // since the demo program was started.

    StatusListener listener = new StatusListener() {
        long statusCnt = 0;

        public void onStatus(Status status) {
            String screenname = status.getUser().getScreenName();
            long userId = status.getUser().getId();
            long tweetId = status.getId();
            long resolvedTweetId = status.isRetweet() ? status.getRetweetedStatus().getId() : status.getId();
            HashtagEntity[] hashtagEntities = status.getHashtagEntities();

            userTweetBigraph.addEdge(userId, resolvedTweetId, (byte) 0);

            if (!users.containsKey(userId)) {
                users.put(userId, screenname);
            }
            if (!tweets.contains(tweetId)) {
                tweets.add(tweetId);
            }
            if (!tweets.contains(resolvedTweetId)) {
                tweets.add(resolvedTweetId);
            }

            for (HashtagEntity entity : hashtagEntities) {
                long hashtagHash = (long) entity.getText().toLowerCase().hashCode();
                tweetHashtagBigraph.addEdge(tweetId, hashtagHash, (byte) 0);
                if (!hashtags.containsKey(hashtagHash)) {
                    hashtags.put(hashtagHash, entity.getText().toLowerCase());
                }
            }
            statusCnt++;

            // Note that status updates are currently performed synchronously (i.e., blocking). Best practices
            // dictate that they should happen on another thread so as to not interfere with ingest, but this is
            // okay for the purposes of the demo and the volume of the sample stream.

            // Minor status update: just print counters.
            if (statusCnt % args.minorUpdateInterval == 0) {
                long duration = (new Date().getTime() - demoStart.getTime()) / 1000;
                System.out.println(String.format(
                        "%tc: %,d statuses, %,d unique tweets, %,d unique hashtags (observed); "
                                + "%.2f edges/s; totalMemory(): %,d bytes, freeMemory(): %,d bytes",
                        new Date(), statusCnt, tweets.size(), hashtags.size(), (float) statusCnt / duration,
                        Runtime.getRuntime().totalMemory(), Runtime.getRuntime().freeMemory()));
            }

            // Major status update: iterate over right and left nodes.
            if (statusCnt % args.majorUpdateInterval == 0) {
                int leftCnt = 0;
                LongIterator leftIter = tweets.iterator();
                while (leftIter.hasNext()) {
                    if (userTweetBigraph.getLeftNodeDegree(leftIter.nextLong()) != 0)
                        leftCnt++;
                }

                int rightCnt = 0;
                LongIterator rightIter = hashtags.keySet().iterator();
                while (rightIter.hasNext()) {
                    if (userTweetBigraph.getRightNodeDegree(rightIter.nextLong()) != 0)
                        rightCnt++;
                }

                System.out.println(String.format("%tc: Current user-tweet graph state: %,d left nodes (users), "
                        + "%,d right nodes (tweets)", new Date(), leftCnt, rightCnt));
            }
        }

        public void onScrubGeo(long userId, long upToStatusId) {
        }

        public void onDeletionNotice(StatusDeletionNotice statusDeletionNotice) {
        }

        public void onTrackLimitationNotice(int numberOfLimitedStatuses) {
        }

        public void onStallWarning(StallWarning warning) {
        }

        public void onException(Exception e) {
            e.printStackTrace();
        }
    };

    TwitterStream twitterStream = new TwitterStreamFactory().getInstance();
    twitterStream.addListener(listener);
    twitterStream.sample();

    ServletContextHandler context = new ServletContextHandler(ServletContextHandler.SESSIONS);
    context.setContextPath("/");

    Server jettyServer = new Server(args.port);
    jettyServer.setHandler(context);

    context.addServlet(new ServletHolder(new TopUsersServlet(userTweetBigraph, users)),
            "/userTweetGraph/topUsers");
    context.addServlet(
            new ServletHolder(
                    new TopTweetsServlet(userTweetBigraph, tweets, TopTweetsServlet.GraphType.USER_TWEET)),
            "/userTweetGraph/topTweets");
    context.addServlet(new ServletHolder(
            new TopTweetsServlet(tweetHashtagBigraph, tweets, TopTweetsServlet.GraphType.TWEET_HASHTAG)),
            "/tweetHashtagGraph/topTweets");
    context.addServlet(new ServletHolder(new TopHashtagsServlet(tweetHashtagBigraph, hashtags)),
            "/tweetHashtagGraph/topHashtags");
    context.addServlet(new ServletHolder(new GetEdgesServlet(userTweetBigraph, GetEdgesServlet.Side.LEFT)),
            "/userTweetGraphEdges/users");
    context.addServlet(new ServletHolder(new GetEdgesServlet(userTweetBigraph, GetEdgesServlet.Side.RIGHT)),
            "/userTweetGraphEdges/tweets");
    context.addServlet(new ServletHolder(new GetEdgesServlet(tweetHashtagBigraph, GetEdgesServlet.Side.LEFT)),
            "/tweetHashtagGraphEdges/tweets");
    context.addServlet(new ServletHolder(new GetEdgesServlet(tweetHashtagBigraph, GetEdgesServlet.Side.RIGHT)),
            "/tweetHashtagGraphEdges/hashtags");
    context.addServlet(new ServletHolder(new GetSimilarHashtagsServlet(tweetHashtagBigraph, hashtags)),
            "/similarHashtags");

    System.out.println(String.format("%tc: Starting service on port %d", new Date(), args.port));
    try {
        jettyServer.start();
        jettyServer.join();
    } finally {
        jettyServer.destroy();
    }
}
From source file:crawling.PrintSampleStream.java
License:Apache License
public static void main(String[] args) throws TwitterException {
    //TwitterStream twitterStream = new TwitterStreamFactory().getInstance();
    TwitterStream twitterStream = getOAuthTwitterStream();

    try {
        FileWriter outFile = new FileWriter("sampledUsers" + ".txt", true);
        idOut = new PrintWriter(outFile);
        //out.close();
    } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }

    StatusListener listener = new StatusListener() {
        @Override
        public void onStatus(Status status) {
            ++tweetsCount;
            //System.out.println("@" + status.getUser().getScreenName() + " - " + status.getText());
            Long id = status.getUser().getId();
            /*String username = status.getUser().getScreenName();
            String realname = status.getUser().getName();
            String text = status.getText();
            Date date = status.getCreatedAt();*/
            if (discoveredUsers.containsKey(id)) {
                //System.out.println("Already found this user: " + id);
                long num = discoveredUsers.get(id);
                discoveredUsers.put(id, num + 1);
            } else {
                discoveredUsers.put(id, (long) 1);
                storeUserID(status);
            }
        }

        @Override
        public void onDeletionNotice(StatusDeletionNotice statusDeletionNotice) {
            //System.out.println("Got a status deletion notice id:" + statusDeletionNotice.getStatusId());
        }

        @Override
        public void onTrackLimitationNotice(int numberOfLimitedStatuses) {
            System.out.println("Got track limitation notice:" + numberOfLimitedStatuses);
        }

        @Override
        public void onScrubGeo(long userId, long upToStatusId) {
            System.out.println("Got scrub_geo event userId:" + userId + " upToStatusId:" + upToStatusId);
        }

        @Override
        public void onStallWarning(StallWarning warning) {
            System.out.println("Got stall warning:" + warning);
        }

        @Override
        public void onException(Exception ex) {
            ex.printStackTrace();
        }
    };

    twitterStream.addListener(listener);
    twitterStream.sample();
}
From source file:crosstreams.twitter.TwitterStreamFileWriter.java
License:Mozilla Public License
/**
 * Start crawling tweets
 * @param args
 * @throws TwitterException
 */
public static void main(String[] args) throws TwitterException {
    System.err.println("### Twitter Stream Writer ###");
    System.err.println("Saves tweets from the Spritzer/Gardenhose Stream to a series of files");
    System.err.println(
            "Command: crosstreams.twitter.TwitterStreamFileWriter <saveFolder> <twitterusername> <twitterpassword> <numberoftweetstostoreperfile>(optional)");
    System.err.println(" saveFolder: Where the tweets will be downloaded to");
    System.err.println(" twitterusername: The username of the twitter account to use for downloading tweets");
    System.err.println(" twitterpassword: The password of the twitter account to use for downloading tweets");
    System.err.println(
            " numberoftweetstostoreperfile: The total number of tweets to write to a file before closing that file and opening a new one (Integer) (default=1000000)");
    System.err.println("Optional System Properties (-D):");
    System.err.println(" http.proxyhost: The proxy host to use if needed");
    System.err.println(" http.proxyport: The proxy port to use if needed");
    System.err.println(" email: An email address to send alerts to if an error is encountered");
    System.err.println(" emailconf: A file containing the javax.mail configuration");
    System.err.println(
            " emailonvalidate: true/false - should I send an email when a file is correctly validated rather than only when it fails? (default=false)");
    if (args.length <= 1 || args.length >= 5) {
        System.err.println("Example:");
        System.err.println(
                "java -Demail=\"MYEMAIL@HOST.COM\" -Demailconf=\"./javamail.conf\" -Demailonvalidate=\"true\" -jar TwitterStreamFileCrawler.jar ./ MYUSERNAME MYPASSWORD 100000");
        System.err.println("Don't forget to modify ./javamail.conf to contain your email server host");
        System.exit(0);
    }

    // user inputs
    String saveFolder = args[0];
    String username = args[1];
    String password = args[2];
    final int numberOfTweetsToStorePerFile;
    if (args.length > 3)
        numberOfTweetsToStorePerFile = Integer.parseInt(args[3]);
    else
        numberOfTweetsToStorePerFile = 1000000;
    String proxyhost = System.getProperty("http.proxyhost");
    String proxyport = System.getProperty("http.proxyport");
    final String email = System.getProperty("email");
    final String emailconf = System.getProperty("emailconf");

    // define the user account in use and proxy settings if needed
    ConfigurationBuilder cb = new ConfigurationBuilder();
    cb.setDebugEnabled(true);
    if (proxyhost != null && proxyport != null) {
        cb.setHttpProxyHost(proxyhost);
        cb.setHttpProxyPort(Integer.parseInt(proxyport));
    }
    cb.setUser(username);
    cb.setPassword(password);

    if (!saveFolder.endsWith("/") && !saveFolder.endsWith("\\")) {
        saveFolder = saveFolder + System.getProperty("file.separator");
    }
    final String finalSaveFolder = saveFolder;

    // Twitter4J Stream - the type of stream is set automatically, i.e. Gardenhose if you have it, Spritzer otherwise.
    TwitterStream twitterStream = new TwitterStreamFactory(cb.build()).getInstance();

    // The status listener is the important bit, this fires when a new tweet arrives.
    StatusListener listener = new StatusListener() {
        /** The status listener holds a writer to save content to **/
        BufferedWriter statusWriter = null; // the tweets go here
        BufferedWriter logWriter = null; // we write any delete requests or error messages here
        /** We store a fixed number of Tweets in each file **/
        int numberInThisFile = numberOfTweetsToStorePerFile;
        int numberPerFile = numberOfTweetsToStorePerFile;
        String currentFilename;
        int numerrors = 0;

        /**
         * A new tweet has arrived
         */
        public void onStatus(Status status) {
            if (numberInThisFile >= numberPerFile) {
                // closing and opening of new files
                try {
                    if (statusWriter != null) {
                        statusWriter.close();
                        logWriter.close();
                        validateJSONFile(currentFilename, numberPerFile);
                    }
                    Long currentTime = System.currentTimeMillis();
                    currentFilename = finalSaveFolder + currentTime.toString() + ".json.gz";
                    statusWriter = new BufferedWriter(new OutputStreamWriter(
                            new GZIPOutputStream(new FileOutputStream(currentFilename)), "UTF-8"));
                    logWriter = new BufferedWriter(new OutputStreamWriter(
                            new GZIPOutputStream(
                                    new FileOutputStream(finalSaveFolder + currentTime.toString() + ".log.gz")),
                            "UTF-8"));
                    numberInThisFile = 0;
                    numerrors = 0;
                } catch (FileNotFoundException e) {
                    e.printStackTrace();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
            numberInThisFile++;
            // write the JSON - note that I added the getJSON() method to the Twitter4J status object
            // this is why the Twitter4j sources are included rather than importing the jar.
            try {
                Object s = status.getJSON();
                statusWriter.write(status.getJSON().toString() + '\n');
                statusWriter.flush();
            } catch (Exception e) {
                e.printStackTrace();
                numerrors++;
                if (emailconf != null && email != null && numerrors < 5)
                    Mail.mail(emailconf, email, email, "Twitter Stream Writer Alert - Write Failed",
                            "An IOException was thrown when calling statusWriter.write()." + '\n'
                                    + e.getMessage() + '\n'
                                    + "The current file will be closed and a new file will be created.");
            }
        }

        public void onDeletionNotice(StatusDeletionNotice statusDeletionNotice) {
            try {
                logWriter.write("DEL: " + statusDeletionNotice.getStatusId() + " "
                        + statusDeletionNotice.getUserId() + '\n');
            } catch (IOException e) {
                e.printStackTrace();
            }
        }

        public void onTrackLimitationNotice(int numberOfLimitedStatuses) {
            try {
                logWriter.write("LIMIT: " + numberOfLimitedStatuses + '\n');
            } catch (IOException e) {
                e.printStackTrace();
            }
        }

        public void onScrubGeo(long userId, long upToStatusId) {
            try {
                logWriter.write("SCRUBGEO: " + userId + " " + upToStatusId + '\n');
            } catch (IOException e) {
                e.printStackTrace();
            }
        }

        public void onException(Exception ex) {
            if (logWriter == null)
                return;
            try {
                logWriter.write("ERR: " + ex.getLocalizedMessage() + '\n');
                logWriter.flush();
                if (statusWriter != null) {
                    statusWriter.close();
                    statusWriter = null;
                    logWriter.close();
                    validateJSONFile(currentFilename, numberPerFile);
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
            //ex.printStackTrace();
        }
    };

    if (emailconf != null && email != null)
        Mail.mail(emailconf, email, email, "Twitter Stream Writer Info - Writer has started",
                "The Gardenhose Writer has begun crawling the stream (this email indicates that you will receive alerts if something goes wrong).");

    twitterStream.addListener(listener);
    twitterStream.sample();
}
From source file:de.twitterlivesearch.api.TwitterLiveSearchFactory.java
License:Apache License
private static TwitterLiveSearch configureTwitter(AbstractConfiguration configuration) {
    TwitterLiveSearch twitter = null;
    try {
        // several important variables are initialized here
        twitter = new TwitterLiveSearch();
        TweetHolder tweetHolder = new TweetHolder();
        IndexWriterConfig indexWriterConfig = new IndexWriterConfig(
                AnalyzerMapping.getInstance().ANALYZER_FOR_DELIMITER);
        TwitterStream twitterStream = new TwitterStreamFactory().getInstance();
        Directory directory;

        // configuration part: TwitterLiveSearch is configured here according to the config
        if (configuration.getDirectoryConfig() == DirectoryConfig.RAM) {
            directory = new RAMDirectory();
            log.trace("initialized RAM-Directory");
        } else {
            directory = FSDirectory.open(Paths.get(configuration.getDirectory()));
            log.trace("initialized FS-Directory on path " + configuration.getDirectory());
        }

        IndexWriter iwriter = new IndexWriter(directory, indexWriterConfig);
        Searcher searcher = new Searcher(directory);
        twitterStream.addListener(new TwitterStreamListener(directory, tweetHolder, iwriter, searcher));

        if (configuration.getStreamConfig() == StreamConfig.USER_STREAM) {
            twitterStream.user();
        } else if (configuration.getStreamConfig() == StreamConfig.GARDENHOSE) {
            twitterStream.sample();
        }

        // set everything needed in TwitterLiveSearch
        twitter.setCurrentDirectory(directory);
        twitter.setIndexWriter(iwriter);
        twitter.setTweetHolder(tweetHolder);
        twitter.setTwitterStream(twitterStream); // reference to TwitterLiveSearch
        twitter.setSearcher(searcher);
    } catch (IOException e) {
        e.printStackTrace();
    }
    return twitter;
}
From source file:druid.examples.twitter.TwitterSpritzerFirehoseFactory.java
License:Open Source License
@Override
public Firehose connect() throws IOException {
    final ConnectionLifeCycleListener connectionLifeCycleListener = new ConnectionLifeCycleListener() {
        @Override
        public void onConnect() {
            log.info("Connected_to_Twitter");
        }

        @Override
        public void onDisconnect() {
            log.info("Disconnect_from_Twitter");
        }

        /**
         * called before thread gets cleaned up
         */
        @Override
        public void onCleanUp() {
            log.info("Cleanup_twitter_stream");
        }
    }; // ConnectionLifeCycleListener

    final TwitterStream twitterStream;
    final StatusListener statusListener;
    final int QUEUE_SIZE = 2000;
    /** This queue is used to move twitter events from the twitter4j thread to the druid ingest thread. */
    final BlockingQueue<Status> queue = new ArrayBlockingQueue<Status>(QUEUE_SIZE);
    final LinkedList<String> dimensions = new LinkedList<String>();
    final long startMsec = System.currentTimeMillis();

    dimensions.add("htags");
    dimensions.add("lang");
    dimensions.add("utc_offset");

    //
    // set up Twitter Spritzer
    //
    twitterStream = new TwitterStreamFactory().getInstance();
    twitterStream.addConnectionLifeCycleListener(connectionLifeCycleListener);

    statusListener = new StatusListener() {
        // This is what really gets called to deliver stuff from twitter4j
        @Override
        public void onStatus(Status status) {
            // time to stop?
            if (Thread.currentThread().isInterrupted()) {
                throw new RuntimeException("Interrupted, time to stop");
            }
            try {
                boolean success = queue.offer(status, 15L, TimeUnit.SECONDS);
                if (!success) {
                    log.warn("queue too slow!");
                }
            } catch (InterruptedException e) {
                throw new RuntimeException("InterruptedException", e);
            }
        }

        @Override
        public void onDeletionNotice(StatusDeletionNotice statusDeletionNotice) {
            //log.info("Got a status deletion notice id:" + statusDeletionNotice.getStatusId());
        }

        @Override
        public void onTrackLimitationNotice(int numberOfLimitedStatuses) {
            // This notice will be sent each time a limited stream becomes unlimited.
            // If this number is high and/or rapidly increasing, it is an indication that your predicate is
            // too broad, and you should consider a predicate with higher selectivity.
            log.warn("Got track limitation notice:" + numberOfLimitedStatuses);
        }

        @Override
        public void onScrubGeo(long userId, long upToStatusId) {
            //log.info("Got scrub_geo event userId:" + userId + " upToStatusId:" + upToStatusId);
        }

        @Override
        public void onException(Exception ex) {
            ex.printStackTrace();
        }

        @Override
        public void onStallWarning(StallWarning warning) {
            System.out.println("Got stall warning:" + warning);
        }
    };

    twitterStream.addListener(statusListener);
    twitterStream.sample(); // creates a generic StatusStream
    log.info("returned from sample()");

    return new Firehose() {
        private final Runnable doNothingRunnable = new Runnable() {
            public void run() {
            }
        };

        private long rowCount = 0L;
        private boolean waitIfmax = (maxEventCount < 0L);
        private final Map<String, Object> theMap = new HashMap<String, Object>(2);
        // DIY json parsing
        // private final ObjectMapper omapper = new ObjectMapper();

        private boolean maxTimeReached() {
            if (maxRunMinutes <= 0) {
                return false;
            } else {
                return (System.currentTimeMillis() - startMsec) / 60000L >= maxRunMinutes;
            }
        }

        private boolean maxCountReached() {
            return maxEventCount >= 0 && rowCount >= maxEventCount;
        }

        @Override
        public boolean hasMore() {
            if (maxCountReached() || maxTimeReached()) {
                return waitIfmax;
            } else {
                return true;
            }
        }

        @Override
        public InputRow nextRow() {
            // Interrupted to stop?
            if (Thread.currentThread().isInterrupted()) {
                throw new RuntimeException("Interrupted, time to stop");
            }
            // all done?
            if (maxCountReached() || maxTimeReached()) {
                if (waitIfmax) {
                    // sleep a long time instead of terminating
                    try {
                        log.info("reached limit, sleeping a long time...");
                        sleep(2000000000L);
                    } catch (InterruptedException e) {
                        throw new RuntimeException("InterruptedException", e);
                    }
                } else {
                    // allow this event through, and the next hasMore() call will be false
                }
            }
            if (++rowCount % 1000 == 0) {
                log.info("nextRow() has returned %,d InputRows", rowCount);
            }

            Status status;
            try {
                status = queue.take();
            } catch (InterruptedException e) {
                throw new RuntimeException("InterruptedException", e);
            }

            HashtagEntity[] hts = status.getHashtagEntities();
            if (hts != null && hts.length > 0) {
                List<String> hashTags = Lists.newArrayListWithExpectedSize(hts.length);
                for (HashtagEntity ht : hts) {
                    hashTags.add(ht.getText());
                }
                theMap.put("htags", Arrays.asList(hashTags.get(0)));
            }

            long retweetCount = status.getRetweetCount();
            theMap.put("retweet_count", retweetCount);

            User user = status.getUser();
            if (user != null) {
                theMap.put("follower_count", user.getFollowersCount());
                theMap.put("friends_count", user.getFriendsCount());
                theMap.put("lang", user.getLang());
                theMap.put("utc_offset", user.getUtcOffset()); // resolution in seconds, -1 if not available?
                theMap.put("statuses_count", user.getStatusesCount());
            }

            return new MapBasedInputRow(status.getCreatedAt().getTime(), dimensions, theMap);
        }

        @Override
        public Runnable commit() {
            // ephemera in, ephemera out.
            return doNothingRunnable; // reuse the same object each time
        }

        @Override
        public void close() throws IOException {
            log.info("CLOSE twitterstream");
            twitterStream.shutdown(); // invokes twitterStream.cleanUp()
        }
    };
}
From source file:edu.uci.ics.asterix.external.dataset.adapter.PushBasedTwitterFeedClient.java
License:Apache License
public PushBasedTwitterFeedClient(IHyracksTaskContext ctx, ARecordType recordType, PushBasedTwitterAdapter adapter)
        throws AsterixException {
    this.recordType = recordType;
    this.tweetProcessor = new TweetProcessor(recordType);
    this.recordSerDe = new ARecordSerializerDeserializer(recordType);
    this.mutableRecord = tweetProcessor.getMutableRecord();
    this.initialize(adapter.getConfiguration());
    this.inputQ = new LinkedBlockingQueue<Status>();
    TwitterStream twitterStream = TwitterUtil.getTwitterStream(adapter.getConfiguration());
    twitterStream.addListener(new TweetListener(inputQ));
    FilterQuery query = TwitterUtil.getFilterQuery(adapter.getConfiguration());
    if (query != null) {
        twitterStream.filter(query);
    } else {
        twitterStream.sample();
    }
}
From source file:example.search.java
License:Apache License
/**
 * Usage: java twitter4j.examples.search.SearchTweets [query]
 *
 * @param args
 */
public static void main(String[] args) {
    StatusListener listener = new StatusListener() {
        public Double count = 0d;
        Date started = new Date();
        Date previous = new Date();

        @Override
        public void onStatus(Status status) {
            try {
                File file = new File("whythissucks.txt");
                // if the file doesn't exist, then create it
                FileWriter fw = new FileWriter(file.getAbsoluteFile(), true);
                BufferedWriter bw = new BufferedWriter(fw);
                if (this.count % 1000 == 0) {
                    Date finished10k = new Date();
                    System.out.println("\n\n\n\n AVERAGE RATE OF TWEETS is "
                            + (this.count * 1000 / (finished10k.getTime() - this.started.getTime())));
                    System.out.println(1000000d / (finished10k.getTime() - this.previous.getTime()));
                    System.out.println(this.count);
                    System.out.println(finished10k.getTime() + " " + this.started.getTime() + " "
                            + (finished10k.getTime() - this.started.getTime()));
                    System.out.println(finished10k.getTime() + " " + this.previous.getTime() + " "
                            + (finished10k.getTime() - this.previous.getTime()));
                    System.out.println(status.getSource());
                    System.out.println("\n\n\n\n");
                    this.previous = finished10k;
                }
                this.count++;
                // System.out.println(status.getUser().getName() + " : " + status.getText() + " " + this.count);
                bw.write(status.getId() + "|" + status.getText() + "\n");
                bw.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }

        @Override
        public void onDeletionNotice(StatusDeletionNotice statusDeletionNotice) {
            // System.out.println(statusDeletionNotice.getUserId() + " has deleted this tweet");
        }

        @Override
        public void onTrackLimitationNotice(int numberOfLimitedStatuses) {
            System.out.println("limited " + numberOfLimitedStatuses);
        }

        @Override
        public void onException(Exception ex) {
            ex.printStackTrace();
        }

        @Override
        public void onScrubGeo(long arg0, long arg1) {
            // TODO Auto-generated method stub
        }

        @Override
        public void onStallWarning(StallWarning arg0) {
            // TODO Auto-generated method stub
            System.out.println(arg0.getMessage());
        }
    };

    TwitterInstanceCreator tic = new TwitterInstanceCreator();

    RawStreamListener rawst = new RawStreamListener() {
        public Double count = 0d;
        public Double lengthsum = 0d;
        Date started = new Date();
        Date previous = new Date();
        String filename;
        String previousfilename = "";

        @Override
        public void onMessage(String message) {
            if (!message.startsWith("{\"delete")) {
                try {
                    // if the file doesn't exist, then create it
                    if (this.count % 1000 == 0) {
                        Date finished10k = new Date();
                        System.out.println("\n\n\n\n AVERAGE RATE OF TWEETS is "
                                + (this.count * 1000 / (finished10k.getTime() - this.started.getTime())));
                        System.out.println(1000000d / (finished10k.getTime() - this.previous.getTime()));
                        System.out.println(this.count);
                        System.out.println(finished10k.getTime() + " " + this.started.getTime() + " "
                                + (finished10k.getTime() - this.started.getTime()));
                        System.out.println(finished10k.getTime() + " " + this.previous.getTime() + " "
                                + (finished10k.getTime() - this.previous.getTime()));
                        System.out.println("\n\n\n\n");
                        this.previous = finished10k;
                    }
                    File file = new File("tweetstoimport/" + this.filename);
                    FileWriter fw = new FileWriter(file.getAbsoluteFile(), true);
                    BufferedWriter bw = new BufferedWriter(fw);
                    this.count++;
                    // System.out.println(status.getUser().getName() + " : " + status.getText() + " " + this.count);
                    bw.write(message + "\n");
                    bw.close();
                    fw.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
            // TODO Auto-generated method stub
        }

        @Override
        public void onException(Exception arg0) {
            // TODO Auto-generated method stub
        }
    };

    // twitterStream.addListener(rawst);
    // String[] searchfor = {"flu", "influenza", "fever", "cough", "sore", "throat", "sore throat", "headache"};
    // FilterQuery query = new FilterQuery();
    // query.track(searchfor);
    TwitterStream twitterStream = tic.getStream(1);
    twitterStream.addListener(listener);
    // twitterStream.filter(query);
    // sample() method internally creates a thread which manipulates TwitterStream and calls these adequate listener methods continuously.
    twitterStream.sample();
    // Filters from the stream
    //twitterStream.filter(query);
}
From source file:io.druid.examples.twitter.TwitterSpritzerFirehoseFactory.java
License:Apache License
@Override
public Firehose connect(InputRowParser parser) throws IOException {
    final ConnectionLifeCycleListener connectionLifeCycleListener = new ConnectionLifeCycleListener() {
        @Override
        public void onConnect() {
            log.info("Connected_to_Twitter");
        }

        @Override
        public void onDisconnect() {
            log.info("Disconnect_from_Twitter");
        }

        /**
         * called before thread gets cleaned up
         */
        @Override
        public void onCleanUp() {
            log.info("Cleanup_twitter_stream");
        }
    }; // ConnectionLifeCycleListener

    final TwitterStream twitterStream;
    final StatusListener statusListener;
    final int QUEUE_SIZE = 2000;
    /** This queue is used to move twitter events from the twitter4j thread to the druid ingest thread. */
    final BlockingQueue<Status> queue = new ArrayBlockingQueue<Status>(QUEUE_SIZE);
    final long startMsec = System.currentTimeMillis();

    //
    // set up Twitter Spritzer
    //
    twitterStream = new TwitterStreamFactory().getInstance();
    twitterStream.addConnectionLifeCycleListener(connectionLifeCycleListener);

    statusListener = new StatusListener() {
        // This is what really gets called to deliver stuff from twitter4j
        @Override
        public void onStatus(Status status) {
            // time to stop?
            if (Thread.currentThread().isInterrupted()) {
                throw new RuntimeException("Interrupted, time to stop");
            }
            try {
                boolean success = queue.offer(status, 15L, TimeUnit.SECONDS);
                if (!success) {
                    log.warn("queue too slow!");
                }
            } catch (InterruptedException e) {
                throw new RuntimeException("InterruptedException", e);
            }
        }

        @Override
        public void onDeletionNotice(StatusDeletionNotice statusDeletionNotice) {
            //log.info("Got a status deletion notice id:" + statusDeletionNotice.getStatusId());
        }

        @Override
        public void onTrackLimitationNotice(int numberOfLimitedStatuses) {
            // This notice will be sent each time a limited stream becomes unlimited.
            // If this number is high and/or rapidly increasing, it is an indication that your predicate is
            // too broad, and you should consider a predicate with higher selectivity.
            log.warn("Got track limitation notice:" + numberOfLimitedStatuses);
        }

        @Override
        public void onScrubGeo(long userId, long upToStatusId) {
            //log.info("Got scrub_geo event userId:" + userId + " upToStatusId:" + upToStatusId);
        }

        @Override
        public void onException(Exception ex) {
            ex.printStackTrace();
        }

        @Override
        public void onStallWarning(StallWarning warning) {
            System.out.println("Got stall warning:" + warning);
        }
    };

    twitterStream.addListener(statusListener);
    twitterStream.sample(); // creates a generic StatusStream
    log.info("returned from sample()");

    return new Firehose() {
        private final Runnable doNothingRunnable = new Runnable() {
            public void run() {
            }
        };

        private long rowCount = 0L;
        private boolean waitIfmax = (getMaxEventCount() < 0L);
        private final Map<String, Object> theMap = new TreeMap<>();
        // DIY json parsing
        // private final ObjectMapper omapper = new ObjectMapper();

        private boolean maxTimeReached() {
            if (getMaxRunMinutes() <= 0) {
                return false;
            } else {
                return (System.currentTimeMillis() - startMsec) / 60000L >= getMaxRunMinutes();
            }
        }

        private boolean maxCountReached() {
            return getMaxEventCount() >= 0 && rowCount >= getMaxEventCount();
        }

        @Override
        public boolean hasMore() {
            if (maxCountReached() || maxTimeReached()) {
                return waitIfmax;
            } else {
                return true;
            }
        }

        @Override
        public InputRow nextRow() {
            // Interrupted to stop?
            if (Thread.currentThread().isInterrupted()) {
                throw new RuntimeException("Interrupted, time to stop");
            }
            // all done?
            if (maxCountReached() || maxTimeReached()) {
                if (waitIfmax) {
                    // sleep a long time instead of terminating
                    try {
                        log.info("reached limit, sleeping a long time...");
                        sleep(2000000000L);
                    } catch (InterruptedException e) {
                        throw new RuntimeException("InterruptedException", e);
                    }
                } else {
                    // allow this event through, and the next hasMore() call will be false
                }
            }
            if (++rowCount % 1000 == 0) {
                log.info("nextRow() has returned %,d InputRows", rowCount);
            }

            Status status;
            try {
                status = queue.take();
            } catch (InterruptedException e) {
                throw new RuntimeException("InterruptedException", e);
            }

            theMap.clear();

            HashtagEntity[] hts = status.getHashtagEntities();
            String text = status.getText();
            theMap.put("text", (null == text) ? "" : text);
            theMap.put("htags", (hts.length > 0)
                    ? Lists.transform(Arrays.asList(hts), new Function<HashtagEntity, String>() {
                        @Nullable
                        @Override
                        public String apply(HashtagEntity input) {
                            return input.getText();
                        }
                    })
                    : ImmutableList.<String>of());

            long[] lcontributors = status.getContributors();
            List<String> contributors = new ArrayList<>();
            for (long contrib : lcontributors) {
                contributors.add(String.format("%d", contrib));
            }
            theMap.put("contributors", contributors);

            GeoLocation geoLocation = status.getGeoLocation();
            if (null != geoLocation) {
                double lat = status.getGeoLocation().getLatitude();
                double lon = status.getGeoLocation().getLongitude();
                theMap.put("lat", lat);
                theMap.put("lon", lon);
            } else {
                theMap.put("lat", null);
                theMap.put("lon", null);
            }

            if (status.getSource() != null) {
                Matcher m = sourcePattern.matcher(status.getSource());
                theMap.put("source", m.find() ? m.group(1) : status.getSource());
            }

            theMap.put("retweet", status.isRetweet());

            if (status.isRetweet()) {
                Status original = status.getRetweetedStatus();
                theMap.put("retweet_count", original.getRetweetCount());
                User originator = original.getUser();
                theMap.put("originator_screen_name", originator != null ? originator.getScreenName() : "");
                theMap.put("originator_follower_count", originator != null ? originator.getFollowersCount() : "");
                theMap.put("originator_friends_count", originator != null ? originator.getFriendsCount() : "");
                theMap.put("originator_verified", originator != null ? originator.isVerified() : "");
            }

            User user = status.getUser();
            final boolean hasUser = (null != user);
            theMap.put("follower_count", hasUser ? user.getFollowersCount() : 0);
            theMap.put("friends_count", hasUser ? user.getFriendsCount() : 0);
            theMap.put("lang", hasUser ? user.getLang() : "");
            theMap.put("utc_offset", hasUser ? user.getUtcOffset() : -1); // resolution in seconds, -1 if not available?
            theMap.put("statuses_count", hasUser ? user.getStatusesCount() : 0);
            theMap.put("user_id", hasUser ? String.format("%d", user.getId()) : "");
            theMap.put("screen_name", hasUser ? user.getScreenName() : "");
            theMap.put("location", hasUser ? user.getLocation() : "");
            theMap.put("verified", hasUser ? user.isVerified() : "");

            theMap.put("ts", status.getCreatedAt().getTime());

            List<String> dimensions = Lists.newArrayList(theMap.keySet());

            return new MapBasedInputRow(status.getCreatedAt().getTime(), dimensions, theMap);
        }

        @Override
        public Runnable commit() {
            // ephemera in, ephemera out.
            return doNothingRunnable; // reuse the same object each time
        }

        @Override
        public void close() throws IOException {
            log.info("CLOSE twitterstream");
            twitterStream.shutdown(); // invokes twitterStream.cleanUp()
        }
    };
}
From source file:nlptexthatespeechdetection.NLPTextHateSpeechDetection.java
/**
 * @param args the command line arguments
 */
public static void main(String[] args) throws TwitterException, NotDirectoryException, IOException {
    HateSpeechClassifier1 classifier = new HateSpeechClassifier1();
    AnnotatedDataFolder data = new AnnotatedDataFolder("data");
    boolean overSampling = false;
    classifier.train(data.getDateSortedLabeledData(overSampling));

    TwitterStream twitterStream = new TwitterStreamFactory().getInstance();
    StatusListener listener = new StatusListener() {
        int numHateSpeech = 0;
        int numTweets = 0;

        @Override
        public void onStatus(Status status) {
            if (status.getLang().equals("in")) {
                numTweets++;
                if (classifier.isHateSpeech(status.getText(), 0.5)) {
                    System.out.println("@" + status.getUser().getScreenName() + " - " + status.getText()
                            + " * " + status.getId() + " # " + status.getLang() + " $ "
                            + (status.getGeoLocation() == null ? "NULLGEO" : status.getGeoLocation().toString()));
                    System.out.println();
                    System.out.println("lang: " + status.getLang());
                    System.out.println("number of detected hate speech: " + numHateSpeech);
                    System.out.println("total number of streamed tweets: " + numTweets);
                    System.out.println();
                    System.out.println();
                    numHateSpeech++;
                }
            } else {
                System.out.println("ignoring non-Indonesian tweet");
            }
            // if (status.getGeoLocation() != null) {
            //     System.out.println("@" + status.getUser().getScreenName() + " - " + status.getText() + " * "
            //             + status.getId() + " $ " + status.getGeoLocation().toString());
            // }
            // if (status.getLang().equals("id")) {
            //     System.out.println("@" + status.getUser().getScreenName() + " - " + status.getText() + " * "
            //             + status.getId() + " # " + status.getLang() + " $ "
            //             + (status.getGeoLocation() == null ? "NULLGEO" : status.getGeoLocation().toString()));
            // }
        }

        @Override
        public void onDeletionNotice(StatusDeletionNotice statusDeletionNotice) {
            // System.out.println("Got a status deletion notice id:" + statusDeletionNotice.getStatusId());
        }

        @Override
        public void onTrackLimitationNotice(int numberOfLimitedStatuses) {
            System.out.println("Got track limitation notice:" + numberOfLimitedStatuses);
        }

        @Override
        public void onScrubGeo(long userId, long upToStatusId) {
            System.out.println("Got scrub_geo event userId:" + userId + " upToStatusId:" + upToStatusId);
        }

        @Override
        public void onStallWarning(StallWarning warning) {
            System.out.println("Got stall warning:" + warning);
        }

        @Override
        public void onException(Exception ex) {
            ex.printStackTrace();
        }
    };
    twitterStream.addListener(listener);

    FilterQuery filterQuery = new FilterQuery();
    filterQuery.track(new String[] { "a", "i", "u", "e", "o" });
    filterQuery.language("in");
    twitterStream.filter(filterQuery);
    twitterStream.sample();
}