Java tutorial
/** * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ package benche.me.TwitterParser; import java.util.ArrayList; import java.util.List; import java.util.logging.Logger; import java.io.FileReader; import java.io.IOException; import java.io.Reader; import twitter4j.FilterQuery; import twitter4j.Query; import twitter4j.QueryResult; import twitter4j.StallWarning; import twitter4j.Status; import twitter4j.StatusDeletionNotice; import twitter4j.StatusListener; import twitter4j.Twitter; import twitter4j.TwitterException; import twitter4j.TwitterFactory; import twitter4j.TwitterStream; import twitter4j.TwitterStreamFactory; import twitter4j.conf.ConfigurationBuilder; import org.apache.commons.lang3.ArrayUtils; import com.googlecode.jcsv.reader.CSVReader; import com.googlecode.jcsv.reader.internal.CSVReaderBuilder; /** * Twitter Parser - Main Class * Collects twitter stream and search data and outputs to CSV for easy analytic processing * Uses Twitter4J library * @author Ben Che - benche.me */ public class Main { /** Logger instance for debugging */ private final static Logger logger = Logger.getLogger(Main.class.getName()); /** Collection of search terms */ private static List<String> searchTerms, productList; /** * Usage: java com.mylan.twitter.Main * Loads search terms, configures twitter connection, begins stream/search logic * @throws IOException * @throws InterruptedException */ public static void main(String[] args) throws IOException, InterruptedException { /** * Read CSV file containing search terms list. * CSV file format: * Search term group, comma delineated * term groups are delineated with the straight vertical line character */ logger.info("Building search list..."); Reader reader = new FileReader("data/searchterms.csv"); CSVReader<String[]> csvParser = CSVReaderBuilder.newDefaultReader(reader); List<String[]> list = csvParser.readAll(); searchTerms = new ArrayList<String>(); for(String[] term : list) { for(String variation : term) { searchTerms.add(variation); } } String[] keywords = new String[searchTerms.size()]; for(int j = 0; j < searchTerms.size(); j++) { keywords[j] = searchTerms.get(j); } logger.info("Search list loaded."); /** * Read exported product summary CSV and extract all relevant product names */ logger.info("Building product list..."); Reader reader2 = new FileReader("data/product_data.csv"); CSVReader<String[]> csvParser2 = CSVReaderBuilder.newDefaultReader(reader2); List<String[]> list2 = csvParser2.readAll(); productList = new ArrayList<String>(); for(String[] csvLine : list2) { String word = Util.getFirstWord(csvLine[0]); for(String s1 : productList) { if(!word.equalsIgnoreCase(s1)) { productList.add(word); } } logger.info("WORD = " + word); } String[] productWords = new String[productList.size()]; for(int z = 0; z < productList.size(); z++) { productWords[z] = productList.get(z); } logger.info("Product list loaded."); final String[] combinedKeywords = ArrayUtils.addAll(keywords, productWords); /** * Configure the twitter connections * Credentials located in class Constants */ final Twitter twitter = configureSearch(); final TwitterStream stream = configureStream(); /** * Searches twitter DB for tweets matching keywords */ class TwitterSearch extends Thread { @Override public void run() { try { for(String s : combinedKeywords) { Query query = new Query(s); int limiterCount = 170; QueryResult result; do { result = twitter.search(query); List<Status> tweets = result.getTweets(); for (Status tweet : tweets) { if(count < 2) { logger.info("Query limit reached, sleeping for 15 minutes."); count = 170; Thread.sleep(900005); //sleep for a little more than 15 minutes to delay } try { String geo = "N/A"; if(tweet.getGeoLocation()!=null){ geo = tweet.getGeoLocation().toString(); } String lang = "ENG"; if(tweet.getLang() != null) { lang = tweet.getLang().toString(); } String name = "Unknown"; if(tweet.getUser().getName() != null) { name = tweet.getUser().getName().toString(); } String screenName = tweet.getUser().getScreenName(); String text = tweet.getText(); text = text.replaceAll("^ | $|\\n ", " "); text = text.replaceAll(";", ""); String toPrint = String.valueOf(tweet.getId()) + "," + tweet.getCreatedAt() + ", " + text + ", " + geo + ", " + lang + ", " + ((tweet.isRetweet()) ? "retweet" : "original") + ", " + name + "," + screenName + "|"; Util.writeStringToFile("data/searchResults.csv", toPrint, true); count--; logger.info("TEXT = " + text); } catch (IOException e1) { e1.printStackTrace(); } } } while ((query = result.nextQuery()) != null); try { logger.info("Query limit reached, sleeping for 15 minutes."); Thread.sleep(900500); //sleep for a little more than 15 minutes to delay } catch (InterruptedException e) { Thread.currentThread().interrupt(); logger.info("Thread Exception - Interrupted"); return; } } } catch(Exception e) { e.printStackTrace(); } } } /** * Streaming tweets * TODO: close/manage threads */ class TwitterStream extends Thread { @Override public void run() { logger.info("here"); StatusListener listener = new StatusListener() { public void onStatus(Status status) { logger.info(status.getUser().getScreenName() + ": " + status.getText()); logger.info("Count: " + count); String geo = "N/A"; if(status.getGeoLocation()!=null){ geo = status.getGeoLocation().toString(); } String lang = "ENG"; if(status.getLang() != null) { lang = status.getLang().toString(); } String name = "Unknown"; if(status.getUser().getName() != null) { name = status.getUser().getName().toString(); } String text = status.getText(); CharSequence filteredWords[] = {}; for(CharSequence c : filteredWords) { if(text.contains(c)) { logger.info("Filtering out status with word " + c.toString()); return; } } text = text.replaceAll("^ | $|\\n ", " "); text = text.replaceAll(";", ""); if(status.getUser().getScreenName().equalsIgnoreCase("")) { return; } try { Util.writeStringToFile("data/searchResults.csv", String.valueOf(status.getId()) + "," + status.getCreatedAt() + ", " + text + ", " + geo + ", " + lang + ", " + ((status.isRetweet()) ? "retweet" : "original") + ", " + name + "," + status.getUser().getScreenName() + "|"); } catch (IOException e1) { // TODO Auto-generated catch block e1.printStackTrace(); } count++; if(count >= TOTAL_TWEETS) { twitterStream.shutdown(); } } public void onDeletionNotice(StatusDeletionNotice statusDeletionNotice) {} public void onTrackLimitationNotice(int numberOfLimitedStatuses) {} public void onScrubGeo(long userId, long upToStatusId) {} public void onException(Exception ex) {} public void onStallWarning(StallWarning warning) {} }; logger.info("Setting up Twitter stream"); twitterStream.addListener(listener); logger.info("Starting up Twitter filtering..."); /** Concatenate original keywords with product list keywords */ String[] combinedKeywords = ArrayUtils.addAll(keywords, productWords); FilterQuery query = new FilterQuery().track(combinedKeywords); /** Filter stream on combined keyword list */ twitterStream.filter(query); } } catch (TwitterException te) { te.printStackTrace(); System.out.println("Failed to search tweets: " + te.getMessage()); System.exit(-1); } } /** * Configure twitter API connection for historical search * @return Twitter connection instance */ private static Twitter configureSearch() { ConfigurationBuilder cb = new ConfigurationBuilder(); cb.setDebugEnabled(true).setOAuthConsumerKey(Constants.CONSUMER_KEY_KEY) .setOAuthConsumerSecret(Constants.CONSUMER_SECRET_KEY) .setOAuthAccessToken(Constants.ACCESS_TOKEN_KEY) .setOAuthAccessTokenSecret(Constants.ACCESS_TOKEN_SECRET_KEY); TwitterFactory tf = new TwitterFactory(cb.build()); Twitter twitter = tf.getInstance(); return twitter; } /** * Configure twitter API connection for tweet streaming * @return TwitterStream instance */ private static TwitterStream configureStream() { ConfigurationBuilder cb = new ConfigurationBuilder(); cb.setOAuthConsumerKey(Constants.CONSUMER_KEY_KEY); cb.setOAuthConsumerSecret(Constants.CONSUMER_SECRET_KEY); cb.setOAuthAccessToken(Constants.ACCESS_TOKEN_KEY); cb.setOAuthAccessTokenSecret(Constants.ACCESS_TOKEN_SECRET_KEY); cb.setJSONStoreEnabled(true); cb.setIncludeEntitiesEnabled(true); return new TwitterStreamFactory(cb.build()).getInstance(); }