benche.me.TwitterParser.Main.java Source code

Java tutorial

Introduction

Here is the source code for benche.me.TwitterParser.Main.java

Source

    /** 
     * This program is free software: you can redistribute it and/or modify
     * it under the terms of the GNU General Public License as published by
     * the Free Software Foundation, either version 3 of the License, or
     * (at your option) any later version.
     * 
     * This program is distributed in the hope that it will be useful,
     * but WITHOUT ANY WARRANTY; without even the implied warranty of
     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     * GNU General Public License for more details.
     * 
     * You should have received a copy of the GNU General Public License
     * along with this program.  If not, see <http://www.gnu.org/licenses/>.
    */
    package benche.me.TwitterParser;

    import java.io.FileReader;
    import java.io.IOException;
    import java.io.Reader;
    import java.util.ArrayList;
    import java.util.List;
    import java.util.logging.Level;
    import java.util.logging.Logger;

    import twitter4j.FilterQuery;
    import twitter4j.Query;
    import twitter4j.QueryResult;
    import twitter4j.StallWarning;
    import twitter4j.Status;
    import twitter4j.StatusDeletionNotice;
    import twitter4j.StatusListener;
    import twitter4j.Twitter;
    import twitter4j.TwitterException;
    import twitter4j.TwitterFactory;
    import twitter4j.TwitterStream;
    import twitter4j.TwitterStreamFactory;
    import twitter4j.conf.ConfigurationBuilder;

    import org.apache.commons.lang3.ArrayUtils;

    import com.googlecode.jcsv.reader.CSVReader;
    import com.googlecode.jcsv.reader.internal.CSVReaderBuilder;

    /**
     * Twitter Parser - Main Class
     * Collects twitter stream and search data and outputs to CSV for easy analytic processing
     * Uses Twitter4J library
     * @author Ben Che - benche.me
     */
    public class Main {

        /** Logger instance for debugging */
        private final static Logger logger = Logger.getLogger(Main.class.getName());

        /** Collection of search terms */
        private static List<String> searchTerms, productList;

  /**
   * Usage: java com.mylan.twitter.Main
   * Loads search terms, configures twitter connection, begins stream/search logic
   * @throws IOException 
   * @throws InterruptedException 
   */
  public static void main(String[] args) throws IOException, InterruptedException {
     /** 
      * Read CSV file containing search terms list.   
      * CSV file format:
      * Search term group, comma delineated
      * term groups are delineated with the straight vertical line character 
      */
     logger.info("Building search list...");
     Reader reader = new FileReader("data/searchterms.csv");

     CSVReader<String[]> csvParser = CSVReaderBuilder.newDefaultReader(reader);
     List<String[]> list = csvParser.readAll();
     
     searchTerms = new ArrayList<String>();
     for(String[] term : list) {
        for(String variation : term) {
           searchTerms.add(variation);
        }
     }
     String[] keywords = new String[searchTerms.size()];              
     for(int j = 0; j < searchTerms.size(); j++) {
        keywords[j] = searchTerms.get(j);
     }
     logger.info("Search list loaded.");
     
     /** 
      * Read exported product summary CSV and extract all relevant product names
      */
     logger.info("Building product list...");
     Reader reader2 = new FileReader("data/product_data.csv");
     
     CSVReader<String[]> csvParser2 = CSVReaderBuilder.newDefaultReader(reader2);
     List<String[]> list2 = csvParser2.readAll();
     
     productList = new ArrayList<String>();
     for(String[] csvLine : list2) {
        String word = Util.getFirstWord(csvLine[0]);
        for(String s1 : productList) {
           if(!word.equalsIgnoreCase(s1)) {
              productList.add(word);
           }
        }
        logger.info("WORD = " + word);
     }
     String[] productWords = new String[productList.size()];
     for(int z = 0; z < productList.size(); z++) {
        productWords[z] = productList.get(z);
     }
     logger.info("Product list loaded.");
     
     final String[] combinedKeywords = ArrayUtils.addAll(keywords, productWords);
     
     /** 
      * Configure the twitter connections
      * Credentials located in class Constants
      */
     final Twitter twitter = configureSearch();
          final TwitterStream stream = configureStream();
      
           /** 
           * Searches twitter DB for tweets matching keywords
           */
          class TwitterSearch extends Thread {
         @Override
         public void run() {
            try {
                for(String s : combinedKeywords) {
                    Query query = new Query(s);
                     int limiterCount = 170;
                    QueryResult result;
                    do {
                        result = twitter.search(query);
                        List<Status> tweets = result.getTweets();
                        for (Status tweet : tweets) {
                           if(count < 2) {
                               logger.info("Query limit reached, sleeping for 15 minutes.");
                             count = 170;
                             Thread.sleep(900005);  //sleep for a little more than 15 minutes to delay
                           }
                           try {
                            String geo = "N/A";
                              if(tweet.getGeoLocation()!=null){
                                  geo = tweet.getGeoLocation().toString();
                              }
                              String lang = "ENG";
                              if(tweet.getLang() != null) {
                                 lang = tweet.getLang().toString();
                              }
                              String name = "Unknown";
                              if(tweet.getUser().getName() != null) {
                                 name = tweet.getUser().getName().toString();
                              }
                              String screenName = tweet.getUser().getScreenName();
                              String text = tweet.getText();
                              text = text.replaceAll("^ | $|\\n ", " ");
                              text = text.replaceAll(";", "");
                              String toPrint =  String.valueOf(tweet.getId()) + "," + tweet.getCreatedAt() + ", " + text + ", " + geo 
                                 + ", " + lang + ", " + ((tweet.isRetweet()) ? "retweet" : "original") + ", " + name + "," + screenName + "|";
                           Util.writeStringToFile("data/searchResults.csv", toPrint, true);
                              count--;
                           logger.info("TEXT = " + text);
                        } catch (IOException e1) {
                         e1.printStackTrace();
                        }
                        }
                    } while ((query = result.nextQuery()) != null);
                  
                    try {
                     logger.info("Query limit reached, sleeping for 15 minutes.");
                       Thread.sleep(900500);  //sleep for a little more than 15 minutes to delay
                    } catch (InterruptedException e) {
                        Thread.currentThread().interrupt();
                        logger.info("Thread Exception - Interrupted");
                        return;
                    }
                }
            } catch(Exception e) {
               e.printStackTrace();
            }
         }
      }
      /** 
       * Streaming tweets
       * TODO: close/manage threads
       */
      class TwitterStream extends Thread {
         @Override
         public void run() {
            logger.info("here");
        StatusListener listener = new StatusListener() {
        public void onStatus(Status status) {
         logger.info(status.getUser().getScreenName() + ": " + status.getText());
         logger.info("Count: " + count);
         String geo = "N/A";
         if(status.getGeoLocation()!=null){
            geo = status.getGeoLocation().toString();
         }
         String lang = "ENG";
         if(status.getLang() != null) {
            lang = status.getLang().toString();
         }
         String name = "Unknown";
         if(status.getUser().getName() != null) {
            name = status.getUser().getName().toString();
         }
         String text = status.getText();
         CharSequence filteredWords[] = {};
            for(CharSequence c : filteredWords) {
               if(text.contains(c)) {
                  logger.info("Filtering out status with word " + c.toString());
                  return;
               }
            }
            text = text.replaceAll("^ | $|\\n ", " ");
            text = text.replaceAll(";", "");
            if(status.getUser().getScreenName().equalsIgnoreCase("")) {
               return;
            }
            try {
               Util.writeStringToFile("data/searchResults.csv", String.valueOf(status.getId()) + "," + status.getCreatedAt() + ", " + text + ", " + geo 
               + ", " + lang + ", " + ((status.isRetweet()) ? "retweet" : "original") + ", " + name + "," + status.getUser().getScreenName() + "|");
            } catch (IOException e1) {
               // TODO Auto-generated catch block
               e1.printStackTrace();
            }
            count++;
            if(count >= TOTAL_TWEETS) {
               twitterStream.shutdown();
                 }
               }
         public void onDeletionNotice(StatusDeletionNotice statusDeletionNotice) {}
         public void onTrackLimitationNotice(int numberOfLimitedStatuses) {}
         public void onScrubGeo(long userId, long upToStatusId) {}
         public void onException(Exception ex) {}
          public void onStallWarning(StallWarning warning) {}
      };
      logger.info("Setting up Twitter stream");
      twitterStream.addListener(listener);
      logger.info("Starting up Twitter filtering...");

      /** Concatenate original keywords with product list keywords */
      String[] combinedKeywords = ArrayUtils.addAll(keywords, productWords);
      FilterQuery query = new FilterQuery().track(combinedKeywords);

      /** Filter stream on combined keyword list */
      twitterStream.filter(query);   
         }
      } catch (TwitterException te) {
          te.printStackTrace();
          System.out.println("Failed to search tweets: " + te.getMessage());
          System.exit(-1);
      }
  }

        /**
         * Builds the Twitter client used for historical search.
         * Debug output is switched on and the OAuth credentials are taken from
         * class Constants.
         * @return Twitter connection instance
         */
        private static Twitter configureSearch() {
            ConfigurationBuilder builder = new ConfigurationBuilder();
            builder.setDebugEnabled(true);
            builder.setOAuthConsumerKey(Constants.CONSUMER_KEY_KEY);
            builder.setOAuthConsumerSecret(Constants.CONSUMER_SECRET_KEY);
            builder.setOAuthAccessToken(Constants.ACCESS_TOKEN_KEY);
            builder.setOAuthAccessTokenSecret(Constants.ACCESS_TOKEN_SECRET_KEY);
            return new TwitterFactory(builder.build()).getInstance();
        }

  /**
   * Builds the Twitter client used for tweet streaming.
   * OAuth credentials are taken from class Constants; the JSON store and
   * entity inclusion are both switched on.
   * @return TwitterStream instance
   */
  private static TwitterStream configureStream() {
       ConfigurationBuilder builder = new ConfigurationBuilder();
       builder.setOAuthConsumerKey(Constants.CONSUMER_KEY_KEY)
               .setOAuthConsumerSecret(Constants.CONSUMER_SECRET_KEY)
               .setOAuthAccessToken(Constants.ACCESS_TOKEN_KEY)
               .setOAuthAccessTokenSecret(Constants.ACCESS_TOKEN_SECRET_KEY);
       builder.setJSONStoreEnabled(true);
       builder.setIncludeEntitiesEnabled(true);
       TwitterStreamFactory factory = new TwitterStreamFactory(builder.build());
       return factory.getInstance();
  }