at.tuwien.aic.Main.java Source code

Java tutorial

Introduction

Here is the source code for at.tuwien.aic.Main.java

Source

/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package at.tuwien.aic;

import at.tuwien.aic.db.Database;
import at.tuwien.aic.preprocessing.Stem;
import at.tuwien.aic.preprocessing.StopWordRemoval;
import at.tuwien.aic.classify.ClassifyTweet;
import at.tuwien.aic.twitter.DefaultTweetHandler;
import at.tuwien.aic.twitter.TweetCrawler;
import at.tuwien.aic.twitter.TweetHandler;
import at.tuwien.aic.twitter.TweetScorer;
import com.mongodb.BasicDBObject;
import com.mongodb.DB;
import com.mongodb.DBCollection;
import com.mongodb.DBCursor;
import com.mongodb.DBObject;
import com.mongodb.Mongo;
import com.mongodb.QueryBuilder;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.UnknownHostException;
import java.util.Properties;
import java.util.Scanner;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Pattern;
import twitter4j.internal.org.json.JSONException;
import weka.classifiers.Classifier;

/**
 * Command-line entry point for the stage 1 program.
 *
 * <p>Shows an interactive console menu from which the individual actions
 * (subscribing to a topic, querying a topic, testing the preprocessing steps
 * and recreating the evaluation model) can be executed.
 *
 * @author 1027822 Klaus Harrer
 */
public class Main {

    private static final String TRAINING_DATA = "resources/traindata.arff";
    private static final String MODEL_FILE = "resources/classifier.model";

    // NOTE(review): nothing in this class ever assigns _prop, so the database
    // settings ("db_host", "db_name") are unavailable until the property
    // loading is restored. classifyTopic() guards against this.
    private static Properties _prop;
    private static final Logger logger = Logger.getLogger(Main.class.getName());

    /**
     * Single scanner shared by all prompts. A Scanner reads ahead into its own
     * buffer, so creating a new one per prompt can drop input that a previous
     * instance had already consumed from System.in.
     */
    private static final Scanner STDIN = new Scanner(System.in);

    /**
     * Main entry point: prints the working directory, obtains the tweet
     * crawler and then runs the menu loop until the user quits.
     *
     * @param args command line arguments (not used)
     * @throws IOException may be propagated from the executed actions
     * @throws InterruptedException may be propagated from the executed actions
     */
    public static void main(String[] args) throws IOException, InterruptedException {

        try {
            System.out.println(new java.io.File(".").getCanonicalPath());
        } catch (IOException ex) {
            logger.log(Level.SEVERE, null, ex);
        }

        TweetCrawler tc;

        try {
            tc = TweetCrawler.getInstance();
        } catch (UnknownHostException ex) {
            // Keep the cause in the log instead of only a fixed message.
            logger.log(Level.SEVERE, "Could not connect to mongoDb", ex);
            exitWithError(2);
            return;
        }

        while (true) {
            int action = getDecision("The following actions can be executed",
                    new String[] { "Subscribe to topic", "Query topic", "Test preprocessing",
                            "Recreate the evaluation model", "Quit the application" },
                    "What action do you want to execute?");

            switch (action) {
            case 1:
                subscribeToTopic(tc);
                break;
            case 2:
                classifyTopic();
                break;
            case 3:
                testPreprocessing();
                break;
            case 4:
                ClassifyTweet.saveModel(TRAINING_DATA, MODEL_FILE);
                break;
            case 5:
                exit();
                // exit() terminates the JVM; the return only makes the absence
                // of a fall-through explicit.
                return;
            case 6: {
                // Not offered in the menu, so getDecision never returns 6;
                // kept as a development hook.
                ClassifyTweet.saveModel(TRAINING_DATA, MODEL_FILE);
                Classifier c = ClassifyTweet.loadModel(MODEL_FILE);
                ClassifyTweet.classifyTweetArff(c, "resources/unlabeled.arff");
                break;
            }
            default:
                break;
            }
        }
    }

    /**
     * Asks for the keywords of a topic, starts collecting tweets for them and
     * stops collecting once the user presses enter (or input ends).
     *
     * @param tc the crawler used to collect the tweets
     */
    private static void subscribeToTopic(TweetCrawler tc) throws IOException, InterruptedException {
        String[] keywords = getNonEmptyString(
                "Which topic do you want to subscribe to (use spaces to specify more than one keyword)?")
                        .split(" ");

        tc.collectTweets(new DefaultTweetHandler() {
            @Override
            public boolean isMatch(String topic) {
                return true;
            }
        }, keywords);

        System.out.println("Starting to collection tweets");
        System.out.println("Press enter to quit collecting");

        // Wait for one line. When standard input is exhausted hasNextLine()
        // returns false, so this cannot spin forever on end of input.
        if (STDIN.hasNextLine()) {
            STDIN.nextLine();
        }

        tc.stopCollecting();
    }

    /**
     * Lets the user choose which preprocessing step(s) to try out on a text
     * entered at the console.
     */
    private static void testPreprocessing() {
        int subAction = getDecision("The following preprocessing steps are available",
                new String[] { "Stop word removal", "Stemming", "Both" }, "What do you want to test?");

        switch (subAction) {
        case 1:
            stopWords();
            break;
        case 2:
            stem();
            break;
        case 3:
            stem(stopWords());
            break;
        default:
            break;
        }
    }

    /**
     * Prompts until a non-empty line is entered.
     *
     * @param msg the prompt to show
     * @return the entered line, or an empty string if input ended first
     */
    private static String getNonEmptyString(String msg) {
        return getNonEmptyString(msg, "");
    }

    /**
     * Prompts until a non-empty line is entered. If a non-empty default is
     * given, an empty line selects that default.
     *
     * @param msg the prompt to show
     * @param defaultValue value shown in the prompt; returned on an empty line
     *        (when it is not empty itself) and when input ends
     * @return the entered line or {@code defaultValue}
     */
    private static String getNonEmptyString(String msg, String defaultValue) {
        print(msg, defaultValue);

        while (STDIN.hasNextLine()) {
            String line = STDIN.nextLine();

            if (!line.isEmpty()) {
                return line;
            }

            if (!defaultValue.isEmpty()) {
                // The prompt advertised the default, so an empty line accepts it.
                return defaultValue;
            }

            print(msg, defaultValue);
        }

        return defaultValue;
    }

    /**
     * Prints a goodbye message and terminates the JVM with exit code 0.
     */
    public static void exit() {
        System.out.println("Application is exiting - Goodbye!");
        System.exit(0);
    }

    /**
     * Prints the error code and terminates the JVM with it.
     *
     * @param errorCode the exit code of the process
     */
    public static void exitWithError(int errorCode) {
        System.out.println("Exiting with errorCode " + errorCode);
        System.exit(errorCode);
    }

    /**
     * Prints a numbered list of options and asks until one of them is chosen.
     *
     * @param input headline printed above the options
     * @param options the options, numbered starting at 1
     * @param output the prompt used to ask for the number
     * @return the chosen number, between 1 and {@code options.length}
     * @throws IllegalStateException if input ends before a choice was made
     */
    private static int getDecision(String input, String[] options, String output) {
        System.out.println(input);

        int c = 0;

        for (String option : options) {
            System.out.println("\t" + ++c + ". " + option);
        }

        System.out.println("");

        int action = -1;

        // Options are numbered from 1, so 0 is not a valid choice.
        while (action < 1 || action > c) {
            String answer = getNonEmptyString(output);

            if (answer.isEmpty()) {
                // Only happens once standard input is exhausted; asking again
                // would loop forever.
                throw new IllegalStateException("Input ended while waiting for a decision");
            }

            try {
                action = Integer.parseInt(answer.trim());
            } catch (NumberFormatException ignored) {
                // Not a number: leave action invalid so the user is asked again.
            }
        }

        return action;
    }

    /**
     * Prints the prompt, followed by the default value in brackets if there is one.
     */
    private static void print(String msg, String ret) {
        if (!ret.equals("")) {
            System.out.print(msg + " [" + ret + "]: ");
        } else {
            System.out.print(msg + ": ");
        }
    }

    /**
     * Asks for a text, runs the stop word removal on it and prints the result.
     *
     * @return the processed text, or the entered text if processing failed
     */
    private static String stopWords() {
        String text = getNonEmptyString("Enter the text to be StopWordRemoved");

        try {
            StopWordRemoval swr = new StopWordRemoval("resources/stopwords.txt");
            text = swr.processText(text);
            System.out.println(text);
        } catch (IOException ex) {
            logger.log(Level.SEVERE, null, ex);
        }

        return text;
    }

    /**
     * Asks for a text and prints its stemmed form.
     */
    private static void stem() {
        stem(null);
    }

    /**
     * Prints the stemmed form of the given text.
     *
     * @param text the text to stem; if {@code null} the user is asked for one
     */
    private static void stem(String text) {
        if (text == null) {
            text = getNonEmptyString("Enter the text to be stemmed");
        }

        System.out.println(Stem.stem(text));
    }

    /**
     * Asks for a topic, classifies and scores every stored tweet whose text
     * contains it and prints the resulting sentiment value.
     */
    private static void classifyTopic() {
        if (_prop == null) {
            // Without the properties there is no host/database name to use.
            logger.severe("Database properties are not loaded; cannot query a topic");
            return;
        }

        String topic = getNonEmptyString("Enter a topic you want to query");
        Mongo mongo;

        try {
            mongo = new Mongo(_prop.getProperty("db_host"));
        } catch (UnknownHostException ex) {
            logger.log(Level.SEVERE, null, ex);
            return;
        }

        int count = 0;
        double tweetPosUserScore = 0;
        double tweetNegUserScore = 0;

        try {
            DB db = mongo.getDB(_prop.getProperty("db_name"));

            Classifier c = ClassifyTweet.loadModel(MODEL_FILE);
            TweetScorer scorer = new TweetScorer();

            DBCollection tweetCollection = db.getCollection("tweets");

            // Quote the topic so regex metacharacters typed by the user are
            // matched literally, and search unanchored so tweets that start
            // or end with the topic (or span several lines) are found too.
            Pattern pattern = Pattern.compile(Pattern.quote(topic));
            DBObject query = QueryBuilder.start("text").regex(pattern).get();
            DBCursor resultSet = tweetCollection.find(query);

            try {
                while (resultSet.hasNext()) {
                    try {
                        DBObject obj = resultSet.next();
                        String tweetText = (String) obj.get("text");

                        double classification = ClassifyTweet.classifyTweet(c, tweetText);
                        double score = scorer.scoreTweet(obj);

                        // Decide per tweet; a running sum would let earlier
                        // tweets determine where later ones are counted.
                        if (classification > 0) {
                            tweetPosUserScore += score;
                        } else {
                            tweetNegUserScore += score;
                        }
                        ++count;
                    } catch (NumberFormatException ex) {
                        logger.log(Level.SEVERE, null, ex);
                    } catch (JSONException ex) {
                        logger.log(Level.SEVERE, null, ex);
                    } catch (IOException ex) {
                        logger.log(Level.SEVERE, null, ex);
                    }
                }
            } finally {
                resultSet.close();
            }
        } finally {
            mongo.close();
        }

        double totalScore = tweetPosUserScore + tweetNegUserScore;

        if (count == 0 || totalScore == 0) {
            // Avoids printing NaN from a 0/0 division.
            System.out.println("No scored tweets were found for this topic");
            return;
        }

        // Share of the positive score in the total score
        double value = tweetPosUserScore / totalScore;
        System.out.println("This topic has a sentiment value of: " + value);
    }
}