classifier.SellerClassifier.java Source code

Java tutorial

Introduction

Here is the source code for classifier.SellerClassifier.java

Source

/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package classifier;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import preprocessor.Preprocessor;

import java.util.logging.Level;
import java.util.logging.Logger;
import twitter.MyStatus;
import weka.classifiers.Classifier;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;
import weka.core.tokenizers.Tokenizer;
import weka.core.tokenizers.WordTokenizer;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.StringToWordVector;
import weka.classifiers.trees.RandomForest;
import weka.core.Instance;
import weka.core.SerializationHelper;
import weka.filters.unsupervised.attribute.Standardize;

/**
 *
 * @author Ahmad
 */
public class SellerClassifier {

    private Classifier myClassifier;
    private Instances myInstances;
    private Filter myFilter;

    public static final String modelPath = "ref\\seller_classify_model.model";
    public static final String refPath = "ref\\seller_classify_filter.filter";

    private Instances loadData(String dataset) throws Exception {
        DataSource data = new DataSource(dataset);
        Instances instances = data.getDataSet();
        if (instances.classIndex() == -1) {
            instances.setClassIndex(instances.numAttributes() - 1);
        }

        return instances;
    }

    private void loadModelFile(String path) throws Exception {
        myClassifier = (Classifier) SerializationHelper.read(path);
    }

    private void loadFilterFile(String path) throws Exception {
        myFilter = (Filter) SerializationHelper.read(path);
    }

    private Instances startFeatureExtraction(Instances raw) throws Exception {
        myFilter = new StringToWordVector();
        myFilter.setInputFormat(raw);

        return Filter.useFilter(raw, myFilter);
    }

    public void rebuildModel(String dataset) {
        try {
            myInstances = startFeatureExtraction(loadData(dataset));
            myClassifier = new RandomForest();

            // build the model
            myClassifier.buildClassifier(myInstances);
            SerializationHelper.write(modelPath, myClassifier);
            SerializationHelper.write(refPath, myFilter);
        } catch (Exception ex) {
            Logger.getLogger(SellerClassifier.class.getName()).log(Level.SEVERE, null, ex);
        }
    }

    public double classifyTweet(MyStatus tweet) {
        try {
            String processedTweet = Preprocessor.preProcessString(tweet.getTweet());
            System.out.println(processedTweet);
            Instance instance = readToInstance(processedTweet);
            System.out.println(instance);
            double tweetClass = myClassifier.classifyInstance(instance);

            return tweetClass;
        } catch (Exception ex) {
            Logger.getLogger(SellerClassifier.class.getName()).log(Level.SEVERE, null, ex);
            return 0.0;
        }
    }

    private Instance readToInstance(String str) throws Exception {
        File f = new File("buffer.arff");
        try (FileWriter fw = new FileWriter(f, false)) {
            fw.write("@relation buffer\n\n@attribute\ttweet\tstring\n@attribute\tclass\t{0,1}\n\n@data\n");
            fw.write("\"" + str + "\",1\n");
        }

        Instances instances = loadData("buffer.arff");

        System.out.println(instances);

        assert (instances != null);
        assert (myFilter != null);
        Instances ins = Filter.useFilter(instances, myFilter);

        return ins.firstInstance();
    }

    public void initModel(String dataset) throws Exception {
        File modelFile = new File(modelPath);
        if (modelFile.exists()) {
            loadModelFile(modelPath);
            loadFilterFile(refPath);
        } else {
            rebuildModel(dataset);
        }
    }
}