Java tutorial
/* * To change this license header, choose License Headers in Project Properties. * To change this template file, choose Tools | Templates * and open the template in the editor. */ package SpamDetector; import java.io.FileReader; import java.util.Arrays; import com.opencsv.CSVReader; import java.io.FileNotFoundException; import java.io.IOException; import java.io.BufferedReader; import java.util.List; import java.util.ArrayList; import java.util.Random; import weka.classifiers.Evaluation; import weka.classifiers.trees.SimpleCart; import weka.core.Instances; import weka.core.converters.ArffLoader.ArffReader; /** * * @author jessica */ public class SpamDetector { private static String SPAM_PATH; private static String NOT_SPAM_PATH; private static String ARFF_PATH; public SpamDetector() { SPAM_PATH = new String("spam.csv"); NOT_SPAM_PATH = new String("notspam.csv"); ARFF_PATH = new String("data.arff"); } /** * @param args the command line arguments */ public static void main(String[] args) throws IOException, Exception { ArrayList<ArrayList<String>> notSpam = processCSV("notspam.csv"); ArrayList<ArrayList<String>> spam = processCSV("spam.csv"); // Cobain generate attribute & data FeatureExtraction fe = new FeatureExtraction(); fe.generateArff(spam, notSpam); // Cobain CART BufferedReader br = new BufferedReader(new FileReader("data.arff")); ArffReader arff = new ArffReader(br); Instances data = arff.getData(); data.setClassIndex(data.numAttributes() - 1); SimpleCart tree = new SimpleCart(); tree.buildClassifier(data); System.out.println(tree.toString()); Evaluation eval = new Evaluation(data); eval.evaluateModel(tree, data); System.out.println(eval.toSummaryString("\n\n\n\nResults\n======\n", false)); eval.crossValidateModel(tree, data, 10, new Random()); System.out.println(eval.toSummaryString("\n\n\n\n10-Fold\n======\n", false)); } public static ArrayList<ArrayList<String>> processCSV(String path) throws FileNotFoundException, IOException { CSVReader reader = new CSVReader(new FileReader(path), ',', '"', 2); //Read all rows at once List<String[]> allRows = reader.readAll(); Preprocessor prepocessor = new Preprocessor(); ArrayList<ArrayList<String>> msg = new ArrayList<>(); for (String[] row : allRows) { //Add processed sentences if (row[0].length() > 0) { msg.add(prepocessor.processSentence(Arrays.toString(row))); //Print //System.out.println(msg.get(msg.size()-1)); } } return msg; } }