Java tutorial
import java.text.Format; import java.text.SimpleDateFormat; import java.util.Date; import java.util.Random; import java.util.concurrent.Semaphore; import java.io.*; import weka.core.Range; import weka.classifiers.Evaluation; import weka.core.Instances; import weka.core.converters.ConverterUtils.DataSource; /* Class for each classifier's thread. * Will create, train and evaluate classifier, writing results to text file * and updating the summary file. * * Copyright (C) 2014 Madison Flannery * madison.flannery@uon.edu.au * * This file is part of FlexDM. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ public class FlexDMThread extends Thread { private Dataset dataset; //Dataset private Classifier classifier; //Classifier private Semaphore s; //To control threads executing private int cNum; //Classifier number, for output private File summary; //Summary file to write to private File log; //Constructor public FlexDMThread(Dataset dtemp, Classifier ctemp, Semaphore s, int cNum, File file, File log) { dataset = dtemp; classifier = ctemp; this.s = s; this.cNum = cNum; this.summary = file; this.log = log; } //Run the thread public void run() { try { //Get the data from the source FlexDM.getMainData.acquire(); Instances data = dataset.getSource().getDataSet(); FlexDM.getMainData.release(); //Set class attribute if undefined if (data.classIndex() == -1) { data.setClassIndex(data.numAttributes() - 1); } //Process hyperparameters for classifier String temp = ""; for (int i = 0; i < classifier.getNumParams(); i++) { temp += classifier.getParameter(i).getName(); temp += " "; if (classifier.getParameter(i).getValue() != null) { temp += classifier.getParameter(i).getValue(); temp += " "; } } String[] options = weka.core.Utils.splitOptions(temp); //Print to console- experiment is starting if (temp.equals("")) { //no parameters temp = "results_no_parameters"; try { System.out.println("STARTING CLASSIFIER " + cNum + " - " + classifier.getName() + " on dataset " + dataset.getName().substring(dataset.getName().lastIndexOf("\\") + 1) + " with no parameters"); } catch (Exception e) { System.out.println("STARTING CLASSIFIER " + cNum + " - " + classifier.getName() + " on dataset " + dataset.getName() + " with no parameters"); } } else { //parameters try { System.out.println("STARTING CLASSIFIER " + cNum + " - " + classifier.getName() + " on dataset " + dataset.getName().substring(dataset.getName().lastIndexOf("\\") + 1) + " with parameters " + temp); } catch (Exception e) { System.out.println("STARTING CLASSIFIER " + cNum + " - " + classifier.getName() + " on dataset " + dataset.getName() + " with parameters " + temp); } } //Create classifier, setting parameters weka.classifiers.Classifier x = createObject(classifier.getName()); x.setOptions(options); x.buildClassifier(data); //Process the test selection String[] tempTest = dataset.getTest().split("\\s"); //Create evaluation object for training and testing classifiers Evaluation eval = new Evaluation(data); StringBuffer predictions = new StringBuffer(); //Train and evaluate classifier if (tempTest[0].equals("testset")) { //specified test file //Build classifier x.buildClassifier(data); //Open test file, load data //DataSource testFile = new DataSource(dataset.getTest().substring(7).trim()); // Instances testSet = testFile.getDataSet(); FlexDM.getTestData.acquire(); Instances testSet = dataset.getTestFile().getDataSet(); FlexDM.getTestData.release(); //Set class attribute if undefined if (testSet.classIndex() == -1) { testSet.setClassIndex(testSet.numAttributes() - 1); } //Evaluate model Object[] array = { predictions, new Range(), new Boolean(true) }; eval.evaluateModel(x, testSet, array); } else if (tempTest[0].equals("xval")) { //Cross validation //Build classifier x.buildClassifier(data); //Cross validate eval.crossValidateModel(x, data, Integer.parseInt(tempTest[1]), new Random(1), predictions, new Range(), true); } else if (tempTest[0].equals("leavexval")) { //Leave one out cross validation //Build classifier x.buildClassifier(data); //Cross validate eval.crossValidateModel(x, data, data.numInstances() - 1, new Random(1), predictions, new Range(), true); } else if (tempTest[0].equals("percent")) { //Percentage split of single data set //Set training and test sizes from percentage int trainSize = (int) Math.round(data.numInstances() * Double.parseDouble(tempTest[1])); int testSize = data.numInstances() - trainSize; //Load specified data Instances train = new Instances(data, 0, trainSize); Instances testSet = new Instances(data, trainSize, testSize); //Build classifier x.buildClassifier(train); //Train and evaluate model Object[] array = { predictions, new Range(), new Boolean(true) }; eval.evaluateModel(x, testSet, array); } else { //Evaluate on training data //Test and evaluate model Object[] array = { predictions, new Range(), new Boolean(true) }; eval.evaluateModel(x, data, array); } //create datafile for results String filename = dataset.getDir() + "/" + classifier.getDirName() + "/" + temp + ".txt"; PrintWriter writer = new PrintWriter(filename, "UTF-8"); //Print classifier, dataset, parameters info to file try { writer.println("CLASSIFIER: " + classifier.getName() + "\n DATASET: " + dataset.getName() + "\n PARAMETERS: " + temp); } catch (Exception e) { writer.println("CLASSIFIER: " + classifier.getName() + "\n DATASET: " + dataset.getName() + "\n PARAMETERS: " + temp); } //Add evaluation string to file writer.println(eval.toSummaryString()); //Process result options if (checkResults("stats")) { //Classifier statistics writer.println(eval.toClassDetailsString()); } if (checkResults("model")) { //The model writer.println(x.toString()); } if (checkResults("matrix")) { //Confusion matrix writer.println(eval.toMatrixString()); } if (checkResults("entropy")) { //Entropy statistics //Set options req'd to get the entropy stats String[] opt = new String[4]; opt[0] = "-t"; opt[1] = dataset.getName(); opt[2] = "-k"; opt[3] = "-v"; //Evaluate model String entropy = Evaluation.evaluateModel(x, opt); //Grab the relevant info from the results, print to file entropy = entropy.substring(entropy.indexOf("=== Stratified cross-validation ===") + 35, entropy.indexOf("=== Confusion Matrix ===")); writer.println("=== Entropy Statistics ==="); writer.println(entropy); } if (checkResults("predictions")) { //The models predictions writer.println("=== Predictions ===\n"); if (!dataset.getTest().contains("xval")) { //print header of predictions table if req'd writer.println(" inst# actual predicted error distribution ()"); } writer.println(predictions.toString()); //print predictions to file } writer.close(); //Summary file is semaphore controlled to ensure quality try { //get a permit //grab the summary file, write the classifiers details to it FlexDM.writeFile.acquire(); PrintWriter p = new PrintWriter(new FileWriter(summary, true)); if (temp.equals("results_no_parameters")) { //change output based on parameters temp = temp.substring(8); } //write percent correct, classifier name, dataset name to summary file p.write(dataset.getName() + ", " + classifier.getName() + ", " + temp + ", " + eval.correct() + ", " + eval.incorrect() + ", " + eval.unclassified() + ", " + eval.pctCorrect() + ", " + eval.pctIncorrect() + ", " + eval.pctUnclassified() + ", " + eval.kappa() + ", " + eval.meanAbsoluteError() + ", " + eval.rootMeanSquaredError() + ", " + eval.relativeAbsoluteError() + ", " + eval.rootRelativeSquaredError() + ", " + eval.SFPriorEntropy() + ", " + eval.SFSchemeEntropy() + ", " + eval.SFEntropyGain() + ", " + eval.SFMeanPriorEntropy() + ", " + eval.SFMeanSchemeEntropy() + ", " + eval.SFMeanEntropyGain() + ", " + eval.KBInformation() + ", " + eval.KBMeanInformation() + ", " + eval.KBRelativeInformation() + ", " + eval.weightedTruePositiveRate() + ", " + eval.weightedFalsePositiveRate() + ", " + eval.weightedTrueNegativeRate() + ", " + eval.weightedFalseNegativeRate() + ", " + eval.weightedPrecision() + ", " + eval.weightedRecall() + ", " + eval.weightedFMeasure() + ", " + eval.weightedAreaUnderROC() + "\n"); p.close(); //release semaphore FlexDM.writeFile.release(); } catch (InterruptedException e) { //bad things happened System.err.println("FATAL ERROR OCCURRED: Classifier: " + cNum + " - " + classifier.getName() + " on dataset " + dataset.getName()); } //output we have successfully finished processing classifier if (temp.equals("no_parameters")) { //no parameters try { System.out.println("FINISHED CLASSIFIER " + cNum + " - " + classifier.getName() + " on dataset " + dataset.getName().substring(dataset.getName().lastIndexOf("\\") + 1) + " with no parameters"); } catch (Exception e) { System.out.println("FINISHED CLASSIFIER " + cNum + " - " + classifier.getName() + " on dataset " + dataset.getName() + " with no parameters"); } } else { //with parameters try { System.out.println("FINISHED CLASSIFIER " + cNum + " - " + classifier.getName() + " on dataset " + dataset.getName().substring(dataset.getName().lastIndexOf("\\") + 1) + " with parameters " + temp); } catch (Exception e) { System.out.println("FINISHED CLASSIFIER " + cNum + " - " + classifier.getName() + " on dataset " + dataset.getName() + " with parameters " + temp); } } try { //get a permit //grab the log file, write the classifiers details to it FlexDM.writeLog.acquire(); PrintWriter p = new PrintWriter(new FileWriter(log, true)); Date date = new Date(); Format formatter = new SimpleDateFormat("dd/MM/YYYY HH:mm:ss"); //formatter.format(date) if (temp.equals("results_no_parameters")) { //change output based on parameters temp = temp.substring(8); } //write details to log file p.write(dataset.getName() + ", " + dataset.getTest() + ", \"" + dataset.getResult_string() + "\", " + classifier.getName() + ", " + temp + ", " + formatter.format(date) + "\n"); p.close(); //release semaphore FlexDM.writeLog.release(); } catch (InterruptedException e) { //bad things happened System.err.println("FATAL ERROR OCCURRED: Classifier: " + cNum + " - " + classifier.getName() + " on dataset " + dataset.getName()); } s.release(); } catch (Exception e) { //an error occurred System.err.println("FATAL ERROR OCCURRED: " + e.toString() + "\nClassifier: " + cNum + " - " + classifier.getName() + " on dataset " + dataset.getName()); s.release(); } } /* Method for creating classifier objects based on specified name * Using java reflection * Author: Madison Flannery, madison.flannery@uon.edu.au */ public static weka.classifiers.Classifier createObject(String className) { try { Class c = Class.forName(className); weka.classifiers.Classifier newObject = (weka.classifiers.Classifier) c.newInstance(); return newObject; } catch (Exception e) { //this will be caught in thread where method is called } return null; } /* Check if the result string is in the XML for this classifier * input: results string * output: true if the results string appears in XML for classifier, false otherwise */ public boolean checkResults(String comp) { for (int i = 0; i < dataset.getResults().length; i++) { if (dataset.getResults()[i].equalsIgnoreCase(comp)) { return true; } } return false; } }