Java tutorial
/* * To change this license header, choose License Headers in Project Properties. * To change this template file, choose Tools | Templates * and open the template in the editor. */ /** * * @author Pedro */ // Clasificadores Weka: http://weka.sourceforge.net/doc.dev/weka/classifiers/Classifier.html import java.io.BufferedReader; import java.io.FileNotFoundException; import java.io.FileReader; import weka.classifiers.Classifier; import weka.classifiers.Evaluation; import weka.classifiers.evaluation.NominalPrediction; import weka.classifiers.functions.MultilayerPerceptron; import weka.core.Attribute; import weka.core.Debug.Random; import weka.core.DenseInstance; import weka.core.FastVector; import weka.core.Instance; import weka.core.Instances; public class CopiaSeg3 { // Lectura de losl fichero de datos public static BufferedReader readDataFile(String filename) { BufferedReader inputReader = null; try { inputReader = new BufferedReader(new FileReader(filename)); } catch (FileNotFoundException ex) { System.err.println("File not found: " + filename); } return inputReader; } // Resultadis de la clasificacin public static Evaluation simpleClassify(Classifier model, Instances trainingSet, Instances testingSet) throws Exception { Evaluation validation = new Evaluation(trainingSet); model.buildClassifier(trainingSet); validation.evaluateModel(model, testingSet); // Imprime el resultado de Weka explorer: String strSummary = validation.toSummaryString(); System.out.println(strSummary); return validation; } // Calculo de la precisin del algoritmo (Tambin se muestra en la funcin simpleClassify) public static double calculateAccuracy(FastVector predictions) { double correct = 0; for (int i = 0; i < predictions.size(); i++) { NominalPrediction np = (NominalPrediction) predictions.elementAt(i); if (np.predicted() == np.actual()) { correct++; } } return 100 * correct / predictions.size(); } // Preparacin de los conjuntos de datos public static Instances[] split(Instances data, int numberOfFolds) { Instances[] split = new Instances[2]; Random semilla = new Random(); int seed = semilla.nextInt(20); // Genera una semilla aleatorio entre 0 y 20 Random rand = new Random(seed); // Create seeded number generator Instances randData = new Instances(data); // Crea una copia de los datos originales randData.randomize(rand); // Ordena los datos de forma aleatoria split[0] = randData.trainCV(numberOfFolds, 0); split[1] = randData.testCV(numberOfFolds, 0); return split; } public static void main(String[] args) throws Exception { BufferedReader datafile = readDataFile("breast-cancer-wisconsin.arff"); Instances data = new Instances(datafile); data.setClassIndex(data.numAttributes() - 1); // Elije el nmero de particiones para la valicacin (4 = 75% Train, 25% Test) Instances[] split = split(data, 4); // Separa los conjuntos en los arrays trainning y testing Instances trainingSplits = split[0]; Instances testingSplits = split[1]; // Elegir un conjunto de clasificadores Classifier[] models = { new MultilayerPerceptron() //, new J48 //, ... }; FastVector fvWekaAttributes = new FastVector(9); // Ejecutar cada clasificador for (int j = 0; j < models.length; j++) { // Collect every group of predictions for current model in a FastVector FastVector predictions = new FastVector(); // For each training-testing split pair, train and test the classifier Evaluation validation = simpleClassify(models[j], trainingSplits, testingSplits); predictions.appendElements(validation.predictions()); // Uncomment to see the summary for each training-testing pair. System.out.println(models[j].toString()); // Calculate overall accuracy of current classifier on all splits double accuracy = calculateAccuracy(predictions); // // Print current classifier's name and accuracy in a complicated, but nice-looking way. System.out.println(models[j].getClass().getSimpleName() + " Accuracy: " + String.format("%.2f%%", accuracy) + "\n====================="); // // // Step 4: use the classifier // // For real world applications, the actual use of the classifier is the ultimate goal. Heres the simplest way to achieve that. Lets say weve built an instance (named iUse) as explained in step 2: // // Specify that the instance belong to the training set // // in order to inherit from the set description Instance iUse = new DenseInstance(9); iUse.setValue((Attribute) predictions.elementAt(0), 4); iUse.setValue((Attribute) predictions.elementAt(1), 8); iUse.setValue((Attribute) predictions.elementAt(2), 8); iUse.setValue((Attribute) predictions.elementAt(3), 5); iUse.setValue((Attribute) predictions.elementAt(4), 4); iUse.setValue((Attribute) predictions.elementAt(5), 5); iUse.setValue((Attribute) predictions.elementAt(6), 10); iUse.setValue((Attribute) predictions.elementAt(7), 4); iUse.setValue((Attribute) predictions.elementAt(8), 1); iUse.setDataset(trainingSplits); // // // Get the likelihood of each classes // fDistribution[0] is the probability of being positive? // fDistribution[1] is the probability of being negative? double[] fDistribution = models[j].distributionForInstance(iUse); System.out.println("Probabilidad positivo: " + fDistribution[0]); System.out.println("Probabilidad negativo: " + fDistribution[1]); } } }