CopiaSeg3.java Source code

Java tutorial

Introduction

Here is the source code for CopiaSeg3.java

Source

/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
/**
 *
 * @author Pedro
 */

// Clasificadores Weka: http://weka.sourceforge.net/doc.dev/weka/classifiers/Classifier.html

import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import weka.classifiers.Classifier;
import weka.classifiers.Evaluation;
import weka.classifiers.evaluation.NominalPrediction;
import weka.classifiers.functions.MultilayerPerceptron;
import weka.core.Attribute;
import weka.core.Debug.Random;
import weka.core.DenseInstance;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.Instances;

/**
 * Small Weka demo: loads an ARFF data set, holds out one cross-validation fold
 * for testing, trains each configured classifier on the remaining data, prints
 * its evaluation summary and accuracy, and finally asks the trained model for
 * the class distribution of one hand-built sample.
 */
public class CopiaSeg3 {

    /** ARFF file loaded by {@link #main(String[])} when no path is given on the command line. */
    private static final String DEFAULT_DATA_FILE = "breast-cancer-wisconsin.arff";

    /**
     * Values (internal floating-point format) of the hand-built sample. They are
     * assigned, in order, to the non-class attributes of the data set.
     */
    private static final double[] SAMPLE_VALUES = { 4, 8, 8, 5, 4, 5, 10, 4, 1 };

    /**
     * Opens a data file for reading.
     *
     * @param filename path of the file to open
     * @return a buffered reader on the file, or {@code null} when the file does
     *         not exist (a message is printed to {@code System.err} in that
     *         case); the caller is responsible for closing the reader
     */
    public static BufferedReader readDataFile(String filename) {
        BufferedReader inputReader = null;

        try {
            inputReader = new BufferedReader(new FileReader(filename));
        } catch (FileNotFoundException ex) {
            // Kept as a best-effort API: report the problem and signal it with null.
            System.err.println("File not found: " + filename);
        }

        return inputReader;
    }

    /**
     * Trains {@code model} on {@code trainingSet}, evaluates it on
     * {@code testingSet} and prints the evaluation summary to standard output.
     *
     * @param model       classifier to train; it is modified (built) by this call
     * @param trainingSet instances used to build the classifier
     * @param testingSet  instances used to evaluate the built classifier
     * @return the evaluation holding the results for {@code testingSet}
     * @throws Exception if building or evaluating the classifier fails
     */
    public static Evaluation simpleClassify(Classifier model, Instances trainingSet, Instances testingSet)
            throws Exception {
        Evaluation validation = new Evaluation(trainingSet);

        model.buildClassifier(trainingSet);
        validation.evaluateModel(model, testingSet);

        // Same textual summary the Weka Explorer shows.
        String strSummary = validation.toSummaryString();
        System.out.println(strSummary);

        return validation;
    }

    /**
     * Computes the percentage of predictions whose predicted class equals the
     * actual class.
     *
     * @param predictions vector of {@link NominalPrediction} objects
     * @return accuracy in the range 0..100; {@code NaN} when {@code predictions}
     *         is empty, because the ratio is then 0/0
     */
    public static double calculateAccuracy(FastVector predictions) {
        double correct = 0;

        for (int i = 0; i < predictions.size(); i++) {
            NominalPrediction np = (NominalPrediction) predictions.elementAt(i);
            if (np.predicted() == np.actual()) {
                correct++;
            }
        }

        return 100 * correct / predictions.size();
    }

    /**
     * Shuffles a copy of {@code data} and splits it into a training and a
     * testing set using fold 0 of a {@code numberOfFolds}-fold cross-validation
     * (for example, 4 folds gives roughly 75% training and 25% testing).
     * The input data set itself is not modified.
     *
     * @param data          instances to split
     * @param numberOfFolds number of cross-validation folds
     * @return a two-element array: index 0 is the training set, index 1 the
     *         testing set
     */
    public static Instances[] split(Instances data, int numberOfFolds) {
        Instances[] split = new Instances[2];

        Random semilla = new Random();
        int seed = semilla.nextInt(20); // random seed in the range 0..19 (upper bound is exclusive)
        Random rand = new Random(seed); // seeded generator used for the shuffle
        Instances randData = new Instances(data); // work on a copy of the original data
        randData.randomize(rand); // put the copied instances in random order

        split[0] = randData.trainCV(numberOfFolds, 0);
        split[1] = randData.testCV(numberOfFolds, 0);

        return split;
    }

    /**
     * Reads all instances from an ARFF file and closes the file afterwards.
     *
     * @param filename path of the ARFF file
     * @return the instances read from the file
     * @throws Exception if the file does not exist or cannot be parsed
     */
    private static Instances loadData(String filename) throws Exception {
        BufferedReader reader = readDataFile(filename);
        if (reader == null) {
            // Fail with a clear error instead of handing null to the Instances constructor.
            throw new FileNotFoundException(filename);
        }
        try {
            return new Instances(reader);
        } finally {
            reader.close();
        }
    }

    /**
     * Builds the hand-made sample instance for the given data set.
     * The instance has as many values as {@code header} has attributes;
     * {@link #SAMPLE_VALUES} are assigned in order to the non-class attributes
     * and everything else (including the class) is left missing.
     *
     * @param header data set whose attribute description the instance adopts
     * @return the sample instance, attached to {@code header}
     */
    private static Instance buildSample(Instances header) {
        Instance sample = new DenseInstance(header.numAttributes());
        // Attach the instance to the data set so it inherits its attribute description.
        sample.setDataset(header);

        int classIndex = header.classIndex();
        int next = 0;
        for (int att = 0; att < header.numAttributes() && next < SAMPLE_VALUES.length; att++) {
            if (att == classIndex) {
                continue; // the class value is what the model has to predict
            }
            sample.setValue(att, SAMPLE_VALUES[next]);
            next++;
        }
        return sample;
    }

    /**
     * Entry point.
     *
     * @param args optional: {@code args[0]} is the path of the ARFF file to use;
     *             defaults to {@code breast-cancer-wisconsin.arff}
     * @throws Exception if the data cannot be loaded or a classifier fails
     */
    public static void main(String[] args) throws Exception {

        String filename = args.length > 0 ? args[0] : DEFAULT_DATA_FILE;

        Instances data = loadData(filename);
        // The last attribute is taken as the class to predict.
        data.setClassIndex(data.numAttributes() - 1);

        // Number of folds for the validation split (4 = 75% train, 25% test).
        Instances[] parts = split(data, 4);
        Instances trainingSplits = parts[0];
        Instances testingSplits = parts[1];

        // Classifiers to run; add further Classifier instances here.
        Classifier[] models = { new MultilayerPerceptron() };

        for (int j = 0; j < models.length; j++) {

            // Train and test the current classifier, then collect its predictions.
            Evaluation validation = simpleClassify(models[j], trainingSplits, testingSplits);
            FastVector predictions = new FastVector();
            predictions.appendElements(validation.predictions());

            // Print the textual description of the trained model.
            System.out.println(models[j].toString());

            // Accuracy of the current classifier on the testing split.
            double accuracy = calculateAccuracy(predictions);

            System.out.println(models[j].getClass().getSimpleName() + " Accuracy: "
                    + String.format("%.2f%%", accuracy) + "\n=====================");

            // Use the trained classifier on a new, hand-built instance.
            Instance iUse = buildSample(trainingSplits);

            // Likelihood of each class for the sample.
            // NOTE(review): the labels below assume a two-class problem in which
            // class 0 is "positive" and class 1 is "negative" - confirm against
            // the class attribute declared in the ARFF file.
            double[] fDistribution = models[j].distributionForInstance(iUse);

            System.out.println("Probabilidad positivo: " + fDistribution[0]);
            System.out.println("Probabilidad negativo: " + fDistribution[1]);
        }

    }
}