Example usage for weka.core Instances randomize

List of usage examples for weka.core Instances randomize

Introduction

On this page you can find example usages of the weka.core `Instances.randomize` method.

Prototype

public void randomize(Random random) 

Source Link

Document

Shuffles the instances in the set so that they are ordered randomly.

Usage

From source file:br.unicamp.ic.recod.gpsi.gp.gpsiJGAPRoiFitnessFunction.java

/**
 * Evaluates a GP individual: describes the training entities with the evolved
 * band combination, builds a Weka dataset from the resulting feature vectors,
 * and returns the mean accuracy of a 5-fold cross-validated SimpleLogistic
 * model, scaled to [0, 1].
 *
 * @param igpp the GP program (voxel combiner) under evaluation
 * @return mean cross-validated accuracy in [0, 1]; 0.0 if evaluation fails
 */
@Override
protected double evaluate(IGPProgram igpp) {

    double mean_accuracy = 0.0;
    Object[] noargs = new Object[0];

    gpsiRoiBandCombiner roiBandCombinator = new gpsiRoiBandCombiner(new gpsiJGAPVoxelCombiner(super.b, igpp));
    // TODO: The ROI descriptors must combine the images first
    //roiBandCombinator.combineEntity(this.dataset.getTrainingEntities());

    gpsiMLDataset mlDataset = new gpsiMLDataset(this.descriptor);
    try {
        mlDataset.loadWholeDataset(this.dataset, true);
    } catch (Exception ex) {
        Logger.getLogger(gpsiJGAPRoiFitnessFunction.class.getName()).log(Level.SEVERE, null, ex);
    }

    int dimensionality = mlDataset.getDimensionality();
    int n_classes = mlDataset.getTrainingEntities().keySet().size();
    int n_entities = mlDataset.getNumberOfTrainingEntities();
    ArrayList<Byte> listOfClasses = new ArrayList<>(mlDataset.getTrainingEntities().keySet());

    // One numeric attribute per feature dimension plus one nominal class attribute.
    Attribute[] attributes = new Attribute[dimensionality];
    FastVector fvClassVal = new FastVector(n_classes);

    int i, j;
    for (i = 0; i < dimensionality; i++)
        attributes[i] = new Attribute("f" + Integer.toString(i));
    for (i = 0; i < n_classes; i++)
        fvClassVal.addElement(Integer.toString(listOfClasses.get(i)));

    Attribute classes = new Attribute("class", fvClassVal);

    FastVector fvWekaAttributes = new FastVector(dimensionality + 1);

    for (i = 0; i < dimensionality; i++)
        fvWekaAttributes.addElement(attributes[i]);
    fvWekaAttributes.addElement(classes);

    Instances instances = new Instances("Rel", fvWekaAttributes, n_entities);
    instances.setClassIndex(dimensionality);

    Instance iExample;
    for (byte label : mlDataset.getTrainingEntities().keySet()) {
        for (double[] featureVector : mlDataset.getTrainingEntities().get(label)) {
            iExample = new Instance(dimensionality + 1);
            // BUG FIX: the original indexed with the stale outer variable 'i'
            // (left at n_classes by the earlier loop) instead of the loop
            // variable 'j', so feature values were never copied correctly.
            for (j = 0; j < dimensionality; j++)
                iExample.setValue(j, featureVector[j]);
            iExample.setValue(dimensionality, label);
            instances.add(iExample);
        }
    }

    int folds = 5;
    Random rand = new Random();
    Instances randData = new Instances(instances);
    randData.randomize(rand);

    Instances trainingSet, testingSet;
    Classifier cModel;
    Evaluation eTest;
    try {
        for (i = 0; i < folds; i++) {
            // Fresh model per fold; train on the fold's training split and
            // accumulate accuracy on its held-out split.
            cModel = (Classifier) new SimpleLogistic();
            trainingSet = randData.trainCV(folds, i);
            testingSet = randData.testCV(folds, i);

            cModel.buildClassifier(trainingSet);

            eTest = new Evaluation(trainingSet);
            eTest.evaluateModel(cModel, testingSet);

            mean_accuracy += eTest.pctCorrect();

        }
    } catch (Exception ex) {
        Logger.getLogger(gpsiJGAPRoiFitnessFunction.class.getName()).log(Level.SEVERE, null, ex);
    }

    // pctCorrect is a percentage in [0, 100]; dividing by folds*100 yields a
    // mean accuracy in [0, 1].
    mean_accuracy /= (folds * 100);

    return mean_accuracy;

}

From source file:c4.pkg5crossv.Classifier.java

/**
 * Loads the iris ARFF dataset, shuffles it, splits it 60/40 into
 * train/test sets, trains an unpruned J48 tree (min 10 instances per leaf)
 * on the training part, and prints a 10-fold cross-validation summary
 * computed over the held-out part.
 *
 * NOTE(review): crossValidateModel re-trains the supplied tree on folds of
 * testData, so the earlier buildClassifier(trainData) call does not affect
 * the printed results — confirm whether a direct evaluateModel on the
 * trained tree was intended.
 */
public static void trainAndTest() throws FileNotFoundException, IOException, Exception {

    Instances data = DataLoad.loadData("./src/data/irysy.arff");
    data.setClassIndex(data.numAttributes() - 1);

    // Random (unseeded, hence non-reproducible) shuffle, then 60%/40% split.
    data.randomize(new Random());
    double percent = 60.0;
    int trainSize = (int) Math.round(data.numInstances() * percent / 100);
    int testSize = data.numInstances() - trainSize;
    Instances trainData = new Instances(data, 0, trainSize);
    Instances testData = new Instances(data, trainSize, testSize);

    // Unpruned tree, at least 10 instances per leaf.
    String[] options = Utils.splitOptions("-U -M 10");
    J48 tree = new J48();
    tree.setOptions(options);
    tree.buildClassifier(trainData);

    Evaluation eval2 = new Evaluation(trainData);
    eval2.crossValidateModel(tree, testData, 10, new Random(1)); // 10-fold CV on the test split
    System.out.println(eval2.toSummaryString("Wyniki:", false)); // print cross-validation summary
}

From source file:cezeri.evaluater.FactoryEvaluation.java

/**
 * Runs a k-fold cross-validation of the given classifier over a shuffled
 * (and, for nominal classes, stratified) copy of the data, accumulating the
 * per-fold predictions and observed class values into class-level arrays,
 * and optionally plotting and/or printing the overall results.
 *
 * @param model     classifier template; a fresh copy is built per fold
 * @param datax     dataset to evaluate; left unmodified (a copy is shuffled)
 * @param folds     number of cross-validation folds
 * @param show_text if true, prints a summary of the overall evaluation
 * @param show_plot if true, plots observed vs. simulated values
 * @param attr      figure attributes used for the plot
 * @return the Evaluation accumulated over all folds, or null if an error occurred
 */
public static Evaluation performCrossValidate(Classifier model, Instances datax, int folds, boolean show_text,
        boolean show_plot, TFigureAttribute attr) {
    Random rand = new Random(1); // fixed seed: reproducible fold assignment
    Instances randData = new Instances(datax);
    randData.randomize(rand);
    if (randData.classAttribute().isNominal()) {
        // Stratify so each fold preserves the class distribution.
        randData.stratify(folds);
    }
    Evaluation eval = null;
    try {
        // perform cross-validation
        eval = new Evaluation(randData);
        for (int n = 0; n < folds; n++) {
            Instances train = randData.trainCV(folds, n, rand);
            Instances validation = randData.testCV(folds, n);
            // build and evaluate classifier
            Classifier clsCopy = Classifier.makeCopy(model);
            clsCopy.buildClassifier(train);

            // NOTE(review): 'simulated' and 'observed' are fields declared
            // elsewhere in this class; each fold's predictions and
            // ground-truth class values are appended to them here.
            simulated = FactoryUtils.concatenate(simulated, eval.evaluateModel(clsCopy, validation));
            observed = FactoryUtils.concatenate(observed,
                    validation.attributeToDoubleArray(validation.classIndex()));
        }

        if (show_plot) {
            // Two rows: observed values and simulated (predicted) values.
            double[][] d = new double[2][simulated.length];
            d[0] = observed;
            d[1] = simulated;
            CMatrix f1 = CMatrix.getInstance(d);
            attr.figureCaption = "overall performance";
            f1.transpose().plot(attr);
        }
        if (show_text) {
            // output evaluation
            System.out.println();
            System.out.println("=== Setup for Overall Cross Validation===");
            System.out.println(
                    "Classifier: " + model.getClass().getName() + " " + Utils.joinOptions(model.getOptions()));
            System.out.println("Dataset: " + randData.relationName());
            System.out.println("Folds: " + folds);
            System.out.println("Seed: " + 1);
            System.out.println();
            System.out.println(eval.toSummaryString("=== " + folds + "-fold Cross-validation ===", false));
        }
    } catch (Exception ex) {
        Logger.getLogger(FactoryEvaluation.class.getName()).log(Level.SEVERE, null, ex);
    }
    return eval;
}

From source file:cezeri.evaluater.FactoryEvaluation.java

/**
 * Leave-one-out cross-validation of the given classifier over a shuffled
 * copy of 'datax' (the fold count equals the number of instances).
 * Per-fold predictions and observed class values are appended to the
 * class-level 'simulated'/'observed' arrays, and the overall results are
 * optionally plotted and/or printed.
 *
 * NOTE(review): despite the method name, the 'test' parameter is never
 * used — all evaluation happens on folds of 'datax'. Confirm intent with
 * the callers before relying on it.
 *
 * @return the Evaluation accumulated over all folds, or null on error
 */
public static Evaluation performCrossValidateTestAlso(Classifier model, Instances datax, Instances test,
        boolean show_text, boolean show_plot) {
    TFigureAttribute attr = new TFigureAttribute();
    Random rand = new Random(1); // fixed seed: reproducible shuffle
    Instances randData = new Instances(datax);
    randData.randomize(rand);

    Evaluation eval = null;
    int folds = randData.numInstances(); // one fold per instance: leave-one-out
    try {
        eval = new Evaluation(randData);
        for (int n = 0; n < folds; n++) {
            Instances train = randData.trainCV(folds, n);
            Classifier clsCopy = Classifier.makeCopy(model);
            clsCopy.buildClassifier(train);
            Instances validation = randData.testCV(folds, n);

            // 'simulated'/'observed' are fields declared elsewhere in this class.
            simulated = FactoryUtils.concatenate(simulated, eval.evaluateModel(clsCopy, validation));
            observed = FactoryUtils.concatenate(observed,
                    validation.attributeToDoubleArray(validation.classIndex()));
        }

        if (show_plot) {
            // Two rows: observed values and simulated (predicted) values.
            double[][] d = new double[2][simulated.length];
            d[0] = observed;
            d[1] = simulated;
            CMatrix f1 = CMatrix.getInstance(d);
            attr.figureCaption = "overall performance";
            f1.transpose().plot(attr);
        }
        if (show_text) {
            // output evaluation
            System.out.println();
            System.out.println("=== Setup for Overall Cross Validation===");
            System.out.println(
                    "Classifier: " + model.getClass().getName() + " " + Utils.joinOptions(model.getOptions()));
            System.out.println("Dataset: " + randData.relationName());
            System.out.println("Folds: " + folds);
            System.out.println("Seed: " + 1);
            System.out.println();
            System.out.println(eval.toSummaryString("=== " + folds + "-fold Cross-validation ===", false));
        }
    } catch (Exception ex) {
        Logger.getLogger(FactoryEvaluation.class.getName()).log(Level.SEVERE, null, ex);
    }
    return eval;
}

From source file:cezeri.evaluater.FactoryEvaluation.java

/**
 * Evaluates 'model' on the held-out 'test' set. If 'isTrained' is false, a
 * fresh copy of the classifier is first trained on 'train'; otherwise the
 * supplied (already trained) model is used as-is. Predictions and observed
 * class values are stored in the class-level 'simulated'/'observed' fields,
 * and the results are optionally plotted and/or printed.
 *
 * @param isTrained whether 'model' is already trained
 * @param model     classifier to evaluate
 * @param train     training data (also seeds the Evaluation's priors)
 * @param test      evaluation data
 * @param show_text if true, prints the test summary
 * @param show_plot if true, plots observed vs. simulated values
 * @param attr      figure attributes used for the plot
 * @return the Evaluation over 'test', or null if an error occurred
 */
private static Evaluation doTest(boolean isTrained, Classifier model, Instances train, Instances test,
        boolean show_text, boolean show_plot, TFigureAttribute attr) {
    // FIX: removed dead code — the original created and randomized a copy of
    // 'train' ("data") that was never used afterwards.
    Evaluation eval = null;
    try {
        eval = new Evaluation(train);
        if (isTrained) {
            simulated = eval.evaluateModel(model, test);
        } else {
            Classifier clsCopy = Classifier.makeCopy(model);
            clsCopy.buildClassifier(train);
            simulated = eval.evaluateModel(clsCopy, test);
        }
        if (show_plot) {
            observed = test.attributeToDoubleArray(test.classIndex());
            // Two rows: observed values and simulated (predicted) values.
            double[][] d = new double[2][simulated.length];
            d[0] = observed;
            d[1] = simulated;
            CMatrix f1 = CMatrix.getInstance(d);
            String[] items = { "Observed", "Simulated" };
            attr.items = items;
            attr.figureCaption = model.getClass().getCanonicalName();
            f1.transpose().plot(attr);
        }
        if (show_text) {
            System.out.println();
            System.out.println("=== Setup for Test ===");
            System.out.println(
                    "Classifier: " + model.getClass().getName() + " " + Utils.joinOptions(model.getOptions()));
            System.out.println("Dataset: " + test.relationName());
            System.out.println();
            System.out.println(eval.toSummaryString("=== Test Results ===", false));
        }
    } catch (Exception ex) {
        Logger.getLogger(FactoryEvaluation.class.getName()).log(Level.SEVERE, null, ex);
    }
    return eval;
}

From source file:cezeri.feature.selection.FeatureSelectionInfluence.java

/**
 * Ranks the influence of each input attribute on classification accuracy:
 * computes a baseline k-fold cross-validated accuracy on the full attribute
 * set, then for each attribute re-evaluates with that attribute removed and
 * records the accuracy drop (baseline minus reduced accuracy).
 *
 * @param filePath path to a dataset readable by Weka's DataSource
 * @param model    classifier used for every evaluation
 * @return one Influence per non-class attribute, sorted by
 *         sortInfluenceArray; null if loading or evaluation failed
 */
public static Influence[] getMostDiscriminativeFeature(String filePath, Classifier model) {
    Influence[] ret = null;
    try {
        Instances data = DataSource.read(filePath);
        ret = new Influence[data.numAttributes() - 1];
        data.setClassIndex(data.numAttributes() - 1);
        // other options
        int seed = 1;
        int folds = 10;
        // randomize data
        Instances randData = new Instances(data);
        Random rand = new Random(seed);
        randData.randomize(rand);
        Evaluation evalBase = getEvaluation(randData, model, folds);
        double accBase = evalBase.correct() / evalBase.numInstances() * 100;
        double nf = randData.numAttributes();

        for (int j = 0; j < nf - 1; j++) {
            ret[j] = new Influence();
            ret[j].attributeName = randData.attribute(j).name();
            // BUG FIX: evaluate on a copy with attribute j deleted. The
            // original deleted attribute j from randData and re-inserted it
            // afterwards, but Instances.insertAttributeAt inserts the
            // attribute with all values set to missing, so every iteration
            // after the first ran on corrupted data.
            Instances reduced = new Instances(randData);
            reduced.deleteAttributeAt(j);
            Evaluation evalTemp = getEvaluation(reduced, model, folds);
            double accTemp = evalTemp.correct() / evalTemp.numInstances() * 100;
            ret[j].infVal = accBase - accTemp;
        }
        sortInfluenceArray(ret);
    } catch (Exception ex) {
        Logger.getLogger(FeatureSelectionInfluence.class.getName()).log(Level.SEVERE, null, ex);
    }
    return ret;
}

From source file:com.mycompany.id3classifier.ID3Shell.java

/**
 * Entry point: loads the lenses dataset, discretizes and standardizes it,
 * shuffles with a fixed seed, and reports 10-fold cross-validation results
 * for the custom ID3 classifier.
 */
public static void main(String[] args) throws Exception {
    ConverterUtils.DataSource source = new ConverterUtils.DataSource("lensesData.csv");
    Instances dataSet = source.getDataSet();

    // Discretize attributes (ID3 works on nominal values).
    Discretize filter = new Discretize();
    filter.setInputFormat(dataSet);
    dataSet = Filter.useFilter(dataSet, filter);

    Standardize standardize = new Standardize();
    standardize.setInputFormat(dataSet);
    dataSet = Filter.useFilter(dataSet, standardize);

    dataSet.setClassIndex(dataSet.numAttributes() - 1);
    dataSet.randomize(new Random(9001)); // fixed seed for reproducibility

    int folds = 10;
    // Perform cross-validation.
    Evaluation eval = new Evaluation(dataSet);
    for (int n = 0; n < folds; n++) {
        // FIX: removed unused 70/30 split-size computations the original
        // recomputed (and ignored) on every fold.
        Instances trainingData = dataSet.trainCV(folds, n);
        Instances testData = dataSet.testCV(folds, n);

        ID3Classifier classifier = new ID3Classifier();
        // Id3 classifier = new Id3();
        classifier.buildClassifier(trainingData);

        eval.evaluateModel(classifier, testData);
    }
    System.out.println(eval.toSummaryString("\nResults:\n", false));
}

From source file:com.mycompany.knnclassifier.kNNShell.java

/**
 * Entry point: loads the car dataset, standardizes it, shuffles with a
 * fixed seed, splits 70/30 into train/test sets, trains the custom
 * 3-nearest-neighbour classifier, and prints an evaluation summary.
 */
public static void main(String[] args) throws Exception {
    ConverterUtils.DataSource source = new ConverterUtils.DataSource("carData.csv");
    Instances dataSet = source.getDataSet();

    // Standardize all numeric attributes to zero mean / unit variance.
    Standardize standardize = new Standardize();
    standardize.setInputFormat(dataSet);
    dataSet = Filter.useFilter(dataSet, standardize);

    dataSet.setClassIndex(dataSet.numAttributes() - 1);
    dataSet.randomize(new Random(9001)); // fixed shuffle seed for reproducibility

    // 70% training / 30% testing split.
    int numTrain = (int) Math.round(dataSet.numInstances() * .7);
    int numTest = dataSet.numInstances() - numTrain;
    Instances trainingData = new Instances(dataSet, 0, numTrain);
    Instances testData = new Instances(dataSet, numTrain, numTest);

    kNNClassifier classifier = new kNNClassifier(3);
    classifier.buildClassifier(trainingData);

    // Swap in Weka's IBk to compare against the built-in KNN:
    //Classifier classifier = new IBk(1);
    //classifier.buildClassifier(trainingData);

    Evaluation eval = new Evaluation(trainingData);
    eval.evaluateModel(classifier, testData);

    System.out.println(eval.toSummaryString("\nResults:\n", false));
}

From source file:com.mycompany.neuralnetwork.NeuralNetworkShell.java

/**
 * Entry point: loads the iris dataset, standardizes it, shuffles with a
 * fixed seed, splits 70/30 into train/test sets, trains the custom neural
 * network classifier, and prints an evaluation summary.
 */
public static void main(String[] args) throws Exception {
    ConverterUtils.DataSource source = new ConverterUtils.DataSource("irisData.csv");
    Instances dataSet = source.getDataSet();

    // Standardize numeric attributes before training.
    Standardize standardize = new Standardize();
    standardize.setInputFormat(dataSet);
    dataSet = Filter.useFilter(dataSet, standardize);
    dataSet.setClassIndex(dataSet.numAttributes() - 1);
    dataSet.randomize(new Random(9001)); // fixed shuffle seed for reproducibility

    // 70% training / 30% testing split.
    int numTrain = (int) Math.round(dataSet.numInstances() * .7);
    int numTest = dataSet.numInstances() - numTrain;
    Instances trainingData = new Instances(dataSet, 0, numTrain);
    Instances testData = new Instances(dataSet, numTrain, numTest);

    // Swap in Weka's MultilayerPerceptron to compare against the custom net:
    //MultilayerPerceptron classifier = new MultilayerPerceptron();
    NeuralNetworkClassifier classifier = new NeuralNetworkClassifier(3, 20000, 0.1);
    classifier.buildClassifier(trainingData);

    Evaluation eval = new Evaluation(trainingData);
    eval.evaluateModel(classifier, testData);

    System.out.println(eval.toSummaryString("\nResults:\n", false));
}

From source file:com.reactivetechnologies.analytics.core.eval.BaggingWithBuiltClassifiers.java

License:Open Source License

/**
 * Bagging variant for classifiers that are assumed to be built already:
 * performs the bootstrap bookkeeping (which instances fell into each bag)
 * and, if requested, computes the out-of-bag error estimate — but the
 * per-bag training calls are deliberately commented out ("use supplied
 * classifier").
 *
 * @param data training instances; a copy is taken, so the caller's dataset
 *             is not modified
 * @throws Exception if the classifier capabilities reject the data, or if
 *                   out-of-bag error is requested with a bag size != 100%
 */
@Override
public void buildClassifier(Instances data) throws Exception {

    // can classifier handle the data?
    getCapabilities().testWithFail(data);

    // remove instances with missing class
    data = new Instances(data);
    data.deleteWithMissingClass();

    /** Changed here: Use supplied classifier */
    //super.buildClassifier(data);
    /** End change */

    // OOB estimation is only valid when every bag is a full-size bootstrap
    // sample of the training data.
    if (m_CalcOutOfBag && (m_BagSizePercent != 100)) {
        throw new IllegalArgumentException(
                "Bag size needs to be 100% if " + "out-of-bag error is to be calculated!");
    }

    int bagSize = (int) (data.numInstances() * (m_BagSizePercent / 100.0));
    Random random = new Random(m_Seed);

    // inBag[j][i] == true iff instance i was sampled into classifier j's bag.
    boolean[][] inBag = null;
    if (m_CalcOutOfBag)
        inBag = new boolean[m_Classifiers.length][];

    for (int j = 0; j < m_Classifiers.length; j++) {
        Instances bagData = null;

        // create the in-bag dataset
        if (m_CalcOutOfBag) {
            inBag[j] = new boolean[data.numInstances()];
            bagData = data.resampleWithWeights(random, inBag[j]);
        } else {
            bagData = data.resampleWithWeights(random);
            if (bagSize < data.numInstances()) {
                // Shrink the bag: shuffle, then keep the first bagSize instances.
                bagData.randomize(random);
                Instances newBagData = new Instances(bagData, 0, bagSize);
                bagData = newBagData;
            }
        }

        /** Changed here: Use supplied classifier */
        /*if (m_Classifier instanceof Randomizable) {
          ((Randomizable) m_Classifiers[j]).setSeed(random.nextInt());
        }
                
        // build the classifier
        m_Classifiers[j].buildClassifier(bagData);*/
        /** End change */
    }

    // calc OOB error?
    if (getCalcOutOfBag()) {
        double outOfBagCount = 0.0;
        double errorSum = 0.0;
        boolean numeric = data.classAttribute().isNumeric();

        for (int i = 0; i < data.numInstances(); i++) {
            double vote;
            double[] votes;
            // Numeric class: a single running sum; nominal: one slot per class.
            if (numeric)
                votes = new double[1];
            else
                votes = new double[data.numClasses()];

            // determine predictions for instance i using only the
            // classifiers whose bag did NOT contain it (out-of-bag voters)
            int voteCount = 0;
            for (int j = 0; j < m_Classifiers.length; j++) {
                if (inBag[j][i])
                    continue;

                voteCount++;
                if (numeric) {
                    votes[0] += m_Classifiers[j].classifyInstance(data.instance(i));
                } else {
                    double[] newProbs = m_Classifiers[j].distributionForInstance(data.instance(i));
                    // average the probability estimates
                    for (int k = 0; k < newProbs.length; k++) {
                        votes[k] += newProbs[k];
                    }
                }
            }

            // "vote"
            if (numeric) {
                vote = votes[0];
                if (voteCount > 0) {
                    vote /= voteCount; // average
                }
            } else {
                if (Utils.eq(Utils.sum(votes), 0)) {
                    // no out-of-bag classifier voted; leave votes all-zero
                } else {
                    Utils.normalize(votes);
                }
                vote = Utils.maxIndex(votes); // predicted class
            }

            // weighted error contribution for this instance
            outOfBagCount += data.instance(i).weight();
            if (numeric) {
                errorSum += StrictMath.abs(vote - data.instance(i).classValue()) * data.instance(i).weight();
            } else {
                if (vote != data.instance(i).classValue())
                    errorSum += data.instance(i).weight();
            }
        }

        m_OutOfBagError = errorSum / outOfBagCount;
    } else {
        m_OutOfBagError = 0;
    }
}