Example usage for weka.core Instances randomize

List of usage examples for weka.core Instances randomize

Introduction

On this page you can find usage examples for weka.core.Instances.randomize.

Prototype

public void randomize(Random random) 

Document

Shuffles the instances in the set so that they are ordered randomly.
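
Before the full examples, here is a minimal, self-contained sketch of the pattern they share: shuffle a copy of the data with a seeded Random, then take a train/test split. The dataset path and class name are placeholders, not taken from any of the examples below.

import java.util.Random;

import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class RandomizeSketch {
    public static void main(String[] args) throws Exception {
        // Load a dataset (placeholder path; substitute your own ARFF/CSV file)
        Instances data = new DataSource("data/iris.arff").getDataSet();
        data.setClassIndex(data.numAttributes() - 1);

        // Shuffle in place with a fixed seed so the split is reproducible
        data.randomize(new Random(42));

        // Simple 70/30 split of the shuffled data
        int trainSize = (int) Math.round(data.numInstances() * 0.7);
        Instances train = new Instances(data, 0, trainSize);
        Instances test = new Instances(data, trainSize, data.numInstances() - trainSize);

        System.out.println("Train: " + train.numInstances() + ", Test: " + test.numInstances());
    }
}
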

Usage

From source file:knnclassifier.Main.java

public static void main(String[] args) throws Exception {
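    // 'file' is a dataset path defined elsewhere in the original class (not shown in this excerpt)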

    DataSource source = new DataSource(file);
    Instances dataSet = source.getDataSet();

    //Set up data
    dataSet.setClassIndex(dataSet.numAttributes() - 1);
    dataSet.randomize(new Random());

    int trainingSize = (int) Math.round(dataSet.numInstances() * .7);
    int testSize = dataSet.numInstances() - trainingSize;

    Instances training = new Instances(dataSet, 0, trainingSize);

    Instances test = new Instances(dataSet, trainingSize, testSize);

    Standardize standardizedData = new Standardize();
    standardizedData.setInputFormat(training);

    Instances newTest = Filter.useFilter(test, standardizedData);
    Instances newTraining = Filter.useFilter(training, standardizedData);

    KNNClassifier knn = new KNNClassifier();
    knn.buildClassifier(newTraining);

    Evaluation eval = new Evaluation(newTraining);
    eval.evaluateModel(knn, newTest);

    System.out.println(eval.toSummaryString("\nResults\n======\n", false));
}

From source file:liac.igmn.evaluation.Evaluator.java

License:Open Source License

public void crossValidation(IGMN model, Dataset dataset, int numFolds, int runs, boolean randomize) {
    confusionMatrix = new ConfusionMatrix(dataset.getClassesNames());

    Instances instances = dataset.getWekaDataset();
    int seed = 1;
    for (int run = 0; run < runs; run++) {
        if (randomize) {
            instances.randomize(new Random(seed));
            seed += 1;
        }

        if (verbose)
            System.out.println("RUN: " + (run + 1));

        for (int n = 0; n < numFolds; n++) {
            Instances train = instances.trainCV(numFolds, n);
            Instances test = instances.testCV(numFolds, n);

            SimpleMatrix trainData = MatrixUtil.instancesToMatrix(train);
            SimpleMatrix testData = MatrixUtil.instancesToMatrix(test);

            model.reset();

            if (verbose)
                System.out.println("TRAINING FOLD: " + (n + 1));

            model.train(trainData);

            if (verbose)
                System.out.println("TESTING...");

            SimpleMatrix testInputs = testData.extractMatrix(0, dataset.getInputSize(), 0, SimpleMatrix.END);
            SimpleMatrix testTargets = testData.extractMatrix(dataset.getInputSize(),
                    dataset.getNumAttributes(), 0, SimpleMatrix.END);
            for (int i = 0; i < testInputs.numCols(); i++) {
                SimpleMatrix y = model.classify(testInputs.extractVector(false, i));
                SimpleMatrix target = testTargets.extractVector(false, i);

                int tInd = MatrixUtil.maxElementIndex(target);
                int yInd = MatrixUtil.maxElementIndex(y);

                confusionMatrix.addPrediction(tInd, yInd);
            }
        }
    }
    confusionMatrix.set(confusionMatrix.divide(runs));
}

From source file:machinelearningcw.EnhancedLinearPerceptron.java

public boolean crossValidation(Instances ins) throws Exception {
    //get the data
    Instances data = new Instances(ins);
    Instances train;// the new training data
    Instances test; // the new testing data

    int seed = 0;
    Random rand = new Random(seed);
    //randomize the data
    data.randomize(rand);

    //number of folds
    int folds = 10;
    int offlineErrors = 0;
    int onlineErrors = 0;

    for (int i = 0; i < folds; i++) {
        train = data.trainCV(folds, i);
        test = data.testCV(folds, i);

        // add up the total errors for each approach
        offlinePerceptron(train);
        for (Instance inst : test) {
            if (classifyInstance(inst) != inst.classValue()) {
                offlineErrors += 1;
            }

        }
        //reset w
        Arrays.fill(w, 1);
        perceptron(train);
        for (Instance inst : test) {
            if (classifyInstance(inst) != inst.classValue()) {
                onlineErrors += 1;
            }
        }

    }
    //  System.out.println(" off: " + offlineErrors);
    //    System.out.println(" on: " + onlineErrors);
    //calculate the mean of the total errors
    offlineErrors = offlineErrors / folds;
    onlineErrors = onlineErrors / folds;
    // System.out.println(flag);
    return offlineErrors > onlineErrors;

}

From source file:mao.datamining.ModelProcess.java

private void testCV(Classifier classifier, Instances finalTrainDataSet, FileOutputStream testCaseSummaryOut,
        TestResult result) {
    long start, end, trainTime = 0, testTime = 0;
    Evaluation evalAll = null;
    double confusionMatrix[][] = null;
    // randomize data, and then stratify it into 10 groups
    Random rand = new Random(1);
    Instances randData = new Instances(finalTrainDataSet);
    randData.randomize(rand);
    if (randData.classAttribute().isNominal()) {
        // always run with 10-fold cross-validation
        randData.stratify(folds);
    }

    try {
        evalAll = new Evaluation(randData);
        for (int i = 0; i < folds; i++) {
            Evaluation eval = new Evaluation(randData);
            Instances train = randData.trainCV(folds, i);
            Instances test = randData.testCV(folds, i);
            // count training time
            start = System.currentTimeMillis();
            Classifier j48ClassifierCopy = Classifier.makeCopy(classifier);
            j48ClassifierCopy.buildClassifier(train);
            end = System.currentTimeMillis();
            trainTime += end - start;

            //counting test time
            start = System.currentTimeMillis();
            eval.evaluateModel(j48ClassifierCopy, test);
            evalAll.evaluateModel(j48ClassifierCopy, test);
            end = System.currentTimeMillis();
            testTime += end - start;
        }

    } catch (Exception e) {
        ModelProcess.logging(null, e);
    } //end test by cross validation

    // output evaluation
    try {
        ModelProcess.logging("");
        //write into summary file
        testCaseSummaryOut
                .write((evalAll.toSummaryString("=== Cross Validation Summary ===", true)).getBytes());
        testCaseSummaryOut.write("\n".getBytes());
        testCaseSummaryOut.write(
                (evalAll.toClassDetailsString("=== " + folds + "-fold Cross-validation Class Detail ===\n"))
                        .getBytes());
        testCaseSummaryOut.write("\n".getBytes());
        testCaseSummaryOut
                .write((evalAll.toMatrixString("=== Confusion matrix for all folds ===\n")).getBytes());
        testCaseSummaryOut.flush();

        confusionMatrix = evalAll.confusionMatrix();
        result.setConfusionMatrix10Folds(confusionMatrix);
    } catch (Exception e) {
        ModelProcess.logging(null, e);
    }
}

From source file:meka.classifiers.multilabel.BRq.java

License:Open Source License

@Override
public void buildClassifier(Instances data) throws Exception {
    testCapabilities(data);

    int c = data.classIndex();

    if (getDebug())
        System.out.print("-: Creating " + c + " models (" + m_Classifier.getClass().getName() + "): ");
    m_MultiClassifiers = AbstractClassifier.makeCopies(m_Classifier, c);

    Instances sub_data = null;

    for (int i = 0; i < c; i++) {

        int indices[][] = new int[c][c - 1];
        for (int j = 0, k = 0; j < c; j++) {
            if (j != i) {
                indices[i][k++] = j;
            }
        }

        //Select only class attribute 'i'
        Remove FilterRemove = new Remove();
        FilterRemove.setAttributeIndicesArray(indices[i]);
        FilterRemove.setInputFormat(data);
        FilterRemove.setInvertSelection(true);
        sub_data = Filter.useFilter(data, FilterRemove);
        sub_data.setClassIndex(0);
        /* BEGIN downsample for this link */
        sub_data.randomize(m_Random);
        int numToRemove = sub_data.numInstances()
                - (int) Math.round(sub_data.numInstances() * m_DownSampleRatio);
        for (int m = 0, removed = 0; m < sub_data.numInstances(); m++) {
            if (sub_data.instance(m).classValue() <= 0.0) {
                sub_data.instance(m).setClassMissing();
                if (++removed >= numToRemove)
                    break;
            }
        }
        sub_data.deleteWithMissingClass();
        /* END downsample for this link */

        //Build the classifier for that class
        m_MultiClassifiers[i].buildClassifier(sub_data);
        if (getDebug())
            System.out.print(" " + (i + 1));

    }

    if (getDebug())
        System.out.println(" :-");

    m_InstancesTemplate = new Instances(sub_data, 0);

}

From source file:meka.classifiers.multilabel.Evaluation.java

License:Open Source License

/**
 * RunExperiment - Build and evaluate a model with command-line options.
 * @param   h      multi-label classifier
 * @param   options   command line options
 */
public static void runExperiment(MultiLabelClassifier h, String options[]) throws Exception {

    // Help
    if (Utils.getOptionPos('h', options) >= 0) {
        System.out.println("\nHelp requested");
        Evaluation.printOptions(h.listOptions());
        return;
    }

    h.setOptions(options);

    if (h.getDebug())
        System.out.println("Loading and preparing dataset ...");

    // Load Instances from a file
    Instances D_train = loadDataset(options);

    Instances D_full = D_train;

    // Try extract and set a class index from the @relation name
    MLUtils.prepareData(D_train);

    // Override the number of classes with command-line option (optional)
    if (Utils.getOptionPos('C', options) >= 0) {
        int L = Integer.parseInt(Utils.getOption('C', options));
        D_train.setClassIndex(L);
    }

    // If we still haven't found the -C option, we can't continue (we don't know how many labels)
    int L = D_train.classIndex();
    if (L <= 0) {
        throw new Exception(
                "[Error] Number of labels not specified.\n\tYou must set the number of labels with the -C option, either inside the @relation tag of the Instances file, or on the command line.");
        // apparently the dataset didn't contain the '-C' flag, check in the command line options ...
    }

    // Randomize (Instances) 
    int seed = (Utils.getOptionPos('s', options) >= 0) ? Integer.parseInt(Utils.getOption('s', options)) : 0;
    if (Utils.getFlag('R', options)) {
        D_train.randomize(new Random(seed));
    }
    boolean Threaded = false;
    if (Utils.getOptionPos("Thr", options) >= 0) {
        Threaded = Utils.getFlag("Thr", options);
    }

    // Verbosity Option
    String voption = "1";
    if (Utils.getOptionPos("verbosity", options) >= 0) {
        voption = Utils.getOption("verbosity", options);
    }

    // Save for later?
    //String fname = null;
    //if (Utils.getOptionPos('f',options) >= 0) {
    //   fname = Utils.getOption('f',options);
    //}
    // Dump for later?
    String dname = null;
    if (Utils.getOptionPos('d', options) >= 0) {
        dname = Utils.getOption('d', options);
    }
    // Load from file?
    String lname = null;
    Instances dataHeader = null;
    if (Utils.getOptionPos('l', options) >= 0) {
        lname = Utils.getOption('l', options);
        Object[] data = SerializationHelper.readAll(lname);
        h = (MultiLabelClassifier) data[0];
        if (data.length > 1)
            dataHeader = (Instances) data[1];
        //Object o[] = SerializationHelper.readAll(lname);
        //h = (MultilabelClassifier)o[0];
    }

    try {

        Result r = null;

        // Threshold option
        String top = "PCut1"; // default
        if (Utils.getOptionPos("threshold", options) >= 0)
            top = Utils.getOption("threshold", options);

        if (Utils.getOptionPos('x', options) >= 0) {
            // CROSS-FOLD-VALIDATION

            int numFolds = MLUtils.getIntegerOption(Utils.getOption('x', options), 10); // default 10
            // Check for remaining options
            Utils.checkForRemainingOptions(options);
            r = Evaluation.cvModel(h, D_train, numFolds, top, voption);
            System.out.println(r.toString());
        } else {
            // TRAIN-TEST SPLIT

            Instances D_test = null;

            if (Utils.getOptionPos('T', options) >= 0) {
                // load separate test set
                try {
                    D_test = loadDataset(options, 'T');
                    MLUtils.prepareData(D_test);
                } catch (Exception e) {
                    throw new Exception("[Error] Failed to Load Test Instances from file.", e);
                }
            } else {
                // split training set into train and test sets
                // default split
                int N_T = (int) (D_train.numInstances() * 0.60);
                if (Utils.getOptionPos("split-percentage", options) >= 0) {
                    // split by percentage
                    double percentTrain = Double.parseDouble(Utils.getOption("split-percentage", options));
                    N_T = (int) Math.round((D_train.numInstances() * (percentTrain / 100.0)));
                } else if (Utils.getOptionPos("split-number", options) >= 0) {
                    // split by number
                    N_T = Integer.parseInt(Utils.getOption("split-number", options));
                }

                int N_t = D_train.numInstances() - N_T;
                D_test = new Instances(D_train, N_T, N_t);
                D_train = new Instances(D_train, 0, N_T);

            }

            // Invert the split?
            if (Utils.getFlag('i', options)) { //boolean INVERT          = Utils.getFlag('i',options);
                Instances temp = D_test;
                D_test = D_train;
                D_train = temp;
            }

            // Check for remaining options
            Utils.checkForRemainingOptions(options);

            if (h.getDebug())
                System.out.println(":- Dataset -: " + MLUtils.getDatasetName(D_train) + "\tL=" + L
                        + "\tD(t:T)=(" + D_train.numInstances() + ":" + D_test.numInstances() + ")\tLC(t:T)="
                        + Utils.roundDouble(MLUtils.labelCardinality(D_train, L), 2) + ":"
                        + Utils.roundDouble(MLUtils.labelCardinality(D_test, L), 2) + ")");

            if (lname != null) {
                // h is already built, and loaded from a file, test it!
                r = testClassifier(h, D_test);

                String t = top;

                if (top.startsWith("PCut")) {
                    // if PCut is specified we need the training data,
                    // so that we can calibrate the threshold!
                    t = MLEvalUtils.getThreshold(r.predictions, D_train, top);
                }
                r = evaluateModel(h, D_test, t, voption);
            } else {
                //check if train and test set size are > 0
                if (D_train.numInstances() > 0 && D_test.numInstances() > 0) {
                    if (Threaded) {
                        r = evaluateModelM(h, D_train, D_test, top, voption);
                    } else {

                        r = evaluateModel(h, D_train, D_test, top, voption);
                    }
                } else {
                    // otherwise just train on full set. Maybe better throw an exception.
                    h.buildClassifier(D_full);

                }
            }

            // @todo, if D_train==null, assume h is already trained
            if (D_train.numInstances() > 0 && D_test.numInstances() > 0) {
                System.out.println(r.toString());
            }
        }

        // Save model to file?
        if (dname != null) {
            dataHeader = new Instances(D_train, 0);
            SerializationHelper.writeAll(dname, new Object[] { h, dataHeader });
        }

    } catch (Exception e) {
        e.printStackTrace();
        Evaluation.printOptions(h.listOptions());
        System.exit(1);
    }

    System.exit(0);
}

From source file:meka.classifiers.multilabel.meta.EnsembleML.java

License:Open Source License

@Override
public void buildClassifier(Instances train) throws Exception {
    testCapabilities(train);

    if (getDebug())
        System.out.print("-: Models: ");

    train = new Instances(train);
    m_Classifiers = ProblemTransformationMethod.makeCopies((ProblemTransformationMethod) m_Classifier,
            m_NumIterations);
    int sub_size = (train.numInstances() * m_BagSizePercent / 100);
    for (int i = 0; i < m_NumIterations; i++) {
        if (getDebug())
            System.out.print("" + i + " ");
        if (m_Classifiers[i] instanceof Randomizable)
            ((Randomizable) m_Classifiers[i]).setSeed(i);
        train.randomize(new Random(m_Seed + i));
        Instances sub_train = new Instances(train, 0, sub_size);
        m_Classifiers[i].buildClassifier(sub_train);
    }

    if (getDebug())
        System.out.println(":-");
}

From source file:meka.classifiers.multilabel.meta.RandomSubspaceML.java

License:Open Source License

@Override
public void buildClassifier(Instances D) throws Exception {
    testCapabilities(D);

    m_InstancesTemplates = new Instances[m_NumIterations];
    m_InstanceTemplates = new Instance[m_NumIterations];

    if (getDebug())
        System.out.println("-: Models: ");

    m_Classifiers = ProblemTransformationMethod.makeCopies((ProblemTransformationMethod) m_Classifier,
            m_NumIterations);

    Random r = new Random(m_Seed);

    int N_sub = (D.numInstances() * m_BagSizePercent / 100);

    int L = D.classIndex();
    int d = D.numAttributes() - L;
    int d_new = d * m_AttSizePercent / 100;
    m_IndicesCut = new int[m_NumIterations][];

    for (int i = 0; i < m_NumIterations; i++) {

        // Downsize the instance space (exactly like in EnsembleML.java)

        if (getDebug())
            System.out.print("\t" + (i + 1) + ": ");
        D.randomize(r);
        Instances D_cut = new Instances(D, 0, N_sub);
        if (getDebug())
            System.out.print("N=" + D.numInstances() + " -> N'=" + D_cut.numInstances() + ", ");

        // Downsize attribute space

        D_cut.setClassIndex(-1);
        int indices_a[] = A.make_sequence(L, d + L);
        A.shuffle(indices_a, r);
        indices_a = Arrays.copyOfRange(indices_a, 0, d - d_new);
        Arrays.sort(indices_a);
        m_IndicesCut[i] = A.invert(indices_a, D.numAttributes());
        D_cut = F.remove(D_cut, indices_a, false);
        D_cut.setClassIndex(L);
        if (getDebug())
            System.out.print(" A:=" + (D.numAttributes() - L) + " -> A'=" + (D_cut.numAttributes() - L) + " ("
                    + m_IndicesCut[i][L] + ",...," + m_IndicesCut[i][m_IndicesCut[i].length - 1] + ")");

        // Train multi-label classifier

        if (m_Classifiers[i] instanceof Randomizable)
            ((Randomizable) m_Classifiers[i]).setSeed(m_Seed + i);
        if (getDebug())
            System.out.println(".");

        m_Classifiers[i].buildClassifier(D_cut);
        m_InstanceTemplates[i] = D_cut.instance(1);
        m_InstancesTemplates[i] = new Instances(D_cut, 0);
    }
    if (getDebug())
        System.out.println(":-");
}

From source file:meka.classifiers.multitarget.SCC.java

License:Open Source License

@Override
public void buildClassifier(Instances D) throws Exception {

    int N = D.numInstances(); // only for printouts
    int U = MLUtils.numberOfUniqueCombinations(D); // only for printouts
    int L = D.classIndex();
    rand = new Random(m_S);

    if (!(m_Classifier instanceof MultiTargetClassifier)) {
        throw new Exception(
                "[Error] The base classifier must be multi-target capable, i.e., from meka.classifiers.multitarget.");
    }

    // 0. SPLIT INTO TRAIN AND VALIDATION SET/S
    Instances D_r = new Instances(D);
    D_r.randomize(rand);
    Instances D_train = new Instances(D_r, 0, D_r.numInstances() * i_SPLIT / 100);
    Instances D_test = new Instances(D_r, D_train.numInstances(), D_r.numInstances() - D_train.numInstances());

    // 1. BUILD BR or EBR
    if (getDebug())
        System.out.print("1. BUILD & Evaluate BR: ");
    CR cr = new CR();
    cr.setClassifier(((ProblemTransformationMethod) m_Classifier).getClassifier()); // assume PT
    Result result_1 = Evaluation.evaluateModel((ProblemTransformationMethod) cr, D_train, D_test, "PCut1", "5");
    double acc1 = (Double) result_1.getMeasurement(i_ErrFn);
    if (getDebug())
        System.out.println(" " + acc1);

    int partition[][] = SuperLabelUtils.generatePartition(A.make_sequence(L), rand);

    // 2. SELECT / MODIFY INDICES (using LEAD technique)
    if (getDebug())
        System.out.println("2. GET ERR-CHI-SQUARED MATRIX: ");
    double MER[][] = StatUtils.condDepMatrix(D_test, result_1);
    if (getDebug())
        System.out.println(MatrixUtils.toString(MER));

    /*
     * 3. SIMULATED ANNEALING
     * Always accept if best, progressively less likely accept otherwise.
     */
    if (getDebug())
        System.out.println("3. COMBINE NODES TO FIND THE BEST COMBINATION ACCORDING TO CHI");
    double w = rating(partition, MER);
    if (getDebug())
        System.out.println("@0 : " + SuperLabelUtils.toString(partition) + "\t(" + w + ")");

    for (int i = 0; i < m_I; i++) {
        int partition_[][] = mutateCombinations(MatrixUtils.deep_copy(partition), rand);
        double w_ = rating(partition_, MER); // this is really p_MER(partition_)
        if (w_ > w) {
            // ACCEPT
            partition = partition_;
            w = w_;
            if (getDebug())
                System.out.println("@" + i + " : " + SuperLabelUtils.toString(partition) + "\t(" + w + ")");
        } else {
            // MAYBE ACCEPT
            double diff = Math.abs(w_ - w);
            double p = (2. * (1. - sigma(diff * i / 1000.)));
            if (p > rand.nextDouble()) {
                // OK, ACCEPT NOW
                if (getDebug())
                    System.out.println(
                            "@" + i + " : " + SuperLabelUtils.toString(partition_) + "\t(" + w_ + ")*");
                partition = partition_;
                w = w_;
            }
        }

    }

    /*
     * METHOD 2
     * refine the set we started with above, with a few iterations.
     * we mutate a set, and accept whenever the classification performance is GREATER
     */
    if (m_Iv > 0) {
        if (getDebug())
            System.out.println("4. REFINING THE INITIAL SET WITH SOME OLD-FASHIONED INTERNAL EVAL");
        // Build & evaluate the classifier with the latest partition
        result_1 = testClassifier((ProblemTransformationMethod) m_Classifier, D_train, D_test, partition);
        w = (Double) result_1.getMeasurement(i_ErrFn);
        if (getDebug())
            System.out.println("@0 : " + SuperLabelUtils.toString(partition) + "\t(" + w + ")");
        for (int i = 0; i < m_Iv; i++) {
            int partition_[][] = mutateCombinations(MatrixUtils.deep_copy(partition), rand);
            // Build the classifier with the new combination
            trainClassifier(m_Classifier, D_train, partition);
            // Evaluate on D_test
            Result result_2 = testClassifier((ProblemTransformationMethod) m_Classifier, D_train, D_test,
                    partition_);
            double w_ = (Double) result_2.getMeasurement(i_ErrFn);
            if (w_ > w) {
                w = w_;
                partition = partition_;
                if (getDebug())
                    System.out.println(
                            "@" + (i + 1) + "' : " + SuperLabelUtils.toString(partition) + "\t(" + w + ")");
            }
        }
    }

    // 4. DECIDE HOW GOOD THEY ARE, COMPARE EACH LABEL TO BR-result?
    if (getDebug())
        System.out.println("4. TRAIN " + SuperLabelUtils.toString(partition));
    trainClassifier(m_Classifier, D, partition);

    if (getDebug()) {
        //System.out.println("E_acc P "+m_P+" "+(mt.m_InstancesTemplate.numInstances()/(double)N) +" "+(MLUtils.numberOfUniqueCombinations(mt.m_InstancesTemplate)/(double)U));
    }
    // 5. MOVE ON ...
}

From source file:meka.core.StatUtils.java

License:Open Source License

/**
 * LEAD - Performs LEAD on dataset 'D', using BR with base classifier 'h', under random seed 'r'.
 * <br>
 * WARNING: changing this method will affect the performance of e.g., BCC -- on the other hand the original BCC paper did not use LEAD, so don't worry.
 */
public static double[][] LEAD(Instances D, Classifier h, Random r) throws Exception {
    Instances D_r = new Instances(D);
    D_r.randomize(r);
    Instances D_train = new Instances(D_r, 0, D_r.numInstances() * 60 / 100);
    Instances D_test = new Instances(D_r, D_train.numInstances(), D_r.numInstances() - D_train.numInstances());
    BR br = new BR();
    br.setClassifier(h);
    Result result = Evaluation.evaluateModel((MultiLabelClassifier) br, D_train, D_test, "PCut1", "1");
    return LEAD2(D_test, result);
}