Example usage for weka.core Instances randomize

List of usage examples for weka.core Instances randomize

Introduction

In this page you can find the example usage for weka.core Instances randomize.

Prototype

public void randomize(Random random) 

Source Link

Document

Shuffles the instances in the set so that they are ordered randomly.

Usage

From source file:meka.core.StatUtils.java

License:Open Source License

/**
 * Shuffles a copy of D, performs a 60/40 train/test split, evaluates a BR
 * wrapper around the given base classifier on that split, and delegates to
 * the Result-based LEAD overload on the test portion.
 *
 * @param D      the multi-label dataset
 * @param h      the base classifier to wrap in BR
 * @param r      source of randomness for the shuffle
 * @param MDType the dependency-measurement type, passed through
 * @return the matrix produced by the LEAD(Instances, Result, String) overload
 * @throws Exception if evaluation fails
 */
public static double[][] LEAD(Instances D, Classifier h, Random r, String MDType) throws Exception {
    Instances shuffled = new Instances(D);
    shuffled.randomize(r);
    int trainSize = shuffled.numInstances() * 60 / 100;
    int testSize = shuffled.numInstances() - trainSize;
    Instances trainSet = new Instances(shuffled, 0, trainSize);
    Instances testSet = new Instances(shuffled, trainSize, testSize);
    BR wrapper = new BR();
    wrapper.setClassifier(h);
    Result evalResult = Evaluation.evaluateModel((MultiLabelClassifier) wrapper, trainSet, testSet, "PCut1", "1");

    return LEAD(testSet, evalResult, MDType);
}

From source file:meka.experiment.evaluators.PercentageSplit.java

License:Open Source License

/**
 * Returns the evaluation statistics generated for the dataset.
 *
 * @param classifier    the classifier to evaluate
 * @param dataset       the dataset to evaluate on
 * @return              the statistics
 */
@Override
public List<EvaluationStatistics> evaluate(MultiLabelClassifier classifier, Instances dataset) {
    List<EvaluationStatistics> stats = new ArrayList<>();

    // unless order must be preserved, shuffle a copy with the configured seed
    if (!m_PreserveOrder) {
        dataset = new Instances(dataset);
        dataset.randomize(new Random(m_Seed));
    }

    // percentage split: first m_TrainPercentage% for training, rest for testing
    int numTrain = (int) (dataset.numInstances() * m_TrainPercentage / 100.0);
    Instances trainSet = new Instances(dataset, 0, numTrain);
    Instances testSet = new Instances(dataset, numTrain, dataset.numInstances() - numTrain);

    try {
        Result res = Evaluation.evaluateModel(classifier, trainSet, testSet, m_Threshold, m_Verbosity);
        stats.add(new EvaluationStatistics(classifier, dataset, res));
    } catch (Exception e) {
        handleException("Failed to evaluate dataset '" + dataset.relationName() + "' with classifier: "
                + Utils.toCommandLine(classifier), e);
    }

    // a stop request discards any statistics gathered so far
    if (m_Stopped)
        stats.clear();

    return stats;
}

From source file:meka.gui.explorer.ClassifyTab.java

License:Open Source License

/**
 * Starts the classification.
 */
protected void startClassification() {
    String type;
    Runnable run;
    final Instances data;

    // nothing selected in the experiment combo box -- nothing to run
    if (m_ComboBoxExperiment.getSelectedIndex() == -1)
        return;

    // work on a copy so the tab's dataset keeps its original order
    data = new Instances(getData());
    if (m_Randomize)
        data.randomize(new Random(m_Seed));
    type = m_ComboBoxExperiment.getSelectedItem().toString();
    run = null;

    // build a background Runnable for the selected evaluation type
    switch (type) {
    // m_Folds-fold cross-validation on the full dataset
    case TYPE_CROSSVALIDATION:
        run = new Runnable() {
            @Override
            public void run() {
                MultiLabelClassifier classifier;
                Result result;
                startBusy("Cross-validating...");
                try {
                    classifier = (MultiLabelClassifier) m_GenericObjectEditor.getValue();
                    log(OptionUtils.toCommandLine(classifier));
                    log("Dataset: " + data.relationName());
                    log("Class-index: " + data.classIndex());
                    result = Evaluation.cvModel(classifier, data, m_Folds, m_TOP, m_VOP);
                    // keep an empty header copy of the data alongside the result
                    addResultToHistory(result, new Object[] { classifier, new Instances(data, 0) },
                            classifier.getClass().getName().replace("meka.classifiers.", ""));
                    finishBusy();
                } catch (Exception e) {
                    handleException("Evaluation failed:", e);
                    finishBusy("Evaluation failed: " + e);
                    JOptionPane.showMessageDialog(ClassifyTab.this, "Evaluation failed (CV):\n" + e, "Error",
                            JOptionPane.ERROR_MESSAGE);
                }
            }
        };
        break;

    // single percentage split: first m_SplitPercentage% train, rest test
    case TYPE_TRAINTESTSPLIT:
        run = new Runnable() {
            @Override
            public void run() {
                MultiLabelClassifier classifier;
                Result result;
                int trainSize;
                Instances train;
                Instances test;
                startBusy("Train/test split...");
                try {
                    trainSize = (int) (data.numInstances() * m_SplitPercentage / 100.0);
                    train = new Instances(data, 0, trainSize);
                    test = new Instances(data, trainSize, data.numInstances() - trainSize);
                    classifier = (MultiLabelClassifier) m_GenericObjectEditor.getValue();
                    log(OptionUtils.toCommandLine(classifier));
                    log("Dataset: " + train.relationName());
                    log("Class-index: " + train.classIndex());
                    result = Evaluation.evaluateModel(classifier, train, test, m_TOP, m_VOP);
                    addResultToHistory(result, new Object[] { classifier, new Instances(train, 0) },
                            classifier.getClass().getName().replace("meka.classifiers.", ""));
                    finishBusy();
                } catch (Exception e) {
                    handleException("Evaluation failed (train/test split):", e);
                    finishBusy("Evaluation failed: " + e);
                    JOptionPane.showMessageDialog(ClassifyTab.this, "Evaluation failed:\n" + e, "Error",
                            JOptionPane.ERROR_MESSAGE);
                }
            }
        };
        break;

    // train on the loaded data, test on the user-supplied test set
    case TYPE_SUPPLIEDTESTSET:
        run = new Runnable() {
            @Override
            public void run() {
                MultiLabelClassifier classifier;
                Result result;
                int trainSize;
                Instances train;
                Instances test;
                startBusy("Supplied test...");
                try {
                    train = new Instances(data);
                    MLUtils.prepareData(m_TestInstances);
                    test = new Instances(m_TestInstances);
                    test.setClassIndex(data.classIndex());
                    // headers must match, otherwise evaluation is meaningless
                    String msg = train.equalHeadersMsg(test);
                    if (msg != null)
                        throw new IllegalArgumentException("Train and test set are not compatible:\n" + msg);
                    classifier = (MultiLabelClassifier) m_GenericObjectEditor.getValue();
                    log(OptionUtils.toCommandLine(classifier));
                    log("Dataset: " + train.relationName());
                    log("Class-index: " + train.classIndex());
                    result = Evaluation.evaluateModel(classifier, train, test, m_TOP, m_VOP);
                    addResultToHistory(result, new Object[] { classifier, new Instances(train, 0) },
                            classifier.getClass().getName().replace("meka.classifiers.", ""));
                    finishBusy();
                } catch (Exception e) {
                    // NOTE(review): message says "train/test split" in the
                    // supplied-test-set branch -- likely copy/paste leftover
                    handleException("Evaluation failed (train/test split):", e);
                    finishBusy("Evaluation failed: " + e);
                    JOptionPane.showMessageDialog(ClassifyTab.this, "Evaluation failed:\n" + e, "Error",
                            JOptionPane.ERROR_MESSAGE);
                }
            }
        };
        break;

    // batch-incremental evaluation over m_Samples windows
    case TYPE_BINCREMENTAL:
        run = new Runnable() {
            @Override
            public void run() {
                MultiLabelClassifier classifier;
                Result result;
                startBusy("Incremental...");
                try {
                    classifier = (MultiLabelClassifier) m_GenericObjectEditor.getValue();
                    log(OptionUtils.toCommandLine(classifier));
                    log("Dataset: " + data.relationName());
                    log("Class-index: " + data.classIndex());
                    result = IncrementalEvaluation.evaluateModelBatchWindow(classifier, data, m_Samples, 1.,
                            m_TOP, m_VOP);
                    addResultToHistory(result, new Object[] { classifier, new Instances(data, 0) },
                            classifier.getClass().getName().replace("meka.classifiers.", ""));
                    finishBusy();
                } catch (Exception e) {
                    handleException("Evaluation failed (incremental splits):", e);
                    finishBusy("Evaluation failed: " + e);
                    JOptionPane.showMessageDialog(ClassifyTab.this, "Evaluation failed:\n" + e, "Error",
                            JOptionPane.ERROR_MESSAGE);
                }
            }
        };
        break;

    // prequential (test-then-train) evaluation, window sized from m_Samples
    case TYPE_PREQUENTIAL:
        run = new Runnable() {
            @Override
            public void run() {
                MultiLabelClassifier classifier;
                Result result;
                startBusy("Incremental...");
                try {
                    classifier = (MultiLabelClassifier) m_GenericObjectEditor.getValue();
                    log(OptionUtils.toCommandLine(classifier));
                    log("Dataset: " + data.relationName());
                    log("Class-index: " + data.classIndex());
                    result = IncrementalEvaluation.evaluateModelPrequentialBasic(classifier, data,
                            (data.numInstances() / (m_Samples + 1)), 1., m_TOP, m_VOP);
                    addResultToHistory(result, new Object[] { classifier, new Instances(data, 0) },
                            classifier.getClass().getName().replace("meka.classifiers.", ""));
                    finishBusy();
                } catch (Exception e) {
                    handleException("Evaluation failed (incremental splits):", e);
                    finishBusy("Evaluation failed: " + e);
                    JOptionPane.showMessageDialog(ClassifyTab.this, "Evaluation failed:\n" + e, "Error",
                            JOptionPane.ERROR_MESSAGE);
                }
            }
        };
        break;

    default:
        throw new IllegalStateException("Unhandled evaluation type: " + type);
    }

    // hand the Runnable to the tab's worker infrastructure
    start(run);
}

From source file:mlpoc.MLPOC.java

/**
 * Loads an ARFF dataset from the given file, runs a seeded 10-fold
 * cross-validation of a J48 tree (-C 0.25) over it and prints a summary.
 *
 * @param filename path of the ARFF file to load
 * @return the populated Evaluation, or null if loading or evaluation failed
 */
public static Evaluation crossValidate(String filename) {
    Evaluation eval = null;
    try {
        // try-with-resources: the reader is closed even if parsing throws
        Instances data;
        try (BufferedReader br = new BufferedReader(new FileReader(filename))) {
            data = new Instances(br);
        }
        // class attribute is assumed to be the last column
        data.setClassIndex(data.numAttributes() - 1);

        // classifier: J48 with confidence factor 0.25
        String[] options = { "-C", "0.25" };
        Classifier cls = (Classifier) Utils.forName(Classifier.class, "weka.classifiers.trees.J48", options);

        // other options
        int seed = 2;
        int folds = 10;

        // shuffle a copy of the data; stratification only applies to nominal classes
        Random rand = new Random(seed);
        Instances randData = new Instances(data);
        randData.randomize(rand);
        if (randData.classAttribute().isNominal())
            randData.stratify(folds);

        // perform cross-validation; a fresh classifier copy per fold so
        // folds do not share model state
        eval = new Evaluation(randData);
        for (int n = 0; n < folds; n++) {
            Instances train = randData.trainCV(folds, n);
            Instances test = randData.testCV(folds, n);

            Classifier clsCopy = Classifier.makeCopy(cls);
            clsCopy.buildClassifier(train);
            eval.evaluateModel(clsCopy, test);
        }

        // output evaluation
        System.out.println();
        System.out.println("=== Setup ===");
        System.out
                .println("Classifier: " + cls.getClass().getName() + " " + Utils.joinOptions(cls.getOptions()));
        System.out.println("Dataset: " + data.relationName());
        System.out.println("Folds: " + folds);
        System.out.println("Seed: " + seed);
        System.out.println();
        System.out.println(eval.toSummaryString("Summary for testing", true));
        System.out.println("Correctly Classified Instances: " + eval.correct());
        System.out.println("Percentage of Correctly Classified Instances: " + eval.pctCorrect());
        System.out.println("InCorrectly Classified Instances: " + eval.incorrect());
        System.out.println("Percentage of InCorrectly Classified Instances: " + eval.pctIncorrect());

    } catch (Exception ex) {
        // keep the full stack trace; getMessage() alone can be null
        ex.printStackTrace();
    }
    return eval;
}

From source file:moa.classifiers.AccuracyWeightedEnsemble.java

License:Open Source License

/**
 * Computes the weight of a candidate classifier via cross-validation on
 * the given data chunk.
 * @param candidate Candidate classifier.
 * @param chunk Data chunk of examples.
 * @param numFolds Number of folds in candidate classifier cross-validation.
 * @return Candidate classifier weight.
 */
protected double computeCandidateWeight(Classifier candidate, Instances chunk, int numFolds) {
    // fixed seed keeps the fold assignment deterministic across candidates
    Random random = new Random(1);
    Instances folded = new Instances(chunk);
    folded.randomize(random);
    if (folded.classAttribute().isNominal()) {
        folded.stratify(numFolds);
    }

    double totalWeight = 0.0;
    for (int fold = 0; fold < numFolds; fold++) {
        Instances trainFold = folded.trainCV(numFolds, fold, random);
        Instances testFold = folded.testCV(numFolds, fold);

        // train a fresh copy of the candidate on this fold's training data
        Classifier learner = candidate.copy();
        for (int idx = 0; idx < trainFold.numInstances(); idx++) {
            learner.trainOnInstance(trainFold.instance(idx));
        }

        totalWeight += computeWeight(learner, testFold);
    }

    // average over folds; clamp an infinite average to MAX_VALUE
    double averageWeight = totalWeight / numFolds;
    return Double.isInfinite(averageWeight) ? Double.MAX_VALUE : averageWeight;
}

From source file:moa.tasks.CacheShuffledStream.java

License:Open Source License

@Override
protected Object doTaskImpl(TaskMonitor monitor, ObjectRepository repository) {
    InstanceStream stream = (InstanceStream) getPreparedClassOption(this.streamOption);
    Instances cache = new Instances(stream.getHeader(), 0);
    monitor.setCurrentActivity("Caching instances...", -1.0);
    while ((cache.numInstances() < this.maximumCacheSizeOption.getValue()) && stream.hasMoreInstances()) {
        cache.add(stream.nextInstance());
        if (cache.numInstances() % MainTask.INSTANCES_BETWEEN_MONITOR_UPDATES == 0) {
            if (monitor.taskShouldAbort()) {
                return null;
            }//w w  w . j a  v  a  2s.  c  o m
            long estimatedRemainingInstances = stream.estimatedRemainingInstances();
            long maxRemaining = this.maximumCacheSizeOption.getValue() - cache.numInstances();
            if ((estimatedRemainingInstances < 0) || (maxRemaining < estimatedRemainingInstances)) {
                estimatedRemainingInstances = maxRemaining;
            }
            monitor.setCurrentActivityFractionComplete(estimatedRemainingInstances < 0 ? -1.0
                    : (double) cache.numInstances()
                            / (double) (cache.numInstances() + estimatedRemainingInstances));
        }
    }
    monitor.setCurrentActivity("Shuffling instances...", -1.0);
    cache.randomize(new Random(this.shuffleRandomSeedOption.getValue()));
    return new CachedInstancesStream(cache);
}

From source file:mulan.classifier.transformation.EnsembleOfClassifierChains.java

License:Open Source License

@Override
protected void buildInternal(MultiLabelInstances trainingSet) throws Exception {

    Instances dataSet = new Instances(trainingSet.getDataSet());

    for (int i = 0; i < numOfModels; i++) {
        debug("ECC Building Model:" + (i + 1) + "/" + numOfModels);
        // 2013.12.13  
        System.out.println("ECC Building Model:" + (i + 1) + "/" + numOfModels);
        Instances sampledDataSet = null;
        dataSet.randomize(rand);
        if (useSamplingWithReplacement) {
            int bagSize = dataSet.numInstances() * BagSizePercent / 100;
            // create the in-bag dataset
            sampledDataSet = dataSet.resampleWithWeights(new Random(1));
            if (bagSize < dataSet.numInstances()) {
                sampledDataSet = new Instances(sampledDataSet, 0, bagSize);
            }//  w w  w .java2s. com
        } else {
            RemovePercentage rmvp = new RemovePercentage();
            rmvp.setInvertSelection(true);
            rmvp.setPercentage(samplingPercentage);
            rmvp.setInputFormat(dataSet);
            sampledDataSet = Filter.useFilter(dataSet, rmvp);
        }
        MultiLabelInstances train = new MultiLabelInstances(sampledDataSet, trainingSet.getLabelsMetaData());

        int[] chain = new int[numLabels];
        for (int j = 0; j < numLabels; j++)
            chain[j] = j;
        for (int j = 0; j < chain.length; j++) {
            int randomPosition = rand.nextInt(chain.length);
            int temp = chain[j];
            chain[j] = chain[randomPosition];
            chain[randomPosition] = temp;
        }
        debug(Arrays.toString(chain));
        //========================================
        System.out.println(Arrays.toString(chain));
        //========================================
        // MAYBE WE SHOULD CHECK NOT TO PRODUCE THE SAME VECTOR FOR THE
        // INDICES
        // BUT IN THE PAPER IT DID NOT MENTION SOMETHING LIKE THAT
        // IT JUST SIMPLY SAY A RANDOM CHAIN ORDERING OF L

        ensemble[i] = new ClassifierChain(baseClassifier, chain);
        ensemble[i].build(train);
    }

}

From source file:mulan.data.LabelPowersetStratification.java

License:Open Source License

/**
 * Stratifies the multi-label data into the given number of folds by
 * transforming it to a single-label label-powerset problem, stratifying
 * that, and mapping the fold members back to the original instances.
 *
 * @param data  the multi-label data to stratify
 * @param folds the number of folds
 * @return one MultiLabelInstances per fold, or null on failure
 */
public MultiLabelInstances[] stratify(MultiLabelInstances data, int folds) {
    try {
        LabelPowersetTransformation transformation = new LabelPowersetTransformation();

        // transform to a single-label representation
        Instances transformed = transformation.transformInstances(data);

        // prepend an instanceID attribute so fold members can be mapped
        // back to the original multi-label instances
        Add add = new Add();
        add.setAttributeIndex("first");
        add.setAttributeName("instanceID");
        add.setInputFormat(transformed);
        transformed = Filter.useFilter(transformed, add);
        for (int idx = 0; idx < transformed.numInstances(); idx++) {
            transformed.instance(idx).setValue(0, idx);
        }
        transformed.setClassIndex(transformed.numAttributes() - 1);

        // shuffle with the configured seed, then stratify
        transformed.randomize(new Random(seed));
        transformed.stratify(folds);

        MultiLabelInstances[] segments = new MultiLabelInstances[folds];
        for (int fold = 0; fold < folds; fold++) {
            Instances holdout = transformed.testCV(folds, fold);
            Instances test = new Instances(data.getDataSet(), 0);
            for (int k = 0; k < holdout.numInstances(); k++) {
                // instanceID (attribute 0) points at the original instance
                test.add(data.getDataSet().instance((int) holdout.instance(k).value(0)));
            }
            segments[fold] = new MultiLabelInstances(test, data.getLabelsMetaData());
        }
        return segments;
    } catch (Exception ex) {
        Logger.getLogger(LabelPowersetStratification.class.getName()).log(Level.SEVERE, null, ex);
        return null;
    }
}

From source file:mulan.evaluation.Evaluator.java

License:Open Source License

/**
 * Runs a seeded cross-validation of the learner over the data and wraps
 * the per-fold evaluations in a MultipleEvaluation.
 *
 * @param learner     the multi-label learner to evaluate (copied per fold)
 * @param data        the multi-label data
 * @param hasMeasures whether to evaluate with the supplied measures
 * @param measures    the measures to use when hasMeasures is true
 * @param someFolds   the number of folds
 * @return the aggregated per-fold evaluations
 */
private MultipleEvaluation innerCrossValidate(MultiLabelLearner learner, MultiLabelInstances data,
        boolean hasMeasures, List<Measure> measures, int someFolds) {
    Evaluation[] evaluation = new Evaluation[someFolds];

    // shuffle a copy so the caller's dataset keeps its original order
    Instances workingSet = new Instances(data.getDataSet());
    workingSet.randomize(new Random(seed));

    for (int fold = 0; fold < someFolds; fold++) {
        System.out.println("Fold " + (fold + 1) + "/" + someFolds);
        try {
            Instances train = workingSet.trainCV(someFolds, fold);
            Instances test = workingSet.testCV(someFolds, fold);
            MultiLabelInstances mlTrain = new MultiLabelInstances(train, data.getLabelsMetaData());
            MultiLabelInstances mlTest = new MultiLabelInstances(test, data.getLabelsMetaData());
            MultiLabelLearner clone = learner.makeCopy();
            clone.build(mlTrain);
            // NOTE(review): a failure leaves evaluation[fold] null; confirm
            // MultipleEvaluation tolerates null entries
            evaluation[fold] = hasMeasures ? evaluate(clone, mlTest, measures) : evaluate(clone, mlTest);
        } catch (Exception ex) {
            Logger.getLogger(Evaluator.class.getName()).log(Level.SEVERE, null, ex);
        }
    }
    return new MultipleEvaluation(evaluation);
}

From source file:neuralnetwork.NeuralNetwork.java

/**
 * Loads the iris data, splits it, standardizes both splits on the training
 * statistics, then trains and evaluates a neural network.
 *
 * @param args the command line arguments
 * @throws java.lang.Exception if loading, filtering or evaluation fails
 */
public static void main(String[] args) throws Exception {

    ConverterUtils.DataSource source;
    source = new ConverterUtils.DataSource("C:\\Users\\Harvey\\Documents\\iris.csv");
    Instances data = source.getDataSet();

    // default the class attribute to the last column
    if (data.classIndex() == -1) {
        data.setClassIndex(data.numAttributes() - 1);
    }

    data.randomize(new Debug.Random(1));

    // NOTE(review): RemovePercentage removes the given percentage, so this
    // keeps 30% as 'train' and 70% as 'test' -- confirm the split is intended
    RemovePercentage trainFilter = new RemovePercentage();
    trainFilter.setPercentage(70);
    trainFilter.setInputFormat(data);
    Instances train = Filter.useFilter(data, trainFilter);

    trainFilter.setInvertSelection(true);
    trainFilter.setInputFormat(data);
    Instances test = Filter.useFilter(data, trainFilter);

    // standardization parameters are learned from the training split only
    Standardize filter = new Standardize();
    filter.setInputFormat(train);

    // BUG FIX: train and test were swapped here -- newTrain was built from
    // the test split and newTest from the training split
    Instances newTrain = Filter.useFilter(train, filter);
    Instances newTest = Filter.useFilter(test, filter);

    Classifier nNet = new NeuralNet();
    nNet.buildClassifier(newTrain);
    // BUG FIX: evaluation priors now come from the training data, and the
    // model is tested on the (previously unseen) test split
    Evaluation eval = new Evaluation(newTrain);
    eval.evaluateModel(nNet, newTest);
    System.out.println(eval.toSummaryString("\nResults\n-------------\n", false));
}