Example usage for weka.classifiers Evaluation errorRate

List of usage examples for weka.classifiers Evaluation errorRate

Introduction

On this page you can find example usages of weka.classifiers.Evaluation.errorRate().

Prototype

public final double errorRate() 

Source Link

Document

Returns the estimated error rate or the root mean squared error (if the class is numeric).

Usage

From source file:gyc.OverBoostM1.java

License:Open Source License

/**
 * Boosting method. Boosts using resampling.
 *
 * <p>Each round draws a weighted resample of the training data, balances the
 * two classes of that resample via {@code randomSampling}, trains the next
 * base classifier on the balanced data and derives its vote weight from the
 * error rate measured on the (re-weighted) training copy.
 *
 * @param data the training data to be used for generating the
 * boosted classifier.
 * @throws Exception if the classifier could not be built successfully
 */
protected void buildClassifierUsingResampling(Instances data) throws Exception {

    final int instanceCount = data.numInstances();
    final Random rng = new Random(m_Seed);

    // Reset the boosting state
    m_Betas = new double[m_Classifiers.length];
    m_NumIterationsPerformed = 0;

    // Work on a private copy so that re-weighting never leaks into the
    // caller's data; the weights are normalised to sum to one
    final Instances working = new Instances(data, 0, instanceCount);
    final double totalWeight = working.sumOfWeights();
    for (int idx = 0; idx < working.numInstances(); idx++) {
        working.instance(idx).setWeight(working.instance(idx).weight() / totalWeight);
    }

    // Boosting rounds; the field doubles as the loop counter
    m_NumIterationsPerformed = 0;
    while (m_NumIterationsPerformed < m_Classifiers.length) {
        if (m_Debug) {
            System.err.println("Training classifier " + (m_NumIterationsPerformed + 1));
        }

        // Choose the instances this round is allowed to draw from
        final Instances candidates = (m_WeightThreshold < 100)
                ? selectWeightQuantile(working, (double) m_WeightThreshold / 100)
                : new Instances(working);

        // Snapshot the current weights for the weighted resampling
        final double[] drawWeights = new double[candidates.numInstances()];
        for (int idx = 0; idx < drawWeights.length; idx++) {
            drawWeights[idx] = candidates.instance(idx).weight();
        }

        double epsilon;
        int attempts = 0;
        do {
            final Instances drawn = candidates.resampleWithWeights(rng, drawWeights);

            // Identify minority / majority among the first two class values
            // (only indices 0 and 1 of the nominal counts are inspected)
            final int[] perClass = drawn.attributeStats(drawn.classIndex()).nominalCounts;
            final int countOfFirst = perClass[0];
            final int countOfSecond = perClass[1];
            final boolean firstIsMinority = countOfFirst < countOfSecond;
            final int minorityClass = firstIsMinority ? 0 : 1;
            final int majorityClass = firstIsMinority ? 1 : 0;
            final int majorityCount = firstIsMinority ? countOfSecond : countOfFirst;

            // Balance the data which boosting generates for training the
            // base classifier; the majority count is passed for both sizes
            final Instances balanced = randomSampling(drawn, majorityClass, minorityClass, majorityCount,
                    majorityCount, rng);

            // Build and evaluate classifier
            m_Classifiers[m_NumIterationsPerformed].buildClassifier(balanced);

            final Evaluation roundEvaluation = new Evaluation(data);
            roundEvaluation.evaluateModel(m_Classifiers[m_NumIterationsPerformed], working);
            epsilon = roundEvaluation.errorRate();
            attempts++;
        } while (Utils.eq(epsilon, 0) && (attempts < MAX_NUM_RESAMPLING_ITERATIONS));

        // Stop if error too big or 0
        final boolean unusable = Utils.grOrEq(epsilon, 0.5) || Utils.eq(epsilon, 0);
        if (unusable) {
            if (m_NumIterationsPerformed == 0) {
                // The very first model has to be used regardless
                m_NumIterationsPerformed = 1;
            }
            break;
        }

        // Vote weight of this model and the factor used to re-weight instances
        final double odds = (1 - epsilon) / epsilon;
        m_Betas[m_NumIterationsPerformed] = Math.log(odds);
        if (m_Debug) {
            System.err.println("\terror rate = " + epsilon + "  beta = " + m_Betas[m_NumIterationsPerformed]);
        }

        // Update instance weights
        setWeights(working, odds);

        m_NumIterationsPerformed++;
    }
}

From source file:gyc.OverBoostM1.java

License:Open Source License

/**
 * Boosting method. Boosts any classifier that can handle weighted
 * instances.
 *
 * <p>Each round trains the next base classifier directly on the weighted
 * training copy (or on its weight quantile), measures the error rate on that
 * copy and uses it to set the model's vote weight and to re-weight instances.
 *
 * @param data the training data to be used for generating the
 * boosted classifier.
 * @throws Exception if the classifier could not be built successfully
 */
protected void buildClassifierWithWeights(Instances data) throws Exception {

    final int instanceCount = data.numInstances();
    final Random seedSource = new Random(m_Seed);

    // Reset the boosting state
    m_Betas = new double[m_Classifiers.length];
    m_NumIterationsPerformed = 0;

    // Private copy: weight changes must not be visible to the caller
    final Instances weighted = new Instances(data, 0, instanceCount);

    // Boosting rounds; the field doubles as the loop counter
    for (m_NumIterationsPerformed = 0; m_NumIterationsPerformed < m_Classifiers.length; m_NumIterationsPerformed++) {
        if (m_Debug) {
            System.err.println("Training classifier " + (m_NumIterationsPerformed + 1));
        }

        // Choose the instances this round trains on
        final Instances roundData = (m_WeightThreshold < 100)
                ? selectWeightQuantile(weighted, (double) m_WeightThreshold / 100)
                : new Instances(weighted, 0, instanceCount);

        // Give randomizable base classifiers a fresh seed every round
        if (m_Classifiers[m_NumIterationsPerformed] instanceof Randomizable) {
            final int roundSeed = seedSource.nextInt();
            ((Randomizable) m_Classifiers[m_NumIterationsPerformed]).setSeed(roundSeed);
        }

        // Train the base classifier for this round
        m_Classifiers[m_NumIterationsPerformed].buildClassifier(roundData);

        // Measure its error on the weighted training copy
        final Evaluation roundEvaluation = new Evaluation(data);
        roundEvaluation.evaluateModel(m_Classifiers[m_NumIterationsPerformed], weighted);
        final double epsilon = roundEvaluation.errorRate();

        // Stop if error too small or error too big and ignore this model
        final boolean unusable = Utils.grOrEq(epsilon, 0.5) || Utils.eq(epsilon, 0);
        if (unusable) {
            if (m_NumIterationsPerformed == 0) {
                // The very first model has to be used regardless
                m_NumIterationsPerformed = 1;
            }
            break;
        }

        // Vote weight of this model and the factor used to re-weight instances
        final double odds = (1 - epsilon) / epsilon;
        m_Betas[m_NumIterationsPerformed] = Math.log(odds);
        if (m_Debug) {
            System.err.println("\terror rate = " + epsilon + "  beta = " + m_Betas[m_NumIterationsPerformed]);
        }

        // Update instance weights
        setWeights(weighted, odds);
    }
}

From source file:gyc.UnderOverBoostM1.java

License:Open Source License

/**
 * Boosting method. Boosts using resampling
 *
 * @param data the training data to be used for generating the
 * boosted classifier./*from   w  w  w .  ja  va 2s  . c om*/
 * @throws Exception if the classifier could not be built successfully
 */
protected void buildClassifierUsingResampling(Instances data) throws Exception {

    Instances trainData, sample, training;
    double epsilon, reweight, sumProbs;
    Evaluation evaluation;
    int numInstances = data.numInstances();
    Random randomInstance = new Random(m_Seed);
    int resamplingIterations = 0;

    // Initialize data
    m_Betas = new double[m_Classifiers.length];
    m_NumIterationsPerformed = 0;
    // Create a copy of the data so that when the weights are diddled
    // with it doesn't mess up the weights for anyone else
    training = new Instances(data, 0, numInstances);
    sumProbs = training.sumOfWeights();
    for (int i = 0; i < training.numInstances(); i++) {
        training.instance(i).setWeight(training.instance(i).weight() / sumProbs);
    }

    // Do boostrap iterations
    int b = 10;
    for (m_NumIterationsPerformed = 0; m_NumIterationsPerformed < m_Classifiers.length; m_NumIterationsPerformed++) {
        if (m_Debug) {
            System.err.println("Training classifier " + (m_NumIterationsPerformed + 1));
        }

        // Select instances to train the classifier on
        if (m_WeightThreshold < 100) {
            trainData = selectWeightQuantile(training, (double) m_WeightThreshold / 100);
        } else {
            trainData = new Instances(training);
        }

        // Resample
        resamplingIterations = 0;
        double[] weights = new double[trainData.numInstances()];
        for (int i = 0; i < weights.length; i++) {
            weights[i] = trainData.instance(i).weight();
        }
        do {
            sample = trainData.resampleWithWeights(randomInstance, weights);

            //
            int classNum[] = sample.attributeStats(sample.classIndex()).nominalCounts;
            int minC, nMin = classNum[0];
            int majC, nMaj = classNum[1];
            if (nMin < nMaj) {
                minC = 0;
                majC = 1;
            } else {
                minC = 1;
                majC = 0;
                nMin = classNum[1];
                nMaj = classNum[0];
            }
            //System.out.println("minC="+nMin+"; majC="+nMaj);
            /*
             * balance the data which boosting generate for training base classifier
            */
            //System.out.println("before:"+classNum[0]+"-"+classNum[1]);
            double pb = 100.0 * (nMin + nMaj) / 2 / nMaj;
            /* if (m_NumIterationsPerformed + 1 > (m_Classifiers.length / 10))    
                b += 10;
            (b% * Nmaj) instances are taken from each class */
            Instances sampleData = randomSampling(sample, majC, minC, (int) pb, randomInstance);

            //classNum =sampleData.attributeStats(sampleData.classIndex()).nominalCounts;
            //System.out.println("after:"+classNum[0]+"-"+classNum[1]);

            // Build and evaluate classifier
            m_Classifiers[m_NumIterationsPerformed].buildClassifier(sampleData);

            evaluation = new Evaluation(data);
            evaluation.evaluateModel(m_Classifiers[m_NumIterationsPerformed], training);
            epsilon = evaluation.errorRate();
            resamplingIterations++;
        } while (Utils.eq(epsilon, 0) && (resamplingIterations < MAX_NUM_RESAMPLING_ITERATIONS));

        // Stop if error too big or 0
        if (Utils.grOrEq(epsilon, 0.5) || Utils.eq(epsilon, 0)) {
            if (m_NumIterationsPerformed == 0) {
                m_NumIterationsPerformed = 1; // If we're the first we have to to use it
            }
            break;
        }

        // Determine the weight to assign to this model
        m_Betas[m_NumIterationsPerformed] = Math.log((1 - epsilon) / epsilon);
        reweight = (1 - epsilon) / epsilon;
        if (m_Debug) {
            System.err.println("\terror rate = " + epsilon + "  beta = " + m_Betas[m_NumIterationsPerformed]);
        }

        // Update instance weights
        setWeights(training, reweight);
    }
}

From source file:kfst.classifier.WekaClassifier.java

License:Open Source License

/**
 * This method builds and evaluates the support vector machine (SVM)
 * classifier. Weka's SMO implementation is used as the SVM classifier.
 *
 * <p>The last attribute of each data set is used as the class attribute.
 * Any exception raised while loading, training or evaluating is logged at
 * SEVERE level and the method then returns 0.
 *
 * @param pathTrainData the path of the train set
 * @param pathTestData the path of the test set
 * @param svmKernel the kernel to use: "Polynomial kernel", "RBF kernel";
 *        any other value selects the Puk kernel
 *
 * @return the classification accuracy in percent, or 0 on failure
 */
public static double SVM(String pathTrainData, String pathTestData, String svmKernel) {
    double resultValue = 0;
    try {
        // try-with-resources closes each reader even when parsing fails
        final Instances dataTrain;
        try (BufferedReader readerTrain = new BufferedReader(new FileReader(pathTrainData))) {
            dataTrain = new Instances(readerTrain);
        }
        dataTrain.setClassIndex(dataTrain.numAttributes() - 1);

        final Instances dataTest;
        try (BufferedReader readerTest = new BufferedReader(new FileReader(pathTestData))) {
            dataTest = new Instances(readerTest);
        }
        dataTest.setClassIndex(dataTest.numAttributes() - 1);

        SMO svm = new SMO();
        // Direct construction replaces the deprecated Class.newInstance()
        if (svmKernel.equals("Polynomial kernel")) {
            svm.setKernel(new weka.classifiers.functions.supportVector.PolyKernel());
        } else if (svmKernel.equals("RBF kernel")) {
            svm.setKernel(new weka.classifiers.functions.supportVector.RBFKernel());
        } else {
            svm.setKernel(new weka.classifiers.functions.supportVector.Puk());
        }
        svm.buildClassifier(dataTrain);
        Evaluation eval = new Evaluation(dataTest);
        eval.evaluateModel(svm, dataTest);
        // Accuracy (%) is the complement of the error rate
        resultValue = 100 - (eval.errorRate() * 100);
    } catch (Exception ex) {
        Logger.getLogger(WekaClassifier.class.getName()).log(Level.SEVERE, null, ex);
    }
    return resultValue;
}

From source file:kfst.classifier.WekaClassifier.java

License:Open Source License

/**
 * This method builds and evaluates the naiveBayes(NB) classifier.
 * The naiveBayes are used as the NB classifier implemented in the Weka
 * software./*from   www  .j  a va2s  .  co m*/
 *
 * @param pathTrainData the path of the train set
 * @param pathTestData the path of the test set
 * 
 * @return the classification accuracy
 */
public static double naiveBayes(String pathTrainData, String pathTestData) {
    double resultValue = 0;
    try {
        BufferedReader readerTrain = new BufferedReader(new FileReader(pathTrainData));
        Instances dataTrain = new Instances(readerTrain);
        readerTrain.close();
        dataTrain.setClassIndex(dataTrain.numAttributes() - 1);

        BufferedReader readerTest = new BufferedReader(new FileReader(pathTestData));
        Instances dataTest = new Instances(readerTest);
        readerTest.close();
        dataTest.setClassIndex(dataTest.numAttributes() - 1);

        NaiveBayes nb = new NaiveBayes();
        nb.buildClassifier(dataTrain);
        Evaluation eval = new Evaluation(dataTest);
        eval.evaluateModel(nb, dataTest);
        resultValue = 100 - (eval.errorRate() * 100);
    } catch (Exception ex) {
        Logger.getLogger(WekaClassifier.class.getName()).log(Level.SEVERE, null, ex);
    }
    return resultValue;
}

From source file:kfst.classifier.WekaClassifier.java

License:Open Source License

/**
 * This method builds and evaluates the decision tree(DT) classifier.
 * The j48 are used as the DT classifier implemented in the Weka software.
 *
 * @param pathTrainData the path of the train set
 * @param pathTestData the path of the test set
 * @param confidenceValue The confidence factor used for pruning
 * @param minNumSampleInLeaf The minimum number of instances per leaf
 * /*  w w  w.j  a va 2 s  .c o m*/
 * @return the classification accuracy
 */
public static double dTree(String pathTrainData, String pathTestData, double confidenceValue,
        int minNumSampleInLeaf) {
    double resultValue = 0;
    try {
        BufferedReader readerTrain = new BufferedReader(new FileReader(pathTrainData));
        Instances dataTrain = new Instances(readerTrain);
        readerTrain.close();
        dataTrain.setClassIndex(dataTrain.numAttributes() - 1);

        BufferedReader readerTest = new BufferedReader(new FileReader(pathTestData));
        Instances dataTest = new Instances(readerTest);
        readerTest.close();
        dataTest.setClassIndex(dataTest.numAttributes() - 1);

        J48 decisionTree = new J48();
        decisionTree.setConfidenceFactor((float) confidenceValue);
        decisionTree.setMinNumObj(minNumSampleInLeaf);
        decisionTree.buildClassifier(dataTrain);
        Evaluation eval = new Evaluation(dataTest);
        eval.evaluateModel(decisionTree, dataTest);
        resultValue = 100 - (eval.errorRate() * 100);
    } catch (Exception ex) {
        Logger.getLogger(WekaClassifier.class.getName()).log(Level.SEVERE, null, ex);
    }
    return resultValue;
}

From source file:lu.lippmann.cdb.datasetview.tabs.RegressionTreeTabView.java

License:Open Source License

/**
 * {@inheritDoc}
 *
 * <p>Rebuilds the tab: clears the panel and adds a combo box listing the
 * data set's attributes next to a "Compute" button. Pressing the button
 * sets the selected attribute as class attribute of {@code dataSet}, trains
 * an unpruned REPTree on it, evaluates the tree on the same data and shows
 * the resulting graph view together with size, depth, MAE and RMSE.
 *
 * @param dataSet the data set to analyse; its class index is changed when
 *        the "Compute" button is pressed
 */
@SuppressWarnings("unchecked")
@Override
public void update0(final Instances dataSet) throws Exception {
    this.panel.removeAll();

    //final Object[] attrNames=WekaDataStatsUtil.getNumericAttributesNames(dataSet).toArray();
    final Object[] attrNames = WekaDataStatsUtil.getAttributeNames(dataSet).toArray();
    final JComboBox xCombo = new JComboBox(attrNames);
    xCombo.setBorder(new TitledBorder("Attribute to evaluate"));

    // Header row: attribute selector on the left, "Compute" button on the right
    final JXPanel comboPanel = new JXPanel();
    comboPanel.setLayout(new GridLayout(1, 2));
    comboPanel.add(xCombo);
    final JXButton jxb = new JXButton("Compute");
    comboPanel.add(jxb);
    this.panel.add(comboPanel, BorderLayout.NORTH);

    jxb.addActionListener(new ActionListener() {
        @Override
        public void actionPerformed(ActionEvent e) {
            try {
                // Drop the graph view left over from a previous computation
                if (gv != null)
                    panel.remove((Component) gv);

                // The combo box index is used directly as the class index
                dataSet.setClassIndex(xCombo.getSelectedIndex());

                final REPTree rt = new REPTree();
                rt.setNoPruning(true);
                //rt.setMaxDepth(3);
                rt.buildClassifier(dataSet);

                /*final M5P rt=new M5P();
                rt.buildClassifier(dataSet);*/

                // Evaluate on the very data the tree was trained on and
                // print diagnostics to standard output
                final Evaluation eval = new Evaluation(dataSet);
                double[] d = eval.evaluateModel(rt, dataSet);
                System.out.println("PREDICTED -> " + FormatterUtil.buildStringFromArrayOfDoubles(d));
                System.out.println(eval.errorRate());
                System.out.println(eval.sizeOfPredictedRegions());
                System.out.println(eval.toSummaryString("", true));

                // Convert Weka's graph string into the project's decision tree model
                final GraphWithOperations gwo = GraphUtil
                        .buildGraphWithOperationsFromWekaRegressionString(rt.graph());
                final DecisionTree dt = new DecisionTree(gwo, eval.errorRate());

                gv = DecisionTreeToGraphViewHelper.buildGraphView(dt, eventPublisher, commandDispatcher);
                gv.addMetaInfo("Size=" + dt.getSize(), "");
                gv.addMetaInfo("Depth=" + dt.getDepth(), "");

                gv.addMetaInfo("MAE=" + FormatterUtil.DECIMAL_FORMAT.format(eval.meanAbsoluteError()) + "", "");
                gv.addMetaInfo("RMSE=" + FormatterUtil.DECIMAL_FORMAT.format(eval.rootMeanSquaredError()) + "",
                        "");

                // Check box that switches between the repartition-based
                // vertex/edge shapes and the default ones; tweakedGraph
                // remembers which of the two is currently shown
                final JCheckBox toggleDecisionTreeDetails = new JCheckBox("Toggle details");
                toggleDecisionTreeDetails.addActionListener(new ActionListener() {
                    @Override
                    public void actionPerformed(ActionEvent e) {
                        if (!tweakedGraph) {
                            // mapRep[0] feeds the vertex shapes, mapRep[1] the edge shapes
                            final Object[] mapRep = WekaDataStatsUtil
                                    .buildNodeAndEdgeRepartitionMap(dt.getGraphWithOperations(), dataSet);
                            gv.updateVertexShapeTransformer((Map<CNode, Map<Object, Integer>>) mapRep[0]);
                            gv.updateEdgeShapeRenderer((Map<CEdge, Float>) mapRep[1]);
                        } else {
                            gv.resetVertexAndEdgeShape();
                        }
                        tweakedGraph = !tweakedGraph;
                    }
                });
                gv.addMetaInfoComponent(toggleDecisionTreeDetails);

                /*final JButton openInEditorButton = new JButton("Open in editor");
                openInEditorButton.addActionListener(new ActionListener() {
                   @Override
                   public void actionPerformed(ActionEvent e) {
                       GraphUtil.importDecisionTreeInEditor(dtFactory, dataSet, applicationContext, eventPublisher, commandDispatcher);
                   }
                });
                this.gv.addMetaInfoComponent(openInEditorButton);*/

                // Button that shows the tree's DSL text in a message dialog
                final JButton showTextButton = new JButton("In text");
                showTextButton.addActionListener(new ActionListener() {
                    @Override
                    public void actionPerformed(ActionEvent e) {
                        JOptionPane.showMessageDialog(null, graphDsl.getDslString(dt.getGraphWithOperations()));
                    }
                });
                gv.addMetaInfoComponent(showTextButton);

                panel.add(gv.asComponent(), BorderLayout.CENTER);
            } catch (Exception e1) {
                // Any failure is reported on stderr and as a label in the panel
                e1.printStackTrace();
                panel.add(new JXLabel("Error during computation: " + e1.getMessage()), BorderLayout.CENTER);
            }

        }
    });
}

From source file:lu.lippmann.cdb.dt.ModelTreeFactory.java

License:Open Source License

/**
 * Main method. Trains an M5P model tree on the white wine quality sample,
 * prints the model, its graph and its DSL form, evaluates the model on the
 * training data and prints the evaluation results.
 *
 * @param args command line arguments (unused)
 */
public static void main(final String[] args) {
    try {
        // Load the sample data set
        final File sampleFile = new File("./samples/csv/uci/winequality-white.csv");
        final Instances instances = WekaDataAccessUtil.loadInstancesFromARFFOrCSVFile(sampleFile);
        final boolean numericClass = instances.classAttribute().isNumeric();
        System.out.println(numericClass);

        // Train the model tree
        final M5P modelTree = new M5P();
        modelTree.setMinNumInstances(1000);
        modelTree.buildClassifier(instances);

        System.out.println(modelTree);

        System.out.println(modelTree.graph());

        // Convert Weka's graph string and print both representations
        final GraphWithOperations graph = GraphUtil
                .buildGraphWithOperationsFromWekaRegressionString(modelTree.graph());
        System.out.println(graph);
        final ASCIIGraphDsl dsl = new ASCIIGraphDsl();
        System.out.println(dsl.getDslString(graph));

        // Evaluate on the training data itself
        final Evaluation evaluation = new Evaluation(instances);
        final double[] predictions = evaluation.evaluateModel(modelTree, instances);
        System.out.println("PREDICTED -> " + FormatterUtil.buildStringFromArrayOfDoubles(predictions));

        System.out.println(evaluation.errorRate());
        System.out.println(evaluation.sizeOfPredictedRegions());

        System.out.println(evaluation.toSummaryString("", true));

        final DecisionTree tree = new DecisionTree(graph, evaluation.errorRate());
        System.out.println(tree);
    } catch (Exception e) {
        e.printStackTrace();
    }
}

From source file:lu.lippmann.cdb.dt.RegressionTreeFactory.java

License:Open Source License

/**
 * Main method. Trains a depth-limited REPTree on the red wine quality
 * sample, prints the model and the DSL form of its graph, evaluates the
 * model on the training data and prints the evaluation results.
 *
 * @param args command line arguments (unused)
 */
public static void main(final String[] args) {
    try {
        // Load the sample data set
        final File sampleFile = new File("./samples/csv/uci/winequality-red.csv");
        final Instances instances = WekaDataAccessUtil.loadInstancesFromARFFOrCSVFile(sampleFile);
        final boolean numericClass = instances.classAttribute().isNumeric();
        System.out.println(numericClass);

        // Train the regression tree
        final REPTree regressionTree = new REPTree();
        regressionTree.setMaxDepth(3);
        regressionTree.buildClassifier(instances);

        System.out.println(regressionTree);

        // Convert Weka's graph string and print both representations
        final GraphWithOperations graph = GraphUtil
                .buildGraphWithOperationsFromWekaRegressionString(regressionTree.graph());
        System.out.println(graph);
        final ASCIIGraphDsl dsl = new ASCIIGraphDsl();
        System.out.println(dsl.getDslString(graph));

        // Evaluate on the training data itself
        final Evaluation evaluation = new Evaluation(instances);
        final double[] predictions = evaluation.evaluateModel(regressionTree, instances);
        System.out.println("PREDICTED -> " + FormatterUtil.buildStringFromArrayOfDoubles(predictions));

        System.out.println(evaluation.errorRate());
        System.out.println(evaluation.sizeOfPredictedRegions());

        System.out.println(evaluation.toSummaryString("", true));

        final DecisionTree tree = new DecisionTree(graph, evaluation.errorRate());
        System.out.println(tree);
    } catch (Exception e) {
        e.printStackTrace();
    }
}

From source file:trainableSegmentation.WekaSegmentation.java

License:GNU General Public License

/**
 * Get training error (from loaded data).
 *
 * <p>Evaluates the current classifier on the loaded training data. Returns
 * -1 when no training header is available or when the evaluation throws.
 *
 * @param verbose option to display evaluation information in the log window
 * @return classifier error on the training data set, or -1 if unavailable
 */
public double getTrainingError(boolean verbose) {
    // Nothing to evaluate without a training header
    if (this.trainHeader == null) {
        return -1;
    }

    try {
        final Evaluation trainingEvaluation = new Evaluation(this.loadedTrainingData);
        trainingEvaluation.evaluateModel(classifier, this.loadedTrainingData);
        if (verbose) {
            final String summary = trainingEvaluation
                    .toSummaryString("\n=== Training set evaluation ===\n", false);
            IJ.log(summary);
        }
        return trainingEvaluation.errorRate();
    } catch (Exception e) {
        // Report the failure and fall back to the "unavailable" marker
        e.printStackTrace();
        return -1;
    }
}