List of usage examples for weka.classifiers Classifier buildClassifier
public abstract void buildClassifier(Instances data) throws Exception;
From source file:gnusmail.learning.ClassifierManager.java
License:Open Source License
/**
 * Batch training: builds a NaiveBayesUpdateable model over the manager's
 * current {@code dataSet} and serializes it to {@code ConfigManager.MODEL_FILE}.
 */
public void trainModel() {
    Classifier model = new NaiveBayesUpdateable();
    try {
        model.buildClassifier(dataSet);
    } catch (Exception e) {
        // The original swallowed this silently and returned; log it so a
        // failed training run is visible to the operator.
        e.printStackTrace();
        return;
    }
    // try-with-resources guarantees the stream is closed even if writeObject
    // fails (the original leaked it on that path, and misnamed it "fis").
    try (ObjectOutputStream out =
            new ObjectOutputStream(new FileOutputStream(ConfigManager.MODEL_FILE))) {
        out.writeObject(model);
    } catch (FileNotFoundException e) {
        System.out.println("File " + ConfigManager.MODEL_FILE.getAbsolutePath() + " not found");
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    }
}
From source file:id3j48.WekaAccess.java
/**
 * Trains the given classifier on the supplied dataset and hands it back,
 * allowing fluent use such as {@code evaluate(buildClassifier(data, cls))}.
 *
 * @param data       training instances (class index must already be set)
 * @param classifier the untrained classifier to fit
 * @return the same classifier instance, now trained
 * @throws Exception if Weka's buildClassifier fails
 */
public static Classifier buildClassifier(Instances data, Classifier classifier) throws Exception {
    classifier.buildClassifier(data);
    return classifier;
}
From source file:id3j48.WekaAccess.java
public static Evaluation percentageSplit(Instances data, Classifier classifier, int percentage) throws Exception { Instances tempdata = new Instances(data); tempdata.randomize(new Random(1)); int trainSize = Math.round(tempdata.numInstances() * percentage / 100); int testSize = tempdata.numInstances() - trainSize; Instances train = new Instances(tempdata, 0, trainSize); Instances test = new Instances(tempdata, trainSize, testSize); classifier.buildClassifier(train); Evaluation eval = testModel(classifier, train, test); return eval;// w w w . j ava 2 s.co m }
From source file:it.unisa.gitdm.evaluation.WekaEvaluator.java
private static void evaluateModel(String baseFolderPath, String projectName, Classifier pClassifier, Instances pInstances, String pModelName, String pClassifierName) throws Exception { // other options int folds = 10; // randomize data Random rand = new Random(42); Instances randData = new Instances(pInstances); randData.randomize(rand);/* w w w .j av a 2 s .c om*/ if (randData.classAttribute().isNominal()) { randData.stratify(folds); } // perform cross-validation and add predictions Instances predictedData = null; Evaluation eval = new Evaluation(randData); int positiveValueIndexOfClassFeature = 0; for (int n = 0; n < folds; n++) { Instances train = randData.trainCV(folds, n); Instances test = randData.testCV(folds, n); // the above code is used by the StratifiedRemoveFolds filter, the // code below by the Explorer/Experimenter: // Instances train = randData.trainCV(folds, n, rand); int classFeatureIndex = 0; for (int i = 0; i < train.numAttributes(); i++) { if (train.attribute(i).name().equals("isBuggy")) { classFeatureIndex = i; break; } } Attribute classFeature = train.attribute(classFeatureIndex); for (int i = 0; i < classFeature.numValues(); i++) { if (classFeature.value(i).equals("TRUE")) { positiveValueIndexOfClassFeature = i; } } train.setClassIndex(classFeatureIndex); test.setClassIndex(classFeatureIndex); // build and evaluate classifier pClassifier.buildClassifier(train); eval.evaluateModel(pClassifier, test); // add predictions // AddClassification filter = new AddClassification(); // filter.setClassifier(pClassifier); // filter.setOutputClassification(true); // filter.setOutputDistribution(true); // filter.setOutputErrorFlag(true); // filter.setInputFormat(train); // Filter.useFilter(train, filter); // Instances pred = Filter.useFilter(test, filter); // if (predictedData == null) // predictedData = new Instances(pred, 0); // // for (int j = 0; j < pred.numInstances(); j++) // predictedData.add(pred.instance(j)); } double accuracy = 
(eval.numTruePositives(positiveValueIndexOfClassFeature) + eval.numTrueNegatives(positiveValueIndexOfClassFeature)) / (eval.numTruePositives(positiveValueIndexOfClassFeature) + eval.numFalsePositives(positiveValueIndexOfClassFeature) + eval.numFalseNegatives(positiveValueIndexOfClassFeature) + eval.numTrueNegatives(positiveValueIndexOfClassFeature)); double fmeasure = 2 * ((eval.precision(positiveValueIndexOfClassFeature) * eval.recall(positiveValueIndexOfClassFeature)) / (eval.precision(positiveValueIndexOfClassFeature) + eval.recall(positiveValueIndexOfClassFeature))); File wekaOutput = new File(baseFolderPath + projectName + "/predictors.csv"); PrintWriter pw1 = new PrintWriter(wekaOutput); pw1.write(accuracy + ";" + eval.precision(positiveValueIndexOfClassFeature) + ";" + eval.recall(positiveValueIndexOfClassFeature) + ";" + fmeasure + ";" + eval.areaUnderROC(positiveValueIndexOfClassFeature)); System.out.println(projectName + ";" + pClassifierName + ";" + pModelName + ";" + eval.numTruePositives(positiveValueIndexOfClassFeature) + ";" + eval.numFalsePositives(positiveValueIndexOfClassFeature) + ";" + eval.numFalseNegatives(positiveValueIndexOfClassFeature) + ";" + eval.numTrueNegatives(positiveValueIndexOfClassFeature) + ";" + accuracy + ";" + eval.precision(positiveValueIndexOfClassFeature) + ";" + eval.recall(positiveValueIndexOfClassFeature) + ";" + fmeasure + ";" + eval.areaUnderROC(positiveValueIndexOfClassFeature) + "\n"); }
From source file:LeerArchivo.Leer.java
public void leerArchivoArff() { try {// ww w . j av a 2 s . c om // create J48 Classifier cls = new KStar(); // train Instances inst = new Instances(new BufferedReader(new FileReader("../datos.arff"))); inst.setClassIndex(inst.numAttributes() - 1); cls.buildClassifier(inst); // serialize model ObjectOutputStream oos = new ObjectOutputStream(new FileOutputStream("./KStar.model")); oos.writeObject(cls); oos.flush(); oos.close(); } catch (IOException ex) { Logger.getLogger(Leer.class.getName()).log(Level.SEVERE, null, ex); } catch (Exception ex) { Logger.getLogger(Leer.class.getName()).log(Level.SEVERE, null, ex); } }
From source file:LVCoref.WekaWrapper.java
License:Open Source License
public static void main(String[] args) { try {//from w ww .j a v a 2s. co m List<Document> docs = new LinkedList<Document>(); Document d = new Document(); d.readCONLL("data/pipeline/interview_16.lvsem.conll"); d.addAnnotationMMAX("data/interview_16_coref_level.xml"); d.useGoldMentions(); docs.add(d); d = new Document(); d.readCONLL("data/pipeline/interview_23.lvsem.conll"); d.addAnnotationMMAX("data/interview_23_coref_level.xml"); d.useGoldMentions(); docs.add(d); d = new Document(); d.readCONLL("data/pipeline/interview_27.lvsem.conll"); d.addAnnotationMMAX("data/interview_27_coref_level.xml"); d.useGoldMentions(); docs.add(d); d = new Document(); d.readCONLL("data/pipeline/interview_38.lvsem.conll"); d.addAnnotationMMAX("data/interview_38_coref_level.xml"); d.useGoldMentions(); docs.add(d); Instances train = toArff2(docs); train.setClassIndex(train.numAttributes() - 1); String[] options = { "-U" };//, "-C", "0.5"}; Classifier cls = new J48(); cls.setOptions(options); cls.buildClassifier(train); docs = new LinkedList<Document>(); d = new Document(); d.readCONLL("data/pipeline/interview_43.lvsem.conll"); d.addAnnotationMMAX("data/interview_43_coref_level.xml"); d.useGoldMentions(); docs.add(d); d = new Document(); d.readCONLL("data/pipeline/interview_46.lvsem.conll"); d.addAnnotationMMAX("data/interview_46_coref_level.xml"); d.useGoldMentions(); docs.add(d); Evaluation eval = new Evaluation(train); Instances data = toArff2(docs); data.setClassIndex(data.numAttributes() - 1); for (int i = 0; i < data.numInstances(); i++) { double clsLabel = cls.classifyInstance(data.instance(i)); //System.out.println(clsLabel); data.instance(i).setClassValue(clsLabel); System.out.println(data.instance(i).toString(data.classIndex())); } // eval.crossValidateModel(cls, train, 10, new Random(1)); // // generate curve // ThresholdCurve tc = new ThresholdCurve(); // //int classIndex = test.numAttributes()-1; // Instances result = tc.getCurve(eval.predictions());//, classIndex); // // // 
plot curve // ThresholdVisualizePanel vmc = new ThresholdVisualizePanel(); // vmc.setROCString("(Area under ROC = " + // weka.core.Utils.doubleToString(tc.getROCArea(result), 4) + ")"); // vmc.setName(result.relationName()); // PlotData2D tempd = new PlotData2D(result); // tempd.setPlotName(result.relationName()); // tempd.addInstanceNumberAttribute(); // // specify which points are connected // boolean[] cp = new boolean[result.numInstances()]; // for (int n = 1; n < cp.length; n++) // cp[n] = true; // tempd.setConnectPoints(cp); // // add plot // vmc.addPlot(tempd); // // // display curve // String plotName = vmc.getName(); // final javax.swing.JFrame jf = // new javax.swing.JFrame("Weka Classifier Visualize: "+plotName); // jf.setSize(500,400); // jf.getContentPane().setLayout(new BorderLayout()); // jf.getContentPane().add(vmc, BorderLayout.CENTER); // jf.addWindowListener(new java.awt.event.WindowAdapter() { // public void windowClosing(java.awt.event.WindowEvent e) { // jf.dispose(); // } // }); // jf.setVisible(true); // Instances test = toArff2(docs); // test.setClassIndex(test.numAttributes()-1); // // // Evaluation evals = new Evaluation(train); // // evals.evaluateModel(cls, test); // System.out.println(evals.toSummaryString("\nResults\n======\n", false)); // System.out.println(evals.toMatrixString()); // System.out.println(evals.toClassDetailsString()); // // System.out.println(cls); // //System.out.println(toArff2(docs)); } catch (Exception ex) { Logger.getLogger(WekaWrapper.class.getName()).log(Level.SEVERE, null, ex); } }
From source file:machinelearningcw.MachineLearningCw.java
public static void timingExperiment(Classifier s, Instances[] data) throws Exception { /* get the biggest data set */ Instances largestData = data[0];//from ww w .ja v a 2 s . c o m for (int i = 0; i < data.length; i++) { if (largestData.numInstances() < data[i].numInstances()) { largestData = data[i]; } } for (int i = 1; i <= 7; i++) { int percent = i * 10; int train_size = (int) Math.round(largestData.numInstances() * percent / 100); int testSize = largestData.numInstances() - train_size; Instances train = new Instances(largestData, 0, train_size); Instances test = new Instances(largestData, train_size, testSize); long t1 = System.currentTimeMillis(); s.buildClassifier(train); for (Instance ins : test) { s.classifyInstance(ins); } long t2 = System.currentTimeMillis() - t1; //change to seconds System.out.println("TIME TAKEN " + i + ": " + t2); } System.out.println("\n"); }
From source file:machinelearning_cw.MachineLearning_CW.java
/**
 * Tests the accuracy of a classifier against a collection of datasets by
 * resampling: for each dataset pair, the train and test sets are merged and
 * randomly re-split {@code t} times, the classifier retrained on each split,
 * and the running average accuracy printed after each dataset.
 *
 * @param classifier       the classifier to be tested
 * @param trainingDatasets training data per dataset (cleared and refilled in place)
 * @param testDatasets     test data per dataset (cleared and refilled in place)
 * @param t                the number of times the data should be resampled
 * @throws Exception if training or evaluation fails
 */
public static void performClassifierAccuracyTests(Classifier classifier, ArrayList<Instances> trainingDatasets,
        ArrayList<Instances> testDatasets, int t) throws Exception {
    ArrayList<Double> accuracies = new ArrayList<Double>();
    Random randomGenerator = new Random();

    for (int i = 0; i < trainingDatasets.size(); i++) {
        Instances train = trainingDatasets.get(i);
        Instances test = testDatasets.get(i);

        /* Test by resampling: merge train and test, then re-split randomly. */
        for (int j = 0; j < t; j++) {
            Instances mergedDataSet = mergeDataSets(train, test);
            train.clear();
            test.clear();

            // Randomly sample half the merged data (without replacement)
            // to form the train set.
            int n = mergedDataSet.size() / 2;
            for (int k = 0; k < n; k++) {
                int indexToRemove = randomGenerator.nextInt(mergedDataSet.size());
                train.add(mergedDataSet.remove(indexToRemove));
            }

            // Reserve ALL remaining instances as test data. BUG FIX: the
            // original `for (k = 0; k < size(); k++) test.add(remove(k))`
            // skipped every other instance, because remove() shifts the
            // indices down while k keeps advancing.
            while (!mergedDataSet.isEmpty()) {
                test.add(mergedDataSet.remove(mergedDataSet.size() - 1));
            }

            /* Train classifier (recalculates k for k-NN style learners). */
            classifier.buildClassifier(train);

            // Record this split's accuracy.
            double accuracy = Helpers.findClassifierAccuracy(classifier, test);
            accuracies.add(accuracy);
        }
        // NOTE: as in the original, the list is never cleared between
        // datasets, so this is a running average over all splits so far.
        double accuracyAverage = average(accuracies);
        System.out.println(accuracyAverage);
    }
}
From source file:machinelearning_cw.MachineLearning_CW.java
/** * /* w ww. jav a2s.c o m*/ * Tests the speed of a classifier in milliseconds and prints it. * This is achieved by checking the time taken for the given classifier to * classify some given data. * * * @param classifier The classifier to be tested. * @param train The data with which to train the classifier. * @param test The data the classifier is to be tested against. * @param t The number of times the test should be carried out and averaged. * @throws Exception */ public static void timeClassifier(Classifier classifier, Instances train, Instances test, int t) throws Exception { ArrayList<Double> times = new ArrayList<Double>(); /* Carry out test t+1 times and average. * The first run is ignored to offset the effects of * the Java Garbage Collector and caching. */ for (int i = 0; i < t + 1; i++) { // Time the build and classifyInstance methods double t1 = System.nanoTime(); classifier.buildClassifier(train); for (Instance eachInstance : test) { classifier.classifyInstance(eachInstance); } double t2 = System.nanoTime() - t1; // Convert to ms double timeTaken = t2 / 1000000.0; if (i != 0) { times.add(timeTaken); } } double averageTime = average(times); System.out.println(averageTime); }
From source file:mao.datamining.ModelProcess.java
/**
 * Trains {@code classifier} on the final training dataset, evaluates it on the
 * final test dataset, writes a human-readable summary / class details /
 * confusion matrix to {@code testCaseSummaryOut}, and records timings and
 * metrics (AUC, precision, recall for class index 1) into {@code result}.
 * Does nothing when {@code finalTestDataSet} is null.
 */
private void testWithExtraDS(Classifier classifier, Instances finalTrainDataSet, Instances finalTestDataSet,
        FileOutputStream testCaseSummaryOut, TestResult result) {
    // Use final training dataset and final test dataset.
    double confusionMatrix[][] = null;
    long start, end, trainTime = 0, testTime = 0;
    if (finalTestDataSet != null) {
        try {
            // Measure training time.
            start = System.currentTimeMillis();
            classifier.buildClassifier(finalTrainDataSet);
            end = System.currentTimeMillis();
            trainTime += end - start;

            // Measure test (evaluation) time.
            start = System.currentTimeMillis();
            Evaluation testEvalOnly = new Evaluation(finalTrainDataSet);
            testEvalOnly.evaluateModel(classifier, finalTestDataSet);
            end = System.currentTimeMillis();
            testTime += end - start;

            // Append the evaluation report sections to the summary stream.
            // NOTE(review): getBytes() uses the platform default charset here —
            // confirm UTF-8 is intended if the summary file must be portable.
            testCaseSummaryOut.write("=====================================================\n".getBytes());
            testCaseSummaryOut.write((testEvalOnly.toSummaryString("=== Test Summary ===", true)).getBytes());
            testCaseSummaryOut.write("\n".getBytes());
            testCaseSummaryOut
                    .write((testEvalOnly.toClassDetailsString("=== Test Class Detail ===\n")).getBytes());
            testCaseSummaryOut.write("\n".getBytes());
            testCaseSummaryOut
                    .write((testEvalOnly.toMatrixString("=== Confusion matrix for Test ===\n")).getBytes());
            testCaseSummaryOut.flush();

            // Record the metrics; class index 1 is treated as the positive
            // class here (presumably the "true"/buggy label — verify against
            // the dataset definition).
            confusionMatrix = testEvalOnly.confusionMatrix();
            result.setConfusionMatrix4Test(confusionMatrix);
            result.setAUT(testEvalOnly.areaUnderROC(1));
            result.setPrecision(testEvalOnly.precision(1));
            result.setRecall(testEvalOnly.recall(1));
        } catch (Exception e) {
            ModelProcess.logging(null, e);
        }
        // Timings are recorded even if evaluation threw (they stay at the
        // values accumulated so far).
        result.setTrainingTime(trainTime);
        result.setTestTime(testTime);
    } // using test data set, end
}