Example usage for weka.classifiers Classifier buildClassifier

List of usage examples for weka.classifiers Classifier buildClassifier

Introduction

On this page you can find an example usage for weka.classifiers Classifier buildClassifier.

Prototype

public abstract void buildClassifier(Instances data) throws Exception;

Source Link

Document

Generates a classifier.

Usage

From source file:gnusmail.learning.ClassifierManager.java

License:Open Source License

/**
 * Trains a {@code NaiveBayesUpdateable} model on {@code dataSet} in batch mode
 * and serializes the trained model to {@code ConfigManager.MODEL_FILE}.
 */
public void trainModel() {
    Classifier model = new NaiveBayesUpdateable();
    try {
        model.buildClassifier(dataSet);
    } catch (Exception e) {
        // Report training failures instead of returning silently, which hid the cause.
        e.printStackTrace();
        return;
    }
    // try-with-resources guarantees the stream is closed even if writeObject throws;
    // the original leaked both streams on failure.
    try (ObjectOutputStream out = new ObjectOutputStream(new FileOutputStream(ConfigManager.MODEL_FILE))) {
        out.writeObject(model);
    } catch (FileNotFoundException e) {
        System.out.println("File " + ConfigManager.MODEL_FILE.getAbsolutePath() + " not found");
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    }
}

From source file:id3j48.WekaAccess.java

/**
 * Trains the supplied classifier on the given instances and hands it back.
 *
 * @param data       the training instances
 * @param classifier the classifier to train
 * @return the same classifier instance, now trained on {@code data}
 * @throws Exception if Weka fails to build the classifier
 */
public static Classifier buildClassifier(Instances data, Classifier classifier) throws Exception {
    classifier.buildClassifier(data);
    return classifier;
}

From source file:id3j48.WekaAccess.java

/**
 * Evaluates a classifier with a percentage split: shuffles a copy of the
 * data (fixed seed 1), trains on the first {@code percentage}% and tests on
 * the remainder.
 *
 * @param data       the full data set (left unmodified; a copy is shuffled)
 * @param classifier the classifier to train and evaluate
 * @param percentage the portion (0-100) of instances used for training
 * @return the evaluation of {@code classifier} on the held-out split
 * @throws Exception if training or evaluation fails
 */
public static Evaluation percentageSplit(Instances data, Classifier classifier, int percentage)
        throws Exception {
    Instances tempdata = new Instances(data);
    tempdata.randomize(new Random(1));

    // Divide in floating point before rounding: the original computed
    // numInstances() * percentage / 100 in integer math, which truncated
    // first and made the Math.round call a no-op.
    int trainSize = (int) Math.round(tempdata.numInstances() * percentage / 100.0);
    int testSize = tempdata.numInstances() - trainSize;
    Instances train = new Instances(tempdata, 0, trainSize);
    Instances test = new Instances(tempdata, trainSize, testSize);

    classifier.buildClassifier(train);
    return testModel(classifier, train, test);
}

From source file:it.unisa.gitdm.evaluation.WekaEvaluator.java

/**
 * Runs a stratified 10-fold cross-validation of {@code pClassifier} on
 * {@code pInstances}, using the nominal attribute named "isBuggy" as the
 * class attribute and its value "TRUE" as the positive class. Writes
 * accuracy, precision, recall, F-measure and AUC to
 * {@code <baseFolderPath><projectName>/predictors.csv} and prints a
 * semicolon-separated result line to stdout.
 *
 * @param baseFolderPath  root output folder (must end with a separator)
 * @param projectName     project subfolder and identifier in the output line
 * @param pClassifier     the classifier to evaluate
 * @param pInstances      the data to cross-validate on
 * @param pModelName      model label echoed in the stdout line
 * @param pClassifierName classifier label echoed in the stdout line
 * @throws Exception if building or evaluating the classifier fails
 */
private static void evaluateModel(String baseFolderPath, String projectName, Classifier pClassifier,
        Instances pInstances, String pModelName, String pClassifierName) throws Exception {

    int folds = 10;

    // Fixed seed for reproducible fold assignment; stratify nominal classes.
    Random rand = new Random(42);
    Instances randData = new Instances(pInstances);
    randData.randomize(rand);
    if (randData.classAttribute().isNominal()) {
        randData.stratify(folds);
    }

    // Accumulates statistics over all folds.
    Evaluation eval = new Evaluation(randData);

    int positiveValueIndexOfClassFeature = 0;
    for (int n = 0; n < folds; n++) {
        Instances train = randData.trainCV(folds, n);
        Instances test = randData.testCV(folds, n);
        // trainCV(folds, n) matches the StratifiedRemoveFolds filter;
        // the Explorer/Experimenter would use trainCV(folds, n, rand).

        // Locate the "isBuggy" attribute to use as the class attribute.
        int classFeatureIndex = 0;
        for (int i = 0; i < train.numAttributes(); i++) {
            if (train.attribute(i).name().equals("isBuggy")) {
                classFeatureIndex = i;
                break;
            }
        }

        // Index of the "TRUE" label, needed for the per-class metrics below.
        Attribute classFeature = train.attribute(classFeatureIndex);
        for (int i = 0; i < classFeature.numValues(); i++) {
            if (classFeature.value(i).equals("TRUE")) {
                positiveValueIndexOfClassFeature = i;
            }
        }

        train.setClassIndex(classFeatureIndex);
        test.setClassIndex(classFeatureIndex);

        // Build on the training fold, accumulate results on the test fold.
        pClassifier.buildClassifier(train);
        eval.evaluateModel(pClassifier, test);
    }

    double accuracy = (eval.numTruePositives(positiveValueIndexOfClassFeature)
            + eval.numTrueNegatives(positiveValueIndexOfClassFeature))
            / (eval.numTruePositives(positiveValueIndexOfClassFeature)
                    + eval.numFalsePositives(positiveValueIndexOfClassFeature)
                    + eval.numFalseNegatives(positiveValueIndexOfClassFeature)
                    + eval.numTrueNegatives(positiveValueIndexOfClassFeature));

    double fmeasure = 2 * ((eval.precision(positiveValueIndexOfClassFeature)
            * eval.recall(positiveValueIndexOfClassFeature))
            / (eval.precision(positiveValueIndexOfClassFeature)
                    + eval.recall(positiveValueIndexOfClassFeature)));

    File wekaOutput = new File(baseFolderPath + projectName + "/predictors.csv");
    // try-with-resources: the original never closed the PrintWriter, which
    // buffers output and could leave the CSV truncated or empty.
    try (PrintWriter pw1 = new PrintWriter(wekaOutput)) {
        pw1.write(accuracy + ";" + eval.precision(positiveValueIndexOfClassFeature) + ";"
                + eval.recall(positiveValueIndexOfClassFeature) + ";" + fmeasure + ";"
                + eval.areaUnderROC(positiveValueIndexOfClassFeature));
    }

    System.out.println(projectName + ";" + pClassifierName + ";" + pModelName + ";"
            + eval.numTruePositives(positiveValueIndexOfClassFeature) + ";"
            + eval.numFalsePositives(positiveValueIndexOfClassFeature) + ";"
            + eval.numFalseNegatives(positiveValueIndexOfClassFeature) + ";"
            + eval.numTrueNegatives(positiveValueIndexOfClassFeature) + ";" + accuracy + ";"
            + eval.precision(positiveValueIndexOfClassFeature) + ";"
            + eval.recall(positiveValueIndexOfClassFeature) + ";" + fmeasure + ";"
            + eval.areaUnderROC(positiveValueIndexOfClassFeature) + "\n");
}

From source file:LeerArchivo.Leer.java

/**
 * Reads the ARFF file {@code ../datos.arff}, trains a KStar classifier on it
 * (using the last attribute as the class), and serializes the trained model
 * to {@code ./KStar.model}. Errors are logged and swallowed.
 */
public void leerArchivoArff() {
    try {
        // KStar classifier (the original comment incorrectly said J48).
        Classifier cls = new KStar();

        // try-with-resources: the original never closed the FileReader.
        Instances inst;
        try (BufferedReader reader = new BufferedReader(new FileReader("../datos.arff"))) {
            inst = new Instances(reader);
        }
        inst.setClassIndex(inst.numAttributes() - 1);
        cls.buildClassifier(inst);

        // Serialize the model; the stream is closed even if writeObject throws
        // (the original leaked it on failure).
        try (ObjectOutputStream oos = new ObjectOutputStream(new FileOutputStream("./KStar.model"))) {
            oos.writeObject(cls);
        }
    } catch (Exception ex) {
        // Single handler: the original's IOException and Exception branches
        // performed the identical logging call.
        Logger.getLogger(Leer.class.getName()).log(Level.SEVERE, null, ex);
    }
}

From source file:LVCoref.WekaWrapper.java

License:Open Source License

/**
 * Trains an unpruned J48 decision tree on four gold-mention interview
 * documents, then prints the tree's class predictions for each instance of
 * two held-out interview documents. Any failure is logged and swallowed.
 */
public static void main(String[] args) {
    try {
        // Training documents.
        List<Document> docs = new LinkedList<Document>();
        addDocument(docs, "data/pipeline/interview_16.lvsem.conll", "data/interview_16_coref_level.xml");
        addDocument(docs, "data/pipeline/interview_23.lvsem.conll", "data/interview_23_coref_level.xml");
        addDocument(docs, "data/pipeline/interview_27.lvsem.conll", "data/interview_27_coref_level.xml");
        addDocument(docs, "data/pipeline/interview_38.lvsem.conll", "data/interview_38_coref_level.xml");

        Instances train = toArff2(docs);
        train.setClassIndex(train.numAttributes() - 1);
        String[] options = { "-U" }; // "-U": unpruned tree ("-C", "0.5" was considered)
        Classifier cls = new J48();
        cls.setOptions(options);
        cls.buildClassifier(train);

        // Held-out documents.
        docs = new LinkedList<Document>();
        addDocument(docs, "data/pipeline/interview_43.lvsem.conll", "data/interview_43_coref_level.xml");
        addDocument(docs, "data/pipeline/interview_46.lvsem.conll", "data/interview_46_coref_level.xml");

        // NOTE: the original also constructed an Evaluation over the training
        // data here; it was only referenced by commented-out experiments and
        // has been removed along with that dead code.

        Instances data = toArff2(docs);
        data.setClassIndex(data.numAttributes() - 1);
        for (int i = 0; i < data.numInstances(); i++) {
            double clsLabel = cls.classifyInstance(data.instance(i));
            data.instance(i).setClassValue(clsLabel);
            System.out.println(data.instance(i).toString(data.classIndex()));
        }
    } catch (Exception ex) {
        Logger.getLogger(WekaWrapper.class.getName()).log(Level.SEVERE, null, ex);
    }
}

/**
 * Loads one document from a CONLL file plus an MMAX coreference annotation
 * file, switches it to gold mentions, and appends it to {@code docs}.
 */
private static void addDocument(List<Document> docs, String conllPath, String mmaxPath) throws Exception {
    Document d = new Document();
    d.readCONLL(conllPath);
    d.addAnnotationMMAX(mmaxPath);
    d.useGoldMentions();
    docs.add(d);
}

From source file:machinelearningcw.MachineLearningCw.java

/**
 * Measures the combined wall-clock time (ms) for classifier {@code s} to
 * train and then classify a held-out split of the largest supplied data set,
 * for training fractions of 10% through 70%, printing one line per fraction.
 *
 * @param s    the classifier under test
 * @param data candidate data sets; the one with the most instances is used
 * @throws Exception if building or classification fails
 */
public static void timingExperiment(Classifier s, Instances[] data) throws Exception {

    // Select the data set with the most instances.
    Instances largestData = data[0];
    for (Instances candidate : data) {
        if (candidate.numInstances() > largestData.numInstances()) {
            largestData = candidate;
        }
    }

    for (int step = 1; step <= 7; step++) {
        int percent = step * 10;
        int trainSize = (int) Math.round(largestData.numInstances() * percent / 100);
        int testSize = largestData.numInstances() - trainSize;
        Instances train = new Instances(largestData, 0, trainSize);
        Instances test = new Instances(largestData, trainSize, testSize);

        long startMillis = System.currentTimeMillis();
        s.buildClassifier(train);
        for (Instance ins : test) {
            s.classifyInstance(ins);
        }
        long elapsedMillis = System.currentTimeMillis() - startMillis;

        System.out.println("TIME TAKEN " + step + ": " + elapsedMillis);
    }
    System.out.println("\n");
}

From source file:machinelearning_cw.MachineLearning_CW.java

/**
 * Tests the accuracy of a classifier against a collection of datasets
 * by resampling: for each dataset pair, the train and test data are merged
 * and randomly re-split {@code t} times, and the average accuracy over the
 * {@code t} resamples is printed (one line per dataset).
 *
 * @param classifier The classifier to be tested
 * @param trainingDatasets A collection of Instances objects containing
 * the training data for different datasets.
 * @param testDatasets A collection of Instances objects containing
 * the test data for different datasets.
 * @param t The number of times the data should be sampled
 * @throws Exception if training or accuracy measurement fails
 */
public static void performClassifierAccuracyTests(Classifier classifier, ArrayList<Instances> trainingDatasets,
        ArrayList<Instances> testDatasets, int t) throws Exception {
    Random randomGenerator = new Random();

    for (int i = 0; i < trainingDatasets.size(); i++) {
        Instances train = trainingDatasets.get(i);
        Instances test = testDatasets.get(i);

        // Fresh accumulator per dataset: the original reused one list across
        // all datasets, so each printed "average" included earlier datasets.
        ArrayList<Double> accuracies = new ArrayList<Double>();

        /* Test by resampling. First, merge train and test data. */
        for (int j = 0; j < t; j++) {

            Instances mergedDataSet = mergeDataSets(train, test);
            train.clear();
            test.clear();

            /* Randomly sample n instances from the merged dataset
             * (without replacement) to form the train set.
             */
            int n = mergedDataSet.size() / 2;
            for (int k = 0; k < n; k++) {
                int indexToRemove = randomGenerator.nextInt(mergedDataSet.size());
                train.add(mergedDataSet.remove(indexToRemove));
            }

            /* Reserve ALL remaining data as test data. The original indexed
             * loop advanced k while removing, which skipped every other
             * element and silently discarded half the remainder each round.
             * Removing from the end also avoids O(n^2) element shifting.
             */
            while (!mergedDataSet.isEmpty()) {
                test.add(mergedDataSet.remove(mergedDataSet.size() - 1));
            }

            /* Train classifier. Recalculates k */
            classifier.buildClassifier(train);

            /* Measure and record the accuracy of the classifier on
             * the test set.
             */
            double accuracy = Helpers.findClassifierAccuracy(classifier, test);
            accuracies.add(accuracy);
        }

        double accuracyAverage = average(accuracies);
        System.out.println(accuracyAverage);
    }
}

From source file:machinelearning_cw.MachineLearning_CW.java

/**
 * /* w ww.  jav  a2s.c o m*/
 * Tests the speed of a classifier in milliseconds and prints it. 
 * This is achieved by checking the time taken for the given classifier to
 * classify some given data.
 * 
 * 
 * @param classifier The classifier to be tested.
 * @param train The data with which to train the classifier.
 * @param test The data the classifier is to be tested against.
 * @param t The number of times the test should be carried out and averaged.
 * @throws Exception 
 */
public static void timeClassifier(Classifier classifier, Instances train, Instances test, int t)
        throws Exception {

    ArrayList<Double> times = new ArrayList<Double>();

    /* Carry out test t+1 times and average.
     * The first run is ignored to offset the effects of
     * the Java Garbage Collector and caching.
     */
    for (int i = 0; i < t + 1; i++) {
        // Time the build and classifyInstance methods
        double t1 = System.nanoTime();

        classifier.buildClassifier(train);
        for (Instance eachInstance : test) {
            classifier.classifyInstance(eachInstance);
        }

        double t2 = System.nanoTime() - t1;

        // Convert to ms
        double timeTaken = t2 / 1000000.0;

        if (i != 0) {
            times.add(timeTaken);
        }
    }

    double averageTime = average(times);
    System.out.println(averageTime);
}

From source file:mao.datamining.ModelProcess.java

/**
 * Trains the classifier on the final training data set and, if a final test
 * data set is supplied, evaluates it on that set: writes summary, per-class
 * detail and confusion-matrix sections to {@code testCaseSummaryOut}, and
 * records the confusion matrix, AUC/precision/recall (for class index 1) and
 * the train/test timings in {@code result}. Evaluation errors are logged via
 * {@code ModelProcess.logging} and otherwise swallowed.
 */
private void testWithExtraDS(Classifier classifier, Instances finalTrainDataSet, Instances finalTestDataSet,
        FileOutputStream testCaseSummaryOut, TestResult result) {
    // Nothing to do without a held-out test data set.
    if (finalTestDataSet == null) {
        return;
    }

    long trainTime = 0;
    long testTime = 0;
    try {
        // Train, timing the build.
        long tick = System.currentTimeMillis();
        classifier.buildClassifier(finalTrainDataSet);
        trainTime += System.currentTimeMillis() - tick;

        // Evaluate on the held-out data, timing the evaluation.
        tick = System.currentTimeMillis();
        Evaluation evaluation = new Evaluation(finalTrainDataSet);
        evaluation.evaluateModel(classifier, finalTestDataSet);
        testTime += System.currentTimeMillis() - tick;

        testCaseSummaryOut.write("=====================================================\n".getBytes());
        testCaseSummaryOut.write((evaluation.toSummaryString("=== Test Summary ===", true)).getBytes());
        testCaseSummaryOut.write("\n".getBytes());
        testCaseSummaryOut.write((evaluation.toClassDetailsString("=== Test Class Detail ===\n")).getBytes());
        testCaseSummaryOut.write("\n".getBytes());
        testCaseSummaryOut.write((evaluation.toMatrixString("=== Confusion matrix for Test ===\n")).getBytes());
        testCaseSummaryOut.flush();

        double[][] confusionMatrix = evaluation.confusionMatrix();
        result.setConfusionMatrix4Test(confusionMatrix);

        // Class index 1 is treated as the positive class.
        result.setAUT(evaluation.areaUnderROC(1));
        result.setPrecision(evaluation.precision(1));
        result.setRecall(evaluation.recall(1));
    } catch (Exception e) {
        ModelProcess.logging(null, e);
    }
    result.setTrainingTime(trainTime);
    result.setTestTime(testTime);
}