Example usage for weka.classifiers.functions SMO buildClassifier

List of usage examples for weka.classifiers.functions SMO buildClassifier

Introduction

On this page you can find example usage for weka.classifiers.functions SMO buildClassifier.

Prototype

public void buildClassifier(Instances insts) throws Exception 

Document

Method for building the classifier.
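
Before the longer examples below, here is a minimal, self-contained sketch of calling buildClassifier. The ARFF path is a placeholder, not taken from any example on this page:

import java.io.BufferedReader;
import java.io.FileReader;

import weka.classifiers.functions.SMO;
import weka.core.Instances;

public class SMOQuickStart {
    public static void main(String[] args) throws Exception {
        // Load the training set; the class attribute is assumed to be the last one.
        BufferedReader reader = new BufferedReader(new FileReader("data/train.arff"));
        Instances train = new Instances(reader);
        reader.close();
        train.setClassIndex(train.numAttributes() - 1);

        // Build the SVM; buildClassifier throws if the data cannot be handled.
        SMO smo = new SMO();
        smo.buildClassifier(train);

        // Classify the first training instance as a smoke test.
        double label = smo.classifyInstance(train.instance(0));
        System.out.println("Predicted class index: " + label);
    }
}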

Usage

From source file:cs.man.ac.uk.predict.Predictor.java

License:Open Source License

public static void makePredictionsEnsembleNew(String trainPath, String testPath, String resultPath) {
    System.out.println("Training set: " + trainPath);
    System.out.println("Test set: " + testPath);

    /**
     * The ensemble classifiers. This is a heterogeneous ensemble.
     */
    J48 learner1 = new J48();
    SMO learner2 = new SMO();
    NaiveBayes learner3 = new NaiveBayes();
    MultilayerPerceptron learner5 = new MultilayerPerceptron();

    System.out.println("Training Ensemble.");
    long startTime = System.nanoTime();
    try {
        BufferedReader reader = new BufferedReader(new FileReader(trainPath));
        Instances data = new Instances(reader);
        reader.close();
        data.setClassIndex(data.numAttributes() - 1);
        System.out.println("Training data length: " + data.numInstances());

        learner1.buildClassifier(data);
        learner2.buildClassifier(data);
        learner3.buildClassifier(data);
        learner5.buildClassifier(data);

        long endTime = System.nanoTime();
        long nanoseconds = endTime - startTime;
        double seconds = (double) nanoseconds / 1000000000.0;
        System.out.println("Training Ensemble completed in " + nanoseconds + " (ns) or " + seconds + " (s).");
    } catch (IOException e) {
        System.out.println("Could not train Ensemble classifier IOException on training data file.");
    } catch (Exception e) {
        System.out.println("Could not train Ensemble classifier Exception building model.");
    }

    try {
        String line = "";

        // Read the file and display it line by line. 
        BufferedReader in = null;

        // Read in and store each positive prediction in the tree map.
        try {
            //open stream to file
            in = new BufferedReader(new FileReader(testPath));

            while ((line = in.readLine()) != null) {
                if (line.toLowerCase().contains("@data"))
                    break;
            }
        } catch (Exception e) {
            // If the header cannot be skipped, in.readLine() below returns null for every row.
        }

        // A different ARFF loader used here (compared to above) as
        // the ARFF file may be extremely large. In which case the whole
        // file cannot be read in. Instead it is read in incrementally.
        ArffLoader loader = new ArffLoader();
        loader.setFile(new File(testPath));

        Instances data = loader.getStructure();
        data.setClassIndex(data.numAttributes() - 1);

        System.out.println("Ensemble Classifier is ready.");
        System.out.println("Testing on all instances avaialable.");

        startTime = System.nanoTime();

        int instanceNumber = 0;

        // label instances
        Instance current;

        while ((current = loader.getNextInstance(data)) != null) {
            instanceNumber += 1;
            line = in.readLine();

            double classification1 = learner1.classifyInstance(current);
            double classification2 = learner2.classifyInstance(current);
            double classification3 = learner3.classifyInstance(current);
            double classification5 = learner5.classifyInstance(current);

            // All classifiers must agree. This is a very primitive ensemble strategy!
            if (classification1 == 1 && classification2 == 1 && classification3 == 1 && classification5 == 1) {
                if (line != null) {
                    //System.out.println("Instance: "+instanceNumber+"\t"+line);
                    //System.in.read();
                }
                Writer.append(resultPath, instanceNumber + "\n");
            }
        }

        in.close();

        System.out.println("Test set instances: " + instanceNumber);

        long endTime = System.nanoTime();
        long duration = endTime - startTime;
        double seconds = (double) duration / 1000000000.0;

        System.out.println("Testing Ensemble completed in " + duration + " (ns) or " + seconds + " (s).");
    } catch (Exception e) {
        System.out.println("Could not test Ensemble classifier due to an error.");
    }
}
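
The unanimous "all classifiers must agree" rule above is deliberately primitive. As a hedged alternative (not part of the original source), Weka ships a Vote meta-classifier with standard combination rules such as majority voting; a minimal sketch, assuming the stock weka.classifiers.meta.Vote API:

import weka.classifiers.Classifier;
import weka.classifiers.bayes.NaiveBayes;
import weka.classifiers.functions.SMO;
import weka.classifiers.meta.Vote;
import weka.classifiers.trees.J48;
import weka.core.Instances;
import weka.core.SelectedTag;

public static Classifier trainMajorityVoteEnsemble(Instances data) throws Exception {
    Vote ensemble = new Vote();
    // Same heterogeneous members as the hand-rolled ensemble above.
    ensemble.setClassifiers(new Classifier[] { new J48(), new SMO(), new NaiveBayes() });
    // Majority voting instead of requiring unanimous agreement.
    ensemble.setCombinationRule(new SelectedTag(Vote.MAJORITY_VOTING_RULE, Vote.TAGS_RULES));
    ensemble.buildClassifier(data); // data must have its class index set
    return ensemble;
}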

From source file:cs.man.ac.uk.predict.Predictor.java

License:Open Source License

public static void makePredictionsEnsembleStream(String trainPath, String testPath, String resultPath) {
    System.out.println("Training set: " + trainPath);
    System.out.println("Test set: " + testPath);

    /**
     * The ensemble classifiers. This is a heterogeneous ensemble.
     */
    J48 learner1 = new J48();
    SMO learner2 = new SMO();
    NaiveBayes learner3 = new NaiveBayes();
    MultilayerPerceptron learner5 = new MultilayerPerceptron();

    System.out.println("Training Ensemble.");
    long startTime = System.nanoTime();
    try {
        BufferedReader reader = new BufferedReader(new FileReader(trainPath));
        Instances data = new Instances(reader);
        reader.close();
        data.setClassIndex(data.numAttributes() - 1);
        System.out.println("Training data length: " + data.numInstances());

        learner1.buildClassifier(data);
        learner2.buildClassifier(data);
        learner3.buildClassifier(data);
        learner5.buildClassifier(data);

        long endTime = System.nanoTime();
        long nanoseconds = endTime - startTime;
        double seconds = (double) nanoseconds / 1000000000.0;
        System.out.println("Training Ensemble completed in " + nanoseconds + " (ns) or " + seconds + " (s).");
    } catch (IOException e) {
        System.out.println("Could not train Ensemble classifier IOException on training data file.");
    } catch (Exception e) {
        System.out.println("Could not train Ensemble classifier Exception building model.");
    }

    try {
        // A different ARFF loader used here (compared to above) as
        // the ARFF file may be extremely large. In which case the whole
        // file cannot be read in. Instead it is read in incrementally.
        ArffLoader loader = new ArffLoader();
        loader.setFile(new File(testPath));

        Instances data = loader.getStructure();
        data.setClassIndex(data.numAttributes() - 1);

        System.out.println("Ensemble Classifier is ready.");
        System.out.println("Testing on all instances avaialable.");

        startTime = System.nanoTime();

        int instanceNumber = 0;

        // label instances
        Instance current;

        while ((current = loader.getNextInstance(data)) != null) {
            instanceNumber += 1;

            double classification1 = learner1.classifyInstance(current);
            double classification2 = learner2.classifyInstance(current);
            double classification3 = learner3.classifyInstance(current);
            double classification5 = learner5.classifyInstance(current);

            // All classifiers must agree. This is a very primitive ensemble strategy!
            if (classification1 == 1 && classification2 == 1 && classification3 == 1 && classification5 == 1) {
                Writer.append(resultPath, instanceNumber + "\n");
            }
        }

        System.out.println("Test set instances: " + instanceNumber);

        long endTime = System.nanoTime();
        long duration = endTime - startTime;
        double seconds = (double) duration / 1000000000.0;

        System.out.println("Testing Ensemble completed in " + duration + " (ns) or " + seconds + " (s).");
    } catch (Exception e) {
        System.out.println("Could not test Ensemble classifier due to an error.");
    }
}

From source file:farm_ads.MyClassifier.java

public Classifier classifierSMO(Instances instances) throws Exception {
    SMO classifier = new SMO();
    classifier.setOptions(weka.core.Utils.splitOptions(
            "-C 1.0 -L 0.0010 -P 1.0E-12 -N 0 -V -1 -W 1 -K \"weka.classifiers.functions.supportVector.PolyKernel -C 250007 -E 1.0\""));
    classifier.buildClassifier(instances);
    return classifier;
}
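
A hedged usage sketch for the factory method above; the caller and training data are placeholders, not from the farm_ads project:

// Assumes "instances" was loaded elsewhere and has its class index set.
MyClassifier factory = new MyClassifier();
Classifier smo = factory.classifierSMO(instances);
double label = smo.classifyInstance(instances.instance(0));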

From source file:kfst.classifier.WekaClassifier.java

License:Open Source License

/**
 * This method builds and evaluates the support vector machine (SVM)
 * classifier. SMO is used as the SVM implementation provided by the
 * Weka software.
 *
 * @param pathTrainData the path of the train set
 * @param pathTestData the path of the test set
 * @param svmKernel the kernel to use
 * 
 * @return the classification accuracy
 */
public static double SVM(String pathTrainData, String pathTestData, String svmKernel) {
    double resultValue = 0;
    try {
        BufferedReader readerTrain = new BufferedReader(new FileReader(pathTrainData));
        Instances dataTrain = new Instances(readerTrain);
        readerTrain.close();
        dataTrain.setClassIndex(dataTrain.numAttributes() - 1);

        BufferedReader readerTest = new BufferedReader(new FileReader(pathTestData));
        Instances dataTest = new Instances(readerTest);
        readerTest.close();
        dataTest.setClassIndex(dataTest.numAttributes() - 1);
        SMO svm = new SMO();
        if (svmKernel.equals("Polynomial kernel")) {
            svm.setKernel(weka.classifiers.functions.supportVector.PolyKernel.class.newInstance());
        } else if (svmKernel.equals("RBF kernel")) {
            svm.setKernel(weka.classifiers.functions.supportVector.RBFKernel.class.newInstance());
        } else {
            svm.setKernel(weka.classifiers.functions.supportVector.Puk.class.newInstance());
        }
        svm.buildClassifier(dataTrain);
        Evaluation eval = new Evaluation(dataTest);
        eval.evaluateModel(svm, dataTest);
        resultValue = 100 - (eval.errorRate() * 100);
    } catch (Exception ex) {
        Logger.getLogger(WekaClassifier.class.getName()).log(Level.SEVERE, null, ex);
    }
    return resultValue;
}
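
A hedged call sketch; the paths and kernel name are placeholders, not taken from the original project:

// svmKernel must be "Polynomial kernel" or "RBF kernel"; anything else selects Puk.
double accuracy = WekaClassifier.SVM("data/train.arff", "data/test.arff", "RBF kernel");
System.out.println("SVM accuracy: " + accuracy + "%");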

From source file:KFST.featureSelection.embedded.SVMBasedMethods.MSVM_RFE.java

License:Open Source License

/**
 * generates binary SVM classifiers by applying a k-fold cross-validation
 * resampling strategy, using the input data and the selected feature
 * subset.
 *
 * @param selectedFeature an array of indices of the selected feature subset
 *
 * @return an array of the weights of features
 */
protected double[][] buildSVM_KFoldCrossValidation(int[] selectedFeature) {
    double[][] weights = new double[numRun * kFoldValue][selectedFeature.length];
    int classifier = 0;

    for (int i = 0; i < numRun; i++) {
        double[][] copyTrainSet = ArraysFunc.copyDoubleArray2D(trainSet);

        //shuffles the train set
        MathFunc.randomize(copyTrainSet);

        int numSampleInFold = copyTrainSet.length / kFoldValue;
        int remainder = copyTrainSet.length % kFoldValue;
        int indexStart = 0;
        for (int k = 0; k < kFoldValue; k++) {
            int indexEnd = indexStart + numSampleInFold;
            if (k < remainder) {
                indexEnd++;
            }
            double[][] subTrainSet = ArraysFunc.copyDoubleArray2D(copyTrainSet, indexStart, indexEnd);

            String nameDataCSV = TEMP_PATH + "dataCSV[" + i + "-" + k + "].csv";
            String nameDataARFF = TEMP_PATH + "dataARFF[" + i + "-" + k + "].arff";

            FileFunc.createCSVFile(subTrainSet, selectedFeature, nameDataCSV, nameFeatures, classLabel);
            FileFunc.convertCSVtoARFF(nameDataCSV, nameDataARFF, TEMP_PATH, selectedFeature.length, numFeatures,
                    nameFeatures, numClass, classLabel);

            try {
                BufferedReader readerTrain = new BufferedReader(new FileReader(nameDataARFF));
                Instances dataTrain = new Instances(readerTrain);
                readerTrain.close();
                dataTrain.setClassIndex(dataTrain.numAttributes() - 1);

                SMO svm = new SMO();
                svm.setC(parameterC);
                svm.setKernel(WekaSVMKernel.parse(kernelType));
                svm.buildClassifier(dataTrain);

                double[] weightsSparse = svm.sparseWeights()[0][1];
                int[] indicesSparse = svm.sparseIndices()[0][1];
                for (int m = 0; m < weightsSparse.length; m++) {
                    weights[classifier][indicesSparse[m]] = weightsSparse[m];
                }
            } catch (Exception ex) {
                Logger.getLogger(MSVM_RFE.class.getName()).log(Level.SEVERE, null, ex);
            }

            indexStart = indexEnd;
            classifier++;
        }
    }

    return weights;
}
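
In SVM-RFE style methods such as this one, the returned weight matrix is typically collapsed into one ranking score per feature. A hedged sketch of the usual aggregation (mean squared weight; an assumption about the downstream step, not quoted from this project):

double[] score = new double[selectedFeature.length];
for (double[] w : weights) {
    for (int j = 0; j < score.length; j++) {
        score[j] += (w[j] * w[j]) / weights.length; // mean squared weight per feature
    }
}
// The feature with the smallest score is the next candidate for elimination.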

From source file:KFST.featureSelection.embedded.SVMBasedMethods.SVMBasedMethods.java

License:Open Source License

/**
 * generates binary classifiers (SVM) using input data and based on selected
 * feature subset, and finally returns the weights of features.
 * One-Versus-One strategy is used to construct classifiers in multiclass
 * classification.
 *
 * @param selectedFeature an array of indices of the selected feature subset
 *
 * @return an array of the weights of features
 */
protected double[][][] buildSVM_OneAgainstOne(int[] selectedFeature) {
    String nameDataCSV = TEMP_PATH + "dataCSV.csv";
    String nameDataARFF = TEMP_PATH + "dataARFF.arff";
    double[][][] weights = new double[numClass][numClass][selectedFeature.length];

    FileFunc.createCSVFile(trainSet, selectedFeature, nameDataCSV, nameFeatures, classLabel);
    FileFunc.convertCSVtoARFF(nameDataCSV, nameDataARFF, TEMP_PATH, selectedFeature.length, numFeatures,
            nameFeatures, numClass, classLabel);

    try {
        BufferedReader readerTrain = new BufferedReader(new FileReader(nameDataARFF));
        Instances dataTrain = new Instances(readerTrain);
        readerTrain.close();
        dataTrain.setClassIndex(dataTrain.numAttributes() - 1);

        SMO svm = new SMO();
        svm.setC(parameterC);
        svm.setKernel(WekaSVMKernel.parse(kernelType));
        svm.buildClassifier(dataTrain);

        for (int i = 0; i < numClass; i++) {
            for (int j = i + 1; j < numClass; j++) {
                double[] weightsSparse = svm.sparseWeights()[i][j];
                int[] indicesSparse = svm.sparseIndices()[i][j];
                for (int k = 0; k < weightsSparse.length; k++) {
                    weights[i][j][indicesSparse[k]] = weightsSparse[k];
                }
            }
        }
    } catch (Exception ex) {
        Logger.getLogger(SVMBasedMethods.class.getName()).log(Level.SEVERE, null, ex);
    }

    return weights;
}

From source file:KFST.featureSelection.embedded.SVMBasedMethods.SVMBasedMethods.java

License:Open Source License

/**
 * generates binary classifiers (SVM) using input data and based on selected
 * feature subset, and finally returns the weights of features.
 * One-Versus-All strategy is used to construct classifiers in multiclass
 * classification.
 *
 * @param selectedFeature an array of indices of the selected feature subset
 *
 * @return an array of the weights of features
 */
protected double[][] buildSVM_OneAgainstRest(int[] selectedFeature) {
    double[][] weights = new double[numClass][selectedFeature.length];
    String[] tempClassLabel = new String[] { "c1", "c2" };

    for (int indexClass = 0; indexClass < numClass; indexClass++) {
        double[][] copyTrainSet = ArraysFunc.copyDoubleArray2D(trainSet);
        String nameDataCSV = TEMP_PATH + "dataCSV" + indexClass + ".csv";
        String nameDataARFF = TEMP_PATH + "dataARFF" + indexClass + ".arff";

        for (double[] dataRow : copyTrainSet) {
            if (dataRow[numFeatures] == classLabelInTrainSet[indexClass]) {
                dataRow[numFeatures] = 0;
            } else {
                dataRow[numFeatures] = 1;
            }
        }

        FileFunc.createCSVFile(copyTrainSet, selectedFeature, nameDataCSV, nameFeatures, tempClassLabel);
        FileFunc.convertCSVtoARFF(nameDataCSV, nameDataARFF, TEMP_PATH, selectedFeature.length, numFeatures,
                nameFeatures, tempClassLabel.length, tempClassLabel);

        try {
            BufferedReader readerTrain = new BufferedReader(new FileReader(nameDataARFF));
            Instances dataTrain = new Instances(readerTrain);
            readerTrain.close();
            dataTrain.setClassIndex(dataTrain.numAttributes() - 1);

            SMO svm = new SMO();
            svm.setC(parameterC);
            svm.setKernel(WekaSVMKernel.parse(kernelType));
            svm.buildClassifier(dataTrain);

            double[] weightsSparse = svm.sparseWeights()[0][1];
            int[] indicesSparse = svm.sparseIndices()[0][1];
            for (int k = 0; k < weightsSparse.length; k++) {
                weights[indexClass][indicesSparse[k]] = weightsSparse[k];
            }
        } catch (Exception ex) {
            Logger.getLogger(SVMBasedMethods.class.getName()).log(Level.SEVERE, null, ex);
        }
    }

    return weights;
}

From source file:machinelearningproject.MachineLearningProject.java

/**
 * @param args the command line arguments
 */
public static void main(String[] args) throws Exception {
    DataSource source = new DataSource("D:\\spambase.arff");
    //        DataSource source = new DataSource("D:\\weather-nominal.arff");
    Instances instances = source.getDataSet();
    int numAttr = instances.numAttributes();
    instances.setClassIndex(instances.numAttributes() - 1);

    int runs = 5;
    int seed = 15;
    for (int i = 0; i < runs; i++) {
        //randomize data
        seed = seed + 1; // the seed for randomizing the data
        Random rand = new Random(seed); // create seeded number generator
        Instances randData = new Instances(instances); // create copy of original data
        randData.randomize(rand); // shuffle with the seeded generator so runs are reproducible

        Evaluation evalDTree = new Evaluation(randData);
        Evaluation evalRF = new Evaluation(randData);
        Evaluation evalSVM = new Evaluation(randData);

        int folds = 10;
        for (int n = 0; n < folds; n++) {
            Instances train = randData.trainCV(folds, n, rand);
            Instances test = randData.testCV(folds, n);
            //instantiate classifiers
            DecisionTree dtree = new DecisionTree();
            RandomForest rf = new RandomForest(100);
            SMO svm = new SMO();
            RBFKernel rbfKernel = new RBFKernel();
            double gamma = 0.70;
            rbfKernel.setGamma(gamma);
            svm.setKernel(rbfKernel); // attach the configured kernel; otherwise it is never used

            dtree.buildClassifier(train);
            rf.buildClassifier(train);
            svm.buildClassifier(train);

            evalDTree.evaluateModel(dtree, test);
            evalRF.evaluateModel(rf, test);
            evalSVM.evaluateModel(svm, test);
        }
        System.out.println("=== Decision Tree Evaluation ===");
        System.out.println(evalDTree.toSummaryString());
        System.out.println(evalDTree.toClassDetailsString());
        System.out.println(evalDTree.toMatrixString());

        System.out.println("=== Random Forest Evaluation ===");
        System.out.println(evalRF.toSummaryString());
        System.out.println(evalRF.toClassDetailsString());
        System.out.println(evalRF.toMatrixString());

        System.out.println("=== SVM Evaluation ===");
        System.out.println(evalSVM.toSummaryString());
        System.out.println(evalSVM.toClassDetailsString());
        System.out.println(evalSVM.toMatrixString());
    }
}