Example usage for weka.classifiers.functions SMO setKernel

Introduction

This page collects example usages of weka.classifiers.functions.SMO#setKernel from open-source projects.

Prototype

public void setKernel(Kernel value) 

Document

Sets the kernel to use.
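
Before the project-level examples below, here is a minimal, self-contained sketch of the typical call pattern. It is an illustration only: the dataset path and the gamma value are placeholder assumptions, not taken from any of the examples on this page.

import weka.classifiers.functions.SMO;
import weka.classifiers.functions.supportVector.RBFKernel;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class SetKernelExample {
    public static void main(String[] args) throws Exception {
        // load a dataset (placeholder path) and mark the last attribute as the class
        Instances data = DataSource.read("/path/to/data.arff");
        data.setClassIndex(data.numAttributes() - 1);

        // configure the kernel first, then hand it to SMO before training
        RBFKernel kernel = new RBFKernel();
        kernel.setGamma(0.01); // illustrative value; tune for your data

        SMO smo = new SMO();
        smo.setKernel(kernel); // must be called before buildClassifier
        smo.buildClassifier(data);
    }
}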

Usage

From source file:com.edwardraff.WekaMNIST.java

License:Open Source License

public static void main(String[] args) throws IOException, Exception {
    String folder = args[0];
    String trainPath = folder + "MNISTtrain.arff";
    String testPath = folder + "MNISTtest.arff";

    System.out.println("Weka Timings");
    Instances mnistTrainWeka = new Instances(new BufferedReader(new FileReader(new File(trainPath))));
    mnistTrainWeka.setClassIndex(mnistTrainWeka.numAttributes() - 1);
    Instances mnistTestWeka = new Instances(new BufferedReader(new FileReader(new File(testPath))));
    mnistTestWeka.setClassIndex(mnistTestWeka.numAttributes() - 1);

    //normalize each attribute's range into [0, 1]
    Normalize normalizeFilter = new Normalize();
    normalizeFilter.setInputFormat(mnistTrainWeka);

    // filter the training set first so the normalization ranges are computed
    // from the training data, then reuse the fitted filter on the test set
    mnistTrainWeka = Filter.useFilter(mnistTrainWeka, normalizeFilter);
    mnistTestWeka = Filter.useFilter(mnistTestWeka, normalizeFilter);

    long start, end;

    System.out.println("RBF SVM (Full Cache)");
    SMO smo = new SMO();
    smo.setKernel(new RBFKernel(mnistTrainWeka, 0/*0 causes Weka to cache the whole matrix...*/, 0.015625));
    smo.setC(8.0);
    smo.setBuildLogisticModels(false);
    evalModel(smo, mnistTrainWeka, mnistTestWeka);

    System.out.println("RBF SVM (No Cache)");
    smo = new SMO();
    smo.setKernel(new RBFKernel(mnistTrainWeka, 1, 0.015625));
    smo.setC(8.0);
    smo.setBuildLogisticModels(false);
    evalModel(smo, mnistTrainWeka, mnistTestWeka);

    System.out.println("Decision Tree C45");
    J48 wekaC45 = new J48();
    wekaC45.setUseLaplace(false);
    wekaC45.setCollapseTree(false);
    wekaC45.setUnpruned(true);
    wekaC45.setMinNumObj(2);
    wekaC45.setUseMDLcorrection(true);

    evalModel(wekaC45, mnistTrainWeka, mnistTestWeka);

    System.out.println("Random Forest 50 trees");
    int featuresToUse = (int) Math.sqrt(28 * 28);//Weka uses a different default, so let's make sure both libraries use the published value

    RandomForest wekaRF = new RandomForest();
    wekaRF.setNumExecutionSlots(1);
    wekaRF.setMaxDepth(0/*0 for unlimited*/);
    wekaRF.setNumFeatures(featuresToUse);
    wekaRF.setNumTrees(50);

    evalModel(wekaRF, mnistTrainWeka, mnistTestWeka);

    System.out.println("1-NN (brute)");
    IBk wekaNN = new IBk(1);
    wekaNN.setNearestNeighbourSearchAlgorithm(new LinearNNSearch());
    wekaNN.setCrossValidate(false);

    evalModel(wekaNN, mnistTrainWeka, mnistTestWeka);

    System.out.println("1-NN (Ball Tree)");
    wekaNN = new IBk(1);
    wekaNN.setNearestNeighbourSearchAlgorithm(new BallTree());
    wekaNN.setCrossValidate(false);

    evalModel(wekaNN, mnistTrainWeka, mnistTestWeka);

    System.out.println("1-NN (Cover Tree)");
    wekaNN = new IBk(1);
    wekaNN.setNearestNeighbourSearchAlgorithm(new CoverTree());
    wekaNN.setCrossValidate(false);

    evalModel(wekaNN, mnistTrainWeka, mnistTestWeka);

    System.out.println("Logistic Regression LBFGS lambda = 1e-4");
    Logistic logisticLBFGS = new Logistic();
    logisticLBFGS.setRidge(1e-4);
    logisticLBFGS.setMaxIts(500);

    evalModel(logisticLBFGS, mnistTrainWeka, mnistTestWeka);

    System.out.println("k-means (Loyd)");
    int origClassIndex = mnistTrainWeka.classIndex();
    mnistTrainWeka.setClassIndex(-1);
    mnistTrainWeka.deleteAttributeAt(origClassIndex);
    {
        long totalTime = 0;
        for (int i = 0; i < 10; i++) {
            SimpleKMeans wekaKMeans = new SimpleKMeans();
            wekaKMeans.setNumClusters(10);
            wekaKMeans.setNumExecutionSlots(1);
            wekaKMeans.setFastDistanceCalc(true);

            start = System.currentTimeMillis();
            wekaKMeans.buildClusterer(mnistTrainWeka);
            end = System.currentTimeMillis();
            totalTime += (end - start);
        }
        System.out.println("\tClustering took: " + (totalTime / 10.0) / 1000.0 + " on average");
    }
}

From source file:development.CrossValidateShapelets.java

public static ArrayList<Classifier> setSingleClassifiers(ArrayList<String> names) {
    ArrayList<Classifier> sc = new ArrayList<>();
    kNN n = new kNN(50);
    n.setCrossValidate(true);
    sc.add(n);
    names.add("kNN");
    sc.add(new J48());
    names.add("C45");
    sc.add(new NaiveBayes());
    names.add("NB");
    BayesNet bn = new BayesNet();
    sc.add(bn);
    names.add("BayesNet");
    RandomForest rf = new RandomForest();
    rf.setNumTrees(200);
    sc.add(rf);
    names.add("RandForest");
    RotationForest rot = new RotationForest();
    rot.setNumIterations(30);
    sc.add(rot);
    names.add("RotForest");
    SMO svmL = new SMO();
    PolyKernel kernel = new PolyKernel();
    kernel.setExponent(1);
    svmL.setKernel(kernel);
    sc.add(svmL);
    names.add("SVML");
    kernel = new PolyKernel();
    kernel.setExponent(2);
    SMO svmQ = new SMO();
    svmQ.setKernel(kernel);
    sc.add(svmQ);
    names.add("SVMQ");
    return sc;
}
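
Since setSingleClassifiers is public and static, a hypothetical caller (the variable names below are illustrative) can pair the returned classifiers with the names list the method fills in:

    ArrayList<String> names = new ArrayList<>();
    ArrayList<Classifier> classifiers = CrossValidateShapelets.setSingleClassifiers(names);
    for (int i = 0; i < classifiers.size(); i++)
        System.out.println(names.get(i) + " -> " + classifiers.get(i).getClass().getSimpleName());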

From source file:etc.aloe.oilspill2010.TrainingImpl.java

@Override
public WekaModel train(ExampleSet examples) {
    //These settings aren't terrible
    SMO smo = new SMO();
    RBFKernel rbf = new RBFKernel();
    rbf.setGamma(0.5);
    smo.setKernel(rbf);
    smo.setC(1.5);

    //These also work pretty ok
    Logistic log = new Logistic();
    log.setRidge(100);

    // note that the logistic model, not the SMO configured above, is what gets trained
    Classifier classifier = log;

    try {
        System.out.print("Training on " + examples.size() + " examples... ");
        classifier.buildClassifier(examples.getInstances());
        System.out.println("done.");

        WekaModel model = new WekaModel(classifier);
        return model;
    } catch (Exception ex) {
        System.err.println("Unable to train classifier.");
        System.err.println("\t" + ex.getMessage());
        return null;
    }
}

From source file:jjj.asap.sas.models1.job.BuildRBFKernelModels.java

License:Open Source License

@Override
protected void run() throws Exception {

    // validate args
    if (!Bucket.isBucket("datasets", inputBucket)) {
        throw new FileNotFoundException(inputBucket);
    }
    if (!Bucket.isBucket("models", outputBucket)) {
        throw new FileNotFoundException(outputBucket);
    }

    // init multi-threading
    Job.startService();
    final Queue<Future<Object>> queue = new LinkedList<Future<Object>>();

    // get the input from the bucket
    List<String> names = Bucket.getBucketItems("datasets", this.inputBucket);
    for (String dsn : names) {

        SMO smo = new SMO();
        smo.setFilterType(new SelectedTag(SMO.FILTER_NONE, SMO.TAGS_FILTER));
        smo.setBuildLogisticModels(true);
        RBFKernel kernel = new RBFKernel();
        kernel.setGamma(0.05);
        smo.setKernel(kernel);

        AttributeSelectedClassifier asc = new AttributeSelectedClassifier();
        asc.setEvaluator(new InfoGainAttributeEval());
        Ranker ranker = new Ranker();
        ranker.setThreshold(0.01);
        asc.setSearch(ranker);
        asc.setClassifier(smo);

        queue.add(Job.submit(new ModelBuilder(dsn, "InfoGain-SMO-RBFKernel", asc, this.outputBucket)));
    }

    // wait on complete
    Progress progress = new Progress(queue.size(), this.getClass().getSimpleName());
    while (!queue.isEmpty()) {
        try {
            queue.remove().get();
        } catch (Exception e) {
            Job.log("ERROR", e.toString());
        }
        progress.tick();
    }
    progress.done();
    Job.stopService();

}

From source file:kfst.classifier.WekaClassifier.java

License:Open Source License

/**
 * Builds and evaluates a support vector machine (SVM) classifier.
 * Weka's SMO implementation is used as the SVM classifier.
 *
 * @param pathTrainData the path of the train set
 * @param pathTestData the path of the test set
 * @param svmKernel the kernel to use
 * 
 * @return the classification accuracy
 */
public static double SVM(String pathTrainData, String pathTestData, String svmKernel) {
    double resultValue = 0;
    try {
        BufferedReader readerTrain = new BufferedReader(new FileReader(pathTrainData));
        Instances dataTrain = new Instances(readerTrain);
        readerTrain.close();
        dataTrain.setClassIndex(dataTrain.numAttributes() - 1);

        BufferedReader readerTest = new BufferedReader(new FileReader(pathTestData));
        Instances dataTest = new Instances(readerTest);
        readerTest.close();
        dataTest.setClassIndex(dataTest.numAttributes() - 1);
        SMO svm = new SMO();
        if (svmKernel.equals("Polynomial kernel")) {
            svm.setKernel(new weka.classifiers.functions.supportVector.PolyKernel());
        } else if (svmKernel.equals("RBF kernel")) {
            svm.setKernel(new weka.classifiers.functions.supportVector.RBFKernel());
        } else {
            svm.setKernel(new weka.classifiers.functions.supportVector.Puk());
        }
        svm.buildClassifier(dataTrain);
        Evaluation eval = new Evaluation(dataTest);
        eval.evaluateModel(svm, dataTest);
        resultValue = 100 - (eval.errorRate() * 100);
    } catch (Exception ex) {
        Logger.getLogger(WekaClassifier.class.getName()).log(Level.SEVERE, null, ex);
    }
    return resultValue;
}
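
Assuming ARFF files exist at the given paths (placeholders below), the helper is static and can be called directly; it returns the classification accuracy as a percentage:

    double accuracy = WekaClassifier.SVM("/path/to/train.arff", "/path/to/test.arff", "RBF kernel");
    System.out.println("Accuracy: " + accuracy + "%");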

From source file:KFST.featureSelection.embedded.SVMBasedMethods.MSVM_RFE.java

License:Open Source License

/**
 * Generates binary SVM classifiers from the input data, restricted to the
 * selected feature subset, using a k-fold cross-validation resampling
 * strategy.
 *
 * @param selectedFeature an array of indices of the selected feature subset
 *
 * @return an array of the weights of features
 */
protected double[][] buildSVM_KFoldCrossValidation(int[] selectedFeature) {
    double[][] weights = new double[numRun * kFoldValue][selectedFeature.length];
    int classifier = 0;

    for (int i = 0; i < numRun; i++) {
        double[][] copyTrainSet = ArraysFunc.copyDoubleArray2D(trainSet);

        //shuffles the train set
        MathFunc.randomize(copyTrainSet);

        int numSampleInFold = copyTrainSet.length / kFoldValue;
        int remainder = copyTrainSet.length % kFoldValue;
        int indexStart = 0;
        for (int k = 0; k < kFoldValue; k++) {
            int indexEnd = indexStart + numSampleInFold;
            if (k < remainder) {
                indexEnd++;
            }
            double[][] subTrainSet = ArraysFunc.copyDoubleArray2D(copyTrainSet, indexStart, indexEnd);

            String nameDataCSV = TEMP_PATH + "dataCSV[" + i + "-" + k + "].csv";
            String nameDataARFF = TEMP_PATH + "dataARFF[" + i + "-" + k + "].arff";

            FileFunc.createCSVFile(subTrainSet, selectedFeature, nameDataCSV, nameFeatures, classLabel);
            FileFunc.convertCSVtoARFF(nameDataCSV, nameDataARFF, TEMP_PATH, selectedFeature.length, numFeatures,
                    nameFeatures, numClass, classLabel);

            try {
                BufferedReader readerTrain = new BufferedReader(new FileReader(nameDataARFF));
                Instances dataTrain = new Instances(readerTrain);
                readerTrain.close();
                dataTrain.setClassIndex(dataTrain.numAttributes() - 1);

                SMO svm = new SMO();
                svm.setC(parameterC);
                svm.setKernel(WekaSVMKernel.parse(kernelType));
                svm.buildClassifier(dataTrain);

                double[] weightsSparse = svm.sparseWeights()[0][1];
                int[] indicesSparse = svm.sparseIndices()[0][1];
                for (int m = 0; m < weightsSparse.length; m++) {
                    weights[classifier][indicesSparse[m]] = weightsSparse[m];
                }
            } catch (Exception ex) {
                Logger.getLogger(MSVM_RFE.class.getName()).log(Level.SEVERE, null, ex);
            }

            indexStart = indexEnd;
            classifier++;
        }
    }

    return weights;
}

From source file:KFST.featureSelection.embedded.SVMBasedMethods.SVMBasedMethods.java

License:Open Source License

/**
 * Generates binary SVM classifiers from the input data, restricted to the
 * selected feature subset, and returns the feature weights. A one-versus-one
 * strategy is used to construct the classifiers in multiclass
 * classification.
 *
 * @param selectedFeature an array of indices of the selected feature subset
 *
 * @return an array of the weights of features
 */
protected double[][][] buildSVM_OneAgainstOne(int[] selectedFeature) {
    String nameDataCSV = TEMP_PATH + "dataCSV.csv";
    String nameDataARFF = TEMP_PATH + "dataARFF.arff";
    double[][][] weights = new double[numClass][numClass][selectedFeature.length];

    FileFunc.createCSVFile(trainSet, selectedFeature, nameDataCSV, nameFeatures, classLabel);
    FileFunc.convertCSVtoARFF(nameDataCSV, nameDataARFF, TEMP_PATH, selectedFeature.length, numFeatures,
            nameFeatures, numClass, classLabel);

    try {
        BufferedReader readerTrain = new BufferedReader(new FileReader(nameDataARFF));
        Instances dataTrain = new Instances(readerTrain);
        readerTrain.close();
        dataTrain.setClassIndex(dataTrain.numAttributes() - 1);

        SMO svm = new SMO();
        svm.setC(parameterC);
        svm.setKernel(WekaSVMKernel.parse(kernelType));
        svm.buildClassifier(dataTrain);

        for (int i = 0; i < numClass; i++) {
            for (int j = i + 1; j < numClass; j++) {
                double[] weightsSparse = svm.sparseWeights()[i][j];
                int[] indicesSparse = svm.sparseIndices()[i][j];
                for (int k = 0; k < weightsSparse.length; k++) {
                    weights[i][j][indicesSparse[k]] = weightsSparse[k];
                }
            }
        }
    } catch (Exception ex) {
        Logger.getLogger(SVMBasedMethods.class.getName()).log(Level.SEVERE, null, ex);
    }

    return weights;
}

From source file:KFST.featureSelection.embedded.SVMBasedMethods.SVMBasedMethods.java

License:Open Source License

/**
 * Generates binary SVM classifiers from the input data, restricted to the
 * selected feature subset, and returns the feature weights. A one-versus-all
 * strategy is used to construct the classifiers in multiclass
 * classification.
 *
 * @param selectedFeature an array of indices of the selected feature subset
 *
 * @return an array of the weights of features
 */
protected double[][] buildSVM_OneAgainstRest(int[] selectedFeature) {
    double[][] weights = new double[numClass][selectedFeature.length];
    String[] tempClassLabel = new String[] { "c1", "c2" };

    for (int indexClass = 0; indexClass < numClass; indexClass++) {
        double[][] copyTrainSet = ArraysFunc.copyDoubleArray2D(trainSet);
        String nameDataCSV = TEMP_PATH + "dataCSV" + indexClass + ".csv";
        String nameDataARFF = TEMP_PATH + "dataARFF" + indexClass + ".arff";

        for (double[] dataRow : copyTrainSet) {
            if (dataRow[numFeatures] == classLabelInTrainSet[indexClass]) {
                dataRow[numFeatures] = 0;
            } else {
                dataRow[numFeatures] = 1;
            }
        }

        FileFunc.createCSVFile(copyTrainSet, selectedFeature, nameDataCSV, nameFeatures, tempClassLabel);
        FileFunc.convertCSVtoARFF(nameDataCSV, nameDataARFF, TEMP_PATH, selectedFeature.length, numFeatures,
                nameFeatures, tempClassLabel.length, tempClassLabel);

        try {
            BufferedReader readerTrain = new BufferedReader(new FileReader(nameDataARFF));
            Instances dataTrain = new Instances(readerTrain);
            readerTrain.close();
            dataTrain.setClassIndex(dataTrain.numAttributes() - 1);

            SMO svm = new SMO();
            svm.setC(parameterC);
            svm.setKernel(WekaSVMKernel.parse(kernelType));
            svm.buildClassifier(dataTrain);

            double[] weightsSparse = svm.sparseWeights()[0][1];
            int[] indicesSparse = svm.sparseIndices()[0][1];
            for (int k = 0; k < weightsSparse.length; k++) {
                weights[indexClass][indicesSparse[k]] = weightsSparse[k];
            }
        } catch (Exception ex) {
            Logger.getLogger(SVMBasedMethods.class.getName()).log(Level.SEVERE, null, ex);
        }
    }

    return weights;
}

From source file:org.opentox.qsar.processors.trainers.classification.SVCTrainer.java

License:Open Source License

public QSARModel train(Instances data) throws QSARException {

    // GET A UUID AND DEFINE THE TEMPORARY FILE WHERE THE TRAINING DATA
    // ARE STORED IN ARFF FORMAT PRIOR TO TRAINING.
    final String rand = java.util.UUID.randomUUID().toString();
    final String temporaryFilePath = ServerFolders.temp + "/" + rand + ".arff";
    final File tempFile = new File(temporaryFilePath);

    // SAVE THE DATA IN THE TEMPORARY FILE
    try {
        ArffSaver dataSaver = new ArffSaver();
        dataSaver.setInstances(data);
        dataSaver.setDestination(new FileOutputStream(tempFile));
        dataSaver.writeBatch();
        if (!tempFile.exists()) {
            throw new IOException("Temporary File was not created");
        }
    } catch (final IOException ex) {
        // The dataset content could not be written to the destination file,
        // most likely due to a communication issue.
        tempFile.delete();
        throw new RuntimeException(
                "Unexpected condition while trying to save the " + "dataset in a temporary ARFF file", ex);
    }

    // INITIALIZE THE CLASSIFIER
    SMO classifier = new SMO();
    classifier.setEpsilon(0.1);
    classifier.setToleranceParameter(tolerance);

    // CONSTRUCT A KERNEL ACCORDING TO THE POSTED PARAMETERS
    // SUPPORTED KERNELS ARE {rbf, linear, polynomial}
    Kernel svc_kernel = null;
    if (this.kernel.equalsIgnoreCase("rbf")) {
        RBFKernel rbf_kernel = new RBFKernel();
        rbf_kernel.setGamma(gamma);
        rbf_kernel.setCacheSize(cacheSize);
        svc_kernel = rbf_kernel;
    } else if (this.kernel.equalsIgnoreCase("polynomial")) {
        PolyKernel poly_kernel = new PolyKernel();
        poly_kernel.setExponent(degree);
        poly_kernel.setCacheSize(cacheSize);
        poly_kernel.setUseLowerOrder(true);
        svc_kernel = poly_kernel;
    } else if (this.kernel.equalsIgnoreCase("linear")) {
        PolyKernel linear_kernel = new PolyKernel();
        linear_kernel.setExponent((double) 1.0);
        linear_kernel.setCacheSize(cacheSize);
        linear_kernel.setUseLowerOrder(true);
        svc_kernel = linear_kernel;
    }
    classifier.setKernel(svc_kernel);

    String modelFilePath = ServerFolders.models_weka + "/" + uuid.toString();
    String[] generalOptions = { "-c", Integer.toString(data.classIndex() + 1), "-t", temporaryFilePath,
            // save the serialized model to the following path
            "-d", modelFilePath };

    // AFTER ALL, BUILD THE CLASSIFICATION MODEL AND SAVE IT AS A SERIALIZED
    // WEKA FILE IN THE CORRESPONDING DIRECTORY.
    try {
        Evaluation.evaluateModel(classifier, generalOptions);
    } catch (final Exception ex) {
        tempFile.delete();
        throw new QSARException(Cause.XQReg350, "Unexpected condition while trying to train "
                + "a support vector classification model. Possible explanation : {" + ex.getMessage() + "}",
                ex);
    }

    ArrayList<Feature> independentFeatures = new ArrayList<Feature>();
    for (int i = 0; i < data.numAttributes(); i++) {
        Feature f = new Feature(data.attribute(i).name());
        if (data.classIndex() != i) {
            independentFeatures.add(f);
        }
    }

    Feature dependentFeature = new Feature(data.classAttribute().name());
    Feature predictedFeature = dependentFeature;

    QSARModel model = new QSARModel();
    model.setCode(uuid.toString());
    model.setAlgorithm(YaqpAlgorithms.SVC);
    model.setPredictionFeature(predictedFeature);
    model.setDependentFeature(dependentFeature);
    model.setIndependentFeatures(independentFeatures);
    model.setDataset(datasetUri);
    model.setParams(getParameters());
    model.setModelStatus(ModelStatus.UNDER_DEVELOPMENT);

    tempFile.delete();
    return model;
}