List of usage examples for weka.classifiers.functions.SMO.setKernel
public void setKernel(Kernel value)
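Before the examples, a minimal self-contained sketch of the call pattern they all share: construct and configure a Kernel, then hand it to setKernel before calling buildClassifier. The dataset path and gamma value below are illustrative assumptions, not taken from any of the examples:

import weka.classifiers.functions.SMO;
import weka.classifiers.functions.supportVector.RBFKernel;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class SMOSetKernelExample {
    public static void main(String[] args) throws Exception {
        // "data.arff" is a placeholder path; the last attribute is assumed to be the class.
        Instances data = DataSource.read("data.arff");
        data.setClassIndex(data.numAttributes() - 1);

        // Configure the kernel first, then pass it to the classifier.
        RBFKernel kernel = new RBFKernel();
        kernel.setGamma(0.01); // assumed value; tune for the dataset at hand

        SMO smo = new SMO();
        smo.setKernel(kernel); // must be set before buildClassifier
        smo.buildClassifier(data);
    }
}

By default SMO also replaces missing values, normalizes the data, and binarizes nominal attributes, so the kernel (together with C) is usually the main thing to configure, as the examples below illustrate.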
From source file: com.edwardraff.WekaMNIST.java
License: Open Source License
public static void main(String[] args) throws Exception {
    String folder = args[0];
    String trainPath = folder + "MNISTtrain.arff";
    String testPath = folder + "MNISTtest.arff";

    System.out.println("Weka Timings");
    Instances mnistTrainWeka = new Instances(new BufferedReader(new FileReader(new File(trainPath))));
    mnistTrainWeka.setClassIndex(mnistTrainWeka.numAttributes() - 1);
    Instances mnistTestWeka = new Instances(new BufferedReader(new FileReader(new File(testPath))));
    mnistTestWeka.setClassIndex(mnistTestWeka.numAttributes() - 1);

    // normalize attribute ranges into [0, 1]
    Normalize normalizeFilter = new Normalize();
    normalizeFilter.setInputFormat(mnistTrainWeka);
    mnistTestWeka = Normalize.useFilter(mnistTestWeka, normalizeFilter);
    mnistTrainWeka = Normalize.useFilter(mnistTrainWeka, normalizeFilter);

    long start, end;

    System.out.println("RBF SVM (Full Cache)");
    SMO smo = new SMO();
    smo.setKernel(new RBFKernel(mnistTrainWeka, 0 /* 0 causes Weka to cache the whole matrix */, 0.015625));
    smo.setC(8.0);
    smo.setBuildLogisticModels(false);
    evalModel(smo, mnistTrainWeka, mnistTestWeka);

    System.out.println("RBF SVM (No Cache)");
    smo = new SMO();
    smo.setKernel(new RBFKernel(mnistTrainWeka, 1, 0.015625));
    smo.setC(8.0);
    smo.setBuildLogisticModels(false);
    evalModel(smo, mnistTrainWeka, mnistTestWeka);

    System.out.println("Decision Tree C45");
    J48 wekaC45 = new J48();
    wekaC45.setUseLaplace(false);
    wekaC45.setCollapseTree(false);
    wekaC45.setUnpruned(true);
    wekaC45.setMinNumObj(2);
    wekaC45.setUseMDLcorrection(true);
    evalModel(wekaC45, mnistTrainWeka, mnistTestWeka);

    System.out.println("Random Forest 50 trees");
    // Weka uses different defaults, so let's make sure both use the published way
    int featuresToUse = (int) Math.sqrt(28 * 28);
    RandomForest wekaRF = new RandomForest();
    wekaRF.setNumExecutionSlots(1);
    wekaRF.setMaxDepth(0 /* 0 for unlimited */);
    wekaRF.setNumFeatures(featuresToUse);
    wekaRF.setNumTrees(50);
    evalModel(wekaRF, mnistTrainWeka, mnistTestWeka);

    System.out.println("1-NN (brute)");
    IBk wekaNN = new IBk(1);
    wekaNN.setNearestNeighbourSearchAlgorithm(new LinearNNSearch());
    wekaNN.setCrossValidate(false);
    evalModel(wekaNN, mnistTrainWeka, mnistTestWeka);

    System.out.println("1-NN (Ball Tree)");
    wekaNN = new IBk(1);
    wekaNN.setNearestNeighbourSearchAlgorithm(new BallTree());
    wekaNN.setCrossValidate(false);
    evalModel(wekaNN, mnistTrainWeka, mnistTestWeka);

    System.out.println("1-NN (Cover Tree)");
    wekaNN = new IBk(1);
    wekaNN.setNearestNeighbourSearchAlgorithm(new CoverTree());
    wekaNN.setCrossValidate(false);
    evalModel(wekaNN, mnistTrainWeka, mnistTestWeka);

    System.out.println("Logistic Regression LBFGS lambda = 1e-4");
    Logistic logisticLBFGS = new Logistic();
    logisticLBFGS.setRidge(1e-4);
    logisticLBFGS.setMaxIts(500);
    evalModel(logisticLBFGS, mnistTrainWeka, mnistTestWeka);

    System.out.println("k-means (Lloyd)");
    int origClassIndex = mnistTrainWeka.classIndex();
    mnistTrainWeka.setClassIndex(-1);
    mnistTrainWeka.deleteAttributeAt(origClassIndex);
    {
        long totalTime = 0;
        for (int i = 0; i < 10; i++) {
            SimpleKMeans wekaKMeans = new SimpleKMeans();
            wekaKMeans.setNumClusters(10);
            wekaKMeans.setNumExecutionSlots(1);
            wekaKMeans.setFastDistanceCalc(true);
            start = System.currentTimeMillis();
            wekaKMeans.buildClusterer(mnistTrainWeka);
            end = System.currentTimeMillis();
            totalTime += (end - start);
        }
        System.out.println("\tClustering took: " + (totalTime / 10.0) / 1000.0 + " seconds on average");
    }
}
From source file: development.CrossValidateShapelets.java
public static ArrayList<Classifier> setSingleClassifiers(ArrayList<String> names) {
    ArrayList<Classifier> sc = new ArrayList<>();

    kNN n = new kNN(50);
    n.setCrossValidate(true);
    sc.add(n);
    names.add("kNN");

    sc.add(new J48());
    names.add("C45");

    sc.add(new NaiveBayes());
    names.add("NB");

    BayesNet bn = new BayesNet();
    sc.add(bn);
    names.add("BayesNet");

    RandomForest rf = new RandomForest();
    rf.setNumTrees(200);
    sc.add(rf);
    names.add("RandForest");

    RotationForest rot = new RotationForest();
    rot.setNumIterations(30);
    sc.add(rot);
    names.add("RotForest");

    SMO svmL = new SMO();
    PolyKernel kernel = new PolyKernel();
    kernel.setExponent(1);
    svmL.setKernel(kernel);
    sc.add(svmL);
    names.add("SVML");

    kernel = new PolyKernel();
    kernel.setExponent(2);
    SMO svmQ = new SMO();
    svmQ.setKernel(kernel);
    sc.add(svmQ);
    names.add("SVMQ");

    return sc;
}
From source file: etc.aloe.oilspill2010.TrainingImpl.java
@Override
public WekaModel train(ExampleSet examples) {
    // These settings aren't terrible
    SMO smo = new SMO();
    RBFKernel rbf = new RBFKernel();
    rbf.setGamma(0.5);
    smo.setKernel(rbf);
    smo.setC(1.5);

    // These also work pretty ok
    Logistic log = new Logistic();
    log.setRidge(100);

    Classifier classifier = log;

    try {
        System.out.print("Training on " + examples.size() + " examples... ");
        classifier.buildClassifier(examples.getInstances());
        System.out.println("done.");

        WekaModel model = new WekaModel(classifier);
        return model;
    } catch (Exception ex) {
        System.err.println("Unable to train classifier.");
        System.err.println("\t" + ex.getMessage());
        return null;
    }
}
From source file: jjj.asap.sas.models1.job.BuildRBFKernelModels.java
License: Open Source License
@Override
protected void run() throws Exception {
    // validate args
    if (!Bucket.isBucket("datasets", inputBucket)) {
        throw new FileNotFoundException(inputBucket);
    }
    if (!Bucket.isBucket("models", outputBucket)) {
        throw new FileNotFoundException(outputBucket);
    }

    // init multi-threading
    Job.startService();
    final Queue<Future<Object>> queue = new LinkedList<Future<Object>>();

    // get the input from the bucket
    List<String> names = Bucket.getBucketItems("datasets", this.inputBucket);
    for (String dsn : names) {
        SMO smo = new SMO();
        smo.setFilterType(new SelectedTag(SMO.FILTER_NONE, SMO.TAGS_FILTER));
        smo.setBuildLogisticModels(true);
        RBFKernel kernel = new RBFKernel();
        kernel.setGamma(0.05);
        smo.setKernel(kernel);

        AttributeSelectedClassifier asc = new AttributeSelectedClassifier();
        asc.setEvaluator(new InfoGainAttributeEval());
        Ranker ranker = new Ranker();
        ranker.setThreshold(0.01);
        asc.setSearch(ranker);
        asc.setClassifier(smo);

        queue.add(Job.submit(new ModelBuilder(dsn, "InfoGain-SMO-RBFKernel", asc, this.outputBucket)));
    }

    // wait on complete
    Progress progress = new Progress(queue.size(), this.getClass().getSimpleName());
    while (!queue.isEmpty()) {
        try {
            queue.remove().get();
        } catch (Exception e) {
            Job.log("ERROR", e.toString());
        }
        progress.tick();
    }
    progress.done();
    Job.stopService();
}
From source file: kfst.classifier.WekaClassifier.java
License: Open Source License
/**
 * This method builds and evaluates the support vector machine (SVM)
 * classifier. SMO is used as the SVM classifier, as implemented in the
 * Weka software.
 *
 * @param pathTrainData the path of the train set
 * @param pathTestData the path of the test set
 * @param svmKernel the kernel to use
 *
 * @return the classification accuracy
 */
public static double SVM(String pathTrainData, String pathTestData, String svmKernel) {
    double resultValue = 0;
    try {
        BufferedReader readerTrain = new BufferedReader(new FileReader(pathTrainData));
        Instances dataTrain = new Instances(readerTrain);
        readerTrain.close();
        dataTrain.setClassIndex(dataTrain.numAttributes() - 1);

        BufferedReader readerTest = new BufferedReader(new FileReader(pathTestData));
        Instances dataTest = new Instances(readerTest);
        readerTest.close();
        dataTest.setClassIndex(dataTest.numAttributes() - 1);

        SMO svm = new SMO();
        if (svmKernel.equals("Polynomial kernel")) {
            svm.setKernel(weka.classifiers.functions.supportVector.PolyKernel.class.newInstance());
        } else if (svmKernel.equals("RBF kernel")) {
            svm.setKernel(weka.classifiers.functions.supportVector.RBFKernel.class.newInstance());
        } else {
            svm.setKernel(weka.classifiers.functions.supportVector.Puk.class.newInstance());
        }
        svm.buildClassifier(dataTrain);

        Evaluation eval = new Evaluation(dataTest);
        eval.evaluateModel(svm, dataTest);
        resultValue = 100 - (eval.errorRate() * 100);
    } catch (Exception ex) {
        Logger.getLogger(WekaClassifier.class.getName()).log(Level.SEVERE, null, ex);
    }
    return resultValue;
}
From source file: KFST.featureSelection.embedded.SVMBasedMethods.MSVM_RFE.java
License: Open Source License
/**
 * generates binary classifiers (SVM, trained with a k-fold cross-validation
 * resampling strategy) using the input data and the selected feature
 * subset.
 *
 * @param selectedFeature an array of indices of the selected feature subset
 *
 * @return an array of the weights of features
 */
protected double[][] buildSVM_KFoldCrossValidation(int[] selectedFeature) {
    double[][] weights = new double[numRun * kFoldValue][selectedFeature.length];
    int classifier = 0;

    for (int i = 0; i < numRun; i++) {
        double[][] copyTrainSet = ArraysFunc.copyDoubleArray2D(trainSet);

        // shuffles the train set
        MathFunc.randomize(copyTrainSet);

        int numSampleInFold = copyTrainSet.length / kFoldValue;
        int remainder = copyTrainSet.length % kFoldValue;
        int indexStart = 0;
        for (int k = 0; k < kFoldValue; k++) {
            int indexEnd = indexStart + numSampleInFold;
            if (k < remainder) {
                indexEnd++;
            }
            double[][] subTrainSet = ArraysFunc.copyDoubleArray2D(copyTrainSet, indexStart, indexEnd);
            String nameDataCSV = TEMP_PATH + "dataCSV[" + i + "-" + k + "].csv";
            String nameDataARFF = TEMP_PATH + "dataARFF[" + i + "-" + k + "].arff";
            FileFunc.createCSVFile(subTrainSet, selectedFeature, nameDataCSV, nameFeatures, classLabel);
            FileFunc.convertCSVtoARFF(nameDataCSV, nameDataARFF, TEMP_PATH, selectedFeature.length, numFeatures,
                    nameFeatures, numClass, classLabel);
            try {
                BufferedReader readerTrain = new BufferedReader(new FileReader(nameDataARFF));
                Instances dataTrain = new Instances(readerTrain);
                readerTrain.close();
                dataTrain.setClassIndex(dataTrain.numAttributes() - 1);

                SMO svm = new SMO();
                svm.setC(parameterC);
                svm.setKernel(WekaSVMKernel.parse(kernelType));
                svm.buildClassifier(dataTrain);

                double[] weightsSparse = svm.sparseWeights()[0][1];
                int[] indicesSparse = svm.sparseIndices()[0][1];
                for (int m = 0; m < weightsSparse.length; m++) {
                    weights[classifier][indicesSparse[m]] = weightsSparse[m];
                }
            } catch (Exception ex) {
                Logger.getLogger(MSVM_RFE.class.getName()).log(Level.SEVERE, null, ex);
            }
            indexStart = indexEnd;
            classifier++;
        }
    }
    return weights;
}
From source file: KFST.featureSelection.embedded.SVMBasedMethods.SVMBasedMethods.java
License: Open Source License
/**
 * generates binary classifiers (SVM) using the input data and the selected
 * feature subset, and finally returns the weights of features.
 * The One-Versus-One strategy is used to construct classifiers in
 * multiclass classification.
 *
 * @param selectedFeature an array of indices of the selected feature subset
 *
 * @return an array of the weights of features
 */
protected double[][][] buildSVM_OneAgainstOne(int[] selectedFeature) {
    String nameDataCSV = TEMP_PATH + "dataCSV.csv";
    String nameDataARFF = TEMP_PATH + "dataARFF.arff";
    double[][][] weights = new double[numClass][numClass][selectedFeature.length];

    FileFunc.createCSVFile(trainSet, selectedFeature, nameDataCSV, nameFeatures, classLabel);
    FileFunc.convertCSVtoARFF(nameDataCSV, nameDataARFF, TEMP_PATH, selectedFeature.length, numFeatures,
            nameFeatures, numClass, classLabel);
    try {
        BufferedReader readerTrain = new BufferedReader(new FileReader(nameDataARFF));
        Instances dataTrain = new Instances(readerTrain);
        readerTrain.close();
        dataTrain.setClassIndex(dataTrain.numAttributes() - 1);

        SMO svm = new SMO();
        svm.setC(parameterC);
        svm.setKernel(WekaSVMKernel.parse(kernelType));
        svm.buildClassifier(dataTrain);

        for (int i = 0; i < numClass; i++) {
            for (int j = i + 1; j < numClass; j++) {
                double[] weightsSparse = svm.sparseWeights()[i][j];
                int[] indicesSparse = svm.sparseIndices()[i][j];
                for (int k = 0; k < weightsSparse.length; k++) {
                    weights[i][j][indicesSparse[k]] = weightsSparse[k];
                }
            }
        }
    } catch (Exception ex) {
        Logger.getLogger(SVMBasedMethods.class.getName()).log(Level.SEVERE, null, ex);
    }
    return weights;
}
From source file: KFST.featureSelection.embedded.SVMBasedMethods.SVMBasedMethods.java
License: Open Source License
/**
 * generates binary classifiers (SVM) using the input data and the selected
 * feature subset, and finally returns the weights of features.
 * The One-Versus-All strategy is used to construct classifiers in
 * multiclass classification.
 *
 * @param selectedFeature an array of indices of the selected feature subset
 *
 * @return an array of the weights of features
 */
protected double[][] buildSVM_OneAgainstRest(int[] selectedFeature) {
    double[][] weights = new double[numClass][selectedFeature.length];
    String[] tempClassLabel = new String[] { "c1", "c2" };

    for (int indexClass = 0; indexClass < numClass; indexClass++) {
        double[][] copyTrainSet = ArraysFunc.copyDoubleArray2D(trainSet);
        String nameDataCSV = TEMP_PATH + "dataCSV" + indexClass + ".csv";
        String nameDataARFF = TEMP_PATH + "dataARFF" + indexClass + ".arff";

        // relabel the current class as 0 and all other classes as 1
        for (double[] dataRow : copyTrainSet) {
            if (dataRow[numFeatures] == classLabelInTrainSet[indexClass]) {
                dataRow[numFeatures] = 0;
            } else {
                dataRow[numFeatures] = 1;
            }
        }

        FileFunc.createCSVFile(copyTrainSet, selectedFeature, nameDataCSV, nameFeatures, tempClassLabel);
        FileFunc.convertCSVtoARFF(nameDataCSV, nameDataARFF, TEMP_PATH, selectedFeature.length, numFeatures,
                nameFeatures, tempClassLabel.length, tempClassLabel);
        try {
            BufferedReader readerTrain = new BufferedReader(new FileReader(nameDataARFF));
            Instances dataTrain = new Instances(readerTrain);
            readerTrain.close();
            dataTrain.setClassIndex(dataTrain.numAttributes() - 1);

            SMO svm = new SMO();
            svm.setC(parameterC);
            svm.setKernel(WekaSVMKernel.parse(kernelType));
            svm.buildClassifier(dataTrain);

            double[] weightsSparse = svm.sparseWeights()[0][1];
            int[] indicesSparse = svm.sparseIndices()[0][1];
            for (int k = 0; k < weightsSparse.length; k++) {
                weights[indexClass][indicesSparse[k]] = weightsSparse[k];
            }
        } catch (Exception ex) {
            Logger.getLogger(SVMBasedMethods.class.getName()).log(Level.SEVERE, null, ex);
        }
    }
    return weights;
}
From source file: org.opentox.qsar.processors.trainers.classification.SVCTrainer.java
License: Open Source License
public QSARModel train(Instances data) throws QSARException {
    // GET A UUID AND DEFINE THE TEMPORARY FILE WHERE THE TRAINING DATA
    // ARE STORED IN ARFF FORMAT PRIOR TO TRAINING.
    final String rand = java.util.UUID.randomUUID().toString();
    final String temporaryFilePath = ServerFolders.temp + "/" + rand + ".arff";
    final File tempFile = new File(temporaryFilePath);

    // SAVE THE DATA IN THE TEMPORARY FILE
    try {
        ArffSaver dataSaver = new ArffSaver();
        dataSaver.setInstances(data);
        dataSaver.setDestination(new FileOutputStream(tempFile));
        dataSaver.writeBatch();
        if (!tempFile.exists()) {
            throw new IOException("Temporary File was not created");
        }
    } catch (final IOException ex) {
        /*
         * The content of the dataset cannot be written to the
         * destination file due to some communication issue.
         */
        tempFile.delete();
        throw new RuntimeException(
                "Unexpected condition while trying to save the dataset in a temporary ARFF file", ex);
    }

    // INITIALIZE THE CLASSIFIER
    SMO classifier = new SMO();
    classifier.setEpsilon(0.1);
    classifier.setToleranceParameter(tolerance);

    // CONSTRUCT A KERNEL ACCORDING TO THE POSTED PARAMETERS
    // SUPPORTED KERNELS ARE {rbf, linear, polynomial}
    Kernel svc_kernel = null;
    if (this.kernel.equalsIgnoreCase("rbf")) {
        RBFKernel rbf_kernel = new RBFKernel();
        rbf_kernel.setGamma(gamma);
        rbf_kernel.setCacheSize(cacheSize);
        svc_kernel = rbf_kernel;
    } else if (this.kernel.equalsIgnoreCase("polynomial")) {
        PolyKernel poly_kernel = new PolyKernel();
        poly_kernel.setExponent(degree);
        poly_kernel.setCacheSize(cacheSize);
        poly_kernel.setUseLowerOrder(true);
        svc_kernel = poly_kernel;
    } else if (this.kernel.equalsIgnoreCase("linear")) {
        PolyKernel linear_kernel = new PolyKernel();
        linear_kernel.setExponent(1.0);
        linear_kernel.setCacheSize(cacheSize);
        linear_kernel.setUseLowerOrder(true);
        svc_kernel = linear_kernel;
    }
    classifier.setKernel(svc_kernel);

    String modelFilePath = ServerFolders.models_weka + "/" + uuid.toString();
    String[] generalOptions = { "-c", Integer.toString(data.classIndex() + 1), "-t", temporaryFilePath,
            // Save the model in the following directory
            "-d", modelFilePath };

    // AFTER ALL, BUILD THE CLASSIFICATION MODEL AND SAVE IT AS A SERIALIZED
    // WEKA FILE IN THE CORRESPONDING DIRECTORY.
    try {
        Evaluation.evaluateModel(classifier, generalOptions);
    } catch (final Exception ex) {
        tempFile.delete();
        throw new QSARException(Cause.XQReg350,
                "Unexpected condition while trying to train a support vector classification model. "
                        + "Possible explanation : {" + ex.getMessage() + "}", ex);
    }

    ArrayList<Feature> independentFeatures = new ArrayList<Feature>();
    for (int i = 0; i < data.numAttributes(); i++) {
        Feature f = new Feature(data.attribute(i).name());
        if (data.classIndex() != i) {
            independentFeatures.add(f);
        }
    }

    Feature dependentFeature = new Feature(data.classAttribute().name());
    Feature predictedFeature = dependentFeature;

    QSARModel model = new QSARModel();
    model.setCode(uuid.toString());
    model.setAlgorithm(YaqpAlgorithms.SVC);
    model.setPredictionFeature(predictedFeature);
    model.setDependentFeature(dependentFeature);
    model.setIndependentFeatures(independentFeatures);
    model.setDataset(datasetUri);
    model.setParams(getParameters());
    model.setModelStatus(ModelStatus.UNDER_DEVELOPMENT);

    tempFile.delete();
    return model;
}