List of usage examples for weka.core.Instances.setClassIndex

public void setClassIndex(int classIndex)
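All of the examples below share one pattern: load a dataset, then mark one attribute (almost always the last one) as the class attribute, because Weka does not infer the class from an ARFF file. A minimal sketch of that pattern, assuming a placeholder file name iris.arff:

import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class SetClassIndexExample {
    public static void main(String[] args) throws Exception {
        // Load a dataset; "iris.arff" is a placeholder path.
        Instances data = DataSource.read("iris.arff");
        // Weka does not assume which attribute is the class;
        // by convention the last attribute is used.
        if (data.classIndex() == -1) {
            data.setClassIndex(data.numAttributes() - 1);
        }
        System.out.println("Class attribute: " + data.classAttribute().name());
    }
}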
From source file: kfst.classifier.WekaClassifier.java
License: Open Source License

/**
 * This method builds and evaluates the support vector machine (SVM)
 * classifier. SMO, as implemented in the Weka software, is used as the
 * SVM classifier.
 *
 * @param pathTrainData the path of the train set
 * @param pathTestData  the path of the test set
 * @param svmKernel     the kernel to use
 *
 * @return the classification accuracy
 */
public static double SVM(String pathTrainData, String pathTestData, String svmKernel) {
    double resultValue = 0;
    try {
        BufferedReader readerTrain = new BufferedReader(new FileReader(pathTrainData));
        Instances dataTrain = new Instances(readerTrain);
        readerTrain.close();
        dataTrain.setClassIndex(dataTrain.numAttributes() - 1);

        BufferedReader readerTest = new BufferedReader(new FileReader(pathTestData));
        Instances dataTest = new Instances(readerTest);
        readerTest.close();
        dataTest.setClassIndex(dataTest.numAttributes() - 1);

        SMO svm = new SMO();
        if (svmKernel.equals("Polynomial kernel")) {
            svm.setKernel(weka.classifiers.functions.supportVector.PolyKernel.class.newInstance());
        } else if (svmKernel.equals("RBF kernel")) {
            svm.setKernel(weka.classifiers.functions.supportVector.RBFKernel.class.newInstance());
        } else {
            svm.setKernel(weka.classifiers.functions.supportVector.Puk.class.newInstance());
        }
        svm.buildClassifier(dataTrain);

        Evaluation eval = new Evaluation(dataTest);
        eval.evaluateModel(svm, dataTest);
        resultValue = 100 - (eval.errorRate() * 100);
    } catch (Exception ex) {
        Logger.getLogger(WekaClassifier.class.getName()).log(Level.SEVERE, null, ex);
    }
    return resultValue;
}
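A call site for this helper might look like the following sketch; the ARFF paths are placeholders, and "RBF kernel" is one of the strings the method recognizes:

double accuracy = WekaClassifier.SVM("data/train.arff", "data/test.arff", "RBF kernel");
System.out.println("SVM accuracy: " + accuracy + "%");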
From source file: kfst.classifier.WekaClassifier.java
License: Open Source License

/**
 * This method builds and evaluates the naive Bayes (NB) classifier.
 * NaiveBayes, as implemented in the Weka software, is used as the NB
 * classifier.
 *
 * @param pathTrainData the path of the train set
 * @param pathTestData  the path of the test set
 *
 * @return the classification accuracy
 */
public static double naiveBayes(String pathTrainData, String pathTestData) {
    double resultValue = 0;
    try {
        BufferedReader readerTrain = new BufferedReader(new FileReader(pathTrainData));
        Instances dataTrain = new Instances(readerTrain);
        readerTrain.close();
        dataTrain.setClassIndex(dataTrain.numAttributes() - 1);

        BufferedReader readerTest = new BufferedReader(new FileReader(pathTestData));
        Instances dataTest = new Instances(readerTest);
        readerTest.close();
        dataTest.setClassIndex(dataTest.numAttributes() - 1);

        NaiveBayes nb = new NaiveBayes();
        nb.buildClassifier(dataTrain);

        Evaluation eval = new Evaluation(dataTest);
        eval.evaluateModel(nb, dataTest);
        resultValue = 100 - (eval.errorRate() * 100);
    } catch (Exception ex) {
        Logger.getLogger(WekaClassifier.class.getName()).log(Level.SEVERE, null, ex);
    }
    return resultValue;
}
From source file: kfst.classifier.WekaClassifier.java
License: Open Source License

/**
 * This method builds and evaluates the decision tree (DT) classifier.
 * J48, as implemented in the Weka software, is used as the DT classifier.
 *
 * @param pathTrainData      the path of the train set
 * @param pathTestData       the path of the test set
 * @param confidenceValue    the confidence factor used for pruning
 * @param minNumSampleInLeaf the minimum number of instances per leaf
 *
 * @return the classification accuracy
 */
public static double dTree(String pathTrainData, String pathTestData, double confidenceValue,
        int minNumSampleInLeaf) {
    double resultValue = 0;
    try {
        BufferedReader readerTrain = new BufferedReader(new FileReader(pathTrainData));
        Instances dataTrain = new Instances(readerTrain);
        readerTrain.close();
        dataTrain.setClassIndex(dataTrain.numAttributes() - 1);

        BufferedReader readerTest = new BufferedReader(new FileReader(pathTestData));
        Instances dataTest = new Instances(readerTest);
        readerTest.close();
        dataTest.setClassIndex(dataTest.numAttributes() - 1);

        J48 decisionTree = new J48();
        decisionTree.setConfidenceFactor((float) confidenceValue);
        decisionTree.setMinNumObj(minNumSampleInLeaf);
        decisionTree.buildClassifier(dataTrain);

        Evaluation eval = new Evaluation(dataTest);
        eval.evaluateModel(decisionTree, dataTest);
        resultValue = 100 - (eval.errorRate() * 100);
    } catch (Exception ex) {
        Logger.getLogger(WekaClassifier.class.getName()).log(Level.SEVERE, null, ex);
    }
    return resultValue;
}
From source file: KFST.featureSelection.embedded.SVMBasedMethods.MSVM_RFE.java
License: Open Source License

/**
 * Generates binary SVM classifiers from the input data and the selected
 * feature subset, using a k-fold cross-validation resampling strategy.
 *
 * @param selectedFeature an array of indices of the selected feature subset
 *
 * @return an array of the weights of features
 */
protected double[][] buildSVM_KFoldCrossValidation(int[] selectedFeature) {
    double[][] weights = new double[numRun * kFoldValue][selectedFeature.length];
    int classifier = 0;
    for (int i = 0; i < numRun; i++) {
        double[][] copyTrainSet = ArraysFunc.copyDoubleArray2D(trainSet);
        //shuffles the train set
        MathFunc.randomize(copyTrainSet);

        int numSampleInFold = copyTrainSet.length / kFoldValue;
        int remainder = copyTrainSet.length % kFoldValue;
        int indexStart = 0;
        for (int k = 0; k < kFoldValue; k++) {
            int indexEnd = indexStart + numSampleInFold;
            if (k < remainder) {
                indexEnd++;
            }
            double[][] subTrainSet = ArraysFunc.copyDoubleArray2D(copyTrainSet, indexStart, indexEnd);
            String nameDataCSV = TEMP_PATH + "dataCSV[" + i + "-" + k + "].csv";
            String nameDataARFF = TEMP_PATH + "dataARFF[" + i + "-" + k + "].arff";
            FileFunc.createCSVFile(subTrainSet, selectedFeature, nameDataCSV, nameFeatures, classLabel);
            FileFunc.convertCSVtoARFF(nameDataCSV, nameDataARFF, TEMP_PATH, selectedFeature.length,
                    numFeatures, nameFeatures, numClass, classLabel);
            try {
                BufferedReader readerTrain = new BufferedReader(new FileReader(nameDataARFF));
                Instances dataTrain = new Instances(readerTrain);
                readerTrain.close();
                dataTrain.setClassIndex(dataTrain.numAttributes() - 1);

                SMO svm = new SMO();
                svm.setC(parameterC);
                svm.setKernel(WekaSVMKernel.parse(kernelType));
                svm.buildClassifier(dataTrain);

                double[] weightsSparse = svm.sparseWeights()[0][1];
                int[] indicesSparse = svm.sparseIndices()[0][1];
                for (int m = 0; m < weightsSparse.length; m++) {
                    weights[classifier][indicesSparse[m]] = weightsSparse[m];
                }
            } catch (Exception ex) {
                Logger.getLogger(MSVM_RFE.class.getName()).log(Level.SEVERE, null, ex);
            }
            indexStart = indexEnd;
            classifier++;
        }
    }
    return weights;
}
From source file: KFST.featureSelection.embedded.SVMBasedMethods.SVMBasedMethods.java
License: Open Source License

/**
 * Generates binary SVM classifiers from the input data and the selected
 * feature subset, and returns the weights of the features. The
 * One-Versus-One strategy is used to construct the classifiers in
 * multiclass classification.
 *
 * @param selectedFeature an array of indices of the selected feature subset
 *
 * @return an array of the weights of features
 */
protected double[][][] buildSVM_OneAgainstOne(int[] selectedFeature) {
    String nameDataCSV = TEMP_PATH + "dataCSV.csv";
    String nameDataARFF = TEMP_PATH + "dataARFF.arff";
    double[][][] weights = new double[numClass][numClass][selectedFeature.length];

    FileFunc.createCSVFile(trainSet, selectedFeature, nameDataCSV, nameFeatures, classLabel);
    FileFunc.convertCSVtoARFF(nameDataCSV, nameDataARFF, TEMP_PATH, selectedFeature.length,
            numFeatures, nameFeatures, numClass, classLabel);
    try {
        BufferedReader readerTrain = new BufferedReader(new FileReader(nameDataARFF));
        Instances dataTrain = new Instances(readerTrain);
        readerTrain.close();
        dataTrain.setClassIndex(dataTrain.numAttributes() - 1);

        SMO svm = new SMO();
        svm.setC(parameterC);
        svm.setKernel(WekaSVMKernel.parse(kernelType));
        svm.buildClassifier(dataTrain);

        for (int i = 0; i < numClass; i++) {
            for (int j = i + 1; j < numClass; j++) {
                double[] weightsSparse = svm.sparseWeights()[i][j];
                int[] indicesSparse = svm.sparseIndices()[i][j];
                for (int k = 0; k < weightsSparse.length; k++) {
                    weights[i][j][indicesSparse[k]] = weightsSparse[k];
                }
            }
        }
    } catch (Exception ex) {
        Logger.getLogger(SVMBasedMethods.class.getName()).log(Level.SEVERE, null, ex);
    }
    return weights;
}
From source file: KFST.featureSelection.embedded.SVMBasedMethods.SVMBasedMethods.java
License: Open Source License

/**
 * Generates binary SVM classifiers from the input data and the selected
 * feature subset, and returns the weights of the features. The
 * One-Versus-All strategy is used to construct the classifiers in
 * multiclass classification.
 *
 * @param selectedFeature an array of indices of the selected feature subset
 *
 * @return an array of the weights of features
 */
protected double[][] buildSVM_OneAgainstRest(int[] selectedFeature) {
    double[][] weights = new double[numClass][selectedFeature.length];
    String[] tempClassLabel = new String[] { "c1", "c2" };

    for (int indexClass = 0; indexClass < numClass; indexClass++) {
        double[][] copyTrainSet = ArraysFunc.copyDoubleArray2D(trainSet);
        String nameDataCSV = TEMP_PATH + "dataCSV" + indexClass + ".csv";
        String nameDataARFF = TEMP_PATH + "dataARFF" + indexClass + ".arff";

        //relabels the data: the current class becomes 0, all other classes become 1
        for (double[] dataRow : copyTrainSet) {
            if (dataRow[numFeatures] == classLabelInTrainSet[indexClass]) {
                dataRow[numFeatures] = 0;
            } else {
                dataRow[numFeatures] = 1;
            }
        }

        FileFunc.createCSVFile(copyTrainSet, selectedFeature, nameDataCSV, nameFeatures, tempClassLabel);
        FileFunc.convertCSVtoARFF(nameDataCSV, nameDataARFF, TEMP_PATH, selectedFeature.length,
                numFeatures, nameFeatures, tempClassLabel.length, tempClassLabel);
        try {
            BufferedReader readerTrain = new BufferedReader(new FileReader(nameDataARFF));
            Instances dataTrain = new Instances(readerTrain);
            readerTrain.close();
            dataTrain.setClassIndex(dataTrain.numAttributes() - 1);

            SMO svm = new SMO();
            svm.setC(parameterC);
            svm.setKernel(WekaSVMKernel.parse(kernelType));
            svm.buildClassifier(dataTrain);

            double[] weightsSparse = svm.sparseWeights()[0][1];
            int[] indicesSparse = svm.sparseIndices()[0][1];
            for (int k = 0; k < weightsSparse.length; k++) {
                weights[indexClass][indicesSparse[k]] = weightsSparse[k];
            }
        } catch (Exception ex) {
            Logger.getLogger(SVMBasedMethods.class.getName()).log(Level.SEVERE, null, ex);
        }
    }
    return weights;
}
From source file: KFST.featureSelection.embedded.TreeBasedMethods.DecisionTreeBasedMethod.java
License: Open Source License

/**
 * Starts the feature selection process using decision tree based methods.
 */
@Override
public void evaluateFeatures() {
    FileFunc.createDirectory(TEMP_PATH);
    String nameDataCSV = TEMP_PATH + "dataCSV.csv";
    String nameDataARFF = TEMP_PATH + "dataARFF.arff";

    FileFunc.createCSVFile(trainSet, originalFeatureSet(), nameDataCSV, nameFeatures, classLabel);
    FileFunc.convertCSVtoARFF(nameDataCSV, nameDataARFF, TEMP_PATH, numFeatures, numFeatures,
            nameFeatures, numClass, classLabel);
    try {
        BufferedReader readerTrain = new BufferedReader(new FileReader(nameDataARFF));
        Instances dataTrain = new Instances(readerTrain);
        readerTrain.close();
        dataTrain.setClassIndex(dataTrain.numAttributes() - 1);
        selectedFeatureSubset(buildClassifier(dataTrain));
    } catch (Exception ex) {
        Logger.getLogger(DecisionTreeBasedMethod.class.getName()).log(Level.SEVERE, null, ex);
    }
    FileFunc.deleteDirectoryWithAllFiles(TEMP_PATH);
}
From source file: KFST.featureSelection.embedded.TreeBasedMethods.RandomForestMethod.java
License: Open Source License

/**
 * Starts the feature selection process using the random forest based method.
 */
@Override
public void evaluateFeatures() {
    FileFunc.createDirectory(TEMP_PATH);
    String nameDataCSV = TEMP_PATH + "dataCSV.csv";
    String nameDataARFF = TEMP_PATH + "dataARFF.arff";

    FileFunc.createCSVFile(trainSet, originalFeatureSet(), nameDataCSV, nameFeatures, classLabel);
    FileFunc.convertCSVtoARFF(nameDataCSV, nameDataARFF, TEMP_PATH, numFeatures, numFeatures,
            nameFeatures, numClass, classLabel);
    try {
        BufferedReader readerTrain = new BufferedReader(new FileReader(nameDataARFF));
        Instances dataTrain = new Instances(readerTrain);
        readerTrain.close();
        dataTrain.setClassIndex(dataTrain.numAttributes() - 1);
        selectedFeatureSubset(buildClassifier(dataTrain));
    } catch (Exception ex) {
        Logger.getLogger(RandomForestMethod.class.getName()).log(Level.SEVERE, null, ex);
    }
    FileFunc.deleteDirectoryWithAllFiles(TEMP_PATH);
}
From source file: kmeans.MyKMeans.java

/**
 * Loads an ARFF data set from the given path and marks the last attribute
 * as the class attribute. Returns null if loading fails.
 */
Instances loadData(String filePath) {
    BufferedReader reader;
    Instances data = null;
    try {
        reader = new BufferedReader(new FileReader(filePath));
        data = new Instances(reader);
        reader.close();
        data.setClassIndex(data.numAttributes() - 1);
    } catch (Exception e) {
        // Report the failure instead of silently swallowing it;
        // callers still receive null on error.
        e.printStackTrace();
    }
    return data;
}
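A hypothetical call, with data.arff as a placeholder path:

Instances data = new MyKMeans().loadData("data.arff");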
From source file: knnclassifier.Main.java

public static void main(String[] args) throws Exception {
    // 'file' is a field defined elsewhere in this class.
    DataSource source = new DataSource(file);
    Instances dataSet = source.getDataSet();

    // Set up the data: the last attribute is the class, then shuffle.
    dataSet.setClassIndex(dataSet.numAttributes() - 1);
    dataSet.randomize(new Random());

    // 70/30 train/test split.
    int trainingSize = (int) Math.round(dataSet.numInstances() * .7);
    int testSize = dataSet.numInstances() - trainingSize;
    Instances training = new Instances(dataSet, 0, trainingSize);
    Instances test = new Instances(dataSet, trainingSize, testSize);

    // Standardize: filter the training data first, because a batch filter
    // such as Standardize computes its statistics from the first batch it
    // processes. (The original filtered the test set first, which would base
    // the statistics on the test data.)
    Standardize standardizedData = new Standardize();
    standardizedData.setInputFormat(training);
    Instances newTraining = Filter.useFilter(training, standardizedData);
    Instances newTest = Filter.useFilter(test, standardizedData);

    KNNClassifier knn = new KNNClassifier();
    knn.buildClassifier(newTraining);

    Evaluation eval = new Evaluation(newTraining);
    eval.evaluateModel(knn, newTest);
    System.out.println(eval.toSummaryString("\nResults\n======\n", false));
}